def test_basic(self):
    """Checks a Syncer round trip between a remote and a local temp dir.

    The remote dir is seeded with one file, which must be present locally
    right after the Syncer is built. Files written locally afterwards must
    appear (and later be updated) remotely once a sync is kicked.
    """
    with tempfile.TemporaryDirectory() as remotedir, \
            tempfile.TemporaryDirectory() as localdir:
        remote_f0 = remotedir + '/f0.txt'
        remote_f1 = remotedir + '/f1.txt'
        local_f0 = localdir + '/f0.txt'
        local_f1 = localdir + '/f1.txt'

        # Seed the remote side before the syncer exists.
        with open(remote_f0, 'w') as seed_file:
            seed_file.write('hello0')

        syncer = tffilesync.Syncer(remotedir, localdir)

        # No explicit sync yet: the seeded file must already be local.
        self.assertListEqual(gfile.listdir(localdir), ['f0.txt'])
        self.assertEqual(_read_file(local_f0), 'hello0')

        # A new local file is pushed to the remote dir by a sync.
        with open(local_f1, 'w') as new_file:
            new_file.write('hello1')
        _kick_sync(syncer)
        self.assertListEqual(
            sorted(gfile.listdir(localdir)), ['f0.txt', 'f1.txt'])
        self.assertListEqual(
            sorted(gfile.listdir(remotedir)), ['f0.txt', 'f1.txt'])
        self.assertEqual(_read_file(remote_f0), 'hello0')
        self.assertEqual(_read_file(remote_f1), 'hello1')

        # Presumably the pause gives the rewrite a distinguishable
        # modification time -- TODO confirm against Syncer.
        time.sleep(2)
        with open(local_f1, 'w') as changed_file:
            changed_file.write('hello2')
        _kick_sync(syncer)
        self.assertEqual(_read_file(remote_f1), 'hello2')
        self.assertListEqual(
            sorted(gfile.listdir(remotedir)), ['f0.txt', 'f1.txt'])

        syncer.stop()
def test_creates_new_trajectory_dirs(self):
    """Checks that each env.reset() adds the next numbered dir to output_dir."""
    output_dir = self.get_temp_dir()
    env = self._create_env(output_dir=output_dir)

    def listed_dirs():
        return set(gfile.listdir(output_dir))

    # Nothing is created until the first reset.
    self.assertEqual(listed_dirs(), set())

    for expected_dirs in ({"0"}, {"0", "1"}):
        env.reset()
        self.assertEqual(listed_dirs(), expected_dirs)
def __init__(self, datapath):
    """
    Set up the loader and index the content / style image paths.

    :param string datapath: filepath to training images. A value starting
        with 'gs://' is treated as a bucket whose 'content' and 'style'
        sub-paths are listed; anything else is walked as a local tree.
    """
    self.datapath = datapath
    self.im_shape = (None, None, 3)
    self.crop_im_shape = (256, 256, 3)
    self.total_imgs = None
    self.k = 3
    self.vgg = self.build_vgg()
    print('Initiating DataLoader with data from {}'.format(datapath))

    if self.datapath.startswith('gs://'):
        # Bucket layout: every entry listed under content/ and style/ is used.
        self.content_bucket = os.path.join(self.datapath, 'content')
        self.style_bucket = os.path.join(self.datapath, 'style')
        print('Content bucket: ', self.content_bucket)
        print('Style bucket: ', self.style_bucket)
        self.content_img_paths = [
            os.path.join(self.content_bucket, entry)
            for entry in listdir(self.content_bucket)
        ]
        self.style_img_paths = [
            os.path.join(self.style_bucket, entry)
            for entry in listdir(self.style_bucket)
        ]
    else:
        # Local layout: walk the tree and sort image files by the name of
        # the directory they were found in.
        self.style_img_paths = []
        self.content_img_paths = []
        image_markers = ['jpeg', 'png', 'jpg']
        for dirpath, _, filenames in os.walk(self.datapath):
            for filename in filenames:
                lowered = filename.lower()
                # Substring match, not an extension check.
                if not any(marker in lowered for marker in image_markers):
                    continue
                full_path = os.path.join(dirpath, filename)
                if 'content' in dirpath:
                    self.content_img_paths.append(full_path)
                elif 'style' in dirpath:
                    self.style_img_paths.append(full_path)

    # Shared summary for both data sources.
    self.num_content_pics = len(self.content_img_paths)
    self.num_style_pics = len(self.style_img_paths)
    print(">> Found {} content images in dataset".format(
        self.num_content_pics))
    print(">> Found {} style images in dataset".format(
        self.num_style_pics))
def _next_trajectory_dir(self):
    """Assigns a new output dir for a trajectory under self._output_dir.

    Directory names are consecutive integers starting from zero. New directory
    index is assigned as the maximum of past indices plus one. Directories that
    are not integers are ignored.

    Returns:
      A path of the new directory.
    """
    trajectory_dirs = gfile.listdir(self._output_dir)

    def int_or_none(s):
        try:
            # Some gfile backends return directory entries with a trailing
            # slash (https://github.com/tensorflow/tensorflow/issues/39167).
            return int(s.rstrip('/'))
        except (TypeError, ValueError):
            # int() raises ValueError for a non-numeric string; such entries
            # are ignored, as documented above.
            return None

    past_trajectory_ids = [
        trajectory_id
        for trajectory_id in map(int_or_none, trajectory_dirs)
        if trajectory_id is not None
    ]
    # The -1 sentinel makes the first directory "0" when none exist yet.
    next_trajectory_id = max([-1] + past_trajectory_ids) + 1
    return os.path.join(self._output_dir, str(next_trajectory_id))
def maybe_pick_models_to_evaluate(checkpoint_dir):
    """Return the first checkpoint in checkpoint_dir that is not yet evaluated.

    Checkpoints are the entries of `checkpoint_dir` ending in ".meta", with
    that suffix removed. A checkpoint counts as evaluated when it appears as
    the first comma-separated field of a line in the EVAL_FILENAME file of
    the same directory.

    Args:
      checkpoint_dir: Directory holding the checkpoints and, optionally, the
        evaluation record file.

    Returns:
      `checkpoint_dir` joined with the first unevaluated checkpoint name, or
      None when every checkpoint has been evaluated.
    """
    logging.info("Picking checkpoint to evaluate from %s.", checkpoint_dir)

    meta_suffix = ".meta"
    filenames = [
        entry[:-len(meta_suffix)]
        for entry in gfile.listdir(checkpoint_dir)
        if entry.endswith(meta_suffix)
    ]
    logging.info("Found existing checkpoints: %s", filenames)

    evaluated_filenames = []
    eval_record_path = os.path.join(checkpoint_dir, EVAL_FILENAME)
    if gfile.exists(eval_record_path):
        with gfile.GFile(eval_record_path, "r") as record:
            for line in record.readlines():
                evaluated_filenames.append(line.strip().split(",")[0])
    logging.info("Found already evaluated checkpoints: %s",
                 evaluated_filenames)

    checkpoints_to_evaluate = []
    for name in filenames:
        if name not in evaluated_filenames:
            checkpoints_to_evaluate.append(name)
    logging.info("Remaining potential checkpoints: %s",
                 checkpoints_to_evaluate)

    if not checkpoints_to_evaluate:
        return None
    return os.path.join(checkpoint_dir, checkpoints_to_evaluate[0])
def listdir_remote(path):
    """
    List the entries of `path`.

    Paths for which `is_remote_path` is true are listed through gfile;
    every other path is listed with `os.listdir`.
    """
    lister = gfile.listdir if is_remote_path(path) else os.listdir
    return lister(path)
def ls(self, path: str, recursive=False) -> List[File]:
    """List the files (never directories) found at `path`.

    Args:
      path: File or directory to inspect.
      recursive: When true, descend into sub-directories via `gfile.walk`;
        otherwise only direct children of `path` are considered.

    Returns:
      An empty list if `path` does not exist, a single-element list if
      `path` is a file, else one `File` per non-directory entry.
    """

    def _describe(file_path: str):
        info = gfile.stat(file_path)
        return File(
            path=file_path,
            size=info.length,
            # Nanoseconds -> whole seconds.
            mtime=int(info.mtime_nsec / 1e9))

    if not gfile.exists(path):
        return []

    # A plain file is reported on its own.
    if not gfile.isdir(path):
        return [_describe(path)]

    if recursive:
        candidates = (
            os.path.join(root, name)
            for root, _, names in gfile.walk(path)
            for name in names)
    else:
        candidates = (os.path.join(path, name) for name in gfile.listdir(path))

    # Files only
    return [
        _describe(candidate)
        for candidate in candidates
        if not gfile.isdir(candidate)
    ]
def _list_dir(dir_path: str) -> _DirEntries:
    """Map every entry name in `dir_path` to a `_FileStat` of its gfile stat.

    Only the direct children returned by `gfile.listdir` are included; the
    length, mtime_nsec and is_directory fields are copied from `gfile.stat`.
    """

    def _stat_entry(name):
        info = gfile.stat(dir_path + '/' + name)
        return _FileStat(
            length=info.length,
            mtime_nsec=info.mtime_nsec,
            is_directory=info.is_directory)

    return {name: _stat_entry(name) for name in gfile.listdir(dir_path)}
def load_acic(self):
    """Loads semi-synthetic data.

    It updates the object DataSimulation: one dataset file is picked at
    random (seeded with `self.seed`) from the low- or high-dimensional ACIC
    folder, depending on `self.param_data['data_low_dimension']`.

    Sets `outcome` (column 'Y'), `treatment` (column 'A'), `covariates`
    (all remaining columns, standardized), `sample_size`,
    `num_covariates`, `linear`, `noise`, `var_covariates`,
    `treatment_prop` and `tau` (the 'trueATE' value of the chosen file).

    Args:
      self
    Returns:
      None
    """
    self.data_path = self.param_data['data_path'] + 'ACIC/'
    if self.param_data['data_low_dimension']:
        true_ate_path = self.data_path + 'lowDim_trueATE.csv'
        self.data_path = self.data_path + 'low_dimensional_datasets/'
    else:
        true_ate_path = self.data_path + 'highDim_trueATE.csv'
        self.data_path = self.data_path + 'high_dimensional_datasets/'

    np.random.seed(self.seed)
    # List the directory once: the index is drawn from, and applied to, the
    # same listing, and a second (possibly remote) call is avoided.
    dataset_files = gfile.listdir(self.data_path)
    i = np.random.randint(0, len(dataset_files), 1)[0]
    path = dataset_files[i]

    with gfile.GFile(self.data_path + path, 'r') as f:
        data = pd.read_csv(f, delimiter=',')

    self.outcome = data['Y'].values
    self.treatment = data['A'].values
    self.covariates = data.drop(['Y', 'A'], axis=1).values
    scaler = StandardScaler()
    self.covariates = scaler.fit_transform(self.covariates)

    self.sample_size, self.num_covariates = self.covariates.shape
    self.linear, self.noise = False, False
    self.var_covariates = None
    self.treatment_prop = self.treatment.sum() / len(self.treatment)

    with gfile.GFile(true_ate_path, 'r') as f:
        true_ate = pd.read_csv(f, delimiter=',')

    # Drop the last four characters of the file name (presumably the
    # '.csv' extension) to match the 'filename' column of the ATE table.
    path = path[:-4]
    true_ate_row = true_ate[true_ate['filename'] == path]
    self.tau = true_ate_row['trueATE'].values[0]
def get_latest_savedmodel_from_jobdir(job_dir: str) -> type(Predictor):
    """
    Return the path of the latest saved model below a trainer output directory.

    The export directory is `<job_dir>/<constants.MODEL_PATH>/export/best_exporter`.
    Entries whose name starts with 'temp' are skipped, and "latest" is the
    remaining entry with the greatest name under string comparison.

    :param job_dir: output directory of trainer
    """
    export_dir = os.path.join(job_dir, constants.MODEL_PATH, 'export',
                              'best_exporter')
    finished_exports = []
    for entry in gfile.listdir(export_dir):
        if entry.startswith('temp'):
            continue
        finished_exports.append(entry)
    newest_export = max(finished_exports)
    return os.path.join(export_dir, newest_export)
def __init__(self, seed, param_data):
    """Builds the treated / control / prediction datasets from TFRecords.

    Args:
      seed: Selects which simulated columns (`image/sim_{seed}_*`) are
        parsed from the records.
      param_data: Mapping whose 'data_path' entry is the directory whose
        entries are all read as TFRecord files.
    """
    super(LoadImages, self).__init__()
    self.name = 'ukb'

    data_dir = param_data['data_path']
    record_paths = [
        os.path.join(data_dir, entry) for entry in gfile.listdir(data_dir)
    ]
    tf_record_ds = tf.data.TFRecordDataset(record_paths)

    # Parsing spec: the encoded image, its id, and the four simulated
    # values that belong to this seed.
    features = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/id': tf.io.FixedLenFeature([1], tf.string),
    }
    for field in ('pi', 'y', 'mu0', 'mu1'):
        features[f'image/sim_{seed}_{field}/value'] = tf.io.FixedLenFeature(
            [1], tf.float32)

    def _parallel_map(dataset, fn):
        return dataset.map(fn, num_parallel_calls=tf.data.AUTOTUNE)

    ds = _parallel_map(tf_record_ds, _get_parse_example_fn(features))
    ds = _parallel_map(ds, _decode_img)
    ds = _parallel_map(
        ds,
        lambda x: _filter_treatment(x, seed))

    # split treated and non treated and pred (for full conterfactual).
    ds_treated = ds.filter(
        lambda x: x['t'])
    ds_control = ds.filter(
        lambda x: not x['t'])

    ds_treated = _parallel_map(
        ds_treated,
        lambda x: _filter_cols(x, seed))
    ds_control = _parallel_map(
        ds_control,
        lambda x: _filter_cols(x, seed))
    ds_all = _parallel_map(
        ds,
        lambda x: _filter_cols_pred(x, seed))
    ds_all_ps = _parallel_map(
        ds,
        lambda x: _filter_cols_ps(x, seed))

    self.dataset_treated = _get_dataset(ds_treated)
    self.dataset_control = _get_dataset(ds_control)
    self.dataset_all = _get_dataset(ds_all)
    self.dataset_all_ps = _get_dataset_ps(ds_all_ps)
def gen_csv_from_annotations(
        input_dir: str,
        output_file=constants.DEFAULT_CSV_FILENAME,
        out_path_prefix='',
        dataset_type=constants.DEFAULT_DATASET_TYPE):
    """Generates AutoML dataset CSV from annotation files.

    Args:
      input_dir: Directory of annotation files. A leading `~` is expanded.
      output_file: Output CSV filename. A leading `~` is expanded.
      out_path_prefix: Filepath prefix to prepend to the image files.
        e.g.
        src_image_filename = '/tmp/path/to/image.jpg'
        out_path_prefix = 'gs://bucket/images'
        output_image_filename = 'gs://bucket/images/image.jpg'
      dataset_type: Dataset type (TRAIN, VAL, TEST, UNSPECIFIED)
        to use for all the parsed images.

    Raises:
      ValueError: If `input_dir` does not exist.
    """
    # Expand once so that the existence check, the listing and the per-file
    # paths all refer to the same directory.
    input_dir = os.path.expanduser(input_dir)
    if not gfile.exists(input_dir):
        raise ValueError('Input directory not found.')

    with gfile.GFile(os.path.expanduser(output_file), 'w') as outf:
        writer = csv.writer(outf, delimiter=',')
        for filename in gfile.listdir(input_dir):
            filepath = os.path.join(input_dir, filename)
            image_filename, boxes = annotation.read(filepath)
            # NOTE(review): os.path.join discards out_path_prefix when
            # image_filename is absolute; the docstring example implies a
            # bare file name is expected here -- confirm what
            # annotation.read returns.
            out_image_filename = os.path.join(out_path_prefix, image_filename)
            for b in boxes:
                # One row per bounding box; the empty columns are left blank.
                writer.writerow([
                    dataset_type,
                    out_image_filename,
                    b.label,
                    b.xmin,
                    b.ymin,
                    '',
                    '',
                    b.xmax,
                    b.ymax,
                    '',
                    '',
                ])
def _load_data(split, dataset_config, outcomes):
    """Loads and parses TFRecords for the given dataset split.

    Elements are `TensorDictTriple`s and contain inputs, labels, and weights.
    The `inputs: TensorDict` must contain an `IMAGE_KEY` key with rgb tensor
    values of shape `dataset_config.image_size`. The `labels: TensorDict`
    should contain one key per outcome, while the `weights: TensorDict` should
    contain `subsample_weights` for each outcome.

    Only files in `dataset_config['path']` whose name starts with
    `dataset_config[split.value]` are read.

    Args:
      split: The dataset split (train, validation, test, or predict).
      dataset_config: A dataset ConfigDict containing hparams and augmentations.
      outcomes: A list of outcome ConfigDicts used to define labels.

    Returns:
      A tf.data.Dataset containing decoded image tensors, labels, and weights.
    """
    # Parsing spec for the records of this split.
    features = _build_tf_record_features(split, outcomes)

    # Collect the full paths of the shards that belong to this split.
    shard_paths: List[str] = []
    for shard_name in gfile.listdir(str(dataset_config['path'])):
        if shard_name.startswith(dataset_config[split.value]):
            shard_paths.append(shard_name)
    shard_paths = [dataset_config.path + '/' + name for name in shard_paths]

    autotune = tf.data.AUTOTUNE
    # Records -> parsed examples -> (inputs, labels, weights) -> decoded images.
    ds = tf.data.TFRecordDataset(filenames=shard_paths)
    ds = ds.map(_get_parse_example_fn(features), num_parallel_calls=autotune)
    ds = ds.map(_get_rename_keys(features), num_parallel_calls=autotune)
    ds = ds.map(_decode_img, num_parallel_calls=autotune)
    return ds
def get_aggregated_scores(self, pickle_dirs):
    """Collects and averages the VALAN scores of all pickles in some dirs.

    Args:
      pickle_dirs: (list) of directories of VALAN score pickles.

    Returns:
      score_dict: (dict) for lists of SR/SPL/SDTW/NDTW scores.
        E.g. {'sr': [0.35, 0.34, 0.27], 'spl', [0.46, 0.34, 0.45], ...}
      avg_scores: (dict) the average of SR/SPL/SDTW/NDTW scores.
        E.g. {'sr': 0.30, 'spl': 0.40, ...}
    """
    metric_names = ('sr', 'spl', 'sdtw', 'ndtw')
    score_dict = {name: [] for name in metric_names}

    # Every entry of every directory is treated as a score pickle.
    pickle_paths = []
    for pickle_dir in pickle_dirs:
        for filename in gfile.listdir(pickle_dir):
            pickle_paths.append(os.path.join(pickle_dir, filename))

    for pickle_path in pickle_paths:
        for score in self._get_scores(pickle_path):
            # Each score must unpack into exactly these four values.
            sr, spl, sdtw, ndtw = score
            for name, value in zip(metric_names, (sr, spl, sdtw, ndtw)):
                score_dict[name].append(value)

    avg_scores = {name: np.mean(score_dict[name]) for name in metric_names}
    return score_dict, avg_scores
def run_eval(self,
             experiment_dir,
             rng,
             step=None,
             work_units=None,
             model_names_r=None,
             widths_r=None):
    """Evaluate models in experiment_dir for R-Precision.

    Renders every work unit, embeds the renders with each retrieval model and
    compares them with the text embeddings of `config.queries_r`. The metrics
    are saved to `<experiment_dir>/metrics.npy` and returned.

    Args:
      experiment_dir: Directory containing one sub-directory per work unit.
        The shared config is loaded from sub-directory '1'.
      rng: Object exposing `advance(n)`; `rng.advance(1)` is handed to
        `helpers.init_nerf_model`.
      step: Forwarded to `self.render_from_checkpoint`.
      work_units: Optional list of work unit ids. When None, every entry of
        `experiment_dir` with a numeric name is used. Sorted in place.
      model_names_r: Names of the retrieval models. Falls back to
        `config.retrieve_models` when empty or None.
      widths_r: Render widths, paired one-to-one with `model_names_r`. Falls
        back to `config.retrieve_widths` when empty or None.

    Returns:
      A dict with 'renders_by_width', 'work_unit_configs',
      'work_unit_queries' and one entry per retrieval model name holding the
      aggregated 'val/retrieve_cosine_sim', 'val/retrieve_loss' and
      'val/retrieve_acc'.
    """
    logging.info('Local devices: %s', jax.local_devices())
    logging.info('All devices: %s', jax.devices())

    config = log.load_config_json(os.path.join(experiment_dir, '1'))
    logging.info('Config: %s', config)

    # Load retrieval models.
    if not model_names_r:
        model_names_r = config.retrieve_models
    models_r = [
        helpers.load_image_text_model(name)
        for name in tqdm.tqdm(model_names_r, desc='loading retrieval models')
    ]
    if not widths_r:
        widths_r = config.retrieve_widths
    print('model_names_r', model_names_r)
    print('widths_r', widths_r)

    # Encode retrieval set text descriptions.
    z_clip_rs = []  # text encodings of queries with all retrieval models
    # shape: [n_models, n_queries, d_model for specific model]
    if config.queries_r:
        for _, encode_text, _, tokenize_fn in tqdm.tqdm(
                models_r, desc='embedding queries with retrieval models'):
            z_clip_r = encode_text(tokenize_fn(config.queries_r))
            z_clip_rs.append(z_clip_r)

    # JIT rendering.
    kwargs_test = dict(rng=None, sigma_noise_std=0.)
    config_test_hq = ml_collections.ConfigDict(config)
    config_test_hq.update(config.test_hq)
    _, render_rays = helpers.init_nerf_model(rng.advance(1), config)

    @functools.partial(jax.pmap, in_axes=(0, None, None))
    def render_test_hq_p(rays, variables, origin):
        return render_rays(
            rays=rays,
            variables=variables,
            config=config_test_hq,
            sc=1.,
            mask_rad=config_test_hq.mr1,
            origin=origin,
            **kwargs_test)[0]

    # Render
    if work_units is None:
        work_units = gfile.listdir(experiment_dir)
        work_units = [int(wu) for wu in work_units if wu.isnumeric()]
    # Ascending order keeps renders, configs and queries aligned by work unit.
    work_units.sort()

    work_unit_queries = []
    work_unit_configs = []
    n_wu = len(work_units)
    # create resolution -> n_wu -> 4ximg mapping
    all_renders_by_width = collections.defaultdict(list)
    for work_unit in tqdm.tqdm(work_units, 'Rendering all work units'):
        # Load query used to generate this object
        work_unit_dir = os.path.join(experiment_dir, str(work_unit))
        wu_config = log.load_config_json(work_unit_dir)
        work_unit_configs.append(wu_config)
        work_unit_queries.append(wu_config.query)  # not templated

        # Render the object
        _, renders = self.render_from_checkpoint(work_unit_dir, widths_r,
                                                 render_test_hq_p, step)
        for width, render in renders.items():
            all_renders_by_width[width].append(render)

    print('all_renders_by_width keys', list(all_renders_by_width.keys()))

    def aggregate(raw):
        # `onp.float` was only an alias of the builtin `float` and no longer
        # exists in NumPy >= 1.24; the builtin gives the same float64 dtype.
        raw = onp.array(raw).astype(float)
        return {
            'mean': onp.mean(raw),
            'sem': stats.sem(raw),
            'raw': raw,
        }

    metrics = {
        # `jax.tree_map` is gone from recent JAX releases;
        # `jax.tree_util.tree_map` is the long-standing equivalent.
        'renders_by_width':
            jax.tree_util.tree_map(onp.array, dict(all_renders_by_width)),
        'work_unit_configs': work_unit_configs,
        'work_unit_queries': work_unit_queries,
    }

    ## Embed images with all retrieval models
    pbar = tqdm.tqdm(
        zip(model_names_r, widths_r, z_clip_rs, models_r),
        desc='Embedding renderings',
        total=len(model_names_r))
    for model_name, width, z_text, (encode_image, _, preprocess, _) in pbar:
        renders = all_renders_by_width[width]
        rgbs = np.array([rgb for rgb, _, _, _ in renders])
        print('about to encode rgbs with shape', rgbs.shape)
        print('  model_name', model_name)
        print('  width', width)
        z_est = encode_image(preprocess(rgbs))
        assert z_est.shape[0] == n_wu
        assert z_text.shape[0] == len(config.queries_r)

        cosine_sim = (z_est[:, None] * z_text[None]).sum(-1)  # [n_wu, queries_r]
        # Column of the query each work unit was generated from.
        idx_true = np.array(
            [config.queries_r.index(query) for query in work_unit_queries])
        cosine_sim_true = np.take_along_axis(
            cosine_sim, idx_true[:, None], axis=1).squeeze(1)
        log_prob = nn.log_softmax(cosine_sim, axis=1)  # normalize over captions
        log_likelihood = np.take_along_axis(
            log_prob, idx_true[:, None], axis=1).squeeze(1)
        correct = np.argmax(cosine_sim, axis=1) == idx_true

        metrics[model_name] = {
            'val/retrieve_cosine_sim': aggregate(cosine_sim_true),
            'val/retrieve_loss': aggregate(-log_likelihood),
            'val/retrieve_acc': aggregate(correct),
        }

    metrics_path = os.path.join(experiment_dir, 'metrics.npy')
    with gfile.GFile(metrics_path, 'wb') as f:
        logging.info('Writing metrics to %s', metrics_path)
        onp.save(f, metrics)

    # The renders and configs are too bulky to log.
    for k, v in metrics.items():
        if k not in ('renders_by_width', 'work_unit_configs'):
            logging.info('Metric %s: %s', k, v)

    return metrics
# Wrapper that instantiates the TFX StatisticsGen component from plain URI /
# string arguments and runs its executor in-process. The body is generated
# code; the *_uri parameters are matched to component inputs and outputs by
# name (see the two loops below). Returns a 1-tuple holding
# output_statistics_uri unchanged.
def StatisticsGen(
    examples_uri: 'ExamplesUri',
    output_statistics_uri: 'ExampleStatisticsUri',
    schema_uri: 'SchemaUri' = None,
    exclude_splits: str = None,
    beam_pipeline_args: list = None,
) -> NamedTuple('Outputs', [
    ('statistics_uri', 'ExampleStatisticsUri'),
]):
    from tfx.components.statistics_gen.component import StatisticsGen as component_class

    #Generated code
    import os
    import tempfile
    from tensorflow.io import gfile
    from google.protobuf import json_format, message
    from tfx.types import channel_utils, artifact_utils
    from tfx.components.base import base_executor

    # Snapshot of the local namespace at this point: the function parameters
    # plus the names imported above. Any local introduced before this line
    # would also end up in `arguments`.
    arguments = locals().copy()

    component_class_args = {}

    # Execution parameters: forward every argument that was supplied.
    for name, execution_parameter in component_class.SPEC_CLASS.PARAMETERS.items():
        argument_value = arguments.get(name, None)
        if argument_value is None:
            continue
        parameter_type = execution_parameter.type
        if isinstance(parameter_type, type) and issubclass(
                parameter_type, message.Message):
            # Proto-typed parameters are parsed from the argument text into
            # a fresh message instance.
            argument_value_obj = parameter_type()
            json_format.Parse(argument_value, argument_value_obj)
        else:
            argument_value_obj = argument_value
        component_class_args[name] = argument_value_obj

    # Input channels: looked up as `<name>_uri`, falling back to
    # `<name>_path`; inputs without a value are skipped.
    for name, channel_parameter in component_class.SPEC_CLASS.INPUTS.items():
        artifact_path = arguments.get(name + '_uri') or arguments.get(name + '_path')
        if artifact_path:
            artifact = channel_parameter.type()
            artifact.uri = artifact_path.rstrip('/') + '/'  # Some TFX components require that the artifact URIs end with a slash
            if channel_parameter.type.PROPERTIES and 'split_names' in channel_parameter.type.PROPERTIES:
                # Recovering splits
                subdirs = gfile.listdir(artifact_path)
                # Workaround for https://github.com/tensorflow/tensorflow/issues/39167
                subdirs = [subdir.rstrip('/') for subdir in subdirs]
                # Split names are the sub-directory names without 'Split-'.
                split_names = [subdir.replace('Split-', '') for subdir in subdirs]
                artifact.split_names = artifact_utils.encode_split_names(
                    sorted(split_names))
            component_class_args[name] = channel_utils.as_channel([artifact])

    component_class_instance = component_class(**component_class_args)

    input_dict = channel_utils.unwrap_channel_dict(
        component_class_instance.inputs.get_all())
    output_dict = {}
    exec_properties = component_class_instance.exec_properties

    # Generating paths for output artifacts
    # Looked up as `output_<name>_uri`, falling back to `<name>_path`.
    for name, channel in component_class_instance.outputs.items():
        artifact_path = arguments.get('output_' + name + '_uri') or arguments.get(name + '_path')
        if artifact_path:
            artifact = channel.type()
            artifact.uri = artifact_path.rstrip('/') + '/'  # Some TFX components require that the artifact URIs end with a slash
            artifact_list = [artifact]
            # The same list object is stored on the channel and in
            # output_dict.
            channel._artifacts = artifact_list
            output_dict[name] = artifact_list

    print('component instance: ' + str(component_class_instance))

    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=arguments.get('beam_pipeline_args'),
        tmp_dir=tempfile.gettempdir(),
        unique_id='tfx_component',
    )
    executor = component_class_instance.executor_spec.executor_class(
        executor_context)
    executor.Do(
        input_dict=input_dict,
        output_dict=output_dict,
        exec_properties=exec_properties,
    )

    return (output_statistics_uri, )
# Predicts recognition on live camera input
# with the help of a predefined model, the train
# dataset and a Haar cascade classifier.
import cv2
from tensorflow.io.gfile import listdir

from classes.model import Model

# get class list: one class per entry of the training directory
class_names = listdir("Datasets/train/")

# load model
model = Model(len(class_names))
model.load_model()

# predict live on camera input
cap = cv2.VideoCapture(0)
face_classifier = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

for i in range(100):
    ret, frame = cap.read()

    faces = face_classifier.detectMultiScale(frame, 1.3, 5)
    # Test for "no detections" by length: `faces is ()` compared identity
    # with a literal, which raises a SyntaxWarning and only worked because
    # CPython happens to share a single empty-tuple object.
    if len(faces) == 0:
        # No face found: predict on the whole frame.
        preds = model.predict_on_cv(frame)
    # Crop all faces found
    else:
        cropped_faces = []
        for (x, y, w, h) in faces:
            x = x - 10
# Wrapper that instantiates the TFX Transform component from path / string
# arguments and runs its executor in-process. The body is generated code;
# the *_path parameters are matched to component inputs and outputs by name
# (see the two loops below). Nothing is returned.
def Transform(
    examples_path: InputPath('Examples'),
    schema_path: InputPath('Schema'),
    transform_graph_path: OutputPath('TransformGraph'),
    transformed_examples_path: OutputPath('Examples'),
    updated_analyzer_cache_path: OutputPath('TransformCache'),
    analyzer_cache_path: InputPath('TransformCache') = None,
    module_file: str = None,
    preprocessing_fn: str = None,
    force_tf_compat_v1: int = None,
    custom_config: str = None,
    splits_config: {
        'JsonObject': {
            'data_type': 'proto:tfx.components.transform.SplitsConfig'
        }
    } = None,
):
    from tfx.components.transform.component import Transform as component_class

    #Generated code
    import os
    import tempfile
    from tensorflow.io import gfile
    from google.protobuf import json_format, message
    from tfx.types import channel_utils, artifact_utils
    from tfx.components.base import base_executor

    # Snapshot of the local namespace at this point: the function parameters
    # plus the names imported above. Any local introduced before this line
    # would also end up in `arguments`.
    arguments = locals().copy()

    component_class_args = {}

    # Execution parameters: forward every argument that was supplied.
    for name, execution_parameter in component_class.SPEC_CLASS.PARAMETERS.items():
        argument_value = arguments.get(name, None)
        if argument_value is None:
            continue
        parameter_type = execution_parameter.type
        if isinstance(parameter_type, type) and issubclass(
                parameter_type, message.Message):
            # Proto-typed parameters are parsed from the argument text into
            # a fresh message instance.
            argument_value_obj = parameter_type()
            json_format.Parse(argument_value, argument_value_obj)
        else:
            argument_value_obj = argument_value
        component_class_args[name] = argument_value_obj

    # Input channels: looked up as `<name>_uri`, falling back to
    # `<name>_path`; inputs without a value are skipped.
    for name, channel_parameter in component_class.SPEC_CLASS.INPUTS.items():
        artifact_path = arguments.get(name + '_uri') or arguments.get(name + '_path')
        if artifact_path:
            artifact = channel_parameter.type()
            artifact.uri = artifact_path.rstrip('/') + '/'  # Some TFX components require that the artifact URIs end with a slash
            if channel_parameter.type.PROPERTIES and 'split_names' in channel_parameter.type.PROPERTIES:
                # Recovering splits
                subdirs = gfile.listdir(artifact_path)
                # Workaround for https://github.com/tensorflow/tensorflow/issues/39167
                subdirs = [subdir.rstrip('/') for subdir in subdirs]
                # Split names are the sub-directory names without 'Split-'.
                split_names = [subdir.replace('Split-', '') for subdir in subdirs]
                artifact.split_names = artifact_utils.encode_split_names(
                    sorted(split_names))
            component_class_args[name] = channel_utils.as_channel([artifact])

    component_class_instance = component_class(**component_class_args)

    input_dict = channel_utils.unwrap_channel_dict(
        component_class_instance.inputs.get_all())
    output_dict = {}
    exec_properties = component_class_instance.exec_properties

    # Generating paths for output artifacts
    # Looked up as `output_<name>_uri`, falling back to `<name>_path`.
    for name, channel in component_class_instance.outputs.items():
        artifact_path = arguments.get('output_' + name + '_uri') or arguments.get(name + '_path')
        if artifact_path:
            artifact = channel.type()
            artifact.uri = artifact_path.rstrip('/') + '/'  # Some TFX components require that the artifact URIs end with a slash
            artifact_list = [artifact]
            # The same list object is stored on the channel and in
            # output_dict.
            channel._artifacts = artifact_list
            output_dict[name] = artifact_list

    print('component instance: ' + str(component_class_instance))

    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=arguments.get('beam_pipeline_args'),
        tmp_dir=tempfile.gettempdir(),
        unique_id='tfx_component',
    )
    executor = component_class_instance.executor_spec.executor_class(
        executor_context)
    executor.Do(
        input_dict=input_dict,
        output_dict=output_dict,
        exec_properties=exec_properties,
    )
import os

import cv2  # used by read_image and the write loop below
import numpy as np
from numpy import clip
from PIL import Image
from tensorflow.io import gfile

# Root of the input datasets, taken from the environment (None when unset).
DATA_DIR = os.getenv('DKUBE_INPUT_DATASETS', None)


def read_image(filename):
    """Read an image, threshold its grayscale version and standardize it.

    The image is converted to grayscale, pixels not above 120 are set to
    zero (THRESH_TOZERO), and the result is shifted to zero mean and scaled
    to unit standard deviation.

    Args:
        filename: Path of the image file, as accepted by `cv2.imread`.

    Returns:
        The standardized image as a float array. For a constant image (zero
        standard deviation) the centred, all-zero image is returned instead
        of dividing by zero.
    """
    image1 = cv2.imread(filename)
    img = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
    ret, img = cv2.threshold(img, 120, 255, cv2.THRESH_TOZERO)
    mean, std = img.mean(), img.std()
    if std == 0:
        # Avoid 0/0 -> NaN for images that are constant after thresholding.
        return img - mean
    img = (img - mean) / std
    return img


# Mirror raw_faces/<folder>/<file> into binarized_faces/<folder>/<file>.
folders = gfile.listdir(DATA_DIR + '/raw_faces/')
for each_folder in folders:
    files = gfile.listdir(DATA_DIR + '/raw_faces/' + each_folder)
    gfile.makedirs(DATA_DIR + "/binarized_faces/" + each_folder)
    for each_file in files:
        b_img = read_image(DATA_DIR + '/raw_faces/' + each_folder + '/' +
                           each_file)
        # with gfile.GFile(DATA_DIR + '/binarized_faces/' + each_folder + '/' + each_file, 'wb') as f:
        #     f.write(b_img)
        cv2.imwrite(
            DATA_DIR + '/binarized_faces/' + each_folder + '/' + each_file,
            b_img)