def load_config(config_path, config=None):
    """Loads configs from (possibly multiple) file(s).

    Args:
        config_path: Paths to configuration files. This can be a `list` of
            config file names, a path to a directory in which all files are
            loaded, or a string of multiple file names separated by commas.
        config (dict, optional): A config dict to which new configurations
            are added. If `None`, a new config dict is created.

    Returns:
        A `dict` of configurations.
    """
    fnames = []
    if isinstance(config_path, (list, tuple)):
        fnames = list(config_path)
    elif gfile.IsDirectory(config_path):
        for fname in gfile.ListDirectory(config_path):
            fname = os.path.join(config_path, fname)
            if not gfile.IsDirectory(fname):
                fnames.append(fname)
    else:
        for fname in config_path.split(","):
            fname = fname.strip()
            if not fname:
                continue
            fnames.append(fname)

    if config is None:
        config = {}
    for fname in fnames:
        config = load_config_single(fname, config)
    return config
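# Hedged usage sketch for `load_config` above: the config file names are
# hypothetical, and `gfile`/`load_config_single` are assumed to be in scope
# exactly as the function uses them.
def _load_config_example():
    # Comma-separated string form.
    config = load_config("base.yml, overrides.yml")
    # Explicit list form, merging into an existing dict.
    config = load_config(["extra.yml"], config=config)
    # Directory form: every non-directory entry in the folder is loaded.
    config = load_config("configs/")
    return config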
def execute_job():
    config = JobExecutorConfig()
    make_needed_dirs(config)
    configure_logging(config)

    if config.prediction == 'BERT':
        match_predictor = BertMatchPredictor()
    elif config.prediction == 'KEYED_VECTORS':
        match_predictor = KeyedVectorsFormatPredictor()
    else:
        raise ValueError("Unknown prediction mode: %s" % config.prediction)

    while True:
        logging.info("job iteration started")
        dir_in = config.dir_in
        file_names = [
            f for f in gfile.ListDirectory(dir_in)
            if not gfile.IsDirectory(join(dir_in, f))
        ]
        for file_name in file_names:
            logging.info(file_name)
            file_path = join(dir_in, file_name)
            try:
                match_predictor.predict(dir_in, file_name, config.dir_result)
                gfile.Rename(file_path, join(config.dir_success, file_name))
            except Exception:
                logging.error(traceback.format_exc())
                gfile.Rename(file_path, join(config.dir_error, file_name))
        logging.info("job iteration finished")
        time.sleep(config.interval)
def _check_directory(filename: str) -> Optional[str]:
    if gfile.Exists(filename):
        if gfile.IsDirectory(filename):
            return None
        return '"%s" is expected to be a directory.' % filename
    return 'Expected directory %s does not exist.' % filename
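# Usage sketch for `_check_directory`: because it returns an error string or
# None, callers can aggregate validation messages. The paths are hypothetical.
def _validate_directories_example():
    errors = [e for e in (_check_directory(d) for d in ("/data", "/models"))
              if e is not None]
    if errors:
        raise ValueError("\n".join(errors))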
def write_production():
    """Copies staged templates to production directory.

    This function assumes that the template and associated metadata files are
    stored in a folder of the form
    gs://<template_staging_bucket>/<release_name>. It copies the templates
    from the <release_name> folder to two new locations:
    gs://<prod_bucket>/<release_name> and gs://<prod_bucket>/latest. Both
    folders contain identical contents; the <release_name> folder allows
    customers to pin to a specific release, and the `latest` folder gives the
    UI a fixed location at which to point.

    Raises:
        GOSError: If there was an error reading or writing a file.
    """
    prod_root = FLAGS.template_prod_bucket
    template_staging_root = FLAGS.template_staging_bucket
    template_dir = os.path.join(template_staging_root, FLAGS.candidate_name)
    if not gfile.IsDirectory(template_dir):
        logging.fatal(
            'Template staging directory %s does not exist or is not a '
            'directory.', template_dir)

    release_dir = os.path.join(prod_root, FLAGS.release_name)
    if gfile.IsDirectory(release_dir):
        logging.fatal(
            'Template release directory %s already exists. Aborting.',
            release_dir)

    logging.info('Copying folder from %s to %s.', template_dir, release_dir)
    gfile.MkDir(release_dir)
    CopyRecursively(template_dir, release_dir)

    # TODO: If we ever delete templates, they will stick around in
    # `latest`; evaluate something rsync-like in the future.
    latest_dir = os.path.join(prod_root, LATEST_FOLDER_NAME)
    if gfile.Exists(latest_dir):
        if not gfile.IsDirectory(latest_dir):
            gfile.Remove(latest_dir)
            gfile.MkDir(latest_dir)
    else:
        gfile.MkDir(latest_dir)
    logging.info('Copying folder from %s to %s.', template_dir, latest_dir)
    CopyRecursively(template_dir, latest_dir, overwrite=True)
def CopyRecursively(src, dst, overwrite=False):
    """Recursively copies the contents of `src` into `dst`."""
    entries = gfile.ListDirectory(src)
    for entry in entries:
        src_path = os.path.join(src, entry)
        dst_path = os.path.join(dst, entry)
        if gfile.IsDirectory(src_path):
            gfile.MkDir(dst_path)
            CopyRecursively(src_path, dst_path, overwrite)
        else:
            gfile.Copy(src_path, dst_path, overwrite)
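# Usage sketch for `CopyRecursively`; the bucket paths are hypothetical. Note
# that the destination root must already exist, since the function only
# creates directories for nested entries it encounters.
def _copy_release_example():
    gfile.MkDir("gs://my-bucket/release-copy")
    CopyRecursively("gs://my-bucket/release", "gs://my-bucket/release-copy",
                    overwrite=False)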
def make_dir(dir_name: str) -> str:
    if gfile.Exists(dir_name):
        if gfile.IsDirectory(dir_name):
            return dir_name
        logging.fatal(
            'Trying to create directory "%s", but there '
            'is a file with the same name', dir_name)
    gfile.MakeDirs(dir_name)
    return dir_name
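# `make_dir` returns early when the directory already exists, so it can be
# called unconditionally at startup. The path below is illustrative.
def _make_dir_example():
    checkpoint_dir = make_dir("/tmp/experiment/checkpoints")
    return checkpoint_dir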
def validate_hourly(working_dir, validate_name=None):
    """Compiles a list of games based on the new hourly directory format,
    then calls validate on it."""
    holdout_files = (
        os.path.join(fsdb.holdout_dir(), d, f)
        for d in reversed(gfile.ListDirectory(fsdb.holdout_dir()))
        if gfile.IsDirectory(os.path.join(fsdb.holdout_dir(), d))
        for f in gfile.ListDirectory(os.path.join(fsdb.holdout_dir(), d)))
    holdout_files = list(itertools.islice(holdout_files, 20000))
    random.shuffle(holdout_files)
    dual_net.validate(holdout_files)
def __init__(self, log_dir):
    """Create a new SummaryWriter.

    Args:
        log_dir: path to record tfevents files in.
    """
    # If needed, create log_dir directory as well as missing parent
    # directories.
    if not gfile.IsDirectory(log_dir):
        gfile.MakeDirs(log_dir)

    self.writer = tf.summary.FileWriter(log_dir, graph=None)
    self.end_summaries = []
    self.step = 0
    self.closed = False
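# Sketch of driving the wrapped TF1 FileWriter directly. This assumes the
# enclosing class is named `SummaryWriter` (per its docstring); the tag and
# value are illustrative only.
def _summary_writer_example(log_dir="/tmp/tfevents"):
    writer = SummaryWriter(log_dir)
    summary = tf.Summary(
        value=[tf.Summary.Value(tag="loss", simple_value=0.5)])
    writer.writer.add_summary(summary, global_step=writer.step)
    writer.writer.flush()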
def validate_holdout_selfplay():
    """Validate on held-out selfplay data."""
    holdout_dirs = (
        os.path.join(fsdb.holdout_dir(), d)
        for d in reversed(gfile.ListDirectory(fsdb.holdout_dir()))
        if gfile.IsDirectory(os.path.join(fsdb.holdout_dir(), d))
        for f in gfile.ListDirectory(os.path.join(fsdb.holdout_dir(), d)))

    # This is a roundabout way of computing how many hourly directories we
    # need to read in order to encompass 20,000 holdout games.
    holdout_dirs = set(itertools.islice(holdout_dirs, 20000))
    cmd = ['python3', 'validate.py'] + list(holdout_dirs) + [
        '--use_tpu',
        '--tpu_name={}'.format(TPU_NAME),
        '--flagfile=rl_loop/distributed_flags',
        '--expand_validation_dirs'
    ]
    mask_flags.run(cmd)
def visualize_dataset(dataset_name, output_path, num_animations=5,
                      num_frames=20, fps=10):
    """Visualizes the data set by saving images to output_path.

    For each latent factor, outputs 16 images where only that latent factor
    is varied while all others are kept constant.

    Args:
        dataset_name: String with name of dataset as defined in named_data.py.
        output_path: String with path in which to create the visualizations.
        num_animations: Integer with number of distinct animations to create.
        num_frames: Integer with number of frames in each animation.
        fps: Integer with frame rate for the animation.
    """
    data = named_data.get_named_ground_truth_data(dataset_name)
    random_state = np.random.RandomState(0)

    # Create output folder if necessary.
    path = os.path.join(output_path, dataset_name)
    if not gfile.IsDirectory(path):
        gfile.MakeDirs(path)

    # Create still images.
    for i in range(data.num_factors):
        factors = data.sample_factors(16, random_state)
        indices = [j for j in range(data.num_factors) if i != j]
        factors[:, indices] = factors[0, indices]
        images = data.sample_observations_from_factors(factors, random_state)
        visualize_util.grid_save_images(
            images, os.path.join(path, "variations_of_factor%s.png" % i))

    # Create animations.
    for i in range(num_animations):
        base_factor = data.sample_factors(1, random_state)
        images = []
        for j, num_atoms in enumerate(data.factors_num_values):
            factors = np.repeat(base_factor, num_frames, axis=0)
            factors[:, j] = visualize_util.cycle_factor(
                base_factor[0, j], num_atoms, num_frames)
            images.append(
                data.sample_observations_from_factors(factors, random_state))
        visualize_util.save_animation(
            np.array(images), os.path.join(path, "animation%d.gif" % i), fps)
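# Example invocation; the dataset name and output path are illustrative, and
# the name must be one that `named_data.get_named_ground_truth_data` accepts.
def _visualize_dataset_example():
    visualize_dataset("dsprites_full", "/tmp/visualizations",
                      num_animations=2, num_frames=10, fps=5)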
def visualize(model_dir,
              output_dir,
              overwrite=False,
              num_animations=5,
              num_frames=20,
              fps=10,
              num_points_irs=10000):
    """Takes trained model from model_dir and visualizes it in output_dir.

    Args:
        model_dir: Path to directory where the trained model is saved.
        output_dir: Path to output directory.
        overwrite: Boolean indicating whether to overwrite output directory.
        num_animations: Integer with number of distinct animations to create.
        num_frames: Integer with number of frames in each animation.
        fps: Integer with frame rate for the animation.
        num_points_irs: Number of points to be used for the IRS plots.
    """
    # Fix the random seed for reproducibility.
    random_state = np.random.RandomState(0)

    # Create the output directory if necessary.
    if tf.gfile.IsDirectory(output_dir):
        if overwrite:
            tf.gfile.DeleteRecursively(output_dir)
        else:
            raise ValueError(
                "Directory already exists and overwrite is False.")

    # Automatically set the proper data set if necessary. We replace the
    # active gin config as this will lead to a valid gin config file where
    # the data set is present.
    # Obtain the dataset name from the gin config of the previous step.
    gin_config_file = os.path.join(model_dir, "results", "gin", "train.gin")
    gin_dict = results.gin_dict(gin_config_file)
    gin.bind_parameter("dataset.name",
                       gin_dict["dataset.name"].replace("'", ""))

    # Automatically infer the activation function from gin config.
    activation_str = gin_dict["reconstruction_loss.activation"]
    if activation_str == "'logits'":
        activation = sigmoid
    elif activation_str == "'tanh'":
        activation = tanh
    else:
        raise ValueError(
            "Activation function could not be inferred from gin config.")

    dataset = named_data.get_named_ground_truth_data()
    num_pics = 64
    module_path = os.path.join(model_dir, "tfhub")

    with hub.eval_function_for_module(module_path) as f:
        # Save reconstructions.
        real_pics = dataset.sample_observations(num_pics, random_state)
        raw_pics = f(dict(images=real_pics),
                     signature="reconstructions",
                     as_dict=True)["images"]
        pics = activation(raw_pics)
        paired_pics = np.concatenate((real_pics, pics), axis=2)
        paired_pics = [paired_pics[i, :, :, :]
                       for i in range(paired_pics.shape[0])]
        results_dir = os.path.join(output_dir, "reconstructions")
        if not gfile.IsDirectory(results_dir):
            gfile.MakeDirs(results_dir)
        visualize_util.grid_save_images(
            paired_pics, os.path.join(results_dir, "reconstructions.jpg"))

        # Save samples.
        def _decoder(latent_vectors):
            return f(dict(latent_vectors=latent_vectors),
                     signature="decoder",
                     as_dict=True)["images"]

        num_latent = int(gin_dict["encoder.num_latent"])
        num_pics = 64
        random_codes = random_state.normal(0, 1, [num_pics, num_latent])
        pics = activation(_decoder(random_codes))
        results_dir = os.path.join(output_dir, "sampled")
        if not gfile.IsDirectory(results_dir):
            gfile.MakeDirs(results_dir)
        visualize_util.grid_save_images(
            pics, os.path.join(results_dir, "samples.jpg"))

        # Save latent traversals.
        result = f(dict(images=dataset.sample_observations(num_pics,
                                                           random_state)),
                   signature="gaussian_encoder",
                   as_dict=True)
        means = result["mean"]
        logvars = result["logvar"]
        results_dir = os.path.join(output_dir, "traversals")
        if not gfile.IsDirectory(results_dir):
            gfile.MakeDirs(results_dir)
        for i in range(means.shape[1]):
            pics = activation(
                latent_traversal_1d_multi_dim(_decoder, means[i, :], None))
            file_name = os.path.join(results_dir,
                                     "traversals{}.jpg".format(i))
            visualize_util.grid_save_images([pics], file_name)

        # Save the latent traversal animations.
        results_dir = os.path.join(output_dir, "animated_traversals")
        if not gfile.IsDirectory(results_dir):
            gfile.MakeDirs(results_dir)

        # Cycle through quantiles of a standard Gaussian.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0), num_frames,
                                 axis=0)
                code[:, j] = visualize_util.cycle_gaussian(
                    base_code[j], num_frames)
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "std_gaussian_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Cycle through quantiles of a fitted Gaussian.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0), num_frames,
                                 axis=0)
                loc = np.mean(means[:, j])
                total_variance = np.mean(np.exp(logvars[:, j])) + np.var(
                    means[:, j])
                code[:, j] = visualize_util.cycle_gaussian(
                    base_code[j], num_frames, loc=loc,
                    scale=np.sqrt(total_variance))
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "fitted_gaussian_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Cycle through the [-2, 2] interval.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0), num_frames,
                                 axis=0)
                code[:, j] = visualize_util.cycle_interval(
                    base_code[j], num_frames, -2., 2.)
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "fixed_interval_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Cycle linearly through +-2 std dev of a fitted Gaussian.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0), num_frames,
                                 axis=0)
                loc = np.mean(means[:, j])
                total_variance = np.mean(np.exp(logvars[:, j])) + np.var(
                    means[:, j])
                scale = np.sqrt(total_variance)
                code[:, j] = visualize_util.cycle_interval(
                    base_code[j], num_frames, loc - 2. * scale,
                    loc + 2. * scale)
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "conf_interval_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Cycle linearly through the min-max range of a fitted Gaussian.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0), num_frames,
                                 axis=0)
                code[:, j] = visualize_util.cycle_interval(
                    base_code[j], num_frames, np.min(means[:, j]),
                    np.max(means[:, j]))
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "minmax_interval_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Interventional effects visualization.
        factors = dataset.sample_factors(num_points_irs, random_state)
        obs = dataset.sample_observations_from_factors(factors, random_state)
        batch_size = 64
        num_outputs = 0
        latents = []
        while num_outputs < obs.shape[0]:
            input_batch = obs[num_outputs:min(num_outputs + batch_size,
                                              obs.shape[0])]
            output_batch = f(dict(images=input_batch),
                             signature="gaussian_encoder",
                             as_dict=True)["mean"]
            latents.append(output_batch)
            num_outputs += batch_size
        latents = np.concatenate(latents)
        results_dir = os.path.join(output_dir, "interventional_effects")
        vis_all_interventional_effects(factors, latents, results_dir)

    # Finally, we clear the gin config that we have set.
    gin.clear_config()
    # return (img_arr, tf.one_hot(label, num_classes), filename, classname)
    return (img_arr, label, filename, classname)


if __name__ == '__main__':
    TFR_DIR = "gs://kfp-testing/retin_oct/conv_256_10may/tfrecords"
    LABEL_LIST = "gs://kfp-testing/retin_oct/conv_256_10may/labels.json"
    # TFR_DIR = "/home/aakashbajaj5311/conv_data_256/conv_256_10may/tfrecords/"
    # LABEL_LIST = "/home/aakashbajaj5311/conv_data_256/conv_256_10may/labels.json"

    train_path = os.path.join(TFR_DIR, "test")

    training_filenames = []
    if tf.gfile.IsDirectory(train_path):
        for filename in tf.gfile.ListDirectory(train_path):
            filepath = os.path.join(train_path, filename)
            training_filenames.append(filepath)
    else:
        print("Invalid training directory. Exiting.......\n")
        exit(1)

    # training_filenames = ["/home/techno/oct_data/retin_oct_conv_9may_tfrecords_test_test-00000-of-00005"]
    training_filenames = [training_filenames[0]]
    print(training_filenames)

    dataset = tf.data.TFRecordDataset(training_filenames)
    dataset = dataset.map(tfr_parser)
    iterator = dataset.make_one_shot_iterator()
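    # A sketch of consuming the one-shot iterator in a TF1 session; the
    # element structure matches what `tfr_parser` returns above, and the
    # print is illustrative only.
    next_element = iterator.get_next()
    with tf.Session() as sess:
        try:
            while True:
                img_arr, label, filename, classname = sess.run(next_element)
                print(filename, classname)
        except tf.errors.OutOfRangeError:
            pass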
def __init__(self, log_dir, write_graph=True):
    """Creates a FileWriter in `log_dir`, creating the directory if needed."""
    if not gfile.IsDirectory(log_dir):
        gfile.MakeDirs(log_dir)
    self.writer = tf.summary.FileWriter(
        log_dir, graph=tf.get_default_graph() if write_graph else None)
def visualize_supervised(supervised_model_dir,
                         trained_vae_model_dir,
                         output_dir,
                         overwrite=False):
    """Takes a trained supervised model and visualizes its reconstructions.

    Args:
        supervised_model_dir: Path to directory where the trained supervised
            model is saved.
        trained_vae_model_dir: Path to directory where the trained VAE that
            provides the representations is saved.
        output_dir: Path to output directory.
        overwrite: Boolean indicating whether to overwrite output directory.
    """
    # Fix the random seed for reproducibility.
    random_state = np.random.RandomState(0)

    # Create the output directory if necessary.
    if tf.gfile.IsDirectory(output_dir):
        if overwrite:
            tf.gfile.DeleteRecursively(output_dir)
        else:
            raise ValueError(
                "Directory already exists and overwrite is False.")

    # Automatically set the proper data set if necessary. We replace the
    # active gin config as this will lead to a valid gin config file where
    # the data set is present.
    # Obtain the dataset name from the gin config of the previous step.
    gin_config_file = os.path.join(supervised_model_dir, "results", "gin",
                                   "evaluate.gin")
    gin_dict = results.gin_dict(gin_config_file)
    gin.bind_parameter("dataset.name",
                       gin_dict["dataset.name"].replace("'", ""))

    # Automatically infer the activation function from gin config.
    activation_str = gin_dict["reconstruction_loss.activation"]
    if activation_str == "'logits'":
        activation = sigmoid
    elif activation_str == "'tanh'":
        activation = tanh
    else:
        raise ValueError(
            "Activation function could not be inferred from gin config.")

    _, dataset = named_data.get_named_ground_truth_data()
    num_pics = 64
    supervised_module_path = os.path.join(supervised_model_dir, "tfhub")

    with hub.eval_function_for_module(supervised_module_path) as f:
        trained_vae_path = os.path.join(trained_vae_model_dir, "tfhub")
        with hub.eval_function_for_module(trained_vae_path) as g:

            def _representation_function(x):
                """Computes representation vector for input images."""
                output = g(dict(images=x), signature="representation",
                           as_dict=True)
                return np.array(output["default"])

            # Save reconstructions.
            real_pics = dataset.sample_observations(num_pics, random_state)
            representations = _representation_function(real_pics)
            decoded_pics = f(dict(representations=representations),
                             signature="reconstructions",
                             as_dict=True)["images"]
            pics = activation(decoded_pics)
            paired_pics = np.concatenate((real_pics, pics), axis=2)
            paired_pics = [paired_pics[i, :, :, :]
                           for i in range(paired_pics.shape[0])]
            results_dir = os.path.join(output_dir, "reconstructions")
            if not gfile.IsDirectory(results_dir):
                gfile.MakeDirs(results_dir)
            visualize_util.grid_save_images(
                paired_pics,
                os.path.join(results_dir, "reconstructions.jpg"))

    # Finally, we clear the gin config that we have set.
    gin.clear_config()
    return tf.estimator.export.ServingInputReceiver(features,
                                                    received_tensors)


if __name__ == '__main__':
    TFR_DIR = "/home/aakashbajaj5311/conv_256/conv_256_10may/tfrecords"
    LABEL_LIST = "/home/aakashbajaj5311/conv_256/conv_256_10may/labels.json"

    train_path = os.path.join(TFR_DIR, "train")
    test_path = os.path.join(TFR_DIR, "test")

    training_filenames = []
    testing_filenames = []

    if tf.gfile.IsDirectory(train_path):
        for filename in tf.gfile.ListDirectory(train_path):
            filepath = os.path.join(train_path, filename)
            training_filenames.append(filepath)
    else:
        print("Invalid training directory. Exiting.......\n")
        exit(1)

    if tf.gfile.IsDirectory(test_path):
        for filename in tf.gfile.ListDirectory(test_path):
            filepath = os.path.join(test_path, filename)
            testing_filenames.append(filepath)

    try:
        with tf.gfile.GFile(LABEL_LIST, 'rb') as fl:
            labels_bytes = fl.read()