Code Example #1
def load_config(config_path, config=None):
    """Loads configs from (possibly multiple) file(s).

    Args:
        config_path: Paths to configuration files. This can be a `list` of
            config file names, or a path to a directory in which all files
            are loaded, or a string of multiple file names separated by commas.
        config (dict, optional): A config dict to which new configurations are
            added. If `None`, a new config dict is created.

    Returns:
        A `dict` of configurations.
    """
    fnames = []
    if isinstance(config_path, (list, tuple)):
        fnames = list(config_path)
    elif gfile.IsDirectory(config_path):
        for fname in gfile.ListDirectory(config_path):
            fname = os.path.join(config_path, fname)
            if not gfile.IsDirectory(fname):
                fnames.append(fname)
    else:
        for fname in config_path.split(","):
            fname = fname.strip()
            if not fname:
                continue
            fnames.append(fname)

    if config is None:
        config = {}

    for fname in fnames:
        config = load_config_single(fname, config)

    return config
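A minimal usage sketch covering the three accepted forms of `config_path` (the file names below are hypothetical; `load_config_single` must be available as in the function above):

# Explicit list of files; later files can override earlier keys.
config = load_config(["base.yml", "override.yml"])

# A directory: every non-directory entry in it is loaded.
config = load_config("configs/")

# Comma-separated string of paths, merged into an existing dict.
config = load_config("base.yml, override.yml", config={"seed": 42})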
Code Example #2
File: job_executor.py Project: lkotlewski/bert
def execute_job():
    config = JobExecutorConfig()
    make_needed_dirs(config)
    configure_logging(config)
    if config.prediction == 'BERT':
        match_predictor = BertMatchPredictor()
    elif config.prediction == 'KEYED_VECTORS':
        match_predictor = KeyedVectorsFormatPredictor()
    else:
        raise ValueError("Unknown prediction mode: %s" % config.prediction)

    while True:
        logging.info("job iteration started")
        dir_in = config.dir_in
        files_names = [
            f for f in gfile.ListDirectory(dir_in)
            if not gfile.IsDirectory(join(dir_in, f))
        ]
        for file_name in files_names:
            logging.info(file_name)
            file_path = join(dir_in, file_name)
            try:
                match_predictor.predict(dir_in, file_name, config.dir_result)
                gfile.Rename(file_path, join(config.dir_success, file_name))
            except Exception:
                logging.error(traceback.format_exc())
                gfile.Rename(file_path, join(config.dir_error, file_name))
        logging.info("job iteration finished")
        time.sleep(config.interval)
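The list comprehension above is a common gfile idiom for selecting regular files only. As a reusable sketch (a hypothetical helper, not part of the project; it assumes `from os.path import join` as in the snippet):

def list_files(dir_path):
    """Returns names of non-directory entries directly under dir_path."""
    return [f for f in gfile.ListDirectory(dir_path)
            if not gfile.IsDirectory(join(dir_path, f))]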
Code Example #3
def _check_directory(filename: str) -> Optional[str]:
  if gfile.Exists(filename):
    if gfile.IsDirectory(filename):
      return None
    else:
      return '"%s" is expected to be a directory.' % filename
  else:
    return 'Expected directory %s does not exist.' % filename
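A short usage sketch: the helper returns None on success and an error message otherwise, so it composes into a simple validation pass (the paths below are hypothetical):

for path in ("/tmp/model_dir", "/tmp/output_dir"):
    error = _check_directory(path)
    if error is not None:
        raise ValueError(error)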
Code Example #4
def write_production():
    """Copies staged templates to production directory.

  This function assumes that the template and associated metadata files are
  stored in a folder of the form gs://<template_staging_bucket>/<release_name>.
  It copies the templates from the <release_name> folder to two new locations:
  gs://<prod_bucket>/<release_name> and gs://<prod_bucket>/latest. Both
  folders contain identical contents; the <release_name> bucket is to allow
  customers to pin to a specific release and the `latest` folder gives the UI
  a location at which to point.

  Raises:
    GOSError if there was an error reading or writing a file.
  """
    prod_root = FLAGS.template_prod_bucket
    template_staging_root = FLAGS.template_staging_bucket

    template_dir = os.path.join(template_staging_root, FLAGS.candidate_name)
    if not gfile.IsDirectory(template_dir):
        logging.fatal(
            'Template staging directory %s does not exist or is not a '
            'directory.', template_dir)

    release_dir = os.path.join(prod_root, FLAGS.release_name)
    if gfile.IsDirectory(release_dir):
        logging.fatal(
            'Template release directory %s already exists. Aborting.',
            release_dir)

    logging.info('Copying folder from %s to %s.', template_dir, release_dir)
    gfile.MkDir(release_dir)
    CopyRecursively(template_dir, release_dir)

    # TODO: If we ever delete templates, they will stick around in
    # `latest`; evaluate something rsync-like in the future.
    latest_dir = os.path.join(prod_root, LATEST_FOLDER_NAME)
    if gfile.Exists(latest_dir):
        if not gfile.IsDirectory(latest_dir):
            gfile.Remove(latest_dir)
            gfile.MkDir(latest_dir)
    else:
        gfile.MkDir(latest_dir)

    logging.info('Copying folder from %s to %s.', template_dir, latest_dir)
    CopyRecursively(template_dir, latest_dir, overwrite=True)
Code Example #5
def CopyRecursively(src, dst, overwrite=False):
    entries = gfile.ListDirectory(src)
    for entry in entries:
        src_path = os.path.join(src, entry)
        dst_path = os.path.join(dst, entry)
        if gfile.IsDirectory(src_path):
            gfile.MkDir(dst_path)
            CopyRecursively(src_path, dst_path, overwrite)
        else:
            gfile.Copy(src_path, dst_path, overwrite)
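Note that gfile.MkDir raises if the destination subdirectory already exists, so a second copy into the same tree (as with the `latest` folder in Code Example #4) can fail partway. A defensive variant, as a sketch (the name is hypothetical):

def CopyRecursivelySafe(src, dst, overwrite=False):
    """Like CopyRecursively, but tolerates pre-existing destination dirs."""
    for entry in gfile.ListDirectory(src):
        src_path = os.path.join(src, entry)
        dst_path = os.path.join(dst, entry)
        if gfile.IsDirectory(src_path):
            if not gfile.Exists(dst_path):
                gfile.MkDir(dst_path)
            CopyRecursivelySafe(src_path, dst_path, overwrite)
        else:
            gfile.Copy(src_path, dst_path, overwrite)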
Code Example #6
def make_dir(dir_name: str) -> str:
  if gfile.Exists(dir_name):
    if gfile.IsDirectory(dir_name):
      return dir_name
    else:
      logging.fatal(
          'Trying to create directory "%s", but there '
          'is a file with the same name', dir_name)
  gfile.MakeDirs(dir_name)
  return dir_name
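A one-line usage sketch (the path is hypothetical); the helper is idempotent, returning the path whether or not the directory already existed:

checkpoint_dir = make_dir("/tmp/experiment/checkpoints")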
Code Example #7
File: rl_loop.py Project: wtdeng/minigo
def validate_hourly(working_dir, validate_name=None):
    """Compiles a list of games based on the new hourly directory format,
    then calls validate on it."""

    holdout_dir = fsdb.holdout_dir()
    holdout_files = (os.path.join(holdout_dir, d, f)
                     for d in reversed(gfile.ListDirectory(holdout_dir))
                     if gfile.IsDirectory(os.path.join(holdout_dir, d))
                     for f in gfile.ListDirectory(os.path.join(holdout_dir, d)))
    holdout_files = list(itertools.islice(holdout_files, 20000))
    random.shuffle(holdout_files)
    dual_net.validate(holdout_files)
Code Example #8
    def __init__(self, log_dir):
        """Create a new SummaryWriter.

    Args:
      log_dir: path to record tfevents files in.
    """
        # If needed, create log_dir directory as well as missing parent directories.
        if not gfile.IsDirectory(log_dir):
            gfile.MakeDirs(log_dir)

        self.writer = tf.summary.FileWriter(log_dir, graph=None)
        self.end_summaries = []
        self.step = 0
        self.closed = False
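A usage sketch with the TF 1.x summary protobuf API (the tag name and value are illustrative):

writer = SummaryWriter("/tmp/logs/run1")
summary = tf.Summary(value=[tf.Summary.Value(tag="loss", simple_value=0.5)])
writer.writer.add_summary(summary, global_step=writer.step)
writer.writer.flush()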
Code Example #9
def validate_holdout_selfplay():
    """Validate on held-out selfplay data."""
    holdout_dirs = (
        os.path.join(fsdb.holdout_dir(), d)
        for d in reversed(gfile.ListDirectory(fsdb.holdout_dir()))
        if gfile.IsDirectory(os.path.join(fsdb.holdout_dir(), d))
        for f in gfile.ListDirectory(os.path.join(fsdb.holdout_dir(), d)))

    # This is a roundabout way of computing how many hourly directories we need
    # to read in order to encompass 20,000 holdout games.
    holdout_dirs = set(itertools.islice(holdout_dirs, 20000))
    cmd = ['python3', 'validate.py'] + list(holdout_dirs) + [
        '--use_tpu', '--tpu_name={}'.format(TPU_NAME),
        '--flagfile=rl_loop/distributed_flags', '--expand_validation_dirs'
    ]
    mask_flags.run(cmd)
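For reference, itertools.islice takes the iterable and the stop count in the same call, which is what the corrected line above relies on; a self-contained sketch:

import itertools

gen = (n * n for n in range(100))
first_three = set(itertools.islice(gen, 3))  # {0, 1, 4}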
Code Example #10
def visualize_dataset(dataset_name,
                      output_path,
                      num_animations=5,
                      num_frames=20,
                      fps=10):
    """Visualizes the data set by saving images to output_path.

  For each latent factor, outputs 16 images where only that latent factor is
  varied while all others are kept constant.

  Args:
    dataset_name: String with name of dataset as defined in named_data.py.
    output_path: String with path in which to create the visualizations.
    num_animations: Integer with number of distinct animations to create.
    num_frames: Integer with number of frames in each animation.
    fps: Integer with frame rate for the animation.
  """
    data = named_data.get_named_ground_truth_data(dataset_name)
    random_state = np.random.RandomState(0)

    # Create output folder if necessary.
    path = os.path.join(output_path, dataset_name)
    if not gfile.IsDirectory(path):
        gfile.MakeDirs(path)

    # Create still images.
    for i in range(data.num_factors):
        factors = data.sample_factors(16, random_state)
        indices = [j for j in range(data.num_factors) if i != j]
        factors[:, indices] = factors[0, indices]
        images = data.sample_observations_from_factors(factors, random_state)
        visualize_util.grid_save_images(
            images, os.path.join(path, "variations_of_factor%s.png" % i))

    # Create animations.
    for i in range(num_animations):
        base_factor = data.sample_factors(1, random_state)
        images = []
        for j, num_atoms in enumerate(data.factors_num_values):
            factors = np.repeat(base_factor, num_frames, axis=0)
            factors[:, j] = visualize_util.cycle_factor(base_factor[0, j],
                                                        num_atoms, num_frames)
            images.append(
                data.sample_observations_from_factors(factors, random_state))
        visualize_util.save_animation(
            np.array(images), os.path.join(path, "animation%d.gif" % i), fps)
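A hedged usage sketch (the dataset name and output path are illustrative; the name must match what named_data.py actually provides):

visualize_dataset("dsprites_full", "/tmp/vis",
                  num_animations=3, num_frames=20, fps=10)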
Code Example #11
def visualize(model_dir,
              output_dir,
              overwrite=False,
              num_animations=5,
              num_frames=20,
              fps=10,
              num_points_irs=10000):
    """Takes trained model from model_dir and visualizes it in output_dir.

    Args:
      model_dir: Path to directory where the trained model is saved.
      output_dir: Path to output directory.
      overwrite: Boolean indicating whether to overwrite output directory.
      num_animations: Integer with number of distinct animations to create.
      num_frames: Integer with number of frames in each animation.
      fps: Integer with frame rate for the animation.
      num_points_irs: Number of points to be used for the IRS plots.
    """
    # Fix the random seed for reproducibility.
    random_state = np.random.RandomState(0)

    # Create the output directory if necessary.
    if tf.gfile.IsDirectory(output_dir):
        if overwrite:
            tf.gfile.DeleteRecursively(output_dir)
        else:
            raise ValueError(
                "Directory already exists and overwrite is False.")

    # Automatically set the proper data set if necessary. We replace the active
    # gin config as this will lead to a valid gin config file where the data set
    # is present.
    # Obtain the dataset name from the gin config of the previous step.
    gin_config_file = os.path.join(model_dir, "results", "gin", "train.gin")
    gin_dict = results.gin_dict(gin_config_file)
    gin.bind_parameter("dataset.name",
                       gin_dict["dataset.name"].replace("'", ""))

    # Automatically infer the activation function from gin config.
    activation_str = gin_dict["reconstruction_loss.activation"]
    if activation_str == "'logits'":
        activation = sigmoid
    elif activation_str == "'tanh'":
        activation = tanh
    else:
        raise ValueError(
            "Activation function could not be inferred from gin config.")

    dataset = named_data.get_named_ground_truth_data()
    num_pics = 64
    module_path = os.path.join(model_dir, "tfhub")

    with hub.eval_function_for_module(module_path) as f:
        # Save reconstructions.
        real_pics = dataset.sample_observations(num_pics, random_state)
        raw_pics = f(dict(images=real_pics),
                     signature="reconstructions",
                     as_dict=True)["images"]
        pics = activation(raw_pics)
        paired_pics = np.concatenate((real_pics, pics), axis=2)
        paired_pics = [
            paired_pics[i, :, :, :] for i in range(paired_pics.shape[0])
        ]
        results_dir = os.path.join(output_dir, "reconstructions")
        if not gfile.IsDirectory(results_dir):
            gfile.MakeDirs(results_dir)
        visualize_util.grid_save_images(
            paired_pics, os.path.join(results_dir, "reconstructions.jpg"))

        # Save samples.
        def _decoder(latent_vectors):
            return f(dict(latent_vectors=latent_vectors),
                     signature="decoder",
                     as_dict=True)["images"]

        num_latent = int(gin_dict["encoder.num_latent"])
        num_pics = 64
        random_codes = random_state.normal(0, 1, [num_pics, num_latent])
        pics = activation(_decoder(random_codes))
        results_dir = os.path.join(output_dir, "sampled")
        if not gfile.IsDirectory(results_dir):
            gfile.MakeDirs(results_dir)
        visualize_util.grid_save_images(
            pics, os.path.join(results_dir, "samples.jpg"))

        # Save latent traversals.
        result = f(
            dict(images=dataset.sample_observations(num_pics, random_state)),
            signature="gaussian_encoder",
            as_dict=True)
        means = result["mean"]
        logvars = result["logvar"]
        results_dir = os.path.join(output_dir, "traversals")
        if not gfile.IsDirectory(results_dir):
            gfile.MakeDirs(results_dir)
        for i in range(means.shape[1]):
            pics = activation(
                latent_traversal_1d_multi_dim(_decoder, means[i, :], None))
            file_name = os.path.join(results_dir, "traversals{}.jpg".format(i))
            visualize_util.grid_save_images([pics], file_name)

        # Save the latent traversal animations.
        results_dir = os.path.join(output_dir, "animated_traversals")
        if not gfile.IsDirectory(results_dir):
            gfile.MakeDirs(results_dir)

        # Cycle through quantiles of a standard Gaussian.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0),
                                 num_frames,
                                 axis=0)
                code[:, j] = visualize_util.cycle_gaussian(
                    base_code[j], num_frames)
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "std_gaussian_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Cycle through quantiles of a fitted Gaussian.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0),
                                 num_frames,
                                 axis=0)
                loc = np.mean(means[:, j])
                total_variance = np.mean(np.exp(logvars[:, j])) + np.var(
                    means[:, j])
                code[:, j] = visualize_util.cycle_gaussian(
                    base_code[j],
                    num_frames,
                    loc=loc,
                    scale=np.sqrt(total_variance))
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "fitted_gaussian_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Cycle through [-2, 2] interval.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0),
                                 num_frames,
                                 axis=0)
                code[:, j] = visualize_util.cycle_interval(
                    base_code[j], num_frames, -2., 2.)
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "fixed_interval_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Cycle linearly through +-2 std dev of a fitted Gaussian.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0),
                                 num_frames,
                                 axis=0)
                loc = np.mean(means[:, j])
                total_variance = np.mean(np.exp(logvars[:, j])) + np.var(
                    means[:, j])
                scale = np.sqrt(total_variance)
                code[:, j] = visualize_util.cycle_interval(
                    base_code[j], num_frames, loc - 2. * scale,
                    loc + 2. * scale)
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "conf_interval_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Cycle linearly through minmax of a fitted Gaussian.
        for i, base_code in enumerate(means[:num_animations]):
            images = []
            for j in range(base_code.shape[0]):
                code = np.repeat(np.expand_dims(base_code, 0),
                                 num_frames,
                                 axis=0)
                code[:, j] = visualize_util.cycle_interval(
                    base_code[j], num_frames, np.min(means[:, j]),
                    np.max(means[:, j]))
                images.append(np.array(activation(_decoder(code))))
            filename = os.path.join(results_dir,
                                    "minmax_interval_cycle%d.gif" % i)
            visualize_util.save_animation(np.array(images), filename, fps)

        # Interventional effects visualization.
        factors = dataset.sample_factors(num_points_irs, random_state)
        obs = dataset.sample_observations_from_factors(factors, random_state)
        batch_size = 64
        num_outputs = 0
        latents = []
        while num_outputs < obs.shape[0]:
            input_batch = obs[num_outputs:min(num_outputs +
                                              batch_size, obs.shape[0])]
            output_batch = f(dict(images=input_batch),
                             signature="gaussian_encoder",
                             as_dict=True)["mean"]
            latents.append(output_batch)
            num_outputs += batch_size
        latents = np.concatenate(latents)

        results_dir = os.path.join(output_dir, "interventional_effects")
        vis_all_interventional_effects(factors, latents, results_dir)

    # Finally, we clear the gin config that we have set.
    gin.clear_config()
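A usage sketch, under the assumption that model_dir contains the `results/gin/train.gin` config and the `tfhub` module the function reads (paths are hypothetical):

visualize("/tmp/trained_model", "/tmp/trained_model/vis",
          overwrite=True, num_animations=5, num_frames=20, fps=10)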
Code Example #12
	# return (img_arr, tf.one_hot(label, num_classes), filename, classname)
	return (img_arr, label, filename, classname)


if __name__ == '__main__':

	TFR_DIR = "gs://kfp-testing/retin_oct/conv_256_10may/tfrecords"
	LABEL_LIST = "gs://kfp-testing/retin_oct/conv_256_10may/labels.json"
	# TFR_DIR = "/home/aakashbajaj5311/conv_data_256/conv_256_10may/tfrecords/"
	# LABEL_LIST = "/home/aakashbajaj5311/conv_data_256/conv_256_10may/labels.json"

	train_path = os.path.join(TFR_DIR, "test")

	training_filenames = []

	if tf_reader.IsDirectory(train_path):
		for filename in tf.gfile.ListDirectory(train_path):
			filepath = os.path.join(train_path, filename)
			training_filenames.append(filepath)
	else:
		print("Invalid training directory. Exiting.......\n")
		exit(0)

	# training_filenames = ["/home/techno/oct_data/retin_oct_conv_9may_tfrecords_test_test-00000-of-00005"]
	training_filenames = [training_filenames[0]]
	print(training_filenames)

	dataset = tf.data.TFRecordDataset(training_filenames)
	dataset = dataset.map(tfr_parser)
	iter = dataset.make_one_shot_iterator()
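	# (A hedged continuation sketch, not from the source: consume the
	# one-shot iterator in a TF 1.x session.)
	next_element = iter.get_next()
	with tf.Session() as sess:
		try:
			while True:
				img_arr, label, filename, classname = sess.run(next_element)
				print(filename, classname, img_arr.shape)
		except tf.errors.OutOfRangeError:
			pass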
Code Example #13
    def __init__(self, dir, write_graph=True):
        if not gfile.IsDirectory(dir):
            gfile.MakeDirs(dir)
        self.writer = tf.summary.FileWriter(
            dir, graph=tf.get_default_graph() if write_graph else None)
Code Example #14
def visualize_supervised(supervised_model_dir,
                         trained_vae_model_dir,
                         output_dir,
                         overwrite=False):
    """Takes trained model from model_dir and visualizes it in output_dir.

  Args:
    model_dir: Path to directory where the trained model is saved.
    output_dir: Path to output directory.
    overwrite: Boolean indicating whether to overwrite output directory.
    num_animations: Integer with number of distinct animations to create.
    num_frames: Integer with number of frames in each animation.
    fps: Integer with frame rate for the animation.
    num_points_irs: Number of points to be used for the IRS plots.
  """
    # Fix the random seed for reproducibility.
    random_state = np.random.RandomState(0)

    # Create the output directory if necessary.
    if tf.gfile.IsDirectory(output_dir):
        if overwrite:
            tf.gfile.DeleteRecursively(output_dir)
        else:
            raise ValueError(
                "Directory already exists and overwrite is False.")

    # Automatically set the proper data set if necessary. We replace the active
    # gin config as this will lead to a valid gin config file where the data set
    # is present.
    # Obtain the dataset name from the gin config of the previous step.
    gin_config_file = os.path.join(supervised_model_dir, "results", "gin",
                                   "evaluate.gin")
    gin_dict = results.gin_dict(gin_config_file)
    gin.bind_parameter("dataset.name",
                       gin_dict["dataset.name"].replace("'", ""))

    # Automatically infer the activation function from gin config.
    activation_str = gin_dict["reconstruction_loss.activation"]
    if activation_str == "'logits'":
        activation = sigmoid
    elif activation_str == "'tanh'":
        activation = tanh
    else:
        raise ValueError(
            "Activation function could not be inferred from gin config.")

    _, dataset = named_data.get_named_ground_truth_data()
    num_pics = 64
    supervised_module_path = os.path.join(supervised_model_dir, "tfhub")

    with hub.eval_function_for_module(supervised_module_path) as f:
        trained_vae_path = os.path.join(trained_vae_model_dir, "tfhub")
        with hub.eval_function_for_module(trained_vae_path) as g:

            def _representation_function(x):
                """Computes representation vector for input images."""
                output = g(dict(images=x),
                           signature="representation",
                           as_dict=True)
                return np.array(output["default"])

            # Save reconstructions.
            real_pics = dataset.sample_observations(num_pics, random_state)
            #      real_pics, _ = dataset.sample_observations_and_labels(num_pics, random_state)
            representations = _representation_function(real_pics)

        print(real_pics.shape, representations.shape)
        decoded_pics = f(dict(representations=representations),
                         signature="reconstructions",
                         as_dict=True)['images']
        pics = activation(decoded_pics)
        paired_pics = np.concatenate((real_pics, pics), axis=2)
        paired_pics = [
            paired_pics[i, :, :, :] for i in range(paired_pics.shape[0])
        ]
        results_dir = os.path.join(output_dir, "reconstructions")
        if not gfile.IsDirectory(results_dir):
            gfile.MakeDirs(results_dir)
        visualize_util.grid_save_images(
            paired_pics, os.path.join(results_dir, "reconstructions.jpg"))

    # Finally, we clear the gin config that we have set.
    gin.clear_config()
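A usage sketch, assuming both model directories contain the `tfhub` modules and gin results the function reads (paths are hypothetical):

visualize_supervised("/tmp/supervised_model", "/tmp/vae_model",
                     "/tmp/supervised_vis", overwrite=True)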
Code Example #15
    return tf.estimator.export.ServingInputReceiver(features, received_tensors)


if __name__ == '__main__':

    TFR_DIR = "/home/aakashbajaj5311/conv_256/conv_256_10may/tfrecords"
    LABEL_LIST = "/home/aakashbajaj5311/conv_256/conv_256_10may/labels.json"

    train_path = os.path.join(TFR_DIR, "train")
    test_path = os.path.join(TFR_DIR, "test")

    training_filenames = []
    testing_filenames = []

    if tf_reader.IsDirectory(train_path):
        for filename in tf.gfile.ListDirectory(train_path):
            filepath = os.path.join(train_path, filename)
            training_filenames.append(filepath)
    else:
        print("Invalid training directory. Exiting.......\n")
        exit(0)

    if tf_reader.IsDirectory(test_path):
        for filename in tf.gfile.ListDirectory(test_path):
            filepath = os.path.join(test_path, filename)
            testing_filenames.append(filepath)

    try:
        with tf_reader.GFile(LABEL_LIST, 'rb') as fl:
            labels_bytes = fl.read()
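            # (The excerpt is truncated here in the source; a hedged
            # completion, assuming labels.json holds a JSON list of class
            # names and `import json` at the top of the file.)
            labels = json.loads(labels_bytes.decode('utf-8'))
            print("Loaded %d labels" % len(labels))
    except Exception:
        print("Could not read label list. Exiting.......\n")
        exit(1)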