Example #1
def main(argv):
    del argv  # Unused.

    # Save all results in subdirectories of following path
    base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), FLAGS.base_dir)

    # Overwrite output or not (for rerunning script)
    overwrite = True

    # Results directory of Ada-GVAE
    path_adagvae = os.path.join(base_path, FLAGS.output_dir)

    gin_bindings = [
        "model.random_seed = {}".format(FLAGS.seed),
        "subset.name = '{}'".format(FLAGS.subset),
        "encoder.num_latent = {}".format(FLAGS.dim)
    ]
    # Train model. Training is configured with a gin config
    train.train_with_gin(os.path.join(path_adagvae, 'model'), overwrite,
                         ['baselines/adagvae/adagvae_train.gin'], gin_bindings)

    # Extract mean representation of latent space
    representation_path = os.path.join(path_adagvae, "representation")
    model_path = os.path.join(path_adagvae, "model")
    postprocess_gin = ['baselines/adagvae/adagvae_postprocess.gin']  # This contains the settings.
    postprocess.postprocess_with_gin(model_path, representation_path, overwrite,
                                     postprocess_gin)

    # Compute DCI metric
    result_path = os.path.join(path_adagvae, "metrics", "dci")
    evaluate.evaluate_with_gin(representation_path, result_path, overwrite,
                               ['baselines/adagvae/adagvae_dci.gin'])
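The same train/postprocess/evaluate pipeline extends to further metrics by pointing evaluate_with_gin at another gin config. A minimal sketch, assuming a hypothetical 'baselines/adagvae/adagvae_mig.gin' config exists alongside the DCI one:

    # Compute MIG metric (the config file name is an assumption).
    result_path = os.path.join(path_adagvae, "metrics", "mig")
    evaluate.evaluate_with_gin(representation_path, result_path, overwrite,
                               ['baselines/adagvae/adagvae_mig.gin'])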
Example #2
 def setUp(self):
     super(PostprocessTest, self).setUp()
     self.model_dir = self.create_tempdir(
         "model", cleanup=absltest.TempFileCleanup.OFF).full_path
     train.train_with_gin(self.model_dir, True, [
         resources.get_file(
             "config/tests/methods/unsupervised/train_test.gin")
     ], [])
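The fixture only trains a model; a test method would then exercise postprocessing against self.model_dir. A minimal sketch, reusing the mean.gin test config that appears elsewhere in this listing (the method name is illustrative):

 def test_postprocess(self):
     config = resources.get_file(
         "config/tests/postprocessing/postprocess_test_configs/mean.gin")
     postprocess.postprocess_with_gin(
         self.model_dir, self.create_tempdir("output").full_path, True,
         [config])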
Example #3
 def setUp(self):
     super(EvaluateTest, self).setUp()
     self.model_dir = self.create_tempdir(
         "model", cleanup=absltest.TempFileCleanup.OFF).full_path
     model_config = resources.get_file(
         "config/tests/methods/unsupervised/train_test.gin")
     train.train_with_gin(self.model_dir, True, [model_config])
     self.output_dir = self.create_tempdir(
         "output", cleanup=absltest.TempFileCleanup.OFF).full_path
     postprocess_config = resources.get_file(
         "config/tests/postprocessing/postprocess_test_configs/mean.gin")
     postprocess.postprocess_with_gin(self.model_dir, self.output_dir, True,
                                      [postprocess_config])
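With the representation written to self.output_dir, a test can then run any metric config against it. A minimal sketch (the method name and evaluation config path are assumptions):

 def test_evaluate(self):
     eval_config = resources.get_file(
         "config/tests/evaluation/evaluate_test.gin")
     evaluate.evaluate_with_gin(
         self.output_dir, self.create_tempdir("metrics").full_path, True,
         [eval_config])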
Example #4
    def setUp(self):
        super(EvaluateTest, self).setUp()
        self.model1_dir = self.create_tempdir(
            "model1/model", cleanup=absltest.TempFileCleanup.OFF).full_path
        self.model2_dir = self.create_tempdir(
            "model2/model", cleanup=absltest.TempFileCleanup.OFF).full_path
        model_config = resources.get_file(
            "config/tests/methods/unsupervised/train_test.gin")
        gin.clear_config()
        train.train_with_gin(self.model1_dir, True, [model_config])
        train.train_with_gin(self.model2_dir, True, [model_config])

        self.output_dir = self.create_tempdir(
            "output", cleanup=absltest.TempFileCleanup.OFF).full_path
Example #5
 def test_visualize_sigmoid(self, activation):
   activation_binding = (
       "reconstruction_loss.activation = '{}'".format(activation))
   self.model_dir = self.create_tempdir(
       "model_{}".format(activation),
       cleanup=absltest.TempFileCleanup.OFF).full_path
   train.train_with_gin(self.model_dir, True, [
       resources.get_file("config/tests/methods/unsupervised/train_test.gin")
   ], [activation_binding])
   visualize_model.visualize(
       self.model_dir,
       self.create_tempdir("visualization_{}".format(activation)).full_path,
       True,
       num_animations=1,
       num_frames=4)
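The activation argument implies an absl parameterized test case. A plausible decorator for the method above (the exact parameter tuples are assumptions):

 @parameterized.parameters(("logits",), ("tanh",))
 def test_visualize_sigmoid(self, activation):
     ...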
Example #6
def train_tf_main():
    aicrowd_helpers.register_progress(0.0)
    start_time = time.time()
    train.train_with_gin(os.path.join(experiment_output_path, "model"),
                         overwrite, [get_full_path("model.gin")], gin_bindings)
    # path=os.path.join(experiment_output_path, str(time.time()))
    # train.train_with_gin(
    #     path, overwrite,
    #     [get_full_path("model.gin")], gin_bindings)
    elapsed_time = time.time() - start_time
    print("Elapsed time: {}".format(elapsed_time))
    ########################################################################
    # Register Progress (end of training, start of representation extraction)
    ########################################################################
    aicrowd_helpers.register_progress(0.90)
    # Extract the mean representation for both of these models.
    representation_path = os.path.join(experiment_output_path,
                                       "representation")
    model_path = os.path.join(experiment_output_path, "model")
    # model_path =path
    # representation_path=path
    # This contains the settings:
    postprocess_gin = [get_full_path("postprocess.gin")]
    postprocess.postprocess_with_gin(model_path, representation_path,
                                     overwrite, postprocess_gin)
    print("Written output to : ", experiment_output_path)
    ########################################################################
    # Register Progress (of representation extraction)
    ########################################################################
    aicrowd_helpers.register_progress(1.0)
    ########################################################################
    # Submit Results for evaluation
    ########################################################################
    cuda.close()
    aicrowd_helpers.submit()

    return elapsed_time, gin_bindings
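The function relies on several module-level names (experiment_output_path, overwrite, gin_bindings, get_full_path) defined elsewhere in the script. A minimal sketch of plausible definitions, purely as assumptions about the surrounding AIcrowd starter code:

import os

# Illustrative values; the real script derives these from its environment.
experiment_output_path = os.getenv("AICROWD_OUTPUT_PATH", "./scratch/shared")
overwrite = True
gin_bindings = [
    "dataset.name = '{}'".format(os.getenv("AICROWD_DATASET_NAME", "cars3d"))
]

def get_full_path(filename):
    # Resolve config files relative to this script.
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)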
Example #7
# By default, we do not overwrite output directories. Set this to True, if you
# want to overwrite (in particular, if you rerun this script several times).
overwrite = False

# 1. Train a standard VAE (already implemented in disentanglement_lib).
# ------------------------------------------------------------------------------

# We save the results in a `vae` subfolder.
path_vae = os.path.join(base_path, "vae")

# The main training protocol of disentanglement_lib is defined in the
# disentanglement_lib.methods.unsupervised.train module. To configure
# training we need to provide a gin config. For a standard VAE, you may have a
# look at model.gin on how to do this.
train.train_with_gin(os.path.join(path_vae, "model"), overwrite, ["model.gin"])
# After this command, you should have a `vae` subfolder with a model that was
# trained for a few steps (in reality, you will want to train many more steps).


# 2. Train a custom VAE model.
# ------------------------------------------------------------------------------
# To train a custom model, we have to provide an implementation of the class
# GaussianEncoderModel in the
# disentanglement_lib.methods.unsupervised.gaussian_encoder_model module.
# For simplicity, we will subclass the BaseVAE class in
# disentanglement_lib.methods.unsupervised.vae, which trains a VAE-style
# model where the loss is given by a reconstruction loss (configured via gin)
# plus a custom regularizer (which needs to be implemented).
@gin.configurable("BottleneckVAE")  # This will allow us to reference the model.
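The snippet breaks off at the decorator. A minimal sketch of how the class body might continue, following the regularizer hook that vae.BaseVAE exposes in disentanglement_lib (gamma and target are illustrative gin-configurable parameters):

class BottleneckVAE(vae.BaseVAE):
    """Toy VAE variant penalizing deviation of the KL term from a target."""

    def __init__(self, gamma=gin.REQUIRED, target=gin.REQUIRED):
        self.gamma = gamma
        self.target = target

    def regularizer(self, kl_loss, z_mean, z_logvar, z_sampled):
        # Only the aggregate KL term is used by this regularizer.
        del z_mean, z_logvar, z_sampled
        return self.gamma * tf.math.abs(kl_loss - self.target)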
Example #8
#   # Two versions of the model are exported:
#   #   - one for "test" mode (the default tag)
#   #   - one for "training" mode ("is_training" tag)
#   # In the case that the encoder/decoder have dropout, or BN layers, these two
#   # graphs are different.
#   tags_and_args = [
#       ({"train"}, {"is_training": True}),
#       (set(), {"is_training": False}),
#   ]
#   spec = hub.create_module_spec(module_fn, tags_and_args=tags_and_args,
#                                 drop_collections=drop_collections)
#   spec.export(export_path, checkpoint_path=checkpoint_path)
########################################################################
aicrowd_helpers.register_progress(0.0)
start_time = time.time()
train.train_with_gin(os.path.join(experiment_output_path, "model"), overwrite,
                     [get_full_path("model.gin")], gin_bindings)
# path=os.path.join(experiment_output_path, str(time.time()))
# train.train_with_gin(
#     path, overwrite,
#     [get_full_path("model.gin")], gin_bindings)
elapsed_time = time.time() - start_time
print("Elapsed time: {}".format(elapsed_time))
########################################################################
# Register Progress (end of training, start of representation extraction)
########################################################################
Example #9
# By default, we do not overwrite output directories. Set this to True, if you
# want to overwrite (in particular, if you rerun this script several times).
overwrite = True

# 1. Train a standard VAE (already implemented in disentanglement_lib).
# ------------------------------------------------------------------------------

# We save the results in a `vae` subfolder.
dataset = "modelnet"
repetitions = 1
model_name_list = ["tcvae", "betavae", "factorvae"]

print("Running for {} repetitions".format(repetitions))
for model_name in model_name_list:
    print("Running {}".format(model_name))
    for repetition in range(repetitions):

        print("Training repetition {}".format(repetition))
        path_vae = os.path.join(
            base_path, dataset + "_" + model_name + "_" + str(repetition))
        model_path = os.path.join(path_vae, model_name)
        print("Model path {}".format(model_path))
        print("Dataset {}".format(dataset))
        print("Model name {}".format(model_name))
        print([dataset + "_" + model_name + ".gin"])
        train.train_with_gin(os.path.join(model_path, "model_name"), overwrite,
                             ["model.gin"])
        train.train_with_gin(os.path.join(model_path, "model_name"), overwrite,
                             [dataset + "_" + model_name + ".gin"])

Example #10
 def test_train_model(self, gin_configs, gin_bindings):
   # We clear the gin config before running. Otherwise, if a prior test fails,
   # the gin config is locked and the current test fails.
   gin.clear_config()
   train.train_with_gin(self.create_tempdir().full_path, True, gin_configs,
                        gin_bindings)
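The gin_configs and gin_bindings arguments are supplied by a parameterized decorator not shown here. A sketch of how the cases might look (config paths and bindings are assumptions):

 @parameterized.parameters(
     (["config/tests/methods/unsupervised/train_test.gin"], []),
     (["config/tests/methods/unsupervised/train_test.gin"],
      ["model.random_seed = 42"]),
 )
 def test_train_model(self, gin_configs, gin_bindings):
     ...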
Example #11
def main(unused_argv):
    # Obtain the study to reproduce.
    study = reproduce.STUDIES[FLAGS.study]
    dataset_names = ["cars3d", "smallnorb"]

    for dataset_name in dataset_names:
        postprocess_config_files = sorted(study.get_postprocess_config_files())
        for beta in [1e-3, 1e-2, 0.1, 1, 10, 100, 1000]:
            # Set correct output directory.
            if FLAGS.output_directory is None:
                output_directory = os.path.join("output", "{study}",
                                                dataset_name, "{beta}")
            else:
                output_directory = FLAGS.output_directory

            # Insert model number and study name into path if necessary.
            output_directory = output_directory.format(
                beta=str(beta), study="test_benchmark-experiment-6.1")

            # Model training.

            model_bindings, model_config_file = get_model_configs(
                beta, dataset_name)
            logging.info("Training model...")
            model_dir = os.path.join(output_directory, "model")
            model_bindings = [
                "model.name = '{}'".format(
                    os.path.basename(model_config_file).replace(".gin", "")),
                # "model.model_num = {}".format(FLAGS.model_num),
            ] + model_bindings
            train.train_with_gin(model_dir, FLAGS.overwrite,
                                 [model_config_file], model_bindings)

            # We visualize reconstructions, samples and latent space traversals.
            visualize_dir = os.path.join(output_directory, "visualizations")
            visualize_model.visualize(model_dir, visualize_dir,
                                      FLAGS.overwrite)

            # We extract the different representations and save them to disk.
            random_state = np.random.RandomState(0)
            for config in postprocess_config_files:
                post_name = os.path.basename(config).replace(".gin", "")
                logging.info("Extracting representation %s...", post_name)
                post_dir = os.path.join(output_directory, "postprocessed",
                                        post_name)
                postprocess_bindings = [
                    "postprocess.random_seed = {}".format(
                        random_state.randint(2**16)),
                    "postprocess.name = '{}'".format(post_name)
                ]
                postprocess.postprocess_with_gin(model_dir, post_dir,
                                                 FLAGS.overwrite, [config],
                                                 postprocess_bindings)

            # Get representations and save them to disk.
            gin.parse_config_files_and_bindings(
                [], ["dataset.name = '{}'".format(dataset_name)])
            dataset = named_data.get_named_ground_truth_data()
            factors, reps = get_representations(dataset, post_dir,
                                                dataset_name)
            with open(os.path.join(post_dir, "factors.p"), "wb") as f:
                pickle.dump(factors, f)
            with open(os.path.join(post_dir, "reps.p"), "wb") as f:
                pickle.dump(reps, f)
            gin.clear_config()
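The factors and representations saved above can be reloaded later for analysis. A minimal sketch (the helper name is illustrative; the file names match the dumps in main()):

def load_representations(post_dir):
    with open(os.path.join(post_dir, "factors.p"), "rb") as f:
        factors = pickle.load(f)
    with open(os.path.join(post_dir, "reps.p"), "rb") as f:
        reps = pickle.load(f)
    return factors, reps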
Example #12
def main():
    parser = argparse.ArgumentParser(description='Project description.')
    parser.add_argument('--result_dir',
                        help='Results directory.',
                        type=str,
                        default='/mnt/hdd/repo_results/Ramiel/sweep')
    parser.add_argument('--study',
                        help='Name of the study.',
                        type=str,
                        default='unsupervised_study_v1')
    parser.add_argument('--model_gin',
                        help='Name of the gin config.',
                        type=str,
                        default='test_model.gin')
    parser.add_argument('--model_name',
                        help='Name of the model.',
                        type=str,
                        default='GroupVAE')
    parser.add_argument('--vae_beta',
                        help='Beta-VAE beta.',
                        type=str,
                        default='1')
    parser.add_argument('--hyps',
                        help='Hyperparameters of rec_mat_oth_spl_seed.',
                        type=str,
                        default='1_1_1_1_1_0')
    parser.add_argument('--overwrite',
                        help='Whether to overwrite output directory.',
                        type=_str_to_bool,
                        default=False)
    parser.add_argument('--dataset',
                        help='Dataset.',
                        type=str,
                        default='dsprites_full')
    parser.add_argument('--recons_type',
                        help='Reconstruction loss type.',
                        type=str,
                        default='bernoulli_loss')
    args = parser.parse_args()

    # 1. Settings
    study = reproduce.STUDIES[args.study]
    args.hyps = args.hyps.split('_')
    print()
    study.print_postprocess_config()
    print()
    study.print_eval_config()

    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus),
                  "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized
            print(e)

    # Call the training module to train the custom model.
    if args.model_name == "GroupVAE":
        dir_name = "GroupVAE-" + "-".join(args.hyps)
    elif args.model_name == "vae":
        dir_name = "LieVAE-" + args.vae_beta + "-" + args.hyps[5]
    else:
        raise ValueError("Unknown model name: {}".format(args.model_name))
    output_directory = os.path.join(args.result_dir, dir_name)
    model_dir = os.path.join(output_directory, "model")
    gin_bindings = [
        "model.model = @" + args.model_name + "()",
        "vae.beta = " + args.vae_beta,
        "GroupVAE.hy_rec = " + args.hyps[0],
        "GroupVAE.hy_mat = " + args.hyps[1],
        "GroupVAE.hy_oth = " + args.hyps[2],
        "GroupVAE.hy_spl = " + args.hyps[3],
        "GroupVAE.hy_ncut = " + args.hyps[4],
        "model.random_seed = " + args.hyps[5],
        "dataset.name = '" + args.dataset + "'",
        "reconstruction_loss.loss_fn = @" + args.recons_type,
    ]
    train.train_with_gin(model_dir, args.overwrite, [args.model_gin],
                         gin_bindings)

    # We fix the random seed for the postprocessing and evaluation steps (each
    # config gets a different but reproducible seed derived from a master seed of
    # 0). The model seed was set via the gin bindings and configs of the study.
    random_state = np.random.RandomState(0)

    # We extract the different representations and save them to disk.
    postprocess_config_files = sorted(study.get_postprocess_config_files())
    for config in postprocess_config_files:
        post_name = os.path.basename(config).replace(".gin", "")
        print("Extracting representation " + post_name + "...")
        post_dir = os.path.join(output_directory, "postprocessed", post_name)
        postprocess_bindings = [
            "postprocess.random_seed = {}".format(random_state.randint(2**32)),
            "postprocess.name = '{}'".format(post_name)
        ]
        postprocess.postprocess_with_gin(model_dir, post_dir, args.overwrite,
                                         [config], postprocess_bindings)

    # Iterate through the disentanglement metrics.
    eval_configs = sorted(study.get_eval_config_files())
    blacklist = ['downstream_task_logistic_regression.gin']
    # blacklist = [
    # 'downstream_task_logistic_regression.gin', 'beta_vae_sklearn.gin',
    # 'dci.gin', 'downstream_task_boosted_trees.gin', 'mig.gin',
    # 'modularity_explicitness.gin', 'sap_score.gin', 'unsupervised.gin'
    # ]
    for config in postprocess_config_files:
        post_name = os.path.basename(config).replace(".gin", "")
        post_dir = os.path.join(output_directory, "postprocessed", post_name)
        # Now, we compute all the specified scores.
        for gin_eval_config in eval_configs:
            if os.path.basename(gin_eval_config) not in blacklist:
                metric_name = os.path.basename(gin_eval_config).replace(
                    ".gin", "")
                print("Computing metric " + metric_name + " on " + post_name +
                      "...")
                metric_dir = os.path.join(output_directory, "metrics",
                                          post_name, metric_name)
                eval_bindings = [
                    "evaluation.random_seed = {}".format(
                        random_state.randint(2**32)),
                    "evaluation.name = '{}'".format(metric_name)
                ]
                evaluate.evaluate_with_gin(post_dir, metric_dir,
                                           args.overwrite, [gin_eval_config],
                                           eval_bindings)

    # We visualize reconstructions, samples and latent space traversals.
    visualize_dir = os.path.join(output_directory, "visualizations")
    visualize_model.visualize(model_dir, visualize_dir, args.overwrite)
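This script (and the variant that follows) passes type=_str_to_bool to argparse without defining the helper. A minimal sketch of what it might look like:

def _str_to_bool(value):
    # argparse passes the raw command-line string to this callback.
    if value.lower() in ('yes', 'true', 't', '1'):
        return True
    if value.lower() in ('no', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError("Expected a boolean, got %r" % value)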
def main():
  parser = argparse.ArgumentParser(description='Project description.')
  parser.add_argument('--study', help='Name of the study.', type=str, default='unsupervised_study_v1')
  parser.add_argument('--output_directory', help='Output directory of experiments.', type=str, default=None)
  parser.add_argument('--model_dir', help='Directory to take trained model from.', type=str, default=None)
  parser.add_argument('--model_num', help='Integer with model number to train.', type=int, default=None)
  parser.add_argument('--only_print', help='Whether to only print the hyperparameter settings.', type=_str_to_bool, default=False)
  parser.add_argument('--overwrite', help='Whether to overwrite output directory.', type=_str_to_bool, default=False)
  args = parser.parse_args()
  # logging.set_verbosity('error')
  # logging.set_stderrthreshold('error')
  gpus = tf.config.experimental.list_physical_devices('GPU')
  if gpus:
    try:
      # Currently, memory growth needs to be the same across GPUs
      for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
      logical_gpus = tf.config.experimental.list_logical_devices('GPU')
      print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
      # Memory growth must be set before GPUs have been initialized
      print(e)

  # Obtain the study to reproduce.
  study = reproduce.STUDIES[args.study]

  # Print the hyperparameter settings.
  if args.model_dir is None:
    study.print_model_config(args.model_num)
  else:
    print("Model directory (skipped training):")
    print("--")
    print(args.model_dir)
  print()
  study.print_postprocess_config()
  print()
  study.print_eval_config()
  if args.only_print:
    return

  # Set correct output directory.
  if args.output_directory is None:
    if args.model_dir is None:
      output_directory = os.path.join("output", "{study}", "{model_num}")
    else:
      output_directory = "output"
  else:
    output_directory = args.output_directory

  # Insert model number and study name into path if necessary.
  output_directory = output_directory.format(model_num=str(args.model_num),
                                             study=str(args.study))

  # Model training (if model directory is not provided).
  if args.model_dir is None:
    model_bindings, model_config_file = study.get_model_config(args.model_num)
    print("Training model...")
    model_dir = os.path.join(output_directory, "model")
    model_bindings = [
        "model.name = '{}'".format(
            os.path.basename(model_config_file).replace(".gin", "")),
        "model.model_num = {}".format(args.model_num),
    ] + model_bindings
    train.train_with_gin(model_dir, args.overwrite, [model_config_file],
                         model_bindings)
  else:
    print("Skipped training...")
    model_dir = args.model_dir

  # We visualize reconstructions, samples and latent space traversals.
  visualize_dir = os.path.join(output_directory, "visualizations")
  visualize_model.visualize(model_dir, visualize_dir, args.overwrite)

  # We fix the random seed for the postprocessing and evaluation steps (each
  # config gets a different but reproducible seed derived from a master seed of
  # 0). The model seed was set via the gin bindings and configs of the study.
  random_state = np.random.RandomState(0)

  # We extract the different representations and save them to disk.
  postprocess_config_files = sorted(study.get_postprocess_config_files())
  for config in postprocess_config_files:
    post_name = os.path.basename(config).replace(".gin", "")
    print("Extracting representation %s..." % post_name)
    post_dir = os.path.join(output_directory, "postprocessed", post_name)
    postprocess_bindings = [
        "postprocess.random_seed = {}".format(random_state.randint(2**32)),
        "postprocess.name = '{}'".format(post_name)
    ]
    postprocess.postprocess_with_gin(model_dir, post_dir, args.overwrite,
                                     [config], postprocess_bindings)

  # Iterate through the disentanglement metrics.
  eval_configs = sorted(study.get_eval_config_files())
  blacklist = ['downstream_task_logistic_regression.gin']
  for config in postprocess_config_files:
    post_name = os.path.basename(config).replace(".gin", "")
    post_dir = os.path.join(output_directory, "postprocessed",
                            post_name)
    # Now, we compute all the specified scores.
    for gin_eval_config in eval_configs:
      if os.path.basename(gin_eval_config) not in blacklist:
        metric_name = os.path.basename(gin_eval_config).replace(".gin", "")
        print("Computing metric '%s' on '%s'..." % (metric_name, post_name))
        metric_dir = os.path.join(output_directory, "metrics", post_name,
                                  metric_name)
        eval_bindings = [
            "evaluation.random_seed = {}".format(random_state.randint(2**32)),
            "evaluation.name = '{}'".format(metric_name)
        ]
        evaluate.evaluate_with_gin(post_dir, metric_dir, args.overwrite,
                                   [gin_eval_config], eval_bindings)
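As a quick sanity check before launching a full reproduction, the study's config inventory can be inspected directly. A small sketch, assuming the reproduce module lives at its usual disentanglement_lib location:

from disentanglement_lib.config import reproduce

study = reproduce.STUDIES["unsupervised_study_v1"]
print(len(study.get_postprocess_config_files()), "postprocess configs")
print(len(study.get_eval_config_files()), "eval configs")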