def main(argv):
    """Train an AdaGVAE model, extract its representation and compute DCI.

    Every artefact is written below
    ``<directory of this script>/<FLAGS.base_dir>/<FLAGS.output_dir>`` in the
    ``model``, ``representation`` and ``metrics/dci`` subdirectories.

    Args:
        argv: Positional command-line arguments; ignored.
    """
    del argv  # Unused.

    # Existing results are always replaced so the script can be rerun as-is.
    overwrite = True

    script_dir = os.path.dirname(os.path.abspath(__file__))
    path_adagvae = os.path.join(script_dir, FLAGS.base_dir, FLAGS.output_dir)
    model_path = os.path.join(path_adagvae, "model")
    representation_path = os.path.join(path_adagvae, "representation")
    result_path = os.path.join(path_adagvae, "metrics", "dci")

    gin_bindings = [
        "model.random_seed = {}".format(FLAGS.seed),
        "subset.name = '{}'".format(FLAGS.subset),
        "encoder.num_latent = {}".format(FLAGS.dim),
    ]

    # 1. Training, configured by the train gin file plus the bindings above.
    train.train_with_gin(
        model_path,
        overwrite,
        ["baselines/adagvae/adagvae_train.gin"],
        gin_bindings,
    )

    # 2. Representation extraction; the settings live in the gin file.
    postprocess.postprocess_with_gin(
        model_path,
        representation_path,
        overwrite,
        ["baselines/adagvae/adagvae_postprocess.gin"],
    )

    # 3. DCI metric on the extracted representation.
    evaluate.evaluate_with_gin(
        representation_path,
        result_path,
        overwrite,
        ["baselines/adagvae/adagvae_dci.gin"],
    )
def test_postprocess(self, gin_config):
    """Run the postprocessing pipeline once with the given gin config file.

    Args:
        gin_config: Path of the gin config file to postprocess with.
    """
    # Start from a clean gin state: if an earlier test failed, the config is
    # still locked and this test would fail as well.
    gin.clear_config()

    model_dir = self.model_dir
    output_dir = self.create_tempdir().full_path
    postprocess.postprocess_with_gin(
        model_dir, output_dir, True, [gin_config], [])
def setUp(self):
    """Prepare a trained model and its postprocessed output as fixtures.

    Trains a model from the unsupervised test gin config into
    ``self.model_dir`` and postprocesses it with the ``mean.gin`` test config
    into ``self.output_dir``. Both temporary directories are created with
    cleanup switched off.
    """
    super(EvaluateTest, self).setUp()

    # Model fixture.
    model_tempdir = self.create_tempdir(
        "model", cleanup=absltest.TempFileCleanup.OFF)
    self.model_dir = model_tempdir.full_path
    model_config = resources.get_file(
        "config/tests/methods/unsupervised/train_test.gin")
    train.train_with_gin(self.model_dir, True, [model_config])

    # Representation fixture derived from the model above.
    output_tempdir = self.create_tempdir(
        "output", cleanup=absltest.TempFileCleanup.OFF)
    self.output_dir = output_tempdir.full_path
    postprocess_config = resources.get_file(
        "config/tests/postprocessing/postprocess_test_configs/mean.gin")
    postprocess.postprocess_with_gin(
        self.model_dir, self.output_dir, True, [postprocess_config])
def train_tf_main():
    """Train the model, extract its representation and submit the result.

    Relies on the module-level names ``experiment_output_path``,
    ``overwrite``, ``gin_bindings`` and ``get_full_path``. Progress is reported
    through ``aicrowd_helpers`` at 0.0 (start), 0.90 (training finished) and
    1.0 (representation written).

    Returns:
        A ``(elapsed_time, gin_bindings)`` tuple, where ``elapsed_time`` is the
        wall-clock duration of the training call in seconds.
    """
    aicrowd_helpers.register_progress(0.0)

    # Only the training call itself is timed.
    started_at = time.time()
    train.train_with_gin(
        os.path.join(experiment_output_path, "model"),
        overwrite,
        [get_full_path("model.gin")],
        gin_bindings,
    )
    elapsed_time = time.time() - started_at

    banner = (
        "##################################Elapsed TIME##############################"
    )
    print(banner)
    print(elapsed_time)
    print(banner)

    # End of training, start of representation extraction.
    aicrowd_helpers.register_progress(0.90)

    model_path = os.path.join(experiment_output_path, "model")
    representation_path = os.path.join(experiment_output_path, "representation")
    # The extraction settings live in postprocess.gin.
    postprocess.postprocess_with_gin(
        model_path,
        representation_path,
        overwrite,
        [get_full_path("postprocess.gin")],
    )
    print("Written output to : ", experiment_output_path)

    # Representation extraction finished.
    aicrowd_helpers.register_progress(1.0)

    # Submit the results for evaluation.
    cuda.close()
    aicrowd_helpers.submit()
    return elapsed_time, gin_bindings
def eval(study, output_directory, model_dir):
    """Postprocess a trained model and compute every metric of a study.

    Representations are written to
    ``<output_directory>/postprocessed/<postprocess name>`` and metric results
    to ``<output_directory>/metrics/<postprocess name>/<metric name>``.

    Args:
        study: Object providing ``get_postprocess_config_files()`` and
            ``get_eval_config_files()``.
        output_directory: Root directory for all outputs.
        model_dir: Directory of the trained model to postprocess.
    """

    def _stem(config_path):
        # Config name: file name with every ".gin" removed.
        return os.path.basename(config_path).replace(".gin", "")

    # Postprocessing and evaluation seeds are drawn, in call order, from one
    # generator with master seed 0, so each config gets a different but
    # reproducible seed. The model seed comes from the study's own configs.
    seed_source = np.random.RandomState(0)

    # Stage 1: extract each representation and save it to disk.
    post_configs = sorted(study.get_postprocess_config_files())
    for post_config in post_configs:
        post_name = _stem(post_config)
        logging.info("Extracting representation %s...", post_name)
        post_dir = os.path.join(output_directory, "postprocessed", post_name)
        postprocess_bindings = [
            "postprocess.random_seed = {}".format(seed_source.randint(2**16)),
            "postprocess.name = '{}'".format(post_name),
        ]
        postprocess.postprocess_with_gin(
            model_dir, post_dir, FLAGS.overwrite, [post_config],
            postprocess_bindings)

    # Stage 2: score every representation with every metric.
    metric_configs = sorted(study.get_eval_config_files())
    for post_config in post_configs:
        post_name = _stem(post_config)
        post_dir = os.path.join(output_directory, "postprocessed", post_name)
        for metric_config in metric_configs:
            metric_name = _stem(metric_config)
            logging.info("Computing metric '%s' on '%s'...", metric_name,
                         post_name)
            metric_dir = os.path.join(output_directory, "metrics", post_name,
                                      metric_name)
            eval_bindings = [
                "evaluation.random_seed = {}".format(
                    seed_source.randint(2**16)),
                "evaluation.name = '{}'".format(metric_name),
            ]
            evaluate.evaluate_with_gin(
                post_dir, metric_dir, FLAGS.overwrite, [metric_config],
                eval_bindings)
def main(unused_argv):
    """Run the complete 3dshapes pipeline for the model in ``FLAGS.model``.

    Steps, in order:
      1. split the dataset into train/validation parts (retried every 30
         seconds until it succeeds),
      2. train the VAE, visualize it and destroy the temporary splits,
      3. extract the mean representation,
      4. compute the MIG, BetaVAE, FactorVAE and DCI metrics,
      5. train the downstream factor-regression and reconstruction tasks.

    All outputs are written below
    ``3dshapes_models/<model><dataset>_<rng>``.

    Args:
        unused_argv: Positional command-line arguments; ignored.

    Raises:
        ValueError: If ``FLAGS.model`` is not one of the supported models.
    """
    base_path = "3dshapes_models"

    # Validate the model name before any work is done. Previously an unknown
    # name only failed (with an unbound ``gin_file``) after preprocessing.
    supported_models = ("vae", "bvae", "b8vae", "fvae", "btcvae", "annvae",
                        "randomvae")
    if FLAGS.model not in supported_models:
        raise ValueError("Unsupported model '%s'; expected one of: %s" %
                         (FLAGS.model, ", ".join(supported_models)))
    gin_file = "3d_shape_%s.gin" % FLAGS.model

    # --- Preprocessing (best effort: keep retrying until it succeeds)
    preprocess_gin_bindings = [
        "dataset.name = '%s'" % (FLAGS.dataset),
        "preprocess.preprocess_fn = @split_train_and_validation_per_model",
        "split_train_and_validation_per_model.random_seed = %d" % (FLAGS.rng)
    ]
    while True:
        try:
            print("\n\n*- Preprocessing '%s' \n\n" % (FLAGS.dataset))
            preprocess.preprocess_with_gin(
                FLAGS.dataset,
                FLAGS.model,
                overwrite=FLAGS.overwrite,
                gin_config_files=None,
                gin_bindings=preprocess_gin_bindings)
        except Exception as err:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit can still stop the loop; report why we are retrying.
            print("\n\n*- Preprocessing failed (%r), retrying in 30s \n\n" %
                  (err,))
            time.sleep(30)
        else:
            print("\n\n*- Preprocessing DONE \n\n")
            break

    # --- Training
    print("\n\n*- Training '%s' \n\n" % (FLAGS.model))
    split_name = FLAGS.dataset + '_' + FLAGS.model + '_' + str(FLAGS.rng)
    vae_gin_bindings = [
        "model.random_seed = %d" % (FLAGS.rng),
        "dataset.name = '%s'" % (split_name)
    ]
    vae_path = os.path.join(base_path,
                            FLAGS.model + FLAGS.dataset + '_' + str(FLAGS.rng))
    model_path = os.path.join(vae_path, "model")
    unsupervised_train_partial.train_with_gin(model_path, FLAGS.overwrite,
                                              [gin_file], vae_gin_bindings)
    visualize_model.visualize(model_path, vae_path + "/vis", FLAGS.overwrite)
    preprocess.destroy_train_and_validation_splits(split_name)
    print("\n\n*- Training DONE \n\n")

    # --- Postprocessing
    print("\n\n*- Postprocessing '%s' \n\n" % (FLAGS.model))
    postprocess_gin_bindings = [
        "postprocess.postprocess_fn = @mean_representation",
        "dataset.name='dummy_data'",
        "postprocess.random_seed = %d" % (FLAGS.rng)
    ]
    representation_path = os.path.join(vae_path, "representation")
    postprocess.postprocess_with_gin(
        model_path,
        representation_path,
        FLAGS.overwrite,
        gin_config_files=None,
        gin_bindings=postprocess_gin_bindings)
    print("\n\n*- Postprocessing DONE \n\n")

    # --- Evaluate disentanglement metrics
    # Each entry: (announcement, results subdirectory, evaluation function,
    # metric-specific bindings). The dataset, seed and discretizer bindings
    # are shared by all four metrics.
    discretizer_bindings = [
        "discretizer.discretizer_fn = @histogram_discretizer",
        "discretizer.num_bins = 20"
    ]
    metrics = [
        ("\n\n*- Evaluating MIG.", "mig", "mig", [
            "mig.num_train = 10000",
        ]),
        ("\n\n*- Evaluating BetaVEA.", "bvae", "beta_vae_sklearn", [
            "beta_vae_sklearn.batch_size = 16",
            "beta_vae_sklearn.num_train = 10000",
            "beta_vae_sklearn.num_eval = 5000",
        ]),
        ("\n\n*- Evaluating FactorVAE.", "fvae", "factor_vae_score", [
            "factor_vae_score.batch_size = 16",
            "factor_vae_score.num_train = 10000",
            "factor_vae_score.num_eval = 5000",
            "factor_vae_score.num_variance_estimate = 10000",
        ]),
        ("\n\n*- Evaluating DCI.", "dci", "dci", [
            "dci.batch_size = 16",
            "dci.num_train = 10000",
            "dci.num_test = 5000",
        ]),
    ]
    for announcement, subdir, evaluation_fn, metric_bindings in metrics:
        print(announcement)
        gin_bindings = [
            "evaluation.evaluation_fn = @" + evaluation_fn,
            "dataset.name='3dshapes'",
            "evaluation.random_seed = 0",
        ] + metric_bindings + discretizer_bindings
        result_path = os.path.join(vae_path, "metrics", subdir)
        evaluate.evaluate_with_gin(
            representation_path,
            result_path,
            FLAGS.overwrite,
            gin_bindings=gin_bindings)
    print("\n\n*- Evaluation COMPLETED \n\n")

    # --- Downstream tasks
    print("\n\n*- Training downstream factor regression '%s' \n\n" %
          (FLAGS.model))
    downstream_regression_train_gin_bindings = [
        "evaluation.evaluation_fn = @downstream_regression_on_representations",
        "dataset.name = '3dshapes_task'",
        "evaluation.random_seed = 0",
        "downstream_regression_on_representations.holdout_dataset_name = '3dshapes_holdout'",
        "downstream_regression_on_representations.num_train = [127500]",
        "downstream_regression_on_representations.num_test = 22500",
        "downstream_regression_on_representations.num_holdout = 80000",
        "predictor.predictor_fn = @mlp_regressor",
        "mlp_regressor.hidden_layer_sizes = [16, 8]",
        "mlp_regressor.activation = 'logistic'",
        "mlp_regressor.max_iter = 50",
        "mlp_regressor.random_state = 0"
    ]
    result_path = os.path.join(vae_path, "metrics", "factor_regression")
    evaluate.evaluate_with_gin(
        representation_path,
        result_path,
        FLAGS.overwrite,
        gin_config_files=None,
        gin_bindings=downstream_regression_train_gin_bindings)
    print("\n\n*- Training downstream factor regression DONE \n\n")

    print("\n\n*- Training downstream reconstruction '%s' \n\n" %
          (FLAGS.model))
    downstream_reconstruction_train_gin_bindings = [
        "supervised_model.model = @downstream_decoder()",
        "supervised_model.batch_size = 64",
        "supervised_model.training_steps = 30000",
        "supervised_model.eval_steps = 1000",
        "supervised_model.random_seed = 0",
        "supervised_model.holdout_dataset_name = '3dshapes_holdout'",
        "dataset.name='3dshapes_task'",
        "decoder_optimizer.optimizer_fn = @AdamOptimizer",
        "AdamOptimizer.beta1 = 0.9",
        "AdamOptimizer.beta2 = 0.999",
        "AdamOptimizer.epsilon = 1e-08",
        "AdamOptimizer.learning_rate = 0.0001",
        "AdamOptimizer.name = 'Adam'",
        "AdamOptimizer.use_locking = False",
        "decoder.decoder_fn = @deconv_decoder",
        "reconstruction_loss.loss_fn = @l2_loss"
    ]
    result_path = os.path.join(vae_path, "metrics", "reconstruction")
    supervised_train_partial.train_with_gin(
        result_path,
        representation_path,
        FLAGS.overwrite,
        gin_bindings=downstream_reconstruction_train_gin_bindings)
    visualize_model.visualize_supervised(result_path, representation_path,
                                         result_path + "/vis",
                                         FLAGS.overwrite)
    print("\n\n*- Training downstream reconstruction DONE \n\n")
    print("\n\n*- Training & evaluation COMPLETED \n\n")
train.train_with_gin(os.path.join(path_custom_vae, "model"), overwrite, ["model.gin"], gin_bindings) # As before, after this command, you should have a `BottleneckVAE` subfolder # with a model that was trained for a few steps. # 3. Extract the mean representation for both of these models. # ------------------------------------------------------------------------------ # To compute disentanglement metrics, we require a representation function that # takes as input an image and that outputs a vector with the representation. # We extract the mean of the encoder from both models using the following code. for path in [path_vae, path_custom_vae]: representation_path = os.path.join(path, "representation") model_path = os.path.join(path, "model") postprocess_gin = ["postprocess.gin"] # This contains the settings. # postprocess.postprocess_with_gin defines the standard extraction protocol. postprocess.postprocess_with_gin(model_path, representation_path, overwrite, postprocess_gin) # 4. Compute the Mutual Information Gap (already implemented) for both models. # ------------------------------------------------------------------------------ # The main evaluation protocol of disentanglement_lib is defined in the # disentanglement_lib.evaluation.evaluate module. Again, we have to provide a # gin configuration. We could define a .gin config file; however, in this case # we show how all the configuration settings can be set using gin bindings. # We use the Mutual Information Gap (with a low number of samples to make it # faster). To learn more, have a look at the different scores in # disentanglement_lib.evaluation.evaluate.metrics and the predefined .gin # configuration files in # disentanglement_lib/config/unsupervised_study_v1/metrics_configs/(...). gin_bindings = [ "evaluation.evaluation_fn = @mig", "dataset.name='auto'", "evaluation.random_seed = 0", "mig.num_train=1000",
def main(unused_argv):
    """Train, visualize and postprocess one model per dataset and beta value.

    For each dataset in ``cars3d`` / ``smallnorb`` and each beta in the sweep,
    a model is trained and visualized, every postprocess config of the study
    is applied, and the ground-truth factors and representations returned by
    ``get_representations`` are pickled next to the postprocessed output as
    ``factors.p`` and ``reps.p``.

    The output directory is ``FLAGS.output_directory`` if set, otherwise
    ``output/{study}/<dataset>/{beta}``; the ``{study}`` and ``{beta}``
    placeholders are filled in either way.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    # Obtain the study to reproduce.
    study = reproduce.STUDIES[FLAGS.study]

    dataset_names = ["cars3d", "smallnorb"]
    for dataset_name in dataset_names:
        for beta in [1e-3, 1e-2, 0.1, 1, 10, 100, 1000]:
            # Set correct output directory.
            if FLAGS.output_directory is None:
                output_directory = os.path.join("output", "{study}",
                                                dataset_name, "{beta}")
            else:
                output_directory = FLAGS.output_directory
            # Insert beta and study name into the path if necessary.
            output_directory = output_directory.format(
                beta=str(beta), study="test_benchmark-experiment-6.1")

            # Model training.
            model_bindings, model_config_file = get_model_configs(
                beta, dataset_name)
            logging.info("Training model...")
            model_dir = os.path.join(output_directory, "model")
            model_bindings = [
                "model.name = '{}'".format(
                    os.path.basename(model_config_file)).replace(".gin", ""),
            ] + model_bindings
            train.train_with_gin(model_dir, FLAGS.overwrite,
                                 [model_config_file], model_bindings)

            # Visualize reconstructions, samples and latent space traversals.
            visualize_dir = os.path.join(output_directory, "visualizations")
            visualize_model.visualize(model_dir, visualize_dir,
                                      FLAGS.overwrite)

            # Extract the different representations and save them to disk.
            # Seeds are drawn from a generator re-created with master seed 0
            # for every model, so they are reproducible.
            random_state = np.random.RandomState(0)
            postprocess_config_files = sorted(
                study.get_postprocess_config_files())
            for config in postprocess_config_files:
                post_name = os.path.basename(config).replace(".gin", "")
                logging.info("Extracting representation %s...", post_name)
                post_dir = os.path.join(output_directory, "postprocessed",
                                        post_name)
                postprocess_bindings = [
                    "postprocess.random_seed = {}".format(
                        random_state.randint(2**16)),
                    "postprocess.name = '{}'".format(post_name)
                ]
                postprocess.postprocess_with_gin(model_dir, post_dir,
                                                 FLAGS.overwrite, [config],
                                                 postprocess_bindings)

                # Get representations and save them to disk.
                # NOTE(review): the original indentation was lost; this step
                # is assumed to run once per postprocess config because it
                # reads ``post_dir`` -- confirm against the original file.
                gin.parse_config_files_and_bindings(
                    [],
                    ["dataset.name = {}".format("'{}'".format(dataset_name))])
                try:
                    dataset = named_data.get_named_ground_truth_data()
                    factors, reps = get_representations(dataset, post_dir,
                                                        dataset_name)
                    # ``with`` closes the files even if pickling fails; the
                    # previous bare ``open(...)`` calls leaked the handles.
                    with open(os.path.join(post_dir, "factors.p"),
                              "wb") as factors_file:
                        pickle.dump(factors, factors_file)
                    with open(os.path.join(post_dir, "reps.p"),
                              "wb") as reps_file:
                        pickle.dump(reps, reps_file)
                finally:
                    # Always release the gin config so that an error here
                    # does not leave it locked for later gin users.
                    gin.clear_config()
# get_eval_res(uid): evaluation of one run, identified by `uid`.
#
# NOTE(review): the whitespace of this block has been collapsed, so the
# statement nesting (where the function body, the `with` block and the nested
# helper definitions end) cannot be determined from the text below -- confirm
# against the original file before relying on it.
#
# What the visible statements do, in textual order:
#   * load ../../results/eval_output/<uid>.npz and read the arrays `c`,
#     `x_rand`, `x_enc`, `x_randY` and `x_baseline`; the last one is passed
#     through `sigmoid`;
#   * call postprocess.postprocess_with_gin (overwrite=True) for every
#     postprocess config of the 'unsupervised_study_v1' study; the hard-coded
#     `config` path assigned just before the loop is overwritten by the loop
#     variable;
#   * open the module stored under post_dir + '/tfhub' and evaluate its
#     "representation" signature on the inputs, the baseline, x_rand, x_enc
#     and x_randY;
#   * parse mig.gin with the evaluation bindings, build mutual-information
#     matrices with compute_mi_matrix, and derive the MI averages and the MIG
#     values from them;
#   * get_fid_with_uid(uid) passes [x_baseline, x_rand, x_enc] to
#     get_fid_from_array; the results are printed together with the MI and
#     MIG values.
#
# `dlib_model_path`, `dta`, `args`, `sigmoid` and `get_fid_from_array` are
# used here but not defined in this block.
def get_eval_res(uid): data = np.load('../../results/eval_output/{}.npz'.format(uid)) c = data['c'] x_rand = data['x_rand'] x_enc = data['x_enc'] x_randY = data['x_randY'] x_baseline_logits = data['x_baseline'] x_baseline = sigmoid(x_baseline_logits) model_dir = dlib_model_path[:-6] output_directory = '../../results/eval_output/{}/'.format(uid) config = '/hdd_c/data/disentanglement_lib/disentanglement_lib/config/unsupervised_study_v1/postprocess_configs/mean.gin' study = reproduce.STUDIES['unsupervised_study_v1'] random_state = np.random.RandomState(0) postprocess_config_files = sorted(study.get_postprocess_config_files()) for config in postprocess_config_files: post_name = os.path.basename(config).replace(".gin", "") #logging.info("Extracting representation %s...", post_name) post_dir = os.path.join(output_directory, "postprocessed", post_name) postprocess_bindings = [ "postprocess.random_seed = {}".format(random_state.randint(2**32)), "postprocess.name = '{}'".format(post_name) ] postprocess.postprocess_with_gin(model_dir, post_dir, True, [config], postprocess_bindings) post_processed_dir = post_dir + '/tfhub' with hub.eval_function_for_module(post_processed_dir) as f: # Save reconstructions. 
inputs = dta.images if inputs.ndim < 4: inputs = np.expand_dims(inputs, 3) inputs = inputs[:c.shape[0]] assert inputs.shape == x_baseline.shape inputs_c = f(dict(images=inputs), signature="representation", as_dict=True)["default"] baseline_c = f(dict(images=x_baseline), signature="representation", as_dict=True)["default"] x_rand_c = f(dict(images=x_rand), signature="representation", as_dict=True)["default"] x_enc_c = f(dict(images=x_enc), signature="representation", as_dict=True)["default"] x_randY_c = f(dict(images=x_randY), signature="representation", as_dict=True)["default"] eval_bindings = [ "evaluation.random_seed = {}".format(random_state.randint(2**32)), "evaluation.name = 'MI'" ] gin_config_files = [ '/hdd_c/data/disentanglement_lib/disentanglement_lib/config/unsupervised_study_v1/metric_configs/mig.gin' ] gin.parse_config_files_and_bindings(gin_config_files, eval_bindings) def compute_mi_matrix(mus_train, ys_train, need_discretized_1=False, need_discretized_2=False): score_dict = {} if need_discretized_1: mus_train = utils.make_discretizer(mus_train) if need_discretized_2: ys_train = utils.make_discretizer(ys_train) m = utils.discrete_mutual_info(mus_train, ys_train) assert m.shape[0] == mus_train.shape[0] assert m.shape[1] == ys_train.shape[0] # m is [num_latents, num_factors] entropy = utils.discrete_entropy(ys_train) return m, entropy # compute MI matrix x_rand_mi_matrix, x_rand_entropy = compute_mi_matrix( np.transpose(x_rand_c), np.transpose(inputs_c), True, True) x_enc_mi_matrix, x_enc_entropy = compute_mi_matrix(np.transpose(x_enc_c), np.transpose(inputs_c), True, True) baseline_mi_matrix, baseline_entropy = compute_mi_matrix( np.transpose(baseline_c), np.transpose(inputs_c), True, True) x_randY_mi_matrix, x_randY_entropy = compute_mi_matrix( np.transpose(x_randY_c), np.transpose(inputs_c), True, True) x_enc_mi_matrix_gd, x_enc_entropy_gd = compute_mi_matrix( np.transpose(x_enc_c), np.transpose(c), True, False) baseline_mi_matrix_gd, 
baseline_entropy_gd = compute_mi_matrix( np.transpose(baseline_c), np.transpose(c), True, False) # compute MI and MIG x_enc_mi_average = ( np.trace(np.divide(x_enc_mi_matrix, baseline_entropy)) / float(baseline_mi_matrix.shape[0])) x_enc_m = np.divide(x_enc_mi_matrix_gd, baseline_entropy_gd) sorted_x_enc_m = np.sort(x_enc_m, axis=0)[::-1] x_enc_MIG = (np.mean(sorted_x_enc_m[0, :] - sorted_x_enc_m[1, :])) x_rand_mi_average = ( np.trace(np.divide(x_rand_mi_matrix, baseline_entropy)) / float(baseline_mi_matrix.shape[0])) randY_mi_average = ( np.trace(np.divide(x_randY_mi_matrix, x_randY_entropy)) / float(x_randY_mi_matrix.shape[0])) baseline_mi_average = ( np.trace(np.divide(baseline_mi_matrix, baseline_entropy)) / float(baseline_mi_matrix.shape[0])) baseline_m = baseline_mi_matrix_gd sorted_baseline_m = np.sort(baseline_m, axis=0)[::-1] baseline_MIG = (np.mean( np.divide(sorted_baseline_m[0, :] - sorted_baseline_m[1, :], baseline_entropy_gd))) def get_fid_with_uid(uid): convert2image_path = '../../results/eval_output/converted_images/' originaldata_path = '../../dataset/{}'.format(args.dataset) data = np.load('../../results/eval_output/{}.npz'.format(uid)) path2inceptionnet = '../../inception' c = data['c'] x_rand = data['x_rand'] x_enc = data['x_enc'] x_baseline_logits = data['x_baseline'] x_baseline = sigmoid(x_baseline_logits) fid_list = get_fid_from_array([x_baseline, x_rand, x_enc], convert2image_path, originaldata_path, path2inceptionnet) return fid_list fid_list = get_fid_with_uid(args.id) print('Evaluation results:') print('Beta-TCVAE FID: {}'.format(fid_list[0])) print('DS-VAE FID (Random Y): {}'.format(fid_list[1])) print('DS-VAE FID: {}'.format(fid_list[2])) print('DS-VAE MIG: {}'.format(x_enc_MIG)) print('Beta-TCVAE MIG: {}'.format(baseline_MIG)) print('DS-VAE MI: {}'.format(x_enc_mi_average)) print('DS-VAE MI (Random Z): {}'.format(x_rand_mi_average)) print('DS-VAE MI (Random Y): {}'.format(randY_mi_average)) print('Beta-TCVAE MI: 
{}'.format(baseline_mi_average))
def main():
    """Train one model of the sweep, then postprocess, evaluate and visualize.

    Command-line options select the results directory, the study, the model
    gin config, the model (``GroupVAE`` or ``vae``), the hyperparameters and
    the dataset. ``--hyps`` is an underscore-separated list of at least six
    fields ``rec_mat_oth_spl_ncut_seed``; the sixth field is the model random
    seed.

    Outputs are written to ``<result_dir>/<run name>`` in the ``model``,
    ``postprocessed``, ``metrics`` and ``visualizations`` subdirectories.

    Raises:
        SystemExit: Through ``parser.error`` if ``--model_name`` is not
            supported or ``--hyps`` has fewer than six fields.
    """
    parser = argparse.ArgumentParser(description='Project description.')
    parser.add_argument('--result_dir',
                        help='Results directory.',
                        type=str,
                        default='/mnt/hdd/repo_results/Ramiel/sweep')
    parser.add_argument('--study',
                        help='Name of the study.',
                        type=str,
                        default='unsupervised_study_v1')
    parser.add_argument('--model_gin',
                        help='Name of the gin config.',
                        type=str,
                        default='test_model.gin')
    parser.add_argument('--model_name',
                        help='Name of the model.',
                        type=str,
                        default='GroupVAE')
    parser.add_argument('--vae_beta',
                        help='Beta-VAE beta.',
                        type=str,
                        default='1')
    parser.add_argument('--hyps',
                        help='Hyperparameters of rec_mat_oth_spl_seed.',
                        type=str,
                        default='1_1_1_1_1_0')
    parser.add_argument('--overwrite',
                        help='Whether to overwrite output directory.',
                        type=_str_to_bool,
                        default=False)
    parser.add_argument('--dataset',
                        help='Dataset.',
                        type=str,
                        default='dsprites_full')
    parser.add_argument('--recons_type',
                        help='Reconstruction loss type.',
                        type=str,
                        default='bernoulli_loss')
    args = parser.parse_args()

    # Fail early with a clear message. Previously an unknown model name left
    # ``dir_name`` unbound and a short ``--hyps`` raised a bare IndexError,
    # both only after the GPU setup.
    if args.model_name not in ("GroupVAE", "vae"):
        parser.error("--model_name must be 'GroupVAE' or 'vae', got %r" %
                     (args.model_name,))
    args.hyps = args.hyps.split('_')
    if len(args.hyps) < 6:
        parser.error("--hyps needs at least 6 '_'-separated fields, got %d" %
                     len(args.hyps))

    # 1. Settings
    study = reproduce.STUDIES[args.study]
    print()
    study.print_postprocess_config()
    print()
    study.print_eval_config()

    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs.
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus),
                  "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized.
            print(e)

    # Call training module to train the custom model.
    if args.model_name == "GroupVAE":
        dir_name = "GroupVAE-" + "-".join(args.hyps)
    else:  # "vae"; validated above.
        dir_name = "LieVAE-" + args.vae_beta + "-" + args.hyps[5]
    output_directory = os.path.join(args.result_dir, dir_name)
    model_dir = os.path.join(output_directory, "model")
    gin_bindings = [
        "model.model = @" + args.model_name + "()",
        "vae.beta = " + args.vae_beta,
        "GroupVAE.hy_rec = " + args.hyps[0],
        "GroupVAE.hy_mat = " + args.hyps[1],
        "GroupVAE.hy_oth = " + args.hyps[2],
        "GroupVAE.hy_spl = " + args.hyps[3],
        "GroupVAE.hy_ncut = " + args.hyps[4],
        "model.random_seed = " + args.hyps[5],
        "dataset.name = '" + args.dataset + "'",
        "reconstruction_loss.loss_fn = @" + args.recons_type
    ]
    train.train_with_gin(model_dir, args.overwrite, [args.model_gin],
                         gin_bindings)

    # We fix the random seed for the postprocessing and evaluation steps (each
    # config gets a different but reproducible seed derived from a master seed
    # of 0). The model seed was set via the gin bindings above.
    random_state = np.random.RandomState(0)

    # We extract the different representations and save them to disk.
    postprocess_config_files = sorted(study.get_postprocess_config_files())
    for config in postprocess_config_files:
        post_name = os.path.basename(config).replace(".gin", "")
        print("Extracting representation " + post_name + "...")
        post_dir = os.path.join(output_directory, "postprocessed", post_name)
        postprocess_bindings = [
            "postprocess.random_seed = {}".format(random_state.randint(2**32)),
            "postprocess.name = '{}'".format(post_name)
        ]
        postprocess.postprocess_with_gin(model_dir, post_dir, args.overwrite,
                                         [config], postprocess_bindings)

    # Iterate through the disentanglement metrics, skipping blacklisted ones.
    eval_configs = sorted(study.get_eval_config_files())
    blacklist = ['downstream_task_logistic_regression.gin']
    for config in postprocess_config_files:
        post_name = os.path.basename(config).replace(".gin", "")
        post_dir = os.path.join(output_directory, "postprocessed", post_name)
        # Now, we compute all the specified scores.
        for gin_eval_config in eval_configs:
            if os.path.basename(gin_eval_config) in blacklist:
                # No seed is drawn for skipped metrics, so the seeds of the
                # remaining metrics stay unchanged.
                continue
            metric_name = os.path.basename(gin_eval_config).replace(
                ".gin", "")
            print("Computing metric " + metric_name + " on " + post_name +
                  "...")
            metric_dir = os.path.join(output_directory, "metrics", post_name,
                                      metric_name)
            eval_bindings = [
                "evaluation.random_seed = {}".format(
                    random_state.randint(2**32)),
                "evaluation.name = '{}'".format(metric_name)
            ]
            evaluate.evaluate_with_gin(post_dir, metric_dir, args.overwrite,
                                       [gin_eval_config], eval_bindings)

    # We visualize reconstructions, samples and latent space traversals.
    visualize_dir = os.path.join(output_directory, "visualizations")
    visualize_model.visualize(model_dir, visualize_dir, args.overwrite)
def main():
    """Reproduce one model of a study: train, visualize, postprocess, evaluate.

    With ``--model_dir`` the training step is skipped and the given model is
    used instead. With ``--only_print`` only the configuration is printed.
    Outputs go to ``--output_directory`` if given (``{study}`` and
    ``{model_num}`` placeholders are filled in), otherwise to
    ``output/<study>/<model_num>`` when training or ``output`` when a model
    directory was supplied.
    """
    parser = argparse.ArgumentParser(description='Project description.')
    parser.add_argument(
        '--study', type=str, default='unsupervised_study_v1',
        help='Name of the study.')
    parser.add_argument(
        '--output_directory', type=str, default=None,
        help='Output directory of experiments.')
    parser.add_argument(
        '--model_dir', type=str, default=None,
        help='Directory to take trained model from.')
    parser.add_argument(
        '--model_num', type=int, default=None,
        help='Integer with model number to train.')
    parser.add_argument(
        '--only_print', type=_str_to_bool, default=False,
        help='Whether to only print the hyperparameter settings.')
    parser.add_argument(
        '--overwrite', type=_str_to_bool, default=False,
        help='Whether to overwrite output directory.')
    args = parser.parse_args()

    # Let TensorFlow grow GPU memory on demand; the setting has to be the same
    # on every GPU and must be applied before the GPUs are initialized.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus),
                  "Logical GPUs")
        except RuntimeError as e:
            print(e)

    study = reproduce.STUDIES[args.study]
    train_model = args.model_dir is None

    # Report the settings that are about to be used.
    if train_model:
        study.print_model_config(args.model_num)
    else:
        print("Model directory (skipped training):")
        print("--")
        print(args.model_dir)
    print()
    study.print_postprocess_config()
    print()
    study.print_eval_config()
    if args.only_print:
        return

    # Pick the output directory template, then fill in its placeholders.
    if args.output_directory is not None:
        output_directory = args.output_directory
    elif train_model:
        output_directory = os.path.join("output", "{study}", "{model_num}")
    else:
        output_directory = "output"
    output_directory = output_directory.format(
        model_num=str(args.model_num), study=str(args.study))

    # Train unless a model directory was handed in.
    if train_model:
        model_bindings, model_config_file = study.get_model_config(
            args.model_num)
        print("Training model...")
        model_dir = os.path.join(output_directory, "model")
        model_bindings = [
            "model.name = '{}'".format(
                os.path.basename(model_config_file)).replace(".gin", ""),
            "model.model_num = {}".format(args.model_num),
        ] + model_bindings
        train.train_with_gin(model_dir, args.overwrite, [model_config_file],
                             model_bindings)
    else:
        print("Skipped training...")
        model_dir = args.model_dir

    # Reconstructions, samples and latent space traversals.
    visualize_model.visualize(
        model_dir, os.path.join(output_directory, "visualizations"),
        args.overwrite)

    # Seeds for postprocessing and evaluation are drawn, in call order, from
    # a single generator with master seed 0: different per config, but
    # reproducible. The model seed comes from the study's bindings/configs.
    seed_source = np.random.RandomState(0)

    def _stem(config_path):
        # Config name: file name with every ".gin" removed.
        return os.path.basename(config_path).replace(".gin", "")

    # Extract the different representations and save them to disk.
    post_configs = sorted(study.get_postprocess_config_files())
    for post_config in post_configs:
        post_name = _stem(post_config)
        print("Extracting representation %s..." % post_name)
        post_dir = os.path.join(output_directory, "postprocessed", post_name)
        postprocess_bindings = [
            "postprocess.random_seed = {}".format(seed_source.randint(2**32)),
            "postprocess.name = '{}'".format(post_name),
        ]
        postprocess.postprocess_with_gin(
            model_dir, post_dir, args.overwrite, [post_config],
            postprocess_bindings)

    # Compute every non-blacklisted metric on every representation.
    metric_configs = sorted(study.get_eval_config_files())
    blacklist = ['downstream_task_logistic_regression.gin']
    for post_config in post_configs:
        post_name = _stem(post_config)
        post_dir = os.path.join(output_directory, "postprocessed", post_name)
        for metric_config in metric_configs:
            if os.path.basename(metric_config) in blacklist:
                continue
            metric_name = _stem(metric_config)
            print("Computing metric '%s' on '%s'..." %
                  (metric_name, post_name))
            metric_dir = os.path.join(output_directory, "metrics", post_name,
                                      metric_name)
            eval_bindings = [
                "evaluation.random_seed = {}".format(
                    seed_source.randint(2**32)),
                "evaluation.name = '{}'".format(metric_name),
            ]
            evaluate.evaluate_with_gin(
                post_dir, metric_dir, args.overwrite, [metric_config],
                eval_bindings)
# ------------------------------------------------------------------------------
# Disentanglement metrics need a representation function that maps an image
# to a representation vector. The code below extracts the encoder mean for
# that purpose; it is currently switched off by the `if False:` guard.
if False:
    postprocess_gin_bindings = [
        "postprocess.postprocess_fn = @mean_representation",
        "dataset.name='dummy_data'",
        "postprocess.random_seed = 0",
    ]
    for path in [path_vae]:
        representation_path = os.path.join(path, "representation")
        model_path = os.path.join(path, "model")
        # postprocess.postprocess_with_gin defines the standard extraction
        # protocol.
        postprocess.postprocess_with_gin(
            model_path,
            representation_path,
            overwrite,
            gin_config_files=None,
            gin_bindings=postprocess_gin_bindings,
        )

# 4. Train a downstream task
downstream_reconstruction_train_gin_bindings = [
    "model.model = @downstream_decoder()",
    "model.batch_size = 64",
    "model.training_steps = 5",
    "model.random_seed = 0",
    "dataset.name='3dshapes_task_s1000'",
    "decoder_optimizer.optimizer_fn = @AdamOptimizer",
    "AdamOptimizer.beta1 = 0.9",
    "AdamOptimizer.beta2 = 0.999",
    "AdamOptimizer.epsilon = 1e-08",
    "AdamOptimizer.learning_rate = 0.0001",
    "AdamOptimizer.name = 'Adam'",
    "AdamOptimizer.use_locking = False",
    "decoder.decoder_fn = @deconv_decoder",
    "reconstruction_loss.loss_fn = @l2_loss",
]