def main():
    args = parse_args()
    fs_utils.create_dir_if_not_exists(args.out_dir)
    dataset_loader = get_test_dataset_loader(args.dataset)
    choose_subset_func = lambda arr: arr
    model_has_dropout = args.model_name.endswith('_dropout')
    layers_indices = choose_layers_indices(args.model_name)
    models_paths = get_models_paths(args.model_file_path)

    main.logger.info("PyTorch device detected: %s" % DEVICE)
    layers_out_activations = do_all_predictions_and_aggregate(
        models_paths,
        lambda m: do_predictions_with_model(dataset_loader, m, model_has_dropout, layers_indices,
                                            args.feature_map_processing))

    for li, li_out_activations in layers_out_activations.items():
        out_file = "%s_%d_acts" % (args.feature_map_processing, li)
        if args.agg_mode in ('aggregate', 'both'):
            out_activations = np.hstack(tuple(li_out_activations))
            acts_to_save = choose_subset_func(out_activations)
            np.save(op.join(args.out_dir, "%s.npy" % out_file), acts_to_save)
        if args.agg_mode in ('dump_all', 'both'):
            out_dir = op.join(args.out_dir, out_file)
            fs_utils.create_dir_if_not_exists(out_dir)
            for i, i_model_acts in enumerate(li_out_activations):
                acts_to_save = choose_subset_func(i_model_acts)
                model_index = int(op.splitext(op.basename(models_paths[i]))[0].split('_')[-1])
                np.save(op.join(out_dir, "model_%d.npy" % model_index), acts_to_save)
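# For reference, a minimal sketch of the parse_args this script consumes. Only the
# attribute names are taken from the code above; option types, defaults and which
# flags are positional are assumptions, not the script's actual definitions.
import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file_path', help="path resolving to the model checkpoint(s)")
    parser.add_argument('out_dir')
    parser.add_argument('--dataset', required=True)
    parser.add_argument('--model_name', required=True,
                        help="names ending in '_dropout' enable dropout at prediction time")
    parser.add_argument('--feature_map_processing', default='mean',
                        help="how feature maps are reduced before saving; default is a guess")
    # these three choices are grounded: all of them appear in the branches above
    parser.add_argument('--agg_mode', choices=['aggregate', 'dump_all', 'both'], default='aggregate')
    return parser.parse_args()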
def main(): args = parse_args() fs_utils.create_dir_if_not_exists(args.out_dir) main.logger.info("Before data generation") if args.num_components: data_generator = DataGenerator(args.num_components) gen_data = data_generator.generate_samples(args.examples_num, args.data_dim) data = gen_data['data'] if args.data_dim == 2: data_generator.plot_gen_data(gen_data, "Generated data, examples: %d, components: %d" % (data.shape[0], args.num_components), op.join(args.out_dir, "%s.png" % args.init_method)) else: data = DataGenerator(None).generate_samples(args.examples_num, args.data_dim) main.logger.info("Generated data of shape: %s" % str(data.shape)) if args.tf_mode == 'non_shared': cgs_sampler = CollapsedGibbsSampler(args.init_method, max_clusters_num=args.init_clusters_num, out_dir=args.out_dir) else: cgs_sampler = CollapsedGibbsSampler(args.init_method, max_clusters_num=args.init_clusters_num, out_dir=args.out_dir, tf_shared=True) cgs_sampler.fit(1000, data)
def main():
    args = parse_args()
    fs_utils.create_dir_if_not_exists(args.out_dir)
    exp_paths = list_experiments_paths(args.experiment_root_dir)
    main.logger.info("Listed %d experiments paths" % len(exp_paths))
    copy_to_out_dir(exp_paths, args.out_dir)
def main():
    args = parse_args()
    if not args.num_features and not args.singular_values_thresh:
        raise ValueError("Specify either --num_features or --singular_values_thresh")

    npy_array_files = list_npy_array_files(args.in_data_file)
    if len(npy_array_files) > 1:
        fs_utils.create_dir_if_not_exists(args.out_file_or_dir)

    process_array_files(npy_array_files, args.axis, args.num_features, args.singular_values_thresh,
                        args.out_file_or_dir)
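# process_array_files is defined elsewhere; as a rough sketch of what selecting
# features either by count or by a singular-value threshold could look like
# (assumed semantics for illustration, not the actual implementation):
import numpy as np

def reduce_with_svd(arr, num_features=None, singular_values_thresh=None):
    # project rows of arr onto its leading right-singular vectors; keep either a fixed
    # number of features or every component whose singular value clears the threshold
    centered = arr - arr.mean(axis=0)
    _, s, vt = np.linalg.svd(centered, full_matrices=False)
    if num_features is not None:
        k = num_features
    else:
        k = max(1, int(np.sum(s > singular_values_thresh)))
    return centered @ vt[:k].T  # shape: (n_samples, k)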
def main():
    args = parse_args()
    train_config = fs_utils.read_json(op.join(op.dirname(args.vae_model_path), 'config.json'))

    if args.how_many_per_cluster and args.grid_size:
        raise ValueError("Specify either --how_many_per_cluster or --grid_size")
    if args.grid_size and args.mode not in {'joint_mixture', 'factorial_mixture'}:
        raise ValueError("Specify --grid_size only if mode is 'joint_mixture' or 'factorial_mixture'")
    # startswith covers both the plain and the *_mixture variants of each mode
    if (args.mode.startswith('joint') or args.mode.startswith('factorial')) and args.unobs_dims_num:
        raise ValueError("--unobs_dims_num option can be given only if mode is [conditional|marginal]")

    input_shape = (args.how_many_per_cluster, train_config['ds']['image_size'],
                   train_config['ds']['image_size'], 3)
    vae_model_obj = train_vae.create_model(train_config, input_shape, False)

    if args.mode in ('conditional', 'marginal'):
        fs_utils.create_dir_if_not_exists(args.out_vis_path)
        unobs_dims_num = args.unobs_dims_num if args.unobs_dims_num else 1
    else:
        unobs_dims_num = None

    if args.mode.startswith('joint'):
        sampler = latent_space_sampler.LatentSpaceSampler(args.trace_pkl_path)
    else:
        sampler = conditional_latent_space_sampler.ConditionalLatentSpaceSampler(args.trace_pkl_path)

    sample_generator = SampleGenerator(train_config, vae_model_obj, sampler, args.vae_model_path,
                                       args.mode, unobs_dims_num)
    clusters_limit = args.clusters_limit if args.clusters_limit else None
    grid_size = [int(x) for x in args.grid_size.split('x')] if args.grid_size else None

    sample_generator.generate_samples_from_latent_space(args.how_many_per_cluster, grid_size,
                                                        args.out_vis_path, clusters_limit)
def train_vae(args):
    if args.restore_model_path:
        train_args = fs_utils.read_json(op.join(op.dirname(args.restore_model_path), 'config.json'))
        train_args['restore_model_path'] = args.restore_model_path
        if 'reg_type' not in train_args and args.reg_type.startswith('mmd'):
            raise ValueError("No reg_type in restored config, specified reg_type == %s" % args.reg_type)
        if 'delta' not in train_args:
            train_args.update({'delta': None})
        if 'arch' not in train_args:
            train_args.update({'arch': 'standard'})

        dataset = dataset_restore_func(train_args, args.ds_type)
    else:
        dataset = dataset_create_func(args)
        train_args = {'latent_dim': args.latent_dim, 'beta': args.beta, 'ds': dataset.settings(),
                      'gc': args.gc if args.gc else None, 'delta': args.delta if args.delta else None,
                      'reg_type': args.reg_type, 'arch': args.arch}

    dataset.batch_size = args.batch_size
    input_shape = (dataset.batch_size, dataset.img_size(), dataset.img_size(), 3)

    z_sigma_sq = 2. if train_args['reg_type'].startswith('mmd') else 1.
    train_args.update({'z_sigma_sq': z_sigma_sq})

    vae_model_obj = create_model(train_args, input_shape, True)
    fs_utils.create_dir_if_not_exists(args.out_weights_dir)
    fs_utils.write_json(train_args, op.join(args.out_weights_dir, 'config.json'))

    trainer = trainers.VaeTrainer(dataset, args.out_weights_dir, vae_model_obj, **train_args)
    trainer.train(args.epochs_num)
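# For orientation only: the config.json written to out_weights_dir carries exactly the
# keys assembled in train_args above (the 'ds'/'image_size' nesting also appears in the
# sample-generation script). The values below are illustrative, not from a real run.
EXAMPLE_TRAIN_CONFIG = {
    'latent_dim': 64,
    'beta': 1.0,
    'ds': {'image_size': 64},  # dataset.settings(); exact keys depend on the dataset class
    'gc': None,                # gradient-clipping setting, if any
    'delta': None,
    'reg_type': 'mmd',         # any 'mmd*' regularizer switches z_sigma_sq from 1. to 2.
    'arch': 'standard',
    'z_sigma_sq': 2.,
}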
def main():
    args = parse_args()
    fs_utils.create_dir_if_not_exists(args.out_results_dir)

    if 'eigact' in args.in_results_root_dir:
        in_results_dirs = [args.in_results_root_dir]
    else:
        in_results_dirs = [op.join(args.in_results_root_dir, p) for p in os.listdir(args.in_results_root_dir)
                           if op.isdir(op.join(args.in_results_root_dir, p))]

    dims_dict = {}
    for in_dir in in_results_dirs:
        out_dir = op.join(args.out_results_dir, "%s_results" % op.basename(in_dir))
        fs_utils.create_dir_if_not_exists(out_dir)

        main.logger.info("Processing %s" % in_dir)
        clustering_result_list = process_single_model_dir(in_dir)
        data_dim = clustering_result_list[-1]['dim']
        dims_dict[op.basename(in_dir)] = data_dim

        main.logger.info("Clusters num from last iteration: %d" % clustering_result_list[-1]['clusters_num'])
        plot_ll_curves({'model': clustering_result_list}, op.join(out_dir, 'll_plot.png'))
        plot_ll_curves({'model': clustering_result_list}, op.join(out_dir, 'clusters_dynamics.png'),
                       key='clusters_num')

    if op.exists(op.join(args.in_results_root_dir, 'metadata.json')):
        metadata_dict = fs_utils.read_json(op.join(args.in_results_root_dir, 'metadata.json'))
        new_metadata_dict = {k: (metadata_dict[k], int(dims_dict[k])) for k in metadata_dict}
        fs_utils.write_json(new_metadata_dict, op.join(args.out_results_dir, 'metadata.json'))
    else:
        main.logger.info("Metadata file does not exist in %s" % args.in_results_root_dir)
def main():
    args = parse_args()
    models_results_dirs = list_models_dirs(args.models_results_root_dir)
    fs_utils.create_dir_if_not_exists(args.out_dir)
    main.logger.info("Fetched %d clustering results directories" % len(models_results_dirs))

    out_results_dict = {}
    for i, model_dir_path in enumerate(models_results_dirs):
        model_result_list = process_single_model_dir(model_dir_path)
        out_results_dict[op.basename(model_dir_path)] = model_result_list
        # i + 1 so the progress log reads 1/N .. N/N rather than 0/N .. N-1/N
        main.logger.info("Processed %d/%d model result dir: %s" % (i + 1, len(models_results_dirs),
                                                                   model_dir_path))

    fs_utils.write_json(out_results_dict, op.join(args.out_dir, "results.json"))
    plot_clusters_num_hist(out_results_dict, op.join(args.out_dir, 'clusters_hist.png'))
    plot_ll_curves(out_results_dict, op.join(args.out_dir, 'll_plot.png'))
def main():
    args = parse_args()
    data = np.load(args.data_file)
    if isinstance(data, npyio.NpzFile):
        if not args.type:
            raise ValueError("Specify --type flag if the input is a NpzFile object")
        filters_data = data[args.type]
    else:
        filters_data = data

    main.logger.info("Loaded data from %s, shape: %s" % (op.abspath(args.data_file), str(filters_data.shape)))
    fs_utils.create_dir_if_not_exists(args.out_dir)
    np.random.shuffle(filters_data)

    shared = args.mode == 'shared'  # string equality, not identity ('is')
    sampler = CollapsedGibbsSampler(init_strategy=args.init_type, max_clusters_num=args.max_clusters_num,
                                    tf_shared=shared, out_dir=args.out_dir,
                                    skip_epochs_logging=args.skip_epochs_logging)

    sampler.fit(args.iterations_num, filters_data)
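# Background for the NpzFile branch above: np.load returns a plain ndarray for .npy
# files but a lazy, dict-like NpzFile for .npz archives, hence the lookup by --type key.
import numpy as np
from numpy.lib import npyio

np.save('single.npy', np.zeros((4, 3)))                                  # -> ndarray on load
np.savez('archive.npz', conv1=np.zeros((4, 3)), conv2=np.ones((2, 2)))   # -> NpzFile on load

loaded = np.load('archive.npz')
assert isinstance(loaded, npyio.NpzFile)
print(loaded['conv1'].shape)  # arrays accessed by key, exactly like data[args.type] above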
def main():
    args = parse_args()
    create_dir_if_not_exists(args.out_weights_dir)
    train_celeb_gan(args)
def main():
    args = parse_args()
    fs_utils.create_dir_if_not_exists(args.out_weights_dir)
    train_vae(args)