def main():
    args = parse_args()

    fs_utils.create_dir_if_not_exists(args.out_dir)

    dataset_loader = get_test_dataset_loader(args.dataset)
    choose_subset_func = lambda arr: arr

    model_has_dropout = args.model_name.endswith('_dropout')

    layers_indices = choose_layers_indices(args.model_name)
    models_paths = get_models_paths(args.model_file_path)
    main.logger.info("PyTorch device detected: %s" % DEVICE)

    layers_out_activations = do_all_predictions_and_aggregate(models_paths,
                                                              lambda m: do_predictions_with_model(
                                                                  dataset_loader, m,
                                                                  model_has_dropout, layers_indices,
                                                                  args.feature_map_processing))

    for li, li_out_activations in layers_out_activations.items():
        out_file = "%s_%d_acts" % (args.feature_map_processing, li)
        if args.agg_mode == 'aggregate' or args.agg_mode == 'both':
            out_activations = np.hstack(tuple(li_out_activations))
            acts_to_save = choose_subset_func(out_activations)
            np.save(op.join(args.out_dir, "%s.npy" % out_file), acts_to_save)
        if args.agg_mode == 'dump_all' or args.agg_mode == 'both':
            out_dir = op.join(args.out_dir, out_file)
            fs_utils.create_dir_if_not_exists(out_dir)
            for i, i_model_acts in enumerate(li_out_activations):
                acts_to_save = choose_subset_func(i_model_acts)
                model_index = int(op.splitext(op.basename(models_paths[i]))[0].split('_')[-1])
                np.save(op.join(out_dir, "model_%d.npy" % model_index), acts_to_save)
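# Hedged sketch (my assumption, not the original helper): one plausible shape for
# do_all_predictions_and_aggregate as used above. Each per-model prediction call is
# assumed to return a dict mapping layer index -> activation array; results are
# grouped by layer index across models, preserving the order of models_paths so the
# enumerate() in the 'dump_all' branch lines up with the model file names.
def do_all_predictions_and_aggregate_sketch(models_paths, predict_func):
    layers_out_activations = {}
    for model_path in models_paths:
        model_layer_acts = predict_func(model_path)  # assumed {layer_index: np.ndarray}
        for layer_index, acts in model_layer_acts.items():
            layers_out_activations.setdefault(layer_index, []).append(acts)
    return layers_out_activations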
def main():
    args = parse_args()
    fs_utils.create_dir_if_not_exists(args.out_dir)

    main.logger.info("Before data generation")
    if args.num_components:
        data_generator = DataGenerator(args.num_components)
        gen_data = data_generator.generate_samples(args.examples_num, args.data_dim)
        data = gen_data['data']
        if args.data_dim == 2:
            plot_title = "Generated data, examples: %d, components: %d" % (data.shape[0], args.num_components)
            data_generator.plot_gen_data(gen_data, plot_title,
                                         op.join(args.out_dir, "%s.png" % args.init_method))
    else:
        data = DataGenerator(None).generate_samples(args.examples_num, args.data_dim)

    main.logger.info("Generated data of shape: %s" % str(data.shape))

    if args.tf_mode == 'non_shared':
        cgs_sampler = CollapsedGibbsSampler(args.init_method, max_clusters_num=args.init_clusters_num,
                                            out_dir=args.out_dir)
    else:
        cgs_sampler = CollapsedGibbsSampler(args.init_method, max_clusters_num=args.init_clusters_num,
                                            out_dir=args.out_dir, tf_shared=True)
    cgs_sampler.fit(1000, data)
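# Hedged sketch (my assumption, not the original class): a minimal DataGenerator
# compatible with both call sites above. With num_components set it is assumed to
# sample a random Gaussian mixture and return a dict (so gen_data['data'] works);
# with num_components=None it returns a plain standard-normal array used directly.
import numpy as np

class DataGeneratorSketch:
    def __init__(self, num_components):
        self.num_components = num_components

    def generate_samples(self, examples_num, data_dim):
        if self.num_components is None:
            return np.random.randn(examples_num, data_dim)
        means = 5. * np.random.randn(self.num_components, data_dim)
        labels = np.random.randint(0, self.num_components, size=examples_num)
        data = means[labels] + np.random.randn(examples_num, data_dim)
        return {'data': data, 'labels': labels}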
def main():
    args = parse_args()
    fs_utils.create_dir_if_not_exists(args.out_dir)

    exp_paths = list_experiments_paths(args.experiment_root_dir)
    main.logger.info("Listed %d experiments paths" % len(exp_paths))

    copy_to_out_dir(exp_paths, args.out_dir)
def main():
    args = parse_args()

    if not args.num_features and not args.singular_values_thresh:
        raise ValueError(
            "Specify either --num_features or --singular_values_thresh")

    npy_array_files = list_npy_array_files(args.in_data_file)

    if len(npy_array_files) > 1:
        fs_utils.create_dir_if_not_exists(args.out_file_or_dir)

    process_array_files(npy_array_files, args.axis, args.num_features,
                        args.singular_values_thresh, args.out_file_or_dir)
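# Hedged sketch (my assumption about what process_array_files does per file): reduce an
# array either to a fixed number of leading singular directions (--num_features) or to
# all directions whose singular value exceeds --singular_values_thresh.
import numpy as np

def reduce_array_sketch(arr, axis, num_features, singular_values_thresh):
    # Treat `axis` as the sample axis; flatten the remaining axes into features.
    mat = np.moveaxis(arr, axis, 0).reshape(arr.shape[axis], -1)
    centered = mat - mat.mean(axis=0)
    _, s, vt = np.linalg.svd(centered, full_matrices=False)
    k = num_features if num_features else int(np.sum(s > singular_values_thresh))
    return centered @ vt[:k].T  # project onto the k leading right singular vectors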
def main():
    args = parse_args()
    train_config = fs_utils.read_json(
        op.join(op.dirname(args.vae_model_path), 'config.json'))

    input_shape = (args.how_many_per_cluster, train_config['ds']['image_size'],
                   train_config['ds']['image_size'], 3)
    vae_model_obj = train_vae.create_model(train_config, input_shape, False)

    if args.how_many_per_cluster and args.grid_size:
        raise ValueError(
            "Specify either --how_many_per_cluster or --grid_size, not both")

    if args.grid_size and (args.mode
                           not in {'joint_mixture', 'factorial_mixture'}):
        raise ValueError(
            "Specify --grid_size only if mode is 'joint_mixture' or 'factorial_mixture'"
        )

    if (args.mode.startswith('joint')
            or args.mode == 'factorial') and args.unobs_dims_num:
        raise ValueError(
            "--unobs_dims_num option can be given only if mode is [conditional|marginal]"
        )

    if args.mode == 'conditional' or args.mode == 'marginal':
        fs_utils.create_dir_if_not_exists(args.out_vis_path)
        unobs_dims_num = args.unobs_dims_num if args.unobs_dims_num else 1
    else:
        unobs_dims_num = None

    if args.mode.startswith('joint'):
        sampler = latent_space_sampler.LatentSpaceSampler(args.trace_pkl_path)
    else:
        sampler = conditional_latent_space_sampler.ConditionalLatentSpaceSampler(
            args.trace_pkl_path)

    sample_generator = SampleGenerator(train_config, vae_model_obj, sampler,
                                       args.vae_model_path, args.mode,
                                       unobs_dims_num)
    clusters_limit = args.clusters_limit if args.clusters_limit else None
    if args.grid_size:
        grid_size = [int(x) for x in args.grid_size.split('x')]
    else:
        grid_size = None

    sample_generator.generate_samples_from_latent_space(
        args.how_many_per_cluster, grid_size, args.out_vis_path,
        clusters_limit)
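# Hedged sketch (my assumption): a parse_args compatible with the attributes this
# main() reads. Flag names come from the usages above; argument types, defaults and
# which arguments are positional are guesses.
import argparse

def parse_args_sketch():
    parser = argparse.ArgumentParser()
    parser.add_argument('vae_model_path')
    parser.add_argument('trace_pkl_path')
    parser.add_argument('out_vis_path')
    parser.add_argument('--mode', required=True,
                        help="e.g. joint_mixture, factorial_mixture, conditional, marginal")
    parser.add_argument('--how_many_per_cluster', type=int, default=None)
    parser.add_argument('--grid_size', default=None, help="'ROWSxCOLS', e.g. '8x8'")
    parser.add_argument('--unobs_dims_num', type=int, default=None)
    parser.add_argument('--clusters_limit', type=int, default=None)
    return parser.parse_args()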
def train_vae(args):
    if args.restore_model_path:
        train_args = fs_utils.read_json(
            op.join(op.dirname(args.restore_model_path), 'config.json'))
        train_args['restore_model_path'] = args.restore_model_path
        if 'reg_type' not in train_args and args.reg_type.startswith('mmd'):
            raise ValueError(
                "No reg_type in restored config, specified reg_type == %s" %
                args.reg_type)

        if 'delta' not in train_args:
            train_args.update({'delta': None})

        if 'arch' not in train_args:
            train_args.update({'arch': 'standard'})

        dataset = dataset_restore_func(train_args, args.ds_type)
    else:
        dataset = dataset_create_func(args)

        train_args = {
            'latent_dim': args.latent_dim,
            'beta': args.beta,
            'ds': dataset.settings(),
            'gc': args.gc if args.gc else None,
            'delta': args.delta if args.delta else None,
            'reg_type': args.reg_type,
            'arch': args.arch
        }

    dataset.batch_size = args.batch_size
    input_shape = (dataset.batch_size, dataset.img_size(), dataset.img_size(),
                   3)

    z_sigma_sq = 1.
    if train_args['reg_type'].startswith('mmd'):
        z_sigma_sq = 2.

    train_args.update({'z_sigma_sq': z_sigma_sq})

    vae_model_obj = create_model(train_args, input_shape, True)
    fs_utils.create_dir_if_not_exists(args.out_weights_dir)

    fs_utils.write_json(train_args, op.join(args.out_weights_dir,
                                            'config.json'))
    trainer = trainers.VaeTrainer(dataset, args.out_weights_dir, vae_model_obj,
                                  **train_args)

    trainer.train(args.epochs_num)
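# Hedged illustration (values are made up): the train_args dict written to config.json
# above carries keys along these lines; 'ds' is whatever dataset.settings() returns
# (the sampling script above reads train_config['ds']['image_size'] from it).
example_train_args = {
    'latent_dim': 64,
    'beta': 1.0,
    'ds': {'image_size': 64},
    'gc': None,
    'delta': None,
    'reg_type': 'mmd',
    'arch': 'standard',
    'z_sigma_sq': 2.,   # 2. because reg_type starts with 'mmd', otherwise 1.
}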
def main():
    args = parse_args()

    fs_utils.create_dir_if_not_exists(args.out_results_dir)

    if 'eigact' in args.in_results_root_dir:
        in_results_dirs = [args.in_results_root_dir]
    else:
        in_results_dirs = [
            op.join(args.in_results_root_dir, p)
            for p in os.listdir(args.in_results_root_dir)
            if op.isdir(op.join(args.in_results_root_dir, p))
        ]

    dims_dict = {}

    for in_dir in in_results_dirs:
        out_dir = op.join(args.out_results_dir,
                          "%s_results" % op.basename(in_dir))
        fs_utils.create_dir_if_not_exists(out_dir)

        main.logger.info("Processing %s" % in_dir)
        clustering_result_list = process_single_model_dir(in_dir)
        data_dim = clustering_result_list[-1]['dim']

        dims_dict[op.basename(in_dir)] = data_dim

        main.logger.info("Clusters num from last iteration: %d" %
                         clustering_result_list[-1]['clusters_num'])
        plot_ll_curves({'model': clustering_result_list},
                       op.join(out_dir, 'll_plot.png'))
        plot_ll_curves({'model': clustering_result_list},
                       op.join(out_dir, 'clusters_dynamics.png'),
                       key='clusters_num')

    if op.exists(op.join(args.in_results_root_dir, 'metadata.json')):
        metadata_dict = fs_utils.read_json(
            op.join(args.in_results_root_dir, 'metadata.json'))
        new_metadata_dict = {
            k: (metadata_dict[k], int(dims_dict[k]))
            for k in metadata_dict
        }
        fs_utils.write_json(new_metadata_dict,
                            op.join(args.out_results_dir, 'metadata.json'))
    else:
        main.logger.info("Metadata file does not exist in %s" %
                         args.in_results_root_dir)
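# Hedged sketch (my assumption, not the original plotting code): a plot_ll_curves
# compatible with both calls above. Each result list is assumed to hold per-iteration
# dicts; `key` selects which entry is plotted ('clusters_num' is known from the code
# above, the default log-likelihood key name 'll' is a guess).
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

def plot_ll_curves_sketch(results_dict, out_path, key='ll'):
    fig, ax = plt.subplots()
    for name, result_list in results_dict.items():
        ax.plot([r[key] for r in result_list], label=name)
    ax.set_xlabel('iteration')
    ax.set_ylabel(key)
    ax.legend()
    fig.savefig(out_path)
    plt.close(fig)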
def main():
    args = parse_args()
    models_results_dirs = list_models_dirs(args.models_results_root_dir)

    fs_utils.create_dir_if_not_exists(args.out_dir)

    main.logger.info("Fetched %d clustering results directories" % len(models_results_dirs))
    out_results_dict = {}

    for i, model_dir_path in enumerate(models_results_dirs):
        model_result_list = process_single_model_dir(model_dir_path)
        out_results_dict[op.basename(model_dir_path)] = model_result_list

        main.logger.info("Processed %d/%d model result dir: %s" % (i, len(models_results_dirs), model_dir_path))

    fs_utils.write_json(out_results_dict, op.join(args.out_dir, "results.json"))

    plot_clusters_num_hist(out_results_dict, op.join(args.out_dir, 'clusters_hist.png'))
    plot_ll_curves(out_results_dict, op.join(args.out_dir, 'll_plot.png'))
def main():
    args = parse_args()
    data = np.load(args.data_file)

    if isinstance(data, npyio.NpzFile):
        if not args.type:
            raise ValueError("Specify --type flag if the input is a NpzFile object")
        filters_data = data[args.type]
    else:
        filters_data = data
    main.logger.info("Loaded data from %s, shape: %s" % (op.abspath(args.data_file), str(filters_data.shape)))
    fs_utils.create_dir_if_not_exists(args.out_dir)

    np.random.shuffle(filters_data)

    shared = args.mode == 'shared'

    sampler = CollapsedGibbsSampler(init_strategy=args.init_type, max_clusters_num=args.max_clusters_num,
                                    tf_shared=shared, out_dir=args.out_dir,
                                    skip_epochs_logging=args.skip_epochs_logging)

    sampler.fit(args.iterations_num, filters_data)
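# Illustration (my own example, not from the original sources): np.load on an .npz
# archive yields an NpzFile that behaves like a dict of named arrays, which is why the
# script above requires --type to pick one of them.
import io
import numpy as np
from numpy.lib import npyio

buf = io.BytesIO()
np.savez(buf, conv1=np.zeros((4, 3)), conv2=np.ones((2, 5)))
buf.seek(0)
loaded = np.load(buf)
assert isinstance(loaded, npyio.NpzFile)
print(sorted(loaded.keys()))     # ['conv1', 'conv2']
print(loaded['conv1'].shape)     # (4, 3)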
def main():
    args = parse_args()
    create_dir_if_not_exists(args.out_weights_dir)
    train_celeb_gan(args)
def main():
    args = parse_args()
    fs_utils.create_dir_if_not_exists(args.out_weights_dir)
    train_vae(args)