def main():
    args = parse_args()

    beta_traces_dict = list_traces_for_betas(args.clustering_results_root_dir,
                                             args.init_iteration,
                                             args.interval)
    final_result = run_diagonality_calculate(beta_traces_dict)
    fs_utils.write_json(final_result, args.out_results_file)
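

# From its use further below, list_traces_for_betas is assumed to return a
# dict mapping each beta value to the list of clustering-trace pickle paths
# collected from args.init_iteration onwards, one every args.interval
# iterations, e.g. {0.5: ['.../iter_0500.pkl', ...], 1.0: [...]} (file names
# hypothetical); run_diagonality_calculate then reduces each trace list to the
# per-beta values written to args.out_results_file.
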
def save_sum_thresh_to_metadata_file(sum_thresh, out_arr_file):
    metadata_file = op.join(op.dirname(out_arr_file), 'metadata.json')
    key_name = op.splitext(op.basename(out_arr_file))[0]
    if op.exists(metadata_file):
        existing_struct = fs_utils.read_json(metadata_file)
        existing_struct[key_name] = sum_thresh
    else:
        existing_struct = {key_name: sum_thresh}

    fs_utils.write_json(existing_struct, metadata_file)
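

# Usage illustration (hypothetical paths; fs_utils.read_json/write_json are
# assumed to be thin json.load/json.dump wrappers):
#
#   save_sum_thresh_to_metadata_file(0.95, '/results/eigact_fc1.npy')
#
# After the call, /results/metadata.json maps the array file's basename
# (without extension) to its threshold, e.g. {"eigact_fc1": 0.95}; repeated
# calls add or overwrite one key per output array file.
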
def train_vae(args):
    if args.restore_model_path:
        train_args = fs_utils.read_json(
            op.join(op.dirname(args.restore_model_path), 'config.json'))
        train_args['restore_model_path'] = args.restore_model_path
        if 'reg_type' not in train_args:
            if args.reg_type.startswith('mmd'):
                raise ValueError(
                    "No reg_type in restored config, specified reg_type == %s" %
                    args.reg_type)
            # Older configs may lack 'reg_type'; fall back to the CLI value so
            # the 'mmd' check further below does not raise a KeyError.
            train_args['reg_type'] = args.reg_type

        if 'delta' not in train_args:
            train_args.update({'delta': None})

        if 'arch' not in train_args:
            train_args.update({'arch': 'standard'})

        dataset = dataset_restore_func(train_args, args.ds_type)
    else:
        dataset = dataset_create_func(args)

        train_args = {
            'latent_dim': args.latent_dim,
            'beta': args.beta,
            'ds': dataset.settings(),
            'gc': args.gc if args.gc else None,
            'delta': args.delta if args.delta else None,
            'reg_type': args.reg_type,
            'arch': args.arch
        }

    dataset.batch_size = args.batch_size
    input_shape = (dataset.batch_size, dataset.img_size(), dataset.img_size(),
                   3)

    z_sigma_sq = 1.
    if train_args['reg_type'].startswith('mmd'):
        z_sigma_sq = 2.

    train_args.update({'z_sigma_sq': z_sigma_sq})

    vae_model_obj = create_model(train_args, input_shape, True)
    fs_utils.create_dir_if_not_exists(args.out_weights_dir)

    fs_utils.write_json(train_args, op.join(args.out_weights_dir,
                                            'config.json'))
    trainer = trainers.VaeTrainer(dataset, args.out_weights_dir, vae_model_obj,
                                  **train_args)

    trainer.train(args.epochs_num)
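

# A hypothetical parse_args() covering the attributes train_vae reads above;
# the real script's parser may use different names, types and defaults.
import argparse

def parse_args_sketch():
    parser = argparse.ArgumentParser()
    parser.add_argument('out_weights_dir')
    parser.add_argument('--restore_model_path', default=None)
    parser.add_argument('--ds_type', default=None)
    parser.add_argument('--latent_dim', type=int, default=32)
    parser.add_argument('--beta', type=float, default=1.0)
    parser.add_argument('--gc', type=float, default=None)
    parser.add_argument('--delta', type=float, default=None)
    parser.add_argument('--reg_type', default='kl')
    parser.add_argument('--arch', default='standard')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--epochs_num', type=int, default=100)
    return parser.parse_args()
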
def main():
    args = parse_args()

    fs_utils.create_dir_if_not_exists(args.out_results_dir)

    if 'eigact' in args.in_results_root_dir:
        in_results_dirs = [args.in_results_root_dir]
    else:
        in_results_dirs = [
            op.join(args.in_results_root_dir, p)
            for p in os.listdir(args.in_results_root_dir)
            if op.isdir(op.join(args.in_results_root_dir, p))
        ]

    dims_dict = {}

    for in_dir in in_results_dirs:
        out_dir = op.join(args.out_results_dir,
                          "%s_results" % op.basename(in_dir))
        fs_utils.create_dir_if_not_exists(out_dir)

        main.logger.info("Processing %s" % in_dir)
        clustering_result_list = process_single_model_dir(in_dir)
        data_dim = clustering_result_list[-1]['dim']

        dims_dict[op.basename(in_dir)] = data_dim

        main.logger.info("Clusters num from last iteration: %d" %
                         clustering_result_list[-1]['clusters_num'])
        plot_ll_curves({'model': clustering_result_list},
                       op.join(out_dir, 'll_plot.png'))
        plot_ll_curves({'model': clustering_result_list},
                       op.join(out_dir, 'clusters_dynamics.png'),
                       key='clusters_num')

    if op.exists(op.join(args.in_results_root_dir, 'metadata.json')):
        metadata_dict = fs_utils.read_json(
            op.join(args.in_results_root_dir, 'metadata.json'))
        new_metadata_dict = {
            k: (metadata_dict[k], int(dims_dict[k]))
            for k in metadata_dict
        }
        fs_utils.write_json(new_metadata_dict,
                            op.join(args.out_results_dir, 'metadata.json'))
    else:
        main.logger.info("Metadata file does not exist in %s" %
                         args.in_results_root_dir)
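

# A minimal matplotlib sketch of what plot_ll_curves could look like, given
# how it is called above: a {label: result_list} dict, an output image path
# and an optional record key (the default key 'll' is an assumption). The
# project's actual helper may differ.
import matplotlib.pyplot as plt

def plot_ll_curves_sketch(results_dict, out_path, key='ll'):
    fig, ax = plt.subplots()
    for label, result_list in results_dict.items():
        ax.plot(range(len(result_list)), [r[key] for r in result_list],
                label=label)
    ax.set_xlabel('iteration')
    ax.set_ylabel(key)
    ax.legend()
    fig.savefig(out_path)
    plt.close(fig)
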
def main():
    args = parse_args()
    net_clustering_counts_path = op.join(args.clustering_dirs_root, 'clustering_counts.json')
    net_clustering_counts_prefix = args.prefix + '_' if args.prefix is not None else ''

    if not op.exists(net_clustering_counts_path):
        main.logger.info("Computing cluster counts from trace...")
        net_clustering_dirs = [
            op.join(args.clustering_dirs_root, d)
            for d in ['true_labels_ld', 'true_labels_aug_ld', 'random_labels_ld']
        ]
        net_clustering_counts = {
            net_clustering_counts_prefix + op.basename(d):
                collect_clusters_num_info(d, args.start_iteration, args.interval)
            for d in net_clustering_dirs if op.exists(d)
        }

        fs_utils.write_json(net_clustering_counts, net_clustering_counts_path)
        main.logger.info("All cluster counts computed")
    else:
        main.logger.info("Cluster counts already exist, exiting...")
        exit(-1)


def main():
    args = parse_args()
    models_results_dirs = list_models_dirs(args.models_results_root_dir)

    fs_utils.create_dir_if_not_exists(args.out_dir)

    main.logger.info("Fetched %d clustering results directories" % len(models_results_dirs))
    out_results_dict = {}

    for i, model_dir_path in enumerate(models_results_dirs):
        model_result_list = process_single_model_dir(model_dir_path)
        out_results_dict[op.basename(model_dir_path)] = model_result_list

        main.logger.info("Processed %d/%d model result dir: %s" % (i, len(models_results_dirs), model_dir_path))

    fs_utils.write_json(out_results_dict, op.join(args.out_dir, "results.json"))

    plot_clusters_num_hist(out_results_dict, op.join(args.out_dir, 'clusters_hist.png'))
    plot_ll_curves(out_results_dict, op.join(args.out_dir, 'll_plot.png'))


def main():
    args = parse_args()
    beta_traces = calculate_diagonality_of_representation.list_traces_for_betas(
        args.clustering_results_root_dir, args.init_iteration, args.interval)

    if not op.exists(args.out_results_file):
        beta_relative_entropies = {
            beta:
            estimate_entropy_from_clustering.do_entropy_estimation_for_traces(
                beta_trace_paths, args.samples_num, 'relative')
            for beta, beta_trace_paths in beta_traces.items()
        }
    else:
        beta_relative_entropies = {
            float(k): v
            for k, v in fs_utils.read_json(args.out_results_file).items()
        }

    fs_utils.write_json(beta_relative_entropies, args.out_results_file)


def main():
    args = parse_args()
    entropy_file_name = 'entropy_relative.json' if args.entropy_type == 'relative' else 'entropy.json'
    net_entropy_vals_path = op.join(args.clustering_dirs_root, entropy_file_name)
    net_entropy_vals_prefix = args.prefix + '_' if args.prefix is not None else ''

    if not op.exists(net_entropy_vals_path):
        main.logger.info("Computing entropy estimates from traces...")
        net_clustering_dirs = [
            op.join(args.clustering_dirs_root, d)
            for d in ['true_labels_ld', 'true_labels_aug_ld', 'random_labels_ld']
        ]
        net_entropy_vals = {
            net_entropy_vals_prefix + op.basename(d):
                collect_entropy_val_info(d, args.start_iteration, args.interval,
                                         args.samples_num, args.entropy_type)
            for d in net_clustering_dirs if op.exists(d)
        }

        fs_utils.write_json(net_entropy_vals, net_entropy_vals_path)
        main.logger.info("All entropy estimates computed")
    else:
        main.logger.info("Entropy estimates already exist, exiting...")
        exit(-1)


def main():
    args = parse_args()
    if not op.exists(args.out_results_file):
        beta_traces = calculate_diagonality_of_representation.list_traces_for_betas(
            args.clustering_results_root_dir, args.init_iteration,
            args.interval)
        beta_cluster_counts = {
            beta: [
                len(fs_utils.read_pickle(p)['cluster_assignment'])
                for p in beta_paths
            ]
            for beta, beta_paths in beta_traces.items()
        }
    else:
        beta_cluster_counts = {
            float(k): v
            for k, v in fs_utils.read_json(args.out_results_file).items()
        }
    fs_utils.write_json(beta_cluster_counts, args.out_results_file)
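

# The main() above assumes each trace pickle holds a dict with a
# 'cluster_assignment' entry whose length equals the number of clusters at
# that iteration (e.g. a mapping from cluster id to member indices). A
# hypothetical sketch of producing such a file, for illustration only:
import pickle

def write_trace_sketch(path, cluster_assignment):
    # cluster_assignment: e.g. {cluster_id: [indices of assigned points], ...}
    with open(path, 'wb') as f:
        pickle.dump({'cluster_assignment': cluster_assignment}, f)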