Example no. 1
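All of the snippets below assume the same surrounding imports. The standard-library and third-party ones are listed here for reference; project-local names (TransformParameterParser, DataInput, DepthOneModel, and friends) come from the repository's own modules, whose import paths are not shown in these excerpts.

import copy
import os
import pickle
import time
from copy import deepcopy

import matplotlib.pyplot as plt
import numpy as np
import torch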
def cross_validate_accuracy_over_saved_results(path_to_results,
                                               stepsize,
                                               n_steps,
                                               nfolds=20,
                                               starting_fold=30):
    path_to_params = os.path.join(path_to_results, 'params.yaml')

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    cur_params = deepcopy(params)

    # eventually uncomment this; left as-is for now to keep the same results
    # as before for comparison
    set_random_seeds(params)
    data_input = DataInput(params['data_params'])
    te_accs = []
    pushed_gates_per_fold = []
    starting_gates_per_fold = []
    diffs_per_fold = []

    # advance the splitter so folds [starting_fold, starting_fold + nfolds)
    # use the same data splits they would in a full run from fold 0
    for _ in range(starting_fold):
        data_input.split_data()

    for fold in range(starting_fold, nfolds + starting_fold):
        print('Running fold %d' % fold)
        cur_params['save_dir'] = os.path.join(params['save_dir'],
                                              'run%d' % fold)
        data_input.split_data()
        best_tr_acc, starting_gate, best_gate = push_converged_boundaries_given_data_input_and_params(
            cur_params, data_input, stepsize, n_steps, path_to_params)

        model = DepthOneModel([[['D1', best_gate[0], best_gate[1]],
                                ['D2', best_gate[2], best_gate[3]]]],
                              params['model_params'])
        fit_classifier_params(
            model, data_input,
            params['train_params']['learning_rate_classifier'])
        te_acc = compute_te_acc(model, data_input)
        print('te acc for fold %d is %.3f' % (fold, te_acc))
        te_accs.append(te_acc)
        pushed_gates_per_fold.append(best_gate)
        starting_gates_per_fold.append(starting_gate)
        diffs_per_fold.append(get_diff_between_gates(starting_gate, best_gate))
        print('Diff: ', diffs_per_fold[-1])

    print('Te accs:', te_accs)
    print('Diffs per fold:', diffs_per_fold)
    with open(
            os.path.join(path_to_results,
                         'expanded_boundaries_te_accs_per_fold.pkl'),
            'wb') as f:
        pickle.dump(te_accs, f)
    with open(
            os.path.join(path_to_results,
                         'expanded_boundaries_diffs_per_fold.pkl'), 'wb') as f:
        pickle.dump(diffs_per_fold, f)
    with open(
            os.path.join(path_to_results,
                         'expanded_boundaries_best_pushed_gates_per_fold.pkl'),
            'wb') as f:
        pickle.dump(pushed_gates_per_fold, f)
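A minimal invocation sketch with hypothetical paths and step settings; the results directory is assumed to hold the params.yaml and per-fold artifacts written by an earlier training run.

cross_validate_accuracy_over_saved_results('results/expanded_run',
                                           stepsize=0.05,
                                           n_steps=3,
                                           nfolds=20,
                                           starting_fold=30)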
Example no. 2
def evaluate_validation_performance_different_sizes(path_to_params,
                                                    path_to_transformer,
                                                    size_grid,
                                                    n_runs_per_size):
    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)

    with open(path_to_transformer, 'rb') as f:
        data_transformer = pickle.load(f)

    metrics = init_metrics(size_grid, n_runs_per_size)
    trackers = {}
    # since performance is evaluated as a function of the box size initialized
    # in the discriminative region, UMAP is kept deterministic across runs and
    # only the seed used to split the data is varied
    metrics_to_print = ['log_loss', 'acc', 'avg_pos_feat', 'avg_neg_feat']
    for i, size in enumerate(size_grid):
        trackers[i] = {}
        for run in range(n_runs_per_size):
            # the run index doubles as the data-split seed
            params['random_seed'] = run
            model, tracker, data_transformer = run_once_with_fixed_size(
                params, size, run, data_transformer)

            update_all_metrics_to_print(tracker, metrics, i, run)

            trackers[i][run] = tracker
    # add saving later
    print_all_average_metrics(metrics)
    with open(os.path.join(params['save_dir'], 'trackers_per_run.pkl'),
              'wb') as f:
        pickle.dump(trackers, f)
    # just to save the grid with the metrics
    metrics['size_grid'] = size_grid
    with open(os.path.join(params['save_dir'], 'metrics_dict.pkl'), 'wb') as f:
        pickle.dump(metrics, f)
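A usage sketch with hypothetical paths and grid values; the transformer pickle is assumed to be one written by data_input.save_transformer in an earlier run.

evaluate_validation_performance_different_sizes(
    'configs/umap_params.yaml',
    'results/run0/transformer.pkl',
    size_grid=[0.05, 0.1, 0.2],
    n_runs_per_size=5)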
def main(path_to_config, transformer_path):
    params = TransformParameterParser(path_to_config).parse_params()
    set_random_seeds(params)
    data_input = init_data_input(params, transformer_path)
    grid_x, grid_y, size_grid = get_small_grid()
    plotter = HeatMapPlotter(params, data_input, grid_x, grid_y, size_grid)
    fig, _ = plotter.plot_loss_heat_maps_and_data_density()
    fig.savefig('heatmaps.png')
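A hypothetical invocation of the heat-map entry point above (placeholder paths):

main('configs/heatmap.yaml', 'results/run0/transformer.pkl')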
def load_data_input(path_to_params):
    params = TransformParameterParser(path_to_params).parse_params()
    print(params)

    set_random_seeds(params)

    data_input = DataInput(params['data_params'])
    data_input.split_data()
    return data_input
def main(path_to_params):
    start_time = time.time()

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)

    if not os.path.exists(params['save_dir']):
        os.makedirs(params['save_dir'])

    with open(os.path.join(params['save_dir'], 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)

    data_input = DataInput(params['data_params'])
    data_input.split_data()

    data_transformer = DataTransformerFactory(
        params['transform_params'],
        params['random_seed']).manufacture_transformer()
    data_input.embed_data_and_fit_transformer(\
        data_transformer,
        params['transform_params']['cells_to_subsample'],
        params['transform_params']['num_cells_for_transformer'])
    data_input.save_transformer(params['save_dir'])
    data_input.normalize_data()
    init_gate_tree = init_plot_and_save_gates(data_input, params)

    model = initialize_model(params['model_params'], init_gate_tree)
    data_input.prepare_data_for_training()
    performance_tracker = run_train_model(model, params['train_params'],
                                          data_input)

    model_save_path = os.path.join(params['save_dir'], 'model.pkl')
    torch.save(model.state_dict(), model_save_path)

    tracker_save_path = os.path.join(params['save_dir'], 'tracker.pkl')
    with open(tracker_save_path, 'wb') as f:
        pickle.dump(performance_tracker, f)
    results_plotter = DataAndGatesPlotterDepthOne(
        model, np.concatenate(data_input.x_tr))
    #fig, axes = plt.subplots(params['gate_init_params']['n_clusters'], figsize=(1 * params['gate_init_params']['n_clusters'], 3 * params['gate_init_params']['n_clusters']))
    results_plotter.plot_data_with_gates(
        np.array(
            np.concatenate([
                data_input.y_tr[i] *
                torch.ones([data_input.x_tr[i].shape[0], 1])
                for i in range(len(data_input.x_tr))
            ])))

    plt.savefig(os.path.join(params['save_dir'], 'final_gates.png'))
    print('Complete main loop took %.4f seconds' % (time.time() - start_time))
def load_saved_results(path_to_params, ret_params_too=False):
    start_time = time.time()

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)

    torch.manual_seed(params['random_seed'])
    np.random.seed(params['random_seed'])

    data_input = load_and_prepare_data_input(params)

    model = DepthOneModel([[['D1', 0, 0], ['D2', 0, 0]]],
                          params['model_params'])
    model.load_state_dict(
        torch.load(os.path.join(params['save_dir'], 'model.pkl')))
    if ret_params_too:
        return data_input, model, params
    return data_input, model
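For example, to reload a finished run for further analysis (hypothetical config path):

data_input, model, params = load_saved_results('results/run0/params.yaml',
                                               ret_params_too=True)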
Example no. 7
def cross_validate_just_first_gate(path_to_params, n_splits=20):
    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)
    trackers_per_seed = []
    models_per_seed = []
    for split in range(n_splits):
        params['random_seed'] = split + 1
        tracker, model = single_run_single_gate(params)

        trackers_per_seed.append(tracker)
        models_per_seed.append(model)
    with open(os.path.join(params['save_dir'], 'trackers_per_seed.pkl'),
              'wb') as f:
        pickle.dump(trackers_per_seed, f)
    with open(os.path.join(params['save_dir'], 'models_per_seed.pkl'),
              'wb') as f:
        pickle.dump(models_per_seed, f)
Example no. 8
def cross_validate_reg_settings(path_to_params):

    start_time = time.time()

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    # TODO
    check_consistency_of_params(params)

    feat_diffs = params['cross_validate']['feat_diffs']
    neg_box_regs = params['cross_validate']['neg_box_regs']
    n_runs = params['cross_validate']['n_runs']
    avg_te_losses = {}
    avg_te_losses['feat_diff_vals'] = feat_diffs
    avg_te_losses['neg_box_reg_vals'] = neg_box_regs
    for feat_diff in feat_diffs:
        avg_te_losses[feat_diff] = {}
        for neg_box_reg in neg_box_regs:
            print('feat diff %.3f, neg_box_reg %.3f' %
                  (feat_diff, neg_box_reg))
            params['model_params']['feature_diff_penalty'] = feat_diff
            params['model_params']['negative_box_penalty'] = neg_box_reg
            te_losses = []
            te_accs = []
            for i in range(n_runs):
                params['random_seed'] = i
                tracker = main(params)
                te_losses.append(
                    tracker.metrics['te_log_loss'][-1].detach().cpu().numpy())
                te_accs.append(tracker.metrics['te_acc'][-1])

            avg_te_losses[feat_diff][neg_box_reg] = {
                'te_log_loss': np.mean(np.array(te_losses)),
                'te_acc': np.mean(np.array(te_accs))
            }

    with open(os.path.join(params['save_dir'], 'avg_te_losses.pkl'),
              'wb') as f:
        pickle.dump(avg_te_losses, f)
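Reading the saved grid back follows directly from the dictionary layout built above (hypothetical save_dir):

with open('results/reg_cv/avg_te_losses.pkl', 'rb') as f:
    avg_te_losses = pickle.load(f)
for feat_diff in avg_te_losses['feat_diff_vals']:
    for neg_box_reg in avg_te_losses['neg_box_reg_vals']:
        entry = avg_te_losses[feat_diff][neg_box_reg]
        print(feat_diff, neg_box_reg, entry['te_log_loss'], entry['te_acc'])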
def load_saved_model_and_matching_data_input(path_to_params):
    def set_random_seeds(params):
        torch.manual_seed(params['random_seed'])
        np.random.seed(params['random_seed'])

    start_time = time.time()

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)

    # eventually uncomment this; left as-is for now to keep the same results
    # as before for comparison
    set_random_seeds(params)

    data_input = DataInput(params['data_params'])
    data_input.split_data()
    print('%d samples in the training data' % len(data_input.x_tr))

    with open(os.path.join(params['save_dir'], 'trackers.pkl'), 'rb') as f:
        trackers = pickle.load(f)

    with open(os.path.join(params['save_dir'], 'transformer.pkl'), 'rb') as f:
        umapper = pickle.load(f)
    # FOR DEBUGGING ONLY
    #params['transform_params']['cells_to_subsample'] = 10
    data_input.embed_data(\
        umapper,
        cells_to_subsample=params['transform_params']['cells_to_subsample'],
        use_labels_to_transform_data=params['transform_params']['use_labels_to_transform_data']
    )
    data_input.normalize_data()
    data_input.convert_all_data_to_tensors()

    model = DepthOneModel([[['D1', 0, 0], ['D2', 0, 0]]],
                          params['model_params'])
    model.load_state_dict(
        torch.load(os.path.join(params['save_dir'], 'model.pkl')))
    return params, model, data_input, umapper
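For example, to restore a finished run for inspection (hypothetical path):

params, model, data_input, umapper = load_saved_model_and_matching_data_input(
    'results/run0/params.yaml')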
Example no. 10
def main_with_path(path_to_params):
    params = TransformParameterParser(path_to_params).parse_params()
    check_consistency_of_params(params)
    print(params)
    main(params)
def cross_validate(path_to_params, n_runs, start_seed=0):
    start_time = time.time()

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)

    # eventually uncomment this; left as-is for now to keep the same results
    # as before for comparison
    set_random_seeds(params)

    if not os.path.exists(params['save_dir']):
        os.makedirs(params['save_dir'])

    with open(os.path.join(params['save_dir'], 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)

    data_input = DataInput(params['data_params'])
    te_accs = []
    tr_accs = []
    # advance the splitter so runs [start_seed, n_runs) see the same data
    # splits they would in a full run starting from seed 0
    for _ in range(start_seed):
        data_input.split_data()

    for run in range(start_seed, n_runs):
        savepath = os.path.join(params['save_dir'], 'run%d' % run)
        if not os.path.exists(savepath):
            os.makedirs(savepath)
        data_input.split_data()
        print(data_input.idxs_tr)

        data_transformer = DataTransformerFactory(
            params['transform_params'],
            params['random_seed']).manufacture_transformer()

        data_input.embed_data_and_fit_transformer(\
            data_transformer,
            cells_to_subsample=params['transform_params']['cells_to_subsample'],
            num_cells_for_transformer=params['transform_params']['num_cells_for_transformer'],
            use_labels_to_transform_data=params['transform_params']['use_labels_to_transform_data']
        )
        data_input.save_transformer(savepath)
        data_input.normalize_data()
        unused_cluster_gate_inits = init_plot_and_save_gates(
            data_input, params)
        #everything below differs from the other main_UMAP
        data_input.convert_all_data_to_tensors()
        init_gate_tree, unused_cluster_gate_inits = get_next_gate_tree(
            unused_cluster_gate_inits, data_input, params, model=None)
        model = initialize_model(params['model_params'], [init_gate_tree])
        performance_tracker = run_train_model(model, params['train_params'],
                                              data_input)

        model_save_path = os.path.join(savepath, 'model.pkl')
        torch.save(model.state_dict(), model_save_path)

        tracker_save_path = os.path.join(savepath, 'tracker.pkl')
        with open(tracker_save_path, 'wb') as f:
            pickle.dump(performance_tracker, f)
        results_plotter = DataAndGatesPlotterDepthOne(
            model, np.concatenate(data_input.x_tr))
        #fig, axes = plt.subplots(params['gate_init_params']['n_clusters'], figsize=(1 * params['gate_init_params']['n_clusters'], 3 * params['gate_init_params']['n_clusters']))
        results_plotter.plot_data_with_gates(
            np.array(
                np.concatenate([
                    data_input.y_tr[i] *
                    torch.ones([data_input.x_tr[i].shape[0], 1])
                    for i in range(len(data_input.x_tr))
                ])))

        plt.savefig(os.path.join(savepath, 'final_gates.png'))

        with open(os.path.join(savepath, 'configs.pkl'), 'wb') as f:
            pickle.dump(params, f)

        print('Complete main loop for run %d took %.4f seconds' %
              (run, time.time() - start_time))
        start_time = time.time()
        print('Accuracy tr %.3f, te %.3f' %
              (performance_tracker.metrics['tr_acc'][-1],
               performance_tracker.metrics['te_acc'][-1]))
        te_accs.append(performance_tracker.metrics['te_acc'][-1])
        tr_accs.append(performance_tracker.metrics['tr_acc'][-1])
    tr_accs = np.array(tr_accs)
    te_accs = np.array(te_accs)
    print('Average tr acc: %.3f, te acc %.3f' %
          (np.mean(tr_accs), np.mean(te_accs)))
    print('Std dev tr acc: %.3f, te_acc %.3f' %
          (np.std(tr_accs), np.std(te_accs)))
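A hypothetical invocation running 20 fresh splits (placeholder config path):

cross_validate('configs/umap_params.yaml', n_runs=20, start_seed=0)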
def main(path_to_params):
    start_time = time.time()

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)

    set_random_seeds(params)

    if not os.path.exists(params['save_dir']):
        os.makedirs(params['save_dir'])

    with open(os.path.join(params['save_dir'], 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)

    data_input = DataInput(params['data_params'])
    data_input.split_data()

    data_transformer = DataTransformerFactory(
        params['transform_params'],
        params['random_seed']).manufacture_transformer()
    data_input.embed_data_and_fit_transformer(\
        data_transformer,
        params['transform_params']['cells_to_subsample'],
        params['transform_params']['num_cells_for_transformer']
    )
    data_input.save_transformer(params['save_dir'])
    data_input.normalize_data()
    #everything below differs from the other main_UMAP

    multi_gate_initializer = MultipleGateInitializerHeuristic(
        data_input, params['model_params']['node_type'],
        params['gate_init_multi_heuristic_params'])
    init_gate_tree = [multi_gate_initializer.init_next_gate()]

    model = initialize_model(params['model_params'], init_gate_tree)
    data_input.prepare_data_for_training()
    trackers_per_step = []
    num_gates = params['gate_init_multi_heuristic_params']['num_gates']
    for i in range(num_gates):
        performance_tracker = run_train_model(model, params['train_params'],
                                              data_input)
        multi_gate_initializer.gates = model.get_gates()
        if i != num_gates - 1:
            print(model.get_gates())
            next_gate = multi_gate_initializer.init_next_gate()
            if next_gate is None:
                print(
                    'There are no non-overlapping initializations left to try!'
                )
                break
            model.add_node(next_gate)

    model_save_path = os.path.join(params['save_dir'], 'model.pkl')
    torch.save(model.state_dict(), model_save_path)

    tracker_save_path = os.path.join(params['save_dir'], 'tracker.pkl')
    with open(tracker_save_path, 'wb') as f:
        pickle.dump(performance_tracker, f)
    results_plotter = DataAndGatesPlotterDepthOne(
        model, np.concatenate(data_input.x_tr))
    #fig, axes = plt.subplots(params['gate_init_params']['n_clusters'], figsize=(1 * params['gate_init_params']['n_clusters'], 3 * params['gate_init_params']['n_clusters']))
    results_plotter.plot_data_with_gates(
        np.array(
            np.concatenate([
                data_input.y_tr[i] *
                torch.ones([data_input.x_tr[i].shape[0], 1])
                for i in range(len(data_input.x_tr))
            ])))

    plt.savefig(os.path.join(params['save_dir'], 'final_gates.png'))
    print('Complete main loop took %.4f seconds' % (time.time() - start_time))
Example no. 13
def push_converged_boundaries(path_to_params, stepsize, n_steps):
    start_time = time.time()

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)

    # eventually uncomment this; left as-is for now to keep the same results
    # as before for comparison
    set_random_seeds(params)

    data_input = DataInput(params['data_params'])
    data_input.split_data()
    print('%d samples in the training data' % len(data_input.x_tr))

    with open(os.path.join(params['save_dir'], 'trackers.pkl'), 'rb') as f:
        trackers = pickle.load(f)

    with open(os.path.join(params['save_dir'], 'transformer.pkl'), 'rb') as f:
        umapper = pickle.load(f)
    # FOR DEBUGGING ONLY
    #params['transform_params']['cells_to_subsample'] = 10
    data_input.embed_data(\
        umapper,
        cells_to_subsample=params['transform_params']['cells_to_subsample'],
        use_labels_to_transform_data=params['transform_params']['use_labels_to_transform_data']
    )
    data_input.normalize_data()
    data_input.convert_all_data_to_tensors()

    model = DepthOneModel([[['D1', 0, 0], ['D2', 0, 0]]],
                          params['model_params'])
    model.load_state_dict(
        torch.load(os.path.join(params['save_dir'], 'model.pkl')))

    init_acc = trackers[0].metrics['tr_acc'][-1]
    cur_best_acc = init_acc
    starting_gate = model.get_gates()[0]
    cur_gate = copy.deepcopy(starting_gate)
    cur_best_gate = copy.deepcopy(cur_gate)
    print('Starting gate:', starting_gate)
    counter = 0
    for left_step in range(n_steps):
        cur_gate[0] = starting_gate[0] - left_step * stepsize
        for right_step in range(n_steps):
            cur_gate[1] = starting_gate[1] + right_step * stepsize
            for down_step in range(n_steps):
                cur_gate[2] = starting_gate[2] - down_step * stepsize
                for up_step in range(n_steps):
                    cur_gate[3] = starting_gate[3] + up_step * stepsize
                    model = DepthOneModel([[['D1', cur_gate[0], cur_gate[1]],
                                            ['D2', cur_gate[2], cur_gate[3]]]],
                                          params['model_params'])
                    fit_classifier_params(
                        model, data_input,
                        params['train_params']['learning_rate_classifier'])
                    #                    model.nodes = None
                    #                    model.init_nodes([[['D1', cur_gate[0], cur_gate[1]], ['D2', cur_gate[2], cur_gate[3]]]])
                    cur_acc = compute_tr_acc(model, data_input)
                    #cur_acc = performance_tracker.metrics['tr_acc'][-1]
                    counter += 1
                    print(counter)
                    print(cur_gate)
                    print(cur_acc)
                    if cur_acc > cur_best_acc:
                        cur_best_acc = cur_acc
                        cur_best_gate = copy.deepcopy(cur_gate)

    print('Final acc %.3f, Initial acc %.3f' % (cur_best_acc, init_acc))
    print('Init/final gates', starting_gate, cur_best_gate)
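The grid search fits one classifier per step combination, i.e. n_steps ** 4 fits in total, so small values are advisable; a hypothetical call with five steps of 0.02 in the normalized embedding space performs 5 ** 4 = 625 fits.

push_converged_boundaries('results/run0/params.yaml', stepsize=0.02, n_steps=5)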
def main(path_to_params):
    params = TransformParameterParser(path_to_params).parse_params()
    check_consistency_of_params(params)
    model, _ = main_with_sample_level_labels(params)
    main_cell_level_labels(params, model)
Example no. 15
def main(path_to_params):
    start_time = time.time()

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)

    # eventually uncomment this; left as-is for now to keep the same results
    # as before for comparison
    set_random_seeds(params)

    if not os.path.exists(params['save_dir']):
        os.makedirs(params['save_dir'])

    with open(os.path.join(params['save_dir'], 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)

    data_input = DataInput(params['data_params'])
    data_input.split_data()
    print('%d samples in the training data' % len(data_input.x_tr))
    data_transformer = DataTransformerFactory(
        params['transform_params'],
        params['random_seed']).manufacture_transformer()

    data_input.embed_data_and_fit_transformer(\
        data_transformer,
        cells_to_subsample=params['transform_params']['cells_to_subsample'],
        num_cells_for_transformer=params['transform_params']['num_cells_for_transformer'],
        use_labels_to_transform_data=params['transform_params']['use_labels_to_transform_data']
    )
    # can't pickle opentsne objects; note that this guard compares the whole
    # transform_params dict to the string 'tsne', so it is always True and the
    # transformer is always saved. The intended check is presumably against a
    # key such as params['transform_params']['transform_type'].
    if not params['transform_params'] == 'tsne':
        data_input.save_transformer(params['save_dir'])
    data_input.normalize_data()

    potential_gates = get_all_potential_gates(data_input, params)
    data_input.convert_all_data_to_tensors()
    model = initialize_model(params['model_params'], potential_gates)

    if params['train_params']['fix_gates']:
        model.freeze_gate_params()
    tracker = run_train_model(\
        model, params['train_params'], data_input
    )

    #   if params['transform_params']['embed_dim'] == 3:
    #       unused_cluster_gate_inits = init_gates(data_input, params)
    #   else:
    #       unused_cluster_gate_inits = init_plot_and_save_gates(data_input, params)
    #   #everything below differs from the other main_UMAP
    #   data_input.convert_all_data_to_tensors()
    #   init_gate_tree, unused_cluster_gate_inits = get_next_gate_tree(unused_cluster_gate_inits, data_input, params, model=None)
    #   model = initialize_model(params['model_params'], [init_gate_tree])
    #   trackers_per_round = []
    #   num_gates_left = len(unused_cluster_gate_inits)
    #   #print(num_gates_left, 'asdfasdfasdfasdfasdfasdfas')
    #   for i in range(num_gates_left + 1):
    #       performance_tracker = run_train_model(model, params['train_params'], data_input)
    #       trackers_per_round.append(performance_tracker.get_named_tuple_rep())
    #       if i == params['train_params']['num_gates_to_learn'] - 1:
    #           break
    #       if not i == num_gates_left:
    #           next_gate_tree, unused_cluster_gate_inits = get_next_gate_tree(unused_cluster_gate_inits, data_input, params, model=model)
    #           model.add_node(next_gate_tree)

    model_save_path = os.path.join(params['save_dir'], 'model.pkl')
    torch.save(model.state_dict(), model_save_path)

    tracker_save_path = os.path.join(params['save_dir'], 'tracker.pkl')
    #    trackers_per_round = [tracker.get_named_tuple_rep() for tracker in trackers_per_round]
    with open(tracker_save_path, 'wb') as f:
        pickle.dump(tracker, f)
    if params['plot_umap_reflection']:
        # the reflection is about x = 0.5, since the data here is already in
        # normalized UMAP space
        reflected_data = []
        for data in data_input.x_tr:
            data[:, 0] = 1 - data[:, 0]
            reflected_data.append(data)
        data_input.x_tr = reflected_data
        gate_tree = model.get_gate_tree()
        reflected_gates = []
        for gate in gate_tree:
            print(gate)
            # bounds swap under reflection: x -> 1 - x maps [low, high] to
            # [1 - high, 1 - low]
            low_reflected = 1 - gate[0][2]
            high_reflected = 1 - gate[0][1]
            gate[0][1] = low_reflected
            gate[0][2] = high_reflected
            print(gate)

            reflected_gates.append(gate)
        model.init_nodes(reflected_gates)
        print(model.get_gates())
    results_plotter = DataAndGatesPlotterDepthOne(
        model, np.concatenate(data_input.x_tr))
    #fig, axes = plt.subplots(params['gate_init_params']['n_clusters'], figsize=(1 * params['gate_init_params']['n_clusters'], 3 * params['gate_init_params']['n_clusters']))

    if params['transform_params']['embed_dim'] == 2:
        results_plotter.plot_data_with_gates(
            np.array(
                np.concatenate([
                    data_input.y_tr[i] *
                    torch.ones([data_input.x_tr[i].shape[0], 1])
                    for i in range(len(data_input.x_tr))
                ])))
        plt.savefig(os.path.join(params['save_dir'], 'final_gates.png'))
    else:
        fig_pos, ax_pos, fig_neg, ax_neg = results_plotter.plot_data_with_gates(
            np.array(
                np.concatenate([
                    data_input.y_tr[i] *
                    torch.ones([data_input.x_tr[i].shape[0], 1])
                    for i in range(len(data_input.x_tr))
                ])))
        with open(os.path.join(params['save_dir'], 'final_gates_pos_3d.pkl'),
                  'wb') as f:
            pickle.dump(fig_pos, f)

        with open(os.path.join(params['save_dir'], 'final_gates_neg_3d.pkl'),
                  'wb') as f:
            pickle.dump(fig_neg, f)

    with open(os.path.join(params['save_dir'], 'configs.pkl'), 'wb') as f:
        pickle.dump(params, f)

    print('Learned weights:', model.linear.weight)
    print('Complete main loop took %.4f seconds' % (time.time() - start_time))
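A minimal standalone check of the reflection logic above: mapping x to 1 - x sends an interval [low, high] to [1 - high, 1 - low], which is why the gate bounds swap (values chosen to be exact in floating point).

low, high = 0.25, 0.5
reflected_low, reflected_high = 1 - high, 1 - low
assert (reflected_low, reflected_high) == (0.5, 0.75)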
def main(path_to_params):
    start_time = time.time()

    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)

    # eventually uncomment this; left as-is for now to keep the same results
    # as before for comparison
    set_random_seeds(params)

    if not os.path.exists(params['save_dir']):
        os.makedirs(params['save_dir'])

    with open(os.path.join(params['save_dir'], 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)

    data_input = DataInput(params['data_params'])
    data_input.split_data()

    data_transformer = DataTransformerFactory(
        params['transform_params'],
        params['random_seed']).manufacture_transformer()

    data_input.embed_data_and_fit_transformer(\
        data_transformer,
        cells_to_subsample=params['transform_params']['cells_to_subsample'],
        num_cells_for_transformer=params['transform_params']['num_cells_for_transformer'],
        use_labels_to_transform_data=params['transform_params']['use_labels_to_transform_data']
    )
    data_input.save_transformer(params['save_dir'])
    data_input.normalize_data()
    unused_cluster_gate_inits = init_plot_and_save_gates(data_input, params)

    data_input.convert_all_data_to_tensors()

    init_gate_tree, unused_cluster_gate_inits = get_next_gate_tree(
        unused_cluster_gate_inits, data_input, params, model=None)
    model1 = initialize_model(params['model_params'], [init_gate_tree])

    performance_tracker1 = run_train_model(model1, params['train_params'],
                                           data_input)

    model1_save_path = os.path.join(params['save_dir'], 'model1.pkl')
    torch.save(model1.state_dict(), model1_save_path)

    tracker1_save_path = os.path.join(params['save_dir'], 'tracker1.pkl')
    with open(tracker1_save_path, 'wb') as f:
        pickle.dump(performance_tracker1, f)

    # now select the data inside the learned model1 gate and re-run umap
    data_input.filter_data_inside_first_model_gate(model1)
    unused_cluster_gate_inits = init_plot_and_save_gates(data_input, params)

    data_transformer = DataTransformerFactory(
        params['transform_params'],
        params['random_seed']).manufacture_transformer()

    data_input.embed_data_and_fit_transformer(\
        data_transformer,
        cells_to_subsample=params['transform_params']['cells_to_subsample'],
        num_cells_for_transformer=params['transform_params']['num_cells_for_transformer'],
        use_labels_to_transform_data=params['transform_params']['use_labels_to_transform_data']
    )
    data_input.save_transformer(params['save_dir'])
    data_input.convert_all_data_to_tensors()

    init_gate_tree, _ = get_next_gate_tree(unused_cluster_gate_inits,
                                           data_input,
                                           params,
                                           model=None)
    model2 = initialize_model(params['model_params'], [init_gate_tree])

    performance_tracker2 = run_train_model(model2, params['train_params'],
                                           data_input)

    model2_save_path = os.path.join(params['save_dir'], 'model2.pkl')
    torch.save(model2.state_dict(), model2_save_path)

    tracker2_save_path = os.path.join(params['save_dir'], 'tracker2.pkl')
    with open(tracker2_save_path, 'wb') as f:
        pickle.dump(performance_tracker2, f)

    results_plotter = DataAndGatesPlotterDepthOne(
        model2, np.concatenate(data_input.x_tr))
    #fig, axes = plt.subplots(params['gate_init_params']['n_clusters'], figsize=(1 * params['gate_init_params']['n_clusters'], 3 * params['gate_init_params']['n_clusters']))
    results_plotter.plot_data_with_gates(
        np.array(
            np.concatenate([
                data_input.y_tr[i] *
                torch.ones([data_input.x_tr[i].shape[0], 1])
                for i in range(len(data_input.x_tr))
            ])))

    plt.savefig(os.path.join(params['save_dir'], 'final_gates.png'))

    with open(os.path.join(params['save_dir'], 'configs.pkl'), 'wb') as f:
        pickle.dump(params, f)

    print('Complete main loop took %.4f seconds' % (time.time() - start_time))
Example no. 17
def main_interpolating_gates(path_to_config, path_to_saved_model,
                             transformer_path):
    params = TransformParameterParser(path_to_config).parse_params()
    set_random_seeds(params)
    model_gates = load_model_gates(path_to_saved_model, params)
    data_input = init_data_input(params, transformer_path)
    make_all_plots_interpolated(model_gates, 0, params, data_input)
Example no. 18
def main_varying_each_dim_independently(path_to_config, path_to_saved_model,
                                        transformer_path):
    params = TransformParameterParser(path_to_config).parse_params()
    set_random_seeds(params)
    model_gates = load_model_gates(path_to_saved_model, params)
    data_input = init_data_input(params, transformer_path)
    make_all_plots(model_gates, params, data_input)
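Hypothetical invocations with placeholder paths, supplying the saved transformer alongside the config and model:

main_interpolating_gates('configs/plot.yaml',
                         'results/run0/model.pkl',
                         'results/run0/transformer.pkl')
main_varying_each_dim_independently('configs/plot.yaml',
                                    'results/run0/model.pkl',
                                    'results/run0/transformer.pkl')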