# Example #1
def generate_data(n=10000, d=11, data_type='Syn1', seed=1, output_dir='.'):
    """Generate a synthetic dataset and write it (plus CV partitions) to disk.

    :param n: number of samples to draw
    :param d: input dimension (number of features)
    :param data_type: name of the synthetic dataset ('Syn1'..'Syn6')
    :param seed: random seed for numpy
    :param output_dir: base directory; data is written to
        '<output_dir>/<data_type>_<d>/data.csv'
    :raises ValueError: if data_type is not one of 'Syn1'..'Syn6'
    """

    np.random.seed(seed)

    # X generation: i.i.d. standard-normal features
    X = np.random.randn(n, d)

    # Y generation: label rule depends on the dataset family
    if data_type in ('Syn1', 'Syn2', 'Syn3'):
        Y = Basic_Label_Generation(X, data_type)

    elif data_type in ('Syn4', 'Syn5', 'Syn6'):
        Y = Complex_Label_Generation(X, data_type)

    else:
        # Previously an unknown data_type fell through and raised a
        # confusing NameError on Y below; fail fast with a clear message.
        raise ValueError('Unknown data_type: {!r}'.format(data_type))

    # Append labels as the last column and persist as CSV
    data = np.concatenate([X, np.expand_dims(Y, axis=1)], axis=1)
    output_dir = os.path.join(output_dir, '{}_{}'.format(data_type, str(d)))
    create_dir(output_dir)
    output_path = os.path.join(output_dir, 'data.csv')
    pd.DataFrame(data=data).to_csv(output_path, index=False)
    create_dataset_partitions(output_path)
def cross_validation(config,
                     model_handler,
                     score_config,
                     folds,
                     gpus_list,
                     grid_params,
                     seeds_arr,
                     output_dir='./grid_search'):
    """Run a grid search over every (seed, fold) pair and write CV results.

    For each seed, every fold is searched via distributed_grid_search; the
    best validation score per fold (with its matching test score and
    experiment) is accumulated and written via write_cv_results. Validity
    flags for each (fold, seed) run are printed at the end.
    """
    create_dir(output_dir)

    # Running experiment counter, shared across all grid-search launches.
    config.setdefault('experiment_number', 0)

    validity_log = []
    for seed in seeds_arr:
        test_scores, val_scores, test_experiments = [], [], []
        for fold in folds:
            config['random_seed'] = seed
            data = read_train_val_test_folds(
                config['csv'],
                fold,
                apply_standardization=config['apply_standardization'])

            # NOTE(review): 'grid_parmas' looks misspelled but must match
            # the callee's keyword — do not rename it here alone.
            all_results, is_valid = distributed_grid_search(
                gpus_list,
                model_handler,
                copy.deepcopy(config),
                grid_parmas=copy.deepcopy(grid_params),
                name='iter_{}_seed_{}_conf'.format(fold, seed),
                train_args=model_handler_params(data, score_config),
                output_dir=output_dir)
            config['experiment_number'] += len(all_results)

            best_val, best_experiment, matching_test = get_best_val_result(
                all_results, score_config['score_increases'])
            test_scores.append(matching_test)
            val_scores.append(best_val)
            test_experiments.append(best_experiment)
            validity_log.append({'fold': fold,
                                 'seed': seed,
                                 'is valid': is_valid})

        write_cv_results(test_scores,
                         val_scores,
                         test_experiments,
                         output_dir,
                         name='_{}'.format(str(seed)))

    for entry in validity_log:
        print(entry)
# Example #3
def create_dataset_partitions(csv_path,
                              dataset_name=None,
                              train_proportion=0.7,
                              k_folds=5,
                              seed=1):
    """Split a CSV dataset into stratified train/val/test folds on disk.

    Writes '<dir of csv_path>/cross_validation/fold_<i>/{train,val,test}.csv'
    for each of the k_folds stratified folds. The label is taken from the
    last column of the CSV.

    :param csv_path: path to the source CSV file
    :param dataset_name: optional name; when given, the matching
        DatasetHandler preprocessor is applied to the dataframe first
    :param train_proportion: fraction of the whole dataset used for training
    :param k_folds: number of stratified folds (test fraction is 1/k_folds)
    :param seed: random seed for shuffling and splitting
    """
    print(csv_path)  # trace which dataset is being partitioned
    np.random.seed(seed=seed)
    # Build all paths with os.path.join, consistent with the rest of the file
    # (was mixed '+' concatenation and os.path.join).
    output_dir = os.path.join(os.path.dirname(csv_path), 'cross_validation')
    create_dir(output_dir)
    # NOTE(review): these two directories are created but never written to
    # below — presumably other tooling expects them; kept for compatibility.
    create_dir(os.path.join(output_dir, 'StratifiedKFold'))
    create_dir(os.path.join(output_dir, 'seed_{}'.format(seed)))

    df = pd.read_csv(csv_path)
    if dataset_name is not None:
        dataset_handler = DatasetHandler.get_dataset_handler(dataset_name)
        df = dataset_handler(df)
    # Shuffle rows deterministically before folding.
    df = df.sample(frac=1, random_state=seed).reset_index(drop=True)
    y = df.iloc[:, -1].values
    kf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=seed)
    # Mass left after the test share; divided between train and val below.
    val_proportion = 1 - (1. / k_folds) - train_proportion
    for i, (train_val_idx, test_idx) in enumerate(kf.split(df, y)):
        df_i = df.copy()
        test_df = df_i.iloc[test_idx]
        train_val_df = df_i.iloc[train_val_idx]
        y_train_val = train_val_df.iloc[:, -1].values
        # Split the non-test portion into train/val, stratified on the label.
        train_df, val_df, _, _ = train_test_split(
            train_val_df,
            y_train_val,
            random_state=seed,
            stratify=y_train_val,
            test_size=val_proportion / (val_proportion + train_proportion))

        fold_dir = os.path.join(output_dir, 'fold_{}'.format(i))
        create_dir(fold_dir)
        train_df.to_csv(os.path.join(fold_dir, 'train.csv'), index=False)
        val_df.to_csv(os.path.join(fold_dir, 'val.csv'), index=False)
        test_df.to_csv(os.path.join(fold_dir, 'test.csv'), index=False)
def create_experiment_directory(config,
                                copy_model=True,
                                return_sub_dirs=False):
    """Create the directory tree for one experiment and snapshot its config.

    Builds '<experiments_dir>/<experiment_number>' with 'weights' and 'logs'
    subdirectories, writes the configuration as JSON, and optionally copies
    the model source file into the experiment directory.

    :param config: dict with 'experiments_dir' and 'experiment_number';
        when copy_model is True, also 'models_dir' and 'model_number'
    :param copy_model: copy 'model<model_number>.py' into the experiment dir
    :param return_sub_dirs: also return the weights and logs directories
    :return: experiment_dir, or (experiment_dir, weights_dir, logs_dir)
        when return_sub_dirs is True
    """
    # Build all paths with os.path.join, consistent with the rest of the file
    # (was mixed '+' concatenation and os.path.join).
    experiment_dir = os.path.join(config['experiments_dir'],
                                  str(config['experiment_number']))
    weights_dir = os.path.join(experiment_dir, "weights")
    logs_dir = os.path.join(experiment_dir, "logs")
    create_dir(experiment_dir)
    create_dir(weights_dir)
    create_dir(logs_dir)
    write_config(config, os.path.join(experiment_dir, 'configuration.json'))
    if copy_model:
        copyfile(
            os.path.join(config['models_dir'],
                         'model{}.py'.format(config['model_number'])),
            os.path.join(experiment_dir, 'model.py'))

    if return_sub_dirs:
        return experiment_dir, weights_dir, logs_dir
    else:
        return experiment_dir
# Example #5
    for ablation_conf in ablation_study_configs:
        model_number = ablation_conf['model_number']
        grid_params = ablation_conf['grid']
        experiment_name = ablation_conf['exp_name']

        for competition_name in [
                CompetitionsHandler.GESTURE_PHASE,
                CompetitionsHandler.EYE_MOVEMENTS, CompetitionsHandler.GAS
        ]:
            shared_config[
                'experiments_dir'] = '{}/{}Competitions/{}/experiments/{}'.format(
                    base_dir, model, competition_name, experiment_name)
            output_dir = os.path.join(shared_config['experiments_dir'],
                                      'grid_search')
            create_dir(shared_config['experiments_dir'])

            shared_config['model_number'] = model_number
            shared_config['model_name'] = model
            shared_config['competition_name'] = competition_name
            config, score_config = CompetitionsHandler.get_configs(
                competition_name)
            config = CompetitionsHandler.merge_configs(shared_config, config)
            cross_validation(config,
                             ModelHandler,
                             score_config,
                             folds=folds,
                             gpus_list=gpus_list,
                             grid_params=grid_params,
                             output_dir=output_dir,
                             seeds_arr=seeds_arr)
            'FCN_with_feature_selection': FCN_with_feature_selection_grid}

if __name__ == '__main__':
    # Sweep every synthetic dataset, model variant and input dimension,
    # running the full cross-validation grid search for each combination.
    base_dir = 'path/to/base/dir'
    model_type_arr = ['FCN', 'FCN_with_oracle_mask', 'FCN_with_feature_selection']
    syn_names = ['Syn1', 'Syn2', 'Syn3', 'Syn4', 'Syn5', 'Syn6']
    d_arr = [11, 50, 100, 150, 200, 250, 300]
    folds = [0, 1, 2, 3, 4]
    gpus_list = ['0']
    seeds_arr = [1, 2, 3]

    # NOTE(review): `config`, `score_config` and `grid_map` are assumed to
    # be defined at module level elsewhere in this file — confirm.
    for syn in syn_names:
        for model_type in model_type_arr:
            for dim in d_arr:
                exp_name = 'exp_{}_model_{}_d_{}'.format(syn, model_type, dim)
                config['experiments_dir'] = '{}/FeatureSelectionSynExp/experiments/{}'.format(base_dir, exp_name)
                config['csv'] = 'data/FeatureSelectionSynExp/{}_{}/data.csv'.format(syn, dim)
                config['input_dim'] = dim
                config['mask'] = Ground_Truth_Mask_Generation(dim, syn)
                config['model_type'] = model_type

                grid_search_dir = os.path.join(config['experiments_dir'], 'grid_search')
                create_dir(config['experiments_dir'])

                cross_validation(config, ModelHandler, score_config,
                                 folds=folds,
                                 gpus_list=gpus_list,
                                 grid_params=grid_map[model_type],
                                 output_dir=grid_search_dir,
                                 seeds_arr=seeds_arr)