Example no. 1
def test_empty_X(tmpdir, fake_input_dir, output_dir, code_dir):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        if np.random.rand() <= 0.1:
            X = np.array([])
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation
    run_predict_on_test_dataset_and_output_score('test_empty_X_10%', code_dir,
                                                 fake_input_dir,
                                                 test_input_dir,
                                                 test_output_dir,
                                                 tests_log_path)
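
All of these examples assume the usual module-level imports (os, shutil, numpy as np) plus the project helpers they call (glob_train_files, glob_adapt_files, load_h5_input_file, save_h5_input_file, run_predict_on_test_dataset_and_output_score, ...); those imports are not repeated in the listing. tmpdir is pytest's built-in temporary-directory fixture, while fake_input_dir, output_dir and code_dir are presumably custom fixtures from the project's conftest. For reference only, a hypothetical fake_input_dir fixture consistent with the paths used below (train/ and adapt/ subdirectories of HDF5 files with 'X' and 't' datasets) could look like this; the layout, dataset keys and shapes are assumptions, not the project's actual conftest:

import h5py
import numpy as np
import pytest


@pytest.fixture
def fake_input_dir(tmp_path_factory):
    # Assumed layout: <dir>/train/X*.h5 and <dir>/adapt/X*.h5, each file
    # holding an 'X' feature matrix and a 't' time vector.
    root = tmp_path_factory.mktemp('fake_input')
    rng = np.random.default_rng(0)
    for subdir in ('train', 'adapt'):
        (root / subdir).mkdir()
        for i in range(2):
            with h5py.File(root / subdir / f'X{i}.h5', 'w') as f:
                f.create_dataset('X', data=rng.normal(size=(100, 8)))
                # Regularly spaced timestamps, one per row of X.
                f.create_dataset('t', data=np.arange(100, dtype=float))
    return str(root)
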
Example no. 2
def test_load_predict_matrixes_corrupted_t(fake_input_dir,
                                           tmpdir,
                                           hist_length=64 * 12 * 12):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        t = corrupt_t(t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation
    for eval_step in range(-1, 60, 5):
        original_X = load_data_matrix(fake_input_dir,
                                      eval_step=eval_step,
                                      hist_length=hist_length)
        X_with_corrupted_t = load_data_matrix(test_input_dir,
                                              eval_step=eval_step,
                                              hist_length=hist_length)
        np.testing.assert_array_equal(original_X, X_with_corrupted_t)
Example no. 3
def test_load_full_cache_matrixes_corrupted_t(fake_input_dir,
                                              tmpdir,
                                              hist_length=64 * 12 * 12):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        t = corrupt_t(t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation
    for eval_step in range(-1, 60, 5):
        original_X, original_t = load_full_cache_matrixes(
            fake_input_dir, eval_step=eval_step, steps_needed=hist_length)
        X_with_corrupted_t, corrupted_t = load_full_cache_matrixes(
            test_input_dir, eval_step=eval_step, steps_needed=hist_length)
        assert len(corrupted_t) == len(original_t)
        np.testing.assert_array_equal(original_X, X_with_corrupted_t)
        np.testing.assert_array_equal(original_t[~np.isnan(corrupted_t)],
                                      corrupted_t[~np.isnan(corrupted_t)])
        np.testing.assert_array_equal(original_t, fillna_t(corrupted_t))
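
Examples no. 2 and no. 3 assert that corrupting t leaves the loaded X untouched, that the non-NaN entries of the corrupted t still match the original, and that fillna_t reconstructs the original t exactly. That last assertion is only possible if the corruption blanks out individual entries and t itself is predictable (for instance a regularly spaced time grid that can be re-interpolated). A hypothetical corrupt_t / fillna_t pair consistent with those assertions, under exactly that assumption, is sketched below; it is illustrative, not the project's implementation:

import numpy as np


def corrupt_t(t, frac=0.3, seed=0):
    # Hypothetical corruption: blank out a random fraction of t with NaN.
    rng = np.random.default_rng(seed)
    corrupted = np.asarray(t, dtype=float).copy()
    idx = rng.choice(len(corrupted), max(1, int(frac * len(corrupted))),
                     replace=False)
    corrupted[idx] = np.nan
    return corrupted


def fillna_t(t):
    # Hypothetical repair: linearly interpolate the missing entries.
    # Exact recovery is only guaranteed for interior gaps on a regular grid.
    t = np.asarray(t, dtype=float)
    missing = np.isnan(t)
    filled = t.copy()
    filled[missing] = np.interp(np.flatnonzero(missing),
                                np.flatnonzero(~missing), t[~missing])
    return filled
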
Example no. 4
def test_t_diff_len_from_X(fake_input_dir, hist_length=16 * 12 * 12):
    data_file = os.path.join(fake_input_dir, 'adapt', 'X0.h5')
    X, t = load_h5_input_file(data_file)
    t = t[:-1]
    os.unlink(data_file)
    save_h5_input_file(data_file, X, t, output_dim=2)
    new_X, new_t = load_h5_input_file(data_file)
    assert new_X.shape[0] == new_t.shape[0]
    np.testing.assert_array_equal(X, new_X)
    np.testing.assert_array_equal(new_t, np.append(t, np.array([np.nan])))
Example no. 5
def test_read_ndim_2(fake_input_dir):
    train_files = glob_train_files(fake_input_dir)
    adapt_files = glob_adapt_files(fake_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        print(X.shape)
        # Corruption
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
        X_copy, t_copy = load_h5_input_file(data_file)
        assert len(X_copy.shape) == 2
Example no. 6
def test_t_has_zeros(fake_input_dir, hist_length=16 * 12 * 12):
    data_file = os.path.join(fake_input_dir, 'adapt', 'X0.h5')
    X, t = load_h5_input_file(data_file)

    corrupted_t = t.copy()
    corrupted_t[np.random.choice(len(t), len(t) // 2, replace=False)] = 0

    os.unlink(data_file)
    save_h5_input_file(data_file, X, corrupted_t, output_dim=2)
    new_X, new_t = load_h5_input_file(data_file)

    assert new_X.shape[0] == new_t.shape[0]
    np.testing.assert_array_equal(X, new_X)
    corrupted_t_equivalent = corrupted_t.copy()
    corrupted_t_equivalent[corrupted_t_equivalent == 0] = np.nan
    np.testing.assert_array_equal(new_t, corrupted_t_equivalent)
    np.testing.assert_array_equal(t, fillna_t(new_t))
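
Taken together, Examples no. 4-6 pin down how the save/load pair is expected to normalise its inputs: X always comes back 2-D, a t shorter than X is padded with NaN, and zero timestamps are read back as NaN (so fillna_t can repair them). A loader consistent with those assertions might look like the hypothetical h5py sketch below; the dataset keys and the exact normalisation order are assumptions, not the project's actual helper:

import h5py
import numpy as np


def load_h5_input_file(path):
    # Hypothetical loader consistent with the behaviour asserted above.
    with h5py.File(path, 'r') as f:
        X = np.asarray(f['X'], dtype=float)
        t = np.asarray(f['t'], dtype=float)
    # Guarantee a 2-D feature matrix (Example no. 5).
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    # Pad a too-short t with NaN so that len(t) == len(X) (Example no. 4).
    if t.shape[0] < X.shape[0]:
        t = np.append(t, np.full(X.shape[0] - t.shape[0], np.nan))
    # Treat zero timestamps as missing values (Example no. 6).
    t[t == 0] = np.nan
    return X, t
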
Example no. 7
def test_fill_timesteps_with_nan(tmpdir, fake_input_dir, output_dir, code_dir):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    adapt_files = glob_adapt_files(test_input_dir, 10000)

    for adapt_file in adapt_files:
        X, t = load_h5_input_file(adapt_file)
        # Corruption
        X, t = fill_with_nan(X, t, None)
        os.unlink(adapt_file)
        save_h5_input_file(adapt_file, X, t, output_dim=2)

    # Evaluation
    run_predict_on_test_dataset_and_output_score(
        'test_fill_timesteps_with_nan', code_dir, fake_input_dir,
        test_input_dir, test_output_dir, tests_log_path)
Example no. 8
def test_X_change_shape_no_predict(fake_input_dir,
                                   tmpdir,
                                   hist_length=64 * 12 * 12):
    # Transformation
    train_files = glob_train_files(fake_input_dir)
    adapt_files = glob_adapt_files(fake_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        if np.random.rand() <= 0.5:
            keep_size = X.shape[1] - 300
            X = X[:, :keep_size]
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation
    cache_X = None
    cache_t = None
    for eval_step in range(-1, 30, 1):
        original_X, last_time_step, cache_X, cache_t = load_prediction_matrix(
            fake_input_dir, fake_input_dir, fake_input_dir, fake_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X, cache_t)
Example no. 9
def test_sequential_load_corrupted_t(fake_input_dir,
                                     tmpdir,
                                     corrupt_fn_train,
                                     corrupt_fn_adapt,
                                     hist_length=64 * 12 * 12):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        if data_file in train_files:
            X, t = corrupt_fn_train(X, t)
        else:
            X, t = corrupt_fn_adapt(X, t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation
    cache_X = None
    cache_t = None
    cache_X_corrupted = None
    cache_t_corrupted = None
    for eval_step in range(-1, 30, 1):
        original_X, last_time_step, cache_X, cache_t = load_prediction_matrix(
            fake_input_dir, fake_input_dir, fake_input_dir, fake_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X, cache_t)
        (corrupted_X, last_time_step, cache_X_corrupted,
         cache_t_corrupted) = load_prediction_matrix(
            test_input_dir, test_input_dir, test_input_dir, test_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X_corrupted,
            cache_t_corrupted)
        np.testing.assert_array_equal(original_X, corrupted_X)
Example no. 10
def test_remove_half_lines(tmpdir, fake_input_dir, output_dir, code_dir):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        X, t = remove_half_lines(X, t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation
    run_predict_on_test_dataset_and_output_score('test_remove_half_lines',
                                                 code_dir, test_input_dir,
                                                 test_input_dir,
                                                 test_output_dir,
                                                 tests_log_path)
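
remove_half_lines itself is not part of this listing; from its usage it only needs to drop rows of X together with the matching entries of t. A minimal sketch, assuming a random half of the timesteps is kept, could be:

import numpy as np


def remove_half_lines(X, t, seed=0):
    # Hypothetical corruption: keep a random half of the timesteps of X and t.
    rng = np.random.default_rng(seed)
    keep = np.sort(rng.choice(len(t), len(t) // 2, replace=False))
    return X[keep], t[keep]
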