def test_empty_X(tmpdir, fake_input_dir, output_dir, code_dir):
    """Empty out X in ~10% of the input files, then verify that scoring
    the prediction pipeline on the corrupted dataset still completes."""
    # Configuration: corrupt a throwaway copy, never the shared fixture.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: rewrite every data file; with probability 0.1 the
    # file's X is replaced by an empty array (the corruption under test).
    all_files = glob_adapt_files(test_input_dir, 10000) + glob_train_files(test_input_dir)
    for data_file in all_files:
        X, t = load_h5_input_file(data_file)
        if np.random.rand() <= 0.1:  # Corruption
            X = np.array([])
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation
    run_predict_on_test_dataset_and_output_score(
        'test_empty_X_10%', code_dir, fake_input_dir, test_input_dir,
        test_output_dir, tests_log_path)
def test_load_predict_matrixes_corrupted_t(fake_input_dir, tmpdir,
                                           hist_length=64 * 12 * 12):
    """Corrupting t in every input file must leave the X matrix returned
    by ``load_data_matrix`` unchanged at every evaluation step."""
    # Configuration: work on a private copy of the fixture data.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: rewrite each file with a corrupted t (X untouched).
    for data_file in (glob_adapt_files(test_input_dir, 10000)
                      + glob_train_files(test_input_dir)):
        X, t = load_h5_input_file(data_file)
        corrupted = corrupt_t(t)  # Corruption
        os.unlink(data_file)
        save_h5_input_file(data_file, X, corrupted, output_dim=2)

    # Evaluation: clean and corrupted datasets must yield identical X.
    for eval_step in range(-1, 60, 5):
        original_X = load_data_matrix(fake_input_dir, eval_step=eval_step,
                                      hist_length=hist_length)
        X_with_corrupted_t = load_data_matrix(test_input_dir, eval_step=eval_step,
                                              hist_length=hist_length)
        np.testing.assert_array_equal(original_X, X_with_corrupted_t)
def test_load_full_cache_matrixes_corrupted_t(fake_input_dir, tmpdir,
                                              hist_length=64 * 12 * 12):
    """After corrupting t, ``load_full_cache_matrixes`` must return an
    identical X, agree with the original t wherever the corrupted t is not
    NaN, and be fully recoverable via ``fillna_t``."""
    # Configuration: corrupt a private copy of the fixture data.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: rewrite each file with a corrupted t (X untouched).
    for data_file in (glob_adapt_files(test_input_dir, 10000)
                      + glob_train_files(test_input_dir)):
        X, t = load_h5_input_file(data_file)
        corrupted = corrupt_t(t)  # Corruption
        os.unlink(data_file)
        save_h5_input_file(data_file, X, corrupted, output_dim=2)

    # Evaluation
    for eval_step in range(-1, 60, 5):
        original_X, original_t = load_full_cache_matrixes(
            fake_input_dir, eval_step=eval_step, steps_needed=hist_length)
        X_with_corrupted_t, corrupted_t = load_full_cache_matrixes(
            test_input_dir, eval_step=eval_step, steps_needed=hist_length)

        assert len(corrupted_t) == len(original_t)
        np.testing.assert_array_equal(original_X, X_with_corrupted_t)
        # Where the corrupted t carries a value, it must match the original.
        valid = ~np.isnan(corrupted_t)
        np.testing.assert_array_equal(original_t[valid], corrupted_t[valid])
        # Filling the NaNs must reconstruct the original t exactly.
        np.testing.assert_array_equal(original_t, fillna_t(corrupted_t))
def test_t_diff_len_from_X(fake_input_dir, hist_length=16 * 12 * 12):
    """When t is saved one element shorter than X, reloading must pad t
    with a trailing NaN so the two lengths match, leaving X untouched."""
    data_file = os.path.join(fake_input_dir, 'adapt', 'X0.h5')
    X, t = load_h5_input_file(data_file)

    # Drop the final t value so the lengths disagree, then re-save.
    truncated_t = t[:-1]
    os.unlink(data_file)
    save_h5_input_file(data_file, X, truncated_t, output_dim=2)

    new_X, new_t = load_h5_input_file(data_file)
    assert new_X.shape[0] == new_t.shape[0]
    np.testing.assert_array_equal(X, new_X)
    # The loader pads the missing step with NaN at the end.
    np.testing.assert_array_equal(new_t, np.append(truncated_t, np.array([np.nan])))
def test_read_ndim_2(fake_input_dir):
    """Round-trip every input file and check the loaded X is always 2-D.

    Each file is re-saved unchanged (with ``output_dim=2``) and reloaded;
    the loader must hand back a rank-2 X regardless of how the array was
    stored on disk.

    Fixes: removed a leftover debug ``print(X.shape)`` and a misleading
    ``# Corruption`` comment (the data is rewritten verbatim, not
    corrupted); replaced ``len(shape) == 2`` with the idiomatic ``ndim``.
    """
    train_files = glob_train_files(fake_input_dir)
    adapt_files = glob_adapt_files(fake_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Rewrite the file in place with identical content.
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
        X_copy, t_copy = load_h5_input_file(data_file)
        assert X_copy.ndim == 2
def test_t_has_zeros(fake_input_dir, hist_length=16 * 12 * 12):
    """Zeros written into t must be read back as NaN, and ``fillna_t``
    must then reconstruct the original t; X must be untouched."""
    data_file = os.path.join(fake_input_dir, 'adapt', 'X0.h5')
    X, t = load_h5_input_file(data_file)

    # Zero out a random half of t, then re-save the file.
    corrupted_t = t.copy()
    zero_idx = np.random.choice(len(t), len(t) // 2, replace=False)
    corrupted_t[zero_idx] = 0
    os.unlink(data_file)
    save_h5_input_file(data_file, X, corrupted_t, output_dim=2)

    new_X, new_t = load_h5_input_file(data_file)
    assert new_X.shape[0] == new_t.shape[0]
    np.testing.assert_array_equal(X, new_X)

    # The loader should have mapped every zero to NaN...
    expected_t = corrupted_t.copy()
    expected_t[expected_t == 0] = np.nan
    np.testing.assert_array_equal(new_t, expected_t)
    # ...and fillna_t should recover the original values exactly.
    np.testing.assert_array_equal(t, fillna_t(new_t))
def test_fill_timesteps_with_nan(tmpdir, fake_input_dir, output_dir, code_dir):
    """Fill the adapt files with NaN timesteps and verify that scoring the
    prediction pipeline on the corrupted dataset still completes.

    Only the adapt files are transformed; train files stay intact."""
    # Configuration: corrupt a throwaway copy, never the shared fixture.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: inject NaN timesteps into every adapt file.
    for adapt_file in glob_adapt_files(test_input_dir, 10000):
        X, t = load_h5_input_file(adapt_file)
        X, t = fill_with_nan(X, t, None)  # Corruption
        os.unlink(adapt_file)
        save_h5_input_file(adapt_file, X, t, output_dim=2)

    # Evaluation
    run_predict_on_test_dataset_and_output_score(
        'test_fill_timesteps_with_nan', code_dir, fake_input_dir,
        test_input_dir, test_output_dir, tests_log_path)
def test_X_change_shape_no_predict(fake_input_dir, tmpdir, hist_length=64 * 12 * 12):
    """Randomly shrink X's feature dimension in ~half the files and check
    that sequential loading via ``load_prediction_matrix`` still runs
    (smoke test: passing means no exception was raised).

    Fixes: the original corrupted ``fake_input_dir`` — the shared fixture —
    in place, polluting every other test, and never used its ``tmpdir``
    fixture. The corruption now happens on a private copy under ``tmpdir``,
    matching every sibling test in this file, and the evaluation reads the
    corrupted copy.
    """
    # Configuration: work on a throwaway copy of the fixture data.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption: with probability 0.5 drop the last 300 feature columns.
        if np.random.rand() <= 0.5:
            keep_size = X.shape[1] - 300
            X = X[:, :keep_size]
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation: sequentially load prediction matrices from the corrupted
    # copy, carrying the cache forward between steps.
    cache_X = None
    cache_t = None
    for eval_step in range(-1, 30, 1):
        original_X, last_time_step, cache_X, cache_t = load_prediction_matrix(
            test_input_dir, test_input_dir, test_input_dir, test_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X, cache_t)
def test_sequential_load_corrupted_t(fake_input_dir, tmpdir, corrupt_fn_train,
                                     corrupt_fn_adapt, hist_length=64 * 12 * 12):
    """Apply per-kind corruption functions (train vs adapt files) and check
    that sequential ``load_prediction_matrix`` calls yield the same X as on
    the clean dataset at every step."""
    # Configuration: corrupt a private copy of the fixture data.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: corrupt each file with the function matching its kind.
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        corrupt = corrupt_fn_train if data_file in train_files else corrupt_fn_adapt
        X, t = corrupt(X, t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation: step both loaders in lockstep, each with its own cache,
    # and require identical X matrices throughout.
    cache_X = None
    cache_t = None
    cache_X_corrupted = None
    cache_t_corrupted = None
    for eval_step in range(-1, 30, 1):
        original_X, last_time_step, cache_X, cache_t = load_prediction_matrix(
            fake_input_dir, fake_input_dir, fake_input_dir, fake_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X, cache_t)
        corrupted_X, last_time_step, cache_X_corrupted, cache_t_corrupted = load_prediction_matrix(
            test_input_dir, test_input_dir, test_input_dir, test_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X_corrupted, cache_t_corrupted)
        np.testing.assert_array_equal(original_X, corrupted_X)
def test_remove_half_lines(tmpdir, fake_input_dir, output_dir, code_dir):
    """Remove half of the lines from every input file and verify that
    scoring the prediction pipeline on the corrupted dataset completes."""
    # Configuration: corrupt a throwaway copy, never the shared fixture.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: drop half of the rows from each file.
    for data_file in (glob_adapt_files(test_input_dir, 10000)
                      + glob_train_files(test_input_dir)):
        X, t = load_h5_input_file(data_file)
        X, t = remove_half_lines(X, t)  # Corruption
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation.
    # NOTE(review): test_input_dir is passed twice here, whereas the sibling
    # tests (test_empty_X, test_fill_timesteps_with_nan) pass fake_input_dir
    # then test_input_dir — confirm against the helper's signature whether
    # this is intentional.
    run_predict_on_test_dataset_and_output_score(
        'test_remove_half_lines', code_dir, test_input_dir, test_input_dir,
        test_output_dir, tests_log_path)