def test_noise_files(fake_input_dir):
    # Stray .h5 files dropped at unexpected locations must not be picked up by the globbers
    train_files = glob_train_files(fake_input_dir)
    adapt_files = glob_adapt_files(fake_input_dir, 10000)
    open(os.path.join(fake_input_dir, 'train/Xm1/', 'Xfake.h5'), 'w').close()
    open(os.path.join(fake_input_dir, 'train/', 'Xfake.h5'), 'w').close()
    open(os.path.join(fake_input_dir, 'adapt/', 'Xfake.h5'), 'w').close()
    new_train_files = glob_train_files(fake_input_dir)
    new_adapt_files = glob_adapt_files(fake_input_dir, 10000)
    assert len(train_files) == len(new_train_files)
    assert len(adapt_files) == len(new_adapt_files)

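# Illustrative sketch only -- glob_train_files / glob_adapt_files are project helpers
# defined elsewhere. For test_noise_files to pass they must ignore stray .h5 files such
# as train/Xm1/Xfake.h5, so they presumably match a stricter layout than a plain
# '*/*.h5' glob. A hypothetical version that only keeps <name>/<name>.h5 sample files:
from pathlib import Path  # normally imported once at module top


def _example_glob_train_files(input_dir):
    """Keep only sample files whose name matches their parent directory (sketch)."""
    return sorted(str(p) for p in Path(input_dir, 'train').glob('*/*.h5')
                  if p.stem == p.parent.name)
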
def test_X_change_shape(tmpdir, fake_input_dir, output_dir, code_dir):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)
    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        if np.random.rand() <= 0.5:
            keep_size = X.shape[1] - 300
            X = X[:, :keep_size]
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
    # Evaluation
    run_predict_on_test_dataset_and_output_score('test_X_change_shape', code_dir, fake_input_dir,
                                                 test_input_dir, test_output_dir, tests_log_path,
                                                 run_compute_score=False)

def test_empty_X(tmpdir, fake_input_dir, output_dir, code_dir):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)
    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        if np.random.rand() <= 0.1:
            X = np.array([])
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
    # Evaluation
    run_predict_on_test_dataset_and_output_score('test_empty_X_10%', code_dir, fake_input_dir,
                                                 test_input_dir, test_output_dir, tests_log_path)

def test_no_in_h5(fake_input_dir, tmpdir, hist_length=64 * 12 * 12):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)
    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        os.unlink(data_file)
        # Corruption: rewrite the file with X nested one level too deep and no t dataset at all
        with h5py.File(data_file, driver='core', mode='w') as h5file:
            group = h5file
            group = group.create_group('X')
            group = group.create_group('value')
            group_X = group.create_group('X')
            group_X.create_dataset('value', data=X)
    # Evaluation
    cache_X = None
    cache_t = None
    cache_X_corrupted = None
    cache_t_corrupted = None
    for eval_step in range(-1, 30, 1):
        original_X, last_time_step, cache_X, cache_t = load_prediction_matrix(
            fake_input_dir, fake_input_dir, fake_input_dir, fake_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X, cache_t)
        corrupted_X, last_time_step_2, cache_X_corrupted, cache_t_corrupted = load_prediction_matrix(
            test_input_dir, test_input_dir, test_input_dir, test_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X_corrupted, cache_t_corrupted)
        np.testing.assert_array_equal(original_X, corrupted_X)

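# Illustrative sketch only -- load_h5_input_file / save_h5_input_file are project helpers.
# Judging from the corrupted layout written in test_no_in_h5 (X nested one level too deep,
# t missing entirely), a plausible expected layout is one 'X/value' and one 't/value'
# dataset per file; a hypothetical reader/writer pair for that assumed layout:
def _example_save_h5_input_file(path, X, t, output_dim=2):
    """Write X and t under <group>/value datasets (sketch of an assumed layout)."""
    X = np.atleast_2d(X) if output_dim == 2 else np.asarray(X)
    with h5py.File(path, mode='w') as h5file:
        h5file.create_group('X').create_dataset('value', data=X)
        h5file.create_group('t').create_dataset('value', data=np.asarray(t))


def _example_load_h5_input_file(path):
    """Read X and t back from the assumed layout (sketch)."""
    with h5py.File(path, mode='r') as h5file:
        return h5file['X/value'][()], h5file['t/value'][()]
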
def test_load_predict_matrixes_corrupted_t(fake_input_dir, tmpdir, hist_length=64 * 12 * 12):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)
    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        t = corrupt_t(t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
    # Evaluation
    for eval_step in range(-1, 60, 5):
        original_X = load_data_matrix(fake_input_dir, eval_step=eval_step, hist_length=hist_length)
        X_with_corrupted_t = load_data_matrix(test_input_dir, eval_step=eval_step, hist_length=hist_length)
        np.testing.assert_array_equal(original_X, X_with_corrupted_t)

def test_load_full_cache_matrixes_corrupted_t(fake_input_dir, tmpdir, hist_length=64 * 12 * 12):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)
    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        t = corrupt_t(t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
    # Evaluation
    for eval_step in range(-1, 60, 5):
        original_X, original_t = load_full_cache_matrixes(
            fake_input_dir, eval_step=eval_step, steps_needed=hist_length)
        X_with_corrupted_t, corrupted_t = load_full_cache_matrixes(
            test_input_dir, eval_step=eval_step, steps_needed=hist_length)
        assert len(corrupted_t) == len(original_t)
        np.testing.assert_array_equal(original_X, X_with_corrupted_t)
        np.testing.assert_array_equal(original_t[~np.isnan(corrupted_t)],
                                      corrupted_t[~np.isnan(corrupted_t)])
        np.testing.assert_array_equal(original_t, fillna_t(corrupted_t))

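# Illustrative sketch only -- corrupt_t and fillna_t are project helpers. The two
# corrupted-t tests above only rely on corrupt_t turning some timestamps into NaN and on
# fillna_t reconstructing them exactly; a hypothetical pair with that contract (assuming
# timestamps lie on a grid that linear interpolation can recover):
def _example_corrupt_t(t, nan_ratio=0.1, seed=None):
    """Randomly replace a fraction of the timestamps with NaN (sketch)."""
    rng = np.random.default_rng(seed)
    t = np.asarray(t, dtype=float).copy()
    t[rng.random(t.shape) < nan_ratio] = np.nan
    return t


def _example_fillna_t(t):
    """Fill NaN timestamps by interpolating over their positions (sketch)."""
    t = np.asarray(t, dtype=float).copy()
    nan_mask = np.isnan(t)
    if nan_mask.any():
        idx = np.arange(t.size)
        t[nan_mask] = np.interp(idx[nan_mask], idx[~nan_mask], t[~nan_mask])
    return t
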
def test_read_ndim_2(fake_input_dir):
    train_files = glob_train_files(fake_input_dir)
    adapt_files = glob_adapt_files(fake_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        print(X.shape)
        # Corruption
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
        X_copy, t_copy = load_h5_input_file(data_file)
        assert len(X_copy.shape) == 2

def test_fill_timesteps_with_nan(tmpdir, fake_input_dir, output_dir, code_dir):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)
    # Transformation
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for adapt_file in adapt_files:
        X, t = load_h5_input_file(adapt_file)
        # Corruption
        X, t = fill_with_nan(X, t, None)
        os.unlink(adapt_file)
        save_h5_input_file(adapt_file, X, t, output_dim=2)
    # Evaluation
    run_predict_on_test_dataset_and_output_score(
        'test_fill_timesteps_with_nan', code_dir, fake_input_dir,
        test_input_dir, test_output_dir, tests_log_path)

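# Illustrative sketch only -- fill_with_nan is a project helper whose third argument
# (None in the call above) is assumed to select which timesteps get blanked; None is
# taken to mean "all of them". A hypothetical stand-in, assuming timesteps run along
# axis 1 of X as in test_X_change_shape:
def _example_fill_with_nan(X, t, from_step=None):
    """Replace X values from `from_step` onward with NaN, keeping t unchanged (sketch)."""
    X = np.asarray(X, dtype=float).copy()
    X[:, (from_step or 0):] = np.nan
    return X, t
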
def test_X_change_shape_no_predict(fake_input_dir, tmpdir, hist_length=64 * 12 * 12):
    # Configuration: corrupt a copy so the shared fake_input_dir fixture stays intact
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)
    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        if np.random.rand() <= 0.5:
            keep_size = X.shape[1] - 300
            X = X[:, :keep_size]
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
    # Evaluation: loading the corrupted data must not raise, no prediction is run
    cache_X = None
    cache_t = None
    for eval_step in range(-1, 30, 1):
        corrupted_X, last_time_step, cache_X, cache_t = load_prediction_matrix(
            test_input_dir, test_input_dir, test_input_dir, test_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X, cache_t)

def test_sequential_load_corrupted_t(fake_input_dir, tmpdir, corrupt_fn_train, corrupt_fn_adapt,
                                     hist_length=64 * 12 * 12):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)
    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        if data_file in train_files:
            X, t = corrupt_fn_train(X, t)
        else:
            X, t = corrupt_fn_adapt(X, t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
    # Evaluation
    cache_X = None
    cache_t = None
    cache_X_corrupted = None
    cache_t_corrupted = None
    for eval_step in range(-1, 30, 1):
        original_X, last_time_step, cache_X, cache_t = load_prediction_matrix(
            fake_input_dir, fake_input_dir, fake_input_dir, fake_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X, cache_t)
        corrupted_X, last_time_step, cache_X_corrupted, cache_t_corrupted = load_prediction_matrix(
            test_input_dir, test_input_dir, test_input_dir, test_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X_corrupted, cache_t_corrupted)
        np.testing.assert_array_equal(original_X, corrupted_X)

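# Illustrative sketch only -- corrupt_fn_train and corrupt_fn_adapt are pytest fixtures
# provided elsewhere (e.g. in conftest.py). A hypothetical parametrization that would
# drive test_sequential_load_corrupted_t through a few train/adapt corruption
# combinations (fixture names reused purely for illustration; the corruption choices
# below are assumptions, not the project's actual fixtures):
import pytest  # normally imported once at module top

_EXAMPLE_CORRUPTIONS = [
    lambda X, t: (X, t),                                                         # untouched pair
    lambda X, t: (X, np.where(np.random.rand(*np.shape(t)) < 0.1, np.nan, t)),   # NaN ~10% of t
]


@pytest.fixture(params=_EXAMPLE_CORRUPTIONS)
def corrupt_fn_train(request):
    return request.param


@pytest.fixture(params=_EXAMPLE_CORRUPTIONS)
def corrupt_fn_adapt(request):
    return request.param
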
def test_remove_half_lines(tmpdir, fake_input_dir, output_dir, code_dir):
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)
    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)
    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        X, t = remove_half_lines(X, t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
    # Evaluation
    run_predict_on_test_dataset_and_output_score('test_remove_half_lines', code_dir, test_input_dir,
                                                 test_input_dir, test_output_dir, tests_log_path)

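# Illustrative sketch only -- remove_half_lines is a project helper. The test above only
# needs it to drop part of the data while keeping X and t aligned; a hypothetical version
# that keeps every other timestep (again assuming timesteps along axis 1 of X):
def _example_remove_half_lines(X, t):
    """Keep only every other timestep in X and t (sketch)."""
    return X[:, ::2], np.asarray(t)[::2]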