def result_continue_learning():
    """Fixture helper: split the simple event file in two, train NDL on the
    first part, then continue training on the second part from the
    intermediate weights and return the final weight matrix."""
    events = pd.read_csv(FILE_PATH_SIMPLE, sep="\t")
    first_half = events.head(CONTINUE_SPLIT_POINT)
    second_half = events.tail(len(events) - CONTINUE_SPLIT_POINT)
    # the split point must leave events on both sides
    assert len(first_half) > 0
    assert len(second_half) > 0
    path_1 = os.path.join(TMP_PATH, "event_file_simple_1.tab.gz")
    path_2 = os.path.join(TMP_PATH, "event_file_simple_2.tab.gz")
    for frame, path in ((first_half, path_1), (second_half, path_2)):
        frame.to_csv(path, header=True, index=None, sep='\t',
                     columns=["cues", "outcomes"], compression='gzip')
    # free the dataframes before the (memory-hungry) learning runs
    del events, first_half, second_half
    intermediate = ndl.ndl(path_1, ALPHA, BETAS)
    return ndl.ndl(path_2, ALPHA, BETAS, weights=intermediate)
def test_exceptions():
    """Each invalid argument combination raises ValueError with the
    documented message; a bad temporary_directory raises FileNotFoundError.

    Bug fix: the original compared the ``ExceptionInfo`` object itself to a
    string (``assert e_info == '...'``), which is always False, so the
    messages were never actually checked. Compare ``str(e_info.value)``
    instead.
    """
    with pytest.raises(ValueError) as e_info:
        ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='threading', weights=1)
    assert str(e_info.value) == 'weights need to be None or xarray.DataArray with method=threading'

    with pytest.raises(ValueError) as e_info:
        ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='magic')
    assert str(e_info.value) == 'method needs to be either "threading" or "openmp"'

    with pytest.raises(ValueError) as e_info:
        ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, weights=1)
    assert str(e_info.value) == 'weights needs to be either defaultdict or None'

    with pytest.raises(ValueError) as e_info:
        ndl.dict_ndl(FILE_PATH_MULTIPLE_CUES, ALPHA, BETAS, remove_duplicates=None)
    assert str(e_info.value) == 'cues or outcomes needs to be unique: cues "a a"; outcomes "A"; use remove_duplicates=True'

    with pytest.raises(ValueError) as e_info:
        ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='threading', len_sublists=-1)
    assert str(e_info.value) == "'len_sublists' must be larger then one"

    with pytest.raises(ValueError) as e_info:
        ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, make_data_array="magic")
    assert str(e_info.value) == "make_data_array must be True or False"

    with pytest.raises(ValueError) as e_info:
        ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, remove_duplicates="magic")
    assert str(e_info.value) == "remove_duplicates must be None, True or False"

    with pytest.raises(ValueError) as e_info:
        ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, remove_duplicates="magic")
    assert str(e_info.value) == "remove_duplicates must be None, True or False"

    with pytest.raises(FileNotFoundError, match="No such file or directory"):
        ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, temporary_directory="./magic")
def test_ordering_of_temporary_event_files(result_dict_ndl):
    """Splitting events into tiny temporary files (2 events each) must not
    change the resulting weights compared to the dict_ndl reference."""
    threaded = ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='threading',
                       events_per_temporary_file=2)
    unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE,
                                            result_dict_ndl, threaded)
    print('%.2f ratio unequal' % unequal_ratio)
    assert not len(unequal)
def test_multiple_cues_dict_ndl_vs_ndl_threading():
    """dict_ndl and threaded ndl must agree on events with duplicate cues
    (both run with remove_duplicates=True)."""
    reference = ndl.dict_ndl(FILE_PATH_MULTIPLE_CUES, ALPHA, BETAS,
                             remove_duplicates=True)
    threaded = ndl.ndl(FILE_PATH_MULTIPLE_CUES, ALPHA, BETAS,
                       remove_duplicates=True, method='threading')
    unequal, unequal_ratio = compare_arrays(FILE_PATH_MULTIPLE_CUES,
                                            reference, threaded)
    print('%.2f ratio unequal' % unequal_ratio)
    assert not len(unequal)
def test_save_to_netcdf4(result_ndl_openmp):
    """Weight attrs survive a netCDF round trip, both for an initial run
    and for a run continued from previous weights."""
    weights = result_ndl_openmp.copy()  # copy so the shared fixture stays untouched
    path = os.path.join(TMP_PATH, "weights.nc")
    weights.to_netcdf(path)
    # netCDF does not preserve the order of the OrderedDict, so compare
    # attrs key by key instead of as a whole
    reloaded = xr.open_dataarray(path)
    for key, value in weights.attrs.items():
        assert value == reloaded.attrs[key]
    continued = ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='openmp',
                        weights=weights)
    continued_path = os.path.join(TMP_PATH, "weights_continued.nc")
    continued.to_netcdf(continued_path)
    reloaded_continued = xr.open_dataarray(continued_path)
    for key, value in continued.attrs.items():
        assert value == reloaded_continued.attrs[key]
def result_ndl_openmp():
    """Fixture helper: weights from one ndl run using the openmp backend."""
    weights = ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='openmp')
    return weights
def result_ndl_threading():
    """Fixture helper: weights from one ndl run using the threading backend."""
    weights = ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='threading')
    return weights
def test_provide_temporary_directory():
    """ndl runs successfully when given a caller-supplied temporary directory."""
    with tempfile.TemporaryDirectory(dir=TMP_PATH) as tmp_dir:
        ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, temporary_directory=tmp_dir)
# Metric histories for the training split; presumably appended to once per
# epoch/iteration further down — TODO confirm against the surrounding loop.
acc_hist = []
precision_hist = []
recall_hist = []
f1score_hist = []
# valid
# Metric histories for the validation split.
val_acc_hist = []
val_precision_hist = []
val_recall_hist = []
val_f1score_hist = []

# Train ndl to get the weight matrix
# Continues from the existing `weights` (may be None on the first call —
# NOTE(review): confirm `weights` is initialised before this fragment runs).
weights = ndl(events = filtered_events_train_path,
              alpha = params['lr'],
              betas = (1, 1),
              method = "openmp",
              weights = weights,
              number_of_threads = num_threads,
              remove_duplicates = True,
              temporary_directory = temp_dir,
              verbose = False)

# Predicted outcomes from the activations
# `ev.predict_outcomes_NDL` is an external helper; it is fed the same event
# files and the freshly updated weight matrix.
y_train_pred = ev.predict_outcomes_NDL(events_path = filtered_events_train_path,
                                       weights = weights,
                                       chunksize = chunksize,
                                       num_threads = num_threads)
y_valid_pred = ev.predict_outcomes_NDL(events_path = filtered_events_valid_path,
                                       weights = weights,
                                       chunksize = chunksize,
                                       num_threads = num_threads)
from pyndl import ndl

# Base directory holding the CGN corpus event files and output weights.
tens_path = "/vol/tensusers/timzee/cgn/"

# Train NDL weights on the triphone event file for corpus component "a".
# remove_duplicates=False: events are assumed to already have unique
# cues/outcomes — TODO confirm, otherwise ndl raises ValueError.
weights = ndl.ndl(events=tens_path + 'ndl_comp-a_tri.tab.gz',
                  alpha=0.1, betas=(0.1, 0.1), method='openmp',
                  remove_duplicates=False)

# Earlier export variants kept for reference:
# weights.to_netcdf(tens_path + 'ifadv_ndl_weights.nc')
# xarray_extras.csv.to_csv(weights, tens_path + 'ifadv_pyndl_weights.csv')

# Export the weight matrix as plain CSV via pandas.
weights.to_pandas().to_csv(tens_path + 'comp-a_pyndl_weights.csv')