# Interactive helper: scans a folder, matches files against `pattern`, extracts
# upload-dict key/value pairs from filenames, confirms each file with the user,
# then uploads the confirmed files via med_api.upload_file.
#
# NOTE(review): this block is corrupted — the text between the username prompt and
# the '%i files in folder' print has been redacted ('******'), so the code that
# builds `files_ready_to_upload`, `n_files` and `med_api` is missing; the
# 'Is this correct?' prompt string is also split by a raw newline. Recover the
# original from version control before fixing.
# NOTE(review): `key is not 'filepath'` compares identity, not equality — should be
# `key != 'filepath'` (SyntaxWarning on modern CPython).
# NOTE(review): the 'a' (yes-to-all) branch prints '--uploading all files' but
# returns None immediately, so nothing is uploaded — presumably it should add all
# remaining files to `files_to_actually_upload` and break; confirm intent.
# NOTE(review): the final `return files_to_actually_upload, files_actually_uploaded`
# returns a 2-tuple, contradicting the declared Union[List[dict], None] return type
# and the ':return:' docstring — confirm which is intended.
def run_upload_helper( folder_to_search: str, pattern: str, default_upload_args: dict = None) -> Union[List[dict], None]: """ Searches through a folder and uploads file that match pattern. Can extract upload dict key:value pairs from filenames. :param folder_to_search: abs or regular path to folder to search through e.g. 'C:/Users/bdyet/data/' :param pattern: only upload files that match this pattern. Key/Valye pairs to add to upload dict can be added with {keyname}, e.g. if the file name is eeg_subid32.edf, then subid{subjectid} will match that file, and also add subjectid=32 to the upload dict. types (either int or str) of the values will be inferred if known (e.g. subjectid->int, studyid->str) or can be set subid{subjectid=str} :param default_upload_args: common key/value pairs to add to all the matched files upload dic, as a string. e.g. 'studyid=PSTIM otherkey=somevalue' :return: a list of sucessfully uploaded files """ username = input('MednickDB Username: '******'%i files in folder, %i match pattern and are ready for upload' % (n_files, len(files_ready_to_upload))) print('Files are:') for idx, file_info in enumerate(files_ready_to_upload): print(idx, ':', file_info['filepath']) for key, value in file_info.items(): if key is not 'filepath': print(' ', key, '=', value) ans = input('Is this correct? 
(y-yes, n-no, a-yes to all, q-quit)?') if ans == 'a': print('--uploading all files') return None elif ans == 'y': files_to_actually_upload.append(file_info) print('--upload this file') continue elif ans == 'n': print('--skipped this file') continue else: print('Quitting, no files uploaded') return None files_actually_uploaded = [] for file in files_to_actually_upload: file_ = file.copy() with open(file_.pop('filepath'), 'rb') as uploaded_version: file_info = med_api.upload_file(fileobject=uploaded_version, fileformat=file_.pop('fileformat'), filetype=file_.pop('filetype'), studyid=file_.pop('studyid'), versionid=file_.pop('versionid'), **file_) #all the other stuff files_actually_uploaded.append(file_info) return files_to_actually_upload, files_actually_uploaded
def test_usecase_5():
    """Usecase 5: run a value-filtered data query and check the row from usecase 3 is returned."""
    # a)
    med_api = MednickAPI(server_address, '*****@*****.**', 'Pass1234')
    matched_rows = med_api.get_data(
        query='studyid=TEST and data.memtesta.accuracy>=0.9',
        format='flat_dict')
    # The row stored by usecase 3 must be a subset of at least one returned row.
    assert any(dict_issubset(row, pytest.usecase_3_row2) for row in matched_rows)
def test_get_specifiers():
    """Check the unique specifier values stored for the TEST study in the data store."""
    med_api = MednickAPI(server_address, '*****@*****.**', 'Pass1234')

    # Study-wide specifier, then each TEST-study specifier in turn.
    assert 'TEST' in med_api.get_unique_var_values('studyid', store='data')
    assert med_api.get_unique_var_values('versionid', studyid='TEST', store='data') == [1]
    assert med_api.get_unique_var_values('subjectid', studyid='TEST', store='data') == [1, 2]
    assert med_api.get_unique_var_values('visitid', studyid='TEST', store='data') == [1, 2]
    assert med_api.get_unique_var_values('sessionid', studyid='TEST', store='data') == [1]
    # Filetype ordering is not guaranteed, so compare as a set.
    observed_filetypes = med_api.get_unique_var_values('filetype', studyid='TEST', store='data')
    assert set(observed_filetypes) == {
        'sleep_eeg', 'sleep_scoring', 'demographics', 'memtesta'
    }
def test_usecase_3():
    """Usecase 3: manually attach memtesta data to an existing file, then verify stores."""
    # a)
    med_api = MednickAPI(server_address, '*****@*****.**', 'Pass1234')
    # get a random fid
    target_fid = med_api.extract_var(med_api.get_files(studyid='TEST'), '_id')[0]
    manual_post = {
        'studyid': 'TEST',
        'filetype': 'memtesta',
        'data': {
            'accuracy': 0.9
        },
        'versionid': 1,
        'subjectid': 2,
        'visitid': 1,
        'sessionid': 1
    }
    upload_log = med_api.upload_data(**manual_post, fid=target_fid)
    print('testcase3 log:')
    print(upload_log)

    # b)
    time.sleep(5)  # Give db 5 seconds to update
    expected_versions = [
        pytest.usecase_1_filename_version,
        pytest.usecase_2_filename_version
    ]
    observed_versions = med_api.extract_var(
        med_api.get_files(studyid='TEST', versionid=1), 'filename_version')
    assert all(version in expected_versions for version in observed_versions), \
        "Missing expected filename versions from two previous usecases"

    # c)
    time.sleep(5)  # Give db 5 seconds to update
    stored_rows = med_api.get_data(studyid='TEST', versionid=1, format='flat_dict')
    expected_row_2 = pytest.usecase_2_row2.copy()
    expected_row_2.update({'memtesta.accuracy': 0.9, 'visitid': 1})
    pytest.usecase_3_row2 = expected_row_2  # shared with later usecase tests
    for expected_row in (pytest.usecase_2_row1, expected_row_2):
        assert any(dict_issubset(row, expected_row) for row in stored_rows)
def test_usecase_1():
    """runs usecase one from the mednickdb_usecase document (fid=)"""
    # a) upload a sleep EEG file and confirm the download matches the upload
    med_api = MednickAPI(server_address, '*****@*****.**', 'pass1234')
    upload_spec = {
        'fileformat': 'eeg',
        'studyid': 'TEST',
        'versionid': 1,
        'subjectid': 1,
        'visitid': 1,
        'sessionid': 1,
        'filetype': 'sleep_eeg',
    }
    # Snapshot before upload_spec is mutated with filename/filedir below.
    expected_filedata = upload_spec.copy()
    with open('testfiles/sleepfile1.edf', 'rb') as sleepfile:
        returned_info = med_api.upload_file(fileobject=sleepfile, **upload_spec)
    with open('testfiles/sleepfile1.edf', 'rb') as sleepfile:
        downloaded_sleepfile = med_api.download_file(returned_info['_id'])
        assert downloaded_sleepfile == sleepfile.read()

    # b) verify the stored file record once the file store has updated
    time.sleep(file_update_time)  # give db 5 seconds to update
    stored_info = med_api.get_file_by_fid(returned_info['_id'])
    upload_spec.update({
        'filename': 'sleepfile1.edf',
        'filedir': 'uploads/TEST/1/1/1/1/sleep_eeg/'
    })
    assert dict_issubset(stored_info, upload_spec)

    # wait out the remainder of the data-store update window
    time.sleep(data_update_time - file_update_time)  # give db 5 seconds to update
    parsed_rows = med_api.get_data_from_single_file(
        filetype='sleep_eeg',
        fid=returned_info['_id'],
        format='flat_dict')
    expected_filedata.pop('fileformat')
    expected_filedata.pop('filetype')
    expected_filedata.update({
        'sleep_eeg.eeg_nchan': 3,
        'sleep_eeg.eeg_sfreq': 128,
        'sleep_eeg.eeg_meas_date': 1041380737000,
        'sleep_eeg.eeg_ch_names': ['C3A2', 'C4A1', 'ECG']
    })  # add actual data in file. # TODO add all
    pytest.usecase_1_filedata = expected_filedata
    pytest.usecase_1_filename_version = stored_info['filename_version']
    assert any(dict_issubset(row, expected_filedata)
               for row in parsed_rows), "Is pyparse running? (and working)"
def mednickAPI_setup():
    """Build and return a MednickAPI client using the module-level credentials."""
    api = MednickAPI(user, password)
    return api
# NOTE(review): this chunk is incomplete at both ends — the leading `return obj_out`
# belongs to a function whose body starts before this excerpt, and the
# `for file_info in file_infos:` loop body is truncated after the problem-files
# guard (the actual parse/upload logic continues beyond this view). Left verbatim;
# recover the full script before modifying.
# NOTE(review): `except ConnectionError: continue` retries immediately with no
# backoff — presumably parse_rate was meant to throttle this loop; confirm a
# sleep exists in the truncated portion.
return obj_out if __name__ == '__main__': """ Automatic parsing routine. Will pull from the database every 5 seconds and try to parse whatever is marked as unparsed If some error occurs, this is logged but not raised too, so that the regular db can continue as normal. TODO: we should probably alert an admin in this case (somehow, automatic email?) """ parse_rate = 5 #seconds per DB query problem_files = [] while True: #Run indefinatly try: med_api = MednickAPI('http://saclab.ss.uci.edu:8000', 'PyAutoParser', password='******') upload_kwargs = [ k for k, v in signature(med_api.upload_data).parameters.items() ] file_infos = med_api.get_unparsed_files(previous_versions=False) except ConnectionError: continue # retry connection if len(file_infos) > 0: print('Found', len(file_infos), 'unparsed files, beginning parse:') for file_info in file_infos: if file_info['filename'] in problem_files: continue
# Deletes every file and data row tagged studyid='TEST' so the test suite starts
# from a clean database, then asserts the stores are empty and the deletions are
# reflected in the deleted-files list.
# NOTE(review): formatting is collapsed, so the extent of the `if fids:` /
# `for fid in fids:` bodies is ambiguous — it is unclear which of the follow-up
# queries/asserts run per-fid versus once after the loop; recover original
# indentation from version control before restructuring.
# NOTE(review): `assert fid not in fids2` reads the loop variable, so it only
# checks the last-deleted fid if it sits outside the loop — confirm placement.
def test_clear_test_study(): """ Clear all data and files with the studyid of "TEST". This esentually refreshes the database for new testing. """ med_api = MednickAPI(server_address, '*****@*****.**', 'Pass1234') fids = med_api.extract_var(med_api.get_files(studyid='TEST'), '_id') if fids: for fid in fids: med_api.delete_file(fid, delete_all_versions=True) med_api.delete_data_from_single_file(fid) fids2 = med_api.extract_var(med_api.get_files(studyid='TEST'), '_id') assert fid not in fids2 assert (fids2 == []) deleted_fids = med_api.extract_var(med_api.get_deleted_files(), '_id') assert all([dfid in deleted_fids for dfid in fids]) med_api.delete_data(studyid='TEST') assert len(med_api.get_data( studyid='TEST', format='nested_dict')) == 0 #TODO after clearing up sourceid bug
def test_usecase_4():
    """Usecase 4: upload two sleep scoring files and verify the merged data rows."""
    # a)
    med_api = MednickAPI(server_address, '*****@*****.**', 'Pass1234')

    # b) uploading some scorefiles
    score_spec_1 = {
        'fileformat': 'sleep_scoring',
        'studyid': 'TEST',
        'versionid': 1,
        'subjectid': 2,
        'visitid': 1,
        'sessionid': 1,
        'filetype': 'sleep_scoring'
    }
    with open('testfiles/scorefile1.mat', 'rb') as scorefile1:
        fid1 = med_api.upload_file(scorefile1, **score_spec_1)
    # Second scorefile differs only by visit.
    score_spec_2 = score_spec_1.copy()
    score_spec_2.update({'visitid': 2})
    with open('testfiles/scorefile2.mat', 'rb') as scorefile2:
        fid2 = med_api.upload_file(scorefile2, **score_spec_2)

    scorefile1_data = {
        'sleep_scoring.epochstage': [-1, -1, -1, 0, 0, 0, 0, 0, 0, 0],
        'sleep_scoring.epochoffset': [0, 30, 60, 90, 120, 150, 180, 210, 240, 270],
        'sleep_scoring.starttime': 1451635302000,
        'sleep_scoring.mins_in_0': 3.5,
        'sleep_scoring.mins_in_1': 0,
        'sleep_scoring.mins_in_2': 0,
        'sleep_scoring.mins_in_3': 0,
        'sleep_scoring.mins_in_4': 0,
        'sleep_scoring.sleep_efficiency': 0,
        'sleep_scoring.total_sleep_time': 0
    }
    scorefile2_data = {
        'sleep_scoring.epochstage': [0, 0, 1, 1, 2, 2, 3, 3, 2, 2],
        'sleep_scoring.epochoffset': [0, 30, 60, 90, 120, 150, 180, 210, 240, 270],
        'sleep_scoring.starttime': 1451635302000,
        'sleep_scoring.mins_in_0': 1,
        'sleep_scoring.mins_in_1': 1,
        'sleep_scoring.mins_in_2': 2,
        'sleep_scoring.mins_in_3': 1,
        'sleep_scoring.mins_in_4': 0,
        'sleep_scoring.sleep_efficiency': 0.8,
        'sleep_scoring.total_sleep_time': 4
    }

    # c)
    time.sleep(data_update_time)  # Give db 50 seconds to update
    stored_rows = med_api.get_data(studyid='TEST', versionid=1, format='flat_dict')
    expected_row_1 = pytest.usecase_2_row1.copy()
    # Scoring data merges with the rows established by earlier usecases.
    scorefile1_data.update(pytest.usecase_3_row2)
    expected_row_2 = scorefile1_data
    scorefile2_data.update(pytest.usecase_2_row2)
    expected_row_3 = scorefile2_data
    for expected_row in (expected_row_1, expected_row_2, expected_row_3):
        assert any(dict_issubset(row, expected_row) for row in stored_rows)
    pytest.usecase_4_row1 = expected_row_1
    pytest.usecase_4_row2 = expected_row_2
    pytest.usecase_4_row3 = expected_row_3
def test_usecase_2():
    """Usecase 2: upload a tabular demographics file and check both file and data stores."""
    # a)
    demo_spec = {
        'filetype': 'demographics',
        'fileformat': 'tabular',
        'studyid': 'TEST',
        'versionid': 1
    }
    med_api = MednickAPI(server_address, '*****@*****.**', 'Pass1234')
    with open('testfiles/TEST_Demographics.xlsx', 'rb') as demofile:
        # b)
        uploaded_info = med_api.upload_file(fileobject=demofile, **demo_spec)
    fid = uploaded_info['_id']
    downloaded_demo = med_api.download_file(fid)
    with open('testfiles/TEST_Demographics.xlsx', 'rb') as demofile:
        assert downloaded_demo == demofile.read()

    # c)
    time.sleep(file_update_time)  # Give file db 5 seconds to update
    demo_spec.update({
        'filename': 'TEST_Demographics.xlsx',
        'filedir': 'uploads/TEST/1/demographics/'
    })
    stored_info = med_api.get_file_by_fid(fid)
    assert dict_issubset(stored_info, demo_spec)

    # d)
    time.sleep(data_update_time - file_update_time)  # Give data db 50 seconds to update
    stored_rows = med_api.get_data(studyid='TEST', versionid=1, format='flat_dict')
    expected_row1 = {
        'studyid': 'TEST',
        'versionid': 1,
        'subjectid': 1,
        'demographics.age': 23,
        'demographics.sex': 'F',
        'demographics.bmi': 23
    }
    # Subject 1's row also carries the sleep EEG data from usecase 1.
    expected_row1.update(pytest.usecase_1_filedata)
    expected_row2 = {
        'studyid': 'TEST',
        'versionid': 1,
        'subjectid': 2,
        'demographics.age': 19,
        'demographics.sex': 'M',
        'demographics.bmi': 20
    }
    pytest.usecase_2_row1 = expected_row1
    pytest.usecase_2_row2 = expected_row2
    pytest.usecase_2_filename_version = stored_info['filename_version']
    for expected_row in (expected_row1, expected_row2):
        assert any(dict_issubset(row, expected_row) for row in stored_rows), \
            "demographics data downloaded does not match expected"

    # e)
    data_sleep_eeg = med_api.get_data(
        studyid='TEST', versionid=1,
        filetype='sleep_eeg')[0]  # FIXME will fail here until filetype is query-able
    assert dict_issubset(data_sleep_eeg, pytest.usecase_1_filedata), \
        "sleep data downloaded does not match what was uploaded in usecase 1"