Example #1
def run_upload_helper(
        folder_to_search: str,
        pattern: str,
        default_upload_args: dict = None) -> Union[Tuple[List[dict], List[dict]], None]:
    """
    Searches through a folder and uploads file that match pattern. Can extract upload dict key:value pairs from filenames.
    :param folder_to_search: abs or regular path to folder to search through e.g. 'C:/Users/bdyet/data/'
    :param pattern: only upload files that match this pattern. Key/Valye pairs to add to upload dict can be added with {keyname},
        e.g. if the file name is eeg_subid32.edf, then subid{subjectid} will match that file, and also add subjectid=32 to the upload dict.
        types (either int or str) of the values will be inferred if known (e.g. subjectid->int, studyid->str) or can be set subid{subjectid=str}
    :param default_upload_args: common key/value pairs to add to all the matched files upload dic, as a string.
        e.g. 'studyid=PSTIM otherkey=somevalue'
    :return: a list of sucessfully uploaded files
    """
    username = input('MednickDB Username: ')
    password = input('MednickDB Password: ')  # credential prompt was scrubbed to '******' in the source; reconstructed here
    med_api = MednickAPI(server_address, username, password)  # assumes a module-level server_address, as in the tests below
    # NOTE: the file-search logic that builds n_files and files_ready_to_upload was
    # collapsed in the source and is not reconstructed here.
    files_to_actually_upload = []
    print('%i files in folder, %i match pattern and are ready for upload' %
          (n_files, len(files_ready_to_upload)))
    print('Files are:')
    for idx, file_info in enumerate(files_ready_to_upload):
        print(idx, ':', file_info['filepath'])
        for key, value in file_info.items():
            if key != 'filepath':
                print('   ', key, '=', value)
        ans = input('Is this correct? (y-yes, n-no, a-yes to all, q-quit)?')
        if ans == 'a':
            print('--uploading all files')
            # queue this file and all remaining files instead of aborting
            files_to_actually_upload.extend(files_ready_to_upload[idx:])
            break
        elif ans == 'y':
            files_to_actually_upload.append(file_info)
            print('--uploading this file')
            continue
        elif ans == 'n':
            print('--skipped this file')
            continue
        else:
            print('Quitting, no files uploaded')
            return None

    files_actually_uploaded = []
    for file in files_to_actually_upload:
        file_ = file.copy()
        with open(file_.pop('filepath'), 'rb') as uploaded_version:
            file_info = med_api.upload_file(fileobject=uploaded_version,
                                            fileformat=file_.pop('fileformat'),
                                            filetype=file_.pop('filetype'),
                                            studyid=file_.pop('studyid'),
                                            versionid=file_.pop('versionid'),
                                            **file_)  #all the other stuff
            files_actually_uploaded.append(file_info)

    return files_to_actually_upload, files_actually_uploaded
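

# A hypothetical usage sketch of run_upload_helper (not part of the original source).
# The folder path, the {subjectid} capture pattern and the default_upload_args string
# are illustrative assumptions drawn from the docstring above.
def example_run_upload_helper():
    result = run_upload_helper(
        folder_to_search='C:/Users/bdyet/data/',
        pattern='eeg_subid{subjectid}.edf',  # matches eeg_subid32.edf and adds subjectid=32 to the upload dict
        default_upload_args='studyid=PSTIM otherkey=somevalue')  # added to every matched file's upload dict
    if result is not None:
        selected, uploaded = result
        print('Uploaded %i of %i selected files' % (len(uploaded), len(selected)))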
def test_usecase_5():
    # a)
    med_api = MednickAPI(server_address, '*****@*****.**',
                         'Pass1234')
    data_rows = med_api.get_data(
        query='studyid=TEST and data.memtesta.accuracy>=0.9',
        format='flat_dict')
    assert any([
        dict_issubset(data_row, pytest.usecase_3_row2)
        for data_row in data_rows
    ])
def test_get_specifiers():
    med_api = MednickAPI(server_address, '*****@*****.**',
                         'Pass1234')
    sids = med_api.get_unique_var_values('studyid', store='data')
    assert 'TEST' in sids

    vids = med_api.get_unique_var_values('versionid',
                                         studyid='TEST',
                                         store='data')
    assert vids == [1]

    sids = med_api.get_unique_var_values('subjectid',
                                         studyid='TEST',
                                         store='data')
    assert sids == [1, 2]

    vids = med_api.get_unique_var_values('visitid',
                                         studyid='TEST',
                                         store='data')
    assert vids == [1, 2]

    sids = med_api.get_unique_var_values('sessionid',
                                         studyid='TEST',
                                         store='data')
    assert sids == [1]

    filetypes = med_api.get_unique_var_values('filetype',
                                              studyid='TEST',
                                              store='data')
    assert set(filetypes) == {
        'sleep_eeg', 'sleep_scoring', 'demographics', 'memtesta'
    }
def test_usecase_3():

    # a)
    med_api = MednickAPI(server_address, '*****@*****.**', 'Pass1234')
    fid_for_manual_upload = med_api.extract_var(
        med_api.get_files(studyid='TEST'), '_id')[0]  # get a random fid
    data_post = {
        'studyid': 'TEST',
        'filetype': 'memtesta',
        'data': {
            'accuracy': 0.9
        },
        'versionid': 1,
        'subjectid': 2,
        'visitid': 1,
        'sessionid': 1
    }
    log = med_api.upload_data(**data_post, fid=fid_for_manual_upload)
    print('testcase3 log:')
    print(log)
    # b)
    time.sleep(5)  # Give db 5 seconds to update
    correct_filename_versions = [
        pytest.usecase_1_filename_version, pytest.usecase_2_filename_version
    ]
    filename_versions = med_api.extract_var(
        med_api.get_files(studyid='TEST', versionid=1), 'filename_version')
    assert all([
        fid in correct_filename_versions for fid in filename_versions
    ]), "Missing expected filename versions from two previous usecases"

    # c)
    time.sleep(5)  # Give db 5 seconds to update
    data_rows = med_api.get_data(studyid='TEST',
                                 versionid=1,
                                 format='flat_dict')
    correct_row_2 = pytest.usecase_2_row2.copy()
    correct_row_2.update({'memtesta.accuracy': 0.9, 'visitid': 1})
    pytest.usecase_3_row2 = correct_row_2
    correct_rows = [pytest.usecase_2_row1, correct_row_2]
    for correct_row in correct_rows:
        assert any(
            [dict_issubset(data_row, correct_row) for data_row in data_rows])
def test_usecase_1():
    """runs usecase one from the mednickdb_usecase document (fid=)"""
    #a)
    med_api = MednickAPI(server_address, '*****@*****.**', 'pass1234')
    file_info_post = {
        'fileformat': 'eeg',
        'studyid': 'TEST',
        'versionid': 1,
        'subjectid': 1,
        'visitid': 1,
        'sessionid': 1,
        'filetype': 'sleep_eeg',
    }
    file_data_real = file_info_post.copy()
    with open('testfiles/sleepfile1.edf', 'rb') as sleepfile:
        file_info_returned = med_api.upload_file(fileobject=sleepfile,
                                                 **file_info_post)

    with open('testfiles/sleepfile1.edf', 'rb') as sleepfile:
        downloaded_sleepfile = med_api.download_file(file_info_returned['_id'])
        assert (downloaded_sleepfile == sleepfile.read())

    # b)
    time.sleep(file_update_time)  # give db 5 seconds to update
    file_info_get = med_api.get_file_by_fid(file_info_returned['_id'])
    file_info_post.update({
        'filename': 'sleepfile1.edf',
        'filedir': 'uploads/TEST/1/1/1/1/sleep_eeg/'
    })
    assert dict_issubset(file_info_get, file_info_post)

    time.sleep(data_update_time -
               file_update_time)  # give the data db time to finish updating
    file_datas = med_api.get_data_from_single_file(
        filetype='sleep_eeg',
        fid=file_info_returned['_id'],
        format='flat_dict')
    file_data_real.pop('fileformat')
    file_data_real.pop('filetype')
    file_data_real.update({
        'sleep_eeg.eeg_nchan': 3,
        'sleep_eeg.eeg_sfreq': 128,
        'sleep_eeg.eeg_meas_date': 1041380737000,
        'sleep_eeg.eeg_ch_names': ['C3A2', 'C4A1', 'ECG']
    })  # add actual data in file. # TODO add all
    pytest.usecase_1_filedata = file_data_real
    pytest.usecase_1_filename_version = file_info_get['filename_version']

    assert (any([
        dict_issubset(file_data, file_data_real) for file_data in file_datas
    ])), "Is pyparse running? (and working)"
def mednickAPI_setup():
    return MednickAPI(user, password)
Example #7
    return obj_out


if __name__ == '__main__':
    """
    Automatic parsing routine. Will pull from the database every 5 seconds and try to parse whatever is marked as unparsed
    If some error occurs, this is logged but not raised too, so that the regular db can continue as normal.
    TODO: we should probably alert an admin in this case (somehow, automatic email?)
    """
    parse_rate = 5  #seconds per DB query
    problem_files = []
    while True:  # run indefinitely
        try:
            med_api = MednickAPI('http://saclab.ss.uci.edu:8000',
                                 'PyAutoParser',
                                 password='******')
            upload_kwargs = [
                k
                for k, v in signature(med_api.upload_data).parameters.items()
            ]
            file_infos = med_api.get_unparsed_files(previous_versions=False)
        except ConnectionError:
            continue  # retry connection

        if len(file_infos) > 0:
            print('Found', len(file_infos), 'unparsed files, beginning parse:')

            for file_info in file_infos:
                if file_info['filename'] in problem_files:
                    continue
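
                # The source example is truncated here. A hedged sketch of how the loop
                # body might continue: parse_file() is a hypothetical helper (not part of
                # mednickdb), while download_file/upload_data, upload_kwargs, problem_files
                # and parse_rate reuse names already introduced above (assumes time is imported).
                try:
                    raw = med_api.download_file(file_info['_id'])
                    parsed = parse_file(raw, file_info)  # hypothetical parser
                    upload_args = {k: v for k, v in file_info.items()
                                   if k in upload_kwargs and k not in ('data', 'fid')}
                    med_api.upload_data(**upload_args, data=parsed, fid=file_info['_id'])
                except Exception as err:
                    # log and remember the file so it is skipped on later passes (TODO: alert an admin)
                    print('Failed to parse', file_info['filename'], ':', err)
                    problem_files.append(file_info['filename'])

        time.sleep(parse_rate)  # wait before polling the database again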
def test_clear_test_study():
    """
    Clear all data and files with the studyid of "TEST". This essentially refreshes the database for new testing.
    """
    med_api = MednickAPI(server_address, '*****@*****.**',
                         'Pass1234')
    fids = med_api.extract_var(med_api.get_files(studyid='TEST'), '_id')
    if fids:
        for fid in fids:
            med_api.delete_file(fid, delete_all_versions=True)
            med_api.delete_data_from_single_file(fid)
        fids2 = med_api.extract_var(med_api.get_files(studyid='TEST'), '_id')
        assert fid not in fids2
        assert (fids2 == [])
        deleted_fids = med_api.extract_var(med_api.get_deleted_files(), '_id')
        assert all([dfid in deleted_fids for dfid in fids])
    med_api.delete_data(studyid='TEST')
    assert len(med_api.get_data(
        studyid='TEST',
        format='nested_dict')) == 0  #TODO after clearing up sourceid bug
def test_usecase_4():
    # a)
    med_api = MednickAPI(server_address, '*****@*****.**',
                         'Pass1234')

    # b) uploading some scorefiles
    file_info1_post = {
        'fileformat': 'sleep_scoring',
        'studyid': 'TEST',
        'versionid': 1,
        'subjectid': 2,
        'visitid': 1,
        'sessionid': 1,
        'filetype': 'sleep_scoring'
    }
    with open('testfiles/scorefile1.mat', 'rb') as scorefile1:
        fid1 = med_api.upload_file(scorefile1, **file_info1_post)

    file_info2_post = file_info1_post.copy()
    file_info2_post.update({'visitid': 2})
    with open('testfiles/scorefile2.mat', 'rb') as scorefile2:
        fid2 = med_api.upload_file(scorefile2, **file_info2_post)

    scorefile1_data = {
        'sleep_scoring.epochstage': [-1, -1, -1, 0, 0, 0, 0, 0, 0, 0],
        'sleep_scoring.epochoffset':
        [0, 30, 60, 90, 120, 150, 180, 210, 240, 270],
        'sleep_scoring.starttime': 1451635302000,
        'sleep_scoring.mins_in_0': 3.5,
        'sleep_scoring.mins_in_1': 0,
        'sleep_scoring.mins_in_2': 0,
        'sleep_scoring.mins_in_3': 0,
        'sleep_scoring.mins_in_4': 0,
        'sleep_scoring.sleep_efficiency': 0,
        'sleep_scoring.total_sleep_time': 0
    }
    scorefile2_data = {
        'sleep_scoring.epochstage': [0, 0, 1, 1, 2, 2, 3, 3, 2, 2],
        'sleep_scoring.epochoffset':
        [0, 30, 60, 90, 120, 150, 180, 210, 240, 270],
        'sleep_scoring.starttime': 1451635302000,
        'sleep_scoring.mins_in_0': 1,
        'sleep_scoring.mins_in_1': 1,
        'sleep_scoring.mins_in_2': 2,
        'sleep_scoring.mins_in_3': 1,
        'sleep_scoring.mins_in_4': 0,
        'sleep_scoring.sleep_efficiency': 0.8,
        'sleep_scoring.total_sleep_time': 4
    }

    # c)
    time.sleep(data_update_time)  # Give db 50 seconds to update
    data_rows = med_api.get_data(studyid='TEST',
                                 versionid=1,
                                 format='flat_dict')
    correct_row_1 = pytest.usecase_2_row1.copy()
    scorefile1_data.update(pytest.usecase_3_row2)
    correct_row_2 = scorefile1_data
    scorefile2_data.update(pytest.usecase_2_row2)
    correct_row_3 = scorefile2_data
    correct_rows = [correct_row_1, correct_row_2, correct_row_3]
    for correct_row in correct_rows:
        assert any(
            [dict_issubset(data_row, correct_row) for data_row in data_rows])

    pytest.usecase_4_row1 = correct_row_1
    pytest.usecase_4_row2 = correct_row_2
    pytest.usecase_4_row3 = correct_row_3
def test_usecase_2():
    # a)

    file_info_post = {
        'filetype': 'demographics',
        'fileformat': 'tabular',
        'studyid': 'TEST',
        'versionid': 1
    }

    med_api = MednickAPI(server_address, '*****@*****.**',
                         'Pass1234')
    with open('testfiles/TEST_Demographics.xlsx', 'rb') as demofile:
        # b)
        file_info = med_api.upload_file(fileobject=demofile, **file_info_post)
        fid = file_info['_id']
        downloaded_demo = med_api.download_file(fid)
        with open('testfiles/TEST_Demographics.xlsx', 'rb') as demofile:
            assert downloaded_demo == demofile.read()

    # c)
    time.sleep(file_update_time)  # Give file db 5 seconds to update
    file_info_post.update({
        'filename': 'TEST_Demographics.xlsx',
        'filedir': 'uploads/TEST/1/demographics/'
    })
    file_info_get = med_api.get_file_by_fid(fid)
    assert dict_issubset(file_info_get, file_info_post)

    # d)
    time.sleep(data_update_time -
               file_update_time)  # Give data db 50 seconds to update
    data_rows = med_api.get_data(studyid='TEST',
                                 versionid=1,
                                 format='flat_dict')
    correct_row1 = {
        'studyid': 'TEST',
        'versionid': 1,
        'subjectid': 1,
        'demographics.age': 23,
        'demographics.sex': 'F',
        'demographics.bmi': 23
    }
    correct_row1.update(pytest.usecase_1_filedata)
    correct_row2 = {
        'studyid': 'TEST',
        'versionid': 1,
        'subjectid': 2,
        'demographics.age': 19,
        'demographics.sex': 'M',
        'demographics.bmi': 20
    }
    correct_rows = [correct_row1, correct_row2]

    pytest.usecase_2_row1 = correct_row1
    pytest.usecase_2_row2 = correct_row2
    pytest.usecase_2_filename_version = file_info_get['filename_version']

    for correct_row in correct_rows:
        assert any([
            dict_issubset(data_row, correct_row) for data_row in data_rows
        ]), "demographics data downloaded does not match expected"

    # e)
    data_sleep_eeg = med_api.get_data(
        studyid='TEST', versionid=1,
        filetype='sleep_eeg')[0]  # FIXME will fail here until filetype is query-able
    assert dict_issubset(
        data_sleep_eeg, pytest.usecase_1_filedata
    ), "sleep data downloaded does not match what was uploaded in usecase 1"