def test_retrieve_head():
    """Re-storing an edited object must not grow the head query result.

    retrieve() returns only the newest revision per object, so the
    result count for 'foo' stays constant across an update.
    """
    run = mo.LcmsRun(name='foo')
    mo.store(run)
    count_before = len(mo.retrieve('lcmsrun', name='foo'))
    run.description = 'bar'
    mo.store(run)
    count_after = len(mo.retrieve('lcmsrun', name='foo'))
    assert count_after == count_before
def test_glob_query():
    """SQL-style '%' wildcards in retrieve() must match the right run."""
    first = mo.LcmsRun(name='First')
    second = mo.LcmsRun(name='Second')
    third = mo.LcmsRun(name='Third')
    mo.store([first, second, third])
    # (table name, glob pattern, expected newest match)
    cases = [
        ('lcmsrun', 'Fir%', first),
        ('lcmsrun', '%econd', second),
        ('LcmsRuns', 'T%ir%', third),
    ]
    for table, pattern, expected in cases:
        matches = mo.retrieve(table, name=pattern)
        assert matches[-1].unique_id == expected.unique_id
def test_remove_objects():
    """remove_objects() must delete a group and its item-link rows.

    NOTE(review): another test elsewhere in this repo uses the same
    name; pytest collects only one per module, so keep them in
    separate files.
    """
    child = mo.Group(name='baz', description='hello')
    parent = mo.Group(name='foo', items=[child])
    child_id = parent.items[0].unique_id
    mo.store(parent)
    stored = mo.retrieve('groups', unique_id=child_id)[0]
    assert stored.unique_id == child_id
    mo.remove_objects(parent, _override=True)
    assert not mo.retrieve('groups', name='foo')
    assert not mo.retrieve('groups_items', target_id=child_id)
def test_remove_objects():
    """remove_objects() must delete a compound and its reference-xref links.

    Fix: the keyword was previously spelled ``MonoIsotopic_molecular_weight``,
    but the attribute used everywhere else in this codebase (e.g.
    test_floating_point, setup_atlas_values) is
    ``mono_isotopic_molecular_weight`` -- the capitalized form would not
    have set the intended trait.
    """
    compound = mo.Compound(name='foo',
                           mono_isotopic_molecular_weight=1.0,
                           reference_xrefs=[mo.ReferenceDatabase(name='baz')])
    sub_id = compound.reference_xrefs[0].unique_id
    mo.store(compound)
    db = mo.retrieve('referencedatabase', unique_id=sub_id)[0]
    assert db.unique_id == sub_id
    mo.remove_objects(compound, _override=True)
    test = mo.retrieve('compound', name='foo')
    assert not test
    test_sub = mo.retrieve('compounds_reference_xrefs', target_id=sub_id)
    assert not test_sub
def my_handler(widget, content, buffers=None):
    """qgrid message callback: when an atlas row is selected, fill
    ``grid2`` with that atlas's compound identifications.

    Reads the module-level names ``grid``, ``grid2``,
    ``compound_header``, ``metob`` and ``pd``.
    """
    if content['type'] != 'selection_change':
        return
    selected_row = content['rows'][0]
    # Look up the atlas named in the selected row of the first grid.
    atlas_name = grid.df['Atlas Name'][selected_row]
    atlas = metob.retrieve('Atlas', name=atlas_name, username="******")
    values = {key: [] for key in compound_header}
    for ident in atlas[0].compound_identifications:
        if ident.compound:
            values['Compound'].append(str(ident.compound[0].name))
        else:
            values['Compound'].append(str(ident.name))
        rt_ref = ident.rt_references[0]
        mz_ref = ident.mz_references[0]
        values['rt_max'].append(str(rt_ref.rt_max))
        values['rt_min'].append(str(rt_ref.rt_min))
        values['rt_peak'].append(str(rt_ref.rt_peak))
        values['rt_units'].append(str(rt_ref.rt_units))
        values['mz'].append(str(mz_ref.mz))
        values['mz_tolerance'].append(str(mz_ref.mz_tolerance))
        values['mz_tolerance_units'].append(str(mz_ref.mz_tolerance_units))
        values['lcms_run'].append(str(rt_ref.lcms_run))
    grid2.df = pd.DataFrame.from_dict(values)
    grid2.width = '100%'
def test_floating_point():
    """A small float update must round-trip through the store exactly."""
    new_weight = 1.000007
    cpd = mo.Compound(name='foo', mono_isotopic_molecular_weight=1.0)
    mo.store(cpd)
    cpd.mono_isotopic_molecular_weight = new_weight
    mo.store(cpd)
    latest = mo.retrieve('compound', name='foo')[-1]
    assert latest.mono_isotopic_molecular_weight == new_weight, \
        latest.mono_isotopic_molecular_weight
def get_neutral_inchi_and_name(use_pickle=True):
    """Return (names, neutral InChIs, neutral masses) for database compounds.

    When *use_pickle* is True the three lists are loaded from pickle files
    in the current directory; otherwise they are rebuilt from every
    Compound whose inchi matches 'InChI=%' and re-cached to disk.

    Requires the module-level names ``metob``, ``Chem`` (RDKit) and
    ``NeutraliseCharges``.
    """
    import pickle
    if use_pickle:
        # Fast path: read the previously cached lists.
        with open('metatlas_name.pickle', 'rb') as handle:
            metatlas_name = pickle.load(handle)
        with open('neutral_inchi.pickle', 'rb') as handle:
            neutral_inchi = pickle.load(handle)
        with open('neutral_mass.pickle', 'rb') as handle:
            neutral_mass = pickle.load(handle)
    else:
        c = metob.retrieve('Compound', inchi='InChI=%', username='******')
        neutral_inchi = []
        metatlas_name = []
        neutral_mass = []
        for cc in c:
            # NOTE(review): .encode('utf-8') is a Python 2 idiom; under
            # Python 3 MolFromInchi would receive bytes -- confirm the
            # intended interpreter version.
            myMol = Chem.MolFromInchi(cc.inchi.encode('utf-8'))
            myMol, neutralised = NeutraliseCharges(myMol)
            neutral_mass.append(Chem.Descriptors.ExactMolWt(myMol))
            inchi = Chem.MolToInchi(myMol)
            neutral_inchi.append(inchi)
            metatlas_name.append(cc.name)
        with open('metatlas_name.pickle', 'wb') as handle:
            pickle.dump(metatlas_name, handle)
        with open('neutral_inchi.pickle', 'wb') as handle:
            pickle.dump(neutral_inchi, handle)
        # NOTE(review): this writes the InChI list, not InChI *keys*,
        # into 'neutral_inchi_key.pickle' -- looks like a bug
        # (Chem.InchiToInchiKey was probably intended). TODO confirm.
        with open('neutral_inchi_key.pickle', 'wb') as handle:
            pickle.dump(neutral_inchi, handle)
        with open('neutral_mass.pickle', 'wb') as handle:
            pickle.dump(neutral_mass, handle)
    return metatlas_name, neutral_inchi, neutral_mass
def get_files_for_experiment(experiment_name):
    """Show a qgrid of the unique HDF5 files for *experiment_name* and
    return the matching LcmsRun records."""
    runs = metob.retrieve('LcmsRun', username='******',
                          experiment=experiment_name)
    unique_paths = np.unique([run.hdf5_file for run in runs])
    df = pd.DataFrame()
    for row, path in enumerate(unique_paths):
        df.loc[row, 'file'] = os.path.basename(path)
    df.set_index('file', drop=True, append=False, inplace=True)
    options = qgrid.grid.defaults.grid_options
    options['defaultColumnWidth'] = 600
    grid = qgrid.grid.QGridWidget(df=df, precision=6,
                                  grid_options=options, remote_js=True)

    def handle_msg(widget, content, buffers=None):
        # NOTE(review): ``objects`` is a module-level name defined
        # elsewhere -- confirm it stays in sync with the grid rows.
        if content['type'] == 'cell_change':
            target = objects[content['row']]
            try:
                setattr(target, content['column'], content['value'])
            except Exception:
                pass  # best-effort edit; values the model rejects are ignored

    grid.on_msg(handle_msg)
    display(widgets.Box([grid]))
    return runs
def mod_atlas_compound_RT_values(**kwargs):
    """Set rt_min/rt_max/rt_peak on one compound of an atlas.

    Parameters
    ----------
    kwargs : dict
        Must hold ``atlas`` (atlas list, or a name to look up),
        ``compound`` (compound name), ``rt_min``, ``rt_max`` and
        ``rt_peak``.

    Returns
    -------
    The modified atlas (a retrieve() result list when looked up by name).
    """
    atlas = kwargs['atlas']
    target_name = kwargs['compound']
    rt_min = kwargs['rt_min']
    rt_max = kwargs['rt_max']
    rt_peak = kwargs['rt_peak']
    if isinstance(atlas, str):
        atlas = metob.retrieve('Atlas', name=atlas, username='******')
    # Adjust the RT window of the first identification matching the name.
    for ident in atlas[0].compound_identifications:
        if ident.compound[0].name == target_name:
            rt_ref = ident.rt_references[0]
            rt_ref.rt_min = rt_min
            rt_ref.rt_max = rt_max
            rt_ref.rt_peak = rt_peak
            break
    return atlas
def get_files_for_experiment(experiment_name):
    """Display an editable qgrid listing the distinct HDF5 files that
    belong to *experiment_name*; return the retrieved LcmsRun records."""
    lcms_runs = metob.retrieve('LcmsRun', username='******',
                               experiment=experiment_name)
    hdf5_paths = np.unique([r.hdf5_file for r in lcms_runs])
    table = pd.DataFrame()
    for idx, hdf5_path in enumerate(hdf5_paths):
        table.loc[idx, 'file'] = os.path.basename(hdf5_path)
    table.set_index('file', drop=True, append=False, inplace=True)
    grid_options = qgrid.grid.defaults.grid_options
    grid_options['defaultColumnWidth'] = 600
    widget = qgrid.grid.QGridWidget(df=table, precision=6,
                                    grid_options=grid_options,
                                    remote_js=True)

    def on_cell_change(sender, content, buffers=None):
        if content['type'] != 'cell_change':
            return
        # ``objects`` comes from module scope -- TODO confirm it
        # corresponds one-to-one with the displayed rows.
        edited = objects[content['row']]
        try:
            setattr(edited, content['column'], content['value'])
        except Exception:
            pass  # deliberately best-effort: bad values are dropped

    widget.on_msg(on_cell_change)
    display(widgets.Box([widget]))
    return lcms_runs
def test_store_all():
    """Every registered metatlas class can be stored and retrieved."""
    instances = [klass() for klass in mo.WORKSPACE.subclass_lut.values()]
    mo.store(instances)
    for klass in mo.WORKSPACE.subclass_lut.values():
        assert len(mo.retrieve(klass.__name__))
def test_get_latest():
    """retrieve() by creation_time must return only the newest revision."""
    cpd = mo.Compound(name='hello')
    mo.store(cpd)
    cpd.name = 'goodbye'
    mo.store(cpd)
    found = mo.retrieve('compound', creation_time=cpd.creation_time)
    assert len(found) == 1, len(found)
    assert found[0].name == 'goodbye'
def test_simple_query():
    """Editing before the first store must keep the original unique_id."""
    run = mo.LcmsRun(name='First')
    initial_id = run.unique_id
    run.description = "Hey there"
    mo.store(run)
    assert run.unique_id == initial_id
    results = mo.retrieve('lcmsrun', name='First')
    assert results[-1].unique_id == run.unique_id
    assert all(r.unique_id != initial_id for r in results[:-1])
def test_unique_links():
    """Only the links present at store time survive a round trip."""
    outer = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    kept_id = outer.items[1].unique_id
    outer.items = [outer.items[1]]
    mo.store(outer)
    outer.items = []
    reloaded = mo.retrieve('group', unique_id=outer.unique_id)[0]
    assert len(reloaded.items) == 1, len(reloaded.items)
    assert reloaded.items[0].unique_id == kept_id
def test_preserve_provenance():
    """Clearing items must leave the previous revision retrievable."""
    first = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    second = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    mo.store([first, second])
    assert len(first.items) == 2
    first.items = []
    second.items = []
    mo.store([first, second])
    assert len(first.items) == 0
    # prev_uid points at the revision that still had both items
    prior = mo.retrieve('group', unique_id=first.prev_uid)[0]
    assert len(prior.items) == 2, repr(prior)
def test_circular_reference():
    """A group that contains itself must round-trip without blowing up."""
    root = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    root_id = root.unique_id
    # Create the cycle: the first child points back at the root.
    root.items[0].items.append(root)
    mo.store(root)
    root.items = []
    fetched = mo.retrieve('group', unique_id=root_id)[0]
    inner = fetched.items[0]
    assert len(inner.items) == 2, inner.items
    assert inner.items[1].unique_id == root_id
    assert fetched.unique_id == root_id
def test_preserve_provenance():
    """prev_uid must reference the revision that still had its items.

    NOTE(review): this duplicates another test of the same name;
    keep them in separate modules for pytest to collect both.
    """
    group_a = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    group_b = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    mo.store([group_a, group_b])
    assert len(group_a.items) == 2
    group_a.items = []
    group_b.items = []
    mo.store([group_a, group_b])
    assert len(group_a.items) == 0
    print(group_a.unique_id)  # aids debugging when the lookup below fails
    old_rev = mo.retrieve('group', unique_id=group_a.prev_uid)[0]
    assert len(old_rev.items) == 2, repr(old_rev)
def test_user_preserve():
    """Usernames on objects and linked sub-objects survive re-stores.

    Requires the module-level names ``mo`` and ``getpass``.
    """
    run = mo.LcmsRun(username='******')
    test = mo.Reference(name='hello', username='******', lcms_run=run)
    orig_id = test.unique_id
    # _override keeps the existing unique_id instead of minting a new one
    mo.store(test, _override=True)
    assert test.unique_id == orig_id
    # A normal store assigns a fresh unique_id
    mo.store(test)
    assert test.unique_id != orig_id
    items = mo.retrieve('reference', username='******', name='hello')
    username = getpass.getuser()
    # Second-to-last revision keeps the original owner; the newest one
    # is owned by whoever ran the store.
    assert items[-2].username == 'foo'
    assert items[-1].username == username
    assert items[-2].lcms_run.username == 'foo'
    assert items[-1].lcms_run.username == 'foo'
    run.name = 'hello'
    mo.store(test)
    items = mo.retrieve('reference', username='******',
                        creation_time=test.creation_time)
    return
    # NOTE(review): the asserts below are unreachable -- the bare
    # ``return`` above disables them, presumably left in while debugging.
    # Either delete them or remove the return once the expected behavior
    # is confirmed.
    assert items[0].lcms_run.username == 'foo'
    assert items[1].lcms_run.username == username
def test_load_lcms_files():
    """load_lcms_files must fully populate and register every run."""
    runs = mo.load_lcms_files(get_test_data().values())
    required_attrs = ('mzml_file', 'hdf5_file', 'creation_time',
                      'description', 'name', 'last_modified',
                      'username', 'unique_id')
    for run in runs:
        for attr in required_attrs:
            assert getattr(run, attr)
        assert mo.retrieve('lcmsrun', unique_id=run.unique_id)
def atlas_grid(sender):
    """Search callback: fill the atlas summary grid from the wildcard
    currently typed into ``search_string``.

    Reads the module-level names ``atlas_header``, ``search_string``,
    ``grid``, ``metob``, ``pd`` and ``datetime``.
    """
    summary = {column: [] for column in atlas_header}
    pattern = search_string.value
    for atlas in metob.retrieve('Atlas', name=pattern, username='******'):
        summary['Atlas Name'].append(atlas.name)
        summary['No. Compounds'].append(
            str(len(atlas.compound_identifications)))
        summary['Last Modified'].append(
            str(datetime.utcfromtimestamp(atlas.last_modified)))
    grid.df = pd.DataFrame.from_dict(summary)
    grid.width = "100%"
def setup_atlas_values(df, rt_minutes_tolerance, mz_ppm_tolerance):
    """Add pos/neg m/z and RT-window columns to an atlas dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by compound name; must contain an 'RT' column.
    rt_minutes_tolerance : float
        Half-width of the retention-time window around each RT value.
    mz_ppm_tolerance : float
        Currently unused (the ppm-window columns were commented out);
        kept for interface compatibility.

    Returns
    -------
    pandas.DataFrame
        The same dataframe with pos_mz/neg_mz/rt_min/rt_max filled in.

    Fix: the original used a Python 2 ``print`` statement in a file that
    otherwise contains Python 3 code; the call form works on both.
    """
    proton = 1.007276  # proton mass in Da, for [M+H]+ / [M-H]- adducts
    print(df.keys())
    rt_by_compound = df.to_dict()['RT']
    for compound_name, rt_value in rt_by_compound.items():
        if not rt_value:
            continue
        # The newest matching compound record wins ([-1]).
        compound = metob.retrieve('Compound', name=compound_name,
                                  username='******')[-1]
        weight = compound.mono_isotopic_molecular_weight
        df.loc[compound_name, 'pos_mz'] = weight + proton
        df.loc[compound_name, 'neg_mz'] = weight - proton
        # TODO: use stored rt_min/rt_max when the atlas provides them.
        df.loc[compound_name, 'rt_min'] = float(rt_value) - rt_minutes_tolerance
        df.loc[compound_name, 'rt_max'] = float(rt_value) + rt_minutes_tolerance
    return df
def test_recover():
    """A stored revision stays recoverable after in-memory mutation."""
    group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    group.name = 'howdy'
    root_id = group.unique_id
    child_id = group.items[1].unique_id
    mo.store(group)
    mo.store(group)  # re-storing an unchanged object is a no-op
    assert len(group.items) == 2
    assert group.unique_id == root_id
    # Wipe the in-memory links, then recover the stored revision.
    group.items = []
    assert group.unique_id == root_id
    recovered = mo.retrieve('group', unique_id=root_id)[0]
    assert recovered.unique_id == root_id
    assert len(recovered.items) == 2, len(recovered.items)
    assert recovered.unique_id == root_id
    assert recovered.items[1].unique_id == child_id
def get_files(groups, filename_substring, file_filters, is_group=False):
    """Collect LCMS run files from metatlas groups or experiment names.

    Parameters
    ----------
    groups : list of str
        Metatlas group names when ``is_group`` is True, otherwise
        experiment/folder names.
    filename_substring : str
        Name pattern used when querying runs by experiment.
    file_filters : list of str
        Substrings; any file whose name contains one is excluded. With
        ``is_group=True`` they also serve as the group exclude list.
    is_group : bool
        Selects the group-based or experiment-based lookup.

    Returns
    -------
    list
        De-duplicated run objects.

    Fixes: ``all_files`` was previously unbound (NameError) when
    ``groups`` was empty; the loop also rebound the iterated ``groups``
    name, and used ``== True`` / ``not x in y`` anti-idioms.
    """
    all_files = []
    for g in groups:
        if is_group:
            # Get files through a metatlas group lookup.
            matched_groups = dp.select_groups_for_analysis(
                name=g,
                do_print=False,
                most_recent=True,
                remove_empty=True,
                include_list=[],
                exclude_list=file_filters)  # e.g. ['QC', 'Blank']
            new_files = []
            for each_g in matched_groups:
                new_files.extend(each_g.items)
        else:
            # Get files directly from the experiment/folder name.
            new_files = metob.retrieve('Lcmsruns',
                                       experiment=g,
                                       name=filename_substring,
                                       username='******')
        all_files.extend(new_files)
        if len(new_files) == 0:
            print('##### %s has ZERO files!' % g)
    files = all_files
    for ff in file_filters:
        files = [f for f in files if ff not in f.name]
    files = remove_duplicate_files(files)
    return files
def setup_atlas_values(df, rt_minutes_tolerance, mz_ppm_tolerance):
    """Populate pos_mz/neg_mz and rt_min/rt_max columns of an atlas table.

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by compound name with an 'RT' column (minutes).
    rt_minutes_tolerance : float
        Half-width of the RT window written to rt_min/rt_max.
    mz_ppm_tolerance : float
        Unused at present (the ppm-window computation was commented out);
        retained so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        *df* with the new columns filled for compounds that have an RT.

    Fix: ``print df.keys()`` was Python 2-only syntax; the single-argument
    call form behaves identically on Python 2 and 3.
    """
    proton_mass = 1.007276  # Da; added/subtracted for [M+H]+ / [M-H]-
    print(df.keys())
    rt_values = df.to_dict()['RT']
    for name, rt in rt_values.items():
        if not rt:
            continue
        # [-1]: take the most recently stored compound with this name.
        cpd = metob.retrieve('Compound', name=name, username='******')[-1]
        mono_weight = cpd.mono_isotopic_molecular_weight
        df.loc[name, 'pos_mz'] = mono_weight + proton_mass
        df.loc[name, 'neg_mz'] = mono_weight - proton_mass
        # TODO: prefer stored rt_min/rt_max when the source provides them.
        df.loc[name, 'rt_min'] = float(rt) - rt_minutes_tolerance
        df.loc[name, 'rt_max'] = float(rt) + rt_minutes_tolerance
    return df
def convert(file):
    """Convert one mzML file to HDF5 and register it as an LcmsRun.

    Parameters
    ----------
    file : tuple
        ``(index, path)`` -- the index is only used for progress output.

    Uses the module-level names ``patt``, ``readonly_files``,
    ``other_errors``, ``get_acqtime_from_mzml``, ``mzml_to_hdf``,
    ``retrieve``, ``store`` and ``LcmsRun``.
    """
    ind = file[0]
    fname = file[1]
    sys.stdout.write('(%s): %s\n' % (ind + 1, fname))
    sys.stdout.flush()
    # Get relevant information about the file from its path.
    info = patt.match(os.path.abspath(fname))
    if info:
        info = info.groupdict()
    else:
        sys.stdout.write("Invalid path name: %s\n" % fname)
        sys.stdout.flush()
        return
    dirname = os.path.dirname(fname)
    # Determine the owner: file owner, else directory owner, else the
    # username parsed from the path.
    try:
        username = pwd.getpwuid(os.stat(fname).st_uid).pw_name
    except OSError:
        try:
            username = pwd.getpwuid(os.stat(dirname).st_uid).pw_name
        except Exception:
            username = info['username']
    # Set user/group read-write permissions (0o660), best-effort.
    # NOTE(review): the original comment said "read only", which 0o660
    # is not -- the mode, not the comment, appears to be intended.
    try:
        os.chmod(fname, 0o660)
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()
    # Copy the original file to a pasteur backup.
    if os.environ['USER'] == 'pasteur':
        pasteur_path = fname.replace('raw_data', 'pasteur_backup')
        dname = os.path.dirname(pasteur_path)
        if not os.path.exists(dname):
            os.makedirs(dname)
        try:
            shutil.copy(fname, pasteur_path)
        except IOError as e:
            # Record the unwritable directory for later reporting.
            if (username not in readonly_files):
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
            return
    # Get an exclusive, non-blocking lock on the mzml file to prevent a
    # concurrent process from converting it at the same time.
    # NOTE(review): if open() itself raises IOError, ``fid`` is unbound
    # and the handler's fid.close() raises NameError -- confirm whether
    # open() can fail here and split the try if so.
    try:
        fid = open(fname, 'r')
        fcntl.flock(fid, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        fid.close()
        msg = '%s already converting in another process\n' % fname
        sys.stderr.write(msg)
        sys.stderr.flush()
        return
    # Convert to HDF and store the entry in the database.
    try:
        hdf5_file = fname.replace('mzML', 'h5')
        sys.stderr.write('hdf5file is: %s' % hdf5_file)
        # Get acquisition time from the mzML before converting.
        acquisition_time = get_acqtime_from_mzml(fname)
        mzml_to_hdf(fname, hdf5_file, True)
        os.chmod(hdf5_file, 0o660)
        description = info['experiment'] + ' ' + info['path']
        ctime = os.stat(fname).st_ctime
        # Add this to the database unless it is already there.
        try:
            runs = retrieve('lcmsrun', username='******', mzml_file=fname)
        except Exception:
            runs = list()
        if not len(runs):
            run = LcmsRun(name=info['path'], description=description,
                          username=info['username'],
                          experiment=info['experiment'],
                          creation_time=ctime, last_modified=ctime,
                          mzml_file=fname, hdf5_file=hdf5_file,
                          acquisition_time=acquisition_time)
            store(run)
    except Exception as e:
        if 'exists but it can not be written' in str(e):
            # Permission problem: remember the directory per user.
            if (username not in readonly_files):
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
        else:
            # Any other failure: capture the traceback per path owner.
            msg = traceback.format_exception(*sys.exc_info())
            msg.insert(0, 'Cannot convert %s' % fname)
            dat = info['username']
            if (dat not in other_errors):
                other_errors[info['username']] = list()
            other_errors[info['username']].append('\n'.join(msg))
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()
        # Remove a partially written HDF5 file, if any.
        try:
            os.remove(hdf5_file)
        except:
            pass
    finally:
        # Releases the flock as a side effect of closing the handle.
        fid.close()
import sys
"""Check that every compound in an atlas CSV exists in the metatlas DB.

Fix: the two ``print`` statements were Python 2-only syntax; the call
forms below produce the same output on both Python 2 and 3.
"""
import sys
import pandas as pd
import qgrid
from metatlas import metatlas_objects as metob

df = pd.read_csv(
    '/home/jimmy/data/atlas_finfo_to_be_loaded/20151208_Atlas_POS_HILIC_LS_Validated_RTcorr.csv',
    sep=',')
# Normalize column names so lookups below are case-insensitive.
df.columns = [x.lower() for x in df.columns]
#qgrid.show_grid(df, precision=5)

fetch_atlases = metob.retrieve('Atlas', name='%_LS_%', username='******')
for c in fetch_atlases:
    print(c.name)

for x in df.index:
    if not metob.retrieve('Compounds', name=df.name[x]):
        print('%s is not in database' % df.name[x])
def test_stub_instance():
    """A linked run retrieved via its parent is a real LcmsRun object."""
    linked_run = mo.LcmsRun(username='******')
    ref = mo.Reference(name='hello', lcms_run=linked_run)
    mo.store(ref)
    fetched = mo.retrieve('reference', name='hello')[0]
    assert isinstance(fetched.lcms_run, mo.LcmsRun)
def test_store_stubs():
    """A retrieved nested group can be stored again without error."""
    group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    mo.store(group)
    reloaded = mo.retrieve('group', unique_id=group.unique_id)[0]
    assert isinstance(reloaded.items[0], mo.Group)
    mo.store(reloaded)
def test_escape_glob():
    """A literal '%' in a field is matched by escaping it as '%%'."""
    run = mo.LcmsRun(description='Flow %')
    mo.store(run)
    found = mo.retrieve('lcmsrun', description='Flow %%')
    assert found[-1].unique_id == run.unique_id
"""Verify an atlas CSV's compounds against the metatlas database.

Fix: this script used Python 2 ``print`` statements; the call forms
below emit identical output on Python 2 and 3.
"""
import sys
import pandas as pd
import qgrid
from metatlas import metatlas_objects as metob

df = pd.read_csv(
    '/home/jimmy/data/atlas_finfo_to_be_loaded/20151208_Atlas_POS_HILIC_LS_Validated_RTcorr.csv',
    sep=',')
df.columns = [x.lower() for x in df.columns]  # lowercase for lookups
#qgrid.show_grid(df, precision=5)

# List every atlas whose name matches the wildcard pattern.
fetch_atlases = metob.retrieve('Atlas', name='%_LS_%', username='******')
for c in fetch_atlases:
    print(c.name)

# Report any CSV compound missing from the database.
for x in df.index:
    if not metob.retrieve('Compounds', name=df.name[x]):
        print('%s is not in database' % df.name[x])
# Per-directory processing for one user's files, then email the results.
# NOTE(review): indentation was reconstructed from a flattened paste --
# this loop presumably sits inside an outer per-username loop (``files``,
# ``username``, ``csvs``, ``html_tables``, ``origin_directory`` and
# ``start_time`` are defined out of view); confirm nesting before use.
# Python 2 print statements are kept as-is.
for directory in files[username]:
    print 'looking at', directory
    extension = '%s/%s/' % (username, directory)
    # list of all files in a given directory
    dir_files = glob.glob(
        os.path.join(origin_directory + extension, '*.h5'))
    # contains a list of dicts
    samples = process_files(dir_files)
    # a list containing a csv name
    # and a tuple of the non-blank, blank dataframes
    info = clean_up(samples, dir_files)
    csvs.append(info[0])
    html_tables.append((info[1][0], info[1][1]))
print 'attempting to send email'
send_run_email(username, csvs, html_tables)
print '\n'  # for formatting purposes
print 'finished task in %s seconds' % (time.time() - start_time)

# Integrity monitor does two major things:
# - Checks the database for invalid files and warns the user.
# - Runs through all the raw data collected in the past 3 days and gives the
#   user a general report of the data collected.
if __name__ == '__main__':
    check_metatlas()
    # Cache each run's acquisition time, keyed by its HDF5 path, for the
    # checker to consult.
    runs = metob.retrieve('lcmsruns', username='******')
    for i in runs:
        run_times[i.hdf5_file] = i.acquisition_time
    run_checker()