def test_store_atlas01(atlas, sqlite, username):
    """Storing a renamed atlas takes the matching-atlas count from 0 to 1."""
    target_name = "test_store_atlas01"
    atlas.name = target_name

    # Nothing with this name/owner should be present before the store.
    before = metob.retrieve("Atlas", name=atlas.name, username=username)
    assert len(before) == 0

    metob.store(atlas)

    after = metob.retrieve("Atlas", name=atlas.name, username=username)
    assert len(after) == 1
def test_retrieve_head():
    """Renaming and re-storing a run leaves the number of "foo" matches unchanged."""
    run = mo.LcmsRun(name="foo")
    mo.store(run)
    count_before = len(mo.retrieve("lcmsrun", name="foo"))

    # Store the same object again under a different name.
    run.name = "bar"
    mo.store(run)
    count_after = len(mo.retrieve("lcmsrun", name="foo"))

    assert count_after == count_before
def test_retrieve01(sqlite):
    """Retrieve compounds with list-valued filters.

    An empty inchi_key list must give an empty result; a one-element inchi
    list must give back a compound carrying that inchi.
    """
    mo.store(mo.Compound(name="foo", inchi=ADENOSINE_INCHI, inchi_key="foobar"))

    empty_filter_result = mo.retrieve("Compounds", inchi_key=[], username="******")
    assert empty_filter_result == []

    by_inchi = mo.retrieve("Compounds", inchi=[ADENOSINE_INCHI], username="******")
    assert by_inchi[0].inchi == ADENOSINE_INCHI
def test_store_atlas03(metatlas_dataset, atlas, sqlite, username):
    """store_atlas() on the dataset makes its renamed atlas retrievable once."""
    dataset_atlas = metatlas_dataset.atlas
    dataset_atlas.name = "test_store_atlas01"

    before = metob.retrieve("Atlas", name=metatlas_dataset.atlas.name, username=username)
    assert len(before) == 0

    metatlas_dataset.store_atlas()

    after = metob.retrieve("Atlas", name=metatlas_dataset.atlas.name, username=username)
    assert len(after) == 1
def test_glob_query(sqlite):
    """Names containing '%' are matched as patterns by retrieve()."""
    first = mo.LcmsRun(name="First")
    second = mo.LcmsRun(name="Second")
    third = mo.LcmsRun(name="Third")
    mo.store([first, second, third])

    # (object type passed to retrieve, name pattern, run expected last in the result)
    cases = (
        ("lcmsrun", "Fir%", first),
        ("lcmsrun", "%econd", second),
        ("LcmsRuns", "T%ir%", third),
    )
    for object_type, pattern, expected in cases:
        matches = mo.retrieve(object_type, name=pattern)
        assert matches[-1].unique_id == expected.unique_id
def test_remove_objects(sqlite):
    """remove_objects() deletes a stored group and its link to the child group."""
    child = mo.Group(name="baz", description="hello")
    parent = mo.Group(name="foo", items=[child])
    child_id = parent.items[0].unique_id
    mo.store(parent)

    # The child is retrievable on its own once the parent has been stored.
    stored_child = mo.retrieve("groups", unique_id=child_id)[0]
    assert stored_child.unique_id == child_id

    mo.remove_objects(parent, _override=True)

    remaining_parents = mo.retrieve("groups", name="foo")
    assert not remaining_parents
    remaining_links = mo.retrieve("groups_items", target_id=child_id)
    assert not remaining_links
def test_store_atlas04(metatlas_dataset, sqlite, username):
    """store_atlas() refuses to overwrite unless even_if_exists=True is passed."""
    metatlas_dataset.atlas.name = "test_store_atlas01"

    before = metob.retrieve("Atlas", name=metatlas_dataset.atlas.name, username=username)
    assert len(before) == 0

    metatlas_dataset.store_atlas()
    after = metob.retrieve("Atlas", name=metatlas_dataset.atlas.name, username=username)
    assert len(after) == 1

    # An explicit overwrite is accepted ...
    metatlas_dataset.store_atlas(even_if_exists=True)
    # ... while a plain second store must raise.
    with pytest.raises(ValueError):
        metatlas_dataset.store_atlas()
def test_store_atlas02(metatlas_dataset, username):
    """Storing with even_if_exists=True keeps exactly one atlas of that name."""
    source_matches = metob.retrieve(
        "Atlas", name=metatlas_dataset.ids.source_atlas, username=username
    )
    assert len(source_matches) == 1

    matches = metob.retrieve("Atlas", name=metatlas_dataset.atlas.name, username=username)
    assert len(matches) == 1

    metatlas_dataset.store_atlas(even_if_exists=True)

    matches = metob.retrieve("Atlas", name=metatlas_dataset.atlas.name, username=username)
    assert len(matches) == 1
def store_atlas(self, even_if_exists: bool = False) -> None:
    """
    Save self.atlas to the database

    inputs:
        even_if_exists: if True, will save the atlas even if the atlas name
                        already is in the database with your username
    side effects:
        Saves the atlas to the database.
        Raises ValueError if even_if_exists==False and name is already in
        the database with your username
    """
    start_time = datetime.datetime.now()
    name = self.atlas.name
    username = self.ids.username
    try:
        # Short-circuit: the database is not queried when even_if_exists is True.
        if not even_if_exists and len(metob.retrieve("Atlas", name=name, username=username)) > 0:
            raise ValueError(f"An atlas with name {name} and owned by {username} already exists.")
    except ValueError as err:
        # Log with traceback, then re-raise the same exception unchanged.
        logger.exception(err)
        raise
    metob.store(self.atlas)
    # Report the name that was actually checked and stored (self.atlas.name).
    # It can differ from self.ids.atlas when the atlas was renamed before
    # this call, so logging self.ids.atlas would name the wrong atlas.
    logger.info(
        "Atlas %s stored in database with owner %s in %s.",
        name,
        username,
        _duration_since(start_time),
    )
def test_floating_point(sqlite):
    """A float attribute updated and re-stored is read back with the exact same value."""
    updated_weight = 1.000007
    compound = mo.Compound(name="foo", mono_isotopic_molecular_weight=1.0)
    mo.store(compound)

    compound.mono_isotopic_molecular_weight = updated_weight
    mo.store(compound)

    latest = mo.retrieve("compound", name="foo")[-1]
    assert latest.mono_isotopic_molecular_weight == updated_weight, latest.mono_isotopic_molecular_weight
def _get_atlas(self) -> None:
    """
    Populate self.atlas for the current analysis

    Looks up atlases named self.ids.atlas owned by self.ids.username:
      - more than one match: logs and raises ValueError
      - exactly one match: uses it as-is (a warning is logged, nothing is copied)
      - no match: clones the source atlas via self._clone_source_atlas(), or
        logs and raises ValueError when self.ids.source_atlas is None

    Per the earlier documentation of this method, cloning has the additional
    side effect that all mz_tolerances in the resulting atlas get their value
    from the source atlas'
    compound_identifications[0].mz_references[0].mz_tolerance
    """
    existing = metob.retrieve("Atlas", name=self.ids.atlas, username=self.ids.username)
    num_existing = len(existing)

    if num_existing > 1:
        try:
            raise ValueError(
                f"{num_existing} atlases with name {self.ids.atlas} "
                f"and owned by {self.ids.username} already exist."
            )
        except ValueError as err:
            logger.exception(err)
            raise err

    if num_existing == 1:
        logger.warning(
            "Destination atlas, %s, already exists, so not copying source atlas, "
            "%s, to destination. Not overwriting.",
            self.ids.atlas,
            self.ids.source_atlas,
        )
        self.atlas = existing[0]
        return

    # No destination atlas yet: a source atlas is required to build one.
    if self.ids.source_atlas is None:
        try:
            raise ValueError("Could not load atlas as source_atlas is None.")
        except ValueError as err:
            logger.exception(err)
            raise err

    self.atlas = self._clone_source_atlas()
def test_store_all(sqlite):
    """One default instance of every Workspace subclass can be stored and retrieved."""
    instances = [klass() for klass in metoh.Workspace.get_instance().subclass_lut.values()]
    mo.store(instances)

    # Every class in the lookup table must now have at least one stored entry.
    for klass in metoh.Workspace.get_instance().subclass_lut.values():
        stored = mo.retrieve(klass.__name__)
        assert len(stored) > 0
def test_get_latest():
    """Retrieving by unique_id after a rename yields a single entry with the new name."""
    compound = mo.Compound(name="hello")
    mo.store(compound)

    compound.name = "goodbye"
    mo.store(compound)

    # Rebind the same local so the stored object is referenced exactly as before.
    compound = mo.retrieve("compound", unique_id=compound.unique_id)
    assert len(compound) == 1
    assert compound[0].name == "goodbye"
def test_simple_query(sqlite):
    """The first store keeps the unique_id, and the stored run is last in a name query."""
    run = mo.LcmsRun(name="First")
    original_id = run.unique_id
    run.description = "Hey there"
    mo.store(run)
    assert run.unique_id == original_id

    matches = mo.retrieve("lcmsrun", name="First")
    assert matches[-1].unique_id == run.unique_id

    # None of the earlier matches may share the id of the run stored here.
    earlier_matches = matches[:-1]
    assert all(item.unique_id != original_id for item in earlier_matches)
def get_compound(inchi_key: str) -> Optional[metob.Compound]:
    """
    Look up a compound by inchi_key

    Returns the first item of the "Compounds" retrieval for inchi_key, or None
    when taking that first item raises IndexError (i.e. nothing was found)
    """
    try:
        matches = metob.retrieve("Compounds", inchi_key=inchi_key, username="******")
        first_match = matches[0]
    except IndexError:
        first_match = None
    return first_match
def test_unique_links(sqlite):
    """Only the items present at store time are linked to the stored group."""
    nested_group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    kept_id = nested_group.items[1].unique_id

    # Drop the sub-group before storing so a single item remains.
    nested_group.items = [nested_group.items[1]]
    mo.store(nested_group)

    # Clearing the in-memory list afterwards must not affect what was stored.
    nested_group.items = []
    nested_group = mo.retrieve("group", unique_id=nested_group.unique_id)[0]

    assert len(nested_group.items) == 1, len(nested_group.items)
    assert nested_group.items[0].unique_id == kept_id
def test_preserve_provenance(sqlite):
    """After a modified re-store, the version under prev_uid still has its items."""
    first = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    second = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    mo.store([first, second])
    assert len(first.items) == 2

    # Empty both groups and store again.
    for group in (first, second):
        group.items = []
    mo.store([first, second])
    assert len(first.items) == 0

    previous = mo.retrieve("group", unique_id=first.prev_uid)[0]
    assert len(previous.items) == 2, repr(previous)
def test_circular_reference(sqlite):
    """A group that contains itself through a sub-group survives store and retrieve."""
    root = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    root_id = root.unique_id

    # Close the cycle: the first sub-group now also holds the root.
    root.items[0].items.append(root)
    mo.store(root)

    root.items = []
    root = mo.retrieve("group", unique_id=root.unique_id)[0]

    first_child = root.items[0]
    assert len(first_child.items) == 2, first_child.items
    assert first_child.items[1].unique_id == root_id
    assert root.unique_id == root_id
def test_filter_compounds05(mocker, metatlas_dataset_with_2_cids, username):
    """An rt_min edit on compound 1 is in the stored atlas after compound 0 is filtered out."""
    dataset = metatlas_dataset_with_2_cids
    original_rt_min = dataset.rts[1].rt_min
    print([rt.rt_min for rt in dataset.rts])

    updated_rt_min = 9.99
    dataset.set_rt(1, "rt_min", updated_rt_min)
    dataset.filter_compounds(remove_idxs=[0])

    stored_atlas = metob.retrieve("Atlas", name=dataset.atlas.name, username=username)[0]
    # With index 0 removed, the edited compound is now first in the atlas.
    stored_rt_min = stored_atlas.compound_identifications[0].rt_references[0].rt_min
    assert stored_rt_min != original_rt_min
    assert stored_rt_min == updated_rt_min
def get_qc_atlas(
    ids: AnalysisIdentifiers, rt_min_delta: Optional[float], rt_max_delta: Optional[float]
) -> Tuple[metob.Atlas, pd.DataFrame]:
    """
    Retrieves the template QC atlas and returns the tuple (atlas, atlas_df)

    The template is selected from QC_ATLASES by ids.polarity and
    ids.chromatography. The first database match is passed through
    adjust_atlas_rt_range with the given deltas, and the resulting data frame
    gets a "label" column holding each compound identification's name.
    """
    template = QC_ATLASES[ids.polarity][ids.chromatography]
    qc_atlas_name, username = template["name"], template["username"]
    logger.info("Loading QC Atlas %s", qc_atlas_name)

    matches = metob.retrieve("Atlas", name=qc_atlas_name, username=username)
    atlas = adjust_atlas_rt_range(matches[0], rt_min_delta, rt_max_delta)

    atlas_df = ma_data.make_atlas_df(atlas)
    labels = [cid.name for cid in atlas.compound_identifications]
    atlas_df["label"] = labels
    return atlas, atlas_df
def test_user_preserve(sqlite):
    """Check the usernames kept on stored versions of a reference and its linked run.

    A store with _override=True keeps the unique_id; a following plain store
    changes it. The two most recent retrieved versions are then compared by
    username.
    """
    run = mo.LcmsRun(username="******")
    reference = mo.Reference(name="hello", username="******", lcms_run=run)
    original_id = reference.unique_id

    mo.store(reference, _override=True)
    assert reference.unique_id == original_id

    mo.store(reference)
    assert reference.unique_id != original_id

    versions = mo.retrieve("reference", username="******", name="hello")
    current_user = getpass.getuser()
    older, newer = versions[-2], versions[-1]

    assert older.username == "foo"
    assert newer.username == current_user
    assert older.lcms_run.username == "foo"
    assert newer.lcms_run.username == "foo"
def get_atlas(name: AtlasName, username: Username) -> metob.Atlas:
    """
    Load the single atlas with this name and owner from the database

    Logs and raises ValueError when the database holds no such atlas or more
    than one of them.
    """
    atlases = metob.retrieve("Atlas", name=name, username=username)
    num_found = len(atlases)

    problem = None
    if num_found == 0:
        problem = f"Database does not contain an atlas {name} owned by {username}."
    elif num_found > 1:
        problem = f"Database contains more than one atlas {name} owned by {username}."

    if problem is not None:
        # Raise inside a try so logger.exception records an active traceback.
        try:
            raise ValueError(problem)
        except ValueError as err:
            logger.exception(err)
            raise err
    return atlases[0]
def test_recover(sqlite):
    """A stored group can be retrieved intact after its in-memory items are cleared."""
    group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    group.name = "howdy"
    top_id = group.unique_id
    second_item_id = group.items[1].unique_id

    mo.store(group)
    mo.store(group)  # should have no effect
    assert len(group.items) == 2
    assert group.unique_id == top_id

    # make sure we can recover the previous version
    group.items = []
    assert group.unique_id == top_id

    group = mo.retrieve("group", unique_id=top_id)[0]
    assert group.unique_id == top_id
    assert len(group.items) == 2, len(group.items)
    assert group.unique_id == top_id
    assert group.items[1].unique_id == second_item_id
def test_load_atlas03(sqlite_with_atlas, atlas, username):
    """The retrieved atlas has the expected rt_peak on its first rt reference."""
    matches = metob.retrieve("Atlas", name=atlas.name, username=username)
    first_rt_reference = matches[0].compound_identifications[0].rt_references[0]
    assert first_rt_reference.rt_peak == 2.1964640053707174
def test_load_atlas02(atlas, sqlite_with_atlas, username):
    """Compound identifications of a retrieved atlas are CompoundIdentification objects."""
    matches = metob.retrieve("Atlas", name=atlas.name, username=username)
    first_identification = matches[0].compound_identifications[0]
    assert isinstance(first_identification, metob.CompoundIdentification)
def test_get_atlas04(metatlas_dataset, username):
    """Retrieving an atlas name that was never stored gives an empty result."""
    missing = metob.retrieve("Atlas", name="This_atlas_does_not_exists", username=username)
    assert len(missing) == 0
def test_store_atlas07(atlas, sqlite, username):
    """A stored atlas is still found after Workspace.instance has been reset to None."""
    atlas.name = "test_store_atlas07"
    metob.store(atlas)

    # Discard the current Workspace instance before querying.
    metoh.Workspace.instance = None

    matches = metob.retrieve("Atlas", name=atlas.name, username=username)
    assert len(matches) == 1
def test_store_atlas06(atlas, sqlite_with_atlas, username):
    """An atlas whose name contains spaces can be stored and retrieved exactly once."""
    atlas.name = "test atlas 06"
    metob.store(atlas)

    matches = metob.retrieve("Atlas", name=atlas.name, username=username)
    assert len(matches) == 1
def convert(ind, fname):
    """Helper function, converts a single file

    Converts the mzML file at fname to an h5 file next to it and, unless an
    LcmsRun for that mzML file is already retrievable, stores a new LcmsRun.

    inputs:
        ind: zero-based position of the file, used only for the log message
        fname: path of the mzML file; must match the module-level `patt`
    side effects:
        On failure the error is recorded in `readonly_files` (when the message
        contains 'exists but it can not be written') or in `other_errors`.
        In the latter case the mzML file is also moved to the matching
        'conversion_failures' path. Any h5 file is then removed, best-effort.
    """
    logger.info("Converting file number %d: %s", ind + 1, fname)

    # Get relevant information about the file.
    username = _file_name_to_username(fname, DEFAULT_USERNAME)
    info = patt.match(os.path.abspath(fname))
    if not info:
        logger.error("Invalid path name: %s", fname)
        return
    info = info.groupdict()
    dirname = os.path.dirname(fname)
    # Bound before the try block so the cleanup below can always refer to it.
    hdf5_file = fname.replace('mzML', 'h5')

    # Convert to HDF and store the entry in the database.
    try:
        logger.info("Generating h5 file: %s", hdf5_file)
        mzml_to_hdf(fname, hdf5_file, True)
        # this can be changed to 0o440 once everyone is on the current code
        os.chmod(hdf5_file, 0o660)

        # Add this to the database unless it is already there
        try:
            runs = retrieve('lcmsrun', username='******', mzml_file=fname)
        except Exception:
            # A failed lookup is treated the same as "not in the database".
            runs = []
        if not runs:
            ctime = os.stat(fname).st_ctime
            logger.info("LCMS run not in DB, inserting new entry.")
            run = LcmsRun(name=info['path'],
                          description=f"{info['experiment']} {info['path']}",
                          username=username,
                          experiment=info['experiment'],
                          creation_time=ctime,
                          last_modified=ctime,
                          mzml_file=fname,
                          hdf5_file=hdf5_file,
                          acquisition_time=get_acqtime_from_mzml(fname))
            store(run)
    except Exception as e:
        logger.error("During file conversion: %s", str(e))
        if 'exists but it can not be written' in str(e):
            logger.error("Cannot write to file within directory %s", dirname)
            readonly_files.setdefault(username, set()).add(dirname)
        else:
            msg = traceback.format_exception(*sys.exc_info())
            msg.insert(0, f"Cannot convert {fname}")
            other_errors.setdefault(username, []).append('\n'.join(msg))
            fail_path = fname.replace('raw_data', 'conversion_failures')
            logger.error("Moving mzml file to %s", fail_path)
            move_file(fname, fail_path)
        # Best-effort cleanup of a partial h5 file. Only filesystem errors are
        # ignored, so KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            os.remove(hdf5_file)
        except OSError:
            pass
def convert(file):
    """Convert a single mzML file to h5 and record it in the database.

    inputs:
        file: indexable pair; file[0] is the zero-based file number (used for
              the progress message) and file[1] is the mzML path, which must
              match the module-level `patt`
    side effects:
        Writes progress and errors to stdout/stderr, chmods the mzML and h5
        files to 0o660, holds an exclusive non-blocking flock on the mzML file
        while converting, and stores a new LcmsRun unless one is already
        retrievable for that mzML file. Failures are recorded in
        `readonly_files` or `other_errors`, and the h5 file is then removed
        on a best-effort basis.
    """
    ind = file[0]
    fname = file[1]

    sys.stdout.write('(%s): %s\n' % (ind + 1, fname))
    sys.stdout.flush()

    # Get relevant information about the file.
    info = patt.match(os.path.abspath(fname))
    if not info:
        sys.stdout.write("Invalid path name: %s\n" % fname)
        sys.stdout.flush()
        return
    info = info.groupdict()
    dirname = os.path.dirname(fname)

    # Owner of the file, else owner of its directory, else the username taken
    # from the path. pwd.getpwuid raises KeyError for a uid with no passwd
    # entry, so that case falls through to the next option as well.
    try:
        username = pwd.getpwuid(os.stat(fname).st_uid).pw_name
    except (OSError, KeyError):
        try:
            username = pwd.getpwuid(os.stat(dirname).st_uid).pw_name
        except Exception:
            username = info['username']

    # Change to read only.
    try:
        os.chmod(fname, 0o660)
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()

    # # Copy the original file to a pasteur backup.
    # if os.environ['USER'] == 'pasteur':
    #     pasteur_path = fname.replace('raw_data', 'pasteur_backup')
    #     dname = os.path.dirname(pasteur_path)
    #     if not os.path.exists(dname):
    #         os.makedirs(dname)
    #     try:
    #         shutil.copy(fname, pasteur_path)
    #     except IOError as e:
    #         if (username not in readonly_files):
    #             readonly_files[username] = set()
    #         readonly_files[username].add(dirname)
    #         return

    # Get a lock on the mzml file to prevent interference.
    # Opening is handled separately from locking: if open() itself fails there
    # is no handle to close, and the error is not a lock conflict.
    try:
        fid = open(fname, 'r')
    except IOError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()
        return
    try:
        fcntl.flock(fid, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        fid.close()
        msg = '%s already converting in another process\n' % fname
        sys.stderr.write(msg)
        sys.stderr.flush()
        return

    # Bound before the try block so the cleanup below can always refer to it.
    hdf5_file = fname.replace('mzML', 'h5')

    # Convert to HDF and store the entry in the database.
    try:
        sys.stderr.write('hdf5file is: %s' % hdf5_file)
        # Get Acquisition Time Here
        acquisition_time = get_acqtime_from_mzml(fname)
        mzml_to_hdf(fname, hdf5_file, True)
        os.chmod(hdf5_file, 0o660)
        description = info['experiment'] + ' ' + info['path']
        ctime = os.stat(fname).st_ctime

        # Add this to the database unless it is already there
        try:
            runs = retrieve('lcmsrun', username='******', mzml_file=fname)
        except Exception:
            # A failed lookup is treated the same as "not in the database".
            runs = []
        if not runs:
            run = LcmsRun(name=info['path'],
                          description=description,
                          username=info['username'],
                          experiment=info['experiment'],
                          creation_time=ctime,
                          last_modified=ctime,
                          mzml_file=fname,
                          hdf5_file=hdf5_file,
                          acquisition_time=acquisition_time)
            store(run)
    except Exception as e:
        if 'exists but it can not be written' in str(e):
            readonly_files.setdefault(username, set()).add(dirname)
        else:
            msg = traceback.format_exception(*sys.exc_info())
            msg.insert(0, 'Cannot convert %s' % fname)
            # Keyed by the username parsed from the path, as before.
            other_errors.setdefault(info['username'], []).append('\n'.join(msg))
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()
        # Best-effort cleanup of a partial h5 file. Only filesystem errors are
        # ignored, so KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            os.remove(hdf5_file)
        except OSError:
            pass
    finally:
        # Closing the handle also releases the flock taken above.
        fid.close()