def store_atlas(self, even_if_exists: bool = False) -> None:
    """Save self.atlas to the database.

    inputs:
        even_if_exists: if True, will save the atlas even if the atlas name
            already is in the database with your username
    side effects:
        Saves the atlas to the database.
        Raises ValueError if even_if_exists==False and name is already in
        the database with your username
    """
    start_time = datetime.datetime.now()
    name = self.atlas.name
    username = self.ids.username
    try:
        if not even_if_exists:
            existing = metob.retrieve("Atlas", name=name, username=username)
            if len(existing) > 0:
                raise ValueError(
                    f"An atlas with name {name} and owned by {username} already exists."
                )
    except ValueError as err:
        logger.exception(err)
        raise err
    metob.store(self.atlas)
    logger.info(
        "Atlas %s stored in database with owner %s in %s.",
        self.ids.atlas,
        self.ids.username,
        _duration_since(start_time),
    )
def test_floating_point(sqlite):
    """An updated float attribute survives a store/retrieve round trip."""
    cpd = mo.Compound(name="foo", mono_isotopic_molecular_weight=1.0)
    mo.store(cpd)
    cpd.mono_isotopic_molecular_weight = 1.000007
    mo.store(cpd)
    fetched = mo.retrieve("compound", name="foo")[-1]
    assert fetched.mono_isotopic_molecular_weight == 1.000007, fetched.mono_isotopic_molecular_weight
def set_rt(self, compound_idx: int, which: str, time: float) -> None:
    """Set one retention-time value for a compound in every synced structure.

    inputs:
        compound_idx: index of compound to update
        which: 'rt_min', 'rt_max', or 'rt_peak'
        time: a floating point value for the number of minutes
    updates the RT value in database, self.atlas, self.atlas_df, self.data
    so that no datastructures need to be invalidated
    """
    try:
        if self.atlas is None:
            raise ValueError("Cannot set RTs when atlas is None.")
    except ValueError as err:
        logger.exception(err)
        raise err
    assert which in ("rt_min", "rt_peak", "rt_max")
    rt_ref = self.atlas.compound_identifications[compound_idx].rt_references[0]
    setattr(rt_ref, which, time)
    for entry in self.data:
        setattr(entry[compound_idx]["identification"].rt_references[0], which, time)
    self.atlas_df.loc[compound_idx, which] = time
    metob.store(rt_ref)
    if which != "rt_peak":
        # changing a bound invalidates any cached hit/data selections
        self._hits_valid_for_rt_bounds = False
        self._data_valid_for_rt_bounds = False
def test_store_atlas01(atlas, sqlite, username):
    """Storing an atlas makes it retrievable by name and username."""
    atlas.name = "test_store_atlas01"
    before = metob.retrieve("Atlas", name=atlas.name, username=username)
    assert len(before) == 0
    metob.store(atlas)
    after = metob.retrieve("Atlas", name=atlas.name, username=username)
    assert len(after) == 1
def test_nested(sqlite):
    """Storing a group does not rewrite the unique ids of its children."""
    outer = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    assert len(outer.items) == 2
    outer.items[1].name = "hello"
    child_id = outer.items[1].unique_id
    assert len(outer.items) == 2
    mo.store(outer)
    assert outer.items[1].unique_id == child_id
def test_get_latest():
    """Retrieving by unique_id returns only the most recent version."""
    cpd = mo.Compound(name="hello")
    mo.store(cpd)
    cpd.name = "goodbye"
    mo.store(cpd)
    results = mo.retrieve("compound", unique_id=cpd.unique_id)
    assert len(results) == 1
    assert results[0].name == "goodbye"
def test_retrieve_head():
    """Renaming and re-storing does not add a head entry for the old name."""
    run = mo.LcmsRun(name="foo")
    mo.store(run)
    count_before = len(mo.retrieve("lcmsrun", name="foo"))
    run.name = "bar"
    mo.store(run)
    count_after = len(mo.retrieve("lcmsrun", name="foo"))
    assert count_after == count_before
def test_store_all(sqlite):
    """Every registered metatlas object class can be stored and retrieved."""
    classes = list(metoh.Workspace.get_instance().subclass_lut.values())
    mo.store([klass() for klass in classes])
    for klass in classes:
        assert len(mo.retrieve(klass.__name__)) > 0
def test_retrieve01(sqlite):
    """List-valued retrieve criteria match by membership."""
    compound = mo.Compound(name="foo", inchi=ADENOSINE_INCHI, inchi_key="foobar")
    mo.store(compound)
    no_match = mo.retrieve("Compounds", inchi_key=[], username="******")
    assert no_match == []
    by_inchi = mo.retrieve("Compounds", inchi=[ADENOSINE_INCHI], username="******")
    assert by_inchi[0].inchi == ADENOSINE_INCHI
def test_simple_query(sqlite):
    """A stored object keeps its unique_id and heads the query results."""
    run = mo.LcmsRun(name="First")
    first_version = run.unique_id
    run.description = "Hey there"
    mo.store(run)
    assert run.unique_id == first_version
    matches = mo.retrieve("lcmsrun", name="First")
    assert matches[-1].unique_id == run.unique_id
    assert all(entry.unique_id != first_version for entry in matches[:-1])
def test_unique_links(sqlite):
    """Replacing a group's items leaves exactly the new links in the database."""
    group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    kept_id = group.items[1].unique_id
    group.items = [group.items[1]]
    mo.store(group)
    group.items = []
    group = mo.retrieve("group", unique_id=group.unique_id)[0]
    assert len(group.items) == 1, len(group.items)
    assert group.items[0].unique_id == kept_id
def test_simple(sqlite):
    """Storing preserves unique_id and assigns a non-empty prev_uid."""
    group = mo.Group()
    original_id = group.unique_id
    mo.store(group)
    assert group.unique_id == original_id
    assert group.prev_uid != ""
    group.name = "hello"
    mo.store(group)
    assert group.unique_id == original_id
    assert group.prev_uid != ""
def test_glob_query(sqlite):
    """SQL-style '%' wildcards work in retrieve queries."""
    run_a = mo.LcmsRun(name="First")
    run_b = mo.LcmsRun(name="Second")
    run_c = mo.LcmsRun(name="Third")
    mo.store([run_a, run_b, run_c])
    assert mo.retrieve("lcmsrun", name="Fir%")[-1].unique_id == run_a.unique_id
    assert mo.retrieve("lcmsrun", name="%econd")[-1].unique_id == run_b.unique_id
    assert mo.retrieve("LcmsRuns", name="T%ir%")[-1].unique_id == run_c.unique_id
def test_circular_reference(sqlite):
    """A group containing itself can be stored and retrieved intact."""
    group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    orig_id = group.unique_id
    group.items[0].items.append(group)
    mo.store(group)
    group.items = []
    group = mo.retrieve("group", unique_id=group.unique_id)[0]
    inner = group.items[0]
    assert len(inner.items) == 2, inner.items
    assert inner.items[1].unique_id == orig_id
    assert group.unique_id == orig_id
def test_preserve_provenance(sqlite):
    """Earlier versions of a stored object remain retrievable via prev_uid."""
    group_a = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    group_b = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    mo.store([group_a, group_b])
    assert len(group_a.items) == 2
    group_a.items = []
    group_b.items = []
    mo.store([group_a, group_b])
    assert len(group_a.items) == 0
    previous = mo.retrieve("group", unique_id=group_a.prev_uid)[0]
    assert len(previous.items) == 2, repr(previous)
def test_remove_objects(sqlite):
    """remove_objects deletes a group, its members, and the link rows."""
    parent = mo.Group(name="foo", items=[mo.Group(name="baz", description="hello")])
    child_id = parent.items[0].unique_id
    mo.store(parent)
    fetched = mo.retrieve("groups", unique_id=child_id)[0]
    assert fetched.unique_id == child_id
    mo.remove_objects(parent, _override=True)
    assert not mo.retrieve("groups", name="foo")
    assert not mo.retrieve("groups_items", target_id=child_id)
def test_user_preserve(sqlite):
    """_override keeps the stored username; a normal store reassigns ownership."""
    run = mo.LcmsRun(username="******")
    ref = mo.Reference(name="hello", username="******", lcms_run=run)
    orig_id = ref.unique_id
    mo.store(ref, _override=True)
    assert ref.unique_id == orig_id
    mo.store(ref)
    assert ref.unique_id != orig_id
    matches = mo.retrieve("reference", username="******", name="hello")
    current_user = getpass.getuser()
    assert matches[-2].username == "foo"
    assert matches[-1].username == current_user
    assert matches[-2].lcms_run.username == "foo"
    assert matches[-1].lcms_run.username == "foo"
def test_recover(sqlite):
    """Old versions of a group remain retrievable by unique_id after re-store."""
    group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    group.name = "howdy"
    top_version = group.unique_id
    sub_version = group.items[1].unique_id
    mo.store(group)
    mo.store(group)  # should have no effect
    assert len(group.items) == 2
    assert group.unique_id == top_version
    # make sure we can recover the previous version
    group.items = []
    assert group.unique_id == top_version
    group = mo.retrieve("group", unique_id=top_version)[0]
    assert group.unique_id == top_version
    assert len(group.items) == 2, len(group.items)
    assert group.unique_id == top_version
    assert group.items[1].unique_id == sub_version
def set_note(self, compound_idx: int, which: str, value: str) -> None:
    """Set one note field for a compound in every synced structure.

    inputs:
        compound_idx: index of compound to update
        which: 'ms1_notes', 'ms2_notes' or 'identification_notes'
        value: a string with the note content
    updates the notes value in database, self.atlas, self.atlas_df, self.data
    so that no datastructures need to be invalidated
    """
    try:
        if self.atlas is None:
            raise ValueError("Cannot set notes when atlas is None.")
    except ValueError as err:
        logger.exception(err)
        raise err
    assert which in ("ms1_notes", "ms2_notes", "identification_notes")
    ident = self.atlas.compound_identifications[compound_idx]
    setattr(ident, which, value)
    setattr(self.data[0][compound_idx]["identification"], which, value)
    self.atlas_df.loc[compound_idx, which] = value
    metob.store(ident)
def create_c18_stds_atlases(source: os.PathLike, polarity: str, mz_tolerance: float = 10) -> None:
    """Build and store a C18 QC standards atlas for one polarity.

    inputs:
        source: path to a tab-separated file of candidate compounds
        polarity: "positive" or "negative"
        mz_tolerance: m/z tolerance passed through to make_atlas_from_df
    side effects:
        stores the generated atlas in the database
    """
    data = pd.read_csv(source, sep="\t")
    std_inchi_keys = {
        "Phenylalanine": "COLNVLDHVKWLRT-QMMMGPOBSA-N",
        "L-Tryptophan": "QIVBCDIJIAJPQS-SECBINFHSA-N",
        "Salicylic acid": "YGSDEFSMJLZEOE-UHFFFAOYSA-N",
        # this one will not be found in c18_data, so a row is appended below
        "2-Amino-3-bromo-5-methylbenzoic acid": "LCMZECCEEOQWLQ-UHFFFAOYSA-N",
    }
    abmba = "2-Amino-3-bromo-5-methylbenzoic acid"
    more_rows = pd.DataFrame({
        "inchi_key": [std_inchi_keys[abmba]],
        "label": [abmba],
        "adduct": ["[M+H]+" if polarity == "positive" else "[M-H]-"],
        "polarity": [polarity],
        "rt_min": [4.5],
        "rt_peak": [4.7],
        "rt_max": [4.9],
        # neutral monoisotopic mass +/- one proton mass depending on polarity
        "mz": [228.97384 + (1.00727647 * (1 if polarity == "positive" else -1))],
        "confidence_category": ["Platinum"],
    })
    # DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported replacement. ignore_index avoids a duplicate
    # index label between data and more_rows, which would make the .loc
    # lookup below return multiple rows for one idxmax label.
    data = pd.concat([data, more_rows], ignore_index=True)
    acceptable = data[data["inchi_key"].isin(std_inchi_keys.values())]
    by_polarity = acceptable[acceptable["polarity"] == polarity]
    by_polarity = by_polarity.assign(label=None)
    # prefer Platinum-confidence rows when an inchi_key occurs more than once
    by_polarity["rank"] = by_polarity["confidence_category"] == "Platinum"
    single = by_polarity.loc[by_polarity.groupby(["inchi_key"])["rank"].idxmax()]
    name = f"C18_{datetime.today().strftime('%Y%m%d')}_QC_{polarity[:3].upper()}"
    atlas = make_atlas_from_df(single, name, polarity, mz_tolerance)
    metob.store(atlas)
def test_store_stubs(sqlite):
    """A retrieved group's items are real objects that can be re-stored."""
    group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    mo.store(group)
    group = mo.retrieve("group", unique_id=group.unique_id)[0]
    assert isinstance(group.items[0], mo.Group)
    mo.store(group)
def convert(file):
    """Convert one mzML file to HDF5 and register it in the database.

    file: a 2-sequence of (zero-based index, mzML file path).

    Side effects: creates a sibling .h5 file, chmods both files, and stores
    an LcmsRun entry in the database unless one is already there.  Failures
    are accumulated in the module-level readonly_files / other_errors
    mappings for later reporting.
    """
    ind = file[0]
    fname = file[1]
    sys.stdout.write('(%s): %s\n' % (ind + 1, fname))
    sys.stdout.flush()
    # Get relevant information about the file: username/experiment/path
    # parsed out of the raw_data directory layout by the module-level patt.
    info = patt.match(os.path.abspath(fname))
    if info:
        info = info.groupdict()
    else:
        sys.stdout.write("Invalid path name: %s\n" % fname)
        sys.stdout.flush()
        return
    dirname = os.path.dirname(fname)
    # Prefer the file owner's username; fall back to the directory owner,
    # then to the username embedded in the path.
    try:
        username = pwd.getpwuid(os.stat(fname).st_uid).pw_name
    except OSError:
        try:
            username = pwd.getpwuid(os.stat(dirname).st_uid).pw_name
        except Exception:
            username = info['username']
    # Change permissions to owner/group read-write; failure is non-fatal.
    try:
        os.chmod(fname, 0o660)
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()
    # # Copy the original file to a pasteur backup.
    # if os.environ['USER'] == 'pasteur':
    #     pasteur_path = fname.replace('raw_data', 'pasteur_backup')
    #     dname = os.path.dirname(pasteur_path)
    #     if not os.path.exists(dname):
    #         os.makedirs(dname)
    #     try:
    #         shutil.copy(fname, pasteur_path)
    #     except IOError as e:
    #         if (username not in readonly_files):
    #             readonly_files[username] = set()
    #         readonly_files[username].add(dirname)
    #         return
    # Get a lock on the mzml file to prevent interference.
    # NOTE(review): if open() itself raises IOError, `fid` is unbound and the
    # handler's fid.close() raises NameError — confirm open cannot fail here.
    try:
        fid = open(fname, 'r')
        fcntl.flock(fid, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        fid.close()
        msg = '%s already converting in another process\n' % fname
        sys.stderr.write(msg)
        sys.stderr.flush()
        return
    # Convert to HDF and store the entry in the database.
    try:
        hdf5_file = fname.replace('mzML', 'h5')
        sys.stderr.write('hdf5file is: %s' % hdf5_file)
        # Get Acquisition Time Here
        acquisition_time = get_acqtime_from_mzml(fname)
        mzml_to_hdf(fname, hdf5_file, True)
        os.chmod(hdf5_file, 0o660)
        description = info['experiment'] + ' ' + info['path']
        ctime = os.stat(fname).st_ctime
        # Add this to the database unless it is already there
        try:
            runs = retrieve('lcmsrun', username='******', mzml_file=fname)
        except Exception:
            runs = list()
        if not len(runs):
            run = LcmsRun(name=info['path'], description=description,
                          username=info['username'],
                          experiment=info['experiment'],
                          creation_time=ctime, last_modified=ctime,
                          mzml_file=fname, hdf5_file=hdf5_file,
                          acquisition_time=acquisition_time)
            store(run)
    except Exception as e:
        # This message substring is raised when the h5 target is unwritable;
        # record the directory so the owner can be emailed later.
        if 'exists but it can not be written' in str(e):
            if (username not in readonly_files):
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
        else:
            msg = traceback.format_exception(*sys.exc_info())
            msg.insert(0, 'Cannot convert %s' % fname)
            dat = info['username']
            if (dat not in other_errors):
                other_errors[info['username']] = list()
            other_errors[info['username']].append('\n'.join(msg))
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()
        # Best-effort cleanup of a partially written h5 file.
        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit.
        try:
            os.remove(hdf5_file)
        except:
            pass
    finally:
        # Release the advisory lock taken above.
        fid.close()
def test_escape_glob(sqlite):
    """A literal '%' in a query value can be escaped by doubling it."""
    run = mo.LcmsRun(description="Flow %")
    mo.store(run)
    matches = mo.retrieve("lcmsrun", description="Flow %%")
    assert matches[-1].unique_id == run.unique_id
def test_id_grade_trait(sqlite):
    """Assigning an identification grade by name resolves to the stored object."""
    grade = mo.IdentificationGrade(name="E")
    mo.store(grade)
    ident = mo.CompoundIdentification(identification_grade="e")
    assert ident.identification_grade.unique_id == grade.unique_id
def convert(ind, fname):
    """Helper function, converts a single file.

    Converts the mzML file at fname to HDF5 and registers an LcmsRun entry
    in the database if one is not already there.  On failure, records the
    problem in the module-level readonly_files / other_errors collections
    and moves the mzml file to a conversion_failures directory.

    inputs:
        ind: zero-based index of this file (used only for progress logging)
        fname: absolute or relative path to the mzML file
    """
    logger.info("Converting file number %d: %s", ind + 1, fname)
    # Get relevant information about the file.
    username = _file_name_to_username(fname, DEFAULT_USERNAME)
    info = patt.match(os.path.abspath(fname))
    if info:
        info = info.groupdict()
    else:
        logger.error("Invalid path name: %s", fname)
        return
    dirname = os.path.dirname(fname)
    # Convert to HDF and store the entry in the database.
    try:
        hdf5_file = fname.replace('mzML', 'h5')
        logger.info("Generating h5 file: %s", hdf5_file)
        mzml_to_hdf(fname, hdf5_file, True)
        os.chmod(
            hdf5_file, 0o660
        )  # this can be changed to 0o440 once everyone is on the current code
        # Add this to the database unless it is already there
        try:
            runs = retrieve('lcmsrun', username='******', mzml_file=fname)
        except Exception:
            runs = []
        if not runs:
            ctime = os.stat(fname).st_ctime
            logger.info("LCMS run not in DB, inserting new entry.")
            run = LcmsRun(name=info['path'],
                          description=f"{info['experiment']} {info['path']}",
                          username=username,
                          experiment=info['experiment'],
                          creation_time=ctime,
                          last_modified=ctime,
                          mzml_file=fname,
                          hdf5_file=hdf5_file,
                          acquisition_time=get_acqtime_from_mzml(fname))
            store(run)
    except Exception as e:
        logger.error("During file conversion: %s", str(e))
        # This message substring indicates the h5 target is unwritable;
        # record the directory so its owner can be notified later.
        if 'exists but it can not be written' in str(e):
            logger.error("Cannot write to file within directory %s", dirname)
            if username not in readonly_files:
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
        else:
            msg = traceback.format_exception(*sys.exc_info())
            msg.insert(0, f"Cannot convert {fname}")
            if username not in other_errors:
                other_errors[username] = []
            other_errors[username].append('\n'.join(msg))
            fail_path = fname.replace('raw_data', 'conversion_failures')
            logger.error("Moving mzml file to %s", fail_path)
            move_file(fname, fail_path)
        # Best-effort cleanup of a partially written h5 file.  A bare
        # "except:" here would also swallow KeyboardInterrupt/SystemExit,
        # so catch only the OS-level removal failure.
        try:
            os.remove(hdf5_file)
        except OSError:
            pass
def create_c18_template_atlases(source: os.PathLike, polarity: str) -> None:
    """Generate a C18 template atlas from source and store it in the database."""
    assert polarity in ("negative", "positive")
    name = f"C18_{datetime.today().strftime('%Y%m%d')}_TPL_{polarity[:3].upper()}"
    template = generate_template_atlas(source, ["Gold", "Platinum"], polarity, name)
    metob.store(template)
def test_stub_instance(sqlite):
    """Stub references are materialized as full objects on retrieval."""
    run = mo.LcmsRun(username="******")
    ref = mo.Reference(name="hello", lcms_run=run)
    mo.store(ref, _override=True)
    fetched = mo.retrieve("reference", name="hello")[0]
    assert isinstance(fetched.lcms_run, mo.LcmsRun)
def test_store_atlas06(atlas, sqlite_with_atlas, username):
    """An atlas name containing spaces can be stored and retrieved."""
    atlas.name = "test atlas 06"
    metob.store(atlas)
    matches = metob.retrieve("Atlas", name=atlas.name, username=username)
    assert len(matches) == 1
def test_store_atlas07(atlas, sqlite, username):
    """A stored atlas survives resetting the Workspace singleton."""
    atlas.name = "test_store_atlas07"
    metob.store(atlas)
    metoh.Workspace.instance = None
    matches = metob.retrieve("Atlas", name=atlas.name, username=username)
    assert len(matches) == 1
def update_metatlas(directory):
    """Find new mzML files under directory, convert each to HDF5, register
    LcmsRun entries in the database, and email users about any failures.

    directory: root of a raw_data tree laid out as
        .../raw_data/<username>/<experiment>/<path>
    Side effects: shells out to `find`, writes .h5 files, stores database
    entries, and sends notification emails.
    """
    # Per-user accumulators for end-of-run notification emails.
    readonly_files = defaultdict(set)
    other_errors = defaultdict(list)
    directory = os.path.abspath(directory)
    # Sleep a random amount of time to avoid running at the same time as
    # other processes.
    time.sleep(random.random() * 2)
    mzml_files = check_output('find %s -name "*.mzML"' % directory, shell=True)
    mzml_files = mzml_files.decode('utf-8').splitlines()
    # Find valid h5 files newer than the format version timestamp.
    delta = int((time.time() - VERSION_TIMESTAMP) / 60)
    check = 'find %s -name "*.h5" -mmin -%s -size +2k' % (directory, delta)
    valid_files = check_output(check, shell=True).decode('utf-8').splitlines()
    valid_files = set(valid_files)
    # An mzML file needs (re)conversion if it has no up-to-date h5 sibling.
    new_files = []
    for mzml_file in mzml_files:
        if mzml_file.replace('.mzML', '.h5') not in valid_files:
            new_files.append(mzml_file)
    # Extracts username/experiment/path from the raw_data directory layout.
    patt = re.compile(r".+\/raw_data\/(?P<username>[^/]+)\/(?P<experiment>[^/]+)\/(?P<path>.+)")
    sys.stdout.write('Found %s files\n' % len(new_files))
    sys.stdout.flush()
    for (ind, fname) in enumerate(new_files):
        sys.stdout.write('(%s of %s): %s\n' % (ind + 1, len(new_files), fname))
        sys.stdout.flush()
        # Get relevant information about the file.
        info = patt.match(os.path.abspath(fname))
        if info:
            info = info.groupdict()
        else:
            sys.stdout.write("Invalid path name: %s\n" % fname)
            sys.stdout.flush()
            continue
        dirname = os.path.dirname(fname)
        # Prefer the file owner's username; fall back to the directory
        # owner, then to the username embedded in the path.
        try:
            username = pwd.getpwuid(os.stat(fname).st_uid).pw_name
        except OSError:
            try:
                username = pwd.getpwuid(os.stat(dirname).st_uid).pw_name
            except Exception:
                username = info['username']
        # Change permissions to owner/group read-write; failure is non-fatal.
        try:
            os.chmod(fname, 0o660)
        except Exception as e:
            sys.stderr.write(str(e) + '\n')
            sys.stderr.flush()
        # Copy the original file to a pasteur backup.
        if getpass.getuser() == 'pasteur':
            pasteur_path = fname.replace('raw_data', 'pasteur_backup')
            dname = os.path.dirname(pasteur_path)
            if not os.path.exists(dname):
                os.makedirs(dname)
            try:
                shutil.copy(fname, pasteur_path)
            except IOError as e:
                readonly_files[username].add(dirname)
                continue
        # Get a lock on the mzml file to prevent interference.
        # NOTE(review): if open() itself raises IOError, `fid` is unbound and
        # fid.close() in the handler raises NameError — confirm intended.
        try:
            fid = open(fname, 'r')
            fcntl.flock(fid, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError:
            fid.close()
            msg = '%s already converting in another process\n' % fname
            sys.stderr.write(msg)
            sys.stderr.flush()
            continue
        # Convert to HDF and store the entry in the database.
        try:
            hdf5_file = fname.replace('mzML', 'h5')
            # Get Acquisition Time Here
            acquisition_time = get_acqtime_from_mzml(fname)
            mzml_to_hdf(fname, hdf5_file, True)
            os.chmod(hdf5_file, 0o660)
            description = info['experiment'] + ' ' + info['path']
            ctime = os.stat(fname).st_ctime
            # Add this to the database unless it is already there
            try:
                runs = retrieve('lcmsrun', username='******', mzml_file=fname)
            except Exception:
                runs = list()
            if not len(runs):
                run = LcmsRun(name=info['path'], description=description,
                              username=info['username'],
                              experiment=info['experiment'],
                              creation_time=ctime, last_modified=ctime,
                              mzml_file=fname, hdf5_file=hdf5_file,
                              acquisition_time=acquisition_time)
                store(run)
        except Exception as e:
            # This message substring indicates the h5 target is unwritable;
            # record the directory so the owner can be emailed below.
            if 'exists but it can not be written' in str(e):
                readonly_files[username].add(dirname)
            else:
                msg = traceback.format_exception(*sys.exc_info())
                msg.insert(0, 'Cannot convert %s' % fname)
                other_errors[info['username']].append('\n'.join(msg))
            sys.stderr.write(str(e) + '\n')
            sys.stderr.flush()
            # Best-effort cleanup of a partially written h5 file.
            # NOTE(review): bare except also swallows KeyboardInterrupt.
            try:
                os.remove(hdf5_file)
            except:
                pass
        finally:
            # Release the advisory lock taken above.
            fid.close()
    # Handle errors: email each affected user a summary.
    from metatlas.metatlas_objects import find_invalid_runs
    invalid_runs = find_invalid_runs(_override=True)
    if readonly_files:
        for (username, dirnames) in readonly_files.items():
            body = ("Please log in to NERSC and run 'chmod 777' on the "
                    "following directories:\n%s" % ('\n'.join(dirnames)))
            send_mail('Metatlas Files are Inaccessible', username, body)
    if invalid_runs:
        grouped = defaultdict(list)
        for run in invalid_runs:
            grouped[run.username].append(run.mzml_file)
        for (username, filenames) in grouped.items():
            body = 'You have runs that are not longer accessible\n'
            body += 'To remove them from the database, run the following on ipython.nersc.gov:\n\n'
            body += 'from metatlas.metatlas_objects import find_invalid_runs, remove_objects\n'
            body += 'remove_objects(find_invalid_runs())\n\n'
            body += 'The invalid runs are:\n%s' % ('\n'.join(filenames))
            send_mail('Metatlas Runs are Invalid', username, body)
    if other_errors:
        for (username, errors) in other_errors.items():
            body = 'Errored files found while loading in Metatlas files:\n\n%s' % '\n********************************\n'.join(errors)
            send_mail('Errors loading Metatlas files', username, body)
    sys.stdout.write('Done!\n')
    sys.stdout.flush()