Example #1
0
 def store_atlas(self, even_if_exists: bool = False) -> None:
     """
     Save self.atlas to the database.

     inputs:
         even_if_exists: when True, skip the duplicate check and store the
                         atlas even if one with the same name and username
                         is already in the database
     side effects:
         Stores the atlas with metob.store and logs how long that took.
         Raises ValueError (after logging it) if even_if_exists is False and
         an atlas with this name and username is already in the database
     """
     began = datetime.datetime.now()
     atlas_name = self.atlas.name
     owner = self.ids.username
     try:
         if not even_if_exists:
             existing = metob.retrieve("Atlas", name=atlas_name, username=owner)
             if len(existing) > 0:
                 raise ValueError(
                     f"An atlas with name {atlas_name} and owned by {owner} already exists."
                 )
     except ValueError as err:
         # log with traceback, then let the caller deal with it
         logger.exception(err)
         raise err
     metob.store(self.atlas)
     logger.info(
         "Atlas %s stored in database with owner %s in %s.",
         self.ids.atlas,
         self.ids.username,
         _duration_since(began),
     )
Example #2
0
def test_floating_point(sqlite):
    """Re-store a compound with weight 1.000007 and check the value read back is exactly equal."""
    expected_weight = 1.000007
    foo = mo.Compound(name="foo", mono_isotopic_molecular_weight=1.0)
    mo.store(foo)
    foo.mono_isotopic_molecular_weight = expected_weight
    mo.store(foo)
    # the last entry returned for name="foo" is the one compared
    latest = mo.retrieve("compound", name="foo")[-1]
    stored_weight = latest.mono_isotopic_molecular_weight
    assert stored_weight == expected_weight, stored_weight
Example #3
0
 def set_rt(self, compound_idx: int, which: str, time: float) -> None:
     """
     Set one retention-time value of a compound in every place it is held.

     inputs:
         compound_idx: index of the compound to update
         which: 'rt_min', 'rt_max', or 'rt_peak'
         time: a floating point value for the number of minutes
     side effects:
         Writes the value to the compound's first RT reference in self.atlas
         and in every sample of self.data, updates self.atlas_df, and stores
         the atlas RT reference in the database, so no data structures need
         to be invalidated. Setting 'rt_min' or 'rt_max' also marks the
         hits and data as no longer valid for the RT bounds.
         Raises ValueError (after logging it) if self.atlas is None.
     """
     try:
         if self.atlas is None:
             raise ValueError("Cannot set RTs when atlas is None.")
     except ValueError as err:
         logger.exception(err)
         raise err
     assert which in ["rt_min", "rt_peak", "rt_max"]
     identification = self.atlas.compound_identifications[compound_idx]
     rt_ref = identification.rt_references[0]
     setattr(rt_ref, which, time)
     for sample in self.data:
         sample_rt_ref = sample[compound_idx]["identification"].rt_references[0]
         setattr(sample_rt_ref, which, time)
     self.atlas_df.loc[compound_idx, which] = time
     metob.store(rt_ref)
     bounds_changed = which in ("rt_min", "rt_max")
     if bounds_changed:
         self._hits_valid_for_rt_bounds = False
         self._data_valid_for_rt_bounds = False
Example #4
0
def test_store_atlas01(atlas, sqlite, username):
    """An atlas that is absent before metob.store is retrievable exactly once afterwards."""

    def count_matches():
        found = metob.retrieve("Atlas", name=atlas.name, username=username)
        return len(found)

    atlas.name = "test_store_atlas01"
    assert count_matches() == 0
    metob.store(atlas)
    assert count_matches() == 1
Example #5
0
def test_nested(sqlite):
    """Storing a nested group keeps the unique_id of a renamed child item."""
    inner = mo.Group(items=[mo.LcmsRun()])
    outer = mo.Group(items=[inner, mo.LcmsRun()])
    assert len(outer.items) == 2
    outer.items[1].name = "hello"
    child_id_before = outer.items[1].unique_id
    assert len(outer.items) == 2
    mo.store(outer)
    child_id_after = outer.items[1].unique_id
    assert child_id_after == child_id_before
Example #6
0
def test_get_latest():
    """Retrieving by unique_id after a rename returns one object carrying the new name."""
    compound = mo.Compound(name="hello")
    mo.store(compound)
    compound.name = "goodbye"
    mo.store(compound)
    matches = mo.retrieve("compound", unique_id=compound.unique_id)
    assert len(matches) == 1
    assert matches[0].name == "goodbye"
Example #7
0
def test_retrieve_head():
    """Renaming a stored run from "foo" to "bar" leaves the number of "foo" matches unchanged."""

    def foo_count():
        return len(mo.retrieve("lcmsrun", name="foo"))

    run = mo.LcmsRun(name="foo")
    mo.store(run)
    before = foo_count()
    run.name = "bar"
    mo.store(run)
    after = foo_count()
    assert after == before
Example #8
0
def test_store_all(sqlite):
    """Store a default instance of every class in the workspace lookup table, then retrieve each by class name."""
    instances = [
        klass()
        for klass in metoh.Workspace.get_instance().subclass_lut.values()
    ]
    mo.store(instances)
    for klass in metoh.Workspace.get_instance().subclass_lut.values():
        retrieved = mo.retrieve(klass.__name__)
        assert len(retrieved) > 0
Example #9
0
def test_retrieve01(sqlite):
    """An empty inchi_key list matches nothing; a one-element inchi list finds the stored compound."""
    stored = mo.Compound(name="foo", inchi=ADENOSINE_INCHI, inchi_key="foobar")
    mo.store(stored)
    nothing = mo.retrieve("Compounds", inchi_key=[], username="******")
    assert nothing == []
    by_inchi = mo.retrieve("Compounds",
                           inchi=[ADENOSINE_INCHI],
                           username="******")
    assert by_inchi[0].inchi == ADENOSINE_INCHI
Example #10
0
def test_simple_query(sqlite):
    """The last match for name="First" is the stored run; no earlier match shares its unique_id."""
    run = mo.LcmsRun(name="First")
    initial_id = run.unique_id
    run.description = "Hey there"
    mo.store(run)
    assert run.unique_id == initial_id
    matches = mo.retrieve("lcmsrun", name="First")
    assert matches[-1].unique_id == run.unique_id
    for older in matches[:-1]:
        assert older.unique_id != initial_id
Example #11
0
def test_unique_links(sqlite):
    """After trimming items to one run and storing, the retrieved group holds only that run."""
    group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    sub_version = group.items[1].unique_id
    group.items = [group.items[1]]
    mo.store(group)

    # empty the in-memory object so the check below relies on the database
    group.items = []
    reloaded = mo.retrieve("group", unique_id=group.unique_id)[0]
    item_count = len(reloaded.items)
    assert item_count == 1, item_count
    assert reloaded.items[0].unique_id == sub_version
Example #12
0
def test_simple(sqlite):
    """unique_id is unchanged and prev_uid is non-empty after the first store and after a modified re-store."""
    group = mo.Group()
    uid = group.unique_id

    def check_ids():
        assert group.unique_id == uid
        assert group.prev_uid != ""

    mo.store(group)
    check_ids()
    group.name = "hello"
    mo.store(group)
    check_ids()
Example #13
0
def test_glob_query(sqlite):
    """'%' wildcards in a name query select the expected run."""
    first = mo.LcmsRun(name="First")
    second = mo.LcmsRun(name="Second")
    third = mo.LcmsRun(name="Third")
    mo.store([first, second, third])
    # (table name, name pattern, run expected as the last match)
    cases = (
        ("lcmsrun", "Fir%", first),
        ("lcmsrun", "%econd", second),
        ("LcmsRuns", "T%ir%", third),
    )
    for table, pattern, expected in cases:
        matches = mo.retrieve(table, name=pattern)
        assert matches[-1].unique_id == expected.unique_id
Example #14
0
def test_circular_reference(sqlite):
    """A group that appears inside its own child can be stored and retrieved with the cycle intact."""
    root = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    orig_id = root.unique_id
    # make the first child contain its own parent
    root.items[0].items.append(root)
    mo.store(root)
    root.items = []
    reloaded = mo.retrieve("group", unique_id=root.unique_id)[0]
    child = reloaded.items[0]
    assert len(child.items) == 2, child.items
    assert child.items[1].unique_id == orig_id
    assert reloaded.unique_id == orig_id
Example #15
0
def test_preserve_provenance(sqlite):
    """After emptying and re-storing a group, the object at prev_uid still has its two items."""

    def make_group():
        return mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])

    first = make_group()
    second = make_group()
    mo.store([first, second])
    assert len(first.items) == 2
    first.items = []
    second.items = []
    mo.store([first, second])
    assert len(first.items) == 0
    earlier = mo.retrieve("group", unique_id=first.prev_uid)[0]
    assert len(earlier.items) == 2, repr(earlier)
Example #16
0
def test_remove_objects(sqlite):
    """remove_objects on a group leaves no group named "foo" and no groups_items row targeting its child."""
    parent = mo.Group(name="foo",
                      items=[mo.Group(name="baz", description="hello")])
    sub_id = parent.items[0].unique_id
    mo.store(parent)
    stored_child = mo.retrieve("groups", unique_id=sub_id)[0]
    assert stored_child.unique_id == sub_id
    mo.remove_objects(parent, _override=True)
    remaining_groups = mo.retrieve("groups", name="foo")
    assert not remaining_groups
    remaining_links = mo.retrieve("groups_items", target_id=sub_id)
    assert not remaining_links
Example #17
0
def test_user_preserve(sqlite):
    """Storing with _override keeps the unique_id; a plain store yields a new id owned by the current user."""
    run = mo.LcmsRun(username="******")
    ref = mo.Reference(name="hello", username="******", lcms_run=run)
    orig_id = ref.unique_id
    mo.store(ref, _override=True)
    assert ref.unique_id == orig_id
    mo.store(ref)
    assert ref.unique_id != orig_id
    history = mo.retrieve("reference", username="******", name="hello")
    current_user = getpass.getuser()
    older, newer = history[-2], history[-1]
    assert older.username == "foo"
    assert newer.username == current_user
    assert older.lcms_run.username == "foo"
    assert newer.lcms_run.username == "foo"
Example #18
0
def test_recover(sqlite):
    """A group emptied in memory but not re-stored is retrieved from the database with its items."""
    group = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    group.name = "howdy"
    top_version = group.unique_id
    sub_version = group.items[1].unique_id

    mo.store(group)
    mo.store(group)  # should have no effect
    assert len(group.items) == 2
    assert group.unique_id == top_version

    # make sure we can recover the previous version
    group.items = []
    assert group.unique_id == top_version
    recovered = mo.retrieve("group", unique_id=top_version)[0]
    assert recovered.unique_id == top_version
    recovered_count = len(recovered.items)
    assert recovered_count == 2, recovered_count
    assert recovered.unique_id == top_version
    assert recovered.items[1].unique_id == sub_version
Example #19
0
 def set_note(self, compound_idx: int, which: str, value: str) -> None:
     """
     Set one notes field of a compound in every place it is held.

     inputs:
         compound_idx: index of the compound to update
         which: 'ms1_notes', 'ms2_notes' or 'identification_notes'
         value: a string with the note content
     side effects:
         Writes the note to the compound identification in self.atlas and
         in the first sample of self.data, updates self.atlas_df, and stores
         the atlas compound identification in the database, so no data
         structures need to be invalidated.
         Raises ValueError (after logging it) if self.atlas is None.
     """
     try:
         if self.atlas is None:
             raise ValueError("Cannot set notes when atlas is None.")
     except ValueError as err:
         logger.exception(err)
         raise err
     assert which in ["ms1_notes", "ms2_notes", "identification_notes"]
     atlas_identification = self.atlas.compound_identifications[compound_idx]
     setattr(atlas_identification, which, value)
     # only the first sample's identification object is updated here
     first_sample = self.data[0]
     setattr(first_sample[compound_idx]["identification"], which, value)
     self.atlas_df.loc[compound_idx, which] = value
     metob.store(atlas_identification)
Example #20
0
def create_c18_stds_atlases(source: os.PathLike,
                            polarity: str,
                            mz_tolerance: float = 10) -> None:
    """Build the C18 QC standards atlas for one polarity and store it.

    Reads a tab-separated table from ``source``, adds a hard-coded row for
    2-Amino-3-bromo-5-methylbenzoic acid, keeps the rows whose ``inchi_key``
    is one of the standards and whose ``polarity`` matches, picks one row per
    ``inchi_key`` (a "Platinum" ``confidence_category`` row when one exists),
    and stores the atlas built from those rows.

    Args:
        source: path of the tab-separated input table.
        polarity: "positive" selects the [M+H]+ adduct for the added row;
            any other value selects [M-H]-.
        mz_tolerance: passed through unchanged to ``make_atlas_from_df``.
    """
    data = pd.read_csv(source, sep="\t")
    std_inchi_keys = {
        "Phenylalanine": "COLNVLDHVKWLRT-QMMMGPOBSA-N",
        "L-Tryptophan": "QIVBCDIJIAJPQS-SECBINFHSA-N",
        "Salicylic acid": "YGSDEFSMJLZEOE-UHFFFAOYSA-N",
        # this one will not be found in c18_data...
        "2-Amino-3-bromo-5-methylbenzoic acid": "LCMZECCEEOQWLQ-UHFFFAOYSA-N",
    }
    abmba = "2-Amino-3-bromo-5-methylbenzoic acid"
    is_positive = polarity == "positive"
    more_rows = pd.DataFrame({
        "inchi_key": [std_inchi_keys[abmba]],
        "label": [abmba],
        "adduct": ["[M+H]+" if is_positive else "[M-H]-"],
        "polarity": [polarity],
        "rt_min": [4.5],
        "rt_peak": [4.7],
        "rt_max": [4.9],
        "mz": [228.97384 + (1.00727647 * (1 if is_positive else -1))],
        "confidence_category": ["Platinum"],
    })
    # DataFrame.append was removed in pandas 2.0, so concatenate instead.
    # ignore_index=True gives every row a unique label; otherwise the added
    # row would share label 0 with the first row of the source table and the
    # idxmax/.loc selection below could return both rows for one label.
    data = pd.concat([data, more_rows], ignore_index=True)
    acceptable = data[data["inchi_key"].isin(std_inchi_keys.values())]
    by_polarity = acceptable[acceptable["polarity"] == polarity]
    by_polarity = by_polarity.assign(label=None)
    # boolean rank: idxmax returns the first Platinum row of each group, or
    # the group's first row when it has no Platinum entry
    by_polarity["rank"] = by_polarity["confidence_category"] == "Platinum"
    best_rows = by_polarity.groupby(["inchi_key"])["rank"].idxmax()
    single = by_polarity.loc[best_rows]
    name = f"C18_{datetime.today().strftime('%Y%m%d')}_QC_{polarity[:3].upper()}"
    atlas = make_atlas_from_df(single, name, polarity, mz_tolerance)
    metob.store(atlas)
Example #21
0
def test_store_stubs(sqlite):
    """A retrieved group exposes its first item as a Group and can be stored again."""
    original = mo.Group(items=[mo.Group(items=[mo.LcmsRun()]), mo.LcmsRun()])
    mo.store(original)
    reloaded = mo.retrieve("group", unique_id=original.unique_id)[0]
    first_item = reloaded.items[0]
    assert isinstance(first_item, mo.Group)
    mo.store(reloaded)
Example #22
0
def convert(file):
    """Convert one mzML file to HDF5 and register it in the database.

    Args:
        file: an ``(index, filename)`` pair. ``index`` is zero-based and is
            only used in the progress line written to stdout.

    Returns:
        None. Progress is written to stdout and problems to stderr. Failures
        are also recorded in the ``readonly_files`` and ``other_errors``
        mappings, which are defined outside this function.
    """
    ind = file[0]
    fname = file[1]

    sys.stdout.write('(%s): %s\n' % (ind + 1, fname))
    sys.stdout.flush()

    # Get relevant information about the file.
    info = patt.match(os.path.abspath(fname))
    if info:
        info = info.groupdict()
    else:
        sys.stdout.write("Invalid path name: %s\n" % fname)
        sys.stdout.flush()
        return
    dirname = os.path.dirname(fname)
    # Owner of the file, else owner of its directory, else the username
    # parsed from the path. pwd.getpwuid raises KeyError (not OSError) when
    # the uid has no passwd entry, so both must trigger the fallback.
    try:
        username = pwd.getpwuid(os.stat(fname).st_uid).pw_name
    except (OSError, KeyError):
        try:
            username = pwd.getpwuid(os.stat(dirname).st_uid).pw_name
        except Exception:
            username = info['username']

    # Best effort: make the file readable and writable by owner and group.
    try:
        os.chmod(fname, 0o660)
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()

    # Disabled: copy the original file to a pasteur backup.
    #     if os.environ['USER'] == 'pasteur':
    #         pasteur_path = fname.replace('raw_data', 'pasteur_backup')
    #         dname = os.path.dirname(pasteur_path)
    #         if not os.path.exists(dname):
    #             os.makedirs(dname)
    #         try:
    #             shutil.copy(fname, pasteur_path)
    #         except IOError as e:
    #             if (username not in readonly_files):
    #                 readonly_files[username] = set()
    #             readonly_files[username].add(dirname)
    #             return

    # Get a lock on the mzml file to prevent interference. Opening and
    # locking are separate steps so that a failed open() never reaches a
    # close() on a file object that was never created.
    try:
        fid = open(fname, 'r')
    except IOError as e:
        sys.stderr.write('Cannot open %s: %s\n' % (fname, e))
        sys.stderr.flush()
        return
    try:
        fcntl.flock(fid, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        fid.close()
        msg = '%s already converting in another process\n' % fname
        sys.stderr.write(msg)
        sys.stderr.flush()
        return

    # Convert to HDF and store the entry in the database.
    try:
        hdf5_file = fname.replace('mzML', 'h5')
        sys.stderr.write('hdf5file is: %s' % hdf5_file)
        # Get Acquisition Time Here
        acquisition_time = get_acqtime_from_mzml(fname)
        mzml_to_hdf(fname, hdf5_file, True)
        os.chmod(hdf5_file, 0o660)
        description = info['experiment'] + ' ' + info['path']
        ctime = os.stat(fname).st_ctime
        # Add this to the database unless it is already there
        try:
            runs = retrieve('lcmsrun', username='******', mzml_file=fname)
        except Exception:
            runs = list()
        if not len(runs):
            run = LcmsRun(name=info['path'],
                          description=description,
                          username=info['username'],
                          experiment=info['experiment'],
                          creation_time=ctime,
                          last_modified=ctime,
                          mzml_file=fname,
                          hdf5_file=hdf5_file,
                          acquisition_time=acquisition_time)
            store(run)
    except Exception as e:
        if 'exists but it can not be written' in str(e):
            # unwritable output: remember the directory for this user
            if username not in readonly_files:
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
        else:
            msg = traceback.format_exception(*sys.exc_info())
            msg.insert(0, 'Cannot convert %s' % fname)
            path_user = info['username']
            if path_user not in other_errors:
                other_errors[path_user] = list()
            other_errors[path_user].append('\n'.join(msg))
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()
        # Remove any partial output; it is fine if it was never created.
        try:
            os.remove(hdf5_file)
        except OSError:
            pass
    finally:
        fid.close()
Example #23
0
def test_escape_glob(sqlite):
    """A query for "Flow %%" finds the run whose description is "Flow %"."""
    run = mo.LcmsRun(description="Flow %")
    mo.store(run)
    matches = mo.retrieve("lcmsrun", description="Flow %%")
    last_match = matches[-1]
    assert last_match.unique_id == run.unique_id
Example #24
0
def test_id_grade_trait(sqlite):
    """Passing identification_grade="e" resolves to the stored grade named "E"."""
    stored_grade = mo.IdentificationGrade(name="E")
    mo.store(stored_grade)
    identification = mo.CompoundIdentification(identification_grade="e")
    resolved = identification.identification_grade
    assert resolved.unique_id == stored_grade.unique_id
Example #25
0
def convert(ind, fname):
    """Convert a single mzML file to HDF5 and register it in the database.

    Args:
        ind: zero-based position of the file, used only in the log message.
        fname: path of the mzML file to convert.

    Returns:
        None. On failure the error is logged and recorded in the
        ``readonly_files`` or ``other_errors`` mappings (defined outside this
        function). For failures other than an unwritable output file, the
        mzML file is moved to the matching ``conversion_failures`` path.
    """
    logger.info("Converting file number %d: %s", ind + 1, fname)

    # Get relevant information about the file.
    username = _file_name_to_username(fname, DEFAULT_USERNAME)
    info = patt.match(os.path.abspath(fname))
    if info:
        info = info.groupdict()
    else:
        logger.error("Invalid path name: %s", fname)
        return
    dirname = os.path.dirname(fname)

    # Convert to HDF and store the entry in the database.
    try:
        hdf5_file = fname.replace('mzML', 'h5')
        logger.info("Generating h5 file: %s", hdf5_file)
        mzml_to_hdf(fname, hdf5_file, True)
        os.chmod(
            hdf5_file, 0o660
        )  # this can be changed to 0o440 once everyone is on the current code
        # Add this to the database unless it is already there
        try:
            runs = retrieve('lcmsrun', username='******', mzml_file=fname)
        except Exception:
            runs = []
        if not runs:
            ctime = os.stat(fname).st_ctime
            logger.info("LCMS run not in DB, inserting new entry.")
            run = LcmsRun(name=info['path'],
                          description=f"{info['experiment']} {info['path']}",
                          username=username,
                          experiment=info['experiment'],
                          creation_time=ctime,
                          last_modified=ctime,
                          mzml_file=fname,
                          hdf5_file=hdf5_file,
                          acquisition_time=get_acqtime_from_mzml(fname))
            store(run)
    except Exception as e:
        logger.error("During file conversion: %s", str(e))
        if 'exists but it can not be written' in str(e):
            logger.error("Cannot write to file within directory %s", dirname)
            if username not in readonly_files:
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
        else:
            msg = traceback.format_exception(*sys.exc_info())
            msg.insert(0, f"Cannot convert {fname}")
            if username not in other_errors:
                other_errors[username] = []
            other_errors[username].append('\n'.join(msg))
            fail_path = fname.replace('raw_data', 'conversion_failures')
            logger.error("Moving mzml file to %s", fail_path)
            move_file(fname, fail_path)
        # Remove any partial output. Only file-system errors (for example
        # the file never having been created) are ignored here, so that
        # KeyboardInterrupt and programming errors are not swallowed.
        try:
            os.remove(hdf5_file)
        except OSError:
            pass
Example #26
0
def create_c18_template_atlases(source: os.PathLike, polarity: str) -> None:
    """Generate the C18 template atlas for one polarity and store it.

    Args:
        source: passed through to ``generate_template_atlas``.
        polarity: "negative" or "positive"; anything else fails the assert.

    The atlas name is ``C18_<YYYYMMDD>_TPL_<POL>``, using today's date and
    the upper-cased first three letters of ``polarity``. The atlas is
    generated with the confidence categories "Gold" and "Platinum".
    """
    assert polarity in ["negative", "positive"]
    date_stamp = datetime.today().strftime('%Y%m%d')
    polarity_tag = polarity[:3].upper()
    name = f"C18_{date_stamp}_TPL_{polarity_tag}"
    categories = ["Gold", "Platinum"]
    metob.store(generate_template_atlas(source, categories, polarity, name))
Example #27
0
def test_stub_instance(sqlite):
    """The lcms_run of a retrieved reference is an LcmsRun instance."""
    run = mo.LcmsRun(username="******")
    reference = mo.Reference(name="hello", lcms_run=run)
    mo.store(reference, _override=True)
    retrieved = mo.retrieve("reference", name="hello")[0]
    linked_run = retrieved.lcms_run
    assert isinstance(linked_run, mo.LcmsRun)
Example #28
0
def test_store_atlas06(atlas, sqlite_with_atlas, username):
    """Storing the atlas under a new name yields exactly one atlas with that name and username."""
    atlas.name = "test atlas 06"
    metob.store(atlas)
    matches = metob.retrieve("Atlas", name=atlas.name, username=username)
    match_count = len(matches)
    assert match_count == 1
Example #29
0
def test_store_atlas07(atlas, sqlite, username):
    """A stored atlas is still retrievable after Workspace.instance is reset to None."""
    atlas.name = "test_store_atlas07"
    metob.store(atlas)
    # clear the cached workspace before querying
    metoh.Workspace.instance = None
    found = metob.retrieve("Atlas", name=atlas.name, username=username)
    found_count = len(found)
    assert found_count == 1
Example #30
0
def update_metatlas(directory):
    """Convert new mzML files under ``directory`` and e-mail problem reports.

    Steps, in order:
      1. Use ``find`` to list every ``*.mzML`` file, and every ``*.h5`` file
         larger than 2k that was modified after ``VERSION_TIMESTAMP``.
      2. For each mzML file without such an h5 file: chmod it to 0o660,
         back it up when running as user ``pasteur``, take an exclusive
         non-blocking lock on it, convert it to HDF5, and store an
         ``LcmsRun`` entry unless one already exists for that mzML file.
      3. Send mail per user about unwritable directories, invalid runs and
         other conversion errors.

    Args:
        directory: root directory to search; converted to an absolute path.

    Returns:
        None. Progress is written to stdout and errors to stderr.
    """
    readonly_files = defaultdict(set)
    other_errors = defaultdict(list)
    directory = os.path.abspath(directory)

    # Sleep a random amount of time to avoid running at the same time as
    # other processes.
    time.sleep(random.random() * 2)
    # Arguments are passed as a list (no shell), so a directory containing
    # spaces or shell metacharacters is handled literally.
    mzml_files = check_output(['find', directory, '-name', '*.mzML'])
    mzml_files = mzml_files.decode('utf-8').splitlines()

    # Find valid h5 files newer than the format version timestamp.
    delta = int((time.time() - VERSION_TIMESTAMP) / 60)
    check = ['find', directory, '-name', '*.h5',
             '-mmin', '-%s' % delta, '-size', '+2k']
    valid_files = set(check_output(check).decode('utf-8').splitlines())

    new_files = [
        mzml_file for mzml_file in mzml_files
        if mzml_file.replace('.mzML', '.h5') not in valid_files
    ]

    patt = re.compile(r".+\/raw_data\/(?P<username>[^/]+)\/(?P<experiment>[^/]+)\/(?P<path>.+)")

    sys.stdout.write('Found %s files\n' % len(new_files))
    sys.stdout.flush()

    for (ind, fname) in enumerate(new_files):
        sys.stdout.write('(%s of %s): %s\n' % (ind + 1, len(new_files), fname))
        sys.stdout.flush()

        # Get relevant information about the file.
        info = patt.match(os.path.abspath(fname))
        if info:
            info = info.groupdict()
        else:
            sys.stdout.write("Invalid path name: %s\n" % fname)
            sys.stdout.flush()
            continue
        dirname = os.path.dirname(fname)
        # Owner of the file, else owner of its directory, else the username
        # parsed from the path. pwd.getpwuid raises KeyError (not OSError)
        # when the uid has no passwd entry, so both trigger the fallback.
        try:
            username = pwd.getpwuid(os.stat(fname).st_uid).pw_name
        except (OSError, KeyError):
            try:
                username = pwd.getpwuid(os.stat(dirname).st_uid).pw_name
            except Exception:
                username = info['username']

        # Best effort: make the file readable and writable by owner and group.
        try:
            os.chmod(fname, 0o660)
        except Exception as e:
            sys.stderr.write(str(e) + '\n')
            sys.stderr.flush()

        # Copy the original file to a pasteur backup.
        if getpass.getuser() == 'pasteur':
            pasteur_path = fname.replace('raw_data', 'pasteur_backup')
            dname = os.path.dirname(pasteur_path)
            if not os.path.exists(dname):
                os.makedirs(dname)
            try:
                shutil.copy(fname, pasteur_path)
            except IOError:
                readonly_files[username].add(dirname)
                continue

        # Get a lock on the mzml file to prevent interference. Opening and
        # locking are separate steps so that a failed open() never reaches
        # a close() on a file object that was never created.
        try:
            fid = open(fname, 'r')
        except IOError as e:
            sys.stderr.write('Cannot open %s: %s\n' % (fname, e))
            sys.stderr.flush()
            continue
        try:
            fcntl.flock(fid, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError:
            fid.close()
            msg = '%s already converting in another process\n' % fname
            sys.stderr.write(msg)
            sys.stderr.flush()
            continue

        # Convert to HDF and store the entry in the database.
        try:
            hdf5_file = fname.replace('mzML', 'h5')

            # Get Acquisition Time Here
            acquisition_time = get_acqtime_from_mzml(fname)
            mzml_to_hdf(fname, hdf5_file, True)
            os.chmod(hdf5_file, 0o660)
            description = info['experiment'] + ' ' + info['path']
            ctime = os.stat(fname).st_ctime
            # Add this to the database unless it is already there
            try:
                runs = retrieve('lcmsrun', username='******', mzml_file=fname)
            except Exception:
                runs = list()
            if not len(runs):
                run = LcmsRun(name=info['path'], description=description,
                              username=info['username'],
                              experiment=info['experiment'],
                              creation_time=ctime, last_modified=ctime,
                              mzml_file=fname, hdf5_file=hdf5_file,
                              acquisition_time=acquisition_time)
                store(run)
        except Exception as e:
            if 'exists but it can not be written' in str(e):
                readonly_files[username].add(dirname)
            else:
                msg = traceback.format_exception(*sys.exc_info())
                msg.insert(0, 'Cannot convert %s' % fname)
                other_errors[info['username']].append('\n'.join(msg))
            sys.stderr.write(str(e) + '\n')
            sys.stderr.flush()
            # Remove any partial output; it is fine if it was never created.
            try:
                os.remove(hdf5_file)
            except OSError:
                pass
        finally:
            fid.close()

    # Handle errors.
    from metatlas.metatlas_objects import find_invalid_runs
    invalid_runs = find_invalid_runs(_override=True)

    if readonly_files:
        for (username, dirnames) in readonly_files.items():
            body = ("Please log in to NERSC and run 'chmod 777' on the "
                   "following directories:\n%s" % ('\n'.join(dirnames)))
            send_mail('Metatlas Files are Inaccessible', username, body)
    if invalid_runs:
        # one mail per user, listing that user's invalid mzML files
        grouped = defaultdict(list)
        for run in invalid_runs:
            grouped[run.username].append(run.mzml_file)
        for (username, filenames) in grouped.items():
            body = 'You have runs that are not longer accessible\n'
            body += 'To remove them from the database, run the following on ipython.nersc.gov:\n\n'
            body += 'from metatlas.metatlas_objects import find_invalid_runs, remove_objects\n'
            body += 'remove_objects(find_invalid_runs())\n\n'
            body += 'The invalid runs are:\n%s' % ('\n'.join(filenames))
            send_mail('Metatlas Runs are Invalid', username, body)
    if other_errors:
        for (username, errors) in other_errors.items():
            body = 'Errored files found while loading in Metatlas files:\n\n%s' % '\n********************************\n'.join(errors)
            send_mail('Errors loading Metatlas files', username, body)
    sys.stdout.write('Done!\n')
    sys.stdout.flush()