Beispiel #1
0
    def __init__(self, extract_file=True):
        """
        Fetches the LipidMaps data through `_curl.Curl` and initializes
        the underlying `sdf.SdfReader` on it.

        The URL and the name of the entry to read are taken from the
        `lipidmaps_url` and `lipidmaps_fname` settings.

        :param bool extract_file:
            If true, the entry is first written into a file in the cache
            directory (`cachedir` setting) and the reader is opened on
            that file; its path is kept in `efname`. Otherwise the reader
            is initialized directly with the object from the download
            result.
        """

        self.url = settings.get('lipidmaps_url')
        self.fname = settings.get('lipidmaps_fname')
        self.curl = _curl.Curl(self.url, large=True, silent=False)

        if not extract_file:

            sdf.SdfReader.__init__(self, self.curl.result[self.fname])

        else:

            # only the last path component names the file in the cache
            cache_name = self.fname.split('/')[-1]
            self.efname = os.path.join(settings.get('cachedir'), cache_name)

            with open(self.efname, 'wb') as extracted:

                extracted.writelines(self.curl.result[self.fname])

            # this handle is not closed here: it is handed over to the
            # SdfReader initializer
            sdf.SdfReader.__init__(self, open(self.efname, 'rb'))

        self.nameproc = lipidname.LipidNameProcessor(
            database='lipidmaps',
            iso=True,
        )
Beispiel #2
0
    def __init__(self,
                 fname,
                 label=None,
                 charge=1,
                 rt_tolerance=None,
                 drift=1.0,
                 tolerance=None):
        """
        Provides methods for looking up MS2 scans from an MGF file.

        :param str fname:
            Path to the MGF file.
        :param label:
            Stored on the instance as `label`.
        :param charge:
            Stored on the instance as `charge`.
        :param rt_tolerance:
            Retention time tolerance; if not provided (or falsy) the
            `deltart_threshold` setting is used.
        :param drift:
            Stored on the instance as `drift`.
        :param tolerance:
            Precursor mass tolerance, reported in ppm in the log; if not
            provided (or falsy) the `precursor_match_tolerance` setting
            is used.
        """

        session.Logger.__init__(self, name='mgf')

        self.fname = fname
        self.label = label
        self.charge = charge

        if not rt_tolerance:
            rt_tolerance = settings.get('deltart_threshold')
        self.rt_tolerance = rt_tolerance

        self.drift = drift

        # the index is built before the remaining attributes are set
        self.index()

        self.ms2_rt_within_range = settings.get('ms2_rt_within_range')

        if not tolerance:
            tolerance = settings.get('precursor_match_tolerance')
        self.tolerance = tolerance

        if self.ms2_rt_within_range:
            rt_part = ' in RT range +/-%.02f' % self.rt_tolerance
        else:
            rt_part = 'with ignoring RT'

        self._log(
            'MGF reader initialized for file `%s`, '
            'looking up MS2 spectra for precursor features %s, '
            'with a mass tolerance of %.01f ppm.' %
            (self.fname, rt_part, self.tolerance))
Beispiel #3
0
    def test_sec_profile_1(self):
        """
        Builds a `SECProfile` with default arguments from the files named
        by the `peaks_gltpd1_invitro` and `sec_gltpd1_invitro` settings
        and checks that it is aligned and cross-registered with the
        sample set, and that the profile maximum has the expected value.
        """

        peakspath = settings.get('peaks_gltpd1_invitro')
        secpath = settings.get('sec_gltpd1_invitro')

        reader = sample.SampleReader(
            input_type='peaks',
            fname=peakspath,
        )

        samples = reader.get_sampleset(
            sample_id_proc=sampleattrs.plate_sample_id_processor(), )

        secprofile = sampleattrs.SECProfile(
            sec_path=secpath,
            samples=samples,
        )

        assert secprofile.numof_samples == samples.numof_samples
        assert np.all(secprofile.attrs.sample_index_to_id ==
                      samples.attrs.sample_index_to_id)
        # the two objects register each other by `id()`
        assert id(samples) in secprofile._sample_data
        assert id(secprofile) in samples._sample_data
        # absolute difference: without `abs` any value below the expected
        # maximum would pass the check
        assert abs(secprofile.profile.max() - 143.40397368421048) < 0.0001
Beispiel #4
0
    def test_sec_profile_2(self):
        """
        Builds a `SECProfile` with two offsets from the files named by the
        `peaks_gltpd1_invivo` and `sec_gltpd1_invivo` settings and checks
        the alignment with the sample set as well as the per-offset
        profiles (`profile015` and `profile045`).
        """

        peakspath = settings.get('peaks_gltpd1_invivo')
        secpath = settings.get('sec_gltpd1_invivo')

        reader = sample.SampleReader(
            input_type='peaks',
            fname=peakspath,
        )

        samples = reader.get_sampleset(
            sample_id_proc=sampleattrs.plate_sample_id_processor(), )

        secprofile = sampleattrs.SECProfile(
            sec_path=secpath,
            samples=samples,
            start_volume=1.2,
            offsets=(0.015, 0.045),
            start_col=9,
            start_row='A',
            length=samples.numof_samples,
        )

        assert secprofile.numof_samples == samples.numof_samples
        assert np.all(secprofile.attrs.sample_index_to_id ==
                      samples.attrs.sample_index_to_id)
        # the two objects register each other by `id()`
        assert id(samples) in secprofile._sample_data
        assert id(secprofile) in samples._sample_data
        # absolute difference: without `abs` any value below the expected
        # maximum would pass the check
        assert abs(secprofile.profile015.max() - 20.92373913043478) < 0.0001
        assert secprofile.profile045.argmax() == 2
        assert secprofile.profile015.argmax() == 3
        assert secprofile.profiles == {'profile045', 'profile015'}
Beispiel #5
0
    def __init__(
        self,
        resources=None,
        tolerance=None,
        fa_args=None,
        sph_args=None,
        build=True,
        verbose=False,
        database_preference=None,
    ):
        """
        Builds a database of molecules and provides methods for look up by
        masses and names. Metabolites are processed from databases like
        SwissLipids and LipidMaps and also autogenerated using classes
        defined in the `lipid` module.

        Args
        ----
        resources : dict
            Databases to use with arguments. Keys are database names, values
            are tuples of classes and arguments. Default SwissLipids and
            LipidMaps, both without arguments.
        tolerance : int
            Mass lookup tolerance in ppm. If not provided (or falsy) the
            `ms1_tolerance` setting is used.
        fa_args : dict
            Fatty acyl arguments for autogenerated metabolites.
            Default `{'c': (4, 36), 'u': (0, 10)}`.
        sph_args : dict
            Sphingosine base arguments for autogenerated metabolites.
            Default `{'c': (16, 22), 'u': (0, 1)}`.
        build : bool
            Call `build()` at the end of the initialization.
        verbose : bool
            Stored on the instance as `verbose`.
        database_preference :
            Stored on the instance; if not provided (or falsy) the
            `database_preference` setting is used.
        """

        self.verbose = verbose

        if not resources:
            resources = {
                'SwissLipids': (SwissLipids, {}),
                'LipidMaps': (LipidMaps, {}),
            }
        self.resources = resources

        if not tolerance:
            tolerance = settings.get('ms1_tolerance')
        self.tolerance = tolerance
        self._daltons_tolerance = False

        if not fa_args:
            fa_args = {'c': (4, 36), 'u': (0, 10)}
        self.fa_args = fa_args

        if not sph_args:
            sph_args = {'c': (16, 22), 'u': (0, 1)}
        self.sph_args = sph_args

        if not database_preference:
            database_preference = settings.get('database_preference')
        self.database_preference = database_preference

        if build:

            self.build()
0
    def __init__(
        self,
        mzs,
        ionmode,
        precursor=None,
        tolerance=None,
    ):
        """
        Annotates all fragments in an MS2 scan with possible identities.

        Args
        ----
        :param np.ndarray mzs:
            MS2 scan fragment m/z's.
        :param str ionmode:
            MS ion mode; `pos` or `neg`.
        :param float precursor:
            Precursor ion m/z.
        :param tolerance:
            Fragment matching tolerance; if not provided (or falsy) the
            `ms2_tolerance` setting is used.
        """

        if not tolerance:
            tolerance = settings.get('ms2_tolerance')

        self.mzs, self.ionmode = mzs, ionmode
        self.precursor = precursor
        self.tolerance = tolerance
Beispiel #7
0
    def get_default_file(self):
        """
        Returns the file name of the default fragment list.

        The file names are stored in the `pfragmentsfile` and
        `nfragmentsfile` settings; the former is used if `ionmode` is
        `pos`, the latter for any other value.

        The fragment list files should have at least 4 columns:

        * m/z as float
        * formula -- either formula or m/z should be provided,
          mass calculation from formula has priority over the
          mass in first column
        * human readable name
        * type: e.g. `[M+H]+`; importantly, for neutral losses
          this value must start with `NL`
        * headgroups (lipid classes), e.g. `PC;SM`

        See the built in fragment lists for examples.

        Returns
        -------
        The value of the corresponding setting.
        """

        mode_prefix = 'p' if self.ionmode == 'pos' else 'n'
        settings_key = '%sfragmentsfile' % mode_prefix

        return settings.get(settings_key)
Beispiel #8
0
    def mz_lowest_error_from_name(
        self,
        measured_mz,
        adduct,
        name=None,
        **kwargs,
    ):
        """
        Regarding a measured m/z and an assumed adduct type and name
        returns the m/z of the record with matching name and lowest error.

        :param float measured_mz:
            The measured m/z the candidates are compared to.
        :param str adduct:
            Adduct type; key in the `ex2ad_all` setting which gives the
            name of the `formula.Formula` method doing the conversion.
        :param str name:
            Name passed to `masses_from_name`.
        :param **kwargs:
            Passed to `masses_from_name`.

        :return:
            The adduct m/z with the lowest absolute ppm error, or `None`
            if `masses_from_name` returned `None` or no masses at all.
        """

        exmasses = self.masses_from_name(name=name, **kwargs)

        if exmasses is None:

            return None

        adduct_method = settings.get('ex2ad_all')[adduct]

        addmasses = np.array([
            getattr(formula.Formula(exmass), adduct_method)()
            for exmass in exmasses
        ])

        # `np.argmin` raises ValueError on an empty array; no candidates
        # is treated the same way as no match
        if addmasses.size == 0:

            return None

        ppms = np.array(
            [common.ppm(addmass, measured_mz) for addmass in addmasses])

        return addmasses[np.argmin(np.abs(ppms))]
Beispiel #9
0
def new_logger(name=None, logdir=None, verbosity=None, **kwargs):
    """
    Returns a new logger with default settings (can be customized).

    Parameters
    ----------
    name : str
        Custom name for the log. Defaults to the `module_name` setting.
    logdir : str
        Path to the directory to store log files. Defaults to
        ``<name>_log``.
    verbosity : int
        Verbosity level, lowest is 0. Messages from levels above this
        won't be written to the log. If `None`, level 0 is used.
    **kwargs
        Passed to ``Logger``.

    Returns
    -------
    ``log.Logger`` instance.
    """

    name = name or settings.get('module_name')
    logdir = logdir or '%s_log' % name

    # The log file name carries the timestamp; spaces and colons are
    # replaced to get a file system friendly name.
    fname = '%s__%s.log' % (
        name,
        Logger.timestamp().replace(' ', '_').replace(':', '.'),
    )

    return Logger(
        fname=fname,
        # Honour the value requested by the caller; 0 used to be
        # hard-coded here and is kept as the fallback.
        verbosity=0 if verbosity is None else verbosity,
        logdir=logdir,
        **kwargs,
    )
Beispiel #10
0
    def cache_dir_exists(self):
        """
        Makes sure the cache directory exists.

        If `cache_dir` is `None` it is first set from the `cachedir`
        setting. The directory (with its missing parents) is created if
        it does not exist yet.
        """

        if self.cache_dir is None:

            self.cache_dir = settings.get('cachedir')

        # `exist_ok` instead of a preceding `os.path.exists` check: no
        # failure if something else creates the directory in between
        os.makedirs(self.cache_dir, exist_ok=True)
Beispiel #11
0
    def __init__(
        self,
        fname,
        verbosity=None,
        console_level=None,
        logdir=None,
        max_width=200,
    ):
        """
        Sets up the logger: registers a flush job on the module level
        time loop, opens the log file and writes the greeting messages.

        fname : str
            Log file name.
        logdir : str
            Path to the directory containing the log files. Processed by
            `get_logdir`.
        verbosity : int
            Messages at and below this level will be written into the
            logfile. All other messages will be dropped. If `None`, the
            `log_verbosity` setting is used.
        console_level : int
            Messages below this log level will be printed not only into
            logfile but also to the console. If `None`, the
            `console_verbosity` setting is used.
        max_width : int
            Line width of the text wrapper stored in `wrapper`.
        """
        # Job on the shared time loop which flushes this logger; the
        # interval in seconds comes from the `log_flush_interval` setting.
        # NOTE(review): a new job is registered and `start` is called for
        # every Logger instance -- confirm the time loop tolerates this.
        @_log_flush_timeloop.job(interval=datetime.timedelta(
            seconds=settings.get('log_flush_interval')))
        def _flush():

            self.flush()

        _log_flush_timeloop.start(block=False)

        # continuation lines are indented by 22 spaces -- presumably to
        # align with the message text after the line prefix (TODO confirm)
        self.wrapper = textwrap.TextWrapper(
            width=max_width,
            subsequent_indent=' ' * 22,
            break_long_words=False,
        )
        self.logdir = self.get_logdir(logdir)
        self.fname = os.path.join(self.logdir, fname)
        # explicit `is not None` checks: level 0 is a valid value
        self.verbosity = (verbosity if verbosity is not None else
                          settings.get('log_verbosity'))
        self.console_level = (console_level if console_level is not None else
                              settings.get('console_verbosity'))
        self.open_logfile()

        # sending some greetings
        self.msg('Welcome!')
        self.msg('Logger started, logging into `%s`.' % self.fname)
Beispiel #12
0
    def get_logdir(self, dirname=None):
        """
        Returns the path to the log directory and makes sure it exists.

        :param str dirname:
            Path of the log directory. If not provided (or empty) the
            name is the `module_name` setting with the `_log` suffix.

        :return:
            The directory path as it was used for the creation.
        """

        if not dirname:

            dirname = '%s_log' % settings.get('module_name')

        # creates missing parents too; fine if it already exists
        os.makedirs(dirname, exist_ok=True)

        return dirname
Beispiel #13
0
    def __init__(self,
                 database='swisslipids',
                 with_alcohols=True,
                 with_coa=True,
                 iso=False):
        """
        Processes lipid names used in databases. Converts names to the
        standard used in this module and extracts carbon count and
        unsaturation information and other features.

        :param str database:
            Name of the database the names come from; stored in lower
            case.
        :param bool with_alcohols:
            Stored on the instance as `with_alcohols`.
        :param bool with_coa:
            Stored on the instance as `with_coa`.
        :param bool iso:
            Stored on the instance as `iso`.
        """

        self.database = database.lower()
        self.with_alcohols, self.with_coa = with_alcohols, with_coa
        self.iso = iso

        # values coming from the module settings
        self.lipnamesf = settings.get('lipnamesf')
        self.adducts_constraints = settings.get('adducts_constraints')

        # these run only after all attributes above are in place
        self.gen_fa_greek()
        self.read_lipid_names()
Beispiel #14
0
    def test_sec_unicorn_asc(self):
        """
        Reads the file named by the `sec_unicorn_example` setting and
        checks that the fraction with the highest mean is A10.
        """

        reader = sec.SECReader(settings.get('sec_unicorn_example'))

        # fractions ordered by decreasing mean; the first one is the top
        by_mean_desc = sorted(
            reader.profile(),
            key=lambda fraction: fraction.mean,
            reverse=True,
        )
        highest = by_mean_desc[0]

        assert highest.row == 'A'
        assert highest.col == 10
Beispiel #15
0
    def test_protein_containing_samples(self):
        """
        Checks the selection returned by
        `SECProfile.protein_containing_samples` with default arguments,
        with a manual selection and with an excluded sample.
        """

        def matches(selection, expected):
            # element-wise comparison against the expected boolean mask
            return np.all(selection.selection == np.array(expected))

        reader = sample.SampleReader(
            input_type='peaks',
            fname=settings.get('peaks_gltpd1_invivo'),
        )
        secpath = settings.get('sec_gltpd1_invivo')

        samples = reader.get_sampleset(
            sample_id_proc=sampleattrs.plate_sample_id_processor(), )

        profile_args = {
            'sec_path': secpath,
            'samples': samples,
            'start_volume': 1.2,
            'offsets': (0.015, 0.045),
            'start_col': 9,
            'start_row': 'A',
            'length': samples.numof_samples,
        }
        secprofile = sampleattrs.SECProfile(**profile_args)

        pcs = secprofile.protein_containing_samples()
        assert matches(pcs, [False, False, True, True, False])

        pcs = secprofile.protein_containing_samples(manual=['A10', 'A11'])
        assert matches(pcs, [False, True, True, False, False])

        pcs = secprofile.protein_containing_samples(exclude=['A12'])
        assert matches(pcs, [False, False, True, False, False])
Beispiel #16
0
    def test_sampleset_from_peaks(self):
        """
        Reads the file named by the `peaks_example` setting into a sample
        set and checks one m/z value and the sample identifiers.
        """

        reader = sample.SampleReader(
            input_type='peaks',
            fname=settings.get('peaks_example'),
        )

        sampleset_args = {
            'sample_id_proc': sampleattrs.plate_sample_id_processor(),
        }
        samples = reader.get_sampleset(sampleset_args=sampleset_args)

        first_sample_label = samples.attrs.attrs[0].attrs['label']

        assert abs(samples.mzs_by_sample[7, 3] - 375.0018) < 0.0001
        assert samples.attrs.sample_index_to_id[-1] == ('A', 12)
        assert first_sample_label['sample_id'] == ('A', 6)
Beispiel #17
0
    def mz_from_name(
        self,
        adduct,
        name=None,
        database_preference=None,
        **kwargs,
    ):
        """
        Returns the m/z of an adduct of the molecule looked up by name.

        :param str adduct:
            Adduct type; key in the `ex2ad_all` setting which gives the
            name of the `formula.Formula` method doing the conversion.
        :param str name:
            Name passed to `mass_from_name`.
        :param database_preference:
            Passed to `mass_from_name`.
        :param **kwargs:
            Passed to `mass_from_name`.

        :return:
            The adduct m/z, or `None` if `mass_from_name` returned `None`.
        """

        exmass = self.mass_from_name(
            name=name,
            database_preference=database_preference,
            **kwargs,
        )

        if exmass is None:

            return None

        method_name = settings.get('ex2ad_all')[adduct]
        to_adduct = getattr(formula.Formula(exmass), method_name)

        return to_adduct()
Beispiel #18
0
    def test_sec_xls(self):
        """
        Reads the file named by the `sec_xls_example` setting and checks
        which fraction has the highest mean with two different start
        volumes.
        """

        reader = sec.SECReader(settings.get('sec_xls_example'))

        def top_fraction(start_volume):
            # fractions ordered by decreasing mean; the first is the top
            ranked = sorted(
                reader.profile(start_volume=start_volume),
                key=lambda fraction: fraction.mean,
                reverse=True,
            )
            return ranked[0]

        highest015 = top_fraction(0.615)
        highest045 = top_fraction(0.645)

        assert (highest015.row, highest015.col) == ('A', 12)
        assert (highest045.row, highest045.col) == ('A', 11)
Beispiel #19
0
    def set_paths(self):
        """
        Sets up the name, the working directory and the output paths.

        * `name`: unless already set, the base name of the input mzML
          (`profile_mzml`, or `centroid_mzml` if the former is empty)
          with the path and extension removed.
        * `wd_root`: falls back to the `ms_preproc_wd` setting.
        * `wd`: `wd_root` joined with `name`; created if missing.
        * `centroid_mzml`, `features_file`: joined to `wd`; if empty,
          default names derived from `name` are used.
        """

        if not hasattr(self, 'name'):

            source_mzml = self.profile_mzml or self.centroid_mzml
            stem, _extension = os.path.splitext(
                os.path.basename(source_mzml))
            self.name = stem

        # the working directory
        if not self.wd_root:

            self.wd_root = settings.get('ms_preproc_wd')

        self.wd = os.path.join(self.wd_root, self.name)
        os.makedirs(self.wd, exist_ok=True)

        # output of the peak picking
        centroid = self.centroid_mzml or '%s__peaks.mzML' % self.name
        self.centroid_mzml = os.path.join(self.wd, centroid)

        # output of the feature detection
        features = (self.features_file
                    or '%s__features.featureXML' % self.name)
        self.features_file = os.path.join(self.wd, features)
Beispiel #20
0
    def __init__(self,
                 levels=None,
                 silent=False,
                 nameproc_args=None,
                 branched=False,
                 exact_mass_formula_fallback=True):
        """
        Downloads and serves the SwissLipids database.

        Automatically downloads the data at the first time and stores it in a
        cache file to be read from there at next usage. Scans the entire file
        and builds multiple indices in order to quickly access records upon
        request. Provides a number of methods to retrieve records either as
        lines or openbabel OBMol instances.

        Args
        ----
        :param set levels:
            Levels in SwissLipids hierarchy. By default only "Species".
        :param bool silent:
            Stored on the instance as `silent`.
        :param dict nameproc_args:
            Arguments passed to the name processor.
        :param bool branched:
            Include lipids with branched alkyl chain (iso).
            NOTE(review): this argument is not used by this initializer.
        :param bool exact_mass_formula_fallback:
            If exact mass is not available from SwissLipids calculate it
            from the formula. This is dangerous because the formula is
            sometimes in dehydrogenated and charged state while exact mass
            should be uncharged with all hydrogens.
        """

        # a fresh set for each instance: a set literal as default value
        # in the signature would be shared across all calls
        if levels is None:

            levels = {'Species'}

        self.silent = silent
        self.exact_mass_formula_fallback = exact_mass_formula_fallback
        self.nameproc_args = nameproc_args or {}
        self.set_levels(levels)
        self.url = settings.get('swisslipids_url')
        self.load()
        self.make_index()
Beispiel #21
0
    def adduct_lookup(
        self,
        mz,
        adducts=None,
        ionmode=None,
        charge=None,
        adduct_constraints=True,
        tolerance=None,
    ):
        """Does a series of lookups in the database assuming various adducts.
        Calculates the exact mass for the m/z for each possible adduct
        and searches these exact masses in the database.

        Returns a dict of tuples with 3-3 numpy arrays.
        Keys of the dict are adduct types. The arrays are exact masses,
        database record details and accuracies (ppm).

        Parameters
        ----------
        mz :
            The measured m/z; wrapped into an `mzmod.Mz` object.
        adducts :
            Adduct types to consider. If empty and `ionmode` is `pos` or
            `neg`, all adducts listed in the `ex2ad` setting for the
            charge and ion mode are used. (Default value = None)
        ionmode :
            Ion mode, used as key in the adduct settings -- presumably
            only `pos` and `neg` are valid keys. (Default value = None)
        charge :
            Charge of the ion; only its absolute value is used. If `None`
            it is 1 for `pos` ion mode and -1 otherwise.
            (Default value = None)
        adduct_constraints :
            If true, a record is kept only if the adduct is allowed for
            its headgroup (`hg`): headgroups listed in the
            `adduct_constraints` setting accept the adducts listed there,
            all others the adducts in the `adducts_default` setting.
            (Default value = True)
        tolerance :
            Passed to `lookup_accuracy`. (Default value = None)

        Returns
        -------
        dict
            Adduct types as keys, the (filtered) results of
            `lookup_accuracy` as values. Adducts without any record
            are omitted.
        """

        result = {}

        mz = mzmod.Mz(mz)

        if charge is None:

            charge = 1 if ionmode == 'pos' else -1

        abs_charge = abs(charge)

        if not adducts and ionmode in {'pos', 'neg'}:

            # we look up all adducts we have a method for
            adducts = list(settings.get('ex2ad')[abs_charge][ionmode].keys())

        ad_default = settings.get('adducts_default')[ionmode][abs_charge]
        ad_constr = settings.get('adduct_constraints')[ionmode]

        # names of the `Mz` methods converting the adduct m/z to exact mass
        exmethods = settings.get('ad2ex')[abs_charge][ionmode]
        methods = {ad: exmethods[ad] for ad in adducts}

        for ad, method in methods.items():

            exmz = getattr(mz, method)()

            res = self.lookup_accuracy(exmz, tolerance=tolerance)

            if adduct_constraints:

                keep = []

                for i in range(res[0].shape[0]):

                    hg = res[1][i].hg
                    # headgroup specific constraints have priority over
                    # the default adducts
                    allowed = ad_constr[hg] if hg in ad_constr else ad_default

                    if ad in allowed:

                        keep.append(i)

                ires = tuple(keep)

                res = (res[0][ires, ], res[1][ires, ], res[2][ires, ])

            if len(res[0]):

                result[ad] = res

        return result
Beispiel #22
0
from lipyd import settings
from lipyd import sample
from lipyd import sampleattrs

# Path of the example peaks table from the settings; it is overridden
# right below by a file name relative to the working directory.
peaksfile = settings.get('peaks_example')
peaksfile = 'stard10_pos.csv'

reader = sample.SampleReader(input_type='peaks', fname=peaksfile)

samples = reader.get_sampleset(
    sampleset_args={
        'sample_id_proc': sampleattrs.plate_sample_id_processor(),
    },
)

# column index of sample A10
a10 = samples.attrs.sample_id_to_index[('A', 10)]

# order of the features by their m/z in sample A10
idx = samples.mzs_by_sample[:, a10].argsort()

samples.sort_all(by=idx)

# looked up again after the sorting, as the original statements did
a10 = samples.attrs.sample_id_to_index[('A', 10)]

# m/z's for one sample:
samples.mzs_by_sample[:, a10]
# intensities for one sample:
samples.intensities[:, a10]
# RTs for one sample:
samples.rts[:, a10]
Beispiel #23
0
# ## 9: MS2 fragment database

# Look up a negative mode fragment m/z in the database. The result is an array with mass, fragment name, fragment type, aliphatic chain type, carbon count, unsaturation and charge in each row. For neutral losses the charge is 0.

# In[89]:

fragdb.lookup_neg(283.26)

# Now let's annotate an MS2 scan with possible fragment identifications. To do this we open an example MGF file included in the module. The `lipyd.mgf` module serves MS2 scans from MGF files on demand. By the way, the `lipyd.settings` module gives easy access to and control over nearly 100 customizable parameters.

# In[104]:

from lipyd import mgf
from lipyd import settings

# path of the example MGF file, taken from the `mgf_example` setting
mgffile = settings.get('mgf_example')
mgfreader = mgf.MgfReader(mgffile)
precursor = 590.45536  # this is a Cer-1P
# scan ids for this precursor and, judging by the name, their RT differences
idx, rtdiff = mgfreader.lookup_scan_ids(precursor)

# We found the following scans for precursor 590.455:

# In[105]:

idx

# Select a scan from the ones above and annotate its fragments:

# In[106]:

# the scan id is hard-coded -- presumably one of the ids listed in `idx`
scan = mgfreader.scan_by_id(1941)