Esempio n. 1
0
def _validate_moldb_df(df):
    """Collect validation problems for every row of a molecule table.

    Two checks are applied to each row: every column must hold a value
    that is neither empty nor whitespace-only, and the ``formula`` column
    must not contain "." and must be accepted by ``parseSumFormula``.

    Args:
        df: DataFrame with a ``formula`` column. Cell values must support
            ``.isspace()`` (i.e. behave like strings).

    Returns:
        list of dicts with the keys ``line``, ``row`` (the row values as a
        list) and ``error``, ordered by ``line``.
    """
    problems = []

    def report(line_no, record, message):
        # One entry per problem; the full row is attached for context.
        problems.append({
            'line': line_no,
            'row': record.values.tolist(),
            'error': message
        })

    for index, record in df.iterrows():
        # Row index shifted by two -- presumably a header line plus
        # 1-based numbering of the source file; confirm with the caller.
        line_no = index + 2

        for column in df.columns:
            cell = record[column]
            if not cell or cell.isspace():
                report(line_no, record, 'Empty value')

        try:
            if '.' in record.formula:
                raise InvalidFormulaError('"." symbol not supported')
            parseSumFormula(record.formula)
        except Exception as exc:
            report(line_no, record, repr(exc))

    return sorted(problems, key=lambda problem: problem['line'])
Esempio n. 2
0
    def centroids(self, formula):
        """
        Compute centroid peaks for ``formula``, padded to ``self.n_peaks``.

        Args
        -----
        formula : str

        Returns
        -----
            tuple ``(mzs, ints)`` of arrays of length ``self.n_peaks``;
            positions beyond the trimmed peaks stay zero and intensities
            are scaled by 100. ``(None, None)`` if any step raises (the
            failure is logged as a warning).
        """
        try:
            # Parsing first makes unparsable formulas fail early.
            pyisocalc.parseSumFormula(formula)
            pattern = isotopePattern(str(formula))
            pattern.addCharge(int(self.charge))
            fwhm = self.sigma * SIGMA_TO_FWHM
            model = InstrumentModel('tof', pattern.masses[0] / fwhm)
            peaks = pattern.centroids(model)
            trimmed_mzs, trimmed_ints = self._trim(
                np.array(peaks.masses),
                100. * np.array(peaks.intensities),
                self.n_peaks)

            # Copy the trimmed peaks into fixed-size, zero-filled arrays.
            count = len(trimmed_mzs)
            padded_mzs = np.zeros(self.n_peaks)
            padded_mzs[:count] = np.array(trimmed_mzs)
            padded_ints = np.zeros(self.n_peaks)
            padded_ints[:count] = trimmed_ints
        except Exception as e:
            logger.warning('%s - %s', formula, e)
            return None, None

        return padded_mzs, padded_ints
Esempio n. 3
0
    def ion_centroids(self, sf, adduct):
        """
        Compute the trimmed centroid peaks of the ion ``sf + adduct``.

        Args
        ----
        sf : str
        adduct: str

        Returns
        -------
        : pair ``(mzs, ints)`` as produced by ``self._trim`` (the
          intensities passed in are scaled by 100), or ``(None, None)``
          when any step raises; the failure is logged as a warning
        """
        try:
            ion = sf + adduct
            # Parsing up front rejects incompatible sf/adduct combinations.
            pyisocalc.parseSumFormula(ion)
            pattern = isotopePattern(str(ion))
            pattern.addCharge(int(self.charge))
            fwhm = self.sigma * SIGMA_TO_FWHM
            model = InstrumentModel('tof', pattern.masses[0] / fwhm)
            peaks = pattern.centroids(model)
            return self._trim(
                np.array(peaks.masses),
                100. * np.array(peaks.intensities),
                ISOTOPIC_PEAK_N)
        except Exception as e:
            logger.warning('%s %s - %s', sf, adduct, e)
            return None, None
    def ion_centroids(self, sf, adduct):
        """
        Return centroid m/z values and intensities for ``sf`` plus ``adduct``.

        Args
        ----
        sf : str
        adduct: str

        Returns
        -------
        : the two values unpacked from ``self._trim`` for the centroid
          masses and the 100-fold scaled intensities; ``(None, None)`` if
          anything fails (a warning is logged in that case)
        """
        try:
            # A failed parse means sf and adduct are not compatible.
            pyisocalc.parseSumFormula(sf + adduct)
            iso = isotopePattern(str(sf + adduct))
            iso.addCharge(int(self.charge))
            width = self.sigma * SIGMA_TO_FWHM
            power = iso.masses[0] / width
            centroided = iso.centroids(InstrumentModel('tof', power))
            raw_mzs = np.array(centroided.masses)
            raw_ints = 100. * np.array(centroided.intensities)
            kept_mzs, kept_ints = self._trim(raw_mzs, raw_ints, ISOTOPIC_PEAK_N)
        except Exception as e:
            logger.warning('%s %s - %s', sf, adduct, e)
            return None, None

        return kept_mzs, kept_ints
Esempio n. 5
0
 def parsable(sf):
     """Tell whether ``parseSumFormula`` accepts ``sf``.

     Returns True when parsing succeeds. Any exception raised by the
     parser is logged as a warning and reported as False.
     """
     try:
         parseSumFormula(sf)
     except Exception as e:
         logger.warning(e)
         return False
     else:
         return True
Esempio n. 6
0
 def get_mass(self):
     """Return the centroid position with the largest intensity.

     The pattern is computed by ``pyisocalc.perfect_pattern`` for
     ``self.sum_formula`` at charge 0. Intermediate values are written
     to the root logger at INFO level.
     """
     logging.info(self.sum_formula)
     logging.info(pyisocalc.parseSumFormula(self.sum_formula))
     parsed = pyisocalc.parseSumFormula(self.sum_formula)
     spec = pyisocalc.perfect_pattern(parsed, charge=0)
     logging.info(spec)
     # Element 0 of the centroid spectrum is indexed by the argmax of
     # element 1 (presumably m/z values and intensities).
     positions = spec.get_spectrum(source='centroids')[0]
     heights = spec.get_spectrum(source='centroids')[1]
     mass = positions[np.argmax(heights)]
     logging.info(mass)
     return mass
Esempio n. 7
0
 def get_mass(self):
     """Compute the mass of ``self.sum_formula`` from its perfect pattern.

     Builds the charge-0 pattern with pyisocalc and returns the entry of
     the first centroid array at the index where the second centroid
     array is largest. Each intermediate value is logged at INFO level.
     """
     logging.info(self.sum_formula)
     logging.info(pyisocalc.parseSumFormula(self.sum_formula))
     spec = pyisocalc.perfect_pattern(
         pyisocalc.parseSumFormula(self.sum_formula), charge=0)
     logging.info(spec)
     first_axis = spec.get_spectrum(source='centroids')[0]
     top_index = np.argmax(spec.get_spectrum(source='centroids')[1])
     mass = first_axis[top_index]
     logging.info(mass)
     return mass
Esempio n. 8
0
 def is_valid(sf):
     """Check that ``sf`` has no "." and is accepted by ``parseSumFormula``.

     Returns True for a parsable formula. A formula containing "." or one
     that makes the parser raise is reported with a warning and yields
     False.
     """
     if '.' not in sf:
         try:
             parseSumFormula(sf)
         except Exception as e:
             LOG.warning(e)
             return False
         return True

     # Formulas with a "." are rejected without being parsed.
     LOG.warning('"." in formula {}, skipping'.format(sf))
     return False
Esempio n. 9
0
    def get_delta_atoms(self):
        """Collapse ``self.delta_formula`` into one signed count per element.

        The formula is a sequence of "+"/"-" prefixed groups. Each group is
        parsed with ``pyisocalc.parseSumFormula`` and its element amounts
        are added to (or subtracted from) a running total. Elements whose
        total is zero are left out of the result.

        Side effect: ``self.delta_formula`` is stripped of surrounding
        whitespace and, when it does not start with a sign, prefixed with
        "+".

        Returns:
            str: concatenation of ``<sign><element><abs(total)>`` entries,
            in order of first appearance; "" when nothing remains.
        """
        self.delta_formula = self.delta_formula.strip()
        # A string can never start with both "+" and "-", so the previous
        # all([...]) guard was always false and an unsigned leading group
        # was silently dropped by the split below. Empty input is left
        # alone so that it still yields "".
        if self.delta_formula and not self.delta_formula.startswith(("+", "-")):
            self.delta_formula = "+" + self.delta_formula

        # The capturing group keeps the signs: ['', sign, group, sign, group, ...]
        formula_split = re.split(u'([+-])', self.delta_formula)
        totals = {}
        for sign, el in zip(formula_split[1::2], formula_split[2::2]):
            multiplier = -1 if sign == "-" else 1
            group_amounts = {
                segment.element().name(): multiplier * segment.amount()
                for segment in pyisocalc.parseSumFormula(el).get_segments()
            }
            for element, amount in group_amounts.items():
                totals[element] = totals.get(element, 0) + amount

        return "".join(
            "{}{}{}".format("+" if total > 0 else "-", element, abs(total))
            for element, total in totals.items()
            if total != 0
        )
Esempio n. 10
0
 def get_isotope_pattern(self, formula_adduct_string, charge):
     """Build a Gaussian-broadened spectrum with centroids attached.

     The perfect pattern of the parsed formula is broadened with a sigma
     taken from ``self.sigma_at_mz`` at the first centroid value, the
     result is passed through ``gradient`` and the first two values it
     returns are added to the spectrum as centroids.

     Returns the spectrum object produced by ``pyisocalc.apply_gaussian``.
     """
     parsed_formula = pyisocalc.parseSumFormula(formula_adduct_string)
     perfect_pattern = pyisocalc.perfect_pattern(parsed_formula, charge=charge)
     first_centroid = perfect_pattern.get_spectrum(source='centroids')[0][0]
     sigma = self.sigma_at_mz(first_centroid)
     pts_per_mz = self.points_per_mz(sigma)
     spec = pyisocalc.apply_gaussian(perfect_pattern, sigma, pts_per_mz)
     # gradient() yields three values; the third one is not used here.
     peak_mzs, peak_ints, _ = gradient(*spec.get_spectrum())
     spec.add_centroids(peak_mzs, peak_ints)
     return spec
Esempio n. 11
0
 def parse_sf_string(self, sf_string):
     """Map each element name of ``sf_string`` to its parsed amount.

     A "+" is prepended when the string has no leading sign; the first
     character is then dropped before parsing, so a leading sign does not
     affect the returned amounts. If an element name occurs in several
     segments, the last amount wins.
     """
     # string can be of form A1B2C3-D4E5+F
     from pyMSpec.pyisocalc.pyisocalc import parseSumFormula
     if sf_string[0] not in ('+', '-'):
         sf_string = '+' + sf_string
     segments = parseSumFormula(sf_string[1:]).get_segments()
     return {segment.element().name(): segment.amount() for segment in segments}
Esempio n. 12
0
 def get_isotope_pattern(self, formula_adduct_string, charge):
     """Return the broadened isotope spectrum for a formula/adduct string.

     Steps: parse the formula, build its perfect pattern for ``charge``,
     derive sigma from the first centroid value via ``self.sigma_at_mz``,
     apply a Gaussian with ``self.points_per_mz(sigma)`` points, then run
     ``gradient`` on the spectrum and store its first two results as the
     spectrum's centroids.
     """
     perfect_pattern = pyisocalc.perfect_pattern(
         pyisocalc.parseSumFormula(formula_adduct_string), charge=charge)
     centroid_axis = perfect_pattern.get_spectrum(source='centroids')[0]
     sigma = self.sigma_at_mz(centroid_axis[0])
     spec = pyisocalc.apply_gaussian(
         perfect_pattern, sigma, self.points_per_mz(sigma))
     # Only the first two of gradient()'s three outputs are needed.
     centroided_mzs, centroided_ints, _ = gradient(*spec.get_spectrum())
     spec.add_centroids(centroided_mzs, centroided_ints)
     return spec
Esempio n. 13
0
 def generate_molecular_weights(self, sf_string, charge=0):
     """Return the first centroid array of the isotope distribution.

     The atoms from ``self.parse_sf_string`` are re-assembled into a
     formula with element names in sorted order; elements with a zero
     count are dropped.

     Raises:
         ValueError: if any element has a negative count.
     """
     atoms = self.parse_sf_string(sf_string)
     parts = []
     for element in sorted(atoms):
         count = atoms[element]
         if count < 0:
             raise ValueError('negative elements for {} with {}'.format(element, count))
         if count > 0:
             parts.append('{}{}'.format(element, count))
     formula = parseSumFormula(''.join(parts))
     ms_output = complete_isodist(formula, charge=charge, output='', plot=False, sigma=0.01,
                                  resolution=2000000, cutoff=0.0001, do_centroid=False)
     return ms_output.get_spectrum(source='centroids')[0]
Esempio n. 14
0
def normalise_sf(sf_string):
    """Return the parser's canonical text for ``sf_string``, or "" on failure.

    Parse errors and invalid formulas are logged at DEBUG level; any other
    exception raised by the parser is logged as a warning. In both cases
    an empty string is returned.
    """
    from pyMSpec.pyisocalc.pyisocalc import InvalidFormulaError, ParseError
    from pyMSpec.pyisocalc.pyisocalc import parseSumFormula
    import logging
    try:
        sf = parseSumFormula(sf_string)
    except (ParseError, InvalidFormulaError) as e:
        logging.debug(e)
        return ""
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt
        # and SystemExit; those now propagate to the caller.
        logging.warning("failed to parse: {}".format(sf_string))
        return ""
    return sf.__unicode__()
Esempio n. 15
0
 def get_mz(self, adduct):
     """
     Calculate the precursor mass for this molecule with a given adduct.

     The value is taken from the first array of the centroid spectrum at
     the index where the second array is largest (presumably m/z values
     and intensities).

     :param adduct: object of class Adduct; its ``charge`` is used
     :return: float, or -1. when the calculation fails
     """
     try:
         formula = self.make_ion_formula(adduct)
         spec = pyisocalc.perfect_pattern(pyisocalc.parseSumFormula(formula), charge=adduct.charge)
         # Fetch the centroid spectrum once and reuse it for both lookups.
         centroids = spec.get_spectrum(source='centroids')
         return centroids[0][np.argmax(centroids[1])]
     except Exception:
         # The name used to be passed as the format string with the adduct
         # as its argument, which fails to format once DEBUG is enabled.
         logging.debug('failed to compute m/z for %s with adduct %s', self.name, adduct)
         return -1.
Esempio n. 16
0
 def get_mz(self, adduct):
     """
     Calculate the precursor mass for this molecule with a given adduct.

     Builds the ion formula with ``self.make_ion_formula``, computes its
     perfect pattern for ``adduct.charge`` and returns the entry of the
     first centroid array at the argmax of the second centroid array.

     :param adduct: object of class Adduct
     :return: float; -1. if any step raises an exception
     """
     try:
         formula = self.make_ion_formula(adduct)
         pattern = pyisocalc.perfect_pattern(
             pyisocalc.parseSumFormula(formula), charge=adduct.charge)
         # One get_spectrum() call serves both the lookup and the argmax.
         spectrum = pattern.get_spectrum(source='centroids')
         mass = spectrum[0][np.argmax(spectrum[1])]
         return mass
     except Exception:
         # Use a real format string: the previous call passed self.name as
         # the message template and adduct as its argument.
         logging.debug('could not compute m/z for %s with adduct %s',
                       self.name, adduct)
         return -1.
Esempio n. 17
0
def calculate_isotope_patterns(sum_formulae, adduct='', instrument=[], isocalc_sig=0.01, isocalc_resolution=200000.,
                                   isocalc_do_centroid=True, charge=1,verbose=True):
    """Generate the centroid spectrum of every sum formula with one adduct.

    Formulas whose pattern cannot be computed (parse error, KeyError,
    unrecognised element, invalid formula, MemoryError) are reported on
    stdout and skipped. Any other ValueError is re-raised after printing
    the offending formula and adduct.

    Args:
        sum_formulae: sequence of formula strings.
        adduct: string appended to every formula.
        instrument: object providing
            ``get_isotope_pattern(formula, charge=...)``. The default is
            not usable and is kept only for signature compatibility.
        isocalc_sig, isocalc_resolution, isocalc_do_centroid: not used by
            this function; kept for signature compatibility.
        charge: charge passed to ``instrument.get_isotope_pattern``.
        verbose: print progress, formula and adduct for every item.

    Returns:
        dict: ``{sum_formula: {adduct: centroid spectrum}}``.
    """
    from pyMSpec.pyisocalc import pyisocalc
    mz_list = {}
    for n, sum_formula in enumerate(sum_formulae):
        if verbose:
            print("{} {} {}".format(n / float(len(sum_formulae)), sum_formula, adduct))
        try:
            isotope_ms = instrument.get_isotope_pattern(sum_formula + adduct, charge=charge)
            # Parsed only for validation; the result itself is not needed.
            pyisocalc.parseSumFormula("{}{}".format(sum_formula, adduct))
        except pyMSpec.pyisocalc.canopy.sum_formula.ParseError as e:
            print("error-> {} {} {}".format(str(e), sum_formula, adduct))
            continue
        except KeyError as e:
            # Previously most KeyErrors fell through to the code below and
            # used an unbound (or stale, from the previous formula)
            # ``isotope_ms``; the handler that skipped them was unreachable.
            print("KeyError: {}".format(str(e)))
            continue
        except ValueError as e:
            if str(e).startswith("Element not recognised"):
                print(str(e))
                continue
            print("{} {}".format(sum_formula, adduct))
            raise
        except pyisocalc.InvalidFormulaError as e:
            print(str(e))
            continue
        except MemoryError as e:
            # TODO: print -> logging.debug
            # Report the input text; the parsed formula may not exist yet.
            print("Memory error: {}{}".format(sum_formula + adduct, str(e)))
            continue

        if sum_formula not in mz_list:
            mz_list[sum_formula] = {}
        mz_list[sum_formula][adduct] = isotope_ms.get_spectrum(source='centroids')
    return mz_list
Esempio n. 18
0
    def readRelationships(self):
        """
        Load the ontology file into a directed graph and lookup tables.

        currently links nodes through 'is_a', 'has_role'
        without any distinction between the two

        Edges point from the parent (or role) to the term. Every term with
        an 'id' is stored in ``self._nodes_by_id``; terms that also have a
        'name' are indexed in ``self._ids_by_name``. A 'RELATED FORMULA'
        synonym is stored as ``term['sum_formula']`` and, when pyisocalc
        can parse it, as ``term['_sf']``. InChI and InChIKey synonyms are
        stored under ``term['InChI']`` and ``term['InChIKey']``.
        """
        logging.info('Loading CheBI ontology into a graph...')
        self._g = nx.DiGraph()
        self._nodes_by_id = {}
        self._ids_by_name = {}

        for term in oboTerms(self.filename):
            if 'id' not in term:
                continue
            term_id = term['id']

            for parent in term.get('is_a', []):
                self._g.add_edge(parent, term_id)
            for relation in term.get('relationship', []):
                if relation.startswith('has_role'):
                    # "has_role <target>": keep only the target.
                    self._g.add_edge(relation.split(' ', 1)[1], term_id)

            for synonym in term.get('synonym', []):
                if 'RELATED FORMULA' in synonym:
                    s = synonym.split('"')[1]
                    term['sum_formula'] = s

                    try:
                        term['_sf'] = pyisocalc.parseSumFormula(s)
                    except Exception:
                        # Best effort: unparsable formulas keep only the
                        # raw 'sum_formula' text.
                        logging.debug('Could not parse formula %r of %s', s, term_id)
                elif 'RELATED InChI ' in synonym:
                    term['InChI'] = synonym.split('"')[1]
                elif 'RELATED InChIKey' in synonym:
                    term['InChIKey'] = synonym.split('"')[1]

            self._nodes_by_id[term_id] = term
            # A term without a name is kept by id but cannot be indexed by
            # name (this used to raise KeyError).
            name = term.get('name')
            if name is not None:
                self._ids_by_name[name] = term_id

        logging.info('Found {} terms.'.format(len(self._nodes_by_id)))
Esempio n. 19
0
    def readRelationships(self):
        """
        Read the ontology terms and build the graph and lookup tables.

        currently links nodes through 'is_a', 'has_role'
        without any distinction between the two

        For each term with an 'id', edges are added from every 'is_a'
        parent and every 'has_role' target to the term. The first
        'RELATED FORMULA' synonym is stored as ``term['sum_formula']``
        and, if pyisocalc parses it, as ``term['_sf']``; later synonyms
        are not inspected. Terms are stored in ``self._nodes_by_id`` and,
        when they have a 'name', in ``self._ids_by_name``.
        """
        logging.info('Loading CheBI ontology into a graph...')
        self._g = nx.DiGraph()
        self._nodes_by_id = {}
        self._ids_by_name = {}

        for term in oboTerms(self.filename):
            if 'id' not in term:
                continue
            if 'is_a' in term:
                for parent in term['is_a']:
                    self._g.add_edge(parent, term['id'])
            if 'relationship' in term:
                for parent in term['relationship']:
                    if parent.startswith('has_role'):
                        role = parent.split(' ', 1)[1]
                        self._g.add_edge(role, term['id'])

            for synonym in term.get('synonym', []):
                if 'RELATED FORMULA' not in synonym:
                    continue
                s = synonym.split('"')[1]
                term['sum_formula'] = s

                try:
                    term['_sf'] = pyisocalc.parseSumFormula(s)
                except Exception:
                    # Best effort: keep the raw text when parsing fails,
                    # but no longer hide KeyboardInterrupt/SystemExit.
                    logging.debug('Could not parse formula %r of %s', s, term['id'])

                # Only the first formula synonym is used.
                break

            self._nodes_by_id[term['id']] = term
            # Index by name only when the term has one; a missing name
            # used to raise KeyError here.
            if 'name' in term:
                self._ids_by_name[term['name']] = term['id']

        logging.info('Found {} terms.'.format(len(self._nodes_by_id)))
Esempio n. 20
0
    def get_delta_atoms(self):
        """Reduce ``self.delta_formula`` to a net signed count per element.

        The formula is split into "+"/"-" prefixed groups; each group is
        parsed with ``pyisocalc.parseSumFormula`` and contributes its
        element amounts with the group's sign. The per-element
        contributions are summed and elements with a zero total are
        dropped. Intermediate values are logged at DEBUG level.

        Side effect: ``self.delta_formula`` is stripped and, if it does
        not already begin with a sign, prefixed with "+".

        Returns:
            str: ``<sign><element><abs(total)>`` entries joined together,
            or "" if no element has a non-zero total.
        """
        def addElement(elDict, element, number):
            elDict.setdefault(element, []).append(number)

        self.delta_formula = self.delta_formula.strip()
        # The old test required the string to start with "+" AND "-" at
        # once, which is impossible, so an unsigned first group was never
        # given its "+" and got lost in the split. Empty input stays
        # empty so that it still returns "".
        if self.delta_formula and not self.delta_formula.startswith(("+", "-")):
            self.delta_formula = "+" + self.delta_formula
        formula_split = re.split(u'([+-])', self.delta_formula)
        logging.debug(formula_split)
        el_dict = {}
        # Odd positions hold the signs, the following even positions the
        # formula text each sign applies to.
        for sign, el in zip(formula_split[1::2], formula_split[2::2]):
            this_el_dict = {
                segment.element().name(): int("{}1".format(sign)) * segment.amount()
                for segment in pyisocalc.parseSumFormula(el).get_segments()
            }
            for this_el in this_el_dict:
                logging.debug(el_dict)
                addElement(el_dict, this_el, this_el_dict[this_el])
        sign_dict = {1: "+", -1: "-"}
        for this_el in el_dict:
            el_dict[this_el] = sum(el_dict[this_el])
        logging.debug(el_dict)
        el_string = "".join(
            "{}{}{}".format(sign_dict[np.sign(el_dict[el])], el, abs(el_dict[el]))
            for el in el_dict
            if el_dict[el] != 0
        )
        logging.debug(el_string)
        return el_string
def normalized(sf):
    """Return ``str()`` of ``sf`` as parsed by ``pyisocalc.parseSumFormula``."""
    parsed = pyisocalc.parseSumFormula(sf)
    return str(parsed)
Esempio n. 22
0
 def _sf_elements(sf):
     """List the element name of every segment of the parsed formula ``sf``."""
     names = []
     for segment in parseSumFormula(sf).get_segments():
         names.append(segment.element().name())
     return names
Esempio n. 23
0
 def _sf_elements(sf):
     """Return the element names of ``sf``'s segments, in segment order."""
     segments = parseSumFormula(sf).get_segments()
     return [segment.element().name() for segment in segments]
Esempio n. 24
0
 def _isodist(self, sf_adduct):
     """Run ``complete_isodist`` on the parsed ``sf_adduct``.

     Sigma, charge and points per m/z come from this object; centroiding
     is configured with ``weighted_bins`` set to 5.
     """
     return complete_isodist(
         parseSumFormula(sf_adduct),
         sigma=self.sigma,
         charge=self.charge,
         pts_per_mz=self.pts_per_mz,
         centroid_kwargs={'weighted_bins': 5})
 def normalize_sf(self, sf):
     """Return the string form of ``sf`` after parsing it with pyisocalc."""
     parsed_formula = pyisocalc.parseSumFormula(sf)
     return str(parsed_formula)
# Read the command line and derive the run configuration from it.
args = parser.parse_args()
print(args)
instr = Instrument(args)
# Materialise the adducts: on Python 3 ``map`` is a one-shot iterator and
# would be exhausted by the validation loop below.
adducts = list(map(signedAdduct, args.adducts))
assert(args.dynrange > 1)
for adduct in adducts:
    assert(isValidAdduct(adduct))
detection_limit = 1.0 / args.dynrange

output_filename = os.path.join(os.getcwd(), os.path.expanduser(args.output))

# Optional target database: one sum formula per line, stored in the
# parser's string form.
db = set()
if args.db:
    # The context manager closes the file; it used to be left open.
    with open(args.db) as db_file:
        for line in db_file:
            sf_str = str(pyisocalc.parseSumFormula(line.strip()))
            db.add(sf_str)
    print("target database size: {}".format(len(db)))

layers = {}
noise = {}

# The input is a NumPy archive, so it is opened in binary mode.
with open(args.input, 'rb') as f:
    with np.load(f) as data:
        W = data['W']
        H = data['H']
        shape = data['shape']
        # Column 0 of 'mz_axis' is used as the axis, column 1 as nmf_ppms.
        mz_axis = data['mz_axis'][:, 0]
        nmf_ppms = data['mz_axis'][:, 1]
        noise['prob'] = data['noise_prob']
        noise['sqrt_avg'] = data['noise_sqrt_avg']
def test_compare_generate_complex_ion_formula_with_pymspec(formula, comma_separated_adducts):
    """``safe_generate_ion_formula`` must match pyisocalc for several adducts."""
    adduct_list = comma_separated_adducts.split(',')
    actual = safe_generate_ion_formula(formula, *adduct_list)
    expected = str(pyisocalc.parseSumFormula(formula + ''.join(adduct_list)))

    assert actual == expected
def test_compare_generate_simple_ion_formula_with_pymspec(formula, adduct):
    """``generate_ion_formula`` must match pyisocalc for a single adduct."""
    actual = generate_ion_formula(formula, adduct)
    expected = str(pyisocalc.parseSumFormula(formula + adduct))

    assert actual == expected
Esempio n. 29
0
 def get_principal_peak(self, formula_adduct_string, charge):
     """Return the centroid value belonging to the largest intensity.

     The perfect pattern of the parsed formula is computed for ``charge``;
     the first centroid array is indexed at the argmax of the second
     centroid array.
     """
     parsed_formula = pyisocalc.parseSumFormula(formula_adduct_string)
     pattern = pyisocalc.perfect_pattern(parsed_formula, charge=charge)
     centroids = pattern.get_spectrum(source='centroids')
     strongest = np.argmax(centroids[1])
     return centroids[0][strongest]