Beispiel #1
0
    def test_fromula_obj(self):
        """Check Formula construction, item access, add/remove and +/+= ."""
        glucose = 'C6H12O6'
        formula = Formula.from_str(glucose)
        self.assertIsNotNone(formula)

        # element counts are reachable through item access
        for symbol, expected in (('C', 6), ('H', 12), ('O', 6)):
            self.assertEqual(formula[symbol], expected)

        # the string form matches the string the formula was parsed from
        self.assertEqual(str(formula), glucose)

        # in-place removal given a formula string
        formula.remove('C5H3O12')
        self.assertEqual(str(formula), 'CH9')

        # in-place addition given a dict of element counts
        counts = {'C': 148, 'H': 122}
        formula.add(counts)
        self.assertEqual(str(formula), 'C149H131')

        # in-place removal given another Formula built from the same dict
        from_counts = Formula(counts)
        formula.remove(from_counts)
        self.assertEqual(str(formula), 'CH9')

        # a plain list is rejected
        self.assertRaises(TypeError, formula.add, ['C', 12, 'H', 32])

        # building a Formula from a Formula yields a distinct object
        source = Formula.from_str('C4H12')
        duplicate = Formula(source)
        self.assertIsNot(source, duplicate)
        self.assertEqual(str(duplicate), 'C4H12')

        # new_obj=True returns a new object and leaves the receiver as it was
        reduced = source.remove('CH', new_obj=True)
        self.assertEqual(str(source), 'C4H12')
        self.assertIsNot(reduced, source)
        self.assertEqual(str(reduced), 'C3H11')

        # test + -
        total = Formula.from_str('C4H4O4')
        total += 'C4H4O4'
        self.assertEqual(str(total), 'C8H8O8')

        bigger = total + 'C2H2O2'
        self.assertIsNot(bigger, total)
        self.assertEqual(str(bigger), 'C10H10O10')
        self.assertEqual(str(total), 'C8H8O8')
Beispiel #2
0
def parse_metabolite_card(args):
    """
    Parse a metabolite card XML file into a flat tuple of fields.

    For every ``metabolite`` element the following values are appended, in
    this order, to one shared list: accession, name, chemical formula,
    inchi, inchikey without its first 9 characters, monoisotopic weight,
    average weight, description, two empty-string placeholders, kegg id,
    and the ``get_theo_ip`` results for the formula with one H added and
    with one H removed.

    A weight element that is missing, empty or not a number is logged and
    stored as 0.0.

    @param args: sequence ``(filepath, emass_path)``
    @return: tuple of the collected fields, or None when the file cannot be
             read or a ValueError/TypeError is raised while processing it
    """
    filepath = args[0]
    emass_path = args[1]
    metab = list()
    try:
        # iterparse opens the file itself, so it has to live inside the try
        # block for an unreadable path to be reported rather than raised.
        context = cElementTree.iterparse(filepath, events=('end', ))
        for action, elem in context:
            if elem.tag != 'metabolite' or action != 'end':
                continue

            f = elem.find('chemical_formula').text
            metab.append(elem.find('accession').text)
            metab.append(elem.find('name').text)
            metab.append(f)
            metab.append(elem.find('inchi').text)
            # drop the first 9 characters (presumably an "InChIKey=" prefix)
            metab.append(elem.find('inchikey').text[9:])

            for tag in ('monisotopic_moleculate_weight',
                        'average_molecular_weight'):
                try:
                    metab.append(float(elem.find(tag).text))
                except (AttributeError, ValueError, TypeError):
                    # AttributeError: element absent (find() returned None)
                    # TypeError: element present but empty (text is None)
                    # ValueError: text is not a number
                    logging.info(
                        "Failed to parse {0} in {1}".format(tag, filepath))
                    metab.append(0.0)

            metab.append(elem.find('description').text)
            # two placeholder columns, kept empty
            metab.append("")
            metab.append("")
            metab.append(elem.find('kegg_id').text)

            formula = Formula.from_str(f)
            # add one H to have positive
            f1 = formula.add('H', new_obj=True)
            metab.append(get_theo_ip(emass_path, str(f1), min_rel_int=1.0))
            # remove one H for the opposite case
            f2 = formula.remove('H', new_obj=True)
            metab.append(get_theo_ip(emass_path, str(f2), min_rel_int=1.0))
    # EnvironmentError covers IOError and OSError (hence WindowsError too)
    # and, unlike WindowsError, is defined on every platform.
    except (EnvironmentError, ValueError, TypeError) as e:
        logging.warning(
            "Error parsing metabolite card : {0} with following exception : \n {1}"
            .format(filepath, e))
        return None
    return tuple(metab)
def parse_metabolite_card(args):
    """
    Read a metabolite card XML file and return its fields as a flat tuple.

    Each ``metabolite`` element contributes, in order: accession, name,
    chemical formula, inchi, inchikey minus its first 9 characters,
    monoisotopic weight, average weight, description, two empty-string
    placeholders, kegg id, then the ``get_theo_ip`` result for the formula
    plus one H and for the formula minus one H. All elements of the file
    append to the same list.

    A weight element that is absent, empty or non-numeric is logged and
    replaced by 0.0.

    @param args: sequence ``(filepath, emass_path)``
    @return: tuple of collected fields, or None when the file cannot be
             read or a ValueError/TypeError occurs while processing it
    """
    filepath = args[0]
    emass_path = args[1]
    metab = list()
    weight_tags = ('monisotopic_moleculate_weight', 'average_molecular_weight')
    try:
        # The file is opened by iterparse, so the call belongs inside the
        # try block; otherwise a bad path bypasses the handler below.
        context = cElementTree.iterparse(filepath, events=('end',))
        for action, elem in context:
            if elem.tag == 'metabolite' and action == 'end':
                f = elem.find('chemical_formula').text
                metab.append(elem.find('accession').text)
                metab.append(elem.find('name').text)
                metab.append(f)
                metab.append(elem.find('inchi').text)
                # skip the first 9 characters (presumably "InChIKey=")
                metab.append(elem.find('inchikey').text[9:])
                for tag in weight_tags:
                    try:
                        metab.append(float(elem.find(tag).text))
                    except (AttributeError, ValueError, TypeError):
                        # missing element, empty element or non-numeric text
                        logging.info("Failed to parse {} in {}".format(tag, filepath))
                        metab.append(0.0)
                metab.append(elem.find('description').text)
                # two placeholder columns, kept empty
                metab.append("")
                metab.append("")
                metab.append(elem.find('kegg_id').text)

                formula = Formula.from_str(f)
                # add one H to have positive
                f1 = formula.add('H', new_obj=True)
                metab.append(get_theo_ip(emass_path, str(f1), min_rel_int=1.0))
                # remove one H for the opposite case
                f2 = formula.remove('H', new_obj=True)
                metab.append(get_theo_ip(emass_path, str(f2), min_rel_int=1.0))
    # WindowsError only exists on Windows; EnvironmentError is its portable
    # ancestor and also covers IOError/OSError.
    except (EnvironmentError, ValueError, TypeError) as e:
        logging.warning("Error parsing metabolite card : {} with following exception : \n {}".format(filepath, e))
        return None
    return tuple(metab)
Beispiel #4
0
 def test_theo_ip(self):
     """get_theo_ip() on a Formula parsed from 'C6H12O6' returns something."""
     pattern = Formula.from_str('C6H12O6').get_theo_ip()
     self.assertIsNotNone(pattern)
Beispiel #5
0
 def test_moz_bounds(self):
     """First value returned by get_moz_bounds is ~260.03 for the 'H1' case."""
     base = 260.029718526
     peakel = Peakel(base + 1.007276, 0.0, 0.0, base)
     adduct = Formula.from_str('H1')
     # three values come back; only the first one is checked here
     m, m_min, m_max = get_moz_bounds(peakel, adduct, 10)
     self.assertAlmostEqual(m, base, places=2)
Beispiel #6
0
    def assign_formula(self, features, for_adducts, with_tol_ppm=10.0):
        """
        Assign candidate metabolites to features using the multiprocessing module.

        For each adduct a worker pool looks every feature up in the bank
        selected by ``self.bank`` ('hmdb', 'lmsd', 'lmsd + hmdb' or
        'hmdb + lmsd'). One ``Annotation`` per hit is appended to the
        feature's ``annotations``. Any other ``self.bank`` value produces
        no results, so nothing is annotated or counted.

        :param features: collection of features; it is iterated more than
            once, so it must be re-iterable in a stable order
        :param for_adducts: iterable of adduct formula strings
        :param with_tol_ppm: mz tolerance passed to the look-up functions
        :return: tuple ``(m_count, not_found)``: total number of hits, and
            number of (feature, adduct) look-ups that returned nothing
        """
        m_count, not_found = 0, 0
        for for_adduct in for_adducts:
            formula = Formula.from_str(for_adduct)
            logging.info("searching for adducts: {0}".format(str(formula)))

            pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
            try:
                def search(worker, bank_file):
                    # one argument tuple per feature, mapped over the pool
                    args = [(bank_file, feat, formula, with_tol_ppm)
                            for feat in features]
                    return pool.map(worker, args, chunksize=20)

                metabolites = []
                if self.bank == 'hmdb':
                    metabolites = search(search_metabolites_for,
                                         self.HMDB_FILE)
                elif self.bank == 'lmsd':
                    metabolites = search(search_lipids_for, self.LMSD_FILE)
                elif self.bank in {'lmsd + hmdb', 'hmdb + lmsd'}:
                    logging.info('Searching in LMSD...')
                    metabs_lmsd = search(search_lipids_for, self.LMSD_FILE)

                    logging.info('Searching in HMDB...')
                    metabs_hmdb = search(search_metabolites_for,
                                         self.HMDB_FILE)

                    # merge the 2 results set, feature by feature
                    metabolites = [
                        lmsd_met + hmdb_met
                        for lmsd_met, hmdb_met in izip(metabs_lmsd,
                                                       metabs_hmdb)
                    ]

                # create Annotation objects
                for f, metabs in izip(features, metabolites):
                    if not metabs:
                        not_found += 1
                    else:
                        m_count += len(metabs)
                    for_adducts_str = '[M{0}{1}]='.format(
                        '-' if f.polarity > 0 else '+', formula)
                    f.annotations += [
                        Annotation(m, for_adducts_str) for m in metabs
                    ]
            finally:
                # always release the workers, even when a look-up raised
                pool.close()
                try:
                    pool.terminate()
                except OSError:
                    # seen error on windows OS
                    pass
        return m_count, not_found