def test_fromula_obj(self):
    """Check Formula parsing, item access, add/remove and the + / += operators."""
    glucose_str = 'C6H12O6'
    formula = Formula.from_str(glucose_str)
    self.assertIsNotNone(formula)
    for element, expected_count in (('C', 6), ('H', 12), ('O', 6)):
        self.assertEqual(formula[element], expected_count)
    self.assertEqual(str(formula), glucose_str)

    # remove() given a formula string modifies the object itself
    formula.remove('C5H3O12')
    self.assertEqual(str(formula), 'CH9')

    # add() given a dict of element counts
    counts = {'C': 148, 'H': 122}
    formula.add(counts)
    self.assertEqual(str(formula), 'C149H131')

    # remove() given another Formula built from the same dict
    from_counts = Formula(counts)
    formula.remove(from_counts)
    self.assertEqual(str(formula), 'CH9')

    # a plain list is not an accepted argument type
    with self.assertRaises(TypeError):
        formula.add(['C', 12, 'H', 32])

    # constructing from an existing Formula yields a distinct object
    source = Formula.from_str('C4H12')
    duplicate = Formula(source)
    self.assertIsNot(source, duplicate)
    self.assertEqual(str(duplicate), 'C4H12')

    # new_obj=True leaves the receiver untouched and returns another object
    reduced = source.remove('CH', new_obj=True)
    self.assertEqual(str(source), 'C4H12')
    self.assertIsNot(reduced, source)
    self.assertEqual(str(reduced), 'C3H11')

    # test + -
    augmented = Formula.from_str('C4H4O4')
    augmented += 'C4H4O4'
    self.assertEqual(str(augmented), 'C8H8O8')
    total = augmented + 'C2H2O2'
    self.assertIsNot(total, augmented)
    self.assertEqual(str(total), 'C10H10O10')
    self.assertEqual(str(augmented), 'C8H8O8')
def parse_metabolite_card(args):
    """
    Parse a metabolite card (XML file) into a flat tuple of fields.

    @param args: sequence; args[0] is the path of the XML file to parse and
        args[1] is the emass path handed to get_theo_ip
    @return: tuple holding, for every 'metabolite' element met: accession,
        name, chemical formula, inchi, inchikey without its first 9
        characters, monoisotopic weight, average weight, description, two
        empty strings, kegg id, get_theo_ip() result for formula + H and
        get_theo_ip() result for formula - H.
        None when one of the caught exceptions is raised while parsing.
    """
    filepath = args[0]
    emass_path = args[1]
    # NOTE: errors raised by iterparse() itself happen outside the try block
    # below and therefore propagate to the caller.
    context = cElementTree.iterparse(filepath, events=('end', ))
    metab = list()
    try:
        for action, elem in context:
            if elem.tag == 'metabolite' and action == 'end':
                f = elem.find('chemical_formula').text
                metab.append(elem.find('accession').text)
                metab.append(elem.find('name').text)
                metab.append(f)
                metab.append(elem.find('inchi').text)
                # presumably drops an 'InChIKey=' prefix (9 chars) - TODO confirm
                metab.append(elem.find('inchikey').text[9:])

                # both weights fall back to 0.0 when missing or not numeric
                for tag in ('monisotopic_moleculate_weight',
                            'average_molecular_weight'):
                    try:
                        metab.append(float(elem.find(tag).text))
                    except (ValueError, TypeError):
                        logging.info(
                            "Failed to parse {0} in {1}".format(tag, filepath))
                        metab.append(0.0)

                metab.append(elem.find('description').text)
                # two columns deliberately left empty
                metab.append("")
                metab.append("")
                metab.append(elem.find('kegg_id').text)

                # isotopic patterns
                formula = Formula.from_str(f)
                # add one H to have positive
                f1 = formula.add('H', new_obj=True)
                metab.append(get_theo_ip(emass_path, str(f1), min_rel_int=1.0))
                # remove one H for the other pattern
                f2 = formula.remove('H', new_obj=True)
                metab.append(get_theo_ip(emass_path, str(f2), min_rel_int=1.0))
    # WindowsError only exists on Windows: naming it elsewhere raises a
    # NameError as soon as the except clause is evaluated, hiding the real
    # error. OSError is its portable base class.
    except (OSError, IOError, ValueError, TypeError) as e:
        logging.warning(
            "Error parsing metabolite card : {0} with following exception : \n {1}"
            .format(filepath, e))
        return None
    return tuple(metab)
def parse_metabolite_card(args):
    """
    Read a metabolite card (XML file) and flatten it into a tuple.

    @param args: sequence of two items: the path (str) of the XML file to
        parse, then the emass path forwarded to get_theo_ip
    @return: None if one of the caught exceptions is raised while parsing;
        otherwise a tuple with, per 'metabolite' element: accession, name,
        chemical formula, inchi, inchikey minus its first 9 characters,
        monoisotopic weight, average weight, description, two empty
        strings, kegg id, and the get_theo_ip() results for formula + H
        and formula - H
    """
    filepath = args[0]
    emass_path = args[1]
    # NOTE: iterparse() is called outside the try block, so an error it
    # raises is not converted into a None return.
    context = cElementTree.iterparse(filepath, events=('end',))
    metab = list()
    weight_tags = ('monisotopic_moleculate_weight', 'average_molecular_weight')
    try:
        for action, elem in context:
            if elem.tag == 'metabolite' and action == 'end':
                f = elem.find('chemical_formula').text
                metab.append(elem.find('accession').text)
                metab.append(elem.find('name').text)
                metab.append(f)
                metab.append(elem.find('inchi').text)
                # presumably strips an 'InChIKey=' prefix (9 chars) - TODO confirm
                metab.append(elem.find('inchikey').text[9:])

                # a missing or non numeric weight is stored as 0.0
                for tag in weight_tags:
                    try:
                        metab.append(float(elem.find(tag).text))
                    except (ValueError, TypeError):
                        logging.info("Failed to parse {} in {}".format(tag, filepath))
                        metab.append(0.0)

                metab.append(elem.find('description').text)
                # two columns deliberately left empty
                metab.append("")
                metab.append("")
                metab.append(elem.find('kegg_id').text)

                # isotopic patterns
                formula = Formula.from_str(f)
                # add one H to have positive
                f1 = formula.add('H', new_obj=True)
                metab.append(get_theo_ip(emass_path, str(f1), min_rel_int=1.0))
                # remove one H for the other pattern
                f2 = formula.remove('H', new_obj=True)
                metab.append(get_theo_ip(emass_path, str(f2), min_rel_int=1.0))
    # WindowsError is undefined outside Windows, so listing it here turned
    # every handled failure into a NameError on other platforms; OSError is
    # the portable base class.
    except (OSError, IOError, ValueError, TypeError) as e:
        logging.warning("Error parsing metabolite card : {} with following exception : \n {}".format(filepath, e))
        return None
    return tuple(metab)
def test_theo_ip(self):
    """get_theo_ip() on a formula parsed from a string must not return None."""
    pattern = Formula.from_str('C6H12O6').get_theo_ip()
    self.assertIsNotNone(pattern)
def test_moz_bounds(self):
    """The first value returned by get_moz_bounds must match the reference to 2 places."""
    reference = 260.029718526
    # 1.007276 is presumably the proton mass - TODO confirm
    peakel = Peakel(reference + 1.007276, 0.0, 0.0, reference)
    adduct = Formula.from_str('H1')
    # unpacking also checks that exactly three values come back
    m, _m_min, _m_max = get_moz_bounds(peakel, adduct, 10)
    self.assertAlmostEqual(m, reference, places=2)
def assign_formula(self, features, for_adducts, with_tol_ppm=10.0):
    """
    Annotate features with the metabolites found in the configured bank(s),
    using the multiprocessing module to run the look ups.

    For every adduct, each feature is searched in ``self.bank`` ('hmdb',
    'lmsd', 'lmsd + hmdb' or 'hmdb + lmsd') and one Annotation per hit is
    appended to the feature's ``annotations``.

    :param features: collection of features; it is iterated several times
        and its elements are annotated in place
    :param for_adducts: iterable of adduct formula strings
    :param with_tol_ppm: mz tolerance in order to perform the look up
    :return: tuple ``(m_count, not_found)``: total number of metabolites
        attached, and number of (feature, adduct) look ups with no hit
    """
    m_count, not_found = 0, 0
    # One pool serves every adduct; it is created lazily so that an empty
    # adduct list spawns no worker process.
    pool = None
    try:
        for for_adduct in for_adducts:
            formula = Formula.from_str(for_adduct)
            logging.info("searching for adducts: {0}".format(str(formula)))
            if pool is None:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count())

            metabolites = []
            if self.bank == 'hmdb':
                args = [(self.HMDB_FILE, f, formula, with_tol_ppm)
                        for f in features]
                metabolites = pool.map(search_metabolites_for, args,
                                       chunksize=20)
            elif self.bank == 'lmsd':
                args = [(self.LMSD_FILE, f, formula, with_tol_ppm)
                        for f in features]
                metabolites = pool.map(search_lipids_for, args, chunksize=20)
            elif self.bank in {'lmsd + hmdb', 'hmdb + lmsd'}:
                logging.info('Searching in LMSD...')
                args_lmsd = [(self.LMSD_FILE, f, formula, with_tol_ppm)
                             for f in features]
                metabs_lmsd = pool.map(search_lipids_for, args_lmsd,
                                       chunksize=20)
                logging.info('Searching in HMDB...')
                args_hmdb = [(self.HMDB_FILE, f, formula, with_tol_ppm)
                             for f in features]
                metabs_hmdb = pool.map(search_metabolites_for, args_hmdb,
                                       chunksize=20)
                # merge the 2 results set
                for lmsd_met, hmdb_met in izip(metabs_lmsd, metabs_hmdb):
                    metabolites.append(lmsd_met + hmdb_met)
            else:
                # nothing is searched and nothing is counted for this adduct
                logging.warning(
                    "Unknown bank '{0}': no search performed".format(self.bank))

            # create Annotation objects
            for f, metabs in izip(features, metabolites):
                if not metabs:
                    not_found += 1
                else:
                    m_count += len(metabs)
                    for_adducts_str = '[M{0}{1}]='.format(
                        '-' if f.polarity > 0 else '+', formula)
                    f.annotations += [
                        Annotation(m, for_adducts_str) for m in metabs
                    ]
    finally:
        # release the workers even when a search or a formula parse failed
        if pool is not None:
            pool.close()
            try:
                pool.terminate()
            except OSError:
                # seen error on windows OS
                pass
    return m_count, not_found