def test_sample(self):
    """Parse ``KEGG/compound.sample`` and spot-check two of its records.

    Verifies the number of parsed records, several fields of the second
    record (C00017), and the first ten lines of the last record's string
    form once all spaces have been removed.
    """
    with open("KEGG/compound.sample") as sample:
        parsed = list(Compound.parse(sample))
    self.assertEqual(len(parsed), 8)

    # Field-level checks on the second record.
    protein = parsed[1]
    self.assertEqual(protein.entry, "C00017")
    # NOTE(review): the original author questioned ("Why?") the empty mass
    # expected here; the reason is still undocumented.
    self.assertEqual(protein.mass, "")
    self.assertEqual(protein.formula, "C2H4NO2R(C2H2NOR)n")
    self.assertEqual(protein.name, ["Protein"])
    expected_pathway = [("PATH", "map00450", "Selenocompound metabolism")]
    self.assertEqual(protein.pathway, expected_pathway)
    self.assertEqual(len(protein.enzyme), 21)
    # The first enzyme entry is compared against a plain string.
    self.assertEqual(protein.enzyme[0], "2.3.2.6")
    self.assertEqual(protein.structures, [])
    self.assertEqual(protein.dblinks[0], ("PubChem", ["3319"]))

    # Rendering check on the last record: drop spaces, keep ten lines.
    rendered_lines = str(parsed[-1]).replace(" ", "").split("\n")
    expected_head = [
        "ENTRYC01386",
        "NAMENH2Mec",
        "7-Amino-4-methylcoumarin",
        "FORMULAC10H9NO2",
        "DBLINKSCAS:26093-31-2",
        "PubChem:4580",
        "ChEBI:51771",
        "ChEMBL:CHEMBL270672",
        "KNApSAcK:C00048593",
        "PDB-CCD:MCM",
    ]
    self.assertEqual(rendered_lines[:10], expected_head)
def t_KEGG_Compound(testfiles):
    """Tests Bio.KEGG.Compound functionality.

    For every name in ``testfiles`` the file ``KEGG/<name>`` is opened,
    parsed with ``Compound.parse``, and each resulting record is printed,
    followed by a trailing newline separator.

    :param testfiles: iterable of file names (str) inside the ``KEGG``
        directory; an empty iterable does nothing.
    :return: None
    """
    for filename in testfiles:
        # `with` guarantees the handle is closed, including when parsing or
        # printing raises (the handle used to be left open).
        with open(os.path.join("KEGG", filename)) as fh:
            # Single-argument print(...) calls produce the same output on
            # Python 2 and Python 3, unlike the bare print statement.
            print("Testing Bio.KEGG.Compound on " + filename + "\n\n")
            for record in Compound.parse(fh):
                print(record)
            print("\n")
def t_KEGG_Compound(testfiles):
    """Tests Bio.KEGG.Compound functionality.

    For every name in ``testfiles`` the file ``KEGG/<name>`` is opened,
    parsed with ``Compound.parse``, and each resulting record is printed,
    followed by a trailing newline separator.

    :param testfiles: iterable of file names (str) inside the ``KEGG``
        directory; an empty iterable does nothing.
    :return: None
    """
    for filename in testfiles:
        # A context manager replaces the trailing fh.close(), which was
        # skipped whenever parsing or printing raised.
        with open(os.path.join("KEGG", filename)) as fh:
            print("Testing Bio.KEGG.Compound on " + filename + "\n\n")
            for record in Compound.parse(fh):
                print(record)
            print("\n")
def compound_records_to_df(file_path):
    """Load gzipped KEGG compound records into a pandas DataFrame.

    Each record yielded by ``Compound.parse`` becomes one row. The columns
    are the names reported by ``dir(Compound.Record())`` that do not start
    with an underscore, in that order.

    Args:
        file_path (str): path to a gzip-compressed file; it is opened in
            text mode (``"rt"``) and handed to ``Compound.parse``.

    Returns:
        pandas.DataFrame: one row per parsed record, one column per field.
    """
    # Discover the public field names from a fresh, empty record.
    columns = [
        name for name in dir(Compound.Record()) if not name.startswith("_")
    ]

    with gzip.open(file_path, "rt") as handle:
        rows = [
            [getattr(record, column) for column in columns]
            for record in Compound.parse(handle)
        ]

    return pd.DataFrame(rows, columns=columns)
def test_sample(self):
    """Parse ``KEGG/compound.sample`` and verify the first record (C00023)."""
    with open("KEGG/compound.sample") as sample:
        parsed = list(Compound.parse(sample))
    self.assertEqual(len(parsed), 8)

    iron = parsed[0]
    self.assertEqual(iron.entry, "C00023")
    # NOTE(review): the original author questioned ("Why?") the empty mass
    # expected here; the reason is still undocumented.
    self.assertEqual(iron.mass, "")
    self.assertEqual(iron.formula, "Fe")
    expected_names = ["Iron", "Fe2+", "Fe(II)", "Fe3+", "Fe(III)"]
    self.assertEqual(iron.name, expected_names)
    expected_pathway = [
        ("PATH", "MAP00860", "Porphyrin and chlorophyll metabolism"),
    ]
    self.assertEqual(iron.pathway, expected_pathway)
    # Here the first enzyme entry is a two-element tuple.
    self.assertEqual(iron.enzyme[0], ("1.1.3.22", "C"))
    self.assertEqual(iron.structures, [])
    self.assertEqual(iron.dblinks[0], ("CAS", ["7439-89-6"]))
def test_sample(self):
    """Parse ``KEGG/compound.sample`` and verify the second record (C00017)."""
    with open("KEGG/compound.sample") as sample:
        parsed = list(Compound.parse(sample))
    self.assertEqual(len(parsed), 8)

    second = parsed[1]
    self.assertEqual(second.entry, "C00017")
    # NOTE(review): the original author questioned ("Why?") the empty mass
    # expected here; the reason is still undocumented.
    self.assertEqual(second.mass, "")
    self.assertEqual(second.formula, "C2H4NO2R(C2H2NOR)n")
    self.assertEqual(second.name, ["Protein"])
    expected_pathway = [("PATH", "map00450", "Selenocompound metabolism")]
    self.assertEqual(second.pathway, expected_pathway)
    self.assertEqual(len(second.enzyme), 21)
    # The first enzyme entry is compared against a plain string.
    self.assertEqual(second.enzyme[0], "2.3.2.6")
    self.assertEqual(second.structures, [])
    self.assertEqual(second.dblinks[0], ("PubChem", ["3319"]))
def parse_kegg_compound(source, sdf=False):
    """Yield an ordered mapping of upper-cased field names per KEGG record.

    The file at ``source`` is opened for reading and parsed with
    ``Compound.parse``. For each record whose ``entry`` contains ``"C"`` or
    ``"D"``, every name from ``dir(record)`` that has no underscore is
    copied into an ``OrderedDict`` under its upper-cased name.

    :param source: path of the KEGG compound file to read
    :param sdf: when true, also store ``REST.GetMol(<ENTRY>)`` under the
        ``"SDF"`` key
    :return: generator of ``OrderedDict`` objects
    """
    # NOTE(review): this block was recovered from whitespace-collapsed
    # source; placing the SDF lookup and the yield under the entry filter
    # is a reconstruction -- confirm against the original layout.
    with open(source, "r") as inp:
        for record in Compound.parse(inp):
            entry_id = record.entry
            if not ("C" in entry_id or "D" in entry_id):
                continue

            record_out = OrderedDict(
                (attribute.upper(), getattr(record, attribute.lower()))
                for attribute in dir(record)
                if "_" not in attribute
            )
            if sdf:
                record_out["SDF"] = REST.GetMol(record_out["ENTRY"])
            yield record_out
def dictionary(self, name):
    """Record the lower-cased KEGG names for a reaction/metabolite id.

    Fills a dictionary whose keys are KEGG ids and whose values are the
    corresponding lists of names: ids starting with ``cpd:`` go into
    ``self.metabolites``, every other id into ``self.reactions``. An id
    already present in either dictionary is left untouched and nothing is
    fetched for it.

    :param name: KEGG id of the reaction/substrate
    """
    if name in self.metabolites or name in self.reactions:
        return

    for entry in Compound.parse(REST.kegg_get(name)):
        lowered = [alias.lower() for alias in entry.name]
        # One assignment per parsed record; a later record overwrites an
        # earlier one under the same id.
        if name.startswith("cpd:"):
            self.metabolites[name] = lowered
        else:
            self.reactions[name] = lowered
def test_sample(self):
    """Check record count, C00017 fields and the last record's rendering.

    Reads ``KEGG/compound.sample``, expects eight records, inspects the
    second record field by field, and compares the first ten space-free
    lines of ``str()`` of the last record.
    """
    with open("KEGG/compound.sample") as stream:
        entries = list(Compound.parse(stream))
    self.assertEqual(len(entries), 8)

    target = entries[1]
    self.assertEqual(target.entry, "C00017")
    # NOTE(review): the original author questioned ("Why?") the empty mass
    # expected here; the reason is still undocumented.
    self.assertEqual(target.mass, "")
    self.assertEqual(target.formula, "C2H4NO2R(C2H2NOR)n")
    self.assertEqual(target.name, ["Protein"])
    self.assertEqual(
        target.pathway,
        [("PATH", "map00450", "Selenocompound metabolism")],
    )
    self.assertEqual(len(target.enzyme), 21)
    # The first enzyme entry is compared against a plain string.
    self.assertEqual(target.enzyme[0], "2.3.2.6")
    self.assertEqual(target.structures, [])
    self.assertEqual(target.dblinks[0], ("PubChem", ["3319"]))

    # Spaces are removed before splitting, so keys and values are fused.
    compact = str(entries[-1]).replace(" ", "")
    head = compact.split("\n")[:10]
    self.assertEqual(
        head,
        [
            "ENTRYC01386",
            "NAMENH2Mec",
            "7-Amino-4-methylcoumarin",
            "FORMULAC10H9NO2",
            "DBLINKSCAS:26093-31-2",
            "PubChem:4580",
            "ChEBI:51771",
            "ChEMBL:CHEMBL270672",
            "KNApSAcK:C00048593",
            "PDB-CCD:MCM",
        ],
    )
def test_sample(self):
    """Validate parsing of ``KEGG/compound.sample``.

    Covers three things: eight records are parsed, the second record
    (C00017) carries the expected field values, and the string form of the
    last record starts with the expected ten lines after removing spaces.
    """
    with open("KEGG/compound.sample") as fixture:
        all_records = list(Compound.parse(fixture))
    self.assertEqual(len(all_records), 8)

    rec = all_records[1]
    self.assertEqual(rec.entry, "C00017")
    # NOTE(review): the original author questioned ("Why?") the empty mass
    # expected here; the reason is still undocumented.
    self.assertEqual(rec.mass, "")
    self.assertEqual(rec.formula, "C2H4NO2R(C2H2NOR)n")
    self.assertEqual(rec.name, ["Protein"])
    selenocompound = ("PATH", "map00450", "Selenocompound metabolism")
    self.assertEqual(rec.pathway, [selenocompound])
    self.assertEqual(len(rec.enzyme), 21)
    # The first enzyme entry is compared against a plain string.
    self.assertEqual(rec.enzyme[0], "2.3.2.6")
    self.assertEqual(rec.structures, [])
    self.assertEqual(rec.dblinks[0], ("PubChem", ["3319"]))

    last_lines = str(all_records[-1]).replace(" ", "").split("\n")
    expected_first_ten = [
        "ENTRYC01386",
        "NAMENH2Mec",
        "7-Amino-4-methylcoumarin",
        "FORMULAC10H9NO2",
        "DBLINKSCAS:26093-31-2",
        "PubChem:4580",
        "ChEBI:51771",
        "ChEMBL:CHEMBL270672",
        "KNApSAcK:C00048593",
        "PDB-CCD:MCM",
    ]
    self.assertEqual(last_lines[:10], expected_first_ten)
def test_irregular(self):
    """Parse ``KEGG/compound.irregular``; expect two records, first C01454."""
    with open("KEGG/compound.irregular") as fixture:
        parsed = list(Compound.parse(fixture))
    self.assertEqual(len(parsed), 2)

    first = parsed[0]
    self.assertEqual(first.entry, "C01454")
def test_sample(self):
    """Parse ``KEGG/compound.sample``; expect eight records, first C00023."""
    with open("KEGG/compound.sample") as fixture:
        parsed = list(Compound.parse(fixture))
    self.assertEqual(len(parsed), 8)

    first = parsed[0]
    self.assertEqual(first.entry, "C00023")