def filter_in_experimental(handle):
    """Write the records matching ``GO_EXP_EC`` evidence to a new GAF file.

    Records are read from ``handle`` with ``upg.gafiterator``; every record
    for which ``upg.record_has(rec, {'Evidence': GO_EXP_EC})`` is true is
    written with ``upg.writerec`` to ``<handle.name>.exp_evidence``, after a
    ``!gaf-version: 2.0`` header line.

    handle -- open GAF file object; its ``name`` attribute is used to build
              the output path.
    """
    # Built once: the filter does not change between records.
    wanted = {'Evidence': GO_EXP_EC}
    # The context manager closes the output file even if reading or writing
    # a record raises part-way through.
    with open(handle.name + ".exp_evidence", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for inrec in upg.gafiterator(handle):
            if upg.record_has(inrec, wanted):
                upg.writerec(inrec, outhandle)
def filter_in_IEA(handle):
    """Write the records whose Evidence field is ``'IEA'`` to a new GAF file.

    Records are read from ``handle`` with ``upg.gafiterator`` and the matching
    ones are written with ``upg.writerec`` to ``<handle.name>.IEA``, after a
    ``!gaf-version: 2.0`` header line.

    handle -- open GAF file object; its ``name`` attribute is used to build
              the output path.
    """
    # The context manager closes the output file even if reading or writing
    # a record raises part-way through.
    with open(handle.name + ".IEA", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for inrec in upg.gafiterator(handle):
            if inrec['Evidence'] == 'IEA':
                upg.writerec(inrec, outhandle)
def extract_gaf(rec, outfile, GAFFIELDS, record, sp_id, taxon):
    """Write ``rec`` to ``outfile`` and collect it if its id is in ``sp_id``.

    When ``rec['DB_Object_ID']`` is contained in ``sp_id`` the record is
    written with ``GOAParser.writerec`` and a flat tuple of its fields is
    appended to ``record``; list-valued fields are joined with ``'|'``.

    rec       -- mapping of GAF field name to value for one annotation.
    outfile   -- output handle passed through to ``GOAParser.writerec``.
    GAFFIELDS -- field-name sequence; its length selects the tuple layout
                 (15 or 17 fields). Any other length appends an empty tuple.
    record    -- list that is extended in place and also returned.
    sp_id     -- container of accepted ``DB_Object_ID`` values; anything that
                 supports ``in`` works (dict, set, ...).
    taxon     -- unused; kept so existing callers keep working.

    Returns the same ``record`` list that was passed in.
    """
    # NOTE(review): the tuple is assumed to be collected only for ids found in
    # sp_id (same condition as the write) -- confirm against the callers.
    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    if rec['DB_Object_ID'] not in sp_id:
        return record

    GOAParser.writerec(rec, outfile, GAFFIELDS)

    def joined(key):
        return '|'.join(rec[key])

    t = ()
    nfields = len(GAFFIELDS)
    if nfields in (15, 17):
        # The two layouts share their first 15 columns; only the 17-column
        # layout treats DB_Object_Name as a list to be joined.
        if nfields == 15:
            object_name = rec['DB_Object_Name']
        else:
            object_name = joined('DB_Object_Name')
        t = (
            rec['DB'],
            rec['DB_Object_ID'],
            rec['DB_Object_Symbol'],
            joined('Qualifier'),
            rec['GO_ID'],
            joined('DB:Reference'),
            rec['Evidence'],
            joined('With'),
            rec['Aspect'],
            object_name,
            joined('Synonym'),
            rec['DB_Object_Type'],
            joined('Taxon_ID'),
            rec['Date'],
            rec['Assigned_By'],
        )
        if nfields == 17:
            t += (rec['Annotation_Extension'], rec['Gene_Product_Form_ID'])

    record.append(t)
    return record
def all_exclusive_IEA(handle):
    """Write every protein accepted by ``exclusive_IEA`` to a new GAF file.

    ``upg.gafbyproteiniterator`` yields the records of ``handle`` grouped per
    protein; for each group on which ``exclusive_IEA`` returns true, all of
    its records are written with ``upg.writerec`` to
    ``<handle.name>.exclusive_IEA``, after a ``!gaf-version: 2.0`` header.

    handle -- open GAF file object; its ``name`` attribute is used to build
              the output path.
    """
    # The context manager closes the output file even if reading or writing
    # a record raises part-way through.
    with open(handle.name + ".exclusive_IEA", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for protrec in upg.gafbyproteiniterator(handle):
            if not exclusive_IEA(protrec):
                continue
            for outrec in protrec:
                upg.writerec(outrec, outhandle)
def all_hasnt_experimental(handle):
    """Write every protein rejected by ``has_experimental`` to a new GAF file.

    ``upg.gafbyproteiniterator`` yields the records of ``handle`` grouped per
    protein; for each group on which ``has_experimental`` returns false, all
    of its records are written with ``upg.writerec`` to
    ``<handle.name>.noexp``, after a ``!gaf-version: 2.0`` header.

    handle -- open GAF file object; its ``name`` attribute is used to build
              the output path.
    """
    # The context manager closes the output file even if reading or writing
    # a record raises part-way through.
    with open(handle.name + ".noexp", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for protrec in upg.gafbyproteiniterator(handle):
            if has_experimental(protrec):
                continue
            for outrec in protrec:
                upg.writerec(outrec, outhandle)
def test_selection_writing(self):
    """Test record_has, and writerec.

    Adapted from Bio.UniProt.GOA.py by Iddo Friedberg.

    Selects the records of ``UniProt/goa_yeast.gaf`` that satisfy three
    filters, writes them to a temporary file with ``GOA.writerec``, reads the
    file back with ``GOA.gafiterator`` and checks the round trip is lossless.
    """
    # Fields to filter
    evidence = {"Evidence": {"ND"}}
    synonym = {"Synonym": {"YA19A_YEAST", "YAL019W-A"}}
    taxon_id = {"Taxon_ID": {"taxon:559292"}}
    filters = (taxon_id, evidence, synonym)

    # Open a file and select records as per filter
    with open("UniProt/goa_yeast.gaf", "r") as handle:
        filtered = [
            rec
            for rec in GOA.gafiterator(handle)
            if all(GOA.record_has(rec, wanted) for wanted in filters)
        ]

    # Check number of filtered records
    self.assertEqual(len(filtered), 3)

    # Temporary file to test writerec
    f_number, f_filtered = tempfile.mkstemp()
    os.close(f_number)
    try:
        # Write the filtered records to a file using writerec
        with open(f_filtered, "w") as handle:
            handle.write("!gaf-version: 2.1 \n")  # Adding file header
            for rec in filtered:
                GOA.writerec(rec, handle)

        # Open and read the file containing the filtered records
        with open(f_filtered, "r") as handle:
            recs_ff = list(GOA.gafiterator(handle))
    finally:
        # Delete the test file even when writing or parsing fails, so a
        # failing run does not leave temporary files behind.
        os.remove(f_filtered)

    # Compare, recs saved by writerec and filtered recs
    self.assertEqual(filtered, recs_ff)
def extract_taxon(handle, in_taxid):
    """Create a GAF file from a single taxon.

    Records are read from ``handle`` with ``upg.gafiterator``; those whose
    first ``Taxon_ID`` entry has ``in_taxid`` after the ``':'`` are written
    with ``upg.writerec`` to ``<handle.name>.taxon.<taxid>``, after a
    ``!gaf-version: 2.0`` header line.

    handle   -- open GAF file object; its ``name`` attribute is used to build
                the output path.
    in_taxid -- taxon id as an ``int`` or a string; surrounding whitespace in
                a string is ignored.
    """
    header = "!gaf-version: 2.0\n"
    # Convert before stripping so integer ids are accepted too; calling
    # .strip() directly on an int raises AttributeError.
    taxid = str(in_taxid).strip()
    # The context manager closes the output file even if reading or writing
    # a record raises part-way through.
    with open("%s.taxon.%s" % (handle.name, taxid), 'w') as outfile:
        outfile.write(header)
        for inrec in upg.gafiterator(handle):
            if inrec['Taxon_ID'][0].split(':')[1] == taxid:
                upg.writerec(inrec, outfile)
def extract_taxa(handle, taxalist):
    """Create one GAF file per taxon from a list of taxa.

    For every id in ``taxalist`` a file ``<handle.name>.taxon.<taxid>`` is
    created with a ``!gaf-version: 2.0`` header. Each record read from
    ``handle`` with ``upg.gafiterator`` is written to the file of the taxon
    found after the ``':'`` in its first ``Taxon_ID`` entry, if that taxon
    was requested.

    handle   -- open GAF file object; its ``name`` attribute is used to build
                the output paths.
    taxalist -- list of taxid strings. Don't use a list of int: the ids
                parsed from the records are strings and would never match.
    """
    header = "!gaf-version: 2.0\n"
    outfiles = {}
    try:
        for taxid in taxalist:
            # A repeated id would reopen (and truncate) the same file and
            # orphan the first handle, so each file is opened only once.
            if taxid in outfiles:
                continue
            outfiles[taxid] = open("%s.taxon.%s" % (handle.name, taxid), 'w')
            outfiles[taxid].write(header)

        for inrec in upg.gafiterator(handle):
            cur_taxid = inrec['Taxon_ID'][0].split(':')[1]
            # The dict holds exactly the requested ids, so one O(1) lookup
            # both tests membership and fetches the destination file.
            outfile = outfiles.get(cur_taxid)
            if outfile is not None:
                upg.writerec(inrec, outfile)
    finally:
        # Close whatever was opened, including after a failure part-way
        # through opening the files or processing the records.
        for outfile in outfiles.values():
            outfile.close()
def split_to_ontologies(handle):
    """Split a GAF file into three ontology files.

    Records are read from ``handle`` with ``upg.gafiterator`` and written
    with ``upg.writerec`` according to their ``Aspect`` field: ``'F'`` goes
    to ``<handle.name>.MFO``, ``'P'`` to ``<handle.name>.BPO`` and ``'C'`` to
    ``<handle.name>.CCO``. Each output starts with a ``!gaf-version: 2.0``
    header line.

    handle -- open GAF file object; its ``name`` attribute is used to build
              the output paths.

    Raises ValueError if a record has any other ``Aspect`` value.
    """
    header = "!gaf-version: 2.0\n"
    # All three outputs are closed on exit, including when an unknown aspect
    # or an I/O error aborts the loop.
    with open("%s.MFO" % handle.name, 'w') as out_mfo, \
            open("%s.BPO" % handle.name, 'w') as out_bpo, \
            open("%s.CCO" % handle.name, 'w') as out_cco:
        by_aspect = {'F': out_mfo, 'P': out_bpo, 'C': out_cco}
        for outfile in (out_bpo, out_mfo, out_cco):
            outfile.write(header)

        for inrec in upg.gafiterator(handle):
            aspect = inrec['Aspect']
            outfile = by_aspect.get(aspect)
            if outfile is None:
                # Call form of raise: valid on both Python 2 and Python 3.
                raise ValueError('unknown ontology aspect %s' % aspect)
            upg.writerec(inrec, outfile)
#!/usr/bin/env python
# Filter a GAF file by field value.
#
# Usage: <script> FIELD VALUE[,VALUE...] INFILE [OUTFILE]
#
# Records of INFILE for which upg.record_has(rec, {FIELD: {VALUE, ...}}) is
# true are written, after a '!gaf-version: 2.0' header, to OUTFILE when it is
# given and to standard output otherwise.
import argparse
import sys

from Bio.UniProt import GOA as upg

import target_prep as tp


def main(argv):
    """Run the filter for the command line ``argv``; return the exit status.

    argv -- full argument vector including the program name, laid out as
            ``[prog, field, comma_separated_values, infile, (outfile)]``.

    Returns 0 on success and 1 when too few arguments were supplied.
    """
    # TODO: replace positional argv handling with argparse
    # (-o/--output, -f/--field).
    if len(argv) < 4:
        sys.stderr.write(
            "usage: %s FIELD VALUE[,VALUE...] INFILE [OUTFILE]\n" % argv[0])
        return 1

    goodvals = {argv[1]: set(argv[2].split(','))}

    # Only an argv of exactly five entries selects an output file; any other
    # length writes to standard output.
    to_file = len(argv) == 5
    outhandle = open(argv[4], "w") if to_file else sys.stdout
    try:
        outhandle.write('!gaf-version: 2.0\n')
        with open(argv[3]) as inhandle:
            for inrec in upg.gafiterator(inhandle):
                if upg.record_has(inrec, goodvals):
                    upg.writerec(inrec, outhandle)
    finally:
        # Never close sys.stdout; only close a file opened here.
        if to_file:
            outhandle.close()
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))