def test_selection_writing(self):
    """Test record_has, and writerec.

    Adapted from Bio.UniProt.GOA.py by Iddo Friedberg.

    Records of ``UniProt/goa_yeast.gaf`` that match all three field filters
    are selected with ``GOA.record_has``, written to a temporary file with
    ``GOA.writerec``, parsed back with ``GOA.gafiterator`` and compared with
    the in-memory selection.
    """
    # Fields to filter
    evidence = {"Evidence": {"ND"}}
    synonym = {"Synonym": {"YA19A_YEAST", "YAL019W-A"}}
    taxon_id = {"Taxon_ID": {"taxon:559292"}}

    # Temporary file to test writerec; only the path is needed, so the
    # descriptor returned by mkstemp is closed right away.
    f_number, f_filtered = tempfile.mkstemp()
    os.close(f_number)

    try:
        # Open a file and select records as per filter
        with open("UniProt/goa_yeast.gaf", "r") as handle:
            filtered = [
                rec
                for rec in GOA.gafiterator(handle)
                if GOA.record_has(rec, taxon_id)
                and GOA.record_has(rec, evidence)
                and GOA.record_has(rec, synonym)
            ]

        # Check number of filtered records
        self.assertEqual(len(filtered), 3)

        # Write the filtered records to a file using writerec
        with open(f_filtered, "w") as handle:
            handle.write("!gaf-version: 2.1 \n")  # Adding file header
            for rec in filtered:
                GOA.writerec(rec, handle)

        # Open and read the file containing the filtered records
        with open(f_filtered, "r") as handle:
            recs_ff = list(GOA.gafiterator(handle))
    finally:
        # Delete test file even when an assertion or I/O step above fails,
        # so failed runs do not leave temporary files behind.
        os.remove(f_filtered)

    # Compare, recs saved by writerec and filtered recs
    self.assertEqual(filtered, recs_ff)
def filter_in_experimental(handle):
    """Write the records of *handle* whose evidence code is in GO_EXP_EC.

    The output goes to a new file named ``handle.name + ".exp_evidence"``,
    which starts with a ``!gaf-version: 2.0`` header line followed by every
    matching record as written by ``upg.writerec``.

    Args:
        handle: open file object with a ``name`` attribute, iterated with
            ``upg.gafiterator``.
    """
    # Build the filter once instead of on every record.
    experimental = {'Evidence': GO_EXP_EC}
    # The context manager closes the output file even if parsing or writing
    # raises part-way through.
    with open(handle.name + ".exp_evidence", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for inrec in upg.gafiterator(handle):
            if upg.record_has(inrec, experimental):
                upg.writerec(inrec, outhandle)
def has_experimental(goa_reclist):
    """Return True if any record in *goa_reclist* has an evidence code in GO_EXP_EC.

    Args:
        goa_reclist: iterable of records accepted by ``upg.record_has``.

    Returns:
        bool: True as soon as one record matches, False when none does
        (including when the iterable is empty).
    """
    experimental = {'Evidence': GO_EXP_EC}
    # any() stops at the first match, like the explicit break it replaces.
    return any(upg.record_has(rec, experimental) for rec in goa_reclist)
def read_gaf_write_tab(gaf_file, include_mfo, outfile):
    """Write "<DB_Object_ID>TAB<GO_ID>" lines for selected GAF records.

    A record is selected when its Aspect is accepted and its Evidence is one
    of EXP, IDA, IPI, IMP, IGI, IEP, TAS or IC.

    Args:
        gaf_file: path of the GAF file to read.
        include_mfo: when true, aspects 'P' and 'F' are accepted; otherwise
            only 'P'.
        outfile: path of the tab-separated file to create (overwritten).

    Returns:
        int: number of lines written to *outfile*.
    """
    Evidence = {'Evidence': {'EXP', 'IDA', 'IPI', 'IMP', 'IGI', 'IEP', 'TAS', 'IC'}}
    if include_mfo:
        Aspect = {'Aspect': {'P', 'F'}}
    else:
        Aspect = {'Aspect': {'P'}}

    counter = 0
    # Both files are closed on every exit path, including exceptions raised
    # while parsing. The output is opened first, as before.
    with open(outfile, 'w') as outhandle, open(gaf_file, 'r') as ingafhandle:
        for rec in GOA.gafiterator(ingafhandle):
            if GOA.record_has(rec, Aspect) and GOA.record_has(rec, Evidence):
                outhandle.write("%s\t%s\n" % (rec['DB_Object_ID'], rec['GO_ID']))
                counter += 1
    return counter
# NOTE(review): this snippet started at the docstring; the ``def`` line below
# is reconstructed from the call ``get_citations(p[5:])`` and the use of
# ``pmid`` in the body -- confirm against the original file.
def get_citations(pmid):
    """Returns the pmids of the papers this paper cites.

    Args:
        pmid: PubMed identifier passed as ``id`` to ``ez.efetch``.

    Returns:
        list of str: the ``PMID`` of every entry in the record's
        ``MedlineCitation/CommentsCorrectionsList`` whose ``RefType``
        attribute is ``'Cites'``.
    """
    handle = ez.efetch("pubmed", id=pmid, retmode="xml")
    try:
        # Only the first parsed record is used.
        pubmed_rec = next(ez.parse(handle))
    finally:
        handle.close()
    return [
        str(ref['PMID'])
        for ref in pubmed_rec['MedlineCitation']['CommentsCorrectionsList']
        if ref.attributes['RefType'] == 'Cites'
    ]


# Collect, for every IGI-supported annotation, each PubMed reference and the
# PMIDs it cites, then write the whole report in one go.
Evi_Aspect = {"Evidence": set(["IGI"])}
report = ["GO-annotated proteins supported by IGI evidence (Inferred from Genetic Interaction)\n"]
with open("papers and citations.txt", "w") as f, \
        open("gene_association.goa_yeast") as handle:
    proteins = goa.gafiterator(handle)
    for protein in proteins:
        if not goa.record_has(protein, Evi_Aspect):
            continue
        for p in protein['DB:Reference']:
            # References look like "PMID:<number>"; p[5:] strips the prefix.
            if p[:4] == "PMID":
                report.append("Main PubMed reference: " + p + "\n")
                report.extend(cit + " " for cit in get_citations(p[5:]))
                report.append("\n")
    st = "".join(report)
    f.write(st)
#!/usr/bin/env python
# Filter a GAF file by field value.
#
# Usage: <script> FIELD VALUE[,VALUE...] INPUT_GAF [OUTPUT_GAF]
# Records of INPUT_GAF whose FIELD matches one of the comma-separated values
# are written, after a "!gaf-version: 2.0" header, to OUTPUT_GAF when exactly
# four arguments are given, otherwise to standard output.
import sys
import argparse
import target_prep as tp
from Bio.UniProt import GOA as upg

if __name__ == '__main__':
    # parser = argparse.ArgumentParser(description='Filter by field')
    # parser.add_argument('-o','--output')
    # parser.add_argument('-f','--field')
    if len(sys.argv) < 4:
        sys.exit("usage: %s FIELD VALUE[,VALUE...] INPUT_GAF [OUTPUT_GAF]"
                 % sys.argv[0])

    write_to_file = len(sys.argv) == 5
    outhandle = open(sys.argv[4], "w") if write_to_file else sys.stdout
    try:
        outhandle.write('!gaf-version: 2.0\n')
        goodvals = {sys.argv[1]: set(sys.argv[2].split(','))}
        with open(sys.argv[3]) as inhandle:
            for inrec in upg.gafiterator(inhandle):
                if upg.record_has(inrec, goodvals):
                    upg.writerec(inrec, outhandle)
    finally:
        # Close only a file we opened ourselves; never close sys.stdout.
        if write_to_file:
            outhandle.close()
"""
Retrieving protein references from the yeast association file in GAF 2.0
format according to different criteria
"""

# Retrieve all references cited to annotate proteins with Experimental
# Evidence Codes
Evidences = {"Evidence": set(["EXP", "IDA", "IPI", "IMP", "IGI", "IEP"])}
# open the association gene file of the yeast; closed when the block exits
with open("gene_association.goa_yeast") as handle:
    proteins = goa.gafiterator(handle)  # iterate over the records in the file
    print ("GO-annotated proteins supported by Experimental Evidence Code")
    for protein in proteins:
        if goa.record_has(protein, Evidences):
            print(protein['DB:Reference'])

# Retrieve all references cited to annotate proteins with Experimental
# Evidence Codes in the Molecular Function aspect of GO
Evi_Aspect = {"Evidence": set(["EXP", "IDA", "IPI", "IMP", "IGI", "IEP"]),
              "Aspect": set(["F"])}
with open("gene_association.goa_yeast") as handle:
    proteins = goa.gafiterator(handle)
    print ("GO-annotated proteins supported by Experimental Evidence Code in the Molecular Function Ontology")
    for protein in proteins:
        if goa.record_has(protein, Evi_Aspect):
            print(protein['DB:Reference'])
# NOTE(review): placed at the end of the script; the original indentation was
# lost, so confirm it was not meant to run once per matching record.
print("\n")