def all_exclusive_IEA(handle):
    """Copy the proteins accepted by ``exclusive_IEA`` into a new GAF file.

    The output is written next to the input, to a file named
    ``handle.name + ".exclusive_IEA"``, and starts with a GAF 2.0 version
    header line.

    handle: open GAF file; it must expose a ``name`` attribute and is read
            protein by protein through ``upg.gafbyproteiniterator``.
    """
    # The context manager guarantees the output file is flushed and closed
    # even if reading, filtering or writing a record raises.
    with open(handle.name + ".exclusive_IEA", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for protrec in upg.gafbyproteiniterator(handle):
            if exclusive_IEA(protrec):
                # protrec is the group of records for one protein; write
                # each of its records individually.
                for outrec in protrec:
                    upg.writerec(outrec, outhandle)
def all_hasnt_experimental(handle):
    """Copy the proteins rejected by ``has_experimental`` into a new GAF file.

    The output is written next to the input, to a file named
    ``handle.name + ".noexp"``, and starts with a GAF 2.0 version header
    line.

    handle: open GAF file; it must expose a ``name`` attribute and is read
            protein by protein through ``upg.gafbyproteiniterator``.
    """
    # The context manager guarantees the output file is flushed and closed
    # even if reading, filtering or writing a record raises.
    with open(handle.name + ".noexp", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for protrec in upg.gafbyproteiniterator(handle):
            if not has_experimental(protrec):
                # protrec is the group of records for one protein; write
                # each of its records individually.
                for outrec in protrec:
                    upg.writerec(outrec, outhandle)
def became_experimental(handle_iea, handle_exp):
    """Identify electronically annotated proteins that became
    experimentally annotated later.

    Every protein that occurs in both inputs has its records from
    ``handle_exp`` written to ``handle_exp.name + ".became_exp"``, after a
    GAF 2.0 version header line.

    handle_iea: gaf file with proteins exclusively IEA annotated
    handle_exp: gaf file with proteins experimentally annotated; it must
                expose a ``name`` attribute

    Note: files should contain only one ontology (either MFO, BPO or CCO)
    to make sense.
    """
    expdict = {}
    # The context manager guarantees the output file is flushed and closed
    # even if parsing either input or writing a record raises.
    with open(handle_exp.name + ".became_exp", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        # First read experimental into memory, keyed by the DB_Object_ID of
        # the first record of each protein group.
        for exprec in upg.gafbyproteiniterator(handle_exp):
            expdict[exprec[0]['DB_Object_ID']] = exprec
        # Now stream the non-experimental file and emit the experimental
        # record group of every protein found in both.
        for iearec in upg.gafbyproteiniterator(handle_iea):
            prot_id = iearec[0]['DB_Object_ID']
            if prot_id in expdict:
                upg.writebyproteinrec(expdict[prot_id], outhandle)
def species_stats(handle):
    """Count the proteins in a gaf file per taxon.

    Each protein group yielded by ``upg.gafbyproteiniterator`` is counted
    once, under the first entry of the ``Taxon_ID`` field of its first
    record.

    Returns a ``defaultdict(int)`` mapping that taxon entry to the number
    of protein groups seen for it.
    """
    proteins_per_taxon = defaultdict(int)
    for protein_group in upg.gafbyproteiniterator(handle):
        first_record = protein_group[0]
        taxon = first_record['Taxon_ID'][0]
        proteins_per_taxon[taxon] += 1
    return proteins_per_taxon
# Tail of the preceding example: `proteins` and `Evidences` are expected to
# have been set up by the code that comes before this section.
print("GO-annotated proteins supported by Experimental Evidence Code")
for protein in proteins:
    if goa.record_has(protein, Evidences):
        print(protein['DB:Reference'])

# Retrieve all references cited to annotate proteins with Experimental
# Evidence Codes in the Molecular Function aspect of GO.
# The file is opened in a context manager so it is closed once the records
# have been consumed.
with open("gene_association.goa_yeast") as handle:
    proteins = goa.gafiterator(handle)
    Evi_Aspect = {
        "Evidence": {"EXP", "IDA", "IPI", "IMP", "IGI", "IEP"},
        "Aspect": {"F"},
    }
    print("GO-annotated proteins supported by Experimental Evidence Code in the Molecular Function Ontology")
    for protein in proteins:
        if goa.record_has(protein, Evi_Aspect):
            print(protein['DB:Reference'])
print("\n")

# Retrieve all references cited to support the annotation of the three
# proteins whose DB_Object_IDs are: A2P2R3, P00045, P50101.
with open("gene_association.goa_yeast") as handle:
    # Get all records in the yeast file grouped by object id. The list is
    # built while the file is still open so it stays usable afterwards.
    proteins = list(goa.gafbyproteiniterator(handle))
IDs = {"DB_Object_ID": {"A2P2R3", "P00045", "P50101"}}
for protein in proteins:
    # Each `protein` is a group of records; the first record is used to
    # test the group's object id.
    if goa.record_has(protein[0], IDs):
        print("Protein ID:" + protein[0]["DB_Object_ID"] + "\n" + "References:")
        for p in protein:
            print(p['DB:Reference'])
        # NOTE(review): the original indentation was lost; the separator is
        # emitted once per matching protein here -- confirm.
        print("\n")