예제 #1
0
def filter_in_experimental(handle):
    """Write the records of a GAF file that pass the GO_EXP_EC evidence filter.

    Records are read from ``handle`` with ``upg.gafiterator``. Every record
    for which ``upg.record_has(record, {'Evidence': GO_EXP_EC})`` is true is
    written with ``upg.writerec`` to a new file named
    ``<handle.name>.exp_evidence``, preceded by a ``!gaf-version: 2.0``
    header line.

    :param handle: open, readable GAF file object; its ``name`` attribute is
        used to build the output path.
    """
    # The context manager closes the output file even when parsing or
    # writing raises part-way through the input.
    with open(handle.name + ".exp_evidence", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        # The filter does not depend on the record, so build it once.
        wanted = {'Evidence': GO_EXP_EC}
        for inrec in upg.gafiterator(handle):
            if upg.record_has(inrec, wanted):
                upg.writerec(inrec, outhandle)
예제 #2
0
def filter_in_IEA(handle):
    """Write the records of a GAF file whose evidence code is ``'IEA'``.

    Records are read from ``handle`` with ``upg.gafiterator``; those whose
    ``'Evidence'`` field equals ``'IEA'`` are written with ``upg.writerec``
    to a new file named ``<handle.name>.IEA``, preceded by a
    ``!gaf-version: 2.0`` header line.

    :param handle: open, readable GAF file object; its ``name`` attribute is
        used to build the output path.
    """
    # The context manager closes the output file even when parsing or
    # writing raises part-way through the input.
    with open(handle.name + ".IEA", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for inrec in upg.gafiterator(handle):
            if inrec['Evidence'] == 'IEA':
                upg.writerec(inrec, outhandle)
예제 #3
0
def extract_gaf(rec, outfile, GAFFIELDS, record, sp_id, taxon):
    """Write ``rec`` and append its flattened form to ``record`` if selected.

    A record is selected when its ``'DB_Object_ID'`` is a key of ``sp_id``.
    Selected records are written with ``GOAParser.writerec`` and a tuple of
    their field values is appended to ``record``; list-valued fields are
    joined with ``'|'``. The tuple layout depends on ``len(GAFFIELDS)``
    (15 or 17 fields); for any other length an empty tuple is appended.

    :param rec: mapping of GAF field names to values for one annotation.
    :param outfile: output handle passed through to ``GOAParser.writerec``.
    :param GAFFIELDS: sequence of field names; only its length is inspected
        here, the sequence itself is forwarded to ``GOAParser.writerec``.
    :param record: list that collects the flattened tuples (mutated in place).
    :param sp_id: container of accepted ``DB_Object_ID`` values.
    :param taxon: unused; kept for interface compatibility.
    :returns: the same ``record`` list that was passed in.
    """
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if rec['DB_Object_ID'] not in sp_id:
        return record

    GOAParser.writerec(rec, outfile, GAFFIELDS)

    n_fields = len(GAFFIELDS)
    if n_fields == 15:
        t = (rec['DB'], rec['DB_Object_ID'], rec['DB_Object_Symbol'],
             '|'.join(rec['Qualifier']), rec['GO_ID'],
             '|'.join(rec['DB:Reference']), rec['Evidence'],
             '|'.join(rec['With']), rec['Aspect'], rec['DB_Object_Name'],
             '|'.join(rec['Synonym']), rec['DB_Object_Type'],
             '|'.join(rec['Taxon_ID']), rec['Date'], rec['Assigned_By'])
    elif n_fields == 17:
        # NOTE(review): DB_Object_Name is joined with '|' here but used as-is
        # in the 15-field branch -- confirm the parser yields a list for it
        # in the 17-field format.
        t = (rec['DB'], rec['DB_Object_ID'], rec['DB_Object_Symbol'],
             '|'.join(rec['Qualifier']), rec['GO_ID'],
             '|'.join(rec['DB:Reference']), rec['Evidence'],
             '|'.join(rec['With']), rec['Aspect'],
             '|'.join(rec['DB_Object_Name']), '|'.join(rec['Synonym']),
             rec['DB_Object_Type'], '|'.join(rec['Taxon_ID']),
             rec['Date'], rec['Assigned_By'], rec['Annotation_Extension'],
             rec['Gene_Product_Form_ID'])
    else:
        # Unrecognised field count: keep the historical behaviour of
        # recording an empty tuple for the selected record.
        t = ()

    record.append(t)
    return record
예제 #4
0
def all_exclusive_IEA(handle):
    """Write every record of the groups accepted by ``exclusive_IEA``.

    ``upg.gafbyproteiniterator(handle)`` yields groups of records; each group
    for which ``exclusive_IEA(group)`` is true has all of its records written
    with ``upg.writerec`` to a new file named
    ``<handle.name>.exclusive_IEA``, preceded by a ``!gaf-version: 2.0``
    header line.

    :param handle: open, readable GAF file object; its ``name`` attribute is
        used to build the output path.
    """
    # The context manager closes the output file even when parsing or
    # writing raises part-way through the input.
    with open(handle.name + ".exclusive_IEA", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for protrec in upg.gafbyproteiniterator(handle):
            if exclusive_IEA(protrec):
                for outrec in protrec:
                    upg.writerec(outrec, outhandle)
예제 #5
0
def all_hasnt_experimental(handle):
    """Write every record of the groups rejected by ``has_experimental``.

    ``upg.gafbyproteiniterator(handle)`` yields groups of records; each group
    for which ``has_experimental(group)`` is false has all of its records
    written with ``upg.writerec`` to a new file named
    ``<handle.name>.noexp``, preceded by a ``!gaf-version: 2.0`` header line.

    :param handle: open, readable GAF file object; its ``name`` attribute is
        used to build the output path.
    """
    # The context manager closes the output file even when parsing or
    # writing raises part-way through the input.
    with open(handle.name + ".noexp", "w") as outhandle:
        outhandle.write('!gaf-version: 2.0\n')
        for protrec in upg.gafbyproteiniterator(handle):
            if not has_experimental(protrec):
                for outrec in protrec:
                    upg.writerec(outrec, outhandle)
예제 #6
0
    def test_selection_writing(self):
        """Test record_has, and writerec.

        Adapted from Bio.UniProt.GOA.py by Iddo Friedberg.

        Records of ``UniProt/goa_yeast.gaf`` matching all three filters are
        written to a temporary file with ``GOA.writerec`` and read back with
        ``GOA.gafiterator``; the round-tripped records must equal the
        selected ones.
        """
        # Fields to filter
        evidence = {"Evidence": {"ND"}}
        synonym = {"Synonym": {"YA19A_YEAST", "YAL019W-A"}}
        taxon_id = {"Taxon_ID": {"taxon:559292"}}

        # Open a file and select records as per filter
        with open("UniProt/goa_yeast.gaf", "r") as handle:
            filtered = [
                rec
                for rec in GOA.gafiterator(handle)
                if GOA.record_has(rec, taxon_id)
                and GOA.record_has(rec, evidence)
                and GOA.record_has(rec, synonym)
            ]

        # Check number of filtered records
        self.assertEqual(len(filtered), 3)

        # Temporary file to test writerec. It is created only after the
        # count check and removed in ``finally`` so that a failing assertion
        # or I/O error never leaves it behind.
        f_number, f_filtered = tempfile.mkstemp()
        os.close(f_number)
        try:
            # Write the filtered records to a file using writerec
            with open(f_filtered, "w") as handle:
                handle.write("!gaf-version: 2.1 \n")  # Adding file header
                for rec in filtered:
                    GOA.writerec(rec, handle)

            # Open and read the file containing the filtered records
            with open(f_filtered, "r") as handle:
                recs_ff = list(GOA.gafiterator(handle))
        finally:
            # Delete test file
            os.remove(f_filtered)

        # Compare, recs saved by writerec and filtered recs
        self.assertEqual(filtered, recs_ff)
예제 #7
0
def extract_taxon(handle, in_taxid):
    """
    Create a GAF file from a single taxon

    Records are read from ``handle`` with ``upg.gafiterator``. A record is
    kept when the part after the first ``':'`` of its first ``'Taxon_ID'``
    entry (e.g. ``559292`` in ``taxon:559292``) equals the requested taxid.
    Kept records are written to ``<handle.name>.taxon.<taxid>`` after a
    ``!gaf-version: 2.0`` header line.

    :param handle: open, readable GAF file object; its ``name`` attribute is
        used to build the output path.
    :param in_taxid: taxonomy id, either an ``int`` or a string (surrounding
        whitespace is stripped from strings).
    """
    header = "!gaf-version: 2.0\n"
    # Only strings are stripped: calling .strip() unconditionally raised
    # AttributeError for int input and discarded the str() conversion.
    if isinstance(in_taxid, int):
        taxid = str(in_taxid)
    else:
        taxid = in_taxid.strip()
    # The context manager closes the output file even when parsing or
    # writing raises part-way through the input.
    with open("%s.taxon.%s" % (handle.name, taxid), 'w') as outfile:
        outfile.write(header)
        for inrec in upg.gafiterator(handle):
            if inrec['Taxon_ID'][0].split(':')[1] == taxid:
                upg.writerec(inrec, outfile)
예제 #8
0
def extract_taxa(handle, taxalist):
    """
    Create a GAF file from multiple taxa
    taxalist is a list of strings of taxid. Don't use list of int

    One output file named ``<handle.name>.taxon.<taxid>`` is created per
    distinct taxid, each starting with a ``!gaf-version: 2.0`` header line.
    Records are read from ``handle`` with ``upg.gafiterator`` and routed by
    the part after the first ``':'`` of their first ``'Taxon_ID'`` entry;
    records of taxa that were not requested are skipped.

    :param handle: open, readable GAF file object; its ``name`` attribute is
        used to build the output paths.
    :param taxalist: iterable of taxid strings.
    """
    outfiles = {}
    header = "!gaf-version: 2.0\n"
    try:
        for taxid in taxalist:
            # A repeated taxid would reopen (and truncate) the same path and
            # orphan the first handle, so each taxid is opened only once.
            if taxid in outfiles:
                continue
            outfiles[taxid] = open("%s.taxon.%s" % (handle.name, taxid), 'w')
            outfiles[taxid].write(header)
        for inrec in upg.gafiterator(handle):
            cur_taxid = inrec['Taxon_ID'][0].split(':')[1]
            # Look the handle up in the dict rather than scanning the list.
            outfile = outfiles.get(cur_taxid)
            if outfile is not None:
                upg.writerec(inrec, outfile)
    finally:
        # Close everything that was opened, even if an open(), the parser or
        # a write failed part-way through.
        for outfile in outfiles.values():
            outfile.close()
예제 #9
0
def split_to_ontologies(handle):
    """Split a GAF file into three ontology files.

    Records are read from ``handle`` with ``upg.gafiterator`` and written
    with ``upg.writerec`` to ``<handle.name>.MFO`` (Aspect ``'F'``),
    ``<handle.name>.BPO`` (Aspect ``'P'``) or ``<handle.name>.CCO``
    (Aspect ``'C'``). Each output file starts with a ``!gaf-version: 2.0``
    header line.

    :param handle: open, readable GAF file object; its ``name`` attribute is
        used to build the output paths.
    :raises ValueError: if a record has an Aspect other than F, P or C.
    """
    header = "!gaf-version: 2.0\n"
    # A single ``with`` closes all three files even when an unknown aspect
    # (or a parser/write error) aborts the loop.
    with open("%s.MFO" % handle.name, 'w') as out_mfo, \
            open("%s.BPO" % handle.name, 'w') as out_bpo, \
            open("%s.CCO" % handle.name, 'w') as out_cco:
        by_aspect = {'F': out_mfo, 'P': out_bpo, 'C': out_cco}
        for outfile in (out_bpo, out_mfo, out_cco):
            outfile.write(header)
        for inrec in upg.gafiterator(handle):
            outfile = by_aspect.get(inrec['Aspect'])
            if outfile is None:
                # Call syntax: ``raise ValueError, msg`` is a syntax error
                # in Python 3.
                raise ValueError(
                    'unknown ontology aspect %s' % inrec['Aspect'])
            upg.writerec(inrec, outfile)
예제 #10
0
#!/usr/bin/env python
import sys
import argparse
import target_prep as tp
from Bio.UniProt import GOA as upg
if __name__ == '__main__':
    # Command line: FIELD VALUE[,VALUE...] INFILE [OUTFILE]
    # Records of INFILE for which upg.record_has(record, {FIELD: {VALUE, ...}})
    # is true are written to OUTFILE, or to standard output when exactly
    # three arguments are given.
    if len(sys.argv) < 4:
        # Fail with a usage message instead of a bare IndexError.
        sys.exit("usage: %s FIELD VALUE[,VALUE...] INFILE [OUTFILE]"
                 % sys.argv[0])
    goodvals = {sys.argv[1]: set(sys.argv[2].split(','))}
    to_file = len(sys.argv) == 5
    outhandle = open(sys.argv[4], "w") if to_file else sys.stdout
    try:
        outhandle.write('!gaf-version: 2.0\n')
        # The input file is closed as soon as iteration finishes or fails.
        with open(sys.argv[3]) as inhandle:
            for inrec in upg.gafiterator(inhandle):
                if upg.record_has(inrec, goodvals):
                    upg.writerec(inrec, outhandle)
    finally:
        # Only close a file this script opened; sys.stdout is left alone.
        if to_file:
            outhandle.close()