Ejemplo n.º 1
0
 def test_gpi_iterator(self):
     """Check the GOA GPI iterator on a gpi-version 1.1 file."""
     with open("UniProt/gp_information.goa_yeast.28.gpi") as stream:
         records = list(GOA.gpi_iterator(stream))
     self.assertEqual(len(records), 300)
     first = records[0]
     self.assertEqual(sorted(first.keys()), sorted(GOA.GPI11FIELDS))
     # Expected values of the first record; asserted in the order listed.
     full_name = (
         "Putative glutamine--fructose-6-phosphate"
         " aminotransferase [isomerizing]"
     )
     expected_fields = (
         ("DB_Object_ID", "A2P2R3"),
         ("DB_Object_Symbol", "YMR084W"),
         ("DB_Object_Name", [full_name]),
         ("DB_Object_Synonym", ["YM084_YEAST", "YMR084W"]),
         ("DB_Object_Type", "protein"),
         ("Taxon", "taxon:559292"),
         ("Parent_Object_ID", ""),
         ("DB_Xref", [""]),
         ("Gene_Product_Properties", ["db_subset=Swiss-Prot"]),
     )
     for field, value in expected_fields:
         self.assertEqual(first[field], value)
Ejemplo n.º 2
0
 def test_gpi_iterator_one_two(self):
     """Check the GOA GPI iterator on a gpi-version 1.2 file."""
     with open("UniProt/goa_human_sample.gpi") as stream:
         records = list(GOA.gpi_iterator(stream))
     self.assertEqual(len(records), 9)
     first = records[0]
     self.assertEqual(sorted(first.keys()), sorted(GOA.GPI12FIELDS))
     # Expected values of the first record; asserted in the order listed.
     expected_fields = (
         ("DB", "UniProtKB"),
         ("DB_Object_ID", "A0A024R1R8"),
         ("DB_Object_Symbol", "hCG_2014768"),
         ("DB_Object_Name", ["HCG2014768, isoform CRA_a"]),
         ("DB_Object_Synonym", ["hCG_2014768"]),
         ("DB_Object_Type", "protein"),
         ("Taxon", "taxon:9606"),
         ("Parent_Object_ID", ""),
         ("DB_Xref", [""]),
         ("Gene_Product_Properties", ["db_subset=TrEMBL"]),
     )
     for field, value in expected_fields:
         self.assertEqual(first[field], value)
Ejemplo n.º 3
0
def parse_gpi(infile, taxon=''):
    """Collect the Swiss-Prot object IDs listed for one taxon in a GPI file.

    Every record produced by ``GOAParser.gpi_iterator`` is examined,
    including the first one.

    Args:
        infile: Path of the gene product information (GPI) file to read.
        taxon: Taxonomy identifier as a string, without the ``taxon:``
            prefix; compared for equality with the part of each record's
            ``Taxon`` field that follows the first colon.

    Returns:
        A ``defaultdict`` without a default factory (so it behaves like a
        plain dict) mapping each matching ``DB_Object_ID`` to ``1``.

    Raises:
        SystemExit: With status 1, after printing a message, when a record
            has no ``Gene_Product_Properties`` field.
    """
    sp_id = defaultdict()

    # The context manager guarantees the file is closed, even when parsing
    # raises or we exit early.
    with open(infile, 'r') as infile_handle:
        for rec in GOAParser.gpi_iterator(infile_handle):
            if 'Gene_Product_Properties' not in rec:
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print("This version of the gp information file does not contain all required information")
                sys.exit(1)

            taxon_parts = rec['Taxon'].split(':')
            properties = rec['Gene_Product_Properties']
            property_parts = properties[0].split('=') if properties else []
            # Records whose taxon or first property lacks its separator
            # cannot match; skip them instead of failing with IndexError.
            if len(taxon_parts) < 2 or len(property_parts) < 2:
                continue

            taxid = taxon_parts[1].strip()
            db = property_parts[1].strip()
            if db.startswith('Swiss-Prot') and taxon == taxid:
                sp_id[rec['DB_Object_ID']] = 1

    return sp_id
Ejemplo n.º 4
0
 def test_gpi_iterator(self):
     """Check the records produced by the GOA GPI file iterator."""
     with open('UniProt/gp_information.goa_yeast.28.gpi', 'r') as stream:
         entries = [entry for entry in GOA.gpi_iterator(stream)]
     check = self.assertEqual
     check(len(entries), 300)
     head = entries[0]
     check(sorted(head.keys()), sorted(GOA.GPI11FIELDS))
     # Field-by-field comparison of the first record
     check(head['DB_Object_ID'], 'A2P2R3')
     check(head['DB_Object_Symbol'], 'YMR084W')
     full_name = (
         'Putative glutamine--fructose-6-phosphate'
         ' aminotransferase [isomerizing]'
     )
     check(head['DB_Object_Name'], [full_name])
     check(head['DB_Object_Synonym'], ['YM084_YEAST', 'YMR084W'])
     check(head['DB_Object_Type'], 'protein')
     check(head['Taxon'], 'taxon:559292')
     check(head['Parent_Object_ID'], '')
     check(head['DB_Xref'], [''])
     check(head['Gene_Product_Properties'], ['db_subset=Swiss-Prot'])