def test_parse2(self):
    """Parsing keywlist2.txt (without header and footer)

    Pulls the first three records out of SwissProt/keywlist2.txt with
    KeyWList.parse and compares their fields with the expected values.
    """
    filename = os.path.join("SwissProt", "keywlist2.txt")
    # The with-block closes the file even when an assertion below fails.
    with open(filename) as handle:
        records = KeyWList.parse(handle)

        # Testing the first record
        # next() works on Python 2.6+ and 3; the .next() method is Python 2 only.
        record = next(records)
        self.assertEqual(record["ID"], "2Fe-2S.")
        self.assertEqual(record["AC"], "KW-0001")
        self.assertEqual(
            record["DE"],
            "Protein which contains at least one 2Fe-2S iron-sulfur cluster: 2 iron atoms complexed to 2 inorganic sulfides and 4 sulfur atoms of cysteines from the protein."
        )
        self.assertEqual(
            record["SY"],
            "Fe2S2; [2Fe-2S] cluster; [Fe2S2] cluster; Fe2/S2 (inorganic) cluster; Di-mu-sulfido-diiron; 2 iron, 2 sulfur cluster binding."
        )
        self.assertEqual(len(record["GO"]), 1)
        self.assertEqual(record["GO"], ["GO:0051537; 2 iron, 2 sulfur cluster binding"])
        self.assertEqual(len(record["HI"]), 2)
        self.assertEqual(record["HI"][0], "Ligand: Iron; Iron-sulfur; 2Fe-2S.")
        self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 2Fe-2S.")
        self.assertEqual(record["CA"], "Ligand.")

        # Testing the second record
        record = next(records)
        self.assertEqual(record["ID"], "3D-structure.")
        self.assertEqual(record["AC"], "KW-0002")
        self.assertEqual(
            record["DE"],
            "Protein, or part of a protein, whose three-dimensional structure has been resolved experimentally (for example by X-ray crystallography or NMR spectroscopy) and whose coordinates are available in the PDB database. Can also be used for theoretical models."
        )
        self.assertEqual(len(record["HI"]), 1)
        self.assertEqual(record["HI"][0], "Technical term: 3D-structure.")
        self.assertEqual(record["CA"], "Technical term.")

        # Testing the third record
        record = next(records)
        self.assertEqual(record["ID"], "3Fe-4S.")
        self.assertEqual(record["AC"], "KW-0003")
        self.assertEqual(
            record["DE"],
            "Protein which contains at least one 3Fe-4S iron-sulfur cluster: 3 iron atoms complexed to 4 inorganic sulfides and 3 sulfur atoms of cysteines from the protein. In a number of iron-sulfur proteins, the 4Fe-4S cluster can be reversibly converted by oxidation and loss of one iron ion to a 3Fe-4S cluster."
        )
        self.assertEqual(record["SY"], "")
        self.assertEqual(len(record["GO"]), 1)
        self.assertEqual(record["GO"], ['GO:0051538; 3 iron, 4 sulfur cluster binding'])
        self.assertEqual(len(record["HI"]), 2)
        self.assertEqual(record["HI"][0], "Ligand: Iron; Iron-sulfur; 3Fe-4S.")
        self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 3Fe-4S.")
        self.assertEqual(record["CA"], "Ligand.")
def test_parse(self):
    """Parsing keywlist.txt

    Pulls the first three records out of SwissProt/keywlist.txt with
    KeyWList.parse and compares their fields with the expected values.
    The second record is a category entry, identified by "IC" instead
    of "ID".
    """
    filename = os.path.join("SwissProt", "keywlist.txt")
    # The with-block closes the file even when an assertion below fails.
    with open(filename) as handle:
        records = KeyWList.parse(handle)

        # Testing the first record
        # next() works on Python 2.6+ and 3; the .next() method is Python 2 only.
        record = next(records)
        self.assertEqual(record["ID"], "2Fe-2S.")
        self.assertEqual(record["AC"], "KW-0001")
        self.assertEqual(
            record["DE"],
            "Protein which contains at least one 2Fe-2S iron-sulfur cluster: 2 iron atoms complexed to 2 inorganic sulfides and 4 sulfur atoms of cysteines from the protein."
        )
        self.assertEqual(
            record["SY"],
            "Fe2S2; [2Fe-2S] cluster; [Fe2S2] cluster; Fe2/S2 (inorganic) cluster; Di-mu-sulfido-diiron; 2 iron, 2 sulfur cluster binding."
        )
        self.assertEqual(len(record["GO"]), 1)
        self.assertEqual(record["GO"], ["GO:0051537; 2 iron, 2 sulfur cluster binding"])
        self.assertEqual(len(record["HI"]), 2)
        self.assertEqual(record["HI"][0], "Ligand: Iron; Iron-sulfur; 2Fe-2S.")
        self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 2Fe-2S.")
        self.assertEqual(record["CA"], "Ligand.")

        # Testing the second record
        record = next(records)
        self.assertEqual(record["IC"], "Molecular function.")
        self.assertEqual(record["AC"], "KW-9992")
        self.assertEqual(
            record["DE"],
            "Keywords assigned to proteins due to their particular molecular function."
        )

        # Testing the third record
        record = next(records)
        self.assertEqual(record["ID"], "Zymogen.")
        self.assertEqual(record["AC"], "KW-0865")
        self.assertEqual(
            record["DE"],
            "The enzymatically inactive precursor of mostly proteolytic enzymes."
        )
        self.assertEqual(record["SY"], "Proenzyme.")
        self.assertEqual(len(record["HI"]), 1)
        self.assertEqual(record["HI"][0], "PTM: Zymogen.")
        self.assertEqual(record["CA"], "PTM.")
def test_parse2(self):
    """Parsing keywlist2.txt (without header and footer)

    Checks the ID, AC, DE, SY, GO, HI and CA fields of the first three
    records that KeyWList.parse yields for SwissProt/keywlist2.txt.
    """
    filename = os.path.join("SwissProt", "keywlist2.txt")
    # Context manager so the handle is released even if an assertion fails;
    # a trailing handle.close() would be skipped in that case.
    with open(filename) as handle:
        records = KeyWList.parse(handle)

        # Testing the first record
        record = next(records)
        self.assertEqual(record["ID"], "2Fe-2S.")
        self.assertEqual(record["AC"], "KW-0001")
        self.assertEqual(record["DE"], "Protein which contains at least one 2Fe-2S iron-sulfur cluster: 2 iron atoms complexed to 2 inorganic sulfides and 4 sulfur atoms of cysteines from the protein.")
        self.assertEqual(record["SY"], "Fe2S2; [2Fe-2S] cluster; [Fe2S2] cluster; Fe2/S2 (inorganic) cluster; Di-mu-sulfido-diiron; 2 iron, 2 sulfur cluster binding.")
        self.assertEqual(len(record["GO"]), 1)
        self.assertEqual(record["GO"], ["GO:0051537; 2 iron, 2 sulfur cluster binding"])
        self.assertEqual(len(record["HI"]), 2)
        self.assertEqual(record["HI"][0], "Ligand: Iron; Iron-sulfur; 2Fe-2S.")
        self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 2Fe-2S.")
        self.assertEqual(record["CA"], "Ligand.")

        # Testing the second record
        record = next(records)
        self.assertEqual(record["ID"], "3D-structure.")
        self.assertEqual(record["AC"], "KW-0002")
        self.assertEqual(record["DE"], "Protein, or part of a protein, whose three-dimensional structure has been resolved experimentally (for example by X-ray crystallography or NMR spectroscopy) and whose coordinates are available in the PDB database. Can also be used for theoretical models.")
        self.assertEqual(len(record["HI"]), 1)
        self.assertEqual(record["HI"][0], "Technical term: 3D-structure.")
        self.assertEqual(record["CA"], "Technical term.")

        # Testing the third record
        record = next(records)
        self.assertEqual(record["ID"], "3Fe-4S.")
        self.assertEqual(record["AC"], "KW-0003")
        self.assertEqual(record["DE"], "Protein which contains at least one 3Fe-4S iron-sulfur cluster: 3 iron atoms complexed to 4 inorganic sulfides and 3 sulfur atoms of cysteines from the protein. In a number of iron-sulfur proteins, the 4Fe-4S cluster can be reversibly converted by oxidation and loss of one iron ion to a 3Fe-4S cluster.")
        self.assertEqual(record["SY"], "")
        self.assertEqual(len(record["GO"]), 1)
        self.assertEqual(record["GO"], ['GO:0051538; 3 iron, 4 sulfur cluster binding'])
        self.assertEqual(len(record["HI"]), 2)
        self.assertEqual(record["HI"][0], "Ligand: Iron; Iron-sulfur; 3Fe-4S.")
        self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 3Fe-4S.")
        self.assertEqual(record["CA"], "Ligand.")
def test_parse(self):
    """Parsing keywlist.txt

    Checks the fields of the first three records that KeyWList.parse
    yields for SwissProt/keywlist.txt. The second of them is a category
    entry, which is identified by "IC" rather than "ID".
    """
    filename = os.path.join("SwissProt", "keywlist.txt")
    # Context manager so the handle is released even if an assertion fails;
    # a trailing handle.close() would be skipped in that case.
    with open(filename) as handle:
        records = KeyWList.parse(handle)

        # Testing the first record
        record = next(records)
        self.assertEqual(record["ID"], "2Fe-2S.")
        self.assertEqual(record["AC"], "KW-0001")
        self.assertEqual(record["DE"], "Protein which contains at least one 2Fe-2S iron-sulfur cluster: 2 iron atoms complexed to 2 inorganic sulfides and 4 sulfur atoms of cysteines from the protein.")
        self.assertEqual(record["SY"], "Fe2S2; [2Fe-2S] cluster; [Fe2S2] cluster; Fe2/S2 (inorganic) cluster; Di-mu-sulfido-diiron; 2 iron, 2 sulfur cluster binding.")
        self.assertEqual(len(record["GO"]), 1)
        self.assertEqual(record["GO"], ["GO:0051537; 2 iron, 2 sulfur cluster binding"])
        self.assertEqual(len(record["HI"]), 2)
        self.assertEqual(record["HI"][0], "Ligand: Iron; Iron-sulfur; 2Fe-2S.")
        self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 2Fe-2S.")
        self.assertEqual(record["CA"], "Ligand.")

        # Testing the second record
        record = next(records)
        self.assertEqual(record["IC"], "Molecular function.")
        self.assertEqual(record["AC"], "KW-9992")
        self.assertEqual(record["DE"], "Keywords assigned to proteins due to their particular molecular function.")

        # Testing the third record
        record = next(records)
        self.assertEqual(record["ID"], "Zymogen.")
        self.assertEqual(record["AC"], "KW-0865")
        self.assertEqual(record["DE"], "The enzymatically inactive precursor of mostly proteolytic enzymes.")
        self.assertEqual(record["SY"], "Proenzyme.")
        self.assertEqual(len(record["HI"]), 1)
        self.assertEqual(record["HI"][0], "PTM: Zymogen.")
        self.assertEqual(record["CA"], "PTM.")
Created on Sat Jan 03 18:21:46 2015 @author: gabriel """ #script para análise das proteínas #importações from Bio.SwissProt import KeyWList import urllib from Bio import SwissProt from Bio.PDB import PDBList, PDBParser #análise geral de proteinas (baseado no código desenvolvido pelo grupo 10) handle = open("uniprot-mylist.txt") records = KeyWList.parse(handle) codes = [] review = open("proteinas_uniprot.txt", "w") for record in records: review.write("\n" + record['ID'] + "\n") review.write("\n" + record['DE'] + "\n") codes.append( record['AC'][:-1]) #remover ";" no final de cada código de acesso review.close() #análise individual das proteínas relevantes (baseado nos códigos desenvolvidos pelos grupos 10 e 7) f = open("analise_reviewed.txt", "w") for code in codes: data = urllib.urlopen("http://www.uniprot.org/uniprot/" + code + ".txt") while True: try:
#!/usr/bin/env python
"""Print SwissProt proteome records, their Pfam cross-references, and keyword entries."""
import os
import re
from multiprocessing import Pool

from Bio import SeqIO
from Bio import SwissProt
from Bio.SwissProt import KeyWList

# Only this proteome is parsed. data/proteomes/UP000009229_760568.txt is
# presumably an alternative input; opening it here just to rebind `handle`
# would leak the file object without ever reading it.
with open("data/proteomes/UP000001554_7739.txt") as handle:
    for record in SwissProt.parse(handle):
        print(record.__dict__)
        print(record.accessions)
        # Show only the cross-references whose first element names Pfam.
        for db in record.cross_references:
            if db[0] == "Pfam":
                # Call form of print: valid on Python 3 and prints the same
                # single value on Python 2.
                print(db)

keywordfile = "/pfs/nobackup/home/w/wbasile/annotate_uniprot_proteomes/bin/keywlist.txt"
# Dump every entry of the keyword list; the with-block closes the file
# once the parser has been exhausted.
with open(keywordfile) as handle:
    for keyword in KeyWList.parse(handle):
        print(keyword)
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 1 12:16:35 2016

@author: Gungnir
"""
import gzip
import os
import re
import urllib

from Bio import ExPASy
from Bio import SwissProt
from Bio.SwissProt import KeyWList

# Print the identifier and description of every entry in keywlist.txt.
# The with-block closes the file once the parser has been exhausted.
with open("keywlist.txt") as keywlist:
    records = KeyWList.parse(keywlist)
    for record in records:
        # Keyword-category entries are keyed by 'IC' rather than 'ID', so
        # fall back to it instead of raising KeyError on those entries.
        print(record.get('ID', record.get('IC')))
        print(record['DE'])

work_dir = os.getcwd()
#data_dir = os.path.join()

# NOTE(review): db_pdb is left open on purpose; code outside this view may
# still read from it (e.g. via SwissProt.parse). Close it once it is no
# longer needed.
db_pdb = open('uniprot_sprot.dat')
#descriptions = [record.description for record in SwissProt.parse(db_pdb)]
print(dir(db_pdb))
proteinList.append(record) print "Record added to list" time.sleep(1) f.close() -------------------- from Bio.SwissProt import KeyWList dat = [] # initialize empty list # within each list have a protein # in that protein have a list of attributes # including ID, Number of transmembrane mentions handle = open("keywlist.txt") records = KeyWList.parse(handle) for record in records: print(record['ID']) # Go through each object in proteinList and save only those that have a single # mention of TRANSMEM noTMD = [] # initialize a list to keep track of how many TMD are in the protein locList = [] # initialize list to save the transmembrane tmdStart = []# initialize a list to save the start of the TMD tmdEnd = [] # initialize a list to save the end of the TMD entryName = [] # initialize a list to save the name of the sequence index = [] # sequence = [] #http://stackoverflow.com/questions/2917372/how-to-search-a-list-of-tuples-in-python#