Beispiel #1
0
    def test_parse2(self):
        """Parsing keywlist2.txt (without header and footer)

        Reads the first three records from the file and compares their
        fields with the expected values.
        """
        filename = os.path.join("SwissProt", "keywlist2.txt")
        # The context manager closes the file even if an assertion fails.
        with open(filename) as handle:
            records = KeyWList.parse(handle)

            # Testing the first record
            # next(records) works on Python 2.6+ and Python 3, unlike
            # the Python 2-only records.next().
            record = next(records)
            self.assertEqual(record["ID"], "2Fe-2S.")
            self.assertEqual(record["AC"], "KW-0001")
            self.assertEqual(
                record["DE"],
                "Protein which contains at least one 2Fe-2S iron-sulfur cluster: 2 iron atoms complexed to 2 inorganic sulfides and 4 sulfur atoms of cysteines from the protein."
            )
            self.assertEqual(
                record["SY"],
                "Fe2S2; [2Fe-2S] cluster; [Fe2S2] cluster; Fe2/S2 (inorganic) cluster; Di-mu-sulfido-diiron; 2 iron, 2 sulfur cluster binding."
            )
            self.assertEqual(len(record["GO"]), 1)
            self.assertEqual(record["GO"],
                             ["GO:0051537; 2 iron, 2 sulfur cluster binding"])
            self.assertEqual(len(record["HI"]), 2)
            self.assertEqual(record["HI"][0],
                             "Ligand: Iron; Iron-sulfur; 2Fe-2S.")
            self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 2Fe-2S.")
            self.assertEqual(record["CA"], "Ligand.")

            # Testing the second record
            record = next(records)
            self.assertEqual(record["ID"], "3D-structure.")
            self.assertEqual(record["AC"], "KW-0002")
            self.assertEqual(
                record["DE"],
                "Protein, or part of a protein, whose three-dimensional structure has been resolved experimentally (for example by X-ray crystallography or NMR spectroscopy) and whose coordinates are available in the PDB database. Can also be used for theoretical models."
            )
            self.assertEqual(len(record["HI"]), 1)
            self.assertEqual(record["HI"][0], "Technical term: 3D-structure.")
            self.assertEqual(record["CA"], "Technical term.")

            # Testing the third record
            record = next(records)
            self.assertEqual(record["ID"], "3Fe-4S.")
            self.assertEqual(record["AC"], "KW-0003")
            self.assertEqual(
                record["DE"],
                "Protein which contains at least one 3Fe-4S iron-sulfur cluster: 3 iron atoms complexed to 4 inorganic sulfides and 3 sulfur atoms of cysteines from the protein. In a number of iron-sulfur proteins, the 4Fe-4S cluster can be reversibly converted by oxidation and loss of one iron ion to a 3Fe-4S cluster."
            )
            self.assertEqual(record["SY"], "")
            self.assertEqual(len(record["GO"]), 1)
            self.assertEqual(record["GO"],
                             ['GO:0051538; 3 iron, 4 sulfur cluster binding'])
            self.assertEqual(len(record["HI"]), 2)
            self.assertEqual(record["HI"][0],
                             "Ligand: Iron; Iron-sulfur; 3Fe-4S.")
            self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 3Fe-4S.")
            self.assertEqual(record["CA"], "Ligand.")
Beispiel #2
0
    def test_parse(self):
        """Parsing keywlist.txt

        Reads the first three records from the file and compares their
        fields with the expected values.
        """
        filename = os.path.join("SwissProt", "keywlist.txt")
        # The context manager closes the file even if an assertion fails.
        with open(filename) as handle:
            records = KeyWList.parse(handle)

            # Testing the first record
            # next(records) works on Python 2.6+ and Python 3, unlike
            # the Python 2-only records.next().
            record = next(records)
            self.assertEqual(record["ID"], "2Fe-2S.")
            self.assertEqual(record["AC"], "KW-0001")
            self.assertEqual(
                record["DE"],
                "Protein which contains at least one 2Fe-2S iron-sulfur cluster: 2 iron atoms complexed to 2 inorganic sulfides and 4 sulfur atoms of cysteines from the protein."
            )
            self.assertEqual(
                record["SY"],
                "Fe2S2; [2Fe-2S] cluster; [Fe2S2] cluster; Fe2/S2 (inorganic) cluster; Di-mu-sulfido-diiron; 2 iron, 2 sulfur cluster binding."
            )
            self.assertEqual(len(record["GO"]), 1)
            self.assertEqual(record["GO"],
                             ["GO:0051537; 2 iron, 2 sulfur cluster binding"])
            self.assertEqual(len(record["HI"]), 2)
            self.assertEqual(record["HI"][0],
                             "Ligand: Iron; Iron-sulfur; 2Fe-2S.")
            self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 2Fe-2S.")
            self.assertEqual(record["CA"], "Ligand.")

            # Testing the second record (keyed by "IC" rather than "ID")
            record = next(records)
            self.assertEqual(record["IC"], "Molecular function.")
            self.assertEqual(record["AC"], "KW-9992")
            self.assertEqual(
                record["DE"],
                "Keywords assigned to proteins due to their particular molecular function."
            )

            # Testing the third record
            record = next(records)
            self.assertEqual(record["ID"], "Zymogen.")
            self.assertEqual(record["AC"], "KW-0865")
            self.assertEqual(
                record["DE"],
                "The enzymatically inactive precursor of mostly proteolytic enzymes."
            )
            self.assertEqual(record["SY"], "Proenzyme.")
            self.assertEqual(len(record["HI"]), 1)
            self.assertEqual(record["HI"][0], "PTM: Zymogen.")
            self.assertEqual(record["CA"], "PTM.")
Beispiel #3
0
    def test_parse2(self):
        """Parsing keywlist2.txt (without header and footer)

        Pulls three records from the parser one at a time and verifies the
        fields of each against known values.
        """
        filename = os.path.join("SwissProt", "keywlist2.txt")
        # Use a context manager so the file is closed even when an assertion
        # below fails; a trailing handle.close() would be skipped then.
        with open(filename) as handle:
            records = KeyWList.parse(handle)

            # Testing the first record
            record = next(records)
            self.assertEqual(record["ID"], "2Fe-2S.")
            self.assertEqual(record["AC"], "KW-0001")
            self.assertEqual(record["DE"], "Protein which contains at least one 2Fe-2S iron-sulfur cluster: 2 iron atoms complexed to 2 inorganic sulfides and 4 sulfur atoms of cysteines from the protein.")
            self.assertEqual(record["SY"], "Fe2S2; [2Fe-2S] cluster; [Fe2S2] cluster; Fe2/S2 (inorganic) cluster; Di-mu-sulfido-diiron; 2 iron, 2 sulfur cluster binding.")
            self.assertEqual(len(record["GO"]), 1)
            self.assertEqual(record["GO"], ["GO:0051537; 2 iron, 2 sulfur cluster binding"])
            self.assertEqual(len(record["HI"]), 2)
            self.assertEqual(record["HI"][0], "Ligand: Iron; Iron-sulfur; 2Fe-2S.")
            self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 2Fe-2S.")
            self.assertEqual(record["CA"], "Ligand.")

            # Testing the second record
            record = next(records)
            self.assertEqual(record["ID"], "3D-structure.")
            self.assertEqual(record["AC"], "KW-0002")
            self.assertEqual(record["DE"], "Protein, or part of a protein, whose three-dimensional structure has been resolved experimentally (for example by X-ray crystallography or NMR spectroscopy) and whose coordinates are available in the PDB database. Can also be used for theoretical models.")
            self.assertEqual(len(record["HI"]), 1)
            self.assertEqual(record["HI"][0], "Technical term: 3D-structure.")
            self.assertEqual(record["CA"], "Technical term.")

            # Testing the third record
            record = next(records)
            self.assertEqual(record["ID"], "3Fe-4S.")
            self.assertEqual(record["AC"], "KW-0003")
            self.assertEqual(record["DE"], "Protein which contains at least one 3Fe-4S iron-sulfur cluster: 3 iron atoms complexed to 4 inorganic sulfides and 3 sulfur atoms of cysteines from the protein. In a number of iron-sulfur proteins, the 4Fe-4S cluster can be reversibly converted by oxidation and loss of one iron ion to a 3Fe-4S cluster.")
            self.assertEqual(record["SY"], "")
            self.assertEqual(len(record["GO"]), 1)
            self.assertEqual(record["GO"], ['GO:0051538; 3 iron, 4 sulfur cluster binding'])
            self.assertEqual(len(record["HI"]), 2)
            self.assertEqual(record["HI"][0], "Ligand: Iron; Iron-sulfur; 3Fe-4S.")
            self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 3Fe-4S.")
            self.assertEqual(record["CA"], "Ligand.")
Beispiel #4
0
    def test_parse(self):
        """Parsing keywlist.txt

        Pulls three records from the parser one at a time and verifies the
        fields of each against known values.
        """
        filename = os.path.join("SwissProt", "keywlist.txt")
        # Use a context manager so the file is closed even when an assertion
        # below fails; a trailing handle.close() would be skipped then.
        with open(filename) as handle:
            records = KeyWList.parse(handle)

            # Testing the first record
            record = next(records)
            self.assertEqual(record["ID"], "2Fe-2S.")
            self.assertEqual(record["AC"], "KW-0001")
            self.assertEqual(record["DE"], "Protein which contains at least one 2Fe-2S iron-sulfur cluster: 2 iron atoms complexed to 2 inorganic sulfides and 4 sulfur atoms of cysteines from the protein.")
            self.assertEqual(record["SY"], "Fe2S2; [2Fe-2S] cluster; [Fe2S2] cluster; Fe2/S2 (inorganic) cluster; Di-mu-sulfido-diiron; 2 iron, 2 sulfur cluster binding.")
            self.assertEqual(len(record["GO"]), 1)
            self.assertEqual(record["GO"], ["GO:0051537; 2 iron, 2 sulfur cluster binding"])
            self.assertEqual(len(record["HI"]), 2)
            self.assertEqual(record["HI"][0], "Ligand: Iron; Iron-sulfur; 2Fe-2S.")
            self.assertEqual(record["HI"][1], "Ligand: Metal-binding; 2Fe-2S.")
            self.assertEqual(record["CA"], "Ligand.")

            # Testing the second record (keyed by "IC" rather than "ID")
            record = next(records)
            self.assertEqual(record["IC"], "Molecular function.")
            self.assertEqual(record["AC"], "KW-9992")
            self.assertEqual(record["DE"], "Keywords assigned to proteins due to their particular molecular function.")

            # Testing the third record
            record = next(records)
            self.assertEqual(record["ID"], "Zymogen.")
            self.assertEqual(record["AC"], "KW-0865")
            self.assertEqual(record["DE"], "The enzymatically inactive precursor of mostly proteolytic enzymes.")
            self.assertEqual(record["SY"], "Proenzyme.")
            self.assertEqual(len(record["HI"]), 1)
            self.assertEqual(record["HI"][0], "PTM: Zymogen.")
            self.assertEqual(record["CA"], "PTM.")
Beispiel #5
0
Created on Sat Jan 03 18:21:46 2015

@author: gabriel
"""

# Protein analysis script

# Imports
from Bio.SwissProt import KeyWList
import urllib
from Bio import SwissProt
from Bio.PDB import PDBList, PDBParser

# General protein analysis (based on the code developed by group 10).
# Writes the ID and DE of every parsed record to proteinas_uniprot.txt and
# collects the accession codes in `codes` for the per-protein step below.
codes = []
# Both files are context-managed so they are closed even if parsing or
# writing raises part-way through.
with open("uniprot-mylist.txt") as handle, \
        open("proteinas_uniprot.txt", "w") as review:
    records = KeyWList.parse(handle)
    for record in records:
        review.write("\n" + record['ID'] + "\n")
        review.write("\n" + record['DE'] + "\n")
        # Drop the trailing ";" from each accession code
        codes.append(record['AC'][:-1])

#análise individual das proteínas relevantes (baseado nos códigos desenvolvidos pelos grupos 10 e 7)
f = open("analise_reviewed.txt", "w")
for code in codes:
    data = urllib.urlopen("http://www.uniprot.org/uniprot/" + code + ".txt")
    while True:
        try:
Beispiel #6
0
#!/usr/bin/env python
import os
import re
from multiprocessing import Pool
from Bio import SeqIO
from Bio import SwissProt
from Bio.SwissProt import KeyWList

# Dump every SwissProt record of one proteome file, printing its Pfam
# cross-references, then print every entry of the keyword list.
#
# NOTE: "data/proteomes/UP000009229_760568.txt" used to be opened here as
# well, but that handle was overwritten on the next line without being read
# or closed, so only the proteome below was ever parsed.
with open("data/proteomes/UP000001554_7739.txt") as handle:
    for record in SwissProt.parse(handle):
        print(record.__dict__)
        print(record.accessions)
        for db in record.cross_references:
            # The first element of a cross-reference names the database.
            if db[0] == "Pfam":
                # Function-call form is valid on both Python 2 and 3.
                print(db)

keywordfile = "/pfs/nobackup/home/w/wbasile/annotate_uniprot_proteomes/bin/keywlist.txt"
with open(keywordfile) as handle:
    for keyword in KeyWList.parse(handle):
        print(keyword)
Beispiel #7
0
# -*- coding: utf-8 -*-
"""
Created on Fri Apr  1 12:16:35 2016

@author: Gungnir
"""

import os
import re
from Bio import ExPASy
from Bio import SwissProt
import gzip
from Bio.SwissProt import KeyWList
import urllib

# Print the ID and DE field of every record in the keyword list.
# The context manager closes the file once the parser is exhausted.
with open("keywlist.txt") as keywlist:
    records = KeyWList.parse(keywlist)
    for record in records:
        print(record['ID'])
        print(record['DE'])

work_dir = os.getcwd()
#data_dir = os.path.join()

# Exploratory: list the attributes of the open database file object.
# Any future parsing of db_pdb belongs inside this block, because the file
# is closed when the block exits.
with open('uniprot_sprot.dat') as db_pdb:
    #descriptions = [record.description for record in SwissProt.parse(db_pdb)]
    print(dir(db_pdb))
Beispiel #8
0
    proteinList.append(record)
    print "Record added to list"
    time.sleep(1)

f.close()

--------------------
from Bio.SwissProt import KeyWList

dat = []  # initialize empty list
          # within each list have a protein
          # in that protein have a list of attributes
          # including ID, Number of transmembrane mentions

# Print the ID of every keyword record; the context manager closes the file
# after the parser has been fully consumed (or if parsing raises).
with open("keywlist.txt") as handle:
    records = KeyWList.parse(handle)
    for record in records:
        print(record['ID'])

# Next step: walk every object in proteinList and keep only the ones with
# exactly one TRANSMEM mention.
#
# Accumulators for that pass, each starting as its own empty list:
#   noTMD     - how many TMDs are in the protein
#   locList   - the transmembrane entries
#   tmdStart  - start of the TMD
#   tmdEnd    - end of the TMD
#   entryName - name of the sequence
#   index     - (purpose not documented in the original)
#   sequence  - (purpose not documented in the original)
noTMD, locList, tmdStart, tmdEnd, entryName, index, sequence = (
    [] for _ in range(7)
)

#http://stackoverflow.com/questions/2917372/how-to-search-a-list-of-tuples-in-python#