def queryProsite(theSeqs):
    """Look up the PROSITE description of the first hit for each sequence.

    Every sequence is submitted to ScanProsite. For the first hit returned,
    the matching PROSITE entry is downloaded and the text of its ``DE`` line
    is collected (and printed).

    Args:
        theSeqs: Iterable of sequences accepted by ``ScanProsite.scan``.

    Returns:
        list: One item per input sequence, in order: the description string,
        or ``None`` when the scan produced no hits or the downloaded entry
        contains no ``DE`` line.
    """
    print("Currently querying Prosite...")
    prositeData = []

    for sequence in theSeqs:
        scan_handle = ScanProsite.scan(seq=sequence, skip="0")
        result = ScanProsite.read(scan_handle)

        description = None
        if result:
            entry_handle = ExPASy.get_prosite_entry(result[0]["signature_ac"])
            try:
                entry_text = entry_handle.read()
            finally:
                # Release the connection even if reading fails.
                entry_handle.close()

            # Keep the last "DE" line, as the original scan did. When there is
            # none, the description stays None instead of falling back to a
            # slice of the first line of the entry.
            for entry_line in entry_text.split("\n"):
                if entry_line[0:2] == "DE":
                    # Drop the 5-character line prefix and the final character
                    # (presumably the trailing period -- confirm).
                    description = entry_line[5:-1]

        prositeData.append(description)
        print(description)

    return prositeData
Esempio n. 2
0
def Scan_Prosite(entry, sk="off"):
    """Scan a protein against PROSITE and return the hits as a DataFrame.

    Args:
        entry: UniProt ID, PDB ID or raw sequence, passed to
            ``ScanProsite.scan`` as its first argument.
        sk: Value of the ScanProsite ``skip`` option (default ``"off"``);
            with ``"on"``, patterns and profiles with a high probability of
            occurrence are skipped.

    Returns:
        pandas.DataFrame: One row per hit. The columns ``sequence_ac``,
        ``start``, ``stop``, ``signature_ac``, ``score`` and ``level`` always
        come first; any other field present in the hits follows. The frame
        is empty (but keeps those six columns) when there are no hits.
    """
    handle = ScanProsite.scan(entry, skip=sk)
    # handle.read() would give the raw XML; ScanProsite.read parses it into a
    # list-like record of hit dictionaries instead.
    result = ScanProsite.read(handle)

    base_columns = [
        'sequence_ac', 'start', 'stop', 'signature_ac', 'score', 'level'
    ]

    # Build the frame in one step: DataFrame.append was removed in pandas 2.0
    # and copied the whole frame on every call.
    df = pd.DataFrame([dict(hit) for hit in result])
    extra_columns = [col for col in df.columns if col not in base_columns]
    df = df.reindex(columns=base_columns + extra_columns)

    return (df)
Esempio n. 3
0
 def prosite(self):
     """Prompt for sequence ids and run a ScanProsite scan for each of them.

     Returns:
         list: One ``ScanProsite.read`` result per id entered.
     """
     seq = input("select sequences id: ")
     # The ids are split on single spaces.
     seq_select = seq.split(' ')
     res = []
     for iD in seq_select:
         # NOTE(review): ``seq_select`` is a list of strings, so indexing it
         # with the string ``iD`` raises TypeError. The lookup was presumably
         # meant to go through a sequence container held on ``self`` (which
         # is otherwise unused here) -- confirm against the enclosing class.
         handle = ScanProsite.scan(seq=seq_select[iD]['seq'].seq)
         res.append(ScanProsite.read(handle))
     return res
def download_ProSite_motifs(query: str = 'P12004') -> defaultdict:
    """Run an ExPASy ScanProsite search and collect the position of each hit.

    Args:
        query: Value passed to ``ScanProsite.scan`` as ``seq``; the original
            author describes it as a UniProt ID.

    Returns:
        A ``defaultdict(dict)`` keyed by the ``signature_id`` of each hit,
        whose values hold that hit's ``'start'`` and ``'stop'``. A later hit
        with the same ``signature_id`` overwrites an earlier one.
    """
    motif_positions = defaultdict(dict)
    scan_record = ScanProsite.read(ScanProsite.scan(seq=query))

    for hit in scan_record:
        first, last = hit['start'], hit['stop']
        positions = motif_positions[hit['signature_id']]
        positions['start'] = first
        positions['stop'] = last

    return motif_positions
Esempio n. 5
0
 def scan_motifs(self, ids_seqs):
     '''
     Search for motifs using Prosite.
     :param ids_seqs: IDs of the sequences held by the manager
     :return results: list with one parsed scan result per ID, in order
     '''
     return [
         ScanProsite.read(
             ScanProsite.scan(seq=self.seqs[seq_key]['seq'].seq)
         )
         for seq_key in ids_seqs
     ]
 def SearchDomains(self,seqid):
     """Scan the stored sequence ``seqid`` with ScanProsite and record the result.

     The sequence is sent as a FASTA string with the header ``>SEQ1``. The
     parsed result is appended to ``self.domains[seqid]`` (the list is created
     on first use) and returned.

     Returns the parsed scan result, or ``False`` (after printing
     "Invalid ID") when ``seqid`` is not a key of ``self.dseqs``.
     """
     # Guard clause: unknown ids are reported and rejected up front.
     if seqid not in self.dseqs.keys():
         print("Invalid ID")
         return False

     fasta_query = ">SEQ1\n" + str(self.dseqs[seqid].seq)
     scan_result = ScanProsite.read(ScanProsite.scan(seq=fasta_query))
     self.domains.setdefault(seqid, []).append(scan_result)
     return scan_result
Esempio n. 7
0
 def Prosite_Domain(self):
     """Print the name of every PROSITE domain found in the input sequence.

     The stored input sequence is scanned with ScanProsite. For each hit the
     raw PROSITE entry is downloaded and parsed, and its name is printed. A
     message is printed instead when the scan returns no hits.

     Any ordinary exception raised along the way is reported with the
     "not a protein sequence" message, whatever its actual cause.
     KeyboardInterrupt and SystemExit are no longer swallowed.
     """
     from Bio import ExPASy
     from Bio.ExPASy import Prosite, ScanProsite
     try:
         handle = ScanProsite.scan(seq=self.__seq_input)
         result = ScanProsite.read(handle)
         if result:
             for hit in result:
                 r = ExPASy.get_prosite_raw(hit['signature_ac'])
                 try:
                     entry = Prosite.read(r)
                 finally:
                     # Close the download handle even if parsing fails.
                     r.close()
                 print('Foi encontrado um dominio %s.' % (entry.name))
         else:
             print('Não foram encontradas correspondências.')
     except Exception:
         print('A sequência fornecida não é uma sequência proteica.')
        #deleting file
        os.remove("prot.fasta")

print(missingProts)
np.save('Round2_protStructure.npy', seq_dict)
####4.Extract kinase domain and ATP binding pocket
kinaseDict = {}
ATPDict = {}

amissing, kmissing, count = 0, 0, 0
for key, sequence in seq_dict.items():
    count = count + 1
    print(key)
    if count % 50 == 0:
        time.sleep(60)  # sleep 1 mn for very 50 query to avoid timeout
    handle = ScanProsite.scan(seq=sequence)
    result = ScanProsite.read(handle)
    kinase, atp = 0, 0
    for i in range(
            len(result)
    ):  #I am looping over all results but there should be only one that ha$
        if result[i]['signature_ac'] == 'PS50011':  # Protein kinase domain
            kinaseDict[key] = sequence[result[i]['start']:result[i]['stop']]
            kinase = 1
        elif result[i]['signature_ac'] == 'PS00107':  # ATP binding pocket
            ATPDict[key] = sequence[result[i]['start']:result[i]['stop']]
            atp = 1
    if kinase == 0:
        kmissing = kmissing + 1
        print('kinase missing')
    if atp == 0:
def scanProsite(dic, tag):
    """Scan ``dic[tag].seq`` with ScanProsite (``lowscore=1``) and print each hit."""
    scan_handle = ScanProsite.scan(seq=dic[tag].seq, lowscore=1)
    for hit in ScanProsite.read(scan_handle):
        print(hit)
Esempio n. 10
0
from pandas import DataFrame, read_csv
import pandas as pd

# Load the sequence table from a hard-coded CSV path.
# NOTE(review): the name `input` shadows the builtin from here on.
file = r'C:/Users/Kevin/Desktop/BIMM182_Project/Sequences.csv'
input = pd.read_csv(file)

motifset = []

# One ScanProsite query per row of the table.
for i in range(len(input)):
    # The sequence is read from the third column of the row.
    sequence = input.iloc[i, 2]

    accessions = set()
    motifs = ""

    # Scan Prosite for matching motifs
    handle = ScanProsite.scan(seq=sequence, skip="off")
    result = ScanProsite.read(handle)

    # Collect the distinct signature accessions of all hits
    for hit in result:
        acc = hit.get('signature_ac')
        accessions.add(acc)

    # Get descriptions from accession numbers
    for accession in accessions:
        prof = ExPASy.get_prosite_raw(accession)
        text = prof.read()
        text = text.splitlines()
        # Assumes the "DE" (description) line is always the fourth line of
        # the raw entry -- TODO confirm.
        desc = text[3]
        # Keep only the text that follows the "DE   " prefix.
        desc = str.split(desc, 'DE   ')
        desc = desc[1]
Esempio n. 11
0
from Bio import SeqIO
from Bio.ExPASy import ScanProsite

# Scan every record of the FASTA file with ScanProsite, print the residues
# covered by each hit and collect the reported (start, stop) positions.
aligned_record = SeqIO.parse("/home/nadzhou/SEQs/spike_uniprot.fasta", "fasta")

start_end = []

for record in aligned_record:
    prosite_handle = ScanProsite.scan(record.seq)

    prosite_result = ScanProsite.read(prosite_handle)

    for rec in prosite_result:
        # Positions are stored exactly as reported by ScanProsite.
        start_end.append((rec['start'], rec['stop']))

        # ScanProsite positions are 1-based and inclusive; slicing from
        # start - 1 keeps the first residue of the match.
        print(record.seq[rec['start'] - 1:rec['stop']])
        print()

print(start_end)
Esempio n. 12
0
def scanSequence(sequence):
    """Submit ``sequence`` to ScanProsite and return the parsed result."""
    return ScanProsite.read(ScanProsite.scan(seq=sequence))
Esempio n. 13
0
#!/usr/bin/env python

from __future__ import print_function
import os

from Bio.ExPASy import ScanProsite

if __name__ == "__main__":
    # Only the first line of the dataset file is used as the protein string.
    dataset_path = os.path.join('data', 'rosalind_prst.txt')
    with open(dataset_path) as dataset:
        protein_string = dataset.readline().rstrip()

    result = ScanProsite.read(ScanProsite.scan(protein_string))

    # Print the signature accession of the hit with the greatest start
    # position. The sort is stable, so ties resolve to the hit listed last.
    hits_by_start = sorted(result, key=lambda hit: hit['start'])
    print(hits_by_start[-1]['signature_ac'])