def queryProsite(theSeqs):
    """Return the PROSITE description of the first hit for each sequence.

    Every sequence is submitted to ScanProsite (with ``skip="0"``). For the
    first hit returned, the PROSITE entry is downloaded and the text of its
    last line starting with ``DE`` is extracted (characters 5 up to, but not
    including, the final character of that line). Each description is also
    printed as it is found.

    Args:
        theSeqs: iterable of sequences accepted by ``ScanProsite.scan``.

    Returns:
        list: one item per input sequence, in order; the description string,
        or ``None`` when the scan produced no hit.
    """
    print("Currently querying Prosite...")
    prositeData = []
    for sequence in theSeqs:
        scan_handle = ScanProsite.scan(seq=sequence, skip="0")
        hits = ScanProsite.read(scan_handle)
        try:
            # hits[0] raises IndexError when the scan found nothing.
            entry_handle = ExPASy.get_prosite_entry(hits[0]["signature_ac"])
            try:
                entry_lines = entry_handle.read().split("\n")
            finally:
                # Release the connection even if reading fails.
                entry_handle.close()

            # The last "DE" line wins; with no "DE" line the first line of
            # the entry is used, as before.
            de_index = 0
            for index, text in enumerate(entry_lines):
                if text[0:2] == "DE":
                    de_index = index
            description = entry_lines[de_index][5:-1]
        except IndexError:
            description = None
        prositeData.append(description)
        print(description)
    return prositeData
def Scan_Prosite(entry, sk="off"):
    """Scan one protein with ScanProsite and return the hits as a DataFrame.

    Args:
        entry: UniProt ID, PDB ID or raw sequence; passed as the first
            positional argument to ``ScanProsite.scan``.
        sk: value forwarded as the ``skip`` option (default ``"off"``).
            Per the original author's note, ``"on"`` skips patterns and
            profiles with a high probability of occurrence.

    Returns:
        pandas.DataFrame: one row per match. The columns ``sequence_ac``,
        ``start``, ``stop``, ``signature_ac``, ``score`` and ``level`` are
        always present and come first; any other key found in a hit becomes
        an additional column. The frame has zero rows when nothing matched.
    """
    handle = ScanProsite.scan(entry, skip=sk)
    # ScanProsite.read parses the raw XML into a list-like record whose
    # items are dictionaries, one per match.
    result = ScanProsite.read(handle)

    base_columns = [
        'sequence_ac',
        'start',
        'stop',
        'signature_ac',
        'score',
        'level',
    ]
    # Build the frame in one step: DataFrame.append no longer exists in
    # pandas 2.x, and appending row by row was quadratic anyway.
    df = pd.DataFrame([dict(hit) for hit in result])
    extra_columns = [col for col in df.columns if col not in base_columns]
    return df.reindex(columns=base_columns + extra_columns)
def prosite(self):
    """Prompt for sequence IDs and run a ScanProsite search for each one.

    The user types one or more IDs separated by whitespace. Each ID is
    looked up in ``self.seqs`` and the stored sequence is submitted to
    ScanProsite.

    NOTE(review): the previous code indexed the list of typed IDs with the
    ID string itself (``seq_select[iD]``), which always raised TypeError.
    The lookup below assumes the records live in ``self.seqs`` as
    ``{id: {'seq': record_with_seq_attribute}}`` -- TODO confirm against
    the enclosing class.

    Returns:
        list: one parsed ScanProsite result per typed ID, in input order.
    """
    seq = input("select sequences id: ")
    # split() with no separator ignores repeated, leading and trailing
    # blanks, so stray spaces do not produce empty IDs.
    seq_select = seq.split()
    res = []
    for iD in seq_select:
        handle = ScanProsite.scan(seq=self.seqs[iD]['seq'].seq)
        res.append(ScanProsite.read(handle))
    return res
def download_ProSite_motifs(query: str = 'P12004') -> defaultdict:
    """Run an ExPASy ScanProsite search and collect the hit positions.

    Args:
        query: value submitted as the ``seq`` parameter of the scan; the
            original author documents it as a UniProt ID.

    Returns:
        defaultdict: maps each hit's ``signature_id`` to a dict with the
        keys ``'start'`` and ``'stop'``. When a signature occurs more than
        once, the last hit overwrites the earlier ones.
    """
    hits = ScanProsite.read(ScanProsite.scan(seq=query))
    positions_by_signature = defaultdict(dict)
    for hit in hits:
        positions_by_signature[hit['signature_id']].update(
            start=hit['start'],
            stop=hit['stop'],
        )
    return positions_by_signature
def scan_motifs(self, ids_seqs):
    '''
    Search for motifs in Prosite for the given sequences.
    :param ids_seqs: IDs of the sequences held by this manager
    :return results: list with one parsed scan result per ID, in order
    '''
    return [
        ScanProsite.read(ScanProsite.scan(seq=self.seqs[seq_id]['seq'].seq))
        for seq_id in ids_seqs
    ]
def SearchDomains(self, seqid):
    """Scan the stored sequence ``seqid`` with ScanProsite and record the hits.

    The sequence is submitted in FASTA form under the header ``>SEQ1``.
    The parsed result is appended to the list kept in
    ``self.domains[seqid]``, creating that list on first use.

    Returns:
        The parsed ScanProsite result, or ``False`` (after printing
        "Invalid ID") when ``seqid`` is not a key of ``self.dseqs``.
    """
    # Guard clause: unknown IDs are reported and rejected up front.
    if seqid not in self.dseqs:
        print("Invalid ID")
        return False

    fasta_text = ">SEQ1\n" + str(self.dseqs[seqid].seq)
    hits = ScanProsite.read(ScanProsite.scan(seq=fasta_text))
    self.domains.setdefault(seqid, []).append(hits)
    return hits
def Prosite_Domain(self):
    """Scan the stored input sequence with ScanProsite and print the domains.

    For every hit, the raw PROSITE entry of its ``signature_ac`` is fetched
    and parsed, and the entry name is printed. When the scan returns no
    hits, a "no matches found" message is printed instead.

    Any ``Exception`` raised while scanning or fetching is reported with a
    single message saying the supplied sequence is not a protein sequence,
    whatever the real cause was. Nothing is returned.
    """
    from Bio import ExPASy
    from Bio.ExPASy import Prosite, ScanProsite
    try:
        handle = ScanProsite.scan(seq=self.__seq_input)
        result = ScanProsite.read(handle)
        if result:
            for hit in result:
                raw_handle = ExPASy.get_prosite_raw(hit['signature_ac'])
                try:
                    entry = Prosite.read(raw_handle)
                finally:
                    # Close the download even if parsing the entry fails.
                    raw_handle.close()
                print('Foi encontrado um dominio %s.' % (entry.name))
        else:
            print('Não foram encontradas correspondências.')
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except``:
        # KeyboardInterrupt and SystemExit now propagate to the caller.
        print('A sequência fornecida não é uma sequência proteica.')
#deleting file os.remove("prot.fasta") print(missingProts) np.save('Round2_protStructure.npy', seq_dict) ####4.Extract kinase domain and ATP binding pocket kinaseDict = {} ATPDict = {} amissing, kmissing, count = 0, 0, 0 for key, sequence in seq_dict.items(): count = count + 1 print(key) if count % 50 == 0: time.sleep(60) # sleep 1 mn for very 50 query to avoid timeout handle = ScanProsite.scan(seq=sequence) result = ScanProsite.read(handle) kinase, atp = 0, 0 for i in range( len(result) ): #I am looping over all results but there should be only one that ha$ if result[i]['signature_ac'] == 'PS50011': # Protein kinase domain kinaseDict[key] = sequence[result[i]['start']:result[i]['stop']] kinase = 1 elif result[i]['signature_ac'] == 'PS00107': # ATP binding pocket ATPDict[key] = sequence[result[i]['start']:result[i]['stop']] atp = 1 if kinase == 0: kmissing = kmissing + 1 print('kinase missing') if atp == 0:
def scanProsite(dic, tag):
    """Print every ScanProsite hit for the sequence stored at ``dic[tag]``.

    Args:
        dic: mapping whose values expose a ``.seq`` attribute.
        tag: key of the entry to scan.

    The scan is requested with ``lowscore=1``. Each hit is printed on its
    own line; nothing is returned.
    """
    hits = ScanProsite.read(ScanProsite.scan(seq=dic[tag].seq, lowscore=1))
    for hit in hits:
        print(hit)
from pandas import DataFrame, read_csv
import pandas as pd

# Table of sequences to annotate; the sequence is read from the third column.
file = r'C:/Users/Kevin/Desktop/BIMM182_Project/Sequences.csv'
# NOTE: this name shadows the builtin input(); it is kept unchanged because
# code outside this view may still refer to it.
input = pd.read_csv(file)
motifset = []

for i in range(len(input)):
    sequence = input.iloc[i, 2]
    accessions = set()
    motifs = ""

    # Scan Prosite for matching motifs
    handle = ScanProsite.scan(seq=sequence, skip="off")
    result = ScanProsite.read(handle)

    # Obtain all accession motifs (the set removes repeated signatures)
    for hit in result:
        acc = hit.get('signature_ac')
        accessions.add(acc)

    # Get descriptions from accession numbers
    for accession in accessions:
        prof = ExPASy.get_prosite_raw(accession)
        try:
            text = prof.read()
        finally:
            # One download per accession: close each handle so connections
            # do not pile up across the whole table.
            prof.close()
        text = text.splitlines()
        # The description is taken from the fourth line of the raw entry,
        # which is expected to contain a 'DE ' prefix.
        desc = text[3]
        desc = desc.split('DE ')
        desc = desc[1]
from Bio import SeqIO
from Bio.ExPASy import ScanProsite

# Records are read lazily from the FASTA file, one at a time.
aligned_record = SeqIO.parse("/home/nadzhou/SEQs/spike_uniprot.fasta", "fasta")

# (start, stop) of every hit, exactly as reported by ScanProsite.
start_end = []

for record in aligned_record:
    prosite_handle = ScanProsite.scan(record.seq)
    prosite_result = ScanProsite.read(prosite_handle)

    for rec in prosite_result:
        start_end.append((rec['start'], rec['stop']))
        # ScanProsite positions are 1-based and inclusive, while Python
        # slices are 0-based and end-exclusive: shift the start by one so
        # the first residue of the match is not dropped.
        print(record.seq[rec['start'] - 1:rec['stop']])
        print()

print(start_end)
def scanSequence(sequence):
    """Submit ``sequence`` to ScanProsite and return the parsed result."""
    return ScanProsite.read(ScanProsite.scan(seq=sequence))
#!/usr/bin/env python
# Reads a protein string from data/rosalind_prst.txt, scans it with
# ScanProsite and prints the signature accession of the hit that starts
# furthest along the sequence.
from __future__ import print_function

import os

from Bio.ExPASy import ScanProsite

if __name__ == "__main__":
    input_path = os.path.join('data', 'rosalind_prst.txt')
    with open(input_path) as dataset:
        # Only the first line of the file is used.
        protein_string = dataset.readline().rstrip()

    hits = ScanProsite.read(ScanProsite.scan(protein_string))
    # The sort is stable, so among hits sharing the largest start the one
    # reported last by ScanProsite is selected.
    hits_by_start = sorted(hits, key=lambda hit: hit['start'])
    last_hit = hits_by_start[-1]
    print(last_hit['signature_ac'])