def run(predService, seq, name, ssObject, startTime, post_data, pdbdata):
    """Run one prediction service for a job and record its outcome.

    The number of live threads whose name equals ``name`` is compared with
    ``siteLimit[name]``; when the limit is exceeded the service is not called
    and a "Queue Full" result (status -1) is recorded instead.  Otherwise
    ``predService.get(seq)`` supplies the result.  The result is written with
    ``dbupdate``, appended to ``ssObject``, the majority vote is recomputed and
    stored, and ``post_data['completed']`` is incremented.  When the counter
    reaches ``post_data['total_sites']`` and an e-mail address was supplied,
    the HTML report is built, stored in ``post_data['output']`` and e-mailed.

    Parameters:
        predService: object exposing ``get(seq)`` that returns a result object.
        seq: the sequence passed to the predictor and to the majority vote.
        name: thread name of the service; also the key into ``siteLimit``.
        ssObject: list collecting the results of this job (mutated).
        startTime: first argument forwarded to every ``dbupdate`` call.
        post_data: dict with 'completed', 'total_sites' and 'email' (mutated).
        pdbdata: forwarded unchanged to ``htmlmaker.createHTML``.
    """
    # Thread.getName() is deprecated; the ``name`` attribute is the supported spelling.
    tcount = sum(1 for t in threading.enumerate() if t.name == name)

    if tcount > siteLimit[name]:
        tempSS = ss.SS(name)
        tempSS.pred = "Queue Full"
        tempSS.conf = "Queue Full"
        tempSS.status = -1
    else:
        try:
            tempSS = predService.get(seq)
        except Exception as exc:
            # This function is the thread's top-level boundary: if the predictor
            # raised past here the job's completed counter would never reach
            # total_sites and the results e-mail would never be sent.
            print(name + " failed with an unexpected error: " + repr(exc))
            tempSS = ss.SS(name)
            tempSS.pred = "Prediction failed"
            tempSS.conf = "Prediction failed"
            tempSS.status = 2  #error status

    dbupdate(startTime, tempSS.name + "pred", tempSS.pred)
    dbupdate(startTime, tempSS.name + "conf", tempSS.conf)
    dbupdate(startTime, tempSS.name + "stat", tempSS.status)

    ssObject.append(tempSS)
    majority = batchtools.majorityVote(seq, ssObject)
    dbupdate(startTime, 'majorityvote', majority)

    post_data['completed'] += 1
    if post_data['completed'] == post_data['total_sites']:
        print("All predictions completed.")
        #if all completed and user email is not empty, send email
        if post_data['email'] != "":
            print("Sending results to " + post_data['email'])
            #create HTML and store it in post_data
            post_data['output'] = htmlmaker.createHTML(ssObject, seq, pdbdata, majority)
            emailtools.sendEmail(email_service, post_data['email'], "Prediction Results", post_data['output'])
def get(seq):
    """Submit ``seq`` to the YASPIN web form and return the outcome as an SS object.

    Status values written to the returned object: 0 while in progress, 1 when
    the prediction was read, 2 on any failure (sequence longer than 4000,
    HTTP 500 on submission, or a falsy result from ``batchtools.requestWait``).
    On failure the explanatory text is appended to both ``pred`` and ``conf``.
    """
    result = ss.SS("Yaspin")
    result.status = 0

    def _fail(text, log_line):
        # Shared failure path: same text in pred and conf, error status, console note.
        result.pred += text
        result.conf += text
        result.status = 2  #error status
        print(log_line)
        return result  #an SS object is always returned so callers can treat it uniformly

    if len(seq) > 4000:
        return _fail("Sequence longer than 4000",
                     "YASPIN failed: Sequence longer than 4000")

    #temporary mailbox whose address is given to the submission form
    mailbox = GuerrillaMailSession()
    temp_address = mailbox.get_session_state()['email_address']

    form_fields = {
        'seq': seq,
        'mbjob[description]': 'testprot',
        'nnmethod': 'dssp',
        'smethod': 'nr',
        'yaspin_align': 'YASPIN prediction',
        'email': temp_address
    }
    empty_upload = {'seq_file': ''}
    submission = requests.post('http://www.ibi.vu.nl/programs/yaspinwww/',
                               data=form_fields, files=empty_upload)

    if submission.status_code == 500:
        return _fail("Server Down", "Yaspin Failed: Server Down")

    #the result file is addressed relative to the URL the submission ended up at
    answer = batchtools.requestWait(submission.url + 'results.out', 'Yaspin Not Ready')
    if not answer:
        return _fail("Yaspin failed to respond in time",
                     "YASPIN failed: No response")

    for line in answer.text.splitlines():
        if line.startswith(" Pred:"):
            result.pred += line[6:].strip()
        if line.startswith(" Conf:"):
            result.conf += line[6:].strip()

    result.pred = result.pred.replace('-', 'C')
    result.status = 1
    print("Yaspin Complete")
    return result
def get(seq):
    """Submit ``seq`` to the PSSpred CGI form and return the outcome as an SS object.

    Status values written to the returned object: 0 while in progress, 1 when
    the ``seq.SS`` result was read, 2 on any failure (hard-coded outage window,
    sequence longer than 4000, no link in the submission response, or a falsy
    result from ``batchtools.requestWait``).
    """
    prediction = ss.SS("PSS")
    prediction.status = 0

    #hard-coded outage window: before this Unix timestamp the service is reported as down
    if time.time() < 1571702400.00:
        prediction.pred = "PSS down until Oct 22"
        prediction.conf = "PSS down until Oct 22"
        prediction.status = 2
        return prediction

    if len(seq) > 4000:
        prediction.pred += "Sequence longer than 4000"
        prediction.conf += "Sequence longer than 4000"
        prediction.status = 2  #error status
        print("PSSPred failed: Sequence longer than 4000")
        return prediction  #returned so it stays readable as an ssObject

    #temporary mailbox whose address is given to the submission form
    mailbox = GuerrillaMailSession()
    temp_address = mailbox.get_session_state()['email_address']

    form_fields = {
        'REPLY-E-MAIL': temp_address,
        'TARGET-NAME': 'testprot',
        'SEQUENCE': seq,
    }
    #Around 15 min for 4000
    submission = requests.post('https://zhanglab.ccmb.med.umich.edu/cgi-bin/PSSpred.pl',
                               data=form_fields)
    page = BeautifulSoup(submission.text, 'html.parser')

    #Exit if no links available in the response
    first_link = page.a
    if first_link is None:
        prediction.pred = "Failed to submit, server possibly under load"
        prediction.conf = "Failed to submit, server possibly under load"
        prediction.status = 2
        return prediction

    result_url = first_link.get('href') + '/seq.SS'
    answer = batchtools.requestWait(result_url, "PSSpred Not Ready")

    if not answer:
        prediction.pred += "PSSPred failed to respond in time"
        prediction.conf += "PSSPred failed to respond in time"
        prediction.status = 2  #error status
        print("PSSPred failed: No response")
        return prediction

    for line in answer.text.splitlines():
        if line.startswith("conf"):
            prediction.conf += line[6:].strip()
        if line.startswith("SS"):
            prediction.pred += line[6:].strip()

    prediction.status = 1
    print("PSSPred Complete")
    return prediction
def get(seq):
    """Submit ``seq`` to the RaptorX property-prediction form and return an SS object.

    Status values written to the returned object: 0 while in progress, 1 when
    the ``.ss3.txt`` file from the result archive was parsed, 2 when the length
    check fails or the job/archive is not available within the wait window,
    4 when anything raises while submitting, polling or parsing.

    On success ``pred`` holds the per-residue class letters, ``hconf``/``econf``/
    ``cconf`` receive the three probability columns as strings, and ``conf`` is
    one digit per residue derived from the probability of the predicted class.
    """
    poll_seconds = 20
    max_wait_seconds = 1200  #Cancel after 20 min
    status_xpath = '//*[@id="content"]/center[1]/text()'

    SS = ss.SS("RaptorX")
    SS.status = 0

    if len(seq) < 27 or len(seq) > 4000:  #<27 shouldnt happen with input validation
        # The stored text mentions only the upper bound; the console line names both.
        SS.pred += "Sequence is longer than 4000"
        SS.conf += "Sequence is longer than 4000"
        SS.status = 2  #error status
        print("RaptorX failed: Sequence is shorter than 27 or longer than 4000")
        return SS  #return SS so it will be readable as an ssObject

    session = GuerrillaMailSession()  #Creates GuerrillaMail session
    email_address = session.get_session_state()['email_address']  #retrieves temp email address

    payload = {'jobname': 'myprot', 'useProfile': 'true', 'predict_sub': 'Submit',
               'email': email_address, 'sequences': seq}
    r = requests.post('http://raptorx.uchicago.edu/StructurePropertyPred/predict/pred/', data=payload)
    soup = BeautifulSoup(r.text, 'html.parser')

    try:  #try/catch in case a nucleotide/invalid sequence is entered
        url = soup.find(href=re.compile('http://raptorx.uchicago.edu/StructurePropertyPred/myjobs/')).get('href')
        zipid = url.split('_')[1].replace('/', '')

        tree = html.fromstring(requests.get(url).text)
        treelist = tree.xpath(status_xpath)

        # Poll while the status node still has text AND the deadline has not
        # passed.  (The previous "or time.time() > stime + 1200" never timed
        # out and could not terminate once 20 minutes had elapsed.)
        deadline = time.time() + max_wait_seconds
        while treelist != [] and time.time() < deadline:
            print('RaptorX Not Ready')
            time.sleep(poll_seconds)
            tree = html.fromstring(requests.get(url).text)
            treelist = tree.xpath(status_xpath)

        zipss = None
        if treelist == []:
            # The archive path is embedded as a quoted string in the page script.
            treelist = tree.xpath('//*[@id="infoval"]/script/text()')
            zippath = 'http://raptorx.uchicago.edu/'
            for word in treelist[0].split('"'):
                if word[:30] == '/StructurePropertyPred/myjobs/':
                    zippath += word

            # Re-request on every iteration; the response object does not
            # refresh itself, so waiting without refetching never ends.
            zipss = requests.get(zippath)
            deadline = time.time() + max_wait_seconds
            while not zipss.ok and time.time() < deadline:
                print('RaptorX Zip Not Ready')
                time.sleep(poll_seconds)
                zipss = requests.get(zippath)

        if zipss is not None and zipss.ok:
            ss3path = zipid + '/' + zipid + '.ss3.txt'
            # Reuse the bytes already downloaded and close the archive when done.
            with ZipFile(io.BytesIO(zipss.content), "r") as zf:
                sslist = zf.read(ss3path).decode('utf-8').splitlines()[2:]

            for row in sslist:
                word = row.split()
                SS.pred += word[2]
                SS.hconf.append(word[3])
                SS.econf.append(word[4])
                SS.cconf.append(word[5])

            # conf digit = third character of the predicted class's probability
            # string (e.g. 8 for "0.873"), or 9 when the probability is "1.000".
            conf_by_class = {'C': SS.cconf, 'E': SS.econf, 'H': SS.hconf}
            for i, label in enumerate(SS.pred):
                probs = conf_by_class.get(label)
                if probs is None:
                    continue  #letters other than C/E/H contribute no digit
                SS.conf += '9' if probs[i] == "1.000" else probs[i][2]

            SS.status = 1
            print("RaptorX Complete")
        else:
            SS.pred += "failed to respond in time"
            SS.conf += "failed to respond in time"
            SS.status = 2  #error status
            print("RaptorX failed: No response")
    except Exception:
        # Deliberately broad best-effort handler, but no longer a bare except,
        # so KeyboardInterrupt/SystemExit are not swallowed.
        SS.pred += "sequence not accepted"
        SS.conf += "sequence not accepted"
        SS.status = 4
        print("RaptorX failed: sequence not accepted")

    print("RAPTOR::")
    print(SS.pred)
    print(SS.conf)
    return SS
def get(seq):
    """Submit ``seq`` to the PSIPRED REST API and return the outcome as an SS object.

    Status values written to the returned object: 1 when the ``.horiz`` result
    was read, 2 when the length check fails or ``batchtools.requestWait``
    returns a falsy value, 4 when anything raises while reading the submission
    response or the result.
    """
    SS = ss.SS("PSI")

    if len(seq) < 30 or len(seq) > 1500:
        SS.pred += "Sequence is shorter than 30 or longer than 1500"
        SS.conf += "Sequence is shorter than 30 or longer than 1500"
        SS.status = 2  #error status
        print("PsiPred failed: Sequence is shorter than 30 or longer than 1500")
        return SS  #return SS so it will be readable as an ssObject

    session = GuerrillaMailSession()  #Creates GuerrillaMail session
    email_address = session.get_session_state()['email_address']  #retrieves temp email address

    url = 'http://bioinf.cs.ucl.ac.uk/psipred/api/submission/'
    payload = {'input_data': seq}  #sent as the multipart "files" part of the request
    data = {'job': 'psipred',
            'submission_name': 'testing',
            'email': email_address, }
    r = requests.post(url, data=data, files=payload, headers={'accept': 'application/json'})

    try:  #try/catch in case a nucleotide/invalid sequence is entered
        uuid = r.json()['UUID']
        jsonurl = 'http://bioinf.cs.ucl.ac.uk/psipred/api/submission/' + uuid + '?format=json'
        r = requests.get(jsonurl)
        filesUUID = r.json()['submissions'][0]['UUID']
        horiz = 'http://bioinf.cs.ucl.ac.uk/psipred/api/submissions/' + filesUUID + '.horiz'

        #Length 1500 takes around 5 min
        requesturl = batchtools.requestWait(horiz, 'PsiPred Not Ready')
        if requesturl:
            for line in requesturl.text.splitlines():
                line = line.strip()
                if line.startswith("Conf"):
                    SS.conf += line[6:]
                if line.startswith("Pred"):
                    SS.pred += line[6:]
            SS.status = 1
            print("PsiPred Complete")
        else:
            SS.pred += "PsiPred failed to respond in time"
            SS.conf += "PsiPred failed to respond in time"
            SS.status = 2  #error status
            print("PsiPred failed: No response")
    except Exception:
        # Deliberately broad best-effort handler, but no longer a bare except,
        # so KeyboardInterrupt/SystemExit are not reported as a rejected sequence.
        SS.pred += "PsiPred failed: sequence not accepted"
        SS.conf += "PsiPred failed: sequence not accepted"
        SS.status = 4
        print("PsiPred failed: sequence not accepted")

    print("PSI::")
    print(SS.pred)
    print(SS.conf)
    return SS
def get(seq, timeout=None):
    """Submit ``seq`` to the JPred4 form and return the outcome as an SS object.

    Parameters:
        seq: sequence to submit; must be 20 to 800 characters long.
        timeout: optional number of seconds to keep polling for the ``.jnet``
            result.  ``None`` (the default) polls indefinitely, which is the
            behavior callers that pass only ``seq`` have always had.

    Status values written to the returned object: 1 when the result was read,
    2 when the length check fails or the timeout elapses, 4 when anything
    raises while reading the submission response or the result.
    """
    SS = ss.SS("JPred")

    if len(seq) < 20 or len(seq) > 800:
        SS.pred += "Sequence is shorter than 20 or longer than 800"
        SS.conf += "Sequence is shorter than 20 or longer than 800"
        SS.status = 2  #error status
        print("JPred failed: Sequence is shorter than 20 or longer than 800")
        return SS  #return SS so it will be readable as an ssObject

    session = GuerrillaMailSession()  #Creates GuerrillaMail session
    email_address = session.get_session_state()['email_address']  #retrieves temp email address

    payload = {
        'email': email_address,
        'queryName': 'testprot',
        'input': 'seq',
        'pdb': '1',
        '.submit': 'continue',
        'seq': seq
    }
    r = requests.post(
        'http://www.compbio.dundee.ac.uk/jpred4/cgi-bin/jpred_form',
        data=payload)

    try:  #try/catch in case a nucleotide/invalid sequence is entered
        # The job id is the part of the Refresh header after the '?'.
        jobid = r.headers['Refresh'].split('?')[1]
        joburl = 'http://www.compbio.dundee.ac.uk/jpred4/results/' + jobid + '/' + jobid + '.jnet'
        page = requests.get(joburl).text

        # A body starting with '<' is treated as "result not there yet".
        # Without a timeout this loop only ends when the result appears.
        deadline = None if timeout is None else time.time() + timeout
        while page.startswith('<') and (deadline is None or time.time() < deadline):
            print("JpredSS Not Ready")
            time.sleep(20)
            page = requests.get(joburl).text

        if not page.startswith('<'):
            raw = page.splitlines()
            SS.pred = raw[1].replace('jnetpred:', '')
            SS.pred = SS.pred.replace('-', 'C')  #Replaces dashes with C
            SS.pred = SS.pred.replace(',', '')
            SS.conf = raw[2].replace('JNETCONF:', '')
            SS.conf = SS.conf.replace(',', '')
            SS.status = 1
            print("JPred Complete")
        else:
            # Reachable only when a timeout was given and it elapsed.
            SS.pred += "JPred failed to respond in time"
            SS.conf += "JPred failed to respond in time"
            SS.status = 2  #error status
            print("JPred failed: No response")
    except Exception:
        # Deliberately broad best-effort handler, but no longer a bare except,
        # so KeyboardInterrupt/SystemExit are not reported as a rejected sequence.
        SS.pred += "JPred failed: sequence not accepted"
        SS.conf += "JPred failed: sequence not accepted"
        SS.status = 4
        print("JPred failed: sequence not accepted")

    print("JPRED::")
    print(SS.pred)
    print(SS.conf)
    return SS
def _extract_sspro_prediction(message):
    """Return the lines after the 3-class header joined together, or "" if absent.

    Collection stops at the first blank line or at the end of the message.
    """
    lines = message.splitlines()
    try:
        start = lines.index("Predicted Secondary Structure (3 Class):") + 1
    except ValueError:
        return ""
    collected = []
    for line in lines[start:]:
        if not line:  #blank line ends the prediction block
            break
        collected.append(line)
    return "".join(collected)


def get(seq):
    """Submit ``seq`` to the SCRATCH/SSpro form and return the outcome as an SS object.

    The result arrives by e-mail in a temporary GuerrillaMail inbox and is
    awaited with ``batchtools.emailRequestWait``.

    Status values written to the returned object: 0 while in progress, 3 when
    a prediction was read (no confidence is provided), 2 on any failure
    (sequence longer than 400, submission not acknowledged, queue full, no
    e-mail, or an e-mail without a prediction block).
    """
    SS = ss.SS("SSPro")
    SS.status = 0

    if len(seq) > 400:
        SS.pred += "Sequence longer than 400"
        SS.conf += "Sequence longer than 400"
        SS.status = 2  #error status
        print("SSPro failed: Sequence longer than 400")
        return SS  #return SS so it will be readable as an ssObject

    # Random job name, used below to match the result e-mail to this request.
    randName = batchtools.randBase62()

    session = GuerrillaMailSession()  #Creates GuerrillaMail session
    email_address = session.get_session_state()['email_address']  #retrieves temp email address

    payload = {
        'amino_acids': seq,
        'query_name': randName,
        'email': email_address,
        'ss': 'on'
    }
    r = requests.post(
        'http://scratch.proteomics.ics.uci.edu/cgi-bin/new_server/sql_predict.cgi',
        data=payload)
    soup = BeautifulSoup(r.text, 'html.parser')

    msg = soup.find('p')
    if msg is None:
        SS.pred += "Failed to Submit"
        SS.conf += "Failed to Submit"
        SS.status = 2  #error status
        print("SSPro Failed to Submit")
        return SS

    # Slice instead of [0] so an empty paragraph cannot raise IndexError.
    if msg.text.split()[:1] == ['ERROR:']:
        SS.pred += "Queue Full"
        SS.conf += "Queue Full"
        SS.status = 2  #error status
        print("SSPro Queue Full")
        return SS

    # NOTE(review): the sender address in this query looks like an
    # e-mail-obfuscation placeholder rather than a real address - confirm
    # against the original source before relying on the "from:" filter.
    query = 'from:([email protected]) subject:(Protein Structure Predictions for ' + randName + ')'

    #Waits indefinitely until results are out
    #Length 400 sequences take 10-15 min in a batch
    email_id, message = batchtools.emailRequestWait(session, query, "Name:", randName, "SSPro Not Ready", 60)

    if not email_id:
        SS.pred += "SSPro failed to respond in time"
        SS.conf += "SSPro failed to respond in time"
        SS.status = 2  #error status
        print("SSPro failed: No response")
        return SS

    prediction = _extract_sspro_prediction(message)
    if prediction:
        SS.pred += prediction
        SS.conf = "SSPro Does Not Provide Any Conf"
        SS.status = 3
        print("SSpro Complete")
    else:
        # The old scrape loop raised IndexError here (header missing); report
        # it as a failed prediction instead of crashing the caller.
        SS.pred += "SSPro returned no prediction"
        SS.conf += "SSPro returned no prediction"
        SS.status = 2  #error status
        print("SSPro failed: No prediction in response")
    return SS
def _parse_sable_message(lines):
    """Parse the two 'END_SECTION'-terminated sections of a SABLE result e-mail.

    In the first section, each line starting with '>' is followed two lines
    later by a prediction line and three lines later by a confidence line.
    In the second section, each '>' line is followed (offsets 2, 3, 4) by the
    helix, beta and coil probability lines, whose first three characters are
    skipped.

    Returns (pred, conf, helix, beta, coil): two strings and three lists of
    whitespace-separated tokens.  Raises IndexError when a section terminator
    or an expected line is missing.
    """
    pred_parts = []
    conf_parts = []
    index = 0
    while not lines[index].startswith('END_SECTION'):
        if lines[index].startswith('>'):
            pred_parts.append(lines[index + 2].strip())
            conf_parts.append(lines[index + 3].strip())
        index += 1

    index += 1  #go past the prediction's 'END_SECTION'

    helix, beta, coil = [], [], []
    while not lines[index].startswith('END_SECTION'):
        if lines[index].startswith('>'):
            helix.extend(lines[index + 2][3:].split())
            beta.extend(lines[index + 3][3:].split())
            coil.extend(lines[index + 4][3:].split())
        index += 1

    return "".join(pred_parts), "".join(conf_parts), helix, beta, coil


def get(seq):
    """Submit ``seq`` to the SABLE server and return the outcome as an SS object.

    The result arrives by e-mail in a temporary GuerrillaMail inbox and is
    awaited with ``batchtools.emailRequestWait``.

    Status values written to the returned object: 0 while in progress, 1 when
    the result e-mail was parsed (``pred``, ``conf``, ``hconf``, ``econf``,
    ``cconf`` filled), 2 on any failure (sequence of 12 characters or fewer,
    no e-mail, or an e-mail that does not have the expected layout).
    """
    SS = ss.SS("Sable")
    SS.status = 0

    if len(seq) <= 12:
        SS.pred += "Sequence is shorter than or equal to 12"
        SS.conf += "Sequence is shorter than or equal to 12"
        SS.status = 2  #error status
        print("SABLE failed: Sequence is shorter than or equal to 12")
        # Stop here: previously the status was reset to 0 and the too-short
        # sequence was submitted anyway.
        return SS

    # Random job name, used below to match the result e-mail to this request.
    randName = batchtools.randBase62()

    session = GuerrillaMailSession()  #Creates GuerrillaMail session
    email_address = session.get_session_state()['email_address']  #retrieves temp email address

    payload = {'txtSeq': seq, 'seqName': randName, 'email': email_address,
               'fileName': '', 'SS': 'SS', 'version': 'sable2',
               'SAaction': 'wApproximator', 'SAvalue': 'REAL'}
    requests.post('http://sable.cchmc.org/cgi-bin/sable_server_July2003.cgi', data=payload)

    #sable uses multiple emails to send results
    query = 'from:(sable) subject:(sable result) query: ' + randName

    #Length 4000 takes around 10 min
    #Waits indefinitely until results are out
    email_id, message = batchtools.emailRequestWait(session, query, "Query:", randName, "Sable Not Ready", 30)

    if not email_id:
        SS.pred += "Sable failed to respond in time"
        SS.conf += "Sable failed to respond in time"
        SS.status = 2  #error status
        print("Sable failed: No response")
        return SS

    try:
        pred, conf, helix, beta, coil = _parse_sable_message(message.splitlines())
    except IndexError:
        # Missing section terminator or truncated message: report a failed
        # prediction instead of letting the IndexError escape.
        SS.pred += "Sable result could not be parsed"
        SS.conf += "Sable result could not be parsed"
        SS.status = 2  #error status
        print("Sable failed: Result could not be parsed")
        return SS

    SS.pred += pred
    SS.conf += conf
    SS.hconf = helix
    SS.econf = beta
    SS.cconf = coil
    SS.status = 1
    print(SS.pred)
    print(SS.conf)
    print("Sable Complete")
    return SS