def RunQuery_wsdl(rawseq, filtered_seq, seqinfo): #{{{
    """Create tmp/result directories for a wsdl-submitted query, persist the
    query files, and submit the job to the local queue in cache-only mode.

    Returns the jobid (the basename of the result directory).
    """
    write_status = []
    tmpdir = tempfile.mkdtemp(prefix="%s/static/tmp/tmp_" % (SITE_ROOT))
    rstdir = tempfile.mkdtemp(prefix="%s/static/result/rst_" % (SITE_ROOT))
    for dirname in (tmpdir, rstdir):
        os.chmod(dirname, 0o755)
    jobid = os.path.basename(rstdir)
    seqinfo['jobid'] = jobid
    numseq = seqinfo['numseq']

    # files describing the query
    jobinfofile = "%s/jobinfo" % (rstdir)
    rawseqfile = "%s/query.raw.fa" % (rstdir)
    seqfile_t = "%s/query.fa" % (tmpdir)
    seqfile_r = "%s/query.fa" % (rstdir)
    warnfile = "%s/warn.txt" % (tmpdir)
    jobinfo_str = "%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n" % (
        seqinfo['date'], jobid, seqinfo['client_ip'], seqinfo['numseq'],
        len(rawseq), seqinfo['jobname'], seqinfo['email'],
        seqinfo['method_submission'])
    for content, target in ((jobinfo_str, jobinfofile),
                            (rawseq, rawseqfile),
                            (filtered_seq, seqfile_t),
                            (filtered_seq, seqfile_r)):
        write_status.append(myfunc.WriteFile(content, target, "w"))

    seqinfo['base_www_url'] = "http://" + seqinfo['hostname']
    seqinfo['numseq_this_user'] = 1
    # changed 2015-03-26: jobs submitted via wsdl only fetch cached results
    SubmitQueryToLocalQueue(seqinfo, tmpdir, rstdir, isOnlyGetCache=True)
    return jobid
def RunQuery_wsdl_local(rawseq, filtered_seq, seqinfo): #{{{
    """Submit a wsdl job to the local queue.

    Returns the jobid on success and the empty string on failure.
    """
    status_list = []
    tmpdir = tempfile.mkdtemp(prefix="%s/static/tmp/tmp_" % (SITE_ROOT))
    rstdir = tempfile.mkdtemp(prefix="%s/static/result/rst_" % (SITE_ROOT))
    for dirname in (tmpdir, rstdir):
        os.chmod(dirname, 0o755)
    jobid = os.path.basename(rstdir)
    seqinfo['jobid'] = jobid
    numseq = seqinfo['numseq']

    # write files for the query
    jobinfofile = "%s/jobinfo" % (rstdir)
    rawseqfile = "%s/query.raw.fa" % (rstdir)
    seqfile_t = "%s/query.fa" % (tmpdir)
    seqfile_r = "%s/query.fa" % (rstdir)
    warnfile = "%s/warn.txt" % (tmpdir)
    jobinfo_str = "%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n" % (
        seqinfo['date'], jobid, seqinfo['client_ip'], seqinfo['numseq'],
        len(rawseq), seqinfo['jobname'], seqinfo['email'],
        seqinfo['method_submission'])
    for content, target in ((jobinfo_str, jobinfofile),
                            (rawseq, rawseqfile),
                            (filtered_seq, seqfile_t),
                            (filtered_seq, seqfile_r)):
        status_list.append(myfunc.WriteFile(content, target, "w"))

    seqinfo['base_www_url'] = "http://" + seqinfo['hostname']
    # a non-zero return from the queue script signals failure -> empty jobid
    if SubmitQueryToLocalQueue(seqinfo, tmpdir, rstdir) != 0:
        return ""
    return jobid
def SubmitJobToQueue( jobid, datapath, outpath, numseq, numseq_this_user, email, #{{{
        host_ip, base_www_url):
    """Write a one-off bash wrapper that invokes run_job.py for this job and
    hand it to the slurm submitter.

    numseq == -1 means "count the sequences in query.fa"; numseq_this_user
    == -1 falls back to numseq.  Returns whatever webcom.SubmitSlurmJob
    returns.
    """
    myfunc.WriteFile("Entering SubmitJobToQueue()\n", g_params['debugfile'],
                     "a", True)
    fafile = "%s/query.fa" % (datapath)
    if numseq == -1:
        numseq = myfunc.CountFastaSeq(fafile)
    if numseq_this_user == -1:
        numseq_this_user = numseq

    name_software = "boctopus2"
    runjob = "%s %s/run_job.py" % (python_exec, rundir)
    # the script name encodes the job metadata, ';'-separated
    scriptfile = "%s/runjob;%s;%s;%s;%s;%d.sh" % (
        outpath, name_software, jobid, host_ip, email, numseq)

    cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % (
        runjob, fafile, outpath, datapath, jobid)
    if email != "":
        cmdline += "-email \"%s\" " % (email)
    if base_www_url != "":
        cmdline += "-baseurl \"%s\" " % (base_www_url)
    if g_params['isForceRun']:
        cmdline += "-force "
    if g_params['isOnlyGetCache']:
        cmdline += "-only-get-cache "

    code = "\n".join([
        "#!/bin/bash",
        "source %s/bin/activate" % (virt_env_path),
        cmdline,
    ])
    msg = "Write scriptfile %s" % (scriptfile)
    myfunc.WriteFile(msg + "\n", g_params['debugfile'], "a", True)
    myfunc.WriteFile(code, scriptfile, mode="w", isFlush=True)
    os.chmod(scriptfile, 0o755)

    myfunc.WriteFile("Getting priority" + "\n", g_params['debugfile'], "a", True)
    priority = myfunc.GetSuqPriority(numseq_this_user)
    if email in vip_user_list:
        priority = 999999999.0
    myfunc.WriteFile("priority=%d\n" % (priority), g_params['debugfile'],
                     "a", True)

    return webcom.SubmitSlurmJob(datapath, outpath, scriptfile,
                                 g_params['debugfile'])
def index(request):#{{{
    """Entry view of the web server.

    Ensures the static working directories exist, records the base www URL
    on first access, merges the optional site-local JSON config into
    g_params, and delegates to submit_seq().

    Parameters:
        request: the Django HttpRequest.
    Returns:
        The HttpResponse produced by submit_seq().
    """
    path_tmp = "%s/static/tmp"%(SITE_ROOT)
    path_md5 = "%s/static/md5"%(SITE_ROOT)
    if not os.path.exists(path_result):
        os.mkdir(path_result, 0o755)
    # BUGFIX: this condition used to re-test path_result, so path_tmp was
    # never created once path_result existed
    if not os.path.exists(path_tmp):
        os.mkdir(path_tmp, 0o755)
    if not os.path.exists(path_md5):
        os.mkdir(path_md5, 0o755)
    base_www_url_file = "%s/static/log/base_www_url.txt"%(SITE_ROOT)
    if not os.path.exists(base_www_url_file):
        base_www_url = "http://" + request.META['HTTP_HOST']
        myfunc.WriteFile(base_www_url, base_www_url_file, "w", True)

    # read the local config file if exists
    configfile = "%s/config/config.json"%(SITE_ROOT)
    config = {}
    if os.path.exists(configfile):
        text = myfunc.ReadFile(configfile)
        config = json.loads(text)
    if rootname_progname in config:
        g_params.update(config[rootname_progname])

    # derived value: upload limit in bytes from the MB setting
    g_params['MAXSIZE_UPLOAD_FILE_IN_BYTE'] = g_params['MAXSIZE_UPLOAD_FILE_IN_MB'] * 1024*1024
    return submit_seq(request)
def RunQuery(request, query):#{{{
    """Create tmp/result directories for a web-submitted query (with
    variants), persist the query files, and submit single-sequence jobs to
    the local queue.  Returns the jobid.
    """
    write_errs = []
    tmpdir = tempfile.mkdtemp(prefix="%s/static/tmp/tmp_"%(SITE_ROOT))
    rstdir = tempfile.mkdtemp(prefix="%s/static/result/rst_"%(SITE_ROOT))
    for dirname in (tmpdir, rstdir):
        os.chmod(dirname, 0o755)
    jobid = os.path.basename(rstdir)
    query['jobid'] = jobid

    # locations of the files describing this query
    jobinfofile = "%s/jobinfo"%(rstdir)
    rawseqfile = "%s/query.raw.fa"%(rstdir)
    seqfile_t = "%s/query.fa"%(tmpdir)
    seqfile_r = "%s/query.fa"%(rstdir)
    warnfile = "%s/warn.txt"%(tmpdir)
    logfile = "%s/runjob.log"%(rstdir)
    variantfile_t = "%s/variants.fa"%(tmpdir)
    variantfile_r = "%s/variants.fa"%(rstdir)

    myfunc.WriteFile("tmpdir = %s\n"%(tmpdir), logfile, "a")
    jobinfo_str = "%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n"%(
        query['date'], jobid, query['client_ip'], query['numseq'],
        len(query['rawseq']), query['jobname'], query['email'],
        query['method_submission'])
    for content, fname in [
            (jobinfo_str, jobinfofile),
            (query['rawseq'], rawseqfile),
            (query['filtered_seq'], seqfile_t),
            (query['filtered_seq'], seqfile_r),
            (query['filtered_variants'], variantfile_t),
            (query['filtered_variants'], variantfile_r)]:
        write_errs.append(myfunc.WriteFile(content, fname, "w"))

    query['base_www_url'] = "http://" + request.META['HTTP_HOST']

    # for single sequence job submitted via web interface, submit to local
    # queue
    if query['numseq'] <= 0:
        # not jobs are submitted to the front-end server, this value can be
        # set to 1 if single sequence jobs submitted via web interface will
        # be run on the front end
        query['numseq_this_user'] = 1
        SubmitQueryToLocalQueue(query, tmpdir, rstdir, isOnlyGetCache=False)

    if query['isForceRun']:
        myfunc.WriteFile("", "%s/forcerun"%(rstdir))
    return jobid
def RunQuery(request, query):#{{{
    """Create tmp/result directories for a web-submitted SCAMPI query,
    persist the query files, and submit it to the local queue depending on
    the selected app_type.  Returns the jobid.
    """
    write_errs = []
    tmpdir = tempfile.mkdtemp(prefix="%s/static/tmp/tmp_"%(SITE_ROOT))
    rstdir = tempfile.mkdtemp(prefix="%s/static/result/rst_"%(SITE_ROOT))
    for dirname in (tmpdir, rstdir):
        os.chmod(dirname, 0o755)
    jobid = os.path.basename(rstdir)
    query['jobid'] = jobid

    # locations of the files describing this query
    jobinfofile = "%s/jobinfo"%(rstdir)
    rawseqfile = "%s/query.raw.fa"%(rstdir)
    seqfile_t = "%s/query.fa"%(tmpdir)
    seqfile_r = "%s/query.fa"%(rstdir)
    warnfile = "%s/warn.txt"%(tmpdir)
    runjob_logfile = "%s/runjob.log"%(rstdir)

    myfunc.WriteFile("tmpdir = %s\n"%(tmpdir), runjob_logfile, "a", True)
    jobinfo_str = "%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\t%s\n"%(
        query['date'], jobid, query['client_ip'], query['numseq'],
        len(query['rawseq']), query['jobname'], query['email'],
        query['method_submission'], query['app_type'])
    for content, fname in [
            (jobinfo_str, jobinfofile),
            (query['rawseq'], rawseqfile),
            (query['filtered_seq'], seqfile_t),
            (query['filtered_seq'], seqfile_r)]:
        write_errs.append(myfunc.WriteFile(content, fname, "w"))

    query['base_www_url'] = "http://" + request.META['HTTP_HOST']

    # temporarily disable submission of SCAMPI-msa jobs until find a solution
    # that can be submitted to the computational node, 2017-08-10
    app_type = query['app_type']
    if (app_type == "SCAMPI-single"
            and query['numseq'] <= g_params['MAX_ALLOWD_NUMSEQ_single']):
        query['numseq_this_user'] = 1
        SubmitQueryToLocalQueue(query, tmpdir, rstdir, isOnlyGetCache=False)
    elif (app_type == "SCAMPI-msa"
            and query['numseq'] <= g_params['MAX_ALLOWD_NUMSEQ_msa']):
        query['numseq_this_user'] = 1
        SubmitQueryToLocalQueue(query, tmpdir, rstdir, isOnlyGetCache=True)

    if query['isForceRun']:
        myfunc.WriteFile("", "%s/forcerun"%(rstdir))
    return jobid
def SubmitQueryToLocalQueue(query, tmpdir, rstdir, isOnlyGetCache=False):#{{{
    """Run the external submit_job_to_queue.py script for this query.

    Returns 0 on success and 1 if the script exits with an error; errors
    are appended to runjob.err and a runjob.failed tag file is written.
    """
    scriptfile = "%s/app/submit_job_to_queue.py"%(SITE_ROOT)
    # NOTE: the rstdir argument is recomputed from path_result + jobid,
    # mirroring the original behavior
    rstdir = "%s/%s"%(path_result, query['jobid'])
    errfile = "%s/runjob.err"%(rstdir)
    debugfile = "%s/debug.log"%(rstdir)  #this log only for debugging
    logfile = "%s/runjob.log"%(rstdir)
    rmsg = ""

    cmd = [python_exec, scriptfile,
           "-nseq", "%d"%query['numseq'],
           "-nseq-this-user", "%d"%query['numseq_this_user'],
           "-jobid", query['jobid'],
           "-outpath", rstdir,
           "-datapath", tmpdir,
           "-baseurl", query['base_www_url']]
    if query['email'] != "":
        cmd.extend(["-email", query['email']])
    if query['client_ip'] != "":
        cmd.extend(["-host", query['client_ip']])
    if query['isForceRun']:
        cmd.append("-force")
    if isOnlyGetCache:
        cmd.append("-only-get-cache")
    cmdline = " ".join(cmd)

    try:
        rmsg = myfunc.check_output(cmd, stderr=subprocess.STDOUT)
        myfunc.WriteFile("cmdline: " + cmdline +"\n", debugfile, "a", True)
        myfunc.WriteFile(rmsg+"\n", debugfile, "a", True)
    except subprocess.CalledProcessError as e:
        failtagfile = "%s/%s"%(rstdir, "runjob.failed")
        if not os.path.exists(failtagfile):
            date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            myfunc.WriteFile(date, failtagfile)
        myfunc.WriteFile(str(e)+"\n", errfile, "a", True)
        myfunc.WriteFile("cmdline: " + cmdline +"\n", debugfile, "a", True)
        myfunc.WriteFile(rmsg+"\n", errfile, "a", True)
        return 1
    return 0
def submitjob_remote(ctx, seq="", fixtop="", jobname="", email="",#{{{
        numseq_this_user="", isforcerun=""):
    """SOAP/wsdl endpoint: validate a submitted sequence, run it through the
    local queue, and yield the response fields.

    Yields, in order: jobid, url, numseq_str, errinfo, warninfo.
    All arguments arrive as strings from the SOAP layer.
    """
    seq = seq + "\n" #force add a new line for correct parsing the fasta file
    seqinfo = {}
    # ValidateSeq fills seqinfo (numseq, warninfo, errinfo, ...) as a side
    # effect and returns the filtered sequence text ("" on failure)
    filtered_seq = webcom.ValidateSeq(seq, seqinfo, g_params)
    # ValidateFixtop(fixtop) #to be implemented
    if numseq_this_user != "" and numseq_this_user.isdigit():
        seqinfo['numseq_this_user'] = int(numseq_this_user)
    else:
        seqinfo['numseq_this_user'] = 1
    numseq_str = "%d"%(seqinfo['numseq'])
    warninfo = seqinfo['warninfo']
    # print "\n\nreq\n", dir(ctx.transport.req) #debug
    # print "\n\n", ctx.transport.req.META['REMOTE_ADDR'] #debug
    # print "\n\n", ctx.transport.req.META['HTTP_HOST'] #debug
    jobid = "None"
    url = "None"
    if filtered_seq == "":
        # validation failed; report the reason collected by ValidateSeq
        errinfo = seqinfo['errinfo']
    else:
        soap_req = ctx.transport.req
        # client address/host may be absent depending on the transport
        try:
            client_ip = soap_req.META['REMOTE_ADDR']
        except:
            client_ip = ""
        try:
            hostname = soap_req.META['HTTP_HOST']
        except:
            hostname = ""
        # print client_ip
        # print hostname
        seqinfo['jobname'] = jobname
        seqinfo['email'] = email
        seqinfo['fixtop'] = fixtop
        seqinfo['date'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        seqinfo['client_ip'] = client_ip
        seqinfo['hostname'] = hostname
        seqinfo['method_submission'] = "wsdl"
        # for this method, wsdl is called only by the daemon script,
        # isForceRun can be set by the argument ("T..."/"t..." means True)
        if isforcerun.upper()[:1] == "T":
            seqinfo['isForceRun'] = True
        else:
            seqinfo['isForceRun'] = False
        jobid = RunQuery_wsdl_local(seq, filtered_seq, seqinfo)
        if jobid == "":
            errinfo = "Failed to submit your job to the queue\n"+seqinfo['errinfo']
        else:
            # append this submission to the main and per-client log files
            log_record = "%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n"%(seqinfo['date'], jobid,
                    seqinfo['client_ip'], seqinfo['numseq'],
                    len(seq), seqinfo['jobname'], seqinfo['email'],
                    seqinfo['method_submission'])
            main_logfile_query = "%s/%s/%s"%(SITE_ROOT, "static/log", "submitted_seq.log")
            myfunc.WriteFile(log_record, main_logfile_query, "a")
            divided_logfile_query = "%s/%s/%s"%(SITE_ROOT, "static/log/divided",
                    "%s_submitted_seq.log"%(seqinfo['client_ip']))
            if seqinfo['client_ip'] != "":
                myfunc.WriteFile(log_record, divided_logfile_query, "a")
            url = "http://" + hostname + g_params['BASEURL'] + "result/%s"%(jobid)
            file_seq_warning = "%s/%s/%s/%s"%(SITE_ROOT, "static/result", jobid, "query.warn.txt")
            if seqinfo['warninfo'] != "":
                myfunc.WriteFile(seqinfo['warninfo'], file_seq_warning, "a")
            errinfo = seqinfo['errinfo']
    # spyne-style generator response: one yield per out-parameter
    for s in [jobid, url, numseq_str, errinfo, warninfo]:
        yield s
def main(g_params):#{{{
    """Main loop of the front-end queue daemon.

    Repeatedly: reload the site config, refresh the compute-node and VIP
    lists, periodically run statistics/cleanup, then walk the runjob log and
    submit/collect/finalize each pending job.  Sleeps SLEEP_INTERVAL seconds
    between iterations.  Never returns in normal operation.
    """
    submitjoblogfile = "%s/submitted_seq.log"%(path_log)
    runjoblogfile = "%s/runjob_log.log"%(path_log)
    finishedjoblogfile = "%s/finished_job.log"%(path_log)
    if not os.path.exists(path_cache):
        os.mkdir(path_cache)
    loop = 0
    while 1:
        # pause when cache cleaning is in progress
        # NOTE(review): this 'continue' has no sleep, so the daemon busy-spins
        # while the tag file exists — confirm whether a sleep is intended here
        if os.path.exists("%s/CACHE_CLEANING_IN_PROGRESS"%(path_result)):
            continue
        # load the config file if exists (re-read every loop so edits take
        # effect without restarting the daemon)
        configfile = "%s/config/config.json"%(basedir)
        config = {}
        if os.path.exists(configfile):
            text = myfunc.ReadFile(configfile)
            config = json.loads(text)
        if rootname_progname in config:
            g_params.update(config[rootname_progname])
        if os.path.exists(black_iplist_file):
            g_params['blackiplist'] = myfunc.ReadIDList(black_iplist_file)
        os.environ['TZ'] = g_params['TZ']
        time.tzset()
        # refresh the compute-node and VIP user lists
        avail_computenode = webcom.ReadComputeNode(computenodefile) # return value is a dict
        g_params['vip_user_list'] = myfunc.ReadIDList2(vip_email_file, col=0)
        num_avail_node = len(avail_computenode)
        webcom.loginfo("loop %d"%(loop), gen_logfile)

        isOldRstdirDeleted = False
        # periodic housekeeping controlled by STATUS_UPDATE_FREQUENCY
        if loop % g_params['STATUS_UPDATE_FREQUENCY'][0] == g_params['STATUS_UPDATE_FREQUENCY'][1]:
            qdcom.RunStatistics_basic(webserver_root, gen_logfile, gen_errfile)
            isOldRstdirDeleted = webcom.DeleteOldResult(path_result, path_log,
                    gen_logfile, MAX_KEEP_DAYS=g_params['MAX_KEEP_DAYS'])
            webcom.CleanServerFile(path_static, gen_logfile, gen_errfile)
        if 'DEBUG_ARCHIVE' in g_params and g_params['DEBUG_ARCHIVE']:
            webcom.loginfo("Run ArchiveLogFile, path_log=%s, threshold_logfilesize=%d"%(
                path_log, threshold_logfilesize), gen_logfile)
        webcom.ArchiveLogFile(path_log, threshold_logfilesize=threshold_logfilesize)

        qdcom.CreateRunJoblog(loop, isOldRstdirDeleted, g_params)

        # Get number of jobs submitted to the remote server based on the
        # runjoblogfile
        runjobidlist = myfunc.ReadIDList2(runjoblogfile,0)
        remotequeueDict = {}
        for node in avail_computenode:
            remotequeueDict[node] = []
        for jobid in runjobidlist:
            rstdir = "%s/%s"%(path_result, jobid)
            remotequeue_idx_file = "%s/remotequeue_seqindex.txt"%(rstdir)
            if os.path.exists(remotequeue_idx_file):
                content = myfunc.ReadFile(remotequeue_idx_file)
                lines = content.split('\n')
                for line in lines:
                    strs = line.split('\t')
                    if len(strs)>=5:
                        node = strs[1]
                        remotejobid = strs[2]
                        if node in remotequeueDict:
                            remotequeueDict[node].append(remotejobid)

        cntSubmitJobDict = {}
        # format of cntSubmitJobDict {'node_ip': [INT, INT, STR]}
        for node in avail_computenode:
            queue_method = avail_computenode[node]['queue_method']
            num_queue_job = len(remotequeueDict[node])
            # NOTE(review): len() is always >= 0, so the else branch below is
            # unreachable — confirm the intended condition
            if num_queue_job >= 0:
                cntSubmitJobDict[node] = [num_queue_job,
                                          g_params['MAX_SUBMIT_JOB_PER_NODE'],
                                          queue_method]
            else:
                cntSubmitJobDict[node] = [g_params['MAX_SUBMIT_JOB_PER_NODE'],
                                          g_params['MAX_SUBMIT_JOB_PER_NODE'],
                                          queue_method]

        # entries in runjoblogfile includes jobs in queue or running
        hdl = myfunc.ReadLineByBlock(runjoblogfile)
        if not hdl.failure:
            lines = hdl.readlines()
            while lines != None:
                for line in lines:
                    strs = line.split("\t")
                    if len(strs) >= 11:
                        jobid = strs[0]
                        email = strs[4]
                        try:
                            numseq = int(strs[5])
                        except:
                            numseq = 1
                        try:
                            numseq_this_user = int(strs[10])
                        except:
                            numseq_this_user = 1
                        rstdir = "%s/%s"%(path_result, jobid)
                        finishtagfile = "%s/%s"%(rstdir, "runjob.finish")
                        status = strs[1]
                        webcom.loginfo("CompNodeStatus: %s"%(str(cntSubmitJobDict)), gen_logfile)
                        runjob_lockfile = "%s/%s/%s.lock"%(path_result, jobid, "runjob.lock")
                        # skip jobs currently locked by another worker
                        if os.path.exists(runjob_lockfile):
                            msg = "runjob_lockfile %s exists, ignore the job %s" %(runjob_lockfile, jobid)
                            webcom.loginfo(msg, gen_logfile)
                            continue
                        #if IsHaveAvailNode(cntSubmitJobDict):
                        if not g_params['DEBUG_NO_SUBMIT']:
                            qdcom.SubmitJob(jobid, cntSubmitJobDict, numseq_this_user, g_params)
                        qdcom.GetResult(jobid, g_params) # the start tagfile is written when got the first result
                        qdcom.CheckIfJobFinished(jobid, numseq, email, g_params)
                lines = hdl.readlines()
            hdl.close()

        myfunc.WriteFile("sleep for %d seconds\n"%(g_params['SLEEP_INTERVAL']),
                         gen_logfile, "a", True)
        time.sleep(g_params['SLEEP_INTERVAL'])
        loop += 1
    return 0
def RunJob(infile, outpath, tmpdir, email, jobid, g_params): #{{{
    """Run a BOCTOPUS2 job: reuse md5-keyed cached results where possible,
    run the remaining sequences one by one through the workflow, write the
    aggregate text result and zip archive, tag finish/failure, and e-mail
    the submitter from the front-end node.

    Returns 0 on success, 1 if a working folder could not be created or if
    runjob.err ended up non-empty.
    """
    all_begin_time = time.time()

    rootname = os.path.basename(os.path.splitext(infile)[0])
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    app_logfile = "%s/app.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    rmsg = ""

    resultpathname = jobid
    outpath_result = "%s/%s" % (outpath, resultpathname)
    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    resultfile_text = "%s/%s" % (outpath_result, "query.top")
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    finished_seq_file = "%s/finished_seqs.txt" % (outpath_result)
    finished_idx_file = "%s/finished_seqindex.txt" % (outpath)

    for folder in [outpath_result, tmp_outpath_result]:
        try:
            os.makedirs(folder)
        except OSError:
            msg = "Failed to create folder %s" % (folder)
            myfunc.WriteFile(msg + "\n", gen_errfile, "a")
            return 1
    # truncate/initialize the finished-sequence list; best effort
    try:
        open(finished_seq_file, 'w').close()
    except:
        pass

    #first getting result from caches
    # ==================================
    maplist = []          # per-sequence records incl. the sequence itself
    maplist_simple = []   # per-sequence records without the sequence
    toRunDict = {}        # origIndex -> [seq, numTM, description] still to run
    hdl = myfunc.ReadFastaByBlock(infile, method_seqid=0, method_seq=0)
    if hdl.failure:
        isOK = False
    else:
        webcom.WriteDateTimeTagFile(starttagfile, runjob_logfile, runjob_errfile)
        recordList = hdl.readseq()
        cnt = 0
        origpath = os.getcwd()
        while recordList != None:
            for rd in recordList:
                isSkip = False
                # temp outpath for the sequence is always seq_0, and I feed
                # only one seq a time to the workflow
                tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result, "seq_%d" % 0)
                outpath_this_seq = "%s/%s" % (outpath_result, "seq_%d" % cnt)
                subfoldername_this_seq = "seq_%d" % (cnt)
                if os.path.exists(tmp_outpath_this_seq):
                    try:
                        shutil.rmtree(tmp_outpath_this_seq)
                    except OSError:
                        pass
                maplist.append(
                    "%s\t%d\t%s\t%s" %
                    ("seq_%d" % cnt, len(rd.seq), rd.description, rd.seq))
                maplist_simple.append(
                    "%s\t%d\t%s" %
                    ("seq_%d" % cnt, len(rd.seq), rd.description))
                if not g_params['isForceRun']:
                    # cached results are keyed by the md5 of the sequence,
                    # stored under path_cache/<md5[:2]>/<md5>[.zip]
                    md5_key = hashlib.md5(rd.seq.encode('utf-8')).hexdigest()
                    subfoldername = md5_key[:2]
                    cachedir = "%s/%s/%s" % (path_cache, subfoldername, md5_key)
                    zipfile_cache = cachedir + ".zip"
                    if os.path.exists(cachedir) or os.path.exists(
                            zipfile_cache):
                        if os.path.exists(cachedir):
                            try:
                                shutil.copytree(cachedir, outpath_this_seq)
                            except Exception as e:
                                msg = "Failed to copytree %s -> %s" % (
                                    cachedir, outpath_this_seq)
                                date_str = time.strftime(FORMAT_DATETIME)
                                myfunc.WriteFile(
                                    "[%s] %s with errmsg=%s\n" %
                                    (date_str, msg, str(e)),
                                    runjob_errfile, "a")
                        elif os.path.exists(zipfile_cache):
                            cmd = [
                                "unzip", zipfile_cache, "-d", outpath_result
                            ]
                            webcom.RunCmd(cmd, runjob_logfile, runjob_errfile)
                            shutil.move("%s/%s" % (outpath_result, md5_key),
                                        outpath_this_seq)

                        # the png is used as the marker for a complete result
                        checkfile = "%s/query.predict.png" % (outpath_this_seq)
                        fafile_this_seq = '%s/seq.fa' % (outpath_this_seq)
                        if os.path.exists(outpath_this_seq) and os.path.exists(
                                checkfile):
                            info_finish = webcom.GetInfoFinish_Boctopus2(
                                outpath_this_seq,
                                cnt,
                                len(rd.seq),
                                rd.description,
                                source_result="cached",
                                runtime=0.0)
                            myfunc.WriteFile("\t".join(info_finish) + "\n",
                                             finished_seq_file,
                                             "a",
                                             isFlush=True)
                            myfunc.WriteFile("%d\n" % (cnt),
                                             finished_idx_file,
                                             "a",
                                             isFlush=True)
                            isSkip = True
                if not isSkip:
                    # first try to delete the outfolder if exists
                    if os.path.exists(outpath_this_seq):
                        try:
                            shutil.rmtree(outpath_this_seq)
                        except OSError:
                            pass
                    origIndex = cnt
                    numTM = 0
                    toRunDict[origIndex] = [rd.seq, numTM, rd.description
                                            ] #init value for numTM is 0
                cnt += 1
            recordList = hdl.readseq()
        hdl.close()
    myfunc.WriteFile("\n".join(maplist_simple) + "\n", mapfile)

    if not g_params['isOnlyGetCache']:
        # dump the still-to-run sequences to a single fasta file
        torun_all_seqfile = "%s/%s" % (tmp_outpath_result, "query.torun.fa")
        dumplist = []
        for key in toRunDict:
            top = toRunDict[key][0]
            dumplist.append(">%s\n%s" % (str(key), top))
        myfunc.WriteFile("\n".join(dumplist) + "\n", torun_all_seqfile, "w")
        del dumplist

        sortedlist = sorted(list(toRunDict.items()),
                            key=lambda x: x[1][1],
                            reverse=True)
        #format of sortedlist [(origIndex: [seq, numTM, description]), ...]

        # submit sequences one by one to the workflow according to orders in
        # sortedlist
        for item in sortedlist:
            origIndex = item[0]
            seq = item[1][0]
            description = item[1][2]
            subfoldername_this_seq = "seq_%d" % (origIndex)
            outpath_this_seq = "%s/%s" % (outpath_result,
                                          subfoldername_this_seq)
            tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result,
                                              "seq_%d" % (0))
            if os.path.exists(tmp_outpath_this_seq):
                try:
                    shutil.rmtree(tmp_outpath_this_seq)
                except OSError:
                    pass

            seqfile_this_seq = "%s/%s" % (tmp_outpath_result,
                                          "query_%d.fa" % (origIndex))
            seqcontent = ">query_%d\n%s\n" % (origIndex, seq)
            myfunc.WriteFile(seqcontent, seqfile_this_seq, "w")
            if not os.path.exists(seqfile_this_seq):
                msg = "Failed to generate seq file for index %d" % (origIndex)
                date_str = time.strftime(FORMAT_DATETIME)
                myfunc.WriteFile("[%s] %s\n" % (date_str, msg),
                                 runjob_errfile, "a", True)
                continue

            # run the workflow for this single sequence
            cmd = [runscript, seqfile_this_seq, tmp_outpath_result]
            (t_success, runtime_in_sec) = webcom.RunCmd(cmd, runjob_logfile,
                                                        runjob_errfile, True)

            # make sure a seq.fa exists in the per-sequence output folder
            aaseqfile = "%s/seq.fa" % (tmp_outpath_this_seq)
            if not os.path.exists(aaseqfile):
                seqcontent = ">%s\n%s\n" % (description, seq)
                myfunc.WriteFile(seqcontent, aaseqfile, "w")

            if os.path.exists(tmp_outpath_this_seq):
                cmd = ["mv", "-f", tmp_outpath_this_seq, outpath_this_seq]
                (isCmdSuccess, t_runtime) = webcom.RunCmd(cmd, runjob_logfile,
                                                          runjob_errfile)
                timefile = "%s/time.txt" % (tmp_outpath_result)
                targetfile = "%s/time.txt" % (outpath_this_seq)
                if os.path.exists(timefile) and os.path.exists(
                        outpath_this_seq):
                    try:
                        shutil.move(timefile, targetfile)
                    except:
                        msg = "Failed to move %s/time.txt" % (
                            tmp_outpath_result)
                        date_str = time.strftime(FORMAT_DATETIME)
                        myfunc.WriteFile("[%s] %s\n" % (date_str, msg),
                                         runjob_errfile, "a", True)
                        pass
                if isCmdSuccess:
                    runtime = runtime_in_sec #in seconds
                    info_finish = webcom.GetInfoFinish_Boctopus2(
                        outpath_this_seq,
                        origIndex,
                        len(seq),
                        description,
                        source_result="newrun",
                        runtime=runtime)
                    myfunc.WriteFile("\t".join(info_finish) + "\n",
                                     finished_seq_file,
                                     "a",
                                     isFlush=True)
                    # now write the text output for this seq
                    info_this_seq = "%s\t%d\t%s\t%s" % (
                        "seq_%d" % origIndex, len(seq), description, seq)
                    resultfile_text_this_seq = "%s/%s" % (outpath_this_seq,
                                                          "query.result.txt")
                    webcom.WriteBoctopusTextResultFile(
                        resultfile_text_this_seq, outpath_result,
                        [info_this_seq], runtime_in_sec,
                        g_params['base_www_url'])
                    # create or update the md5 cache
                    # create cache only on the front-end
                    figurefile = "%s/plot/query_0.png" % (outpath_this_seq)
                    # Note: do not create cache is figure file does not exist
                    if webcom.IsFrontEndNode(g_params['base_www_url']
                                             ) and os.path.exists(figurefile):
                        md5_key = hashlib.md5(seq.encode('utf-8')).hexdigest()
                        subfoldername = md5_key[:2]
                        md5_subfolder = "%s/%s" % (path_cache, subfoldername)
                        cachedir = "%s/%s/%s" % (path_cache, subfoldername,
                                                 md5_key)
                        if os.path.exists(cachedir):
                            try:
                                shutil.rmtree(cachedir)
                            except:
                                msg = "Failed to shutil.rmtree(%s)" % (
                                    cachedir)
                                date_str = time.strftime(FORMAT_DATETIME)
                                myfunc.WriteFile("[%s] %s\n" % (date_str, msg),
                                                 runjob_errfile, "a", True)
                                pass
                        if not os.path.exists(md5_subfolder):
                            try:
                                os.makedirs(md5_subfolder)
                            except:
                                pass
                        # move the result into the cache and leave a relative
                        # symlink in its place
                        if os.path.exists(md5_subfolder
                                          ) and not os.path.exists(cachedir):
                            cmd = ["mv", "-f", outpath_this_seq, cachedir]
                            webcom.RunCmd(cmd, runjob_logfile, runjob_errfile)
                        if not os.path.exists(
                                outpath_this_seq) and os.path.exists(cachedir):
                            rela_path = os.path.relpath(
                                cachedir, outpath_result) #relative path
                            try:
                                os.chdir(outpath_result)
                                os.symlink(rela_path, subfoldername_this_seq)
                            except:
                                pass

    all_end_time = time.time()
    all_runtime_in_sec = all_end_time - all_begin_time

    if not g_params['isOnlyGetCache'] or len(toRunDict) == 0:
        # now write the text output to a single file
        statfile = "%s/%s" % (outpath_result, "stat.txt")
        webcom.WriteBoctopusTextResultFile(resultfile_text,
                                           outpath_result,
                                           maplist,
                                           all_runtime_in_sec,
                                           g_params['base_www_url'],
                                           statfile=statfile)
        # now making zip instead (for windows users)
        # note that zip rq will zip the real data for symbolic links
        os.chdir(outpath)
        cmd = ["zip", "-rq", zipfile, resultpathname]
        webcom.RunCmd(cmd, runjob_logfile, runjob_errfile)

        # write finish tag file
        if os.path.exists(finished_seq_file):
            webcom.WriteDateTimeTagFile(finishtagfile, runjob_logfile,
                                        runjob_errfile)

        isSuccess = False
        if (os.path.exists(finishtagfile)
                and os.path.exists(zipfile_fullpath)):
            isSuccess = True
        else:
            isSuccess = False
            failedtagfile = "%s/runjob.failed" % (outpath)
            webcom.WriteDateTimeTagFile(failedtagfile, runjob_logfile,
                                        runjob_errfile)

        # send the result to email
        # do not sendmail at the cloud VM
        if webcom.IsFrontEndNode(g_params['base_www_url']
                                 ) and myfunc.IsValidEmailAddress(email):
            if isSuccess:
                finish_status = "success"
            else:
                finish_status = "failed"
            # NOTE(review): the from_email literal below ends with a stray
            # ')' — looks like a typo in the sender address; confirm and fix
            webcom.SendEmail_on_finish(
                jobid,
                g_params['base_www_url'],
                finish_status,
                name_server="BOCTOPUS2",
                from_email="[email protected])",
                to_email=email,
                contact_email=contact_email,
                logfile=runjob_logfile,
                errfile=runjob_errfile)

    # a non-trivially-sized error file means the job had problems; keep the
    # tmpdir around in that case for debugging
    if os.path.exists(runjob_errfile) and os.path.getsize(runjob_errfile) > 1:
        return 1
    else:
        date_str = time.strftime(FORMAT_DATETIME)
        try:
            shutil.rmtree(tmpdir)
            msg = "rmtree(%s)" % (tmpdir)
            myfunc.WriteFile("[%s] %s\n" % (date_str, msg), runjob_logfile,
                             "a", True)
        except Exception as e:
            msg = "Failed to rmtree(%s)" % (tmpdir)
            myfunc.WriteFile("[%s] %s\n" % (date_str, msg), runjob_errfile,
                             "a", True)
            pass
    return 0
def RunJob_msa(infile, outpath, tmpdir, email, jobid, g_params): #{{{
    """Run a SCAMPI-msa job: reuse topologies cached in the sqlite database,
    run the remaining sequences through the MSA workflow, write the combined
    text result and zip archive, tag finish/failure, and e-mail the
    submitter from the front-end node.

    Returns 0 on success (or when the daemon already started this job),
    1 if a working folder could not be created.
    """
    all_begin_time = time.time()

    rootname = os.path.basename(os.path.splitext(infile)[0])
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    starttagfile = "%s/runjob.start" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    failtagfile = "%s/runjob.failed" % (outpath)
    rmsg = ""

    qdinit_start_tagfile = "%s/runjob.qdinit.start" % (outpath)
    # if the daemon starts to process the job before the run_job.py running
    # in the local queue, skip it
    if os.path.exists(qdinit_start_tagfile):
        return 0

    resultpathname = jobid
    outpath_result = "%s/%s" % (outpath, resultpathname)
    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    resultfile_text = "%s/%s" % (outpath_result, "query.result.txt")
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    finished_seq_file = "%s/finished_seqs.txt" % (outpath_result)
    finished_idx_file = "%s/finished_seqindex.txt" % (outpath)

    for folder in [outpath_result, tmp_outpath_result]:
        try:
            os.makedirs(folder)
        except OSError:
            msg = "Failed to create folder %s" % (folder)
            myfunc.WriteFile(msg + "\n", gen_errfile, "a")
            return 1
    # truncate/initialize the finished-sequence list; best effort
    try:
        open(finished_seq_file, 'w').close()
    except:
        pass

    #first getting result from caches
    # ==================================
    maplist = []
    toRunDict = {}   # origIndex -> [seq, numTM, description] still to run
    hdl = myfunc.ReadFastaByBlock(infile, method_seqid=0, method_seq=0)
    if hdl.failure:
        isOK = False
    else:
        webcom.WriteDateTimeTagFile(starttagfile, runjob_logfile,
                                    runjob_errfile)
        cnt = 0
        origpath = os.getcwd()
        # cached topologies live in an sqlite table keyed by sequence md5
        con = sqlite3.connect(db_cache_SCAMPI2MSA)
        with con:
            cur = con.cursor()
            cur.execute("""
                CREATE TABLE IF NOT EXISTS %s (
                    md5 VARCHAR(100),
                    seq VARCHAR(30000),
                    top VARCHAR(30000),
                    PRIMARY KEY (md5)
                )""" % (dbmsa_tablename))
            recordList = hdl.readseq()
            while recordList != None:
                for rd in recordList:
                    isSkip = False
                    if not g_params['isForceRun']:
                        md5_key = hashlib.md5(
                            rd.seq.encode('utf-8')).hexdigest()
                        cmd = "SELECT md5, seq, top FROM %s WHERE md5 = \"%s\"" % (
                            dbmsa_tablename, md5_key)
                        cur.execute(cmd)
                        rows = cur.fetchall()
                        for row in rows:
                            top = row[2]
                            numTM = myfunc.CountTM(top)
                            # info_finish has 8 items
                            info_finish = [
                                "seq_%d" % cnt,
                                str(len(rd.seq)),
                                str(numTM), "cached",
                                str(0.0), rd.description, rd.seq, top
                            ]
                            myfunc.WriteFile("\t".join(info_finish) + "\n",
                                             finished_seq_file,
                                             "a",
                                             isFlush=True)
                            myfunc.WriteFile("%d\n" % (cnt),
                                             finished_idx_file,
                                             "a",
                                             isFlush=True)
                            isSkip = True
                    if not isSkip:
                        # first try to delete the outfolder if exists
                        origIndex = cnt
                        numTM = 0
                        toRunDict[origIndex] = [rd.seq, numTM, rd.description
                                                ] #init value for numTM is 0
                    cnt += 1
                recordList = hdl.readseq()
            hdl.close()

    if not g_params['isOnlyGetCache']:
        # dump the still-to-run sequences to a single fasta file
        torun_all_seqfile = "%s/%s" % (tmp_outpath_result, "query.torun.fa")
        dumplist = []
        for key in toRunDict:
            top = toRunDict[key][0]
            dumplist.append(">%s\n%s" % (str(key), top))
        myfunc.WriteFile("\n".join(dumplist) + "\n", torun_all_seqfile, "w")
        del dumplist

        sortedlist = sorted(list(toRunDict.items()),
                            key=lambda x: x[1][1],
                            reverse=True)
        #format of sortedlist [(origIndex: [seq, numTM, description]), ...]

        # submit sequences one by one to the workflow according to orders in
        # sortedlist
        for item in sortedlist:
            origIndex = item[0]
            seq = item[1][0]
            description = item[1][2]
            outpath_this_seq = "%s/%s" % (outpath_result,
                                          "seq_%d" % origIndex)
            tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result,
                                              "seq_%d" % (0))
            if os.path.exists(tmp_outpath_this_seq):
                try:
                    shutil.rmtree(tmp_outpath_this_seq)
                except OSError:
                    pass
            try:
                os.makedirs(tmp_outpath_this_seq)
            except OSError:
                g_params['runjob_err'].append(
                    "Failed to create the tmp_outpath_this_seq %s" %
                    (tmp_outpath_this_seq))
                continue

            seqfile_this_seq = "%s/%s" % (tmp_outpath_result,
                                          "query_%d.fa" % (origIndex))
            seqcontent = ">%d\n%s\n" % (origIndex, seq)
            myfunc.WriteFile(seqcontent, seqfile_this_seq, "w")
            if not os.path.exists(seqfile_this_seq):
                g_params['runjob_err'].append(
                    "failed to generate seq index %d" % (origIndex))
                continue
            if not os.path.exists("%s/seq.fa" % (tmp_outpath_this_seq)):
                try:
                    shutil.copyfile(seqfile_this_seq,
                                    "%s/seq.fa" % (tmp_outpath_this_seq))
                except OSError:
                    pass

            numCPU = 4
            outtopfile = "%s/query.top" % (tmp_outpath_this_seq)
            # run the MSA workflow for this single sequence
            cmd = [
                runscript_msa, seqfile_this_seq, outtopfile, blastdir, blastdb
            ]
            (t_success, runtime_in_sec) = webcom.RunCmd(cmd,
                                                        runjob_logfile,
                                                        runjob_errfile,
                                                        verbose=True)

            if os.path.exists(tmp_outpath_this_seq):
                cmd = ["mv", "-f", tmp_outpath_this_seq, outpath_this_seq]
                (isCmdSuccess, t_runtime) = webcom.RunCmd(cmd,
                                                          runjob_logfile,
                                                          runjob_errfile,
                                                          verbose=True)
                if isCmdSuccess:
                    runtime = runtime_in_sec #in seconds
                    predfile = "%s/query.top" % (outpath_this_seq)
                    (seqid, seqanno, top) = myfunc.ReadSingleFasta(predfile)
                    numTM = myfunc.CountTM(top)
                    # info_finish has 8 items
                    info_finish = [
                        "seq_%d" % origIndex,
                        str(len(seq)),
                        str(numTM), "newrun",
                        str(runtime), description, seq, top
                    ]
                    myfunc.WriteFile("\t".join(info_finish) + "\n",
                                     finished_seq_file,
                                     "a",
                                     isFlush=True)

    all_end_time = time.time()
    all_runtime_in_sec = all_end_time - all_begin_time

    # flush any accumulated log lines; a non-empty return value from
    # WriteFile is an error message
    if len(g_params['runjob_log']) > 0:
        rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_log']) + "\n",
                                  runjob_logfile, "a")
        if rt_msg:
            g_params['runjob_err'].append(rt_msg)

    if not g_params['isOnlyGetCache'] or len(toRunDict) == 0:
        if os.path.exists(finished_seq_file):
            webcom.WriteDateTimeTagFile(finishtagfile, runjob_logfile,
                                        runjob_errfile)
        # now write the text output to a single file
        dumped_resultfile = "%s/%s" % (outpath_result, "query.top")
        statfile = "%s/%s" % (outpath_result, "stat.txt")
        webcom.WriteSCAMPI2MSATextResultFile(dumped_resultfile,
                                             outpath_result,
                                             maplist,
                                             all_runtime_in_sec,
                                             g_params['base_www_url'],
                                             statfile=statfile)
        # now making zip instead (for windows users)
        pwd = os.getcwd()
        os.chdir(outpath)
        cmd = ["zip", "-rq", zipfile, resultpathname]
        webcom.RunCmd(cmd, runjob_logfile, runjob_errfile)
        os.chdir(pwd)

        isSuccess = False
        if (os.path.exists(finishtagfile)
                and os.path.exists(zipfile_fullpath)):
            isSuccess = True
            # delete the tmpdir if succeeded
            shutil.rmtree(tmpdir) #DEBUG, keep tmpdir
        else:
            isSuccess = False
            webcom.WriteDateTimeTagFile(failtagfile, runjob_logfile,
                                        runjob_errfile)

        finish_status = "" #["success", "failed", "partly_failed"]
        if isSuccess:
            finish_status = "success"
        else:
            finish_status = "failed"

        # send the result to email
        # do not sendmail at the cloud VM
        if webcom.IsFrontEndNode(g_params['base_www_url']
                                 ) and myfunc.IsValidEmailAddress(email):
            webcom.SendEmail_on_finish(jobid,
                                       g_params['base_www_url'],
                                       finish_status,
                                       name_server="SCAMPI2-msa",
                                       from_email="*****@*****.**",
                                       to_email=email,
                                       contact_email=contact_email,
                                       logfile=runjob_logfile,
                                       errfile=runjob_errfile)
    return 0
def submit_seq(request):#{{{
    """Django view: handle query-sequence submission.

    GET renders a blank submission form. POST validates the form, runs the
    query via RunQuery(), logs the submission, kicks off the front-end queue
    daemon if needed, and redirects to the result page (or a thanks/badquery
    page).
    """
    info = {}
    webcom.set_basic_config(request, info, g_params)

    # if this is a POST request we need to process the form data
    if request.method == 'POST':
        # create a form instance and populate it with data from the request:
        form = SubmissionForm(request.POST)
        # check whether it's valid:
        if form.is_valid():
            # process the data in form.cleaned_data as required
            # redirect to a new URL:
            jobname = request.POST['jobname']
            email = request.POST['email']
            rawseq = request.POST['rawseq'] + "\n"  # force add a new line
            variants = request.POST['variants']  # also POST variants from form
            # optional fields: absent keys raise KeyError
            # (MultiValueDictKeyError is a KeyError subclass)
            Nfix = ""
            Cfix = ""
            fix_str = ""
            isForceRun = False
            try:
                Nfix = request.POST['Nfix']
            except KeyError:
                pass
            try:
                Cfix = request.POST['Cfix']
            except KeyError:
                pass
            try:
                fix_str = request.POST['fix_str']
            except KeyError:
                pass
            if 'forcerun' in request.POST:
                isForceRun = True
            try:
                seqfile = request.FILES['seqfile']
            except KeyError:
                seqfile = ""

            date_str = time.strftime(g_params['FORMAT_DATETIME'])
            query = {}
            query['rawseq'] = rawseq
            query['variants'] = variants
            query['seqfile'] = seqfile
            query['email'] = email
            query['jobname'] = jobname
            query['date'] = date_str
            query['client_ip'] = info['client_ip']
            query['errinfo'] = ""
            query['method_submission'] = "web"
            query['Nfix'] = Nfix
            query['Cfix'] = Cfix
            query['fix_str'] = fix_str
            query['isForceRun'] = isForceRun
            query['username'] = info['username']
            query['STATIC_URL'] = settings.STATIC_URL

            is_valid = webcom.ValidateQuery(request, query, g_params)
            if is_valid:
                jobid = RunQuery(request, query)

                # type of method_submission can be web or wsdl
                #date, jobid, IP, numseq, size, jobname, email, method_submission
                log_record = "%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n"%(query['date'],
                        jobid, query['client_ip'], query['numseq'],
                        len(query['rawseq']), query['jobname'], query['email'],
                        query['method_submission'])
                main_logfile_query = "%s/%s/%s"%(SITE_ROOT, "static/log",
                                                 "submitted_seq.log")
                myfunc.WriteFile(log_record, main_logfile_query, "a")

                divided_logfile_query = "%s/%s/%s"%(SITE_ROOT,
                        "static/log/divided",
                        "%s_submitted_seq.log"%(info['client_ip']))
                if query['client_ip'] != "":
                    myfunc.WriteFile(log_record, divided_logfile_query, "a")

                file_seq_warning = "%s/%s/%s/%s"%(SITE_ROOT, "static/result",
                                                  jobid, "query.warn.txt")
                query['file_seq_warning'] = os.path.basename(file_seq_warning)
                if query['warninfo'] != "":
                    myfunc.WriteFile(query['warninfo'], file_seq_warning, "a")

                query['jobid'] = jobid
                query['raw_query_seqfile'] = "query.raw.fa"
                query['BASEURL'] = g_params['BASEURL']

                # start the qd_fe if not, in the background
                # cmd = [qd_fe_scriptfile]
                base_www_url = "http://" + request.META['HTTP_HOST']
                # run the daemon only at the frontend
                if webcom.IsFrontEndNode(base_www_url):
                    # NOTE(review): shell string with os.system; paths come
                    # from server config, not user input, but subprocess with
                    # an argument list would be safer.
                    cmd = "nohup %s %s &"%(python_exec, qd_fe_scriptfile)
                    os.system(cmd)

                if query['numseq'] < 0:  #go to result page anyway
                    info['jobcounter'] = webcom.GetJobCounter(info)
                    return render(request, 'pred/thanks.html', info)
                else:
                    return get_results(request, jobid)
            else:
                info['jobcounter'] = webcom.GetJobCounter(info)
                return render(request, 'pred/badquery.html', info)
    # if a GET (or any other method) we'll create a blank form
    else:
        form = SubmissionForm()

    jobcounter = webcom.GetJobCounter(info)
    info['form'] = form
    info['jobcounter'] = jobcounter
    return render(request, 'pred/submit_seq.html', info)
def get_results(request, jobid="1"): #{{{
    """Django view: render the result page for a job.

    Determines job state from tag files in the result dir (runjob.start /
    runjob.finish / runjob.failed), builds the per-sequence result table from
    finished_seqs.txt, estimates remaining time and the page refresh interval,
    and fills `resultdict` for the 'pred/get_results.html' template.
    """
    resultdict = {}
    webcom.set_basic_config(request, resultdict, g_params)

    #img1 = "%s/%s/%s/%s"%(SITE_ROOT, "result", jobid, "PconsC2.s400.jpg")
    #url_img1 = serve(request, os.path.basename(img1), os.path.dirname(img1))

    # paths of the job's result folder and its well-known files
    rstdir = "%s/%s" % (path_result, jobid)
    outpathname = jobid
    resultfile = "%s/%s/%s/%s" % (rstdir, jobid, outpathname,
                                  "query.result.txt")
    tarball = "%s/%s.tar.gz" % (rstdir, outpathname)
    zipfile = "%s/%s.zip" % (rstdir, outpathname)
    starttagfile = "%s/%s" % (rstdir, "runjob.start")
    finishtagfile = "%s/%s" % (rstdir, "runjob.finish")
    failtagfile = "%s/%s" % (rstdir, "runjob.failed")
    runjob_errfile = "%s/%s" % (rstdir, "runjob.err")
    query_seqfile = "%s/%s" % (rstdir, "query.fa")
    raw_query_seqfile = "%s/%s" % (rstdir, "query.raw.fa")
    seqid_index_mapfile = "%s/%s/%s" % (rstdir, jobid, "seqid_index_map.txt")
    finished_seq_file = "%s/%s/finished_seqs.txt" % (rstdir, jobid)
    statfile = "%s/%s/stat.txt" % (rstdir, jobid)
    method_submission = "web"
    # NOTE(review): duplicate assignment, finished_seq_file was already set
    # two lines above
    finished_seq_file = "%s/%s/finished_seqs.txt" % (rstdir, jobid)
    part_predfile = "%s/%s/query.part.top" % (rstdir, jobid)

    # read job metadata written at submission time (tab-separated jobinfo)
    jobinfofile = "%s/jobinfo" % (rstdir)
    jobinfo = myfunc.ReadFile(jobinfofile).strip()
    jobinfolist = jobinfo.split("\t")
    app_type = "SCAMPI-single"
    if len(jobinfolist) >= 8:
        submit_date_str = jobinfolist[0]
        numseq = int(jobinfolist[3])
        jobname = jobinfolist[5]
        email = jobinfolist[6]
        method_submission = jobinfolist[7]
        try:
            # optional 9th field: application type
            app_type = jobinfolist[8]
        except:
            pass
    else:
        # jobinfo missing/short: fall back to defaults
        submit_date_str = ""
        numseq = 1
        jobname = ""
        email = ""
        method_submission = "web"

    isValidSubmitDate = True
    try:
        submit_date = webcom.datetime_str_to_time(submit_date_str)
    except ValueError:
        isValidSubmitDate = False
    current_time = datetime.now(timezone(TZ))

    resultdict['isResultFolderExist'] = True
    resultdict['errinfo'] = myfunc.ReadFile(runjob_errfile)
    status = ""
    queuetime = ""
    runtime = ""
    # Job-state machine from tag files:
    #   no rstdir -> folder missing; failtagfile -> Failed;
    #   finishtagfile -> Finished; starttagfile -> Running; else -> Wait.
    if not os.path.exists(rstdir):
        resultdict['isResultFolderExist'] = False
        resultdict['isFinished'] = False
        resultdict['isFailed'] = True
        resultdict['isStarted'] = False
    elif os.path.exists(failtagfile):
        resultdict['isFinished'] = False
        resultdict['isFailed'] = True
        resultdict['isStarted'] = True
        status = "Failed"
        start_date_str = myfunc.ReadFile(starttagfile).strip()
        isValidStartDate = True
        isValidFailedDate = True
        try:
            start_date = webcom.datetime_str_to_time(start_date_str)
        except ValueError:
            isValidStartDate = False
        failed_date_str = myfunc.ReadFile(failtagfile).strip()
        try:
            failed_date = webcom.datetime_str_to_time(failed_date_str)
        except ValueError:
            isValidFailedDate = False
        if isValidSubmitDate and isValidStartDate:
            queuetime = myfunc.date_diff(submit_date, start_date)
        if isValidStartDate and isValidFailedDate:
            runtime = myfunc.date_diff(start_date, failed_date)
    else:
        resultdict['isFailed'] = False
        if os.path.exists(finishtagfile):
            resultdict['isFinished'] = True
            resultdict['isStarted'] = True
            status = "Finished"
            isValidStartDate = True
            isValidFinishDate = True
            start_date_str = myfunc.ReadFile(starttagfile).strip()
            try:
                start_date = webcom.datetime_str_to_time(start_date_str)
            except ValueError:
                isValidStartDate = False
            finish_date_str = myfunc.ReadFile(finishtagfile).strip()
            try:
                finish_date = webcom.datetime_str_to_time(finish_date_str)
            except ValueError:
                isValidFinishDate = False
            if isValidSubmitDate and isValidStartDate:
                queuetime = myfunc.date_diff(submit_date, start_date)
            if isValidStartDate and isValidFinishDate:
                runtime = myfunc.date_diff(start_date, finish_date)
        else:
            resultdict['isFinished'] = False
            if os.path.exists(starttagfile):
                isValidStartDate = True
                start_date_str = myfunc.ReadFile(starttagfile).strip()
                try:
                    start_date = webcom.datetime_str_to_time(start_date_str)
                except ValueError:
                    isValidStartDate = False
                resultdict['isStarted'] = True
                status = "Running"
                if isValidSubmitDate and isValidStartDate:
                    queuetime = myfunc.date_diff(submit_date, start_date)
                if isValidStartDate:
                    # still running: measure runtime up to now
                    runtime = myfunc.date_diff(start_date, current_time)
            else:
                resultdict['isStarted'] = False
                status = "Wait"
                if isValidSubmitDate:
                    queuetime = myfunc.date_diff(submit_date, current_time)

    color_status = webcom.SetColorStatus(status)

    file_seq_warning = "%s/%s/%s/%s" % (SITE_ROOT, "static/result", jobid,
                                        "query.warn.txt")
    seqwarninfo = ""
    if os.path.exists(file_seq_warning):
        seqwarninfo = myfunc.ReadFile(file_seq_warning).strip()
    resultdict['file_seq_warning'] = os.path.basename(file_seq_warning)
    resultdict['seqwarninfo'] = seqwarninfo
    resultdict['app_type'] = app_type
    resultdict['jobid'] = jobid
    resultdict['jobname'] = jobname
    resultdict['outpathname'] = os.path.basename(outpathname)
    resultdict['resultfile'] = os.path.basename(resultfile)
    resultdict['tarball'] = os.path.basename(tarball)
    resultdict['zipfile'] = os.path.basename(zipfile)
    resultdict['submit_date'] = submit_date_str
    resultdict['queuetime'] = queuetime
    resultdict['runtime'] = runtime
    resultdict['status'] = status
    resultdict['color_status'] = color_status
    resultdict['numseq'] = numseq
    resultdict['query_seqfile'] = os.path.basename(query_seqfile)
    resultdict['raw_query_seqfile'] = os.path.basename(raw_query_seqfile)
    base_www_url = "http://" + request.META['HTTP_HOST']
    # note that here one must add http:// in front of the url
    resultdict['url_result'] = "%s/pred/result/%s" % (base_www_url, jobid)

    num_finished = 0
    if os.path.exists(finished_seq_file):
        lines = myfunc.ReadFile(finished_seq_file).split("\n")
        lines = [_f for _f in lines if _f]
        num_finished = len(lines)

    sum_run_time = 0.0
    average_run_time_single = 0.1  # default average_run_time
    average_run_time_msa = 300  # default average_run_time
    # NOTE(review): num_finished is reset to 0 here, discarding the count
    # computed just above; the table loop below recomputes it as `cnt`.
    num_finished = 0
    cntnewrun = 0
    cntcached = 0
    topcontentList = []
    # get seqid_index_map
    if os.path.exists(finished_seq_file):
        resultdict['index_table_header'] = [
            "No.", "Length", "numTM", "RunTime(s)", "SequenceName", "Source"
        ]
        index_table_content_list = []
        indexmap_content = myfunc.ReadFile(finished_seq_file).split("\n")
        cnt = 0
        added_idx_set = set([])  # de-duplicate rows by subfolder name
        for line in indexmap_content:
            strs = line.split("\t")
            if len(strs) >= 8:
                subfolder = strs[0]
                if not subfolder in added_idx_set:
                    length_str = strs[1]
                    numTM_str = strs[2]
                    source = strs[3]  # "newrun" or "cached"
                    try:
                        runtime_in_sec_str = "%.1f" % (float(strs[4]))
                        if source == "newrun":
                            sum_run_time += float(strs[4])
                            cntnewrun += 1
                        elif source == "cached":
                            cntcached += 1
                    except:
                        runtime_in_sec_str = ""
                    desp = strs[5]
                    top = strs[7]
                    rank = "%d" % (cnt + 1)
                    index_table_content_list.append([
                        rank, length_str, numTM_str, runtime_in_sec_str,
                        desp[:30], source
                    ])
                    cnt += 1
                    added_idx_set.add(subfolder)
                    topcontentList.append(">%s\n%s" % (desp, top))
        if cntnewrun > 0:
            average_run_time_msa = sum_run_time / cntnewrun
        resultdict['index_table_content_list'] = index_table_content_list
        resultdict['indexfiletype'] = "finishedfile"
        resultdict['num_finished'] = cnt
        num_finished = cnt
        resultdict['percent_finished'] = "%.1f" % (float(cnt) / numseq * 100)
    else:
        resultdict['index_table_header'] = []
        resultdict['index_table_content_list'] = []
        resultdict['indexfiletype'] = "finishedfile"
        resultdict['num_finished'] = 0
        resultdict['percent_finished'] = "%.1f" % (0.0)

    num_remain = numseq - num_finished

    # partial prediction file with the topologies finished so far
    myfunc.WriteFile("\n".join(topcontentList), part_predfile, "w")

    # estimate remaining time: extrapolate from elapsed wall time per newrun
    time_remain_in_sec = numseq * 120  # set default value
    if os.path.exists(starttagfile):
        start_date_str = myfunc.ReadFile(starttagfile).strip()
        isValidStartDate = False
        try:
            start_date_epoch = webcom.datetime_str_to_epoch(start_date_str)
            isValidStartDate = True
        except:
            pass
        if isValidStartDate:
            time_now = time.time()
            runtime_total_in_sec = float(time_now) - float(start_date_epoch)
            cnt_torun = numseq - cntcached  #
            if cntnewrun <= 0:
                time_remain_in_sec = cnt_torun * 120
            else:
                time_remain_in_sec = int(runtime_total_in_sec /
                                         float(cntnewrun) * cnt_torun + 0.5)
    time_remain = myfunc.second_to_human(time_remain_in_sec)
    resultdict['time_remain'] = time_remain

    # choose the page auto-refresh interval (seconds) from job size/progress
    qdinittagfile = "%s/runjob.qdinit" % (rstdir)
    if numseq <= 1:
        if method_submission == "web":
            if app_type == "SCAMPI-single":
                resultdict['refresh_interval'] = 1
            else:
                resultdict['refresh_interval'] = 5.0
        else:
            if app_type == "SCAMPI-single":
                resultdict['refresh_interval'] = 1.0
            else:
                resultdict['refresh_interval'] = 5.0
    else:
        #resultdict['refresh_interval'] = numseq * 2
        addtime = int(math.sqrt(max(0, min(num_remain, num_finished)))) + 1
        if app_type == "SCAMPI-single":
            resultdict['refresh_interval'] = max(
                1, num_remain * average_run_time_single)
        else:
            if not os.path.exists(qdinittagfile):
                resultdict['refresh_interval'] = 2
            else:
                if num_finished == 0:
                    resultdict['refresh_interval'] = 5
                else:
                    resultdict['refresh_interval'] = 10 + addtime

    # get stat info
    if os.path.exists(statfile):  #{{{
        content = myfunc.ReadFile(statfile)
        lines = content.split("\n")
        for line in lines:
            strs = line.split()
            if len(strs) >= 2:
                resultdict[strs[0]] = strs[1]
                # derive a percentage key, e.g. num_TM -> per_TM
                percent = "%.1f" % (int(strs[1]) / float(numseq) * 100)
                newkey = strs[0].replace('num_', 'per_')
                resultdict[newkey] = percent
    #}}}

    # classify predicted topologies into TM / non-TM proteins
    topfile = "%s/%s/query.top" % (rstdir, jobid)
    TM_listfile = "%s/%s/query.TM_list.txt" % (rstdir, jobid)
    nonTM_listfile = "%s/%s/query.nonTM_list.txt" % (rstdir, jobid)
    str_TMlist = []
    str_nonTMlist = []
    lenseq_list = []
    num_TMPro = 0
    if os.path.exists(topfile):
        (tmpidlist, tmpannolist, tmptoplist) = myfunc.ReadFasta(topfile)
        cnt_TMPro = 0
        for ii in range(len(tmpidlist)):
            top = tmptoplist[ii]
            lenseq_list.append(len(top))
            # an 'M' state in the topology string marks a membrane protein
            if top.find('M') != -1:
                cnt_TMPro += 1
                str_TMlist.append(tmpannolist[ii])
            else:
                str_nonTMlist.append(tmpannolist[ii])
        num_TMPro = cnt_TMPro

    # write the TM / non-TM lists only if absent or empty
    if not os.path.exists(TM_listfile) or os.path.getsize(TM_listfile) < 1:
        myfunc.WriteFile("\n".join(str_TMlist), TM_listfile, "w")
    if not os.path.exists(
            nonTM_listfile) or os.path.getsize(nonTM_listfile) < 1:
        myfunc.WriteFile("\n".join(str_nonTMlist), nonTM_listfile, "w")

    avg_lenseq = myfunc.FloatDivision(sum(lenseq_list), len(lenseq_list))
    resultdict['avg_lenseq'] = int(avg_lenseq + 0.5)
    resultdict['app_type'] = app_type
    resultdict['num_TMPro'] = num_TMPro
    resultdict['per_TMPro'] = "%.1f" % (
        myfunc.FloatDivision(num_TMPro, numseq) * 100)
    resultdict['num_nonTMPro'] = numseq - num_TMPro
    resultdict['per_nonTMPro'] = "%.1f" % (
        100.0 - myfunc.FloatDivision(num_TMPro, numseq) * 100)
    resultdict['num_finished'] = num_finished
    resultdict['percent_finished'] = "%.1f" % (float(num_finished) /
                                               numseq * 100)
    resultdict['jobcounter'] = webcom.GetJobCounter(resultdict)
    return render(request, 'pred/get_results.html', resultdict)
def RunJob(infile, outpath, tmpdir, email, jobid, g_params): #{{{
    """Run the PRODRES prediction job for all sequences in `infile`.

    For each sequence: first try to reuse an md5-keyed cached result; run the
    workflow script for the rest one sequence at a time, append per-sequence
    records to finished_seqs.txt, and (on the front-end node) populate the
    md5 cache. Finally zip the result folder, write the finish/failed tag
    file, and optionally email the user.

    Returns 0 on success, 1 on failure.
    """
    all_begin_time = time.time()

    # rootname, app_logfile, tarball(_fullpath) appear unused below
    rootname = os.path.basename(os.path.splitext(infile)[0])
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    app_logfile = "%s/app.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    failedtagfile = "%s/runjob.failed" % (outpath)
    query_parafile = "%s/query.para.txt" % (outpath)

    # query_para starts as "" and becomes a dict if the para file has content
    query_para = ""
    content = myfunc.ReadFile(query_parafile)
    if content != "":
        query_para = json.loads(content)
    rmsg = ""

    resultpathname = jobid
    outpath_result = "%s/%s" % (outpath, resultpathname)
    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    resultfile_text = "%s/%s" % (outpath_result, "query.result.txt")
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    finished_seq_file = "%s/finished_seqs.txt" % (outpath_result)

    for folder in [outpath_result, tmp_outpath_result]:
        try:
            os.makedirs(folder)
        except OSError:
            msg = "Failed to create folder %s" % (folder)
            myfunc.WriteFile(msg + "\n", gen_errfile, "a")
            return 1
    # truncate/create the finished-sequence record file
    try:
        open(finished_seq_file, 'w').close()
    except:
        pass

    #first getting result from caches
    # ==================================
    maplist = []
    maplist_simple = []
    toRunDict = {}  # origIndex -> [seq, numTM, description]
    hdl = myfunc.ReadFastaByBlock(infile, method_seqid=0, method_seq=0)
    if hdl.failure:
        isOK = False  # NOTE(review): isOK is never read afterwards
    else:
        webcom.WriteDateTimeTagFile(starttagfile, runjob_logfile,
                                    runjob_errfile)
        recordList = hdl.readseq()
        cnt = 0
        origpath = os.getcwd()
        while recordList != None:
            for rd in recordList:
                isSkip = False
                # temp outpath for the sequence is always seq_0, and I feed
                # only one seq a time to the workflow
                tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result,
                                                  "seq_%d" % 0)
                outpath_this_seq = "%s/%s" % (outpath_result, "seq_%d" % cnt)
                subfoldername_this_seq = "seq_%d" % (cnt)
                if os.path.exists(tmp_outpath_this_seq):
                    try:
                        shutil.rmtree(tmp_outpath_this_seq)
                    except OSError:
                        pass
                maplist.append(
                    "%s\t%d\t%s\t%s" % ("seq_%d" % cnt, len(rd.seq),
                                        rd.description, rd.seq))
                maplist_simple.append(
                    "%s\t%d\t%s" % ("seq_%d" % cnt, len(rd.seq),
                                    rd.description))
                if not g_params['isForceRun']:
                    # cache key covers both the sequence and the parameters
                    md5_key = hashlib.md5(
                        (rd.seq + str(query_para)).encode('utf-8')).hexdigest()
                    subfoldername = md5_key[:2]
                    cachedir = "%s/%s/%s" % (path_cache, subfoldername,
                                             md5_key)
                    zipfile_cache = cachedir + ".zip"
                    if os.path.exists(cachedir) or os.path.exists(
                            zipfile_cache):
                        if os.path.exists(cachedir):
                            try:
                                shutil.copytree(cachedir, outpath_this_seq)
                            except Exception as e:
                                msg = "Failed to copytree %s -> %s" % (
                                    cachedir, outpath_this_seq)
                                date_str = time.strftime(FORMAT_DATETIME)
                                myfunc.WriteFile(
                                    "[%s] %s with errmsg=%s\n" %
                                    (date_str, msg, str(e)), runjob_errfile,
                                    "a")
                        elif os.path.exists(zipfile_cache):
                            cmd = [
                                "unzip", zipfile_cache, "-d", outpath_result
                            ]
                            webcom.RunCmd(cmd, runjob_logfile, runjob_errfile)
                            shutil.move("%s/%s" % (outpath_result, md5_key),
                                        outpath_this_seq)

                        if os.path.exists(outpath_this_seq):
                            # cache hit: record as finished with runtime 0
                            info_finish = webcom.GetInfoFinish_PRODRES(
                                outpath_this_seq, cnt, len(rd.seq),
                                rd.description, source_result="cached",
                                runtime=0.0)
                            myfunc.WriteFile("\t".join(info_finish) + "\n",
                                             finished_seq_file,
                                             "a",
                                             isFlush=True)
                            isSkip = True
                if not isSkip:
                    # first try to delete the outfolder if exists
                    if os.path.exists(outpath_this_seq):
                        try:
                            shutil.rmtree(outpath_this_seq)
                        except OSError:
                            pass
                    origIndex = cnt
                    numTM = 0
                    toRunDict[origIndex] = [rd.seq, numTM, rd.description
                                            ]  #init value for numTM is 0
                cnt += 1
            recordList = hdl.readseq()
        hdl.close()
    myfunc.WriteFile("\n".join(maplist_simple) + "\n", mapfile)

    if not g_params['isOnlyGetCache']:
        torun_all_seqfile = "%s/%s" % (tmp_outpath_result, "query.torun.fa")
        dumplist = []
        for key in toRunDict:
            top = toRunDict[key][0]
            dumplist.append(">%s\n%s" % (str(key), top))
        myfunc.WriteFile("\n".join(dumplist) + "\n", torun_all_seqfile, "w")
        del dumplist
        # sort by numTM descending (all zero here, so effectively input order)
        sortedlist = sorted(list(toRunDict.items()),
                            key=lambda x: x[1][1],
                            reverse=True)
        #format of sortedlist [(origIndex: [seq, numTM, description]), ...]

        # submit sequences one by one to the workflow according to orders in
        # sortedlist
        for item in sortedlist:
            origIndex = item[0]
            seq = item[1][0]
            description = item[1][2]
            subfoldername_this_seq = "seq_%d" % (origIndex)
            outpath_this_seq = "%s/%s" % (outpath_result,
                                          subfoldername_this_seq)
            tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result,
                                              "seq_%d" % (0))
            if os.path.exists(tmp_outpath_this_seq):
                try:
                    shutil.rmtree(tmp_outpath_this_seq)
                except OSError:
                    pass
            seqfile_this_seq = "%s/%s" % (tmp_outpath_result,
                                          "query_%d.fa" % (origIndex))
            seqcontent = ">query_%d\n%s\n" % (origIndex, seq)
            myfunc.WriteFile(seqcontent, seqfile_this_seq, "w")
            if not os.path.exists(seqfile_this_seq):
                msg = "failed to generate seq index %d" % (origIndex)
                date_str = time.strftime(g_params['FORMAT_DATETIME'])
                myfunc.WriteFile("[%s] %s\n" % (date_str, msg),
                                 runjob_errfile, "a", True)
                continue

            # build the workflow command line from the (optional) parameters
            cmd = [
                "python", runscript, "--input", seqfile_this_seq, "--output",
                tmp_outpath_this_seq, "--pfam-dir", path_pfamdatabase,
                "--pfamscan-script", path_pfamscanscript,
                "--fallback-db-fasta", blastdb
            ]
            if 'second_method' in query_para and query_para[
                    'second_method'] != "":
                cmd += ['--second-search', query_para['second_method']]
            # e-value takes precedence over bit-score for pfamscan
            if 'pfamscan_evalue' in query_para and query_para[
                    'pfamscan_evalue'] != "":
                cmd += ['--pfamscan_e-val', query_para['pfamscan_evalue']]
            elif 'pfamscan_bitscore' in query_para and query_para[
                    'pfamscan_bitscore'] != "":
                cmd += ['--pfamscan_bitscore', query_para['pfamscan_bitscore']]
            if 'pfamscan_clanoverlap' in query_para:
                if query_para['pfamscan_clanoverlap'] == False:
                    cmd += ['--pfamscan_clan-overlap', 'no']
                else:
                    cmd += ['--pfamscan_clan-overlap', 'yes']
            if 'jackhmmer_iteration' in query_para and query_para[
                    'jackhmmer_iteration'] != "":
                cmd += [
                    '--jackhmmer_max_iter', query_para['jackhmmer_iteration']
                ]
            if 'jackhmmer_threshold_type' in query_para and query_para[
                    'jackhmmer_threshold_type'] != "":
                cmd += [
                    '--jackhmmer-threshold-type',
                    query_para['jackhmmer_threshold_type']
                ]
            # e-value takes precedence over bit-score for jackhmmer as well
            if 'jackhmmer_evalue' in query_para and query_para[
                    'jackhmmer_evalue'] != "":
                cmd += ['--jackhmmer_e-val', query_para['jackhmmer_evalue']]
            elif 'jackhmmer_bitscore' in query_para and query_para[
                    'jackhmmer_bitscore'] != "":
                cmd += [
                    '--jackhmmer_bit-score', query_para['jackhmmer_bitscore']
                ]
            if 'psiblast_iteration' in query_para and query_para[
                    'psiblast_iteration'] != "":
                cmd += ['--psiblast_iter', query_para['psiblast_iteration']]
            if 'psiblast_outfmt' in query_para and query_para[
                    'psiblast_outfmt'] != "":
                cmd += ['--psiblast_outfmt', query_para['psiblast_outfmt']]
            (t_success, runtime_in_sec) = webcom.RunCmd(cmd, runjob_logfile,
                                                        runjob_errfile, True)

            # make sure the result subfolder carries the amino-acid sequence
            aaseqfile = "%s/seq.fa" % (tmp_outpath_this_seq + os.sep +
                                       "query_0")
            if not os.path.exists(aaseqfile):
                seqcontent = ">%s\n%s\n" % (description, seq)
                myfunc.WriteFile(seqcontent, aaseqfile, "w")

            if os.path.exists(tmp_outpath_this_seq):
                # move the per-seq result from tmp to the final result folder
                cmd = [
                    "mv", "-f", tmp_outpath_this_seq + os.sep + "query_0",
                    outpath_this_seq
                ]
                isCmdSuccess = False
                (isCmdSuccess, t_runtime) = webcom.RunCmd(cmd, runjob_logfile,
                                                          runjob_errfile,
                                                          True)

                # delete bulky intermediates unless the user asked to keep them
                if not 'isKeepTempFile' in query_para or query_para[
                        'isKeepTempFile'] == False:
                    try:
                        temp_result_folder = "%s/temp" % (outpath_this_seq)
                        shutil.rmtree(temp_result_folder)
                    except:
                        msg = "Failed to delete the folder %s" % (
                            temp_result_folder)
                        date_str = time.strftime(g_params['FORMAT_DATETIME'])
                        myfunc.WriteFile("[%s] %s\n" % (date_str, msg),
                                         runjob_errfile, "a", True)

                    flist = [
                        "%s/outputs/%s" % (outpath_this_seq, "Alignment.txt"),
                        "%s/outputs/%s" % (outpath_this_seq, "tableOut.txt"),
                        "%s/outputs/%s" % (outpath_this_seq, "fullOut.txt")
                    ]
                    for f in flist:
                        if os.path.exists(f):
                            try:
                                os.remove(f)
                            except:
                                msg = "Failed to delete the file %s" % (f)
                                date_str = time.strftime(
                                    g_params['FORMAT_DATETIME'])
                                myfunc.WriteFile("[%s] %s\n" % (date_str, msg),
                                                 runjob_errfile, "a", True)

                if isCmdSuccess:
                    timefile = "%s/time.txt" % (outpath_this_seq)
                    runtime = webcom.ReadRuntimeFromFile(timefile,
                                                         default_runtime=0.0)
                    info_finish = webcom.GetInfoFinish_PRODRES(
                        outpath_this_seq,
                        origIndex,
                        len(seq),
                        description,
                        source_result="newrun",
                        runtime=runtime)
                    myfunc.WriteFile("\t".join(info_finish) + "\n",
                                     finished_seq_file,
                                     "a",
                                     isFlush=True)
                    # now write the text output for this seq
                    info_this_seq = "%s\t%d\t%s\t%s" % (
                        "seq_%d" % origIndex, len(seq), description, seq)
                    resultfile_text_this_seq = "%s/%s" % (outpath_this_seq,
                                                          "query.result.txt")
                    #webcom.WriteSubconsTextResultFile(resultfile_text_this_seq,
                    #    outpath_result, [info_this_seq], runtime_in_sec, g_params['base_www_url'])
                    # create or update the md5 cache
                    # create cache only on the front-end
                    if webcom.IsFrontEndNode(g_params['base_www_url']):
                        md5_key = hashlib.md5(
                            (seq + str(query_para)).encode('utf-8')).hexdigest()
                        subfoldername = md5_key[:2]
                        md5_subfolder = "%s/%s" % (path_cache, subfoldername)
                        cachedir = "%s/%s/%s" % (path_cache, subfoldername,
                                                 md5_key)
                        # copy the zipped folder to the cache path
                        origpath = os.getcwd()
                        os.chdir(outpath_result)
                        shutil.copytree("seq_%d" % (origIndex), md5_key)
                        cmd = ["zip", "-rq", "%s.zip" % (md5_key), md5_key]
                        webcom.RunCmd(cmd, runjob_logfile, runjob_logfile)
                        if not os.path.exists(md5_subfolder):
                            os.makedirs(md5_subfolder)
                        shutil.move("%s.zip" % (md5_key),
                                    "%s.zip" % (cachedir))
                        shutil.rmtree(
                            md5_key
                        )  # delete the temp folder named as md5 hash
                        os.chdir(origpath)

                        # Add the finished date to the database
                        date_str = time.strftime(FORMAT_DATETIME)
                        webcom.InsertFinishDateToDB(date_str, md5_key, seq,
                                                    finished_date_db)

    all_end_time = time.time()
    all_runtime_in_sec = all_end_time - all_begin_time

    # finalize: zip results, write finish/failed tags, optionally email
    if not g_params['isOnlyGetCache'] or len(toRunDict) == 0:
        # now write the text output to a single file
        statfile = "%s/%s" % (outpath_result, "stat.txt")
        #webcom.WriteSubconsTextResultFile(resultfile_text, outpath_result, maplist,
        #    all_runtime_in_sec, g_params['base_www_url'], statfile=statfile)
        # now making zip instead (for windows users)
        # note that zip rq will zip the real data for symbolic links
        os.chdir(outpath)
        # cmd = ["tar", "-czf", tarball, resultpathname]
        cmd = ["zip", "-rq", zipfile, resultpathname]
        webcom.RunCmd(cmd, runjob_logfile, runjob_errfile)

        # write finish tag file
        if os.path.exists(finished_seq_file):
            webcom.WriteDateTimeTagFile(finishtagfile, runjob_logfile,
                                        runjob_errfile)

        isSuccess = False
        if (os.path.exists(finishtagfile)
                and os.path.exists(zipfile_fullpath)):
            isSuccess = True
        else:
            isSuccess = False
            webcom.WriteDateTimeTagFile(failedtagfile, runjob_logfile,
                                        runjob_errfile)

        # send the result to email
        # do not sendmail at the cloud VM
        if webcom.IsFrontEndNode(g_params['base_www_url']
                                 ) and myfunc.IsValidEmailAddress(email):
            if isSuccess:
                finish_status = "success"
            else:
                finish_status = "failed"
            webcom.SendEmail_on_finish(
                jobid,
                g_params['base_www_url'],
                finish_status,
                name_server="PRODRES",
                from_email="*****@*****.**",
                to_email=email,
                contact_email=contact_email,
                logfile=runjob_logfile,
                errfile=runjob_errfile)

    # any content in runjob.err (beyond 1 byte) marks the job as failed
    if os.path.exists(runjob_errfile) and os.path.getsize(runjob_errfile) > 1:
        return 1
    else:
        try:
            shutil.rmtree(tmpdir)
            msg = "rmtree(%s)" % (tmpdir)
            webcom.loginfo("rmtree(%s)" % (tmpdir), runjob_logfile)
        except Exception as e:
            msg = "Failed to rmtree(%s)" % (tmpdir)
            webcom.loginfo(
                "Failed to rmtree(%s) with error message: %s" %
                (tmpdir, str(e)), runjob_errfile)
        return 0
def main(g_params): #{{{
    """Parse command-line options and hand the job over to SubmitJobToQueue().

    Recognized options: -outpath, -email, -host, -nseq, -nseq-this-user,
    -baseurl, -jobid, -datapath, -force, -only-get-cache, -q, -h/--help.
    Returns 0 on success (via SubmitJobToQueue), 1 on any argument or
    validation error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    rmsg = ""
    outpath = ""
    jobid = ""
    datapath = ""
    numseq = -1
    numseq_this_user = -1
    email = ""
    host_ip = ""
    base_www_url = ""

    i = 1
    isNonOptionArg = False
    # hand-rolled option parser; my_getopt_* return (value, next_index)
    while i < numArgv:
        if isNonOptionArg == True:
            # positional arguments are not accepted by this program
            webcom.loginfo("Error! Wrong argument: %s" % (argv[i]),
                           gen_errfile)
            return 1
            # NOTE(review): the two statements below are unreachable (dead
            # code after return); kept as in the original.
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-email", "--email"]:
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-host", "--host"]:
                (host_ip, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-nseq", "--nseq"]:
                (numseq, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-nseq-this-user", "--nseq-this-user"]:
                (numseq_this_user, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-baseurl", "--baseurl"]:
                (base_www_url, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-jobid", "--jobid"]:
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-datapath", "--datapath"]:
                (datapath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-force", "--force"]:
                g_params['isForceRun'] = True
                i += 1
            elif argv[i] in ["-only-get-cache", "--only-get-cache"]:
                g_params['isOnlyGetCache'] = True
                i += 1
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                webcom.loginfo("Error! Wrong argument: %s" % (argv[i]),
                               gen_errfile)
                return 1
        else:
            webcom.loginfo("Error! Wrong argument: %s" % (argv[i]),
                           gen_errfile)
            return 1

    # validate required options
    if outpath == "":
        webcom.loginfo("outpath not set. exit", gen_errfile)
        return 1
    elif not os.path.exists(outpath):
        # create outpath if missing (via external mkdir -p)
        cmd = ["mkdir", "-p", outpath]
        try:
            rmsg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print(e)
            print(rmsg)
            return 1
    if jobid == "":
        webcom.loginfo("%s: jobid not set. exit" % (sys.argv[0]), gen_errfile)
        return 1
    if datapath == "":
        webcom.loginfo("%s: datapath not set. exit" % (sys.argv[0]),
                       gen_errfile)
        return 1
    elif not os.path.exists(datapath):
        webcom.loginfo("%s: datapath does not exist. exit" % (sys.argv[0]),
                       gen_errfile)
        return 1
    elif not os.path.exists("%s/query.fa" % (datapath)):
        webcom.loginfo(
            "%s: file %s/query.fa does not exist. exit" %
            (sys.argv[0], datapath), gen_errfile)
        return 1

    g_params['debugfile'] = "%s/debug.log" % (outpath)
    myfunc.WriteFile("Go to SubmitJobToQueue()\n", g_params['debugfile'], "a",
                     True)
    return SubmitJobToQueue(jobid, datapath, outpath, numseq,
                            numseq_this_user, email, host_ip, base_www_url)
def SubmitJobToQueue( jobid, datapath, outpath, nummodel, nummodel_this_user, email, #{{{ host_ip, base_www_url): myfunc.WriteFile("Entering SubmitJobToQueue()\n", g_params['debugfile'], "a") modelfile = "%s/query.pdb" % (datapath) seqfile = "%s/query.fa" % (datapath) if nummodel == -1: nummodel = myfunc.ReadFile(modelfile).count("\nENDMDL") if nummodel == 0: nummodel = 1 if nummodel_this_user == -1: nummodel_this_user = nummodel query_parafile = "%s/query.para.txt" % (outpath) query_para = {} content = myfunc.ReadFile(query_parafile) para_str = content if content != "": query_para = json.loads(content) try: name_software = query_para['name_software'] except KeyError: name_software = "proq3" runjob = "%s %s/run_job.py" % (python_exec, rundir) scriptfile = "%s/runjob,%s,%s,%s,%s,%d.sh" % ( outpath, name_software, jobid, host_ip, email, nummodel) code_str_list = [] code_str_list.append("#!/bin/bash") code_str_list.append("source %s/bin/activate" % (virt_env_path)) cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % ( runjob, modelfile, outpath, datapath, jobid) if email != "": cmdline += "-email \"%s\" " % (email) if os.path.exists(seqfile): cmdline += "-fasta \"%s\" " % (seqfile) if base_www_url != "": cmdline += "-baseurl \"%s\" " % (base_www_url) if g_params['isForceRun']: cmdline += "-force " code_str_list.append(cmdline) code = "\n".join(code_str_list) msg = "Write scriptfile %s" % (scriptfile) myfunc.WriteFile(msg + "\n", g_params['debugfile'], "a") myfunc.WriteFile(code, scriptfile) os.chmod(scriptfile, 0o755) myfunc.WriteFile("Getting priority" + "\n", g_params['debugfile'], "a") priority = myfunc.GetSuqPriority(nummodel_this_user) if email in vip_user_list: priority = 999999999.0 myfunc.WriteFile("priority=%d\n" % (priority), g_params['debugfile'], "a") st1 = webcom.SubmitSlurmJob(datapath, outpath, scriptfile, g_params['debugfile']) return st1
def findjob(request):#{{{
    """Django view: search submitted jobs by Job ID and/or Job Name.

    Reads the global submitted_seq.log, filters by the (GET) search fields,
    keeps only jobs whose result folder still exists, and renders
    'pred/findjob.html' with the matched table.
    """
    info = {}
    errmsg = ""
    webcom.set_basic_config(request, info, g_params)
    all_logfile_query = "%s/%s/%s"%(SITE_ROOT, "static/log",
                                    "submitted_seq.log")
    info['header'] = ["No.", "JobID","JobName", "NumSeq", "Email",
                      "Submit date"]
    matched_list = []
    num_matched = 0
    is_form_submitted = False
    info['jobid'] = ""
    info['jobname'] = ""
    # initialize the search terms up front so the info[...] assignments at
    # the bottom never hit an unbound local (the original relied on a bare
    # try/except NameError swallow)
    st_jobid = ""
    st_jobname = ""
    if g_params['DEBUG']:
        myfunc.WriteFile("request.method=%s\n"%(str(request.method)),
                         gen_logfile, "a", True)
    if request.method == 'GET':
        form = SubmissionForm_findjob(request.GET)
        if request.GET.get('do'):
            is_form_submitted = True
            if g_params['DEBUG']:
                myfunc.WriteFile("Enter POST\n", gen_logfile, "a", True)
            if form.is_valid():
                if g_params['DEBUG']:
                    myfunc.WriteFile("form.is_valid == True\n", gen_logfile,
                                     "a", True)
                st_jobid = request.GET.get('jobid')
                st_jobname = request.GET.get('jobname')
                matched_jobidlist = []
                if not (st_jobid or st_jobname):
                    errmsg = "Error! Neither Job ID nor Job Name is set."
                else:
                    alljob_dict = myfunc.ReadSubmittedLogFile(all_logfile_query)
                    all_jobidList = list(alljob_dict.keys())
                    all_jobnameList = [alljob_dict[x][1]
                                       for x in all_jobidList]
                    if st_jobid:
                        # job-id search: substring match, but the query must
                        # look like a real job id (rst_ + at least one char)
                        if st_jobid.startswith("rst_") and len(st_jobid) >= 5:
                            for jobid in all_jobidList:
                                if jobid.find(st_jobid) != -1:
                                    matched_jobidlist.append(jobid)
                        else:
                            errmsg = "Error! Searching text for Job ID must be started with 'rst_'\
 and contains at least one char after 'rst_'"
                    else:
                        matched_jobidlist = all_jobidList
                    if st_jobname:
                        # narrow further by job-name substring
                        newli = []
                        for jobid in matched_jobidlist:
                            jobname = alljob_dict[jobid][1]
                            if jobname.find(st_jobname) != -1:
                                newli.append(jobid)
                        matched_jobidlist = newli
                    num_matched = len(matched_jobidlist)
                    for i in range(num_matched):
                        jobid = matched_jobidlist[i]
                        li = alljob_dict[jobid]
                        submit_date_str = li[0]
                        jobname = li[1]
                        email = li[3]
                        numseq_str = li[4]
                        rstdir = "%s/%s"%(path_result, jobid)
                        # only list jobs whose result folder still exists
                        if os.path.exists(rstdir):
                            matched_list.append([i+1, jobid, jobname,
                                                 numseq_str, email,
                                                 submit_date_str])
    else:
        #errmsg = "Error! Neither Job ID nor Job Name is set."
        form = SubmissionForm_findjob()

    num_matched = len(matched_list)
    info['errmsg'] = errmsg
    info['form'] = form
    info['jobid'] = st_jobid
    info['jobname'] = st_jobname
    info['num_matched'] = num_matched
    info['content'] = matched_list
    info['BASEURL'] = g_params['BASEURL']
    info['is_form_submitted'] = is_form_submitted
    info['jobcounter'] = webcom.GetJobCounter(info)
    return render(request, 'pred/findjob.html', info)
def CleanCachedResult(MAX_KEEP_DAYS, path_static, name_cachedir):  # {{{
    """Delete cached results older than MAX_KEEP_DAYS days.

    Works on a temporary copy of the bookkeeping sqlite3 database: writes
    the md5 keys of outdated entries to a list file, removes the zipped
    cache files and their database records, VACUUMs the database and
    copies it back over the original.

    Returns 0 on success, 1 on failure.
    """
    path_log = "%s/log" % (path_static)
    path_cache = "%s/result/%s" % (path_static, name_cachedir)
    gen_logfile = "%s/%s.log" % (path_log, progname)
    gen_errfile = "%s/%s.err" % (path_log, progname)
    db = "%s/cached_job_finished_date.sqlite3" % (path_log)
    # mkstemp instead of the insecure, deprecated mktemp; we only need a
    # unique path -- shutil.copyfile below overwrites the empty file.
    (fd, tmpdb) = tempfile.mkstemp(prefix="%s_" % (db))
    os.close(fd)
    msg = "copy db (%s) to tmpdb (%s)" % (db, tmpdb)
    date_str = time.strftime(FORMAT_DATETIME)
    myfunc.WriteFile("[%s] %s\n" % (date_str, msg), gen_logfile, "a", True)
    try:
        shutil.copyfile(db, tmpdb)
    except Exception as e:
        myfunc.WriteFile("[%s] %s\n" % (date_str, str(e)), gen_errfile, "a", True)
        return 1

    md5listfile = "%s/cache_to_delete.md5list" % (path_log)
    con = sqlite3.connect(tmpdb)
    msg = "output the outdated md5 list to %s" % (md5listfile)
    date_str = time.strftime(FORMAT_DATETIME)
    myfunc.WriteFile("[%s] %s\n" % (date_str, msg), gen_logfile, "a", True)
    tablename = "data"
    with con:
        cur = con.cursor()
        # Phase 1: write the md5 keys of outdated entries to md5listfile.
        with open(md5listfile, "w") as fpout:
            resultset = cur.execute("SELECT md5, date_finish FROM %s" % (tablename))
            chunk_size = 1000
            while True:
                rows = resultset.fetchmany(chunk_size)
                if not rows:
                    break
                for (md5_key, finish_date_str) in rows:
                    finish_date = webcom.datetime_str_to_time(finish_date_str)
                    current_time = datetime.now(timezone(TZ))
                    if (current_time - finish_date).days > MAX_KEEP_DAYS:
                        fpout.write("%s\n" % (md5_key))

        # Phase 2: delete the cached zip files and their database records.
        msg = "Delete cached result folder and delete the record"
        date_str = time.strftime(FORMAT_DATETIME)
        myfunc.WriteFile("[%s] %s\n" % (date_str, msg), gen_logfile, "a", True)
        hdl = myfunc.ReadLineByBlock(md5listfile)
        lines = hdl.readlines()
        while lines is not None:
            for line in lines:
                md5_key = line.strip()
                if md5_key == "":
                    continue
                # the cache is sharded by the first two chars of the md5 key
                subfoldername = md5_key[:2]
                cachedir = "%s/%s/%s" % (path_cache, subfoldername, md5_key)
                zipfile_cache = cachedir + ".zip"
                date_str = time.strftime(FORMAT_DATETIME)
                if os.path.exists(zipfile_cache):
                    try:
                        os.remove(zipfile_cache)
                        msg = "rm %s" % (zipfile_cache)
                        myfunc.WriteFile("[%s] %s\n" % (date_str, msg),
                                         gen_logfile, "a", True)
                        # parameterized query instead of interpolating md5_key
                        # into the SQL string
                        cur.execute("DELETE FROM %s WHERE md5 = ?" % (tablename),
                                    (md5_key,))
                    except Exception as e:
                        myfunc.WriteFile("[%s] %s\n" % (date_str, str(e)),
                                         gen_errfile, "a", True)
            lines = hdl.readlines()
        hdl.close()

    # VACUUM cannot run inside an open transaction, so it must come after
    # the `with con:` block has committed the DELETEs.
    msg = "VACUUM the database %s" % (tmpdb)
    date_str = time.strftime(FORMAT_DATETIME)
    myfunc.WriteFile("[%s] %s\n" % (date_str, msg), gen_logfile, "a", True)
    con.execute("VACUUM")
    con.close()

    # copy the compacted database back over the original
    msg = "cp tmpdb (%s) -> db (%s)" % (tmpdb, db)
    date_str = time.strftime(FORMAT_DATETIME)
    myfunc.WriteFile("[%s] %s\n" % (date_str, msg), gen_logfile, "a", True)
    try:
        shutil.copyfile(tmpdb, db)
    except Exception as e:
        myfunc.WriteFile("[%s] %s\n" % (date_str, str(e)), gen_errfile, "a", True)
        return 1

    msg = "delete tmpdb (%s)" % (tmpdb)
    date_str = time.strftime(FORMAT_DATETIME)
    myfunc.WriteFile("[%s] %s\n" % (date_str, msg), gen_logfile, "a", True)
    try:
        os.remove(tmpdb)
    except Exception as e:
        myfunc.WriteFile("[%s] %s\n" % (date_str, str(e)), gen_errfile, "a", True)
        return 1
    # explicit success status (the original fell off the end returning None,
    # which a `ret != 0` caller would misread as failure)
    return 0
def SubmitJobToQueue(jobid, datapath, outpath, numseq, numseq_this_user, email,  # {{{
                     host_ip, base_www_url):
    """Build the per-job shell script and submit it to the slurm queue.

    Counts the sequences in the query fasta file when not given, reads
    optional per-job parameters from query.para.txt, writes an executable
    run script and hands it to webcom.SubmitSlurmJob.

    Returns the status returned by webcom.SubmitSlurmJob.
    """
    myfunc.WriteFile("Entering SubmitJobToQueue()\n", g_params['debugfile'], "a", True)
    fafile = "%s/query.fa" % (datapath)
    if numseq == -1:
        numseq = myfunc.CountFastaSeq(fafile)
    if numseq_this_user == -1:
        numseq_this_user = numseq

    # optional per-job parameters; 'name_software' defaults to "prodres"
    # (replaces the original try/except KeyError; unused `para_str` removed)
    query_parafile = "%s/query.para.txt" % (outpath)
    query_para = {}
    content = myfunc.ReadFile(query_parafile)
    if content != "":
        query_para = json.loads(content)
    name_software = query_para.get('name_software', "prodres")

    runjob = "%s %s/run_job.py" % (python_exec, rundir)
    scriptfile = "%s/runjob,%s,%s,%s,%s,%d.sh" % (
        outpath, name_software, jobid, host_ip, email, numseq)
    code_str_list = []
    code_str_list.append("#!/bin/bash")
    code_str_list.append("source %s/bin/activate" % (virt_env_path))
    cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % (
        runjob, fafile, outpath, datapath, jobid)
    if email != "":
        cmdline += "-email \"%s\" " % (email)
    if base_www_url != "":
        cmdline += "-baseurl \"%s\" " % (base_www_url)
    if g_params['isForceRun']:
        cmdline += "-force "
    if g_params['isOnlyGetCache']:
        cmdline += "-only-get-cache "
    code_str_list.append(cmdline)
    code = "\n".join(code_str_list)

    msg = "Writting scriptfile %s" % (scriptfile)
    webcom.loginfo(msg, g_params['debugfile'])
    myfunc.WriteFile(code, scriptfile, mode="w", isFlush=True)
    os.chmod(scriptfile, 0o755)

    webcom.loginfo("Getting priority", g_params['debugfile'])
    priority = myfunc.GetSuqPriority(numseq_this_user)
    # VIP users always jump the queue
    if email in vip_user_list:
        priority = 999999999.0
    webcom.loginfo("priority=%d" % (priority), g_params['debugfile'])

    st1 = webcom.SubmitSlurmJob(datapath, outpath, scriptfile, g_params['debugfile'])
    return st1
return RunJob(infile, outpath, tmpdir, email, jobid, g_params) #}}} def InitGlobalParameter(): #{{{ g_params = {} g_params['isQuiet'] = True g_params['isForceRun'] = False g_params['isOnlyGetCache'] = False g_params['base_www_url'] = "" g_params['jobid'] = "" g_params['lockfile'] = "" return g_params #}}} if __name__ == '__main__': g_params = InitGlobalParameter() status = main(g_params) if os.path.exists(g_params['lockfile']): try: os.remove(g_params['lockfile']) except: myfunc.WriteFile( "Failed to delete lockfile %s\n" % (g_params['lockfile']), gen_errfile, "a", True) sys.exit(status)
def submitjob(ctx, seq="", fixtop="", jobname="", email=""):#{{{
    """WSDL/SOAP entry point: validate a sequence and submit it as a job.

    Yields, in order: jobid, result url, number of sequences (as a string),
    error message, warning message.
    """
    seq = seq + "\n" #force add a new line for correct parsing the fasta file
    seqinfo = {}
    filtered_seq = webcom.ValidateSeq(seq, seqinfo, g_params)
    # ValidateFixtop(fixtop) #to be implemented
    jobid = "None"
    url = "None"
    numseq_str = "%d"%(seqinfo['numseq'])
    warninfo = seqinfo['warninfo']
    errinfo = ""
    # print "\n\nreq\n", dir(ctx.transport.req) #debug
    # print "\n\n", ctx.transport.req.META['REMOTE_ADDR'] #debug
    # print "\n\n", ctx.transport.req.META['HTTP_HOST'] #debug
    if filtered_seq == "":
        # validation failed -- report the reason, submit nothing
        errinfo = seqinfo['errinfo']
    else:
        # client address and host name come from the SOAP transport request;
        # missing META keys fall back to "" (bare excepts kept as-is)
        soap_req = ctx.transport.req
        try:
            client_ip = soap_req.META['REMOTE_ADDR']
        except:
            client_ip = ""
        try:
            hostname = soap_req.META['HTTP_HOST']
        except:
            hostname = ""
        # print client_ip
        # print hostname
        seqinfo['jobname'] = jobname
        seqinfo['email'] = email
        seqinfo['fixtop'] = fixtop
        seqinfo['date'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        seqinfo['client_ip'] = client_ip
        seqinfo['hostname'] = hostname
        seqinfo['method_submission'] = "wsdl"
        seqinfo['isForceRun'] = False # disable isForceRun if submitted by WSDL
        jobid = RunQuery_wsdl(seq, filtered_seq, seqinfo)
        if jobid == "":
            errinfo = "Failed to submit your job to the queue\n"+seqinfo['errinfo']
        else:
            # append a record to the global submission log and to the
            # per-client-ip log
            log_record = "%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n"%(seqinfo['date'], jobid,
                    seqinfo['client_ip'], seqinfo['numseq'],
                    len(seq),seqinfo['jobname'], seqinfo['email'],
                    seqinfo['method_submission'])
            main_logfile_query = "%s/%s/%s"%(SITE_ROOT, "static/log", "submitted_seq.log")
            myfunc.WriteFile(log_record, main_logfile_query, "a")
            divided_logfile_query = "%s/%s/%s"%(SITE_ROOT, "static/log/divided",
                    "%s_submitted_seq.log"%(seqinfo['client_ip']))
            if seqinfo['client_ip'] != "":
                myfunc.WriteFile(log_record, divided_logfile_query, "a")
            url = "http://" + hostname + g_params['BASEURL'] + "result/%s"%(jobid)
            # store any sequence-validation warnings next to the result
            file_seq_warning = "%s/%s/%s/%s"%(SITE_ROOT, "static/result", jobid,
                    "query.warn.txt")
            if seqinfo['warninfo'] != "":
                myfunc.WriteFile(seqinfo['warninfo'], file_seq_warning, "a")
            errinfo = seqinfo['errinfo']
    for s in [jobid, url, numseq_str, errinfo, warninfo]:
        yield s
def RunJob_single(infile, outpath, tmpdir, email, jobid, g_params): #{{{
    """Run the single-sequence prediction job and collect its result.

    Runs `runscript_single` on `infile` in a temporary folder, moves the
    produced topology file into the result folder, writes start/finish/fail
    tag files and, on the front-end node, notifies the user by e-mail about
    the finish status.  Always returns 0.
    """
    all_begin_time = time.time()
    rootname = os.path.basename(os.path.splitext(infile)[0])
    # tag files recording the job life cycle
    starttagfile = "%s/runjob.start" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    failtagfile = "%s/runjob.failed" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    rmsg = ""
    resultpathname = jobid
    outpath_result = "%s/%s" % (outpath, resultpathname)
    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    # NOTE(review): local name `zipfile` shadows any imported zipfile module
    # inside this function
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    resultfile_text = "%s/%s" % (outpath_result, "query.result.txt")
    outfile = "%s/%s" % (outpath_result, "query.top")
    # isOK: both the temporary and the final result folder were created
    isOK = True
    try:
        os.makedirs(tmp_outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (tmp_outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass
    tmp_outfile = "%s/%s" % (tmp_outpath_result, "query.top")
    try:
        os.makedirs(outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass
    if isOK:
        # mark the job as started, then run the prediction script
        webcom.WriteDateTimeTagFile(starttagfile, runjob_logfile, runjob_errfile)
        cmd = [runscript_single, infile, tmp_outfile]
        (t_success, runtime_in_sec) = webcom.RunCmd(cmd, runjob_logfile,
                                                    runjob_errfile, verbose=True)
        if os.path.exists(tmp_outfile):
            # move the result from the temporary folder to the result folder
            cmd = ["mv", "-f", tmp_outfile, outfile]
            (isCmdSuccess, t_runtime) = webcom.RunCmd(cmd, runjob_logfile,
                                                      runjob_errfile)
            if isCmdSuccess:
                runtime = runtime_in_sec #in seconds
        # flush any accumulated per-run log lines to the job log file
        if len(g_params['runjob_log']) > 0:
            rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_log']) + "\n",
                                      runjob_logfile, "a")
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)
        if os.path.exists(outfile):
            webcom.WriteDateTimeTagFile(finishtagfile, runjob_logfile,
                                        runjob_errfile)
    # success means both the finish tag and the output file exist
    isSuccess = False
    if (os.path.exists(finishtagfile) and os.path.exists(outfile)):
        isSuccess = True
        finish_status = "success"
    else:
        isSuccess = False
        finish_status = "failed"
        webcom.WriteDateTimeTagFile(failtagfile, runjob_logfile, runjob_errfile)
    # send the result to email
    # do not sendmail at the cloud VM
    if webcom.IsFrontEndNode(
            g_params['base_www_url']) and myfunc.IsValidEmailAddress(email):
        webcom.SendEmail_on_finish(jobid, g_params['base_www_url'],
                                   finish_status, name_server="SCAMPI2-single",
                                   from_email="*****@*****.**",
                                   to_email=email, contact_email=contact_email,
                                   logfile=runjob_logfile, errfile=runjob_errfile)
    return 0