Example #1
def SubmitSuqJob(suq_basedir, datapath, outpath, priority, scriptfile):  #{{{
    """Submit scriptfile to the suq queue with the given priority, retrying up to MAX_TRY times."""
    myfunc.WriteFile("Entering SubmitSuqJob()\n", g_params['debugfile'], "a")
    rmsg = ""
    cmd = [
        suq_exec, "-b", suq_basedir, "run", "-d", outpath, "-p",
        "%d" % (priority), scriptfile
    ]
    cmdline = " ".join(cmd)
    myfunc.WriteFile("cmdline: %s\n\n" % (cmdline), g_params['debugfile'], "a")
    MAX_TRY = 5
    cnttry = 0
    isSubmitSuccess = False
    while cnttry < MAX_TRY:
        try:
            myfunc.WriteFile(
                "run cmd: cnttry = %d, MAX_TRY=%d\n" % (cnttry, MAX_TRY),
                g_params['debugfile'], "a")
            rmsg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            isSubmitSuccess = True
            break
        except subprocess.CalledProcessError, e:
            print e
            print rmsg
            myfunc.WriteFile(
                str(e) + "\n" + rmsg + "\n", g_params['debugfile'], "a")
            pass
        cnttry += 1
        time.sleep(0.05 + cnttry * 0.03)
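
The loop above is a retry with backoff around subprocess.check_output. A minimal,
self-contained sketch of the same pattern (the helper name run_with_retry and its
defaults are illustrative, not part of the original code):

import subprocess
import time

def run_with_retry(cmd, max_try=5, base_delay=0.05, step=0.03):
    """Run cmd, retrying on CalledProcessError with a slowly growing delay.
    Returns (0, output) on success or (1, last error output) after max_try tries."""
    last_err = ""
    for cnttry in range(max_try):
        try:
            return 0, subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            last_err = e.output
            time.sleep(base_delay + (cnttry + 1) * step)
    return 1, last_err
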
Example #2
def SubmitJobToQueue(
        jobid,
        datapath,
        outpath,
        numseq,
        numseq_this_user,
        email,  #{{{
        host_ip,
        base_www_url):
    """Write a wrapper shell script for the job and submit it to the suq queue via SubmitSuqJob()."""
    myfunc.WriteFile("Entering SubmitJobToQueue()\n", g_params['debugfile'],
                     "a")
    fafile = "%s/query.fa" % (datapath)

    if numseq == -1:
        numseq = myfunc.CountFastaSeq(fafile)
    if numseq_this_user == -1:
        numseq_this_user = numseq

    name_software = "boctopus2"
    runjob = "%s %s/run_job.py" % (python_exec, rundir)
    scriptfile = "%s/runjob;%s;%s;%s;%s;%d.sh" % (
        outpath, name_software, jobid, host_ip, email, numseq)
    code_str_list = []
    code_str_list.append("#!/bin/bash")
    cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % (
        runjob, fafile, outpath, datapath, jobid)
    if email != "":
        cmdline += "-email \"%s\" " % (email)
    if base_www_url != "":
        cmdline += "-baseurl \"%s\" " % (base_www_url)
    if g_params['isForceRun']:
        cmdline += "-force "
    code_str_list.append(cmdline)

    code = "\n".join(code_str_list)

    msg = "Write scriptfile %s" % (scriptfile)
    myfunc.WriteFile(msg + "\n", g_params['debugfile'], "a")

    myfunc.WriteFile(code, scriptfile)
    os.chmod(scriptfile, 0755)

    myfunc.WriteFile("Getting priority" + "\n", g_params['debugfile'], "a")
    priority = myfunc.GetSuqPriority(numseq_this_user)
    priority = 10  # quick fix debug  2017-09-18

    if email in vip_user_list:
        priority = 999999999.0

    myfunc.WriteFile("priority=%d\n" % (priority), g_params['debugfile'], "a")

    st1 = SubmitSuqJob(suq_basedir, datapath, outpath, priority, scriptfile)

    return st1
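
A note on os.chmod(scriptfile, 0755): 0755 is the Python 2 octal literal (0o755 in
Python 3). A small version-agnostic sketch of writing a wrapper script and marking it
executable with the same rwxr-xr-x permissions (write_executable_script is an
illustrative helper, not part of the original code):

import os
import stat

def write_executable_script(path, content):
    """Write content to path and set rwxr-xr-x permissions without octal literals."""
    with open(path, "w") as fh:
        fh.write(content)
    mode = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
            | stat.S_IROTH | stat.S_IXOTH)
    os.chmod(path, mode)
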
Example #3
def WriteDateTimeTagFile(outfile, runjob_logfile, runjob_errfile):  # {{{
    """Write the current date-time into outfile; log success or failure to the job log/err files."""
    if not os.path.exists(outfile):
        date_str = time.strftime(FORMAT_DATETIME)
        try:
            myfunc.WriteFile(date_str, outfile)
            msg = "Write tag file %s succeeded" % (outfile)
            myfunc.WriteFile("[%s] %s\n" % (date_str, msg), runjob_logfile,
                             "a", True)
        except Exception as e:
            msg = "Failed to write to file %s with message: \"%s\"" % (outfile,
                                                                       str(e))
            myfunc.WriteFile("[%s] %s\n" % (date_str, msg), runjob_errfile,
                             "a", True)
def RunJob(infile, outpath, tmpdir, email, jobid, g_params):  #{{{
    """Run blastp for infile against the SwissProt database and copy the result into the job's output folder."""
    blastdb = "/data3/data/blastdb/swissprot"
    rootname = os.path.basename(os.path.splitext(infile)[0])
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    tmp_outfile = "%s/query.result" % (tmpdir)
    resultpathname = jobid
    outpath_result = "%s/%s" % (outpath, resultpathname)
    outfile = "%s/query.result" % (outpath_result)
    tarball = "%s.tar.gz" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    isOK = True
    try:
        os.makedirs(outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False

    if isOK:
        g_params['runjob_log'].append("tmpdir = %s" % (tmpdir))
        #cmd = [script_getseqlen, infile, "-o", tmp_outfile , "-printid"]
        datetime = time.strftime("%Y-%m-%d %H:%M:%S")
        rt_msg = myfunc.WriteFile(datetime, starttagfile)
        if rt_msg:
            g_params['runjob_err'].append(rt_msg)

        cmd = [
            blastall, "-i", infile, "-p", "blastp", "-o", tmp_outfile, "-d",
            blastdb
        ]

        g_params['runjob_log'].append(" ".join(cmd))
        try:
            myfunc.check_output(cmd)
        except subprocess.CalledProcessError, e:
            g_params['runjob_err'].append(str(e))
            suqoutfilelist = glob.glob("%s/*.sh.*.out" % (tmpdir))
            if len(suqoutfilelist) > 0:
                suqoutfile = suqoutfilelist[0]
                g_params['runjob_err'].append(myfunc.ReadFile(suqoutfile))

        if os.path.exists(tmp_outfile):
            cmd = ["cp", "-f", tmp_outfile, outfile]
            try:
                myfunc.check_output(cmd)
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e))
Example #5
def CleanServerFile(logfile, errfile):  #{{{
    """Clean old files on the server"""
    # clean tmp files
    msg = "CleanServerFile..."
    date_str = time.strftime(FORMAT_DATETIME)
    myfunc.WriteFile("[%s] %s\n" % (date_str, msg), logfile, "a", True)
    cmd = ["bash", "%s/clean_server_file.sh" % (rundir)]
    RunCmd(cmd, logfile, errfile)
Example #6
def RunCmd(cmd, runjob_logfile, runjob_errfile, verbose=False):  # {{{
    """Input cmd in list
       Run the command and also output message to logs
    """
    begin_time = time.time()

    isCmdSuccess = False
    cmdline = " ".join(cmd)
    date_str = time.strftime(FORMAT_DATETIME)
    rmsg = ""
    try:
        rmsg = subprocess.check_output(cmd)
        if verbose:
            msg = "workflow: %s" % (cmdline)
            myfunc.WriteFile("[%s] %s\n" % (date_str, msg), runjob_logfile,
                             "a", True)
        isCmdSuccess = True
    except subprocess.CalledProcessError, e:
        msg = "cmdline: %s\nFailed with message \"%s\"" % (cmdline, str(e))
        myfunc.WriteFile("[%s] %s\n" % (date_str, msg), runjob_errfile, "a",
                         True)
        isCmdSuccess = False
        pass
Example #7
def GetNumSameUserInQueue(suq_ls_content, basename_scriptfile, email,
                          host_ip):  #{{{
    """Count jobs in the suq queue listing that belong to the same user, matched by email and/or host_ip."""
    myfunc.WriteFile("Entering GetNumSameUserInQueue()\n",
                     g_params['debugfile'], "a")
    num_same_user_in_queue = 0
    if email == "" and host_ip == "":
        num_same_user_in_queue = 0
    else:
        lines = suq_ls_content.split("\n")
        if email != "" and host_ip != "":
            for line in lines:
                if line.find(email) != -1 or line.find(host_ip) != -1:
                    num_same_user_in_queue += 1
        elif email != "":
            for line in lines:
                if line.find(email) != -1:
                    num_same_user_in_queue += 1
        elif host_ip != "":
            for line in lines:
                if line.find(host_ip) != -1:
                    num_same_user_in_queue += 1

    return num_same_user_in_queue
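
Since a line is counted when it contains the email, the host_ip, or either of them,
the three nearly identical loops above can be collapsed. A possible simplification
with the same matching semantics (a sketch, not the original implementation):

def GetNumSameUserInQueue(suq_ls_content, basename_scriptfile, email, host_ip):
    """Count queued jobs whose suq listing line mentions the submitting email or host IP."""
    keys = [k for k in (email, host_ip) if k != ""]
    if not keys:
        return 0
    return sum(1 for line in suq_ls_content.split("\n")
               if any(line.find(k) != -1 for k in keys))
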
Example #8
            myfunc.WriteFile(
                "run cmd: cnttry = %d, MAX_TRY=%d\n" % (cnttry, MAX_TRY),
                g_params['debugfile'], "a")
            rmsg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            isSubmitSuccess = True
            break
        except subprocess.CalledProcessError, e:
            print e
            print rmsg
            myfunc.WriteFile(
                str(e) + "\n" + rmsg + "\n", g_params['debugfile'], "a")
            pass
        cnttry += 1
        time.sleep(0.05 + cnttry * 0.03)
    if isSubmitSuccess:
        myfunc.WriteFile("Leaving SubmitSuqJob() with success\n\n",
                         g_params['debugfile'], "a")
        return 0
    else:
        myfunc.WriteFile("Leaving SubmitSuqJob() with error\n\n",
                         g_params['debugfile'], "a")
        return 1


#}}}
def main(g_params):  #{{{
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1
def RunJob(infile, outpath, tmpdir, email, jobid, g_params):#{{{
    rootname = os.path.basename(os.path.splitext(infile)[0])
    starttagfile   = "%s/runjob.start"%(outpath)
    runjob_errfile = "%s/runjob.err"%(outpath)
    runjob_logfile = "%s/runjob.log"%(outpath)
    finishtagfile = "%s/runjob.finish"%(outpath)
    rmsg = ""


    resultpathname = jobid

    outpath_result = "%s/%s"%(outpath, resultpathname)
    tarball = "%s.tar.gz"%(resultpathname)
    zipfile = "%s.zip"%(resultpathname)
    tarball_fullpath = "%s.tar.gz"%(outpath_result)
    zipfile_fullpath = "%s.zip"%(outpath_result)
    outfile = "%s/%s/Topcons/topcons.top"%(outpath_result, "seq_%d"%(0))
    resultfile_text = "%s/%s"%(outpath_result, "query.result.txt")

    tmp_outpath_result = "%s/%s"%(tmpdir, resultpathname)
    isOK = True
    try:
        os.makedirs(tmp_outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s"%(tmp_outpath_result)
        myfunc.WriteFile(msg+"\n", runjob_errfile, "a")
        isOK = False

    print "isOK =", isOK

    if isOK:
        tmp_mapfile = "%s/seqid_index_map.txt"%(tmp_outpath_result)

        maplist = []
        maplist_simple = []
        hdl = myfunc.ReadFastaByBlock(infile, method_seqid=0, method_seq=0)
        if hdl.failure:
            isOK = False
        else:
            recordList = hdl.readseq()
            cnt = 0
            while recordList != None:
                for rd in recordList:
                    maplist.append("%s\t%d\t%s\t%s"%("seq_%d"%cnt, len(rd.seq),
                        rd.description, rd.seq))
                    maplist_simple.append("%s\t%d\t%s"%("seq_%d"%cnt, len(rd.seq),
                        rd.description))
                    cnt += 1
                recordList = hdl.readseq()
            hdl.close()
        myfunc.WriteFile("\n".join(maplist_simple), tmp_mapfile)

        if isOK:
#             g_params['runjob_log'].append("tmpdir = %s"%(tmpdir))
            #cmd = [script_getseqlen, infile, "-o", tmp_outfile , "-printid"]
            datetime = time.strftime("%Y-%m-%d %H:%M:%S")
            rt_msg = myfunc.WriteFile(datetime, starttagfile)
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)

            cmd = [runscript, infile,  tmp_outpath_result, blastdir, blastdb ]
            g_params['runjob_log'].append(" ".join(cmd))
            begin_time = time.time()
            try:
                rmsg = subprocess.check_output(cmd)
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e)+"\n")
                g_params['runjob_err'].append(rmsg + "\n")
                suqoutfilelist = glob.glob("%s/*.sh.*.out"%(tmpdir))
                if len(suqoutfilelist)>0:
                    suqoutfile = suqoutfilelist[0]
                    g_params['runjob_err'].append(myfunc.ReadFile(suqoutfile))
            end_time = time.time()
            runtime_in_sec = end_time - begin_time

            if os.path.exists(tmp_outpath_result):
                cmd = ["cp","-rf", tmp_outpath_result, outpath]
                try:
                    subprocess.check_output(cmd)
                except subprocess.CalledProcessError, e:
                    g_params['runjob_err'].append(str(e))

            if len(g_params['runjob_log']) > 0 :
                rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_log']), runjob_logfile, "a")
                if rt_msg:
                    g_params['runjob_err'].append(rt_msg)

            datetime = time.strftime("%Y-%m-%d %H:%M:%S")
            if os.path.exists(outfile):
                rt_msg = myfunc.WriteFile(datetime, finishtagfile)
                if rt_msg:
                    g_params['runjob_err'].append(rt_msg)

# now write the text output to a single file
            WriteTextResultFile(resultfile_text, maplist, runtime_in_sec)

            # now making zip instead (for windows users)
            pwd = os.getcwd()
            os.chdir(outpath)
#             cmd = ["tar", "-czf", tarball, resultpathname]
            cmd = ["zip", "-rq", zipfile, resultpathname]
            try:
                subprocess.check_output(cmd)
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e))
Example #10
#             cmd = ["tar", "-czf", tarball, resultpathname]
            cmd = ["zip", "-rq", zipfile, resultpathname]
            try:
                subprocess.check_output(cmd)
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e))
            os.chdir(pwd)

    isSuccess = False
    if (os.path.exists(finishtagfile) and os.path.exists(zipfile_fullpath)):
        isSuccess = True
    else:
        isSuccess = False
        failtagfile = "%s/runjob.failed"%(outpath)
        datetime = time.strftime("%Y-%m-%d %H:%M:%S")
        rt_msg = myfunc.WriteFile(datetime, failtagfile)
        if rt_msg:
            g_params['runjob_err'].append(rt_msg)

# send the result to email
    if myfunc.IsValidEmailAddress(email):
        from_email = "*****@*****.**"
        to_email = email
        subject = "Your result for TOPCONS2 JOBID=%s"%(jobid)
        if isSuccess:
            bodytext = """
Your result is ready at %s/pred/result/%s

Thanks for using TOPCONS2

        """%(g_params['base_www_url'], jobid)
def RunJob(infile, outpath, tmpdir, email, jobid, g_params):  #{{{
    """Run the TOPCONS2 workflow for every sequence in infile, reusing md5-cached results where possible, and collect the per-sequence results."""
    all_begin_time = time.time()

    rootname = os.path.basename(os.path.splitext(infile)[0])
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    rmsg = ""

    resultpathname = jobid

    outpath_result = "%s/%s" % (outpath, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    outfile = "%s/%s/Topcons/topcons.top" % (outpath_result, "seq_%d" % (0))
    resultfile_text = "%s/%s" % (outpath_result, "query.result.txt")
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    finished_seq_file = "%s/finished_seqs.txt" % (outpath_result)

    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    isOK = True
    try:
        os.makedirs(tmp_outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (tmp_outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass

    try:
        os.makedirs(outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass

    if isOK:
        try:
            open(finished_seq_file, 'w').close()
        except:
            pass
#first getting result from caches
# ==================================

        maplist = []
        maplist_simple = []
        toRunDict = {}
        hdl = myfunc.ReadFastaByBlock(infile, method_seqid=0, method_seq=0)
        if hdl.failure:
            isOK = False
        else:
            datetime = time.strftime("%Y-%m-%d %H:%M:%S")
            rt_msg = myfunc.WriteFile(datetime, starttagfile)

            recordList = hdl.readseq()
            cnt = 0
            origpath = os.getcwd()
            while recordList != None:
                for rd in recordList:
                    isSkip = False
                    # temp outpath for the sequence is always seq_0, and I feed
                    # only one seq at a time to the workflow
                    tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result,
                                                      "seq_%d" % 0)
                    outpath_this_seq = "%s/%s" % (outpath_result,
                                                  "seq_%d" % cnt)
                    subfoldername_this_seq = "seq_%d" % (cnt)
                    if os.path.exists(tmp_outpath_this_seq):
                        try:
                            shutil.rmtree(tmp_outpath_this_seq)
                        except OSError:
                            pass

                    maplist.append(
                        "%s\t%d\t%s\t%s" %
                        ("seq_%d" % cnt, len(rd.seq), rd.description, rd.seq))
                    maplist_simple.append(
                        "%s\t%d\t%s" %
                        ("seq_%d" % cnt, len(rd.seq), rd.description))
                    if not g_params['isForceRun']:
                        md5_key = hashlib.md5(rd.seq).hexdigest()
                        subfoldername = md5_key[:2]
                        md5_link = "%s/%s/%s" % (path_md5cache, subfoldername,
                                                 md5_key)
                        if os.path.exists(md5_link):
                            # create a symlink to the cache
                            rela_path = os.path.relpath(
                                md5_link, outpath_result)  #relative path
                            os.chdir(outpath_result)
                            os.symlink(rela_path, subfoldername_this_seq)

                            if os.path.exists(outpath_this_seq):
                                runtime = 0.0  #in seconds
                                topfile = "%s/%s/topcons.top" % (
                                    outpath_this_seq, "Topcons")
                                top = myfunc.ReadFile(topfile).strip()
                                numTM = myfunc.CountTM(top)
                                posSP = myfunc.GetSPPosition(top)
                                if len(posSP) > 0:
                                    isHasSP = True
                                else:
                                    isHasSP = False
                                info_finish = [
                                    "seq_%d" % cnt,
                                    str(len(rd.seq)),
                                    str(numTM),
                                    str(isHasSP), "cached",
                                    str(runtime), rd.description
                                ]
                                myfunc.WriteFile("\t".join(info_finish) + "\n",
                                                 finished_seq_file,
                                                 "a",
                                                 isFlush=True)
                                isSkip = True

                    if not isSkip:
                        # first try to delete the outfolder if exists
                        if os.path.exists(outpath_this_seq):
                            try:
                                shutil.rmtree(outpath_this_seq)
                            except OSError:
                                pass
                        origIndex = cnt
                        numTM = 0
                        toRunDict[origIndex] = [rd.seq, numTM, rd.description
                                                ]  #init value for numTM is 0

                    cnt += 1
                recordList = hdl.readseq()
            hdl.close()
        myfunc.WriteFile("\n".join(maplist_simple) + "\n", mapfile)

        # run scampi single to estimate the number of TM helices and then run
        # the query sequences in the descending order of numTM
        torun_all_seqfile = "%s/%s" % (tmp_outpath_result, "query.torun.fa")
        dumplist = []
        for key in toRunDict:
            seq = toRunDict[key][0]
            dumplist.append(">%s\n%s" % (str(key), seq))
        myfunc.WriteFile("\n".join(dumplist) + "\n", torun_all_seqfile, "w")
        del dumplist

        topfile_scampiseq = "%s/%s" % (tmp_outpath_result,
                                       "query.torun.fa.topo")
        if os.path.exists(torun_all_seqfile):
            # run scampi to estimate the number of TM helices
            cmd = [
                script_scampi, torun_all_seqfile, "-outpath",
                tmp_outpath_result
            ]
            try:
                rmsg = subprocess.check_output(cmd)
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e) + "\n")
                pass
        if os.path.exists(topfile_scampiseq):
            (idlist_scampi, annolist_scampi,
             toplist_scampi) = myfunc.ReadFasta(topfile_scampiseq)
            for jj in xrange(len(idlist_scampi)):
                numTM = myfunc.CountTM(toplist_scampi[jj])
                try:
                    toRunDict[int(idlist_scampi[jj])][1] = numTM
                except (KeyError, ValueError, TypeError):
                    pass

        sortedlist = sorted(toRunDict.items(),
                            key=lambda x: x[1][1],
                            reverse=True)
        # format of sortedlist: [(origIndex, [seq, numTM, description]), ...]

        # submit sequences one by one to the workflow in the order given by
        # sortedlist

        for item in sortedlist:
            #             g_params['runjob_log'].append("tmpdir = %s"%(tmpdir))
            #cmd = [script_getseqlen, infile, "-o", tmp_outfile , "-printid"]
            origIndex = item[0]
            seq = item[1][0]
            description = item[1][2]

            outpath_this_seq = "%s/%s" % (outpath_result, "seq_%d" % origIndex)
            tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result, "seq_%d" %
                                              (0))
            if os.path.exists(tmp_outpath_this_seq):
                try:
                    shutil.rmtree(tmp_outpath_this_seq)
                except OSError:
                    pass

            seqfile_this_seq = "%s/%s" % (tmp_outpath_result, "query_%d.fa" %
                                          (origIndex))
            seqcontent = ">%d\n%s\n" % (origIndex, seq)
            myfunc.WriteFile(seqcontent, seqfile_this_seq, "w")

            if not os.path.exists(seqfile_this_seq):
                g_params['runjob_err'].append(
                    "failed to generate seq index %d" % (origIndex))
                continue

            cmd = [
                runscript, seqfile_this_seq, tmp_outpath_result, blastdir,
                blastdb
            ]
            g_params['runjob_log'].append(" ".join(cmd))
            begin_time = time.time()
            try:
                rmsg = subprocess.check_output(cmd)
                g_params['runjob_log'].append("workflow:\n" + rmsg + "\n")
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e) + "\n")
                g_params['runjob_err'].append(rmsg + "\n")
                pass
                #suqoutfilelist = glob.glob("%s/*.sh.*.out"%(tmpdir))
                #if len(suqoutfilelist)>0:
                #    suqoutfile = suqoutfilelist[0]
                #g_params['runjob_err'].append(myfunc.ReadFile(suqoutfile))
            end_time = time.time()
            runtime_in_sec = end_time - begin_time

            if os.path.exists(tmp_outpath_this_seq):
                cmd = ["mv", "-f", tmp_outpath_this_seq, outpath_this_seq]
                isCmdSuccess = False
                try:
                    subprocess.check_output(cmd)
                    isCmdSuccess = True
                except subprocess.CalledProcessError, e:
                    msg = "Failed to run prediction for sequence No. %d\n" % (
                        origIndex)
                    g_params['runjob_err'].append(msg)
                    g_params['runjob_err'].append(str(e) + "\n")
                    pass
                timefile = "%s/time.txt" % (tmp_outpath_result)
                targetfile = "%s/time.txt" % (outpath_this_seq)
                if os.path.exists(timefile) and os.path.exists(
                        outpath_this_seq):
                    try:
                        shutil.move(timefile, targetfile)
                    except:
                        g_params['runjob_err'].append(
                            "Failed to move %s/time.txt" %
                            (tmp_outpath_result) + "\n")
                        pass

                if isCmdSuccess:
                    runtime = runtime_in_sec  #in seconds
                    topfile = "%s/%s/topcons.top" % (outpath_this_seq,
                                                     "Topcons")
                    top = myfunc.ReadFile(topfile).strip()
                    numTM = myfunc.CountTM(top)
                    posSP = myfunc.GetSPPosition(top)
                    if len(posSP) > 0:
                        isHasSP = True
                    else:
                        isHasSP = False
                    info_finish = [
                        "seq_%d" % origIndex,
                        str(len(seq)),
                        str(numTM),
                        str(isHasSP), "newrun",
                        str(runtime), description
                    ]
                    myfunc.WriteFile("\t".join(info_finish) + "\n",
                                     finished_seq_file,
                                     "a",
                                     isFlush=True)
                    # now write the text output for this seq

                    info_this_seq = "%s\t%d\t%s\t%s" % (
                        "seq_%d" % origIndex, len(seq), description, seq)
                    resultfile_text_this_seq = "%s/%s" % (outpath_this_seq,
                                                          "query.result.txt")
                    myfunc.WriteTOPCONSTextResultFile(resultfile_text_this_seq,
                                                      outpath_result,
                                                      [info_this_seq],
                                                      runtime_in_sec,
                                                      g_params['base_www_url'])
                    # create or update the md5 cache
                    # create cache only on the front-end
                    if g_params['base_www_url'].find("topcons.net") != -1:
                        md5_key = hashlib.md5(seq).hexdigest()
                        subfoldername = md5_key[:2]
                        md5_subfolder = "%s/%s" % (path_md5cache,
                                                   subfoldername)
                        md5_link = "%s/%s/%s" % (path_md5cache, subfoldername,
                                                 md5_key)
                        if os.path.exists(md5_link):
                            try:
                                os.unlink(md5_link)
                            except:
                                pass
                        subfolder_md5 = "%s/%s" % (path_md5cache,
                                                   subfoldername)
                        if not os.path.exists(subfolder_md5):
                            try:
                                os.makedirs(subfolder_md5)
                            except:
                                pass

                        rela_path = os.path.relpath(
                            outpath_this_seq, md5_subfolder)  #relative path
                        try:
                            os.chdir(md5_subfolder)
                            os.symlink(rela_path, md5_key)
                        except:
                            pass

        all_end_time = time.time()
        all_runtime_in_sec = all_end_time - all_begin_time

        if len(g_params['runjob_log']) > 0:
            rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_log']) + "\n",
                                      runjob_logfile, "a")
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)

        datetime = time.strftime("%Y-%m-%d %H:%M:%S")
        if os.path.exists(finished_seq_file):
            rt_msg = myfunc.WriteFile(datetime, finishtagfile)
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)

# now write the text output to a single file
        statfile = "%s/%s" % (outpath_result, "stat.txt")
        myfunc.WriteTOPCONSTextResultFile(resultfile_text,
                                          outpath_result,
                                          maplist,
                                          all_runtime_in_sec,
                                          g_params['base_www_url'])
Example #13
def start_boctopus(infile, blastpath, modHome, hmmfilename, ws_cytosolic, ws_extracellular, ws_lipidfacing, ws_porefacing, \
    fakedbpath, dbpath, blastpgppath, hhsearchpath, hhblitspath, rpath):
    """Run the BOCTOPUS2 pipeline for each sequence in infile, one sequence at a time, and move the per-sequence result files to outpath."""
    print "boctopus2 will start with ", infile

#     f = open(infile, "r")#{{{ DELETED
#     lines = f.readlines()
#     f.close()
# 
#     pname   = []
#     seqname = []
#     tempseq = ""
#     for line in lines:
#         line = line.strip()
# 
#         if line.startswith(">"):
#             pname.append(line[1:])
#             if len(tempseq) > 0:
#                 seqname.append(tempseq)
#             tempseq = ""
#         else:
#             tempseq += line
# 
#     if len(tempseq) > 0:
#         seqname.append(tempseq)
# 
#     print pname
#     print seqname
# 
#     if len(pname) != len(seqname):
#         print "number of pnames and seqs not the same."
#     else:#}}}

    # rewrite sequence reading part
    (seqidlist, seqannolist, seqlist) = myfunc.ReadFasta(infile)
    if len(seqidlist) <= 0:
        print >> sys.stderr, "No valid sequences read from file '%s'"%(infile)
        return 1

    #for i in range(0, len(pname)):
    for i in xrange(len(seqidlist)):
        seqid = seqidlist[i]
        seq = seqlist[i]
        seqanno = seqannolist[i]
        print "processing ", i , seqanno

        subtmpdir = "%s/seq_%d"%(tmpdir, i)
        if os.path.exists(subtmpdir):
            shutil.rmtree(subtmpdir)
        os.makedirs(subtmpdir)

        singleseqfile = "%s/query.fa"%(subtmpdir)
        myfunc.WriteFile(">%s\n%s\n"%(seqanno, seq), singleseqfile, mode="w", isFlush=True)

        if not os.path.exists(singleseqfile):
            print >> sys.stderr, "Failed to write to singleseqfile %s"%(singleseqfile)
            continue

        command = "python "+ "%s/boctopus_startHMM.py "%(rundir) + singleseqfile + " " + blastpath + " " + modHome + " " + hmmfilename + " " + ws_cytosolic + " " + ws_extracellular + " " + ws_lipidfacing + " " + ws_porefacing + " " + rpath+ " " +fakedbpath+\
" " + dbpath+ " " + blastpgppath+ " " + hhsearchpath + " " + hhblitspath
        print command
        os.system(command)
        outpath_this_seq = "%s/seq_%d"%(outpath, i)
        if not os.path.exists(outpath_this_seq):
            os.makedirs(outpath_this_seq)
        filepair_to_copy = [
                ("%s/query.fa"%subtmpdir, "%s/query.fa"%outpath_this_seq),
                ("%s/output/query_ioIOS.prf.txt_svm_topo.png"%subtmpdir, "%s/query.predict.png"%(outpath_this_seq)),
                ("%s/output/query_topologies.txt"%(subtmpdir), "%s/query_topologies.txt"%outpath_this_seq),
                ("%s/svmoutput/query_ioIOS.prf.txt"%subtmpdir, "%s/profile.txt"%outpath_this_seq),
                ("%s/pssm/query.filtered.pssmvals"%subtmpdir, "%s/pssm.txt"%(outpath_this_seq))

        ]
        for tup in filepair_to_copy:
            shutil.move(tup[0], tup[1])


    return
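
The call to boctopus_startHMM.py above is assembled as one long shell string and run
with os.system. A quoting-safe variant that passes an argument list to subprocess,
assuming boctopus_startHMM.py takes the same positional arguments in the same order
(run_boctopus_starthmm is an illustrative helper, not part of the original code):

import subprocess

def run_boctopus_starthmm(rundir, singleseqfile, blastpath, modHome, hmmfilename,
                          ws_cytosolic, ws_extracellular, ws_lipidfacing,
                          ws_porefacing, rpath, fakedbpath, dbpath,
                          blastpgppath, hhsearchpath, hhblitspath):
    """Invoke boctopus_startHMM.py with an argument list instead of a shell string."""
    cmd = ["python", "%s/boctopus_startHMM.py" % (rundir), singleseqfile,
           blastpath, modHome, hmmfilename, ws_cytosolic, ws_extracellular,
           ws_lipidfacing, ws_porefacing, rpath, fakedbpath, dbpath,
           blastpgppath, hhsearchpath, hhblitspath]
    return subprocess.call(cmd)
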
Example #14
def RunJob(modelfile, seqfile, outpath, tmpdir, email, jobid, g_params):  #{{{
    """Run the ProQ3 scoring workflow for every model in modelfile, package the results as a zip archive and e-mail the submitter."""
    all_begin_time = time.time()

    rootname = os.path.basename(os.path.splitext(modelfile)[0])
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    rmsg = ""

    query_parafile = "%s/query.para.txt" % (outpath)
    query_para = {}
    content = myfunc.ReadFile(query_parafile)
    if content != "":
        query_para = json.loads(content)

    resultpathname = jobid

    outpath_result = "%s/%s" % (outpath, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    finished_model_file = "%s/finished_models.txt" % (outpath_result)
    timefile = "%s/time.txt" % (outpath_result)

    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    isOK = True
    if os.path.exists(tmp_outpath_result):
        shutil.rmtree(tmp_outpath_result)
    try:
        os.makedirs(tmp_outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (tmp_outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass

    if os.path.exists(outpath_result):
        shutil.rmtree(outpath_result)
    try:
        os.makedirs(outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass

    if isOK:
        try:
            open(finished_model_file, 'w').close()
        except:
            pass
#first getting result from caches
# cache profiles for sequences, but do not cache predictions for models
        webserver_common.WriteDateTimeTagFile(starttagfile, runjob_logfile,
                                              runjob_errfile)
        # ==================================
        numModel = 0
        modelFileList = []
        if seqfile != "":  # if the fasta sequence is supplied, all models should be using this sequence
            subfoldername_profile = "profile_%d" % (0)
            outpath_profile = "%s/%s" % (outpath_result, subfoldername_profile)
            CreateProfile(seqfile, outpath_profile, outpath_result,
                          tmp_outpath_result, timefile, runjob_errfile)

            # run proq3 for models
            modelList = myfunc.ReadPDBModel(modelfile)
            numModel = len(modelList)
            for ii in range(len(modelList)):
                model = modelList[ii]
                tmp_model_file = "%s/query_%d.pdb" % (tmp_outpath_result, ii)
                myfunc.WriteFile(model + "\n", tmp_model_file)
                profilename = "%s/%s" % (outpath_profile, "query.fasta")
                subfoldername_this_model = "model_%d" % (ii)
                outpath_this_model = "%s/%s" % (outpath_result,
                                                subfoldername_this_model)

                modelinfo = ScoreModel(query_para, tmp_model_file,
                                       outpath_this_model, profilename,
                                       outpath_result, tmp_outpath_result,
                                       timefile, runjob_errfile)
                myfunc.WriteFile("\t".join(modelinfo) + "\n",
                                 finished_model_file, "a")
                modelFileList.append(
                    "%s/%s" % (outpath_this_model, "query_%d.pdb" % (ii)))

        else:  # no seqfile supplied, sequences are obtained from the model file
            modelList = myfunc.ReadPDBModel(modelfile)
            numModel = len(modelList)
            for ii in range(len(modelList)):
                model = modelList[ii]
                tmp_model_file = "%s/query_%d.pdb" % (tmp_outpath_result, ii)
                myfunc.WriteFile(model + "\n", tmp_model_file)
                subfoldername_this_model = "model_%d" % (ii)
                tmp_outpath_this_model = "%s/%s" % (tmp_outpath_result,
                                                    subfoldername_this_model)
                if not os.path.exists(tmp_outpath_this_model):
                    os.makedirs(tmp_outpath_this_model)
                tmp_seqfile = "%s/query.fasta" % (tmp_outpath_this_model)
                cmd = [pdb2aa_script, tmp_model_file]
                g_params['runjob_log'].append(" ".join(cmd))
                try:
                    rmsg = subprocess.check_output(cmd)
                    g_params['runjob_log'].append(
                        "extracting sequence from modelfile:\n" + rmsg + "\n")
                except subprocess.CalledProcessError as e:
                    g_params['runjob_err'].append(str(e) + "\n")
                    g_params['runjob_err'].append(rmsg + "\n")

                if rmsg != "":
                    myfunc.WriteFile(">seq\n" + rmsg.strip(), tmp_seqfile)

                subfoldername_profile = "profile_%d" % (ii)
                outpath_profile = "%s/%s" % (outpath_result,
                                             subfoldername_profile)
                CreateProfile(tmp_seqfile, outpath_profile, outpath_result,
                              tmp_outpath_result, timefile, runjob_errfile)

                outpath_this_model = "%s/%s" % (outpath_result,
                                                subfoldername_this_model)
                profilename = "%s/%s" % (outpath_profile, "query.fasta")
                modelinfo = ScoreModel(query_para, tmp_model_file,
                                       outpath_this_model, profilename,
                                       outpath_result, tmp_outpath_result,
                                       timefile, runjob_errfile)
                myfunc.WriteFile("\t".join(modelinfo) + "\n",
                                 finished_model_file, "a")
                modelFileList.append(
                    "%s/%s" % (outpath_this_model, "query_%d.pdb" % (ii)))

        all_end_time = time.time()
        all_runtime_in_sec = all_end_time - all_begin_time

        if len(g_params['runjob_log']) > 0:
            rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_log']) + "\n",
                                      runjob_logfile, "a")
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)

        webserver_common.WriteDateTimeTagFile(finishtagfile, runjob_logfile,
                                              runjob_errfile)
        # now write the text output to a single file
        #statfile = "%s/%s"%(outpath_result, "stat.txt")
        statfile = ""
        dumped_resultfile = "%s/%s" % (outpath_result, "query.proq3.txt")
        proq3opt = GetProQ3Option(query_para)
        webserver_common.WriteProQ3TextResultFile(dumped_resultfile,
                                                  query_para,
                                                  modelFileList,
                                                  all_runtime_in_sec,
                                                  g_params['base_www_url'],
                                                  proq3opt,
                                                  statfile=statfile)

        # now making zip instead (for windows users)
        # note that zip rq will zip the real data for symbolic links
        os.chdir(outpath)
        #             cmd = ["tar", "-czf", tarball, resultpathname]
        cmd = ["zip", "-rq", zipfile, resultpathname]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError as e:
            g_params['runjob_err'].append(str(e))
            pass

    isSuccess = False
    if (os.path.exists(finishtagfile) and os.path.exists(zipfile_fullpath)):
        isSuccess = True
        flist = glob.glob("%s/*.out" % (tmpdir))
        if len(flist) > 0:
            outfile_runscript = flist[0]
        else:
            outfile_runscript = ""
        if os.path.exists(outfile_runscript):
            shutil.move(outfile_runscript, outpath)
        # delete the tmpdir if succeeded
        shutil.rmtree(tmpdir)  # remove tmpdir once the results are safely in outpath
    else:
        isSuccess = False
        failedtagfile = "%s/runjob.failed" % (outpath)
        webserver_common.WriteDateTimeTagFile(failedtagfile, runjob_logfile,
                                              runjob_errfile)

# send the result to email
# do not sendmail at the cloud VM
    if (webserver_common.IsFrontEndNode(g_params['base_www_url'])
            and myfunc.IsValidEmailAddress(email)):
        from_email = "*****@*****.**"
        to_email = email
        subject = "Your result for ProQ3 JOBID=%s" % (jobid)
        if isSuccess:
            bodytext = """
Your result is ready at %s/pred/result/%s

Thanks for using ProQ3

        """ % (g_params['base_www_url'], jobid)
        else:
            bodytext = """
We are sorry that your job with jobid %s is failed.

Please contact %s if you have any questions.

Attached below is the error message:
%s
            """ % (jobid, contact_email, "\n".join(g_params['runjob_err']))
        g_params['runjob_log'].append("Sendmail %s -> %s, %s" %
                                      (from_email, to_email, subject))  #debug
        rtValue = myfunc.Sendmail(from_email, to_email, subject, bodytext)
        if rtValue != 0:
            g_params['runjob_err'].append(
                "Sendmail to {} failed with status {}".format(
                    to_email, rtValue))

    if len(g_params['runjob_err']) > 0:
        rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_err']) + "\n",
                                  runjob_errfile, "w")
        return 1
    return 0
Example #15
def loginfo(msg, outfile):  # {{{
    """Write loginfo to outfile, appending current time"""
    date_str = time.strftime(FORMAT_DATETIME)
    myfunc.WriteFile("[%s] %s\n" % (date_str, msg), outfile, "a", True)
Example #16
def CreateProfile(seqfile, outpath_profile, outpath_result, tmp_outpath_result,
                  timefile, runjob_errfile):  #{{{
    """Build the sequence profile for seqfile under outpath_profile, or link to an existing md5-cached profile."""
    (seqid, seqanno, seq) = myfunc.ReadSingleFasta(seqfile)
    subfoldername_profile = os.path.basename(outpath_profile)
    tmp_outpath_profile = "%s/%s" % (tmp_outpath_result, subfoldername_profile)
    isSkip = False
    rmsg = ""
    if not g_params['isForceRun']:
        md5_key = hashlib.md5(seq).hexdigest()
        subfoldername = md5_key[:2]
        md5_link = "%s/%s/%s" % (path_md5cache, subfoldername, md5_key)
        if os.path.exists(md5_link):
            # create a symlink to the cache
            rela_path = os.path.relpath(md5_link,
                                        outpath_result)  #relative path
            os.chdir(outpath_result)
            os.symlink(rela_path, subfoldername_profile)
            isSkip = True
    if not isSkip:
        # build profiles
        if not os.path.exists(tmp_outpath_profile):
            try:
                os.makedirs(tmp_outpath_profile)
            except OSError:
                msg = "Failed to create folder %s" % (tmp_outpath_profile)
                myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
                return 1
        cmd = [
            runscript, "-fasta", seqfile, "-outpath", tmp_outpath_profile,
            "-only-build-profile"
        ]
        g_params['runjob_log'].append(" ".join(cmd))
        begin_time = time.time()
        cmdline = " ".join(cmd)
        #os.system("%s >> %s 2>&1"%(cmdline, runjob_errfile)) #DEBUG
        try:
            rmsg = subprocess.check_output(cmd)
            g_params['runjob_log'].append("profile_building:\n" + rmsg + "\n")
        except subprocess.CalledProcessError, e:
            g_params['runjob_err'].append(str(e) + "\n")
            g_params['runjob_err'].append("cmdline: " + cmdline + "\n")
            g_params['runjob_err'].append("profile_building:\n" + rmsg + "\n")
            pass
        end_time = time.time()
        runtime_in_sec = end_time - begin_time
        msg = "%s\t%f\n" % (subfoldername_profile, runtime_in_sec)
        myfunc.WriteFile(msg, timefile, "a")

        if os.path.exists(tmp_outpath_profile):
            md5_key = hashlib.md5(seq).hexdigest()
            md5_subfoldername = md5_key[:2]
            subfolder_profile_cache = "%s/%s" % (path_profile_cache,
                                                 md5_subfoldername)
            outpath_profile_cache = "%s/%s" % (subfolder_profile_cache,
                                               md5_key)
            if os.path.exists(outpath_profile_cache):
                shutil.rmtree(outpath_profile_cache)
            if not os.path.exists(subfolder_profile_cache):
                os.makedirs(subfolder_profile_cache)
            cmd = ["mv", "-f", tmp_outpath_profile, outpath_profile_cache]
            isCmdSuccess = False
            try:
                subprocess.check_output(cmd)
                isCmdSuccess = True
            except subprocess.CalledProcessError, e:
                msg = "Failed to run get profile for the target sequence %s" % (
                    seq)
                g_params['runjob_err'].append(msg)
                g_params['runjob_err'].append(str(e) + "\n")
                pass

            if isCmdSuccess and webserver_common.IsFrontEndNode(
                    g_params['base_www_url']):

                # make zip folder for the cached profile
                cwd = os.getcwd()
                os.chdir(subfolder_profile_cache)
                cmd = ["zip", "-rq", "%s.zip" % (md5_key), md5_key]
                try:
                    subprocess.check_output(cmd)
                except subprocess.CalledProcessError, e:
                    g_params['runjob_err'].append(str(e))
                    pass
                os.chdir(cwd)

                # create soft link for profile and for md5
                # first create a soft link for outpath_profile to outpath_profile_cache
                rela_path = os.path.relpath(outpath_profile_cache,
                                            outpath_result)  #relative path
                try:
                    os.chdir(outpath_result)
                    os.symlink(rela_path, subfoldername_profile)
                except:
                    pass

                # then create a soft link for md5 to outpath_profile_cache
                md5_subfolder = "%s/%s" % (path_md5cache, md5_subfoldername)
                md5_link = "%s/%s/%s" % (path_md5cache, md5_subfoldername,
                                         md5_key)
                if os.path.exists(md5_link):
                    try:
                        os.unlink(md5_link)
                    except:
                        pass
                if not os.path.exists(md5_subfolder):
                    try:
                        os.makedirs(md5_subfolder)
                    except:
                        pass

                rela_path = os.path.relpath(outpath_profile_cache,
                                            md5_subfolder)  #relative path
                try:
                    os.chdir(md5_subfolder)
                    os.symlink(rela_path, md5_key)
                except:
                    pass
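
The md5-cache bookkeeping above (unlink any old link, create the parent folder, chdir,
symlink a relative path) recurs in several of these examples. A compact sketch of that
pattern without the chdir, since os.symlink resolves a relative target against the
directory containing the link (refresh_cache_symlink is an illustrative helper, not
part of the original code):

import os

def refresh_cache_symlink(target_dir, link_path):
    """Re-create link_path as a relative symlink pointing at target_dir."""
    link_dir = os.path.dirname(link_path)
    if link_dir and not os.path.isdir(link_dir):
        os.makedirs(link_dir)
    if os.path.lexists(link_path):
        os.unlink(link_path)
    os.symlink(os.path.relpath(target_dir, link_dir), link_path)
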
Example #17
def RunJob(modelfile, seqfile, outpath, tmpdir, email, jobid, g_params):  #{{{
    all_begin_time = time.time()

    rootname = os.path.basename(os.path.splitext(modelfile)[0])
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    rmsg = ""

    query_parafile = "%s/query.para.txt" % (outpath)
    query_para = {}
    content = myfunc.ReadFile(query_parafile)
    if content != "":
        query_para = json.loads(content)

    resultpathname = jobid

    outpath_result = "%s/%s" % (outpath, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    finished_model_file = "%s/finished_models.txt" % (outpath_result)
    timefile = "%s/time.txt" % (outpath_result)

    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    isOK = True
    if os.path.exists(tmp_outpath_result):
        shutil.rmtree(tmp_outpath_result)
    try:
        os.makedirs(tmp_outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (tmp_outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass

    if os.path.exists(outpath_result):
        shutil.rmtree(outpath_result)
    try:
        os.makedirs(outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass

    if isOK:
        try:
            open(finished_model_file, 'w').close()
        except:
            pass
#first getting result from caches
# cache profiles for sequences, but do not cache predictions for models
        webserver_common.WriteDateTimeTagFile(starttagfile, runjob_logfile,
                                              runjob_errfile)
        # ==================================
        numModel = 0
        modelFileList = []
        if seqfile != "":  # if the fasta sequence is supplied, all models should be using this sequence
            subfoldername_profile = "profile_%d" % (0)
            outpath_profile = "%s/%s" % (outpath_result, subfoldername_profile)
            CreateProfile(seqfile, outpath_profile, outpath_result,
                          tmp_outpath_result, timefile, runjob_errfile)

            # run proq3 for models
            modelList = myfunc.ReadPDBModel(modelfile)
            numModel = len(modelList)
            for ii in xrange(len(modelList)):
                model = modelList[ii]
                tmp_model_file = "%s/query_%d.pdb" % (tmp_outpath_result, ii)
                myfunc.WriteFile(model + "\n", tmp_model_file)
                profilename = "%s/%s" % (outpath_profile, "query.fasta")
                subfoldername_this_model = "model_%d" % (ii)
                outpath_this_model = "%s/%s" % (outpath_result,
                                                subfoldername_this_model)

                modelinfo = ScoreModel(query_para, tmp_model_file,
                                       outpath_this_model, profilename,
                                       outpath_result, tmp_outpath_result,
                                       timefile, runjob_errfile)
                myfunc.WriteFile("\t".join(modelinfo) + "\n",
                                 finished_model_file, "a")
                modelFileList.append(
                    "%s/%s" % (outpath_this_model, "query_%d.pdb" % (ii)))

        else:  # no seqfile supplied, sequences are obtained from the model file
            modelList = myfunc.ReadPDBModel(modelfile)
            numModel = len(modelList)
            for ii in xrange(len(modelList)):
                model = modelList[ii]
                tmp_model_file = "%s/query_%d.pdb" % (tmp_outpath_result, ii)
                myfunc.WriteFile(model + "\n", tmp_model_file)
                subfoldername_this_model = "model_%d" % (ii)
                tmp_outpath_this_model = "%s/%s" % (tmp_outpath_result,
                                                    subfoldername_this_model)
                if not os.path.exists(tmp_outpath_this_model):
                    os.makedirs(tmp_outpath_this_model)
                tmp_seqfile = "%s/query.fasta" % (tmp_outpath_this_model)
                cmd = [pdb2aa_script, tmp_model_file]
                g_params['runjob_log'].append(" ".join(cmd))
                try:
                    rmsg = subprocess.check_output(cmd)
                    g_params['runjob_log'].append(
                        "extracting sequence from modelfile:\n" + rmsg + "\n")
                except subprocess.CalledProcessError, e:
                    g_params['runjob_err'].append(str(e) + "\n")
                    g_params['runjob_err'].append(rmsg + "\n")

                if rmsg != "":
                    myfunc.WriteFile(">seq\n" + rmsg.strip(), tmp_seqfile)

                subfoldername_profile = "profile_%d" % (ii)
                outpath_profile = "%s/%s" % (outpath_result,
                                             subfoldername_profile)
                CreateProfile(tmp_seqfile, outpath_profile, outpath_result,
                              tmp_outpath_result, timefile, runjob_errfile)

                outpath_this_model = "%s/%s" % (outpath_result,
                                                subfoldername_this_model)
                profilename = "%s/%s" % (outpath_profile, "query.fasta")
                modelinfo = ScoreModel(query_para, tmp_model_file,
                                       outpath_this_model, profilename,
                                       outpath_result, tmp_outpath_result,
                                       timefile, runjob_errfile)
                myfunc.WriteFile("\t".join(modelinfo) + "\n",
                                 finished_model_file, "a")
                modelFileList.append(
                    "%s/%s" % (outpath_this_model, "query_%d.pdb" % (ii)))

        all_end_time = time.time()
        all_runtime_in_sec = all_end_time - all_begin_time

        if len(g_params['runjob_log']) > 0:
            rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_log']) + "\n",
                                      runjob_logfile, "a")
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)

        webserver_common.WriteDateTimeTagFile(finishtagfile, runjob_logfile,
                                              runjob_errfile)
        # now write the text output to a single file
        #statfile = "%s/%s"%(outpath_result, "stat.txt")
        statfile = ""
        dumped_resultfile = "%s/%s" % (outpath_result, "query.proq3.txt")
        proq3opt = GetProQ3Option(query_para)
        webserver_common.WriteProQ3TextResultFile(dumped_resultfile,
                                                  query_para,
                                                  modelFileList,
                                                  all_runtime_in_sec,
                                                  g_params['base_www_url'],
                                                  proq3opt,
                                                  statfile=statfile)

        # now making zip instead (for windows users)
        # note that zip rq will zip the real data for symbolic links
        os.chdir(outpath)
        #             cmd = ["tar", "-czf", tarball, resultpathname]
        cmd = ["zip", "-rq", zipfile, resultpathname]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError, e:
            g_params['runjob_err'].append(str(e))
            pass
Example #18
 ] + proq3opt
 g_params['runjob_log'].append(" ".join(cmd))
 cmdline = " ".join(cmd)
 begin_time = time.time()
 try:
     rmsg = subprocess.check_output(cmd)
     g_params['runjob_log'].append("model scoring:\n" + rmsg + "\n")
 except subprocess.CalledProcessError, e:
     g_params['runjob_err'].append(str(e) + "\n")
     g_params['runjob_err'].append("cmdline: " + cmdline + "\n")
     g_params['runjob_err'].append("model scoring:\n" + rmsg + "\n")
     pass
 end_time = time.time()
 runtime_in_sec = end_time - begin_time
 msg = "%s\t%f\n" % (subfoldername_this_model, runtime_in_sec)
 myfunc.WriteFile(msg, timefile, "a")
 if os.path.exists(tmp_outpath_this_model):
     cmd = ["mv", "-f", tmp_outpath_this_model, outpath_this_model]
     isCmdSuccess = False
     try:
         subprocess.check_output(cmd)
         isCmdSuccess = True
     except subprocess.CalledProcessError, e:
         msg = "Failed to move result from %s to %s." % (
             tmp_outpath_this_model, outpath_this_model)
         g_params['runjob_err'].append(msg)
         g_params['runjob_err'].append(str(e) + "\n")
         pass
 modelfile = "%s/query_%d.pdb" % (outpath_this_model, modelidx)
 globalscorefile = "%s.proq3.%s.global" % (modelfile, method_quality)
 if not os.path.exists(globalscorefile):
Example #19
def ScoreModel(
        query_para,
        model_file,
        outpath_this_model,
        profilename,
        outpath_result,  #{{{
        tmp_outpath_result,
        timefile,
        runjob_errfile):
    """Score a single model with ProQ3 against the prebuilt profile and return [model name, length, runtime, global scores...]."""
    subfoldername_this_model = os.path.basename(outpath_this_model)
    modelidx = int(subfoldername_this_model.split("model_")[1])
    try:
        method_quality = query_para['method_quality']
    except KeyError:
        method_quality = 'sscore'
    rmsg = ""
    tmp_outpath_this_model = "%s/%s" % (tmp_outpath_result,
                                        subfoldername_this_model)
    proq3opt = GetProQ3Option(query_para)
    cmd = [
        runscript, "-profile", profilename, "-outpath", tmp_outpath_this_model,
        model_file
    ] + proq3opt
    g_params['runjob_log'].append(" ".join(cmd))
    cmdline = " ".join(cmd)
    begin_time = time.time()
    try:
        rmsg = subprocess.check_output(cmd)
        g_params['runjob_log'].append("model scoring:\n" + rmsg + "\n")
    except subprocess.CalledProcessError as e:
        g_params['runjob_err'].append(str(e) + "\n")
        g_params['runjob_err'].append("cmdline: " + cmdline + "\n")
        g_params['runjob_err'].append("model scoring:\n" + rmsg + "\n")
        pass
    end_time = time.time()
    runtime_in_sec = end_time - begin_time
    msg = "%s\t%f\n" % (subfoldername_this_model, runtime_in_sec)
    myfunc.WriteFile(msg, timefile, "a")
    if os.path.exists(tmp_outpath_this_model):
        cmd = ["mv", "-f", tmp_outpath_this_model, outpath_this_model]
        isCmdSuccess = False
        try:
            subprocess.check_output(cmd)
            isCmdSuccess = True
        except subprocess.CalledProcessError as e:
            msg = "Failed to move result from %s to %s." % (
                tmp_outpath_this_model, outpath_this_model)
            g_params['runjob_err'].append(msg)
            g_params['runjob_err'].append(str(e) + "\n")
            pass
    modelfile = "%s/query_%d.pdb" % (outpath_this_model, modelidx)
    globalscorefile = "%s.proq3.%s.global" % (modelfile, method_quality)
    if not os.path.exists(globalscorefile):
        globalscorefile = "%s.proq3.global" % (modelfile)
    (globalscore,
     itemList) = webserver_common.ReadProQ3GlobalScore(globalscorefile)
    modelseqfile = "%s/query_%d.pdb.fasta" % (outpath_this_model, modelidx)
    modellength = myfunc.GetSingleFastaLength(modelseqfile)

    modelinfo = [
        subfoldername_this_model,
        str(modellength),
        str(runtime_in_sec)
    ]
    if globalscore:
        for i in range(len(itemList)):
            modelinfo.append(str(globalscore[itemList[i]]))
    return modelinfo
def SubmitJobToQueue(
        jobid,
        datapath,
        outpath,
        nummodel,
        nummodel_this_user,
        email,  #{{{
        host_ip,
        base_www_url):
    myfunc.WriteFile("Entering SubmitJobToQueue()\n", g_params['debugfile'],
                     "a")
    modelfile = "%s/query.pdb" % (datapath)
    seqfile = "%s/query.fa" % (datapath)

    if nummodel == -1:
        nummodel = myfunc.ReadFile(modelfile).count("\nENDMDL")
        if nummodel == 0:
            nummodel = 1
    if nummodel_this_user == -1:
        nummodel_this_user = nummodel

    query_parafile = "%s/query.para.txt" % (outpath)

    query_para = {}
    content = myfunc.ReadFile(query_parafile)
    para_str = content
    if content != "":
        query_para = json.loads(content)

    try:
        name_software = query_para['name_software']
    except KeyError:
        name_software = "proq3"

    runjob = "%s %s/run_job.py" % (python_exec, rundir)
    scriptfile = "%s/runjob,%s,%s,%s,%s,%d.sh" % (
        outpath, name_software, jobid, host_ip, email, nummodel)
    code_str_list = []
    code_str_list.append("#!/bin/bash")
    code_str_list.append("source %s/bin/activate" % (virt_env_path))
    cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % (
        runjob, modelfile, outpath, datapath, jobid)
    if email != "":
        cmdline += "-email \"%s\" " % (email)
    if os.path.exists(seqfile):
        cmdline += "-fasta \"%s\" " % (seqfile)
    if base_www_url != "":
        cmdline += "-baseurl \"%s\" " % (base_www_url)
    if g_params['isForceRun']:
        cmdline += "-force "
    code_str_list.append(cmdline)

    code = "\n".join(code_str_list)

    msg = "Write scriptfile %s" % (scriptfile)
    myfunc.WriteFile(msg + "\n", g_params['debugfile'], "a")

    myfunc.WriteFile(code, scriptfile)
    os.chmod(scriptfile, 0755)

    myfunc.WriteFile("Getting priority" + "\n", g_params['debugfile'], "a")
    priority = myfunc.GetSuqPriority(nummodel_this_user)

    if email in vip_user_list:
        priority = 999999999.0

    myfunc.WriteFile("priority=%d\n" % (priority), g_params['debugfile'], "a")

    st1 = SubmitSuqJob(suq_basedir, datapath, outpath, priority, scriptfile)

    return st1