예제 #1
0
def SubmitJobToQueue(
        jobid,
        datapath,
        outpath,
        numseq,
        numseq_this_user,
        email,
        host_ip,
        base_www_url):  #{{{
    """Write the bash run script for a job and submit it via SubmitSuqJob().

    The script is written to
    "<outpath>/runjob;boctopus2;<jobid>;<host_ip>;<email>;<numseq>.sh",
    made executable, and contains one run_job.py command line built from
    the arguments below.

    Args:
        jobid: job identifier, passed to run_job.py with -jobid.
        datapath: directory containing "query.fa"; also passed as -tmpdir.
        outpath: directory the script is written to; passed as -outpath.
        numseq: number of query sequences, or -1 to count them from
            "<datapath>/query.fa".
        numseq_this_user: sequence count used for the priority lookup, or
            -1 to use numseq.
        email: e-mail address; passed with -email when not empty.
        host_ip: only used as a component of the script file name.
        base_www_url: passed with -baseurl when not empty.

    Returns:
        Whatever SubmitSuqJob() returns.
    """
    # shlex.quote exists on Python 3, pipes.quote is its Python 2 name.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    debugfile = g_params['debugfile']
    myfunc.WriteFile("Entering SubmitJobToQueue()\n", debugfile, "a")
    fafile = "%s/query.fa" % (datapath)

    if numseq == -1:
        numseq = myfunc.CountFastaSeq(fafile)
    if numseq_this_user == -1:
        numseq_this_user = numseq

    name_software = "boctopus2"
    runjob = "%s %s/run_job.py" % (python_exec, rundir)
    scriptfile = "%s/runjob;%s;%s;%s;%s;%d.sh" % (
        outpath, name_software, jobid, host_ip, email, numseq)

    cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % (
        runjob, fafile, outpath, datapath, jobid)
    # email and base_www_url presumably come from the web client (TODO
    # confirm). Plain double quotes would still let the shell expand $(...)
    # and backticks, so the values are shell-quoted instead.
    if email != "":
        cmdline += "-email %s " % (shell_quote(email))
    if base_www_url != "":
        cmdline += "-baseurl %s " % (shell_quote(base_www_url))
    if g_params['isForceRun']:
        cmdline += "-force "

    code = "\n".join(["#!/bin/bash", cmdline])

    msg = "Write scriptfile %s" % (scriptfile)
    myfunc.WriteFile(msg + "\n", debugfile, "a")

    myfunc.WriteFile(code, scriptfile)
    # 0o755 is the octal spelling accepted by both Python 2.6+ and Python 3.
    os.chmod(scriptfile, 0o755)

    myfunc.WriteFile("Getting priority" + "\n", debugfile, "a")
    priority = myfunc.GetSuqPriority(numseq_this_user)
    # The looked-up priority is deliberately overridden by a fixed value;
    # the lookup call is kept so that removing the override restores it.
    priority = 10  # quick fix debug  2017-09-18

    if email in vip_user_list:
        priority = 999999999.0

    myfunc.WriteFile("priority=%d\n" % (priority), debugfile, "a")

    return SubmitSuqJob(suq_basedir, datapath, outpath, priority, scriptfile)
예제 #2
0
def IndexFastaFile(infile, dbname, idtype): #{{{
    """Index the FASTA file infile under the database name dbname.

    The file is read in blocks of BLOCK_SIZE. Every complete record (a
    ">" annotation line plus its sequence) is passed to WriteIndexFasta()
    together with the open index file "<dbname>.index". A record that is
    cut by a block boundary is carried over to the next block. After the
    read loop, my_indexformatconvert.py is run on the index file.

    If the number of distinct indexed IDs equals the number of sequences
    in infile and only database file number 0 was used, infile is removed
    and replaced by a relative symbolic link to "<dbname>0.db".

    Args:
        infile: path of the FASTA file to index.
        dbname: path prefix of the database; its directory is created
            when missing.
        idtype: passed through unchanged to WriteIndexFasta().

    Returns:
        1 if infile cannot be opened for reading or the index file cannot
        be opened for writing, otherwise 0 (also when the sequence counts
        disagree; that case is only reported on stderr).
    """
    path_of_dbname = os.path.dirname(dbname)
    if path_of_dbname != "" and not os.path.exists(path_of_dbname):
        try:
            os.makedirs(path_of_dbname)
        except OSError as e:
            # Keep going: if the directory is really unusable, opening the
            # index file below fails and the function returns 1.
            sys.stderr.write("Failed to create directory %s: %s\n" % (
                path_of_dbname, e))

    try:
        fpin = open(infile,"rb")
    except IOError:
        sys.stderr.write("Failed to open file %s for read\n"%(infile))
        return 1

    indexfile = dbname+".index"
    try:
        fpindex = open(indexfile,"wb")
    except IOError:
        # Do not leak the already opened input file.
        fpin.close()
        msg = "Failed to open indexfile {} for write"
        sys.stderr.write(msg.format(indexfile) + "\n")
        return 1

    cntdbfile = 0
    record_offset = 0
    fpdb = None
    idSet = set()
    # Holds a record whose end was not found in the block read so far.
    brokenSeqWithAnnoLine = ""
    try:
        fpindex.write("DEF_DBNAME %s\n"%dbname)
        buff = fpin.read(BLOCK_SIZE)
        while buff:
            beg = 0
            end = 0
            while 1:
                if brokenSeqWithAnnoLine:
                    # Finish the record carried over from the previous
                    # block. If the carry-over already ends with a newline
                    # the next record starts at a bare ">".
                    if brokenSeqWithAnnoLine.endswith("\n"):
                        end = buff.find(">")
                    else:
                        end = buff.find("\n>")
                    if end >= 0:
                        seqWithAnno = brokenSeqWithAnnoLine + buff[0:end]
                        (fpdb, record_offset) = WriteIndexFasta(
                            seqWithAnno, fpdb, dbname, fpindex, cntdbfile,
                            record_offset, idSet, idtype)
                        brokenSeqWithAnnoLine = ""
                        beg = end
                    else:
                        # Record still not complete: keep accumulating.
                        brokenSeqWithAnnoLine += buff
                        break

                beg = buff.find(">", beg)
                end = buff.find("\n>", beg+1)
                if beg >= 0:
                    if end >= 0:
                        seqWithAnno = buff[beg:end]
                        (fpdb, record_offset) = WriteIndexFasta(
                            seqWithAnno, fpdb, dbname, fpindex, cntdbfile,
                            record_offset, idSet, idtype)
                        beg = end
                    else:
                        # Last record of this block may continue in the
                        # next block.
                        brokenSeqWithAnnoLine = buff[beg:]
                        break
                else:
                    break

            if record_offset > MAX_DBFILE_SIZE:
                # Current database file is full: switch to the next number.
                fpdb.close()
                fpdb = None
                cntdbfile += 1
                record_offset = 0
            buff = fpin.read(BLOCK_SIZE)

        if brokenSeqWithAnnoLine:
            # Whatever is left after the last block is the final record.
            (fpdb, record_offset) = WriteIndexFasta(
                brokenSeqWithAnnoLine, fpdb, dbname, fpindex, cntdbfile,
                record_offset, idSet, idtype)
    finally:
        # Close everything even if WriteIndexFasta() raised.
        fpin.close()
        fpindex.close()
        if fpdb is not None:
            fpdb.close()

# post processing
    numIndexedSeq = len(idSet)
    numSeq = myfunc.CountFastaSeq(infile)

    cmd = "%s/my_indexformatconvert.py -f %s.index"%(g_params['binpath'], dbname)
    os.system(cmd)

    if numIndexedSeq == numSeq and cntdbfile == 0:
        print("%d sequences indexed successfully" % (numIndexedSeq))

        dbfile = "%s0.db"%(dbname)
        dbfile_base = os.path.basename(dbfile)
        dbfile_path = os.path.dirname(dbfile)
        infile_path = os.path.dirname(infile)
        if dbfile_path == "":
            dbfile_path = "."
        if infile_path == "":
            infile_path = "."
        relpath = os.path.relpath(dbfile_path, infile_path)

        # Replace the input file by a relative symlink to the database file.
        cmd = "rm -f %s"%infile
        print(cmd)
        os.system(cmd)

        cmd = "ln -s %s%s%s %s"%(relpath, os.sep, dbfile_base, infile)
        print(cmd)
        os.system(cmd)
    else:
        # NOTE(review): this branch is also taken when the counts agree but
        # more than one database file was written -- confirm the message.
        sys.stderr.write("numIndexedSeq (%d) conflicts with numSeq (%d)\n" %(
                numIndexedSeq, numSeq))
    return 0
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError, e:
            print >> sys.stderr, e
            return 1
    if tmpdir == "":
        print >> sys.stderr, "tmpdir not set. exit"
        return 1
    elif not os.path.exists(tmpdir):
        try:
            subprocess.check_output(["mkdir", "-p", tmpdir])
        except subprocess.CalledProcessError, e:
            print >> sys.stderr, e
            return 1

    numseq = myfunc.CountFastaSeq(infile)
    g_params['debugfile'] = "%s/debug.log" % (outpath)
    return RunJob(infile, outpath, tmpdir, email, jobid, g_params)


#}}}


def InitGlobalParameter():  #{{{
    """Return a new dictionary holding the default global parameters.

    Every call builds a fresh dictionary (and fresh empty lists for the
    'runjob_log' and 'runjob_err' entries).
    """
    return {
        'isQuiet': True,
        'runjob_log': [],
        'runjob_err': [],
        'isForceRun': False,
        'base_www_url': "",
    }
예제 #4
0
def _CopyFileIfExists(srcfile, dstfile):
    """Copy srcfile to dstfile with g_params['CP_EXE'] when srcfile exists."""
    if os.path.exists(srcfile):
        os.system("%s %s %s" % (g_params['CP_EXE'], srcfile, dstfile))


def _WriteImageLinkCell(srcpath, pfamid, ext, htmlname, outpath, fpout,
                        extraExtList=()):
    """Copy the image files of pfamid and write the <td> cell linking them.

    Copies "<pfamid><ext>", then "<pfamid><extra>" for each entry of
    extraExtList, then "thumb.<pfamid><ext>" from srcpath to
    "<outpath>/<htmlname>" (each only if the source exists). The cell
    written to fpout shows the thumbnail and links to the full image.
    """
    targetpath = outpath + os.sep + htmlname
    imageTargetFile = targetpath + os.sep + pfamid + ext
    thumbImageTargetFile = targetpath + os.sep + 'thumb.' + pfamid + ext

    _CopyFileIfExists(srcpath + os.sep + pfamid + ext, imageTargetFile)
    for extraExt in extraExtList:
        _CopyFileIfExists(srcpath + os.sep + pfamid + extraExt,
                          targetpath + os.sep + pfamid + extraExt)
    _CopyFileIfExists(srcpath + os.sep + 'thumb.' + pfamid + ext,
                      thumbImageTargetFile)

    fpout.write('<td>\n')
    fpout.write("<a href=\"%s\" target=\"_blank\">\n" %
                (htmlname + os.sep + os.path.basename(imageTargetFile)))
    fpout.write("<img src=\"%s\">\n" %
                (htmlname + os.sep + os.path.basename(thumbImageTargetFile)))
    fpout.write("</a>\n")
    fpout.write('</td>\n')


def _WriteTextCell(fpout, content):
    """Write one <td> cell with content on its own line."""
    fpout.write("<td>\n%s\n</td>\n" % (content))


def WriteHTMLTable(
        tablename,
        tabletitle,
        idList,
        pfamDefDict,
        datapath,
        topomsapath,
        ordermsapath,
        htmlname,
        outpath,
        fpout):  #{{{
    """Write a sortable HTML table with one row per Pfam family to fpout.

    A family is listed only if "<datapath>/<pfamid>.sorted.orig.topomsa.fa"
    holds at least g_params['MIN_NUMSEQ'] sequences; a missing file counts
    as -1 sequences. The image files referenced by a row are copied to
    "<outpath>/<htmlname>" with g_params['CP_EXE'].

    Args:
        tablename: anchor name written before the table.
        tabletitle: heading written before the table.
        idList: Pfam IDs to consider, in output order.
        pfamDefDict: maps Pfam ID to its definition; "-" is shown for IDs
            that are not in it.
        datapath, topomsapath, ordermsapath: accepted for interface
            compatibility, but the values actually used are taken from
            g_params (see the note in the body).
        htmlname: directory name used in the links and under outpath.
        outpath: directory below which the image files are copied.
        fpout: object with a write() method that receives the HTML.
    """
    # NOTE(review): the path arguments are overridden by the global
    # settings, exactly as before -- confirm whether callers rely on the
    # arguments being ignored.
    ordermsapath = g_params['ordermsapath']
    datapath = g_params['datapath']
    topomsapath = g_params['topomsapath']
    treepath = g_params['treepath']

    # Decide once which optional columns exist so that the header row and
    # every data row always have the same number of cells.
    hasOrderMSA = ordermsapath != "" and os.path.exists(ordermsapath)
    hasTopoMSA = topomsapath != "" and os.path.exists(topomsapath)

    fpout.write("<a name=\"%s\"></a><h4>%s</h4>\n" % (tablename, tabletitle))
    fpout.write("<table class=\"sortable\" border=1>\n")

    headerItemList = [
        "No.",
        "PfamID",
        "Definition",
        "numSeq",
        "numCluster",
        "Phylo Tree",
    ]
    if hasOrderMSA:
        headerItemList.append("Topology MSA ordered according to phylo tree")
    if hasTopoMSA:
        headerItemList.append("Topology MSA grouped by topology comparison")

    fpout.write("<tr>\n")
    for item in headerItemList:
        fpout.write("<th>\n%s\n</th>\n" % (item))
    fpout.write("</tr>\n")

    cntOutputID = 0
    for pfamid in idList:
        topomsafile = datapath + os.sep + pfamid + '.sorted.orig.topomsa.fa'
        if os.path.exists(topomsafile):
            numSeq = myfunc.CountFastaSeq(topomsafile)
        else:
            numSeq = -1
        if numSeq < g_params['MIN_NUMSEQ']:
            continue
        cntOutputID += 1

        pfamURL = 'http://pfam.sanger.ac.uk/family/' + pfamid
        pfamDef = pfamDefDict.get(pfamid, '-')

        fpout.write("<tr>\n")
        #---------------------------
        _WriteTextCell(fpout, '%d' % (cntOutputID))
        #---------------------------
        _WriteTextCell(
            fpout,
            '<a href=\"%s\" target=\"_blank\">%s</a>' % (pfamURL, pfamid))
        #---------------------------
        _WriteTextCell(fpout, '%s' % pfamDef)
        #---------------------------
        _WriteTextCell(fpout, '%d' % numSeq)
        #---------------------------
        clusteredmsafile = (datapath + os.sep + pfamid +
                            '.clustered.orig.topomsa.fa')
        numCluster = GetNumCluster(clusteredmsafile)
        # -1 from GetNumCluster() is shown as a dash.
        if numCluster == -1:
            _WriteTextCell(fpout, "-")
        else:
            _WriteTextCell(fpout, '%d' % numCluster)
        #---------------------------
        # Phylogenetic tree: the PDF is copied alongside the JPG, but only
        # the JPG and its thumbnail are referenced in the cell.
        _WriteImageLinkCell(treepath, pfamid, '-itol.jpg', htmlname, outpath,
                            fpout, extraExtList=('-itol.pdf',))
        #---------------------------
        if hasOrderMSA:
            _WriteImageLinkCell(ordermsapath, pfamid,
                                '.reordered.topomsa.png', htmlname, outpath,
                                fpout)
        #---------------------------
        if hasTopoMSA:
            _WriteImageLinkCell(topomsapath, pfamid,
                                '.sorted.orig.topomsa.png', htmlname, outpath,
                                fpout)
        #---------------------------
        fpout.write("</tr>\n")
    fpout.write("</table>\n")
예제 #5
0
def SplitFasta(inFile):  #{{{
    """Split the FASTA file inFile into pieces via OutputSplittedSeq().

    The file is read in blocks of BLOCK_SIZE. Every complete record (a
    ">" annotation line plus its sequence) is handed to
    OutputSplittedSeq() with the base name of inFile (extension removed)
    and the running split counters. A record cut by a block boundary is
    carried over to the next block.

    When g_params['numsplit'] is set and greater than 1,
    g_params['numseq_per_split'] is recomputed from the number of
    sequences in inFile. g_params['isNameFileSequentially'] is set to True
    when 'numsplit' is present or 'numseq_per_split' is greater than 1.

    Args:
        inFile: path of the FASTA file to split.

    Returns:
        The number of records passed to OutputSplittedSeq().
    """
    # The faster version
    if 'numsplit' in g_params or g_params['numseq_per_split'] > 1:
        g_params['isNameFileSequentially'] = True
    if 'numsplit' in g_params and g_params['numsplit'] > 1:
        numTotalSeq = myfunc.CountFastaSeq(inFile)
        g_params['numseq_per_split'] = int(
            ceil(numTotalSeq / float(g_params['numsplit'])))
        if g_params['verbose'] >= 1:
            # The trailing space keeps "going to" and "be" apart.
            msg = "file %s (with %d sequences) is going to "\
                    "be splitted into %d files"
            print(msg % (inFile, numTotalSeq, g_params['numsplit']))

    rootname = os.path.basename(os.path.splitext(inFile)[0])

    cntTotalSeq = 0
    fpout = None
    cntsplit = 0
    cntseq_of_split = 0

    # Holds a record whose end was not found in the block read so far.
    brokenSeqWithAnnoLine = ""
    with open(inFile, "r") as fpin:
        buff = fpin.read(BLOCK_SIZE)
        while buff:
            beg = 0
            end = 0
            while 1:
                if brokenSeqWithAnnoLine:
                    # Finish the record carried over from the previous
                    # block. If the carry-over already ends with a newline
                    # the next record starts at a bare ">".
                    if brokenSeqWithAnnoLine.endswith("\n"):
                        end = buff.find(">")
                    else:
                        end = buff.find("\n>")
                    if end >= 0:
                        seqWithAnno = brokenSeqWithAnnoLine + buff[0:end]
                        (cntsplit, cntseq_of_split,
                         fpout) = OutputSplittedSeq(seqWithAnno, rootname,
                                                    cntsplit, cntseq_of_split,
                                                    fpout)
                        brokenSeqWithAnnoLine = ""
                        cntTotalSeq += 1
                        beg = end
                    else:
                        # Record still not complete: keep accumulating.
                        brokenSeqWithAnnoLine += buff
                        break

                beg = buff.find(">", beg)
                end = buff.find("\n>", beg + 1)
                if beg >= 0:
                    if end >= 0:
                        seqWithAnno = buff[beg:end]
                        (cntsplit, cntseq_of_split,
                         fpout) = OutputSplittedSeq(seqWithAnno, rootname,
                                                    cntsplit, cntseq_of_split,
                                                    fpout)
                        cntTotalSeq += 1
                        beg = end
                    else:
                        # Last record of this block may continue in the
                        # next block.
                        brokenSeqWithAnnoLine = buff[beg:]
                        break
                else:
                    break

            buff = fpin.read(BLOCK_SIZE)

    if brokenSeqWithAnnoLine:
        # Whatever is left after the last block is the final record.
        (cntsplit, cntseq_of_split,
         fpout) = OutputSplittedSeq(brokenSeqWithAnnoLine, rootname, cntsplit,
                                    cntseq_of_split, fpout)
        cntTotalSeq += 1

    # fpout is presumably the file handle of the split currently being
    # written (TODO confirm against OutputSplittedSeq); close it so the
    # last piece is flushed. Closing an already closed file is harmless.
    if fpout is not None:
        fpout.close()

    return cntTotalSeq