예제 #1
0
def MatchTopoPairAln(queryTopoFile,alignFile, targetsTopologyFile, fpout):#{{{
    """Thread topology strings through pairwise alignments and print them.

    The query topology is read from queryTopoFile (single FASTA record) and
    the target topologies from targetsTopologyFile (multi-record FASTA), both
    through myfunc.  The alignments come from ReadNeedleAlignment(alignFile);
    alignment record number i is paired with target number i.

    For every alignment column, a residue position is replaced by the next
    character of the matching topology string while a gap ('-') stays a gap.
    For each target the two gapped topology strings are printed to fpout in a
    FASTA-like layout, preceded by a "#Topology alignment N" line and followed
    by an empty line.

    A target ID that differs from the 'seqid2' field of its alignment record
    is reported on stderr; processing continues regardless.

    Any exception is announced on stderr and then re-raised.  Returns 0.
    """
    try:
        (queryID, queryAnnotation, queryTopology) = myfunc.ReadSingleFasta(queryTopoFile)
        alns = ReadNeedleAlignment(alignFile)
        (targetIDList, targetAnnotationList, targetTopoList) = myfunc.ReadFasta(targetsTopologyFile)

        print >> fpout, "#Number of alignments: %d" % len(targetIDList)

        for idx, targetID in enumerate(targetIDList):
            record = alns[idx]
            gappedQuery = record['alnseq1']
            gappedTarget = record['alnseq2']

            if targetID != record['seqid2']:
                print >> sys.stderr, "seqID does not match, record %d" % idx

            queryCols = []
            targetCols = []
            posQuery = 0    # next unread position in the query topology
            posTarget = 0   # next unread position in the target topology
            for col in range(len(gappedQuery)):
                queryHasResidue = gappedQuery[col] != '-'
                targetHasResidue = gappedTarget[col] != '-'

                if queryHasResidue:
                    queryCols.append(queryTopology[posQuery])
                    posQuery += 1
                else:
                    queryCols.append('-')

                if targetHasResidue:
                    targetCols.append(targetTopoList[idx][posTarget])
                    posTarget += 1
                else:
                    targetCols.append('-')

            # emit the pair of gapped topologies for this target
            print >> fpout, "#Topology alignment %d" % (idx + 1)
            print >> fpout, ">%s" % queryAnnotation
            print >> fpout, "%s" % "".join(queryCols)
            print >> fpout, ">%s" % targetAnnotationList[idx]
            print >> fpout, "%s" % "".join(targetCols)
            print >> fpout
    except:
        # report which function failed, then let the caller see the error
        print >>sys.stderr, "except for the function:%s"%sys._getframe().f_code.co_name
        raise
    return 0
예제 #2
0
def MoveCache_mode_result(outpath_this_seq):#{{{
    """Move a per-sequence result folder into the md5 cache and link back to it.

    The folder must contain "seq.fa"; if it does not, a message is printed
    and nothing else happens.  Otherwise the md5 of the sequence selects the
    cache location path_cache/<first two hex digits>/<md5>:

    * if that location does not exist yet, the result folder is moved there;
    * if it already exists, the result folder is deleted instead.

    Finally, when the result folder is gone and the cache folder exists, a
    relative symbolic link named like the original folder is created next to
    where the folder used to be.  When isChangeOwner is set, newly created
    cache sub-folders and the link are chown'ed to the apache user.

    Failures of mv/rm and of the symlink step are printed and otherwise
    ignored (best effort).  The process working directory is not modified.

    Args:
        outpath_this_seq: path of the result folder of one sequence.

    Returns:
        None.
    """
    # normpath first so that a trailing "/" does not yield an empty link name
    subfoldername_this_seq = os.path.basename(os.path.normpath(outpath_this_seq))
    outpath_this_seq = os.path.realpath(outpath_this_seq)
    outpath_result = os.path.dirname(outpath_this_seq)
    fafile = "%s/seq.fa"%(outpath_this_seq)
    if not os.path.exists(fafile):
        print("fafile %s does not exist"%(fafile))
        return

    (seqid, seqanno, seq) = myfunc.ReadSingleFasta(fafile)
    # hashlib only accepts bytes on Python 3; on Python 2 str already is bytes
    seq_bytes = seq if isinstance(seq, bytes) else seq.encode("utf-8")
    md5_key = hashlib.md5(seq_bytes).hexdigest()
    sub_md5_name = md5_key[:2]
    sub_cachedir = "%s/%s"%(path_cache, sub_md5_name)
    cachedir = "%s/%s/%s"%(path_cache, sub_md5_name, md5_key)
    if not os.path.exists(sub_cachedir):
        os.makedirs(sub_cachedir)
        if isChangeOwner:
            os.chown(sub_cachedir, apacheusername_uid, apacheusername_gid)

    if not os.path.exists(cachedir):
        cmd = ["mv","-f", outpath_this_seq, cachedir]
    else:
        print("cachedir %s already exists for %s"%(cachedir, outpath_this_seq))
        cmd = ["rm","-rf", outpath_this_seq]
    cmdline = " ".join(cmd)
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError as e:
        # best effort: report and carry on, the checks below decide what to do
        print(e)
    if VERBOSE>0:
        print(cmdline)

    # create symbolic link to the cache
    if not os.path.exists(outpath_this_seq) and os.path.exists(cachedir):
        rela_path = os.path.relpath(cachedir, outpath_result) #relative path
        # A relative link target is resolved against the directory holding
        # the link, so the link can be created by its full path without
        # changing the working directory.
        linkpath = os.path.join(outpath_result, subfoldername_this_seq)
        try:
            os.symlink(rela_path, linkpath)
            if isChangeOwner:
                os.lchown(linkpath, apacheusername_uid, apacheusername_gid)
        except OSError as e:
            print(e)
        if VERBOSE > 0:
            print(outpath_result, "os.symlink(", rela_path, ",", subfoldername_this_seq,")")
예제 #3
0
def WriteTextResultFile(outfile, outpath_result, maplist, runtime_in_sec, statfile=""):#{{{
    """Collect the predicted topologies of all sequences into one text file.

    Every entry of maplist is a tab-separated record
    "subfolder<TAB>length<TAB>description<TAB>sequence".  For each record the
    prediction is read from <outpath_result>/<subfolder>/query_topologies.txt
    and written to outfile as ">description" followed by the topology.  A
    missing prediction file is noted in g_params['runjob_log']; the read is
    still attempted.

    When statfile is given, the number of records whose topology has at least
    one TM segment (per myfunc.CountTM) is written to it as
    "numTMPro<TAB>count".

    Both output files are closed on every path.  An IOError is reported on
    stdout and not propagated.

    Args:
        outfile: path of the text result to write.
        outpath_result: folder holding the per-sequence result sub-folders.
        maplist: iterable of tab-separated record strings (see above).
        runtime_in_sec: accepted for interface compatibility; not used here.
        statfile: optional path of the statistics file ("" disables it).

    Returns:
        None.
    """
    try:
        with open(outfile, "w") as fpout:
            fpstat = None
            if statfile != "":
                fpstat = open(statfile, "w")
            try:
                numTMPro = 0
                for line in maplist:
                    strs = line.split('\t')
                    subfoldername = strs[0]
                    # length and seq are not used below; they are still parsed
                    # so that a malformed record fails here as it always did
                    length = int(strs[1])
                    desp = strs[2]
                    seq = strs[3]
                    outpath_this_seq = "%s/%s"%(outpath_result, subfoldername)
                    predfile = "%s/query_topologies.txt"%(outpath_this_seq)
                    g_params['runjob_log'].append("predfile =  %s.\n"%(predfile))
                    if not os.path.exists(predfile):
                        g_params['runjob_log'].append("predfile %s does not exist\n"%(predfile))
                    (seqid, seqanno, top) = myfunc.ReadSingleFasta(predfile)
                    fpout.write(">%s\n%s\n"%(desp, top))
                    if myfunc.CountTM(top) > 0:
                        numTMPro += 1

                if fpstat:
                    out_str_list = ["numTMPro\t%d\n"%(numTMPro)]
                    fpstat.write("%s"%("\n".join(out_str_list)))
            finally:
                # close inside the outer try so that a failing flush is still
                # reported by the IOError handler below
                if fpstat:
                    fpstat.close()
    except IOError:
        print("Failed to write to file %s"%(outfile))
def Labeltopologyfastaseq(queryTopoFile, alignFile, fastaFile, fpout):  #{{{
    """Copy a FASTA file to fpout, appending a topology label to each record.

    The query topology (single FASTA record in queryTopoFile) and the
    alignments from ReadNeedleAlignment(alignFile) are handed to
    GetTopologyLabels, whose result is indexed by sequence ID.  Every record
    of fastaFile is then written unchanged to fpout, followed by one line
    "/<label>/" where the label is looked up by the ID that
    myfunc.GetSeqIDFromAnnotation extracts from the header line.

    If the label length differs from the residue count of the record, a
    message is written to stderr; the record is emitted anyway.  Lines that
    appear before the first '>' header are skipped.

    Any exception (for instance an ID without a label) is announced on
    stderr and re-raised.

    Args:
        queryTopoFile: FASTA file holding the query topology.
        alignFile: alignment file understood by ReadNeedleAlignment.
        fastaFile: amino-acid FASTA file to annotate.
        fpout: writable stream receiving the annotated records.

    Returns:
        0.
    """
    try:
        (queryID, queryAnnotation,
         queryTopology) = myfunc.ReadSingleFasta(queryTopoFile)
        # read in alignment
        alns = ReadNeedleAlignment(alignFile)
        topologyLabels = GetTopologyLabels(queryTopology, alns)

        with open(fastaFile, "r") as fpin:
            lines = fpin.readlines()

        numLines = len(lines)
        i = 0
        while i < numLines:
            line = lines[i]
            if line[0] != '>':
                # Not inside a record (e.g. text before the first header).
                # The index must still advance, otherwise this loop never
                # terminates.
                i = i + 1
                continue

            seqID = myfunc.GetSeqIDFromAnnotation(line)
            fpout.write("%s" % line)
            i = i + 1
            seqParts = []
            while i < numLines and lines[i][0] != '>':
                fpout.write("%s" % lines[i])
                seqParts.append(lines[i].strip())
                i = i + 1
            aaSeq = "".join(seqParts)
            fpout.write("/%s/\n" % topologyLabels[seqID])
            if len(aaSeq) != len(topologyLabels[seqID]):
                sys.stderr.write("%s: length not match" % seqID + "\n")
    except:
        # report which function failed, then let the caller see the error
        sys.stderr.write("except for the function:%s" % sys._getframe(
        ).f_code.co_name + "\n")
        raise
    return 0
예제 #5
0
def CreateProfile(seqfile, outpath_profile, outpath_result, tmp_outpath_result,
                  timefile, runjob_errfile):  #{{{
    """Build the profile for the sequence in seqfile, or reuse a cached one.

    Steps:

    1. Unless g_params['isForceRun'] is set, look for
       path_md5cache/<first two md5 hex digits>/<md5 of the sequence>.  If it
       exists, create the relative symlink
       outpath_result/<basename of outpath_profile> pointing to it and stop.
    2. Otherwise run
       ``runscript -fasta seqfile -outpath <tmp folder> -only-build-profile``
       where the tmp folder is tmp_outpath_result/<basename of
       outpath_profile>.  The command line and its output go to
       g_params['runjob_log']; on failure the error, the command line and
       the captured output go to g_params['runjob_err'].  The elapsed time is
       appended to timefile.
    3. If the tmp folder exists afterwards, move it to
       path_profile_cache/<two hex digits>/<md5>, replacing any folder that
       is already there.
    4. If the move succeeded and webserver_common.IsFrontEndNode() accepts
       g_params['base_www_url']: zip the cached folder, link
       outpath_result/<basename of outpath_profile> to it and (re)create the
       md5 link under path_md5cache.  Failures in this step are ignored,
       except that a failing zip command is noted in g_params['runjob_err'].

    The working directory of the process is left untouched.

    Returns:
        1 when the temporary profile folder cannot be created, otherwise
        None.

    Raises:
        OSError: when the symlink of step 1 cannot be created.
    """
    (seqid, seqanno, seq) = myfunc.ReadSingleFasta(seqfile)
    subfoldername_profile = os.path.basename(outpath_profile)
    tmp_outpath_profile = "%s/%s" % (tmp_outpath_result, subfoldername_profile)
    # Links are created through their full path.  A relative link target is
    # resolved against the directory containing the link, so no chdir needed.
    profile_link = os.path.join(outpath_result, subfoldername_profile)
    md5_key = hashlib.md5(seq).hexdigest()
    md5_subfoldername = md5_key[:2]

    if not g_params['isForceRun']:
        md5_link = "%s/%s/%s" % (path_md5cache, md5_subfoldername, md5_key)
        if os.path.exists(md5_link):
            # create a symlink to the cache and skip the build
            rela_path = os.path.relpath(md5_link,
                                        outpath_result)  #relative path
            os.symlink(rela_path, profile_link)
            return

    # build profiles
    if not os.path.exists(tmp_outpath_profile):
        try:
            os.makedirs(tmp_outpath_profile)
        except OSError:
            msg = "Failed to create folder %s" % (tmp_outpath_profile)
            myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
            return 1
    cmd = [
        runscript, "-fasta", seqfile, "-outpath", tmp_outpath_profile,
        "-only-build-profile"
    ]
    cmdline = " ".join(cmd)
    g_params['runjob_log'].append(cmdline)
    begin_time = time.time()
    try:
        rmsg = subprocess.check_output(cmd)
        g_params['runjob_log'].append("profile_building:\n" + rmsg + "\n")
    except subprocess.CalledProcessError as e:
        # check_output attaches what the command printed before failing
        rmsg = e.output or ""
        g_params['runjob_err'].append(str(e) + "\n")
        g_params['runjob_err'].append("cmdline: " + cmdline + "\n")
        g_params['runjob_err'].append("profile_building:\n" + rmsg + "\n")
    runtime_in_sec = time.time() - begin_time
    msg = "%s\t%f\n" % (subfoldername_profile, runtime_in_sec)
    myfunc.WriteFile(msg, timefile, "a")

    if not os.path.exists(tmp_outpath_profile):
        return

    # move the freshly built profile into the cache
    subfolder_profile_cache = "%s/%s" % (path_profile_cache,
                                         md5_subfoldername)
    outpath_profile_cache = "%s/%s" % (subfolder_profile_cache, md5_key)
    if os.path.exists(outpath_profile_cache):
        shutil.rmtree(outpath_profile_cache)
    if not os.path.exists(subfolder_profile_cache):
        os.makedirs(subfolder_profile_cache)
    cmd = ["mv", "-f", tmp_outpath_profile, outpath_profile_cache]
    try:
        subprocess.check_output(cmd)
    except subprocess.CalledProcessError as e:
        msg = "Failed to run get profile for the target sequence %s" % (
            seq)
        g_params['runjob_err'].append(msg)
        g_params['runjob_err'].append(str(e) + "\n")
        return

    if not webserver_common.IsFrontEndNode(g_params['base_www_url']):
        return

    # make zip folder for the cached profile; run zip inside the cache
    # sub-folder so that the archive holds paths relative to it
    cmd = ["zip", "-rq", "%s.zip" % (md5_key), md5_key]
    try:
        subprocess.check_output(cmd, cwd=subfolder_profile_cache)
    except subprocess.CalledProcessError as e:
        g_params['runjob_err'].append(str(e))

    # create soft link for profile and for md5
    # first create a soft link for outpath_profile to outpath_profile_cache
    rela_path = os.path.relpath(outpath_profile_cache,
                                outpath_result)  #relative path
    try:
        os.symlink(rela_path, profile_link)
    except OSError:
        pass  # best effort, e.g. the link exists already

    # then create a soft link for md5 to outpath_profile_cache
    md5_subfolder = "%s/%s" % (path_md5cache, md5_subfoldername)
    md5_link = "%s/%s/%s" % (path_md5cache, md5_subfoldername, md5_key)
    # lexists also sees a dangling link, which exists() would report as
    # absent and which would then block the symlink call below
    if os.path.lexists(md5_link):
        try:
            os.unlink(md5_link)
        except OSError:
            pass
    if not os.path.exists(md5_subfolder):
        try:
            os.makedirs(md5_subfolder)
        except OSError:
            pass

    rela_path = os.path.relpath(outpath_profile_cache,
                                md5_subfolder)  #relative path
    try:
        os.symlink(rela_path, md5_link)
    except OSError:
        pass  # best effort
예제 #6
0
def DumpPredictionTOPCONS2(seqfile, path_result, outfile, isWriteDG,
                           isWriteRel):  #{{{
    """Dump the predictions of all finished sequences into text reports.

    Three files are written:

    * outfile: per sequence, its number, name, length and residues followed
      by the topology of every method in the method list, optionally the
      Delta-G values (isWriteDG) and the TOPCONS reliability (isWriteRel);
    * outfile + ".fa": the TOPCONS consensus topology of every finished
      sequence in FASTA format;
    * outfile + ".unfinished.fa": the sequences that have no
      "Topcons/topcons.top" result file.

    The result folder of a sequence is path_result/seq_<index>, unless
    g_params['resultPathFormat'] is "md5".  In that case it is
    path_result/<md5[:2]>/<md5[2:4]>/<md5>/seq_0, where the md5 of the
    sequence is tried first and the md5 of the sequence plus a newline
    second.

    A warning is written to stderr when at least one sequence is unfinished.
    All three files are closed on every path, including errors.

    Returns:
        0 on success, 1 when one of the output files cannot be opened.
    """
    (seqidlist, seqannolist, seqlist) = myfunc.ReadFasta(seqfile)
    numseq = len(seqidlist)

    def writeln(fp, text):
        # write text followed by a newline
        fp.write("%s\n" % (text,))

    def close_quietly(fplist):
        for fp in fplist:
            try:
                fp.close()
            except IOError:
                pass

    opened = []
    for path in [outfile, "%s.fa" % (outfile),
                 "%s.unfinished.fa" % (outfile)]:
        try:
            opened.append(open(path, "w"))
        except IOError:
            writeln(sys.stderr, "Failed to write to file \"%s\"" % (path))
            # do not leave the files opened so far dangling
            close_quietly(opened)
            return 1
    (fpout, fpout_fa, fpout_unfinished_fa) = opened

    methodlist = [
        'TOPCONS', 'OCTOPUS', 'Philius', 'PolyPhobius', 'SCAMPI', 'SPOCTOPUS',
        'Homology'
    ]

    cntUnFinished = 0
    try:
        for iseq in range(numseq):
            seq = seqlist[iseq]
            length = len(seq)
            desp = seqannolist[iseq]
            if g_params['resultPathFormat'] == "md5":
                subdirname = "seq_%d" % (0)
                md5_key1 = hashlib.md5(seq).hexdigest()
                md5_key2 = hashlib.md5(seq + "\n").hexdigest()
                # if neither candidate exists, subdir keeps the last one and
                # the sequence is reported as unfinished below
                for md5_key in [md5_key1, md5_key2]:
                    dir1 = md5_key[:2]
                    dir2 = md5_key[2:4]
                    datapath_this_seq = "%s%s%s%s%s%s%s" % (
                        path_result, os.sep, dir1, os.sep, dir2, os.sep,
                        md5_key)
                    subdir = "%s/%s" % (datapath_this_seq, subdirname)
                    if os.path.exists(subdir):
                        break
            else:
                subdirname = "seq_%d" % (iseq)
                subdir = "%s/%s" % (path_result, subdirname)

            if g_params['verbose']:
                print("subdir = %s" % (subdir))

            rstfile = "%s/Topcons/topcons.top" % (subdir)
            if not os.path.exists(rstfile):
                # write unfinished
                fpout_unfinished_fa.write(">%s\n%s\n" % (desp, seq))
                cntUnFinished += 1
                continue

            writeln(fpout, "Sequence number: %d" % (iseq + 1))
            writeln(fpout, "Sequence name: %s" % (desp))
            writeln(fpout, "Sequence length: %d aa." % (length))
            writeln(fpout, "Sequence:\n%s\n\n" % (seq))
            topo_consensus = ""
            for method in methodlist:
                seqid = ""
                top = ""
                if method == "TOPCONS":
                    topfile = "%s/%s/topcons.top" % (subdir, "Topcons")
                elif method == "Philius":
                    topfile = "%s/%s/query.top" % (subdir, "philius")
                elif method == "SCAMPI":
                    topfile = "%s/%s/query.top" % (subdir, method + "_MSA")
                else:
                    topfile = "%s/%s/query.top" % (subdir, method)
                if os.path.exists(topfile):
                    (seqid, seqanno, top) = myfunc.ReadSingleFasta(topfile)
                if top == "":
                    top = "***No topology could be produced with this method***"

                if method == "TOPCONS":
                    topo_consensus = top

                if method == "Homology":
                    # label the entry with the ID read from the topology
                    # file when there is one
                    showtext_homo = method
                    if seqid != "":
                        showtext_homo = seqid
                    writeln(fpout, "%s:\n%s\n\n" % (showtext_homo, top))
                else:
                    writeln(fpout, "%s predicted topology:\n%s\n\n" % (method,
                                                                       top))

            if isWriteDG:
                dgfile = "%s/dg.txt" % (subdir)
                dg_content = ""
                if os.path.exists(dgfile):
                    dg_content = myfunc.ReadFile(dgfile)
                # keep only the lines that start with a digit
                dglines = [
                    line for line in dg_content.split("\n")
                    if line and line[0].isdigit()
                ]
                if len(dglines) > 0:
                    writeln(fpout, "\nPredicted Delta-G-values (kcal/mol) "
                            "(left column=sequence position; right column=Delta-G)\n")
                    writeln(fpout, "\n".join(dglines))

            if isWriteRel:
                reliability_file = "%s/Topcons/reliability.txt" % (subdir)
                reliability = ""
                if os.path.exists(reliability_file):
                    reliability = myfunc.ReadFile(reliability_file)
                if reliability != "":
                    writeln(fpout, "\nPredicted TOPCONS reliability (left "
                            "column=sequence position; right column=reliability)\n")
                    writeln(fpout, reliability)

            writeln(fpout, "##############################################################################")

            # write the consensus prediction in FASTA format
            writeln(fpout_fa, ">%s" % (desp))
            writeln(fpout_fa, topo_consensus)

        # a single unfinished sequence deserves the warning as well
        if cntUnFinished > 0:
            writeln(sys.stderr, "%s out of %d sequences are with unfinished predictions, please check." % (
                cntUnFinished, numseq))
    finally:
        close_quietly(opened)

    return 0
예제 #7
0
def DumpPredictionTOPCONS2(seqfile, path_result, outfile, isWriteDG,
                           isWriteRel):  #{{{
    """Dump the predictions of all sequences into a text report.

    Two files are written:

    * outfile: per sequence, its number, name, length and residues followed
      by the topology of every method in the method list, optionally the
      Delta-G values (isWriteDG) and the TOPCONS reliability (isWriteRel);
    * outfile + ".fa": the TOPCONS consensus topology of every sequence in
      FASTA format.

    The results of sequence number i (counted from 0) are read from
    path_result/seq_<i>.  A method without a topology file, or with an empty
    topology, is reported with a fixed "no topology" text.

    Both files are closed on every path, including errors.

    Returns:
        0 on success, 1 when one of the output files cannot be opened.
    """
    (seqidlist, seqannolist, seqlist) = myfunc.ReadFasta(seqfile)
    outfile_fa = "%s.fa" % (outfile)

    def writeln(fp, text):
        # write text followed by a newline
        fp.write("%s\n" % (text,))

    def close_quietly(fp):
        try:
            fp.close()
        except IOError:
            pass

    try:
        fpout = open(outfile, "w")
    except IOError:
        writeln(sys.stderr, "Failed to write to file \"%s\"" % (outfile))
        return 1

    try:
        fpout_fa = open(outfile_fa, "w")
    except IOError:
        writeln(sys.stderr, "Failed to write to file \"%s\"" % (outfile_fa))
        # the report file is already open at this point
        close_quietly(fpout)
        return 1

    methodlist = [
        'TOPCONS', 'OCTOPUS', 'Philius', 'PolyPhobius', 'SCAMPI', 'SPOCTOPUS',
        'Homology'
    ]

    try:
        for iseq in range(len(seqidlist)):
            subdirname = "seq_%d" % (iseq)
            subdir = "%s/%s" % (path_result, subdirname)
            seq = seqlist[iseq]
            length = len(seq)
            desp = seqannolist[iseq]
            writeln(fpout, "Sequence number: %d" % (iseq + 1))
            writeln(fpout, "Sequence name: %s" % (desp))
            writeln(fpout, "Sequence length: %d aa." % (length))
            writeln(fpout, "Sequence:\n%s\n\n" % (seq))
            topo_consensus = ""
            # iterate over the names directly so that the sequence index is
            # not overwritten by an inner counter
            for method in methodlist:
                seqid = ""
                top = ""
                if method == "TOPCONS":
                    topfile = "%s/%s/topcons.top" % (subdir, "Topcons")
                elif method == "Philius":
                    topfile = "%s/%s/query.top" % (subdir, "philius")
                elif method == "SCAMPI":
                    topfile = "%s/%s/query.top" % (subdir, method + "_MSA")
                else:
                    topfile = "%s/%s/query.top" % (subdir, method)
                if os.path.exists(topfile):
                    (seqid, seqanno, top) = myfunc.ReadSingleFasta(topfile)
                if top == "":
                    top = "***No topology could be produced with this method***"

                if method == "TOPCONS":
                    topo_consensus = top

                if method == "Homology":
                    # label the entry with the ID read from the topology
                    # file when there is one
                    showtext_homo = method
                    if seqid != "":
                        showtext_homo = seqid
                    writeln(fpout, "%s:\n%s\n\n" % (showtext_homo, top))
                else:
                    writeln(fpout, "%s predicted topology:\n%s\n\n" % (method,
                                                                       top))

            if isWriteDG:
                dgfile = "%s/dg.txt" % (subdir)
                dg_content = ""
                if os.path.exists(dgfile):
                    dg_content = myfunc.ReadFile(dgfile)
                # keep only the lines that start with a digit
                dglines = [
                    line for line in dg_content.split("\n")
                    if line and line[0].isdigit()
                ]
                if len(dglines) > 0:
                    writeln(fpout, "\nPredicted Delta-G-values (kcal/mol) "
                            "(left column=sequence position; right column=Delta-G)\n")
                    writeln(fpout, "\n".join(dglines))

            if isWriteRel:
                reliability_file = "%s/Topcons/reliability.txt" % (subdir)
                reliability = ""
                if os.path.exists(reliability_file):
                    reliability = myfunc.ReadFile(reliability_file)
                if reliability != "":
                    writeln(fpout, "\nPredicted TOPCONS reliability (left "
                            "column=sequence position; right column=reliability)\n")
                    writeln(fpout, reliability)

            writeln(fpout, "##############################################################################")

            # write the consensus prediction in FASTA format
            writeln(fpout_fa, ">%s" % (desp))
            writeln(fpout_fa, topo_consensus)
    finally:
        close_quietly(fpout)
        close_quietly(fpout_fa)

    return 0