def MatchTopoPairAln(queryTopoFile,alignFile, targetsTopologyFile, fpout):#{{{
    """Map topologies onto pairwise alignments and print the aligned pairs.

    Reads the query topology from queryTopoFile, the alignments from
    alignFile (via ReadNeedleAlignment) and the target topologies from
    targetsTopologyFile.  For the i-th target, the i-th alignment record is
    used: every non-gap column of 'alnseq1' consumes the next character of
    the query topology and every non-gap column of 'alnseq2' consumes the
    next character of the target topology; gap columns are written as '-'.

    The result is printed to fpout.  A warning goes to stderr when the
    target ID differs from the 'seqid2' field of the alignment record.
    Any exception is reported on stderr and re-raised.  Returns 0.
    """
    try:
        (queryID, queryAnnotation,
         queryTopology) = myfunc.ReadSingleFasta(queryTopoFile)
        # read in alignment
        alignments = ReadNeedleAlignment(alignFile)
        # read in topologies
        (targetIDList, targetAnnotationList,
         targetTopoList) = myfunc.ReadFasta(targetsTopologyFile)

        # match and print the result
        print >> fpout, "#Number of alignments: %d" % len(targetIDList)
        for idx, targetID in enumerate(targetIDList):
            record = alignments[idx]
            queryRow = record['alnseq1']
            targetRow = record['alnseq2']
            if targetID != record['seqid2']:
                print >> sys.stderr, "seqID does not match, record %d" % idx

            queryCols = []
            targetCols = []
            queryPos = 0   # next unread position in the query topology
            targetPos = 0  # next unread position in the target topology
            for col in range(len(queryRow)):
                queryIsGap = queryRow[col] == '-'
                targetIsGap = targetRow[col] == '-'

                if queryIsGap:
                    queryCols.append('-')
                else:
                    queryCols.append(queryTopology[queryPos])
                    queryPos += 1

                if targetIsGap:
                    targetCols.append('-')
                else:
                    targetCols.append(targetTopoList[idx][targetPos])
                    targetPos += 1

            # print the result
            print >> fpout, "#Topology alignment %d" % (idx + 1)
            print >> fpout, ">%s" % queryAnnotation
            print >> fpout, "%s" % "".join(queryCols)
            print >> fpout, ">%s" % targetAnnotationList[idx]
            print >> fpout, "%s" % "".join(targetCols)
            print >> fpout
    except:
        # report which function failed, then let the caller see the error
        print >> sys.stderr, "except for the function:%s" % sys._getframe().f_code.co_name
        raise
    return 0
def MoveCache_mode_result(outpath_this_seq):#{{{
    """Move a per-sequence result folder into the md5-keyed cache and link it back.

    The sequence is read from "<outpath_this_seq>/seq.fa"; its md5 hex digest
    selects the cache location "<path_cache>/<first two hex chars>/<digest>".
    If that cache folder does not exist yet the result folder is moved there
    with "mv -f", otherwise the result folder is removed with "rm -rf".
    Afterwards a relative symbolic link named like the original folder is
    created in the parent of the resolved result folder, pointing at the
    cache folder.

    Relies on the module-level names path_cache, isChangeOwner,
    apacheusername_uid, apacheusername_gid and VERBOSE.

    NOTE: the process working directory is changed to the parent of the
    result folder while creating the link and is not restored.
    NOTE(review): hashlib.md5(seq) requires bytes on Python 3 -- confirm what
    myfunc.ReadSingleFasta returns there.
    """

    def _run_reported(cmd):
        # Run cmd; a non-zero exit status is printed instead of raised.
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError as e:
            print(e)
        if VERBOSE > 0:
            print(" ".join(cmd))

    # take the link name before realpath() resolves any symlink in the path
    subfoldername_this_seq = os.path.basename(outpath_this_seq)
    outpath_this_seq = os.path.realpath(outpath_this_seq)
    outpath_result = os.path.dirname(outpath_this_seq)
    fafile = "%s/seq.fa"%(outpath_this_seq)

    if not os.path.exists(fafile):
        print("fafile %s does not exist"%(fafile))
        return

    (seqid, seqanno, seq) = myfunc.ReadSingleFasta(fafile)
    md5_key = hashlib.md5(seq).hexdigest()
    sub_md5_name = md5_key[:2]
    sub_cachedir = "%s/%s"%(path_cache, sub_md5_name)
    cachedir = "%s/%s/%s"%(path_cache, sub_md5_name, md5_key)

    if not os.path.exists(sub_cachedir):
        os.makedirs(sub_cachedir)
        if isChangeOwner:
            os.chown(sub_cachedir, apacheusername_uid, apacheusername_gid)

    if not os.path.exists(cachedir):
        _run_reported(["mv", "-f", outpath_this_seq, cachedir])
    else:
        print("cachedir %s already exists for %s"%(cachedir, outpath_this_seq))
        _run_reported(["rm", "-rf", outpath_this_seq])

    # create symbolic link to the cache
    if not os.path.exists(outpath_this_seq) and os.path.exists(cachedir):
        rela_path = os.path.relpath(cachedir, outpath_result) #relative path
        try:
            os.chdir(outpath_result)
            os.symlink(rela_path, subfoldername_this_seq)
            if isChangeOwner:
                os.lchown(subfoldername_this_seq, apacheusername_uid, apacheusername_gid)
        except OSError as e:
            # linking stays best-effort; only report the reason when verbose
            if VERBOSE > 0:
                print(e)
        if VERBOSE > 0:
            print(outpath_result, "os.symlink(", rela_path, ",", subfoldername_this_seq,")")
def WriteTextResultFile(outfile, outpath_result, maplist, runtime_in_sec, statfile=""):#{{{
    """Write the predicted topology of every mapped sequence to outfile.

    Each item of maplist is a tab-separated line; field 0 is the name of the
    per-sequence sub-folder below outpath_result and field 2 is the sequence
    description (fields 1 and 3, the length and the sequence in the original
    code, are not used for the output).  For every item the topology is read
    from "<outpath_result>/<sub-folder>/query_topologies.txt" and written to
    outfile as a FASTA record headed by the description.

    When statfile is not empty, the number of sequences for which
    myfunc.CountTM reports more than zero is written to it as
    "numTMPro<TAB><count>".

    runtime_in_sec is accepted but not used.  An IOError is reported on
    stdout and swallowed; both files are closed on every path.
    """
    fpout = None
    fpstat = None
    try:
        fpout = open(outfile, "w")
        if statfile != "":
            fpstat = open(statfile, "w")

        numTMPro = 0
        for line in maplist:
            strs = line.split('\t')
            subfoldername = strs[0]
            desp = strs[2]
            outpath_this_seq = "%s/%s"%(outpath_result, subfoldername)
            predfile = "%s/query_topologies.txt"%(outpath_this_seq)
            g_params['runjob_log'].append("predfile = %s.\n"%(predfile))
            if not os.path.exists(predfile):
                g_params['runjob_log'].append("predfile %s does not exist\n"%(predfile))
            # The read is attempted even when the file is missing, as before.
            # NOTE(review): presumably ReadSingleFasta copes with that -- confirm.
            (seqid, seqanno, top) = myfunc.ReadSingleFasta(predfile)
            fpout.write(">%s\n%s\n"%(desp, top))
            if myfunc.CountTM(top) > 0:
                numTMPro += 1

        if fpstat:
            fpstat.write("numTMPro\t%d\n"%(numTMPro))
    except IOError:
        print("Failed to write to file %s"%(outfile))
    finally:
        # the original never closed fpout and leaked fpstat on errors
        for fp in (fpout, fpstat):
            if fp is not None:
                try:
                    fp.close()
                except IOError:
                    pass
def Labeltopologyfastaseq(queryTopoFile, alignFile, fastaFile, fpout): #{{{
    """Copy a FASTA file to fpout, adding a topology label after each record.

    The query topology is read from queryTopoFile and the alignments from
    alignFile (via ReadNeedleAlignment); GetTopologyLabels turns them into a
    mapping from sequence ID to label string.  Every record of fastaFile is
    echoed unchanged to fpout and followed by a line "/<label>/", where the
    label is looked up by the ID that myfunc.GetSeqIDFromAnnotation extracts
    from the header line.  A warning is written to stderr when the label and
    the amino acid sequence differ in length.

    Lines that precede the first '>' header are skipped and not echoed.
    Any exception is reported on stderr and re-raised.  Returns 0.
    """
    try:
        (queryID, queryAnnotation,
         queryTopology) = myfunc.ReadSingleFasta(queryTopoFile)
        # read in alignment
        alns = ReadNeedleAlignment(alignFile)
        topologyLabels = GetTopologyLabels(queryTopology, alns)

        with open(fastaFile, "r") as fpin:
            lines = fpin.readlines()

        numLines = len(lines)
        i = 0
        while i < numLines:
            line = lines[i]
            if not line.startswith('>'):
                # Not inside a record (e.g. a blank line before the first
                # header).  The index must still advance: the original left
                # it unchanged here and looped forever.
                i += 1
                continue

            seqID = myfunc.GetSeqIDFromAnnotation(line)
            fpout.write("%s" % line)
            i += 1

            # echo the sequence lines of this record while measuring them
            aaSeqLength = 0
            while i < numLines and not lines[i].startswith('>'):
                fpout.write("%s" % lines[i])
                aaSeqLength += len(lines[i].strip())
                i += 1

            label = topologyLabels[seqID]
            fpout.write("/%s/\n" % label)
            if aaSeqLength != len(label):
                sys.stderr.write("%s: length not match\n" % seqID)
    except:
        # report which function failed, then let the caller see the error
        sys.stderr.write("except for the function:%s\n" %
                         sys._getframe().f_code.co_name)
        raise
    return 0
def CreateProfile(seqfile, outpath_profile, outpath_result, tmp_outpath_result, timefile, runjob_errfile): #{{{
    """Build (or reuse) the profile of one sequence and link it into the result.

    The sequence read from seqfile is identified by its md5 hex digest.

    * Unless g_params['isForceRun'] is set, an existing entry
      "<path_md5cache>/<first two hex chars>/<digest>" is reused: a relative
      symlink named like basename(outpath_profile) is created in
      outpath_result and nothing is built.
    * Otherwise runscript is called with "-only-build-profile", writing to
      "<tmp_outpath_result>/<basename(outpath_profile)>".  Its run time is
      appended to timefile.  If the temporary folder exists afterwards it is
      moved to "<path_profile_cache>/<first two hex chars>/<digest>"
      (replacing an older entry), zipped when the move succeeded and
      webserver_common.IsFrontEndNode says so, and linked both from
      outpath_result and from the md5 cache.

    Messages are collected in g_params['runjob_log'] and
    g_params['runjob_err'].  Returns 1 when the temporary folder cannot be
    created, None otherwise.

    NOTE: the process working directory is changed while the symlinks are
    created and is left in the directory of the last link attempt.
    """
    (seqid, seqanno, seq) = myfunc.ReadSingleFasta(seqfile)
    subfoldername_profile = os.path.basename(outpath_profile)
    tmp_outpath_profile = "%s/%s" % (tmp_outpath_result, subfoldername_profile)

    md5_key = hashlib.md5(seq).hexdigest()
    md5_subfoldername = md5_key[:2]
    md5_subfolder = "%s/%s" % (path_md5cache, md5_subfoldername)
    md5_link = "%s/%s/%s" % (path_md5cache, md5_subfoldername, md5_key)

    if not g_params['isForceRun'] and os.path.exists(md5_link):
        # profile is cached already: just create a symlink to the cache
        rela_path = os.path.relpath(md5_link, outpath_result) #relative path
        os.chdir(outpath_result)
        os.symlink(rela_path, subfoldername_profile)
        return

    # build profiles
    if not os.path.exists(tmp_outpath_profile):
        try:
            os.makedirs(tmp_outpath_profile)
        except OSError:
            msg = "Failed to create folder %s" % (tmp_outpath_profile)
            myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
            return 1

    cmd = [
        runscript, "-fasta", seqfile, "-outpath", tmp_outpath_profile,
        "-only-build-profile"
    ]
    cmdline = " ".join(cmd)
    g_params['runjob_log'].append(cmdline)
    begin_time = time.time()
    try:
        rmsg = subprocess.check_output(cmd)
        g_params['runjob_log'].append("profile_building:\n" + rmsg + "\n")
    except subprocess.CalledProcessError as e:
        g_params['runjob_err'].append(str(e) + "\n")
        g_params['runjob_err'].append("cmdline: " + cmdline + "\n")
        # The output of the failed command travels with the exception; the
        # original logged a variable that was still empty at this point.
        g_params['runjob_err'].append("profile_building:\n" +
                                      (e.output or "") + "\n")
    end_time = time.time()
    runtime_in_sec = end_time - begin_time
    msg = "%s\t%f\n" % (subfoldername_profile, runtime_in_sec)
    myfunc.WriteFile(msg, timefile, "a")

    if not os.path.exists(tmp_outpath_profile):
        return

    subfolder_profile_cache = "%s/%s" % (path_profile_cache, md5_subfoldername)
    outpath_profile_cache = "%s/%s" % (subfolder_profile_cache, md5_key)
    if os.path.exists(outpath_profile_cache):
        shutil.rmtree(outpath_profile_cache)
    if not os.path.exists(subfolder_profile_cache):
        os.makedirs(subfolder_profile_cache)

    cmd = ["mv", "-f", tmp_outpath_profile, outpath_profile_cache]
    isCmdSuccess = False
    try:
        subprocess.check_output(cmd)
        isCmdSuccess = True
    except subprocess.CalledProcessError as e:
        msg = "Failed to run get profile for the target sequence %s" % (seq)
        g_params['runjob_err'].append(msg)
        g_params['runjob_err'].append(str(e) + "\n")

    if isCmdSuccess and webserver_common.IsFrontEndNode(
            g_params['base_www_url']):
        # make zip folder for the cached profile
        cwd = os.getcwd()
        os.chdir(subfolder_profile_cache)
        cmd = ["zip", "-rq", "%s.zip" % (md5_key), md5_key]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError as e:
            g_params['runjob_err'].append(str(e))
        finally:
            # go back even when zip cannot be started at all
            os.chdir(cwd)

    # create soft link for profile and for md5
    # first create a soft link for outpath_profile to outpath_profile_cache
    rela_path = os.path.relpath(outpath_profile_cache,
                                outpath_result) #relative path
    try:
        os.chdir(outpath_result)
        os.symlink(rela_path, subfoldername_profile)
    except OSError:
        pass  # best effort, e.g. the link exists already

    # then create a soft link for md5 to outpath_profile_cache
    if os.path.exists(md5_link):
        try:
            os.unlink(md5_link)
        except OSError:
            pass
    if not os.path.exists(md5_subfolder):
        try:
            os.makedirs(md5_subfolder)
        except OSError:
            pass
    rela_path = os.path.relpath(outpath_profile_cache,
                                md5_subfolder) #relative path
    try:
        os.chdir(md5_subfolder)
        os.symlink(rela_path, md5_key)
    except OSError:
        pass  # best effort, as above
def DumpPredictionTOPCONS2(seqfile, path_result, outfile, isWriteDG, isWriteRel): #{{{
    """Dump the predictions of all sequences in seqfile as text reports.

    Three files are written:
      outfile                 the full text report
      outfile.fa              the TOPCONS consensus topology in FASTA format
      outfile.unfinished.fa   the sequences without a finished prediction

    The result folder of a sequence is "<path_result>/seq_<index>", or, when
    g_params['resultPathFormat'] is "md5",
    "<path_result>/<d1>/<d2>/<md5>/seq_0" where <md5> is the digest of the
    sequence (tried first without, then with a trailing newline) and <d1>,
    <d2> are its first two character pairs.  A prediction counts as finished
    when "Topcons/topcons.top" exists in that folder.

    isWriteDG adds the lines of "dg.txt" that start with a digit; isWriteRel
    adds the content of "Topcons/reliability.txt".

    Returns 1 if one of the output files cannot be opened, otherwise 0.
    """
    (seqidlist, seqannolist, seqlist) = myfunc.ReadFasta(seqfile)
    numseq = len(seqidlist)
    outfile_fa = "%s.fa" % (outfile)
    outfile_unfinished_fa = "%s.unfinished.fa" % (outfile)

    handles = []
    for path in (outfile, outfile_fa, outfile_unfinished_fa):
        try:
            handles.append(open(path, "w"))
        except IOError:
            sys.stderr.write("Failed to write to file \"%s\"\n" % (path))
            # do not leak the files opened so far
            for fp in handles:
                try:
                    fp.close()
                except IOError:
                    pass
            return 1
    (fpout, fpout_fa, fpout_unfinished_fa) = handles

    methodlist = [
        'TOPCONS', 'OCTOPUS', 'Philius', 'PolyPhobius', 'SCAMPI', 'SPOCTOPUS',
        'Homology'
    ]
    # (sub-folder, file name) of the topology file for the methods that do
    # not follow the default "<method>/query.top"
    topfile_location = {
        'TOPCONS': ("Topcons", "topcons.top"),
        'Philius': ("philius", "query.top"),
        'SCAMPI': ("SCAMPI_MSA", "query.top"),
    }

    cntUnFinished = 0
    try:
        for iseq, (desp, seq) in enumerate(zip(seqannolist, seqlist)):
            length = len(seq)

            if g_params['resultPathFormat'] == "md5":
                candidate_keys = (hashlib.md5(seq).hexdigest(),
                                  hashlib.md5(seq + "\n").hexdigest())
                for md5_key in candidate_keys:
                    datapath_this_seq = os.sep.join(
                        [path_result, md5_key[:2], md5_key[2:4], md5_key])
                    subdir = "%s/%s" % (datapath_this_seq, "seq_0")
                    if os.path.exists(subdir):
                        break
            else:
                subdir = "%s/%s" % (path_result, "seq_%d" % (iseq))
            if g_params['verbose']:
                print("subdir = %s" % (subdir))

            rstfile = "%s/Topcons/topcons.top" % (subdir)
            if not os.path.exists(rstfile):
                # write unfinished
                fpout_unfinished_fa.write(">%s\n%s\n" % (desp, seq))
                cntUnFinished += 1
                continue

            fpout.write("Sequence number: %d\n" % (iseq + 1))
            fpout.write("Sequence name: %s\n" % (desp))
            fpout.write("Sequence length: %d aa.\n" % (length))
            fpout.write("Sequence:\n%s\n\n\n" % (seq))

            topo_consensus = ""
            for method in methodlist:
                (topdir, topname) = topfile_location.get(
                    method, (method, "query.top"))
                topfile = "%s/%s/%s" % (subdir, topdir, topname)
                seqid = ""
                top = ""
                if os.path.exists(topfile):
                    (seqid, seqanno, top) = myfunc.ReadSingleFasta(topfile)
                if top == "":
                    top = "***No topology could be produced with this method***"
                if method == "TOPCONS":
                    topo_consensus = top
                if method == "Homology":
                    # the homology entry is headed by the ID found in its
                    # topology file, when there is one
                    showtext_homo = seqid if seqid != "" else method
                    fpout.write("%s:\n%s\n\n\n" % (showtext_homo, top))
                else:
                    fpout.write("%s predicted topology:\n%s\n\n\n" %
                                (method, top))

            if isWriteDG:
                dgfile = "%s/dg.txt" % (subdir)
                dg_content = ""
                if os.path.exists(dgfile):
                    dg_content = myfunc.ReadFile(dgfile)
                dglines = [
                    dgline for dgline in dg_content.split("\n")
                    if dgline and dgline[0].isdigit()
                ]
                if dglines:
                    fpout.write("\nPredicted Delta-G-values (kcal/mol) "
                                "(left column=sequence position; right column=Delta-G)\n\n")
                    fpout.write("\n".join(dglines) + "\n")

            if isWriteRel:
                reliability_file = "%s/Topcons/reliability.txt" % (subdir)
                reliability = ""
                if os.path.exists(reliability_file):
                    reliability = myfunc.ReadFile(reliability_file)
                if reliability != "":
                    fpout.write("\nPredicted TOPCONS reliability (left "
                                "column=sequence position; right column=reliability)\n\n")
                    fpout.write(reliability + "\n")

            fpout.write("##############################################################################\n")

            # write the consensus prediction in FASTA format
            fpout_fa.write(">%s\n" % (desp))
            fpout_fa.write(topo_consensus + "\n")

        # Warn as soon as anything is unfinished; the original compared with
        # "> 1" and stayed silent for exactly one unfinished sequence.
        if cntUnFinished > 0:
            sys.stderr.write(
                "%s out of %d sequences are with unfinished predictions, please check.\n"
                % (cntUnFinished, numseq))
    finally:
        for fp in handles:
            try:
                fp.close()
            except IOError:
                pass
    return 0
def DumpPredictionTOPCONS2(seqfile, path_result, outfile, isWriteDG, isWriteRel): #{{{
    """Dump the predictions of all sequences in seqfile as a text report.

    Two files are written:
      outfile      the full text report
      outfile.fa   the TOPCONS consensus topology in FASTA format

    The result folder of the i-th sequence (counting from 0) is
    "<path_result>/seq_<i>".  For every method the topology is read from its
    file in that folder; a placeholder text is written when the file is
    missing or holds no topology.

    isWriteDG adds the lines of "dg.txt" that start with a digit; isWriteRel
    adds the content of "Topcons/reliability.txt".

    Returns 1 if one of the output files cannot be opened, otherwise 0.
    """
    (seqidlist, seqannolist, seqlist) = myfunc.ReadFasta(seqfile)
    outfile_fa = "%s.fa" % (outfile)

    handles = []
    for path in (outfile, outfile_fa):
        try:
            handles.append(open(path, "w"))
        except IOError:
            sys.stderr.write("Failed to write to file \"%s\"\n" % (path))
            # do not leak the file opened so far
            for fp in handles:
                try:
                    fp.close()
                except IOError:
                    pass
            return 1
    (fpout, fpout_fa) = handles

    methodlist = [
        'TOPCONS', 'OCTOPUS', 'Philius', 'PolyPhobius', 'SCAMPI', 'SPOCTOPUS',
        'Homology'
    ]
    # (sub-folder, file name) of the topology file for the methods that do
    # not follow the default "<method>/query.top"
    topfile_location = {
        'TOPCONS': ("Topcons", "topcons.top"),
        'Philius': ("philius", "query.top"),
        'SCAMPI': ("SCAMPI_MSA", "query.top"),
    }

    try:
        for iseq in range(len(seqidlist)):
            subdir = "%s/%s" % (path_result, "seq_%d" % (iseq))
            seq = seqlist[iseq]
            desp = seqannolist[iseq]

            fpout.write("Sequence number: %d\n" % (iseq + 1))
            fpout.write("Sequence name: %s\n" % (desp))
            fpout.write("Sequence length: %d aa.\n" % (len(seq)))
            fpout.write("Sequence:\n%s\n\n\n" % (seq))

            topo_consensus = ""
            # iterate the methods by value; the original reused the outer
            # loop variable as index here
            for method in methodlist:
                (topdir, topname) = topfile_location.get(
                    method, (method, "query.top"))
                topfile = "%s/%s/%s" % (subdir, topdir, topname)
                seqid = ""
                top = ""
                if os.path.exists(topfile):
                    (seqid, seqanno, top) = myfunc.ReadSingleFasta(topfile)
                if top == "":
                    top = "***No topology could be produced with this method***"
                if method == "TOPCONS":
                    topo_consensus = top
                if method == "Homology":
                    # the homology entry is headed by the ID found in its
                    # topology file, when there is one
                    showtext_homo = seqid if seqid != "" else method
                    fpout.write("%s:\n%s\n\n\n" % (showtext_homo, top))
                else:
                    fpout.write("%s predicted topology:\n%s\n\n\n" %
                                (method, top))

            if isWriteDG:
                dgfile = "%s/dg.txt" % (subdir)
                dg_content = ""
                if os.path.exists(dgfile):
                    dg_content = myfunc.ReadFile(dgfile)
                dglines = [
                    dgline for dgline in dg_content.split("\n")
                    if dgline and dgline[0].isdigit()
                ]
                if dglines:
                    fpout.write("\nPredicted Delta-G-values (kcal/mol) "
                                "(left column=sequence position; right column=Delta-G)\n\n")
                    fpout.write("\n".join(dglines) + "\n")

            if isWriteRel:
                reliability_file = "%s/Topcons/reliability.txt" % (subdir)
                reliability = ""
                if os.path.exists(reliability_file):
                    reliability = myfunc.ReadFile(reliability_file)
                if reliability != "":
                    fpout.write("\nPredicted TOPCONS reliability (left "
                                "column=sequence position; right column=reliability)\n\n")
                    fpout.write(reliability + "\n")

            fpout.write("##############################################################################\n")

            # write the consensus prediction in FASTA format
            fpout_fa.write(">%s\n" % (desp))
            fpout_fa.write(topo_consensus + "\n")
    finally:
        for fp in handles:
            try:
                fp.close()
            except IOError:
                pass
    return 0