def compare_motif(motif_filename1, motif_filename2):
    """
    Run test2() on the two motifs stored in <motif_filename1> and
    <motif_filename2> and return whatever test2() returns.

    Each file is loaded with read_motif() and passed through furnish_motif(),
    which yields a (consensus, rf) pair; per the original note this step
    removes gaps and furnishes the consensus so it's just ( ) .
    Only the first element returned by findPRs() is used for each consensus.
    """
    first_cons, first_rf = furnish_motif(*read_motif(motif_filename1))
    second_cons, second_rf = furnish_motif(*read_motif(motif_filename2))

    # keep only the first entry findPRs() reports for each consensus
    first_pr, second_pr = [findPRs(cons)[0] for cons in (first_cons, second_cons)]

    # the trailing arguments are the fixed settings this comparison always uses
    fixed_args = ('XV', 'VX', 2000, True, ['junk1'], ['junk2'])
    return test2(first_rf, second_rf,
                 first_cons, second_cons,
                 first_pr, second_pr,
                 *fixed_args)
def main():
    """
    For every entry <d> of MOTIF_DIR, append the furnished SS_cons of each
    motif found in <d> to a per-family comparison file.

    The output file is <STORE_DIR>/<family>.SS_cons.compare.txt, resolved
    relative to the parent of MOTIF_DIR. It is created the first time a
    family is seen and starts with a ">family" record holding
    db_summary[family]['SS_CONS']; every motif then adds a ">motif" record.

    Side effects: changes the current working directory to MOTIF_DIR (it is
    not restored) and reports progress on stderr. Relies on the module-level
    names MOTIF_DIR, STORE_DIR, dir_info and db_summary.
    """
    handles = {}  # family --> open output file, shared by all dirs of that family
    os.chdir(MOTIF_DIR)
    try:
        for d in os.listdir(os.path.curdir):
            fam = dir_info[d]['family']
            if fam not in handles:
                out_path = os.path.join(os.pardir, STORE_DIR,
                                        fam + '.SS_cons.compare.txt')
                handles[fam] = open(out_path, 'w+')
                # the family's own consensus goes first, once per file
                handles[fam].write(">{fam}\n{ss_cons}\n".format(
                    fam=fam, ss_cons=db_summary[fam]['SS_CONS']))
            out = handles[fam]

            for cmsearched in glob.iglob(d + "/*.cmsearched"):
                # the motif file is the .cmsearched file minus its extension
                motif_name = os.path.splitext(os.path.basename(cmsearched))[0]
                sys.stderr.write(
                    "extracting SS_cons from {0}....\n".format(motif_name))
                # CMFinder doesn't have the #=GC RF line, so just make a fake
                # XXXX... string since all we want is a furnished SS_cons
                ss_cons = miscCMF.read_motif(os.path.join(d, motif_name))[0]
                ss_cons = miscCMF.furnish_motif(ss_cons, 'X' * len(ss_cons))[0]
                out.write(">{motif}\n{ss_cons}\n".format(
                    motif=motif_name, ss_cons=ss_cons))
    finally:
        # always flush and release the per-family files, even on error
        for f in handles.values():
            f.close()
def calc_MCC(filename, fam, output_prefix, db_summary, score_cutoff=0.):
    """
    Read through the evaled file, and writes out to <output_prefix>.txt ---
    (per line) motif_name, rank, pscore, tp(count), fp, fn, tn, MCC,
    home-made-TP/FP score

    Also writes <output_prefix>.html, a VARNA applet page that draws every
    motif, ordered by decreasing MCC.

    filename     --- tab-delimited evaled file; each line has 4 to 6 columns:
                     motif_name, rank, pscore, tp_count[, tps[, fps]] where
                     tps / fps are comma-separated scores
    fam          --- key into <db_summary>
    db_summary   --- db_summary[fam]['TRUE'] and db_summary[fam]['CONTROLS']
                     are the totals from which FN and TN are derived
    score_cutoff --- only scores >= this cutoff are counted as TP / FP

    Motif files are read from motifs/<dir>/<motif_name> relative to the
    current working directory, where <dir> is the part of motif_name before
    its first '.' (or the whole name if there is none).

    Raises ValueError on a line that does not have 4, 5 or 6 columns.
    """
    import math
    import miscCMF

    chunk_to_write = []  # list of (MCC, chunk_dict)
    # both outputs are opened up front (as before) but are now always closed
    with open(output_prefix + '.html', 'w') as f_html:
        with open(output_prefix + '.txt', 'w') as f_out:
            f_out.write("MOTIF\tRANK\tPSCORE\tTP\tFP\tFN\tTN\tMCC\tMyScore\n")
            with open(filename) as f:
                for line in f:
                    motif_name, rank, pscore, tps, fps = _parse_evaled_line(line)

                    TP = len([x for x in tps if x >= score_cutoff])  # the rest are FN
                    FP = len([x for x in fps if x >= score_cutoff])  # the rest are TN
                    FN = db_summary[fam]['TRUE'] - TP
                    TN = db_summary[fam]['CONTROLS'] - FP
                    # max(1, ...) keeps the division defined when a marginal is 0
                    denom = max(1, math.sqrt(
                        (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)))
                    MCC = ((TP * TN) - (FP * FN)) * 1. / denom
                    myscore = calc_CMscan_separation_score(tps, fps) / max(1., TP)

                    fields = (motif_name, rank, pscore, TP, FP, FN, TN, MCC, myscore)
                    f_out.write('\t'.join(str(x) for x in fields) + '\n')

                    ind = motif_name.find('.')
                    motif_dir = motif_name[:ind] if ind > 0 else motif_name
                    motif_filename = "motifs/{0}/{1}".format(motif_dir, motif_name)
                    cons, rf = miscCMF.furnish_motif(
                        *miscCMF.read_motif(motif_filename))
                    chunk_to_write.append(
                        (MCC, {'motif_name': motif_name, 'cons': cons, 'rf': rf}))

        # sort chunk_to_write by decreasing order of MCC
        chunk_to_write.sort(key=lambda x: x[0], reverse=True)
        _write_varna_html(f_html, chunk_to_write)


def _parse_evaled_line(line):
    """
    Split one line of the evaled file into
    (motif_name, rank, pscore, tps, fps), with tps / fps as lists of floats.
    """
    raw = line.strip().split('\t')
    if len(raw) not in (4, 5, 6):
        raise ValueError("wacky!!! {0}".format(raw))
    # the tps and fps columns are optional; a missing one means "no scores"
    raw = raw + [''] * (6 - len(raw))
    # the tp_count column is read but unused: TP is recounted from the scores
    motif_name, rank, pscore, _tp_count, tps, fps = raw
    tps = [float(x) for x in tps.split(',')] if len(tps) > 0 else []
    fps = [float(x) for x in fps.split(',')] if len(fps) > 0 else []
    return motif_name, rank, pscore, tps, fps


def _write_varna_html(f_html, chunk_to_write):
    """
    Write one VARNA <applet> element to <f_html> with one drawing per
    (MCC, chunk_dict) entry of <chunk_to_write>, in the order given.
    """
    VARNA_APPLET_NAME = "VARNAv37.jar"
    COL_PER_VARNA = 2           # number of cols for Varna-applet motif drawing
    HEIGHT_PER_ROW_VARNA = 400  # per motif drawing height (px)
    WIDTH_VARNA = 1200          # width of the html page for Varna (px)

    N = len(chunk_to_write)
    # ceil(N / COL_PER_VARNA); // keeps this an integer on Python 2 and 3
    rows = N // COL_PER_VARNA + (N % COL_PER_VARNA > 0)
    f_html.write("""
<applet code="VARNA.class" codebase="." archive="{varna}" width="{width}" height="{height}">
<param name="rows" value="{rows}" />
<param name="columns" value="{columns}" />
""".format(
        varna=VARNA_APPLET_NAME,
        width=WIDTH_VARNA,
        height=rows * HEIGHT_PER_ROW_VARNA,
        columns=COL_PER_VARNA,
        rows=rows))

    # remember sequence/structureDBN<i> has to be 1-based!
    # so must i+1 when using enumerate
    for i, (MCC, chunk_dict) in enumerate(chunk_to_write):
        f_html.write("""
<param name="sequenceDBN{i}" value="{rf}" />
<param name="structureDBN{i}" value="{cons}" />
<param name="titleSize{i}" value="12" />
<param name="title{i}" value="{motif_name}(MCC {MCC:.2f})" />
""".format(
            i=i + 1,
            cons=chunk_dict['cons'],
            rf=chunk_dict['rf'],
            motif_name=chunk_dict['motif_name'],
            MCC=MCC))
    f_html.write("</applet>\n")