def generate_interval():
    """Run the per-user time tagging over the test CSV and pickle the result.

    Steps, all delegated to project helpers:
      1. Load 'cf/Test/test_rand1.csv' through ``DataSet``.
      2. Convert it with ``to_dict_byUser`` using the column list below and
         a trailing ``-1`` argument (its meaning is not visible in this
         block -- see ``to_dict_byUser``).
      3. Feed that dict to ``TimeSpliter`` and call ``tag_all_user()``.
      4. Store the returned object in 'test_user_time_all.pkl' via
         ``myPickle.save``.

    Takes no arguments and returns None; the only output is the pickle file.
    """
    field_names = ['id', 'content_id', 'class_name', 'start', 'end',
                   'timespan', 'user_id']
    per_user = to_dict_byUser(DataSet('cf/Test/test_rand1.csv'),
                              field_names, -1)
    tagged = TimeSpliter(per_user).tag_all_user()
    myPickle.save(tagged, 'test_user_time_all.pkl')
def main():
    """Build a content-name -> duration table from VideoInfo.csv and pickle it.

    The first three lines of 'VideoInfo.csv' are skipped (presumably header
    rows -- confirm against the file). Every remaining line is split on ','
    and lines with fewer than 26 fields are ignored. Field 1 is used as the
    content name and field 8 as its duration; only the first occurrence of
    a content name is kept. A fixed set of hand-entered durations is then
    written on top, replacing whatever the CSV supplied for those names, and
    the table is saved to 'video.pkl' through ``myPickle.save``.

    NOTE(review): durations read from the CSV remain strings, while the
    hand-entered overrides are ints; consumers of 'video.pkl' see both types.

    Takes no arguments and returns None.
    """
    during_dict = {}

    # ``with`` closes the CSV handle deterministically, even if parsing
    # raises; a bare open() in the loop header leaves it to the garbage
    # collector.
    with open('VideoInfo.csv', 'r') as video_info:
        for line_no, line in enumerate(video_info):
            if line_no < 3:
                continue
            fields = line.split(',')  # split once, reuse for every column
            if len(fields) < 26:
                continue
            # setdefault: the first row seen for a content name wins.
            during_dict.setdefault(fields[1], fields[8])

    # Hand-entered durations; these always take precedence over CSV values.
    manual_durations = [
        ('V0001008060161', 2659),
        ('V0001008060159', 2615),
        ('V0001008060162', 2688),
        ('V0001008060163', 2666),
        ('V0001008060165', 2608),
        ('V0001008060166', 2672),
        ('V0001008100071', 2627),
        ('V0001008100072', 2642),
        ('V1401003030026', 2767),
    ]
    during_dict.update(manual_durations)

    myPickle.save(during_dict, 'video.pkl')
# Gather-and-report step of the evaluation script (Python 2 syntax: `print`
# statements, and `map(...)` is subscripted, so it must return a list).
# NOTE(review): the line breaks of this code were collapsed; the summary below
# follows statement order only. Block nesting (what is inside the `for`, the
# `if not auconly:` and the `if doplot:`) must be confirmed against the
# original layout. Presumably this runs on the collecting process -- confirm.
#
#  1. For every p in 1..nprocs-1, receive a 5-tuple
#     (DNTP_p, dsA_p, LV_p, LVP_p, TPS_p) with comm.recv(source=p); append
#     dsA_p to dsAr and extend DNTP, LV, LVP and TPS with the other parts.
#  2. Merge dsAr into dsA with mergeDicts and print len(dsA) as the number of
#     complexes.
#  3. Transpose dsA.values() into p12; print the mean of p12[0] as the
#     complex-wise AUC and the mean of p12[1] followed by p12[2] as the
#     protein-wise AUC. `ps` is an alias of `p1`, so ps.extend(p2) also grows
#     p1 / p12[1] in place.
#  4. `if not auconly:` compute (fp, tp, auc) triples with roc.roc_VA for LV
#     and for LVP, save both triples to `ofname` through mkl.save, and print
#     auclv.
#  5. The triple-quoted string holds disabled exploratory code (histogram and
#     per-complex lookups); as a bare string it has no runtime effect.
#  6. XX = calcRFPP(np.array(TPS)[:,1]+1, DNTP, dthresh=[0,1,2,3,4]).
#  7. `if doplot:` draw the ROC curve (fplv vs. tplv, title formatted from
#     auclv*100), draw a box plot of XX at positions dthresh, then plt.show().
#
# NOTE(review): fplv, tplv and auclv are assigned only in step 4. If `doplot`
# can be true while `auconly` is true (depends on the nesting that is not
# visible here), the ROC plot in step 7 raises NameError -- confirm.
for p in range(1,nprocs): (DNTP_p,dsA_p,LV_p,LVP_p,TPS_p)=comm.recv(source=p) dsAr.append(dsA_p) DNTP.extend(DNTP_p) LV.extend(LV_p) LVP.extend(LVP_p) TPS.extend(TPS_p) dsA=mergeDicts(dsAr) print 'Number of complexes',len(dsA) #print 'Complex wise AUC = ',np.mean(dA.values()) p12=map(list,zip(*dsA.values()));pa=p12[0];p1=p12[1];p2=p12[2];ps=p1;ps.extend(p2); print 'Complex Wise AUC =',np.mean(pa),'Protein Wise AUC =',np.mean(ps) if not auconly: (fplv,tplv,auclv)=roc.roc_VA(LV) (fplvp,tplvp,auclvp)=roc.roc_VA(LVP) mkl.save(ofname,((fplv,tplv,auclv),(fplvp,tplvp,auclvp))) print "AUC = ",auclv """ plt.hist(np.array(DNTP).flatten(),[0,1,2,3,4,5,6,1000],cumulative=True);plt.grid();plt.xlabel('sequence distance');plt.ylabel('counts');plt.title('Number of top 200 predictions vs. sequence distance from nearest true positive');plt.show() [np.sum(dn<2.0) for dn in DNTP] cids=[getFileParts(getFileParts(ifile)[1])[1] for ifile in fs] [dsA[cid] for cid in cids] [dAo[cid] for cid in cids] """ #DISTRIBUTION PLOT dthresh=[0,1,2,3,4] # sequence distance threshold XX=calcRFPP(np.array(TPS)[:,1]+1,DNTP,dthresh=dthresh) if doplot: plt.figure();plt.plot(fplv,tplv);plt.xlabel('FP');plt.ylabel('TP');plt.grid();plt.title('ROC Curve: AUC = %1.2f' % (auclv*100)) plt.figure();plt.boxplot(tuple(XX),bootstrap=1000,positions=dthresh);plt.xlabel('Sequence Distance (D) from a TP'); plt.ylabel('Minimum rank of a prediction within distance D of a TP' );plt.title('Results of soft sequence distance threshold');plt.grid();plt.yticks(range(0,201,10)); plt.show()
TPS.extend(TPS_p) dsA = mergeDicts(dsAr) print 'Number of complexes', len(dsA) #print 'Complex wise AUC = ',np.mean(dA.values()) p12 = map(list, zip(*dsA.values())) pa = p12[0] p1 = p12[1] p2 = p12[2] ps = p1 ps.extend(p2) print 'Complex Wise AUC =', np.mean(pa), 'Protein Wise AUC =', np.mean( ps) if not auconly: (fplv, tplv, auclv) = roc.roc_VA(LV) (fplvp, tplvp, auclvp) = roc.roc_VA(LVP) mkl.save(ofname, ((fplv, tplv, auclv), (fplvp, tplvp, auclvp))) print "AUC = ", auclv """ plt.hist(np.array(DNTP).flatten(),[0,1,2,3,4,5,6,1000],cumulative=True);plt.grid();plt.xlabel('sequence distance');plt.ylabel('counts');plt.title('Number of top 200 predictions vs. sequence distance from nearest true positive');plt.show() [np.sum(dn<2.0) for dn in DNTP] cids=[getFileParts(getFileParts(ifile)[1])[1] for ifile in fs] [dsA[cid] for cid in cids] [dAo[cid] for cid in cids] """ #DISTRIBUTION PLOT dthresh = [0, 1, 2, 3, 4] # sequence distance threshold XX = calcRFPP(np.array(TPS)[:, 1] + 1, DNTP, dthresh=dthresh) if doplot: plt.figure() plt.plot(fplv, tplv) plt.xlabel('FP')