Example #1
0
def generate_interval():
    """Compute the time tags for the random test set and pickle them.

    Loads ``cf/Test/test_rand1.csv`` through ``DataSet``, hands it to
    ``to_dict_byUser`` together with the column list and ``-1``, feeds the
    resulting dict to ``TimeSpliter`` and stores whatever
    ``tag_all_user()`` returns in ``test_user_time_all.pkl`` via
    ``myPickle.save``.  Returns nothing.
    """
    records = DataSet('cf/Test/test_rand1.csv')
    field_names = [
        'id',
        'content_id',
        'class_name',
        'start',
        'end',
        'timespan',
        'user_id',
    ]
    # NOTE(review): the meaning of the trailing -1 is defined by
    # to_dict_byUser, which is not visible here -- confirm before changing.
    per_user = to_dict_byUser(records, field_names, -1)
    tags = TimeSpliter(per_user).tag_all_user()
    myPickle.save(tags, 'test_user_time_all.pkl')
Example #2
0
def main():
    """Build a content-name -> duration table from ``VideoInfo.csv`` and pickle it.

    The first three lines of the file are skipped unconditionally, and so is
    any later row with fewer than 26 comma-separated fields.  For each
    remaining row, field 1 is used as the content name and field 8 as its
    duration; when a name appears more than once, the first occurrence wins.
    A fixed set of durations is then written over whatever the file supplied
    and the resulting dict is saved to ``video.pkl`` with ``myPickle.save``.
    Returns nothing.
    """
    header_lines = 3
    min_fields = 26

    during_dict = {}
    # The context manager closes the handle even if a row raises; a bare
    # open() in the loop header would leave it to the garbage collector.
    with open('VideoInfo.csv', 'r') as video_info:
        for line_no, line in enumerate(video_info):
            if line_no < header_lines:
                continue
            # Split once per row instead of once per field access.
            fields = line.split(',')
            if len(fields) < min_fields:
                continue
            during_dict.setdefault(fields[1], fields[8])

    # Hard-coded durations that replace (or add to) the values read above.
    # NOTE(review): these are ints while the values taken from the file are
    # strings -- confirm that consumers of video.pkl cope with both.
    during_dict.update({
        'V0001008060161': 2659,
        'V0001008060159': 2615,
        'V0001008060162': 2688,
        'V0001008060163': 2666,
        'V0001008060165': 2608,
        'V0001008060166': 2672,
        'V0001008100071': 2627,
        'V0001008100072': 2642,
        'V1401003030026': 2767,
    })
    myPickle.save(during_dict, 'video.pkl')
Example #3
0
 # Fragment of a larger function body (its header is not part of this
 # snippet). Python 2 syntax: print is a statement and map() returns a list.
 #
 # Receive one 5-tuple from each source 1..nprocs-1 and fold it into the
 # accumulators that already exist in the enclosing scope.
 # NOTE(review): comm looks like an MPI communicator -- confirm.
 for p in range(1,nprocs):
     (DNTP_p,dsA_p,LV_p,LVP_p,TPS_p)=comm.recv(source=p)
     # dsA_p is kept as a separate entry (merged below); the other four
     # parts are concatenated onto their running lists.
     dsAr.append(dsA_p)
     DNTP.extend(DNTP_p)
     LV.extend(LV_p)
     LVP.extend(LVP_p)
     TPS.extend(TPS_p)
 # Combine the collected per-source entries into a single mapping.
 dsA=mergeDicts(dsAr)
 print 'Number of complexes',len(dsA)
 #print 'Complex wise AUC = ',np.mean(dA.values())
 # Transpose the values of dsA: p12[k] holds the k-th component of every
 # value. ps is the same list object as p1, so extend() appends p2 onto p1
 # in place.
 p12=map(list,zip(*dsA.values()));pa=p12[0];p1=p12[1];p2=p12[2];ps=p1;ps.extend(p2);
 # Means of the first components, and of the second and third components
 # pooled together.
 print 'Complex Wise AUC =',np.mean(pa),'Protein Wise AUC =',np.mean(ps)  
 # Everything below is skipped when auconly is truthy.
 if not auconly:
     # roc.roc_VA results are unpacked as 3-tuples; both are saved to ofname.
     # NOTE(review): names suggest (false positives, true positives, AUC) --
     # confirm against roc.roc_VA.
     (fplv,tplv,auclv)=roc.roc_VA(LV) 
     (fplvp,tplvp,auclvp)=roc.roc_VA(LVP) 
     mkl.save(ofname,((fplv,tplv,auclv),(fplvp,tplvp,auclvp)))
     print "AUC = ",auclv
     # The bare string literal below is an expression statement with no
     # effect; it holds exploratory code that has been disabled.
     """        
         plt.hist(np.array(DNTP).flatten(),[0,1,2,3,4,5,6,1000],cumulative=True);plt.grid();plt.xlabel('sequence distance');plt.ylabel('counts');plt.title('Number of top 200 predictions vs. sequence distance from nearest true positive');plt.show()
         [np.sum(dn<2.0) for dn in DNTP]
         cids=[getFileParts(getFileParts(ifile)[1])[1] for ifile in fs]
         [dsA[cid] for cid in cids]
         [dAo[cid] for cid in cids]
     """ 
     #DISTRIBUTION PLOT
     dthresh=[0,1,2,3,4] # sequence distance threshold    
     # Column 1 of TPS, plus one, is passed with DNTP and the thresholds.
     # NOTE(review): the +1 presumably turns a 0-based rank into a 1-based
     # one -- confirm against calcRFPP.
     XX=calcRFPP(np.array(TPS)[:,1]+1,DNTP,dthresh=dthresh)
     # Plotting is optional and only happens when doplot is truthy.
     if doplot:
         # Figure 1: tplv plotted against fplv; the title shows auclv*100.
         plt.figure();plt.plot(fplv,tplv);plt.xlabel('FP');plt.ylabel('TP');plt.grid();plt.title('ROC Curve: AUC =  %1.2f' % (auclv*100))
         # Figure 2: one box per entry of XX, placed at the matching dthresh
         # value, with y ticks every 10 from 0 to 200.
         plt.figure();plt.boxplot(tuple(XX),bootstrap=1000,positions=dthresh);plt.xlabel('Sequence Distance (D) from a TP'); plt.ylabel('Minimum rank of a prediction within distance D of a TP' );plt.title('Results of soft sequence distance threshold');plt.grid();plt.yticks(range(0,201,10));
         plt.show() 
Example #4
0
     TPS.extend(TPS_p)
 dsA = mergeDicts(dsAr)
 print 'Number of complexes', len(dsA)
 #print 'Complex wise AUC = ',np.mean(dA.values())
 p12 = map(list, zip(*dsA.values()))
 pa = p12[0]
 p1 = p12[1]
 p2 = p12[2]
 ps = p1
 ps.extend(p2)
 print 'Complex Wise AUC =', np.mean(pa), 'Protein Wise AUC =', np.mean(
     ps)
 if not auconly:
     (fplv, tplv, auclv) = roc.roc_VA(LV)
     (fplvp, tplvp, auclvp) = roc.roc_VA(LVP)
     mkl.save(ofname, ((fplv, tplv, auclv), (fplvp, tplvp, auclvp)))
     print "AUC = ", auclv
     """        
         plt.hist(np.array(DNTP).flatten(),[0,1,2,3,4,5,6,1000],cumulative=True);plt.grid();plt.xlabel('sequence distance');plt.ylabel('counts');plt.title('Number of top 200 predictions vs. sequence distance from nearest true positive');plt.show()
         [np.sum(dn<2.0) for dn in DNTP]
         cids=[getFileParts(getFileParts(ifile)[1])[1] for ifile in fs]
         [dsA[cid] for cid in cids]
         [dAo[cid] for cid in cids]
     """
     #DISTRIBUTION PLOT
     dthresh = [0, 1, 2, 3, 4]  # sequence distance threshold
     XX = calcRFPP(np.array(TPS)[:, 1] + 1, DNTP, dthresh=dthresh)
     if doplot:
         plt.figure()
         plt.plot(fplv, tplv)
         plt.xlabel('FP')