Esempio n. 1
0
def TestModelOnData(modelname, fdata, flabel):
	"""Evaluate a model over every numbered shard of ``fdata``.

	Shard files are located with ``util.FilterFile`` using the pattern
	``<basename of fdata>.<digits>.csv``. Each shard is scored by
	``_ParTestModelOnData`` and the per-shard results are merged before the
	confusion counts, precision/recall/F1 and AUC are printed.

	Args:
		modelname: Model identifier, passed through to ``_ParTestModelOnData``.
		fdata: Data file name; its basename selects the shard files.
		flabel: Label file handed to ``com.GetBuySet`` to build the reference set.

	Returns:
		Tuple ``(TP, TN, FP, P, R, F1, pred_prob, Y_true)`` where ``pred_prob``
		and ``Y_true`` are the concatenation of the per-shard arrays.
	"""
	actual_set = com.GetBuySet(flabel)
	rec_set = set()

	f_base = util.file_basename(fdata)
	re_str = f_base.replace('.', r'\.') + r'\.\d+\.csv$'
	f_list = util.FilterFile(re_str)

	# Gather the per-shard arrays and concatenate once at the end. The previous
	# code concatenated onto names that were never initialised, which raised
	# on the very first shard.
	prob_parts = []
	true_parts = []
	for f in f_list:
		r, p, y = _ParTestModelOnData((modelname, f))
		rec_set |= r
		prob_parts.append(p)
		true_parts.append(y)

	# np.concatenate rejects an empty sequence, so fall back to empty arrays
	# when no shard file matched.
	pred_prob = np.concatenate(prob_parts) if prob_parts else np.array([])
	Y_true = np.concatenate(true_parts) if true_parts else np.array([])

	TP = len(rec_set & actual_set)
	# NOTE(review): ``rec_set - actual_set`` (recommended but not in the
	# reference set) and ``actual_set - rec_set`` (in the reference set but not
	# recommended) would conventionally be called FP and FN. The TN/FP names
	# are kept because the helpers below receive them positionally and their
	# signatures are not visible here -- confirm before renaming.
	TN = len(rec_set - actual_set)
	FP = len(actual_set - rec_set)

	PrintConfuseMatrix(TP, TN, FP)
	P, R, F1 = GetPRF1(TP, TN, FP)
	PrintPRF1(P, R, F1)

	print('AUC: %s' % roc_auc_score(Y_true.astype(int), pred_prob))

	return TP, TN, FP, P, R, F1, pred_prob, Y_true
Esempio n. 2
0
	fout = 'submit.%s.csv' % sys.argv[1]

	# Score every numbered shard of the merged feature file in parallel.
	# Each worker result is a set of (user_id, item_id) pairs to recommend.
	re_str = r'feature_total\.merge\.\d+\.csv$'
	f_list = util.FilterFile(re_str)

	pool = mp.Pool(com.__n_process)
	try:
		rec_set_list = pool.map(GenRecDataFromFeatureFile,
								[(sys.argv[1], f) for f in f_list])
	finally:
		# Release the worker processes even when a worker raises.
		pool.close()
		pool.join()

	# Union the per-shard sets so a pair appears only once in the output.
	rec_set = set()
	for r in rec_set_list:
		rec_set |= r

	# Write only after scoring succeeded; ``with`` closes the file even if a
	# row fails to serialise. 'wb' is the mode the Python 2 csv module expects.
	with open(fout, 'wb') as fo:
		fw = csv.writer(fo, delimiter=',')
		fw.writerow(['user_id','item_id'])
		for uid, tid in rec_set:
			fw.writerow([uid, tid])

	nrows = len(rec_set)
	print('recommand %d record.' % nrows)
	util.notify_me('recommand data are done! %d record.' % nrows)
Esempio n. 3
0
# coding:utf-8
import util

if __name__ == '__main__':
    # Print, for every file matching feature<digits>.csv, the file name
    # followed by each header column from the third one onwards.
    fs = util.FilterFile(r'feature\d*\.csv$')
    for f in fs:
        # Only the header row is needed; ``with`` closes the handle right away
        # instead of leaving it to the garbage collector.
        with open(f) as fh:
            # Drop the line ending so the last column name does not carry a
            # trailing newline (which used to print an extra blank line).
            header = fh.readline().rstrip('\r\n').split(',')
        print(f)
        # The first two columns are skipped -- presumably the key columns;
        # TODO confirm against the feature file layout.
        for it in header[2:]:
            print(it)
Esempio n. 4
0
                                               index=False)

        mod = 'a'
        header = False

        i = i + len(data)
        print 'process %d rows.' % i


if __name__ == '__main__':
    # A missing mode argument is handled like an unknown one (usage + exit)
    # instead of failing with IndexError.
    mode = sys.argv[1] if len(sys.argv) > 1 else None

    if mode == 'train':
        ff = 'feature.merge.csv'
        fl = 'label.csv'
        fd = 'data.csv'
    elif mode == 'test':
        ff = 'feature_test.merge.csv'
        fl = 'label_test.csv'
        fd = 'data.test.csv'
    elif mode == 'submit':
        # Only the feature file is set in this mode; ``fl`` and ``fd`` stay
        # undefined, exactly as before.
        ff = 'feature_total.merge.csv'
    else:
        print(__doc__)
        sys.exit()

    # Numbered shards of the selected feature file: <basename>.<digits>.csv
    fs = util.FilterFile(
        util.file_basename(ff).replace('.', r'\.') + r'\.\d+\.csv')

    pool = mp.Pool(com.__n_process)
    try:
        pool.map(FilterCSV, fs)
    finally:
        # Release the worker processes even when a worker raises.
        pool.close()
        pool.join()
Esempio n. 5
0
		
			
		data[train].to_csv(fname1, mode=mod, header = header,index = False)

		
		header = False
		mod = 'a'
		
		train_rows = np.sum(train) + train_rows

		train_trows = np.sum(data['buy'][train]==1) + train_trows

		rows = rows + len(data)
		print '[%s] process %d rows!' % (fn,rows)
		# print data.head()
		
	
	
	return (train_rows, train_trows)


if __name__ == '__main__':
	if sys.argv[1]=='train':
		# Run Sample over every data.<digits>.csv shard, targeting one shared
		# training file, and keep running totals of the two counts it returns.
		train_rows = 0
		train_trows = 0
		fs = util.FilterFile(r'data\.\d+\.csv')
		for f in fs:
			sampled, positive = Sample(f, 'data.train.csv')
			train_rows = train_rows + sampled
			train_trows = train_trows + positive

		print('sample %d rows, positive %d rows. ' % (train_rows, train_trows))
Esempio n. 6
0
# coding:utf-8

import util, os, sys
from multiprocessing import Pool

if __name__ == '__main__':
    # Run ``subset.py`` once per matching featureTEST_*.csv file, fanning the
    # shell commands out over a process pool.
    root = r'D:\zuoyuan\alibaba\csv'
    fs = util.FilterFile(r'featureTEST_\d+-\d+-\d+\.csv$', root=root)
    cmds = ['python subset.py %s 0,1,2,3,4,5,6' % os.path.join(root, f)
            for f in fs]
    #print cmds

    pool = Pool(50)
    try:
        # os.system returns each command's exit status; keep them so failures
        # are not silently discarded.
        statuses = pool.map(os.system, cmds)
    finally:
        # Release the worker processes even when a worker raises.
        pool.close()
        pool.join()

    # Report failed commands without aborting, so the run stays best-effort.
    for cmd, status in zip(cmds, statuses):
        if status != 0:
            sys.stderr.write('command failed (status %s): %s\n' % (status, cmd))
Esempio n. 7
0
# coding:utf-8
import os, sys, util


# Directory that is searched for the per-file subset CSVs.
root = r'D:\zuoyuan\alibaba\csv'
fs = util.FilterFile(r'feature.+subset_0_1_2_3_4_5_6\.csv$',root=root)

# List the files that matched.
for f in fs:
	print(f)

# NOTE(review): the script exits here, so the merge step below never runs.
# It is kept as-is; remove the sys.exit() call to actually perform the merge.
sys.exit()
inputs = [os.path.join(root,f) for f in fs]
target = os.path.join(root,'feature_0_1_2_3_4_5_6.csv')
cmd = 'python merge_fast.py ' + ' '.join(inputs) + ' ' + target
os.system(cmd)