path = 'E://workspace//kdd cup 2012//KDD Cup Track 1 Data//track1//' rec_log_train = path + 'rec_log_train.txt' user_profile = path + 'user_profile.txt' item = path + 'item.txt' user_action = path + 'user_action.txt' user_sns = path + 'user_sns.txt' user_key_word = path + 'user_key_word.txt' rec_log_test = path + 'rec_log_test.txt' ISOTIMEFORMAT='%Y-%m-%d %X' print 'begian to read file |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) ) items = data.read_filedata(user_action,'ALL','\t') f = open('user_action_new.txt','w+') print 'read complite!begian to process data |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) ) a={} for it in items: if it[0] not in a:a[it[0]]=[0,0,0,0] a[it[0]][0]+=1 a[it[0]][1]+=int(it[2]) a[it[0]][2]+=int(it[3]) a[it[0]][3]+=int(it[4]) print 'process complite!begian to write new file |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )
path = 'G://kdd cup 2012//KDD Cup Track 1 Data//track1//' rec_log_train = path + 'rec_log_train.txt' user_profile = path + 'user_profile.txt' item = path + 'item.txt' user_action = path + 'user_action.txt' user_sns = path + 'user_sns.txt' user_key_word = path + 'user_key_word.txt' rec_log_test = path + 'rec_log_test.txt' ISOTIMEFORMAT='%Y-%m-%d %X' print 'begian to read file |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) ) items = data.read_filedata(user_sns,'ALL','\t') f = open('user_sns_new.txt','w+') print 'read complite!begain to process data |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) ) a={} for it in items: if it[0] in a: a[it[0]]+=1 else: a[it[0]]=1 print 'process complite!begain to write new file |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )
["p", "USA", "yes", 19, "Basic"], ["q", "China", "no", 20, "None"], ["r", "UK", "yes", 21, "Basic"], ["s", "China", "no", 18, "Premium"], ["t", "China", "no", 17, "None"], ] my_data2 = [ ["a", "USA", "yes", "18", "None"], ["b", "France", "yes", "23", "Premium"], ["c", "USA", "yes", "24", "Basic"], ["d", "France", "yes", "23", "Basic"], ] train_flowers = data.read_filedata("..//data//train_data.txt", "ALL", ",", [0, 1, 2, 3]) test_flowers = data.read_filedata("..//data//test_data.txt", "ALL", ",", [0, 1, 2, 3]) tree = DecisionTree(train_flowers) treepredict.buildtree(tree) tree.printTree() right = 0 wrong = 0 for flower in test_flowers: result = treepredict.predic(tree, flower) if flower[-1] in result: if right == 49: pass right += 1 else:
'''
@author: WQ
'''
import treepredict
import data
from DecisionTree import DecisionTree

# Load the weibo training file and dump what was read to stdout.
path = '..//data//'
train_file = path + 'weibo_train_data.txt'

# '|' is passed as the separator argument; the meaning of 'ALL' and of
# [5, 7, 8] is defined by data.read_filedata, which is not visible here
# (presumably "read every row" and "keep these column indices" -- confirm).
train_main = data.read_filedata(train_file, 'ALL', '|', [5, 7, 8])
print(train_main)

# Disabled experiment, kept for reference: reorder each row's columns, then
# build, prune and print a decision tree.
#for row in train_main:
#    row.pop(0)
#    row.pop(0)
#    row.pop(1)
#    row.pop(1)
#    row.append(row[0])
#    row.pop(0)
#print train_main
#tree = DecisionTree(train_main)
#treepredict.buildtree(tree)
#treepredict.prune(tree, 0.3)
#tree.printTree()
path = 'E://workspace//kdd cup 2012//KDD Cup Track 1 Data//track1//' path = 'E://workspace//kdd cup 2012//KDD Cup Track 1 Data//' rec_log_train = path + 'rec_log_train.txt' user_profile = path + 'user_profile.txt' item = path + 'item.txt' user_action = path + 'user_action.txt' user_sns = path + 'user_sns.txt' user_key_word = path + 'user_key_word.txt' rec_log_test = path + 'rec_log_test.txt' ISOTIMEFORMAT='%Y-%m-%d %X' print 'begian to read file |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) ) items = data.read_filedata(rec_log_test,1000,'\t') f = open('test_new.txt','w+') print 'read complite!begain to process data |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) ) print 'process complite!begain to write new file |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) ) print items a={'-1':0,'1':0} for it in items: f.write(it[0]+'|'+it[1]+'|'+it[2]+'|'+it[3]+'\n') # a[it[2]]+=1
path = 'E://workspace//kdd cup 2012//KDD Cup Track 1 Data//track1//' rec_log_train = path + 'rec_log_train.txt' user_profile = path + 'user_profile.txt' item = path + 'item.txt' user_action = path + 'user_action.txt' user_sns = path + 'user_sns.txt' user_key_word = path + 'user_key_word.txt' rec_log_test = path + 'rec_log_test.txt' ISOTIMEFORMAT='%Y-%m-%d %X' print 'begian to read file |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) ) items = data.read_filedata(user_key_word,'ALL','\t') f = open('user_key_word_new.txt','w+') print 'read complite!begian to write new file |', print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) ) for i in range(len(items)): items[i][1] = len(items[i][1].split(';')) if items[i][1] == 1 : items[i][1]=0 f.write(items[i][0]+'|'+str(items[i][1])+'\n') #a = {} #for it in items:( # if it[0] not in a: a[it[0]] = 0 # a[it[0]]+=1 print 'finish |',