# Directory that holds the KDD Cup 2012 Track 1 data files.
path = 'E://workspace//kdd cup 2012//KDD Cup Track 1 Data//track1//'

# Full location of every data file; each variable is named after its file.
(rec_log_train, user_profile, item, user_action,
 user_sns, user_key_word, rec_log_test) = (
    path + file_name
    for file_name in (
        'rec_log_train.txt',
        'user_profile.txt',
        'item.txt',
        'user_action.txt',
        'user_sns.txt',
        'user_key_word.txt',
        'rec_log_test.txt',
    )
)

ISOTIMEFORMAT='%Y-%m-%d %X'

print 'begian to read file |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )

items = data.read_filedata(user_action,'ALL','\t')
f = open('user_action_new.txt','w+')

print 'read complite!begian to process data |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )

a={}
for it in items:
    if it[0] not in a:a[it[0]]=[0,0,0,0]
    a[it[0]][0]+=1
    a[it[0]][1]+=int(it[2])
    a[it[0]][2]+=int(it[3])
    a[it[0]][3]+=int(it[4])

print 'process complite!begian to write new file |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )
Example #2
0
# Directory that holds the KDD Cup 2012 Track 1 data files.
path = 'G://kdd cup 2012//KDD Cup Track 1 Data//track1//'

# Full location of every data file; each variable is named after its file.
(rec_log_train, user_profile, item, user_action,
 user_sns, user_key_word, rec_log_test) = (
    path + file_name
    for file_name in (
        'rec_log_train.txt',
        'user_profile.txt',
        'item.txt',
        'user_action.txt',
        'user_sns.txt',
        'user_key_word.txt',
        'rec_log_test.txt',
    )
)

ISOTIMEFORMAT='%Y-%m-%d %X'

print 'begian to read file |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )

items = data.read_filedata(user_sns,'ALL','\t')
f = open('user_sns_new.txt','w+')

print 'read complite!begain to process data |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )

a={}
for it in items:
    if it[0] in a:
        a[it[0]]+=1
    else:
        a[it[0]]=1

print 'process complite!begain to write new file |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )
Example #3
0
    ["p", "USA", "yes", 19, "Basic"],
    ["q", "China", "no", 20, "None"],
    ["r", "UK", "yes", 21, "Basic"],
    ["s", "China", "no", 18, "Premium"],
    ["t", "China", "no", 17, "None"],
]

# Small hand-written sample table: one list of five string fields per row
# (note that the fourth field is a numeric string here, not an int).
my_data2 = [
    "a USA yes 18 None".split(),
    "b France yes 23 Premium".split(),
    "c USA yes 24 Basic".split(),
    "d France yes 23 Basic".split(),
]


# Load the training and test sets. Both calls pass 'ALL', a comma separator
# and the index list [0, 1, 2, 3] to data.read_filedata (presumably the
# columns to treat specially -- confirm against that helper).
train_flowers = data.read_filedata(
    "..//data//train_data.txt", "ALL", ",", [0, 1, 2, 3]
)
test_flowers = data.read_filedata(
    "..//data//test_data.txt", "ALL", ",", [0, 1, 2, 3]
)

# Build a decision tree from the training rows and dump it for inspection.
tree = DecisionTree(train_flowers)
treepredict.buildtree(tree)
tree.printTree()

# Tallies of correct / incorrect predictions for the evaluation loop below.
right = wrong = 0
for flower in test_flowers:
    result = treepredict.predic(tree, flower)
    if flower[-1] in result:
        if right == 49:
            pass
        right += 1
    else:
Example #4
0
'''

@author: WQ
'''

import treepredict
import data
from DecisionTree import DecisionTree

# Data directory (relative to the working directory) and the Weibo training
# file inside it.
path = '..//data//'
train_file = ''.join((path, 'weibo_train_data.txt'))

# Read the training file with '|' as the separator. 'ALL' and [5, 7, 8] are
# passed straight to data.read_filedata (presumably "all rows" and a column
# index list -- confirm against that helper).
train_main = data.read_filedata(train_file, 'ALL', '|', [5, 7, 8])

# Dump the parsed rows for inspection.
print(train_main)

#for row in train_main:
#    row.pop(0)
#    row.pop(0)
#    row.pop(1)
#    row.pop(1)
#    row.append(row[0])
#    row.pop(0)

#print train_main
 
#tree = DecisionTree(train_main)
#treepredict.buildtree(tree)
#treepredict.prune(tree, 0.3)
#tree.printTree()
Example #5
0
# Directory that holds the KDD Cup 2012 data files used by this script.
# An earlier assignment pointing at the 'track1//' sub-directory was a dead
# store (it was overwritten by this value before any use), so only the
# effective value is kept.
path = 'E://workspace//kdd cup 2012//KDD Cup Track 1 Data//'

# Full location of every data file; each variable is named after its file.
rec_log_train = path + 'rec_log_train.txt'
user_profile = path + 'user_profile.txt'
item = path + 'item.txt'
user_action = path + 'user_action.txt'
user_sns = path + 'user_sns.txt'
user_key_word = path + 'user_key_word.txt'
rec_log_test = path + 'rec_log_test.txt'

ISOTIMEFORMAT='%Y-%m-%d %X'

print 'begian to read file |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )

items = data.read_filedata(rec_log_test,1000,'\t')
f = open('test_new.txt','w+')

print 'read complite!begain to process data |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )

print 'process complite!begain to write new file |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )

print items

a={'-1':0,'1':0}

for it in items:
    f.write(it[0]+'|'+it[1]+'|'+it[2]+'|'+it[3]+'\n')
#    a[it[2]]+=1
# Directory that holds the KDD Cup 2012 Track 1 data files.
path = 'E://workspace//kdd cup 2012//KDD Cup Track 1 Data//track1//'

# Full location of every data file; each variable is named after its file.
(rec_log_train, user_profile, item, user_action,
 user_sns, user_key_word, rec_log_test) = (
    path + file_name
    for file_name in (
        'rec_log_train.txt',
        'user_profile.txt',
        'item.txt',
        'user_action.txt',
        'user_sns.txt',
        'user_key_word.txt',
        'rec_log_test.txt',
    )
)

ISOTIMEFORMAT='%Y-%m-%d %X'

print 'begian to read file |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )

items = data.read_filedata(user_key_word,'ALL','\t')
f = open('user_key_word_new.txt','w+')

print 'read complite!begian to write new file |',
print time.strftime( ISOTIMEFORMAT, time.localtime( time.time() ) )

for i in range(len(items)):
    items[i][1] = len(items[i][1].split(';'))
    if items[i][1] == 1 : items[i][1]=0
    f.write(items[i][0]+'|'+str(items[i][1])+'\n')
#a = {}
#for it in items:(
#    if it[0] not in a: a[it[0]] = 0
#    a[it[0]]+=1

print 'finish |',