def generate_test_file(fileMR_test, file1_test, file2_test, file3_test):
    """Merge four test feature files into a single ``test_bayes.csv``.

    Every input is read with ``parse.read_test_data``, which is unpacked
    here as an ``(ids, values)`` pair.  One row is written per entry of the
    ids read from ``file3_test``; the row holds the running row index
    followed by the value at that index from each of the four inputs.

    Note that the ``m#id`` column receives the row counter, not the ids
    themselves.

    Args:
        fileMR_test: path of the file whose values fill the ``sent`` column.
        file1_test: path of the file whose values fill the ``rev`` column.
        file2_test: path of the file whose values fill the ``bus`` column.
        file3_test: path of the file whose values fill the ``user`` column;
            its ids also determine how many rows are written.

    Indexing fails (e.g. ``IndexError`` for lists) when any of the value
    sequences is shorter than the ids of ``file3_test``.
    """
    _, x0 = parse.read_test_data(fileMR_test)
    _, x1 = parse.read_test_data(file1_test)
    _, x2 = parse.read_test_data(file2_test)
    ids, x3 = parse.read_test_data(file3_test)

    # "wb" is kept as-is: binary mode is what the Python 2 csv module expects.
    # The context manager guarantees the rows are flushed and the handle is
    # closed even if a row fails to be written.
    with open("/Joel/Cursos/DataScience/yelp/votes_prediction/regression_multi/test_bayes.csv", "wb") as out:
        writer = csv.writer(out)
        writer.writerow(['m#id', 'sent','rev','bus','user'])

        for count, _id in enumerate(ids):
            writer.writerow([count, x0[count], x1[count], x2[count], x3[count]])
def generate_traininig_file(fileMR_train, file1_train, file2_train, file3_train):
    """Merge four training feature files into a single ``training_bayes.csv``.

    ``fileMR_train`` is read with ``parse.read_test_data`` (unpacked as
    ``(ids, values)``); the other three are read with
    ``parse.read_trainning_data`` (unpacked as ``(values, targets)``).  One
    row is written per id of ``fileMR_train``: the running row index, the
    value at that index from each input, and the target at that index.

    Only the targets returned for ``file3_train`` end up in the ``c#votes``
    column; the targets of the other two files are discarded.  The ``m#id``
    column receives the row counter, not the ids themselves.

    NOTE(review): ``fileMR_train`` goes through the *test* reader, presumably
    because that file carries no target column -- confirm against the parser.

    Args:
        fileMR_train: path of the file whose values fill the ``sent`` column;
            its ids determine how many rows are written.
        file1_train: path of the file whose values fill the ``rev`` column.
        file2_train: path of the file whose values fill the ``bus`` column.
        file3_train: path of the file whose values fill the ``user`` column
            and whose targets fill the ``c#votes`` column.

    Indexing fails (e.g. ``IndexError`` for lists) when any of the value or
    target sequences is shorter than the ids of ``fileMR_train``.
    """
    ids, x0 = parse.read_test_data(fileMR_train)
    x1, _ = parse.read_trainning_data(file1_train)
    x2, _ = parse.read_trainning_data(file2_train)
    x3, y = parse.read_trainning_data(file3_train)

    # "wb" is kept as-is: binary mode is what the Python 2 csv module expects.
    # The context manager guarantees the rows are flushed and the handle is
    # closed even if a row fails to be written.
    with open("/Joel/Cursos/DataScience/yelp/votes_prediction/regression_multi/training_bayes.csv", "wb") as out:
        writer = csv.writer(out)
        writer.writerow(['m#id', 'sent','rev','bus','user','c#votes'])

        for count, _id in enumerate(ids):
            writer.writerow([count, x0[count], x1[count], x2[count], x3[count], y[count]])
# Example #3
# 0
import Orange
import csv_parsing_utils as parse
import sys

# Fit a linear regression learner on the "training" table, then write one
# "id,prediction" line per row of the "test" table to results_reg.csv.
data = Orange.data.Table("training")
learner = Orange.regression.linear.LinearRegressionLearner()
model = learner(data)

test = Orange.data.Table("test")
# Only the ids are used from this file; presumably they are in the same order
# as the rows of `test` -- TODO confirm.
ids, x1 = parse.read_test_data("/Joel/Cursos/DataScience/yelp/data/user_review_joined_test.tab")

# Write to the file directly instead of rebinding sys.stdout: the original
# never restored stdout and its bare `f.close` (no parentheses) never closed
# the file.  The context manager closes it even if a prediction fails.
with open("results_reg.csv", "w") as f:
    for i in range(len(test)):
        c = model(test[i])
        f.write("%s,%s\n" % (ids[i], c))