def generate_test_file(
        fileMR_test, file1_test, file2_test, file3_test,
        output_path="/Joel/Cursos/DataScience/yelp/votes_prediction/regression_multi/test_bayes.csv"):
    """Merge four per-model test outputs into a single CSV file.

    Each input file is read with ``parse.read_test_data``, which is unpacked
    here as an ``(ids, values)`` pair. One output row is written per entry
    of the ids read from ``file3_test``; the values of the four files are
    placed side by side in the columns ``sent``, ``rev``, ``bus`` and
    ``user``.

    Note that the first column (``m#id``) holds the 0-based row index, not
    the id parsed from the input files.

    Args:
        fileMR_test: Path whose values fill the ``sent`` column.
        file1_test: Path whose values fill the ``rev`` column.
        file2_test: Path whose values fill the ``bus`` column.
        file3_test: Path whose values fill the ``user`` column; its ids
            determine how many rows are written.
        output_path: Destination CSV path. Defaults to the location that
            was previously hard-coded.

    Raises:
        IndexError: If any value sequence is shorter than the ids read
            from ``file3_test``.
    """
    # Only the ids of the last file drive the row count; the ids returned
    # for the other files are not needed.
    _, x0 = parse.read_test_data(fileMR_test)
    _, x1 = parse.read_test_data(file1_test)
    _, x2 = parse.read_test_data(file2_test)
    ids, x3 = parse.read_test_data(file3_test)

    # Binary mode is what the Python 2 csv module expects. Under Python 3
    # this would have to become mode "w" with newline="".
    # The context manager guarantees the rows are flushed and the handle is
    # closed even if a row fails to serialise.
    with open(output_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['m#id', 'sent', 'rev', 'bus', 'user'])
        # Index explicitly (instead of zip) so that a length mismatch
        # between the inputs still fails loudly rather than truncating.
        for row_index, _ in enumerate(ids):
            writer.writerow([
                row_index,
                x0[row_index],
                x1[row_index],
                x2[row_index],
                x3[row_index],
            ])
def generate_traininig_file(
        fileMR_train, file1_train, file2_train, file3_train,
        output_path="/Joel/Cursos/DataScience/yelp/votes_prediction/regression_multi/training_bayes.csv"):
    """Merge four per-model training outputs into a single CSV file.

    ``fileMR_train`` is read with ``parse.read_test_data`` (unpacked as
    ``(ids, values)``); the other three files are read with
    ``parse.read_trainning_data`` (unpacked as ``(values, targets)``).
    One output row is written per id from ``fileMR_train``, with the four
    value sequences side by side in the columns ``sent``, ``rev``, ``bus``
    and ``user``, followed by the target in ``c#votes``.

    The targets written are those returned for ``file3_train``; the targets
    returned for the other two training files are discarded. The first
    column (``m#id``) holds the 0-based row index, not the parsed id.

    Args:
        fileMR_train: Path whose values fill the ``sent`` column; its ids
            determine how many rows are written.
        file1_train: Path whose values fill the ``rev`` column.
        file2_train: Path whose values fill the ``bus`` column.
        file3_train: Path whose values fill the ``user`` column and whose
            targets fill the ``c#votes`` column.
        output_path: Destination CSV path. Defaults to the location that
            was previously hard-coded.

    Raises:
        IndexError: If any value or target sequence is shorter than the
            ids read from ``fileMR_train``.
    """
    ids, x0 = parse.read_test_data(fileMR_train)
    x1, _ = parse.read_trainning_data(file1_train)
    x2, _ = parse.read_trainning_data(file2_train)
    x3, y = parse.read_trainning_data(file3_train)

    # Binary mode is what the Python 2 csv module expects. Under Python 3
    # this would have to become mode "w" with newline="".
    # The context manager guarantees the rows are flushed and the handle is
    # closed even if a row fails to serialise.
    with open(output_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['m#id', 'sent', 'rev', 'bus', 'user', 'c#votes'])
        # Index explicitly (instead of zip) so that a length mismatch
        # between the inputs still fails loudly rather than truncating.
        for row_index, _ in enumerate(ids):
            writer.writerow([
                row_index,
                x0[row_index],
                x1[row_index],
                x2[row_index],
                x3[row_index],
                y[row_index],
            ])
import csv  # needed by the CSV-writing helper functions defined in this file
import sys

import Orange

import csv_parsing_utils as parse

# Fit a linear regression model on the "training" table.
data = Orange.data.Table("training")
learner = Orange.regression.linear.LinearRegressionLearner()
model = learner(data)

# Load the table to predict on, plus the ids used to label each prediction.
# NOTE(review): rows of `test` are assumed to be in the same order as the ids
# read from the .tab file below -- confirm against how "test" was generated.
test = Orange.data.Table("test")
ids, x1 = parse.read_test_data("/Joel/Cursos/DataScience/yelp/data/user_review_joined_test.tab")

# Write one "<id>,<prediction>" line per test instance.
# The file is written directly instead of rebinding sys.stdout, so the
# process-wide stdout is left untouched, and the context manager ensures the
# file is actually flushed and closed.
with open("results_reg.csv", "w") as f:
    for i, instance in enumerate(test):
        c = model(instance)
        f.write("%s,%s\n" % (ids[i], c))