def main(inputfilename, outputfilename):
    """Hash the features of a dataset file and save the resulting matrix.

    This is done by the following steps:

    1. Read the dataset from ``inputfilename`` with
       ``p.readDatasetFromFile``.
    2. Convert the entries into a values dictionary and ``y`` with
       ``p.processEntries``.
    3. Apply ``p.hashingTrick`` to the values dictionary to obtain ``X``.
    4. Save ``X`` to ``outputfilename`` with ``p.save_sparse_csr``.

    Start and finish timestamps, plus the total running time, are printed
    to standard output.

    Parameters
    ----------
    inputfilename
        Passed unchanged to ``p.readDatasetFromFile`` (presumably a path to
        the dataset file -- confirm against that helper).
    outputfilename
        Passed unchanged to ``p.save_sparse_csr`` as the save destination.

    Returns
    -------
    X
        The value returned by ``p.hashingTrick``.
    y
        The second value returned by ``p.processEntries`` (presumably the
        target values -- confirm against that helper).
    """
    # NOTE(review): `time` must expose `now()`; presumably it is an alias of
    # `datetime.datetime` -- confirm against the file's imports.
    # A single parenthesised argument makes this print valid on both
    # Python 2 (print statement) and Python 3 (print function).
    print("START -----> " + str(time.now()))
    started_at = time.now()

    values_dict, y = p.processEntries(p.readDatasetFromFile(inputfilename))

    # NOTE(review): elsewhere in this file `dataProcessing.hashingTrick` is
    # unpacked as `(X, features_mapper)`. If `p` is that same module, `X`
    # below would be a tuple rather than a matrix -- confirm.
    X = p.hashingTrick(values_dict)
    p.save_sparse_csr(outputfilename, X)

    running_time = time.now() - started_at
    print("\nFINISH -----> " + str(time.now())
          + " Total running time : %s " % str(running_time))
    return X, y
'''
Created on Dec 22, 2015

@author: nancy

Script: read the training file, turn its entries into a hashed feature
matrix with the dataHandler helpers, fit an ordinary least squares model
with statsmodels and print the fit summary.
'''
from datetime import datetime

import numpy as np
import statsmodels.api as sm

from dataHandler import dataProcessing

###########################################################
# Input locations. `folder` is a machine-specific absolute path; adjust it
# before running on another machine.
folder = 'C:/Users/nancy/OneDrive/FeatureSelection/starwood/a5d3c3ca-58a4-4702-9eca-6ba20dad2ec7_starwood_starwood_retargeting_CONV_7_fp_sheraton/'
train = folder + 'estimationset.working.csv'  # path to training file
test = folder + 'validationset.working.csv'  # path to testing file
###########################################################

# Timestamp taken before processing starts (not read again in this section).
start = datetime.now()

# A single parenthesised argument keeps the print calls valid on both
# Python 2 (print statement) and Python 3 (print function).
print("Process training data...")
X_dict, y = dataProcessing.processEntries(
    dataProcessing.readDatasetFromFile(train))
X, features_mapper = dataProcessing.hashingTrick(X_dict)

# Fit regression model
# NOTE(review): sm.OLS does not add an intercept column on its own; confirm
# that X already contains one if an intercept is intended.
results = sm.OLS(y, X).fit()

# Inspect the results
print(results.summary())