def main(inputfilename, outputfilename):
    """Encode a dataset file with the hashing trick and save the result.

    This is done by the following steps:

    1. Read the dataset from ``inputfilename`` (``p.readDatasetFromFile``).
    2. Process the entries into a values dictionary and ``y``
       (``p.processEntries``).
    3. Encode the values dictionary (``p.hashingTrick``).
    4. Save the encoded result to ``outputfilename``
       (``p.save_sparse_csr``).

    The start time, finish time and total running time are printed to
    stdout.

    Parameters
    ----------
    inputfilename
        Passed unchanged to ``p.readDatasetFromFile``; presumably the path
        of the raw dataset -- confirm against that helper.
    outputfilename
        Passed unchanged to ``p.save_sparse_csr`` as the save target.

    Returns
    -------
    X
        Whatever ``p.hashingTrick`` returns for the processed values.
    y
        The second value returned by ``p.processEntries``; presumably the
        target values -- confirm against that helper.

    """
    # NOTE(review): `p` and `time` are expected to be bound at module level
    # outside this function; `time` must expose `.now()` (e.g. the
    # `datetime.datetime` class) -- confirm against the file's imports.
    started_at = time.now()
    print("START -----> " + str(started_at))

    values_dict, y = p.processEntries(p.readDatasetFromFile(inputfilename))

    # NOTE(review): the script part of this file unpacks
    # `dataProcessing.hashingTrick(...)` into `(X, features_mapper)`. If `p`
    # is that same module, `X` here is the whole tuple -- confirm.
    X = p.hashingTrick(values_dict)

    p.save_sparse_csr(outputfilename, X)

    elapsed = time.now() - started_at

    print("\nFINISH -----> " + str(time.now()) + " Total running time : %s " % str(elapsed))

    return X, y
'''
Created on Dec 22, 2015

@author: nancy
'''

import numpy as np
import statsmodels.api as sm
from dataHandler import dataProcessing
from datetime import datetime

###########################################################

# Folder holding the estimation/validation CSV files.
# NOTE(review): hard-coded absolute path on the author's machine -- adjust
# before running elsewhere.
folder = 'C:/Users/nancy/OneDrive/FeatureSelection/starwood/a5d3c3ca-58a4-4702-9eca-6ba20dad2ec7_starwood_starwood_retargeting_CONV_7_fp_sheraton/'
train = folder + 'estimationset.working.csv' # path to training file
test = folder + 'validationset.working.csv' # path to testing file

###########################################################
# Timestamp taken before any processing starts.
start = datetime.now()

# Single-argument parenthesized print behaves the same on Python 2 and 3.
print("Process training data...")

# Read the training file and turn its entries into a values dict and y.
X_dict, y = dataProcessing.processEntries(dataProcessing.readDatasetFromFile(train))

# Encode the values dict; the helper also returns a features mapper.
X, features_mapper = dataProcessing.hashingTrick(X_dict)

# Fit regression model
# NOTE(review): sm.OLS does not add a constant term on its own, so this fits
# without an intercept unless X already contains one -- confirm intent.
results = sm.OLS(y, X).fit()

# Inspect the results
print(results.summary())