import os
import sys
import time

import pandas as pd
from sklearn import cross_validation
from sklearn import metrics
from sklearn import svm

# Make ../preprocessing/ importable before the project-local imports below.
sys.path.append(os.path.abspath("../preprocessing/"))
import features_preprocessing as fp
import submission_preprocessing as sp
import submission_postprocess as postpro
from configuration import CONFIG

# Wall-clock start of the run (presumably used later to report elapsed time).
t1 = time.time()

# Columns requested from both the training and the submission preprocessors.
_USED_COLUMNS = ('WEEK_DAY', 'YEAR', 'YEAR_DAY', 'ASS_ID', 'TIME', 'CSPL_CALLS')

print('preprocessing...')

# Training side: build the preprocessor and run its full pipeline.
# A fresh list is passed on every call so the two calls never share an argument.
preprocessing = fp.feature_preprocessing()
preprocessing.full_preprocess(used_columns=list(_USED_COLUMNS))

# Submission side: same column selection, then show what came out.
sub_p = sp.submission_preprocessing()
sub_p.full_preprocess(used_columns=list(_USED_COLUMNS))
submission_data = sub_p.data
print(submission_data.columns)

data = preprocessing.data

# Starts empty; NOTE(review): presumably filled by the loop further down.
prediction = []

print('data loaded, beginning prediction...')

for i in range(submission_data.shape[0]):
# Example no. 2
# 0
import sys
import os
sys.path.append(os.path.abspath("../preprocessing/"))
import features_preprocessing as fp
from sklearn import linear_model
from sklearn import cross_validation
from sklearn import metrics
import numpy as np

# Fit a ridge regression on the preprocessed data and report its score on a
# held-out split.
preprocessing = fp.feature_preprocessing()
preprocessing.full_preprocess()

data = preprocessing.data
# Target is the CSPL_CALLS column; every other column is used as a feature.
Y = data['CSPL_CALLS']
X = data.drop(['CSPL_CALLS'], axis=1)

# Hold out 40% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, Y, test_size=0.4, random_state=0)

# Function-call form of print: valid on Python 3 and consistent with the
# other print calls in this script.
print(X_train)
clf = linear_model.Ridge(alpha=.5)
# Fit once; refitting on the same data would only repeat the work.
clf.fit(X_train, y_train)

y_predict = clf.predict(X_test)
print(y_predict)
# Score of the fitted model on the held-out rows.
print(clf.score(X_test, y_test))
# Example no. 3
# 0
import linex as ln
import numpy as np
from sklearn import cross_validation
from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
import pandas as pd
from sklearn import linear_model
import datetime as dt
from linex import loss_linex
from sklearn.feature_selection import RFE
from tqdm import tqdm
  
    
# Set up the training preprocessor and read the submission file.
preprocessing = fp.feature_preprocessing('train_2011_2012_2013.csv', ';')
submission = pd.read_csv('submission.txt', sep='\t')

# Translate each ASS_ASSIGNMENT value into an integer id through the
# CONFIG.ass_assign lookup and store it in a new ASS_ID column.
assignment_labels = submission['ASS_ASSIGNMENT']
submission['ASS_ID'] = assignment_labels.apply(
    lambda label: int(CONFIG.ass_assign[label]))

print('Ready for training...')

# Train one model per assignment id: 26 ids in total (0-26, with 17 not in the list)
for id in tqdm([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,19,20,21,22,23,24,25,26]):
    
    print("Working on assignment : ",id)
    preprocessing_id = fp.feature_preprocessing()
    preprocessing_id.data = preprocessing.data.copy()
    preprocessing_id.full_preprocess(id)
    data = preprocessing_id.data
    print(id,' Data loaded')
    Y = data['CSPL_RECEIVED_CALLS']