"""Load the full training set into dense NumPy arrays and standardize it."""
from parse_data import yield_data, train, test
import numpy as np
from sklearn import preprocessing

# Number of rows in the training set.
N = 145231

if __name__ == '__main__':
    # Preallocate the design matrix (N rows x 2006 features) and target vector
    # up front instead of growing a list row by row.
    X = np.empty((N, 2006))
    y = np.empty(N)

    # yield_data streams (ID, target, features) tuples; unpack directly in the
    # loop header and fill the preallocated arrays by row index.
    for i, (ID, target, features) in enumerate(yield_data(train)):
        X[i, :] = features
        y[i] = target

    # Column-wise zero-mean / unit-variance standardization.
    X_scaled = preprocessing.scale(X)
OUTPUT: logarithmic loss of p given y
    '''
    # Clamp p into [eps, 1 - eps] so the log() calls below never see 0 or 1.
    # NOTE(review): 10e-15 equals 1e-14 -- the author may have intended 1e-15;
    # confirm before changing, since it alters the loss bound.
    p = max(min(p, 1. - 10e-15), 10e-15)
    return -log(p) if y == 1. else -log(1. - p)

# Training driver: for each epoch, make one online pass of FTRL-proximal over
# the hashed training stream, printing a progress logloss every 1000 examples.
start = datetime.now()

# alpha, beta, L1, L2, D, interaction are hyperparameters defined elsewhere in
# this file (not visible in this chunk).
learner = ftrl_proximal(alpha, beta, L1, L2, D, interaction)

for e in range(epoch):
    loss = 0.
    count = 0
    for t, y, x in yield_data(train, hashing_trick=True, d=D):  # data is a generator
        # p is used as P(y == 1) by logloss below.
        p = learner.predict(x)
        loss += logloss(p, y)
        learner.update(x, p, y)
        count+=1
        if count%1000==0:
            #print count,loss/count
            print('%s\tencountered: %d\tcurrent logloss: %f' % (
                datetime.now(), count, loss/count))
            #if count>10000: # comment this out when you run it locally.
                #break
            # Counters reset here, so the printed value is the mean logloss
            # over the most recent 1000 examples, not a running global average.
            count=0
            loss=0
#import pickle