# --- Validation pass: score a constant-probability baseline on the held-out set. ---
# Fix: `datetime` was used below without being imported (this is the top of the
# file), and the joined stream was bound to `input`, shadowing the builtin.
# NOTE(review): `avito2_io` is also used without a visible import — presumably
# imported elsewhere; confirm and add `import avito2_io` if not.
from datetime import datetime

from eval import logloss

# Cap on validation lines to process; None means "no cap" (`k == None` never matches).
maxlines_val = None
start = datetime.now()

# Pre-computed set of IDs that define the validation split.
val_ids = avito2_io.get_artifact('full_val_set.pkl')
print('validation set ids read')

# Field extractors: pick the columns we keep from each joined row.
# Values stay as the raw strings found in the input.
train_etl = {
    'ad':  lambda l: l['AdID'],
    'pos': lambda l: l['Position'],
    'ctr': lambda l: l['HistCTR'],
}
search_etl = {'cat': lambda l: l['CategoryID']}

# Validation run: stream of (features, label) pairs restricted to val_ids.
# Renamed from `input` so the builtin is not shadowed.
val_stream = avito2_io.rolling_join(True,
                                    train_etl,
                                    search_etl,
                                    do_validation=True,
                                    val_ids=val_ids)
loss = 0.0
for (k, (x, y)) in enumerate(val_stream):
    # Baseline: score a constant 0.006 instead of the per-ad HistCTR.
    # loss += logloss(float(x['ctr']), y)
    loss += logloss(0.006, y)
    if k == maxlines_val:
        break
    if (k + 1) % 250000 == 0:
        print('processed %d lines on validation pass' % (k + 1))

print('validation set log loss: %.5f' % (loss / (k + 1)))
print('elapsed time: %s' % (datetime.now() - start))
# --- Training pass with FTRL-proximal, then set up the test/submission pass. ---
# Fix: `log` was used below without a visible import; the training stream was
# bound to `input`, shadowing the builtin.
# NOTE(review): alpha, beta, L1, L2, D, interaction, maxlines, submission,
# rolling_join, ftrl_proximal, hash_features and datetime must be defined or
# imported earlier in the file — confirm before running this chunk standalone.
from math import log

start = datetime.now()

# Extractors for the train stream; HistCTR is bucketed on a (negated, x10,
# 1-decimal) log scale so it hashes into a small set of discrete features.
train_etl = {
    'ad':      lambda l: l['AdID'],
    'pos':     lambda l: l['Position'],
    'log_ctr': lambda l: -10 * round(log(float(l['HistCTR'])), 1),
}
search_etl = {
    'user':     lambda l: l['UserID'],
    'category': lambda l: l['CategoryID'],
    'location': lambda l: l['LocationID'],
    'logon':    lambda l: l['IsUserLoggedOn'],
    # Binary indicators: did the search carry any params / a query string?
    'SPexists': lambda l: int(len(l['SearchParams']) > 0),
    'SQexists': lambda l: int(len(l['SearchQuery']) > 0),
}

# Training stream (use_train=True). Renamed from `input` to avoid shadowing
# the builtin; it is consumed entirely within this loop.
train_stream = rolling_join(True, train_etl, search_etl)
model = ftrl_proximal(alpha, beta, L1, L2, D, interaction)
for (k, (x, y)) in enumerate(train_stream):
    f = hash_features(x, D)
    p = model.predict(f)   # predict first ...
    model.update(f, p, y)  # ... then apply one online FTRL update
    if k == maxlines:
        break
    if (k + 1) % 1000000 == 0:
        print('processed %d lines' % (k + 1))
print('finished training')

# Testing pass (use_train=False): test rows carry an ID for the submission.
# `input` and `outfile` are consumed by code further down, so those names
# are kept even though `input` shadows the builtin.
train_etl['id'] = lambda l: l['ID']
input = rolling_join(False, train_etl, search_etl)
outfile = open(submission, 'w')
# --- Validation pass (second copy): constant-probability baseline on the
# held-out set. Fix: the joined stream was bound to `input`, shadowing the
# builtin. NOTE(review): `avito2_io` is used without a visible import —
# presumably imported elsewhere; confirm and add `import avito2_io` if not.
from datetime import datetime

from eval import logloss

# Cap on validation lines to process; None means "no cap" (`k == None` never matches).
maxlines_val = None
start = datetime.now()

# Pre-computed set of IDs that define the validation split.
val_ids = avito2_io.get_artifact('full_val_set.pkl')
print('validation set ids read')

# Field extractors: pick the columns we keep from each joined row.
train_etl = {
    'ad':  lambda l: l['AdID'],
    'pos': lambda l: l['Position'],
    'ctr': lambda l: l['HistCTR'],
}
search_etl = {'cat': lambda l: l['CategoryID']}

# Validation run: stream of (features, label) pairs restricted to val_ids.
# Renamed from `input` so the builtin is not shadowed.
val_stream = avito2_io.rolling_join(True,
                                    train_etl,
                                    search_etl,
                                    do_validation=True,
                                    val_ids=val_ids)
loss = 0.0
for (k, (x, y)) in enumerate(val_stream):
    # Baseline: score a constant 0.006 instead of the per-ad HistCTR.
    # loss += logloss(float(x['ctr']), y)
    loss += logloss(0.006, y)
    if k == maxlines_val:
        break
    if (k + 1) % 250000 == 0:
        print('processed %d lines on validation pass' % (k + 1))

print('validation set log loss: %.5f' % (loss / (k + 1)))
print('elapsed time: %s' % (datetime.now() - start))
# --- Training pass with FTRL-proximal (second copy), then set up the
# test/submission pass. Fix: `log` was used without a visible import; the
# training stream was bound to `input`, shadowing the builtin.
# NOTE(review): alpha, beta, L1, L2, D, submission, rolling_join,
# ftrl_proximal, hash_features and datetime must be defined or imported
# earlier in the file — confirm before running this chunk standalone.
from math import log

interaction = False  # no pairwise feature interactions in the FTRL model
maxlines = None      # no cap on training lines (`k == None` never matches)
start = datetime.now()

# Extractors for the train stream; HistCTR is bucketed on a (negated, x10,
# 1-decimal) log scale so it hashes into a small set of discrete features.
train_etl = {
    'ad':      lambda l: l['AdID'],
    'pos':     lambda l: l['Position'],
    'log_ctr': lambda l: -10 * round(log(float(l['HistCTR'])), 1),
}
search_etl = {
    'user':     lambda l: l['UserID'],
    'category': lambda l: l['CategoryID'],
    'location': lambda l: l['LocationID'],
    'logon':    lambda l: l['IsUserLoggedOn'],
    # Binary indicators: did the search carry any params / a query string?
    'SPexists': lambda l: int(len(l['SearchParams']) > 0),
    'SQexists': lambda l: int(len(l['SearchQuery']) > 0),
}

# Training stream (use_train=True). Renamed from `input` to avoid shadowing
# the builtin; it is consumed entirely within this loop.
train_stream = rolling_join(True, train_etl, search_etl)
model = ftrl_proximal(alpha, beta, L1, L2, D, interaction)
for (k, (x, y)) in enumerate(train_stream):
    f = hash_features(x, D)
    p = model.predict(f)   # predict first ...
    model.update(f, p, y)  # ... then apply one online FTRL update
    if k == maxlines:
        break
    if (k + 1) % 1000000 == 0:
        print('processed %d lines' % (k + 1))
print('finished training')

# Testing pass (use_train=False): test rows carry an ID for the submission.
# `input` and `outfile` are consumed by code further down, so those names
# are kept even though `input` shadows the builtin.
train_etl['id'] = lambda l: l['ID']
input = rolling_join(False, train_etl, search_etl)
outfile = open(submission, 'w')