-
Notifications
You must be signed in to change notification settings - Fork 0
/
val_run0.py
47 lines (34 loc) · 1.23 KB
/
val_run0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
'''
This script computes the log loss on the validation set from full_val_set.pkl
(generated by the full_validation_set.py script) for some simple,
no-learning models such as HistCTR, all 0's, or a mean-value benchmark.
author: David Thaler
date: July 2015
'''
import avito2_io
from datetime import datetime
from eval import logloss
# Optional cap on the number of validation rows to score; None scores all rows.
maxlines_val = None
# Constant prediction scored against every validation label.
# NOTE(review): 0.006 looks like the mean-value benchmark mentioned in the
# module docstring -- confirm before reusing the number elsewhere.
constant_prediction = 0.006
# Emit a progress line each time this many rows have been scored.
progress_every = 250000

start = datetime.now()
val_ids = avito2_io.get_artifact('full_val_set.pkl')
print('validation set ids read')

# Field extractors passed to rolling_join: each maps an output key to a
# function that pulls one column out of a row dict.
train_etl = {'ad': lambda l: l['AdID'],
             'pos': lambda l: l['Position'],
             'ctr': lambda l: l['HistCTR']}
search_etl = {'cat': lambda l: l['CategoryID']}

# validation run
# Bound to `rows` rather than `input` so the builtin is not shadowed.
# NOTE(review): the meaning of the first positional argument (True) is
# defined by avito2_io.rolling_join -- confirm against that module.
rows = avito2_io.rolling_join(True,
                              train_etl,
                              search_etl,
                              do_validation=True,
                              val_ids=val_ids)

loss = 0.0
# Explicit row counter: unlike the loop index of enumerate(), it is defined
# even when the stream yields nothing, and it makes the row cap exact.
n_scored = 0
for (x, y) in rows:
    # Stop before scoring once the cap is reached, so at most maxlines_val
    # rows contribute to the reported average.
    if maxlines_val is not None and n_scored >= maxlines_val:
        break
    # To score the HistCTR benchmark instead of the constant, use:
    #   loss += logloss(float(x['ctr']), y)
    loss += logloss(constant_prediction, y)
    n_scored += 1
    if n_scored % progress_every == 0:
        print('processed %d lines on validation pass' % n_scored)

if n_scored:
    print('validation set log loss: %.5f' % (loss / n_scored))
else:
    # Nothing was scored, so an average would be a division by zero.
    print('validation set log loss: undefined (no rows processed)')
print('elapsed time: %s' % (datetime.now() - start))