def train(tr, si, alpha, beta, L1, L2, D, users=None, interaction=False, maxlines=None, iterations=1):
    """Build an ``ftrl_proximal`` learner and update it line by line.

    Args:
        tr, si, users: Forwarded unchanged to ``gl_iter.basic_join``, which
            yields one dict-like record per joined line.
        alpha, beta, L1, L2: Forwarded to the ``ftrl_proximal`` constructor
            (presumably its hyperparameters -- confirm against that class).
        D: Forwarded to both ``ftrl_proximal`` and ``hash_features``
            (presumably the hashed feature-space size -- confirm).
        interaction: Forwarded to the ``ftrl_proximal`` constructor.
        maxlines: Zero-based index at which a pass stops. The check runs
            after the update, so the record at index ``maxlines`` is still
            used. ``None`` never matches, so the whole iterator is consumed.
        iterations: Number of passes; the join iterator is rebuilt for each.

    Returns:
        The updated ``ftrl_proximal`` instance.
    """
    learner = ftrl_proximal(alpha, beta, L1, L2, D, interaction)
    for epoch in range(iterations):
        # A fresh iterator is required for every pass over the data.
        records = gl_iter.basic_join(tr, si, users)
        for index, record in enumerate(records):
            label = record.pop('IsClick')
            process_line(record)
            features = hash_features(record, D)
            prediction = learner.predict(features)
            learner.update(features, prediction, label)
            if index == maxlines:
                break
            seen = index + 1
            if seen % 250000 == 0:
                print('processed %d lines on training pass %d' % (seen, epoch + 1))
    return learner
def run_test(submission_file, test, si, users=None, offset=0):
    """Score every test record and write ``ID,probability`` rows.

    ``model`` and ``D`` are not parameters; they are resolved from the
    enclosing module scope and must be defined before this is called.

    Args:
        submission_file: Object with a ``write`` method; one
            ``'%d,%s\\n'`` row is written per record.
        test, si, users: Forwarded unchanged to ``gl_iter.basic_join``.
        offset: Added to the value returned by ``model.predict`` before the
            logistic function is applied.
    """
    it = gl_iter.basic_join(test, si, users)
    for k, line in enumerate(it):
        # Named row_id so the builtin ``id`` is not shadowed.
        row_id = line.pop('ID')
        process_line(line)
        f = hash_features(line, D)
        # NOTE(review): predict(f, False) presumably returns the raw decision
        # value, since the sigmoid is applied here -- confirm against the model.
        dv = model.predict(f, False)
        dv += offset
        p = 1.0 / (1.0 + exp(-dv))
        submission_file.write('%d,%s\n' % (row_id, str(p)))
        if (k + 1) % 250000 == 0:
            # Parenthesised single-argument form parses on Python 2 and 3.
            print('processed %d lines' % (k + 1))
def run_test(submission_file, test, si, users=None, offset=0):
    """Write one ``ID,probability`` row per joined test record.

    The names ``model`` and ``D`` are not parameters; they are looked up in
    the enclosing module scope.

    Args:
        submission_file: Object exposing ``write``; receives one
            ``'%d,%s\\n'`` formatted row per record.
        test, si, users: Passed straight through to ``gl_iter.basic_join``.
        offset: Shift added to ``model.predict``'s result before it is
            passed through the logistic function.
    """
    records = gl_iter.basic_join(test, si, users)
    for index, record in enumerate(records):
        row_id = record.pop('ID')
        process_line(record)
        features = hash_features(record, D)
        # NOTE(review): the False flag presumably requests the raw decision
        # value rather than a probability -- confirm against the model class.
        score = model.predict(features, False)
        score += offset
        probability = 1.0 / (1.0 + exp(-score))
        submission_file.write('%d,%s\n' % (row_id, str(probability)))
        done = index + 1
        if done % 250000 == 0:
            print('processed %d lines' % done)
def validate(val, si, users=None, offset=0, maxlines=None):
    """Score a labelled validation set and return its mean log loss.

    ``model`` and ``D`` are not parameters; they are resolved from the
    enclosing module scope and must be defined before this is called.

    Args:
        val, si, users: Forwarded unchanged to ``gl_iter.basic_join``.
        offset: Added to the value returned by ``model.predict`` before the
            logistic function is applied.
        maxlines: Zero-based index at which scoring stops. The check runs
            after the record is scored, so the record at index ``maxlines``
            is included. ``None`` never matches, so every record is scored.

    Returns:
        A ``(mean_loss, count)`` tuple where ``count`` is the number of
        records actually scored and ``mean_loss`` is the summed log loss
        divided by that count. An empty input yields ``(nan, 0)``.
    """
    it = gl_iter.basic_join(val, si, users)
    loss = 0.0
    count = 0
    for k, line in enumerate(it):
        y = line.pop('IsClick')
        process_line(line)
        f = hash_features(line, D)
        dv = model.predict(f, False)
        dv += offset
        p = 1.0 / (1.0 + exp(-dv))
        loss += logloss(p, y)
        # k is a zero-based index, so k + 1 losses have been summed so far.
        count = k + 1
        if k == maxlines:
            break
        if count % 250000 == 0:
            # Parenthesised single-argument form parses on Python 2 and 3.
            print('processed %d lines from validation set' % count)
    if count == 0:
        # No records: there is no mean to report, and nothing to divide by.
        return float('nan'), 0
    # Divide by the number of summed terms, not by the last index.
    return loss / count, count
def validate(val, si, users=None, offset=0, maxlines=None):
    """Compute the mean log loss of the module-level model on labelled data.

    The names ``model`` and ``D`` are not parameters; they are looked up in
    the enclosing module scope.

    Args:
        val, si, users: Passed straight through to ``gl_iter.basic_join``.
        offset: Shift added to ``model.predict``'s result before it is
            passed through the logistic function.
        maxlines: Zero-based index of the last record to score. The record
            at this index is scored before the loop stops. ``None`` never
            matches, so the whole iterator is consumed.

    Returns:
        ``(mean_loss, count)``: the summed log loss divided by the number of
        records scored, and that number. Returns ``(nan, 0)`` when the
        iterator yields nothing.
    """
    records = gl_iter.basic_join(val, si, users)
    total_loss = 0.0
    scored = 0
    for index, record in enumerate(records):
        label = record.pop('IsClick')
        process_line(record)
        features = hash_features(record, D)
        score = model.predict(features, False)
        score += offset
        probability = 1.0 / (1.0 + exp(-score))
        total_loss += logloss(probability, label)
        # enumerate() is zero-based: index + 1 terms are now in total_loss.
        scored = index + 1
        if index == maxlines:
            break
        if scored % 250000 == 0:
            print('processed %d lines from validation set' % scored)
    if scored == 0:
        # Avoid dividing by zero (and an unbound loop variable) on empty input.
        return float('nan'), 0
    # The mean must use the number of accumulated terms, not the last index.
    return total_loss / scored, scored
def train(tr, si, alpha, beta, L1, L2, D, users=None, interaction=False, maxlines=None, iterations=1):
    """Create an ``ftrl_proximal`` model and update it on each joined record.

    Args:
        tr, si, users: Forwarded unchanged to ``gl_iter.basic_join``.
        alpha, beta, L1, L2: Forwarded to the ``ftrl_proximal`` constructor
            (presumably its hyperparameters -- confirm against that class).
        D: Forwarded to both ``ftrl_proximal`` and ``hash_features``
            (presumably the hashed feature-space size -- confirm).
        interaction: Forwarded to the ``ftrl_proximal`` constructor.
        maxlines: Zero-based index at which a pass stops. The check runs
            after the update, so the record at index ``maxlines`` is still
            used. ``None`` never matches, so every record is used.
        iterations: Number of passes over the data.

    Returns:
        The updated ``ftrl_proximal`` instance.
    """
    model = ftrl_proximal(alpha, beta, L1, L2, D, interaction)
    for j in range(iterations):
        # The join iterator is rebuilt on every pass.
        it = gl_iter.basic_join(tr, si, users)
        for k, line in enumerate(it):
            y = line.pop('IsClick')
            process_line(line)
            f = hash_features(line, D)
            p = model.predict(f)
            model.update(f, p, y)
            if k == maxlines:
                break
            if (k + 1) % 250000 == 0:
                # Parenthesised single-argument form parses on Python 2 and 3.
                print('processed %d lines on training pass %d' % (k + 1, j + 1))
    return model