Example #1
0
def train(tr, si, alpha, beta, L1, L2, D,
          users=None, interaction=False, maxlines=None, iterations=1):
    """Fit an ``ftrl_proximal`` model with one update per joined row.

    Args:
        tr, si, users: Forwarded unchanged to ``gl_iter.basic_join`` to
            build the row iterator for every pass.
        alpha, beta, L1, L2, D, interaction: Forwarded unchanged to the
            ``ftrl_proximal`` constructor. ``D`` is also handed to
            ``hash_features`` for each row.
        maxlines: Zero-based index of the last row to use in a pass; the
            pass stops right after that row has been used for an update,
            so ``maxlines + 1`` rows are consumed. ``None`` never matches
            an index, so the whole iterator is consumed.
        iterations: Number of passes over the joined data.

    Returns:
        The model object after all passes.
    """
    learner = ftrl_proximal(alpha, beta, L1, L2, D, interaction)
    for pass_idx in range(iterations):
        # The join is rebuilt on every pass so each pass starts from a
        # fresh iterator.
        rows = gl_iter.basic_join(tr, si, users)
        for row_idx, row in enumerate(rows):
            # 'IsClick' is removed before the row is processed and hashed;
            # presumably it is the click label -- confirm against the data.
            target = row.pop('IsClick')
            process_line(row)
            hashed = hash_features(row, D)
            estimate = learner.predict(hashed)
            learner.update(hashed, estimate, target)
            if row_idx == maxlines:
                break
            rows_done = row_idx + 1
            if rows_done % 250000 == 0:
                print('processed %d lines on training pass %d' %
                      (rows_done, pass_idx + 1))
    return learner
def run_test(submission_file, test, si, users=None, offset=0):
    """Score every joined test row and write ``ID,probability`` lines.

    ``model`` and ``D`` are not parameters: they are looked up in the
    enclosing (module) scope and must exist before this is called.

    Args:
        submission_file: Object with a ``write`` method; receives one
            ``'%d,%s\\n'`` formatted line per row.
        test, si, users: Forwarded unchanged to ``gl_iter.basic_join``.
        offset: Value added to the model's score before the logistic
            transform is applied.
    """
    it = gl_iter.basic_join(test, si, users)
    for (k, line) in enumerate(it):
        # Named row_id rather than ``id`` so the builtin is not shadowed.
        row_id = line.pop('ID')
        process_line(line)
        f = hash_features(line, D)
        # NOTE(review): the ``False`` flag presumably requests the raw score
        # instead of a probability -- confirm against model.predict.
        dv = model.predict(f, False)
        dv += offset
        p = 1.0 / (1.0 + exp(-dv))
        submission_file.write('%d,%s\n' % (row_id, str(p)))
        if (k + 1) % 250000 == 0:
            # Call form of print: valid on Python 3 and, with a single
            # argument, prints the same text on Python 2.
            print('processed %d lines' % (k + 1))
Example #3
0
def run_test(submission_file, test, si, users=None, offset=0):
    """Write one ``ID,probability`` line per joined test row.

    Reads ``model`` and ``D`` from the enclosing (module) scope; neither
    is passed in as an argument.

    Args:
        submission_file: Destination whose ``write`` method is called once
            per row.
        test, si, users: Passed straight through to
            ``gl_iter.basic_join``.
        offset: Added to the model score before it is squashed with the
            logistic function.
    """
    joined = gl_iter.basic_join(test, si, users)
    for seen, record in enumerate(joined, 1):
        record_id = record.pop('ID')
        process_line(record)
        hashed = hash_features(record, D)
        # NOTE(review): meaning of the ``False`` flag is defined by the
        # model class, presumably "return the raw score" -- confirm.
        score = model.predict(hashed, False)
        score += offset
        prob = 1.0 / (1.0 + exp(-score))
        submission_file.write('%d,%s\n' % (record_id, str(prob)))
        # Progress report every 250000 rows.
        if seen % 250000 == 0:
            print('processed %d lines' % seen)
def validate(val, si, users=None, offset=0, maxlines=None):
    """Compute the mean ``logloss`` of the global ``model`` on joined rows.

    ``model`` and ``D`` are not parameters: they are looked up in the
    enclosing (module) scope and must exist before this is called.

    Args:
        val, si, users: Forwarded unchanged to ``gl_iter.basic_join``.
        offset: Value added to the model's score before the logistic
            transform is applied.
        maxlines: Zero-based index of the last row to score; the loop stops
            right after that row, so ``maxlines + 1`` rows are scored.
            ``None`` never matches an index, so every row is scored.

    Returns:
        A ``(mean_loss, count)`` tuple where ``count`` is the number of
        rows that were actually scored.

    Raises:
        ValueError: If the join yields no rows, because the mean is
            undefined in that case.
    """
    it = gl_iter.basic_join(val, si, users)
    loss = 0.0
    count = 0
    for (k, line) in enumerate(it):
        y = line.pop('IsClick')
        process_line(line)
        f = hash_features(line, D)
        # NOTE(review): the ``False`` flag presumably requests the raw score
        # instead of a probability -- confirm against model.predict.
        dv = model.predict(f, False)
        dv += offset
        p = 1.0 / (1.0 + exp(-dv))
        loss += logloss(p, y)
        # Track the number of scored rows; dividing by the last index ``k``
        # under-counts by one and fails outright for a single row.
        count = k + 1
        if k == maxlines:
            break
        if count % 250000 == 0:
            # Call form of print: valid on Python 3 and, with a single
            # argument, prints the same text on Python 2.
            print('processed %d lines from validation set' % count)
    if count == 0:
        raise ValueError('validation set is empty; mean loss is undefined')
    return loss / count, count
Example #5
0
def validate(val, si, users=None, offset=0, maxlines=None):
    """Return the average ``logloss`` of the global ``model`` and the row count.

    Reads ``model`` and ``D`` from the enclosing (module) scope; neither
    is passed in as an argument.

    Args:
        val, si, users: Passed straight through to ``gl_iter.basic_join``.
        offset: Added to the model score before it is squashed with the
            logistic function.
        maxlines: Zero-based index of the last row to score. Scoring stops
            immediately after that row, i.e. ``maxlines + 1`` rows are
            used. With ``None`` the whole iterator is consumed.

    Returns:
        ``(mean_loss, rows_scored)``.

    Raises:
        ValueError: If no rows were produced by the join; an average over
            zero rows is undefined.
    """
    rows = gl_iter.basic_join(val, si, users)
    total_loss = 0.0
    rows_scored = 0
    for idx, row in enumerate(rows):
        label = row.pop('IsClick')
        process_line(row)
        hashed = hash_features(row, D)
        # NOTE(review): meaning of the ``False`` flag is defined by the
        # model class, presumably "return the raw score" -- confirm.
        score = model.predict(hashed, False)
        score += offset
        prob = 1.0 / (1.0 + exp(-score))
        total_loss += logloss(prob, label)
        # Count rows explicitly: the loop index is one less than the number
        # of rows scored, so using it as the divisor skews the mean and
        # divides by zero when only one row is scored.
        rows_scored = idx + 1
        if idx == maxlines:
            break
        if rows_scored % 250000 == 0:
            print('processed %d lines from validation set' % rows_scored)
    if rows_scored == 0:
        raise ValueError('validation set is empty; mean loss is undefined')
    return total_loss / rows_scored, rows_scored
def train(tr, si, alpha, beta, L1,
          L2, D, users=None,
          interaction=False, maxlines=None,
          iterations=1):
    """Train an ``ftrl_proximal`` model by streaming over the joined rows.

    Args:
        tr, si, users: Forwarded unchanged to ``gl_iter.basic_join``, which
            is called again at the start of every pass.
        alpha, beta, L1, L2, D, interaction: Forwarded unchanged to the
            ``ftrl_proximal`` constructor; ``D`` is additionally passed to
            ``hash_features`` for every row.
        maxlines: Zero-based index of the last row used in a pass. The pass
            ends right after the update for that row, so ``maxlines + 1``
            rows are consumed; ``None`` never matches, so all rows are
            used.
        iterations: Number of passes over the data.

    Returns:
        The model after the final pass.
    """
    model = ftrl_proximal(alpha, beta, L1, L2, D, interaction)
    for j in range(iterations):
        it = gl_iter.basic_join(tr, si, users)
        for (k, line) in enumerate(it):
            # 'IsClick' is removed from the row before feature hashing;
            # presumably it is the click label -- confirm against the data.
            y = line.pop('IsClick')
            process_line(line)
            f = hash_features(line, D)
            # Predict first, then pass that prediction to the update.
            p = model.predict(f)
            model.update(f, p, y)
            if k == maxlines:
                break
            if (k + 1) % 250000 == 0:
                # Call form of print: valid on Python 3 and, with a single
                # argument, prints the same text on Python 2.
                print('processed %d lines on training pass %d' %
                      (k + 1, j + 1))
    return model