예제 #1
0
def testScoreWithMemorizedModel():
  """Score a seti against a server whose learned model has been emptied.

  Writes an empty learned model and a memorized model built from two setis
  (memorized column: 'gender'), loads a server from the 'v1' config, blanks
  the in-memory learned model, and expects a ('gender', 'f') seti to score
  7.0 -- the first argument used when creating the ('gender', 'f') training
  seti.
  """
  learned_loc = 'tmp/learned_model_v1.csv'
  all_columns = ['gender', 'height']
  columns_to_memorize = ['gender']
  model_config = model_cfg.ModelConfig(
    'v1', learned_loc, 'tmp/memorized_model_v1.csv',
    model_cfg.ColsCfg(all_columns, columns_to_memorize),
    'tmp/feature_map_v1.csv', 'tmp/feature_map2_v1.csv')
  setis = [
    seti.create_seti(5.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)]),
    seti.create_seti(7.0, bfs=[('gender', 'f')], cfs=[('height', 6.0)]),
  ]

  # The learned model on disk carries no weights at all.
  learned_writer = model_exporter.LearnedModel()
  learned_writer.write_model({}, learned_loc)

  # Build the memorized model from the training setis and persist it where
  # the config says it lives.
  selector, _ = training_data.write_feature_maps_from_seti(model_config, setis)
  memorizer = model_exporter.Memorizer(selector, model_config)
  memorized = memorizer.create_model(setis)
  model_exporter.MemorizedModel().write_features(
    memorized, model_config.memorized_model_loc)

  server = seti_server.make_from_config([model_config])
  server.model_map['v1'].learned_model = {}
  query = seti.create_seti(7.0, bfs=[('gender', 'f')])
  assertFloatEquals(7.0, server.score(query))
예제 #2
0
def testLearn():
    """Smoke-test Learner.learn on two setis, one of them flagged for holdout.

    Generates a feature map over the 'gender' and 'height' columns, trains a
    learner on the setis and prints its stats. Nothing is asserted; the test
    only checks that the calls complete.
    """
    orig_cols = ["gender", "height"]

    s0 = seti.create_seti(1.0, bfs=[("gender", "m")], cfs=[("height", 6.0)])
    s1 = seti.create_seti(0.0, bfs=[("gender", "f")], cfs=[("height", 3.0)])
    s1.for_holdout = True
    setis = [s0, s1]

    fs = feature_selector.FeatureSelect()
    fs.generate_feature_map(orig_cols, setis)

    model_learner = learner.Learner(fs)
    model_learner.learn(setis)
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3, unlike the bare print statement.
    print(model_learner.stats())
예제 #3
0
def testFVSimple():
  """Check column names and float feature vectors for a two-seti feature map.

  After generating the map over 'gender' and 'height', the selector must
  expose the columns 'gender_MISSING', 'gender_f' and 'height', and each seti
  must convert to the matching expected vector.
  """
  columns = ['gender', 'height']
  male = seti.create_seti(1.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
  female = seti.create_seti(0.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)])
  setis = [male, female]

  selector = feature_selector.FeatureSelect()
  selector.generate_feature_map(columns, setis)
  assertEquals(
    ['gender_MISSING', 'gender_f', 'height'], selector.all_col_names)

  # One expected vector per seti, in the same order as `setis`.
  expected_vectors = [[0, 0, 6.0], [0, 1, 3.0]]
  for setie, expected in zip(setis, expected_vectors):
    assertEquals(expected, seti.float_feature_vector(selector, setie))
예제 #4
0
def testFeatureSelectorMissingColumn():
  """Check get_index on a feature map built from setis with missing columns.

  The first seti has no 'gender' feature and the other two have no 'height'
  feature. Known features must resolve to their expected index with no
  error; the unknown 'gender:metro' must resolve to None with an error.
  """
  s0 = seti.create_seti(1.0, bfs=[], cfs=[('height', 6.0)])
  s1 = seti.create_seti(0.0, bfs=[('gender', 'f')], cfs=[])
  s2 = seti.create_seti(0.0, bfs=[('gender', 'm')], cfs=[])
  setis = [s0, s1, s2]
  fs = feature_selector.FeatureSelector()
  fs.build_feature_map(setis)

  # (feature name, expected index) pairs; None marks a feature that is not
  # in the map.
  expectations = [
    ('height', 0),
    ('gender:m', 2),
    ('gender:f', 1),
    ('gender:metro', None),
  ]
  for feature, expected_index in expectations:
    index, err = fs.get_index(feature)
    assertEquals(expected_index, index)
    if index is None:
      # NOTE(review): this also passes when err is None; confirm whether a
      # non-empty message is what should be required here.
      assert err != ''
    else:
      assert err is None
예제 #5
0
def testScoreWithLearnedModel():
  """Score setis with a hand-written learned model under both model types.

  Writes fixed weights as the learned model and an empty memorized model,
  then for linear and logistic regression builds a 'v0' config, loads a
  server and checks each seti's score against the weighted sum of its
  features passed through the matching transform (identity or sigmoid).
  """
  model = {
    'gender_MISSING': 0.0,
    'gender_f': -0.099999999999999992,
    ':': -0.7999999999999996,
    'height': 0.29999999999999993
  }
  lm = model_exporter.LearnedModel()
  lm.write_model(model, 'tmp/learned_model.csv')
  mm = model_exporter.MemorizedModel()
  mm.write_features([], 'tmp/memorized_model.csv')

  # Setup feature selector and such.
  orig_cols = ['gender', 'height']

  s0 = seti.create_seti(5.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
  s1 = seti.create_seti(3.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)])
  setis = [s0, s1]

  # Raw (pre-transform) expected scores. They depend only on the weights and
  # the setis, not on the model type, so they are computed once.
  w0 = model[':'] + model['gender_MISSING'] * 0 + model['gender_f'] * 0 + model['height'] * 6.0
  w1 = model[':'] + model['gender_MISSING'] * 0 + model['gender_f'] * 1 + model['height'] * 3.0
  wants = [w0, w1]

  # Each model type is paired with the transform applied to the raw score.
  cases = [
    (model_cfg.LINEAR_REGRESSION, lambda x: x),
    (model_cfg.LOGISTIC_REGRESSION, lambda x: 1 / (1 + math.exp(-x))),
  ]

  # Test the actual model.
  for model_type, transform in cases:
    model_config = model_cfg.ModelConfig(
      'v0', 'tmp/learned_model.csv', 'tmp/memorized_model.csv',
      model_cfg.ColsCfg(orig_cols, orig_cols),
      'tmp/feature_map_v0.csv', 'tmp/feature_map2_v0.csv',
      model_type=model_type)
    training_data.write_feature_maps_from_seti(model_config, setis)

    ss = seti_server.make_from_config([model_config])

    for setie, want in zip(setis, wants):
      # Scores are floats (and go through exp() for the logistic case), so
      # compare with the float-tolerant helper the other scoring tests use
      # rather than exact equality.
      assertFloatEquals(transform(want), ss.score(setie))
예제 #6
0
def testFVManyCategorical():
  """Check feature vectors for a categorical column with four values.

  Five setis cover 'dir' values north, south, east, west and one with no
  'dir' at all. The expected column list has a 'dir_MISSING' column and one
  column each for south, east and west (none for north), followed by 'dist';
  the north seti is expected to have zeros in every 'dir' column.
  """
  # (dir features, dist value, expected float feature vector)
  cases = [
    ([('dir', 'north')], 6.0, [0, 0, 0, 0, 6.0]),
    ([('dir', 'south')], 5.0, [0, 1, 0, 0, 5.0]),
    ([('dir', 'east')], 4.0, [0, 0, 1, 0, 4.0]),
    ([('dir', 'west')], 3.0, [0, 0, 0, 1, 3.0]),
    ([], 2.0, [1, 0, 0, 0, 2.0]),
  ]
  setis = [
    seti.create_seti(1.0, bfs=dir_features, cfs=[('dist', dist)])
    for dir_features, dist, _ in cases
  ]
  expected_vectors = [vector for _, _, vector in cases]

  selector = feature_selector.FeatureSelect()
  selector.generate_feature_map(['dir', 'dist'], setis)

  expected_columns = [
    'dir_MISSING', 'dir_south', 'dir_east', 'dir_west', 'dist']
  assertEquals(expected_columns, selector.all_col_names)

  for setie, expected in zip(setis, expected_vectors):
    assertEquals(expected, seti.float_feature_vector(selector, setie))
예제 #7
0
def testRun():
  """Run the pipeline end to end and score through the resulting server.

  Runs run_pipeline.run on a 'v0' config with two setis, loads a server from
  the same config, and compares two scores against values computed from a
  hard-coded set of weights: first for a new seti, then for a training seti
  after the server's in-memory learned model has been replaced by an empty
  dict.
  """
  columns = ['gender', 'height']

  s0 = seti.create_seti(1.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
  s1 = seti.create_seti(0.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)])
  setis = [s0, s1]

  model_config = model_cfg.ModelConfig(
    'v0', 'tmp/learned_model.csv', 'tmp/memorized_model.csv',
    model_cfg.ColsCfg(columns, columns),
    'tmp/feature_map_v0.csv', 'tmp/feature_map2_v0.csv')
  run_pipeline.run([model_config], setis)

  # Weights the expected scores below are computed from.
  model = {
    'gender_MISSING': 0.0,
    'gender_f': -0.099999999999999992,
    ':': -0.7999999999999996,
    'height': 0.29999999999999993
  }

  def expected_score(gender_f, height):
    # Same operand order as writing the weighted sum out by hand, so the
    # floating-point result is unchanged.
    return (model[':'] + model['gender_MISSING'] * 0
            + model['gender_f'] * gender_f + model['height'] * height)

  server = seti_server.make_from_config([model_config])

  # Learned model: score a seti that was not part of the pipeline input.
  unseen = seti.create_seti(5.0, bfs=[('gender', 'm')], cfs=[('height', 2.0)])
  assertFloatEquals(expected_score(0, 2.0), server.score(unseen))

  # Memorized model: wipe the learned model, then score a training seti.
  server.model_map['v0'].learned_model = {}
  assertFloatEquals(expected_score(1, 3.0), server.score(s1))