def testScoreWithMemorizedModel():
  """Score a seti via a server whose memorized model was built from two setis.

  The model config 'v1' memorizes on the 'gender' column only.  An empty
  learned model is written to disk, the memorized model is created from the
  two training setis and exported, and a server is built from the config.
  A fresh seti with gender 'f' is then expected to score 7.0, the value the
  gender-'f' training seti was created with.
  """
  learned_loc = 'tmp/learned_model_v1.csv'
  cols_cfg = model_cfg.ColsCfg(['gender', 'height'], ['gender'])
  model_config = model_cfg.ModelConfig(
      'v1', learned_loc, 'tmp/memorized_model_v1.csv', cols_cfg,
      'tmp/feature_map_v1.csv', 'tmp/feature_map2_v1.csv')
  training_setis = [
      seti.create_seti(5.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)]),
      seti.create_seti(7.0, bfs=[('gender', 'f')], cfs=[('height', 6.0)]),
  ]

  # The learned model on disk is deliberately empty.
  model_exporter.LearnedModel().write_model({}, learned_loc)

  # Build the memorized model from the training setis and export it.
  feature_selector, _ = training_data.write_feature_maps_from_seti(
      model_config, training_setis)
  memorizer = model_exporter.Memorizer(feature_selector, model_config)
  memorized = memorizer.create_model(training_setis)
  model_exporter.MemorizedModel().write_features(
      memorized, model_config.memorized_model_loc)

  server = seti_server.make_from_config([model_config])
  # Also blank out the in-memory learned model of the loaded 'v1' entry.
  server.model_map['v1'].learned_model = {}
  query = seti.create_seti(7.0, bfs=[('gender', 'f')])
  assertFloatEquals(7.0, server.score(query))
# Example #2
# 0
def run(model_configs, setis):
  """Flag a random holdout subset, then build and export models per config.

  First pass: every element of `setis` draws one `random.random()` value and
  gets `for_holdout = True` when the draw is below 0.1; other elements are
  left untouched.

  Second pass, once per model config: write the feature maps, create and
  export the memorized model, learn and export the learned model, and print
  progress plus the learner's stats.

  Args:
    model_configs: iterable of model configs.  Each one is passed to
      `training_data.write_feature_maps_from_seti` and
      `model_exporter.Memorizer`, and must expose `name`,
      `memorized_model_loc` and `learned_model_loc`.
    setis: the examples.  Iterated once for holdout marking and then handed
      to the feature-map writer, memorizer and learner for every config, so
      it must be re-iterable (e.g. a list).  Its elements are mutated.

  Returns:
    None.
  """
  # Determine which setis are for holdout and which are for training.
  # The loop variable is not called `seti`, to avoid shadowing the module of
  # that name used elsewhere in this file.
  for example in setis:
    if random.random() < 0.1:
      example.for_holdout = True

  for model_config in model_configs:
    # - Look at the occurrence of features and their index.
    fs, fs2 = training_data.write_feature_maps_from_seti(model_config, setis)

    # Single-argument print(...) emits the same text under Python 2's print
    # statement and Python 3's print function.
    print('Learning model %s' % (model_config.name))

    # - Memorize the examples and write the memorized model to a file.
    memorizer = model_exporter.Memorizer(fs, model_config)
    memorized_model = memorizer.create_model(setis)
    memorized_exporter = model_exporter.MemorizedModel()
    memorized_exporter.write_features(
        memorized_model, model_config.memorized_model_loc)

    # - Build a model for unmemorized examples and write it to a file.
    model_learner = learner.Learner(fs2)
    learned_model = model_learner.learn(setis)
    learned_exporter = model_exporter.LearnedModel()
    learned_exporter.write_model(learned_model, model_config.learned_model_loc)
    print('Wrote learned model to: %s' % (model_config.learned_model_loc))
    print('Model performed as: %s' % (str(model_learner.stats())))

    print('Finished model generation for %s' % (model_config.name))
def testScoreWithLearnedModel():
  """Compare server scores against hand-computed learned-model outputs.

  A fixed weight dict is exported as the learned model and an empty list as
  the memorized model.  For each model type (linear, then logistic) a 'v0'
  config is built, feature maps are written, a server is created, and each
  seti's score is compared with the weighted sum of its features passed
  through that model type's transform (identity or the logistic function).
  """
  model = {
    'gender_MISSING': 0.0,
    'gender_f': -0.099999999999999992,
    ':': -0.7999999999999996,
    'height': 0.29999999999999993
  }
  model_exporter.LearnedModel().write_model(model, 'tmp/learned_model.csv')
  model_exporter.MemorizedModel().write_features([], 'tmp/memorized_model.csv')

  # Setup feature selector and such.
  orig_cols = ['gender', 'height']
  setis = [
    seti.create_seti(5.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)]),
    seti.create_seti(3.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)]),
  ]

  def raw_score(gender_missing, gender_f, height):
    # Weighted sum; ':' is the term added with no multiplier.
    return (model[':'] + model['gender_MISSING'] * gender_missing
            + model['gender_f'] * gender_f + model['height'] * height)

  # Expected pre-transform outputs, in the same order as `setis`.
  wants = [raw_score(0, 0, 6.0), raw_score(0, 1, 3.0)]

  cases = zip(
    [model_cfg.LINEAR_REGRESSION, model_cfg.LOGISTIC_REGRESSION],
    [lambda x: x, lambda x: 1 / (1 + math.exp(-x))])

  # Test the actual model.
  for model_type, transform in cases:
    model_config = model_cfg.ModelConfig(
      'v0', 'tmp/learned_model.csv', 'tmp/memorized_model.csv',
      model_cfg.ColsCfg(orig_cols, orig_cols),
      'tmp/feature_map_v0.csv', 'tmp/feature_map2_v0.csv',
      model_type=model_type)
    training_data.write_feature_maps_from_seti(model_config, setis)

    ss = seti_server.make_from_config([model_config])

    # NOTE(review): this is an exact comparison of floats; the memorized-model
    # test uses assertFloatEquals instead -- confirm exact equality is intended.
    for setie, want in zip(setis, wants):
      assertEquals(transform(want), ss.score(setie))