def testScoreWithMemorizedModel():
    """Score a seti through a server whose learned model is empty.

    Writes an empty learned model, builds and writes a memorized model from
    two example setis (with ['gender'] passed as the memorized columns), and
    then expects the server to return 7.0 for a seti that carries only
    ('gender', 'f').
    """
    all_columns = ['gender', 'height']
    columns_to_memorize = ['gender']
    model_config = model_cfg.ModelConfig(
        'v1',
        'tmp/learned_model_v1.csv',
        'tmp/memorized_model_v1.csv',
        model_cfg.ColsCfg(all_columns, columns_to_memorize),
        'tmp/feature_map_v1.csv',
        'tmp/feature_map2_v1.csv')

    male_example = seti.create_seti(
        5.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
    female_example = seti.create_seti(
        7.0, bfs=[('gender', 'f')], cfs=[('height', 6.0)])
    examples = [male_example, female_example]

    # The learned model on disk has no entries.
    model_exporter.LearnedModel().write_model({}, 'tmp/learned_model_v1.csv')

    # Build the memorized model from the examples and write it out.
    feature_selector, _unused = training_data.write_feature_maps_from_seti(
        model_config, examples)
    memorizer = model_exporter.Memorizer(feature_selector, model_config)
    memorized = memorizer.create_model(examples)
    model_exporter.MemorizedModel().write_features(
        memorized, model_config.memorized_model_loc)

    server = seti_server.make_from_config([model_config])
    # Also blank out the learned model held by the server for 'v1'.
    server.model_map['v1'].learned_model = {}

    query = seti.create_seti(7.0, bfs=[('gender', 'f')])
    assertFloatEquals(7.0, server.score(query))
def run(model_configs, setis):
    """Flag a random subset of setis as holdout, then build every model.

    For each config in ``model_configs`` this writes the feature maps, a
    memorized model and a learned model, printing progress as it goes.

    Args:
        model_configs: model configs to process in order; each is read for
            ``name``, ``memorized_model_loc`` and ``learned_model_loc``.
        setis: collection of examples. An example gets ``for_holdout = True``
            set on it in place when its random draw is below 0.1; other
            examples are left untouched.
    """
    # Split examples into holdout and training: one random draw per example.
    for example in setis:
        if random.random() < 0.1:
            example.for_holdout = True

    for model_config in model_configs:
        # Look at the occurrence of features and their index.
        memorizer_features, learner_features = (
            training_data.write_feature_maps_from_seti(model_config, setis))
        # A single parenthesised argument prints exactly like the bare
        # print statement.
        print('Learning model %s' % (model_config.name))

        # Memorize the examples.
        memorizer = model_exporter.Memorizer(memorizer_features, model_config)
        memorized = memorizer.create_model(setis)
        model_exporter.MemorizedModel().write_features(
            memorized, model_config.memorized_model_loc)

        # Build a model for unmemorized examples.
        model_learner = learner.Learner(learner_features)
        learned = model_learner.learn(setis)
        model_exporter.LearnedModel().write_model(
            learned, model_config.learned_model_loc)
        print('Wrote learned model to: %s' % (model_config.learned_model_loc))
        print('Model performed as: %s' % (str(model_learner.stats())))

        print('Finished model generation for %s' % (model_config.name))
def testScoreWithLearnedModel():
    """Check the server's score against a hand-computed learned-model score.

    A fixed weight dict is written as the learned model, next to an empty
    memorized model. For each model type (linear regression, logistic
    regression) the server's score for two example setis is compared with the
    weighted sum computed here, passed through the matching transform
    (identity, or the logistic function 1 / (1 + exp(-x))).
    """
    # ':' is used as the constant term of the expected sums below.
    model = {
        'gender_MISSING': 0.0,
        'gender_f': -0.099999999999999992,
        ':': -0.7999999999999996,
        'height': 0.29999999999999993,
    }
    model_exporter.LearnedModel().write_model(model, 'tmp/learned_model.csv')
    model_exporter.MemorizedModel().write_features(
        [], 'tmp/memorized_model.csv')

    # Set up the columns and the example setis.
    orig_cols = ['gender', 'height']
    s0 = seti.create_seti(5.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
    s1 = seti.create_seti(3.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)])
    setis = [s0, s1]

    # Expected raw scores, one per seti. They depend only on the weights, so
    # they are computed once rather than once per model type.
    w0 = (model[':'] + model['gender_MISSING'] * 0 + model['gender_f'] * 0
          + model['height'] * 6.0)
    w1 = (model[':'] + model['gender_MISSING'] * 0 + model['gender_f'] * 1
          + model['height'] * 3.0)
    wants = [w0, w1]

    # Each model type is paired with the transform applied to the raw score.
    model_types = [model_cfg.LINEAR_REGRESSION, model_cfg.LOGISTIC_REGRESSION]
    transforms = [lambda x: x, lambda x: 1 / (1 + math.exp(-x))]

    # Test the actual model.
    for model_type, transform in zip(model_types, transforms):
        model_config = model_cfg.ModelConfig(
            'v0',
            'tmp/learned_model.csv',
            'tmp/memorized_model.csv',
            model_cfg.ColsCfg(orig_cols, orig_cols),
            'tmp/feature_map_v0.csv',
            'tmp/feature_map2_v0.csv',
            model_type=model_type)
        training_data.write_feature_maps_from_seti(model_config, setis)
        ss = seti_server.make_from_config([model_config])

        for example, raw_want in zip(setis, wants):
            # The server computes its score independently of the sums above,
            # so compare with the float helper instead of exact equality.
            # NOTE(review): assumes assertFloatEquals(expected, actual) is in
            # scope here, as in testScoreWithMemorizedModel — confirm.
            assertFloatEquals(transform(raw_want), ss.score(example))