def testScoreWithMemorizedModel():
    """Check that scoring falls back on the memorized model.

    An empty learned model is written to disk, a memorized model is built
    from two examples using only the 'gender' column, and the server's
    in-memory learned model is cleared. A new example with gender 'f' and
    no height is then expected to score 7.0, the first argument passed to
    create_seti for the 'f' training example.
    """
    learned_path = 'tmp/learned_model_v1.csv'
    all_columns = ['gender', 'height']
    memorized_columns = ['gender']
    config = model_cfg.ModelConfig(
        'v1',
        learned_path,
        'tmp/memorized_model_v1.csv',
        model_cfg.ColsCfg(all_columns, memorized_columns),
        'tmp/feature_map_v1.csv',
        'tmp/feature_map2_v1.csv')

    male = seti.create_seti(
        5.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
    female = seti.create_seti(
        7.0, bfs=[('gender', 'f')], cfs=[('height', 6.0)])
    examples = [male, female]

    # Persist a learned model with no coefficients.
    model_exporter.LearnedModel().write_model({}, learned_path)

    # Build the memorized model from the examples and persist it.
    selector, _ = training_data.write_feature_maps_from_seti(
        config, examples)
    memorizer = model_exporter.Memorizer(selector, config)
    memorized = memorizer.create_model(examples)
    model_exporter.MemorizedModel().write_features(
        memorized, config.memorized_model_loc)

    # Load the server and blank the learned model it holds for 'v1'.
    server = seti_server.make_from_config([config])
    server.model_map['v1'].learned_model = {}

    query = seti.create_seti(7.0, bfs=[('gender', 'f')])
    assertFloatEquals(7.0, server.score(query))
def testLearn():
    """Smoke-test Learner.learn on two examples, one flagged for holdout.

    The test makes no assertions; it only prints the learner's stats so
    they can be inspected manually.
    """
    orig_cols = ["gender", "height"]
    s0 = seti.create_seti(1.0, bfs=[("gender", "m")], cfs=[("height", 6.0)])
    s1 = seti.create_seti(0.0, bfs=[("gender", "f")], cfs=[("height", 3.0)])
    s1.for_holdout = True
    setis = [s0, s1]

    fs = feature_selector.FeatureSelect()
    fs.generate_feature_map(orig_cols, setis)

    model_learner = learner.Learner(fs)
    model_learner.learn(setis)
    # Parenthesised single-argument form: prints the same thing under the
    # Python 2 print statement and also parses as a Python 3 call.
    print(model_learner.stats())
def testFVSimple():
    """Check column names and feature vectors for a two-example data set.

    After generating the feature map from 'gender' and 'height', the
    selector must expose the columns gender_MISSING, gender_f and height,
    and each example must map to the matching expected vector.
    """
    columns = ['gender', 'height']
    examples = [
        seti.create_seti(1.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)]),
        seti.create_seti(0.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)]),
    ]

    selector = feature_selector.FeatureSelect()
    selector.generate_feature_map(columns, examples)
    assertEquals(
        ['gender_MISSING', 'gender_f', 'height'], selector.all_col_names)

    # One expected vector per example, in the column order asserted above.
    expected_vectors = [[0, 0, 6.0], [0, 1, 3.0]]
    for example, expected in zip(examples, expected_vectors):
        assertEquals(expected, seti.float_feature_vector(selector, example))
def testFeatureSelectorMissingColumn():
    """Check FeatureSelector.get_index for known and unknown features.

    The feature map is built from examples that each lack one of the two
    columns. Known features must return their index with a None error;
    the unknown feature 'gender:metro' must return a None index and an
    error that is not the empty string.
    """
    s0 = seti.create_seti(1.0, bfs=[], cfs=[('height', 6.0)])
    s1 = seti.create_seti(0.0, bfs=[('gender', 'f')], cfs=[])
    s2 = seti.create_seti(0.0, bfs=[('gender', 'm')], cfs=[])
    setis = [s0, s1, s2]

    fs = feature_selector.FeatureSelector()
    fs.build_feature_map(setis)

    # (feature name, expected index); None marks a feature that is absent
    # from the map.
    expectations = [
        ('height', 0),
        ('gender:m', 2),
        ('gender:f', 1),
        ('gender:metro', None),
    ]
    for feature, want_index in expectations:
        index, err = fs.get_index(feature)
        assertEquals(want_index, index)
        if index is None:
            assert err != ''
        else:
            assert err is None
def testScoreWithLearnedModel():
    """Check server scores against a hand-written learned model.

    The same coefficients are scored under both LINEAR_REGRESSION and
    LOGISTIC_REGRESSION model types. The expected score is the raw linear
    combination passed through the identity or the logistic function
    respectively. The memorized model is written with no features.
    """
    model = {
        'gender_MISSING': 0.0,
        'gender_f': -0.099999999999999992,
        ':': -0.7999999999999996,
        'height': 0.29999999999999993
    }
    lm = model_exporter.LearnedModel()
    lm.write_model(model, 'tmp/learned_model.csv')
    mm = model_exporter.MemorizedModel()
    mm.write_features([], 'tmp/memorized_model.csv')

    # Setup feature selector and such.
    orig_cols = ['gender', 'height']
    s0 = seti.create_seti(5.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
    s1 = seti.create_seti(3.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)])
    setis = [s0, s1]

    # Raw linear scores for s0 and s1. They do not depend on the model
    # type, so they are computed once outside the loop.
    w0 = (model[':'] + model['gender_MISSING'] * 0 +
          model['gender_f'] * 0 + model['height'] * 6.0)
    w1 = (model[':'] + model['gender_MISSING'] * 0 +
          model['gender_f'] * 1 + model['height'] * 3.0)
    wants = [w0, w1]

    model_types = [model_cfg.LINEAR_REGRESSION, model_cfg.LOGISTIC_REGRESSION]
    transforms = [lambda x: x, lambda x: 1 / (1 + math.exp(-x))]

    # Test the actual model.
    for model_type, transform in zip(model_types, transforms):
        model_config = model_cfg.ModelConfig(
            'v0',
            'tmp/learned_model.csv',
            'tmp/memorized_model.csv',
            model_cfg.ColsCfg(orig_cols, orig_cols),
            'tmp/feature_map_v0.csv',
            'tmp/feature_map2_v0.csv',
            model_type=model_type)
        training_data.write_feature_maps_from_seti(model_config, setis)
        ss = seti_server.make_from_config([model_config])
        for setie, want in zip(setis, wants):
            # Use the float comparison helper, as the other scoring tests
            # in this file do, so a different summation order inside the
            # server cannot fail the test on rounding alone.
            assertFloatEquals(transform(want), ss.score(setie))
def testFVManyCategorical():
    """Check feature vectors for a categorical column with many values.

    Five examples cover four 'dir' values plus one example with no 'dir'
    at all. The selector must expose the columns dir_MISSING, dir_south,
    dir_east, dir_west and dist, and each example must map to the
    matching expected vector.
    """
    # (example, expected feature vector) pairs.
    cases = [
        (seti.create_seti(1.0, bfs=[('dir', 'north')], cfs=[('dist', 6.0)]),
         [0, 0, 0, 0, 6.0]),
        (seti.create_seti(1.0, bfs=[('dir', 'south')], cfs=[('dist', 5.0)]),
         [0, 1, 0, 0, 5.0]),
        (seti.create_seti(1.0, bfs=[('dir', 'east')], cfs=[('dist', 4.0)]),
         [0, 0, 1, 0, 4.0]),
        (seti.create_seti(1.0, bfs=[('dir', 'west')], cfs=[('dist', 3.0)]),
         [0, 0, 0, 1, 3.0]),
        (seti.create_seti(1.0, bfs=[], cfs=[('dist', 2.0)]),
         [1, 0, 0, 0, 2.0]),
    ]
    examples = [example for example, _ in cases]

    columns = ['dir', 'dist']
    selector = feature_selector.FeatureSelect()
    selector.generate_feature_map(columns, examples)
    assertEquals(
        ['dir_MISSING', 'dir_south', 'dir_east', 'dir_west', 'dist'],
        selector.all_col_names)

    for example, expected in cases:
        assertEquals(expected, seti.float_feature_vector(selector, example))
def testRun():
    """Run the pipeline on two examples, then score through the server.

    The first check scores a new example and expects the linear
    combination of the hard-coded coefficients below. The second check
    clears the server's learned model and scores a training example; its
    expected value is computed from the same coefficients.
    """
    # Setup feature selector and such.
    columns = ['gender', 'height']
    male = seti.create_seti(
        1.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
    female = seti.create_seti(
        0.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)])
    examples = [male, female]

    config = model_cfg.ModelConfig(
        'v0',
        'tmp/learned_model.csv',
        'tmp/memorized_model.csv',
        model_cfg.ColsCfg(columns, columns),
        'tmp/feature_map_v0.csv',
        'tmp/feature_map2_v0.csv')
    run_pipeline.run([config], examples)

    # Coefficients the expected scores are computed from.
    model = {
        'gender_MISSING': 0.0,
        'gender_f': -0.099999999999999992,
        ':': -0.7999999999999996,
        'height': 0.29999999999999993
    }

    def linear_score(is_female, height):
        # Same operand order as a hand-written sum, so the float result
        # is bit-for-bit the same.
        return (model[':'] + model['gender_MISSING'] * 0 +
                model['gender_f'] * is_female + model['height'] * height)

    # Test model gets created and loaded.
    server = seti_server.make_from_config([config])

    # Test the learned model works by scoring one unseen example.
    unseen = seti.create_seti(
        5.0, bfs=[('gender', 'm')], cfs=[('height', 2.0)])
    assertFloatEquals(linear_score(0, 2.0), server.score(unseen))

    # Test the memorized model works. Destroy the learned model.
    server.model_map['v0'].learned_model = {}
    assertFloatEquals(linear_score(1, 3.0), server.score(female))