def testFVSimple():
    """Check feature-map column names and float vectors for two examples.

    Two examples are built, each with a 'gender' entry passed via ``bfs``
    and a 'height' entry passed via ``cfs``.  The test expects the
    generated columns to be 'gender_MISSING', 'gender_f' and 'height';
    note that the expected output has no column for the value 'm', and
    the example carrying it is expected to encode as zeros in both
    gender columns.
    """
    male = seti.create_seti(
        1.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
    female = seti.create_seti(
        0.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)])
    examples = [male, female]

    fs = feature_selector.FeatureSelect()
    fs.generate_feature_map(['gender', 'height'], examples)
    assertEquals(['gender_MISSING', 'gender_f', 'height'], fs.all_col_names)

    # One expected vector per example, in the same order as `examples`.
    expected_vectors = [[0, 0, 6.0], [0, 1, 3.0]]
    for expected, example in zip(expected_vectors, examples):
        assertEquals(expected, seti.float_feature_vector(fs, example))
def testFVManyCategorical():
    """Check encoding of a 'dir' feature with four values plus a missing one.

    Four examples carry a distinct 'dir' value ('north', 'south', 'east',
    'west') and a fifth carries no 'dir' at all; every example also has a
    'dist' entry passed via ``cfs``.  The expected columns are
    'dir_MISSING', 'dir_south', 'dir_east', 'dir_west' and 'dist': there
    is no 'dir_north' column, and the 'north' example is expected to
    encode as zeros in every 'dir' column.  The example without 'dir' is
    expected to set only 'dir_MISSING'.
    """
    # (bfs argument, 'dist' value, expected float feature vector)
    cases = [
        ([('dir', 'north')], 6.0, [0, 0, 0, 0, 6.0]),
        ([('dir', 'south')], 5.0, [0, 1, 0, 0, 5.0]),
        ([('dir', 'east')], 4.0, [0, 0, 1, 0, 4.0]),
        ([('dir', 'west')], 3.0, [0, 0, 0, 1, 3.0]),
        ([], 2.0, [1, 0, 0, 0, 2.0]),
    ]
    examples = [
        seti.create_seti(1.0, bfs=bfs, cfs=[('dist', dist)])
        for bfs, dist, _ in cases
    ]

    fs = feature_selector.FeatureSelect()
    fs.generate_feature_map(['dir', 'dist'], examples)
    assertEquals(
        ['dir_MISSING', 'dir_south', 'dir_east', 'dir_west', 'dist'],
        fs.all_col_names)

    for (_, _, expected), example in zip(cases, examples):
        assertEquals(expected, seti.float_feature_vector(fs, example))
def learn(self, setis):
    """Fit a LinearRegression on the non-holdout examples in ``setis``.

    Examples whose ``for_holdout`` attribute is truthy are appended to
    ``self.holdout_setis`` and left out of the fit.  Every other example
    contributes one row (``seti.float_feature_vector(self.fs, example)``)
    and one target (``example.label``).

    Side effect: ``self.model`` is set to a copy of the resulting mapping
    with every value passed through ``float()``.

    Returns:
        A dict mapping ':' to the fitted intercept (as a float) and each
        name in ``self.fs.all_col_names`` to the matching entry of the
        fitted ``coef_``, paired positionally.  The coefficient values in
        the returned dict are not converted to float.
    """
    feature_names = self.fs.all_col_names

    # Split the input: holdout examples are set aside, the rest become
    # training rows and targets.
    rows = []
    targets = []
    for example in setis:
        if example.for_holdout:
            self.holdout_setis.append(example)
        else:
            rows.append(seti.float_feature_vector(self.fs, example))
            targets.append(example.label)

    regression = LinearRegression()
    regression.fit(rows, targets)

    # ':' is the key used for the intercept; it is written first, so a
    # feature column that happened to be named ':' would overwrite it.
    model = {':': float(regression.intercept_)}
    model.update(zip(feature_names, regression.coef_))

    self.model = dict(
        (name, float(weight)) for name, weight in model.iteritems())
    return model