def testFVSimple():
  """Check the feature map and feature vectors for a two-column data set.

  Two SETI instances are created, each with a 'gender' value (passed via
  ``bfs``) and a 'height' value (passed via ``cfs``). After generating the
  feature map from them, the test asserts the resulting column names and the
  float feature vector produced for each instance.

  The expected column names contain no 'gender_m' entry; accordingly the
  instance with gender 'm' is expected to have 0 in both gender columns.
  """
  orig_cols = ['gender', 'height']

  s0 = seti.create_seti(1.0, bfs=[('gender', 'm')], cfs=[('height', 6.0)])
  s1 = seti.create_seti(0.0, bfs=[('gender', 'f')], cfs=[('height', 3.0)])
  setis = [s0, s1]
  # Expected vectors, listed in the same order as `setis`.
  expected_fvs = [[0, 0, 6.0], [0, 1, 3.0]]

  fs = feature_selector.FeatureSelect()
  fs.generate_feature_map(orig_cols, setis)
  assertEquals(['gender_MISSING', 'gender_f', 'height'], fs.all_col_names)

  # Walk the two parallel lists together instead of indexing by position.
  for expected_fv, setie in zip(expected_fvs, setis):
    assertEquals(expected_fv, seti.float_feature_vector(fs, setie))
def testFVManyCategorical():
  """Check the feature map and vectors for a column with several categories.

  Five SETI instances are created: four carry a distinct 'dir' value (passed
  via ``bfs``) and the fifth carries none; all carry a 'dist' value (passed
  via ``cfs``). After generating the feature map, the test asserts the
  resulting column names and the float feature vector for each instance.

  The expected column names contain no 'dir_north' entry, so the 'north'
  instance is expected to have 0 in every 'dir' column, while the instance
  without a 'dir' value is expected to have 1 in 'dir_MISSING'.
  """
  s0 = seti.create_seti(1.0, bfs=[('dir', 'north')], cfs=[('dist', 6.0)])
  s1 = seti.create_seti(1.0, bfs=[('dir', 'south')], cfs=[('dist', 5.0)])
  s2 = seti.create_seti(1.0, bfs=[('dir', 'east')], cfs=[('dist', 4.0)])
  s3 = seti.create_seti(1.0, bfs=[('dir', 'west')], cfs=[('dist', 3.0)])
  s4 = seti.create_seti(1.0, bfs=[], cfs=[('dist', 2.0)])
  setis = [s0, s1, s2, s3, s4]
  # Expected vectors, listed in the same order as `setis`.
  expected_fvs = [
    [0, 0, 0, 0, 6.0],
    [0, 1, 0, 0, 5.0],
    [0, 0, 1, 0, 4.0],
    [0, 0, 0, 1, 3.0],
    [1, 0, 0, 0, 2.0],
  ]

  orig_cols = ['dir', 'dist']
  fs = feature_selector.FeatureSelect()
  fs.generate_feature_map(orig_cols, setis)

  assertEquals(
    ['dir_MISSING', 'dir_south', 'dir_east', 'dir_west', 'dist'],
    fs.all_col_names)

  # Walk the two parallel lists together instead of indexing by position.
  for expected_fv, setie in zip(expected_fvs, setis):
    assertEquals(expected_fv, seti.float_feature_vector(fs, setie))
Example #3
0
  def learn(self, setis):
    """Fit a linear regression on the non-holdout SETIs and store the model.

    Every element of ``setis`` whose ``for_holdout`` attribute is truthy is
    appended to ``self.holdout_setis`` and left out of training. Each
    remaining element contributes one training row (its
    ``seti.float_feature_vector``) and one target value (its ``label``).

    Args:
      setis: iterable of SETI instances.

    Returns:
      A dict with the key ':' holding the fitted intercept (as ``float``) and
      the names from ``self.fs.all_col_names`` paired positionally with the
      fitted coefficients. A separate copy with every value coerced to
      ``float`` is stored on ``self.model``.
    """
    # Column names that label the fitted coefficients.
    feature_cols = self.fs.all_col_names

    # Convert SETI inputs into X (rows) and y (targets).
    y = []
    X = []
    for setie in setis:
      if setie.for_holdout:
        # Holdout instances are set aside, not trained on.
        self.holdout_setis.append(setie)
        continue
      x = seti.float_feature_vector(self.fs, setie)
      y.append(setie.label)
      X.append(x)

    lm = LinearRegression()
    lm.fit(X, y)

    model = {':': float(lm.intercept_)}
    model.update(zip(feature_cols, lm.coef_))

    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    self.model = {k: float(v) for k, v in model.items()}
    return model