コード例 #1
0
ファイル: textMining_hash.py プロジェクト: ai-se/SMOTE
 def tune_learner(train_X):
   """Tune the learner's parameters on a random split of the training data.

   Roughly 70% of the row positions are drawn as the fitting subset and
   every position that was not drawn becomes the tuning subset.  The
   names ``train_Y``, ``learner``, ``goal``, ``target_class`` and
   ``DE_Tune_ML`` are not parameters; they are read from the enclosing
   scope.

   :param train_X: indexable feature matrix; ``train_Y`` is indexed with
                   the same positions.
   :return: whatever ``DE_Tune_ML(...).Tune()`` returns.
   """
   train_len = len(train_X)
   # The sample size must be an integer: current NumPy releases reject a
   # float such as ``train_len * 0.7`` with a TypeError.
   # NOTE(review): np.random.choice samples WITH replacement by default,
   # so new_train_index can hold duplicates and the tuning subset can be
   # larger than 30% -- confirm this is intended.
   new_train_index = np.random.choice(range(train_len), int(train_len * 0.7))
   # Tuning rows are exactly the positions never drawn above.
   new_tune_index = list(set(range(train_len)) - set(new_train_index))
   new_train_X = train_X[new_train_index]
   new_train_Y = train_Y[new_train_index]
   new_tune_X = train_X[new_tune_index]
   new_tune_Y = train_Y[new_tune_index]
   clf = learner(new_train_X, new_train_Y, new_tune_X, new_tune_Y, goal)
   tuner = DE_Tune_ML(clf, clf.get_param(), target_class, goal)
   return tuner.Tune()
コード例 #2
0
ファイル: tester.py プロジェクト: pombreda/python-1
def test_unknown_data():
    """Fit ``learner`` on generated data, starting from all-zero G and H.

    Builds ``Y`` with ``generate_Y``, runs ``learner`` with zero-filled
    arrays of shape ``(l, n, n)`` and ``(N, n)`` as the two trailing
    arguments, then prints the ``l1_loss`` between ``Y`` and its
    reconstruction from ``decoder``.
    """
    n, l, rho = 100, 1, 0.01
    d = int(np.ceil(n ** 0.15))
    N = int(np.log(n) / rho ** 2)

    Y = generate_Y(n, l, d, rho, N)
    zero_G = np.zeros((l, n, n))
    zero_H = np.zeros((N, n))
    Gp, Hp = learner(n, l, d, rho, Y, zero_G, zero_H)
    print('Learned the network')

    reconstruction = decoder(Gp, Hp)
    training_error = l1_loss(Y, reconstruction)
    print('learner: training error %.4f' % training_error)
コード例 #3
0
ファイル: mnist.py プロジェクト: pombreda/python-1
def test_mnist():
    """Learn a network on the MNIST training split and score an SVM on it.

    Loads the splits with ``generate_mnist(fname)`` (``fname`` is read
    from the enclosing module scope), estimates ``rho``, runs
    ``learner``, prints the ``l1_loss`` of the decoded reconstruction,
    then fits a ``LinearSVC`` on the learned ``Hp`` and prints the
    zero-one loss on the training targets and on the encoded test set.
    """
    train_set, valid_set, test_set = generate_mnist(fname)
    Y, target = train_set
    Yt, targett = test_set

    N, n = Y.shape
    l = 4
    d = int(np.ceil(n ** 0.19))
    rho = estimate_rho(l, d, Y)

    available = Y.shape[0]
    # NOTE(review): N was unpacked from Y.shape just above, so this
    # truncation branch cannot trigger as written; it looks like a
    # leftover from when N was computed as int(np.log(n)/rho**2) up front.
    if N < available:
        N = int(np.log(n) / rho ** 2)
        Y, target = Y[0:N, :], target[0:N]
        Yt, targett = Yt[0:N, :], targett[0:N]
    else:
        print('samples N: %.d, Y: %.d' % (N, available))

    # Space-separated dump of the run configuration.
    print('%s %s %s %s %s' % (n, l, d, rho, N))
    Gp, Hp = learner(n, l, d, rho, Y)
    print('Learned the network')

    reconstruction = decoder(Gp, Hp)
    print('NN training error: %.4f' % l1_loss(Y, reconstruction))

    # Classify on the learned representation Hp.
    clf = svm.LinearSVC(loss='l2', penalty='l1', dual=False)
    clf.fit(Hp, target)
    train_pred = clf.predict(Hp)
    print('training error: %.4f' % zero_one_loss(target, train_pred))

    # Encode the test samples with the learned Gp before predicting.
    Htp = encoder(d, Gp, Yt)
    test_pred = clf.predict(Htp)
    print('test error: %.4f' % zero_one_loss(targett, test_pred))
コード例 #4
0
ファイル: tester.py プロジェクト: pombreda/python-1
def test_leaner():
    """Run ``learner`` on data from ``create_data`` and print both errors.

    The generating parameters are printed first; ``l``, ``d`` and ``rho``
    are then overridden (``rho`` re-estimated from ``Y``) before
    learning.  Training error compares ``Y`` with its decoded
    reconstruction; test error encodes ``Yt`` with the learned ``Gp``,
    decodes it again and compares the result with ``Yt``.
    """
    training_data, test_data = create_data()
    G, H, Y, n, l, d, rho, N = training_data
    Gt, Ht, Yt, nt, lt, dt, rhot, Nt = test_data

    # Show the parameters the data was created with, space-separated.
    print('%s %s %s %s %s' % (n, l, d, rho, N))

    # From here on use fixed l/d and an estimated rho instead.
    l, d = 1, 2
    rho = estimate_rho(l, d, Y)

    Gp, Hp = learner(n, l, d, rho, Y, G, H)
    print('Learned the network')

    train_reconstruction = decoder(Gp, Hp)
    print('learner: training error %.4f' % l1_loss(Y, train_reconstruction))

    test_codes = encoder(d, Gp, Yt)
    test_reconstruction = decoder(Gp, test_codes)
    print('learner: test error %.4f' % l1_loss(Yt, test_reconstruction))

    '''
コード例 #5
0
ファイル: textMining_hash.py プロジェクト: ai-se/SMOTE
def cross_val(pd_data, learner, target_class, goal, isWhat="", fold=5,
              repeats=2):
  """Run repeated stratified k-fold cross-validation of ``learner``.

  The last column of ``pd_data`` is used as the class label and all other
  columns as features.  Substrings of ``isWhat`` switch optional steps:

  * ``"Smote"``         -- resample each training fold with ``smote``
                           (defaults ``k=5``, ``up_to_num=[]``).
  * ``"_TunedSmote"``   -- first tune the SMOTE parameters on a 70/30
                           split of the training fold; only the 70% part
                           is then used as training data.
  * ``"_TunedLearner"`` -- tune the learner's parameters on a random
                           split of the training fold.

  :param pd_data: pandas DataFrame, label in the last column.
  :param learner: callable invoked as
                  ``learner(train_X, train_Y, test_X, test_Y, goal)``;
                  the result must provide ``learn(F, **params)`` and,
                  when tuning, ``get_param()``.
  :param target_class: forwarded to the tuners.
  :param goal: forwarded to the learner and the tuners.
  :param isWhat: string of flags, see above.
  :param fold: number of folds per repeat.
  :param repeats: number of times the k-fold procedure is repeated.
  :return: ``(avg_evaluation, F)`` -- the tuner's evaluation value
           averaged over ``repeats * fold`` runs (0 when the learner is
           not tuned) and the accumulator returned by the last
           ``learn`` call.
  """

  def tune_learner(train_X):
    """Tune learner parameters on a random split of the current fold.

    ``train_Y`` is read from the enclosing scope; it is assigned in the
    fold loop before this helper is called.
    """
    train_len = len(train_X)
    # The sample size must be an integer: current NumPy releases reject a
    # float such as ``train_len * 0.7`` with a TypeError.
    # NOTE(review): np.random.choice samples WITH replacement by default,
    # unlike random.sample in tune_SMOTE -- confirm this is intended.
    new_train_index = np.random.choice(range(train_len), int(train_len * 0.7))
    # Tuning rows are exactly the positions never drawn above.
    new_tune_index = list(set(range(train_len)) - set(new_train_index))
    new_train_X = train_X[new_train_index]
    new_train_Y = train_Y[new_train_index]
    new_tune_X = train_X[new_tune_index]
    new_tune_Y = train_Y[new_tune_index]
    clf = learner(new_train_X, new_train_Y, new_tune_X, new_tune_Y, goal)
    tuner = DE_Tune_ML(clf, clf.get_param(), target_class, goal)
    return tuner.Tune()

  def tune_SMOTE(train_pd):
    """Tune SMOTE's ``k`` and ``up_to_num`` on a 70/30 split of the fold.

    Only called when ``"_TunedSmote"`` is in ``isWhat``.  Returns the
    tuned parameters together with the 70% part, which the caller uses
    as its training data.
    """
    train_len = len(train_pd)
    new_train_index = random.sample(train_pd.index, int(train_len * 0.7))
    new_train = train_pd.ix[new_train_index]
    new_tune = train_pd.drop(new_train_index)
    A_smote = smote(new_train)
    # One [low, high] search range (50%..150% of the majority count) per
    # remaining label.
    num_range = [[int(A_smote.get_majority_num() * 0.5),
                  int(A_smote.get_majority_num() * 1.5)]] * (
                  A_smote.label_num - 1)
    params_to_tune = {"k": [2, 20], "up_to_num": num_range}
    tuner = DE_Tune_SMOTE(learner, smote, params_to_tune, new_train,
                          new_tune, target_class, goal)
    params = tuner.Tune()
    return params, new_train

  F = {}
  total_evaluation = 0
  for _ in xrange(repeats):  # a fresh shuffled partition per repeat
    kf = StratifiedKFold(pd_data.ix[:, pd_data.columns[-1]].values, fold,
                         shuffle=True)
    for train_index, test_index in kf:
      train_pd = pd_data.ix[train_index]
      test_pd = pd_data.ix[test_index]
      if "Smote" in isWhat:
        k = 5
        up_to_num = []
        if "_TunedSmote" in isWhat:
          # Continue with the reduced training data, not the original
          # fold, because the remainder was used for tuning.
          params, train_pd = tune_SMOTE(train_pd)
          k = params["k"]
          up_to_num = params["up_to_num"]
        train_pd = smote(train_pd, k, up_to_num).run()

      train_X = train_pd.ix[:, train_pd.columns[:-1]].values
      train_Y = train_pd.ix[:, train_pd.columns[-1]].values
      test_X = test_pd.ix[:, test_pd.columns[:-1]].values
      test_Y = test_pd.ix[:, test_pd.columns[-1]].values
      if "_TunedLearner" in isWhat:
        params, evaluation = tune_learner(train_X)
      else:
        params, evaluation = {}, 0
      F = learner(train_X, train_Y, test_X, test_Y, goal).learn(F, **params)
      total_evaluation += evaluation
  # Force true division: if the evaluations are integers, a plain ``/``
  # would floor the average under Python 2.
  avg_evaluation = float(total_evaluation) / (repeats * fold)
  return avg_evaluation, F