Example #1
  def test_iris(self):
    num_constraints = 200

    # Build pairwise constraints from the labels, then fit ITML on the iris
    # data using the legacy metric_learn API (constraints passed to the constructor).
    n = self.iris_points.shape[0]
    C = ITML.prepare_constraints(self.iris_labels, n, num_constraints)
    itml = ITML(self.iris_points, C)
    itml.fit(verbose=False)

    # The learned transformation should reduce class overlap.
    csep = class_separation(itml.transform(), self.iris_labels)
    self.assertLess(csep, 0.4)  # it's not great
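
This test relies on the legacy metric_learn interface, where constraints are generated up front with prepare_constraints and the data is passed to the constructor. A minimal standalone sketch of the same workflow on iris, assuming that same legacy API (current metric_learn releases expose scikit-learn style estimators with fit(X, y) instead):

from sklearn.datasets import load_iris
from metric_learn import ITML

iris = load_iris()
X, y = iris.data, iris.target

num_constraints = 200
# Sample index constraints from the labels, then learn the metric.
C = ITML.prepare_constraints(y, X.shape[0], num_constraints)
itml = ITML(X, C)
itml.fit(verbose=False)
X_itml = itml.transform()  # training data mapped into the learned metric space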
Example #2
def sandwich_demo():
  x, y = sandwich_data()
  knn = nearest_neighbors(x, k=2)
  ax = pyplot.subplot(3, 1, 1)  # take the whole top row
  plot_sandwich_data(x, y, ax)
  plot_neighborhood_graph(x, knn, y, ax)
  ax.set_title('input space')
  ax.set_aspect('equal')
  ax.set_xticks([])
  ax.set_yticks([])

  num_constraints = 60
  mls = [
    LMNN(x, y),
    ITML(x, ITML.prepare_constraints(y, len(x), num_constraints)),
    SDML(x, SDML.prepare_constraints(y, len(x), num_constraints)),
    LSML(x, LSML.prepare_constraints(y, num_constraints))
  ]

  # Fit each metric learner and plot the transformed data in rows 2-3 of the grid.
  for ax_num, ml in zip(range(3, 7), mls):
    ml.fit()
    tx = ml.transform()
    ml_knn = nearest_neighbors(tx, k=2)
    ax = pyplot.subplot(3, 2, ax_num)
    plot_sandwich_data(tx, y, ax)
    plot_neighborhood_graph(tx, ml_knn, y, ax)
    ax.set_title('%s space' % ml.__class__.__name__)
    ax.set_xticks([])
    ax.set_yticks([])
  pyplot.show()
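
sandwich_data, nearest_neighbors, plot_sandwich_data and plot_neighborhood_graph are helper functions defined elsewhere in the demo script. As an illustration of what the neighborhood computation might look like, here is one plausible nearest_neighbors sketch built on scikit-learn (an assumption about the helper, not the demo's actual code):

from sklearn.neighbors import NearestNeighbors

def nearest_neighbors(X, k=5):
  # Return the indices of the k nearest neighbors of each point, excluding
  # the point itself; used only to draw the neighborhood graphs above.
  nn = NearestNeighbors(n_neighbors=k + 1).fit(X)
  return nn.kneighbors(X, return_distance=False)[:, 1:]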
Example #3
def applyAlgo(algo, p, Xtrain, Ytrain, Xtest, Ytest):
    # K, minClass, majClass, knn, knnSame and the metric-learning classes are
    # defined at module level. A leading "o"/"u" in `algo` selects SMOTE
    # over-sampling or random under-sampling of the training set.
    # (fit_sample was renamed fit_resample in newer imbalanced-learn releases.)
    if algo.startswith("o"):
        nbMinority = len(Xtrain[Ytrain == minClass])
        if nbMinority <= 5:
            # SMOTE requires k_neighbors to be smaller than the minority count
            sm = SMOTE(random_state=42, k_neighbors=nbMinority-1)
        else:
            sm = SMOTE(random_state=42)
        Xtrain2, Ytrain2 = sm.fit_sample(Xtrain, Ytrain)
    elif algo.startswith("u"):
        rus = RandomUnderSampler(random_state=42)
        Xtrain2, Ytrain2 = rus.fit_sample(Xtrain, Ytrain)
    else:
        Xtrain2, Ytrain2 = Xtrain, Ytrain

    scaler = StandardScaler()
    scaler.fit(Xtrain2)
    Xtrain2 = scaler.transform(Xtrain2)
    Xtest = scaler.transform(Xtest)

    if algo.endswith("IML"):
        ml = IML(pClass=minClass, k=K, m=p["m"], Lambda=p["Lambda"], a=p["a"])
    elif algo.endswith("LMNN"):
        ml = LMNN(k=K, mu=p["mu"], randomState=np.random.RandomState(1))
    elif algo.endswith("GMML"):
        ml = GMML(t=p["t"], randomState=np.random.RandomState(1))
    elif algo.endswith("ITML"):
        ml = ITML(gamma=p["gamma"], randomState=np.random.RandomState(1))

    if not algo.endswith("Euclidean"):
        ml.fit(Xtrain2, Ytrain2)
        Xtrain2 = ml.transform(Xtrain2)
        Xtest = ml.transform(Xtest)

    # Apply kNN to predict classes of test examples
    Ytrain_pred = knnSame(K, Xtrain2, Ytrain2)
    Ytest_pred = knn(K, Xtrain2, Ytrain2, Xtest)

    perf = {}
    for true, pred, name in [(Ytrain2, Ytrain_pred, "train"),
                             (Ytest, Ytest_pred, "test")]:
        # Compute performance measures by comparing prediction with true labels
        tn, fp, fn, tp = confusion_matrix(true, pred,
                                          labels=[majClass, minClass]).ravel()
        perf[name] = (int(tn), int(fp), int(fn), int(tp))

    return perf
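
applyAlgo expects the algorithm name (optionally prefixed with "o" for SMOTE over-sampling or "u" for random under-sampling), a hyperparameter dict p, and the train/test split, and returns confusion-matrix counts for both splits. A hypothetical call could look like this; the split, the algorithm string and the gamma value are illustrative only, and X, Y, K, minClass and majClass are assumed to be set up elsewhere:

from sklearn.model_selection import train_test_split

# Illustrative only: X, Y are the features/labels of some imbalanced dataset.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3,
                                                stratify=Y, random_state=1)

p = {"gamma": 1.0}  # hypothetical hyperparameter choice for ITML
perf = applyAlgo("oITML", p, Xtrain, Ytrain, Xtest, Ytest)
print(perf["test"])  # (tn, fp, fn, tp) on the test set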
Example #4
def applyAlgo(algo, p, Xtrain, Ytrain, Xtest, Ytest):
    # Variant without resampling: train on the original (possibly imbalanced) data.
    Xtrain2, Ytrain2 = Xtrain, Ytrain

    scaler = StandardScaler()
    scaler.fit(Xtrain2)
    Xtrain2 = scaler.transform(Xtrain2)
    Xtest = scaler.transform(Xtest)

    if algo == "IML":
        ml = IML(pClass=minClass, k=K, m=p["m"], Lambda=p["Lambda"], a=p["a"])
    elif algo == "ML2":
        ml = ML2(pClass=minClass, k=K, m=p["m"], Lambda=p["Lambda"], a=p["a"])
    elif algo == "ML1":
        ml = ML1(num_const=2*K*len(Xtrain),
                 m=p["m"], Lambda=p["Lambda"], a=p["a"])
    elif algo == "LMNN":
        ml = LMNN(k=K, mu=p["mu"], randomState=np.random.RandomState(1))
    elif algo == "GMML":
        ml = GMML(t=p["t"], randomState=np.random.RandomState(1))
    elif algo == "ITML":
        ml = ITML(gamma=p["gamma"], randomState=np.random.RandomState(1))

    if not algo.endswith("Euclidean"):
        ml.fit(Xtrain2, Ytrain2)
        Xtrain2 = ml.transform(Xtrain2)
        Xtest = ml.transform(Xtest)

    # Apply kNN to predict classes of test examples
    Ytrain_pred = knnSame(K, Xtrain2, Ytrain2)
    Ytest_pred = knn(K, Xtrain2, Ytrain2, Xtest)

    perf = {}
    for true, pred, name in [(Ytrain2, Ytrain_pred, "train"),
                             (Ytest, Ytest_pred, "test")]:
        # Compute performance measures by comparing prediction with true labels
        tn, fp, fn, tp = confusion_matrix(true, pred,
                                          labels=[majClass, minClass]).ravel()
        perf[name] = (int(tn), int(fp), int(fn), int(tp))

    return perf
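
Both variants of applyAlgo return raw confusion-matrix counts rather than aggregate scores. If summary measures are needed, they can be derived from the returned tuples; a small sketch, treating the minority class as the positive class:

def summarize(perf, split="test"):
    # Derive common measures from the (tn, fp, fn, tp) counts returned by applyAlgo.
    tn, fp, fn, tp = perf[split]
    recall = tp / (tp + fn) if (tp + fn) else 0.0        # true positive rate
    specificity = tn / (tn + fp) if (tn + fp) else 0.0   # true negative rate
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) else 0.0)
    return {"recall": recall, "precision": precision, "f1": f1,
            "balanced_accuracy": (recall + specificity) / 2}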