Example #1
import pandas as pd
from libtlda.iw import ImportanceWeightedClassifier

def apply_NN(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_NN, acc_DT_NN_INFO = check_accuracy(testY, pred_naive)
    # Logistic Regression
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_NN, acc_LR_NN_INFO = check_accuracy(testY, pred_naive)
    # Naive Bayes Bernoulli
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_NN, acc_NB_NN_INFO = check_accuracy(testY, pred_naive)
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_NN': acc_LR_NN,
        'acc_LR_NN_INFO': str(acc_LR_NN_INFO),
        'acc_DT_NN': acc_DT_NN,
        'acc_DT_NN_INFO': str(acc_DT_NN_INFO),
        'acc_NB_NN': acc_NB_NN,
        'acc_NB_NN_INFO': str(acc_NB_NN_INFO),
    }])
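A minimal, self-contained sketch of the fit/predict pattern that apply_NN wraps (the synthetic data and shapes are illustrative assumptions; the constructor arguments mirror the example above):

import numpy as np
from libtlda.iw import ImportanceWeightedClassifier

# Labelled source samples and unlabelled, shifted target samples.
trainX = np.random.randn(50, 4)
trainY = np.sign(np.random.randn(50))   # binary labels in {-1, +1}
testX = np.random.randn(50, 4) + 0.5

# The third argument to fit() is the unlabelled target data; it is used only
# to estimate importance weights for the source samples.
clf = ImportanceWeightedClassifier(iwe='nn', loss='logistic')
clf.fit(trainX, trainY, testX)
pred = clf.predict(testX)
print(pred[:5])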
Example #2
import pandas as pd
from libtlda.iw import ImportanceWeightedClassifier

def apply_KMM(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_KMM, acc_DT_KMM_INFO = check_accuracy(testY, pred_naive)
    # Logistic Regression
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_KMM, acc_LR_KMM_INFO = check_accuracy(testY, pred_naive)
    # Naive Bayes Bernoulli
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_KMM, acc_NB_KMM_INFO = check_accuracy(testY, pred_naive)
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_KMM': acc_LR_KMM,
        'acc_LR_KMM_INFO': str(acc_LR_KMM_INFO),
        'acc_DT_KMM': acc_DT_KMM,
        'acc_DT_KMM_INFO': str(acc_DT_KMM_INFO),
        'acc_NB_KMM': acc_NB_KMM,
        'acc_NB_KMM_INFO': str(acc_NB_KMM_INFO),
    }])
Example #3
import numpy as np
import numpy.random as rnd
from libtlda.iw import ImportanceWeightedClassifier

def test_predict():
    """Test for making predictions."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)
    labels = np.unique(y)
    assert len(np.setdiff1d(np.unique(u_pred), labels)) == 0
Example #4
import numpy as np

def run_6():
    X = np.random.randn(10, 2)
    # hstack, not vstack: labels must be a flat (10,) vector to match X's 10 rows
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = np.random.randn(10, 2)

    from libtlda.iw import ImportanceWeightedClassifier
    clf = ImportanceWeightedClassifier(loss='quadratic', iwe='kmm')

    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)

    print(u_pred)
Example #5
elif classifier == 'scl':  # assumed branch key; the excerpt begins mid-chain
    # Classifier based on structural correspondence learning
    clf = StructuralCorrespondenceClassifier(num_pivots=2, num_components=1)

elif classifier == 'rba':
    # Robust bias-aware classifier
    clf = RobustBiasAwareClassifier(l2=0.1, max_iter=1000)

elif classifier == 'flda':
    # Feature-level domain-adaptive classifier
    clf = FeatureLevelDomainAdaptiveClassifier(l2=0.1, max_iter=1000)

elif classifier == 'tcpr':
    # Target Contrastive Pessimistic Classifier
    clf = TargetContrastivePessimisticClassifier(l2=0.1, tolerance=1e-20)

else:
    raise ValueError('Classifier not recognized.')

# Train classifier
clf.fit(X, y, Z)

# Make predictions
pred_adapt = clf.predict(Z)

# Compute error rates
err_naive = np.mean(pred_naive != u, axis=0)
err_adapt = np.mean(pred_adapt != u, axis=0)

# Report results
print('Error naive: ' + str(err_naive))
print('Error adapt: ' + str(err_adapt))
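pred_naive is computed in a part of this script that the excerpt omits; a plausible stand-in, sketched here as an assumption, is a source-only logistic regression trained without any adaptation:

import numpy as np
from sklearn.linear_model import LogisticRegression

# Synthetic stand-ins for the script's X (source), y, Z (target), u (target labels).
X = np.random.randn(100, 2)
y = np.sign(np.random.randn(100))
Z = np.random.randn(100, 2) + 1
u = np.sign(np.random.randn(100))

clf_naive = LogisticRegression().fit(X, y)
pred_naive = clf_naive.predict(Z)
print(np.mean(pred_naive != u))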
Example #6
        # Split out test data
        tst_X = X[tst_index, :]
        tst_Y = Y[tst_index]

        # Define classifiers
        clf_n = linear_model.LogisticRegression(C=0.1)
        clf_a = ImportanceWeightedClassifier(loss='logistic', l2=0.1)

        # Train classifier on data from current and previous days
        clf_n.fit(trn_X, trn_Y)
        clf_a.fit(trn_X, trn_Y, tst_X)

        # Make predictions
        preds_n = clf_n.predict(tst_X)
        preds_a = clf_a.predict(tst_X)

        # Test on data from current day and store
        perf_n.append(np.mean(preds_n != tst_Y))
        perf_a.append(np.mean(preds_a != tst_Y))

        # Store day and rumour
        days_array.append(days[d])
        rums_array.append(rumour)

# Compact to DataFrame
performance = pd.DataFrame({
    'err_n': perf_n,
    'err_a': perf_a,
    'days': days_array,
    'rumours': rums_array
})
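Once the frame is assembled, per-day averages across rumours follow directly; a short follow-up sketch using the columns defined above:

# Mean naive vs. adapted error rate per day, averaged over rumours.
daily = performance.groupby('days')[['err_n', 'err_a']].mean()
print(daily)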
Example #7
import pandas as pd
from sklearn import tree
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.semi_supervised import LabelPropagation, LabelSpreading
from libtlda.iw import ImportanceWeightedClassifier
from libtlda.suba import SubspaceAlignedClassifier
from libtlda.tca import TransferComponentClassifier
# checkAccuracy and EnsembleClassifier are project-local helpers, not library
# imports (see the stand-in sketch after this example for checkAccuracy).

def build_models(trainX, trainY, testX, testY, source_pos, target_pos, window):
    ########################
    ### SEMI-SUPERVISED ###
    ########################
    # Label Propagation
    label_prop_model = LabelPropagation(kernel='knn')
    label_prop_model.fit(trainX, trainY)
    Y_Pred = label_prop_model.predict(testX)
    acc_ss_propagation, acc_ss_propagation_INFO = checkAccuracy(testY, Y_Pred)
    # Label Spreading
    label_prop_models_spr = LabelSpreading(kernel='knn')
    label_prop_models_spr.fit(trainX, trainY)
    Y_Pred = label_prop_models_spr.predict(testX)
    acc_ss_spreading, acc_ss_spreading_INFO = checkAccuracy(testY, Y_Pred)
    ########################
    #### WITHOUT TL ########
    ########################
    # LogisticRegression
    modelLR = LogisticRegression()
    modelLR.fit(trainX, trainY)
    predLR = modelLR.predict(testX)
    accLR, acc_LR_INFO = checkAccuracy(testY, predLR)
    # DecisionTreeClassifier
    modelDT = tree.DecisionTreeClassifier()
    modelDT.fit(trainX, trainY)
    predDT = modelDT.predict(testX)
    accDT, acc_DT_INFO = checkAccuracy(testY, predDT)
    # BernoulliNB
    modelNB = BernoulliNB()
    modelNB.fit(trainX, trainY)
    predNB = modelNB.predict(testX)
    accNB, acc_NB_INFO = checkAccuracy(testY, predNB)
    #
    print("WITHOUT TL ACC_LR:", accLR, " ACC_DT:", accDT, " ACC_NB:", accNB)
    ########################
    ###### WITH TL #########
    ########################

    ####################################################
    ### Kernel Mean Matching (Huang et al., 2006)
    ###
    # Decision Tree
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_KMM, acc_DT_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_KMM)
    # Logistic Regression
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_KMM, acc_LR_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_KMM)
    # Naive Bayes Bernoulli
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_KMM, acc_NB_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_KMM)
    ####################################################
    ### Nearest-neighbour-based weighting (Loog, 2015)
    ###
    # Decision Tree
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_NN, acc_DT_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_NN)
    # Logistic Regression
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_NN, acc_LR_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_NN)
    # Naive Bayes Bernoulli
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_NN, acc_NB_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_NN)

    ####################################################
    ### Transfer Component Analysis (Pan et al., 2009)
    ###
    # Decision Tree
    print("\n Transfer Component Analysis (Pan et al, 2009)")
    classifier = TransferComponentClassifier(loss="dtree", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_TCA, acc_DT_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_TCA)
    # Logistic Regression
    classifier = TransferComponentClassifier(loss="logistic", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_TCA, acc_LR_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_TCA)
    # Naive Bayes Bernoulli
    classifier = TransferComponentClassifier(loss="berno", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_TCA, acc_NB_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_TCA)

    ####################################################
    ### Subspace Alignment (Fernando et al., 2013)
    ###
    # Decision Tree
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_SA, acc_DT_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_SA)
    # Logistic Regression
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_SA, acc_LR_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_SA)
    # Naive Bayes Bernoulli
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_SA, acc_NB_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_SA)
    #################################
    ############# ENSEMBLE ##########
    #################################
    classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
    classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
    classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
    classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
    classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
    classifier_TCA_NB = TransferComponentClassifier(loss="berno")
    classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm',
                                                     loss="logistic")
    classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    #
    eclf = EnsembleClassifier(
        clfs=[classifier_TCA_DT, classifier_NN_DT, classifier_KMM_DT])
    eclf.fit(trainX, trainY, testX)
    pred = eclf.predict_v2(testX)
    acc_ENSEMBLE, acc_ENSEMBLE_INFO = checkAccuracy(testY, pred)

    ########################
    #### RETURN ########
    ########################
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_SS_propagation': acc_ss_propagation,
        'acc_SS_propagation_INFO': acc_ss_propagation_INFO,
        'acc_SS_spreading': acc_ss_spreading,
        'acc_SS_spreading_INFO': acc_ss_spreading_INFO,
        'acc_ENSEMBLE': acc_ENSEMBLE,
        'acc_LR': accLR,
        'acc_LR_INFO': str(acc_LR_INFO),
        'acc_DT': accDT,
        'acc_DT_INFO': str(acc_DT_INFO),
        'acc_NB': accNB,
        'acc_NB_INFO': str(acc_NB_INFO),
        'acc_LR_KMM': acc_LR_KMM,
        'acc_LR_KMM_INFO': str(acc_LR_KMM_INFO),
        'acc_LR_NN': acc_LR_NN,
        'acc_LR_NN_INFO': str(acc_LR_NN_INFO),
        'acc_LR_TCA': acc_LR_TCA,
        'acc_LR_TCA_INFO': str(acc_LR_TCA_INFO),
        'acc_LR_SA': acc_LR_SA,
        'acc_LR_SA_INFO': str(acc_LR_SA_INFO),
        'acc_DT_KMM': acc_DT_KMM,
        'acc_DT_KMM_INFO': str(acc_DT_KMM_INFO),
        'acc_DT_NN': acc_DT_NN,
        'acc_DT_NN_INFO': str(acc_DT_NN_INFO),
        'acc_DT_TCA': acc_DT_TCA,
        'acc_DT_TCA_INFO': str(acc_DT_TCA_INFO),
        'acc_DT_SA': acc_DT_SA,
        'acc_DT_SA_INFO': str(acc_DT_SA_INFO),
        'acc_NB_KMM': acc_NB_KMM,
        'acc_NB_KMM_INFO': str(acc_NB_KMM_INFO),
        'acc_NB_NN': acc_NB_NN,
        'acc_NB_NN_INFO': str(acc_NB_NN_INFO),
        'acc_NB_TCA': acc_NB_TCA,
        'acc_NB_TCA_INFO': str(acc_NB_TCA_INFO),
        'acc_NB_SA': acc_NB_SA,
        'acc_NB_SA_INFO': str(acc_NB_SA_INFO)
    }])
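checkAccuracy (and check_accuracy in Examples #1 and #2) is a project-local helper returning an accuracy score plus a diagnostic object; a minimal stand-in, sketched here as an assumption about its contract, could be:

from sklearn.metrics import accuracy_score, classification_report

def checkAccuracy(y_true, y_pred):
    # Hypothetical reconstruction: overall accuracy plus a per-class report string.
    return accuracy_score(y_true, y_pred), classification_report(y_true, y_pred)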
Example #8
import numpy as np
import matplotlib.pyplot as plt

def run_5(n_samples=100):
    from libtlda.iw import ImportanceWeightedClassifier
    clf = ImportanceWeightedClassifier(loss='quadratic', iwe='kmm')

    # X ~ N(0.5, 0.5²)
    # Z ~ N(0.0, 0.3²)

    x = np.random.normal(0.5, 0.5**2, (n_samples, 1))
    z = np.random.normal(0, 0.3**2, (n_samples, 1))

    x_noise = np.random.normal(0, 0.07, (n_samples, 1))
    z_noise = np.random.normal(0, 0.03, (n_samples, 1))

    def data_func(var):
        return var**3 - var

    y = data_func(x)
    y = np.array(y)
    y = y.ravel()

    # + noise
    X = x + x_noise
    Z = z + z_noise

    # a different distribution to approximate, starting from an initial one
    y_bis = data_func(z)
    y_bis = np.array(y_bis)
    y_bis = y_bis.ravel()

    print(X.shape)
    print(y.shape)
    print(Z.shape)
    print(y_bis.shape)

    clf.fit(X, y, Z)
    preds = clf.predict(Z)
    # Compare predictions with the target responses; preds is (n,) while Z is
    # (n, 1), so preds - Z would silently broadcast to an (n, n) matrix.
    print(np.linalg.norm(preds - y_bis))

    from sklearn.linear_model import LinearRegression
    clf_linear = LinearRegression()
    clf_linear.fit(Z, y_bis)
    true_coefs = clf_linear.coef_

    # print(clf.get_weights())

    print(preds)

    # plot facilities
    x_range = np.linspace(-0.4, 1.2, 100)
    kmm_line = x_range * preds
    true_line = x_range * true_coefs

    plt.axis([-0.4, 1.2, -0.5, 1])
    plt.scatter(X, y, label='X points', color='blue', marker='o')
    plt.plot(x_range, data_func(x_range), label='X distribution', color='blue')

    plt.scatter(Z, y_bis, label='Z points', color='red', marker='+')
    plt.plot(x_range, kmm_line, label='Z kmm regression line', color='red')

    plt.plot(x_range, true_line, label='Z OLS line', color='black')
    plt.legend()
    plt.show()
    """