Example #1
import numpy as np
import numpy.random as rnd
from libtlda.iw import ImportanceWeightedClassifier
# (The remaining test snippets assume this same preamble.)


def test_iwe_kernel_mean_matching():
    """Test for estimating weights through kernel mean matching."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_kernel_mean_matching(X, Z)
    assert np.all(iw >= 0)
Example #2
def test_iwe_kernel_densities():
    """Test for estimating through kernel density estimation."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_kernel_densities(X, Z)
    assert np.all(iw >= 0)
Example #3
def test_iwe_nearest_neighbours():
    """Test for estimating through nearest neighbours."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_nearest_neighbours(X, Z)
    assert np.all(iw >= 0)
Example #4
def test_iwe_logistic_discrimination():
    """Test for estimating through logistic classifier."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_logistic_discrimination(X, Z)
    assert np.all(iw >= 0)
Example #5
def test_iwe_ratio_gaussians():
    """Test for estimating weights through a ratio of Gaussians."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_ratio_gaussians(X, Z)
    assert np.all(iw >= 0)
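Each of the five iwe_* calls above returns one nonnegative importance weight per source sample. As a minimal sketch of how such weights are consumed (plain scipy/scikit-learn, not libtlda's internal code), the ratio-of-Gaussians estimate can be reproduced and passed to any estimator that accepts sample_weight:

import numpy as np
from scipy.stats import multivariate_normal
from sklearn.linear_model import LogisticRegression

rnd = np.random.RandomState(1)
X = rnd.randn(50, 2)               # labeled source samples
y = (X[:, 0] > 0).astype(int)      # source labels
Z = rnd.randn(50, 2) + 1           # unlabeled target samples

# Ratio-of-Gaussians importance weights: w(x) = p_Z(x) / p_X(x),
# with each domain modeled by a single Gaussian fit to its samples.
p_X = multivariate_normal(X.mean(axis=0), np.cov(X, rowvar=False))
p_Z = multivariate_normal(Z.mean(axis=0), np.cov(Z, rowvar=False))
w = p_Z.pdf(X) / p_X.pdf(X)        # nonnegative, one weight per row of X

# Any estimator with a sample_weight argument can consume the weights.
clf = LogisticRegression().fit(X, y, sample_weight=w)
print(clf.predict(Z)[:5])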
Example #6
def test_fit():
    """Test for fitting the model."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    clf.fit(X, y, Z)
    assert clf.is_trained
Example #7
def test_predict():
    """Test for making predictions."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)
    labels = np.unique(y)
    assert len(np.setdiff1d(np.unique(u_pred), labels)) == 0
Example #8
def test_regularization():
    """Test for fitting the model."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5, )), np.ones((5, ))))
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier(loss_function='lr',
                                       l2_regularization=None)
    assert isinstance(clf.clf, LogisticRegressionCV)
    clf = ImportanceWeightedClassifier(loss_function='lr',
                                       l2_regularization=1.0)
    assert isinstance(clf.clf, LogisticRegression)
Example #9
def run_6():
    import numpy as np
    from libtlda.iw import ImportanceWeightedClassifier

    X = np.random.randn(10, 2)
    # hstack, not vstack: fit expects a flat (10,) label vector
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = np.random.randn(10, 2)

    clf = ImportanceWeightedClassifier(loss='quadratic', iwe='kmm')

    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)

    print(u_pred)
Example #10
def main(source, target, model, target_train_ratio, random_state):

    params = {
        'source': source,
        'target': target,
        'target_train_ratio': target_train_ratio,
        'max_features': 5000,
        'random_state': random_state
    }

    params['partition'] = 'tr'
    tr_X, tr_y = get_data(AmazonDatasetCombined(**params))

    params['partition'] = 'te'
    te_X, te_y = get_data(AmazonDatasetCombined(**params))

    tr_y = tr_y.reshape(-1)
    te_y = te_y.reshape(-1)

    if model == 'lr':
        C = 0.2
        clf = LogisticRegression(solver='lbfgs', max_iter=1000, C=C)
        clf.fit(tr_X, tr_y)

    elif model == 'svm':
        C = 0.2
        clf = LinearSVC(C=C)
        clf.fit(tr_X, tr_y)

    elif model == 'kmm':
        clf = ImportanceWeightedClassifier(iwe='kmm')
        clf.fit(tr_X, tr_y, te_X)

    elif model == 'suba-lr':
        clf = SubspaceAlignedClassifier(loss='logistic')
        clf.fit(tr_X, tr_y, te_X)

    elif model == 'suba-hi':
        clf = SubspaceAlignedClassifier(loss='hinge')
        clf.fit(tr_X, tr_y, te_X)

    elif model == 'tca-lr':
        clf = TransferComponentClassifier(loss='logistic')
        clf.fit(tr_X, tr_y, te_X)

    elif model == 'tca-hi':
        clf = TransferComponentClassifier(loss='hinge')
        clf.fit(tr_X, tr_y, te_X)

    else:
        raise ValueError('Unknown model: {}'.format(model))

    tr_score = accuracy_score(tr_y, clf.predict(tr_X))
    te_score = accuracy_score(te_y, clf.predict(te_X))

    return tr_score, te_score
Example #11
def InitTransferClassifier(index, l, iw):
    """Instantiate one of libtlda's transfer classifiers by index."""
    if index == 1:
        clf = ImportanceWeightedClassifier(loss=l, iwe=iw)
    elif index == 2:
        clf = TransferComponentClassifier()
    elif index == 3:
        clf = SubspaceAlignedClassifier()
    elif index == 4:
        clf = StructuralCorrespondenceClassifier()
    elif index == 5:
        clf = RobustBiasAwareClassifier()
    elif index == 6:
        clf = FeatureLevelDomainAdaptiveClassifier()
    elif index == 7:
        clf = TargetContrastivePessimisticClassifier()
    else:
        raise ValueError('Unknown classifier index: {}'.format(index))

    return clf
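The same factory can also be written as a dispatch table, which keeps the index-to-class mapping in one place; a sketch of that design choice (the libtlda module paths are assumptions, not taken from the snippet):

from libtlda.iw import ImportanceWeightedClassifier
from libtlda.tca import TransferComponentClassifier
from libtlda.suba import SubspaceAlignedClassifier
from libtlda.scl import StructuralCorrespondenceClassifier
from libtlda.rba import RobustBiasAwareClassifier
from libtlda.flda import FeatureLevelDomainAdaptiveClassifier
from libtlda.tcpr import TargetContrastivePessimisticClassifier

# Hypothetical dispatch-table rewrite of InitTransferClassifier above.
# Lambdas defer construction until a classifier is actually requested;
# only the importance-weighted variant uses the loss/iwe arguments.
CLASSIFIERS = {
    1: lambda l, iw: ImportanceWeightedClassifier(loss=l, iwe=iw),
    2: lambda l, iw: TransferComponentClassifier(),
    3: lambda l, iw: SubspaceAlignedClassifier(),
    4: lambda l, iw: StructuralCorrespondenceClassifier(),
    5: lambda l, iw: RobustBiasAwareClassifier(),
    6: lambda l, iw: FeatureLevelDomainAdaptiveClassifier(),
    7: lambda l, iw: TargetContrastivePessimisticClassifier(),
}


def init_transfer_classifier(index, l, iw):
    try:
        return CLASSIFIERS[index](l, iw)
    except KeyError:
        raise ValueError('Unknown classifier index: {}'.format(index))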
Example #12
def model_build(classifier, train_features, train_labels, test_features):
    if classifier == "IW":
        # pipe = make_pipeline(StandardScaler(), ImportanceWeightedClassifier(iwe='nn'))
        # param_grid = [{}]
        # model = GridSearchCV(pipe, param_grid, cv=3)
        # model.fit(train_features, train_labels, test_features)
        # print(model.best_params_)

        model = ImportanceWeightedClassifier(iwe='kde')
        model.fit(preprocessing.scale(train_features), train_labels,
                  preprocessing.scale(test_features))

    else:
        if classifier == "LR":
            pipe = make_pipeline(StandardScaler(), LogisticRegression())
            param_grid = [{'logisticregression__C': [1, 10, 100]}]
        elif classifier == "SVM":
            # Alternative: make_pipeline(StandardScaler(), LinearSVC(random_state=0, tol=1e-5))
            pipe = make_pipeline(StandardScaler(),
                                 SVC(kernel='linear', probability=True))
            param_grid = [{'svc__C': [0.01, 0.1, 1]}]
        elif classifier == "RF":
            pipe = make_pipeline(StandardScaler(),
                                 RandomForestClassifier(max_features='sqrt'))
            param_grid = {
                'randomforestclassifier__n_estimators': range(230, 300, 10),
                'randomforestclassifier__max_depth': range(8, 12, 1),
                'randomforestclassifier__min_samples_leaf': range(1, 5, 1),
                'randomforestclassifier__max_features': range(1, 20, 1)
            }
        else:
            raise ValueError('Unknown classifier: {}'.format(classifier))
        model = GridSearchCV(pipe, param_grid, cv=3)
        model.fit(train_features, train_labels)
        print(model.best_params_)
    # save the model
    model_file_name = classifier + time.strftime("%m%d-%H%M%S") + ".model"
    joblib.dump(filename=model_file_name, value=model)
    return model
Example #13
def apply_ENSEMBLE(trainX, trainY, testX, testY, window, source_pos, target_pos):
    classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
    classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
    classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
    classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
    classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
    classifier_TCA_NB = TransferComponentClassifier(loss="berno")
    classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    #
    eclf = EnsembleClassifier(clfs=[ 
        #classifier_SA_DT,
        #classifier_SA_LR,
        #classifier_SA_NB,

        #classifier_TCA_DT,
        #classifier_TCA_LR,
        classifier_TCA_NB,

        classifier_NN_DT,
        #classifier_NN_LR,
        #classifier_NN_NB,

        classifier_KMM_DT,
        classifier_KMM_LR,
        #classifier_KMM_NB
         ])
    eclf.fit(trainX, trainY, testX)
    pred = eclf.predict(testX)
    acc_ENSEMBLE, acc_ENSEMBLE_INFO = check_accuracy(testY, pred)
    #
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_ENSEMBLE': acc_ENSEMBLE,
        'acc_ENSEMBLE_INFO': acc_ENSEMBLE_INFO,
    }])
Example #14
#
classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
classifier_TCA_NB = TransferComponentClassifier(loss="berno")
classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
#
eclf = EnsembleClassifier(clfs=[
    classifier_SA_DT, classifier_SA_LR, classifier_SA_NB, classifier_TCA_LR,
    classifier_TCA_DT, classifier_TCA_NB, classifier_NN_DT, classifier_NN_LR,
    classifier_NN_NB, classifier_KMM_DT, classifier_KMM_LR, classifier_KMM_NB
], weights=[1] * 12)  # one weight per classifier; [1, 1] would not match the 12 clfs

eclf.fit(trainX, trainY, testX)
Example #15
Z = np.concatenate((Z0, Z1), axis=0)
u = np.concatenate((labels[0] * np.ones((M0,), dtype='int'),
                    labels[1] * np.ones((M1,), dtype='int')), axis=0)
"""Classifiers"""

# Train a naive logistic regressor
lr = LogisticRegression().fit(X, y)

# Make predictions
pred_naive = lr.predict(Z)

# Select adaptive classifier
if classifier == 'iw':
    # Call an importance-weighted classifier
    clf = ImportanceWeightedClassifier(iwe='lr', loss='logistic')

elif classifier == 'tca':
    # Classifier based on transfer component analysis
    clf = TransferComponentClassifier(loss='logistic', mu=1.)

elif classifier == 'suba':
    # Classifier based on subspace alignment
    clf = SubspaceAlignedClassifier(loss='logistic')

elif classifier == 'scl':
    # Classifier based on structural correspondence learning
    clf = StructuralCorrespondenceClassifier(num_pivots=2, num_components=1)

elif classifier == 'rba':
    # Robust bias-aware classifier
Example #16
def test_init():
    """Test for object type."""
    clf = ImportanceWeightedClassifier()
    assert isinstance(clf, ImportanceWeightedClassifier)
    assert not clf.is_trained
Example #17
import numpy as np
from libtlda.iw import ImportanceWeightedClassifier

X = np.random.randn(10, 2)
y = np.array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1])
Z = np.random.randn(10, 2)
clf = ImportanceWeightedClassifier()
clf.fit(X, y, Z)
print(clf.predict_proba(Z))
Example #18
               clf.fit(X, y, Y)
               probas_ = clf.predict(X)
               accuracy_org = sum((probas_ > 0.5) == y) / (1.0 * nsamples)
               target_probas_ = clf.predict(Y)
               accuracy_targ = sum((target_probas_ > 0.5) == classes_test) / (1.0 * n_test_samples)
               print(subject, target_subj, index, l, iw, accuracy_org, accuracy_targ)

    elif mode == 3:  # Evaluate classifiers using the full training set as test set
        y = classes_train
        nsamples = y.shape[0]
        n_test_samples = classes_test.shape[0]
        for index in [1, 2, 6, 7, 8, 9, 10, 11, 12, 13]:  # subset of range(1, n_classifiers)
            for iw in [0, 1, 2, 3]:
                iwe = weighting_functions[iw]
                w_clf = ImportanceWeightedClassifier(iwe=iwe)
                X = np.asarray(MEG_data_train)
                Y = np.asarray(MEG_data_test)
                w_clf.fit(X, y, Y)
                if iwe == 'lr':
                    w = w_clf.iwe_logistic_discrimination(X, Y)
                elif iwe == 'rg':
                    w = w_clf.iwe_ratio_gaussians(X, Y)
                elif iwe == 'nn':
                    w = w_clf.iwe_nearest_neighbours(X, Y)
                elif iwe == 'kde':
                    w = w_clf.iwe_kernel_densities(X, Y)
                elif iwe == 'kmm':
                    w = w_clf.iwe_kernel_mean_matching(X, Y)
                else:
                    raise NotImplementedError('Estimator not implemented.')
Example #19
File: main.py Project: myhrbeu/py-har
def build_models(trainX, trainY, testX, testY, source_pos, target_pos, window):
    ########################
    ### SEMI-SUPERVISED ####
    ########################
    # Label Propagation
    label_prop_model = LabelPropagation(kernel='knn')
    label_prop_model.fit(trainX, trainY)
    Y_Pred = label_prop_model.predict(testX)
    acc_ss_propagation, acc_ss_propagation_INFO = checkAccuracy(testY, Y_Pred)
    # Label Spreading
    label_prop_models_spr = LabelSpreading(kernel='knn')
    label_prop_models_spr.fit(trainX, trainY)
    Y_Pred = label_prop_models_spr.predict(testX)
    acc_ss_spreading, acc_ss_spreading_INFO = checkAccuracy(testY, Y_Pred)
    ########################
    #### WITHOUT TL ########
    ########################
    # LogisticRegression
    modelLR = LogisticRegression()
    modelLR.fit(trainX, trainY)
    predLR = modelLR.predict(testX)
    accLR, acc_LR_INFO = checkAccuracy(testY, predLR)
    # DecisionTreeClassifier
    modelDT = tree.DecisionTreeClassifier()
    modelDT.fit(trainX, trainY)
    predDT = modelDT.predict(testX)
    accDT, acc_DT_INFO = checkAccuracy(testY, predDT)
    # BernoulliNB
    modelNB = BernoulliNB()
    modelNB.fit(trainX, trainY)
    predNB = modelNB.predict(testX)
    accNB, acc_NB_INFO = checkAccuracy(testY, predNB)
    #
    print("WITHOUT TL ACC_LR:", accLR, " ACC_DT:", accDT, " ACC_NB:", accNB)
    ########################
    ####### WITH TL ########
    ########################

    ####################################################
    ### Kernel Mean Matching (Huang et al., 2006)
    ###
    # Decision Tree
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_KMM, acc_DT_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_KMM)
    # Logistic Regression
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_KMM, acc_LR_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_KMM)
    # Naive Bayes Bernoulli
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_KMM, acc_NB_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_KMM)
    ####################################################
    ### Nearest-neighbour-based weighting (Loog, 2015)
    ###
    # Decision Tree
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_NN, acc_DT_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_NN)
    # Logistic Regression
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_NN, acc_LR_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_NN)
    # Naive Bayes Bernoulli
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_NN, acc_NB_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_NN)

    ####################################################
    ### Transfer Component Analysis (Pan et al, 2009)
    ###
    # Decision Tree
    print("\n Transfer Component Analysis (Pan et al, 2009)")
    classifier = TransferComponentClassifier(loss="dtree", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_TCA, acc_DT_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_TCA)
    # Logistic Regression
    classifier = TransferComponentClassifier(loss="logistic", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_TCA, acc_LR_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_TCA)
    # Naive Bayes Bernoulli
    classifier = TransferComponentClassifier(loss="berno", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_TCA, acc_NB_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_TCA)

    ####################################################
    ### Subspace Alignment (Fernando et al., 2013)
    ###
    # Decision Tree
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_SA, acc_DT_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_SA)
    # Logistic Regression
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_SA, acc_LR_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_SA)
    # Naive Bayes Bernoulli
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_SA, acc_NB_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_SA)
    #################################
    ############# ENSEMBLE ##########
    #################################
    classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
    classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
    classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
    classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
    classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
    classifier_TCA_NB = TransferComponentClassifier(loss="berno")
    classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm',
                                                     loss="logistic")
    classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    #
    eclf = EnsembleClassifier(
        clfs=[classifier_TCA_DT, classifier_NN_DT, classifier_KMM_DT])
    eclf.fit(trainX, trainY, testX)
    pred = eclf.predict_v2(testX)
    acc_ENSEMBLE, acc_ENSEMBLE_INFO = checkAccuracy(testY, pred)

    ########################
    #### RETURN ########
    ########################
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_SS_propagation': acc_ss_propagation,
        'acc_SS_propagation_INFO': acc_ss_propagation_INFO,
        'acc_SS_spreading': acc_ss_spreading,
        'acc_SS_spreading_INFO': acc_ss_spreading_INFO,
        'acc_ENSEMBLE': acc_ENSEMBLE,
        'acc_LR': accLR,
        'acc_LR_INFO': str(acc_LR_INFO),
        'acc_DT': accDT,
        'acc_DT_INFO': str(acc_DT_INFO),
        'acc_NB': accNB,
        'acc_NB_INFO': str(acc_NB_INFO),
        'acc_LR_KMM': acc_LR_KMM,
        'acc_LR_KMM_INFO': str(acc_LR_KMM_INFO),
        'acc_LR_NN': acc_LR_NN,
        'acc_LR_NN_INFO': str(acc_LR_NN_INFO),
        'acc_LR_TCA': acc_LR_TCA,
        'acc_LR_TCA_INFO': str(acc_LR_TCA_INFO),
        'acc_LR_SA': acc_LR_SA,
        'acc_LR_SA_INFO': str(acc_LR_SA_INFO),
        'acc_DT_KMM': acc_DT_KMM,
        'acc_DT_KMM_INFO': str(acc_DT_KMM_INFO),
        'acc_DT_NN': acc_DT_NN,
        'acc_DT_NN_INFO': str(acc_DT_NN_INFO),
        'acc_DT_TCA': acc_DT_TCA,
        'acc_DT_TCA_INFO': str(acc_DT_TCA_INFO),
        'acc_DT_SA': acc_DT_SA,
        'acc_DT_SA_INFO': str(acc_DT_SA_INFO),
        'acc_NB_KMM': acc_NB_KMM,
        'acc_NB_KMM_INFO': str(acc_NB_KMM_INFO),
        'acc_NB_NN': acc_NB_NN,
        'acc_NB_NN_INFO': str(acc_NB_NN_INFO),
        'acc_NB_TCA': acc_NB_TCA,
        'acc_NB_TCA_INFO': str(acc_NB_TCA_INFO),
        'acc_NB_SA': acc_NB_SA,
        'acc_NB_SA_INFO': str(acc_NB_SA_INFO)
    }])
Example #20
def apply_KMM(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_KMM, acc_DT_KMM_INFO = check_accuracy(testY, pred_naive)
    # Logistic Regression
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_KMM, acc_LR_KMM_INFO = check_accuracy(testY, pred_naive)
    # Naive Bayes Bernoulli
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_KMM, acc_NB_KMM_INFO = check_accuracy(testY, pred_naive)
    #
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_KMM': acc_LR_KMM,
        'acc_LR_KMM_INFO': str(acc_LR_KMM_INFO),
        'acc_DT_KMM': acc_DT_KMM,
        'acc_DT_KMM_INFO': str(acc_DT_KMM_INFO),
        'acc_NB_KMM': acc_NB_KMM,
        'acc_NB_KMM_INFO': str(acc_NB_KMM_INFO),
    }])
Example #21
def run_5(n_samples=100):
    import numpy as np
    import matplotlib.pyplot as plt
    from libtlda.iw import ImportanceWeightedClassifier
    clf = ImportanceWeightedClassifier(loss='quadratic', iwe='kmm')

    # X ~ N(0.5, 0.5²)
    # Z ~ N(0.0, 0.3²)

    x = np.random.normal(0.5, 0.5**2, (n_samples, 1))
    z = np.random.normal(0, 0.3**2, (n_samples, 1))

    x_noise = np.random.normal(0, 0.07, (n_samples, 1))
    z_noise = np.random.normal(0, 0.03, (n_samples, 1))

    def data_func(var):
        return var**3 - var

    y = data_func(x)
    y = np.array(y)
    y = y.ravel()

    # add noise
    X = x + x_noise
    Z = z + z_noise

    # a different distribution to approximate, starting from the initial one
    y_bis = data_func(z)
    y_bis = np.array(y_bis)
    y_bis = y_bis.ravel()

    print(X.shape)
    print(y.shape)
    print(Z.shape)
    print(y_bis.shape)

    clf.fit(X, y, Z)
    preds = clf.predict(Z)
    print(np.linalg.norm(preds - Z.ravel()))  # ravel Z so shapes match

    from sklearn.linear_model import LinearRegression
    clf_linear = LinearRegression()
    clf_linear.fit(Z, y_bis)
    true_coefs = clf_linear.coef_

    # print(clf.get_weights())

    print(preds)

    # plot facilities
    x_range = np.linspace(-0.4, 1.2, 100)
    kmm_line = x_range * preds
    true_line = x_range * true_coefs

    plt.axis([-0.4, 1.2, -0.5, 1])
    plt.scatter(X, y, label='X points', color='blue', marker='o')
    plt.plot(x_range, data_func(x_range), label='X distribution', color='blue')

    plt.scatter(Z, y_bis, label='Z points', color='red', marker='+')
    plt.plot(x_range, kmm_line, label='Z kmm regression line', color='red')

    plt.plot(x_range, true_line, label='Z OLS line', color='black')
    plt.legend()
    plt.show()
    """
Example #22
        trn_index = tweets_r['created_date'].isin(days[past]).values.tolist()

        # Find all tweets from today
        tst_index = (tweets_r['created_date'] == days[d]).values.tolist()

        # Split out training data
        trn_X = X[trn_index, :]
        trn_Y = Y[trn_index]

        # Split out test data
        tst_X = X[tst_index, :]
        tst_Y = Y[tst_index]

        # Define classifiers
        clf_n = linear_model.LogisticRegression(C=0.1)
        clf_a = ImportanceWeightedClassifier(loss='logistic', l2=0.1)

        # Train classifier on data from current and previous days
        clf_n.fit(trn_X, trn_Y)
        clf_a.fit(trn_X, trn_Y, tst_X)

        # Make predictions
        preds_n = clf_n.predict(tst_X)
        preds_a = clf_a.predict(tst_X)

        # Test on data from current day and store
        perf_n.append(np.mean(preds_n != tst_Y))
        perf_a.append(np.mean(preds_a != tst_Y))

        # Store day and rumour
        days_array.append(days[d])
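In this rolling evaluation, perf_n and perf_a collect per-day misclassification rates for the naive and the importance-weighted classifier respectively, so lower values of perf_a indicate that reweighting the training tweets towards the current day's unlabeled tweets helped.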
Example #23
def apply_NN(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_NN, acc_DT_NN_INFO = check_accuracy(testY, pred_naive)
    # Logistic Regression
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_NN, acc_LR_NN_INFO = check_accuracy(testY, pred_naive)
    # Naive Bayes Bernoulli
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_NN, acc_NB_NN_INFO = check_accuracy(testY, pred_naive)
    #
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_NN': acc_LR_NN,
        'acc_LR_NN_INFO': str(acc_LR_NN_INFO),
        'acc_DT_NN': acc_DT_NN,
        'acc_DT_NN_INFO': str(acc_DT_NN_INFO),
        'acc_NB_NN': acc_NB_NN,
        'acc_NB_NN_INFO': str(acc_NB_NN_INFO),
    }])