import numpy as np
import numpy.random as rnd
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV

from libtlda.iw import ImportanceWeightedClassifier


def test_iwe_kernel_mean_matching():
    """Test for estimating weights through kernel mean matching."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_kernel_mean_matching(X, Z)
    assert np.all(iw >= 0)
def test_iwe_kernel_densities():
    """Test for estimating weights through kernel density estimation."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_kernel_densities(X, Z)
    assert np.all(iw >= 0)
def test_iwe_nearest_neighbours():
    """Test for estimating weights through nearest neighbours."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_nearest_neighbours(X, Z)
    assert np.all(iw >= 0)
def test_iwe_logistic_discrimination():
    """Test for estimating weights through a logistic discriminator."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_logistic_discrimination(X, Z)
    assert np.all(iw >= 0)
def test_iwe_ratio_Gaussians():
    """Test for estimating weights as a ratio of Gaussians."""
    X = rnd.randn(10, 2)
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    iw = clf.iwe_ratio_gaussians(X, Z)
    assert np.all(iw >= 0)
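# All five iwe_* estimators above return one nonnegative importance weight per
# source sample, approximating the density ratio p_Z(x) / p_X(x). A minimal
# standalone sketch of the ratio-of-Gaussians estimate (an illustration of the
# idea, not libtlda's exact implementation):
import numpy as np
from scipy.stats import multivariate_normal


def ratio_of_gaussians(X, Z):
    """Weight source samples X by a Gaussian fit of target Z over source X."""
    p_Z = multivariate_normal(Z.mean(axis=0), np.cov(Z, rowvar=False))
    p_X = multivariate_normal(X.mean(axis=0), np.cov(X, rowvar=False))
    return p_Z.pdf(X) / p_X.pdf(X)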
def test_fit():
    """Test for fitting the model."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    clf.fit(X, y, Z)
    assert clf.is_trained
def test_predict():
    """Test for making predictions."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier()
    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)
    labels = np.unique(y)
    assert len(np.setdiff1d(np.unique(u_pred), labels)) == 0
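# For context: fit(X, y, Z) first estimates importance weights on the source
# samples and then trains a weighted source classifier. A minimal standalone
# sketch of that two-step idea using scikit-learn's sample_weight (this mirrors
# the logistic-discrimination estimator; libtlda's internals differ):
import numpy as np
from sklearn.linear_model import LogisticRegression


def iw_fit_sketch(X, y, Z):
    # 1) Discriminate source (0) from target (1); the posterior-odds ratio
    #    p(target | x) / p(source | x) approximates p_Z(x) / p_X(x).
    domains = np.hstack((np.zeros(X.shape[0]), np.ones(Z.shape[0])))
    disc = LogisticRegression().fit(np.vstack((X, Z)), domains)
    p = disc.predict_proba(X)
    iw = p[:, 1] / p[:, 0]
    # 2) Train the source classifier with target-importance weights.
    return LogisticRegression().fit(X, y, sample_weight=iw)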
def test_regularization():
    """Test the l2-regularization options."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = rnd.randn(10, 2) + 1
    # l2_regularization=None selects a cross-validated regularizer.
    clf = ImportanceWeightedClassifier(loss_function='lr',
                                       l2_regularization=None)
    assert isinstance(clf.clf, LogisticRegressionCV)
    # A float fixes the regularization strength.
    clf = ImportanceWeightedClassifier(loss_function='lr',
                                       l2_regularization=1.0)
    assert isinstance(clf.clf, LogisticRegression)
def run_6():
    import numpy as np
    from libtlda.iw import ImportanceWeightedClassifier

    X = np.random.randn(10, 2)
    # hstack, not vstack: the labels must form a flat vector of length 10.
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = np.random.randn(10, 2)

    clf = ImportanceWeightedClassifier(loss='quadratic', iwe='kmm')
    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)
    print(u_pred)
def main(source, target, model, target_train_ratio, random_state):
    params = {
        'source': source,
        'target': target,
        'target_train_ratio': target_train_ratio,
        'max_features': 5000,
        'random_state': random_state,
    }
    params['partition'] = 'tr'
    tr_X, tr_y = get_data(AmazonDatasetCombined(**params))
    params['partition'] = 'te'
    te_X, te_y = get_data(AmazonDatasetCombined(**params))
    tr_y = tr_y.reshape(-1)
    te_y = te_y.reshape(-1)

    if model == 'lr':
        clf = LogisticRegression(solver='lbfgs', max_iter=1000, C=0.2)
        clf.fit(tr_X, tr_y)
    elif model == 'svm':
        clf = LinearSVC(C=0.2)
        clf.fit(tr_X, tr_y)
    elif model == 'kmm':
        clf = ImportanceWeightedClassifier(iwe='kmm')
        clf.fit(tr_X, tr_y, te_X)
    elif model == 'suba-lr':
        clf = SubspaceAlignedClassifier(loss='logistic')
        clf.fit(tr_X, tr_y, te_X)
    elif model == 'suba-hi':
        clf = SubspaceAlignedClassifier(loss='hinge')
        clf.fit(tr_X, tr_y, te_X)
    elif model == 'tca-lr':
        clf = TransferComponentClassifier(loss='logistic')
        clf.fit(tr_X, tr_y, te_X)
    elif model == 'tca-hi':
        clf = TransferComponentClassifier(loss='hinge')
        clf.fit(tr_X, tr_y, te_X)
    else:
        raise ValueError('Unknown model: %s' % model)

    tr_score = accuracy_score(tr_y, clf.predict(tr_X))
    te_score = accuracy_score(te_y, clf.predict(te_X))
    return tr_score, te_score
def InitTransferClassifier(index, l, iw):
    if index == 1:
        clf = ImportanceWeightedClassifier(loss=l, iwe=iw)
    elif index == 2:
        clf = TransferComponentClassifier()
    elif index == 3:
        clf = SubspaceAlignedClassifier()
    elif index == 4:
        clf = StructuralCorrespondenceClassifier()
    elif index == 5:
        clf = RobustBiasAwareClassifier()
    elif index == 6:
        clf = FeatureLevelDomainAdaptiveClassifier()
    elif index == 7:
        clf = TargetContrastivePessimisticClassifier()
    else:
        # Guard against silently returning an undefined clf.
        raise ValueError('Unknown classifier index: %d' % index)
    return clf
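# Example call for the dispatcher above; the l and iw arguments only affect
# the importance-weighted classifier (index 1):
clf = InitTransferClassifier(1, 'logistic', 'kmm')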
def model_build(classifier, train_features, train_labels, test_features):
    if classifier == "IW":
        # GridSearchCV cannot pass the target features through to
        # ImportanceWeightedClassifier.fit, so this classifier is fit
        # directly on standardized features instead of inside a pipeline.
        model = ImportanceWeightedClassifier(iwe='kde')
        model.fit(preprocessing.scale(train_features), train_labels,
                  preprocessing.scale(test_features))
    else:
        if classifier == "LR":
            pipe = make_pipeline(StandardScaler(), LogisticRegression())
            param_grid = [{'logisticregression__C': [1, 10, 100]}]
        elif classifier == "SVM":
            pipe = make_pipeline(StandardScaler(),
                                 SVC(kernel='linear', probability=True))
            param_grid = [{'svc__C': [0.01, 0.1, 1]}]
        elif classifier == "RF":
            pipe = make_pipeline(StandardScaler(),
                                 RandomForestClassifier(max_features='sqrt'))
            param_grid = {
                'randomforestclassifier__n_estimators': range(230, 300, 10),
                'randomforestclassifier__max_depth': range(8, 12, 1),
                'randomforestclassifier__min_samples_leaf': range(1, 5, 1),
                'randomforestclassifier__max_features': range(1, 20, 1),
            }
        model = GridSearchCV(pipe, param_grid, cv=3)
        model.fit(train_features, train_labels)
        print(model.best_params_)
    # Save the model.
    model_file_name = classifier + time.strftime("%m%d-%H%M%S") + ".model"
    joblib.dump(filename=model_file_name, value=model)
    return model
def apply_ENSEMBLE(trainX, trainY, testX, testY, window, source_pos, target_pos):
    classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
    classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
    classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
    classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
    classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
    classifier_TCA_NB = TransferComponentClassifier(loss="berno")
    classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")

    # Only a subset of the base classifiers is ensembled here; the remaining
    # members were disabled during experimentation.
    eclf = EnsembleClassifier(clfs=[classifier_TCA_NB,
                                    classifier_NN_DT,
                                    classifier_KMM_DT,
                                    classifier_KMM_LR])
    eclf.fit(trainX, trainY, testX)
    pred = eclf.predict(testX)
    acc_ENSEMBLE, acc_ENSEMBLE_INFO = check_accuracy(testY, pred)

    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_ENSEMBLE': acc_ENSEMBLE,
        'acc_ENSEMBLE_INFO': acc_ENSEMBLE_INFO,
    }])
classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
classifier_TCA_NB = TransferComponentClassifier(loss="berno")
classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")

clfs = [classifier_SA_DT, classifier_SA_LR, classifier_SA_NB,
        classifier_TCA_LR, classifier_TCA_DT, classifier_TCA_NB,
        classifier_NN_DT, classifier_NN_LR, classifier_NN_NB,
        classifier_KMM_DT, classifier_KMM_LR, classifier_KMM_NB]
# One weight per base classifier; the original weights=[1, 1] did not match
# the twelve ensemble members.
eclf = EnsembleClassifier(clfs=clfs, weights=[1] * len(clfs))
eclf.fit(trainX, trainY, testX)
Z = np.concatenate((Z0, Z1), axis=0)
u = np.concatenate((labels[0] * np.ones((M0,), dtype='int'),
                    labels[1] * np.ones((M1,), dtype='int')), axis=0)

"""Classifiers"""

# Train a naive logistic regressor on the source data only
lr = LogisticRegression().fit(X, y)

# Make predictions
pred_naive = lr.predict(Z)

# Select adaptive classifier
if classifier == 'iw':
    # Importance-weighted classifier
    clf = ImportanceWeightedClassifier(iwe='lr', loss='logistic')
elif classifier == 'tca':
    # Classifier based on transfer component analysis
    clf = TransferComponentClassifier(loss='logistic', mu=1.)
elif classifier == 'suba':
    # Classifier based on subspace alignment
    clf = SubspaceAlignedClassifier(loss='logistic')
elif classifier == 'scl':
    # Classifier based on structural correspondence learning
    clf = StructuralCorrespondenceClassifier(num_pivots=2, num_components=1)
elif classifier == 'rba':
    # Robust bias-aware classifier
    clf = RobustBiasAwareClassifier()
def test_init():
    """Test for object type."""
    clf = ImportanceWeightedClassifier()
    assert type(clf) == ImportanceWeightedClassifier
    assert not clf.is_trained
import numpy as np

from libtlda.iw import ImportanceWeightedClassifier

X = np.random.randn(10, 2)
y = np.array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1])
Z = np.random.randn(10, 2)

clf = ImportanceWeightedClassifier()
clf.fit(X, y, Z)
# Posterior probabilities for the target samples
print(clf.predict_proba(Z))
            clf.fit(X, y, Y)
            probas_ = clf.predict(X)
            accuracy_org = sum((probas_ > 0.5) == y) / (1.0 * nsamples)
            target_probas_ = clf.predict(Y)
            accuracy_targ = sum((target_probas_ > 0.5) == classes_test) / (1.0 * n_test_samples)
            print(subject, target_subj, index, l, iw, accuracy_org, accuracy_targ)
elif mode == 3:
    # Evaluation of classifiers using the full training set as test set
    y = classes_train
    nsamples = y.shape[0]
    n_test_samples = classes_test.shape[0]
    for index in [1, 2, 6, 7, 8, 9, 10, 11, 12, 13]:  # subset of range(1, n_classifiers)
        for iw in [0, 1, 2, 3]:
            # weighting_functions (defined elsewhere) is assumed to map these
            # indices to libtlda's estimator names, e.g. 'lr', 'rg', 'nn', 'kde'.
            iwe = weighting_functions[iw]
            w_clf = ImportanceWeightedClassifier(iwe=iwe)
            X = np.asarray(MEG_data_train)
            Y = np.asarray(MEG_data_test)
            w_clf.fit(X, y, Y)
            if iwe == 'lr':
                w = w_clf.iwe_logistic_discrimination(X, Y)
            elif iwe == 'rg':
                w = w_clf.iwe_ratio_gaussians(X, Y)
            elif iwe == 'nn':
                w = w_clf.iwe_nearest_neighbours(X, Y)
            elif iwe == 'kde':
                w = w_clf.iwe_kernel_densities(X, Y)
            elif iwe == 'kmm':
                w = w_clf.iwe_kernel_mean_matching(X, Y)
            else:
                raise NotImplementedError('Estimator not implemented.')
def build_models(trainX, trainY, testX, testY, source_pos, target_pos, window):
    ########################
    ### SEMI-SUPERVISED ####
    ########################
    # Label Propagation
    label_prop_model = LabelPropagation(kernel='knn')
    label_prop_model.fit(trainX, trainY)
    Y_Pred = label_prop_model.predict(testX)
    acc_ss_propagation, acc_ss_propagation_INFO = checkAccuracy(testY, Y_Pred)

    # Label Spreading
    label_prop_models_spr = LabelSpreading(kernel='knn')
    label_prop_models_spr.fit(trainX, trainY)
    Y_Pred = label_prop_models_spr.predict(testX)
    acc_ss_spreading, acc_ss_spreading_INFO = checkAccuracy(testY, Y_Pred)

    ########################
    ##### WITHOUT TL #######
    ########################
    # LogisticRegression
    modelLR = LogisticRegression()
    modelLR.fit(trainX, trainY)
    predLR = modelLR.predict(testX)
    accLR, acc_LR_INFO = checkAccuracy(testY, predLR)

    # DecisionTreeClassifier
    modelDT = tree.DecisionTreeClassifier()
    modelDT.fit(trainX, trainY)
    predDT = modelDT.predict(testX)
    accDT, acc_DT_INFO = checkAccuracy(testY, predDT)

    # BernoulliNB
    modelNB = BernoulliNB()
    modelNB.fit(trainX, trainY)
    predNB = modelNB.predict(testX)
    accNB, acc_NB_INFO = checkAccuracy(testY, predNB)
    # print("WITHOUT TL ACC_LR:", accLR, " ACC_DT:", accDT, " ACC_NB:", accNB)

    ########################
    ####### WITH TL ########
    ########################

    ####################################################
    ### Kernel Mean Matching (Huang et al., 2006) ###
    # Decision Tree
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_KMM, acc_DT_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_KMM)

    # Logistic Regression
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_KMM, acc_LR_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_KMM)

    # Naive Bayes Bernoulli
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_KMM, acc_NB_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_KMM)

    ####################################################
    ### Nearest-neighbour-based weighting (Loog, 2015) ###
    # Decision Tree
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_NN, acc_DT_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_NN)

    # Logistic Regression
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_NN, acc_LR_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_NN)

    # Naive Bayes Bernoulli
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_NN, acc_NB_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_NN)

    ####################################################
    ### Transfer Component Analysis (Pan et al., 2009) ###
    # Decision Tree
    print("\n Transfer Component Analysis (Pan et al., 2009)")
    classifier = TransferComponentClassifier(loss="dtree", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_TCA, acc_DT_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_TCA)

    # Logistic Regression
    classifier = TransferComponentClassifier(loss="logistic", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_TCA, acc_LR_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_TCA)

    # Naive Bayes Bernoulli
    classifier = TransferComponentClassifier(loss="berno", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_TCA, acc_NB_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_TCA)

    ####################################################
    ### Subspace Alignment (Fernando et al., 2013) ###
    # Decision Tree
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_SA, acc_DT_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_SA)

    # Logistic Regression
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_SA, acc_LR_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_SA)

    # Naive Bayes Bernoulli
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_SA, acc_NB_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_SA)

    #################################
    ############ ENSEMBLE ###########
    #################################
    classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
    classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
    classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
    classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
    classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
    classifier_TCA_NB = TransferComponentClassifier(loss="berno")
    classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")

    eclf = EnsembleClassifier(
        clfs=[classifier_TCA_DT, classifier_NN_DT, classifier_KMM_DT])
    eclf.fit(trainX, trainY, testX)
    pred = eclf.predict_v2(testX)
    acc_ENSEMBLE, acc_ENSEMBLE_INFO = checkAccuracy(testY, pred)

    ########################
    ####### RETURN #########
    ########################
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_SS_propagation': acc_ss_propagation,
        'acc_SS_propagation_INFO': acc_ss_propagation_INFO,
        'acc_SS_spreading': acc_ss_spreading,
        'acc_SS_spreading_INFO': acc_ss_spreading_INFO,
        'acc_ENSEMBLE': acc_ENSEMBLE,
        'acc_LR': accLR,
        'acc_LR_INFO': str(acc_LR_INFO),
        'acc_DT': accDT,
        'acc_DT_INFO': str(acc_DT_INFO),
        'acc_NB': accNB,
        'acc_NB_INFO': str(acc_NB_INFO),
        'acc_LR_KMM': acc_LR_KMM,
        'acc_LR_KMM_INFO': str(acc_LR_KMM_INFO),
        'acc_LR_NN': acc_LR_NN,
        'acc_LR_NN_INFO': str(acc_LR_NN_INFO),
        'acc_LR_TCA': acc_LR_TCA,
        'acc_LR_TCA_INFO': str(acc_LR_TCA_INFO),
        'acc_LR_SA': acc_LR_SA,
        'acc_LR_SA_INFO': str(acc_LR_SA_INFO),
        'acc_DT_KMM': acc_DT_KMM,
        'acc_DT_KMM_INFO': str(acc_DT_KMM_INFO),
        'acc_DT_NN': acc_DT_NN,
        'acc_DT_NN_INFO': str(acc_DT_NN_INFO),
        'acc_DT_TCA': acc_DT_TCA,
        'acc_DT_TCA_INFO': str(acc_DT_TCA_INFO),
        'acc_DT_SA': acc_DT_SA,
        'acc_DT_SA_INFO': str(acc_DT_SA_INFO),
        'acc_NB_KMM': acc_NB_KMM,
        'acc_NB_KMM_INFO': str(acc_NB_KMM_INFO),
        'acc_NB_NN': acc_NB_NN,
        'acc_NB_NN_INFO': str(acc_NB_NN_INFO),
        'acc_NB_TCA': acc_NB_TCA,
        'acc_NB_TCA_INFO': str(acc_NB_TCA_INFO),
        'acc_NB_SA': acc_NB_SA,
        'acc_NB_SA_INFO': str(acc_NB_SA_INFO),
    }])
def apply_KMM(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_KMM, acc_DT_KMM_INFO = check_accuracy(testY, pred_naive)

    # Logistic Regression
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_KMM, acc_LR_KMM_INFO = check_accuracy(testY, pred_naive)

    # Naive Bayes Bernoulli
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_KMM, acc_NB_KMM_INFO = check_accuracy(testY, pred_naive)

    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_KMM': acc_LR_KMM,
        'acc_LR_KMM_INFO': str(acc_LR_KMM_INFO),
        'acc_DT_KMM': acc_DT_KMM,
        'acc_DT_KMM_INFO': str(acc_DT_KMM_INFO),
        'acc_NB_KMM': acc_NB_KMM,
        'acc_NB_KMM_INFO': str(acc_NB_KMM_INFO),
    }])
def run_5(n_samples=100):
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.linear_model import LinearRegression

    from libtlda.iw import ImportanceWeightedClassifier

    clf = ImportanceWeightedClassifier(loss='quadratic', iwe='kmm')

    # Note: np.random.normal takes the standard deviation, so these draws are
    # X ~ N(0.5, 0.25²) and Z ~ N(0.0, 0.09²).
    x = np.random.normal(0.5, 0.5**2, (n_samples, 1))
    z = np.random.normal(0, 0.3**2, (n_samples, 1))
    x_noise = np.random.normal(0, 0.07, (n_samples, 1))
    z_noise = np.random.normal(0, 0.03, (n_samples, 1))

    def data_func(var):
        return var**3 - var

    y = np.array(data_func(x)).ravel()  # + noise
    X = x + x_noise
    # Different distribution to approximate, starting from the initial one
    Z = z + z_noise
    y_bis = np.array(data_func(z)).ravel()

    print(X.shape)
    print(y.shape)
    print(Z.shape)
    print(y_bis.shape)

    clf.fit(X, y, Z)
    preds = clf.predict(Z)
    print(np.linalg.norm(preds - Z.ravel()))

    clf_linear = LinearRegression()
    clf_linear.fit(Z, y_bis)
    true_coefs = clf_linear.coef_
    print(preds)

    # Plotting
    x_range = np.linspace(-0.4, 1.2, 100)
    kmm_line = x_range * preds
    true_line = x_range * true_coefs
    plt.axis([-0.4, 1.2, -0.5, 1])
    plt.scatter(X, y, label='X points', color='blue', marker='o')
    plt.plot(x_range, data_func(x_range), label='X distribution', color='blue')
    plt.scatter(Z, y_bis, label='Z points', color='red', marker='+')
    plt.plot(x_range, kmm_line, label='Z kmm regression line', color='red')
    plt.plot(x_range, true_line, label='Z OLS line', color='black')
    plt.legend()
    plt.show()
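# For comparison: loss='quadratic' above amounts to importance-weighted least
# squares. A minimal sketch of that estimator, given weights iw from any of
# the iwe_* methods (illustrative; not libtlda's exact solver):
import numpy as np


def weighted_least_squares(X, y, iw):
    """Solve beta = argmin sum_i iw_i * (y_i - x_i @ beta)**2 in closed form."""
    XW = X * iw[:, None]                       # row-wise weighting, i.e. W @ X
    return np.linalg.solve(XW.T @ X, XW.T @ y)  # X'WX beta = X'Wy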
trn_index = tweets_r['created_date'].isin(days[past]).values.tolist()
# Find all tweets from today
tst_index = (tweets_r['created_date'] == days[d]).values.tolist()

# Split out training data
trn_X = X[trn_index, :]
trn_Y = Y[trn_index]

# Split out test data
tst_X = X[tst_index, :]
tst_Y = Y[tst_index]

# Define classifiers
clf_n = linear_model.LogisticRegression(C=0.1)
clf_a = ImportanceWeightedClassifier(loss='logistic', l2=0.1)

# Train classifiers on data from current and previous days
clf_n.fit(trn_X, trn_Y)
clf_a.fit(trn_X, trn_Y, tst_X)

# Make predictions
preds_n = clf_n.predict(tst_X)
preds_a = clf_a.predict(tst_X)

# Test on data from current day and store error rates
perf_n.append(np.mean(preds_n != tst_Y))
perf_a.append(np.mean(preds_a != tst_Y))

# Store day and rumour
days_array.append(days[d])
def apply_NN(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_NN, acc_DT_NN_INFO = check_accuracy(testY, pred_naive)

    # Logistic Regression
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_NN, acc_LR_NN_INFO = check_accuracy(testY, pred_naive)

    # Naive Bayes Bernoulli
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_NN, acc_NB_NN_INFO = check_accuracy(testY, pred_naive)

    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_NN': acc_LR_NN,
        'acc_LR_NN_INFO': str(acc_LR_NN_INFO),
        'acc_DT_NN': acc_DT_NN,
        'acc_DT_NN_INFO': str(acc_DT_NN_INFO),
        'acc_NB_NN': acc_NB_NN,
        'acc_NB_NN_INFO': str(acc_NB_NN_INFO),
    }])