def optimizeRF(did,amount):
    """Tune a random forest on dataset ``did`` with a randomized search.

    The features from ``read_did`` are passed through
    ``add_copy_features(X, amount)`` (presumably adds copied feature
    columns -- confirm against that helper) before a train/test split.

    The search draws 40 candidates over ``max_features`` in
    ``1 .. n_features - 1`` and ``min_samples_split`` in ``2 .. 19`` using
    three parallel jobs.

    Returns:
        A tuple ``(best_estimator, scores, duration)`` where ``scores`` is
        the list of 10-fold cross-validation scores of the best estimator on
        the full data, followed by the search's score on the held-out split,
        and ``duration`` is the stopwatch reading for the search fit.
    """
    features, target = read_did(did)
    features = add_copy_features(features, amount)
    train_X, test_X, train_y, test_y = train_test_split(features, target)

    forest = RandomForestClassifier()
    search_space = {
        'max_features': range(1, len(features[0])),
        'min_samples_split': range(2, 20),
    }
    search = RandomizedSearchCV(
        forest,
        param_distributions=search_space,
        n_iter=40,
        n_jobs=3,
    )

    # Only the hyper-parameter search itself is timed.
    with stopwatch() as timer:
        search.fit(train_X, train_y)
    duration = timer.duration

    best = search.best_estimator_
    scores = list(cross_val_score(best, features, target, cv=10))
    scores.append(search.score(test_X, test_y))
    return best, scores, duration
def optimizeADA(did,amount):
    """Tune AdaBoost's learning rate on dataset ``did`` with a randomized search.

    The features from ``read_did`` are passed through
    ``add_copy_features(X, amount)`` (presumably adds copied feature
    columns -- confirm against that helper) before a train/test split.

    The candidate pool holds ``4 * 40`` learning rates drawn as
    ``random() * 1.9 + 0.1``; the search evaluates 40 of them using three
    parallel jobs.

    Returns:
        A tuple ``(best_estimator, scores, duration)`` where ``scores`` is
        the list of 10-fold cross-validation scores of the best estimator on
        the full data, followed by the search's score on the held-out split,
        and ``duration`` is the stopwatch reading for the search fit.
    """
    features, target = read_did(did)
    n_candidates = 40
    features = add_copy_features(features, amount)
    train_X, test_X, train_y, test_y = train_test_split(features, target)

    booster = AdaBoostClassifier()
    # Pool is four times larger than the number of candidates sampled.
    rate_pool = [random() * 1.9 + 0.1 for _ in range(n_candidates * 4)]
    search = RandomizedSearchCV(
        booster,
        param_distributions={'learning_rate': rate_pool},
        n_iter=n_candidates,
        n_jobs=3,
    )

    # Only the hyper-parameter search itself is timed.
    with stopwatch() as timer:
        search.fit(train_X, train_y)
    duration = timer.duration

    best = search.best_estimator_
    scores = list(cross_val_score(best, features, target, cv=10))
    scores.append(search.score(test_X, test_y))
    return best, scores, duration
def functionoidFeatures(clf,did,rand,amount):
    """Cross-validate ``clf`` with and without extra noise features.

    The number of noise features requested from ``add_noise_features`` is
    the feature count (but never less than 10) floor-divided by ``amount``.

    Args:
        clf: Estimator to evaluate; a copy is used for the noisy run.
        did: Dataset identifier handed to ``read_did``.
        rand: Forwarded unchanged to ``add_noise_features``.
        amount: Divisor applied to the (minimum 10) feature count.

    Returns:
        A two-element list: the 10-fold ``f1_weighted`` scores on the
        original features, then the scores on the noise-augmented features.
    """
    local = True
    if local:
        X, y = read_did(did)
    else:
        dataset = oml.datasets.get_dataset(did)
        X, y = dataset.get_data(target=dataset.default_target_attribute)

    # Narrow datasets are treated as if they had 10 features.
    noise_count = max(len(X[0]), 10) // amount
    noisy_X = add_noise_features(X, noise_count, rand)

    plain_clf = clf
    noisy_clf = copy(clf)

    cv_options = {'cv': 10, 'n_jobs': -1, 'scoring': 'f1_weighted'}
    plain_scores = cross_val_score(plain_clf, X, y, **cv_options)
    noisy_scores = cross_val_score(noisy_clf, noisy_X, y, **cv_options)
    return [plain_scores, noisy_scores]
def functionoid_amount(clf,did, amount,adj):
    """Run one round of the noisy-sample experiment on dataset ``did``.

    Three classifiers are fitted and scored:

    1. ``clf`` itself, fitted on a noise-augmented training split and
       evaluated on a noise-augmented test split.
    2. A copy of ``clf`` fitted on the plain training split and evaluated on
       the same noise-augmented test split.
    3. A copy of ``clf`` fitted and evaluated on a split of data that had
       noise added *before* splitting.

    Noise comes from ``random_test_set4``.  Rows whose features are all zero
    are counted as noise rows (presumably that is how the helper marks
    them -- confirm against its definition).

    Args:
        clf: Estimator with ``fit``/``predict``; it is fitted in place.
        did: Dataset identifier handed to ``read_did``.
        amount: Noise amount forwarded to ``random_test_set4``.
        adj: Accepted for signature compatibility; not used by this function.

    Returns:
        A list of four dicts.  The first three, one per classifier in the
        order above, hold ``'score'`` (number of correct predictions) and
        ``'noise'`` (number of correct predictions that fall on noise rows).
        The fourth holds the two test-set sizes (``'test1'``, ``'test2'``),
        ``'amountOfTargets'`` (``len(values_target(y))``) and the number of
        noise rows in each test set (``'noise1'``, ``'noise2'``).
    """
    local = True
    if local:
        X, y = read_did(did)
    else:
        dataset = oml.datasets.get_dataset(did)
        X, y = dataset.get_data(target=dataset.default_target_attribute)

    def noise_row_indices(rows):
        # Element-wise test so list rows and array rows are both handled;
        # comparing an array row to a list has no single truth value.
        return {i for i, row in enumerate(rows) if all(v == 0 for v in row)}

    def tally(predicted, expected, noise_rows):
        # Indices of correct predictions, then how many of those are noise.
        hits = [i for i, label in enumerate(expected) if predicted[i] == label]
        return {'score': len(hits),
                'noise': sum(1 for i in hits if i in noise_rows)}

    svm1 = clf
    svm2 = copy(svm1)
    svm3 = copy(svm1)

    # Split first, then add noise to each side separately.
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)
    noise_X_train, noise_y_train = random_test_set4(X_train, y_train, amount)
    noise_X_test, noise_y_test = random_test_set4(X_test, y_test, amount)
    svm1.fit(noise_X_train, noise_y_train)
    svm2.fit(X_train, y_train)
    y_predict_noise1 = svm1.predict(noise_X_test)
    y_predict_noise2 = svm2.predict(noise_X_test)

    # Add noise first, then split.
    noise_X, noise_y = random_test_set4(X, y, amount)
    X_train3, X_test3, y_train3, y_test3 = train_test_split(
        noise_X, noise_y, stratify=noise_y)
    svm3.fit(X_train3, y_train3)
    y_predict_noise3 = svm3.predict(X_test3)

    pred_wrong = noise_row_indices(noise_X_test)
    pred_wrong3 = noise_row_indices(X_test3)

    return [
        tally(y_predict_noise1, noise_y_test, pred_wrong),
        tally(y_predict_noise2, noise_y_test, pred_wrong),
        tally(y_predict_noise3, y_test3, pred_wrong3),
        {
            'test1': len(noise_y_test),
            'test2': len(y_test3),
            'amountOfTargets': len(values_target(y)),
            'noise1': len(pred_wrong),
            'noise2': len(pred_wrong3),
        },
    ]
def loopFunctionoid_amount(clf,did,times,adj,amount):
    """Repeat the noisy-sample experiment ``times`` times and average it.

    The first round is delegated to ``functionoid_amount``.  The remaining
    ``times - 1`` rounds are run here on fresh copies of ``clf``, using
    ``add_noise_amount(..., amount, adj)`` as the noise source.  Per-round
    ``'score'`` and ``'noise'`` counts of the three classifiers are summed
    into the first round's result and finally floor-divided by ``times``.
    The fourth entry of the result (test-set sizes etc.) is left as returned
    by the first round.

    Rows whose features are all zero are counted as noise rows (presumably
    that is how the noise helper marks them -- confirm against its
    definition).

    Args:
        clf: Estimator with ``fit``/``predict``.
        did: Dataset identifier handed to ``read_did``.
        times: Total number of rounds; ``0`` raises ``ZeroDivisionError``
            when averaging.
        adj: Forwarded to ``add_noise_amount``.
        amount: Noise amount forwarded to the noise helpers.

    Returns:
        The four-element list produced by ``functionoid_amount`` with the
        first three dicts' ``'score'`` and ``'noise'`` replaced by their
        integer (floored) averages over all rounds.
    """
    listOutput = functionoid_amount(clf, did, amount, adj)

    # ``local`` is never bound inside this function.  Honour a module-level
    # flag when one exists; otherwise default to the local reader, as the
    # sibling functions do, instead of failing with NameError.
    use_local = globals().get('local', True)
    if use_local:
        X, y = read_did(did)
    else:
        dataset = oml.datasets.get_dataset(did)
        X, y = dataset.get_data(target=dataset.default_target_attribute)

    def noise_row_indices(rows):
        # Element-wise test so list rows and array rows are both handled;
        # comparing an array row to a list has no single truth value.
        return {i for i, row in enumerate(rows) if all(v == 0 for v in row)}

    def tally(predicted, expected, noise_rows):
        # Indices of correct predictions, then how many of those are noise.
        hits = [i for i, label in enumerate(expected) if predicted[i] == label]
        return {'score': len(hits),
                'noise': sum(1 for i in hits if i in noise_rows)}

    # Round one already happened above, hence the range starting at 1.
    for _ in range(1, times):
        svm1 = copy(clf)
        svm2 = copy(clf)
        svm3 = copy(clf)

        # Split first, then add noise to each side separately.
        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)
        noise_X_train, noise_y_train = add_noise_amount(
            X_train, y_train, amount, adj)
        noise_X_test, noise_y_test = add_noise_amount(
            X_test, y_test, amount, adj)
        svm1.fit(noise_X_train, noise_y_train)
        svm2.fit(X_train, y_train)
        y_predict_noise1 = svm1.predict(noise_X_test)
        y_predict_noise2 = svm2.predict(noise_X_test)

        # Add noise first, then split.
        noise_X, noise_y = add_noise_amount(X, y, amount, adj)
        X_train3, X_test3, y_train3, y_test3 = train_test_split(
            noise_X, noise_y, stratify=noise_y)
        svm3.fit(X_train3, y_train3)
        y_predict_noise3 = svm3.predict(X_test3)

        pred_wrong = noise_row_indices(noise_X_test)
        pred_wrong3 = noise_row_indices(X_test3)
        round_results = (
            tally(y_predict_noise1, noise_y_test, pred_wrong),
            tally(y_predict_noise2, noise_y_test, pred_wrong),
            tally(y_predict_noise3, y_test3, pred_wrong3),
        )
        # zip stops after the three classifier entries, so the summary
        # dict at index 3 is left untouched.
        for total, current in zip(listOutput, round_results):
            total['score'] += current['score']
            total['noise'] += current['noise']

    # Integer (floored) average over all rounds, including the first.
    for total in listOutput[:3]:
        total['score'] //= times
        total['noise'] //= times
    return listOutput