def optimizeRF(did,amount):
    """Tune a random forest on dataset `did` with a randomized search.

    The features returned by ``read_did(did)`` are first passed through
    ``add_copy_features(X, amount)``. A 40-candidate ``RandomizedSearchCV``
    over ``max_features`` and ``min_samples_split`` is then fitted on the
    training part of a ``train_test_split``; only that fit is timed.

    Returns:
        A tuple ``(estimator, cv_score, duration)``:
        the best estimator found by the search; a list holding its 10-fold
        cross-validation scores on the full (augmented) data followed by the
        search's score on the held-out split as the last element; and the
        ``duration`` reported by the stopwatch around the fit.
    """
    features, target = read_did(did)
    features = add_copy_features(features, amount)
    train_X, test_X, train_y, test_y = train_test_split(features, target)

    n_columns = len(features[0])
    search_space = dict(
        max_features=range(1, n_columns),
        min_samples_split=range(2, 20),
    )
    search = RandomizedSearchCV(
        RandomForestClassifier(),
        param_distributions=search_space,
        n_iter=40,
        n_jobs=3,
    )

    # Only the hyper-parameter search itself is measured.
    with stopwatch() as timer:
        search.fit(train_X, train_y)
    elapsed = timer.duration

    best = search.best_estimator_
    scores = [
        *cross_val_score(best, features, target, cv=10),
        search.score(test_X, test_y),
    ]
    return best, scores, elapsed
def optimizeADA(did,amount):
    """Tune AdaBoost's learning rate on dataset `did` with a randomized search.

    The features returned by ``read_did(did)`` are passed through
    ``add_copy_features(X, amount)`` and split with ``train_test_split``.
    Candidate learning rates are ``random() * 1.9 + 0.1`` draws (four times
    as many as search iterations); a 40-iteration ``RandomizedSearchCV`` is
    fitted on the training part, and only that fit is timed.

    Returns:
        A tuple ``(estimator, cv_score, duration)``:
        the best estimator found by the search; a list holding its 10-fold
        cross-validation scores on the full (augmented) data followed by the
        search's score on the held-out split as the last element; and the
        ``duration`` reported by the stopwatch around the fit.
    """
    n_candidates = 40
    features, target = read_did(did)
    features = add_copy_features(features, amount)
    train_X, test_X, train_y, test_y = train_test_split(features, target)

    # Draw the candidate pool after the split, as the original ordering did.
    rate_pool = [random() * 1.9 + 0.1 for _ in range(n_candidates * 4)]
    search = RandomizedSearchCV(
        AdaBoostClassifier(),
        param_distributions={'learning_rate': rate_pool},
        n_iter=n_candidates,
        n_jobs=3,
    )

    # Only the hyper-parameter search itself is measured.
    with stopwatch() as timer:
        search.fit(train_X, train_y)
    elapsed = timer.duration

    best = search.best_estimator_
    scores = [
        *cross_val_score(best, features, target, cv=10),
        search.score(test_X, test_y),
    ]
    return best, scores, elapsed
def functionoidFeatures(clf,did,rand,amount):
    """Cross-validate `clf` on a dataset with and without extra features.

    The dataset is loaded with ``read_did(did)`` (the OpenML branch is kept
    behind a hard-coded switch that is currently off). A second feature
    matrix is produced by ``add_noise_features(X, k, rand)`` where ``k`` is
    ``max(number of columns, 10) // amount``.

    Returns:
        A two-element list of 10-fold ``f1_weighted`` cross-validation score
        arrays: first for the original features, then for the matrix
        returned by ``add_noise_features``.
    """
    use_local_copy = True
    if not use_local_copy:
        dataset = oml.datasets.get_dataset(did)
        X, y = dataset.get_data(target=dataset.default_target_attribute)
    else:
        X, y = read_did(did)

    # Narrow datasets are treated as if they had 10 columns.
    width = max(len(X[0]), 10)
    widened_X = add_noise_features(X, width // amount, rand)

    # The second run gets its own (shallow) copy of the estimator.
    clf_for_widened = copy(clf)
    return [
        cross_val_score(clf, X, y, cv=10, n_jobs=-1, scoring='f1_weighted'),
        cross_val_score(clf_for_widened, widened_X, y, cv=10, n_jobs=-1,
                        scoring='f1_weighted'),
    ]
def functionoid_amount(clf,did, amount,adj):
    """Run one noise experiment with three fits of the same classifier.

    Three estimators are compared (``clf`` itself and two shallow copies):

    1. fitted on ``random_test_set4(X_train, y_train, amount)`` and
       evaluated on ``random_test_set4(X_test, y_test, amount)``;
    2. fitted on the untouched training split and evaluated on the same
       processed test set as (1);
    3. fitted and evaluated on a fresh stratified split of
       ``random_test_set4(X, y, amount)``.

    For each, the number of correct predictions is counted, together with
    how many of those correct predictions fall on rows whose features are
    all zero (presumably the rows injected by ``random_test_set4`` --
    confirm against that helper).

    Args:
        clf: estimator to evaluate. NOTE: it is fitted in place (run 1 uses
            the passed object itself, not a copy).
        did: dataset id handed to ``read_did``.
        amount: forwarded to ``random_test_set4``.
        adj: accepted for signature compatibility; not used here.

    Returns:
        A list of four dicts. The first three are
        ``{'score': correct predictions, 'noise': correct predictions on
        all-zero rows}`` for runs 1-3. The fourth holds ``'test1'`` and
        ``'test2'`` (sizes of the two test sets), ``'amountOfTargets'``
        (``len(values_target(y))``) and ``'noise1'`` / ``'noise2'`` (number
        of all-zero rows in each test set).
    """
    def is_zero_row(row):
        # Element-wise check: unlike `row == [0] * len(row)` this also works
        # when rows are NumPy arrays (where `==` yields an array whose truth
        # value is ambiguous).
        return all(value == 0 for value in row)

    def tally(predictions, targets, zero_rows):
        # Count correct predictions, and how many of them hit all-zero rows.
        hits = 0
        zero_hits = 0
        for idx, (predicted, actual) in enumerate(zip(predictions, targets)):
            if predicted == actual:
                hits += 1
                if idx in zero_rows:
                    zero_hits += 1
        return hits, zero_hits

    local = True
    if local:
        X, y = read_did(did)
    else:
        DataSetOML = oml.datasets.get_dataset(did)
        X, y = DataSetOML.get_data(target=DataSetOML.default_target_attribute)

    svm1 = clf
    svm2 = copy(svm1)
    svm3 = copy(svm1)

    # Runs 1 and 2: split first, then process each part separately.
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)
    noise_X_train, noise_y_train = random_test_set4(X_train, y_train, amount)
    noise_X_test, noise_y_test = random_test_set4(X_test, y_test, amount)

    svm1.fit(noise_X_train, noise_y_train)
    svm2.fit(X_train, y_train)
    y_predict_noise1 = svm1.predict(noise_X_test)
    y_predict_noise2 = svm2.predict(noise_X_test)

    # Run 3: process the whole dataset first, then split.
    noise_X, noise_y = random_test_set4(X, y, amount)
    X_train3, X_test3, y_train3, y_test3 = train_test_split(
        noise_X, noise_y, stratify=noise_y)
    svm3.fit(X_train3, y_train3)
    y_predict_noise3 = svm3.predict(X_test3)

    zero_rows = {i for i, row in enumerate(noise_X_test) if is_zero_row(row)}
    zero_rows3 = {i for i, row in enumerate(X_test3) if is_zero_row(row)}

    score_1, noise1 = tally(y_predict_noise1, noise_y_test, zero_rows)
    score_2, noise2 = tally(y_predict_noise2, noise_y_test, zero_rows)
    score_3, noise3 = tally(y_predict_noise3, y_test3, zero_rows3)

    return [
        {'score': score_1, 'noise': noise1},
        {'score': score_2, 'noise': noise2},
        {'score': score_3, 'noise': noise3},
        {'test1': len(noise_y_test), 'test2': len(y_test3),
         'amountOfTargets': len(values_target(y)),
         'noise1': len(zero_rows), 'noise2': len(zero_rows3)},
    ]
def loopFunctionoid_amount(clf,did,times,adj,amount):
    """Repeat the noise experiment and floor-average the counts.

    The first run is delegated to ``functionoid_amount``; ``times - 1``
    further runs are performed here and their counts are added to the first
    run's result. Each further run fits three shallow copies of ``clf``:

    1. on ``add_noise_amount(X_train, y_train, amount, adj)``, evaluated on
       ``add_noise_amount(X_test, y_test, amount, adj)``;
    2. on the untouched training split, evaluated on the same processed
       test set as (1);
    3. on a fresh stratified split of ``add_noise_amount(X, y, amount, adj)``.

    NOTE(review): the first run (inside ``functionoid_amount``) uses
    ``random_test_set4`` and ignores ``adj``, while the runs here use
    ``add_noise_amount`` -- confirm that mixing the two is intended.

    Args:
        clf: estimator to evaluate (copied for every run in this loop).
        did: dataset id handed to ``read_did``.
        times: total number of runs; also the divisor for the averages.
        adj: forwarded to ``add_noise_amount``.
        amount: forwarded to ``add_noise_amount`` and ``functionoid_amount``.

    Returns:
        The list produced by ``functionoid_amount`` with the ``'score'`` and
        ``'noise'`` entries of its first three dicts replaced by the sum over
        all runs floor-divided by ``times``. The fourth dict (test-set sizes
        etc.) is left as reported by the first run.
    """
    def is_zero_row(row):
        # Element-wise check: unlike `row == [0] * len(row)` this also works
        # when rows are NumPy arrays (where `==` yields an array whose truth
        # value is ambiguous).
        return all(value == 0 for value in row)

    def tally(predictions, targets, zero_rows):
        # Count correct predictions, and how many of them hit all-zero rows.
        hits = 0
        zero_hits = 0
        for idx, (predicted, actual) in enumerate(zip(predictions, targets)):
            if predicted == actual:
                hits += 1
                if idx in zero_rows:
                    zero_hits += 1
        return hits, zero_hits

    # The original read `local` without defining it in this scope, which is a
    # NameError unless a module-level global exists; use the same hard-coded
    # switch as the sibling functions.
    local = True

    listOutput = functionoid_amount(clf, did, amount, adj)
    if local:
        X, y = read_did(did)
    else:
        DataSetOML = oml.datasets.get_dataset(did)
        X, y = DataSetOML.get_data(target=DataSetOML.default_target_attribute)

    # Run 1 was done by functionoid_amount above, hence range(1, times).
    for _ in range(1, times):
        svm1 = copy(clf)
        svm2 = copy(clf)
        svm3 = copy(clf)

        # Runs 1 and 2: split first, then process each part separately.
        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)
        noise_X_train, noise_y_train = add_noise_amount(X_train, y_train, amount, adj)
        noise_X_test, noise_y_test = add_noise_amount(X_test, y_test, amount, adj)

        svm1.fit(noise_X_train, noise_y_train)
        svm2.fit(X_train, y_train)
        y_predict_noise1 = svm1.predict(noise_X_test)
        y_predict_noise2 = svm2.predict(noise_X_test)

        # Run 3: process the whole dataset first, then split.
        noise_X, noise_y = add_noise_amount(X, y, amount, adj)
        X_train3, X_test3, y_train3, y_test3 = train_test_split(
            noise_X, noise_y, stratify=noise_y)
        svm3.fit(X_train3, y_train3)
        y_predict_noise3 = svm3.predict(X_test3)

        zero_rows = {i for i, row in enumerate(noise_X_test) if is_zero_row(row)}
        zero_rows3 = {i for i, row in enumerate(X_test3) if is_zero_row(row)}

        run_results = (
            tally(y_predict_noise1, noise_y_test, zero_rows),
            tally(y_predict_noise2, noise_y_test, zero_rows),
            tally(y_predict_noise3, y_test3, zero_rows3),
        )
        for totals, (hits, zero_hits) in zip(listOutput, run_results):
            totals['score'] = totals['score'] + hits
            totals['noise'] = totals['noise'] + zero_hits

    # Integer (floor) average over all runs, as in the original.
    for totals in listOutput[:3]:
        totals['score'] = totals['score'] // times
        totals['noise'] = totals['noise'] // times
    return listOutput