Example No. 1
def KNeighbors_en(self, n_neighbors):
    # bag 10 KNN classifiers, each fit on a random half of the training set
    n_estimators = 10
    model = BC(KNeighborsClassifier(n_neighbors=n_neighbors, weights='uniform',
                                    algorithm='auto', n_jobs=-1),
               max_samples=0.5, n_estimators=n_estimators, n_jobs=-1)
    model.fit(self.x_train, self.y_train)
    predict = model.predict(self.x_test)
    print("Ensemble KNeighbors classification accuracy:",
          metrics.accuracy_score(self.y_test, predict))
    return predict
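For reference, a self-contained version of the same bagged-KNN pattern; the iris data and the split below are illustrative assumptions, not part of the original snippet:

from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier as BC
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)  # illustrative dataset
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

# each of the 10 KNN estimators sees a random 50% sample of the training set
model = BC(KNeighborsClassifier(n_neighbors=5),
           max_samples=0.5, n_estimators=10, n_jobs=-1)
model.fit(x_train, y_train)
print(metrics.accuracy_score(y_test, model.predict(x_test)))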
Example No. 2
def make_model(self, n_estimators, n_jobs, verbose=1):
    # each bagged estimator is a single decision tree grown without
    # bootstrapping (a one-tree RandomForest), so the bagging wrapper
    # controls all of the resampling
    model1 = RF(n_estimators=1,
                criterion='entropy',
                bootstrap=False,
                class_weight='balanced_subsample')
    self.model = BC(base_estimator=model1,
                    n_estimators=n_estimators,
                    max_features=1.,
                    n_jobs=n_jobs,
                    verbose=verbose)
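The same construction used directly, outside the class; the synthetic data and the ensemble size below are assumptions for illustration:

from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier as BC
from sklearn.ensemble import RandomForestClassifier as RF

X, y = make_classification(n_samples=500, random_state=0)  # illustrative data

tree = RF(n_estimators=1, criterion='entropy', bootstrap=False,
          class_weight='balanced_subsample')
# base estimator passed positionally: its keyword name changed across
# scikit-learn versions (base_estimator before 1.2, estimator after)
bag = BC(tree, n_estimators=100, max_features=1., n_jobs=-1)
bag.fit(X, y)
print(bag.score(X, y))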
Example No. 3
def __init__(self, criterion_, max_depth_, random_state_,
             n_estimators_, n_jobs_):
    # initialize the base decision tree
    self.tree = DTC(criterion=criterion_, max_depth=max_depth_,
                    random_state=random_state_)

    # initialize the BaggingClassifier, with n_estimators
    # being the only adjustable value
    self.bag = BC(base_estimator=self.tree,
                  n_estimators=n_estimators_,
                  n_jobs=n_jobs_,
                  random_state=random_state_)
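A hedged sketch of exercising this wrapper's one tunable knob; the hyperparameter values and the synthetic data are assumptions:

from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier as BC
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier as DTC

X, y = make_classification(n_samples=300, random_state=0)  # illustrative data
tree = DTC(criterion='gini', max_depth=5, random_state=0)

# n_estimators is the only value the wrapper varies; CV shows its effect
for n in (10, 50, 200):
    bag = BC(tree, n_estimators=n, n_jobs=-1, random_state=0)
    print(n, cross_val_score(bag, X, y, cv=5).mean())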
Example No. 4
# NOTE: the call that loads training_data (e.g. np.genfromtxt) is truncated
# in the source; only its final argument survives:
                          skip_header=1)

X = training_data[:, :1000]
Y = training_data[:, 1000]

# Various Classifiers
dtc_min_samples_leaf = DTC(min_samples_leaf=15)
etc = ETC()
gbc = GBC()
rfc = RFC()
dtc_max_depth = DTC(max_depth=8)
nb = BernoulliNB()
svc = SVC()
lr = LR()
abc = ABC()
bc = BC()
'''
# inverse document frequency (N = number of training documents)
inv_doc_freq = np.zeros(1000)
for i in range(len(inv_doc_freq)):
    total = sum(X[:, i])
    if total == 0:
        inv_doc_freq[i] = 0
    else:
        inv_doc_freq[i] = math.log(N / total)
'''

# Data normalization: scale each row by its maximum term frequency
for i in range(len(X)):
    max_freq = max(X[i])
    if max_freq == 0:
        pass  # all-zero row: leave as-is
    else:
        # assumption: the loop body is truncated in the source and
        # presumably divides the row by its maximum frequency
        X[i] = X[i] / max_freq
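The commented-out IDF loop plus this max-frequency scaling amount to a hand-rolled TF-IDF; for comparison, a sketch of the same weighting with scikit-learn's built-in transformer (an alternative, not what the original uses):

import numpy as np
from sklearn.feature_extraction.text import TfidfTransformer

X_counts = np.random.randint(0, 5, size=(20, 1000))  # stand-in count matrix
tfidf = TfidfTransformer()            # IDF weighting + L2 row normalization
X_weighted = tfidf.fit_transform(X_counts)
print(X_weighted.shape)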
Example No. 5
from sklearn.ensemble import AdaBoostClassifier as ABC

model = ABC(n_estimators=100, random_state=42,
            learning_rate=.80).fit(X_train, y_train)
prediction = model.predict(X_test)
score = accuracy_score(y_test, prediction)
print(score)

# Bagging Classifier
# ------------------


from sklearn.ensemble import BaggingClassifier as BC

model = BC(n_estimators=100, random_state=42).fit(X_train, y_train)
prediction = model.predict(X_test)
score = accuracy_score(y_test, prediction)
print(score)

# Extra Trees Classifier
# ----------------------


from sklearn.ensemble import ExtraTreesClassifier as XTC

model = XTC(n_estimators=100,
            random_state=42,
            criterion='entropy',
            max_depth=20).fit(X_train, y_train)
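The extra-trees cell stops at fit in the source; a hedged completion that evaluates it the same way as the previous two cells:

prediction = model.predict(X_test)
score = accuracy_score(y_test, prediction)
print(score)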
Example No. 6
def __init__(self, matrixdatabase):
    self._matrix_database = matrixdatabase
    self._has_fit = False           # flips to True once the model is trained
    self._bc = BC(n_estimators=10)  # bagging ensemble of 10 default trees
Example No. 7
    # (the enclosing function's signature is truncated in the source)
    print(X_all.shape)

    # one hot encode
    y_s = np.zeros((y_all.shape[0], 2))
    y_s[:, 1] = (y_all == 1).reshape(y_all.shape[0], )
    y_s[:, 0] = (y_all == 0).reshape(y_all.shape[0], )
    pos_weight = np.sum(y_s[:, 0]) / np.sum(y_s[:, 1])

    X_train, X_test, y_train, y_test = train_test_split(X_all,
                                                        y_all,
                                                        test_size=0.30,
                                                        stratify=y_all,
                                                        random_state=42)
    del X_all

    clf = BC(**bc_params)
    clf.fit(X_train, y_train.ravel())

    #clf = RandomForestClassifier(min_samples_leaf=15, class_weight = 'balanced')#{0:1,1:pos_weight})
    #clf.fit(X_train, y_train.ravel())

    #print ('oob_score: ', clf.oob_score_)
    # svm = SVC(probability = True, class_weight = 'balanced')
    # svm.fit(X_train, y_train)

    preds_tr = clf.predict_proba(X_train)
    print("Predictions_tr:")
    print(preds_tr[0:20])
    print(np.median(preds_tr[:, 1]))
    roc_auc_tr = metrics.roc_auc_score(y_train.ravel(), preds_tr[:, 1])
    print('ROC AUC_tr:', roc_auc_tr)
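The snippet reports ROC AUC on the training split only; a natural companion (an assumption, not in the source) scores the held-out split the same way:

    preds_te = clf.predict_proba(X_test)
    roc_auc_te = metrics.roc_auc_score(y_test.ravel(), preds_te[:, 1])
    print('ROC AUC_te:', roc_auc_te)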
Example No. 8
import pandas as pd
from sklearn.ensemble import BaggingClassifier as BC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# DTLZ STUFFS
data = pd.read_csv("features with R2.csv")
inputs_train = data[data.keys()[1:11]]     # columns 1-10: problem features
outputs_train = data[data.keys()[11:]]     # remaining columns: per-model R2 scores
targetnames = ["SVM", "GPR", "NN", "EN"]
target_train = outputs_train.idxmax(axis=1)  # label = best-scoring model

num_classes = len(targetnames)

clf = BC()
clf = clf.fit(inputs_train, target_train)

# WFG STUFFS
data_wfg = pd.read_csv("WFG features with R2.csv")
# (the lines deriving x_test, target_test and output_test from data_wfg
#  are missing from the source)
predictions = clf.predict(x_test)
con_mat = confusion_matrix(target_test, predictions)
# turn the R2 table into a regret matrix: 0 for the best model in each row,
# a positive cost otherwise
cost_mat = output_test
cost_mat = -cost_mat.sub(cost_mat.max(axis=1), axis=0)
print(con_mat)
num_samples, num_classes = cost_mat.shape
cost = 0
lenp = len(predictions)
for index in range(num_samples):
    cost += cost_mat.iloc[index][predictions[index]]
# print('Cost independent classification', cost/lenp)
cic = cost / lenp  # average regret of the classifier's picks
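To make the regret transform concrete, a tiny self-contained example (the numbers are made up):

import pandas as pd

r2 = pd.DataFrame({"SVM": [0.9, 0.4], "GPR": [0.7, 0.8]})
regret = -r2.sub(r2.max(axis=1), axis=0)
# best model per row gets 0 regret; others get (row max - score)
print(regret)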
Example No. 9
def Find_Optimal_Features(ML_DATA):
    # Creates all combinations of features
    lst = list(itertools.product([0, 1], repeat=8))
    lst.remove((0, 0, 0, 0, 0, 0, 0, 0))
    for i in range(len(lst)):
        lst[i] = list(lst[i])

    outs = []
    for i in range(8):
        outs.append([])

    # Classifiers put into a list for easier access and expandability
    clfs = [
        DTC(),
        KNN(),
        BC(ETC()),
        LSVC(max_iter=10_000),
        RFC(),
        SVC(),
        SGDC(),
        MLP(max_iter=10_000)
    ]
    tot = len(lst)
    y = []

    # Trains all models on all feature sets and gets scores
    # (note: only the first 15 feature combinations are evaluated here)
    for i, item in enumerate(lst[:15]):
        X_train, ys_train, ya_train, X_test, ys_test, ya_test = ML_DATA.Make_Set(
            item, 0.25)
        for clf in clfs:
            clf.fit(X_train, ys_train)

        scores = []

        for j, clf in enumerate(clfs):
            scores.append(clf.score(X_test, ys_test))
            outs[j].append(scores[j])

        # PBR appears to be a project-local progress-bar helper
        PBR(i + 1, tot, name="Test Set", end="\r")
        y = ys_test
    print()

    # expected accuracy of a uniform random guess over the 8 classes
    # (the class proportions sum to 1, so this reduces to 100/8 = 12.5%)
    temp = Counter(y)
    total = len(y)
    rand_ratio = 0
    for item in temp.values():
        rand_ratio += item / total
    rand_ratio = (rand_ratio / 8) * 100
    print("Random Choice :", rand_ratio)

    final = np.zeros(8)
    final_top = np.zeros(8)
    final_btop = np.zeros(8)

    names = [
        "Decision Tree", "KNN", "Bag Extra Tree", "Linear SVC",
        "Random Forest", "SVC", "SGDC", "MLP"
    ]

    # Prints all results; RAS appears to be a project helper returning the
    # indices of the top-10 scores, best first
    for k, name in enumerate(names):
        o = RAS(outs[k], 10)
        final_top += np.asarray(lst[o[0]])
        f = np.zeros(8)
        count = 0
        best = outs[k][0]
        print(name + ":")
        for i, item in enumerate(o):
            if i == 0:
                best = outs[k][item]
            print("\t\t{:2}) {:7.4f}% {}".format(i, outs[k][item] * 100,
                                                 lst[item]))
            if outs[k][item] == best:
                count += 1
                f += np.asarray(lst[item])
            final += np.asarray(lst[item])
        final_btop += f / count
        print()

    print(final)
    print(final_top)
    print(final_btop)
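itertools.product over {0,1}^8 enumerates every non-empty feature subset as a 0/1 mask; a minimal sketch of applying such a mask to select columns (Make_Set's internals are not shown in the source, so this illustrates the idea, not its code):

import itertools
import numpy as np

X = np.arange(20).reshape(5, 4)           # toy matrix with 4 features
masks = [m for m in itertools.product([0, 1], repeat=4) if any(m)]
mask = masks[5]                           # e.g. (0, 1, 1, 0)
X_sub = X[:, np.asarray(mask, dtype=bool)]  # keep only the masked columns
print(mask, X_sub.shape)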
Example No. 10
# (this snippet appears to continue the same session as Example No. 8;
#  con_mat, target_test, output_test, classifier, etc. are defined upstream)
print(con_mat)
lent = len(target_test)
lenp = len(predictions)
cost_mat = output_test
cost_mat = -cost_mat.sub(cost_mat.max(axis=1), axis=0)
cost = classifier.cost_loss(x_test, cost_mat)
# print(lent == lenp, 'Cost based classification', cost/lenp)
cbc = cost / lenp
""" classifier.optmize_weights(x_t2, o_t2)
w = classifier.weights
cost = classifier.weightedcost(w, x_test, cost_mat)
print(lent == lenp, 'Weighted Votes Cost based classification', cost) """
################################################
num_classes = len(targetnames)

clf = BC()
clf = clf.fit(x_train, target_train)
predictions = clf.predict(x_test)
con_mat = confusion_matrix(target_test, predictions)
cost_mat = output_test
cost_mat = -cost_mat.sub(cost_mat.max(axis=1), axis=0)
print(con_mat)
num_samples, num_classes = cost_mat.shape
cost = 0
lenp = len(predictions)
for index in range(num_samples):
    cost += cost_mat.iloc[index][predictions[index]]
# print('Cost independent classification', cost/lenp)
cic = cost / lenp
classes = targetnames
cost_mat = output_train
Example No. 11
def gogo_bagged_svm(fxpath, mpath, spath):

    transform = True

    svc_params = {'penalty': 'l2',
                  'loss': 'l2',            # 'squared_hinge' in modern scikit-learn
                  'dual': False,
                  'C': 33.0,
                  'intercept_scaling': 1e4,
                  'class_weight': 'auto',  # renamed to 'balanced' in scikit-learn 0.17+
                  'random_state': 42}

    bc_params = {'base_estimator': LinearSVC(**svc_params),
                 'n_estimators': 96,
                 'max_samples': 0.1,
                 'max_features': 0.8,
                 'oob_score': False,

                 # if you have tons of memory (i.e. 32 GB RAM + 32 GB swap),
                 #  increasing this parameter may help performance; else,
                 #  increasing it may cause "out of memory" errors.
                 'n_jobs': 1,
                 #'n_jobs': 8,

                 'verbose': 1,
                 'random_state': 42}

    '''
    lr_params = {'C':1e6,#tr(-3,3,7),
                 'penalty':'l2',
                 'class_weight':'auto',
                 'intercept_scaling':1e6}#tr(-1,6,7)}
    '''

    preds = []

    kpca_fname = '%s/kpca_rbf_{0}_{1}.pkl' % mpath
    s_fname = '%s/kpca_linear_svm{0}_{1}_preds.csv' % spath

    for i in range(7):
        if i < 5:
            nbreed = 1
            sbreed = 'dog'
            nsubject = i+1
        else:
            nbreed = 2
            sbreed = 'human'
            nsubject = 1 + abs(5-i)

        print('breed%d.subject%d..' % ( nbreed, nsubject ))

        X_ictal = load_features( fxpath, nbreed, nsubject, 1 )
        X_inter = load_features( fxpath, nbreed, nsubject, 2 )
    
        X_train = vstack((X_inter, X_ictal))
        Y = [0 for x in X_inter] + [1 for x in X_ictal]
        # per-class weights (note: W is computed but never passed to fit below)
        wi = 1.0 / len(X_inter) * 1000
        wp = 1.0 / len(X_ictal) * 1000
        W = array([wp if y else wi for y in Y])

        del X_inter, X_ictal; gc.collect()
    
        with open(kpca_fname.format(sbreed,nsubject),'rb') as f:
            kpca = pickle.load(f)
    
        if transform:   
            X_train = kpca_preprocess_features(X_train)
            X_train = kpca_incremental_transform(kpca,X_train)
            gc.collect()
    
        X_test = load_features( fxpath, nbreed, nsubject, 3 )
        if transform:
            X_test = kpca_preprocess_features(X_test)
            X_test = kpca_incremental_transform(kpca,X_test)
            gc.collect()
            
        bc = BC(**bc_params)
        bc.fit(X_train,Y)

        # print('oob_score:', bc.oob_score_)
        subject_preds = bc.predict_proba(X_test)[:,1]

        preds.append(subject_preds)
        subject_preds = pd.DataFrame(subject_preds)

        subject_preds.to_csv(s_fname.format(sbreed, nsubject), index=False, header=None)

        del X_train, X_test; gc.collect()
        sys.stdout.flush()
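This snippet targets an old scikit-learn; a hedged sketch of the same configuration against current releases (parameter names updated per the deprecations noted above, all values unchanged):

from sklearn.ensemble import BaggingClassifier as BC
from sklearn.svm import LinearSVC

svc_params = {'penalty': 'l2', 'loss': 'squared_hinge', 'dual': False,
              'C': 33.0, 'intercept_scaling': 1e4,
              'class_weight': 'balanced', 'random_state': 42}

bc_params = {'estimator': LinearSVC(**svc_params),  # 'base_estimator' before 1.2
             'n_estimators': 96, 'max_samples': 0.1, 'max_features': 0.8,
             'n_jobs': 1, 'verbose': 1, 'random_state': 42}

bc = BC(**bc_params)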
Example No. 12
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import BaggingClassifier as BC, RandomForestClassifier as RFC
from sklearn.metrics import accuracy_score

digits = load_digits()

# preview the first 36 digit images with their labels
fig = plt.figure()
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
for i in range(36):
    ax = fig.add_subplot(6, 6, i + 1, xticks=[], yticks=[])
    ax.imshow(digits.images[i], cmap='binary', interpolation='nearest')
    ax.text(0, 0.05, str(digits.target[i]), transform=ax.transAxes)

xtr, xte, ytr, yte = train_test_split(digits.data,
                                      digits.target,
                                      test_size=0.2,
                                      random_state=0)
names = ['DTC', 'BC', 'RFC']
models = [
    DTC(),
    BC(n_estimators=300, max_samples=0.8, random_state=0),
    RFC(n_estimators=300, random_state=0)
]
for name, model in zip(names, models):
    print('Prediction accuracy of model {0}:'.format(name), end='\t')
    model.fit(xtr, ytr)
    ypred = model.predict(xte)
    print(accuracy_score(yte, ypred))

plt.show()