def make_classifiers():
    """Build a matched pair of (display name, classifier) lists.

    Covers six GenELMClassifier configurations: internal transfer
    functions (tanh, tribas, hardlim), a user-defined sin^2 transfer
    function, a tanh layer solved with LogisticRegression, and a
    Gaussian RBF random layer.

    Returns
    -------
    (names, classifiers) : parallel lists, same order.
    """
    names = [
        "ELM(10,tanh)",
        "ELM(10,tanh,LR)",
        "ELM(10,sinsq)",
        "ELM(10,tribas)",
        "ELM(hardlim)",
        "ELM(20,rbf(0.1))",
    ]

    hidden_units = 10

    # User-defined transfer function: squared sine of the activations.
    sinsq = (lambda x: np.power(np.sin(x), 2.0))
    layer_sinsq = MLPRandomLayer(n_hidden=hidden_units,
                                 activation_func=sinsq)

    # Layers using transfer functions built into MLPRandomLayer.
    layer_tanh = MLPRandomLayer(n_hidden=hidden_units,
                                activation_func='tanh')
    layer_tribas = MLPRandomLayer(n_hidden=hidden_units,
                                  activation_func='tribas')
    layer_hardlim = MLPRandomLayer(n_hidden=hidden_units,
                                   activation_func='hardlim')

    # Gaussian RBF layer with twice as many hidden units.
    layer_rbf = RBFRandomLayer(n_hidden=hidden_units * 2, rbf_width=0.1,
                               random_state=0)

    log_reg = LogisticRegression()

    classifiers = [
        GenELMClassifier(hidden_layer=layer_tanh),
        GenELMClassifier(hidden_layer=layer_tanh, regressor=log_reg),
        GenELMClassifier(hidden_layer=layer_sinsq),
        GenELMClassifier(hidden_layer=layer_tribas),
        GenELMClassifier(hidden_layer=layer_hardlim),
        GenELMClassifier(hidden_layer=layer_rbf),
    ]
    return names, classifiers
def define_classification_model(h):
    """Instantiate the classifier selected by ``config['model_type']``.

    Parameters
    ----------
    h : int or float
        Model-specific capacity knob: C for 'linearSVM', hidden-unit
        count for 'ELM', neighbour count for 'KNN'; unused by
        'MLP' and 'linear'.

    Returns
    -------
    A fresh, unfitted scikit-learn style estimator.

    Raises
    ------
    ValueError
        If ``config['model_type']`` is not a supported name
        (previously an unknown value silently returned None,
        deferring the failure to the caller's .fit()).
    """
    model_type = config['model_type']
    if model_type == 'linearSVM':
        return LinearSVC(C=h)
    elif model_type == 'ELM':
        # Rectified-linear random hidden layer; alpha=1 —
        # NOTE(review): in Python-ELM's RandomLayer, alpha weights the
        # MLP vs RBF activation mix; confirm against random_layer docs.
        rl = RandomLayer(n_hidden=h, activation_func='reclinear', alpha=1)
        return GenELMClassifier(hidden_layer=rl)
    elif model_type == 'MLP':
        return MLPClassifier(hidden_layer_sizes=(20, ),
                             max_iter=600,
                             verbose=10,
                             early_stopping=False)
    elif model_type == 'linear':
        return linear_model.SGDClassifier()
    elif model_type == 'KNN':
        return KNeighborsClassifier(n_neighbors=h)
    raise ValueError("unknown model_type: %r" % (model_type,))
def trainELMClassifier(trainData, trainLabels, testData):
    """Train a GenELMClassifier and score an unlabelled test set.

    Parameters
    ----------
    trainData, trainLabels : array-likes, converted via np.asarray.
    testData : samples to predict.

    Returns
    -------
    (confidence_scores, predictedLabels, params) where params records
    the hidden-layer size ('nh') and activation name ('af').

    Raises
    ------
    ValueError
        If the hard-coded ``activation`` is not one of the supported
        names (previously fell through and crashed later with a
        NameError on ``act_layer``).
    """
    print("\nTraining ELM Classifier...")
    trainData = np.asarray(trainData)
    trainLabels = np.asarray(trainLabels)
    print(trainData.shape)
    print(trainLabels.shape)

    # Hidden-layer configuration (edit here to switch activations).
    nh = 100
    activation = 'tanh'
    if activation == 'rbf':
        act_layer = RBFRandomLayer(n_hidden=nh, random_state=0,
                                   rbf_width=0.001)
    elif activation == 'tanh':
        act_layer = MLPRandomLayer(n_hidden=nh, activation_func='tanh')
    elif activation == 'tribas':
        act_layer = MLPRandomLayer(n_hidden=nh, activation_func='tribas')
    elif activation == 'hardlim':
        act_layer = MLPRandomLayer(n_hidden=nh, activation_func='hardlim')
    else:
        raise ValueError("unsupported activation: %r" % (activation,))

    # initialize ELM Classifier
    elm = GenELMClassifier(hidden_layer=act_layer)

    t0 = time()
    elm.fit(trainData, trainLabels)
    print("\nTraining finished in %0.3fs \n" % (time() - t0))

    t0 = time()
    predictedLabels = elm.predict(testData)
    print("\nTesting finished in %0.3fs" % (time() - t0))

    t0 = time()
    confidence_scores = elm.decision_function(testData)
    # Was a copy-pasted "Testing finished" message; this phase times
    # the decision-function (scoring) pass, not prediction.
    print("\nScoring finished in %0.3fs" % (time() - t0))

    print("\nPredicted Labels")
    print("----------------------------------")
    print(predictedLabels)
    print("\nConfidence Scores")
    print("----------------------------------")
    print(confidence_scores)

    params = {
        'nh': nh,
        'af': activation,
    }
    return confidence_scores, predictedLabels, params
def run_ELM(
        x,
        y,
        threshold,
        test_num,
        n_hidden,
        random_state=2018,
        kernel_type='MLP',
):
    """Random-restart search for ELM classifiers beating an accuracy bar.

    Trains up to ``test_num`` GenELMClassifier instances, each with a
    freshly seeded random hidden layer (seed = restart index), keeping
    those whose train AND test accuracy both exceed ``threshold``.

    Parameters
    ----------
    x, y : array-likes accepted by sklearn's train_test_split.
    threshold : float, accuracy bar applied to both splits.
    test_num : int, number of random restarts (must be <= 100000).
    n_hidden : int, hidden-unit count for the random layer.
    random_state : int, seed for the 70/30 train/test split.
    kernel_type : 'MLP' | 'RBF' | 'GRBF', selects the random-layer class.

    Returns
    -------
    List of (train_acc, test_acc, hidden-layer weight matrix), sorted by
    test accuracy, best first.
    """
    # split the data set into train/test
    # NOTE(review): sklearn.cross_validation was removed in scikit-learn
    # 0.20; this relies on an old sklearn (modern code would use
    # sklearn.model_selection).
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        x, y, test_size=0.3, random_state=random_state)

    # currently only support test_num <=100k
    assert test_num <= 100000

    def powtanh_xfer(activations, power=1.0):
        # Transfer function: element-wise tanh raised to `power`.
        return pow(np.tanh(activations), power)

    model_count = 0
    result = []
    hidden_options = {
        'MLP': MLPRandomLayer,
        'RBF': RBFRandomLayer,
        'GRBF': GRBFRandomLayer
    }
    for i in range(0, test_num):
        # Each restart reseeds the random hidden weights with i.
        tanh_rhl = hidden_options[kernel_type](n_hidden=n_hidden,
                                               random_state=i,
                                               activation_func=powtanh_xfer,
                                               activation_args={
                                                   'power': 3.0
                                               })
        elmc_tanh = GenELMClassifier(hidden_layer=tanh_rhl)
        # start Training
        elmc_tanh.fit(x_train, y_train)
        # calculate score
        train_acc = elmc_tanh.score(x_train, y_train)
        test_acc = elmc_tanh.score(x_test, y_test)
        if train_acc > threshold and test_acc > threshold:
            logging.info(
                'find model satisfiy threshold, train_acc: {}, test_acc: {}'.
                format(train_acc, test_acc))
            result.append(
                (train_acc, test_acc, tanh_rhl.components_['weights']))
            model_count += 1
    logging.info('fininsh training, get {} valid models'.format(model_count))
    # Sort by held-out accuracy, best first (the lambda's `x` shadows
    # the function parameter of the same name).
    result.sort(key=lambda x: x[1], reverse=True)
    return result
def TripleTest(x, y, pvalue_sort, top_k, threshold, classifer):
    """Exhaustively evaluate all 3-feature combinations of top features.

    Takes the ``top_k`` features ranked by p-value, forms every
    3-combination, and scores each with the chosen classifier using a
    4-process pool.

    Parameters
    ----------
    x : feature matrix, transposed (x.T) before being handed to the
        worker — presumably features-by-samples; TODO confirm.
    y : labels.
    pvalue_sort : sequence of (feature_index, p_value) sorted by p-value.
    top_k : int, number of top-ranked features to combine.
    threshold : float, minimum test accuracy for a combination to be kept.
    classifer : 'ELM' | 'SVM' | 'KNN' | 'Normal_Bayes'; anything else
        falls back to a decision tree.

    Returns
    -------
    (result, count, best) where ``result`` holds entries
    [(feature_triple), test_acc, train_acc, ...] above the threshold,
    ``count == len(result)``, and ``best`` is the highest passing test
    accuracy — 0.0 when nothing passed (previously ``max()`` raised
    ValueError on the empty list in that case).
    """
    index = []
    count = 0
    for i in range(0, top_k):  # take the top-k features by p-value
        index.append(pvalue_sort[i][0])

    if classifer == 'ELM':
        rbf_rhl = RBFRandomLayer(n_hidden=20,
                                 rbf_width=0.01,
                                 random_state=2018)
        clf = GenELMClassifier(hidden_layer=rbf_rhl)
    elif classifer == 'SVM':
        clf = SVC(kernel='linear', C=1)
    elif classifer == 'KNN':
        clf = neighbors.KNeighborsClassifier(n_neighbors=3)
    elif classifer == 'Normal_Bayes':
        clf = MultinomialNB(alpha=0.01)
    else:
        clf = DecisionTreeClassifier(random_state=0)

    # All 3-feature combinations of the selected features.
    combination = list(combinations(index, 3))
    result = []  # combinations whose train/test accuracies pass the bar
    value_set = []  # passing test accuracies, for the max() below
    i_list = list(range(len(combination)))
    worker = partial(classify_func, combination, clf, x.T, y)
    # Run the evaluations in a 4-worker process pool.
    pool = multiprocessing.Pool(4)
    pool_result = pool.map(worker, i_list)
    pool.close()
    pool.join()
    for res in pool_result:
        if res[2] >= threshold:
            result.append(
                [combination[res[4]], res[2], res[3], res[0], res[1]])
            count += 1
            value_set.append(res[2])
    # Guard: max() on an empty list raises ValueError when no combination
    # reaches the threshold; report 0.0 instead.
    best = max(value_set) if value_set else 0.0
    return result, count, best
# Stratified-CV evaluation loop: fits a GenELMClassifier per fold and
# collects ROC / accuracy / sensitivity / specificity / MCC statistics.
# NOTE(review): relies on names defined earlier in the file (sKF, std_X,
# y, i, tprs, fprs, final_test_acc, final_test_Sn, ...) — confirm
# against the full script.
stop_train = False
num_epochs = 10
for train_index, test_index in sKF.split(std_X, y):
    i += 1
    x_train = std_X[train_index]
    y_train = y[train_index]
    x_test = std_X[test_index]
    y_test = y[test_index]
    #-------------------------------------------------------------------------------
    # Three candidate hidden layers are constructed, but only the RBF
    # one is wired into the classifier below.
    grbf = GRBFRandomLayer(n_hidden=500, grbf_lambda=0.0001)
    act = MLPRandomLayer(n_hidden=500, activation_func='sigmoid')
    rbf = RBFRandomLayer(n_hidden=290, rbf_width=0.0001,
                         activation_func='sigmoid')
    clf = GenELMClassifier(hidden_layer=rbf)
    clf.fit(x_train, y_train.ravel())
    y_pre = clf.predict(x_test)
    y_score = clf.decision_function(x_test)
    fpr, tpr, thresholds = roc_curve(y_test, y_score)
    tprs.append(tpr)
    fprs.append(fpr)
    roc_auc = auc(fpr, tpr)
    # Binary confusion-matrix counts for the per-fold metrics.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pre).ravel()
    test_acc = (tn + tp) / (tn + fp + fn + tp)
    test_Sn = tp / (fn + tp)  # sensitivity (recall on positives)
    test_Sp = tn / (fp + tn)  # specificity
    # Matthews correlation coefficient.
    # NOTE(review): the denominator is 0 whenever any marginal count is
    # 0, which would raise ZeroDivisionError on a degenerate fold.
    mcc = (tp * tn - fp * fn) / pow(
        ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), 0.5)
    final_test_acc.append(test_acc)
    final_test_Sn.append(test_Sn)
# --- ELM regression/classification demo (notebook-style cells). ---
# Converted the Python 2 `print` statements to the print() function for
# consistency with the Python 3 version of this same demo elsewhere in
# the project; output is identical (space-separated values).
nh = 15

# Seed the GRBF layer's centres with k-means cluster centres.
(ctrs, _, _) = k_means(xtoy_train, nh)
unit_rs = np.ones(nh)

#rhl = RBFRandomLayer(n_hidden=nh, activation_func='inv_multiquadric')
#rhl = RBFRandomLayer(n_hidden=nh, centers=ctrs, radii=unit_rs)
rhl = GRBFRandomLayer(n_hidden=nh, grbf_lambda=.0001, centers=ctrs)
elmr = GenELMRegressor(hidden_layer=rhl)
elmr.fit(xtoy_train, ytoy_train)
print(elmr.score(xtoy_train, ytoy_train), elmr.score(xtoy_test, ytoy_test))
plot(xtoy, ytoy, xtoy, elmr.predict(xtoy))

# <codecell>

# Classification with a plain Gaussian RBF random layer.
rbf_rhl = RBFRandomLayer(n_hidden=100, random_state=0, rbf_width=0.01)
elmc_rbf = GenELMClassifier(hidden_layer=rbf_rhl)
elmc_rbf.fit(dgx_train, dgy_train)
print(elmc_rbf.score(dgx_train, dgy_train),
      elmc_rbf.score(dgx_test, dgy_test))


def powtanh_xfer(activations, power=1.0):
    """Transfer function: element-wise tanh raised to *power*."""
    return pow(np.tanh(activations), power)


# Same classifier with the custom powered-tanh transfer function.
tanh_rhl = MLPRandomLayer(n_hidden=100,
                          activation_func=powtanh_xfer,
                          activation_args={'power': 3.0})
elmc_tanh = GenELMClassifier(hidden_layer=tanh_rhl)
elmc_tanh.fit(dgx_train, dgy_train)
print(elmc_tanh.score(dgx_train, dgy_train),
      elmc_tanh.score(dgx_test, dgy_test))
# Microbiome-style pipeline: drop all-NaN columns, binarise labels, fit
# an RBF-layer ELM, then compute decision scores for plotting.
filtered_data = data.dropna(axis='columns', how='all')
X = filtered_data.drop(['label', 'numOtus'], axis=1)
metadata = pd.read_table(mapfile, sep='\t', index_col=0)
y = metadata[disease_col]
## Merge adenoma and normal in one-category called no-cancer, so we have binary classification
y = y.replace(to_replace=['normal', 'adenoma'],
              value=['no-cancer', 'no-cancer'])
encoder = LabelEncoder()
y = pd.Series(encoder.fit_transform(y), index=y.index, name=y.name)
# A/P = train/test features, Y/Q = train/test labels.
A, P, Y, Q = train_test_split(X, y, test_size=0.15,
                              random_state=42)  # Can change to 0.2
srhl_rbf = RBFRandomLayer(n_hidden=50, rbf_width=0.1, random_state=0)
clf6 = GenELMClassifier(hidden_layer=srhl_rbf).fit(A, Y.values.ravel())
print("Accuracy of Extreme learning machine Classifier: " +
      str(clf6.score(P, Q)))
#==============================================
# NOTE(review): this figure is immediately superseded by plt.subplots
# below — likely redundant.
plt.figure()
cls = 0
# Set figure size and plot layout
figsize = (20, 15)
f, ax = plt.subplots(1, 1, figsize=figsize)
# Bundled (classifier, plot colour, legend label) for the plot code.
x = [clf6, 'purple', 'ELM']
#y_true = Q[Q.argsort().index]
y_score = x[0].decision_function(P)
#y_prob = x[0].predict_proba(P.ix[Q.argsort().index, :])
# --- ELM demo cells (variant with a res_dist benchmark at the end). ---
# Converted the Python 2 `print` statements to the print() function for
# consistency with the Python 3 version of this demo elsewhere in the
# project; output is identical (space-separated values).
nh = 15

# Seed the GRBF layer's centres with k-means cluster centres.
(ctrs, _, _) = k_means(xtoy_train, nh)
unit_rs = np.ones(nh)

#rhl = RBFRandomLayer(n_hidden=nh, activation_func='inv_multiquadric')
#rhl = RBFRandomLayer(n_hidden=nh, centers=ctrs, radii=unit_rs)
rhl = GRBFRandomLayer(n_hidden=nh, grbf_lambda=.0001, centers=ctrs)
elmr = GenELMRegressor(hidden_layer=rhl)
elmr.fit(xtoy_train, ytoy_train)
print(elmr.score(xtoy_train, ytoy_train), elmr.score(xtoy_test, ytoy_test))
plot(xtoy, ytoy, xtoy, elmr.predict(xtoy))

# <codecell>

# Classification with a plain Gaussian RBF random layer.
rbf_rhl = RBFRandomLayer(n_hidden=100, random_state=0, rbf_width=0.01)
elmc_rbf = GenELMClassifier(hidden_layer=rbf_rhl)
elmc_rbf.fit(dgx_train, dgy_train)
print(elmc_rbf.score(dgx_train, dgy_train),
      elmc_rbf.score(dgx_test, dgy_test))


def powtanh_xfer(activations, power=1.0):
    """Transfer function: element-wise tanh raised to *power*."""
    return pow(np.tanh(activations), power)


# Same classifier with the custom powered-tanh transfer function.
tanh_rhl = MLPRandomLayer(n_hidden=100,
                          activation_func=powtanh_xfer,
                          activation_args={'power': 3.0})
elmc_tanh = GenELMClassifier(hidden_layer=tanh_rhl)
elmc_tanh.fit(dgx_train, dgy_train)
print(elmc_tanh.score(dgx_train, dgy_train),
      elmc_tanh.score(dgx_test, dgy_test))

# <codecell>

# Score distribution over repeated random restarts.
rbf_rhl = RBFRandomLayer(n_hidden=100, rbf_width=0.01)
tr, ts = res_dist(dgx, dgy, GenELMClassifier(hidden_layer=rbf_rhl),
                  n_runs=100, random_state=0)
'npymodel/ORL_lable.npy', 'npymodel/Indianface_lable.npy'
]  # NOTE(review): this list's opening bracket lies before this chunk.

# 2000-hidden-unit random layers for the ELM entries below; only the
# sigmoid one is active in `classifiers`.
srhl_sigmoid = MLPRandomLayer(n_hidden=2000, activation_func='sigmoid')
srhl_gaussian = MLPRandomLayer(n_hidden=2000, activation_func='gaussian')

names = [
    "ELM(sigmoid)",
    #"ELM(gaussian)",
    "SVM(linear)",
    #'SVM(rbf)',
    "LR"
]
# Order must match `names` above, entry for entry.
classifiers = [
    GenELMClassifier(hidden_layer=srhl_sigmoid),
    # GenELMClassifier(hidden_layer=srhl_gaussian),
    SVC(kernel='linear', C=1),
    #SVC(kernel='rbf',C=10,gamma=0.01),
    LogisticRegression()
]
for i in range(4):  #for the ith dataset
    datas_name = dataset_name[i]
    X = np.load(features[i])
    y = np.load(lables[i])
    print("Processing data", datas_name, "..............")
    k_fold = model_selection.RepeatedKFold(n_splits=5,
                                           n_repeats=5,
                                           random_state=5)
    # NOTE(review): this call is truncated in this chunk; its remaining
    # arguments continue beyond this view.
    X_train, X_test, y_train, y_test = train_test_split(X,
# Variant of the ELM accuracy pipeline: binarise labels, fit an
# RBF-layer ELM on a 90/10 split, then compute decision scores.
# NOTE(review): `filtered_data`, `mapfile`, `disease_col` come from
# earlier in the file — confirm against the full script.
X = filtered_data.drop(['label','numOtus'],axis=1)
metadata = pd.read_table(mapfile,sep='\t',index_col=0)
y = metadata[disease_col]
## Merge adenoma and normal in one-category called no-cancer, so we have binary classification
y = y.replace(to_replace=['normal','adenoma'],
              value=['no-cancer','no-cancer'])
encoder = LabelEncoder()
y = pd.Series(encoder.fit_transform(y), index=y.index, name=y.name)
# A/P = train/test features, Y/Q = train/test labels.
A, P, Y, Q = train_test_split(
    X, y, test_size=0.1, random_state=42)  # Can change to 0.2
srhl_rbf = RBFRandomLayer(n_hidden=50,rbf_width=0.1,random_state=0)
clf6 = GenELMClassifier(hidden_layer=srhl_rbf).fit(A, Y.values.ravel())
print ("Accuracy of Extreme learning machine Classifier: "+str(clf6.score(P,Q)))
#==============================================
#plt.figure()
cls = 0
# Set figure size and plot layout
figsize=(20,15)
f, ax = plt.subplots(1, 1, figsize=figsize)
# Bundled (classifier, plot colour, legend label) for the plot code.
x = [clf6,'purple','ELM']
#y_true = Q[Q.argsort().index]
y_score = x[0].decision_function(P)
#y_prob = x[0].predict_proba(P.ix[Q.argsort().index, :])
# ELM setup for an mrjob MapReduce experiment: build candidate random
# layers at module level, then scatter input lines across M buckets.
from random_layer import RBFRandomLayer, MLPRandomLayer
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.externals import joblib

M = 2   # number of random buckets the mapper scatters lines into
nh = 5  # hidden units per random layer
T = 5

# Candidate hidden layers; only the RBF variant is wired into clf below.
srhl_tanh = MLPRandomLayer(n_hidden=nh, activation_func='tanh')
srhl_rbf = RBFRandomLayer(n_hidden=nh*2, rbf_width=0.1, random_state=0)
srhl_tribas = MLPRandomLayer(n_hidden=nh, activation_func='tribas')
srhl_hardlim = MLPRandomLayer(n_hidden=nh, activation_func='hardlim')
# clf = GenELMClassifier(hidden_layer=srhl_tanh)
clf = GenELMClassifier(hidden_layer=srhl_rbf)
# clf = GenELMClassifier(hidden_layer=srhl_tribas)
# clf = GenELMClassifier(hidden_layer=srhl_hardlim)


class ELMTraining(MRJob):
    # mrjob step: emit each input line under a random bucket key 1..M.
    def mapper(self, _, line):
        k = random.randint(1,M)
        yield k, (line)

    def reducer(self, key, values):
        # NOTE(review): the reducer body continues beyond this chunk;
        # it is truncated here after opening the per-key temp file.
        D = np.zeros((1, 1))
        f_tmp = open("tmp_val_" + str(key) + ".txt", "w");
# --- ELM demo cells (Python 3 print() variant). ---
# Regression on the `xtoy` data with a GRBF layer whose centres are
# seeded by k-means.
nh = 15
(ctrs, _, _) = k_means(xtoy_train, nh)
unit_rs = np.ones(nh)
#rhl = RBFRandomLayer(n_hidden=nh, activation_func='inv_multiquadric')
#rhl = RBFRandomLayer(n_hidden=nh, centers=ctrs, radii=unit_rs)
rhl = GRBFRandomLayer(n_hidden=nh, grbf_lambda=.0001, centers=ctrs)
elmr = GenELMRegressor(hidden_layer=rhl)
elmr.fit(xtoy_train, ytoy_train)
print(elmr.score(xtoy_train, ytoy_train), elmr.score(xtoy_test, ytoy_test))
plot(xtoy, ytoy, xtoy, elmr.predict(xtoy))

# <codecell>

# Classification on the `dg*` data with a plain Gaussian RBF layer.
rbf_rhl = RBFRandomLayer(n_hidden=100, random_state=0, rbf_width=0.01)
elmc_rbf = GenELMClassifier(hidden_layer=rbf_rhl)
elmc_rbf.fit(dgx_train, dgy_train)
print(elmc_rbf.score(dgx_train, dgy_train),
      elmc_rbf.score(dgx_test, dgy_test))


def powtanh_xfer(activations, power=1.0):
    """Transfer function: element-wise tanh raised to *power*."""
    return pow(np.tanh(activations), power)


# Same classifier with the custom powered-tanh transfer function.
tanh_rhl = MLPRandomLayer(
    n_hidden=100, activation_func=powtanh_xfer,
    activation_args={'power': 3.0})
elmc_tanh = GenELMClassifier(hidden_layer=tanh_rhl)
elmc_tanh.fit(dgx_train, dgy_train)
print(elmc_tanh.score(dgx_train, dgy_train),
      elmc_tanh.score(dgx_test, dgy_test))

# <codecell>