Example #1
def make_classifiers():

    names = [
        "ELM(10,tanh)", "ELM(10,tanh,LR)", "ELM(10,sinsq)", "ELM(10,tribas)",
        "ELM(10,hardlim)", "ELM(20,rbf(0.1))"
    ]

    nh = 10

    # pass a user-defined transfer function
    sinsq = (lambda x: np.power(np.sin(x), 2.0))
    srhl_sinsq = MLPRandomLayer(n_hidden=nh, activation_func=sinsq)

    # use internal transfer funcs
    srhl_tanh = MLPRandomLayer(n_hidden=nh, activation_func='tanh')

    srhl_tribas = MLPRandomLayer(n_hidden=nh, activation_func='tribas')

    srhl_hardlim = MLPRandomLayer(n_hidden=nh, activation_func='hardlim')

    # use gaussian RBF
    srhl_rbf = RBFRandomLayer(n_hidden=nh * 2, rbf_width=0.1, random_state=0)

    log_reg = LogisticRegression()

    classifiers = [
        GenELMClassifier(hidden_layer=srhl_tanh),
        GenELMClassifier(hidden_layer=srhl_tanh, regressor=log_reg),
        GenELMClassifier(hidden_layer=srhl_sinsq),
        GenELMClassifier(hidden_layer=srhl_tribas),
        GenELMClassifier(hidden_layer=srhl_hardlim),
        GenELMClassifier(hidden_layer=srhl_rbf)
    ]

    return names, classifiers
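A minimal sketch of how this factory might be exercised, assuming the python-elm imports used above plus scikit-learn; the digits dataset and the split parameters are illustrative, not part of the original:

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# fit and score each ELM variant returned by make_classifiers()
names, classifiers = make_classifiers()
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    print("%-20s test accuracy: %.3f" % (name, clf.score(X_test, y_test)))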
Example #2
def define_classification_model(h):
    if config['model_type'] == 'linearSVM':
        return LinearSVC(C=h)
    elif config['model_type'] == 'ELM':
        rl = RandomLayer(n_hidden=h, activation_func='reclinear', alpha=1)
        return GenELMClassifier(hidden_layer=rl)
    elif config['model_type'] == 'MLP':
        return MLPClassifier(hidden_layer_sizes=(20, ),
                             max_iter=600,
                             verbose=10,
                             early_stopping=False)
    elif config['model_type'] == 'linear':
        return linear_model.SGDClassifier()
    elif config['model_type'] == 'KNN':
        return KNeighborsClassifier(n_neighbors=h)
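define_classification_model reads the model type from a module-level config dict that is not shown in this snippet, and h is overloaded: it is the C penalty for the linear SVM, the hidden-layer width for the ELM, and n_neighbors for KNN. A hypothetical call exercising the ELM branch might look like:

config = {'model_type': 'ELM'}  # hypothetical value; the real config comes from elsewhere
model = define_classification_model(h=100)  # 100 hidden units with 'reclinear' activation
model.fit(X_train, y_train)  # X_train / y_train assumed to be prepared upstream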
def trainELMClassifier(trainData, trainLabels, testData):
    print("\nTraining ELM Classifier...")

    trainData = np.asarray(trainData)
    trainLabels = np.asarray(trainLabels)
    print(trainData.shape)
    print(trainLabels.shape)

    # choose and initialize the ELM hidden-layer activation
    nh = 100
    activation = 'tanh'

    if activation == 'rbf':
        act_layer = RBFRandomLayer(n_hidden=nh,
                                   random_state=0,
                                   rbf_width=0.001)
    elif activation == 'tanh':
        act_layer = MLPRandomLayer(n_hidden=nh, activation_func='tanh')
    elif activation == 'tribas':
        act_layer = MLPRandomLayer(n_hidden=nh, activation_func='tribas')
    elif activation == 'hardlim':
        act_layer = MLPRandomLayer(n_hidden=nh, activation_func='hardlim')

    # initialize ELM Classifier
    elm = GenELMClassifier(hidden_layer=act_layer)

    t0 = time()
    elm.fit(trainData, trainLabels)
    print("\nTraining finished in %0.3fs \n" % (time() - t0))

    t0 = time()
    predictedLabels = elm.predict(testData)
    print("\nTesting finished in %0.3fs" % (time() - t0))

    t0 = time()
    confidence_scores = elm.decision_function(testData)
    print("\nTesting finished in %0.3fs" % (time() - t0))

    print("\nPredicted Labels")
    print("----------------------------------")
    print(predictedLabels)

    print("\nConfidence Scores")
    print("----------------------------------")
    print(confidence_scores)

    params = {
        'nh': nh,
        'af': activation,
    }

    return confidence_scores, predictedLabels, params
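A hypothetical call site for trainELMClassifier, with a stand-in dataset (the original script's data loading is not part of this snippet; it also assumes numpy and time() are imported as np and time above):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
trainData, testData, trainLabels, testLabels = train_test_split(X, y, test_size=0.25, random_state=0)
scores, predicted, params = trainELMClassifier(trainData, trainLabels, testData)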
Example #4
def run_ELM(
    x,
    y,
    threshold,
    test_num,
    n_hidden,
    random_state=2018,
    kernel_type='MLP',
):
    # split the data set into train/test
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x, y, test_size=0.3, random_state=random_state)  # sklearn.cross_validation was removed in 0.20
    # currently only supports test_num <= 100k
    assert test_num <= 100000

    def powtanh_xfer(activations, power=1.0):
        return pow(np.tanh(activations), power)

    model_count = 0
    result = []
    hidden_options = {
        'MLP': MLPRandomLayer,
        'RBF': RBFRandomLayer,
        'GRBF': GRBFRandomLayer
    }

    for i in range(0, test_num):
        tanh_rhl = hidden_options[kernel_type](n_hidden=n_hidden,
                                               random_state=i,
                                               activation_func=powtanh_xfer,
                                               activation_args={
                                                   'power': 3.0
                                               })
        elmc_tanh = GenELMClassifier(hidden_layer=tanh_rhl)
        # start Training
        elmc_tanh.fit(x_train, y_train)
        # calculate score
        train_acc = elmc_tanh.score(x_train, y_train)
        test_acc = elmc_tanh.score(x_test, y_test)
        if train_acc > threshold and test_acc > threshold:
            logging.info(
                'found model satisfying threshold, train_acc: {}, test_acc: {}'
                .format(train_acc, test_acc))
            result.append(
                (train_acc, test_acc, tanh_rhl.components_['weights']))

            model_count += 1
    logging.info('finished training, got {} valid models'.format(model_count))

    result.sort(key=lambda r: r[1], reverse=True)  # sort by test accuracy, best first
    return result
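A sketch of a call site, assuming x and y are numpy feature and label arrays supplied by the caller; the parameter values are illustrative:

# try 50 random seeds, keep MLP-kernel models whose train and test
# accuracies both clear 0.9; results come back sorted by test accuracy
models = run_ELM(x, y, threshold=0.9, test_num=50, n_hidden=64)
if models:
    train_acc, test_acc, weights = models[0]  # best surviving model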
Example #5
def TripleTest(x, y, pvalue_sort, top_k, threshold, classifier):
    index = []
    count = 0
    for i in range(0, top_k):  # take the top-k features by p-value for the exhaustive search
        index.append(pvalue_sort[i][0])

    if classifier == 'ELM':
        rbf_rhl = RBFRandomLayer(n_hidden=20,
                                 rbf_width=0.01,
                                 random_state=2018)
        clf = GenELMClassifier(hidden_layer=rbf_rhl)
    elif classifier == 'SVM':
        clf = SVC(kernel='linear', C=1)
    elif classifier == 'KNN':
        clf = neighbors.KNeighborsClassifier(n_neighbors=3)
    elif classifier == 'Normal_Bayes':
        clf = MultinomialNB(alpha=0.01)
    else:
        clf = DecisionTreeClassifier(random_state=0)

    combination = list(combinations(index, 3))  # exhaustive 3-way combinations of the top features (for top 50, C(50, 3) = 19600 groups)

    result = []
    # store feature combinations whose train and test accuracies are both above 0.9
    # layout: ((feature combination), train accuracy, test accuracy)
    value_set = []
    i_list = list(range(len(combination)))
    worker = partial(classify_func, combination, clf, x.T, y)
    # run the workers in parallel with a process pool (not threads)
    pool = multiprocessing.Pool(4)

    pool_result = pool.map(worker, i_list)
    pool.close()
    pool.join()

    for res in pool_result:
        if res[2] >= threshold:
            result.append(
                [combination[res[4]], res[2], res[3], res[0], res[1]])
            count += 1
        value_set.append(res[2])

    return result, count, max(value_set)
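classify_func, the helper the pool maps over, is defined elsewhere; the sketch below is an assumption reconstructed from the call site (partial(classify_func, combination, clf, x.T, y) applied to an index i) and from how res is unpacked above, with res laid out as (y_test, y_pred, train_acc, test_acc, i):

from sklearn.model_selection import train_test_split

def classify_func(combination, clf, X, y, i):
    # hypothetical reconstruction: evaluate the i-th feature triple
    X_sub = X[:, list(combination[i])]
    X_train, X_test, y_train, y_test = train_test_split(
        X_sub, y, test_size=0.3, random_state=2018)
    clf.fit(X_train, y_train)
    return (y_test, clf.predict(X_test),
            clf.score(X_train, y_train), clf.score(X_test, y_test), i)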
Example #6
stop_train = False
num_epochs = 10
for train_index, test_index in sKF.split(std_X, y):
    i += 1
    x_train = std_X[train_index]
    y_train = y[train_index]
    x_test = std_X[test_index]
    y_test = y[test_index]
    #-------------------------------------------------------------------------------
    grbf = GRBFRandomLayer(n_hidden=500, grbf_lambda=0.0001)
    act = MLPRandomLayer(n_hidden=500, activation_func='sigmoid')
    rbf = RBFRandomLayer(n_hidden=290,
                         rbf_width=0.0001,
                         activation_func='sigmoid')

    clf = GenELMClassifier(hidden_layer=rbf)
    clf.fit(x_train, y_train.ravel())
    y_pre = clf.predict(x_test)
    y_score = clf.decision_function(x_test)
    fpr, tpr, thresholds = roc_curve(y_test, y_score)
    tprs.append(tpr)
    fprs.append(fpr)
    roc_auc = auc(fpr, tpr)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pre).ravel()
    test_acc = (tn + tp) / (tn + fp + fn + tp)
    test_Sn = tp / (fn + tp)
    test_Sp = tn / (fp + tn)
    mcc = (tp * tn - fp * fn) / pow(
        ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), 0.5)
    final_test_acc.append(test_acc)
    final_test_Sn.append(test_Sn)
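A small follow-up the loop supports: the per-fold lists filled above can be averaged once cross-validation finishes (np is assumed to be the usual numpy import):

print('mean test ACC = %.4f, mean Sn = %.4f'
      % (np.mean(final_test_acc), np.mean(final_test_Sn)))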
nh = 15
(ctrs, _, _) = k_means(xtoy_train, nh)
unit_rs = np.ones(nh)

#rhl = RBFRandomLayer(n_hidden=nh, activation_func='inv_multiquadric')
#rhl = RBFRandomLayer(n_hidden=nh, centers=ctrs, radii=unit_rs)
rhl = GRBFRandomLayer(n_hidden=nh, grbf_lambda=.0001, centers=ctrs)
elmr = GenELMRegressor(hidden_layer=rhl)
elmr.fit(xtoy_train, ytoy_train)
print(elmr.score(xtoy_train, ytoy_train), elmr.score(xtoy_test, ytoy_test))
plot(xtoy, ytoy, xtoy, elmr.predict(xtoy))

# <codecell>

rbf_rhl = RBFRandomLayer(n_hidden=100, random_state=0, rbf_width=0.01)
elmc_rbf = GenELMClassifier(hidden_layer=rbf_rhl)
elmc_rbf.fit(dgx_train, dgy_train)
print(elmc_rbf.score(dgx_train, dgy_train), elmc_rbf.score(dgx_test, dgy_test))


def powtanh_xfer(activations, power=1.0):
    return pow(np.tanh(activations), power)


tanh_rhl = MLPRandomLayer(n_hidden=100,
                          activation_func=powtanh_xfer,
                          activation_args={'power': 3.0})
elmc_tanh = GenELMClassifier(hidden_layer=tanh_rhl)
elmc_tanh.fit(dgx_train, dgy_train)
print(elmc_tanh.score(dgx_train, dgy_train),
      elmc_tanh.score(dgx_test, dgy_test))
Example #8
filtered_data = data.dropna(axis='columns', how='all')
X = filtered_data.drop(['label', 'numOtus'], axis=1)
metadata = pd.read_table(mapfile, sep='\t', index_col=0)
y = metadata[disease_col]
## Merge adenoma and normal into one category called no-cancer, so we have binary classification
y = y.replace(to_replace=['normal', 'adenoma'],
              value=['no-cancer', 'no-cancer'])

encoder = LabelEncoder()
y = pd.Series(encoder.fit_transform(y), index=y.index, name=y.name)

A, P, Y, Q = train_test_split(X, y, test_size=0.15,
                              random_state=42)  # Can change to 0.2

srhl_rbf = RBFRandomLayer(n_hidden=50, rbf_width=0.1, random_state=0)
clf6 = GenELMClassifier(hidden_layer=srhl_rbf).fit(A, Y.values.ravel())
print("Accuracy of Extreme learning machine Classifier: " +
      str(clf6.score(P, Q)))

#==============================================
plt.figure()
cls = 0
# Set figure size and plot layout
figsize = (20, 15)
f, ax = plt.subplots(1, 1, figsize=figsize)

x = [clf6, 'purple', 'ELM']

#y_true = Q[Q.argsort().index]
y_score = x[0].decision_function(P)
#y_prob = x[0].predict_proba(P.ix[Q.argsort().index, :])
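With the encoded binary labels in Q and the decision_function scores above, an ROC curve for the ELM could be drawn along these lines (a sketch; the styling reuses the color and name packed into x, and np is the usual numpy import):

from sklearn.metrics import roc_curve, auc

fpr, tpr, _ = roc_curve(Q, np.ravel(y_score))
ax.plot(fpr, tpr, color=x[1], label='%s (AUC = %.2f)' % (x[2], auc(fpr, tpr)))
ax.legend(loc='lower right')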
Example #9
nh = 15
(ctrs, _, _) = k_means(xtoy_train, nh)
unit_rs = np.ones(nh)

#rhl = RBFRandomLayer(n_hidden=nh, activation_func='inv_multiquadric')
#rhl = RBFRandomLayer(n_hidden=nh, centers=ctrs, radii=unit_rs)
rhl = GRBFRandomLayer(n_hidden=nh, grbf_lambda=.0001, centers=ctrs)
elmr = GenELMRegressor(hidden_layer=rhl)
elmr.fit(xtoy_train, ytoy_train)
print(elmr.score(xtoy_train, ytoy_train), elmr.score(xtoy_test, ytoy_test))
plot(xtoy, ytoy, xtoy, elmr.predict(xtoy))

# <codecell>

rbf_rhl = RBFRandomLayer(n_hidden=100, random_state=0, rbf_width=0.01)
elmc_rbf = GenELMClassifier(hidden_layer=rbf_rhl)
elmc_rbf.fit(dgx_train, dgy_train)
print(elmc_rbf.score(dgx_train, dgy_train), elmc_rbf.score(dgx_test, dgy_test))

def powtanh_xfer(activations, power=1.0):
    return pow(np.tanh(activations), power)

tanh_rhl = MLPRandomLayer(n_hidden=100, activation_func=powtanh_xfer,
                          activation_args={'power': 3.0})
elmc_tanh = GenELMClassifier(hidden_layer=tanh_rhl)
elmc_tanh.fit(dgx_train, dgy_train)
print(elmc_tanh.score(dgx_train, dgy_train), elmc_tanh.score(dgx_test, dgy_test))

# <codecell>

rbf_rhl = RBFRandomLayer(n_hidden=100, rbf_width=0.01)
tr, ts = res_dist(dgx, dgy, GenELMClassifier(hidden_layer=rbf_rhl), n_runs=100, random_state=0)
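res_dist is not defined in this snippet; in the python-elm demo notebook it collects train/test score distributions over repeated random splits, along these lines (a sketch under that assumption, not the exact original):

from sklearn.model_selection import train_test_split

def res_dist(X, y, model, n_runs=100, random_state=None):
    train_scores, test_scores = [], []
    for i in range(n_runs):
        seed = None if random_state is None else random_state + i
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=seed)
        model.fit(X_train, y_train)
        train_scores.append(model.score(X_train, y_train))
        test_scores.append(model.score(X_test, y_test))
    return train_scores, test_scores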
Example #10
    'npymodel/ORL_lable.npy', 'npymodel/Indianface_lable.npy'
]

srhl_sigmoid = MLPRandomLayer(n_hidden=2000, activation_func='sigmoid')
srhl_gaussian = MLPRandomLayer(n_hidden=2000, activation_func='gaussian')

names = [
    "ELM(sigmoid)",
    #"ELM(gaussian)",
    "SVM(linear)",
    #'SVM(rbf)',
    "LR"
]

classifiers = [
    GenELMClassifier(hidden_layer=srhl_sigmoid),
    # GenELMClassifier(hidden_layer=srhl_gaussian),
    SVC(kernel='linear', C=1),
    #SVC(kernel='rbf',C=10,gamma=0.01),
    LogisticRegression()
]

for i in range(4):  # for the i-th dataset
    datas_name = dataset_name[i]
    X = np.load(features[i])
    y = np.load(lables[i])
    print("Processing data", datas_name, "..............")
    k_fold = model_selection.RepeatedKFold(n_splits=5,
                                           n_repeats=5,
                                           random_state=5)
    X_train, X_test, y_train, y_test = train_test_split(X,
Example #11
X = filtered_data.drop(['label', 'numOtus'], axis=1)
metadata = pd.read_table(mapfile, sep='\t', index_col=0)
y = metadata[disease_col]
## Merge adenoma and normal into one category called no-cancer, so we have binary classification
y = y.replace(to_replace=['normal', 'adenoma'], value=['no-cancer', 'no-cancer'])

encoder = LabelEncoder()
y = pd.Series(encoder.fit_transform(y),
              index=y.index, name=y.name)

A, P, Y, Q = train_test_split(
    X, y, test_size=0.1, random_state=42)  # Can change to 0.2


srhl_rbf = RBFRandomLayer(n_hidden=50, rbf_width=0.1, random_state=0)
clf6 = GenELMClassifier(hidden_layer=srhl_rbf).fit(A, Y.values.ravel())
print("Accuracy of Extreme learning machine Classifier: " + str(clf6.score(P, Q)))


#==============================================
#plt.figure()
cls = 0
# Set figure size and plot layout
figsize = (20, 15)
f, ax = plt.subplots(1, 1, figsize=figsize)

x = [clf6, 'purple', 'ELM']

#y_true = Q[Q.argsort().index]
y_score = x[0].decision_function(P)
#y_prob = x[0].predict_proba(P.ix[Q.argsort().index, :])
Example #12
import random

import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23
import numpy as np
from mrjob.job import MRJob

from elm import GenELMClassifier  # companion module to random_layer in python-elm
from random_layer import RBFRandomLayer, MLPRandomLayer
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

M = 2
nh = 5
T = 5

srhl_tanh = MLPRandomLayer(n_hidden=nh, activation_func='tanh')
srhl_rbf = RBFRandomLayer(n_hidden=nh*2, rbf_width=0.1, random_state=0)
srhl_tribas = MLPRandomLayer(n_hidden=nh, activation_func='tribas')
srhl_hardlim = MLPRandomLayer(n_hidden=nh, activation_func='hardlim')

# clf = GenELMClassifier(hidden_layer=srhl_tanh)
clf = GenELMClassifier(hidden_layer=srhl_rbf)
# clf = GenELMClassifier(hidden_layer=srhl_tribas)
# clf = GenELMClassifier(hidden_layer=srhl_hardlim)



class ELMTraining(MRJob):

    def mapper(self, _, line):
        k = random.randint(1,M)
        yield k, (line)

    def reducer(self, key, values):
        D = np.zeros((1, 1))

        f_tmp = open("tmp_val_" + str(key) + ".txt", "w")
Example #13
nh = 15
(ctrs, _, _) = k_means(xtoy_train, nh)
unit_rs = np.ones(nh)

#rhl = RBFRandomLayer(n_hidden=nh, activation_func='inv_multiquadric')
#rhl = RBFRandomLayer(n_hidden=nh, centers=ctrs, radii=unit_rs)
rhl = GRBFRandomLayer(n_hidden=nh, grbf_lambda=.0001, centers=ctrs)
elmr = GenELMRegressor(hidden_layer=rhl)
elmr.fit(xtoy_train, ytoy_train)
print(elmr.score(xtoy_train, ytoy_train), elmr.score(xtoy_test, ytoy_test))
plot(xtoy, ytoy, xtoy, elmr.predict(xtoy))

# <codecell>

rbf_rhl = RBFRandomLayer(n_hidden=100, random_state=0, rbf_width=0.01)
elmc_rbf = GenELMClassifier(hidden_layer=rbf_rhl)
elmc_rbf.fit(dgx_train, dgy_train)
print(elmc_rbf.score(dgx_train, dgy_train), elmc_rbf.score(dgx_test, dgy_test))


def powtanh_xfer(activations, power=1.0):
    return pow(np.tanh(activations), power)

tanh_rhl = MLPRandomLayer(
    n_hidden=100, activation_func=powtanh_xfer, activation_args={'power': 3.0})
elmc_tanh = GenELMClassifier(hidden_layer=tanh_rhl)
elmc_tanh.fit(dgx_train, dgy_train)
print(elmc_tanh.score(dgx_train, dgy_train),
      elmc_tanh.score(dgx_test, dgy_test))

# <codecell>