Exemple #1
0
        def get_sel_idx(feature_list, sel_feature_num):
            """Select feature indices with the connection-weight method.

            Shuffles ``x`` and ``s`` from the enclosing scope together (seeded
            with ``self.random_seed``), log10-transforms the targets, then uses
            the first four fifths of the samples for training and the rest for
            validation.

            Args:
                feature_list: Passed through unchanged to
                    ``DoFeatureSelectionConnectionWeight``.
                sel_feature_num: Number of features to select (forwarded as
                    ``n_sel``).

            Returns:
                The first element returned by
                ``DoFeatureSelectionConnectionWeight`` (the selected indices).
            """
            x_all, s_all = shuffle(x, s, random_state=self.random_seed)
            # NOTE(review): np.log10 yields -inf/nan for values <= 0 — confirm
            # that ``s`` is strictly positive.
            s_all = np.log10(s_all)

            # 80/20 split on the shuffled samples.
            dev_index = len(x_all) * 4 // 5
            trn_x, val_x = x_all[:dev_index], x_all[dev_index:]
            trn_y, val_y = s_all[:dev_index], s_all[dev_index:]

            hp = WxHyperParameter(epochs=50,
                                  learning_ratio=0.001,
                                  batch_size=16,
                                  verbose=True)
            # The validation split is passed twice; the second pair is
            # presumably the callee's test set — confirm against its signature.
            sel_idx, _sel_genes, _sel_weight, _test_auc = (
                DoFeatureSelectionConnectionWeight(trn_x,
                                                   trn_y,
                                                   val_x,
                                                   val_y,
                                                   val_x,
                                                   val_y,
                                                   feature_list,
                                                   hp,
                                                   n_sel=sel_feature_num))

            return sel_idx
Exemple #2
0
def DoFeatureSelection(n_sel=14):
    """Rank genes by their averaged WxSlp weight over repeated splits.

    Loads the feature data frame from ``FEATURE_SET_DF_FILE_NAME``, normalises
    it with ``StandByRow`` and runs ``WxSlp`` on ``ITERATION`` train/validation
    splits. In each run the weights of the top ``min(n_sel * 100, #genes)``
    features are accumulated; the genes with the highest averaged weight are
    returned.

    Args:
        n_sel: Number of genes to return.

    Returns:
        Tuple ``(sel_index, sel_genes, sel_weight)``: indices into the gene
        list, the matching gene names (NumPy array) and their averaged
        weights, ordered from highest to lowest weight.
    """
    VALIDATION_RATIO = 0.2
    ITERATION = 1000
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the handle, which the bare open() did not.
    with open(FEATURE_SET_DF_FILE_NAME, 'rb') as feature_file:
        df = cPickle.load(feature_file)
    df = StandByRow(df)
    print('Feature Data Frame : ', df.shape)
    gene_names = GetValueListFromFile('GENE_LIST_TCGA_ASSEM.txt')
    feature_num = len(gene_names)
    # Loop-invariant: number of candidate features requested from every run.
    n_candidates = min(n_sel * 100, feature_num)
    all_weight = np.zeros(feature_num)
    # Counts start at 1, so the division below never divides by zero (each
    # mean is therefore taken over "times selected + 1").
    all_count = np.ones(feature_num)
    for i in range(0, ITERATION):
        train_x, train_y, val_x, val_y = LoadNormFeatureSet(
            df, VALIDATION_RATIO, i)
        hp = WxHyperParameter(epochs=30, learning_ratio=0.001, batch_size=32)
        sel_idx, sel_weight, _val_acc = WxSlp(train_x,
                                              train_y,
                                              val_x,
                                              val_y,
                                              n_selection=n_candidates,
                                              hyper_param=hp)
        for j in range(n_candidates):
            all_weight[sel_idx[j]] += sel_weight[j]
            all_count[sel_idx[j]] += 1
    all_weight = all_weight / all_count
    # Descending order of averaged weight.
    sort_index = np.argsort(all_weight)[::-1]
    sel_index = sort_index[:n_sel]
    sel_weight = all_weight[sel_index]
    gene_names = np.asarray(gene_names)
    sel_genes = gene_names[sel_index]

    return sel_index, sel_genes, sel_weight
Exemple #3
0
        def get_wx_sel_idx(high_th_year,
                           low_th_year,
                           feature_list,
                           set_feature,
                           sel_feature_num,
                           sel_op,
                           div_ratio=4):
            """Run Wx feature selection on high- vs. low-risk groups.

            The two thresholds are multiplied by 365 (presumably years to
            days) and handed to ``helper.get_risk_group`` together with ``x``,
            ``c`` and ``s`` from the enclosing scope. The resulting groups are
            split into train/validation sets by ``helper.get_train_val``.

            Args:
                high_th_year: High-risk threshold, scaled by 365 before use.
                low_th_year: Low-risk threshold, scaled by 365 before use.
                feature_list: Passed through to ``DoFeatureSelectionWX``.
                set_feature: Column indices to restrict the data to; an empty
                    sequence keeps all columns.
                sel_feature_num: Number of features to select. ``0`` means
                    "number of columns / div_ratio" and also switches the
                    learning ratio from 0.001 to 0.01.
                sel_op: Forwarded as ``sel_option``.
                div_ratio: Divisor used when ``sel_feature_num`` is ``0``.

            Returns:
                The first element returned by ``DoFeatureSelectionWX`` (the
                selected indices).
            """
            groups = helper.get_risk_group(x, c, s,
                                           high_th_year * 365,
                                           low_th_year * 365)
            high_risk_group, low_risk_group = groups
            trn_x, trn_y, val_x, val_y = helper.get_train_val(
                high_risk_group,
                low_risk_group,
                is_categori_y=True,
                seed=self.random_seed)

            # Optionally narrow both splits to the requested columns.
            if len(set_feature):
                trn_x, val_x = trn_x[:, set_feature], val_x[:, set_feature]
            feature_num = trn_x.shape[1]

            # Only the learning ratio and the selection size depend on whether
            # an explicit feature count was requested.
            if sel_feature_num == 0:
                lr = 0.01
                sel_gene_num = int(
                    max(sel_feature_num, feature_num / div_ratio))
            else:
                lr = 0.001
                sel_gene_num = sel_feature_num
            hp = WxHyperParameter(epochs=50,
                                  learning_ratio=lr,
                                  batch_size=int(len(trn_x) / 4),
                                  verbose=True)

            # The validation split is supplied twice (second pair presumably
            # serves as the callee's test set — confirm against its signature).
            outcome = DoFeatureSelectionWX(trn_x,
                                           trn_y,
                                           val_x,
                                           val_y,
                                           val_x,
                                           val_y,
                                           feature_list,
                                           hp,
                                           n_sel=sel_gene_num,
                                           sel_option=sel_op)
            sel_idx, sel_genes, sel_weight, test_auc = outcome

            return sel_idx
def main():
    """Run ``WxSlp`` once on the three-class dummy CSV data set.

    Every column of the CSV except the last is used as a feature; the last
    column holds the integer class label, which is one-hot encoded into three
    classes. The same data is passed as both training and validation set.
    """
    data = np.loadtxt(
        r"F:\00_paper_Proj\NeuralNet_GeneSelection\DearWXpub-master\DearWXpub-master\src\dummy.csv",
        delimiter=',')
    xtrain = data[:, :-1]
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy (1.24+); the builtin gives the identical dtype.
    ytrain = data[:, -1].astype(int)
    # Row lookup in the identity matrix == one-hot encoding for 3 classes.
    ytrain = np.eye(3)[ytrain]
    WxSlp(xtrain, ytrain, xtrain, ytrain, 3,
          WxHyperParameter(learning_ratio=0.001))
def wx_feature_selection(df='',
                         gene_names='',
                         n_sel=14,
                         val_ratio=0.2,
                         iter=1000,
                         epochs=30,
                         learning_ratio=0.001,
                         batch_size=32,
                         verbose=False,
                         model_type='MLP',
                         num_cls=2):
    """Rank features by their averaged Wx weight over repeated splits.

    For each of ``iter`` rounds a train/validation split is obtained from
    ``load_norm_feature_set`` and a model (``wx_mlp`` or ``wx_slp``) is asked
    for its top ``min(n_sel * 100, len(gene_names))`` features. Their weights
    are accumulated per feature and averaged; the ``n_sel`` features with the
    highest average are returned.

    Args:
        df: Data passed unchanged to ``load_norm_feature_set``.
        gene_names: Sequence of feature names; its length defines the number
            of features.
        n_sel: Number of features to return.
        val_ratio: Validation ratio forwarded to ``load_norm_feature_set``.
        iter: Number of rounds. The round index is also forwarded to
            ``load_norm_feature_set`` (presumably as the split seed).
        epochs: Forwarded to ``WxHyperParameter``.
        learning_ratio: Forwarded to ``WxHyperParameter``.
        batch_size: Forwarded to ``WxHyperParameter``.
        verbose: Forwarded to ``WxHyperParameter``.
        model_type: ``'MLP'`` or ``'SLP'``.
        num_cls: Number of classes, forwarded to the loader and the model.

    Returns:
        Tuple ``(sel_index, sel_genes, sel_weight)``: indices into
        ``gene_names``, the matching names (NumPy array) and their averaged
        weights, ordered from highest to lowest weight.

    Raises:
        ValueError: If ``model_type`` is neither ``'MLP'`` nor ``'SLP'``.
    """
    # Fail early: previously an unknown model type surfaced as an
    # UnboundLocalError on ``sel_idx`` inside the loop.
    if model_type not in ('MLP', 'SLP'):
        raise ValueError(
            "model_type must be 'MLP' or 'SLP', got {!r}".format(model_type))

    feature_num = len(gene_names)
    # Loop-invariant: number of candidate features requested from every round.
    n_candidates = min(n_sel * 100, feature_num)
    all_weight = np.zeros(feature_num)
    # Counts start at 1, so the division below never divides by zero (each
    # mean is therefore taken over "times selected + 1").
    all_count = np.ones(feature_num)
    for i in range(0, iter):
        train_x, train_y, val_x, val_y = load_norm_feature_set(
            df, val_ratio, i, num_cls)
        print(i, 'train : ', train_x.shape, 'val : ', val_x.shape)
        hp = WxHyperParameter(epochs=epochs,
                              learning_ratio=learning_ratio,
                              batch_size=batch_size,
                              verbose=verbose)
        # Resolved lazily so only the requested trainer has to be in scope.
        if model_type == 'MLP':
            trainer = wx_mlp
        else:
            trainer = wx_slp
        sel_idx, sel_weight, _val_acc = trainer(train_x,
                                                train_y,
                                                val_x,
                                                val_y,
                                                n_selection=n_candidates,
                                                hyper_param=hp,
                                                num_cls=num_cls)
        for j in range(n_candidates):
            all_weight[sel_idx[j]] += sel_weight[j]
            all_count[sel_idx[j]] += 1
    all_weight = all_weight / all_count
    # Descending order of averaged weight.
    sort_index = np.argsort(all_weight)[::-1]
    sel_index = sort_index[:n_sel]
    sel_weight = all_weight[sel_index]
    gene_names = np.asarray(gene_names)
    sel_genes = gene_names[sel_index]

    return sel_index, sel_genes, sel_weight
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Dense
from keras import backend as K
from keras import optimizers, applications, callbacks
from keras.callbacks import ModelCheckpoint
from keras.callbacks import LearningRateScheduler
import numpy as np
from wx_hyperparam import WxHyperParameter
import xgboost as xgb

# Set default global hyperparameters.
wx_hyperparam = WxHyperParameter(learning_ratio=0.001)


def NaiveSLPmodel(x_train, y_train, x_val, y_val, hyper_param=wx_hyperparam):
    """Build and compile a single-layer softmax classifier with two outputs.

    The input width is taken from the length of the first training sample.
    The model is compiled with Nesterov SGD, categorical cross-entropy loss
    and accuracy as metric.

    Args:
        x_train: Training samples; only ``len(x_train[0])`` is read in the
            code shown here.
        y_train: Not used in the code shown here.
        x_val: Not used in the code shown here.
        y_val: Not used in the code shown here.
        hyper_param: Object providing ``learning_ratio``, ``weight_decay`` and
            ``momentum``; defaults to the module-level ``wx_hyperparam``.

    NOTE(review): the visible body stops after ``model.compile`` — no
    training and no return statement; the definition may be truncated.
    """
    input_dim = len(x_train[0])
    inputs = Input((input_dim, ))
    # Alternative kept for reference: zero-initialised kernel and bias.
    #fc_out = Dense(2,  kernel_initializer='zeros', bias_initializer='zeros', activation='softmax')(inputs)
    fc_out = Dense(2, activation='softmax')(inputs)
    # NOTE(review): ``input=``/``output=`` are the legacy Keras 1 keyword
    # names; newer Keras expects ``inputs=``/``outputs=`` — confirm the
    # targeted Keras version.
    model = Model(input=inputs, output=fc_out)

    # Build the SGD optimizer from the supplied hyper-parameters.
    sgd = optimizers.SGD(lr=hyper_param.learning_ratio,
                         decay=hyper_param.weight_decay,
                         momentum=hyper_param.momentum,
                         nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
Exemple #7
0
        idx = np.where(anno_ids == id_)
        y_all.append(np.where(class_type == anno_class[idx])[0][0])
    y_all = np.asarray(y_all)
    y_all = to_categorical(y_all, num_classes=n_cls)

    print('samples names : ', id_names)
    print('classes : ', class_type)

    #split to train and val
    x_train, x_val, y_train, y_val = train_test_split(x_all,
                                                      y_all,
                                                      test_size=0.2,
                                                      random_state=1)

    hp = WxHyperParameter(epochs=1000,
                          learning_ratio=0.01,
                          batch_size=8,
                          verbose=True)
    sel_idx, sel_weight, val_acc = wx_slp(x_train,
                                          y_train,
                                          x_val,
                                          y_val,
                                          n_selection=10,
                                          hyper_param=hp,
                                          num_cls=n_cls)

    print('\nSingle Layer WX')
    print('selected feature names:', f_names[sel_idx])
    print('selected feature index:', sel_idx)
    print('selected feature weight:', sel_weight)
    print('evaluation accuracy:', val_acc)
Exemple #8
0
def wx_feature_selection(n_sel=14,
                         val_ratio=0.2,
                         iter=1000,
                         epochs=30,
                         learning_ratio=0.001,
                         batch_size=32,
                         verbose=False,
                         except_norm=None,
                         model_type='MLP'):
    """Rank genes by their averaged Wx weight over repeated splits.

    Loads the feature data frame from ``FEATURE_SET_DF_FILE_NAME``, normalises
    it with ``StandByRow`` and, for each of ``iter`` rounds, asks the chosen
    model for its top ``min(n_sel * 100, #genes)`` features. Weights are
    accumulated per gene and averaged; the ``n_sel`` genes with the highest
    average are returned.

    Args:
        n_sel: Number of genes to return.
        val_ratio: Currently ignored — the validation ratio is fixed at 0.1.
        iter: Number of rounds. The round index is also forwarded to
            ``load_norm_feature_set`` (presumably as the split seed).
        epochs: Forwarded to ``WxHyperParameter``.
        learning_ratio: Forwarded to ``WxHyperParameter``.
        batch_size: Forwarded to ``WxHyperParameter``.
        verbose: Forwarded to ``WxHyperParameter``.
        except_norm: Second argument of ``StandByRow``; ``None`` means
            ``['Tumor', 'CancerName']``.
        model_type: ``'MLP'``, ``'SLP'`` or ``'ConnectionWeight'``.

    Returns:
        Tuple ``(sel_index, sel_genes, sel_weight)``: indices into the gene
        list (NumPy array), the matching gene names (list) and their averaged
        weights (NumPy array), ordered from highest to lowest weight.

    Raises:
        ValueError: If ``model_type`` is not one of the supported values.
    """
    # Fail early: previously an unknown model type surfaced as an
    # UnboundLocalError on ``sel_idx`` inside the loop.
    if model_type not in ('MLP', 'SLP', 'ConnectionWeight'):
        raise ValueError(
            "model_type must be 'MLP', 'SLP' or 'ConnectionWeight', "
            "got {!r}".format(model_type))
    # A fresh list per call instead of a shared mutable default argument.
    if except_norm is None:
        except_norm = ['Tumor', 'CancerName']

    VALIDATION_RATIO = 0.1  # NOTE: overrides the ``val_ratio`` argument.
    ITERATION = iter
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the handle, which the bare open() did not.
    with open(FEATURE_SET_DF_FILE_NAME, 'rb') as feature_file:
        df = cPickle.load(feature_file)
    df = StandByRow(df, except_norm)
    print('Feature Data Frame : ', df.shape)

    gene_names = get_value_list_from_file(TCGA_ASSEM_GENE_FILE_NAME)
    feature_num = len(gene_names)
    # Loop-invariant: number of candidate features requested from every round.
    n_candidates = min(n_sel * 100, feature_num)
    all_weight = np.zeros(feature_num)
    # Counts start at 1, so the division below never divides by zero (each
    # mean is therefore taken over "times selected + 1").
    all_count = np.ones(feature_num)
    for i in range(0, ITERATION):
        train_x, train_y, val_x, val_y = load_norm_feature_set(
            df, VALIDATION_RATIO, i)
        print(i, 'train : ', train_x.shape, 'val : ', val_x.shape)
        hp = WxHyperParameter(epochs=epochs,
                              learning_ratio=learning_ratio,
                              batch_size=batch_size,
                              verbose=verbose)
        # Resolved lazily so only the requested trainer has to be in scope.
        if model_type == 'MLP':
            trainer = wx_mlp
        elif model_type == 'SLP':
            trainer = wx_slp
        else:
            trainer = connection_weight
        sel_idx, sel_weight, _val_acc = trainer(train_x,
                                                train_y,
                                                val_x,
                                                val_y,
                                                n_selection=n_candidates,
                                                hyper_param=hp)
        for j in range(n_candidates):
            all_weight[sel_idx[j]] += sel_weight[j]
            all_count[sel_idx[j]] += 1
    all_weight = all_weight / all_count
    # Descending order of averaged weight.
    sort_index = np.argsort(all_weight)[::-1]
    sel_index = sort_index[:n_sel]
    sel_weight = all_weight[sel_index]
    gene_names = np.asarray(gene_names)
    sel_genes = gene_names[sel_index]

    return sel_index, sel_genes.tolist(), sel_weight
Exemple #9
0
    n_cls = len(class_type)
    y_train = []
    for id_ in id_names:
        idx = np.where(anno_ids == id_)
        y_train.append(np.where(class_type == anno_class[idx])[0][0])
    y_train = np.asarray(y_train)
    y_train = to_categorical(y_train, num_classes=n_cls)

    print('TRAIN samples names : ', id_names)    
    print('TRAIN classes : ' , class_type)    

    #split to train and val
    #x_train, x_val, y_train, y_val = train_test_split(x_all, y_all, test_size=0.2, random_state=1)

    hp = WxHyperParameter(epochs=3080, learning_ratio=0.01, batch_size=100, num_hidden_layer = 2, num_h_unit = 64, verbose=True)
    sel_idx, sel_weight, val_acc = wx_slp(x_train, y_train, x_val, y_val, n_selection=10, hyper_param=hp, num_cls=n_cls)

    print ('\nSingle Layer WX')
    print ('selected feature names:',f_names[sel_idx])
    print ('selected feature index:',sel_idx)
    print ('selected feature weight:',sel_weight)
    print ('evaluation accuracy:',val_acc)
    print ('\n\n\n\n')
    
    file1 = open("wyniki.txt","w") 
    file1.write(np.array2string(f_names[sel_idx],separator=','))
    file1.close()
    
    np.save("wyniki.npy", f_names[sel_idx])
Exemple #10
0
        idx = np.where(anno_ids == id_)
        y_all.append(np.where(class_type == anno_class[idx])[0][0])
    y_all = np.asarray(y_all)
    y_all = to_categorical(y_all, num_classes=n_cls)

    print('samples names : ', id_names)
    print('classes : ', class_type)

    #split to train and val
    x_train, x_val, y_train, y_val = train_test_split(x_all,
                                                      y_all,
                                                      test_size=0.2,
                                                      random_state=1)

    hp = WxHyperParameter(epochs=30,
                          learning_ratio=0.01,
                          batch_size=8,
                          verbose=False)
    sel_idx, sel_weight, val_acc = wx_slp(x_train,
                                          y_train,
                                          x_val,
                                          y_val,
                                          n_selection=10,
                                          hyper_param=hp,
                                          num_cls=n_cls)

    print('\nSingle Layer WX')
    print('selected feature names:', f_names[sel_idx])
    print('selected feature index:', sel_idx)
    print('selected feature weight:', sel_weight)
    print('evaluation accuracy:', val_acc)
    train_num = 100
    test_num = 100
    input_dim = 2000
    num_cls = 2

    x_train = np.random.random((train_num, input_dim))
    y_train = to_categorical(np.random.randint(num_cls, size=(train_num, 1)),
                             num_classes=num_cls)

    x_test = np.random.random((test_num, input_dim))
    y_test = to_categorical(np.random.randint(num_cls, size=(test_num, 1)),
                            num_classes=num_cls)

    return x_train, y_train, x_test, y_test


if __name__ == '__main__':
    # Demo run: fetch the sample data and let a single-layer Wx model pick
    # the 50 highest-weighted features.
    x_train, y_train, x_val, y_val = GetSampleData()

    hp = WxHyperParameter(batch_size=10, learning_ratio=0.01, epochs=30)
    # The training split is also passed as the validation split here; the
    # unpacked x_val / y_val are not used.
    sel_idx, sel_weight, val_acc = WxSlp(
        x_train, y_train, x_train, y_train, n_selection=50, hyper_param=hp)

    print('\nSingle Layer WX')
    report = (
        ('selected feature index:', sel_idx),
        ('selected feature weight:', sel_weight),
        ('evaluation accuracy:', val_acc),
    )
    for label, value in report:
        print(label, value)