def get_sel_idx(feature_list, sel_feature_num):
    """Pick features with the connection-weight selector on an 80/20 split.

    ``x``, ``s`` and ``self`` are not parameters; they are free names resolved
    from the surrounding scope.  ``x`` and ``s`` are shuffled together (seeded
    with ``self.random_seed``), the targets are log10-transformed, and the
    first four fifths of the rows are used for training, the rest for
    validation.  The validation split is also passed in the two argument
    slots that follow it in ``DoFeatureSelectionConnectionWeight``.

    Args:
        feature_list: forwarded unchanged to the selector.
        sel_feature_num: number of features to select (passed as ``n_sel``).

    Returns:
        The first element of the selector's 4-tuple result (``sel_idx``).
    """
    shuffled_x, shuffled_s = shuffle(x, s, random_state=self.random_seed)
    shuffled_s = np.log10(shuffled_s)

    sample_count = len(shuffled_x)
    split_at = sample_count * 4 // 5
    trn_x, val_x = shuffled_x[:split_at], shuffled_x[split_at:]
    trn_y, val_y = shuffled_s[:split_at], shuffled_s[split_at:]

    # Not used below; kept because the lookup fails fast on non-2D input.
    feature_num = trn_x.shape[1]

    hyper_param = WxHyperParameter(
        epochs=50, learning_ratio=0.001, batch_size=16, verbose=True)

    sel_idx, _sel_genes, _sel_weight, _test_auc = DoFeatureSelectionConnectionWeight(
        trn_x, trn_y,
        val_x, val_y,
        val_x, val_y,
        feature_list, hyper_param,
        n_sel=sel_feature_num)
    return sel_idx
def DoFeatureSelection(n_sel=14):
    """Rank genes by their averaged WxSlp weight over repeated train/val splits.

    Loads the pickled feature data frame, normalises it with ``StandByRow``,
    then runs ``ITERATION`` rounds.  Each round draws a split with
    ``LoadNormFeatureSet(df, VALIDATION_RATIO, i)``, trains ``WxSlp`` and
    accumulates the weights of the ``min(n_sel * 100, feature_num)`` features
    it reports.

    Args:
        n_sel: number of top-ranked features to return.

    Returns:
        Tuple ``(sel_index, sel_genes, sel_weight)``: indices of the ``n_sel``
        features with the largest averaged weight (descending), the matching
        gene names as a NumPy array, and their averaged weights.
    """
    VALIDATION_RATIO = 0.2
    ITERATION = 1000

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the handle, which the bare open() did not.
    with open(FEATURE_SET_DF_FILE_NAME, 'rb') as feature_file:
        df = cPickle.load(feature_file)
    df = StandByRow(df)
    print('Feature Data Frame : ', df.shape)

    gene_names = GetValueListFromFile('GENE_LIST_TCGA_ASSEM.txt')
    feature_num = len(gene_names)
    # Loop-invariant: how many candidates each round contributes.
    n_candidates = min(n_sel * 100, feature_num)

    all_weight = np.zeros(feature_num)
    # Counts start at one (presumably so never-selected features do not divide
    # by zero); as a result each average is taken over "times selected + 1".
    all_count = np.ones(feature_num)

    for i in range(ITERATION):
        train_x, train_y, val_x, val_y = LoadNormFeatureSet(
            df, VALIDATION_RATIO, i)
        hp = WxHyperParameter(epochs=30, learning_ratio=0.001, batch_size=32)
        sel_idx, sel_weight, _val_acc = WxSlp(
            train_x, train_y, val_x, val_y,
            n_selection=n_candidates, hyper_param=hp)
        for j in range(n_candidates):
            all_weight[sel_idx[j]] += sel_weight[j]
            all_count[sel_idx[j]] += 1

    all_weight = all_weight / all_count
    sort_index = np.argsort(all_weight)[::-1]  # largest averaged weight first
    sel_index = sort_index[:n_sel]
    sel_weight = all_weight[sel_index]
    sel_genes = np.asarray(gene_names)[sel_index]

    return sel_index, sel_genes, sel_weight
def get_wx_sel_idx(high_th_year, low_th_year, feature_list, set_feature, sel_feature_num, sel_op, div_ratio=4):
    """Run the WX selector on high-/low-risk groups and return chosen indices.

    ``x``, ``c``, ``s``, ``self`` and ``helper`` are free names resolved from
    the surrounding scope.  Both thresholds are multiplied by 365 (presumably
    years to days) before being handed to ``helper.get_risk_group``.

    Args:
        high_th_year: high-risk threshold, scaled by 365 before use.
        low_th_year: low-risk threshold, scaled by 365 before use.
        feature_list: forwarded unchanged to ``DoFeatureSelectionWX``.
        set_feature: column indices to keep; when empty all columns are used.
        sel_feature_num: number of features to select.  ``0`` switches to a
            learning ratio of 0.01 and derives the count from
            ``feature_num / div_ratio``.
        sel_op: forwarded as ``sel_option``.
        div_ratio: divisor used only when ``sel_feature_num`` is ``0``.

    Returns:
        The first element of the selector's 4-tuple result (``sel_idx``).
    """
    high_risk_group, low_risk_group = helper.get_risk_group(
        x, c, s, high_th_year * 365, low_th_year * 365)
    trn_x, trn_y, val_x, val_y = helper.get_train_val(
        high_risk_group, low_risk_group,
        is_categori_y=True, seed=self.random_seed)

    # Restrict both splits to the requested columns, if any were given.
    if len(set_feature) != 0:
        trn_x, val_x = trn_x[:, set_feature], val_x[:, set_feature]

    feature_num = trn_x.shape[1]
    derive_count = sel_feature_num == 0

    # The two modes differ only in learning ratio and in how the count is set.
    hyper_param = WxHyperParameter(
        epochs=50,
        learning_ratio=0.01 if derive_count else 0.001,
        batch_size=int(len(trn_x) / 4),
        verbose=True)
    if derive_count:
        sel_gene_num = int(max(sel_feature_num, feature_num / div_ratio))
    else:
        sel_gene_num = sel_feature_num

    # The validation split is passed twice, filling the last data slots too.
    sel_idx, _sel_genes, _sel_weight, _test_auc = DoFeatureSelectionWX(
        trn_x, trn_y,
        val_x, val_y,
        val_x, val_y,
        feature_list, hyper_param,
        n_sel=sel_gene_num, sel_option=sel_op)
    return sel_idx
def main():
    """Run ``WxSlp`` once on the dummy CSV, using the same data for train and val.

    The CSV is read from a hard-coded path.  Every column but the last is a
    feature; the last column is the label, which is cast to an integer and
    one-hot encoded by indexing into ``np.eye(3)`` (so labels are expected to
    be valid row indices of a 3x3 identity matrix).
    """
    data = np.loadtxt(
        r"F:\00_paper_Proj\NeuralNet_GeneSelection\DearWXpub-master\DearWXpub-master\src\dummy.csv",
        delimiter=',')
    xtrain = data[:, :-1]
    # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int`` is
    # what it referred to, so the resulting dtype is unchanged.
    ytrain = data[:, -1].astype(int)
    ytrain = np.eye(3)[ytrain]  # one-hot rows selected by label value

    WxSlp(xtrain, ytrain, xtrain, ytrain, 3,
          WxHyperParameter(learning_ratio=0.001))
def wx_feature_selection(df='', gene_names='', n_sel=14, val_ratio=0.2, iter=1000, epochs=30, learning_ratio=0.001,
                         batch_size=32, verbose=False, model_type='MLP', num_cls=2):
    """Rank features by their averaged Wx weight over repeated train/val splits.

    Each of the ``iter`` rounds draws a split with
    ``load_norm_feature_set(df, val_ratio, i, num_cls)``, trains the chosen
    model and accumulates the weights of the ``min(n_sel * 100, feature_num)``
    features it reports.

    Args:
        df: data passed to ``load_norm_feature_set``.
        gene_names: sequence of feature names; its length fixes the number of
            features.
        n_sel: number of top-ranked features to return.
        val_ratio: forwarded to ``load_norm_feature_set``.
        iter: number of rounds.
        epochs, learning_ratio, batch_size, verbose: forwarded to
            ``WxHyperParameter``.
        model_type: ``'MLP'`` (uses ``wx_mlp``) or ``'SLP'`` (uses ``wx_slp``).
        num_cls: forwarded to the split loader and to the model.

    Returns:
        Tuple ``(sel_index, sel_genes, sel_weight)``: indices of the ``n_sel``
        features with the largest averaged weight (descending), the matching
        names as a NumPy array, and their averaged weights.

    Raises:
        ValueError: if ``model_type`` is not ``'MLP'`` or ``'SLP'``.
    """
    # Fail early: an unknown type used to leave sel_idx unbound (NameError on
    # the first round) or silently reuse the previous round's result.
    if model_type not in ('MLP', 'SLP'):
        raise ValueError(
            "model_type must be 'MLP' or 'SLP', got %r" % (model_type,))

    feature_num = len(gene_names)
    # Loop-invariant: how many candidates each round contributes.
    n_candidates = min(n_sel * 100, feature_num)

    all_weight = np.zeros(feature_num)
    # Counts start at one (presumably so never-selected features do not divide
    # by zero); as a result each average is taken over "times selected + 1".
    all_count = np.ones(feature_num)

    for i in range(iter):
        train_x, train_y, val_x, val_y = load_norm_feature_set(
            df, val_ratio, i, num_cls)
        print(i, 'train : ', train_x.shape, 'val : ', val_x.shape)
        hp = WxHyperParameter(epochs=epochs, learning_ratio=learning_ratio,
                              batch_size=batch_size, verbose=verbose)
        selector = wx_mlp if model_type == 'MLP' else wx_slp
        sel_idx, sel_weight, _val_acc = selector(
            train_x, train_y, val_x, val_y,
            n_selection=n_candidates, hyper_param=hp, num_cls=num_cls)
        for j in range(n_candidates):
            all_weight[sel_idx[j]] += sel_weight[j]
            all_count[sel_idx[j]] += 1

    all_weight = all_weight / all_count
    sort_index = np.argsort(all_weight)[::-1]  # largest averaged weight first
    sel_index = sort_index[:n_sel]
    sel_weight = all_weight[sel_index]
    sel_genes = np.asarray(gene_names)[sel_index]

    return sel_index, sel_genes, sel_weight
import tensorflow as tf from keras.models import Model from keras.layers import Input, Dense from keras import backend as K from keras import optimizers, applications, callbacks from keras.callbacks import ModelCheckpoint from keras.callbacks import LearningRateScheduler import numpy as np from wx_hyperparam import WxHyperParameter import xgboost as xgb #set default global hyper paramerters wx_hyperparam = WxHyperParameter(learning_ratio=0.001) def NaiveSLPmodel(x_train, y_train, x_val, y_val, hyper_param=wx_hyperparam): input_dim = len(x_train[0]) inputs = Input((input_dim, )) #fc_out = Dense(2, kernel_initializer='zeros', bias_initializer='zeros', activation='softmax')(inputs) fc_out = Dense(2, activation='softmax')(inputs) model = Model(input=inputs, output=fc_out) #build a optimizer sgd = optimizers.SGD(lr=hyper_param.learning_ratio, decay=hyper_param.weight_decay, momentum=hyper_param.momentum, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
idx = np.where(anno_ids == id_) y_all.append(np.where(class_type == anno_class[idx])[0][0]) y_all = np.asarray(y_all) y_all = to_categorical(y_all, num_classes=n_cls) print('samples names : ', id_names) print('classes : ', class_type) #split to train and val x_train, x_val, y_train, y_val = train_test_split(x_all, y_all, test_size=0.2, random_state=1) hp = WxHyperParameter(epochs=1000, learning_ratio=0.01, batch_size=8, verbose=True) sel_idx, sel_weight, val_acc = wx_slp(x_train, y_train, x_val, y_val, n_selection=10, hyper_param=hp, num_cls=n_cls) print('\nSingle Layer WX') print('selected feature names:', f_names[sel_idx]) print('selected feature index:', sel_idx) print('selected feature weight:', sel_weight) print('evaluation accuracy:', val_acc)
def wx_feature_selection(n_sel=14, val_ratio=0.2, iter=1000, epochs=30, learning_ratio=0.001, batch_size=32,
                         verbose=False, except_norm=['Tumor', 'CancerName'], model_type='MLP'):
    """Rank genes by their averaged Wx weight over repeated train/val splits.

    Loads the pickled feature data frame, normalises it with
    ``StandByRow(df, except_norm)``, then runs ``iter`` rounds.  Each round
    draws a split with ``load_norm_feature_set``, trains the chosen model and
    accumulates the weights of the ``min(n_sel * 100, feature_num)`` features
    it reports.

    Args:
        n_sel: number of top-ranked features to return.
        val_ratio: accepted but currently NOT used; the split ratio is the
            hard-coded ``VALIDATION_RATIO`` below.
        iter: number of rounds.
        epochs, learning_ratio, batch_size, verbose: forwarded to
            ``WxHyperParameter``.
        except_norm: forwarded to ``StandByRow`` as its second argument.
        model_type: ``'MLP'``, ``'SLP'`` or ``'ConnectionWeight'``.

    Returns:
        Tuple ``(sel_index, sel_genes, sel_weight)``: indices of the ``n_sel``
        features with the largest averaged weight (descending), the matching
        gene names as a plain list, and their averaged weights.

    Raises:
        ValueError: if ``model_type`` is not one of the supported values.
    """
    # Fail early: an unknown type used to leave sel_idx unbound (NameError on
    # the first round) or silently reuse the previous round's result.
    if model_type not in ('MLP', 'SLP', 'ConnectionWeight'):
        raise ValueError(
            "model_type must be 'MLP', 'SLP' or 'ConnectionWeight', got %r"
            % (model_type,))

    # NOTE(review): val_ratio is ignored here; 0.1 is kept so existing results
    # do not change.  Confirm whether the parameter should take effect.
    VALIDATION_RATIO = 0.1
    ITERATION = iter

    # Hand StandByRow a private copy so the shared default list of the
    # signature cannot be mutated across calls.
    if except_norm is not None:
        except_norm = list(except_norm)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the handle, which the bare open() did not.
    with open(FEATURE_SET_DF_FILE_NAME, 'rb') as feature_file:
        df = cPickle.load(feature_file)

    df = StandByRow(df, except_norm)
    print('Feature Data Frame : ', df.shape)

    gene_names = get_value_list_from_file(TCGA_ASSEM_GENE_FILE_NAME)
    feature_num = len(gene_names)
    # Loop-invariant: how many candidates each round contributes.
    n_candidates = min(n_sel * 100, feature_num)

    all_weight = np.zeros(feature_num)
    # Counts start at one (presumably so never-selected features do not divide
    # by zero); as a result each average is taken over "times selected + 1".
    all_count = np.ones(feature_num)

    for i in range(ITERATION):
        train_x, train_y, val_x, val_y = load_norm_feature_set(
            df, VALIDATION_RATIO, i)
        print(i, 'train : ', train_x.shape, 'val : ', val_x.shape)
        hp = WxHyperParameter(epochs=epochs, learning_ratio=learning_ratio,
                              batch_size=batch_size, verbose=verbose)

        if model_type == 'MLP':
            selector = wx_mlp
        elif model_type == 'SLP':
            selector = wx_slp
        else:  # 'ConnectionWeight', guaranteed by the check at the top
            selector = connection_weight
        sel_idx, sel_weight, _val_acc = selector(
            train_x, train_y, val_x, val_y,
            n_selection=n_candidates, hyper_param=hp)

        for j in range(n_candidates):
            all_weight[sel_idx[j]] += sel_weight[j]
            all_count[sel_idx[j]] += 1

    all_weight = all_weight / all_count
    sort_index = np.argsort(all_weight)[::-1]  # largest averaged weight first
    sel_index = sort_index[:n_sel]
    sel_weight = all_weight[sel_index]
    sel_genes = np.asarray(gene_names)[sel_index]

    return sel_index, sel_genes.tolist(), sel_weight
n_cls = len(class_type)

# Encode every sample as the position of its annotated class in class_type,
# then one-hot encode those positions.
y_train = []
for id_ in id_names:
    idx = np.where(anno_ids == id_)
    y_train.append(np.where(class_type == anno_class[idx])[0][0])
y_train = np.asarray(y_train)
y_train = to_categorical(y_train, num_classes=n_cls)

print('TRAIN samples names : ', id_names)
print('TRAIN classes : ', class_type)

# The train/val split is disabled here; x_train, x_val and y_val must already
# have been prepared before this point.
#x_train, x_val, y_train, y_val = train_test_split(x_all, y_all, test_size=0.2, random_state=1)

hp = WxHyperParameter(epochs=3080, learning_ratio=0.01, batch_size=100,
                      num_hidden_layer=2, num_h_unit=64, verbose=True)
sel_idx, sel_weight, val_acc = wx_slp(x_train, y_train, x_val, y_val,
                                      n_selection=10, hyper_param=hp, num_cls=n_cls)

print('\nSingle Layer WX')
print('selected feature names:', f_names[sel_idx])
print('selected feature index:', sel_idx)
print('selected feature weight:', sel_weight)
print('evaluation accuracy:', val_acc)
print('\n\n\n\n')

# Persist the selected feature names as text and as a NumPy array.  The
# context manager closes the file even if the write raises.
with open("wyniki.txt", "w") as result_file:
    result_file.write(np.array2string(f_names[sel_idx], separator=','))
np.save("wyniki.npy", f_names[sel_idx])
idx = np.where(anno_ids == id_) y_all.append(np.where(class_type == anno_class[idx])[0][0]) y_all = np.asarray(y_all) y_all = to_categorical(y_all, num_classes=n_cls) print('samples names : ', id_names) print('classes : ', class_type) #split to train and val x_train, x_val, y_train, y_val = train_test_split(x_all, y_all, test_size=0.2, random_state=1) hp = WxHyperParameter(epochs=30, learning_ratio=0.01, batch_size=8, verbose=False) sel_idx, sel_weight, val_acc = wx_slp(x_train, y_train, x_val, y_val, n_selection=10, hyper_param=hp, num_cls=n_cls) print('\nSingle Layer WX') print('selected feature names:', f_names[sel_idx]) print('selected feature index:', sel_idx) print('selected feature weight:', sel_weight) print('evaluation accuracy:', val_acc)
train_num = 100 test_num = 100 input_dim = 2000 num_cls = 2 x_train = np.random.random((train_num, input_dim)) y_train = to_categorical(np.random.randint(num_cls, size=(train_num, 1)), num_classes=num_cls) x_test = np.random.random((test_num, input_dim)) y_test = to_categorical(np.random.randint(num_cls, size=(test_num, 1)), num_classes=num_cls) return x_train, y_train, x_test, y_test if __name__ == '__main__': x_train, y_train, x_val, y_val = GetSampleData() hp = WxHyperParameter(epochs=30, learning_ratio=0.01, batch_size=10) sel_idx, sel_weight, val_acc = WxSlp(x_train, y_train, x_train, y_train, n_selection=50, hyper_param=hp) print('\nSingle Layer WX') print('selected feature index:', sel_idx) print('selected feature weight:', sel_weight) print('evaluation accuracy:', val_acc)