Esempio n. 1
0
        def get_cascaded_sel_idx(high_th_year,
                                 low_th_year,
                                 feature_list,
                                 set_feature,
                                 sel_feature_num,
                                 div_ratio=4):
            """Rank features by random-forest importance and keep the top ones.

            ``x``, ``c``, ``s``, ``self`` and ``helper`` are read from the
            surrounding scope rather than passed in.

            Args:
                high_th_year: High-risk threshold; multiplied by 365 before
                    being handed to ``helper.get_risk_group`` (presumably a
                    years-to-days conversion -- confirm against the helper).
                low_th_year: Low-risk threshold, scaled the same way.
                feature_list: Not used by this function.
                set_feature: Column indices used to restrict the training
                    matrix; an empty sequence keeps every column.
                sel_feature_num: Number of indices to return. When it equals
                    0 the count is derived from ``div_ratio`` instead.
                div_ratio: Divisor applied to the number of (restricted)
                    columns when ``sel_feature_num`` is 0.

            Returns:
                The leading entries of the column indices ordered by
                descending ``feature_importances_``.
            """
            high_cutoff = high_th_year * 365
            low_cutoff = low_th_year * 365
            high_risk_group, low_risk_group = helper.get_risk_group(
                x, c, s, high_cutoff, low_cutoff)

            # Training data only; no validation split is requested here.
            train_x, train_y = helper.get_train(high_risk_group,
                                                low_risk_group,
                                                is_categori_y=False,
                                                seed=self.random_seed)
            if len(set_feature) > 0:
                train_x = train_x[:, set_feature]
            n_features = train_x.shape[1]

            n_keep = (int(max(sel_feature_num, n_features / div_ratio))
                      if sel_feature_num == 0 else sel_feature_num)

            forest = RandomForestClassifier()
            forest.fit(train_x, train_y)
            # argsort is ascending, so reverse it to put the largest first.
            ranked = np.argsort(forest.feature_importances_)[::-1]

            return ranked[:n_keep]
Esempio n. 2
0
        def get_cascaded_sel_idx(high_th_year,
                                 low_th_year,
                                 feature_list,
                                 set_feature,
                                 sel_feature_num,
                                 div_ratio=4):
            """Rank features with ``trace_ratio`` and keep the leading ones.

            ``x``, ``c``, ``s``, ``self`` and ``helper`` are read from the
            surrounding scope rather than passed in.

            Args:
                high_th_year: High-risk threshold; multiplied by 365 before
                    being handed to ``helper.get_risk_group`` (presumably a
                    years-to-days conversion -- confirm against the helper).
                low_th_year: Low-risk threshold, scaled the same way.
                feature_list: Not used by this function.
                set_feature: Column indices used to restrict the training
                    matrix; an empty sequence keeps every column.
                sel_feature_num: Number of indices to return. When it equals
                    0 the count is derived from ``div_ratio`` instead.
                div_ratio: Divisor applied to the number of (restricted)
                    columns when ``sel_feature_num`` is 0.

            Returns:
                The leading entries of the index array returned by
                ``trace_ratio.trace_ratio(..., mode='index')``.
            """
            high_cutoff = high_th_year * 365
            low_cutoff = low_th_year * 365
            high_risk_group, low_risk_group = helper.get_risk_group(
                x, c, s, high_cutoff, low_cutoff)

            # Training data only; no validation split is requested here.
            train_x, train_y = helper.get_train(high_risk_group,
                                                low_risk_group,
                                                is_categori_y=False,
                                                seed=self.random_seed)
            if len(set_feature) > 0:
                train_x = train_x[:, set_feature]
            n_features = train_x.shape[1]

            n_keep = (int(max(sel_feature_num, n_features / div_ratio))
                      if sel_feature_num == 0 else sel_feature_num)

            ranked = trace_ratio.trace_ratio(train_x, train_y, mode='index')

            return ranked[:n_keep]
Esempio n. 3
0
        def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
            """Return the first ``sel_feature_num`` indices ranked by ``RFS.rfs``.

            ``x``, ``c``, ``s``, ``self`` and ``helper`` are read from the
            surrounding scope. ``feature_list`` is not used. Both thresholds
            are multiplied by 365 before the risk groups are built (presumably
            a years-to-days conversion -- confirm against the helper).
            Progress messages are printed before and after the RFS call.
            """
            high_cutoff = high_th_year * 365
            low_cutoff = low_th_year * 365
            high_risk_group, low_risk_group = helper.get_risk_group(
                x, c, s, high_cutoff, low_cutoff)

            # Training data only; no validation split is requested here.
            train_x, train_y = helper.get_train(
                high_risk_group,
                low_risk_group,
                is_categori_y=False,
                seed=self.random_seed)

            print('Into RFS fs...')
            ranked = RFS.rfs(train_x, train_y, mode='index', verbose=True)
            print('RFS fs done...')

            return ranked[:sel_feature_num]
Esempio n. 4
0
        def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
            """Return the top ``sel_feature_num`` indices by XGBoost importance.

            ``x``, ``c``, ``s``, ``self`` and ``helper`` are read from the
            surrounding scope. ``feature_list`` is not used. Both thresholds
            are multiplied by 365 before the risk groups are built (presumably
            a years-to-days conversion -- confirm against the helper).

            The classifier is fitted on the training split, with the
            validation split supplied as ``eval_set`` (metric ``'auc'``,
            ``early_stopping_rounds=200``). Indices are ordered by descending
            ``feature_importances_``.
            """
            high_cutoff = high_th_year * 365
            low_cutoff = low_th_year * 365
            high_risk_group, low_risk_group = helper.get_risk_group(
                x, c, s, high_cutoff, low_cutoff)

            # This variant needs a validation split for early stopping.
            train_x, train_y, valid_x, valid_y = helper.get_train_val(
                high_risk_group,
                low_risk_group,
                is_categori_y=False,
                seed=self.random_seed)

            booster = xgb.XGBClassifier(seed=1, objective='binary:logistic')
            booster.fit(train_x,
                        train_y,
                        eval_set=[(valid_x, valid_y)],
                        verbose=False,
                        eval_metric='auc',
                        early_stopping_rounds=200)

            # argsort is ascending, so reverse it to put the largest first.
            ranked = np.argsort(booster.feature_importances_)[::-1]

            return ranked[:sel_feature_num]
Esempio n. 5
0
 def get_sel_idx(high_th_year, low_th_year, feature_list,
                 sel_feature_num):
     """Return the first ``sel_feature_num`` indices ranked by reliefF.

     ``x``, ``c``, ``s``, ``self`` and ``helper`` are read from the
     surrounding scope. ``feature_list`` is not used. Both thresholds are
     multiplied by 365 before the risk groups are built (presumably a
     years-to-days conversion -- confirm against the helper).
     """
     high_cutoff = high_th_year * 365
     low_cutoff = low_th_year * 365
     high_risk_group, low_risk_group = helper.get_risk_group(
         x, c, s, high_cutoff, low_cutoff)

     # Training data only; no validation split is requested here.
     train_x, train_y = helper.get_train(high_risk_group,
                                         low_risk_group,
                                         is_categori_y=False,
                                         seed=self.random_seed)

     ranked = reliefF.reliefF(train_x, train_y, mode='index')
     return ranked[:sel_feature_num]
Esempio n. 6
0
        def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
            """Return the top ``sel_feature_num`` indices by linear-SVM weight.

            ``x``, ``c``, ``s``, ``self`` and ``helper`` are read from the
            surrounding scope. ``feature_list`` is not used. Both thresholds
            are multiplied by 365 before the risk groups are built (presumably
            a years-to-days conversion -- confirm against the helper).

            A linear-kernel ``SVC`` is fitted; its ``coef_`` is averaged over
            axis 0 and the absolute value of that mean is used as the score.
            Indices are ordered by descending score.
            """
            high_cutoff = high_th_year * 365
            low_cutoff = low_th_year * 365
            high_risk_group, low_risk_group = helper.get_risk_group(
                x, c, s, high_cutoff, low_cutoff)

            # Training data only; no validation split is requested here.
            train_x, train_y = helper.get_train(
                high_risk_group,
                low_risk_group,
                is_categori_y=False,
                seed=self.random_seed)

            model = svm.SVC(kernel='linear')
            model.fit(train_x, train_y)

            # Note: the mean is taken first, then the absolute value.
            scores = np.abs(np.mean(model.coef_, axis=0))
            ranked = np.argsort(scores)[::-1]

            return ranked[:sel_feature_num]
Esempio n. 7
0
        def get_wx_sel_idx(high_th_year,
                           low_th_year,
                           feature_list,
                           set_feature,
                           sel_feature_num,
                           sel_op,
                           div_ratio=4):
            """Select feature indices with ``DoFeatureSelectionWX``.

            ``x``, ``c``, ``s``, ``self`` and ``helper`` are read from the
            surrounding scope rather than passed in.

            Args:
                high_th_year: High-risk threshold; multiplied by 365 before
                    being handed to ``helper.get_risk_group`` (presumably a
                    years-to-days conversion -- confirm against the helper).
                low_th_year: Low-risk threshold, scaled the same way.
                feature_list: Forwarded unchanged to ``DoFeatureSelectionWX``.
                set_feature: Column indices used to restrict both the
                    training and validation matrices; an empty sequence
                    keeps every column.
                sel_feature_num: Number of features to select. When it equals
                    0 the count is derived from ``div_ratio`` and a learning
                    ratio of 0.01 is used; otherwise the learning ratio is
                    0.001.
                sel_op: Forwarded as ``sel_option``.
                div_ratio: Divisor applied to the number of (restricted)
                    columns when ``sel_feature_num`` is 0.

            Returns:
                The first element of the 4-tuple returned by
                ``DoFeatureSelectionWX``.
            """
            high_cutoff = high_th_year * 365
            low_cutoff = low_th_year * 365
            high_risk_group, low_risk_group = helper.get_risk_group(
                x, c, s, high_cutoff, low_cutoff)

            train_x, train_y, valid_x, valid_y = helper.get_train_val(
                high_risk_group,
                low_risk_group,
                is_categori_y=True,
                seed=self.random_seed)
            if len(set_feature) > 0:
                train_x = train_x[:, set_feature]
                valid_x = valid_x[:, set_feature]
            n_features = train_x.shape[1]

            # The two modes differ only in learning ratio and feature count.
            derive_count = sel_feature_num == 0
            hp = WxHyperParameter(
                epochs=50,
                learning_ratio=0.01 if derive_count else 0.001,
                batch_size=int(len(train_x) / 4),
                verbose=True)
            n_keep = (int(max(sel_feature_num, n_features / div_ratio))
                      if derive_count else sel_feature_num)

            # The validation split is passed in two consecutive positions
            # (presumably validation and test slots -- confirm against
            # DoFeatureSelectionWX's signature).
            sel_idx, _genes, _weights, _auc = DoFeatureSelectionWX(
                train_x,
                train_y,
                valid_x,
                valid_y,
                valid_x,
                valid_y,
                feature_list,
                hp,
                n_sel=n_keep,
                sel_option=sel_op)

            return sel_idx
Esempio n. 8
0
        def get_sel_idx(high_th_year, low_th_year, feature_list,
                        sel_feature_num):
            """Return the top ``sel_feature_num`` indices by forest importance.

            ``x``, ``c``, ``s``, ``self`` and ``helper`` are read from the
            surrounding scope. ``feature_list`` is not used. Both thresholds
            are multiplied by 365 before the risk groups are built (presumably
            a years-to-days conversion -- confirm against the helper).
            Indices are ordered by descending ``feature_importances_`` of a
            default-constructed ``RandomForestClassifier``.
            """
            high_cutoff = high_th_year * 365
            low_cutoff = low_th_year * 365
            high_risk_group, low_risk_group = helper.get_risk_group(
                x, c, s, high_cutoff, low_cutoff)

            # Training data only; no validation split is requested here.
            train_x, train_y = helper.get_train(high_risk_group,
                                                low_risk_group,
                                                is_categori_y=False,
                                                seed=self.random_seed)

            forest = RandomForestClassifier()
            forest.fit(train_x, train_y)

            # argsort is ascending, so reverse it to put the largest first.
            ranked = np.argsort(forest.feature_importances_)[::-1]

            return ranked[:sel_feature_num]
Esempio n. 9
0
        def get_sel_idx(high_th_year, low_th_year, feature_list,
                        sel_feature_num):
            """Return the first ``sel_feature_num`` indices ranked via ll_l21.

            ``x``, ``c``, ``s``, ``self`` and ``helper`` are read from the
            surrounding scope. ``feature_list`` is not used. Both thresholds
            are multiplied by 365 before the risk groups are built (presumably
            a years-to-days conversion -- confirm against the helper).

            The first of the three values returned by
            ``ll_l21.proximal_gradient_descent`` (called with ``z=0.01`` and
            ``mode='raw'``) is passed to ``feature_ranking`` to obtain the
            ordering.
            """
            high_cutoff = high_th_year * 365
            low_cutoff = low_th_year * 365
            high_risk_group, low_risk_group = helper.get_risk_group(
                x, c, s, high_cutoff, low_cutoff)

            # Training data only; no validation split is requested here.
            train_x, train_y = helper.get_train(high_risk_group,
                                                low_risk_group,
                                                is_categori_y=False,
                                                seed=self.random_seed)

            # Only the first returned value is needed; the other two are
            # discarded.
            weights, _, _ = ll_l21.proximal_gradient_descent(
                train_x, train_y, z=0.01, mode='raw')
            ranked = feature_ranking(weights)

            return ranked[:sel_feature_num]