Exemple #1
0
        def get_cascaded_sel_idx(high_th_year,
                                 low_th_year,
                                 feature_list,
                                 set_feature,
                                 sel_feature_num,
                                 div_ratio=4):
            """Rank features with the trace-ratio criterion and keep the top ones.

            Args:
                high_th_year: High-risk threshold; multiplied by 365
                    (presumably years -> days) before being handed to
                    ``helper.get_risk_group``.
                low_th_year: Low-risk threshold, scaled the same way.
                feature_list: Not used by this function.
                set_feature: Column selector applied to the training matrix
                    whenever it has non-zero length.
                sel_feature_num: Number of indices to return; ``0`` means the
                    count is derived from ``div_ratio`` instead.
                div_ratio: Divisor applied to the column count when
                    ``sel_feature_num`` is ``0``.

            Returns:
                The leading entries of the ordering returned by
                ``trace_ratio.trace_ratio(..., mode='index')``. Positions refer
                to the columns of the training matrix *after* any
                ``set_feature`` subsetting.
            """
            upper_cut, lower_cut = high_th_year * 365, low_th_year * 365
            risky, safe = helper.get_risk_group(x, c, s, upper_cut, lower_cut)

            # Only a training split is drawn here; there is no validation set.
            samples, labels = helper.get_train(risky,
                                               safe,
                                               is_categori_y=False,
                                               seed=self.random_seed)

            # len() rather than truthiness: set_feature may be an array.
            if len(set_feature):
                samples = samples[:, set_feature]
            n_columns = samples.shape[1]

            # A request of 0 falls back to a fraction of the available columns.
            n_keep = (int(max(sel_feature_num, n_columns / div_ratio))
                      if sel_feature_num == 0 else sel_feature_num)

            ranking = trace_ratio.trace_ratio(samples, labels, mode='index')
            return ranking[:n_keep]
Exemple #2
0
        def get_cascaded_sel_idx(high_th_year,
                                 low_th_year,
                                 feature_list,
                                 set_feature,
                                 sel_feature_num,
                                 div_ratio=4):
            """Rank features by random-forest importance and keep the top ones.

            Args:
                high_th_year: High-risk threshold; multiplied by 365
                    (presumably years -> days) before being handed to
                    ``helper.get_risk_group``.
                low_th_year: Low-risk threshold, scaled the same way.
                feature_list: Not used by this function.
                set_feature: Column selector applied to the training matrix
                    whenever it has non-zero length.
                sel_feature_num: Number of indices to return; ``0`` means the
                    count is derived from ``div_ratio`` instead.
                div_ratio: Divisor applied to the column count when
                    ``sel_feature_num`` is ``0``.

            Returns:
                Column positions ordered by decreasing
                ``feature_importances_``, truncated to the requested count.
                Positions refer to the columns of the training matrix *after*
                any ``set_feature`` subsetting.
            """
            upper_cut, lower_cut = high_th_year * 365, low_th_year * 365
            risky, safe = helper.get_risk_group(x, c, s, upper_cut, lower_cut)

            # Only a training split is drawn here; there is no validation set.
            samples, labels = helper.get_train(risky,
                                               safe,
                                               is_categori_y=False,
                                               seed=self.random_seed)

            # len() rather than truthiness: set_feature may be an array.
            if len(set_feature):
                samples = samples[:, set_feature]
            n_columns = samples.shape[1]

            # A request of 0 falls back to a fraction of the available columns.
            n_keep = (int(max(sel_feature_num, n_columns / div_ratio))
                      if sel_feature_num == 0 else sel_feature_num)

            # NOTE(review): the forest is built without an explicit
            # random_state, so the ranking may differ between runs -- confirm
            # whether self.random_seed should be forwarded here.
            forest = RandomForestClassifier()
            forest.fit(samples, labels)

            # argsort is ascending; reverse it so the most important come first.
            by_importance = np.argsort(forest.feature_importances_)[::-1]
            return by_importance[:n_keep]
Exemple #3
0
        def get_sel_idx(high_th_year, low_th_year, feature_list,
                        sel_feature_num):
            """Return the first ``sel_feature_num`` indices ranked by RFS.

            Args:
                high_th_year: High-risk threshold; multiplied by 365
                    (presumably years -> days) before grouping.
                low_th_year: Low-risk threshold, scaled the same way.
                feature_list: Not used by this function.
                sel_feature_num: Number of leading indices to return.

            Returns:
                A prefix of the ordering returned by
                ``RFS.rfs(..., mode='index', verbose=True)``.
            """
            upper_cut, lower_cut = high_th_year * 365, low_th_year * 365
            risky, safe = helper.get_risk_group(x, c, s, upper_cut, lower_cut)

            # Only a training split is drawn here; there is no validation set.
            samples, labels = helper.get_train(
                risky, safe, is_categori_y=False, seed=self.random_seed)

            # Progress markers printed around the RFS call.
            print('Into RFS fs...')
            ranking = RFS.rfs(samples, labels, mode='index', verbose=True)
            print('RFS fs done...')

            return ranking[:sel_feature_num]
 def get_sel_idx(high_th_year, low_th_year, feature_list,
                 sel_feature_num):
     """Return the first ``sel_feature_num`` indices ranked by ReliefF.

     Args:
         high_th_year: High-risk threshold; multiplied by 365
             (presumably years -> days) before grouping.
         low_th_year: Low-risk threshold, scaled the same way.
         feature_list: Not used by this function.
         sel_feature_num: Number of leading indices to return.

     Returns:
         A prefix of the ordering returned by
         ``reliefF.reliefF(..., mode='index')``.
     """
     upper_cut, lower_cut = high_th_year * 365, low_th_year * 365
     risky, safe = helper.get_risk_group(x, c, s, upper_cut, lower_cut)

     # Only a training split is drawn here; there is no validation set.
     samples, labels = helper.get_train(
         risky, safe, is_categori_y=False, seed=self.random_seed)

     ranking = reliefF.reliefF(samples, labels, mode='index')
     return ranking[:sel_feature_num]
Exemple #5
0
        def get_sel_idx(high_th_year, low_th_year, feature_list,
                        sel_feature_num):
            """Return the top features by linear-SVM coefficient magnitude.

            Args:
                high_th_year: High-risk threshold; multiplied by 365
                    (presumably years -> days) before grouping.
                low_th_year: Low-risk threshold, scaled the same way.
                feature_list: Not used by this function.
                sel_feature_num: Number of leading indices to return.

            Returns:
                Column positions ordered by decreasing absolute value of the
                row-averaged ``coef_`` of a fitted linear ``svm.SVC``,
                truncated to ``sel_feature_num`` entries.
            """
            upper_cut, lower_cut = high_th_year * 365, low_th_year * 365
            risky, safe = helper.get_risk_group(x, c, s, upper_cut, lower_cut)

            # Only a training split is drawn here; there is no validation set.
            samples, labels = helper.get_train(
                risky, safe, is_categori_y=False, seed=self.random_seed)

            classifier = svm.SVC(kernel='linear')
            classifier.fit(samples, labels)

            # Average the coefficient rows first, then take the magnitude.
            averaged = np.mean(classifier.coef_, axis=0)
            magnitude = np.abs(averaged)

            # argsort is ascending; reverse it so the largest come first.
            by_magnitude = np.argsort(magnitude)[::-1]
            return by_magnitude[:sel_feature_num]
Exemple #6
0
        def get_sel_idx(high_th_year, low_th_year, feature_list,
                        sel_feature_num):
            """Return the top features by random-forest importance.

            Args:
                high_th_year: High-risk threshold; multiplied by 365
                    (presumably years -> days) before grouping.
                low_th_year: Low-risk threshold, scaled the same way.
                feature_list: Not used by this function.
                sel_feature_num: Number of leading indices to return.

            Returns:
                Column positions ordered by decreasing
                ``feature_importances_`` of a fitted
                ``RandomForestClassifier``, truncated to ``sel_feature_num``
                entries.
            """
            upper_cut, lower_cut = high_th_year * 365, low_th_year * 365
            risky, safe = helper.get_risk_group(x, c, s, upper_cut, lower_cut)

            # Only a training split is drawn here; there is no validation set.
            samples, labels = helper.get_train(
                risky, safe, is_categori_y=False, seed=self.random_seed)

            # NOTE(review): the forest is built without an explicit
            # random_state, so the ranking may differ between runs -- confirm
            # whether self.random_seed should be forwarded here.
            forest = RandomForestClassifier()
            forest.fit(samples, labels)

            # argsort is ascending; reverse it so the most important come first.
            by_importance = np.argsort(forest.feature_importances_)[::-1]
            return by_importance[:sel_feature_num]
Exemple #7
0
        def get_sel_idx(high_th_year, low_th_year, feature_list,
                        sel_feature_num):
            """Return the top features ranked from an ``ll_l21`` weight matrix.

            Args:
                high_th_year: High-risk threshold; multiplied by 365
                    (presumably years -> days) before grouping.
                low_th_year: Low-risk threshold, scaled the same way.
                feature_list: Not used by this function.
                sel_feature_num: Number of leading indices to return.

            Returns:
                A prefix of ``feature_ranking(W)``, where ``W`` is the first
                value returned by ``ll_l21.proximal_gradient_descent`` called
                with ``z=0.01`` and ``mode='raw'``.
            """
            upper_cut, lower_cut = high_th_year * 365, low_th_year * 365
            risky, safe = helper.get_risk_group(x, c, s, upper_cut, lower_cut)

            # Only a training split is drawn here; there is no validation set.
            samples, labels = helper.get_train(
                risky, safe, is_categori_y=False, seed=self.random_seed)

            # Only the first of the three returned values is needed.
            weights, _, _ = ll_l21.proximal_gradient_descent(
                samples, labels, z=0.01, mode='raw')

            ranking = feature_ranking(weights)
            return ranking[:sel_feature_num]