def get_cascaded_sel_idx(high_th_year, low_th_year, feature_list,
                         set_feature, sel_feature_num, div_ratio=4):
    """Pick feature indices by trace-ratio ranking on the risk-group training data.

    The names ``x``, ``c``, ``s``, ``self``, ``helper`` and ``trace_ratio``
    are read from the enclosing scope, which is not visible in this block.

    Args:
        high_th_year: High-risk threshold in years; multiplied by 365 before
            being handed to ``helper.get_risk_group``.
        low_th_year: Low-risk threshold in years; converted the same way.
        feature_list: Not referenced anywhere in the body.
        set_feature: Column selector. When it has non-zero length the training
            matrix is restricted to these columns before ranking.
        sel_feature_num: Number of indices to return. ``0`` means "derive the
            count from the number of columns and ``div_ratio``".
        div_ratio: Divisor applied to the column count when
            ``sel_feature_num`` is ``0``.

    Returns:
        The leading ``sel_feature_num`` (or derived count) entries of the
        index ordering produced by ``trace_ratio.trace_ratio``.
        NOTE(review): presumably ordered best-first -- confirm against the
        ``trace_ratio`` implementation.
    """
    # Thresholds arrive in years; the helper is given them multiplied by 365.
    high_days, low_days = high_th_year * 365, low_th_year * 365
    high_group, low_group = helper.get_risk_group(
        x, c, s, high_days, low_days)

    # Only a training set is built here (no validation split).
    samples, labels = helper.get_train(
        high_group, low_group,
        is_categori_y=False, seed=self.random_seed)

    # After this restriction, the returned indices are positions within the
    # restricted matrix, not within the original column set.
    if len(set_feature):
        samples = samples[:, set_feature]

    total_features = samples.shape[1]
    keep = (sel_feature_num if sel_feature_num != 0
            else int(max(sel_feature_num, total_features / div_ratio)))

    ranking = trace_ratio.trace_ratio(samples, labels, mode='index')
    return ranking[:keep]
def get_cascaded_sel_idx(high_th_year, low_th_year, feature_list,
                         set_feature, sel_feature_num, div_ratio=4):
    """Pick feature indices by random-forest importance on the risk-group training data.

    The names ``x``, ``c``, ``s``, ``self``, ``helper``, ``np`` and
    ``RandomForestClassifier`` are read from the enclosing scope, which is
    not visible in this block.

    Args:
        high_th_year: High-risk threshold in years; multiplied by 365 before
            being handed to ``helper.get_risk_group``.
        low_th_year: Low-risk threshold in years; converted the same way.
        feature_list: Not referenced anywhere in the body.
        set_feature: Column selector. When it has non-zero length the training
            matrix is restricted to these columns before fitting.
        sel_feature_num: Number of indices to return. ``0`` means "derive the
            count from the number of columns and ``div_ratio``".
        div_ratio: Divisor applied to the column count when
            ``sel_feature_num`` is ``0``.

    Returns:
        Column indices sorted by descending ``feature_importances_``,
        truncated to the requested (or derived) count.
    """
    # Thresholds arrive in years; the helper is given them multiplied by 365.
    high_days, low_days = high_th_year * 365, low_th_year * 365
    high_group, low_group = helper.get_risk_group(
        x, c, s, high_days, low_days)

    # Only a training set is built here (no validation split).
    samples, labels = helper.get_train(
        high_group, low_group,
        is_categori_y=False, seed=self.random_seed)

    # After this restriction, the returned indices are positions within the
    # restricted matrix, not within the original column set.
    if len(set_feature):
        samples = samples[:, set_feature]

    total_features = samples.shape[1]
    keep = (sel_feature_num if sel_feature_num != 0
            else int(max(sel_feature_num, total_features / div_ratio)))

    # NOTE(review): the forest is constructed with default arguments, so it
    # does not receive self.random_seed -- confirm whether run-to-run
    # variation in the ranking is acceptable.
    forest = RandomForestClassifier()
    forest.fit(samples, labels)

    # argsort is ascending; reverse it so the largest importance comes first.
    descending = np.argsort(forest.feature_importances_)[::-1]
    return descending[:keep]
def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
    """Return the first ``sel_feature_num`` feature indices ranked by ``RFS.rfs``.

    The names ``x``, ``c``, ``s``, ``self``, ``helper`` and ``RFS`` are read
    from the enclosing scope, which is not visible in this block.

    Args:
        high_th_year: High-risk threshold in years; multiplied by 365 before
            being handed to ``helper.get_risk_group``.
        low_th_year: Low-risk threshold in years; converted the same way.
        feature_list: Not referenced anywhere in the body.
        sel_feature_num: How many leading entries of the ranking to return.

    Returns:
        A slice of the index ordering produced by ``RFS.rfs``.
        NOTE(review): presumably ordered best-first -- confirm against the
        ``RFS`` implementation.
    """
    # Thresholds arrive in years; the helper is given them multiplied by 365.
    high_days, low_days = high_th_year * 365, low_th_year * 365
    high_group, low_group = helper.get_risk_group(
        x, c, s, high_days, low_days)

    # Only a training set is built here (no validation split).
    samples, labels = helper.get_train(
        high_group, low_group,
        is_categori_y=False, seed=self.random_seed)

    # Progress messages bracket the selector call.
    print('Into RFS fs...')
    ranking = RFS.rfs(samples, labels, mode='index', verbose=True)
    print('RFS fs done...')

    return ranking[:sel_feature_num]
def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
    """Return the first ``sel_feature_num`` feature indices ranked by ReliefF.

    The names ``x``, ``c``, ``s``, ``self``, ``helper`` and ``reliefF`` are
    read from the enclosing scope, which is not visible in this block.

    Args:
        high_th_year: High-risk threshold in years; multiplied by 365 before
            being handed to ``helper.get_risk_group``.
        low_th_year: Low-risk threshold in years; converted the same way.
        feature_list: Not referenced anywhere in the body.
        sel_feature_num: How many leading entries of the ranking to return.

    Returns:
        A slice of the index ordering produced by ``reliefF.reliefF``.
        NOTE(review): presumably ordered best-first -- confirm against the
        ``reliefF`` implementation.
    """
    # Thresholds arrive in years; the helper is given them multiplied by 365.
    high_days, low_days = high_th_year * 365, low_th_year * 365
    high_group, low_group = helper.get_risk_group(
        x, c, s, high_days, low_days)

    # Only a training set is built here (no validation split).
    samples, labels = helper.get_train(
        high_group, low_group,
        is_categori_y=False, seed=self.random_seed)

    ranking = reliefF.reliefF(samples, labels, mode='index')
    return ranking[:sel_feature_num]
def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
    """Return the top ``sel_feature_num`` feature indices by linear-SVM weight magnitude.

    The names ``x``, ``c``, ``s``, ``self``, ``helper``, ``svm`` and ``np``
    are read from the enclosing scope, which is not visible in this block.

    Args:
        high_th_year: High-risk threshold in years; multiplied by 365 before
            being handed to ``helper.get_risk_group``.
        low_th_year: Low-risk threshold in years; converted the same way.
        feature_list: Not referenced anywhere in the body.
        sel_feature_num: How many leading entries of the ranking to return.

    Returns:
        Column indices sorted by descending absolute value of the
        row-averaged ``coef_`` of the fitted classifier, truncated to
        ``sel_feature_num`` entries.
    """
    # Thresholds arrive in years; the helper is given them multiplied by 365.
    high_days, low_days = high_th_year * 365, low_th_year * 365
    high_group, low_group = helper.get_risk_group(
        x, c, s, high_days, low_days)

    # Only a training set is built here (no validation split).
    samples, labels = helper.get_train(
        high_group, low_group,
        is_categori_y=False, seed=self.random_seed)

    classifier = svm.SVC(kernel='linear')
    classifier.fit(samples, labels)

    # Average the coefficient rows first, then take the magnitude; the order
    # of these two steps is kept exactly as in the original.
    averaged = np.mean(classifier.coef_, axis=0)
    magnitude = np.abs(averaged)

    # argsort is ascending; reverse it so the largest magnitude comes first.
    descending = np.argsort(magnitude)[::-1]
    return descending[:sel_feature_num]
def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
    """Return the top ``sel_feature_num`` feature indices by random-forest importance.

    The names ``x``, ``c``, ``s``, ``self``, ``helper``, ``np`` and
    ``RandomForestClassifier`` are read from the enclosing scope, which is
    not visible in this block.

    Args:
        high_th_year: High-risk threshold in years; multiplied by 365 before
            being handed to ``helper.get_risk_group``.
        low_th_year: Low-risk threshold in years; converted the same way.
        feature_list: Not referenced anywhere in the body.
        sel_feature_num: How many leading entries of the ranking to return.

    Returns:
        Column indices sorted by descending ``feature_importances_``,
        truncated to ``sel_feature_num`` entries.
    """
    # Thresholds arrive in years; the helper is given them multiplied by 365.
    high_days, low_days = high_th_year * 365, low_th_year * 365
    high_group, low_group = helper.get_risk_group(
        x, c, s, high_days, low_days)

    # Only a training set is built here (no validation split).
    samples, labels = helper.get_train(
        high_group, low_group,
        is_categori_y=False, seed=self.random_seed)

    # NOTE(review): the forest is constructed with default arguments, so it
    # does not receive self.random_seed -- confirm whether run-to-run
    # variation in the ranking is acceptable.
    forest = RandomForestClassifier()
    forest.fit(samples, labels)

    # argsort is ascending; reverse it so the largest importance comes first.
    descending = np.argsort(forest.feature_importances_)[::-1]
    return descending[:sel_feature_num]
def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
    """Return the first ``sel_feature_num`` feature indices ranked from an ``ll_l21`` weight matrix.

    The names ``x``, ``c``, ``s``, ``self``, ``helper``, ``ll_l21`` and
    ``feature_ranking`` are read from the enclosing scope, which is not
    visible in this block.

    Args:
        high_th_year: High-risk threshold in years; multiplied by 365 before
            being handed to ``helper.get_risk_group``.
        low_th_year: Low-risk threshold in years; converted the same way.
        feature_list: Not referenced anywhere in the body.
        sel_feature_num: How many leading entries of the ranking to return.

    Returns:
        A slice of the ordering produced by ``feature_ranking`` applied to
        the first value returned by ``ll_l21.proximal_gradient_descent``.
        NOTE(review): presumably ordered best-first -- confirm against the
        ``feature_ranking`` implementation.
    """
    # Thresholds arrive in years; the helper is given them multiplied by 365.
    high_days, low_days = high_th_year * 365, low_th_year * 365
    high_group, low_group = helper.get_risk_group(
        x, c, s, high_days, low_days)

    # Only a training set is built here (no validation split).
    samples, labels = helper.get_train(
        high_group, low_group,
        is_categori_y=False, seed=self.random_seed)

    # The solver returns three values; only the first one is used.
    # z=0.01 is passed through unchanged.
    # NOTE(review): z looks like a regularisation strength -- confirm.
    weights, _, _ = ll_l21.proximal_gradient_descent(
        samples, labels, z=0.01, mode='raw')

    ranking = feature_ranking(weights)
    return ranking[:sel_feature_num]