def run_cv():
    """Run the 20-seed cross-validation comparison on simulated data.

    A synthetic two-group dataset is generated with ``SimulatedData`` and
    four experiments are run for each seed in ``range(20)``: a mixture
    model on the full dataset, one single-group model for 'WHITE', one
    for 'BLACK', and a supervised transfer-learning model on the full
    dataset.  The per-seed tables are printed as they are produced and
    the combined table is printed at the end.

    Returns:
        None.  Results are only printed.
    """
    factory = SimulatedData(
        num_var=200,
        pos=[0, 0, 0, 0],
        neg=[0, 0, 0, 0],
        neut=[100, 0, 0, 100],
    )
    dataset = factory.generate_black_white_data(
        'PanGyn-DFI-5-base.mat',
        white_alive=130,
        white_dead=130,
        black_alive=130,
        black_dead=130,
    )
    dataset_w = get_one_race_clf(dataset, 'WHITE')
    dataset_b = get_one_race_clf(dataset, 'BLACK')

    # NOTE(review): this normalized copy is built but never handed to any
    # runner below -- confirm whether the transfer run was meant to use it.
    dataset_tl = list(dataset)
    dataset_tl[0] = preprocessing.normalize(dataset_tl[0])

    # Forwarded as 'k' to every runner; presumably -1 disables feature
    # selection -- TODO confirm against the runner implementations.
    k = -1

    # Summarize the label / group composition before training.
    X, Y, R, y_sub, y_strat = dataset
    df = pd.DataFrame(y_strat, columns=['RY'])
    df['R'] = R
    df['Y'] = Y
    print(X.shape)
    print(df['RY'].value_counts())
    print(df['R'].value_counts())
    print(df['Y'].value_counts())

    params_mix = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'momentum': 0.9, 'learning_rate': 0.01, 'lr_decay': 0.0,
        'dropout': 0.5, 'L1_reg': 0.001, 'L2_reg': 0.001,
        'hiddenLayers': [128, 64],
    }
    params_w = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }
    params_b = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }
    params_tl = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 32,
        'train_epoch': 100, 'tune_epoch': 100, 'learning_rate': 0.01,
        'lr_decay': 0.0, 'dropout': 0.5, 'tune_lr': 0.001,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
        'tune_batch': 32,
    }

    # Collect the per-seed tables and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.
    frames = []
    for seed in range(20):
        df_m = run_mixture_cv(seed, dataset, **params_mix)
        df_w = run_one_race_cv(seed, dataset_w, **params_w)
        df_w = df_w.rename(columns={"Auc": "W_ind"})
        df_b = run_one_race_cv(seed, dataset_b, **params_b)
        df_b = df_b.rename(columns={"Auc": "B_ind"})
        df_tl = run_supervised_transfer_cv(seed, dataset, **params_tl)

        df1 = pd.concat(
            [df_m, df_w['W_ind'], df_b['B_ind'], df_tl['TL_Auc']],
            sort=False,
            axis=1,
        )
        print(df1)
        frames.append(df1)

    res = pd.concat(frames, sort=False)
    print(res)
def run_cv(cancer_type, feature_type, target, years=3,
           groups=("WHITE", "BLACK")):
    """Run the 20-seed comparison for one task and save it to Excel.

    For each seed in ``range(20)`` four experiments are run: a mixture
    model on the full dataset, a single-group model for ``groups[0]``,
    a single-group model for ``groups[1]``, and a supervised
    transfer-learning model on the full dataset.  The per-seed tables
    are printed, stacked, and written to
    ``Result/<cancer_type>-AA-EA-<feature_type[0]>-<target>-<years>YR.xlsx``.

    Args:
        cancer_type: Cancer identifier; forwarded to ``get_dataset`` and
            used in the output file name.
        feature_type: Feature identifiers; forwarded to ``get_dataset``.
            Its first element is used in the output file name, and
            ``k`` is set to 200 when it contains 'mRNA' (otherwise -1).
            Presumably a list/tuple of strings -- TODO confirm.
        target: Outcome identifier; forwarded to ``get_dataset`` and
            used in the output file name.
        years: Threshold passed to ``get_n_years``.
        groups: Pair of group labels.  ``groups[0]`` and ``groups[1]``
            select the two single-group datasets.

    Returns:
        None.  The combined table is written to the Excel file.

    Note:
        The '-AA-EA-' part of the file name and the 'W_ind' / 'A_ind'
        column names are fixed and do not follow ``groups``.
    """
    print(cancer_type, feature_type, target, years)

    # Forward the caller's groups: the single-group splits below index
    # into the same tuple, so loading must use it as well (this call
    # previously hard-coded ("WHITE", "BLACK") and ignored the argument).
    dataset = get_dataset(cancer_type=cancer_type,
                          feature_type=feature_type,
                          target=target,
                          groups=groups)
    dataset = standarize_dataset(dataset)

    # Split by group first, then apply the year threshold to each split
    # and finally to the full dataset (same order as before).
    dataset_w = get_one_race(dataset, groups[0])
    dataset_w = get_n_years(dataset_w, years)
    dataset_b = get_one_race(dataset, groups[1])
    dataset_b = get_n_years(dataset_b, years)
    dataset = get_n_years(dataset, years)

    k = 200 if 'mRNA' in feature_type else -1

    # Print how many samples fall into each value of the last dataset
    # element (named y_strat here).
    _, _, _, _, y_strat = dataset
    strat_counts = pd.DataFrame(y_strat, columns=['RY'])['RY'].value_counts()
    print(dict(strat_counts))

    params_mix = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'momentum': 0.9, 'learning_rate': 0.01, 'lr_decay': 0.03,
        'dropout': 0.5, 'L1_reg': 0.001, 'L2_reg': 0.001,
        'hiddenLayers': [128, 64],
    }
    params_w = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }
    params_b = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 4,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }
    params_tl = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 32,
        'tune_epoch': 100, 'train_epoch': 100, 'learning_rate': 0.01,
        'lr_decay': 0.0, 'dropout': 0.5, 'tune_lr': 0.01,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
        'tune_batch': 10,
    }

    # Collect the per-seed tables and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.
    frames = []
    for seed in range(20):
        df_m = run_mixture_cv(seed, dataset, **params_mix, groups=groups)
        df_w = run_one_race_cv(seed, dataset_w, **params_w)
        df_w = df_w.rename(columns={"Auc": "W_ind"})
        df_b = run_one_race_cv(seed, dataset_b, **params_b)
        df_b = df_b.rename(columns={"Auc": "A_ind"})
        df_tl = run_supervised_transfer_cv(seed, dataset, **params_tl,
                                           groups=groups)

        df1 = pd.concat(
            [df_m, df_w['W_ind'], df_b['A_ind'], df_tl['TL_Auc']],
            sort=False,
            axis=1,
        )
        print(df1)
        frames.append(df1)

    res = pd.concat(frames, sort=False)
    f_name = ('Result/' + cancer_type + '-AA-EA-' + feature_type[0] + '-'
              + target + '-' + str(years) + 'YR.xlsx')
    res.to_excel(f_name)
def run_cv():
    """Run the 20-seed comparison for MMRF / mRNA / OS / 3 years.

    The dataset is loaded with ``read_data('MMRF', 'mRNA', 'OS', 3)``.
    For each seed in ``range(20)`` four experiments are run: a mixture
    model on the full dataset, single-group models for 'WHITE' and
    'BLACK', and an unsupervised transfer-learning model on the full
    dataset.  Per-seed tables are printed, stacked, and written to
    ``Result/MM-AA-EA-mRNA-OS-3YR.xlsx``.

    Returns:
        None.  The combined table is written to the Excel file.
    """
    dataset = read_data('MMRF', 'mRNA', 'OS', 3)

    # Split by group first, then apply the 3-year threshold to each split
    # and finally to the full dataset (same order as before).
    dataset_w = get_one_race(dataset, 'WHITE')
    dataset_w = get_n_years(dataset_w, 3)
    dataset_b = get_one_race(dataset, 'BLACK')
    dataset_b = get_n_years(dataset_b, 3)
    dataset = get_n_years(dataset, 3)

    # Summarize the label / group composition before training.
    X, Y, R, y_sub, y_strat = dataset
    df = pd.DataFrame(y_strat, columns=['RY'])
    df['R'] = R
    df['Y'] = Y
    print(X.shape)
    print(df['RY'].value_counts())
    print(df['R'].value_counts())
    print(df['Y'].value_counts())

    # Only the 'BLACK' single-group run gets explicit hyper-parameters;
    # the other runners are called with fold=3 and their own defaults.
    k = -1
    params_b = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 4,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }

    # Collect the per-seed tables and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.
    frames = []
    for seed in range(20):
        df_m = run_mixture_cv(seed, dataset, fold=3)
        df_w = run_one_race_cv(seed, dataset_w, fold=3)
        df_w = df_w.rename(columns={"Auc": "W_ind"})
        df_b = run_one_race_cv(seed, dataset_b, **params_b)
        df_b = df_b.rename(columns={"Auc": "B_ind"})
        df_tl = run_unsupervised_transfer_cv(seed, dataset, fold=3)

        df1 = pd.concat(
            [df_m, df_w['W_ind'], df_b['B_ind'], df_tl['TL_Auc']],
            sort=False,
            axis=1,
        )
        print(df1)
        frames.append(df1)

    res = pd.concat(frames, sort=False)
    f_name = 'Result/MM-AA-EA-mRNA-OS-3YR.xlsx'
    res.to_excel(f_name)
def run_cv(cancer_type, feature_type, target, years=3):
    """Run the 20-seed comparison with CCSA transfer and save to Excel.

    For each seed in ``range(20)`` four experiments are run: a mixture
    model on the full dataset, single-group models for 'WHITE' and
    'BLACK', and a CCSA transfer model on a normalized copy of the
    dataset.  Per-seed tables are printed, stacked, and written to
    ``Result/<cancer_type>-AA-EA-<feature_type[0]>-<target>-<years>YR.xlsx``.

    Args:
        cancer_type: Cancer identifier; forwarded to ``get_dataset`` and
            used in the output file name.
        feature_type: Feature identifiers; forwarded to ``get_dataset``.
            Its first element is used in the output file name.
            Presumably a list/tuple of strings -- TODO confirm.
        target: Outcome identifier; forwarded to ``get_dataset`` and
            used in the output file name.
        years: Threshold passed to ``get_n_years``.

    Returns:
        None.  The combined table is written to the Excel file.
    """
    print(cancer_type, feature_type, target, years)
    dataset = get_dataset(cancer_type=cancer_type,
                          feature_type=feature_type,
                          target=target,
                          groups=("WHITE", "BLACK"))

    # Split by group and threshold each split; then build the normalized
    # copy for CCSA; threshold the full dataset last.  The order is kept
    # exactly as before because the helpers' side effects are not visible
    # from here.
    dataset_w = get_one_race(dataset, 'WHITE')
    dataset_w = get_n_years(dataset_w, years)
    dataset_b = get_one_race(dataset, 'BLACK')
    dataset_b = get_n_years(dataset_b, years)
    dataset_tl = normalize_dataset(dataset)
    dataset_tl = get_n_years(dataset_tl, years)
    dataset = get_n_years(dataset, years)

    # Forwarded as 'k' / 'n_features' to the runners; presumably -1 means
    # "use all features" -- TODO confirm against the runners.
    k = -1

    # Summarize the label / group composition before training.
    X, Y, R, y_sub, y_strat = dataset
    df = pd.DataFrame(y_strat, columns=['RY'])
    df['R'] = R
    df['Y'] = Y
    print(X.shape)
    print(df['RY'].value_counts())
    print(df['R'].value_counts())
    print(df['Y'].value_counts())

    params_mix = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }
    params_w = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }
    params_b = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 4,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }
    params_ccsa = {
        'fold': 3, 'n_features': k, 'alpha': 0.3, 'batch_size': 20,
        'learning_rate': 0.01, 'hiddenLayers': [100], 'dr': 0.0,
        'momentum': 0.9, 'decay': 0.0, 'sample_per_class': 2,
    }

    # Collect the per-seed tables and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.
    frames = []
    for seed in range(20):
        df_m = run_mixture_cv(seed, dataset, **params_mix)
        df_w = run_one_race_cv(seed, dataset_w, **params_w)
        df_w = df_w.rename(columns={"Auc": "W_ind"})
        df_b = run_one_race_cv(seed, dataset_b, **params_b)
        df_b = df_b.rename(columns={"Auc": "B_ind"})
        df_tl = run_CCSA_transfer(seed, dataset_tl, **params_ccsa)

        df1 = pd.concat(
            [df_m, df_w['W_ind'], df_b['B_ind'], df_tl['TL_Auc']],
            sort=False,
            axis=1,
        )
        print(df1)
        frames.append(df1)

    res = pd.concat(frames, sort=False)
    f_name = ('Result/' + cancer_type + '-AA-EA-' + feature_type[0] + '-'
              + target + '-' + str(years) + 'YR.xlsx')
    res.to_excel(f_name)
def run_cv(cancer_type, feature_type, target, years=3,
           groups=("WHITE", "BLACK")):
    """Run the 20-seed comparison for one task and return the results.

    The task is skipped (``None`` is returned) when the data is too
    small: fewer than 10 rows overall, fewer than 5 rows in either
    group, fewer than 4 distinct stratification values, or fewer than 5
    samples in any stratification value.

    Otherwise, for each seed in ``range(20)`` the following are run: a
    mixture model, a single-group model for ``groups[0]``, a
    single-group model for ``groups[1]``, supervised transfer,
    unsupervised transfer, and CCSA transfer (on a normalized copy of
    the dataset).  The per-seed tables are stacked and tagged with the
    task description.

    Args:
        cancer_type: Cancer identifier; forwarded to ``get_dataset`` and
            stored in the 'cancer_type' column.
        feature_type: Iterable of feature-name strings; forwarded to
            ``get_dataset`` and stored '-'-joined in the 'feature_type'
            column.  ``k`` is 200 when it contains 'mRNA' or
            'methylation', otherwise -1.
        target: Outcome identifier; forwarded to ``get_dataset`` and
            stored in the 'target' column.
        years: Threshold passed to ``get_n_years``; stored in the
            'years' column.
        groups: Pair of group labels.  Forwarded to ``get_dataset``;
            ``groups[0]`` and ``groups[1]`` select the two single-group
            datasets.

    Returns:
        A ``pandas.DataFrame`` with the stacked per-seed results plus
        the columns 'cancer_type', 'feature_type', 'target' and 'years',
        or ``None`` when the task is skipped.

    Note:
        The result column names 'W_ind' and 'B_ind' are fixed and do
        not follow ``groups``.
    """
    print(cancer_type, feature_type, target, years)
    dataset = get_dataset(cancer_type=cancer_type,
                          feature_type=feature_type,
                          target=target,
                          groups=groups)
    if dataset['X'].shape[0] < 10:
        return None
    dataset = standarize_dataset(dataset)

    # Select the single-group datasets with the same labels that were
    # used to load the data (previously hard-coded to 'WHITE'/'BLACK',
    # which only matched the default value of `groups`).
    dataset_w = get_one_race(dataset, groups[0])
    if dataset_w['X'].shape[0] < 5:
        return None
    dataset_w = get_n_years(dataset_w, years)

    dataset_b = get_one_race(dataset, groups[1])
    if dataset_b['X'].shape[0] < 5:
        return None
    dataset_b = get_n_years(dataset_b, years)

    # Normalized copy for CCSA; the full dataset is thresholded last.
    dataset_tl = normalize_dataset(dataset)
    dataset_tl = get_n_years(dataset_tl, years)
    dataset = get_n_years(dataset, years)

    if 'mRNA' in feature_type or 'methylation' in feature_type:
        k = 200
    else:
        k = -1

    X, Y, R, y_sub, y_strat = dataset
    df = pd.DataFrame(y_strat, columns=['RY'])
    df['R'] = R
    df['Y'] = Y
    print(X.shape)

    # Require 4 distinct stratification values with at least 5 samples
    # each; otherwise skip the task.
    strat_counts = df['RY'].value_counts()
    print(strat_counts)
    if len(strat_counts) < 4:
        return None
    strat_counts = dict(strat_counts)
    print(strat_counts)
    for key, count in strat_counts.items():
        print(key, count)
        if count < 5:
            return None

    params_mix = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'momentum': 0.9, 'learning_rate': 0.01, 'lr_decay': 0.03,
        'dropout': 0.5, 'L1_reg': 0.001, 'L2_reg': 0.001,
        'hiddenLayers': [128, 64],
    }
    params_w = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }
    params_b = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 4,
        'learning_rate': 0.01, 'lr_decay': 0.0, 'dropout': 0.5,
        'L1_reg': 0.001, 'L2_reg': 0.001, 'hiddenLayers': [128, 64],
    }
    params_tl = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'tune_epoch': 500, 'learning_rate': 0.01, 'lr_decay': 0.03,
        'dropout': 0.5, 'tune_lr': 0.002, 'L1_reg': 0.001,
        'L2_reg': 0.001, 'hiddenLayers': [128, 64], 'tune_batch': 10,
    }
    params_tl_unsupervised = {
        'fold': 3, 'k': k, 'val_size': 0.0, 'batch_size': 20,
        'learning_rate': 0.001, 'lr_decay': 0.03, 'dropout': 0.0,
        'n_epochs': 100, 'L1_reg': 0.001, 'L2_reg': 0.001,
        'hiddenLayers': [100],
    }
    params_ccsa = {
        'fold': 3, 'n_features': k, 'alpha': 0.3, 'batch_size': 32,
        'learning_rate': 0.01, 'hiddenLayers': [100], 'dr': 0.0,
        'momentum': 0.0, 'decay': 0.0, 'sample_per_class': 2,
    }

    # Collect the per-seed tables and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.
    frames = []
    for seed in range(20):
        df_m = run_mixture_cv(seed, dataset, **params_mix)
        df_w = run_one_race_cv(seed, dataset_w, **params_w)
        df_w = df_w.rename(columns={"Auc": "W_ind"})
        df_b = run_one_race_cv(seed, dataset_b, **params_b)
        df_b = df_b.rename(columns={"Auc": "B_ind"})

        df_tl_supervised = run_supervised_transfer_cv(
            seed, dataset, **params_tl)
        df_tl_supervised = df_tl_supervised.rename(
            columns={"TL_Auc": "XY_TL"})

        # NOTE(review): the unsupervised run is still executed but its
        # 'X_TL' column is not included in the combined table below --
        # confirm whether it should be added back or the call dropped.
        df_tl_unsupervised = run_unsupervised_transfer_cv(
            seed, dataset, **params_tl_unsupervised)
        df_tl_unsupervised = df_tl_unsupervised.rename(
            columns={"TL_Auc": "X_TL"})

        df_tl = run_CCSA_transfer(seed, dataset_tl, **params_ccsa)
        df_tl = df_tl.rename(columns={"TL_Auc": "CCSA_TL"})

        df1 = pd.concat(
            [
                df_m,
                df_w['W_ind'],
                df_b['B_ind'],
                df_tl['CCSA_TL'],
                df_tl_supervised['XY_TL'],
            ],
            sort=False,
            axis=1,
        )
        frames.append(df1)

    res = pd.concat(frames, sort=False)
    print(res)

    # Tag every row with the task description so results from several
    # tasks can be told apart by the caller.
    res['cancer_type'] = cancer_type
    res['feature_type'] = '-'.join(feature_type)
    res['target'] = target
    res['years'] = years
    return res