def read_coincide_types_dataset(path="./"):
    """Load ``coincideTypes.csv`` from *path* and merge it with the treatment data.

    Parameters
    ----------
    path : str
        Directory that contains ``coincideTypes.csv``.

    Returns
    -------
    pandas.DataFrame
        Inner merge (on all shared columns) of the treatment dataset and the
        coincide-types table.
    """
    # NOTE(review): read_treat_dataset() is called without *path* — confirm
    # whether it should also receive this directory.
    coincide_table = pd.read_csv(os.path.join(path, 'coincideTypes.csv'))
    treatments = read_treat_dataset()
    return pd.merge(treatments, coincide_table)
from funs_common import read_treat_dataset
from collections import Counter


def count_val(ds, val):
    """Count 0s, 1s and everything else (e.g. NaN) in column *val* of *ds*.

    Parameters
    ----------
    ds : pandas.DataFrame
        Dataset holding the outcome column.
    val : str
        Column name to tally.

    Returns
    -------
    tuple
        ``(n_zeros, n_ones, n_other)``.
    """
    counts = Counter(ds[val])
    # Anything that is neither 0 nor 1 (typically missing values) is "other".
    return counts[0], counts[1], len(ds[val]) - counts[0] - counts[1]


def count_pcr_rfs_dfs(ds):
    """Print the 0/1/other tallies for the pCR, RFS and DFS outcome columns."""
    for col in ("pCR", "RFS", "DFS"):
        print(col, *count_val(ds, col))


def main():
    """Print per-study outcome tallies for the whole treatment dataset."""
    d = read_treat_dataset()
    # sorted() makes the report order deterministic; the original iterated a
    # raw set, whose order changes across runs (string hash randomization).
    for study in sorted(set(d['study'])):
        print(study)
        count_pcr_rfs_dfs(d.loc[d['study'] == study])
        print("")


# Guard the driver so importing this module no longer triggers dataset I/O.
if __name__ == "__main__":
    main()
# NOTE(review): the indented lines below, up to the argparse section, are the
# tail of a function whose definition starts before this chunk; their
# indentation is a best-effort reconstruction — confirm against the full file.
        ))
    else:
        # Fallback branch: report accuracy only, with '-' as the AUC placeholder.
        print(
            '\txstudy posOutcome accuracy: {0:3.3f} AUC: -'.format(
                np.mean(posoutcomes['test_acc'])
            ))

# --- script entry: evaluate classifiers per study on the merged dataset ---
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('embds_file', help='file with embeddings')
parser.add_argument('bmc_root', help='root bmc15mldata1.csv')
args = parser.parse_args()

# Merge treatment metadata with the embedding codes and gene expression table.
genes_dataset = pd.read_csv('data/ex15bmcMerged.csv.xz')
codes_dataset = pd.read_csv(args.embds_file)
treat_dataset = read_treat_dataset(args.bmc_root)
dataset = pd.merge(treat_dataset, codes_dataset)
dataset = pd.merge(dataset, genes_dataset)
#
# Map each study to a stable integer index for the loop below.
studies = dataset.study.unique()
studies_idx2name = {i: n for i, n in enumerate(studies)}

#train_accs, test_accs = [], []
logreg_outs = {}
xgb_outs = {}
for study_idx in studies_idx2name:
    print('\rSTUDY {0:02d} in processing'.format(study_idx), end='', flush=True)
    # NOTE(review): apply_test receives the full studies_idx2name mapping on
    # every iteration, so each pass gets identical arguments; confirm whether
    # studies_idx2name[study_idx] was intended here.
    logreg_out = apply_test(dataset, studies_idx2name, apply_logistic_regression)
    logreg_outs[study_idx] = logreg_out
    xgb_out = apply_test(dataset, studies_idx2name, apply_xgb)
    xgb_outs[study_idx] = xgb_out
print()