import os
import pandas as pd
from funs_common import read_treat_dataset

def read_coincide_types_dataset(path="./"):
    dataset = pd.read_csv(os.path.join(path, 'coincideTypes.csv'))
    treat_dataset = read_treat_dataset()
    return pd.merge(treat_dataset, dataset)
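# --- Hedged usage sketch (not part of the original snippet) ---
# Assumes coincideTypes.csv and the files read by funs_common.read_treat_dataset
# live under the default "./" path; the printed peek is illustrative only.
merged = read_coincide_types_dataset()
print(merged.shape)
print(list(merged.columns)[:10])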
Example no. 2
from funs_common import read_treat_dataset
from collections import Counter

def count_val(ds, val):
    # Count how many 0s, 1s, and other/missing values the column `val` contains.
    c = Counter(ds[val])
    return c[0], c[1], len(ds[val]) - c[0] - c[1]

def count_pcr_rfs_dfs(ds):
    # Print the 0/1/other counts for each outcome column.
    print("pCR", *count_val(ds, "pCR"))
    print("RFS", *count_val(ds, "RFS"))
    print("DFS", *count_val(ds, "DFS"))
    
d = read_treat_dataset()

all_studies = list(set(d['study']))

# Report the outcome counts separately for every study in the treatment table.
for study in all_studies:
    print(study)
    ds = d.loc[d['study'] == study]
    count_pcr_rfs_dfs(ds)
    print("")
Example no. 3
            ))
    else:
        print(
            '\txstudy posOutcome accuracy: {0:3.3f} AUC: -'.format(
                np.mean(posoutcomes['test_acc'])
            ))


parser = argparse.ArgumentParser(
    description='Per-study evaluation of logistic regression and XGBoost on merged expression and embedding data.')
parser.add_argument('embds_file', help='file with embeddings')
parser.add_argument('bmc_root', help='root directory containing bmc15mldata1.csv')
args = parser.parse_args()

genes_dataset = pd.read_csv('data/ex15bmcMerged.csv.xz')
codes_dataset = pd.read_csv(args.embds_file)
treat_dataset = read_treat_dataset(args.bmc_root)
dataset = pd.merge(treat_dataset, codes_dataset)
dataset = pd.merge(dataset, genes_dataset)  # treatment labels + embeddings + gene expression

studies = dataset.study.unique()
studies_idx2name = {i: n for i, n in enumerate(studies)}
#train_accs, test_accs = [], []
logreg_outs = {}
xgb_outs = {}
# Run both models for every study index and collect their outputs.
for study_idx in studies_idx2name:
    print('\rSTUDY {0:02d} in processing'.format(study_idx), end='', flush=True)
    logreg_out = apply_test(dataset, studies_idx2name, apply_logistic_regression)
    logreg_outs[study_idx] = logreg_out
    xgb_out = apply_test(dataset, studies_idx2name, apply_xgb)
    xgb_outs[study_idx] = xgb_out
print()
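# --- Hedged summary sketch (not part of the original snippet) ---
# The truncated fragment at the top averages posoutcomes['test_acc'], so each
# collected result is assumed to expose a 'test_acc' value or sequence; the
# per-study report below only illustrates how logreg_outs / xgb_outs could be
# summarised.
import numpy as np

for study_idx, name in studies_idx2name.items():
    logreg_acc = np.mean(logreg_outs[study_idx]['test_acc'])
    xgb_acc = np.mean(xgb_outs[study_idx]['test_acc'])
    print('{0}\tlogreg acc: {1:3.3f}\txgb acc: {2:3.3f}'.format(
        name, logreg_acc, xgb_acc))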