Beispiel #1
0
def test_time(input_folder):
    """Run subgraph mining for frequency threshold '011' on folds 0-4.

    An ``Experiment`` is constructed from ``<input_folder>/z`` and
    ``<input_folder>/dataset`` with the fixed settings below, and
    ``subgraph_mining`` is then called once per fold on that fold's ``.nel``
    files under ``e.cdn``.

    Args:
        input_folder: Base folder; ``/z`` and ``/dataset`` are appended to it
            to form the two folder arguments of ``run.Experiment``.
    """
    seed = 2222
    standardize_method = 'z'
    is_cz = False
    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011'
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014'
    }
    nel_graph_length = 13

    e = run.Experiment('%s/%s' % (input_folder, standardize_method),
                       '%s/dataset' % (input_folder), seed, is_cz,
                       standardize_method, freq_list, freq_to_trainFreq_map,
                       nel_graph_length)

    for foldi in range(5):
        # Only the two .nel files are passed to subgraph_mining.
        e.subgraph_mining(
            tr_nel="%s/mimic_train_fold%d.nel" % (e.cdn, foldi),
            tr_te_nel="%s/mimic_fold%d.nel" % (e.cdn, foldi),
            freq_t='011',
            foldi=foldi)
Beispiel #2
0
def check_interpolation_and_subgraphs():
    """Run subgraph mining for fold 2 with output in an observer folder.

    Creates the observer output folder and the experiment folder, builds an
    ``Experiment`` with the 'cz' settings below, prints the fold-2 train and
    test file names derived from the experiment, and calls
    ``subgraph_mining`` for frequency threshold '011' with ``cfolder`` set to
    the observer folder.

    NOTE(review): no interpolation step is run here, so the ``.nel`` inputs
    under the observer folder presumably have to exist already -- confirm.
    """
    ft = 'raw'
    minp = 0.5
    minc = 0.6
    seed = 2222

    # Alternative configuration: standardize_method = "z" with is_cz = False.
    standardize_method = "cz"
    is_cz = True

    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011'
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014'
    }
    nel_graph_length = 13

    fout = '../observer/check_interpolation_and_subgraphs/seed%s_%s_mice_mp%s_mc%s_%s' % (
        seed, ft, minp, minc, standardize_method)
    cu.checkAndCreate(fout)

    # Both Experiment folders share this prefix; build it once.
    base_folder = '../data/seed%s/%s/mice/mp%s_mc%s' % (seed, ft, minp, minc)
    experiment_folder = '%s/%s' % (base_folder, standardize_method)
    cu.checkAndCreate(experiment_folder)
    e = run.Experiment(experiment_folder, '%s/dataset' % base_folder, seed,
                       is_cz, standardize_method, freq_list,
                       freq_to_trainFreq_map, nel_graph_length)

    foldi = 2

    train = e.ftrain % (e.dataset_folder, foldi, e.standardize_method)
    test = e.ftest % (e.dataset_folder, foldi, e.standardize_method)

    # Single-argument print(...) prints the same text on Python 2 and 3.
    print(train)
    print(test)

    e.subgraph_mining(tr_nel="%s/mimic_train_fold%d.nel" % (fout, foldi),
                      tr_te_nel="%s/mimic_fold%d.nel" % (fout, foldi),
                      freq_t='011',
                      foldi=foldi,
                      cfolder=fout)
Beispiel #3
0
def check_nmfClassify(input_folder, output_folder, isg, freq_t, nc, c, pl, cw,
                      ntestth):
    """Run ``nmfClassify`` on folds 0-4 and print the mean test/train AUC.

    For every fold the prediction matrices are read from
    ``<e.cdn>/isg<isg>/same_freq_t/pt_sg_w`` and passed to
    ``Experiment.nmfClassify``; the per-fold results are then combined with
    ``Experiment.get_mean_auc``.

    Args:
        input_folder: Base folder; ``/z`` and ``/dataset`` are appended to it
            to form the two folder arguments of ``run.Experiment``.
        output_folder: Folder whose ``isg<isg>/nmf_piks`` subfolder is used
            for the per-fold ``.pik`` file path handed to ``nmfClassify``.
        isg: Integer used in the ``isg%d`` folder names and passed to
            ``read_prediction_matrics``.
        freq_t: Frequency threshold string passed to
            ``read_prediction_matrics`` and used in the ``.pik`` file name.
        nc: Integer used in the ``.pik`` file name and forwarded to
            ``nmfClassify``.
        c: Forwarded unchanged to ``nmfClassify``.
        pl: Forwarded unchanged to ``nmfClassify``.
        cw: Forwarded unchanged to ``nmfClassify``.
        ntestth: Forwarded unchanged to ``nmfClassify``.
    """
    seed = 2222
    standardize_method = 'z'
    is_cz = False
    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011'
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014'
    }
    nel_graph_length = 13
    e = run.Experiment('%s/%s' % (input_folder, standardize_method),
                       '%s/dataset' % (input_folder), seed, is_cz,
                       standardize_method, freq_list, freq_to_trainFreq_map,
                       nel_graph_length)

    res_list = []
    for foldi in range(5):
        prediction_matrics = e.read_prediction_matrics(
            isg, freq_t, cfolder='%s/isg%d/same_freq_t/pt_sg_w' % (e.cdn, isg))
        pik_path = '%s/isg%d/nmf_piks/nmf_%s_fold%d_%d_ramdon0-again.pik' % (
            output_folder, isg, freq_t, foldi, nc)
        res = e.nmfClassify(
            prediction_matrics['ptsg'][foldi],
            prediction_matrics['ptwd'][foldi],
            prediction_matrics['sgs'][foldi],
            prediction_matrics['pt'][foldi],
            prediction_matrics['gt'][foldi],
            pik_path, ntestth, foldi, nc, c, pl, cw)
        res_list.append(res)

    (auc, tr_auc) = e.get_mean_auc(res_list)
    # One pre-formatted string keeps the "auc tr_auc" output identical on
    # Python 2 and Python 3.
    print('%s %s' % (auc, tr_auc))
Beispiel #4
0
def run_best_model(cdn):
    """Run preprocessing, the temporal model and the baseline under ``cdn``.

    Steps, in order:
      1. Split ``<cdn>/alldata_readmit.csv`` into folds (``shuffle=True``,
         fixed seed) and split the result by feature type.
      2. For folds 0-4, run ``impute_by_interpolation_on_last12h`` on the raw
         test and train CSVs, then ``impute_by_mean`` and
         ``standardize_data``.
      3. Build an ``Experiment`` and, per fold, run ``interpolation``,
         ``get_freq_to_trainFreq_map``, and for each mined threshold
         ``subgraph_mining`` plus ``gen_pt_sg_files``.
      4. Write last-measurement CSVs and run ``rfe`` and ``lr`` on them.
      5. Run ``nmfClassify_ob`` per fold, print the mean AUCs and pickle each
         fold's result.

    All paths are derived from ``cdn`` and the fixed seed; with
    ``cdn='../data'`` they are identical to the previously hard-coded
    ``../data/seed2222/...`` locations.

    Args:
        cdn: Root data folder containing ``alldata_readmit.csv``.
    """
    ft = 'raw'
    seed = 2222
    standardize_method = 'z'
    is_cz = False

    seed_dir = '%s/seed%d' % (cdn, seed)
    raw_dir = '%s/raw' % seed_dir
    interp_dir = '%s/interp' % raw_dir
    dataset_dir = '%s/mean/dataset' % interp_dir
    last_measures_dir = '%s/mean/last_measures' % interp_dir
    last_dataset_dir = '%s/dataset' % last_measures_dir
    model_dir = '%s/seed%d/%s/interp/mean/%s' % (cdn, seed, ft,
                                                 standardize_method)

    cu.checkAndCreate(seed_dir)
    fold_prefix = '%s/alldata_readmit' % seed_dir
    pp.split_nfolds('%s/alldata_readmit.csv' % cdn,
                    fold_prefix,
                    shuffle=True,
                    seed=seed)
    pp.split_by_feature_type(cdn=seed_dir, fn_prefix=fold_prefix)

    cu.checkAndCreate(interp_dir)
    cu.checkAndCreate(dataset_dir)
    for i in range(5):
        # Test split first, then train, matching the established call order.
        for split in ('test', 'train'):
            pp.impute_by_interpolation_on_last12h(
                '%s/%s_fold%d.csv' % (raw_dir, split, i),
                '%s/%s_fold%d.csv' % (interp_dir, split, i),
                '%s/extrapolation_log_%s_fold%d.txt' % (interp_dir, split, i))

        imputed_train = '%s/train_fold%d.csv' % (dataset_dir, i)
        imputed_test = '%s/test_fold%d.csv' % (dataset_dir, i)
        pp.impute_by_mean('%s/train_fold%d.csv' % (interp_dir, i),
                          '%s/test_fold%d.csv' % (interp_dir, i),
                          imputed_train, imputed_test)
        pp.standardize_data(
            imputed_train, imputed_test,
            '%s/train_fold%d_%s.csv' % (dataset_dir, i, standardize_method),
            '%s/test_fold%d_%s.csv' % (dataset_dir, i, standardize_method))

    # run temporal model
    freq_list = ['011']
    freq_to_trainFreq_map = {'011': '014'}
    nel_graph_length = 13

    cu.checkAndCreate(model_dir)
    e = rn.Experiment(model_dir, dataset_dir, seed, is_cz, standardize_method,
                      freq_list, freq_to_trainFreq_map, nel_graph_length)

    isg = 0
    freq_t = '011'
    nc = 110
    c = 2
    pl = 'l1'
    cw = 'balanced'
    ntestth = 2

    cu.checkAndCreate('%s/isg%d' % (e.cdn, isg))
    cu.checkAndCreate('%s/isg%d/pt_sg_w' % (e.cdn, isg))
    cu.checkAndCreate('%s/isg%d/res' % (e.cdn, isg))
    cu.checkAndCreate('%s/isg%d/nmf_piks' % (e.cdn, isg))

    for foldi in range(5):

        train = e.ftrain % (e.dataset_folder, foldi, e.standardize_method)
        test = e.ftest % (e.dataset_folder, foldi, e.standardize_method)

        # Single-argument print(...) prints the same text on Python 2 and 3.
        print(train)
        print(test)

        ftrnel = "%s/mimic_train_fold%d.nel" % (e.cdn, foldi)
        ftrnode = "%s/mimic_train_fold%d.node" % (e.cdn, foldi)
        fnel = "%s/mimic_fold%d.nel" % (e.cdn, foldi)
        fnode = "%s/mimic_fold%d.node" % (e.cdn, foldi)

        e.interpolation(trcsv=train,
                        tecsv=test,
                        ftrnel=ftrnel,
                        ftrnode=ftrnode,
                        fnel=fnel,
                        fnode=fnode)

        e.get_freq_to_trainFreq_map(foldi)

        # NOTE(review): this loop rebinds freq_t, so the classification step
        # further down uses the last threshold iterated here (or '011' if the
        # list is empty) -- confirm that is intended.
        for freq_t in e.moss_freq_threshold_list:
            e.subgraph_mining(tr_nel=ftrnel,
                              tr_te_nel=fnel,
                              freq_t=freq_t,
                              foldi=foldi)

            e.gen_pt_sg_files(isg, freq_t, foldi)

    cu.checkAndCreate(last_dataset_dir)
    # run baseline model
    for i in range(5):
        for split in ('train', 'test'):
            fold_csv = '%s_fold%d_%s.csv' % (split, i, standardize_method)
            pp.get_last_measurements('%s/%s' % (dataset_dir, fold_csv),
                                     '%s/%s' % (last_dataset_dir, fold_csv))

    best_features = rfe(last_measures_dir, 50, standardize_method, 5, 'l1',
                        'balanced')
    print(best_features)

    baseline_auc = lr(last_measures_dir, standardize_method, 5, 'l1',
                      'balanced', 50)
    print('baseline AUC: %s' % baseline_auc)

    res_list = []
    for foldi in range(5):
        # Read the last-measurement files from where they were written above
        # instead of a fixed '../data/seed2222' location.
        fnaddtr = '%s/train_fold%d_%s.csv' % (last_dataset_dir, foldi,
                                              standardize_method)
        fnaddte = '%s/test_fold%d_%s.csv' % (last_dataset_dir, foldi,
                                             standardize_method)
        prediction_matrics = e.read_prediction_matrics(isg, freq_t)
        (res, _gt_te, _pt_te, _res_baseline) = e.nmfClassify_ob(
            prediction_matrics['ptsg'][foldi],
            prediction_matrics['ptwd'][foldi],
            prediction_matrics['sgs'][foldi], prediction_matrics['pt'][foldi],
            prediction_matrics['gt'][foldi],
            '%s/isg%d/nmf_piks/nmf_%s_fold%d_%d.pik' %
            (e.cdn, isg, freq_t, foldi, nc), ntestth, foldi, nc, c, pl, cw,
            fnaddtr, fnaddte, best_features)
        res_list.append(res)

    (auc, tr_auc) = e.get_mean_auc(res_list)
    # One pre-formatted string keeps the "auc tr_auc" output identical on
    # Python 2 and Python 3.
    print('%s %s' % (auc, tr_auc))

    # NOTE(review): assumes e.cdn is the model_dir passed to the constructor,
    # so this is the 'res' folder created above -- confirm.
    res_dir = '%s/isg%d/res' % (model_dir, isg)
    for i, res in enumerate(res_list):
        with open('%s/c_pre_te_fold%d' % (res_dir, i), 'wb') as f:
            pickle.dump(res['c_pre_te'], f)
        with open('%s/res_fold%d' % (res_dir, i), 'wb') as f:
            pickle.dump(res, f)
Beispiel #5
0
def get_res_and_oc(input_folder, output_folder):
    """Run ``nmfClassify`` on folds 0-4 and print the mean test/train AUC.

    Prediction matrices are read with ``read_prediction_matrics`` for the
    fixed ``isg``/``freq_t`` below, and the per-fold ``.pik`` path handed to
    ``nmfClassify`` lives under ``<e.cdn>/isg<isg>/nmf_piks``. Nothing is
    returned.

    Args:
        input_folder: Base folder; ``/z`` and ``/dataset`` are appended to it
            to form the two folder arguments of ``run.Experiment``.
        output_folder: Not used by this function; kept so existing callers
            keep working.
    """
    seed = 2222
    standardize_method = 'z'
    is_cz = False
    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011'
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014'
    }
    nel_graph_length = 13
    e = run.Experiment('%s/%s' % (input_folder, standardize_method),
                       '%s/dataset' % (input_folder), seed, is_cz,
                       standardize_method, freq_list, freq_to_trainFreq_map,
                       nel_graph_length)

    isg = 0
    freq_t = '011'
    nc = 110
    c = 2
    pl = 'l1'
    cw = 'balanced'
    ntestth = 2

    res_list = []
    for foldi in range(5):
        prediction_matrics = e.read_prediction_matrics(isg, freq_t)
        pik_path = '%s/isg%d/nmf_piks/nmf_%s_fold%d_%d.pik' % (
            e.cdn, isg, freq_t, foldi, nc)
        res = e.nmfClassify(
            prediction_matrics['ptsg'][foldi],
            prediction_matrics['ptwd'][foldi],
            prediction_matrics['sgs'][foldi],
            prediction_matrics['pt'][foldi],
            prediction_matrics['gt'][foldi],
            pik_path, ntestth, foldi, nc, c, pl, cw)
        res_list.append(res)

    (auc, tr_auc) = e.get_mean_auc(res_list)
    # One pre-formatted string keeps the "auc tr_auc" output identical on
    # Python 2 and Python 3.
    print('%s %s' % (auc, tr_auc))
Beispiel #6
0
def error_analysis(input_folder, output_folder):
    """Run ``nmfClassify_ob`` on folds 0-4 and pickle each fold's 'c_pre_te'.

    For every fold the prediction matrices and the baseline last-measurement
    CSVs (``*_t.csv`` under ``../../readmission_risk_baseline``) are passed to
    ``Experiment.nmfClassify_ob`` together with a fixed feature list. The
    mean test/train AUC is printed, and ``res['c_pre_te']`` of each fold is
    pickled to ``<output_folder>/c_pre_te_fold<i>``.

    Args:
        input_folder: Base folder; ``/z`` and ``/dataset`` are appended to it
            to form the two folder arguments of ``run.Experiment``.
        output_folder: Existing folder that receives the pickles; it is not
            created here.
    """
    seed = 2222
    standardize_method = 'z'
    is_cz = False
    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011'
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014'
    }
    nel_graph_length = 13
    e = run.Experiment('%s/%s' % (input_folder, standardize_method),
                       '%s/dataset' % (input_folder), seed, is_cz,
                       standardize_method, freq_list, freq_to_trainFreq_map,
                       nel_graph_length)

    # NMF settings. The DirClassify variant of this analysis used
    # freq_t = '004' and c = 1 with the same isg, pl, cw and ntestth.
    isg = 0
    freq_t = '011'
    nc = 110
    c = 2
    pl = 'l1'
    cw = 'balanced'
    ntestth = 2

    best_features = [
        'urineByHrByWeight', 'HCT', 'INR', 'Platelets', 'RBC',
        'DeliveredTidalVolume', 'PlateauPres', 'RAW', 'RSBI', 'mDBP', 'CV_HR',
        'Art_BE', 'Art_CO2', 'Art_PaCO2', 'Art_pH', 'Cl', 'Mg',
        'Anticoagulant', 'beta.Blocking_agent', 'Somatostatin_preparation',
        'Vasodilating_agent', 'AIDS', 'MetCarcinoma'
    ]

    baseline_dataset = (
        '../../readmission_risk_baseline/data/seed%d/raw/interp/mean/'
        'last_measures/dataset' % seed)

    res_list = []
    for foldi in range(5):
        fnaddtr = '%s/train_fold%d_%s_t.csv' % (baseline_dataset, foldi,
                                                standardize_method)
        fnaddte = '%s/test_fold%d_%s_t.csv' % (baseline_dataset, foldi,
                                               standardize_method)
        prediction_matrics = e.read_prediction_matrics(isg, freq_t)
        # Only the first element of the returned 4-tuple is used.
        (res, _gt_te, _pt_te, _res_baseline) = e.nmfClassify_ob(
            prediction_matrics['ptsg'][foldi],
            prediction_matrics['ptwd'][foldi],
            prediction_matrics['sgs'][foldi],
            prediction_matrics['pt'][foldi],
            prediction_matrics['gt'][foldi],
            '%s/isg%d/nmf_piks/nmf_%s_fold%d_%d.pik' %
            (e.cdn, isg, freq_t, foldi, nc),
            ntestth,
            foldi,
            nc,
            c,
            pl,
            cw,
            fnaddtr,
            fnaddte,
            best_features)
        res_list.append(res)

    (auc, tr_auc) = e.get_mean_auc(res_list)
    # One pre-formatted string keeps the "auc tr_auc" output identical on
    # Python 2 and Python 3.
    print('%s %s' % (auc, tr_auc))

    for i, res in enumerate(res_list):
        with open('%s/c_pre_te_fold%d' % (output_folder, i), 'wb') as f:
            pickle.dump(res['c_pre_te'], f)