Example #1
def predict_LME(model_path, outdir, X, y, dv, partition_name, model_name=''):
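    # Note: `formula` and `args` below are referenced from module scope in the original script.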
    stderr('Retrieving saved model %s...\n' % model_path)
    with open(model_path, 'rb') as m_file:
        lme = pickle.load(m_file)

    summary = '=' * 50 + '\n'
    summary += 'LME regression\n\n'
    summary += 'Model name: %s\n\n' % model_name
    summary += 'Formula:\n'
    summary += '  ' + formula + '\n'
    summary += str(lme.summary()) + '\n'

    if args.mode in [None, 'response']:
        lme_preds = lme.predict(X)

        with open(outdir + '/%spreds_%s.txt' % ('' if model_name=='' else model_name + '_', partition_name), 'w') as p_file:
            for i in range(len(lme_preds)):
                p_file.write(str(lme_preds[i]) + '\n')
        losses = np.array(y[dv] - lme_preds) ** 2
        with open(outdir + '/%slosses_mse_%s.txt' % ('' if model_name=='' else model_name + '_', partition_name), 'w') as p_file:
            for i in range(len(losses)):
                p_file.write(str(losses[i]) + '\n')
        lme_mse = mse(y[dv], lme_preds)
        lme_mae = mae(y[dv], lme_preds)

        summary += 'Loss (%s set):\n' % partition_name
        summary += '  MSE: %.4f\n' % lme_mse
        summary += '  MAE: %.4f\n' % lme_mae

    summary += '=' * 50 + '\n'
    with open(outdir + '/%seval_%s.txt' % ('' if model_name=='' else model_name + '_', partition_name), 'w') as f_out:
        f_out.write(summary)
    stderr(summary)
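The helpers `stderr`, `mse`, and `mae` used above come from the project's own utilities; as a rough, assumption-labeled stand-in (not the originals), they behave like this minimal sketch:

import sys
import numpy as np

# Hypothetical stand-ins for the project's helpers.
def stderr(msg):
    sys.stderr.write(msg)
    sys.stderr.flush()

def mse(y_true, y_pred):
    return float(np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2))

def mae(y_true, y_pred):
    return float(np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred))))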
Example #2
File: pt.py Project: mindis/cdr
                else:
                    basenames_to_pool = sorted(
                        set(basenames_to_pool).intersection(set(basenames_to_pool_cur))
                    )
                exps_outdirs.append(p.outdir)
        else:
            comparison_sets = {None: cdr_models}

        if not args.pool:
            for s in comparison_sets:
                model_set = comparison_sets[s]
                if len(model_set) > 1:
                    if s is not None:
                        stderr('Comparing models within ablation set "%s"...\n' % s)
                    for i in range(len(model_set)):
                        m1 = model_set[i]
                        p.set_model(m1)

                        for j in range(i + 1, len(model_set)):
                            m2 = model_set[j]
                            is_nested = nested(m1, m2)
                            if is_nested or not args.ablation:
                                if is_nested:
                                    if m1.count('!') > m2.count('!'):
                                        a_model = m1
                                        b_model = m2
                                    else:
                                        a_model = m2
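For context, the ablation bookkeeping above hinges on '!' markers in model names: more '!' means more ablated terms, i.e., the nested model. A minimal sketch of that pairing logic, with hypothetical model names:

from itertools import combinations

# Hypothetical model names; '!' marks an ablated predictor.
model_set = ['CDR_full', 'CDR_full!x1', 'CDR_full!x1!x2']
for m1, m2 in combinations(model_set, 2):
    # The model with more ablated terms is treated as the nested ("a") model.
    a_model, b_model = (m1, m2) if m1.count('!') > m2.count('!') else (m2, m1)
    print('nested:', a_model, '| full:', b_model)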
Example #3
File: lmer.py Project: mindis/cdr
        models = [
            x for x in models if (x.startswith('CDR') or x.startswith('DTSR'))
        ]

        partitions = get_partition_list(args.partition)
        partition_str = '-'.join(partitions)

        for m in models:
            dir_path = p.outdir + '/' + m
            if args.ablated_models:
                data_path = dir_path + '/X_conv_' + partition_str + '.csv'
            else:
                data_path = (p.outdir + '/' + m.split('!')[0] +
                             '/X_conv_' + partition_str + '.csv')

            stderr('Two-step analysis using data file %s\n' % data_path)

            if os.path.exists(data_path):
                p.set_model(m)
                f = Formula(p['formula'])
                model_form = f.to_lmer_formula_string(
                    z=args.zscore, correlated=not args.uncorrelated)
                model_form = model_form.replace('-', '_')

                is_lme = '|' in model_form

                df = pd.read_csv(data_path, sep=' ', skipinitialspace=True)
                for c in df.columns:
                    if df[c].dtype.name == 'object':
                        df[c] = df[c].astype(str)
Example #4
File: plot.py Project: mindis/cdr
                         plot_y_inches=p['plot_y_inches']
                         if y_inches is None else y_inches,
                         ylim=args.ylim,
                         cmap=p['cmap'] if cmap is None else cmap,
                         dpi=args.dpi,
                         legend=legend,
                         xlab=args.xlab,
                         ylab=args.ylab,
                         use_line_markers=args.markers,
                         transparent_background=args.transparent_background,
                         dump_source=args.dump_source)

        for m in models:
            p.set_model(m)

            stderr('Retrieving saved model %s...\n' % m)
            cdr_model = load_cdr(p.outdir + '/' + m)

            kwargs = {
                'plot_n_time_units': p['plot_n_time_units'] if n_time_units is None else n_time_units,
                'plot_n_time_points': p['plot_n_time_points'] if resolution is None else resolution,
                'plot_x_inches': p['plot_x_inches'] if x_inches is None else x_inches,
                'plot_y_inches': p['plot_y_inches'] if y_inches is None else y_inches,
                'cmap': p['cmap'] if cmap is None else cmap,
                'dpi':
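The kwargs assembly above follows one pattern throughout: a command-line override wins when it is given, otherwise the value stored in the config is used. A small sketch of that fallback (names and values below are hypothetical):

def resolve(config, overrides):
    # Prefer an explicit override; fall back to the configured value.
    return {k: (config[k] if overrides.get(k) is None else overrides[k]) for k in config}

config = {'plot_n_time_units': 2.5, 'cmap': 'gist_rainbow', 'dpi': 150}
overrides = {'cmap': 'viridis'}
print(resolve(config, overrides))  # {'plot_n_time_units': 2.5, 'cmap': 'viridis', 'dpi': 150}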
Example #5
            evaluation_set_paths.append((X_paths, y_paths))

        for d in range(len(evaluation_sets)):
            X, y, select, X_response_aligned_predictor_names, X_response_aligned_predictors, X_2d_predictor_names, X_2d_predictors = evaluation_sets[d]
            partition_str = evaluation_set_names[d]

            for m in cdr_models:
                formula = p.models[m]['formula']

                dv = formula.strip().split('~')[0].strip()
                y_valid, select_y_valid = filter_invalid_responses(y, dv)
                X_response_aligned_predictors_valid = X_response_aligned_predictors
                if X_response_aligned_predictors_valid is not None:
                    X_response_aligned_predictors_valid = X_response_aligned_predictors_valid[select_y_valid]

                stderr('Retrieving saved model %s...\n' % m)
                cdr_model = load_cdr(p.outdir + '/' + m)

                X_conv, X_conv_summary = cdr_model.convolve_inputs(
                    X,
                    y_valid,
                    X_response_aligned_predictor_names=X_response_aligned_predictor_names,
                    X_response_aligned_predictors=X_response_aligned_predictors_valid,
                    X_2d_predictor_names=X_2d_predictor_names,
                    X_2d_predictors=X_2d_predictors,
                    scaled=not args.unscaled,
                    n_samples=args.nsamples,
                    algorithm=args.algorithm,
                    standardize_response=args.standardize_response
                )
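The key detail above is that the row mask returned by `filter_invalid_responses` is reused to subset the response-aligned predictors, so the two stay in register. A minimal sketch of that idea on hypothetical data:

import numpy as np
import pandas as pd

# Hypothetical response table and response-aligned predictor table.
y = pd.DataFrame({'rt': [0.31, np.nan, 0.48]})
aligned = pd.DataFrame({'surprisal': [1.2, 0.7, 2.1]})

select_y_valid = np.isfinite(y['rt'].values)  # boolean row mask
y_valid = y[select_y_valid]
aligned_valid = aligned[select_y_valid]       # same mask keeps rows aligned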
Example #6
File: train.py Project: mindis/cdr
    if not p.use_gpu_if_available:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    models = filter_models(p.model_list, args.models)

    run_baseline = False
    run_cdr = False
    for m in models:
        if not run_baseline and (m.startswith('LM') or m.startswith('GAM')):
            run_baseline = True
        elif not run_cdr and (m.startswith('CDR') or m.startswith('DTSR')):
            run_cdr = True

    if not (run_baseline or run_cdr):
        stderr('No models to run. Exiting...\n')
        exit()

    cdr_formula_list = [
        Formula(p.models[m]['formula']) for m in models
        if (m.startswith('CDR') or m.startswith('DTSR'))
    ]
    cdr_formula_name_list = [
        m for m in p.model_list
        if (m.startswith('CDR') or m.startswith('DTSR'))
    ]
    all_rangf = [v for x in cdr_formula_list for v in x.rangf]
    partitions = get_partition_list(args.partition)
    X_paths, y_paths = paths_from_partition_cliarg(partitions, p)
    X, y = read_data(X_paths,
                     y_paths,
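Model names drive which pipelines run: 'LM'/'GAM' prefixes trigger the baselines and 'CDR'/'DTSR' prefixes trigger CDR fitting. A compact sketch of that classification (model names below are hypothetical):

models = ['LMfull', 'GAMbaseline', 'CDRmain', 'DTSRlegacy']
run_baseline = any(m.startswith(('LM', 'GAM')) for m in models)
run_cdr = any(m.startswith(('CDR', 'DTSR')) for m in models)
print(run_baseline, run_cdr)  # True True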
Example #7
                dv = formula.strip().split('~')[0].strip()

                ## For some reason, GAM can't predict using custom functions, so we have to translate them
                z_term = re.compile(r'z\.\((.*)\)')
                c_term = re.compile(r'c\.\((.*)\)')
                formula = [
                    t.strip() for t in formula.strip().split()
                    if t.strip() != ''
                ]
                for i in range(len(formula)):
                    formula[i] = z_term.sub(r'scale(\1)', formula[i])
                    formula[i] = c_term.sub(r'scale(\1, scale=FALSE)', formula[i])
                formula = ' '.join(formula)

                stderr('Retrieving saved model %s...\n' % m)
                with open(p.outdir + '/' + m + '/m.obj', 'rb') as m_file:
                    gam = pickle.load(m_file)
                gam_preds = gam.predict(X_baseline)
                with open(p.outdir + '/' + m + '/preds_%s.txt' % partition_str,
                          'w') as p_file:
                    for i in range(len(gam_preds)):
                        p_file.write(str(gam_preds[i]) + '\n')
                losses = np.array(y[dv] - gam_preds)**2
                with open(
                        p.outdir + '/' + m +
                        '/losses_mse_%s.txt' % partition_str, 'w') as p_file:
                    for i in range(len(losses)):
                        p_file.write(str(losses[i]) + '\n')
                gam_mse = mse(y[dv], gam_preds)
                gam_mae = mae(y[dv], gam_preds)
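The regex rewrite above maps the custom `z.()` and `c.()` transforms onto R's `scale()` so the GAM can evaluate them. A small sketch of that substitution on a hypothetical formula string (non-greedy groups are used here because the whole string is rewritten at once, whereas the snippet works token by token):

import re

z_term = re.compile(r'z\.\((.*?)\)')
c_term = re.compile(r'c\.\((.*?)\)')

formula = 'y ~ z.(x1) + c.(x2)'                           # hypothetical formula
formula = z_term.sub(r'scale(\1)', formula)               # z.(x) -> scale(x)
formula = c_term.sub(r'scale(\1, scale=FALSE)', formula)  # c.(x) -> centering only
print(formula)  # y ~ scale(x1) + scale(x2, scale=FALSE)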
Example #8
File: rmsd.py Project: mindis/cdr
        models = filter_models(p.model_list, args.models, cdr_only=True)

        synth_path = os.path.dirname(os.path.dirname(p.X_train)) + '/d.obj'
        if not os.path.exists(synth_path):
            raise ValueError('Path to synth data %s does not exist. Check to make sure that model is fitted to synthetic data and that paths are correct in the config file.' % synth_path)
        with open(synth_path, 'rb') as f:
            d = pickle.load(f)
        def gold_irf_lambda(x):
            return d.irf(x, coefs=True)

        for m in models:
            p.set_model(m)
            formula = p.models[m]['formula']

            stderr('Retrieving saved model %s...\n' % m)
            cdr_model = load_cdr(p.outdir + '/' + m)

            stderr('Computing RMSD...\n')

            rmsd = cdr_model.irf_rmsd(
                gold_irf_lambda,
                summed=args.summed,
                n_time_units=args.ntimeunits,
                n_time_points=args.resolution,
                algorithm=args.algorithm
            )

            summary = '=' * 50 + '\n'
            summary += 'CDR regression\n\n'
            summary += 'Model name: %s\n\n' % m
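Conceptually, `irf_rmsd` compares the fitted IRF curves against the gold IRFs of the synthetic data on a shared time grid. As a rough sketch (not the model's implementation), the deviation for a single pair of curves is:

import numpy as np

def irf_rmsd_sketch(gold_curve, estimated_curve):
    # Root-mean-squared deviation between two IRF curves sampled
    # at the same time points (hypothetical arrays).
    gold = np.asarray(gold_curve, dtype=float)
    est = np.asarray(estimated_curve, dtype=float)
    return float(np.sqrt(np.mean((gold - est) ** 2)))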
Example #9
                           type=str,
                           default=None,
                           help='String to prepend to output file.')
    args, unknown = argparser.parse_known_args()

    for path in args.paths:
        p = Config(path)

        if not p.use_gpu_if_available:
            os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

        models = filter_models(p.model_list, args.models, cdr_only=True)

        for m in models:

            stderr('Retrieving saved model %s...\n' % m)
            cdr_model = load_cdr(p.outdir + '/' + m)

            summary = cdr_model.summary(random=args.random,
                                        level=args.level,
                                        n_samples=args.nsamples,
                                        integral_n_time_units=args.timeunits)

            if args.prefix:
                outname = p.outdir + '/' + m + '/' + args.prefix + '_summary.txt'
            else:
                outname = p.outdir + '/' + m + '/summary.txt'

            stderr('Saving summary to %s\n' % outname)
            with open(outname, 'w') as f:
                f.write(summary)
Example #10
        model_cur = np.concatenate(model_cur, axis=0)
        baseline_cur = np.concatenate(baseline_cur, axis=0)
    else:
        model_cur = np.array(model_errors[0])
        baseline_cur = np.array(baseline_errors[0])

    select = np.logical_and(np.isfinite(np.array(model_cur)),
                            np.isfinite(np.array(baseline_cur)))
    diff = float(len(model_cur) - select.sum())
    p_value, base_diff, diffs = permutation_test(baseline_cur[select],
                                                 model_cur[select],
                                                 n_iter=10000,
                                                 n_tails=args.tails,
                                                 mode=args.metric,
                                                 nested=True)
    stderr('\n')
    out_path = args.outdir + '/%s_PT.txt' % args.name
    with open(out_path, 'w') as f:
        stderr('Saving output to %s...\n' % out_path)

        summary = '=' * 50 + '\n'
        summary += 'Model comparison:\n'
        summary += '  %s\n' % ';'.join(args.baseline_error_paths)
        summary += '  vs\n'
        summary += '  %s\n' % ';'.join(args.model_error_paths)
        if diff > 0:
            summary += '%d NaN rows filtered out (out of %d)\n' % (
                diff, len(model_cur))
        summary += 'Metric: %s\n' % args.metric
        summary += 'Difference: %.4f\n' % base_diff
        summary += 'p: %.4e%s\n' % (
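The `permutation_test` call above compares paired per-item losses from the baseline and the model. A minimal, assumption-labeled sketch of a paired sign-flip permutation test on loss differences (not the project's implementation):

import numpy as np

def paired_permutation_test(baseline_losses, model_losses, n_iter=10000, seed=0):
    rng = np.random.default_rng(seed)
    diffs = np.asarray(baseline_losses) - np.asarray(model_losses)
    base_diff = diffs.mean()
    # Randomly flip the sign of each paired difference and recompute the mean.
    flips = rng.choice([-1.0, 1.0], size=(n_iter, diffs.shape[0]))
    perm_means = (flips * diffs).mean(axis=1)
    p_value = float((np.abs(perm_means) >= abs(base_diff)).mean())  # two-tailed
    return p_value, float(base_diff)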