Ejemplo n.º 1
0
# Summary figure: curves for the top `n_top_clust` clusters, saved as one PNG.
plot_top_clust_ephys_curves(
    cluster_super_means,
    y_cnts=y_cnts,
    overall_means=super_data_means,
    overall_stds=super_data_stds,
    clust_labels=clust_labels,
    n_to_show=n_top_clust,
    inches=inches,
)
save_fig(join(ephys_viz_dir, 'ephys_curves_top_clust.png'))

# One figure per (non-trivial) cluster.
# NOTE(review): the threshold is 0 (an earlier cutoff of `y_cnts >= 5` was
# disabled), so presumably every cluster passes this filter -- confirm that
# is intended.
non_trivial_clusters = y_cnts[y_cnts >= 0].index.values
save_dir = make_and_get_dir(ephys_viz_dir, 'cluster_curves')
for cl_idx in non_trivial_clusters:
    label = clust_labels[cl_idx]

    # this cluster's entry from every item of cluster_super_means
    values = {name: cluster_super_means[name][cl_idx]
              for name in cluster_super_means.keys()}

    # figure width scales with the number of datasets
    plt.figure(figsize=(2 * n_datasets * inches, inches))
    plot_cluster_ephys_curve(
        values,
        overall_means=super_data_means,
        overall_stds=super_data_stds,
        y_label=label,
    )
Ejemplo n.º 2
0
# Optional manual overrides for the selected model index. No `type` is given,
# so a value supplied on the command line arrives as a string.
# The help fragments are joined by implicit concatenation, so the first
# fragment must end with a space.
parser.add_argument('--user_bd_mvmm_best_idx', default=None,
                    help='Optional user provided index for '
                         'best bd MVMM model.')

parser.add_argument('--user_log_pen_mvmm_best_idx', default=None,
                    help='Optional user provided index for '
                         'best log pen MVMM model.')

parser.add_argument('--select_metric', type=str, default='bic',
                    help='Model selection criterion.')
args = parser.parse_args()


# Directory layout, all rooted at results_dir.
# (make_and_get_dir presumably creates the directory if needed and returns
# its path -- confirm against its definition.)
results_dir = args.results_dir
log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting')
mv_fitting_dir = make_and_get_dir(fitting_dir, 'multi_view')
model_sel_dir = make_and_get_dir(results_dir, 'model_selection')
# The per-model sub-directories below are disabled; both the bd MVMM and the
# log pen MVMM selection directories currently point at model_sel_dir itself.
# bd_sel_dir = make_and_get_dir(model_sel_dir, 'bd_mvmm')
# log_sel_dir = make_and_get_dir(model_sel_dir, 'log_pen_mvmm')
bd_sel_dir = make_and_get_dir(model_sel_dir)
log_sel_dir = make_and_get_dir(model_sel_dir)
opt_diag_dir = make_and_get_dir(results_dir, 'opt_diagnostics')


# Log writer for this run; delete_if_exists=True is passed, presumably so an
# earlier log at the same path is replaced.
res_writer = ResultsWriter(os.path.join(log_dir, 'mvmm_model_selection.txt'),
                           delete_if_exists=True)


res_writer.write('user_bd_mvmm_best_idx: {}'.
Ejemplo n.º 3
0
parser.add_argument(
    '--event_col',
    help='Column name of the survival event data.',
)


args = parser.parse_args()

# unpack the command line options used below
results_dir = args.results_dir
fpaths, super_fpaths = args.fpaths, args.super_fpaths
vars2compare_fpath = args.vars2compare_fpath
survival_fpath = args.survival_fpath
duration_col, event_col = args.duration_col, args.event_col

# directory layout, all rooted at results_dir
# (make_and_get_dir presumably creates each directory if it is missing)
log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting')
model_sel_dir = make_and_get_dir(results_dir, 'model_selection')
opt_diag_dir = make_and_get_dir(results_dir, 'opt_diagnostics')
clust_interpret_dir = make_and_get_dir(results_dir, 'interpret')

# selected models from the fitting directory, plus the data for each view
models = load(join(fitting_dir, 'selected_models'))
view_data, dataset_names, sample_names, view_feat_names = load_data(*fpaths)

# wrap every view in a DataFrame indexed by sample name, with that view's
# feature names as columns (one view per entry of fpaths)
n_views = len(fpaths)
view_data = [
    pd.DataFrame(view_data[v], index=sample_names, columns=view_feat_names[v])
    for v in range(n_views)
]
Ejemplo n.º 4
0
                    help='Maximum number of components for concatenated data.')

# Switch to skip the GMM on concatenated data; a store_true flag is False
# unless it is passed on the command line.
parser.add_argument('--exclude_cat_gmm', action='store_true',
                    help='Dont run cat gmm.')

# add the shared option groups first, then the bayes options on top
parser = bayes_parser(
    add_parsers(parser, to_add=[general_opt_parser, base_gmm_parser])
)
args = parser.parse_args()
bayes_submit(args)
args = format_mini_experiment(args)

# output locations for this run
results_dir = args.results_dir
log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting', 'single_view')

# the first thing written to the log is the parsed arguments
res_writer = ResultsWriter(join(log_dir, 'single_view_fitting.txt'),
                           delete_if_exists=True)
res_writer.write(args)

run_start_time = time()

# one view per input file path
n_views = len(args.fpaths)

#############
# load data #
#############
Ejemplo n.º 5
0
# The help fragments are joined by implicit concatenation, so the first
# fragment must end with a space.
parser.add_argument('--feat_list', default='icluster',
                    help='Which feature list to use e.g. icluster, '
                         'all, top_2000 by variance.')

parser = bayes_parser(parser)
args = parser.parse_args()
bayes_submit(args)

cancer_type = args.cancer_type
feat_list = args.feat_list

print(cancer_type)

raw_data_dir = TCGAPaths().raw_data_dir

# PCA outputs are grouped by cancer type and feature list; the processed
# features are read from the matching folder under pro_data_dir.
pca_dir = make_and_get_dir(TCGAPaths().top_dir, 'pca', cancer_type, feat_list)
diagnostics_dir = make_and_get_dir(pca_dir, 'diagnostics')
feat_save_dir = join(TCGAPaths().pro_data_dir, cancer_type, feat_list)

datasets = ['rna', 'mi_rna', 'dna_meth', 'cp']
data = {}
# load the processed data: one '<dataset>.csv' per dataset, with the first
# CSV column used as the index
for k in datasets:
    fpath = join(feat_save_dir, '{}.csv'.format(k))
    data[k] = pd.read_csv(fpath, index_col=0)


########################
# extract PCA features #
########################
Ejemplo n.º 6
0
        log_pen_mvmm_parser,
        bd_mvmm_parser,
        spect_pen_parser
    ])

parser = bayes_parser(parser)
args = format_mini_experiment(parser.parse_args())
# job_name is copied from sim_name *before* the fallback name below is
# applied, so it stays None when no sim_name was given
args.job_name = args.sim_name

bayes_submit(args)

# placeholder simulation name when none was supplied
if args.sim_name is None:
    args.sim_name = 'meow'

save_dir = make_and_get_dir(Paths().results_dir, 'single', args.sim_name)

# results log: a header line followed by the parsed arguments
res_writer = ResultsWriter(os.path.join(save_dir, 'results.txt'),
                           delete_if_exists=True)
res_writer.write('\n\n\n Input args')
res_writer.write(args)

rng = check_random_state(args.metaseed)

# 'sp_mvmm' is excluded unconditionally (the check on args.exclude_sp_mvmm
# is disabled); the remaining exclusions follow the command line switches
to_exclude = ['sp_mvmm']
if args.exclude_bd_mvmm:
    to_exclude.append('bd_mvmm')
if args.exclude_log_pen_mvmm:
Ejemplo n.º 7
0
cancer_type = args.cancer_type
feat_list = args.feat_list
handle_nans = args.handle_nans

# Accepted feature lists: 'all', 'icluster', or any name containing 'top'.
# Raised explicitly rather than asserted, because assert statements are
# removed when Python runs with -O.
if not (feat_list in ['all', 'icluster'] or 'top' in feat_list):
    raise ValueError("feat_list must be 'all', 'icluster' or contain 'top', "
                     "got {!r}".format(feat_list))

# Sample filtering options (presumably forwarded to the filtering step later
# in the script -- the consumer is not visible here).
filter_kws = {
    'tumor_sample_only': True,
    'primary_tumor_only': True,
    'keep_first_of_participant_multiples': True,
    'ensure_participant_idx': True,
    'verbose': True
}

# Processed output is organised as <pro_data_dir>/<cancer_type>/<feat_list>.
raw_data_dir = TCGAPaths().raw_data_dir
pro_data_dir = make_and_get_dir(TCGAPaths().pro_data_dir, cancer_type)
feat_save_dir = make_and_get_dir(pro_data_dir, feat_list)

# Log file stored alongside the processed features; the parsed arguments are
# written to it first.
res_writer = ResultsWriter(join(feat_save_dir, 'log.txt'),
                           delete_if_exists=True)

res_writer.write(args)

# File name for each data type (presumably relative to raw_data_dir).
fnames = {
    'mi_rna':
    'pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.csv',
    'rna': 'EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2-v2.geneExp.tsv',
    'dna_meth':
    'jhu-usc.edu_PANCAN_merged_HumanMethylation27_HumanMethylation450.betaValue_whitelisted.tsv',
    'cp': 'all_data_by_genes_whitelisted.tsv'
}
Ejemplo n.º 8
0
# unpack everything load_results returns for this simulation
(results, model_dfs, model_dfs_at_truth,
 fit_models, pi_true_summary,
 n_samples_tr_seq, zero_thresh, data) = load_results(sim_name)

timing_data = get_timing_data(results, fit_models)

# ground-truth summary values
n_comp_tot_true = pi_true_summary['n_comp_tot_true']
n_blocks_true = pi_true_summary['n_blocks_true']

# MVMM variants whose entry in model_dfs has zero rows
models2exclude = [model_name
                  for model_name in ['log_pen_mvmm', 'bd_mvmm', 'sp_mvmm']
                  if model_dfs[model_name].shape[0] == 0]

# where to save simulation results
# (an earlier join + os.makedirs(..., exist_ok=True) variant was replaced by
# this helper call)
results_save_dir = make_and_get_dir(Paths().results_dir, sim_name)

##################
# Set parameters #
##################

# model names
model_names = {
    'log_pen_mvmm': 'log penalized MVMM',
    'bd_mvmm': 'block diagonal MVMM',
    'cat_gmm': 'Mixture model on concatenated data',
    'full_mvmm': 'MVMM',
    'sp_mvmm': 'spectral penalized MVMM',
    'marginal_view_0': 'Mixture model on view 1 marginal data',
Ejemplo n.º 9
0
                         log_pen_mvmm_parser
                     ])

parser = bayes_parser(parser)
args = parser.parse_args()
bayes_submit(args)
args = format_mini_experiment(args)

# log file stub: 'mvmm_fitting' followed by '_<value>' for every entry of
# args.n_view_comps (this generalises an older two-view naming scheme)
stub = 'mvmm_fitting' + ''.join('_{}'.format(nc)
                                for nc in args.n_view_comps)

# directory layout, all rooted at the results directory given on the
# command line
results_dir = args.results_dir
log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting')
model_sel_dir = make_and_get_dir(results_dir, 'model_selection')
opt_diag_dir = make_and_get_dir(results_dir, 'opt_diagnostics')

# the first thing written to the log is the parsed arguments
res_writer = ResultsWriter(os.path.join(log_dir, stub + '.txt'),
                           delete_if_exists=True)
res_writer.write(args)

run_start_time = time()

# models to skip; the switch for 'sp_mvmm' is currently disabled:
# if args.exclude_sp_mvmm:
#     to_exclude.append('sp_mvmm')
to_exclude = []
if args.exclude_bd_mvmm:
Ejemplo n.º 10
0
parser.add_argument(
    '--event_col',
    help='Column name of the survival event data.',
)

args = parser.parse_args()

# unpack the command line options used below
results_dir = args.results_dir
fpaths, super_fpaths = args.fpaths, args.super_fpaths
vars2compare_fpath = args.vars2compare_fpath
survival_fpath = args.survival_fpath

print(args)

# directory layout, all rooted at results_dir
# (make_and_get_dir presumably creates each directory if it is missing)
log_dir = make_and_get_dir(results_dir, 'log')
fitting_dir = make_and_get_dir(results_dir, 'model_fitting')
model_sel_dir = make_and_get_dir(results_dir, 'model_selection')
opt_diag_dir = make_and_get_dir(results_dir, 'opt_diagnostics')
clust_interpret_dir = make_and_get_dir(results_dir, 'interpret')

# selected models from the fitting directory, plus the data for each view
# (one view per entry of fpaths)
n_views = len(fpaths)
models = load(join(fitting_dir, 'selected_models'))
view_data, dataset_names, sample_names, view_feat_names = load_data(*fpaths)

# wrap every view in a DataFrame indexed by sample name, with that view's
# feature names as columns
view_data = [pd.DataFrame(view_data[v],
                          index=sample_names,
                          columns=view_feat_names[v])
             for v in range(n_views)]