def _build_mutations_file(in_file, out_file, prior):
    """Convert a tab-delimited mutations table into a YAML mutations file.

    Each row of ``in_file`` is read with ``csv.DictReader`` and must provide
    the columns ``mutation_id``, ``ref_counts``, ``var_counts``,
    ``normal_cn``, ``minor_cn`` and ``major_cn``. Every column except
    ``mutation_id`` is converted with ``int``. The row is turned into a
    mutation object via ``get_mutation`` and its ``to_dict()`` form is
    collected under the ``'mutations'`` key of the dumped document.

    Args:
        in_file: Path of the tab-delimited input table.
        out_file: Path of the YAML file to write.
        prior: Passed through unchanged as the last argument of
            ``get_mutation``.

    Raises:
        KeyError: If a required column is absent from a row.
        ValueError: If a count or copy-number field is not an integer string.
    """
    config = {'mutations': []}

    # Use a context manager so the input handle is released even when a row
    # fails to parse; the bare open() used previously was never closed.
    with open(in_file) as in_fh:
        reader = csv.DictReader(in_fh, delimiter='\t')

        for row in reader:
            mutation = get_mutation(
                row['mutation_id'],
                int(row['ref_counts']),
                int(row['var_counts']),
                int(row['normal_cn']),
                int(row['minor_cn']),
                int(row['major_cn']),
                prior,
            )

            config['mutations'].append(mutation.to_dict())

    # NOTE(review): presumably creates the directory that will hold out_file;
    # confirm against the helper's definition.
    make_parent_directory(out_file)

    # Closing via ``with`` also covers the case where yaml.dump raises.
    with open(out_file, 'w') as out_fh:
        yaml.dump(config, out_fh, Dumper=Dumper)
def run_analysis_pipeline(args):
    """Run setup, the analysis itself, then emit every table and plot.

    Steps, in order:

    1. ``_setup_analysis`` is called with settings taken from ``args``; its
       return value is used as the config file for all later steps.
    2. ``_run_analysis`` is called with that config file and ``args.seed``.
    3. ``cluster.tsv`` and ``loci.tsv`` are built under
       ``<working_dir>/tables``.
    4. One plot per (category, plot type) pair is written to
       ``<working_dir>/plots/<category>/<plot_type>.<plot_file_format>``.

    Args:
        args: Object exposing the attributes read below (``density``,
            ``in_files``, ``init_method``, ``num_iters``, ``samples``,
            ``prior``, ``tumour_contents``, ``working_dir``,
            ``config_extras_file``, ``seed``, ``burnin``, ``max_clusters``,
            ``mesh_size``, ``thin``, ``plot_file_format`` and
            ``min_cluster_size``).
            NOTE(review): presumably an argparse namespace; confirm at the
            call site.
    """
    config_path = _setup_analysis(
        density=args.density,
        in_files=args.in_files,
        init_method=args.init_method,
        num_iters=args.num_iters,
        samples=args.samples,
        prior=args.prior,
        tumour_contents=args.tumour_contents,
        working_dir=args.working_dir,
        config_extras_file=args.config_extras_file,
    )

    _run_analysis(config_path, args.seed)

    # ---- Tables -----------------------------------------------------------
    table_root = os.path.join(args.working_dir, 'tables')
    make_directory(table_root)

    for kind in ('cluster', 'loci'):
        _build_table(
            config_file=config_path,
            out_file=os.path.join(table_root, '{0}.tsv'.format(kind)),
            burnin=args.burnin,
            max_clusters=args.max_clusters,
            mesh_size=args.mesh_size,
            table_type=kind,
            thin=args.thin,
        )

    # ---- Plots ------------------------------------------------------------
    plot_root = os.path.join(args.working_dir, 'plots')

    # Plot types grouped by category; iteration order matches the flat
    # (category, plot_type) sequence: all cluster plots, then all loci plots.
    plot_groups = (
        ('cluster', (
            'density',
            'parallel_coordinates',
            'scatter',
        )),
        ('loci', (
            'density',
            'parallel_coordinates',
            'scatter',
            'similarity_matrix',
            'vaf_parallel_coordinates',
            'vaf_scatter',
        )),
    )

    for group, kinds in plot_groups:
        for kind in kinds:
            file_name = '{0}.{1}'.format(kind, args.plot_file_format)
            target = os.path.join(plot_root, group, file_name)

            make_parent_directory(target)

            if group == 'cluster':
                _cluster_plot(
                    config_path,
                    target,
                    args.burnin,
                    args.max_clusters,
                    args.mesh_size,
                    args.min_cluster_size,
                    kind,
                    args.samples,
                    args.thin,
                )
                continue

            if group == 'loci':
                _loci_plot(
                    config_path,
                    target,
                    kind,
                    burnin=args.burnin,
                    min_cluster_size=args.min_cluster_size,
                    samples=args.samples,
                    thin=args.thin,
                )