Beispiel #1
0
def _build_mutations_file(in_file, out_file, prior):
    """Convert a tab-delimited mutations table into a YAML mutations file.

    Each row of ``in_file`` must provide the columns ``mutation_id``,
    ``ref_counts``, ``var_counts``, ``normal_cn``, ``minor_cn`` and
    ``major_cn``; all but ``mutation_id`` are parsed as integers. Every row
    is turned into a mutation object via ``get_mutation`` and its
    ``to_dict()`` form is stored under the ``mutations`` key of the document
    written to ``out_file``.

    Args:
        in_file: Path of the tab-delimited input table (with a header row).
        out_file: Path of the YAML file to write; its parent directory is
            created through ``make_parent_directory``.
        prior: Passed through unchanged to ``get_mutation``.

    Raises:
        KeyError: If a required column is missing from a row.
        ValueError: If a count or copy-number field is not an integer.
    """
    mutations = []

    # Context manager guarantees the input handle is released even when a
    # row fails to parse.
    with open(in_file) as in_fh:
        reader = csv.DictReader(in_fh, delimiter='\t')

        for row in reader:
            mutation = get_mutation(
                row['mutation_id'],
                int(row['ref_counts']),
                int(row['var_counts']),
                int(row['normal_cn']),
                int(row['minor_cn']),
                int(row['major_cn']),
                prior,
            )

            mutations.append(mutation.to_dict())

    config = {'mutations': mutations}

    make_parent_directory(out_file)

    # Closes the output file even if serialisation raises.
    with open(out_file, 'w') as out_fh:
        yaml.dump(config, out_fh, Dumper=Dumper)
Beispiel #2
0
def run_analysis_pipeline(args):
    """Run setup, analysis, table building and plotting in sequence.

    The steps are, in order:

    1. ``_setup_analysis`` is called with the analysis options taken from
       ``args`` and returns the config file used by every later step.
    2. ``_run_analysis`` is called with that config file and ``args.seed``.
    3. A ``cluster`` and a ``loci`` table are written as ``.tsv`` files
       under ``<working_dir>/tables``.
    4. Each cluster and loci plot is written under
       ``<working_dir>/plots/<category>/`` using ``args.plot_file_format``
       as the file extension.

    Args:
        args: Object exposing the pipeline options as attributes (e.g. an
            ``argparse`` namespace -- presumably; confirm against caller).
    """
    setup_options = dict(
        density=args.density,
        in_files=args.in_files,
        init_method=args.init_method,
        num_iters=args.num_iters,
        samples=args.samples,
        prior=args.prior,
        tumour_contents=args.tumour_contents,
        working_dir=args.working_dir,
        config_extras_file=args.config_extras_file,
    )
    config_file = _setup_analysis(**setup_options)

    _run_analysis(config_file, args.seed)

    # --- Tables ---------------------------------------------------------
    tables_dir = os.path.join(args.working_dir, 'tables')
    make_directory(tables_dir)

    for table_type in ('cluster', 'loci'):
        _build_table(
            config_file=config_file,
            out_file=os.path.join(tables_dir, '{0}.tsv'.format(table_type)),
            burnin=args.burnin,
            max_clusters=args.max_clusters,
            mesh_size=args.mesh_size,
            table_type=table_type,
            thin=args.thin,
        )

    # --- Plots ----------------------------------------------------------
    plots_dir = os.path.join(args.working_dir, 'plots')

    # Plot types grouped by category; iteration order matches the flat
    # (category, plot_type) sequence: all cluster plots, then all loci plots.
    plot_groups = (
        ('cluster', ('density', 'parallel_coordinates', 'scatter')),
        ('loci', ('density', 'parallel_coordinates', 'scatter',
                  'similarity_matrix', 'vaf_parallel_coordinates',
                  'vaf_scatter')),
    )

    for category, plot_types in plot_groups:
        for plot_type in plot_types:
            file_name = '{0}.{1}'.format(plot_type, args.plot_file_format)
            plot_file = os.path.join(plots_dir, category, file_name)

            make_parent_directory(plot_file)

            if category == 'loci':
                _loci_plot(
                    config_file,
                    plot_file,
                    plot_type,
                    burnin=args.burnin,
                    min_cluster_size=args.min_cluster_size,
                    samples=args.samples,
                    thin=args.thin,
                )

            elif category == 'cluster':
                _cluster_plot(
                    config_file,
                    plot_file,
                    args.burnin,
                    args.max_clusters,
                    args.mesh_size,
                    args.min_cluster_size,
                    plot_type,
                    args.samples,
                    args.thin,
                )