Example #1
def split(args):
    assert args.min_len >= 2 * args.bin_size

    # We only need the bam to get the chromosome names and lengths.
    in_bam = tk_bam.create_bam_infile(args.possorted_bam)

    if args.targets is None:
        target_regions = None
    else:
        with open(args.targets, 'r') as f:
            target_regions = tk_io.get_target_regions(f)

    primary_contigs = tk_reference.load_primary_contigs(args.reference_path)

    all_loci = []
    for (chrom_name, chrom_size) in zip(in_bam.references, in_bam.lengths):
        if chrom_name not in primary_contigs:
            continue
        # The chunks will overlap by min_len. This will ensure that we don't
        # miss any regions of low depth that span chunk boundaries.
        new_loci = generate_chrom_loci(target_regions,
                                       chrom_name,
                                       chrom_size,
                                       PARALLEL_LOCUS_SIZE // 2,
                                       overlap=args.min_len)
        all_loci.extend(new_loci)
    in_bam.close()

    # Group loci
    locus_sets = pack_loci(all_loci)

    chunk_defs = [{'loci': loci, '__mem_gb': 16} for loci in locus_sets]
    return {'chunks': chunk_defs}
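
This example (like the ones that follow) relies on a helper, generate_chrom_loci, that tiles each chromosome into loci; it is defined elsewhere in the stage code and not shown on this page. The sketch below illustrates the behaviour the overlap comment above describes. It is a minimal, hypothetical version: the 'chrom:start..end' locus string format, the target-region skip, and the exact signature are assumptions for illustration, not the actual tenkit implementation.

def generate_chrom_loci(target_regions, chrom_name, chrom_size,
                        chunk_size, overlap=0):
    # Hypothetical sketch, not the real tenkit helper.
    # Tiles one chromosome into loci of at most chunk_size bases;
    # consecutive loci overlap by `overlap` bases so regions of low
    # depth that span a chunk boundary are not missed.
    if target_regions is not None and chrom_name not in target_regions:
        # When a target map is supplied, skip chromosomes without targets.
        return []

    loci = []
    step = max(chunk_size - overlap, 1)
    start = 0
    while start < chrom_size:
        end = min(start + chunk_size, chrom_size)
        # Assumed locus format: 'chrom:start..end'
        loci.append('%s:%d..%d' % (chrom_name, start, end))
        if end >= chrom_size:
            break
        start += step
    return loci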
Example #2
def split(args):
    in_bam = tk_bam.create_bam_infile(args.possorted_bam)

    if args.reference_path is not None and os.path.exists(
            tk_reference.get_primary_contigs(args.reference_path)):
        with open(tk_reference.get_primary_contigs(args.reference_path),
                  'r') as f:
            primary_contigs = set(line.strip() for line in f)
    else:
        # Default is to include all contigs
        primary_contigs = set(in_bam.references)

    all_loci = []
    for (chrom_name, chrom_size) in zip(in_bam.references, in_bam.lengths):
        # Keep only primary contigs; for female samples also skip chrY.
        if chrom_name in primary_contigs and not (
                args.sex in ['f', 'female'] and chrom_name in ['Y', 'chrY']):
            all_loci.extend(
                generate_chrom_loci(None, chrom_name, chrom_size,
                                    PARALLEL_LOCUS_SIZE))
    in_bam.close()

    locus_sets = pack_loci(all_loci)

    chunk_defs = [{'loci': loci, '__mem_gb': 8} for loci in locus_sets]
    return {'chunks': chunk_defs}
Example #3
def split(args):
    ref = contig_manager.contig_manager(args.reference_path)
    contig_lengths = ref.get_contig_lengths()

    target_regions = None
    all_loci = []
    for (chrom_name, chrom_size) in contig_lengths.items():
        # Use fixed 100 Mb loci instead of the shared PARALLEL_LOCUS_SIZE constant.
        all_loci.extend(
            generate_chrom_loci(target_regions, chrom_name, chrom_size,
                                100000000))

    locus_sets = pack_loci(all_loci)

    chunk_defs = [{'loci': loci} for loci in locus_sets]
    return {'chunks': chunk_defs}
Example #4
def split(args):
    ref = contig_manager.contig_manager(args.reference_path)
    contig_lengths = ref.get_contig_lengths()

    target_regions = None
    all_loci = []
    for (chrom_name, chrom_size) in contig_lengths.items():
        all_loci.extend(
            generate_chrom_loci(target_regions, chrom_name, chrom_size,
                                tenkit.constants.PARALLEL_LOCUS_SIZE))

    locus_sets = pack_loci(all_loci)

    chunk_defs = [{'loci': loci, '__mem_gb': 12} for loci in locus_sets]
    return {'chunks': chunk_defs, 'join': {'__mem_gb': 12}}
Example #5
def split(args):
    in_bam = tk_bam.create_bam_infile(args.possorted_bam)

    # Load pull-down targets
    if args.targets is None:
        target_regions = None
    else:
        with open(args.targets, 'r') as f:
            target_regions = tk_io.get_target_regions(f)

    all_loci = []
    for (chrom_name, chrom_size) in zip(in_bam.references, in_bam.lengths):
        all_loci.extend(
            generate_chrom_loci(target_regions, chrom_name, chrom_size,
                                PARALLEL_LOCUS_SIZE))
    in_bam.close()

    locus_sets = pack_loci(all_loci)

    chunk_defs = [{'loci': loci, '__mem_gb': 16} for loci in locus_sets]
    return {'chunks': chunk_defs}
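
All five examples finish the same way: pack_loci groups the flat locus list into sets, each set becomes one chunk definition, and the optional '__mem_gb' key reserves memory for that chunk in the surrounding pipeline framework. The grouping helper is also not shown on this page; below is a minimal sketch that packs loci greedily by count. The max_loci_per_set parameter and count-based packing are assumptions for illustration; the real pack_loci may instead balance sets by total locus length.

def pack_loci(all_loci, max_loci_per_set=100):
    # Hypothetical sketch, not the real tenkit helper.
    # Greedily packs loci into sets of at most max_loci_per_set entries,
    # so each chunk receives a bounded amount of work.
    locus_sets = []
    current = []
    for locus in all_loci:
        current.append(locus)
        if len(current) >= max_loci_per_set:
            locus_sets.append(current)
            current = []
    if current:
        locus_sets.append(current)
    return locus_sets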