def split(args):
    """Split the genome into overlapping loci for parallel processing.

    Builds one chunk per packed set of loci, overlapping adjacent chunks by
    ``args.min_len`` so that low-depth regions spanning a chunk boundary are
    never missed.

    Args:
        args: Martian stage args; uses possorted_bam, targets, reference_path,
            min_len, bin_size.

    Returns:
        dict: {'chunks': [{'loci': ..., '__mem_gb': 16}, ...]}

    Raises:
        ValueError: if min_len is smaller than twice bin_size (the overlap
            scheme requires it).
    """
    # Validate explicitly rather than with `assert`, which is stripped
    # when Python runs with -O.
    if args.min_len < 2 * args.bin_size:
        raise ValueError("min_len must be at least 2 * bin_size")

    # We only need the bam to get the chromosome names and lengths.
    in_bam = tk_bam.create_bam_infile(args.possorted_bam)

    # Load pull-down targets, if any.
    if args.targets is None:
        target_regions = None
    else:
        with open(args.targets, 'r') as f:
            target_regions = tk_io.get_target_regions(f)

    primary_contigs = tk_reference.load_primary_contigs(args.reference_path)

    all_loci = []
    for chrom_name, chrom_size in zip(in_bam.references, in_bam.lengths):
        if chrom_name not in primary_contigs:
            continue
        # The chunks will overlap by min_len. This will ensure that we don't
        # miss any regions of low depth that span chunk boundaries.
        # Use floor division so the locus size stays an int on Python 3 too.
        new_loci = generate_chrom_loci(target_regions, chrom_name, chrom_size,
                                       PARALLEL_LOCUS_SIZE // 2,
                                       overlap=args.min_len)
        all_loci.extend(new_loci)
    in_bam.close()

    # Group loci into balanced chunk-sized sets.
    locus_sets = pack_loci(all_loci)

    chunk_defs = [{'loci': loci, '__mem_gb': 16} for loci in locus_sets]
    return {'chunks': chunk_defs}
def split(args):
    """Split the genome into loci for parallel processing, restricted to
    primary contigs and, for female samples, excluding the Y chromosome.

    Args:
        args: Martian stage args; uses possorted_bam, reference_path, sex.

    Returns:
        dict: {'chunks': [{'loci': ..., '__mem_gb': 8}, ...]}
    """
    in_bam = tk_bam.create_bam_infile(args.possorted_bam)

    # Resolve the primary-contigs file once instead of calling
    # get_primary_contigs() twice.
    primary_contigs_path = None
    if args.reference_path is not None:
        primary_contigs_path = tk_reference.get_primary_contigs(
            args.reference_path)

    if primary_contigs_path is not None and os.path.exists(primary_contigs_path):
        with open(primary_contigs_path, 'r') as f:
            # One contig name per line.
            primary_contigs = set(line.strip() for line in f)
    else:
        # Default is to include all contigs
        primary_contigs = set(in_bam.references)

    all_loci = []
    for chrom_name, chrom_size in zip(in_bam.references, in_bam.lengths):
        if chrom_name not in primary_contigs:
            continue
        # Skip the Y chromosome for female samples.
        if args.sex in ['f', 'female'] and chrom_name in ['Y', 'chrY']:
            continue
        all_loci.extend(
            generate_chrom_loci(None, chrom_name, chrom_size,
                                PARALLEL_LOCUS_SIZE))
    in_bam.close()

    locus_sets = pack_loci(all_loci)
    chunk_defs = [{'loci': loci, '__mem_gb': 8} for loci in locus_sets]
    return {'chunks': chunk_defs}
def split(args):
    """Chunk every contig of the reference into fixed-size loci.

    No target regions are applied; every contig is split into windows of
    at most 100 Mb, then packed into chunk definitions.

    Args:
        args: Martian stage args; uses reference_path.

    Returns:
        dict: {'chunks': [{'loci': ...}, ...]}
    """
    locus_size = 100000000  # fixed 100 Mb window per locus
    manager = contig_manager.contig_manager(args.reference_path)

    all_loci = []
    for contig, length in manager.get_contig_lengths().iteritems():
        all_loci.extend(
            generate_chrom_loci(None, contig, length, locus_size))

    return {'chunks': [{'loci': loci} for loci in pack_loci(all_loci)]}
def split(args):
    """Chunk every contig into loci of tenkit's standard parallel size.

    Args:
        args: Martian stage args; uses reference_path.

    Returns:
        dict: {'chunks': [{'loci': ..., '__mem_gb': 12}, ...],
               'join': {'__mem_gb': 12}}
    """
    ref = contig_manager.contig_manager(args.reference_path)

    all_loci = []
    for contig, length in ref.get_contig_lengths().iteritems():
        # No target-region restriction for this stage.
        all_loci.extend(
            generate_chrom_loci(None, contig, length,
                                tenkit.constants.PARALLEL_LOCUS_SIZE))

    chunks = [{'loci': loci, '__mem_gb': 12} for loci in pack_loci(all_loci)]
    return {'chunks': chunks, 'join': {'__mem_gb': 12}}
def split(args):
    """Chunk the genome into parallel loci, honoring optional pull-down targets.

    Args:
        args: Martian stage args; uses possorted_bam, targets.

    Returns:
        dict: {'chunks': [{'loci': ..., '__mem_gb': 16}, ...]}
    """
    # The bam supplies the chromosome names and lengths.
    bam = tk_bam.create_bam_infile(args.possorted_bam)

    # Load pull-down targets when a targets file was supplied.
    if args.targets is not None:
        with open(args.targets, 'r') as targets_file:
            target_regions = tk_io.get_target_regions(targets_file)
    else:
        target_regions = None

    all_loci = []
    for chrom, length in zip(bam.references, bam.lengths):
        all_loci.extend(
            generate_chrom_loci(target_regions, chrom, length,
                                PARALLEL_LOCUS_SIZE))
    bam.close()

    return {'chunks': [{'loci': loci, '__mem_gb': 16}
                       for loci in pack_loci(all_loci)]}