def produce_bowtie2_alignments_old(reads,
                                   sam_fn,
                                   index_prefix,
                                   genome_dir,
                                   score_min,
                                  ):
    ''' Map reads against index_prefix with local-mode bowtie2, writing SAM
        output to sam_fn, then yield (qname, alignments) for each read name,
        where alignments covers every mapped record for that read.
    '''
    options = {
        'local': True,
        'report_up_to': 10,
        'seed_mismatches': 1,
        'seed_interval_function': 'C,1,0',
        'seed_length': 10,
    }
    mapping_tools.map_bowtie2(index_prefix,
                              None,
                              None,
                              sam_fn,
                              unpaired_Reads=reads,
                              custom_binary=True,
                              score_min=score_min,
                              **options)

    sam_file = pysam.Samfile(sam_fn)
    fetcher = genomes.build_region_fetcher(genome_dir, load_references=True)

    for qname, group in utilities.group_by(sam_file, lambda m: m.qname):
        # Skip unmapped records; convert the rest into alignment objects.
        alignments = []
        for mapping in group:
            if mapping.is_unmapped:
                continue
            alignments.append(mapping_to_alignment(mapping, sam_file, fetcher))
        yield qname, alignments
def produce_bowtie2_alignments(reads,
                               index_prefix,
                               genome_dir,
                               score_min,
                              ):
    ''' Map reads against index_prefix with local-mode bowtie2 and yield
        (qname, alignments) per read name, with each group's mappings sorted
        by (tid, pos) before conversion to alignments.
    '''
    options = {
        'local': True,
        'report_up_to': 10,
        'seed_mismatches': 1,
        'seed_interval_function': 'C,1,0',
        'seed_length': 10,
    }
    sam_file, mappings = mapping_tools.map_bowtie2(index_prefix,
                                                   reads=reads,
                                                   custom_binary=True,
                                                   score_min=score_min,
                                                   yield_mappings=True,
                                                   **options)

    base_lookup = genomes.build_base_lookup(genome_dir, sam_file)

    for qname, group in utilities.group_by(mappings, lambda m: m.qname):
        ordered = sorted(group, key=lambda m: (m.tid, m.pos))
        alignments = [mapping_to_alignment(m, sam_file, base_lookup)
                      for m in ordered
                      if not m.is_unmapped]
        yield qname, alignments
def produce_bowtie2_alignments(reads,
                               index_prefix,
                               genome_dir,
                               score_min,
                              ):
    ''' Map reads against index_prefix with local-mode bowtie2 and yield
        (qname, alignments) per read name; within a group, mappings are
        processed in (tid, pos) order and unmapped records are dropped.
    '''
    options = {
        'local': True,
        'report_up_to': 10,
        'seed_mismatches': 1,
        'seed_interval_function': 'C,1,0',
        'seed_length': 10,
    }
    sam_file, mappings = mapping_tools.map_bowtie2(index_prefix,
                                                   reads=reads,
                                                   custom_binary=True,
                                                   score_min=score_min,
                                                   yield_mappings=True,
                                                   **options)

    region_fetcher = genomes.build_region_fetcher(genome_dir,
                                                  load_references=True)

    grouped = utilities.group_by(mappings, lambda m: m.qname)
    for qname, group in grouped:
        alignments = []
        for mapping in sorted(group, key=lambda m: (m.tid, m.pos)):
            if not mapping.is_unmapped:
                alignments.append(mapping_to_alignment(mapping,
                                                       sam_file,
                                                       region_fetcher))
        yield qname, alignments
def pre_filter_paired(contaminant_index, read_pairs, bam_fn, error_fn):
    ''' Map read_pairs to contaminant_index, writing mapped output to bam_fn,
        and return an iterator over the pairs that did not align.
    '''
    return mapping_tools.map_bowtie2(contaminant_index,
                                     output_file_name=bam_fn,
                                     bam_output=True,
                                     read_pairs=read_pairs,
                                     max_insert_size=1500,
                                     suppress_unaligned_SAM=True,
                                     report_all=True,
                                     error_file_name=error_fn,
                                     yield_unaligned=True,
                                    )
def pre_filter(contaminant_index, reads, bam_fn, error_fn='/dev/null'):
    ''' Maps reads to contaminant_index, writing mapped output to bam_fn.
        Returns an iterator over the reads that don't map.
    '''
    return mapping_tools.map_bowtie2(contaminant_index,
                                     output_file_name=bam_fn,
                                     reads=reads,
                                     bam_output=True,
                                     report_all=True,
                                     omit_secondary_seq=True,
                                     suppress_unaligned_SAM=True,
                                     error_file_name=error_fn,
                                     yield_unaligned=True,
                                    )
def pre_filter(contaminant_index, reads, bam_fn, error_fn="/dev/null"):
    """ Maps reads to contaminant_index, writing mapped output to bam_fn.
        Returns an iterator over the reads that don't map.
    """
    unaligned = mapping_tools.map_bowtie2(
        contaminant_index,
        output_file_name=bam_fn,
        reads=reads,
        bam_output=True,
        report_all=True,
        omit_secondary_seq=True,
        suppress_unaligned_SAM=True,
        error_file_name=error_fn,
        yield_unaligned=True,
    )
    return unaligned
def pre_filter_paired(contaminant_index,
                      read_pairs,
                      bam_fn,
                      error_fn,
                     ):
    ''' Map read_pairs to contaminant_index, writing mapped output to bam_fn,
        and return an iterator over the pairs that did not align.
    '''
    unaligned_pairs = mapping_tools.map_bowtie2(
        contaminant_index,
        output_file_name=bam_fn,
        bam_output=True,
        read_pairs=read_pairs,
        max_insert_size=1500,
        suppress_unaligned_SAM=True,
        report_all=True,
        error_file_name=error_fn,
        yield_unaligned=True,
    )
    return unaligned_pairs
# Exploratory script: map a small prefix of a fastq file with local-mode
# bowtie2, then tally the aligned-query lengths and the per-position base
# composition of the soft-clipped 3' tails.

# Alternative input data sets tried previously:
#fastq_fn = '/home/jah/projects/arlen/experiments/lareau_elife/Cycloheximide_replicate_1/data/SRR1363415.fastq'
#fastq_fn = '/home/jah/projects/arlen/experiments/arribere_gr/S288C_TLSeq2/data/SRR825166.fastq'
#fastq_fn = '/home/jah/projects/arlen/experiments/baudin-baillieu_cell_reports/traductome_PSI-_rep_2/data/SRR594901.fastq'
fastq_fn = '/home/jah/projects/arlen/experiments/baudin-baillieu_cell_reports/Ribo-seq_[PSI+]_rep1/data/SRR1190356.fastq'
index_prefix = '/home/jah/projects/arlen/data/organisms/saccharomyces_cerevisiae/EF4/genome/genome'

root, ext = os.path.splitext(fastq_fn)
small_fastq_fn = '{0}_small.fastq'.format(root)
small_sam_fn = '{0}_small.sam'.format(root)

# 100,000 fastq lines == 25,000 reads.
head_command = ['head', '-n', '100000', fastq_fn]
# Context manager so the output handle is flushed and closed promptly
# (the original left the file object open).
with open(small_fastq_fn, 'w') as small_fh:
    subprocess.check_call(head_command, stdout=small_fh)

# NOTE(review): argument order here is (reads, index, output), while other
# call sites in this file pass index_prefix first -- confirm against the
# current mapping_tools.map_bowtie2 signature.
mapping_tools.map_bowtie2(small_fastq_fn,
                          index_prefix,
                          small_sam_fn,
                          seed_length=12,
                          local=True,
                         )

# positions[i] counts bases seen at offset i past the end of the aligned
# portion of a read; qlens counts aligned-query lengths.
positions = [Counter() for _ in range(40)]
qlens = Counter()
for read in pysam.Samfile(small_sam_fn):
    if read.is_unmapped:
        continue
    qlens[read.qlen] += 1
    # Portion of the query sequence after the aligned segment (soft-clipped tail).
    trimmed = read.seq[read.qend:]
    for position_counts, base in zip(positions, trimmed):
        position_counts[base] += 1
#fastq_fn = '/home/jah/projects/arlen/experiments/belgium_3_5_14/wt/data/wt_cDNA.140219.HiSeq2500.FCB.lane1.R1.fastq' #fastq_fn = '/home/jah/projects/arlen/experiments/dunn_elife/YCF182_110222_HiSeq.fq' #fastq_fn = '/home/jah/projects/arlen/experiments/lareau_elife/Cycloheximide_replicate_1/data/SRR1363415.fastq' #fastq_fn = '/home/jah/projects/arlen/experiments/arribere_gr/S288C_TLSeq2/data/SRR825166.fastq' #fastq_fn = '/home/jah/projects/arlen/experiments/baudin-baillieu_cell_reports/traductome_PSI-_rep_2/data/SRR594901.fastq' fastq_fn = '/home/jah/projects/arlen/experiments/baudin-baillieu_cell_reports/Ribo-seq_[PSI+]_rep1/data/SRR1190356.fastq' index_prefix = '/home/jah/projects/arlen/data/organisms/saccharomyces_cerevisiae/EF4/genome/genome' root, ext = os.path.splitext(fastq_fn) small_fastq_fn = '{0}_small.fastq'.format(root) small_sam_fn = '{0}_small.sam'.format(root) head_command = ['head', '-n', '100000', fastq_fn] subprocess.check_call(head_command, stdout=open(small_fastq_fn, 'w')) mapping_tools.map_bowtie2(small_fastq_fn, index_prefix, small_sam_fn, seed_length=12, local=True) positions = [Counter() for i in range(40)] qlens = Counter() for read in pysam.Samfile(small_sam_fn): if read.is_unmapped: continue qlens[read.qlen] += 1 trimmed = read.seq[read.qend:] for p, b in zip(positions, trimmed): p[b] += 1 for p in positions: if not p: