def produce_bowtie2_alignments_old(reads,
                                   sam_fn,
                                   index_prefix,
                                   genome_dir,
                                   score_min,
                                  ):
    ''' Generator that aligns reads with bowtie2 and yields per-read
        alignments.

        Calls mapping_tools.map_bowtie2 with index_prefix and sam_fn, then
        reopens sam_fn with pysam and walks its records grouped by qname
        (via utilities.group_by).

        Yields (qname, alignments) tuples, where alignments is a list of
        mapping_to_alignment results for every record in the group that is
        not flagged as unmapped. The list is empty when no record in the
        group mapped.

        Nothing runs until the generator is first advanced.
    '''
    # 'report_all' and 'threads' are deliberately left unset here.
    bowtie2_options = dict(
        local=True,
        report_up_to=10,
        seed_mismatches=1,
        seed_interval_function='C,1,0',
        seed_length=10,
    )

    mapping_tools.map_bowtie2(
        index_prefix,
        None,
        None,
        sam_fn,
        unpaired_Reads=reads,
        custom_binary=True,
        score_min=score_min,
        **bowtie2_options
    )

    sam_file = pysam.Samfile(sam_fn)
    region_fetcher = genomes.build_region_fetcher(genome_dir, load_references=True)

    def query_name(mapping):
        return mapping.qname

    for qname, group in utilities.group_by(sam_file, query_name):
        alignments = []
        for mapping in group:
            if mapping.is_unmapped:
                continue
            alignment = mapping_to_alignment(mapping, sam_file, region_fetcher)
            alignments.append(alignment)
        yield qname, alignments
def produce_bowtie2_alignments(
    reads,
    index_prefix,
    genome_dir,
    score_min,
):
    ''' Generator that aligns reads with bowtie2 and yields per-read
        alignments.

        mapping_tools.map_bowtie2 is called with yield_mappings=True and
        its result is unpacked as (sam_file, mappings). A base lookup is
        built from genome_dir and sam_file, and the mappings are grouped
        by qname via utilities.group_by.

        Yields (qname, alignments) tuples. Within each group the mappings
        are ordered by (tid, pos) before conversion, and records flagged
        as unmapped are left out, so alignments may be an empty list.
    '''
    bowtie2_options = dict(
        local=True,
        report_up_to=10,
        seed_mismatches=1,
        seed_interval_function='C,1,0',
        seed_length=10,
    )

    sam_file, mappings = mapping_tools.map_bowtie2(
        index_prefix,
        reads=reads,
        custom_binary=True,
        score_min=score_min,
        yield_mappings=True,
        **bowtie2_options
    )

    base_lookup = genomes.build_base_lookup(genome_dir, sam_file)

    def query_name(mapping):
        return mapping.qname

    def reference_position(mapping):
        return (mapping.tid, mapping.pos)

    for qname, group in utilities.group_by(mappings, query_name):
        # Sort the whole group first (unmapped records included), then
        # drop the unmapped ones while converting.
        ordered = list(group)
        ordered.sort(key=reference_position)

        alignments = []
        for mapping in ordered:
            if mapping.is_unmapped:
                continue
            alignments.append(mapping_to_alignment(mapping, sam_file, base_lookup))

        yield qname, alignments
def produce_bowtie2_alignments(reads,
                               index_prefix,
                               genome_dir,
                               score_min,
                              ):
    ''' Generator that aligns reads with bowtie2 and yields per-read
        alignments.

        The result of mapping_tools.map_bowtie2 (called with
        yield_mappings=True) is unpacked as (sam_file, mappings). A region
        fetcher is built from genome_dir with load_references=True.

        Yields (qname, alignments) for each group of mappings sharing a
        qname (grouped via utilities.group_by). Each group is sorted by
        (tid, pos), and records flagged as unmapped are skipped, so
        alignments can be an empty list.
    '''
    # 'report_all' is deliberately left unset here.
    bowtie2_options = dict(
        local=True,
        report_up_to=10,
        seed_mismatches=1,
        seed_interval_function='C,1,0',
        seed_length=10,
    )

    map_result = mapping_tools.map_bowtie2(
        index_prefix,
        reads=reads,
        custom_binary=True,
        score_min=score_min,
        yield_mappings=True,
        **bowtie2_options
    )
    sam_file, mappings = map_result

    region_fetcher = genomes.build_region_fetcher(genome_dir, load_references=True)

    by_qname = lambda m: m.qname
    by_position = lambda m: (m.tid, m.pos)

    for qname, group in utilities.group_by(mappings, by_qname):
        in_order = sorted(group, key=by_position)
        mapped = (mapping for mapping in in_order if not mapping.is_unmapped)
        alignments = [
            mapping_to_alignment(mapping, sam_file, region_fetcher)
            for mapping in mapped
        ]
        yield qname, alignments
예제 #4
0
def pre_filter_paired(contaminant_index, read_pairs, bam_fn, error_fn):
    ''' Maps read_pairs against contaminant_index with bowtie2, sending BAM
        output to bam_fn and passing error_fn as error_file_name.

        Returns whatever mapping_tools.map_bowtie2 returns when called
        with yield_unaligned=True (presumably an iterator over the pairs
        that did not align - confirm against mapping_tools).
    '''
    bowtie2_kwargs = dict(
        output_file_name=bam_fn,
        bam_output=True,
        read_pairs=read_pairs,
        max_insert_size=1500,
        suppress_unaligned_SAM=True,
        report_all=True,
        error_file_name=error_fn,
        yield_unaligned=True,
    )
    return mapping_tools.map_bowtie2(contaminant_index, **bowtie2_kwargs)
예제 #5
0
def pre_filter(contaminant_index, reads, bam_fn, error_fn='/dev/null'):
    ''' Aligns reads against contaminant_index with bowtie2 and returns an
        iterator over the reads that fail to map.

        BAM output goes to bam_fn; error_fn is passed to map_bowtie2 as
        error_file_name and defaults to '/dev/null'.
    '''
    return mapping_tools.map_bowtie2(
        contaminant_index,
        reads=reads,
        output_file_name=bam_fn,
        error_file_name=error_fn,
        bam_output=True,
        report_all=True,
        omit_secondary_seq=True,
        suppress_unaligned_SAM=True,
        yield_unaligned=True,
    )
예제 #6
0
def pre_filter(contaminant_index, reads, bam_fn, error_fn="/dev/null"):
    """Filter out reads that map to contaminant_index.

    Runs mapping_tools.map_bowtie2 on reads with BAM output written to
    bam_fn and error_fn passed as error_file_name (default "/dev/null").

    Returns an iterator over the reads that don't map.
    """
    output_options = dict(
        output_file_name=bam_fn,
        bam_output=True,
        error_file_name=error_fn,
    )
    alignment_options = dict(
        report_all=True,
        omit_secondary_seq=True,
        suppress_unaligned_SAM=True,
        yield_unaligned=True,
    )

    bowtie2_kwargs = dict(reads=reads)
    bowtie2_kwargs.update(output_options)
    bowtie2_kwargs.update(alignment_options)

    return mapping_tools.map_bowtie2(contaminant_index, **bowtie2_kwargs)
예제 #7
0
def pre_filter_paired(
    contaminant_index,
    read_pairs,
    bam_fn,
    error_fn,
):
    """Run the paired-end contaminant filter.

    Calls mapping_tools.map_bowtie2 on read_pairs against
    contaminant_index with a maximum insert size of 1500, BAM output
    written to bam_fn, and error_fn passed as error_file_name.

    Returns the result of that call made with yield_unaligned=True
    (presumably an iterator over pairs that did not align - confirm
    against mapping_tools).
    """
    return mapping_tools.map_bowtie2(
        contaminant_index,
        read_pairs=read_pairs,
        max_insert_size=1500,
        report_all=True,
        output_file_name=bam_fn,
        bam_output=True,
        suppress_unaligned_SAM=True,
        error_file_name=error_fn,
        yield_unaligned=True,
    )
예제 #8
0
# Exploratory script: map the head of one FASTQ file with bowtie2 in local
# mode, then tally read lengths and the bases that follow the aligned
# portion of each mapped read.
#
# Alternative inputs; uncomment exactly one fastq_fn.
#fastq_fn = '/home/jah/projects/arlen/experiments/lareau_elife/Cycloheximide_replicate_1/data/SRR1363415.fastq'
#fastq_fn = '/home/jah/projects/arlen/experiments/arribere_gr/S288C_TLSeq2/data/SRR825166.fastq'
#fastq_fn = '/home/jah/projects/arlen/experiments/baudin-baillieu_cell_reports/traductome_PSI-_rep_2/data/SRR594901.fastq'
fastq_fn = '/home/jah/projects/arlen/experiments/baudin-baillieu_cell_reports/Ribo-seq_[PSI+]_rep1/data/SRR1190356.fastq'
index_prefix = '/home/jah/projects/arlen/data/organisms/saccharomyces_cerevisiae/EF4/genome/genome'

# Derived file names live next to the input: <root>_small.fastq / .sam
root, ext = os.path.splitext(fastq_fn)
small_fastq_fn = '{0}_small.fastq'.format(root)
small_sam_fn = '{0}_small.sam'.format(root)

# Copy the first 100000 lines of the input (25000 records if the file uses
# 4-line FASTQ records) into the small file. The output handle is opened
# in a with-block so it is closed deterministically before bowtie2 reads
# the file, rather than being left to garbage collection.
head_command = ['head', '-n', '100000', fastq_fn]
with open(small_fastq_fn, 'w') as small_fastq_fh:
    subprocess.check_call(head_command, stdout=small_fastq_fh)

# NOTE(review): the positional order here (reads, index, output) differs
# from call sites that pass the index prefix first - confirm against the
# installed version of mapping_tools.
mapping_tools.map_bowtie2(small_fastq_fn,
                          index_prefix,
                          small_sam_fn,
                          seed_length=12,
                          local=True)

# positions[k] counts the bases seen k positions past the end of the
# aligned portion of a read; only the first 40 such positions are tallied
# because zip() stops at the shorter of its inputs.
positions = [Counter() for _ in range(40)]

# Histogram of read.qlen over mapped reads.
qlens = Counter()

for read in pysam.Samfile(small_sam_fn):
    if read.is_unmapped:
        continue
    qlens[read.qlen] += 1
    # Everything from read.qend onward, i.e. what follows the aligned
    # portion of the read (presumably soft-clipped adapter sequence -
    # confirm against the pysam documentation for qend).
    trimmed = read.seq[read.qend:]
    for p, b in zip(positions, trimmed):
        p[b] += 1
예제 #9
0
# Exploratory script: map the head of one FASTQ file with bowtie2 in local
# mode, then tally read lengths and the bases that follow the aligned
# portion of each mapped read.
#
# Alternative inputs; uncomment exactly one fastq_fn.
#fastq_fn = '/home/jah/projects/arlen/experiments/belgium_3_5_14/wt/data/wt_cDNA.140219.HiSeq2500.FCB.lane1.R1.fastq'
#fastq_fn = '/home/jah/projects/arlen/experiments/dunn_elife/YCF182_110222_HiSeq.fq'
#fastq_fn = '/home/jah/projects/arlen/experiments/lareau_elife/Cycloheximide_replicate_1/data/SRR1363415.fastq'
#fastq_fn = '/home/jah/projects/arlen/experiments/arribere_gr/S288C_TLSeq2/data/SRR825166.fastq'
#fastq_fn = '/home/jah/projects/arlen/experiments/baudin-baillieu_cell_reports/traductome_PSI-_rep_2/data/SRR594901.fastq'
fastq_fn = '/home/jah/projects/arlen/experiments/baudin-baillieu_cell_reports/Ribo-seq_[PSI+]_rep1/data/SRR1190356.fastq'
index_prefix = '/home/jah/projects/arlen/data/organisms/saccharomyces_cerevisiae/EF4/genome/genome'

# Derived file names live next to the input: <root>_small.fastq / .sam
root, ext = os.path.splitext(fastq_fn)
small_fastq_fn = '{0}_small.fastq'.format(root)
small_sam_fn = '{0}_small.sam'.format(root)

# Copy the first 100000 lines of the input into the small file. The output
# handle is managed by a with-block so it is closed as soon as head
# finishes instead of lingering until garbage collection.
head_command = ['head', '-n', '100000', fastq_fn]
with open(small_fastq_fn, 'w') as small_fastq_fh:
    subprocess.check_call(head_command, stdout=small_fastq_fh)

# NOTE(review): positional order is (reads, index, output) here - confirm
# it matches the installed version of mapping_tools.map_bowtie2.
mapping_tools.map_bowtie2(small_fastq_fn, index_prefix, small_sam_fn, seed_length=12, local=True)

# One Counter per offset past the aligned portion of a read; zip() below
# stops at 40 offsets or the end of the leftover sequence, whichever
# comes first.
positions = [Counter() for _ in range(40)]

# Histogram of read.qlen over mapped reads.
qlens = Counter()

for read in pysam.Samfile(small_sam_fn):
    if read.is_unmapped:
        continue
    qlens[read.qlen] += 1
    # Sequence from read.qend to the end of the read (presumably the
    # soft-clipped tail left by local alignment - confirm against pysam).
    trimmed = read.seq[read.qend:]
    for p, b in zip(positions, trimmed):
        p[b] += 1

for p in positions:
    if not p: