Esempio n. 1
0
 def get_kmer_reads(self, kmer_values, read_items):
     """Look up the reads containing a kmer, choosing the read order from the kmer's order tag.

     The read order handed to assemblyUtils.find_reads is 'rev' when the order
     tag is 'for', or when the tag is 'mid' and the below-midpoint flag equals 0.
     In every other case it is 'for'.

     Args:
         kmer_values: Five-item tuple describing a kmer aligned in a read sequence:
                      1. String kmer sequence.
                      2. Integer kmer alignment position in sequence (unused here).
                      3. Boolean whether kmer align position is below midpoint of sequence.
                      4. Integer of absolute difference between align position and
                         midpoint (unused here).
                      5. String of the order for tuples in a list.
         read_items: List of tuples for sequence reads:
                     1. String read sequence.
                     2. List of fq_read objects with read sequence.
     Return:
         The value returned by assemblyUtils.find_reads; a list of tuples containing:
                1. read object,
                2. start position of kmer match in read seq
                3. Boolean that a match was found.
                4. Length of the read sequence.
                5. Number of reads with this sequence.
     """
     kmer, _kmer_pos, below_midpoint, _dist_half, order = kmer_values
     if order == 'mid':
         # For 'mid', only a kmer that is not below the midpoint flips the order.
         use_reverse = below_midpoint == 0
     else:
         use_reverse = order == 'for'
     read_order = 'rev' if use_reverse else 'for'
     return assemblyUtils.find_reads(kmer, read_items, self.buffer, read_order)
Esempio n. 2
0
    def get_mer_reads(self, kmer_values, read_items):
        """Return the reads found for a kmer, using an order-dependent read order.

        Args:
            kmer_values: Five-item sequence unpacked as (kmer sequence, position,
                         below-midpoint flag, distance from midpoint, order tag).
                         Only the kmer sequence, the flag and the order tag are
                         used here.
            read_items:  Passed through unchanged to assembly_utils.find_reads.
        Return:
            Whatever assembly_utils.find_reads returns for the kmer.
        """
        mer, _pos, below_midpoint, _dist_half, order = kmer_values
        if order == 'mid':
            # A 'mid' kmer is searched in 'rev' order only when its flag equals 0.
            read_order = 'rev' if below_midpoint == 0 else 'for'
        else:
            # Any tag other than 'mid': 'for' maps to 'rev', the rest stay 'for'.
            read_order = 'rev' if order == 'for' else 'for'
        return assembly_utils.find_reads(mer, read_items, self.buffer, read_order)
Esempio n. 3
0
    def get_mer_reads(self, kmer_values, read_items):
        """Find the reads for a kmer, selecting the read order from its order tag.

        The read order is 'rev' when the order tag is 'mid' and the
        below-midpoint flag equals 0, or when the order tag is 'for'.
        It is 'for' in all other cases.

        Args:
            kmer_values: Five-item sequence: kmer sequence, position,
                         below-midpoint flag, distance from midpoint, order tag.
                         The position and distance are not used here.
            read_items:  Forwarded as-is to assembly_utils.find_reads.
        Return:
            The result of assembly_utils.find_reads.
        """
        mer, _pos, below_midpoint, _dist_half, order = kmer_values
        search_reversed = (order == 'mid' and below_midpoint == 0) or order == 'for'
        return assembly_utils.find_reads(
            mer,
            read_items,
            self.buffer,
            'rev' if search_reversed else 'for',
        )
Esempio n. 4
0
def setup_contigs(kmer_seq, fq_recs, kmer_len, kmer_tracker, contig_buffer):
    '''Seed a contig from a kmer and the reads found for it.

    The reads for kmer_seq are looked up with assembly_utils.find_reads. The
    first read returned starts a new contig, which is registered with
    contig_buffer; each later read is passed to that contig's check_read.
    If a contig was created, it is finalized before returning.

    Args:
        kmer_seq (str):               Kmer sequence.
        fq_recs (dict):               Read sequences stored as keys with associated list of fq_read objects.
        kmer_len (int):               Number of base pairs in kmer sequence.
        kmer_tracker (KmerTracker):   Object containing all the kmer values.
        contig_buffer (ContigBuffer): Object to track the buffered contig objects.
    Returns:
        None
    '''

    # find_reads yields five-item tuples:
    #   (fq_read object, kmer start position in the read, match-found flag,
    #    read sequence length, number of reads with this sequence)
    matching_reads = assembly_utils.find_reads(kmer_seq, fq_recs.items(), set())
    contig_buffer.add_used_kmer(kmer_seq)
    seed_kmer = assembly_utils.KmerUnit(
        kmer_seq,
        kmer_tracker.get_count(kmer_seq),
        kmer_tracker.kmer_seqs,
        kmer_len,
    )

    seeded_contig = None
    for read, align_pos, _found, _read_len, read_count in matching_reads:
        alignment = {'read': read, 'align_pos': align_pos, 'nreads': read_count}
        contig_buffer.add_used_read(read.id)

        if seeded_contig:
            # A contig already exists: offer this read to it.
            seeded_contig.check_read(
                kmer_seq,
                kmer_tracker.get_count(kmer_seq),
                read,
                align_pos,
                read_count,
                kmer_tracker.kmer_seqs,
                'setup',
            )
            continue

        # First read seen: it becomes the seed of the new contig.
        seeded_contig = contig.Contig(seed_kmer, alignment)
        contig_buffer.add_contig(read, seeded_contig)

    if seeded_contig:
        seeded_contig.finalize(fq_recs, kmer_tracker, contig_buffer, 'setup')
Esempio n. 5
0
def setup_contigs(kmerSeq, fqRecs, kmerLen, kmerTracker, contigBuffer):
    """Build a contig seeded by a kmer and the reads that were found for it.

    Reads for kmerSeq are fetched with assemblyUtils.find_reads. The first read
    creates the contig and registers it in contigBuffer; every following read is
    passed to the contig's check_read. A created contig is finalized at the end.

    Args:
        kmerSeq:        String of kmer sequence.
        fqRecs:         Dictionary with sequence values as keys and a list of fq_read objects.
        kmerLen:        Integer of kmer size.
        kmerTracker:    KmerTracker object that contains all the kmer values.
        contigBuffer:   ContigBuffer object to track the buffered contig objects.
    Return: None
    """
    # NOTE(review): this logger is obtained but never used in this function.
    logger = logging.getLogger('breakmer.assembly.assembler')

    # Each item from find_reads is a five-item tuple:
    #   1. fq_read object defined in breakmer.utils.py
    #   2. Starting position of the kmer match in the read sequence
    #   3. Boolean that a match was found.
    #   4. Length of the read sequence.
    #   5. Number of reads with this sequence.
    matchingReads = assemblyUtils.find_reads(kmerSeq, fqRecs.items(), set())
    contigBuffer.add_used_mer(kmerSeq)
    seedKmer = assemblyUtils.Kmer(
        kmerSeq,
        kmerTracker.get_count(kmerSeq),
        kmerTracker.kmerSeqs,
        kmerLen,
    )

    seededContig = None
    for read, alignPos, _matchFound, _seqLen, readCount in matchingReads:
        alignment = {'read': read, 'align_pos': alignPos, 'nreads': readCount}
        contigBuffer.add_used_read(read.id)
        if seededContig:
            # Contig already exists: let it decide whether to take this read.
            seededContig.check_read(seedKmer, alignment, 'setup')
        else:
            # No contig yet: the first read seeds it.
            seededContig = contig_assembler.Contig(seedKmer, alignment)
            contigBuffer.add_contig(read, seededContig)

    if seededContig:
        seededContig.finalize(fqRecs, kmerTracker, contigBuffer, 'setup')
Esempio n. 6
0
def setup_contigs(kmer_seq, fq_recs, kmer_len, kmer_tracker, contig_buffer):
    '''Start a contig from a seed kmer and extend it with the reads found for that kmer.

    assembly_utils.find_reads supplies the reads for kmer_seq. A contig is
    created from the first read and added to contig_buffer; the remaining reads
    are each passed to the contig's check_read. When a contig exists after the
    loop, it is finalized.

    Args:
        kmer_seq (str):               Kmer sequence.
        fq_recs (dict):               Read sequences stored as keys with associated list of fq_read objects.
        kmer_len (int):               Number of base pairs in kmer sequence.
        kmer_tracker (KmerTracker):   Object containing all the kmer values.
        contig_buffer (ContigBuffer): Object to track the buffered contig objects.
    Returns:
        None
    '''

    # Items returned by find_reads are five-item tuples:
    #   1. fq_read object defined in breakmer.utils.py
    #   2. Starting position of the kmer match in the read sequence
    #   3. Boolean that a match was found.
    #   4. Length of the read sequence.
    #   5. Number of reads with this sequence.
    found_reads = assembly_utils.find_reads(kmer_seq, fq_recs.items(), set())
    contig_buffer.add_used_kmer(kmer_seq)
    seed_unit = assembly_utils.KmerUnit(
        kmer_seq,
        kmer_tracker.get_count(kmer_seq),
        kmer_tracker.kmer_seqs,
        kmer_len,
    )

    current_contig = None
    for read, match_pos, _matched, _seq_len, n_with_seq in found_reads:
        align_info = {
            'read': read,
            'align_pos': match_pos,
            'nreads': n_with_seq,
        }
        contig_buffer.add_used_read(read.id)
        if current_contig:
            # Later reads are checked against the contig that already exists.
            current_contig.check_read(
                kmer_seq,
                kmer_tracker.get_count(kmer_seq),
                read,
                match_pos,
                n_with_seq,
                kmer_tracker.kmer_seqs,
                'setup',
            )
        else:
            # The first read creates the contig and registers it in the buffer.
            current_contig = contig.Contig(seed_unit, align_info)
            contig_buffer.add_contig(read, current_contig)

    if not current_contig:
        return
    current_contig.finalize(fq_recs, kmer_tracker, contig_buffer, 'setup')
Esempio n. 7
0
def setup_contigs(kmerSeq, fqRecs, kmerLen, kmerTracker, contigBuffer):
    """Create a contig from a seed kmer and feed it the reads found for that kmer.

    The reads come from assemblyUtils.find_reads. The first one builds the
    contig, which is then added to contigBuffer; each subsequent read goes
    through the contig's check_read. The contig, if any, is finalized last.

    Args:
        kmerSeq:        String of kmer sequence.
        fqRecs:         Dictionary with sequence values as keys and a list of fq_read objects.
        kmerLen:        Integer of kmer size.
        kmerTracker:    KmerTracker object that contains all the kmer values.
        contigBuffer:   ContigBuffer object to track the buffered contig objects.
    Return: None
    """
    # NOTE(review): the logger is fetched here but not used below.
    logger = logging.getLogger('breakmer.assembly.assembler')

    # find_reads returns five-item tuples:
    #   (fq_read object, kmer start position in the read, match-found flag,
    #    read sequence length, number of reads with this sequence)
    readHits = assemblyUtils.find_reads(kmerSeq, fqRecs.items(), set())
    contigBuffer.add_used_mer(kmerSeq)
    seedKmer = assemblyUtils.Kmer(
        kmerSeq, kmerTracker.get_count(kmerSeq), kmerTracker.kmerSeqs, kmerLen
    )

    builtContig = None
    for read, hitPos, _hitFound, _readLen, nReads in readHits:
        alignValues = {
            'read': read,
            'align_pos': hitPos,
            'nreads': nReads,
        }
        contigBuffer.add_used_read(read.id)

        if builtContig:
            # Check if read should be added to the existing contig.
            builtContig.check_read(seedKmer, alignValues, 'setup')
            continue

        # No contig yet, so build one from this read.
        builtContig = contig_assembler.Contig(seedKmer, alignValues)
        contigBuffer.add_contig(read, builtContig)

    if not builtContig:
        return
    builtContig.finalize(fqRecs, kmerTracker, contigBuffer, 'setup')