Ejemplo n.º 1
0
def get_inferred_sequences(pairs, genome_dict, add_softclipped_bases=False):
    """Build the reference sequence spanned by each read pair.

    For every ``(read1, read2)`` pair the reference is sliced from
    ``read1.reference_start`` to ``read2.reference_end``.  When
    ``add_softclipped_bases`` is true, the strict left soft-clipped sequence
    of ``read1`` is prepended and the strict right soft-clipped sequence of
    ``read2`` is appended (both obtained from ``sctools``).

    A query name containing exactly two underscores is read as
    ``<prefix>_<context_width>_<strand>``: ``context_width`` positions are
    trimmed from both ends of the sequence and the reported coordinates are
    moved inwards by the same amount.  Any other query name means no
    trimming.  In both cases a query name whose last ``_``-separated field is
    ``'2'`` causes the sequence to be reverse-complemented via
    ``misc.revcomp``.

    Args:
        pairs: Iterable of ``(read1, read2)`` tuples.  The reads must expose
            ``query_name``, ``reference_name``, ``reference_start`` and
            ``reference_end`` (presumably pysam aligned segments -- confirm
            against the caller).
        genome_dict: Mapping from contig name to a sliceable sequence.
        add_softclipped_bases: Whether to include soft-clipped bases.

    Returns:
        A list of ``(name, length, contig_edge, sequence)`` tuples, one per
        pair, where ``name`` is ``"<contig>:<start>-<end>"`` and
        ``contig_edge`` is True when a soft-clipped position falls before the
        start or at/after the end of the contig.

    Raises:
        ValueError: If a three-field query name has a non-integer width field.
    """
    inferred_sequences = []
    for read1, read2 in pairs:
        # Exactly two underscores <=> exactly three fields.
        name_fields = read1.query_name.split('_')
        if len(name_fields) == 3:
            context_width = int(name_fields[-2])
        else:
            context_width = 0

        name = read1.reference_name + ':' + str(
            read1.reference_start + context_width) + '-' + str(
                read2.reference_end - context_width)

        inferred_sequence = genome_dict[read1.reference_name][
            read1.reference_start:read2.reference_end]

        if add_softclipped_bases:
            inferred_sequence = sctools.left_softclipped_sequence_strict(
                read1
            ) + inferred_sequence + sctools.right_softclipped_sequence_strict(
                read2)

        # ``seq[0:-0]`` is ``seq[0:0]`` (empty), so a zero width must skip
        # the trim entirely instead of wiping out the sequence.
        if context_width != 0:
            inferred_sequence = inferred_sequence[context_width:-context_width]

        if name_fields[-1] == '2':
            inferred_sequence = misc.revcomp(inferred_sequence)

        # The pair touches a contig edge when a soft clip points outside the
        # contig on either side.
        contig_edge = False
        if sctools.is_left_softclipped_strict(read1) and \
                sctools.left_softclipped_position(read1) < 0:
            contig_edge = True
        elif sctools.is_right_softclipped_strict(read2) and \
                sctools.right_softclipped_position(read2) >= len(
                    genome_dict[read2.reference_name]):
            contig_edge = True

        inferred_sequences.append(
            (name, len(inferred_sequence), contig_edge, inferred_sequence))

    return inferred_sequences
Ejemplo n.º 2
0
    def get_inferred_sequence(self, forward_read, reverse_read, is_reverse):
        """Infer the sequence spanned by a read pair and flag contig edges.

        The reference for the forward read's contig is sliced from
        ``forward_read.reference_start`` to ``reverse_read.reference_end``,
        the strict soft-clipped sequences from ``sctools`` are added on both
        sides, and ``self.context_width`` positions are trimmed from each end.

        Args:
            forward_read: Read providing the contig name, the start
                coordinate and the left soft clip.
            reverse_read: Read providing the end coordinate and the right
                soft clip.
            is_reverse: When true, the result is reverse-complemented with
                ``misc.revcomp``.

        Returns:
            A ``(inferred_sequence, contig_edge)`` tuple.  ``contig_edge`` is
            True when the left soft-clipped position is negative or the right
            soft-clipped position is at or beyond the contig length.
        """
        contig = forward_read.reference_name
        start = forward_read.reference_start
        end = reverse_read.reference_end

        inferred_sequence = ''.join(self.genome_dict[contig][start:end])

        inferred_sequence = sctools.left_softclipped_sequence_strict(forward_read) + \
                            inferred_sequence + \
                            sctools.right_softclipped_sequence_strict(reverse_read)

        # ``seq[0:-0]`` is ``seq[0:0]`` (empty), so a zero context width must
        # skip the trim instead of discarding the whole sequence.
        if self.context_width != 0:
            inferred_sequence = inferred_sequence[self.context_width:-self.context_width]

        if is_reverse:
            inferred_sequence = misc.revcomp(inferred_sequence)

        # A soft clip pointing outside the contig on either side marks an edge.
        contig_edge = False
        if sctools.is_left_softclipped_strict(forward_read) and \
                        sctools.left_softclipped_position(forward_read) < 0:
            contig_edge = True
        elif sctools.is_right_softclipped_strict(reverse_read) and \
                        sctools.right_softclipped_position(reverse_read) >= len(self.genome_dict[contig]):
            contig_edge = True

        return inferred_sequence, contig_edge
Ejemplo n.º 3
0
    def get_inferred_sequence(self, forward_read, reverse_read, is_reverse):
        """Return the sequence spanned by a read pair, soft clips included.

        The reference of the forward read's contig is sliced from
        ``forward_read.reference_start`` to ``reverse_read.reference_end`` and
        joined into a string.  The strict left soft-clipped sequence of the
        forward read is prepended and the strict right soft-clipped sequence
        of the reverse read is appended (both from ``sctools``).  When
        ``is_reverse`` is true the result is passed through ``misc.revcomp``.
        """
        contig = forward_read.reference_name
        start = forward_read.reference_start
        end = reverse_read.reference_end

        spanned = ''.join(self.genome_dict[contig][start:end])

        # Extend outwards one side at a time: left clip first, then right.
        with_left_clip = sctools.left_softclipped_sequence_strict(forward_read) + spanned
        full_sequence = with_left_clip + sctools.right_softclipped_sequence_strict(reverse_read)

        if not is_reverse:
            return full_sequence
        return misc.revcomp(full_sequence)
Ejemplo n.º 4
0
def get_inferred_sequences(pairs, genome_dict, add_softclipped_bases=False):
    """Collect the reference sequence spanned by each read pair.

    Each ``(read1, read2)`` pair yields the slice of
    ``genome_dict[read1.reference_name]`` running from
    ``read1.reference_start`` to ``read2.reference_end``.  With
    ``add_softclipped_bases`` set, the strict left soft clip of ``read1`` and
    the strict right soft clip of ``read2`` (from ``sctools``) are attached
    to the ends.  If ``read1.is_read2`` is true the sequence is passed
    through ``misc.revcomp``.

    Returns:
        A list of ``(name, length, sequence)`` tuples, one per pair, where
        ``name`` has the form ``"<contig>:<start>-<end>"``.
    """
    results = []
    for first, second in pairs:
        contig = first.reference_name
        begin = first.reference_start
        stop = second.reference_end

        label = contig + ':' + (str(begin) + '-' + str(stop))
        sequence = genome_dict[contig][begin:stop]

        if add_softclipped_bases:
            # Extend outwards one side at a time: left clip, then right clip.
            sequence = sctools.left_softclipped_sequence_strict(first) + sequence
            sequence = sequence + sctools.right_softclipped_sequence_strict(second)

        if first.is_read2:
            sequence = misc.revcomp(sequence)

        results.append((label, len(sequence), sequence))

    return results