예제 #1
0
    def select_pileup_range_and_truncate_output(
            self, pileup, startpos, endpos, pileup_select_range_expected_out,
            pileup_truncate_expected_out):
        """
        select_pileup_range_and_truncate_output - Selects a range of
        positions from the pileups and then truncates the output, checking
        the array form of the pileups after each of the two steps.

        NOTE(review): the name has no `test_` prefix, so default pytest
        discovery would presumably not collect it - confirm this is intended.

        INPUT:
            [2D ARRAY OF DICTIONARIES] [pileup]
            # each entry is used to construct one Pileup
            [INT] [startpos] # forwarded to select_pileup_range
            [INT] [endpos] # forwarded to select_pileup_range
            [2D ARRAY OF DICTIONARIES] [pileup_select_range_expected_out]
            # expected array form right after the range selection
            [2D ARRAY OF DICTIONARIES] [pileup_truncate_expected_out]
            # expected array form after the subsequent truncation

        RETURN:
            [None]

        POST:
            An AssertionError is raised if the array form of the pileups
            differs from the matching expected value after either step.
        """

        pileup_list = Pileup_List(list(map(Pileup, pileup)))

        # Step 1: restrict the pileups to the requested range and verify
        # that the result matches the expected post-selection array.
        pileup_list.select_pileup_range(startpos, endpos)
        after_selection = pileup_list.get_pileups_as_array()
        assert after_selection == pileup_select_range_expected_out

        # Step 2: truncate the already range-selected pileups and verify
        # the final array form.
        pileup_list.truncate_output()
        after_truncation = pileup_list.get_pileups_as_array()
        assert after_truncation == pileup_truncate_expected_out
예제 #2
0
    def test_remove_no_coverage(self, pileup, expected_truncated_pileup,
                                expected_left_pos_truncated,
                                expected_right_pos_truncated):
        """
        test_remove_no_coverage - Checks that, after calling
        remove_no_coverage on the pileups, both the resulting array and the
        reported left/right truncation counts are as expected.

        INPUT:
            [2D ARRAY OF DICTIONARIES] [pileup]
            # each entry is used to construct one Pileup
            [2D ARRAY OF DICTIONARIES] [expected_truncated_pileup]
            # expected array form after remove_no_coverage
            [expected_left_pos_truncated]
            # compared against get_num_left_positions_truncated()
            # (presumably an INT count - confirm against the fixture data)
            [expected_right_pos_truncated]
            # compared against get_num_right_positions_truncated()
            # (presumably an INT count - confirm against the fixture data)

        RETURN:
            [None]

        POST:
            An AssertionError is raised if any expected value does not
            match the actual output.
        """
        pileup_list = Pileup_List(list(map(Pileup, pileup)))
        pileup_list.remove_no_coverage()

        # Array form first, then the two truncation counters, each fetched
        # immediately before it is checked.
        actual_pileup = pileup_list.get_pileups_as_array()
        assert actual_pileup == expected_truncated_pileup

        left_truncated = pileup_list.get_num_left_positions_truncated()
        assert left_truncated == expected_left_pos_truncated

        right_truncated = pileup_list.get_num_right_positions_truncated()
        assert right_truncated == expected_right_pos_truncated
예제 #3
0
    def test_select_pileup_range_and_remove_no_coverage(
            self, pileup, startpos, endpos, expected_remove_no_coverage):
        """
        test_select_pileup_range_and_remove_no_coverage - Checks the array
        form of the pileups after first selecting a specified range and
        then calling remove_no_coverage.

        INPUT:
            [2D ARRAY OF DICTIONARIES] [pileup]
            # each entry is used to construct one Pileup
            [INT] [startpos] # forwarded to select_pileup_range
            [INT] [endpos] # forwarded to select_pileup_range
            [2D ARRAY OF DICTIONARIES] [expected_remove_no_coverage]
            # expected array form after both operations

        RETURN:
            [None]

        POST:
            An AssertionError is raised if the resulting array does not
            equal expected_remove_no_coverage.
        """

        pileup_list = Pileup_List(list(map(Pileup, pileup)))

        # Order matters: the range is selected before the no-coverage
        # positions are removed.
        pileup_list.select_pileup_range(startpos, endpos)
        pileup_list.remove_no_coverage()

        actual_pileup = pileup_list.get_pileups_as_array()
        assert actual_pileup == expected_remove_no_coverage
예제 #4
0
    def test_truncate_output(self, pileup, expected_truncated_pileup,
                             expected_left_pos_truncated,
                             expected_right_pos_truncated):
        """
        test_truncate_output - Checks that, after calling truncate_output
        on the pileups, both the resulting array and the reported
        left/right truncation counts are as expected.

        INPUT:
            [2D ARRAY OF DICTIONARIES] [pileup]
            # each entry is used to construct one Pileup
            [2D ARRAY OF DICTIONARIES] [expected_truncated_pileup]
            # expected array form after truncate_output
            [expected_left_pos_truncated]
            # compared against get_num_left_positions_truncated()
            # (presumably an INT count - confirm against the fixture data)
            [expected_right_pos_truncated]
            # compared against get_num_right_positions_truncated()
            # (presumably an INT count - confirm against the fixture data)

        RETURN:
            [None]

        POST:
            An AssertionError is raised if any expected value does not
            match the actual output.
        """
        pileup_list = Pileup_List(list(map(Pileup, pileup)))
        pileup_list.truncate_output()

        # Array form first, then the two truncation counters, each fetched
        # immediately before it is checked.
        actual_pileup = pileup_list.get_pileups_as_array()
        assert actual_pileup == expected_truncated_pileup

        left_truncated = pileup_list.get_num_left_positions_truncated()
        assert left_truncated == expected_left_pos_truncated

        right_truncated = pileup_list.get_num_right_positions_truncated()
        assert right_truncated == expected_right_pos_truncated
예제 #5
0
def build_pileup_from_haplotypes(haplotypes):
    """
    # ========================================================================

    BUILD PILEUP FROM HAPLOTYPES


    PURPOSE
    -------

    Creates a pileup from a list of Haplotype objects by summing, for every
    position, the count of each haplotype under the base that haplotype
    carries at that position.


    INPUT
    -----

    [HAPLOTYPE LIST] [haplotypes]
        A list of haplotypes. Each one is read through its `sequence` and
        `count` attributes. The number of positions is taken from the
        sequence of the first haplotype; bases beyond that length in later
        haplotypes are not read. (Presumably all haplotypes share the same
        sequence length - confirm against the caller.)


    RETURN
    ------

    [PILEUP] [pileup]
        A pileup object. It wraps an empty list when `haplotypes` is empty.

    # ========================================================================
    """

    columns = []

    if haplotypes:

        num_positions = len(haplotypes[0].sequence)

        # One dictionary (base -> accumulated count) per position.
        columns = [{} for _ in range(num_positions)]

        for haplotype in haplotypes:
            for index, column in enumerate(columns):

                base = haplotype.sequence[index]

                # A missing (or falsy) entry is simply replaced by this
                # haplotype's count; otherwise the count is accumulated.
                if not column.get(base):
                    column[base] = haplotype.count
                else:
                    column[base] += haplotype.count

    return Pileup(columns)
예제 #6
0
    def matrix(self, request):
        """
        matrix - test fixture for test_get_similarity_matrix function
                 and test_get_distance_matrix function

        INPUT:
            [LIST OF TUPLES]
            request.param[0]---[BOOL] [normalize] # normalized or not
            request.param[1]---[ARRAY] [pileup list]
            request.param[2]---[ARRAY] [pileup_files] # file names corresponding to pileups
            request.param[3]---[ARRAY] normalized or unnormalized similarity csv-format output
            request.param[4]---[ARRAY] normalized or unnormalized distance csv-format output
            request.param[5]---[INT or NONE] [startpos or default if NONE]
            request.param[6]---[INT or NONE] [endpos or default if NONE]

        RETURN:
            [DistanceMatrix] [matrix built from the numerical array of the
                              pileups and the pileup file names]

        POST:
            self.expected_csv_similarity is set to request.param[3], the
            expected similarity output for this matrix.

            self.expected_csv_distance is set to request.param[4], the
            expected distance output for this matrix.
        """
        params = request.param

        pileup_list = Pileup_List([Pileup(entry) for entry in params[1]])

        # A range is only selected when BOTH bounds are exactly of type int
        # (so None - and also bool - leaves the pileups untouched). The end
        # position is only looked at once the start position qualifies.
        startpos = params[5]
        if type(startpos) is int:
            endpos = params[6]
            if type(endpos) is int:
                pileup_list.select_pileup_range(startpos, endpos)

        # Normalization is requested only by the literal True flag.
        normalize = params[0]
        if normalize is True:
            pileup_list.normalize_pileups()

        # Build the matrix from the (possibly selected/normalized) pileups.
        numerical_pileups = pileup_list.get_pileups_as_numerical_array()
        distance_matrix = DistanceMatrix(numerical_pileups, params[2])

        # Expose the expected outputs to the tests using this fixture.
        self.expected_csv_similarity = params[3]
        self.expected_csv_distance = params[4]

        return distance_matrix
예제 #7
0
def parse_pileup_from_fasta(reads_location, gaps=False):
    """
    # ========================================================================

    PARSE PILEUP FROM FASTA


    PURPOSE
    -------

    Parses an aligned FASTA file and returns a Pileup object corresponding
    to the aligned FASTA file. Every read contributes a count of one to the
    base it carries at each position.


    INPUT
    -----

    [(FASTA) FILE LOCATION] [reads_location]
        The file location of the aligned FASTA file. The number of pileup
        positions is taken from the length of the first read. (Presumably
        all reads share that length because the file is aligned - confirm
        against the caller.)

    [BOOLEAN] [gaps]
        Whether or not to include gaps in the pileup. This is False by
        default, in which case the GAP entry is removed from every position.


    RETURN
    ------

    [Pileup]
        A new pileup object constructed from the information in the aligned
        FASTA file. If the file contains no reads, an empty pileup is
        returned instead of letting StopIteration escape to the caller.

    # ========================================================================
    """

    reads = Bio.SeqIO.parse(reads_location, "fasta")

    # Use a default so that an empty FASTA file does not raise
    # StopIteration; it simply produces a pileup with no positions.
    read = next(reads, None)

    if read is None:
        return Pileup([])

    # One dictionary (base -> number of reads) per aligned position.
    pileup = [{} for _ in range(len(read))]

    while read is not None:

        for i, base in enumerate(read):

            column = pileup[i]
            column[base] = column.get(base, 0) + 1

        read = next(reads, None)

    # Remove the gaps from the pileup.
    if not gaps:

        for position in pileup:

            position.pop(GAP, None)

    return Pileup(pileup)
예제 #8
0
def parse_pileup_from_bam(references, bam_location):
    """
    PARSE PILEUP FROM BAM


    PURPOSE
    -------

    Constructs a Pileup object from reference objects and a BAM file. For
    every reference, the per-base coverage over the whole reference length
    is queried from the BAM file, and one dictionary per position is
    appended to the pileup. References are processed in the given order and
    their positions are concatenated into a single pileup.


    INPUT
    -----

    [LIST (REFERENCE)] [references]
        A list of quasitools Reference objects. Each one is read through
        its `name` (used as the contig) and `seq` (used for its length).


    [(BAM) FILE LOCATION] [bam_location]
        The file location of the aligned BAM file from which to build the
        pileup object.


    RETURN
    ------

    [Pileup]
        A new pileup object constructed from the information in the Reference
        object(s) and the BAM file. Each position maps the bases "A", "C",
        "G" and "T" to their coverage; bases with zero coverage are omitted.

    """

    # Order in which PySam's count_coverage returns its per-base arrays.
    bases = ("A", "C", "G", "T")

    pileup = []

    # The context manager guarantees the BAM file handle is closed, even
    # if querying the coverage raises.
    with pysam.AlignmentFile(bam_location, "rb") as samfile:

        for reference in references:

            coverage = samfile.count_coverage(contig=reference.name,
                                              start=0,
                                              stop=len(reference.seq),
                                              quality_threshold=0)

            # Transpose the four per-base arrays into per-position tuples.
            for column_counts in zip(*coverage):

                pileup.append({base: depth
                               for base, depth in zip(bases, column_counts)
                               if depth > 0})

    return Pileup(pileup)