Exemple #1
0
    def __get_cds_rel_pos(self, cdss, cds_id, aln_loc):
        '''Classifies the CDS at index cds_id relative to the alignment.

            @param  [Cds]       cdss    List of CDSs
            @param  (int)       cds_id  Index into cdss of the CDS to classify
            @param  (Location)  aln_loc Alignment Location

            @return (string)    LEFT_OF_ALN  - CDS ends before the alignment starts
                                RIGHT_OF_ALN - CDS starts after the alignment ends,
                                               or it is not the first to overlap
                                FIRST        - first CDS to overlap the alignment
        '''
        current = Location.from_location_str(cdss[cds_id].location)

        # No overlap at all: the CDS lies entirely on one side.
        if current.end < aln_loc.start:
            return "LEFT_OF_ALN"
        if current.start > aln_loc.end:
            return "RIGHT_OF_ALN"

        # From here on the CDS is treated as overlapping. With no
        # predecessor in the list it is the first one.
        if cds_id == 0:
            return "FIRST"

        # Otherwise it is first only when the preceding CDS does not overlap.
        previous = Location.from_location_str(cdss[cds_id - 1].location)
        if self.__overlap(previous, aln_loc):
            return "RIGHT_OF_ALN"
        return "FIRST"
Exemple #2
0
    def __get_cds_rel_pos(self, cdss, cds_id, aln_loc):
        '''Tells where the CDS at index cds_id lies relative to the alignment.

            @param  [Cds]       cdss    List of CDSs
            @param  (int)       cds_id  Index into cdss of the CDS we are looking at
            @param  (Location)  aln_loc Alignment Location

            @return (string)    LEFT_OF_ALN  - fully left of alignment
                                RIGHT_OF_ALN - fully right, or overlapping but not
                                               the first one to overlap
                                FIRST        - first to overlap
        '''
        this_loc = Location.from_location_str(cdss[cds_id].location)

        ends_before_aln = this_loc.end < aln_loc.start
        if ends_before_aln:
            return "LEFT_OF_ALN"

        starts_after_aln = this_loc.start > aln_loc.end
        if starts_after_aln:
            return "RIGHT_OF_ALN"

        # The CDS is first when nothing precedes it, or when its predecessor
        # does not overlap. The `or` short-circuits so index -1 is never read.
        is_first = cds_id == 0 or not self.__overlap(
            Location.from_location_str(cdss[cds_id - 1].location), aln_loc)

        return "FIRST" if is_first else "RIGHT_OF_ALN"
Exemple #3
0
    def testInstersectionWithoutComplementInformation(self):
        # One plain location and one complement location whose coordinates
        # overlap (1..10 versus 5..15).
        forward = Location.from_location_str('1..10')
        reverse = Location.from_location_str('complement(5..15)')

        # With complement information ignored, an intersection is expected.
        ignoring_strand = forward.intersects(reverse, use_complement=False)
        self.assertTrue(ignoring_strand, 'The locations should intersect')

        # With complement information used, no intersection is expected.
        honouring_strand = forward.intersects(reverse, use_complement=True)
        self.assertFalse(honouring_strand, "The locations should't intersect")
Exemple #4
0
    def testSingleLocation(self):
        # A single-position entry inside a join must be hit by a point query,
        # whether it is listed first or last in the join.
        cases = (
            ('join(311,400..854)', 311),
            ('join(14424..14857,1)', 1),
        )
        for location_str, point in cases:
            location = Location.from_location_str(location_str)
            target = Location.from_location((point, ))
            self.assertTrue(location.intersects(target),
                            "Location doesn't contain target point")
Exemple #5
0
    def testSingleLocationWithTolerance(self):
        # Each case pairs a location with an open-ended bound ('<' or '>')
        # and a point lying outside the written coordinates; both are
        # parsed with tolerance=10.
        cases = (
            ('join(<10,61..86,162..203,264..318,388..>495)', 5),
            ('join(<1..129,>657)', 660),
        )
        for location_str, point in cases:
            location = Location.from_location_str(location_str, tolerance=10)
            target = Location.from_location((point, ))
            self.assertTrue(location.intersects(target),
                            "Location doesn't contain target point")
Exemple #6
0
    def testLocationMinimum(self):
        # (location string, expected min(), failure message)
        expectations = (
            ('join(1..10,11..50)', 1, 'Minimum should be 1'),
            ('complement(15..20)', 15, 'Minimum should be 15'),
            ('REF2:5..10', 5, 'Minimum should be 5'),
            ('complement(join(1..10,11..50))', 1, 'Minimum should be 1'),
            ('complement(join(15..20,1..2))', 1, 'Minimum should be 1'),
        )

        # Parse every location before asserting anything, so a parsing
        # error surfaces ahead of any comparison.
        parsed = [Location.from_location_str(text)
                  for text, _, _ in expectations]

        for location, (_, minimum, message) in zip(parsed, expectations):
            self.assertEqual(location.min(), minimum, message)
Exemple #7
0
    def testLocationOverlap(self):
        plain = Location.from_location_str('join(1..10,11..50)')
        reverse = Location.from_location_str('complement(join(1..2,15..20))')

        # start/end are expected to be the outer bounds of the whole join.
        self.assertEqual(1, plain.start, "Start should be 1")
        self.assertEqual(50, plain.end, "End should be 50")

        self.assertEqual(1, reverse.start, "Start should be 1")
        self.assertEqual(20, reverse.end, "End should be 20")

        overlap_ignoring_strand = plain.overlaps(reverse, use_complement=False)
        self.assertTrue(overlap_ignoring_strand, 'Locations should overlap')

        # NOTE(review): the failure message below says "shouldn't overlap"
        # while the assertion requires an overlap. One of the two is wrong;
        # confirm the intended overlaps() semantics.
        overlap_with_strand = plain.overlaps(reverse, use_complement=True)
        self.assertTrue(overlap_with_strand, "Locations shouldn't overlap")
Exemple #8
0
    def __find_first_overlapping_CDS_id(self, aln_location, cdss):
        ''' Binary search for the first CDS that overlaps the alignment.

            @param   (Location) aln_location    Alignment Location
            @param   [Cds]      cdss            List of CDSs, sorted by start
            @returns (int|None)                 Index in cdss of that CDS,
                                                None if nothing overlaps
        '''
        # Nothing to search in an empty list.
        if len(cdss) == 0:
            return None

        left, right = 0, len(cdss) - 1

        while left < right:
            middle = (left + right) // 2  # floor division, also on python 3

            verdict = self.__get_cds_rel_pos(cdss, middle, aln_location)

            if verdict == "FIRST":
                return middle
            if verdict == "LEFT_OF_ALN":
                left = middle + 1
            elif verdict == "RIGHT_OF_ALN":
                right = middle - 1

        # The loop ended without a FIRST verdict: the CDS at `left` is the
        # only remaining candidate.
        candidate = Location.from_location_str(cdss[left].location)
        return left if self.__overlap(candidate, aln_location) else None
Exemple #9
0
    def __find_first_overlapping_CDS_id(self, aln_location, cdss):
        ''' Locates the first CDS overlapping the given alignment.
            The list is narrowed down with a binary search.

            @param   (Location) aln_location    Alignment Location
            @param   [Cds]      cdss            List of CDSs, sorted by start
            @returns (int|None)                 Id in cdss of that CDS, None if no overlap
        '''
        last_index = len(cdss) - 1

        # Empty list -> there is nothing that could overlap
        if last_index < 0:
            return None

        low, high = 0, last_index
        while low < high:
            probe = low + (high - low) // 2  # '//' for python 3 compatibility
            position = self.__get_cds_rel_pos(cdss, probe, aln_location)

            if position == "FIRST":
                return probe
            elif position == "LEFT_OF_ALN":
                # Probe is left of the alignment: discard it and what precedes it
                low = probe + 1
            elif position == "RIGHT_OF_ALN":
                # Probe is right of (or not first in) the alignment
                high = probe - 1

        # Whatever is left at `low` has not been confirmed yet
        remaining = Location.from_location_str(cdss[low].location)
        if not self.__overlap(remaining, aln_location):
            return None
        return low
Exemple #10
0
    def get_cds_location(self):
        '''Builds a Location from the location string of the associated CDS.

        Args:
            None
        Returns:
            (Location): Result of Location.from_location_str applied to
                self.cds.location.
        '''
        location_str = self.cds.location
        return Location.from_location_str(location_str)
Exemple #11
0
    def testIntersectionLocation(self):

        # No intersection: locations placed before and after 50..100,
        # including the directly adjacent ones (1..49 and 101..150).
        base = Location.from_location_str('complement(50..100)')
        disjoint_strs = (
            'complement(200..500)',
            'complement(1..40)',
            'complement(1..49)',
            'complement(101..150)',
        )
        for disjoint_str in disjoint_strs:
            other = Location.from_location_str(disjoint_str)
            self.assertEqual(None, base.find_intersection(other))

        # Single interval: the intersection of 10..100 with 50..100 is
        # expected to equal the second location.
        outer = Location.from_location_str('10..100')
        inner = Location.from_location_str('50..100')
        common = outer.find_intersection(inner)
        self.assertEqual(inner.start, common.start,
                         "Start intersection position doesn't match")
        self.assertEqual(inner.end, common.end,
                         "End intersection position doesn't match")
        self.assertEqual(inner.complement, common.complement,
                         "Complement intersection information doesn't match")

        # Multiple intervals: every sublocation is compared pairwise with
        # the expected clipped join.
        joined = Location.from_location_str('join(1..40,60..80,120..200)')
        window = Location.from_location_str('30..130')
        expected = Location.from_location_str('join(30..40,60..80,120..130)')
        common = joined.find_intersection(window)
        pairs = zip(common.sublocations, expected.sublocations)
        for actual_sub, expected_sub in pairs:
            self.assertEqual(expected_sub.start, actual_sub.start,
                             "Start intersection position doesn't match")
            self.assertEqual(expected_sub.end, actual_sub.end,
                             "End intersection position doesn't match")
        self.assertEqual(expected.complement, common.complement,
                         "Complement intersection information doesn't match")
Exemple #12
0
 def _calc_coverage(self, cds_aln):
     """ Computes the coverage of the given cds alignment.
     The lengths of all aligned regions are added up and divided by the
     length of the cds location.
     @param (CdsAlignment) cds_aln
     @return (float) coverage
     """
     # Each value of aligned_regions carries a `location` with a length().
     aligned_total = sum(
         region.location.length()
         for region in cds_aln.aligned_regions.values())
     cds_length = Location.from_location_str(cds_aln.cds.location).length()
     # float() keeps the division from truncating on python 2.
     return aligned_total / float(cds_length)
Exemple #13
0
def calc_cds_coverage(cds_aln):
    """ Computes the coverage of a cds.
    The lengths of all aligned regions are summed and divided by the
    length of the cds location.
    @param (CdsAlignment) cds_aln
    @return (float) Cds coverage.
    """
    # Each value of aligned_regions carries a `location` with a length().
    aligned_total = sum(
        region.location.length()
        for region in cds_aln.aligned_regions.values())
    cds_length = Location.from_location_str(cds_aln.cds.location).length()
    # float() keeps the division from truncating on python 2.
    return aligned_total / float(cds_length)
Exemple #14
0
    def testIntersectionsWithTolerance(self):
        location = Location.from_location_str(
            "complement(join(<197..1301,2070..>2451))", tolerance=100)

        # (query span, whether the complement query is expected to intersect)
        # The non-complement query is never expected to intersect.
        cases = (
            ((100, ), True),
            ((300, ), True),
            ((50, 100), True),
            ((300, 400), True),
            ((1200, 1400), True),
            ((2500, 2600), True),
            ((3000, 4000), False),
        )

        for span, hits_on_complement in cases:
            plain_query = Location.from_location(span)
            self.assertFalse(location.intersects(plain_query))

            complement_query = Location.from_location(span, complement=True)
            outcome = location.intersects(complement_query)
            if hits_on_complement:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)
Exemple #15
0
 def _calc_coverage(self, cds_aln):
     """ Returns the coverage of the given cds alignment.
     Coverage is the total length of the aligned regions divided by the
     length of the cds location.
     @param (CdsAlignment) cds_aln
     @return (float) coverage
     """
     region_lengths = (
         aligned.location.length()
         for aligned in cds_aln.aligned_regions.values())
     covered = sum(region_lengths)
     whole_cds = Location.from_location_str(cds_aln.cds.location)
     # float() keeps the division from truncating on python 2.
     return covered / float(whole_cds.length())
Exemple #16
0
    def testParsesMultisegmentLocation(self):
        # Three segments of the join name a reference sequence, one does not.
        location = Location.from_location_str(
            'join(AF178221.1:<1..60,AF178222.1:1..63,AF178223.1:1..42, 1..>90)'
        )

        probe = Location.from_location((80, ))
        self.assertTrue(location.intersects(probe),
                        "Location doesn't contain target point")

        expected_references = (
            ('AF178221.1', 'Reference AF178221.1 not parsed'),
            ('AF178222.1', 'Reference AF178222.1 not parsed'),
            ('AF178223.1', 'Reference AF178223.1 not parsed'),
        )
        for accession, failure_message in expected_references:
            self.assertIn(accession, location.references(), failure_message)

        reference_count = len(location.references())
        self.assertTrue(reference_count == 3, 'Wrong number of references')
Exemple #17
0
    def determine_coding_seqs(self, record_container):
        ''' Resets self.aligned_cdss and parses the alignment span of the read.

            The record is fetched from record_container using
            self.nucleotide_accession, and self.location_span is turned
            into a Location.

            NOTE(review): the body visible here stops after the span has
            been parsed; no CDS is matched and the success path returns
            None. Confirm whether the matching loop is missing.

            @param record_container object providing fetch_record(accession)
            @return None if the record cannot be fetched (and, as written,
                    also after a successful parse); the empty
                    self.aligned_cdss list if the span cannot be parsed
        '''
        self.aligned_cdss = []
        record = record_container.fetch_record(self.nucleotide_accession)

        # if not possible to fetch a record, return None
        if not record:
            return None

        (start, stop) = self.location_span
        try:
            location = Location.from_location_str("%d..%d" % (start, stop))
        except LoactionParsingException as e:
            # 'except ... as' and the print() call are valid on both
            # python 2.6+ and python 3; the printed text is unchanged.
            print("ReadAlignment/determine_coding_seqs: %s" % (e,))
            self.aligned_cdss = []
            return self.aligned_cdss
Exemple #18
0
    def determine_coding_seqs(self, record_container):
        ''' Resets self.aligned_cdss and parses the alignment span of the read.

            The record is looked up in record_container by
            self.nucleotide_accession; self.location_span is then
            converted into a Location.

            NOTE(review): the body visible here ends once the span has
            been parsed; no CDS is matched and the success path returns
            None. Confirm whether the matching loop is missing.

            @param record_container object providing fetch_record(accession)
            @return None if the record cannot be fetched (and, as written,
                    also after a successful parse); the empty
                    self.aligned_cdss list if the span cannot be parsed
        '''
        self.aligned_cdss = []
        record = record_container.fetch_record(self.nucleotide_accession)

        # if not possible to fetch a record, return None
        if not record:
            return None

        (start, stop) = self.location_span
        try:
            location = Location.from_location_str("%d..%d" % (start, stop))
        except LoactionParsingException as e:
            # 'except ... as' and the print() call work on python 2.6+
            # as well as python 3; the printed text is unchanged.
            print("ReadAlignment/determine_coding_seqs: %s" % (e,))
            self.aligned_cdss = []
            return self.aligned_cdss
Exemple #19
0
def add_cds_to_organism(organism, read, target_alignment):
    """Bins the read under one CDS of the alignment and records it on the organism.

    When the alignment has several aligned CDSs, the one whose location is
    longest is chosen (the last of the longest ones after a stable sort).
    The read is then added to the organism's identified CDS, which is
    created and registered first if the organism does not hold it yet.

    @param organism          object holding identified coding regions
    @param read              object with an `id` attribute
    @param target_alignment  object whose `aligned_cdss` is a non-empty
                             list of (cds, intersection) pairs
    """
    target_cdss = target_alignment.aligned_cdss
    assert(len(target_cdss) >= 1)
    binned_read = resdata.BinnedRead(read.id)

    def location_length(cds):
        # Length of the CDS location, used as the sort key below.
        return Location.from_location_str(cds.location).length()

    if len(target_cdss) == 1:
        target_cds, _ = target_cdss[0]
    else:
        candidates = [cds for cds, _ in target_cdss]
        by_length = sorted(candidates, key=location_length)
        target_cds = by_length[-1]

    if organism.contains_identified_coding_region(target_cds):
        # Already known: just attach the read to the existing entry.
        known_cds = organism.identified_coding_regions[target_cds]
        known_cds.add_binned_read(binned_read)
        return

    new_cds = resdata.IdentifiedCds(target_cds)
    new_cds.add_binned_read(binned_read)
    organism.add_identified_coding_region(new_cds)
Exemple #20
0
    def determine_coding_seqs_optimal(self, record):
        ''' Resets self.aligned_cdss and parses the alignment span of the read.

            NOTE(review): the body visible here ends once the alignment
            Location has been parsed; no CDS is matched and the success
            path returns None. Confirm whether the rest of the method is
            missing.

            @param (UnityRecord) record Record that is used
            @return                  None if record is falsy (and, as written,
                                     also after a successful parse); the empty
                                     self.aligned_cdss list if the span cannot
                                     be parsed
        '''

        self.aligned_cdss = []

        # Without a record there is nothing to match against
        if not record:
            return None

        # Acquire alignment Location
        (start, stop) = self.location_span
        try:
            aln_location = Location.from_location_str("%d..%d" % (start, stop))
        except LoactionParsingException as e:
            # 'except ... as' and the print() call are valid on both
            # python 2.6+ and python 3; the printed text is unchanged.
            print("ReadAlignment/determine_coding_seqs: %s" % (e,))
            self.aligned_cdss = []
            return self.aligned_cdss
Exemple #21
0
    def determine_coding_seqs_optimal(self, record):
        ''' Resets self.aligned_cdss and parses the alignment span of the read.

            NOTE(review): the body visible here stops after the alignment
            Location has been parsed; no CDS is matched and the success
            path returns None. Confirm whether the rest of the method is
            missing.

            @param (UnityRecord) record Record that is used
            @return                  None if record is falsy (and, as written,
                                     also after a successful parse); the empty
                                     self.aligned_cdss list if the span cannot
                                     be parsed
        '''

        self.aligned_cdss = []

        # Without a record there is nothing to match against
        if not record:
            return None

        # Acquire alignment Location
        (start, stop) = self.location_span
        try:
            aln_location = Location.from_location_str("%d..%d" % (start, stop))
        except LoactionParsingException as e:
            # 'except ... as' and the print() call work on python 2.6+
            # as well as python 3; the printed text is unchanged.
            print("ReadAlignment/determine_coding_seqs: %s" % (e,))
            self.aligned_cdss = []
            return self.aligned_cdss
Exemple #22
0
class ReadAlnLocation(object):
    """ Contains information on alignment location on
        an NT nucleotide string
    """
    def __init__(self,
                 read_id,
                 nucleotide_accession,
                 db_source,
                 genome_index,
                 score,
                 location_span,
                 complement,
                 active=True):
        """ Stores the given alignment attributes unchanged.

        @param location_span (start, stop) pair; it is unpacked and
                             formatted with %d in determine_coding_seqs
        @param active        whether the alignment is active (default True)
        """
        self.read_id = read_id
        self.nucleotide_accession = nucleotide_accession
        self.db_source = db_source
        self.genome_index = genome_index
        self.score = score
        self.location_span = location_span
        self.complement = complement
        self.active = active
        # self.determine_coding_seqs()
        # What about .aligned_cdss? It is not set here; it is created by
        # determine_coding_seqs(). Likewise .potential_host only exists
        # after set_potential_host_status() has been called.

    def set_active(self, active):
        '''
        Sets active status for the read alignment.
        Inactive reads do not go into CDS alignments.
        '''
        self.active = active

    def set_potential_host_status(self, potential_host):
        '''
        Set to true if organism is potential host [child of
        animalia kingdom]
        @param potential_host (boolean)
        '''
        self.potential_host = potential_host

    def is_potential_host(self):
        """ Returns true if organism is potential host
        (child of animalia kingdom), false otherwise.
        Raises AttributeError if set_potential_host_status() has not
        been called yet, because the attribute is not set in __init__.
        @return (boolean)
        """
        return self.potential_host

    def determine_coding_seqs(self, record_container):
        ''' Determines which of the CDSs in the record aligned_regions
            aligned to the read.

            Every CDS of the fetched record whose location intersects the
            alignment span is stored in self.aligned_cdss together with
            the intersection. CDSs whose location cannot be parsed are
            reported on stdout and skipped.

            @param record_container object providing fetch_record(accession)
            @return list of tuples (cds, intersecting_location); empty if
            nothing intersects or the alignment span cannot be parsed,
            None if record is not available from the database
        '''
        self.aligned_cdss = []
        record = record_container.fetch_record(self.nucleotide_accession)

        # if not possible to fetch a record from the db, return None
        if not record:
            return None

        (start, stop) = self.location_span
        try:
            location = Location.from_location_str("%d..%d" % (start, stop))
        except LoactionParsingException as e:
            # 'except ... as' and the print() call are valid on both
            # python 2.6+ and python 3; the printed text is unchanged.
            print("ReadAlignment/determine_coding_seqs: %s" % (e,))
            self.aligned_cdss = []
            return self.aligned_cdss

        for cds in record.cds:
            try:
                cds_location = Location.from_location_str(cds.location)
            except LoactionParsingException as e:
                # An unparsable CDS location is reported and skipped
                print("ReadAlignment/determine_coding_seqs: %s" % (e,))
                continue
            location_intersection = cds_location.find_intersection(location)
            if location_intersection is not None:
                self.aligned_cdss.append((cds, location_intersection))

        # Return the collected pairs, as promised by the docstring
        return self.aligned_cdss
Exemple #23
0
        except LoactionParsingException, e:
            print "ReadAlignment/determine_coding_seqs:", e
            self.aligned_cdss = []
            return self.aligned_cdss

        # Determine first overlapping CDS - binary search
        first_ovp_id = self.__find_first_overlapping_CDS_id (aln_location, record.cds)

        # No CDS from the list overlaps - return []
        if (first_ovp_id == None):
            return self.aligned_cdss

        # Determine following overlapping CDSs - loop while overlaps
        for i in range(first_ovp_id, len(record.cds)):
            cds = record.cds[i]
            cds_location = Location.from_location_str(cds.location)

            # If this one does not overlap, the others also won't because it's sorted
            if not self.__overlap(cds_location, aln_location):
                break

            location_intersection = cds_location.find_intersection (aln_location)
            if location_intersection is not None:
                self.aligned_cdss.append ((cds, location_intersection))

        return self.aligned_cdss

    # ---------------------------------------------------------------------------- #

    def set_type (self):
        """ Location can be coding or non-coding
Exemple #24
0
 def testParseReferenceLocation(self):
     # A location string prefixed with a reference name must parse and
     # intersect a span that overlaps its coordinates.
     location = Location.from_location_str('REFERENCE:1..10')
     probe = Location.from_location((5, 15))
     self.assertTrue(location.intersects(probe))
Exemple #25
0
 def matches(self, location, complement, tolerance):
     """Tells whether this object's location intersects the given one.

     self.location is parsed with Location.from_location_str using the
     given tolerance; `location` and `complement` are handed to
     Location.from_location to build the other side.

     Returns whatever Location.intersects returns for the two.
     """
     own = Location.from_location_str(self.location, tolerance)
     other = Location.from_location(location, complement)
     return own.intersects(other)
Exemple #26
0
 def testParseOrderLocation(self):
     # An order(...) location must parse and intersect a span that
     # touches both of its segments.
     location = Location.from_location_str('order(1..3,4..6)')
     probe = Location.from_location((3, 4))
     self.assertTrue(location.intersects(probe),
                     "Location doesn't contain target point")
Exemple #27
0
 def matches(self, location, complement, tolerance):
     """Checks this object's location against another one for intersection.

     The stored location string (self.location) is parsed with the given
     tolerance. The other Location is built by Location.from_location
     from `location` and `complement`.

     Returns the result of Location.intersects on the two.
     """
     parsed_self = Location.from_location_str(self.location, tolerance)
     candidate = Location.from_location(location, complement)
     return parsed_self.intersects(candidate)
Exemple #28
0
    def testParsesLocationWithSpaces(self):
        # A space after a comma inside join(...) must not break parsing:
        # the segment following the space has to be reachable.
        location = Location.from_location_str(
            'join(620..987, 1010..1170,1194..1443)')

        probe = Location.from_location((1010, ))
        contains_probe = location.intersects(probe)
        self.assertTrue(contains_probe,
                        "Location doesn't contain target point")
Exemple #29
0
    def testLocationContains(self):
        # --- Cases where containment is expected ---

        l1 = Location.from_location_str('1..10')
        l2 = Location.from_location_str('5..10')

        self.assertTrue(l1.contains(l2), '1..10 should contain 5..10')

        l1 = Location.from_location_str('complement(<23..50)')
        l2 = Location.from_location_str('complement(24..50)')

        self.assertTrue(
            l1.contains(l2), 'complement(<23..50) should contain '
            'complement(24..50)')

        l1 = Location.from_location_str('join(1..10,20..30)')
        l2 = Location.from_location_str('join(2..8,25..28)')

        self.assertTrue(
            l1.contains(l2), 'join(1..10,20..30) should contain '
            'join(2..8,25..28)')

        l1 = Location.from_location_str(
            'join(complement(1..10),complement(20..30))')
        l2 = Location.from_location_str(
            'join(complement(2..8),complement(25..28))')

        self.assertTrue(
            l1.contains(l2),
            'join(complement(1..10),complement(20..30)) should contain '
            'join(complement(2..8),complement(25..28))')

        # --- Partially overlapping or disjoint coordinates ---

        l1 = Location.from_location_str('1..10')
        l2 = Location.from_location_str('4..20')

        self.assertFalse(l1.contains(l2), '1..10 should not contain ' '4..20')

        l1 = Location.from_location_str('1..10')
        l2 = Location.from_location_str('15..20')

        self.assertFalse(l1.contains(l2), '1..10 should not contain ' '15..20')

        l1 = Location.from_location_str('complement(1..10)')
        l2 = Location.from_location_str('complement(4..20)')

        self.assertFalse(
            l1.contains(l2), 'complement(1..10) should not contain '
            'complement(4..20)')

        l1 = Location.from_location_str('complement(1..10)')
        l2 = Location.from_location_str('complement(15..20)')

        self.assertFalse(
            l1.contains(l2), 'complement(1..10) should not contain '
            'complement(15..20)')

        # --- Plain location against a complement location ---

        l1 = Location.from_location_str('1..10')
        l2 = Location.from_location_str('complement(1..10)')

        self.assertFalse(l1.contains(l2), '1..10 should not contain '
                         'complement(1..10)')

        l1 = Location.from_location_str('1..10')
        l2 = Location.from_location_str('complement(15..20)')

        self.assertFalse(l1.contains(l2), '1..10 should not contain '
                         'complement(15..20)')

        # --- Join against a single interval ---

        l1 = Location.from_location_str('join(1..10,11..50)')
        l2 = Location.from_location_str('15..20')

        self.assertTrue(l1.contains(l2), 'join(1..10,11..50) should contain '
                        '15..20')

        l1 = Location.from_location_str('join(1..10,11..50)')
        l2 = Location.from_location_str('complement(15..20)')

        self.assertTrue(
            l1.contains(l2, use_complement=False),
            'join(1..10,11..50) should contain '
            'complement(15..20) without complement information')

        # --- Locations naming a reference ---
        # The failure messages here used to read "should contain" although
        # the assertions require the opposite; they now match assertFalse.

        l1 = Location.from_location_str('REF1:1..10')
        l2 = Location.from_location_str('5..10')

        self.assertFalse(l1.contains(l2),
                         'REF1:1..10 should not contain 5..10')

        l1 = Location.from_location_str('REF1:1..10')
        l2 = Location.from_location_str('REF2:5..10')

        self.assertFalse(l1.contains(l2),
                         'REF1:1..10 should not contain REF2:5..10')
Exemple #30
0
            print "ReadAlignment/determine_coding_seqs:", e
            self.aligned_cdss = []
            return self.aligned_cdss

        # Determine first overlapping CDS - binary search
        first_ovp_id = self.__find_first_overlapping_CDS_id(
            aln_location, record.cds)

        # No CDS from the list overlaps - return []
        if (first_ovp_id == None):
            return self.aligned_cdss

        # Determine following overlapping CDSs - loop while overlaps
        for i in range(first_ovp_id, len(record.cds)):
            cds = record.cds[i]
            cds_location = Location.from_location_str(cds.location)

            # If this one does not overlap, the others also won't because it's sorted
            if not self.__overlap(cds_location, aln_location):
                break

            location_intersection = cds_location.find_intersection(
                aln_location)
            if location_intersection is not None:
                self.aligned_cdss.append((cds, location_intersection))

        return self.aligned_cdss

    # ---------------------------------------------------------------------------- #

    def set_type(self):