Example #1
0
class RecordContainerTest(unittest.TestCase):
    '''Unit tests for populating and querying a RecordContainer.'''

    def setUp(self):
        '''Creates a new read container and record container per test.'''
        self.read_container = ReadContainer()
        self.record_container = RecordContainer()

    def tearDown(self):
        '''No per-test cleanup is needed.

        Named tearDown (not tearUp) so that unittest recognizes it
        as the hook that runs after each test method.'''
        pass

    def testFillRecordContainer(self):
        '''Method to test whether record container populating works.
        Uses mock database access to test whether record container
        has correct number of items.'''
        aln_file = './test/solver/read2cds/.test_data/lisa.in'
        cds_fasta = './test/solver/read2cds/.test_data/cds.fa'
        db_access = MockDbQuery(cds_fasta)
        self.record_container.set_db_access(db_access)

        # Reads come from the alignment file; the record container is
        # then filled from the versions reported by those reads.
        self.read_container.populate_from_aln_file(aln_file)
        self.record_container.populate(
            self.read_container.fetch_all_reads_versions())
        records = self.record_container.fetch_all_records(format=list)
        self.assertEqual(len(db_access.records), len(records))

    def testReturnsNoneForNonexistentRecord(self):
        '''Fetching a version that was never loaded must yield None.'''
        record = self.record_container.fetch_existing_record("XXX")
        self.assertIsNone(record, "No record with version XXX should be found")
Example #2
0
class BestScoreSolverTest(unittest.TestCase):
    '''Tests the best-score read-to-CDS solver on a small mock data set.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Builds the read, record and CDS alignment containers from the
        test data and runs the best score solver on them.

        Attributes set:
        mock_db_fpath (str) path to synthetically created CDSs which serve
        to fill up the mock database of records
        input_aln_fpath (str) path to input alignment file
        results_fpath (str) path to file with generated correct results
        (not read by the tests of this class)
        '''
        self.mock_db_fpath = './test/solver/read2cds/.test_data/cds.fa'
        self.input_aln_fpath = './test/solver/read2cds/.test_data/lisa.in'
        self.results_fpath = './test/solver/read2cds/.test_data/cds_ordering.txt'
        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)
        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)
        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

        self.bs_solver = BestScoreSolver()
        self.bs_solver.map_reads_2_cdss(self.cds_aln_cont)

    def testCdsAlignmentContainerConsistency(self):
        '''The CDS alignment container must be consistent after solving.'''
        is_consistent = Read2CDSSolver.test_cds_alignment_container_consistency(
            self.cds_aln_cont)
        # assertEqual instead of a bare assert: it is not stripped under -O
        # and reports the offending value on failure.
        self.assertEqual(is_consistent, True)
Example #3
0
class RecordContainerTest(unittest.TestCase):
    '''Checks that a RecordContainer is filled and queried correctly.'''

    def setUp(self):
        '''Instantiates the read and record containers used by each test.'''
        self.read_container = ReadContainer()
        self.record_container = RecordContainer()

    def tearDown(self):
        '''Nothing to release after a test.

        unittest only calls a hook named tearDown; the former name
        tearUp was never invoked by the framework.'''
        pass

    def testFillRecordContainer(self):
        '''Method to test whether record container populating works.
        Uses mock database access to test whether record container
        has correct number of items.'''
        aln_file = './test/solver/read2cds/.test_data/lisa.in'
        cds_fasta = './test/solver/read2cds/.test_data/cds.fa'
        db_access = MockDbQuery(cds_fasta)
        self.record_container.set_db_access(db_access)

        self.read_container.populate_from_aln_file(aln_file)
        self.record_container.populate(
            self.read_container.fetch_all_reads_versions())
        records = self.record_container.fetch_all_records(format=list)
        # Every record held by the mock database is expected in the container.
        self.assertEqual(len(db_access.records), len(records))

    def testReturnsNoneForNonexistentRecord(self):
        '''Looking up an unknown record version must return None.'''
        record = self.record_container.fetch_existing_record("XXX")
        self.assertIsNone(record, "No record with version XXX should be found")
Example #4
0
class ReadContainerTest(unittest.TestCase):
    '''Tests loading of reads and alignments into a ReadContainer.'''

    def setUp(self):
        '''Creates a new read container and sets the default input file.'''
        self.read_cont = ReadContainer()
        self.aln_file = './test/data/containers/.data/example.in'

    def tearDown(self):
        '''No per-test cleanup is needed.

        Named tearDown (not tearUp) so that unittest recognizes it
        as the hook that runs after each test method.'''
        pass

    def testReadCount(self):
        '''
        Tests whether every read ID found in the alignment file is
        present in the read container, and whether the container
        holds exactly 100 reads.
        '''
        self.read_cont.populate_from_aln_file(self.aln_file)
        reads_cont = self.read_cont.read_repository.keys()
        reads_from_file = self._load_read_ids()

        for read_id in reads_from_file:
            self.assertIn(read_id, reads_cont,
                          msg="Read ID %s not in read container." % read_id)
        self.assertEqual(len(reads_cont), 100)

    def testCorrectAlignmentNumber(self):
        '''
        Test the loader for correct number of alignments.
        Test file organized so that read ID specifies number
        of alignments.
        '''
        aln_file = './test/data/containers/.data/aln_num.in'
        self.read_cont.populate_from_aln_file(aln_file)
        for (read_id, read) in self.read_cont.read_repository.items():
            self.assertEqual(int(read_id), len(read.get_alignments()))

    def _load_read_ids(self):
        '''
        Reads self.aln_file and collects one read ID per line.
        The ID is the text before the first comma, with a leading
        '@' removed if present.
        @return list(str) read IDs in file order
        '''
        read_ids = []
        # The with-block closes the file even if parsing raises.
        with open(self.aln_file, 'r') as aln_fhandle:
            for line in aln_fhandle:
                read_id = line.split(',')[0]
                if read_id.startswith('@'):
                    read_id = read_id[1:]
                read_ids.append(read_id)
        return read_ids
Example #5
0
class ReadContainerTest(unittest.TestCase):
    '''Checks that a ReadContainer loads reads and their alignments.'''

    def setUp(self):
        '''Prepares the read container and the default alignment file path.'''
        self.read_cont = ReadContainer()
        self.aln_file = './test/data/containers/.data/example.in'

    def tearDown(self):
        '''Nothing to release after a test.

        unittest only calls a hook named tearDown; the former name
        tearUp was never invoked by the framework.'''
        pass

    def testReadCount(self):
        '''
        Verifies that each read ID listed in the alignment file is
        stored in the read container and that the container holds
        exactly 100 reads.
        '''
        self.read_cont.populate_from_aln_file(self.aln_file)
        reads_cont = self.read_cont.read_repository.keys()
        reads_from_file = self._load_read_ids()

        for read_id in reads_from_file:
            # assertIn reports both operands on failure, unlike assertTrue.
            self.assertIn(read_id, reads_cont,
                          msg="Read ID %s not in read container." % read_id)
        self.assertEqual(len(reads_cont), 100)

    def testCorrectAlignmentNumber(self):
        '''
        Test the loader for correct number of alignments.
        Test file organized so that read ID specifies number
        of alignments.
        '''
        aln_file = './test/data/containers/.data/aln_num.in'
        self.read_cont.populate_from_aln_file(aln_file)
        for (read_id, read) in self.read_cont.read_repository.items():
            self.assertEqual(int(read_id), len(read.get_alignments()))

    def _load_read_ids(self):
        '''
        Extracts the read ID from every line of self.aln_file: the
        first comma-separated field, minus a leading '@' if any.
        @return list(str) read IDs, one per line of the file
        '''
        read_ids = []
        # Context manager guarantees the handle is closed on any exit path.
        with open(self.aln_file, 'r') as aln_fhandle:
            for line in aln_fhandle:
                read_id = line.split(',')[0]
                if read_id.startswith('@'):
                    read_id = read_id[1:]
                read_ids.append(read_id)
        return read_ids
Example #6
0
class StatisticsTest(unittest.TestCase):
    '''Tests the alignment statistics helpers before and after solving.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Builds the read, record and CDS alignment containers from the
        statistics test data.

        Attributes set:
        mock_db_fpath (str) path to synthetically created CDSs which serve
        to fill up the mock database of records
        input_aln_fpath (str) path to input alignment file
        '''
        self.mock_db_fpath = './test/statistics/.test_data/cds.fa'
        self.input_aln_fpath = './test/statistics/.test_data/lisa.in'
        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)
        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)
        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

    def testStatistics(self):
        '''
        Checks the statistics of the freshly loaded containers, runs the
        best score solver, and checks the statistics again.
        '''
        # assertEqual is used instead of bare asserts: it survives -O and
        # shows the actual value when a check fails.
        self.assertEqual(num_read_alns(self.read_cont), 22)
        self.assertEqual(num_active_aligned_regions(self.cds_aln_cont), 22)
        self.assertEqual(num_inactive_read_alns(self.read_cont), 0)

        self.bs_solver = BestScoreSolver()
        self.bs_solver.map_reads_2_cdss(self.cds_aln_cont)

        records_stats = count_alns_to_record_and_cds(self.read_cont)
        # Single-argument call form prints identically on Python 2 and 3.
        print("Number of records for which we  have stats: %d\n" % len(records_stats))
        for rec_stat in records_stats.values():
            rec_stat.print_data()

        self.assertEqual(num_active_aligned_regions(self.cds_aln_cont), 16)

        self.assertEqual(num_cdss(self.cds_aln_cont), 4)
        self.assertEqual(num_cdss_with_no_alns(self.cds_aln_cont), 0)
Example #7
0
class StatisticsTest(unittest.TestCase):
    '''Exercises the statistics functions on a small mock data set.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Loads the statistics test data into read, record and CDS
        alignment containers.

        Attributes set:
        mock_db_fpath (str) path to synthetically created CDSs which serve
        to fill up the mock database of records
        input_aln_fpath (str) path to input alignment file
        '''
        self.mock_db_fpath = './test/statistics/.test_data/cds.fa'
        self.input_aln_fpath = './test/statistics/.test_data/lisa.in'
        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)
        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)
        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

    def testStatistics(self):
        '''
        Verifies the alignment counts before solving, then runs the
        best score solver and verifies the counts afterwards.
        '''
        # unittest assertions replace bare asserts, which are removed
        # under -O and give no detail on failure.
        self.assertEqual(num_read_alns(self.read_cont), 22)
        self.assertEqual(num_active_aligned_regions(self.cds_aln_cont), 22)
        self.assertEqual(num_inactive_read_alns(self.read_cont), 0)

        self.bs_solver = BestScoreSolver()
        self.bs_solver.map_reads_2_cdss(self.cds_aln_cont)

        records_stats = count_alns_to_record_and_cds(self.read_cont)
        # Parenthesized single expression: same output on Python 2 and 3.
        print("Number of records for which we  have stats: %d\n" % len(
            records_stats))
        for rec_stat in records_stats.values():
            rec_stat.print_data()

        self.assertEqual(num_active_aligned_regions(self.cds_aln_cont), 16)

        self.assertEqual(num_cdss(self.cds_aln_cont), 4)
        self.assertEqual(num_cdss_with_no_alns(self.cds_aln_cont), 0)
Example #8
0
def fill_containers(alignment_file, db_access):
    '''
    Builds and fills the read, record and CDS alignment containers.
    @param alignment_file input handed to ReadContainer.populate_from_aln_file
    @param db_access database access object given to the record container
    @return tuple(ReadContainer, RecordContainer, CdsAlnContainer)
    '''
    reads = ReadContainer()
    records = RecordContainer()
    records.set_db_access(db_access)
    cds_alns = CdsAlnContainer()

    # Step 1: load everything the alignment file provides.
    reads.populate_from_aln_file(alignment_file)

    # Step 2: fetch the records named in the alignment file from the database.
    reported_versions = reads.fetch_all_reads_versions()
    records.populate(reported_versions)

    # Step 3: work out which coding sequences the reads map to.
    reads.populate_cdss(records)

    # Step 4: fill the CDS alignment container from all reads.
    all_reads = reads.fetch_all_reads()
    cds_alns.populate(all_reads)

    return reads, records, cds_alns
Example #9
0
class GreedySolverTest(unittest.TestCase):
    '''Tests the greedy read-to-CDS solver against precomputed results.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Builds the read, record and CDS alignment containers from the
        test data and runs the greedy solver on them.

        Attributes set:
        mock_db_fpath (str) path to synthetically created CDSs which serve
        to fill up the mock database of records
        input_aln_fpath (str) path to input alignment file
        results_fpath (str) path to file with generated correct results
        greedy solver should generate
        '''
        self.mock_db_fpath = './test/solver/read2cds/.test_data/cds.fa'
        self.input_aln_fpath = './test/solver/read2cds/.test_data/lisa.in'
        self.results_fpath = './test/solver/read2cds/.test_data/cds_ordering.txt'
        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)
        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)
        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

        self.greedy_solver = GreedySolver()
        self.greedy_solver.map_reads_2_cdss(self.cds_aln_cont)

    def testAlignmentsCorrectlyInactivated(self):
        '''
        Loads correct results from results file and checks whether
        all the reads for a CDS listed in the file are active and
        whether all the other reads are inactive.
        '''
        cds2read = self._load_active_reads()

        for (cds, cds_aln) in self.cds_aln_cont.cds_repository.items():
            accession = cds.record_id
            mapped_reads = cds2read[accession]
            for cds_aln_subloc in cds_aln.aligned_regions.values():
                # unittest assertions are not stripped under -O and name
                # the offending read on failure.
                if cds_aln_subloc.active:
                    self.assertIn(cds_aln_subloc.read_id, mapped_reads)
                else:
                    self.assertNotIn(cds_aln_subloc.read_id, mapped_reads)

    def testCdsAlignmentContainerConsistency(self):
        '''The CDS alignment container must be consistent after solving.'''
        is_consistent = Read2CDSSolver.test_cds_alignment_container_consistency(
            self.cds_aln_cont)
        self.assertEqual(is_consistent, True)

    def _load_active_reads(self):
        '''
        Parses the results file, which holds two lines per CDS: the
        CDS id, then the ';'-separated ids of the reads mapped to it.
        @return dict(str -> list(str)) CDS id to its read ids
        @raise ValueError if a CDS id line has no read ids line after it
        '''
        # The with-block closes the file even if reading raises.
        with open(self.results_fpath) as results_fhandle:
            lines = iter(results_fhandle.readlines())
        cds2read_map = {}
        for cds_line in lines:
            # Consume the partner line from the same iterator.
            reads_line = next(lines, None)
            if reads_line is None:
                if not cds_line.strip():
                    # Tolerate a trailing blank line.
                    break
                raise ValueError(
                    "Results file %s: CDS '%s' has no read ids line"
                    % (self.results_fpath, cds_line.strip()))
            cds2read_map[cds_line.strip()] = reads_line.strip().split(';')
        return cds2read_map
Example #10
0
class GreedySolverTest(unittest.TestCase):
    '''Checks the greedy read-to-CDS solver against a file of expected results.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Loads the test data into read, record and CDS alignment
        containers and applies the greedy solver.

        Attributes set:
        mock_db_fpath (str) path to synthetically created CDSs which serve
        to fill up the mock database of records
        input_aln_fpath (str) path to input alignment file
        results_fpath (str) path to file with generated correct results
        greedy solver should generate
        '''
        self.mock_db_fpath = './test/solver/read2cds/.test_data/cds.fa'
        self.input_aln_fpath = './test/solver/read2cds/.test_data/lisa.in'
        self.results_fpath = './test/solver/read2cds/.test_data/cds_ordering.txt'
        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)
        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)
        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

        self.greedy_solver = GreedySolver()
        self.greedy_solver.map_reads_2_cdss(self.cds_aln_cont)

    def testAlignmentsCorrectlyInactivated(self):
        '''
        Loads correct results from results file and checks whether
        all the reads for a CDS listed in the file are active and
        whether all the other reads are inactive.
        '''
        cds2read = self._load_active_reads()

        for (cds, cds_aln) in self.cds_aln_cont.cds_repository.items():
            accession = cds.record_id
            mapped_reads = cds2read[accession]
            for cds_aln_subloc in cds_aln.aligned_regions.values():
                # Bare asserts vanish under -O; these assertions do not.
                if cds_aln_subloc.active:
                    self.assertIn(cds_aln_subloc.read_id, mapped_reads)
                else:
                    self.assertNotIn(cds_aln_subloc.read_id, mapped_reads)

    def testCdsAlignmentContainerConsistency(self):
        '''After solving, the CDS alignment container must pass the consistency check.'''
        is_consistent = Read2CDSSolver.test_cds_alignment_container_consistency(
            self.cds_aln_cont)
        self.assertEqual(is_consistent, True)

    def _load_active_reads(self):
        '''
        Reads the results file as consecutive line pairs: a CDS id
        line followed by a line of ';'-separated read ids.
        @return dict(str -> list(str)) mapping of CDS id to read ids
        @raise ValueError if the last CDS id has no read ids line
        '''
        # Context manager guarantees the handle is closed on any exit path.
        with open(self.results_fpath) as results_fhandle:
            lines = iter(results_fhandle.readlines())
        cds2read_map = {}
        for cds_line in lines:
            # Each iteration takes two lines from the shared iterator.
            reads_line = next(lines, None)
            if reads_line is None:
                if not cds_line.strip():
                    # A blank final line is not an error.
                    break
                raise ValueError(
                    "Results file %s: CDS '%s' has no read ids line"
                    % (self.results_fpath, cds_line.strip()))
            cds2read_map[cds_line.strip()] = reads_line.strip().split(';')
        return cds2read_map
Example #11
0
    # Set up database access, the taxonomy tree and the host determinator
    # that is built from both.
    db_access = DbQuery()
    tax_tree = TaxTree(args.tax_tree)
    host_determinator = HostDeterminator(dbquery=db_access,
                                         tax_tree=tax_tree)

    log.info("Started.")
    # Start marker for the whole run; not read again in the visible code.
    processing_start = timing.start()

    solver = Solver(host_determinator, read2cds_solver, tax_solver)

    # Populate read container
    # The read container type can now be determined from the input parameters
    # and injected into the Solver
    # start/elapsed_time bracket this phase so its duration can be logged.
    start = timing.start()
    read_container = ReadContainer()
    read_container.populate_from_aln_file(read_alignment_file=args.input)
    elapsed_time = timing.end(start)
    log.info("Populate read container - elapsed time: %s",
             timing.humanize(elapsed_time))

    # Populate record container
    # The record container type can now be determined from the input parameters
    # and injected into the Solver
    # The same start/end pattern times the record-loading phase.
    start = timing.start()
    record_container = RecordContainer()
    record_container.set_db_access(db_access)
    # Extract all records from database
    record_container.populate(read_container.fetch_all_reads_versions())
    elapsed_time = timing.end(start)
    log.info("Populate record container - elapsed time: %s",
             timing.humanize(elapsed_time))
Example #12
0
from data.containers.record import RecordContainer
from data.containers.read import ReadContainer
from data.containers.cdsaln import CdsAlnContainer

from utils.logger import Logger

# Instantiate the logger; the returned object is not kept.
Logger("log")

db_query = DbQuery()

# create containers
record_container = RecordContainer()
record_container.set_db_access(db_query)

read_container = ReadContainer()
read_container.populate_from_aln_file("example_data/2reads.in")

# NOTE(review): other call sites pass
# read_container.fetch_all_reads_versions() to RecordContainer.populate --
# confirm that the container itself is accepted here.
record_container.populate(read_container)
read_container.populate_cdss(record_container)

cds_aln_container = CdsAlnContainer()
# NOTE(review): other call sites pass read_container.fetch_all_reads() to
# CdsAlnContainer.populate -- confirm that the container itself is accepted.
cds_aln_container.populate(read_container)

# Single-argument call form prints identically on Python 2 and 3.
print(cds_aln_container)


r2c_solver = BestScoreSolver()
r2c_solver.map_reads_2_cdss(cds_aln_container)

# The Python 2 print statement put a separator space after the label's own
# trailing space; the format string keeps both so the output is unchanged.
print("Consistency test result:  %s"
      % (Read2CDSSolver.test_cds_alignment_container_consistency(cds_aln_container),))
Example #13
0
    # Create database access
    db_access = DbQuery()
    tax_tree = TaxTree(args.tax_tree)
    # The host determinator is built from the database access and the tax tree.
    host_determinator = HostDeterminator(dbquery=db_access, tax_tree=tax_tree)

    log.info("Started.")
    # Start marker for the whole run; not read again in the visible code.
    processing_start = timing.start()

    solver = Solver(host_determinator, read2cds_solver, tax_solver)

    # Populate read container
    # The read container type can now be determined from the input parameters
    # and injected into the Solver
    # start/elapsed_time bracket this phase so its duration can be logged.
    start = timing.start()
    read_container = ReadContainer()
    read_container.populate_from_aln_file(read_alignment_file=args.input)
    elapsed_time = timing.end(start)
    log.info("Populate read container - elapsed time: %s",
             timing.humanize(elapsed_time))

    # Populate record container
    # The record container type can now be determined from the input parameters
    # and injected into the Solver
    # The same start/end pattern times the record-loading phase.
    start = timing.start()
    record_container = RecordContainer()
    record_container.set_db_access(db_access)
    # Extract all records from database
    record_container.populate(read_container.fetch_all_reads_versions())
    elapsed_time = timing.end(start)
    log.info("Populate record container - elapsed time: %s",
             timing.humanize(elapsed_time))