class RecordContainerTest(unittest.TestCase):
    '''Unit tests for filling and querying a RecordContainer.'''

    def setUp(self):
        '''Creates fresh, empty read and record containers for each test.'''
        self.read_container = ReadContainer()
        self.record_container = RecordContainer()

    def tearDown(self):
        '''No cleanup needed; setUp rebuilds the containers every time.

        (Previously spelled tearUp, a name unittest never invokes.)
        '''
        pass

    def testFillRecordContainer(self):
        '''Method to test whether record container populating works.
        Uses mock database access to test whether record container
        has correct number of items.'''
        aln_file = './test/solver/read2cds/.test_data/lisa.in'
        cds_fasta = './test/solver/read2cds/.test_data/cds.fa'

        db_access = MockDbQuery(cds_fasta)
        self.record_container.set_db_access(db_access)
        self.read_container.populate_from_aln_file(aln_file)
        self.record_container.populate(
            self.read_container.fetch_all_reads_versions())

        # The container is expected to hold as many records as the mock
        # database exposes through its `records` attribute.
        records = self.record_container.fetch_all_records(format=list)
        self.assertEqual(len(db_access.records), len(records))

    def testReturnsNoneForNonexistentRecord(self):
        '''Looking up a version in an unpopulated container yields None.'''
        record = self.record_container.fetch_existing_record("XXX")
        self.assertIsNone(record, "No record with version XXX should be found")
class BestScoreSolverTest(unittest.TestCase):
    '''Tests the BestScoreSolver read-to-CDS mapping on the mock data set.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Builds the read, record and CDS alignment containers from the test
        data and runs the best score solver over the CDS alignment container.

        Attributes set here:
        mock_db_fpath (str) path to synthetically created CDSs which serves
            to fill up mock database of records
        input_aln_fpath (str) path to input alignment file
        results_fpath (str) path to a file with generated correct results
            (stored only; no test in this class reads it)
        '''
        self.mock_db_fpath = './test/solver/read2cds/.test_data/cds.fa'
        self.input_aln_fpath = './test/solver/read2cds/.test_data/lisa.in'
        self.results_fpath = './test/solver/read2cds/.test_data/cds_ordering.txt'

        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)

        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)

        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

        self.bs_solver = BestScoreSolver()
        self.bs_solver.map_reads_2_cdss(self.cds_aln_cont)

    def testCdsAlignmentContainerConsistency(self):
        '''The CDS alignment container must be consistent after solving.'''
        # Use a unittest assertion rather than a bare `assert`, which is
        # compiled away when Python runs with -O.
        self.assertTrue(
            Read2CDSSolver.test_cds_alignment_container_consistency(
                self.cds_aln_cont))
class RecordContainerTest(unittest.TestCase):
    '''Checks that a RecordContainer can be populated and queried.'''

    def setUp(self):
        '''Starts every test with new, unpopulated containers.'''
        self.read_container = ReadContainer()
        self.record_container = RecordContainer()

    def tearDown(self):
        '''Intentionally empty: there is no state to release.

        The hook was formerly named tearUp, which unittest does not call.
        '''
        pass

    def testFillRecordContainer(self):
        '''Method to test whether record container populating works.
        Uses mock database access to test whether record container
        has correct number of items.'''
        aln_file = './test/solver/read2cds/.test_data/lisa.in'
        cds_fasta = './test/solver/read2cds/.test_data/cds.fa'

        db_access = MockDbQuery(cds_fasta)
        self.record_container.set_db_access(db_access)
        self.read_container.populate_from_aln_file(aln_file)
        self.record_container.populate(
            self.read_container.fetch_all_reads_versions())

        # Compare the number of fetched records with the number of records
        # the mock database holds.
        records = self.record_container.fetch_all_records(format=list)
        self.assertEqual(len(db_access.records), len(records))

    def testReturnsNoneForNonexistentRecord(self):
        '''An unknown version must not resolve to a record.'''
        record = self.record_container.fetch_existing_record("XXX")
        self.assertIsNone(record, "No record with version XXX should be found")
class ReadContainerTest(unittest.TestCase):
    '''Tests loading of reads and alignments into a ReadContainer.'''

    def setUp(self):
        '''Creates an empty read container and sets the default input file.'''
        self.read_cont = ReadContainer()
        self.aln_file = './test/data/containers/.data/example.in'

    def tearDown(self):
        '''No cleanup needed (was misspelled tearUp, which never ran).'''
        pass

    def testReadCount(self):
        '''
        Tests whether the number of reads in the read container
        is consistent with the number of reads in the alignment file.
        '''
        self.read_cont.populate_from_aln_file(self.aln_file)
        reads_cont = self.read_cont.read_repository.keys()
        reads_from_file = self._load_read_ids()
        for read_id in reads_from_file:
            self.assertIn(read_id, reads_cont,
                          msg="Read ID %s not in read container." % read_id)
        self.assertEqual(len(reads_cont), 100)

    def testCorrectAlignmentNumber(self):
        '''
        Test the loader for correct number of alignments.
        Test file organized so that read ID specifies number of
        alignments.
        '''
        aln_file = './test/data/containers/.data/aln_num.in'
        self.read_cont.populate_from_aln_file(aln_file)
        for (read_id, read) in self.read_cont.read_repository.items():
            self.assertEqual(int(read_id), len(read.get_alignments()))

    def _load_read_ids(self):
        '''
        Reads self.aln_file and collects one read ID per line.

        The ID is the text before the first comma of the line, with a
        single leading '@' removed when present.

        @return (list of str) read IDs in file order
        '''
        read_ids = []
        # `with` closes the handle even if parsing a line raises.
        with open(self.aln_file, 'r') as aln_fhandle:
            for line in aln_fhandle:
                read_id = line.split(',')[0]
                if read_id.startswith('@'):
                    read_id = read_id[1:]
                read_ids.append(read_id)
        return read_ids
class ReadContainerTest(unittest.TestCase):
    '''Verifies that ReadContainer loads reads and alignments correctly.'''

    def setUp(self):
        '''Prepares a new read container and the path of the example file.'''
        self.read_cont = ReadContainer()
        self.aln_file = './test/data/containers/.data/example.in'

    def tearDown(self):
        '''Nothing to release. Renamed from tearUp, which unittest ignores.'''
        pass

    def testReadCount(self):
        '''
        Tests whether the number of reads in the read container
        is consistent with the number of reads in the alignment file.
        '''
        self.read_cont.populate_from_aln_file(self.aln_file)
        reads_cont = self.read_cont.read_repository.keys()
        reads_from_file = self._load_read_ids()
        for read_id in reads_from_file:
            # assertIn reports both operands when the check fails.
            self.assertIn(read_id, reads_cont,
                          msg="Read ID %s not in read container." % read_id)
        self.assertEqual(len(reads_cont), 100)

    def testCorrectAlignmentNumber(self):
        '''
        Test the loader for correct number of alignments.
        Test file organized so that read ID specifies number of
        alignments.
        '''
        aln_file = './test/data/containers/.data/aln_num.in'
        self.read_cont.populate_from_aln_file(aln_file)
        for (read_id, read) in self.read_cont.read_repository.items():
            self.assertEqual(int(read_id), len(read.get_alignments()))

    def _load_read_ids(self):
        '''
        Extracts the read ID from every line of self.aln_file.

        For each line the part before the first comma is taken and one
        leading '@', if any, is dropped.

        @return (list of str) the read IDs, in the order they were read
        '''
        read_ids = []
        # The context manager guarantees the file is closed on any exit path.
        with open(self.aln_file, 'r') as aln_fhandle:
            for line in aln_fhandle:
                read_id = line.split(',')[0]
                if read_id.startswith('@'):
                    read_id = read_id[1:]
                read_ids.append(read_id)
        return read_ids
class StatisticsTest(unittest.TestCase):
    '''Tests the statistics helpers before and after read-to-CDS solving.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Builds the read, record and CDS alignment containers from the
        statistics test data.

        Attributes set here:
        mock_db_fpath (str) path to synthetically created CDSs which serves
            to fill up mock database of records
        input_aln_fpath (str) path to input alignment file
        '''
        self.mock_db_fpath = './test/statistics/.test_data/cds.fa'
        self.input_aln_fpath = './test/statistics/.test_data/lisa.in'

        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)

        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)

        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

    def testStatistics(self):
        '''
        Checks the statistics counters on the freshly filled containers,
        runs the best score solver, and checks the counters again.
        '''
        # unittest assertions are used instead of bare `assert` so the
        # checks survive `python -O` and report actual vs. expected values.
        self.assertEqual(num_read_alns(self.read_cont), 22)
        self.assertEqual(num_active_aligned_regions(self.cds_aln_cont), 22)
        self.assertEqual(num_inactive_read_alns(self.read_cont), 0)

        self.bs_solver = BestScoreSolver()
        self.bs_solver.map_reads_2_cdss(self.cds_aln_cont)

        records_stats = count_alns_to_record_and_cds(self.read_cont)
        # Single-argument parenthesised print behaves the same on
        # Python 2 and Python 3.
        print("Number of records for which we have stats: %d\n"
              % len(records_stats))
        for rec_stat in records_stats.values():
            rec_stat.print_data()

        self.assertEqual(num_active_aligned_regions(self.cds_aln_cont), 16)
        self.assertEqual(num_cdss(self.cds_aln_cont), 4)
        self.assertEqual(num_cdss_with_no_alns(self.cds_aln_cont), 0)
class StatisticsTest(unittest.TestCase):
    '''Exercises the statistics functions on the mock alignment data.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Fills the read, record and CDS alignment containers that the
        statistics functions are evaluated on.

        Attributes set here:
        mock_db_fpath (str) path to synthetically created CDSs which serves
            to fill up mock database of records
        input_aln_fpath (str) path to input alignment file
        '''
        self.mock_db_fpath = './test/statistics/.test_data/cds.fa'
        self.input_aln_fpath = './test/statistics/.test_data/lisa.in'

        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)

        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)

        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

    def testStatistics(self):
        '''
        Asserts the expected counts before solving, maps reads to CDSs
        with the best score solver, then asserts the expected counts after.
        '''
        # Bare `assert` statements are removed under `python -O`; the
        # TestCase assertions always run and give informative failures.
        self.assertEqual(num_read_alns(self.read_cont), 22)
        self.assertEqual(num_active_aligned_regions(self.cds_aln_cont), 22)
        self.assertEqual(num_inactive_read_alns(self.read_cont), 0)

        self.bs_solver = BestScoreSolver()
        self.bs_solver.map_reads_2_cdss(self.cds_aln_cont)

        records_stats = count_alns_to_record_and_cds(self.read_cont)
        # Parenthesised single-argument form prints identically on
        # Python 2 and is also valid Python 3.
        print("Number of records for which we have stats: %d\n"
              % len(records_stats))
        for rec_stat in records_stats.values():
            rec_stat.print_data()

        self.assertEqual(num_active_aligned_regions(self.cds_aln_cont), 16)
        self.assertEqual(num_cdss(self.cds_aln_cont), 4)
        self.assertEqual(num_cdss_with_no_alns(self.cds_aln_cont), 0)
def fill_containers(alignment_file, db_access):
    '''
    Creates the read, record and CDS alignment containers and fills them
    from the given alignment file, using db_access for record lookups.

    @param alignment_file value passed to ReadContainer.populate_from_aln_file
    @param db_access database access object given to the record container
    @return tuple(ReadContainer, RecordContainer, CdsAlnContainer)
    '''
    reads = ReadContainer()
    records = RecordContainer()
    records.set_db_access(db_access)
    cds_alignments = CdsAlnContainer()

    # Step 1: load everything the alignment file provides
    reads.populate_from_aln_file(alignment_file)

    # Step 2: fetch from the database all records the alignment file reports
    reported_versions = reads.fetch_all_reads_versions()
    records.populate(reported_versions)

    # Step 3: find to which coding sequences the reads map
    reads.populate_cdss(records)

    # Step 4: populate the CDS alignment container from all reads
    all_reads = reads.fetch_all_reads()
    cds_alignments.populate(all_reads)

    return reads, records, cds_alignments
class GreedySolverTest(unittest.TestCase):
    '''Tests the GreedySolver read-to-CDS mapping against stored results.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Builds the read, record and CDS alignment containers from the test
        data and runs the greedy solver over the CDS alignment container.

        Attributes set here:
        mock_db_fpath (str) path to synthetically created CDSs which serves
            to fill up mock database of records
        input_aln_fpath (str) path to input alignment file
        results_fpath (str) path to file with generated correct results
            greedy solver should generate
        '''
        self.mock_db_fpath = './test/solver/read2cds/.test_data/cds.fa'
        self.input_aln_fpath = './test/solver/read2cds/.test_data/lisa.in'
        self.results_fpath = './test/solver/read2cds/.test_data/cds_ordering.txt'

        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)

        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)

        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

        self.greedy_solver = GreedySolver()
        self.greedy_solver.map_reads_2_cdss(self.cds_aln_cont)

    def testAlignmentsCorrectlyInactivated(self):
        '''
        Loads correct results from results file and checks whether
        all the reads for a CDS listed in the file are active and
        whether all the other reads are inactive.
        '''
        cds2read = self._load_active_reads()
        for (cds, cds_aln) in self.cds_aln_cont.cds_repository.items():
            accession = cds.record_id
            mapped_reads = cds2read[accession]
            for cds_aln_subloc in cds_aln.aligned_regions.values():
                # TestCase assertions are not stripped by `python -O` and
                # name the offending read ID on failure.
                if cds_aln_subloc.active:
                    self.assertIn(cds_aln_subloc.read_id, mapped_reads)
                else:
                    self.assertNotIn(cds_aln_subloc.read_id, mapped_reads)

    def testCdsAlignmentContainerConsistency(self):
        '''The CDS alignment container must be consistent after solving.'''
        self.assertTrue(
            Read2CDSSolver.test_cds_alignment_container_consistency(
                self.cds_aln_cont))

    def _load_active_reads(self):
        '''
        Parses self.results_fpath into a CDS-to-reads mapping.

        The file is read as consecutive line pairs: a key line followed by
        a line of ';'-separated read IDs. Both lines are stripped of
        surrounding whitespace.

        @return (dict) stripped key line -> list of read ID strings
        @raise ValueError if a non-blank key line has no read line after it
        '''
        # `with` closes the handle even when parsing fails.
        with open(self.results_fpath) as results_fhandle:
            lines = iter(results_fhandle.readlines())

        cds2read_map = {}
        for cds_id in lines:
            read_ids = next(lines, None)
            if read_ids is None:
                if not cds_id.strip():
                    # A trailing blank line is not a real entry.
                    break
                raise ValueError(
                    "No read IDs line follows CDS '%s' in %s"
                    % (cds_id.strip(), self.results_fpath))
            cds2read_map[cds_id.strip()] = read_ids.strip().split(';')
        return cds2read_map
class GreedySolverTest(unittest.TestCase):
    '''Compares GreedySolver output with a file of expected assignments.'''

    # setUp is executed before each test method
    def setUp(self):
        '''
        Fills the read, record and CDS alignment containers from the test
        data, then maps reads to CDSs with the greedy solver.

        Attributes set here:
        mock_db_fpath (str) path to synthetically created CDSs which serves
            to fill up mock database of records
        input_aln_fpath (str) path to input alignment file
        results_fpath (str) path to file with generated correct results
            greedy solver should generate
        '''
        self.mock_db_fpath = './test/solver/read2cds/.test_data/cds.fa'
        self.input_aln_fpath = './test/solver/read2cds/.test_data/lisa.in'
        self.results_fpath = './test/solver/read2cds/.test_data/cds_ordering.txt'

        # Initialize read container
        self.read_cont = ReadContainer()
        self.read_cont.populate_from_aln_file(self.input_aln_fpath)

        # Initialize and fill record container
        self.db_query = MockDbQuery(self.mock_db_fpath)
        self.record_cont = RecordContainer()
        self.record_cont.set_db_access(self.db_query)
        self.record_cont.populate(self.read_cont.fetch_all_reads_versions())
        self.read_cont.populate_cdss(self.record_cont)

        # Initialize and fill up cds aln container
        self.cds_aln_cont = CdsAlnContainer()
        self.cds_aln_cont.populate(self.read_cont.fetch_all_reads())

        self.greedy_solver = GreedySolver()
        self.greedy_solver.map_reads_2_cdss(self.cds_aln_cont)

    def testAlignmentsCorrectlyInactivated(self):
        '''
        Loads correct results from results file and checks whether
        all the reads for a CDS listed in the file are active and
        whether all the other reads are inactive.
        '''
        cds2read = self._load_active_reads()
        for (cds, cds_aln) in self.cds_aln_cont.cds_repository.items():
            accession = cds.record_id
            mapped_reads = cds2read[accession]
            for cds_aln_subloc in cds_aln.aligned_regions.values():
                # Bare `assert` would vanish under `python -O`; these
                # assertions always execute and show the failing read ID.
                if cds_aln_subloc.active:
                    self.assertIn(cds_aln_subloc.read_id, mapped_reads)
                else:
                    self.assertNotIn(cds_aln_subloc.read_id, mapped_reads)

    def testCdsAlignmentContainerConsistency(self):
        '''After solving, the consistency check must report success.'''
        self.assertTrue(
            Read2CDSSolver.test_cds_alignment_container_consistency(
                self.cds_aln_cont))

    def _load_active_reads(self):
        '''
        Builds the expected CDS-to-reads mapping from self.results_fpath.

        Lines are consumed two at a time: the first (stripped) becomes the
        dictionary key, the second (stripped, split on ';') becomes the
        list of read IDs stored under that key.

        @return (dict) key line -> list of read ID strings
        @raise ValueError if a non-blank key line is the last line of the file
        '''
        # The context manager releases the handle on every exit path.
        with open(self.results_fpath) as results_fhandle:
            lines = iter(results_fhandle.readlines())

        cds2read_map = {}
        for cds_id in lines:
            read_ids = next(lines, None)
            if read_ids is None:
                if not cds_id.strip():
                    # Tolerate a blank line at the very end of the file.
                    break
                raise ValueError(
                    "No read IDs line follows CDS '%s' in %s"
                    % (cds_id.strip(), self.results_fpath))
            cds2read_map[cds_id.strip()] = read_ids.strip().split(';')
        return cds2read_map
# Create the database access object, the taxonomy tree (from args.tax_tree)
# and the host determinator that uses both.
db_access = DbQuery()
tax_tree = TaxTree(args.tax_tree)
host_determinator = HostDeterminator(dbquery=db_access, tax_tree=tax_tree)

log.info("Started.")
# Start of the overall processing timer (not stopped within this fragment).
processing_start = timing.start()

solver = Solver(host_determinator, read2cds_solver, tax_solver)

# Populate read container
# The read container type can now be determined from the input parameters
# and injected into the Solver
start = timing.start()
read_container = ReadContainer()
read_container.populate_from_aln_file(read_alignment_file=args.input)
elapsed_time = timing.end(start)
log.info("Populate read container - elapsed time: %s",
         timing.humanize(elapsed_time))

# Populate record container
# The record container type can now be determined from the input parameters
# and injected into the Solver
start = timing.start()
record_container = RecordContainer()
record_container.set_db_access(db_access)
# Extract all records from database
record_container.populate(read_container.fetch_all_reads_versions())
elapsed_time = timing.end(start)
log.info("Populate record container - elapsed time: %s",
         timing.humanize(elapsed_time))
from data.containers.record import RecordContainer
from data.containers.read import ReadContainer
from data.containers.cdsaln import CdsAlnContainer
from utils.logger import Logger

# Example driver: fills the containers from a small alignment file, maps
# reads to CDSs with the best score solver and prints a consistency check.
# NOTE(review): DbQuery, BestScoreSolver and Read2CDSSolver are not imported
# in this fragment - presumably imported above it; confirm.

Logger("log")

db_query = DbQuery()

# create containers
record_container = RecordContainer()
record_container.set_db_access(db_query)

read_container = ReadContainer()
read_container.populate_from_aln_file("example_data/2reads.in")

# Pass the read versions / reads rather than the container itself, matching
# every other populate() call site in this code base.
record_container.populate(read_container.fetch_all_reads_versions())
read_container.populate_cdss(record_container)

cds_aln_container = CdsAlnContainer()
cds_aln_container.populate(read_container.fetch_all_reads())

# Single-argument parenthesised print is identical on Python 2 and 3.
print(cds_aln_container)

r2c_solver = BestScoreSolver()
r2c_solver.map_reads_2_cdss(cds_aln_container)

is_consistent = Read2CDSSolver.test_cds_alignment_container_consistency(
    cds_aln_container)
# Joining with a space reproduces the separator the two-item Python 2
# print statement emitted, so the output text is unchanged.
print(" ".join(["Consistency test result: ", str(is_consistent)]))
# Create database access db_access = DbQuery() tax_tree = TaxTree(args.tax_tree) host_determinator = HostDeterminator(dbquery=db_access, tax_tree=tax_tree) log.info("Started.") processing_start = timing.start() solver = Solver(host_determinator, read2cds_solver, tax_solver) # Populate read container # The read container type can now be determined from the input parameters # and injected into the Solver start = timing.start() read_container = ReadContainer() read_container.populate_from_aln_file(read_alignment_file=args.input) elapsed_time = timing.end(start) log.info("Populate read container - elapsed time: %s", timing.humanize(elapsed_time)) # Populate record container # The record container type can now be determine from the input parameters # and injected into the Solver start = timing.start() record_container = RecordContainer() record_container.set_db_access(db_access) # Extract all records from database record_container.populate(read_container.fetch_all_reads_versions()) elapsed_time = timing.end(start) log.info("Populate record container - elapsed time: %s", timing.humanize(elapsed_time))