def test_tabix_db(self):
    # Prepare the test environment and a controller to exercise.
    self.init_test('test_tabix_database')
    controller = self.__create_ucsc_ctrller_instance()

    # Stage the fixture inside the working directory so that the files
    # written by tabix_db() land next to the working copy.
    fixture_path = os.path.join(self.data_dir,
                                'test_tabix_database.txt')
    staged_path = os.path.join(self.working_dir,
                               'test_tabix_database.txt')
    expected_path = os.path.join(self.working_dir,
                                 'test_tabix_database.txt.gz')
    self.copy_file(fixture_path, staged_path)

    # tabix_db() must return the '.gz' path, and both that file and its
    # '.tbi' companion must exist on disk.
    out_file = controller.tabix_db(staged_path)
    self.assertEqual(expected_path,
                     out_file,
                     "Tabix doesn't work correctly")
    self.assertTrue(os.path.exists(out_file),
                    "Tabix doesn't work correctly")
    tbi_path = out_file + '.tbi'
    self.assertTrue(os.path.exists(tbi_path),
                    "Tabix doesn't work correctly")

    # The produced file must be readable: only the first record of the
    # queried region is pulled and checked.
    reader = UcscReader()
    reader.read(out_file)
    nothing = object()  # sentinel marking "the query yielded no record"
    first_rec = next(iter(reader.fetch_snps('chr3', 108572604, 108572605)),
                     nothing)
    readable = first_rec is not nothing
    if readable:
        self.assertEqual(first_rec.start_pos,
                         '108572604',
                         "Database tabixing doesn't work correctly")
    self.assertTrue(readable, "Tabixed ucsc database is not readable")
class Referer(Configure):
    """Access point to the UCSC and LJB reference databases."""

    def __init__(self):
        Configure.__init__(self)

    def load_config(self):
        """Load the configuration, then open both reference databases.

        The UCSC reader is pointed at the configured latest UCSC file and
        the LJB reader at the configured latest LJB prefix + '.txt.gz'.
        """
        Configure.load_config(self)

        self.__ucsc_reader = UcscReader()
        ucsc_path = self.config_values[combivep_settings.LATEST_UCSC_FILE_NAME]
        self.__ucsc_reader.read(ucsc_path)

        self.__ljb_reader = LjbReader()
        ljb_prefix = self.config_values[combivep_settings.LATEST_LJB_FILE_PREFIX]
        self.__ljb_reader.read(ljb_prefix + '.txt.gz')

    def validate_snp(self, chrom, pos, ref, alt):
        """
        Check a SNP against the UCSC reference database.

        All inputs are strings except "pos", which is an integer.
        "chrom" may be written either as "chr1" or as "1".
        "pos" is a 1-based index.

        Return True if the SNP is present in the UCSC reference database
        and False otherwise.
        """
        fetch = self.__ucsc_reader.fetch_array_snps
        end = int(pos)
        # The reader is queried with the interval (pos - 1, pos).
        for record in fetch(chrom, end - 1, end):
            # Skip records with a different reference allele, and never
            # accept a "variant" whose alternate equals its reference.
            if record[combivep_settings.UCSC_0_INDEX_REF] != ref or ref == alt:
                continue
            # The observed field lists alleles separated by '/'.
            observed = record[combivep_settings.UCSC_0_INDEX_OBSERVED].split('/')
            if alt in observed:
                return True
        return False

    def get_scores(self, chrom, pos, ref, alt):
        """
        Look up the precomputed prediction scores in the LJB database.

        All inputs are strings except "pos", which is an integer.
        "chrom" may be written either as "chr1" or as "1".
        "pos" is a 1-based index.

        Return the hash of scores if the SNP is precomputed and None
        otherwise.
        """
        scores = self.__ljb_reader.get_scores(chrom, pos, ref, alt)
        return scores
def test_tabix_database(self):
    # Prepare the test environment and the controller under test.
    self.init_test('test_tabix_database')
    self.init_ucsc_controller_instance()

    # Stage the fixture inside the working directory so that the files
    # written by tabix_database() land next to the working copy.
    fixture_path = os.path.join(self.data_dir,
                                'test_tabix_database.txt')
    staged_path = os.path.join(self.working_dir,
                               'test_tabix_database.txt')
    expected_path = os.path.join(self.working_dir,
                                 'test_tabix_database.txt.gz')
    self.copy_file(fixture_path, staged_path)

    # tabix_database() must return the '.gz' path, and both that file and
    # its '.tbi' companion must exist on disk.
    out_file = self.__ucsc_controller.tabix_database(staged_path)
    self.assertEqual(expected_path,
                     out_file,
                     "Tabix doesn't work correctly")
    self.assertTrue(os.path.exists(out_file),
                    "Tabix doesn't work correctly")
    tbi_path = out_file + '.tbi'
    self.assertTrue(os.path.exists(tbi_path),
                    "Tabix doesn't work correctly")

    # The produced file must be readable: only the first record of the
    # queried region is pulled and checked.
    reader = UcscReader()
    reader.read(out_file)
    nothing = object()  # sentinel marking "the query yielded no record"
    first_rec = next(
        iter(reader.fetch_array_snps('chr3', 108572604, 108572605)),
        nothing)
    readable = first_rec is not nothing
    if readable:
        self.assertEqual(first_rec[combivep_settings.UCSC_0_INDEX_START_POS],
                         '108572604',
                         "Database tabixing doesn't work correctly")
    self.assertTrue(readable, "Tabixed ucsc database is not readable")