def test_parse_lrg5():
    """Test that all expected exon labels are parsed from the LRG_5 file.

    The labels produced by lrg_parser.lrg_parse() are compared as sets with
    the comma-separated truthset in data/LRG5_exons.list, so ordering and
    duplicate labels are ignored.
    """
    # Get exons for LRG 5
    lrg5_root = lrg_parser.set_root(lrg_parser.parse_file('data/LRG_5.xml'))
    lrg5_exon_data_tuple = lrg_parser.lrg_parse(lrg5_root)
    # zip(*...) transposes the per-exon tuples, so index 0 collects the first
    # element of every tuple (the exon labels).
    lrg5_exons = list(zip(*lrg5_exon_data_tuple))[0]

    # Get exon label truthset from local data. The context manager makes sure
    # the file handle is closed once the contents have been read.
    with open('data/LRG5_exons.list', 'r') as truthset_file:
        lrg5_truthset = truthset_file.read().split(",")

    assert set(lrg5_exons) == set(lrg5_truthset)
def test_parse_lrg1():
    """Test that all expected exon labels are parsed from the LRG_1 file.

    The labels produced by lrg_parser.lrg_parse() are compared as sets with
    the comma-separated truthset in data/LRG1_exons.list, so ordering and
    duplicate labels are ignored.
    """
    # Get exons for LRG 1
    lrg1_root = lrg_parser.set_root(lrg_parser.parse_file('data/LRG_1.xml'))
    lrg1_exon_data_tuple = lrg_parser.lrg_parse(lrg1_root)

    # Exon labels are the first element of each tuple returned by
    # lrg_parser.lrg_parse(). zip() is used to merge the tuples by matching
    # element indexes, e.g.:
    #   a = [(1,2,3), (4,5,6)]
    #   zip(*a) = [(1,4), (2,5), (3,6)]
    lrg1_exons = list(zip(*lrg1_exon_data_tuple))[0]

    # Get exon label truthset from local data. The context manager makes sure
    # the file handle is closed once the contents have been read.
    with open('data/LRG1_exons.list', 'r') as truthset_file:
        lrg1_truthset = truthset_file.read().split(",")

    assert set(lrg1_exons) == set(lrg1_truthset)
def test_convert_lrg5():
    """Assert LRG_5 exon regions are accurately converted to GRCh38.p12 coordinates.

    Each non-header row of data/LRG_5_GRCh38_p12_coordinates.csv is compared,
    by position, with the corresponding entry returned by
    lrg_parser.convert_coords(). Only the fields at index 1 and 2 are checked.
    NOTE(review): these are presumably the start and end coordinates; confirm
    against the header line of the truthset file.
    """
    lrg5_root = lrg_parser.set_root(lrg_parser.parse_file('data/LRG_5.xml'))
    lrg5_exon_data_tuple = lrg_parser.lrg_parse(lrg5_root)
    lrg5_coordinates = lrg_parser.convert_coords(lrg5_root, lrg5_exon_data_tuple)

    # Read the truthset as tuples of comma-separated fields. The context
    # manager closes the file handle once all lines have been consumed.
    with open('data/LRG_5_GRCh38_p12_coordinates.csv', 'r') as truthset_file:
        lrg5_coords_truthset_wheaders = [
            tuple(line.strip().split(",")) for line in truthset_file
        ]

    # Remove lines from truthset file that start with '#'. These lines contain
    # headers of the truthset columns.
    lrg5_coords_truthset = [
        row for row in lrg5_coords_truthset_wheaders
        if not row[0].startswith('#')
    ]

    # Printed so the converted coordinates appear in the captured output when
    # an assertion below fails.
    print(lrg5_coordinates)

    # Index into the converted coordinates (rather than zip()) so that a result
    # shorter than the truthset still raises instead of being silently skipped.
    for index, expected in enumerate(lrg5_coords_truthset):
        converted = lrg5_coordinates[index]
        assert converted[1] == expected[1]
        assert converted[2] == expected[2]
def test_set_root_from_name():
    """Check that set_root() returns an ET.Element for data obtained by LRG name.

    The input is whatever lrg_parser.get_file('LRG_1') returns.
    """
    xml_data = lrg_parser.get_file('LRG_1')
    parsed_root = lrg_parser.set_root(xml_data)
    assert isinstance(parsed_root, ET.Element)
def test_set_root_from_file():
    """Check that set_root() returns an ET.Element for data loaded from a file.

    The input is whatever lrg_parser.parse_file('data/LRG_1.xml') returns.
    """
    xml_data = lrg_parser.parse_file('data/LRG_1.xml')
    parsed_root = lrg_parser.set_root(xml_data)
    assert isinstance(parsed_root, ET.Element)