def test_sorter_with_sort_order_args(self):
    """Exercise a ``MafSorter`` built from a sort-order name plus a FASTA index.

    Writes ten tab-separated index lines (``chr1`` .. ``chr10``) to a
    temporary file, passes that file as ``fasta_index`` and then runs the
    shared ``__test_sorter`` check twice: once with chromosome ``"chr5"``,
    which must succeed, and once with chromosome ``"1"``, which must raise
    ``ValueError`` (presumably because ``"1"`` is not a contig name in the
    index -- confirm against ``__test_sorter``).
    """
    # Every entry needs its own trailing comma: adjacent string literals
    # without a comma are silently concatenated into a single element.
    lines = [
        "chr1\t248956422\t112\t70\t71",
        "chr2\t242193529\t252513167\t70\t71",
        "chr3\t198295559\t498166716\t70\t71",
        "chr4\t190214555\t699295181\t70\t71",
        "chr5\t181538259\t892227221\t70\t71",
        "chr6\t170805979\t1076358996\t70\t71",
        "chr7\t159345973\t1249605173\t70\t71",
        "chr8\t145138636\t1411227630\t70\t71",
        "chr9\t138394717\t1558439788\t70\t71",
        "chr10\t133797422\t1698811686\t70\t71",
    ]
    fd, fn = tmp_file(lines=lines)
    try:
        sorter = MafSorter(
            sort_order_name=BarcodesAndCoordinate.name(),
            max_objects_in_ram=100,
            fasta_index=fn,
        )
        self.__test_sorter(sorter=sorter, chromosome="chr5")
        with self.assertRaises(ValueError):
            self.__test_sorter(sorter=sorter, chromosome="1")
    finally:
        # Release the handle and delete the temporary index even when an
        # assertion above fails.
        fd.close()
        os.remove(fn)
def test_empty_file(self):
    """A reader over an empty file returns "" and reports line number 0."""
    handle, path = tmp_file(lines=[])
    reader = LineReader(fh=handle)

    # Peek first, then read: both must return the empty string.
    for fetch in (reader.peek_line, reader.read_line):
        self.assertEqual(fetch(), "")
    self.assertEqual(reader.line_number(), 0)

    reader.close()
    os.remove(path)
def test_single_line(self):
    """Peek/read a one-line file, then keep asking past the end of it."""
    text = "A single line"
    handle, path = tmp_file(lines=[text])
    reader = LineReader(fh=handle)

    # Nothing has been read yet.
    self.assertEqual(reader.line_number(), 0)

    # First round returns the only line; second round is past the end, where
    # "" is returned and the line number stays at 1.
    for expected_text, expected_number in ((text, 1), ("", 1)):
        self.assertEqual(reader.peek_line(), expected_text)
        self.assertEqual(reader.read_line(), expected_text)
        self.assertEqual(reader.line_number(), expected_number)

    reader.close()
    os.remove(path)
def test_multiple_line(self):
    """Iterate a three-line file, checking each line, its number and the peek."""
    expected = ["A few", "good", "lines"]
    handle, path = tmp_file(lines=expected)
    reader = LineReader(fh=handle)

    seen = 0
    for position, text in enumerate(reader, start=1):
        self.assertEqual(text, expected[position - 1])
        self.assertEqual(position, reader.line_number())
        # While lines remain, the peek must show the upcoming one.
        if position < len(expected):
            self.assertEqual(reader.peek_line(), expected[position])
        seen += 1
    self.assertEqual(seen, len(expected))

    reader.close()
    os.remove(path)
def test_load_all_scheme_data(self):
    """Load the built-in scheme data and check two failure modes.

    Covers three cases:

    * the built-in scheme files load with the test column types and yield
      more than one entry;
    * a file that is not valid JSON raises ``ValueError``;
    * loading with an empty ``column_types`` list raises ``ValueError`` whose
      message mentions the missing column type.
    """
    # Sanity check that we can load all the built-in scheme data.
    filenames = get_built_in_filenames()
    data = load_all_scheme_data(
        filenames, column_types=TestSchemeFactory.column_types
    )
    self.assertGreater(len(data), 1)

    # Malformed JSON must be rejected.
    # NOTE(review): this case calls load_all_schemes while the rest of the
    # test exercises load_all_scheme_data -- confirm that is intended.
    fd, fn = tmp_file("blah blah")
    fd.close()
    try:
        with self.assertRaises(ValueError):
            load_all_schemes([fn])
    finally:
        # Remove the temporary file even if the expectation above fails.
        os.remove(fn)

    # No column types, so we can't find the column.
    with self.assertRaises(ValueError) as e:
        load_all_scheme_data(filenames, column_types=[])
    # The raised exception lives on the context manager's ``exception``
    # attribute, and it is only populated once the ``with`` block has exited,
    # so the message must be checked here rather than inside the block.
    self.assertIn("Could not find a column type with name", str(e.exception))
def test_reader_out_of_order(self):
    """Iterating records that violate the declared sort order raises.

    The file declares the ``Coordinate`` sort order in its header but lists
    start positions 1, 4, 2 on the same chromosome. The header must still be
    readable, while iterating over the records must raise ``ValueError`` even
    though the reader is created with ``ValidationStringency.Silent``.
    """
    # Function-scope import keeps this method self-contained regardless of
    # the module's top-level imports.
    import os

    column_names = ["Chromosome", "Start_Position", "End_Position"]
    scheme = NoRestrictionsScheme(column_names)
    header_version = "%s%s %s" % (
        MafHeader.HeaderLineStartSymbol,
        MafHeader.VersionKey,
        scheme.version(),
    )
    header_sort_order = "%s%s %s" % (
        MafHeader.HeaderLineStartSymbol,
        MafHeader.SortOrderKey,
        Coordinate(),
    )
    lines = [
        header_version,
        header_sort_order,
        "\t".join(column_names),
        "\t".join(["A", "1", "1"]),
        "\t".join(["A", "4", "4"]),
        "\t".join(["A", "2", "2"]),  # out of order: follows position 4
    ]
    fh, fn = tmp_file(lines=lines)
    fh.close()
    try:
        reader = MafReader.reader_from(
            path=fn,
            validation_stringency=ValidationStringency.Silent,
            scheme=scheme,
        )
        try:
            self.assertEqual(reader.scheme().version(), scheme.version())
            self.assertEqual(reader.header().version(), scheme.version())
            self.assertEqual(
                reader.header().sort_order().name(), Coordinate().name()
            )
            # Only the iteration matters here; the records are not needed.
            with self.assertRaises(ValueError):
                for _ in reader:
                    pass
        finally:
            reader.close()
    finally:
        # The temporary file was previously left behind on disk.
        os.remove(fn)
def test_from_line_reader_ok(self):
    """Parse a five-line header through a ``LineReader`` without errors.

    The input holds the version, annotation-spec and sort-order lines plus
    two free-form ``#key value`` lines; the parsed header must expose the
    five keys and their values in that same order.
    """
    header_lines = [
        TestMafHeader.__version_line,
        TestMafHeader.__annotation_line,
        TestMafHeader.__sort_order_line,
        "#key1 value1",
        "#key2 value2",
    ]
    expected_keys = [
        MafHeader.VersionKey,
        MafHeader.AnnotationSpecKey,
        MafHeader.SortOrderKey,
        "key1",
        "key2",
    ]
    expected_values = [
        TestMafHeader.Version,
        TestMafHeader.AnnotationSpec,
        Coordinate.name(),
        "value1",
        "value2",
    ]

    fh, fn = tmp_file(header_lines)
    line_reader = LineReader(fh)
    header = MafHeader.from_line_reader(
        line_reader=line_reader,
        validation_stringency=ValidationStringency.Silent,
    )
    fh.close()

    self.assertTrue(len(header.validation_errors) == 0)
    self.assertTrue(len(header) == 5)
    actual_keys = list(header.keys())
    self.assertEqual(actual_keys, expected_keys)
    actual_values = [str(record.value) for record in header.values()]
    self.assertEqual(actual_values, expected_values)
    self.assertEqual(header.version(), TestMafHeader.Version)

    os.remove(fn)
def test_reader_from_with_scheme(self):
    """Read a three-record file with an explicitly supplied scheme.

    The file contains one version header line, the scheme's column names and
    three data rows. The reader must report the scheme's version, a header of
    length one, and three records whose ``"str1"`` and ``"float"`` columns
    hold the expected string and float values.
    """
    # Function-scope import keeps this method self-contained regardless of
    # the module's top-level imports.
    import os

    scheme = TestMafReader.TestScheme()
    header = "%s%s %s" % (
        MafHeader.HeaderLineStartSymbol,
        MafHeader.VersionKey,
        scheme.version(),
    )
    column_names = scheme.column_names()
    lines = [
        header,
        "\t".join(column_names),
        "\t".join(["cell-1-1", "1.314", "cell-1-2"]),
        "\t".join(["cell-2-1", "2.314", "cell-2-2"]),
        "\t".join(["cell-3-1", "3.314", "cell-3-2"]),
    ]
    fh, fn = tmp_file(lines=lines)
    fh.close()
    try:
        reader = MafReader.reader_from(
            path=fn,
            validation_stringency=ValidationStringency.Silent,
            scheme=scheme,
        )
        try:
            records = [record for record in reader]
            self.assertEqual(reader.scheme().version(), scheme.version())
            self.assertEqual(reader.header().version(), scheme.version())
            self.assertEqual(len(reader.header()), 1)
            self.assertEqual(len(records), 3)
            self.assertListEqual(
                [r["str1"].value for r in records],
                ["cell-1-1", "cell-2-1", "cell-3-1"],
            )
            self.assertListEqual(
                [r["float"].value for r in records],
                [1.314, 2.314, 3.314],
            )
        finally:
            reader.close()
    finally:
        # The temporary file was previously left behind on disk.
        os.remove(fn)