Beispiel #1
0
    def test_sorter_with_sort_order_args(self):
        """Build a MafSorter from a sort order name plus a FASTA index file.

        The index lines are written to a temporary file whose path is handed
        to the sorter as ``fasta_index``.  Sorting is exercised through the
        shared ``__test_sorter`` helper for ``chr5``, and the same helper is
        expected to raise ``ValueError`` for the chromosome ``"1"``
        (presumably because that name is absent from the index -- confirm
        against the sort order implementation).
        """
        # One tab-separated index line per contig.  Every element needs its
        # own trailing comma: without it Python concatenates adjacent string
        # literals and two contigs collapse into a single malformed line.
        lines = [
            "chr1\t248956422\t112\t70\t71",
            "chr2\t242193529\t252513167\t70\t71",
            "chr3\t198295559\t498166716\t70\t71",
            "chr4\t190214555\t699295181\t70\t71",
            "chr5\t181538259\t892227221\t70\t71",
            "chr6\t170805979\t1076358996\t70\t71",
            "chr7\t159345973\t1249605173\t70\t71",
            "chr8\t145138636\t1411227630\t70\t71",
            "chr9\t138394717\t1558439788\t70\t71",
            "chr10\t133797422\t1698811686\t70\t71",
        ]
        fd, fn = tmp_file(lines=lines)

        try:
            sorter = MafSorter(
                sort_order_name=BarcodesAndCoordinate.name(),
                max_objects_in_ram=100,
                fasta_index=fn,
            )

            self.__test_sorter(sorter=sorter, chromosome="chr5")

            with self.assertRaises(ValueError):
                self.__test_sorter(sorter=sorter, chromosome="1")
        finally:
            # Release the temporary index even when an assertion fails.
            fd.close()
            os.remove(fn)
Beispiel #2
0
 def test_empty_file(self):
     """A file without lines yields "" on peek and read and stays at line 0."""
     handle, path = tmp_file(lines=[])
     line_reader = LineReader(fh=handle)

     # Peek first, then read: both are expected to report the empty string.
     for fetch in (line_reader.peek_line, line_reader.read_line):
         self.assertEqual(fetch(), "")

     # Nothing was consumed, so the counter must not have moved.
     self.assertEqual(line_reader.line_number(), 0)

     line_reader.close()
     os.remove(path)
Beispiel #3
0
 def test_single_line(self):
     """A one-line file is peeked and read once, then reports "" afterwards."""
     only_line = "A single line"
     handle, path = tmp_file(lines=[only_line])
     line_reader = LineReader(fh=handle)

     # Before anything is consumed the line counter sits at zero.
     self.assertEqual(line_reader.line_number(), 0)

     # Each round is peek -> read -> counter check.  The first round sees the
     # real line; the second sees the exhausted reader, whose counter stays 1.
     for expected_text, expected_count in ((only_line, 1), ("", 1)):
         self.assertEqual(line_reader.peek_line(), expected_text)
         self.assertEqual(line_reader.read_line(), expected_text)
         self.assertEqual(line_reader.line_number(), expected_count)

     line_reader.close()
     os.remove(path)
Beispiel #4
0
 def test_multiple_line(self):
     """Iterating a reader yields each line in order with matching line numbers."""
     expected = ["A few", "good", "lines"]
     handle, path = tmp_file(lines=expected)
     line_reader = LineReader(fh=handle)

     seen = 0
     # Positions are 1-based so they compare directly with line_number().
     for position, text in enumerate(line_reader, start=1):
         self.assertEqual(text, expected[position - 1])
         self.assertEqual(position, line_reader.line_number())
         # Every line except the last must expose its successor via peek.
         has_successor = position < len(expected)
         if has_successor:
             self.assertEqual(line_reader.peek_line(), expected[position])
         seen += 1

     self.assertEqual(seen, len(expected))
     line_reader.close()
     os.remove(path)
Beispiel #5
0
    def test_load_all_scheme_data(self):
        """Load the built-in scheme data and check two failure modes.

        Covers three cases:
        * the built-in files load with the test column types and produce
          more than one entry;
        * a file that is not valid JSON makes ``load_all_schemes`` raise
          ``ValueError``;
        * loading with an empty ``column_types`` list raises ``ValueError``
          whose message mentions the missing column type.
        """
        # sanity check that all the built-in scheme data can be loaded
        filenames = get_built_in_filenames()
        data = load_all_scheme_data(
            filenames, column_types=TestSchemeFactory.column_types
        )
        self.assertTrue(len(data) > 1)

        # test malformed JSON
        fd, fn = tmp_file("blah blah")
        fd.close()
        try:
            with self.assertRaises(ValueError):
                load_all_schemes([fn])
        finally:
            # Remove the temporary file even if the expectation fails.
            os.remove(fn)

        # no column types, so we can't find the column
        with self.assertRaises(ValueError) as e:
            load_all_scheme_data(filenames, column_types=[])
        # The message check must live outside the ``with`` block: statements
        # after the raising call inside it never execute.  The raised error is
        # exposed on the context manager as ``e.exception``.
        self.assertIn(
            "Could not find a column type with name", str(e.exception)
        )
Beispiel #6
0
    def test_reader_out_of_order(self):
        """Iterating records that violate the declared sort order raises.

        The header declares the ``Coordinate`` sort order, while the third
        record's start position (2) is lower than the second's (4).  Reading
        all records is therefore expected to raise ``ValueError``.
        """
        column_names = ["Chromosome", "Start_Position", "End_Position"]
        scheme = NoRestrictionsScheme(column_names)
        header_version = "%s%s %s" % (
            MafHeader.HeaderLineStartSymbol,
            MafHeader.VersionKey,
            scheme.version(),
        )
        header_sort_order = "%s%s %s" % (
            MafHeader.HeaderLineStartSymbol,
            MafHeader.SortOrderKey,
            Coordinate(),
        )

        lines = [
            header_version,
            header_sort_order,
            "\t".join(column_names),
            "\t".join(["A", "1", "1"]),
            "\t".join(["A", "4", "4"]),
            "\t".join(["A", "2", "2"]),
        ]

        fh, fn = tmp_file(lines=lines)
        fh.close()

        try:
            reader = MafReader.reader_from(
                path=fn,
                validation_stringency=ValidationStringency.Silent,
                scheme=scheme)
            try:
                self.assertEqual(reader.scheme().version(), scheme.version())
                self.assertEqual(reader.header().version(), scheme.version())
                self.assertEqual(reader.header().sort_order().name(),
                                 Coordinate().name())

                # Only the act of iterating matters here; the records
                # themselves are discarded.
                with self.assertRaises(ValueError):
                    for _ in reader:
                        pass
            finally:
                # Close the reader before deleting the file beneath it.
                reader.close()
        finally:
            # The temporary file outlives ``fh.close()``, so remove it here.
            os.remove(fn)
Beispiel #7
0
    def test_from_line_reader_ok(self):
        """Parse a five-line header via a LineReader and verify keys and values."""
        header_lines = [
            TestMafHeader.__version_line,
            TestMafHeader.__annotation_line,
            TestMafHeader.__sort_order_line,
            "#key1 value1",
            "#key2 value2",
        ]
        fh, fn = tmp_file(header_lines)
        header = MafHeader.from_line_reader(
            line_reader=LineReader(fh),
            validation_stringency=ValidationStringency.Silent,
        )
        fh.close()

        # No validation errors and exactly one record per input line.
        self.assertTrue(len(header.validation_errors) == 0)
        self.assertTrue(len(header) == 5)

        # Keys are expected in the order the lines were written.
        actual_keys = list(header.keys())
        expected_keys = [
            MafHeader.VersionKey,
            MafHeader.AnnotationSpecKey,
            MafHeader.SortOrderKey,
            "key1",
            "key2",
        ]
        self.assertEqual(actual_keys, expected_keys)

        # Values are compared through their string form.
        actual_values = [str(entry.value) for entry in header.values()]
        expected_values = [
            TestMafHeader.Version,
            TestMafHeader.AnnotationSpec,
            Coordinate.name(),
            "value1",
            "value2",
        ]
        self.assertEqual(actual_values, expected_values)

        self.assertEqual(header.version(), TestMafHeader.Version)
        os.remove(fn)
Beispiel #8
0
    def test_reader_from_with_scheme(self):
        """Read a three-record file with an explicitly supplied scheme.

        Writes a version header, the scheme's column names and three data
        rows to a temporary file, reads it back through
        ``MafReader.reader_from`` and checks the scheme and header versions,
        the header size, the record count and the ``str1`` / ``float`` column
        values.
        """
        scheme = TestMafReader.TestScheme()
        header = "%s%s %s" % (
            MafHeader.HeaderLineStartSymbol,
            MafHeader.VersionKey,
            scheme.version(),
        )
        column_names = scheme.column_names()

        lines = [
            header,
            "\t".join(column_names),
            "\t".join(["cell-1-1", "1.314", "cell-1-2"]),
            "\t".join(["cell-2-1", "2.314", "cell-2-2"]),
            "\t".join(["cell-3-1", "3.314", "cell-3-2"]),
        ]

        fh, fn = tmp_file(lines=lines)
        fh.close()

        try:
            reader = MafReader.reader_from(
                path=fn,
                validation_stringency=ValidationStringency.Silent,
                scheme=scheme)
            try:
                records = [record for record in reader]

                self.assertEqual(reader.scheme().version(), scheme.version())
                self.assertEqual(reader.header().version(), scheme.version())
                # Only the version line was written as a header line.
                self.assertEqual(len(reader.header()), 1)
                self.assertEqual(len(records), 3)
                self.assertListEqual([r["str1"].value for r in records],
                                     ["cell-1-1", "cell-2-1", "cell-3-1"])
                self.assertListEqual([r["float"].value for r in records],
                                     [1.314, 2.314, 3.314])
            finally:
                # Close the reader before deleting the file beneath it.
                reader.close()
        finally:
            # The temporary file outlives ``fh.close()``, so remove it here.
            os.remove(fn)