Exemple #1
0
def run_main(subcommand,
             lines,
             main_args=None,
             subcommand_args=None,
             to_stdout=False):
    """Run ``main`` for ``subcommand`` on ``lines`` and collect what it wrote.

    ``lines`` are written to a temporary input file that is passed to
    ``main`` via ``--input``.  Unless ``to_stdout`` is true, a temporary
    output file is passed via ``--output``.

    :param subcommand: name of the subcommand, placed after ``main_args``.
    :param lines: lines written to the temporary input file.
    :param main_args: optional arguments placed before the subcommand; the
        caller's list is not modified.
    :param subcommand_args: optional arguments appended after the
        input/output options.
    :param to_stdout: when true, ``--output`` is not passed to ``main``.
    :return: a tuple ``(out_lines, stdout, stderr)``; each element is a list
        of lines with trailing line terminators removed.
    """
    in_fh, in_fn = tmp_file(lines=lines)
    in_fh.close()

    out_fd, out_fn = tempfile.mkstemp()
    # mkstemp() hands back an open OS-level descriptor; close it right away so
    # it is not leaked (the file is re-opened by name below).
    os.close(out_fd)

    try:
        # Work on a copy so the caller's list is never mutated.
        args = list(main_args) if main_args else []
        args.extend([subcommand, "--input", str(in_fn)])
        if not to_stdout:
            args.extend(["--output", str(out_fn)])
        if subcommand_args:
            args.extend(subcommand_args)

        with captured_output() as (stdout, stderr):
            main(args=args)

        with open(out_fn, "r") as fh:
            out_lines = [line.rstrip("\r\n") for line in fh]

        stdout_lines = stdout.getvalue().rstrip('\r\n').split("\n")
        stderr_lines = stderr.getvalue().rstrip('\r\n').split("\n")
    finally:
        # Remove the temporary files even when main() raises.
        os.remove(in_fn)
        os.remove(out_fn)

    return (out_lines, stdout_lines, stderr_lines)
Exemple #2
0
    def test_sorter_with_sort_order_args(self):
        """Exercise a MafSorter built with a fasta index for contig ordering.

        A chromosome listed in the index ("chr5") must be accepted, while one
        that is not listed ("1") must raise ``ValueError``.
        """
        # One fasta-index entry per element.  Every element needs its own
        # trailing comma: adjacent string literals are silently concatenated.
        lines = [
            "chr1\t248956422\t112\t70\t71",
            "chr2\t242193529\t252513167\t70\t71",
            "chr3\t198295559\t498166716\t70\t71",
            "chr4\t190214555\t699295181\t70\t71",
            "chr5\t181538259\t892227221\t70\t71",
            "chr6\t170805979\t1076358996\t70\t71",
            "chr7\t159345973\t1249605173\t70\t71",
            "chr8\t145138636\t1411227630\t70\t71",
            "chr9\t138394717\t1558439788\t70\t71",
            "chr10\t133797422\t1698811686\t70\t71",
        ]
        fd, fn = tmp_file(lines=lines)
        try:
            sorter = MafSorter(sort_order_name=BarcodesAndCoordinate.name(),
                               max_objects_in_ram=100,
                               fasta_index=fn)

            self.__test_sorter(sorter=sorter, chromosome="chr5")

            with self.assertRaises(ValueError):
                self.__test_sorter(sorter=sorter, chromosome="1")
        finally:
            # Release the temporary index even when an assertion fails.
            fd.close()
            os.remove(fn)
Exemple #3
0
    def test_with_contigs(self):
        """Check the Coordinate sort key when contigs come from a fasta index.

        Records on listed contigs must compare as expected, and a record on
        an unlisted contig must raise ``ValueError`` when its key is built.
        """
        # One fasta-index entry per element.  Every element needs its own
        # trailing comma: adjacent string literals are silently concatenated.
        lines = [
            "chr1\t248956422\t112\t70\t71",
            "chr2\t242193529\t252513167\t70\t71",
            "chr3\t198295559\t498166716\t70\t71",
            "chr4\t190214555\t699295181\t70\t71",
            "chr5\t181538259\t892227221\t70\t71",
            "chr6\t170805979\t1076358996\t70\t71",
            "chr7\t159345973\t1249605173\t70\t71",
            "chr8\t145138636\t1411227630\t70\t71",
            "chr9\t138394717\t1558439788\t70\t71",
            "chr10\t133797422\t1698811686\t70\t71",
        ]
        fd, fn = tmp_file(lines=lines)
        try:
            sort_order = Coordinate(fasta_index=fn)
            sort_key = sort_order.sort_key()

            r1 = TestCoordinateKey.DummyRecord("chr1", 1, 2)
            r2 = TestCoordinateKey.DummyRecord("chr10", 1, 3)
            r3 = TestCoordinateKey.DummyRecord("no-chr", 1, 3)

            # both have contigs defined
            self.__test_diff(r1, r2, sort_key=sort_key)

            # contig undefined
            with self.assertRaises(ValueError):
                sort_key(r3)
        finally:
            # Release the temporary index even when an assertion fails.
            fd.close()
            os.remove(fn)
Exemple #4
0
    def __sort(self, lines, extra_args, test_func, to_stdout=False):
        """Run ``main`` with the "validate" subcommand and hand results to ``test_func``.

        ``lines`` are written to a temporary input file passed via
        ``--input``.  Unless ``to_stdout`` is true, a temporary output file is
        passed via ``--output``.  ``extra_args`` are appended afterwards.

        ``test_func`` is called with the keyword arguments ``out_lines``,
        ``stdout`` and ``stderr``, each a list of lines with trailing line
        terminators removed.
        """
        in_fh, in_fn = tmp_file(lines=lines)
        in_fh.close()

        out_fd, out_fn = tempfile.mkstemp()
        # mkstemp() hands back an open OS-level descriptor; close it right
        # away so it is not leaked (the file is re-opened by name below).
        os.close(out_fd)

        try:
            main_args = ["validate", "--input", str(in_fn)]
            if not to_stdout:
                main_args.extend(["--output", str(out_fn)])
            main_args.extend(extra_args)

            with captured_output() as (stdout, stderr):
                main(args=main_args)

            with open(out_fn, "r") as fh:
                out_lines = [line.rstrip("\r\n") for line in fh]

            stdout_lines = stdout.getvalue().rstrip('\r\n').split("\n")
            stderr_lines = stderr.getvalue().rstrip('\r\n').split("\n")

            test_func(out_lines=out_lines,
                      stdout=stdout_lines,
                      stderr=stderr_lines)
        finally:
            # Remove the temporary files even when main() or test_func fails.
            os.remove(in_fn)
            os.remove(out_fn)
Exemple #5
0
 def test_empty_file(self):
     """An empty file yields empty strings and leaves the line number at 0."""
     fh, fn = tmp_file(lines=[])
     reader = LineReader(fh=fh)
     try:
         self.assertEqual(reader.peek_line(), "")
         self.assertEqual(reader.read_line(), "")
         self.assertEqual(reader.line_number(), 0)
     finally:
         # Release the reader and the temp file even when an assertion fails.
         reader.close()
         os.remove(fn)
Exemple #6
0
 def test_single_line(self):
     """A one-line file is peeked and read once; afterwards reads return ""."""
     line = "A single line"
     fh, fn = tmp_file(lines=[line])
     reader = LineReader(fh=fh)
     try:
         # Nothing consumed yet.
         self.assertEqual(reader.line_number(), 0)
         # Peeking returns the line; reading it advances the line number.
         self.assertEqual(reader.peek_line(), line)
         self.assertEqual(reader.read_line(), line)
         self.assertEqual(reader.line_number(), 1)
         # Past the end: empty strings, and the line number stays put.
         self.assertEqual(reader.peek_line(), "")
         self.assertEqual(reader.read_line(), "")
         self.assertEqual(reader.line_number(), 1)
     finally:
         # Release the reader and the temp file even when an assertion fails.
         reader.close()
         os.remove(fn)
Exemple #7
0
 def test_multiple_line(self):
     """Iterating the reader yields every line in order with 1-based numbering."""
     lines = ["A few", "good", "lines"]
     fh, fn = tmp_file(lines=lines)
     reader = LineReader(fh=fh)
     try:
         num_lines = 0
         for i, line in enumerate(reader):
             line_number = i + 1
             self.assertEqual(line, lines[i])
             self.assertEqual(line_number, reader.line_number())
             # While lines remain, peeking must expose the next one.
             if line_number < len(lines):
                 self.assertEqual(reader.peek_line(), lines[i + 1])
             num_lines += 1
         self.assertEqual(num_lines, len(lines))
     finally:
         # Release the reader and the temp file even when an assertion fails.
         reader.close()
         os.remove(fn)
Exemple #8
0
    def test_with_fasta_index(self):
        """Sort the test MAF and check record count, sort pragma and contig order."""
        # change the order of chromosomes!
        fasta_index_lines = [
            "chr13\t114364328\t2106716512\t70\t71",
            "chr1\t248956422\t112\t70\t71"
        ]
        fd, fn = tmp_file(lines=fasta_index_lines)
        try:
            lines, header, records = self.read_test_maf()
            # NOTE(review): the fasta index file ``fn`` is created above but is
            # never handed to run_main -- confirm whether an index option
            # should be part of subcommand_args.
            subcommand_args = [
                "--version",
                GdcV1_0_0_PublicScheme.version(), "--annotation",
                GdcV1_0_0_PublicScheme.annotation_spec()
            ]
            out_lines, stdout, stderr = run_main(
                subcommand="sort",
                lines=lines,
                subcommand_args=subcommand_args)

            # Check that we have the same # of records
            out_records = [
                line for line in out_lines
                if not line.startswith("#")
                and not line.startswith("Hugo_Symbol")
            ]
            self.assertEqual(len(out_records), len(records))

            # Check that we added the sort pragma
            sortOrderLine = "%s%s %s" % (MafHeader.HeaderLineStartSymbol,
                                         MafHeader.SortOrderKey,
                                         BarcodesAndCoordinate.name())
            self.assertIn(sortOrderLine, out_lines)

            scheme = find_scheme(
                version=GdcV1_0_0_PublicScheme.version(),
                annotation=GdcV1_0_0_PublicScheme.annotation_spec())
            # we should see chr13 before chr1
            self.assertEqual(len(out_lines) - 1, len(lines))  # added the pragma
            found_chr1 = False
            for line in out_lines:
                if line.startswith(MafHeader.HeaderLineStartSymbol):
                    continue
                record = MafRecord.from_line(line=line, scheme=scheme)
                self.assertFalse(record["Chromosome"] == "chr13" and found_chr1)
                # NOTE(review): the flag is overwritten on every record, so
                # only a chr13 record directly after a chr1 record is caught
                # -- confirm whether it was meant to stay set once seen.
                found_chr1 = record["Chromosome"] == "chr1"
        finally:
            # Release the temporary index even when an assertion fails.
            fd.close()
            os.remove(fn)
Exemple #9
0
    def test_from_line_reader_ok(self):
        """Build a header from a LineReader over five valid header lines.

        The header must carry no validation errors and expose the five keys
        and values in the order they were written.
        """
        fh, fn = tmp_file([
            TestMafHeader.__version_line, TestMafHeader.__annotation_line,
            TestMafHeader.__sort_order_line, "#key1 value1", "#key2 value2"
        ])
        try:
            try:
                line_reader = LineReader(fh)
                header = MafHeader.from_line_reader(
                    line_reader=line_reader,
                    validation_stringency=ValidationStringency.Silent)
            finally:
                fh.close()

            # assertEqual reports the actual values when the check fails.
            self.assertEqual(len(header.validation_errors), 0)
            self.assertEqual(len(header), 5)
            self.assertEqual(list(header.keys()), [
                MafHeader.VersionKey, MafHeader.AnnotationSpecKey,
                MafHeader.SortOrderKey, "key1", "key2"
            ])
            self.assertEqual(
                [str(record.value) for record in header.values()], [
                    TestMafHeader.Version, TestMafHeader.AnnotationSpec,
                    Coordinate.name(), "value1", "value2"
                ])
            self.assertEqual(header.version(), TestMafHeader.Version)
        finally:
            # Remove the temp file even when parsing or an assertion fails.
            os.remove(fn)