Example #1
0
    def test_from_reader(self):
        """
        Checks that a header built from a reader takes its scheme and sort
        order from the reader by default, and that explicit version,
        annotation and sort order arguments override them.
        """
        protected = GdcV1_0_0_ProtectedScheme()
        reader = MafReader(
            lines=[
                TestMafHeader.__version_line,
                TestMafHeader.__annotation_line,
                TestMafHeader.__sort_order_line,
            ],
            validation_stringency=ValidationStringency.Silent,
            scheme=protected,
        )
        # The reader is closed up front; from_reader is still called on it.
        reader.close()

        # Without overrides the header mirrors the reader
        plain_header = MafHeader.from_reader(reader=reader)
        self.assertEqual(plain_header.scheme().version(), protected.version())
        self.assertEqual(plain_header.scheme().annotation_spec(),
                         protected.annotation_spec())
        self.assertEqual(plain_header.sort_order().name(), Coordinate.name())

        # With version, annotation and sort order all overridden
        public = GdcV1_0_0_PublicScheme()
        overridden = MafHeader.from_reader(
            reader=reader,
            version=public.version(),
            annotation=public.annotation_spec(),
            sort_order=sort_order.Unsorted().name(),
        )
        self.assertEqual(overridden.scheme().version(), public.version())
        self.assertEqual(overridden.scheme().annotation_spec(),
                         public.annotation_spec())
        self.assertEqual(overridden.sort_order().name(),
                         sort_order.Unsorted().name())
Example #2
0
    def setup_maf_header(self):
        """
        Builds ``self.maf_header`` for the output MAF.

        The header is created from defaults using the "version" and
        "annotation" options and a BarcodesAndCoordinate sort order.  When
        the "reference_fasta_index" option is set it is passed as the fasta
        index; otherwise the contigs of the input reader's header are used.
        A date record is then added and the "normal.aliquot" and
        "tumor.aliquot" records are copied from the input header.

        :raises KeyError: re-raised when the "normal.aliquot" lookup fails
        and the "tumor_only" option is not set.
        """
        # Header of the input reader
        source_header = MafHeader.from_reader(reader=self.maf_reader)

        fasta_index = self.options["reference_fasta_index"]
        defaults = dict(
            version=self.options["version"],
            annotation=self.options["annotation"],
            sort_order=BarcodesAndCoordinate(),
        )
        if fasta_index:
            defaults["fasta_index"] = fasta_index
        else:
            defaults["contigs"] = source_header.contigs()
        self.maf_header = MafHeader.from_defaults(**defaults)
        self.maf_header.validation_stringency = ValidationStringency.Strict

        date_record = BaseRunner.get_header_date()
        self.maf_header[date_record.key] = date_record

        # A missing normal aliquot is tolerated only for tumor-only runs
        try:
            self.maf_header["normal.aliquot"] = source_header["normal.aliquot"]
        except KeyError as e:
            if not self.options["tumor_only"]:
                raise e

        self.maf_header["tumor.aliquot"] = source_header["tumor.aliquot"]
Example #3
0
    def test_from_lines_strict_raises_on_error(self):
        """
        With Strict stringency the first validation error is raised; here
        that is the duplicated header key.
        """
        duplicated = ["#key1 value"] * 2
        with self.assertRaises(MafFormatException) as raised:
            MafHeader.from_lines(
                lines=duplicated,
                validation_stringency=ValidationStringency.Strict,
            )

        error = raised.exception
        self.assertIn("Multiple header lines", str(error))
        self.assertEqual(error.tpe,
                         MafValidationErrorType.HEADER_DUPLICATE_KEYS)
Example #4
0
    def test_record_validation_error(self):
        """
        Checks that, under Lenient stringency, a record with an invalid value
        is still written (with that value replaced by "None") and that the
        validation errors are reported on stderr while stdout stays empty.
        """
        import os  # local import: only needed to manage the temporary file

        scheme = TestMafWriter.TestScheme()
        fd, path = tempfile.mkstemp()
        # mkstemp hands back an already-open OS-level descriptor.  Only the
        # path is used below, so close the descriptor instead of leaking it.
        os.close(fd)

        try:
            # Create the header
            header_lines = (MafHeader.scheme_header_lines(scheme) +
                            ["#key1 value1", "#key2 value2"] +
                            ["str1\tNone\tstr2"])
            header = MafHeader.from_lines(
                lines=header_lines,
                validation_stringency=ValidationStringency.Silent)

            # Create the record
            values = ["string2", "error", "string1"]
            record_line = MafRecord.ColumnSeparator.join(values)
            record = MafRecord.from_line(
                line=record_line,
                scheme=scheme,
                line_number=1,
                validation_stringency=ValidationStringency.Silent,
            )

            # Write the header, and the record twice
            with captured_output() as (stdout, stderr):
                writer = MafWriter.from_path(
                    header=header,
                    validation_stringency=ValidationStringency.Lenient,
                    path=path,
                )
                writer += record
                writer.write(record)
                writer.close()
            stdout = stdout.getvalue().rstrip('\r\n').split("\n")
            stderr = stderr.getvalue().rstrip('\r\n').split("\n")
            self.assertListEqual(stdout, [''])

            # The errors that should be written to stderr
            errors = [
                "HEADER_UNSUPPORTED_VERSION",
                "HEADER_UNSUPPORTED_ANNOTATION_SPEC",
                "RECORD_COLUMN_WITH_NO_VALUE",
                "RECORD_COLUMN_WITH_NO_VALUE",
            ]
            self.assertListEqualAndIn(errors, stderr)

            # The second column should be None
            err_record_line = record_line.replace("error", "None")
            expected_lines = header_lines + [err_record_line, err_record_line]
            self.assertListEqual(read_lines(path), expected_lines)
        finally:
            # mkstemp never deletes the file; remove it whatever the outcome
            os.remove(path)
    def setup_maf_header(self):
        """
        Sets up the maf header.

        Builds ``self.maf_header`` from defaults using the "version",
        "annotation" and "reference_fasta_index" options with a
        BarcodesAndCoordinate sort order, then adds a date record, a
        "tumor.aliquot" record (from the "tumor_aliquot_uuid" option) and,
        unless the "tumor_only" option is set, a "normal.aliquot" record
        (from the "normal_aliquot_uuid" option).
        """
        self.maf_header = MafHeader.from_defaults(
            version=self.options["version"],
            annotation=self.options["annotation"],
            sort_order=BarcodesAndCoordinate(),
            fasta_index=self.options["reference_fasta_index"],
        )

        header_date = BaseRunner.get_header_date()
        self.maf_header[header_date.key] = header_date

        # The normal aliquot record is only written when "tumor_only" is not
        # set, so no empty-string fallback is needed inside this branch.
        if not self.options["tumor_only"]:
            normal_aliquot = MafHeaderRecord(
                key="normal.aliquot",
                value=self.options["normal_aliquot_uuid"],
            )
            self.maf_header[normal_aliquot.key] = normal_aliquot

        tumor_aliquot = MafHeaderRecord(
            key="tumor.aliquot", value=self.options["tumor_aliquot_uuid"]
        )
        self.maf_header[tumor_aliquot.key] = tumor_aliquot
Example #6
0
    def test_str(self):
        """
        Checks that str(header) is empty for an empty header and otherwise
        consists of one line per record, newline-joined, in insertion order.
        """
        def as_line(key, value):
            # Renders a single header line: start symbol, key, space, value
            return "%s%s %s" % (MafHeader.HeaderLineStartSymbol, key, value)

        version = MafHeaderRecord(MafHeader.VersionKey, TestMafHeader.Version)
        first = MafHeaderRecord("key1", "value1")
        second = MafHeaderRecord("key2", "value2")
        header = MafHeader()

        version_line = as_line(MafHeader.VersionKey, TestMafHeader.Version)
        first_line = as_line(first.key, first.value)
        second_line = as_line(second.key, second.value)

        # Empty header
        self.assertEqual(str(header), "")

        # Records show up one by one as they are added
        header[version.key] = version
        self.assertEqual(str(header), version_line)

        header[first.key] = first
        self.assertEqual(str(header), "%s\n%s" % (version_line, first_line))

        header[second.key] = second
        expected = "%s\n%s\n%s" % (version_line, first_line, second_line)
        self.assertEqual(str(header), expected)
Example #7
0
def writer_from_reader(reader, options):
    """
    Builds a writer whose header is derived from the given reader.

    :param reader: the reader from which the header is obtained
    :param options: the command line options; "output", "version",
    "annotation" and "validation_stringency" are read, and "sort_order" is
    used when the attribute exists.
    :return: a MafWriter for ``options.output`` when it is set, otherwise
    one writing to standard output.
    """
    header = MafHeader.from_reader(
        reader=reader,
        version=options.version,
        annotation=options.annotation,
        sort_order=getattr(options, 'sort_order', None),
    )

    # No output path: write to stdout
    if not options.output:
        return MafWriter.from_fd(
            desc=sys.stdout,
            header=header,
            validation_stringency=options.validation_stringency)

    return MafWriter.from_path(
        path=options.output,
        header=header,
        validation_stringency=options.validation_stringency)
Example #8
0
 def test_from_lines_default_to_basic(self):
     """
     A header with a version line but no annotation line validates cleanly,
     reports no annotation, and still resolves to a scheme that has an
     annotation spec.
     """
     header = MafHeader.from_lines(
         lines=[TestMafHeader.__version_line, "#key1 value", "#key2 value"],
         validation_stringency=ValidationStringency.Silent,
     )
     error_count = len(header.validation_errors)
     self.assertEqual(error_count, 0)
     self.assertIsNone(header.annotation())
     self.assertIsNotNone(header.scheme())
     self.assertIsNotNone(header.scheme().annotation_spec())
Example #9
0
    def test_from_lines_no_sort_order(self):
        """
        A header without a sort order line validates cleanly and reports the
        Unsorted sort order.
        """
        header = MafHeader.from_lines(
            lines=[
                TestMafHeader.__version_line,
                TestMafHeader.__annotation_line,
            ],
            validation_stringency=ValidationStringency.Silent,
        )

        self.assertEqual(len(header.validation_errors), 0)
        self.assertEqual(header.sort_order().name(),
                         sort_order.Unsorted.name())
Example #10
0
    def test_empty_file(self):
        """
        Checks writing an empty header at each validation stringency: the
        file stays empty, nothing is logged under Silent, the missing
        version/annotation errors go to stderr under Lenient, and a
        MafFormatException is raised under Strict.
        """
        import os  # local import: only needed to manage the temporary file

        fd, path = tempfile.mkstemp()
        # mkstemp hands back an already-open OS-level descriptor.  Only the
        # path is used below, so close the descriptor instead of leaking it.
        os.close(fd)

        try:
            # No logging to stderr/stdout
            with captured_output() as (stdout, stderr):
                writer = MafWriter.from_path(
                    path=path,
                    header=MafHeader(),
                    validation_stringency=ValidationStringency.Silent,
                )
                writer.close()
                self.assertEqual(read_lines(path), [])
                self.assertEqual(str(writer.header()), "")
            stdout = stdout.getvalue().rstrip('\r\n').split("\n")
            stderr = stderr.getvalue().rstrip('\r\n').split("\n")
            self.assertListEqual(stdout, [''])
            self.assertListEqual(stderr, [''])

            # Logging to stderr/stdout
            with captured_output() as (stdout, stderr):
                writer = MafWriter.from_path(
                    path=path,
                    header=MafHeader(),
                    validation_stringency=ValidationStringency.Lenient,
                )
                writer.close()
                self.assertEqual(read_lines(path), [])
                self.assertEqual(str(writer.header()), "")
            stdout = stdout.getvalue().rstrip('\r\n').split("\n")
            stderr = stderr.getvalue().rstrip('\r\n').split("\n")
            self.assertListEqual(stdout, [''])
            self.assertListEqualAndIn(
                ['HEADER_MISSING_VERSION', 'HEADER_MISSING_ANNOTATION_SPEC'],
                stderr)

            #  Exceptions
            with captured_output():
                with self.assertRaises(MafFormatException) as context:
                    writer = MafWriter.from_path(
                        path=path,
                        header=MafHeader(),
                        validation_stringency=ValidationStringency.Strict,
                    )
                self.assertEqual(context.exception.tpe,
                                 MafValidationErrorType.HEADER_MISSING_VERSION)
        finally:
            # mkstemp never deletes the file; remove it whatever the outcome
            os.remove(path)
Example #11
0
 def test_from_defaults(self):
     """
     A header built from defaults has a scheme (with version and annotation
     spec) and a sort order.
     """
     test_scheme = TestMafHeader.Scheme
     header = MafHeader.from_defaults(
         version=test_scheme.version(),
         annotation=test_scheme.annotation_spec(),
         sort_order=Coordinate(),
     )
     self.assertIsNotNone(header.scheme())
     self.assertIsNotNone(header.scheme().version())
     self.assertIsNotNone(header.scheme().annotation_spec())
     self.assertIsNotNone(header.sort_order())
Example #12
0
    def add_records(self):
        """
        Writes the same record twice (once with ``+=`` and once with
        ``write``) and checks that the file holds the header lines followed
        by the record line twice.
        """
        import os  # local import: only needed to manage the temporary file

        scheme = TestMafWriter.TestScheme()
        fd, path = tempfile.mkstemp()
        # mkstemp hands back an already-open OS-level descriptor.  Only the
        # path is used below, so close the descriptor instead of leaking it.
        os.close(fd)

        try:
            header_lines = MafHeader.scheme_header_lines(scheme) + [
                "#key1 value1",
                "#key2 value2",
            ]
            header = MafHeader.from_lines(lines=header_lines)
            writer = MafWriter.from_path(header=header, path=path)
            values = ["string2", "3.14", "string1"]
            record_line = MafRecord.ColumnSeparator.join(values)
            record = MafRecord.from_line(line=record_line,
                                         scheme=scheme,
                                         line_number=1)
            writer += record
            writer.write(record)
            writer.close()

            self.assertListEqual(read_lines(path),
                                 header_lines + [record_line, record_line])
        finally:
            # mkstemp never deletes the file; remove it whatever the outcome
            os.remove(path)
    def setup_maf_header(self):
        """
        Builds ``self.maf_header`` for the output MAF.

        The header is created from defaults ("version" and "annotation"
        options, BarcodesAndCoordinate sort order) with the contigs taken
        from the header of the first input reader.  A date record is added
        and the "normal.aliquot" and "tumor.aliquot" records are copied from
        that same input header.
        """
        # Header of the first input reader
        first_header = MafHeader.from_reader(reader=self.maf_readers[0])

        self.maf_header = MafHeader.from_defaults(
            version=self.options['version'],
            annotation=self.options['annotation'],
            sort_order=BarcodesAndCoordinate(),
            contigs=first_header.contigs(),
        )
        self.maf_header.validation_stringency = ValidationStringency.Strict

        date_record = BaseRunner.get_header_date()
        self.maf_header[date_record.key] = date_record

        # Carry both aliquot records over, normal first
        for aliquot_key in ("normal.aliquot", "tumor.aliquot"):
            self.maf_header[aliquot_key] = first_header[aliquot_key]
Example #14
0
    def test_with_sorting(self):
        """
        Writes three records through a writer created with
        ``assume_sorted=False`` and a Coordinate sort order header, then
        reads the file back and checks the sort order and the positions.
        """
        import os  # local import: only needed to manage the temporary file

        scheme = TestMafWriter.TestCoordinateScheme()
        fd, path = tempfile.mkstemp()
        # mkstemp hands back an already-open OS-level descriptor.  Only the
        # path is used below, so close the descriptor instead of leaking it.
        os.close(fd)

        try:
            # Create the header
            sort_order_line = "%s%s %s" % (
                MafHeader.HeaderLineStartSymbol,
                MafHeader.SortOrderKey,
                Coordinate().name(),
            )
            header_lines = (MafHeader.scheme_header_lines(scheme) +
                            ["#key1 value1", "#key2 value2"] +
                            [sort_order_line] +
                            ["\t".join(scheme.column_names())])
            header = MafHeader.from_lines(
                lines=header_lines,
                validation_stringency=ValidationStringency.Silent)

            # Write the header and three records
            writer = MafWriter.from_path(
                header=header,
                validation_stringency=ValidationStringency.Lenient,
                path=path,
                assume_sorted=False,
            )
            writer += TestMafWriter.DummyRecord("chr1", 2, 2)
            writer += TestMafWriter.DummyRecord("chr1", 3, 3)
            writer += TestMafWriter.DummyRecord("chr1", 4, 4)
            writer.close()

            # Read everything back
            reader = MafReader.reader_from(path=path, scheme=scheme)
            header = reader.header()
            records = list(reader)
            reader.close()

            self.assertEqual(header.sort_order().name(), Coordinate.name())

            self.assertListEqual([r["Start_Position"].value for r in records],
                                 [2, 3, 4])
            self.assertListEqual([r["End_Position"].value for r in records],
                                 [2, 3, 4])
        finally:
            # mkstemp never deletes the file; remove it whatever the outcome
            os.remove(path)
Example #15
0
    def test_from_lines_supported_sort_order(self):
        """
        Every sort order returned by SortOrder.all() is accepted without
        validation errors and reported back by name.
        """
        for so in sort_order.SortOrder.all():
            sort_order_line = "%s%s %s" % (
                MafHeader.HeaderLineStartSymbol,
                MafHeader.SortOrderKey,
                so.name(),
            )
            header = MafHeader.from_lines(
                lines=[
                    TestMafHeader.__version_line,
                    TestMafHeader.__annotation_line,
                    sort_order_line,
                ],
                validation_stringency=ValidationStringency.Silent,
            )

            self.assertEqual(len(header.validation_errors), 0)
            self.assertEqual(header.sort_order().name(), so.name())
Example #16
0
    def test_from_lines_unsupported_annotation(self):
        """
        An unsupported annotation spec yields exactly one
        HEADER_UNSUPPORTED_ANNOTATION_SPEC error while the annotation value
        is kept.  With the basic scheme's own version and annotation a scheme
        is still resolved; with a made-up annotation value none is.
        """
        expected_tpe = (
            MafValidationErrorType.HEADER_UNSUPPORTED_ANNOTATION_SPEC
        )

        # Version and annotation taken from the basic scheme
        scheme = GdcV1_0_0_BasicScheme()
        version_line = "%s%s %s" % (
            MafHeader.HeaderLineStartSymbol,
            MafHeader.VersionKey,
            scheme.version(),
        )
        annotation_line = "%s%s %s" % (
            MafHeader.HeaderLineStartSymbol,
            MafHeader.AnnotationSpecKey,
            scheme.annotation_spec(),
        )
        header = MafHeader.from_lines(
            lines=[version_line, annotation_line],
            validation_stringency=ValidationStringency.Silent,
        )

        self.assertEqual(len(header.validation_errors), 1)
        self.assertEqual(header.validation_errors[0].tpe, expected_tpe)
        self.assertIsNotNone(header.annotation())
        self.assertIsNotNone(header.scheme())

        # Annotation values that match no known annotation spec
        bad_annotation_lines = [
            "#%s not_annotation" % MafHeader.AnnotationSpecKey,
            "#%s %sx" % (MafHeader.AnnotationSpecKey,
                         TestMafHeader.AnnotationSpec),
        ]
        for bad_line in bad_annotation_lines:
            header = MafHeader.from_lines(
                lines=[TestMafHeader.__version_line, bad_line],
                validation_stringency=ValidationStringency.Silent,
            )

            self.assertEqual(len(header.validation_errors), 1)
            self.assertEqual(header.validation_errors[0].tpe, expected_tpe)
            self.assertIsNotNone(header.annotation())
            self.assertIsNone(header.scheme())
Example #17
0
    def test_from_lines_missing_version(self):
        """
        A header without a version line yields a single
        HEADER_MISSING_VERSION error and no version, yet still resolves to a
        scheme that has both a version and an annotation spec.
        """
        header = MafHeader.from_lines(
            lines=[
                TestMafHeader.__annotation_line,
                "#key1 value",
                "#key2 value",
            ],
            validation_stringency=ValidationStringency.Silent,
        )

        self.assertEqual(len(header.validation_errors), 1)
        first_error = header.validation_errors[0]
        self.assertEqual(first_error.tpe,
                         MafValidationErrorType.HEADER_MISSING_VERSION)
        self.assertIsNone(header.version())
        self.assertIsNotNone(header.scheme())
        self.assertIsNotNone(header.scheme().version())
        self.assertIsNotNone(header.scheme().annotation_spec())
Example #18
0
 def test_from_lines_misformatted_line(self):
     """
     A line lacking the header start symbol is left out of the header and
     reported as a single HEADER_LINE_MISSING_START_SYMBOL error.
     """
     header = MafHeader.from_lines(
         lines=[
             TestMafHeader.__version_line,
             TestMafHeader.__annotation_line,
             "key1 value1",
         ],
         validation_stringency=ValidationStringency.Silent,
     )
     # Only the two well-formed lines are kept
     self.assertEqual(len(header), 2)
     self.assertEqual(len(header.validation_errors), 1)
     first_error = header.validation_errors[0]
     self.assertEqual(
         first_error.tpe,
         MafValidationErrorType.HEADER_LINE_MISSING_START_SYMBOL,
     )
Example #19
0
    def test_from_lines_duplicate_keys(self):
        """
        A repeated header key is reported as one HEADER_DUPLICATE_KEYS error
        whose message names the key.
        """
        header = MafHeader.from_lines(
            lines=[
                TestMafHeader.__version_line,
                TestMafHeader.__annotation_line,
                "#dupkey value",
                "#dupkey value",
            ],
            validation_stringency=ValidationStringency.Silent,
        )

        self.assertEqual(len(header.validation_errors), 1)
        duplicate_error = header.validation_errors[0]
        self.assertIn("dupkey", str(duplicate_error))
        self.assertEqual(duplicate_error.tpe,
                         MafValidationErrorType.HEADER_DUPLICATE_KEYS)
Example #20
0
    def test_from_lines_unsupported_version(self):
        """
        A version value that is not supported yields a single
        HEADER_UNSUPPORTED_VERSION error; the version is kept but no scheme
        is resolved.
        """
        unsupported_version_lines = [
            "#%s not_version" % MafHeader.VersionKey,
            "#%s %sx" % (MafHeader.VersionKey, TestMafHeader.Version),
        ]
        for version_line in unsupported_version_lines:
            header = MafHeader.from_lines(
                lines=[version_line, TestMafHeader.__annotation_line],
                validation_stringency=ValidationStringency.Silent,
            )

            self.assertEqual(len(header.validation_errors), 1)
            first_error = header.validation_errors[0]
            self.assertEqual(
                first_error.tpe,
                MafValidationErrorType.HEADER_UNSUPPORTED_VERSION,
            )
            self.assertIsNotNone(header.version())
            self.assertIsNone(header.scheme())
Example #21
0
    def test_close(self):
        """
        Checks that closing the writer flushes everything written to its
        underlying handle and that the handle rejects writes afterwards.
        """
        import os  # local import: only needed to manage the temporary file

        fd, path = tempfile.mkstemp()
        # mkstemp hands back an already-open OS-level descriptor.  Only the
        # path is used below, so close the descriptor instead of leaking it.
        os.close(fd)

        try:
            lines = [
                TestMafWriter.__version_line,
                TestMafWriter.__annotation_line,
                "#key1 value1",
                "#key2 value2",
                TestMafWriter.__keys_line,
            ]
            header = MafHeader.from_lines(lines=lines)
            writer = MafWriter.from_path(header=header, path=path)
            writer._handle.write("LAST")  # Naughty
            writer.close()
            self.assertListEqual(read_lines(path), lines + ["LAST"])

            # Writing to the closed handle must fail
            with self.assertRaises(ValueError):
                writer._handle.write("Oh no")
        finally:
            # mkstemp never deletes the file; remove it whatever the outcome
            os.remove(path)
Example #22
0
 def test_from_lines_missing_annotation(self):
     """
     With the NoRestrictionsScheme version and no annotation line, a single
     HEADER_MISSING_ANNOTATION_SPEC error is reported and neither an
     annotation nor a scheme is available.
     """
     version_line = "%s%s %s" % (
         MafHeader.HeaderLineStartSymbol,
         MafHeader.VersionKey,
         NoRestrictionsScheme.version(),
     )
     header = MafHeader.from_lines(
         lines=[version_line, "#key1 value", "#key2 value"],
         validation_stringency=ValidationStringency.Silent,
     )
     self.assertEqual(len(header.validation_errors), 1)
     first_error = header.validation_errors[0]
     self.assertEqual(
         first_error.tpe,
         MafValidationErrorType.HEADER_MISSING_ANNOTATION_SPEC,
     )
     self.assertIsNone(header.annotation())
     self.assertIsNone(header.scheme())
Example #23
0
    def test_from_lines_supported_annotation(self):
        """
        The protected scheme's version and annotation lines parse without
        errors, and the header and its resolved scheme report that same
        version and annotation spec.
        """
        scheme = GdcV1_0_0_ProtectedScheme()
        version_line = "%s%s %s" % (
            MafHeader.HeaderLineStartSymbol,
            MafHeader.VersionKey,
            scheme.version(),
        )
        annotation_line = "%s%s %s" % (
            MafHeader.HeaderLineStartSymbol,
            MafHeader.AnnotationSpecKey,
            scheme.annotation_spec(),
        )
        header = MafHeader.from_lines(
            lines=[version_line, annotation_line],
            validation_stringency=ValidationStringency.Silent,
        )

        self.assertEqual(len(header.validation_errors), 0)

        # Values reported by the header itself
        self.assertEqual(header.version(), scheme.version())
        self.assertEqual(header.annotation(), scheme.annotation_spec())

        # Values reported by the scheme the header resolved to
        self.assertEqual(header.scheme().version(), scheme.version())
        self.assertEqual(header.scheme().annotation_spec(),
                         scheme.annotation_spec())
Example #24
0
    def test_from_lines_unsupported_sort_order(self):
        """
        An unknown sort order value is reported as a single
        HEADER_UNSUPPORTED_SORT_ORDER error and the header reports the
        Unsorted sort order.
        """
        bad_sort_order_line = "%s%s %s" % (
            MafHeader.HeaderLineStartSymbol,
            MafHeader.SortOrderKey,
            "not-a-sort-order",
        )
        header = MafHeader.from_lines(
            lines=[
                TestMafHeader.__version_line,
                TestMafHeader.__annotation_line,
                bad_sort_order_line,
            ],
            validation_stringency=ValidationStringency.Silent,
        )

        self.assertEqual(len(header.validation_errors), 1)
        first_error = header.validation_errors[0]
        self.assertEqual(
            first_error.tpe,
            MafValidationErrorType.HEADER_UNSUPPORTED_SORT_ORDER,
        )
        self.assertEqual(header.sort_order().name(),
                         sort_order.Unsorted.name())
Example #25
0
    def test_from_line_reader_ok(self):
        """
        Checks that a header read through a LineReader has no validation
        errors and keeps every header line, with keys and values in the
        order in which the lines were written.
        """
        fh, fn = tmp_file([
            TestMafHeader.__version_line,
            TestMafHeader.__annotation_line,
            TestMafHeader.__sort_order_line,
            "#key1 value1",
            "#key2 value2",
        ])
        try:
            line_reader = LineReader(fh)
            header = MafHeader.from_line_reader(
                line_reader=line_reader,
                validation_stringency=ValidationStringency.Silent)
        finally:
            # The header has been read by now (or reading failed), so the
            # temporary file can be released either way.  Doing it here
            # keeps it from being left behind when an assertion below fails.
            fh.close()
            os.remove(fn)

        self.assertEqual(len(header.validation_errors), 0)
        self.assertEqual(len(header), 5)
        self.assertEqual(
            list(header.keys()),
            [
                MafHeader.VersionKey,
                MafHeader.AnnotationSpecKey,
                MafHeader.SortOrderKey,
                "key1",
                "key2",
            ],
        )
        self.assertEqual(
            [str(record.value) for record in header.values()],
            [
                TestMafHeader.Version,
                TestMafHeader.AnnotationSpec,
                Coordinate.name(),
                "value1",
                "value2",
            ],
        )
        self.assertEqual(header.version(), TestMafHeader.Version)
Example #26
0
    def test_gz_support(self):
        """
        Checks that a writer created for a ".gz" path writes the header and
        column-name lines so that read_lines returns them unchanged, without
        logging anything to stdout or stderr.
        """
        import os  # local import: only needed to manage the temporary file

        fd, path = tempfile.mkstemp(suffix=".gz")
        # mkstemp hands back an already-open OS-level descriptor.  Only the
        # path is used below, so close the descriptor instead of leaking it.
        os.close(fd)

        try:
            lines = [
                TestMafWriter.__version_line,
                TestMafWriter.__annotation_line,
                "#key1 value1",
                "#key2 value2",
                TestMafWriter.__keys_line,
            ]
            with captured_output() as (stdout, stderr):
                header = MafHeader.from_lines(lines=lines)
                writer = MafWriter.from_path(header=header, path=path)
                writer.close()
                self.assertListEqual(read_lines(path), lines)
                self.assertEqual(
                    str(writer.header()) + "\n" + TestMafWriter.__keys_line,
                    "\n".join(lines),
                )
            stdout = stdout.getvalue().rstrip('\r\n').split("\n")
            stderr = stderr.getvalue().rstrip('\r\n').split("\n")
            self.assertListEqual(stdout, [''])
            self.assertListEqual(stderr, [''])
        finally:
            # mkstemp never deletes the file; remove it whatever the outcome
            os.remove(path)
Example #27
0
    def test_dict_methods(self):
        """
        Checks that the header is well-behaved for a Mapping: membership,
        length, keys and values are checked after setting, overwriting and
        deleting records.
        """
        def values_of(hdr):
            # header.values() yields records; compare their string values
            return [str(record.value) for record in hdr.values()]

        version = MafHeaderRecord(MafHeader.VersionKey, TestMafHeader.Version)
        record1 = MafHeaderRecord("key1", "value1")
        record2 = MafHeaderRecord("key1", "value2")
        header = MafHeader()

        # Set version
        self.assertNotIn(version.key, header)
        header[version.key] = version
        self.assertIn(version.key, header)
        self.assertEqual(len(header), 1)
        self.assertListEqual(list(header.keys()), [MafHeader.VersionKey])
        # assertTrue(values, expected) would treat `expected` as the failure
        # message and never compare anything, so compare explicitly.
        self.assertListEqual(values_of(header), [TestMafHeader.Version])
        self.assertEqual(header.version(), TestMafHeader.Version)
        expected_scheme = GdcV1_0_0_BasicScheme()
        self.assertEqual(header.scheme().version(), expected_scheme.version())
        self.assertEqual(header.scheme().annotation_spec(),
                         expected_scheme.annotation_spec())

        # Set when it is not in the header
        self.assertNotIn(record1.key, header)
        header[record1.key] = record1
        self.assertIn(record1.key, header)
        self.assertEqual(len(header), 2)
        self.assertListEqual(list(header.keys()),
                             [MafHeader.VersionKey, "key1"])
        self.assertListEqual(values_of(header),
                             [TestMafHeader.Version, "value1"])

        # Overwrite
        self.assertIn(record2.key, header)
        header[record2.key] = record2
        self.assertIn(record2.key, header)
        self.assertEqual(len(header), 2)
        self.assertListEqual(list(header.keys()),
                             [MafHeader.VersionKey, "key1"])
        self.assertListEqual(values_of(header),
                             [TestMafHeader.Version, "value2"])

        # Remove it
        del header[record2.key]
        self.assertNotIn(record2.key, header)
        self.assertEqual(len(header), 1)
        self.assertListEqual(list(header.keys()), [MafHeader.VersionKey])
        self.assertListEqual(values_of(header), [TestMafHeader.Version])
Example #28
0
    def __test_from_lines_lenient_or_silent(self, validation_stringency):
        """
        Shared check for the Lenient and Silent stringencies: duplicate-key
        errors are logged under Lenient and not logged under Silent, and in
        both cases the returned header only keeps the first line seen for
        each key.

        :param validation_stringency: ValidationStringency.Silent or
        ValidationStringency.Lenient.
        """
        self.assertIn(
            validation_stringency,
            [ValidationStringency.Silent, ValidationStringency.Lenient],
        )
        lines = [
            TestMafHeader.__version_line,
            TestMafHeader.__annotation_line,
            "#key1 value1",
            "#key1 value2",
            "#key2 value3",
            "#key2 value4",
        ]

        # Route the logger into a temporary file so its output can be read
        err_stream = tempfile.NamedTemporaryFile(delete=False, mode="w")
        err_file_name = err_stream.name
        logger = Logger.get_logger(err_file_name, stream=err_stream)
        header = MafHeader.from_lines(
            lines=lines,
            validation_stringency=validation_stringency,
            logger=logger)
        err_stream.close()

        with open(err_file_name, "r") as reader:
            logged_lines = reader.readlines()
        os.remove(err_file_name)

        expected_messages = [
            "Multiple header lines with key 'key1' found",
            "Multiple header lines with key 'key2' found",
        ]
        if validation_stringency == ValidationStringency.Lenient:
            self.assertEqual(len(logged_lines), len(expected_messages))
            for logged, message in zip(logged_lines, expected_messages):
                self.assertIn(message, logged)
        else:
            self.assertEqual(len(logged_lines), 0)

        # Only the first occurrence of each duplicated key survives
        self.assertEqual(len(header), 4)
        self.assertListEqual(
            list(header.keys()),
            [
                MafHeader.VersionKey,
                MafHeader.AnnotationSpecKey,
                "key1",
                "key2",
            ],
        )
        self.assertEqual(
            [str(record.value) for record in header.values()],
            [
                TestMafHeader.Version,
                TestMafHeader.AnnotationSpec,
                "value1",
                "value3",
            ],
        )

        # zip stops at the shorter list, so the first three records are
        # type-checked
        expected_classes = [
            MafHeaderVersionRecord,
            MafHeaderRecord,
            MafHeaderRecord,
        ]
        for record, clzz in zip(header.values(), expected_classes):
            self.assertIsInstance(record, clzz)
Example #29
0
 def test_scheme_header_lines(self):
     """
     The header lines generated for the test scheme are its version line
     followed by its annotation line.
     """
     generated = MafHeader.scheme_header_lines(TestMafHeader.Scheme)
     expected_lines = [
         TestMafHeader.__version_line,
         TestMafHeader.__annotation_line,
     ]
     self.assertListEqual(generated, expected_lines)
Example #30
0
    def __init__(self, lines,
                 closeable=None,
                 validation_stringency=None,
                 scheme=None):
        """ Initializes a MAF reader and reads in the header and column
        definitions.

        If no scheme is provided, the scheme will be determined from the
        version and annotation pragmas in the header, and matched against the
        known set of schemes.  If the scheme is not recognized, then the
        column names will determine a custom scheme and no assumption is made
        about the values of each column.

        Validation errors found while reading the header and the column
        names are collected in ``self.validation_errors`` and handed to
        ``MafValidationError.process_validation_errors`` at the end.

        :param lines: the lines (iterable) from the MAF file.
        :param closeable: any closeable object (has a ``close()`` method) that
        will be closed when ``close()`` is called.
        :param validation_stringency: the validation stringency; ``Silent``
        is used when this is ``None``.
        :param scheme: a scheme that should be used to override the scheme in
        the header.
        """
        self.__iter = iter(lines)
        self.__closeable = closeable
        # Fall back to Silent when no stringency was given
        self.validation_stringency = \
            ValidationStringency.Silent if (validation_stringency is None) \
                else validation_stringency
        self.__logger = Logger.get_logger(self.__class__.__name__)
        self.validation_errors = list()

        # Look-ahead state; presumably maintained by __next_line__ (helper
        # not shown here) -- TODO confirm
        self.__next_line = None
        self.__line_number = 0

        def add_error(error):
            # Collects an error on the reader; processed once at the end
            self.validation_errors.append(error)

        # read in the header lines
        # Consume lines for as long as they start with the header symbol; the
        # first line that does not is left in self.__next_line.
        header_lines = list()
        while True:
            self.__next_line__()
            if self.__next_line is not None \
                    and self.__next_line.startswith(MafHeader.HeaderLineStartSymbol):
                header_lines.append(self.__next_line)
            else:
                break
        self.__header = \
            MafHeader.from_lines(
                lines=header_lines,
                validation_stringency=self.validation_stringency)

        # Carry the header's own validation errors over to the reader
        for error in self.__header.validation_errors:
            add_error(error)

        # get the column names
        # The first non-header line, if any, is treated as the column names
        if self.__next_line is not None:
            column_names = self.__next_line.split(MafRecord.ColumnSeparator)
            self.__next_line__()
        else:
            column_names = None

        # update the scheme
        self.__update_scheme__(scheme=scheme, column_names=column_names)

        # validate the column names against the scheme
        # NOTE(review): errors below are attributed to __line_number - 1,
        # presumably because __next_line__ has already moved past the
        # column-name line -- confirm against __next_line__.
        if column_names is not None:
            # match the column names against the scheme
            scheme_column_names = self.__scheme.column_names()
            if len(column_names) != len(scheme_column_names):
                add_error(MafValidationError(
                    MafValidationErrorType.SCHEME_MISMATCHING_NUMBER_OF_COLUMN_NAMES,
                    "Found '%d' columns but expected '%d'" %
                    (len(column_names), len(scheme_column_names)),
                    line_number=self.__line_number - 1
                ))
            else:
                # Same count: compare the names position by position and
                # report every mismatch (1-based column index in the message)
                for i, (column_name, scheme_column_name) in \
                        enumerate(zip(column_names, scheme_column_names)):
                    if column_name != scheme_column_name:
                        add_error(MafValidationError(
                            MafValidationErrorType.SCHEME_MISMATCHING_COLUMN_NAMES,
                            "Found column with name '%s' but expected '%s' for "
                            "the '%d'th column" %
                            (column_name, scheme_column_name, i + 1),
                            line_number=self.__line_number - 1
                        ))
        else:
            # The input ended before any column-name line was seen
            add_error(MafValidationError(
                MafValidationErrorType.HEADER_MISSING_COLUMN_NAMES,
                "Found no column names",
                line_number=self.__line_number+1
            ))

        # process validation errors so far
        MafValidationError.process_validation_errors(
            validation_errors=self.validation_errors,
            validation_stringency=self.validation_stringency,
            name=self.__class__.__name__,
            logger=self.__logger
        )