コード例 #1
0
ファイル: writer.py プロジェクト: NCI-GDC/maf-lib
    def __iadd__(self, record: MafRecord) -> 'MafWriter':
        """Validate ``record`` and emit it through this writer.

        If no scheme is set yet, a ``NoRestrictionsScheme`` is built from the
        record's keys and the column-name line is written first.  The record
        is then validated against the scheme and either handed to the sorter
        (when one is set) or written straight to the underlying handle.

        Returns:
            This writer, so ``writer += record`` rebinds to the same object.
        """
        if not self._scheme:
            # No scheme yet: derive one from this record and emit the
            # column-name row before any data.
            names = [str(name) for name in record.keys()]
            self._scheme = NoRestrictionsScheme(column_names=names)
            header_row = MafRecord.ColumnSeparator.join(
                self._scheme.column_names())
            self._handle.write(header_row + "\n")
            self._set_checker_and_sorter()

        # validation errors from any earlier pass are reset first
        record.validate(
            validation_stringency=self.validation_stringency,
            logger=self._logger,
            reset_errors=True,
            scheme=self._scheme,
        )

        if not self._sorter:
            # no sorter: the record goes directly to the handle
            self._handle.write(str(record) + "\n")
        else:
            self._sorter += record  # type: ignore

        return self
コード例 #2
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_str(self):
     """``str(record)`` is the column values joined by the separator."""
     pairs = [("key1", "value1"), ("key2", "value2")]

     record = MafRecord()
     for name, value in pairs:
         record.add(MafColumnRecord(name, value))

     errors = record.validate()
     self.assertEqual(len(errors), 0)

     rendered = str(record)
     expected = MafRecord.ColumnSeparator.join(
         [value for _, value in pairs])
     self.assertEqual(rendered, expected)
コード例 #3
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
    def test_delitem_various_columns(self):
        """Deleting a column also drops trailing columns that have no value.

        Columns 0, 1 and 4 are set, leaving 2 and 3 without a value.  The
        test then deletes 1, 4 and 0 in turn and checks the record length
        and the validation errors after each step.
        """
        no_value = MafValidationErrorType.RECORD_COLUMN_WITH_NO_VALUE

        record = MafRecord()
        for index in (0, 1, 4):
            record[index] = MafColumnRecord(
                "key%d" % index, "value%d" % index, column_index=index)

        def distinct_error_types():
            # collapse the current validation errors to their unique types
            return list({err.tpe for err in record.validation_errors})

        # initial state: columns 2 & 3 are missing
        record.validate()
        self.assertEqual(len(record), 5)
        self.assertEqual(len(record.validation_errors), 2)
        kinds = distinct_error_types()
        self.assertEqual(len(kinds), 1)  # a single kind of error
        self.assertEqual(kinds[0], no_value)

        # removing column 1 leaves 1, 2 & 3 missing
        del record[1]
        record.validate()
        self.assertEqual(len(record), 5)
        self.assertEqual(len(record.validation_errors), 3)
        kinds = distinct_error_types()
        self.assertEqual(len(kinds), 1)  # a single kind of error
        self.assertEqual(kinds[0], no_value)

        # removing the last column leaves only column 0
        del record[4]
        record.validate()
        self.assertEqual(len(record), 1)
        self.assertEqual(len(record.validation_errors), 0)

        # removing column 0 empties the record
        del record[0]
        record.validate()
        self.assertEqual(len(record), 0)
        self.assertEqual(len(record.validation_errors), 0)
コード例 #4
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_add(self):
     """``add`` returns the record and stores the column at index 0."""
     rec = MafRecord()
     col = MafColumnRecord("key1", "value1", column_index=0)

     returned = rec.add(col)
     self.assertEqual(returned, rec)

     self.assertEqual(len(rec), 1)
     errors = rec.validate()
     self.assertEqual(len(errors), 0)
     self.assertEqual(rec[0], col)
コード例 #5
0
ファイル: conftest.py プロジェクト: NCI-GDC/aliquot-maf-tools
    def _generate_overlaps(test_input_scheme, line_list, callers):
        """Build an ``OverlapSet`` from raw MAF lines.

        Each entry of ``line_list`` becomes one group of records: a list
        entry is parsed item by item, a truthy non-list entry becomes a
        single-record group, and a falsy entry becomes an empty group.
        All lines are parsed against ``test_input_scheme`` with strict
        validation.
        """

        def parse(text):
            return MafRecord.from_line(
                text,
                scheme=test_input_scheme,
                validation_stringency=ValidationStringency.Strict,
            )

        groups = []
        for entry in line_list:
            if isinstance(entry, list):
                groups.append([parse(item) for item in entry])
            elif entry:
                groups.append([parse(entry)])
            else:
                groups.append([])

        return OverlapSet(groups, callers)
コード例 #6
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_from_line_neither_column_names_nor_scheme(self):
     """``from_line`` raises ``ValueError`` without column names or a scheme."""
     text = MafRecord.ColumnSeparator.join(["value1", "value2", "value3"])
     with self.assertRaises(ValueError):
         MafRecord.from_line(
             line=text,
             validation_stringency=ValidationStringency.Silent,
         )
コード例 #7
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_iadd(self):
     """``record += column`` appends the column and yields an equal record."""
     record = MafRecord()
     before = record
     col = MafColumnRecord("key1", "value1", column_index=0)

     record += col

     self.assertEqual(record, before)
     self.assertEqual(len(record), 1)
     errors = record.validate()
     self.assertEqual(len(errors), 0)
     self.assertEqual(record[0], col)
コード例 #8
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
    def test_setitem_in_order_and_delitem_in_reverse(self):
        """Columns added in order can be deleted back-to-front.

        Ten columns are inserted by name.  They are then removed from the
        last to the first, alternating between deletion by index and by
        name, checking the record's contents after every removal.
        """
        count = 10
        names = ["key%d" % n for n in range(count)]
        values = ["value%d" % n for n in range(count)]

        # fill the record front-to-back
        record = MafRecord()
        for name, value in zip(names, values):
            record[name] = MafColumnRecord(name, value)
        self.assertEqual(len(record), count)
        self.assertEqual(len(record.validate()), 0)
        self.assertListEqual(list(record.keys()), names)
        self.assertListEqual([col.value for col in record.values()], values)

        # empty the record back-to-front
        snapshot = list(record.keys())
        for remaining in reversed(range(count)):
            victim = record[remaining]
            self.assertIn(victim, record)
            if remaining % 2:
                del record[snapshot[remaining]]  # odd positions: by name
            else:
                del record[remaining]  # even positions: by index
            self.assertFalse(victim in record)
            self.assertEqual(len(record), remaining)
            self.assertEqual(len(record.validate()), 0)
            self.assertListEqual(list(record.keys()), names[:remaining])
            self.assertListEqual(
                [col.value for col in record.values()],
                values[:remaining],
            )
コード例 #9
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_setitem_mismatching_column_indexes(self):
     """Assigning a column under a key column with another index raises.

     The stored column has index 0 and the replacement has index 1; using
     the stored column as the key must raise ``ValueError``.
     """
     record = MafRecord()
     existing = MafColumnRecord("key1", "value1", column_index=0)
     record[existing.key] = existing

     replacement = MafColumnRecord("key1", "value1", column_index=1)
     with self.assertRaises(ValueError):
         record[existing] = replacement
コード例 #10
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
    def test_access_with_different_types(self):
        """A column can be set, found, read and deleted by int, column or str."""
        record = MafRecord()
        column = MafColumnRecord(key="key1", value="value2", column_index=0)

        # Each accessor is a callable so the key expression is re-evaluated
        # at every use, exactly as if it were written out inline.
        accessors = (
            lambda: column.column_index,  # via int
            lambda: column,  # via MafColumnRecord
            lambda: column.key,  # via str
        )

        for key_of in accessors:
            record[key_of()] = column
            self.assertIn(key_of(), record)
            self.assertEqual(record[key_of()], column)
            del record[key_of()]
            self.assertEqual(len(record), 0)
            self.assertFalse(key_of() in record)
コード例 #11
0
 def decode(self, data, start, length):
     """Parse ``length`` bytes of ``data`` starting at ``start`` as a MafRecord.

     The slice is decoded as UTF-8 and passed to ``MafRecord.from_line``
     with this object's column names, scheme and validation stringency.
     """
     stop = start + length
     text = data[start:stop].decode('utf-8')
     return MafRecord.from_line(
         line=text,
         column_names=self._column_names,
         scheme=self._scheme,
         validation_stringency=self.validation_stringency,
     )
コード例 #12
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_setitem_with_wrong_type(self):
     """Unsupported key or value types raise ``TypeError`` on assignment."""
     record = MafRecord()

     # (key, value factory) pairs; the value is built inside the
     # ``assertRaises`` block, just before the assignment is attempted.
     attempts = (
         (42.42, lambda: MafColumnRecord("key", "value")),  # float key
         ("key2", lambda: 42.42),  # float value
         (None, lambda: MafColumnRecord("key", "value")),  # None key
     )
     for key, make_value in attempts:
         with self.assertRaises(TypeError):
             record[key] = make_value()
コード例 #13
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_getitem_with_missing_key(self):
     """Looking up an int, column or str key in an empty record raises."""
     record = MafRecord()

     # the key is built inside the ``assertRaises`` block
     key_factories = (
         lambda: 0,
         lambda: MafColumnRecord(key="key1", value="value2"),
         lambda: "key",
     )
     for make_key in key_factories:
         with self.assertRaises(KeyError):
             _ = record[make_key()]
コード例 #14
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_getitem_with_column_index_out_of_range(self):
     """Integer lookups outside the stored columns raise ``KeyError``."""
     record = MafRecord()

     def assert_missing(index):
         with self.assertRaises(KeyError):
             _ = record[index]

     # nothing stored yet
     assert_missing(0)

     # with a single column at index 0, both -1 and 2 are out of range
     record[0] = MafColumnRecord(key="key1", value="value2")
     for index in (-1, 2):
         assert_missing(index)
コード例 #15
0
 def decode(self, data: bytes, start: int, length: int) -> MafRecord:
     """Rebuild a MafRecord from a UTF-8 slice of ``data``.

     Args:
         data: buffer holding the encoded text.
         start: offset of the first byte of the record.
         length: number of bytes belonging to the record.

     Returns:
         The record parsed by ``MafRecord.from_line`` using this object's
         column names, scheme and validation stringency.
     """
     raw = data[start:start + length]
     text = raw.decode('utf-8')
     parse_options = {
         "column_names": self._column_names,
         "scheme": self._scheme,
         "validation_stringency": self.validation_stringency,
     }
     return MafRecord.from_line(line=text, **parse_options)
コード例 #16
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_from_line_valid(self):
     """A line with matching column names parses without errors."""
     names = ["key%d" % n for n in (1, 2, 3)]
     cells = ["value%d" % n for n in (1, 2, 3)]
     text = MafRecord.ColumnSeparator.join(cells)

     record = MafRecord.from_line(
         line=text,
         column_names=names,
         validation_stringency=ValidationStringency.Silent,
     )

     self.assertEqual(len(record), 3)
     self.assertEqual(len(record.validation_errors), 0)
     self.assertListEqual(list(record.keys()), names)
     self.assertListEqual(record.column_values(), cells)
コード例 #17
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_from_line_mismatch_number_of_columns(self):
     """Three values against two column names give an empty record and one error."""
     text = MafRecord.ColumnSeparator.join(["value1", "value2", "value3"])
     record = MafRecord.from_line(
         line=text,
         column_names=["key1", "key2"],
         validation_stringency=ValidationStringency.Silent,
     )

     self.assertEqual(len(record), 0)
     errors = record.validation_errors
     self.assertEqual(len(errors), 1)
     self.assertEqual(
         errors[0].tpe,
         MafValidationErrorType.RECORD_MISMATCH_NUMBER_OF_COLUMNS,
     )
コード例 #18
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
    def test_with_scheme_diff_num_columns(self):
        """A record one column short of its scheme reports a column-count error."""
        scheme = TestMafRecord.TestScheme()
        record = MafRecord()

        # use every scheme column except the last one
        names = scheme.column_names()[:-1]
        raw_values = ["string1", "3.14"]

        for index, name in enumerate(names):
            builder = scheme.column_class(name)
            record[name] = builder.build(
                name=name,
                value=raw_values[index],
                column_index=index,
            )

        record.validate(scheme=scheme)

        self.assertEqual(len(scheme.column_names()), 3)
        self.assertEqual(len(record), 2)
        self.assertEqual(len(record.validation_errors), 1)
        kinds = [err.tpe for err in record.validation_errors]
        self.assertListEqual(
            kinds,
            [MafValidationErrorType.RECORD_MISMATCH_NUMBER_OF_COLUMNS],
        )
コード例 #19
0
    def test_record_validation_error(self):
        """A record with an invalid value is written with ``None`` in its place.

        Builds a header and a record whose second value is the literal
        ``"error"``, writes the record twice through a ``Lenient`` writer,
        and checks that stdout is empty, that the expected error names
        appear on stderr, and that the file holds the header lines followed
        by two record lines in which ``"error"`` is replaced by ``"None"``.
        """
        import os  # local import: only needed here for temp-file cleanup

        scheme = TestMafWriter.TestScheme()
        fd, path = tempfile.mkstemp()
        # mkstemp returns an open OS-level descriptor; the writer opens the
        # file by path, so release the descriptor right away.
        os.close(fd)

        try:
            # Create the header
            header_lines = (MafHeader.scheme_header_lines(scheme) +
                            ["#key1 value1", "#key2 value2"] +
                            ["str1\tNone\tstr2"])
            header = MafHeader.from_lines(
                lines=header_lines,
                validation_stringency=ValidationStringency.Silent)

            # Create the record
            values = ["string2", "error", "string1"]
            record_line = MafRecord.ColumnSeparator.join(values)
            record = MafRecord.from_line(
                line=record_line,
                scheme=scheme,
                line_number=1,
                validation_stringency=ValidationStringency.Silent,
            )

            # Write the header, and the record twice
            with captured_output() as (stdout, stderr):
                writer = MafWriter.from_path(
                    header=header,
                    validation_stringency=ValidationStringency.Lenient,
                    path=path,
                )
                writer += record
                writer.write(record)
                writer.close()
            stdout_lines = stdout.getvalue().rstrip('\r\n').split("\n")
            stderr_lines = stderr.getvalue().rstrip('\r\n').split("\n")
            self.assertListEqual(stdout_lines, [''])

            # The errors that should be written stderr
            errors = [
                "HEADER_UNSUPPORTED_VERSION",
                "HEADER_UNSUPPORTED_ANNOTATION_SPEC",
                "RECORD_COLUMN_WITH_NO_VALUE",
                "RECORD_COLUMN_WITH_NO_VALUE",
            ]
            self.assertListEqualAndIn(errors, stderr_lines)

            # The second column should be None
            err_record_line = record_line.replace("error", "None")
            self.assertListEqual(
                read_lines(path),
                header_lines + [err_record_line, err_record_line])
        finally:
            # mkstemp leaves deletion to the caller; clean up even when an
            # assertion above fails.
            os.remove(path)
コード例 #20
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
    def test_setitem_and_delitem_every_other(self):
        """Setting only even-indexed columns leaves value-less gaps between them.

        Columns 0, 2, 4, 6 and 8 are set by name with explicit indexes; the
        record then spans nine positions, with the odd ones reported as
        having no value.
        """
        record = MafRecord()

        # populate the even positions only
        for idx in range(0, 10, 2):
            name = "key%d" % idx
            record[name] = MafColumnRecord(name,
                                           "value%d" % idx,
                                           column_index=idx)
        self.assertEqual(len(record), 9)

        errors = record.validate()
        self.assertEqual(len(errors), 4)
        # one error for each of positions 1, 3, 5 and 7
        self.assertListEqual(
            [err.tpe for err in errors],
            [MafValidationErrorType.RECORD_COLUMN_WITH_NO_VALUE] * 4,
        )

        def expected(prefix):
            # even positions carry "<prefix><index>", odd positions are None
            return [
                None if idx % 2 else "%s%d" % (prefix, idx)
                for idx in range(0, 9)
            ]

        self.assertListEqual(list(record.keys()), expected("key"))
        self.assertListEqual(record.column_values(), expected("value"))
コード例 #21
0
    def __next__(self):
        """Return the ``MafRecord`` parsed from the currently buffered line.

        The record's validation errors are appended to this object's
        ``validation_errors`` before ``__next_line__`` is called
        (presumably to buffer the following line -- confirm against its
        definition).

        Raises:
            StopIteration: when there is no buffered line left.
        """
        current_line = self.__next_line
        if current_line is None:
            raise StopIteration

        # the scheme is always used when parsing
        parsed = MafRecord.from_line(
            line=current_line,
            scheme=self.__scheme,
            line_number=self.__line_number,
            validation_stringency=self.validation_stringency
        )

        # accumulate per-record errors on the reader
        for problem in parsed.validation_errors:
            self.validation_errors.append(problem)

        self.__next_line__()

        return parsed
コード例 #22
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_from_line_with_scheme_failed_to_build(self):
     """A value the scheme cannot build becomes ``None`` and yields two errors."""
     raw = ["string1", "string2", "string3"]
     record = MafRecord.from_line(
         line=MafRecord.ColumnSeparator.join(raw),
         scheme=TestMafRecord.TestScheme(),
         validation_stringency=ValidationStringency.Silent,
     )

     self.assertEqual(len(record), 3)
     # the second value is expected to be dropped
     self.assertListEqual(record.column_values(),
                          ["string1", None, "string3"])

     kinds = [err.tpe for err in record.validation_errors]
     self.assertEqual(len(kinds), 2)
     self.assertListEqual(
         kinds,
         [
             MafValidationErrorType.RECORD_INVALID_COLUMN_VALUE,
             MafValidationErrorType.RECORD_COLUMN_WITH_NO_VALUE,
         ],
     )
コード例 #23
0
    def test_with_fasta_index(self):
        """Run the ``sort`` subcommand and check record count, pragma and order.

        A temporary fasta index listing chr13 before chr1 is created, the
        test MAF is sorted, and the output is checked for the same number
        of records, the added sort-order header line, and that no chr13
        record directly follows a chr1 record.  The temporary file is
        closed and removed even when an assertion fails.
        """
        # change the order of chromosomes!
        fasta_index_lines = [
            "chr13\t114364328\t2106716512\t70\t71",
            "chr1\t248956422\t112\t70\t71"
        ]
        fd, fn = tmp_file(lines=fasta_index_lines)
        try:
            lines, _, records = self.read_test_maf()
            # NOTE(review): ``fn`` is never handed to the subcommand, so the
            # custom contig order above may not be in effect -- confirm
            # whether a fasta-index argument is missing here.
            subcommand_args = [
                "--version",
                GdcV1_0_0_PublicScheme.version(),
                "--annotation",
                GdcV1_0_0_PublicScheme.annotation_spec(),
            ]
            out_lines, stdout, stderr = run_main(
                subcommand="sort",
                lines=lines,
                subcommand_args=subcommand_args)

            # Check that we have the same # of records
            out_records = [
                line for line in out_lines
                if not line.startswith(("#", "Hugo_Symbol"))
            ]
            self.assertEqual(len(out_records), len(records))

            # Check that we added the sort pragma
            sortOrderLine = "%s%s %s" % (MafHeader.HeaderLineStartSymbol,
                                         MafHeader.SortOrderKey,
                                         BarcodesAndCoordinate.name())
            self.assertIn(sortOrderLine, out_lines)

            scheme = find_scheme(
                version=GdcV1_0_0_PublicScheme.version(),
                annotation=GdcV1_0_0_PublicScheme.annotation_spec())
            # we should see chr13 before chr1
            self.assertEqual(len(out_lines) - 1,
                             len(lines))  # added the pragma
            # NOTE(review): the flag is overwritten on every record, so only
            # a chr13 immediately after a chr1 is caught -- confirm whether
            # it was meant to stay set once chr1 has been seen.
            found_chr1 = False
            for line in out_lines:
                if line.startswith(MafHeader.HeaderLineStartSymbol):
                    continue
                record = MafRecord.from_line(line=line, scheme=scheme)
                chromosome = record["Chromosome"]
                self.assertFalse(chromosome == "chr13" and found_chr1)
                found_chr1 = chromosome == "chr1"
        finally:
            # always release the temporary fasta index
            fd.close()
            os.remove(fn)
コード例 #24
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_from_line_with_scheme_invalid_column_name(self):
     """A column name unknown to the scheme yields ``None`` and two errors."""
     names = ["no-name", "float", "str2"]
     raw = ["string1", "3.14", "string3"]
     record = MafRecord.from_line(
         line=MafRecord.ColumnSeparator.join(raw),
         column_names=names,
         scheme=TestMafRecord.TestScheme(),
         validation_stringency=ValidationStringency.Silent,
     )

     self.assertEqual(len(record), 3)
     # the first column, under the unknown name, carries no value
     self.assertListEqual(record.column_values(), [None, 3.14, "string3"])

     kinds = [err.tpe for err in record.validation_errors]
     self.assertEqual(len(kinds), 2)
     self.assertListEqual(
         kinds,
         [
             MafValidationErrorType.SCHEME_MISMATCHING_COLUMN_NAMES,
             MafValidationErrorType.RECORD_COLUMN_WITH_NO_VALUE,
         ],
     )
コード例 #25
0
    def add_records(self):
        """Write one record twice and check the resulting file contents.

        The record is written once with ``+=`` and once with ``write``; the
        file must then contain the header lines followed by the record line
        twice.  The temporary file is removed afterwards.
        """
        import os  # local import: only needed here for temp-file cleanup

        scheme = TestMafWriter.TestScheme()
        fd, path = tempfile.mkstemp()
        # mkstemp returns an open OS-level descriptor; the writer opens the
        # file by path, so release the descriptor right away.
        os.close(fd)

        try:
            header_lines = MafHeader.scheme_header_lines(scheme) + [
                "#key1 value1",
                "#key2 value2",
            ]
            header = MafHeader.from_lines(lines=header_lines)
            writer = MafWriter.from_path(header=header, path=path)
            values = ["string2", "3.14", "string1"]
            record_line = MafRecord.ColumnSeparator.join(values)
            record = MafRecord.from_line(line=record_line,
                                         scheme=scheme,
                                         line_number=1)
            writer += record
            writer.write(record)
            writer.close()

            self.assertListEqual(read_lines(path),
                                 header_lines + [record_line, record_line])
        finally:
            # mkstemp leaves deletion to the caller; clean up even when the
            # assertion above fails.
            os.remove(path)
コード例 #26
0
ファイル: test_record.py プロジェクト: NCI-GDC/maf-lib
 def test_from_line_with_scheme_column_out_of_order(self):
     """Column names in a different order than the scheme produce errors.

     The names are given as ``str2, float, str1``; the parsed record is
     expected to hold two columns and three validation errors.
     """
     names = ["str2", "float", "str1"]
     raw = ["string2", "3.14", "string1"]
     record = MafRecord.from_line(
         line=MafRecord.ColumnSeparator.join(raw),
         column_names=names,
         scheme=TestMafRecord.TestScheme(),
         validation_stringency=ValidationStringency.Silent,
     )

     self.assertEqual(len(record), 2)
     self.assertListEqual(record.column_values(), [None, 3.14])

     kinds = [err.tpe for err in record.validation_errors]
     self.assertEqual(len(kinds), 3)
     self.assertListEqual(
         kinds,
         [
             MafValidationErrorType.RECORD_COLUMN_OUT_OF_ORDER,
             MafValidationErrorType.RECORD_COLUMN_OUT_OF_ORDER,
             MafValidationErrorType.RECORD_COLUMN_WITH_NO_VALUE,
         ],
     )
コード例 #27
0
 def _get_empty_maf_record(line_number=0, stringency=ValidationStringency.Strict):
     """Construct a ``MafRecord`` from only a line number and a stringency."""
     empty_record = MafRecord(
         line_number=line_number,
         validation_stringency=stringency,
     )
     return empty_record
コード例 #28
0
ファイル: sort_order.py プロジェクト: NCI-GDC/maf-lib
 def __init__(self, record: MafRecord, contigs: List[str]):
     """Capture the record's sample barcodes, then initialise the base key.

     The tumor and matched-normal barcodes are read from the record before
     the parent initialiser runs with the same ``record`` and ``contigs``.
     """
     self.tumor_barcode = record.value("Tumor_Sample_Barcode")
     self.normal_barcode = record.value("Matched_Norm_Sample_Barcode")
     super().__init__(record, contigs)
コード例 #29
0
ファイル: utils.py プロジェクト: NCI-GDC/aliquot-maf-tools
def init_empty_maf_record(line_number=None,
                          stringency=ValidationStringency.Strict):
    """
    Create a MafRecord from only a line number and a validation stringency.
    """
    empty_record = MafRecord(
        line_number=line_number,
        validation_stringency=stringency,
    )
    return empty_record
コード例 #30
0
 def encode(self, record: MafRecord) -> bytearray:
     """Serialise ``record`` as UTF-8 bytes.

     The first record seen also supplies the column names that are
     remembered on this object.
     """
     if not self._column_names:
         self._column_names = record.keys()  # type: ignore
     text = str(record)
     return bytearray(text, 'utf-8')