Beispiel #1
0
 def build(cls, column, value=None, default=None, scheme=None):
     """Build a ``MafColumnRecord`` for ``column`` with a string fallback.

     :param column: forwarded as the first argument of
         ``MafColumnRecord.build``.
     :param value: the value to store; used whenever it is not ``None``.
     :param default: the fallback used when ``value`` is ``None``.  When this
         is ``None`` as well, the string ``'None'`` is used.
     :param scheme: forwarded to ``MafColumnRecord.build``.
     :return: the result of ``MafColumnRecord.build``.
     """
     # NOTE(review): the last-resort fallback is the four-character string
     # 'None', not the empty string -- confirm that this is what is intended.
     fallback = 'None' if default is None else default
     chosen = fallback if value is None else value
     return MafColumnRecord.build(column, chosen, scheme=scheme)
Beispiel #2
0
 def __init__(self, chromosome, start, end, ref="A", alts=None):
     """Create a dummy record with position and reference-allele columns.

     :param chromosome: value for the ``Chromosome`` column.
     :param start: value for the ``Start_Position`` column.
     :param end: value for the ``End_Position`` column.
     :param ref: value for the ``Reference_Allele`` column.
     :param alts: stored on ``self._alts``; any falsy value is replaced by a
         new empty list.
     """
     super(DummyRecordWithAllele, self).__init__()
     # Columns are added in this fixed order.
     columns_in_order = (
         ("Chromosome", chromosome),
         ("Start_Position", start),
         ("End_Position", end),
         ("Reference_Allele", ref),
     )
     for column_name, column_value in columns_in_order:
         self.add(MafColumnRecord(column_name, column_value))
     self._alts = alts or []
Beispiel #3
0
 def build(cls, column, value=None, default=None, scheme=None):
     """Build a ``MafColumnRecord`` for ``column``, requiring a usable value.

     :param column: forwarded as the first argument of
         ``MafColumnRecord.build``; also used in the error message.
     :param value: the value to store; used whenever it is not ``None``.
     :param default: the fallback used when ``value`` is ``None``.
     :param scheme: forwarded to ``MafColumnRecord.build``.
     :return: the result of ``MafColumnRecord.build``.
     :raises AssertionError: if both ``value`` and ``default`` are ``None``.
     """
     if value is not None:
         return MafColumnRecord.build(column, value, scheme=scheme)
     if default is None:
         # Raised explicitly instead of via ``assert`` so the check still runs
         # under ``python -O``; the exception type and message are unchanged
         # so existing callers observe the same failure.
         raise AssertionError(
             "BooleanColumn {0} can't be None".format(column))
     return MafColumnRecord.build(column, default, scheme=scheme)
Beispiel #4
0
 def build(cls, key, value, scheme, default=None):
     """Build the column record for ``key``, handling missing and list values.

     :param key: the column name, forwarded to the builder that is chosen.
     :param value: the value to store.  A ``list`` is sorted and joined with
         ``';'`` before being stored.
     :param scheme: queried through ``scheme.column_class(key)`` and forwarded
         to the builder that is chosen.
     :param default: used in place of ``value`` when ``value`` is ``None``.
     :return: the built column record.
     """
     if value is None:
         # A default wins over the nullable form of the column.
         if default is not None:
             return MafColumnRecord.build(key, default, scheme=scheme)
         if scheme.column_class(key).is_nullable():
             return scheme.column_class(key).build_nullable(key,
                                                            scheme=scheme)
     elif isinstance(value, list):
         value = ';'.join(sorted(value))
     # Reached for plain values, joined lists, and a ``None`` that has neither
     # a default nor a nullable column class.
     return MafColumnRecord.build(key, value, scheme=scheme)
Beispiel #5
0
 def build(cls, key, value, scheme, default=None, fn=None, **kwargs):
     """Build the column record for ``key``, optionally transforming ``value``.

     :param key: the column name, forwarded to the builder that is chosen.
     :param value: the value to store.
     :param scheme: queried through ``scheme.column_class(key)`` and forwarded
         to the builder that is chosen.
     :param default: used in place of ``value`` when ``value`` is ``None`` and
         no ``fn`` is given.
     :param fn: when given, ``fn(value, **kwargs)`` is stored and none of the
         ``None`` handling applies.
     :param kwargs: extra keyword arguments passed to ``fn``.
     :return: the built column record.
     """
     if fn is not None:
         return MafColumnRecord.build(key,
                                      fn(value, **kwargs),
                                      scheme=scheme)
     if value is None:
         # A default wins over the nullable form of the column.
         if default is not None:
             return MafColumnRecord.build(key, default, scheme=scheme)
         if scheme.column_class(key).is_nullable():
             return scheme.column_class(key).build_nullable(key,
                                                            scheme=scheme)
     return MafColumnRecord.build(key, value, scheme=scheme)
Beispiel #6
0
 def __init__(self, chromosome, start, end,
              tumor_barcode=None,
              normal_barcode=None):
     """Create a dummy record with position columns and optional barcodes.

     :param chromosome: value for the ``Chromosome`` column.
     :param start: value for the ``Start_Position`` column.
     :param end: value for the ``End_Position`` column.
     :param tumor_barcode: when truthy, added as ``Tumor_Sample_Barcode``.
     :param normal_barcode: when truthy, added as
         ``Matched_Norm_Sample_Barcode``.
     """
     super(DummyRecord, self).__init__()
     required_columns = (
         ("Chromosome", chromosome),
         ("Start_Position", start),
         ("End_Position", end),
     )
     for column_name, column_value in required_columns:
         self.add(MafColumnRecord(column_name, column_value))

     # Barcode columns are only added when a truthy barcode was supplied.
     optional_columns = (
         ("Tumor_Sample_Barcode", tumor_barcode),
         ("Matched_Norm_Sample_Barcode", normal_barcode),
     )
     for column_name, barcode in optional_columns:
         if barcode:
             self.add(MafColumnRecord(column_name, barcode))
Beispiel #7
0
 def test_basic_column(self):
     """A column built without a scheme is non-nullable and prints its value."""
     record = MafColumnRecord.build(
         name="key",
         value="value",
         column_index=0,
         description="Foo Bar",
     )
     # The column reports no nullable configuration at all ...
     self.assertFalse(record.is_nullable())
     self.assertEqual([], record.__nullable_values__())
     self.assertIsNone(record.__nullable_dict__())
     self.assertEqual([], record.__nullable_keys__())
     # ... holds a non-null value, and renders as that value.
     self.assertFalse(record.is_null())
     self.assertEqual(str(record), "value")
Beispiel #8
0
    def test_build_with_scheme(self):
        """``MafColumnRecord.build`` checks name and index against a scheme."""
        scheme = _TestScheme()
        # Keyword arguments shared by every call below.
        float_kwargs = dict(name="float",
                            value=2.1,
                            description="Foo Bar",
                            scheme=scheme)

        # column found in the scheme, column index inferred
        inferred = MafColumnRecord.build(**float_kwargs)
        self.assertEqual(str(inferred), "2.1")
        self.assertEqual(inferred.column_index, 1)

        # column found in the scheme, column index validated
        validated = MafColumnRecord.build(column_index=1, **float_kwargs)
        self.assertEqual(str(validated), "2.1")
        self.assertEqual(validated.column_index, 1)

        # error: name not found in scheme
        unknown_kwargs = dict(float_kwargs, name="not-a-key")
        with self.assertRaises(KeyError):
            MafColumnRecord.build(column_index=0, **unknown_kwargs)

        # error: mismatching column index
        with self.assertRaises(ValueError):
            MafColumnRecord.build(column_index=0, **float_kwargs)
Beispiel #9
0
 def __get_key_from_int(key: Union[str, int],
                        column: MafColumnRecord) -> str:
     """Return the column's name for an integer key, reconciling the index.

     ``key`` is converted with ``int()`` and treated as a column index.  When
     ``column.column_index`` is unset it is set to that index; otherwise the
     two must be equal.

     :param key: the column index (anything ``int()`` accepts).
     :param column: the column being stored; its ``column_index`` may be set.
     :return: ``column.key``.
     :raises KeyError: if the index is negative.
     :raises ValueError: if ``column`` already has a different index.
     """
     column_index = int(key)
     if column_index < 0:
         # Name the offending index so the failure is diagnosable.
         raise KeyError(
             f"Column index must not be negative: {column_index}")
     # set the column_index on column if not already set, otherwise
     # ensure they are the same
     if column.column_index is None:
         column.column_index = column_index
     elif column_index != column.column_index:
         raise ValueError(
             f"Column index mismatch: {column.column_index} is not {column_index}"
         )
     return column.key
Beispiel #10
0
    def from_line(
        cls,
        line: str,
        column_names: Optional[List[str]] = None,
        scheme: Optional['MafScheme'] = None,
        line_number: Optional[int] = None,
        validation_stringency: ValidationStringency = ValidationStringency.
        Strict,
        logger: logging.Logger = Logger.RootLogger,
    ) -> 'MafRecord':
        """
        Parses a record from a single tab-delimited line.

        Problems found while parsing are appended to the record's
        ``validation_errors`` rather than raised; a column is only stored on
        the record when it was built and reported no validation errors.

        :param line: the line to parse.
        :param column_names: the expected names of the columns, in order,
        otherwise will use the scheme.
        :param scheme: an optional MafScheme
        :param line_number: the optional line number.
        :param validation_stringency: the optional validation stringency for
        the record
        :param logger: the logger to which to write errors
        :return: the parsed record, including any collected validation errors.
        :raises ValueError: if neither ``column_names`` nor ``scheme`` is
        given.
        """
        record = cls(line_number=line_number,
                     validation_stringency=validation_stringency)

        if column_names is None:
            if scheme is None:
                raise ValueError("Either column_names or scheme must be given")
            column_names = scheme.column_names()

        column_values = line.rstrip("\r\n").split(cls.ColumnSeparator)

        # A wrong column count makes pairing names with values meaningless,
        # so report it and stop before looking at individual columns.
        if len(column_names) != len(column_values):
            record.validation_errors.append(
                MafValidationError(
                    MafValidationErrorType.RECORD_MISMATCH_NUMBER_OF_COLUMNS,
                    f"Found '{len(column_values)}' columns but expected '{len(column_names)}'",
                    line_number=line_number,
                ))
            record.validate(logger=logger, reset_errors=False)

            return record

        for column_index, (column_name, column_value) in enumerate(
                zip(column_names, column_values)):
            column = None

            # Looked up once per column; reused for the build below.
            scheme_column_class = (scheme.column_class(
                name=column_name) if scheme else None)

            # A validation error will be found later if we don't find the
            # column name
            if scheme_column_class is None:
                column = MafColumnRecord(key=column_name,
                                         value=column_value,
                                         column_index=column_index)
            else:
                try:
                    column = scheme_column_class.build(
                        name=column_name,
                        value=column_value,
                        column_index=column_index,
                    )
                except Exception as error:
                    # Deliberately broad: any failure to build the column is
                    # recorded as a validation error instead of aborting.
                    record.validation_errors.append(
                        MafValidationError(
                            MafValidationErrorType.RECORD_INVALID_COLUMN_VALUE,
                            f"Could not build column '{column_index+1}' with name '{column_name}' scheme '{scheme.version()}': {error}",  # type: ignore
                            line_number=line_number,
                        ))

            if column is None:
                continue

            column_validation_errors = column.validate(
                scheme=scheme, line_number=line_number)
            record.validation_errors.extend(
                column_validation_errors)  # type: ignore
            # Only columns without validation errors are stored.
            if not column_validation_errors:
                record[column_name] = column

        # process validation errors
        record.validate(logger=logger, reset_errors=False)

        return record
Beispiel #11
0
    def __setitem__(self, key: Union[int, str],
                    column: MafColumnRecord) -> None:
        """Store ``column`` in this record under ``key``.

        If a column named ``key`` is already stored, the new column either
        inherits its ``column_index`` (when the new column has none) or must
        have the same one.  If no such column is stored and the new column
        has no ``column_index``, it is appended as the next column.

        If the ``column_index`` is beyond the current length, the column list
        is padded with ``None`` up to that index; the padded columns have no
        name.

        :param key: one of: 1. an ``int``, taken to be the ``column_index`` of
        the column and reconciled with ``column.column_index``; 2. an
        ``MafColumnRecord``, which is checked against ``column``; 3. a ``str``
        column name, which must equal ``column.key``.
        :param column: an instance of ``MafColumnRecord``.
        :raises TypeError: if ``column`` is not an ``MafColumnRecord``, or if
        ``key`` is none of the accepted types.
        :raises ValueError: if a string ``key`` differs from ``column.key``,
        or if an existing column with the same name has a different index.
        """
        if not isinstance(column, MafColumnRecord):
            raise TypeError(f"{type(column)} is not 'MafColumnRecord'")

        # Reduce every accepted kind of key to the column name.
        if isinstance(key, int):
            key = self.__get_key_from_int(key, column)
        elif isinstance(key, MafColumnRecord):
            key = self.__get_key_from_column(key, column)
        else:
            if not isinstance(key, str):
                raise TypeError("Column name must be a string")
            if column.key != key:
                raise ValueError(
                    f"Adding a column with name '{column.key}' but key was '{key}'"
                )
        assert key == column.key

        # Reconcile the index with a same-named column that is already
        # stored, or pick the next free index for a brand-new column.
        if key in self.__columns_dict:
            existing_index = self.__columns_dict[key].column_index
            if column.column_index is None:
                column.column_index = existing_index
            elif existing_index != column.column_index:
                raise ValueError(
                    f"Existing column's index '{existing_index}' does not match replacement column's index '{column.column_index}'"
                )
        elif column.column_index is None:
            column.column_index = len(self.__columns_list)
        self.__columns_dict[key] = column
        assert column.column_index is not None

        # Pad with ``None`` when the target index is past the end.
        target_index = column.column_index
        missing_slots = target_index - len(self) + 1
        if missing_slots > 0:
            self.__columns_list.extend([None] * missing_slots)
        # Developer Note: due to padding, the number of items in the dictionary
        # may be less than the number of items in the list.  Use validate to
        # catch this later.
        self.__columns_list[target_index] = column
Beispiel #12
0
    def from_line(cls,
                  line,
                  column_names=None,
                  scheme=None,
                  line_number=None,
                  validation_stringency=None,
                  logger=Logger.RootLogger):
        """
        Parses a record from a single tab-delimited line.

        Problems found while parsing are appended to the record's
        ``validation_errors`` rather than raised; a column is only stored on
        the record when it was built and reported no validation errors.

        :param line: the line to parse.
        :param column_names: the expected names of the columns, in order,
        otherwise will use the scheme.
        :param scheme: an optional MafScheme
        :param line_number: the optional line number.
        :param validation_stringency: the optional validation stringency for
        the record
        :param logger: the logger to which to write errors
        :return: the parsed record, including any collected validation errors.
        :raises ValueError: if neither ``column_names`` nor ``scheme`` is
        given.
        """
        record = MafRecord(line_number=line_number,
                           validation_stringency=validation_stringency)

        if column_names is None:
            if scheme is None:
                raise ValueError("Either column_names or scheme must be given")
            column_names = scheme.column_names()

        column_values = line.rstrip("\r\n").split(MafRecord.ColumnSeparator)

        if len(column_names) != len(column_values):
            # A wrong column count makes pairing names with values
            # meaningless, so individual columns are not examined.
            record.validation_errors.append(
                MafValidationError(
                    MafValidationErrorType.RECORD_MISMATCH_NUMBER_OF_COLUMNS,
                    "Found '%d' columns but expected '%d'" %
                    (len(column_values), len(column_names)),
                    line_number=line_number))
        else:
            for column_index, (column_name, column_value) in \
                    enumerate(zip(column_names, column_values)):
                column = None

                # Looked up once per column; reused for the build below.
                scheme_column_class = \
                    scheme.column_class(name=column_name) if scheme else None

                # A validation error will be found later if we don't find the
                # column name
                if scheme_column_class is None:
                    column = MafColumnRecord(key=column_name,
                                             value=column_value,
                                             column_index=column_index)
                else:
                    try:
                        column = scheme_column_class.build(
                            name=column_name,
                            value=column_value,
                            column_index=column_index)
                    except Exception as error:
                        # Deliberately broad: any failure to build the column
                        # is recorded as a validation error, not raised.
                        record.validation_errors.append(
                            MafValidationError(
                                MafValidationErrorType.
                                RECORD_INVALID_COLUMN_VALUE,
                                "Could not build column '%d' with name '%s' "
                                "with the scheme '%s': %s" %
                                (column_index + 1, column_name,
                                 scheme.version(), str(error)),
                                line_number=line_number,
                            ))

                if column is None:
                    continue

                column_validation_errors = \
                    column.validate(scheme=scheme, line_number=line_number)
                record.validation_errors.extend(column_validation_errors)
                # Only columns without validation errors are stored.
                if not column_validation_errors:
                    record[column_name] = column

        # process validation errors
        record.validate(logger=logger, reset_errors=False)

        return record