def build(cls, column, value=None, default=None, scheme=None):
    """Build a column record, falling back to a default when no value is given.

    :param column: the column name handed to ``MafColumnRecord.build``.
    :param value: the value to store; when ``None`` the default is stored.
    :param default: the fallback value; when ``None`` the literal string
        ``'None'`` is used as the fallback.
    :param scheme: passed through to ``MafColumnRecord.build``.
    :return: whatever ``MafColumnRecord.build`` returns.
    """
    # NOTE(review): the comment previously attached to this fallback read
    # "None must be '' in this case", but the fallback actually used is the
    # string 'None' -- confirm which of the two is intended.
    if default is None:
        default = 'None'
    chosen = default if value is None else value
    return MafColumnRecord.build(column, chosen, scheme=scheme)
def __init__(self, chromosome, start, end, ref="A", alts=None):
    """Create a dummy record with a locus, a reference allele and alt alleles.

    :param chromosome: value stored in the ``Chromosome`` column.
    :param start: value stored in the ``Start_Position`` column.
    :param end: value stored in the ``End_Position`` column.
    :param ref: value stored in the ``Reference_Allele`` column.
    :param alts: alternate alleles kept on ``self._alts``; any falsy value
        is replaced by a new empty list.
    """
    super(DummyRecordWithAllele, self).__init__()
    fields = (
        ("Chromosome", chromosome),
        ("Start_Position", start),
        ("End_Position", end),
        ("Reference_Allele", ref),
    )
    for name, field_value in fields:
        self.add(MafColumnRecord(name, field_value))
    self._alts = alts or []
def build(cls, column, value=None, default=None, scheme=None):
    """Build a column record, requiring a default whenever ``value`` is None.

    :param column: the column name handed to ``MafColumnRecord.build``.
    :param value: the value to store; when ``None`` the default is stored.
    :param default: the fallback value; must not be ``None`` when ``value``
        is ``None``.
    :param scheme: passed through to ``MafColumnRecord.build``.
    :return: whatever ``MafColumnRecord.build`` returns.
    :raises AssertionError: if both ``value`` and ``default`` are ``None``.
    """
    if value is not None:
        return MafColumnRecord.build(column, value, scheme=scheme)
    if default is None:
        # Raised explicitly instead of via an `assert` statement so the check
        # is not stripped when Python runs with -O. AssertionError is kept so
        # callers that already catch it keep working.
        raise AssertionError(
            "BooleanColumn {0} can't be None".format(column))
    return MafColumnRecord.build(column, default, scheme=scheme)
def build(cls, key, value, scheme, default=None):
    """Build a column record, handling missing values and list values.

    Resolution order:

    1. ``value`` is None and a ``default`` was given: store the default.
    2. ``value`` is None and the scheme's column class for ``key`` reports
       itself nullable: delegate to that class's ``build_nullable``.
    3. ``value`` is a list: store its sorted items joined with ``';'``.
    4. Otherwise store ``value`` unchanged (including a ``None`` that
       matched neither of the first two cases).

    :param key: the column name.
    :param value: the raw value, possibly ``None`` or a list.
    :param scheme: the scheme used to look up the column class and passed
        through to the builders.
    :param default: optional fallback for a ``None`` value.
    :return: the built column record.
    """
    if value is None:
        if default is not None:
            return MafColumnRecord.build(key, default, scheme=scheme)
        if scheme.column_class(key).is_nullable():
            return scheme.column_class(key).build_nullable(key, scheme=scheme)

    if isinstance(value, list):
        value = ';'.join(sorted(value))
    return MafColumnRecord.build(key, value, scheme=scheme)
def build(cls, key, value, scheme, default=None, fn=None, **kwargs):
    """Build a column record, optionally transforming the value first.

    Resolution order:

    1. ``fn`` was given: store ``fn(value, **kwargs)``; ``default`` and
       nullability are not consulted.
    2. ``value`` is None and a ``default`` was given: store the default.
    3. ``value`` is None and the scheme's column class for ``key`` reports
       itself nullable: delegate to that class's ``build_nullable``.
    4. Otherwise store ``value`` unchanged.

    :param key: the column name.
    :param value: the raw value.
    :param scheme: the scheme used to look up the column class and passed
        through to the builders.
    :param default: optional fallback for a ``None`` value.
    :param fn: optional callable applied to ``value``.
    :param kwargs: extra keyword arguments forwarded to ``fn``.
    :return: the built column record.
    """
    if fn is not None:
        transformed = fn(value, **kwargs)
        return MafColumnRecord.build(key, transformed, scheme=scheme)

    if value is None:
        if default is not None:
            return MafColumnRecord.build(key, default, scheme=scheme)
        if scheme.column_class(key).is_nullable():
            return scheme.column_class(key).build_nullable(key, scheme=scheme)

    return MafColumnRecord.build(key, value, scheme=scheme)
def __init__(self, chromosome, start, end, tumor_barcode = None, normal_barcode = None):
    """Create a dummy record with a locus and optional sample barcodes.

    :param chromosome: value stored in the ``Chromosome`` column.
    :param start: value stored in the ``Start_Position`` column.
    :param end: value stored in the ``End_Position`` column.
    :param tumor_barcode: when truthy, stored in ``Tumor_Sample_Barcode``.
    :param normal_barcode: when truthy, stored in
        ``Matched_Norm_Sample_Barcode``.
    """
    super(DummyRecord, self).__init__()

    required = (
        ("Chromosome", chromosome),
        ("Start_Position", start),
        ("End_Position", end),
    )
    for name, field_value in required:
        self.add(MafColumnRecord(name, field_value))

    # Barcode columns are only added when a truthy barcode was supplied.
    optional = (
        ("Tumor_Sample_Barcode", tumor_barcode),
        ("Matched_Norm_Sample_Barcode", normal_barcode),
    )
    for name, barcode in optional:
        if barcode:
            self.add(MafColumnRecord(name, barcode))
def test_basic_column(self):
    # Build a plain column without passing a scheme.
    built = MafColumnRecord.build(
        name="key",
        value="value",
        column_index=0,
        description="Foo Bar",
    )

    # The base column reports no nullable support at all.
    self.assertFalse(built.is_nullable())
    self.assertEqual([], built.__nullable_values__())
    self.assertIsNone(built.__nullable_dict__())
    self.assertEqual([], built.__nullable_keys__())
    self.assertFalse(built.is_null())

    # The string form of the column is its value.
    self.assertEqual(str(built), "value")
def test_build_with_scheme(self):
    scheme = _TestScheme()
    # Keyword arguments common to every build call in this test.
    shared = {"value": 2.1, "description": "Foo Bar", "scheme": scheme}

    # Column found in the scheme; the column index is inferred.
    inferred = MafColumnRecord.build(name="float", **shared)
    self.assertEqual(str(inferred), "2.1")
    self.assertEqual(inferred.column_index, 1)

    # Column found in the scheme; the supplied column index is validated.
    validated = MafColumnRecord.build(name="float", column_index=1, **shared)
    self.assertEqual(str(validated), "2.1")
    self.assertEqual(validated.column_index, 1)

    # Error: the name is not found in the scheme.
    with self.assertRaises(KeyError):
        MafColumnRecord.build(name="not-a-key", column_index=0, **shared)

    # Error: the supplied column index does not match.
    with self.assertRaises(ValueError):
        MafColumnRecord.build(name="float", column_index=0, **shared)
def __get_key_from_int(key: Union[str, int], column: MafColumnRecord) -> str:
    """Resolve an integer-like key to the column's name.

    ``key`` is converted with ``int()`` and treated as a column index. If
    ``column`` has no index yet, it is assigned this one; if it already has
    one, the two must agree.

    :param key: the column index, as an int or something ``int()`` accepts.
    :param column: the column whose ``column_index`` is set or checked.
    :return: ``column.key``.
    :raises KeyError: if the index is negative.
    :raises ValueError: if ``column`` already carries a different index.
    """
    index = int(key)
    if index < 0:
        raise KeyError

    # Column has no index yet: adopt the requested one.
    if column.column_index is None:
        column.column_index = index
        return column.key

    # Column already has an index: it has to match the requested one.
    if index != column.column_index:
        raise ValueError(
            f"Column index mismatch: {column.column_index} is not {index}"
        )
    return column.key
def from_line(
    cls,
    line: str,
    column_names: Optional[List[str]] = None,
    scheme: Optional['MafScheme'] = None,
    line_number: Optional[int] = None,
    validation_stringency: ValidationStringency = ValidationStringency.Strict,
    logger: logging.Logger = Logger.RootLogger,
) -> 'MafRecord':
    """
    Parses a record from a single tab-delimited line.

    Problems found while parsing are appended to the record's
    ``validation_errors`` rather than raised; ``record.validate`` is then
    called with ``reset_errors=False`` before the record is returned.
    A column is only stored on the record when it was built successfully
    and its own ``validate`` call returned no errors.

    :param line: the line to parse; trailing CR/LF characters are stripped
        before splitting on ``cls.ColumnSeparator``.
    :param column_names: the expected names of the columns, in order,
        otherwise will use the scheme.
    :param scheme: an optional MafScheme
    :param line_number: the optional line number.
    :param validation_stringency: the optional validation stringency for
        the record
    :param logger: the logger to which to write errors
    :return: the parsed record.
    :raises ValueError: if neither ``column_names`` nor ``scheme`` is given.
    """
    record = cls(line_number=line_number,
                 validation_stringency=validation_stringency)

    if column_names is None:
        if scheme is None:
            raise ValueError("Either column_names or scheme must be given")
        column_names = scheme.column_names()

    column_values = line.rstrip("\r\n").split(cls.ColumnSeparator)

    # Names and values cannot be paired up when the counts differ, so
    # record the mismatch and skip per-column parsing entirely.
    if len(column_names) != len(column_values):
        record.validation_errors.append(
            MafValidationError(
                MafValidationErrorType.RECORD_MISMATCH_NUMBER_OF_COLUMNS,
                f"Found '{len(column_values)}' columns but expected '{len(column_names)}'",
                line_number=line_number,
            ))
        record.validate(logger=logger, reset_errors=False)
        return record

    for column_index, (column_name, column_value) in enumerate(
            zip(column_names, column_values)):
        column = None
        # Looked up once per column; None when there is no scheme or the
        # scheme returns no class for this name.
        scheme_column_class = (scheme.column_class(name=column_name)
                               if scheme else None)

        # A validation error will be found later if we don't find the
        # column name
        if scheme_column_class is None:
            column = MafColumnRecord(key=column_name,
                                     value=column_value,
                                     column_index=column_index)
        else:
            try:
                column = scheme_column_class.build(
                    name=column_name,
                    value=column_value,
                    column_index=column_index,
                )
            except Exception as error:
                # Deliberately broad: any failure to build the column is
                # reported as a validation error instead of aborting the
                # parse of the whole line.
                record.validation_errors.append(
                    MafValidationError(
                        MafValidationErrorType.RECORD_INVALID_COLUMN_VALUE,
                        f"Could not build column '{column_index+1}' with name '{column_name}' scheme '{scheme.version()}': {error}",  # type: ignore
                        line_number=line_number,
                    ))

        if column is not None:
            column_validation_errors = column.validate(
                scheme=scheme, line_number=line_number)
            record.validation_errors.extend(
                column_validation_errors)  # type: ignore
            if not column_validation_errors:
                record[column_name] = column

    # process validation errors
    record.validate(logger=logger, reset_errors=False)

    return record
def __setitem__(self, key: Union[int, str], column: MafColumnRecord) -> None:
    """Store ``column`` in this record under ``key``.

    The column's ``column_index`` is resolved as follows:

    * A column with the same name is already stored: the new column
      inherits the stored index if it has none, otherwise the two indexes
      must be equal.
    * No column with that name is stored and the new column has no index:
      it is given the next index, i.e. the current length of the column
      list.

    If the resolved index lies beyond the end of the column list, the list
    is padded with ``None`` up to that index.

    :param key: one of

        1. an ``int``: treated as the column index and resolved to a name
           through ``__get_key_from_int``;
        2. a ``MafColumnRecord``: resolved to a name through
           ``__get_key_from_column`` (not shown here; expected to check
           that both columns share the same key);
        3. a ``str``: the column name, which must equal ``column.key``.

        Any other type, including ``None``, is rejected.
    :param column: an instance of ``MafColumnRecord``.
    :raises TypeError: if ``column`` is not a ``MafColumnRecord`` or ``key``
        is of an unsupported type.
    :raises ValueError: if a string ``key`` differs from ``column.key``, or
        if the column's index conflicts with that of the stored column of
        the same name.
    """
    if not isinstance(column, MafColumnRecord):
        raise TypeError(f"{type(column)} is not 'MafColumnRecord'")

    # Normalise the key to the column name.
    if isinstance(key, int):
        key = self.__get_key_from_int(key, column)
    elif isinstance(key, MafColumnRecord):
        # make sure the keys are the same
        key = self.__get_key_from_column(key, column)
    elif not isinstance(key, str):
        raise TypeError("Column name must be a string")
    elif column.key != key:
        raise ValueError(
            f"Adding a column with name '{column.key}' but key was '{key}'"
        )
    assert key == column.key

    # Resolve the column index against any stored column of the same name.
    if key not in self.__columns_dict:
        if column.column_index is None:
            # New name without an index: place it after the last column.
            column.column_index = len(self.__columns_list)
    elif column.column_index is None:
        # Replacement without an index: reuse the stored column's index.
        column.column_index = self.__columns_dict[key].column_index
    elif self.__columns_dict[key].column_index != column.column_index:
        raise ValueError(
            f"Existing column's index '{self.__columns_dict[key].column_index}' does not match replacement column's index '{column.column_index}'"
        )

    self.__columns_dict[key] = column
    assert column.column_index is not None

    # Pad the list with None when the index is out of range.
    shortfall = column.column_index - len(self) + 1
    if shortfall > 0:
        self.__columns_list.extend([None] * shortfall)

    # Developer Note: due to padding, the number of items in the dictionary
    # may be less than the number of items in the list. Use validate to
    # catch this later.
    self.__columns_list[column.column_index] = column
def from_line(cls, line, column_names=None, scheme=None, line_number=None,
              validation_stringency=None, logger=Logger.RootLogger):
    """
    Parses a record from a single tab-delimited line.

    Problems found while parsing are appended to the record's
    ``validation_errors`` rather than raised; ``record.validate`` is then
    called with ``reset_errors=False`` before the record is returned.
    A column is only stored on the record when it was built successfully
    and its own ``validate`` call returned no errors.

    :param line: the line to parse; trailing CR/LF characters are stripped
        before splitting on ``cls.ColumnSeparator``.
    :param column_names: the expected names of the columns, in order,
        otherwise will use the scheme.
    :param scheme: an optional MafScheme
    :param line_number: the optional line number.
    :param validation_stringency: the optional validation stringency for
        the record
    :param logger: the logger to which to write errors
    :return: the parsed record, an instance of ``cls``.
    :raises ValueError: if neither ``column_names`` nor ``scheme`` is given.
    """
    # Instantiate through ``cls`` (not a hard-coded class name) so that a
    # subclass calling this factory gets an instance of itself.
    record = cls(line_number=line_number,
                 validation_stringency=validation_stringency)

    if column_names is None:
        if scheme is None:
            raise ValueError("Either column_names or scheme must be given")
        column_names = scheme.column_names()

    column_values = line.rstrip("\r\n").split(cls.ColumnSeparator)

    if len(column_names) != len(column_values):
        # Names and values cannot be paired up, so only record the
        # mismatch and skip per-column parsing.
        record.validation_errors.append(
            MafValidationError(
                MafValidationErrorType.RECORD_MISMATCH_NUMBER_OF_COLUMNS,
                "Found '%d' columns but expected '%d'" %
                (len(column_values), len(column_names)),
                line_number=line_number))
    else:
        for column_index, (column_name, column_value) in \
                enumerate(zip(column_names, column_values)):
            column = None
            # Looked up once per column; None when there is no scheme or
            # the scheme returns no class for this name.
            scheme_column_class = \
                scheme.column_class(name=column_name) if scheme else None

            # A validation error will be found later if we don't find the
            # column name
            if scheme_column_class is None:
                column = MafColumnRecord(key=column_name,
                                         value=column_value,
                                         column_index=column_index)
            else:
                try:
                    column = scheme_column_class.build(
                        name=column_name,
                        value=column_value,
                        column_index=column_index)
                except Exception as error:
                    # Deliberately broad: any failure to build the column
                    # is reported as a validation error instead of
                    # aborting the parse of the whole line.
                    record.validation_errors.append(
                        MafValidationError(
                            MafValidationErrorType.
                            RECORD_INVALID_COLUMN_VALUE,
                            "Could not build column '%d' with name '%s' "
                            "with the scheme '%s': %s" %
                            (column_index + 1, column_name,
                             scheme.version(), str(error)),
                            line_number=line_number,
                        ))

            if column is not None:
                column_validation_errors = \
                    column.validate(scheme=scheme, line_number=line_number)
                record.validation_errors.extend(column_validation_errors)
                if len(column_validation_errors) == 0:
                    record[column_name] = column

    # process validation errors
    record.validate(logger=logger, reset_errors=False)

    return record