Esempio n. 1
0
    def from_line(
        cls,
        line: str,
        column_names: Optional[List[str]] = None,
        scheme: Optional['MafScheme'] = None,
        line_number: Optional[int] = None,
        validation_stringency: ValidationStringency = ValidationStringency.Strict,
        logger: logging.Logger = Logger.RootLogger,
    ) -> 'MafRecord':
        """
        Parses a record from a single line delimited by
        ``cls.ColumnSeparator``.

        Trailing carriage-return / newline characters are stripped before the
        line is split.  If the number of values does not match the number of
        column names, a single RECORD_MISMATCH_NUMBER_OF_COLUMNS error is
        recorded and no columns are added.  Otherwise each value is turned
        into a column (built by the scheme's column class when the scheme
        provides one for that name, else a plain ``MafColumnRecord``),
        validated, and stored on the record only if it produced no
        validation errors.

        :param line: the line to parse.
        :param column_names: the expected names of the columns, in order;
        when None, ``scheme.column_names()`` is used instead.
        :param scheme: an optional MafScheme used to look up the column class
        for each column name and passed to each column's ``validate``.
        :param line_number: the optional line number, attached to the record
        and to any validation errors created here.
        :param validation_stringency: the validation stringency passed to the
        record.
        :param logger: the logger passed to ``record.validate``.
        :return: the parsed record, after ``record.validate`` has been called
        with ``reset_errors=False``.
        :raises ValueError: if both ``column_names`` and ``scheme`` are None.
        """
        record = cls(line_number=line_number,
                     validation_stringency=validation_stringency)

        if column_names is None:
            if scheme is None:
                raise ValueError("Either column_names or scheme must be given")
            column_names = scheme.column_names()

        column_values = line.rstrip("\r\n").split(cls.ColumnSeparator)

        if len(column_names) != len(column_values):
            # Names and values cannot be paired up reliably, so record the
            # mismatch and skip per-column parsing entirely.
            record.validation_errors.append(
                MafValidationError(
                    MafValidationErrorType.RECORD_MISMATCH_NUMBER_OF_COLUMNS,
                    f"Found '{len(column_values)}' columns but expected '{len(column_names)}'",
                    line_number=line_number,
                ))
            record.validate(logger=logger, reset_errors=False)

            return record

        for column_index, (column_name, column_value) in enumerate(
                zip(column_names, column_values)):
            column = None

            # Looked up exactly once per column; the result is reused below.
            scheme_column_class = (scheme.column_class(
                name=column_name) if scheme else None)

            # A validation error will be found later if we don't find the
            # column name
            if scheme_column_class is None:
                column = MafColumnRecord(key=column_name,
                                         value=column_value,
                                         column_index=column_index)
            else:
                try:
                    column = scheme_column_class.build(  # type: ignore
                        name=column_name,
                        value=column_value,
                        column_index=column_index,
                    )
                except Exception as error:
                    # Deliberately broad: any failure to build the column is
                    # reported as a validation error on the record rather
                    # than aborting the parse of the whole line.
                    record.validation_errors.append(
                        MafValidationError(
                            MafValidationErrorType.RECORD_INVALID_COLUMN_VALUE,
                            f"Could not build column '{column_index+1}' with name '{column_name}' scheme '{scheme.version()}': {error}",  # type: ignore
                            line_number=line_number,
                        ))

            if column is not None:
                column_validation_errors = column.validate(
                    scheme=scheme, line_number=line_number)
                record.validation_errors.extend(
                    column_validation_errors)  # type: ignore
                # Only columns that validated cleanly are stored.
                if not column_validation_errors:
                    record[column_name] = column

        # process validation errors
        record.validate(logger=logger, reset_errors=False)

        return record
Esempio n. 2
0
    def from_line(cls,
                  line,
                  column_names=None,
                  scheme=None,
                  line_number=None,
                  validation_stringency=None,
                  logger=Logger.RootLogger):
        """
        Parses a record from a single line delimited by
        ``cls.ColumnSeparator``.

        Trailing carriage-return / newline characters are stripped before the
        line is split.  If the number of values does not match the number of
        column names, a single RECORD_MISMATCH_NUMBER_OF_COLUMNS error is
        recorded and no columns are added.  Otherwise each value is turned
        into a column (built by the scheme's column class when the scheme
        provides one for that name, else a plain ``MafColumnRecord``),
        validated, and stored on the record only if it produced no
        validation errors.

        :param line: the line to parse.
        :param column_names: the expected names of the columns, in order;
        when None, ``scheme.column_names()`` is used instead.
        :param scheme: an optional MafScheme used to look up the column class
        for each column name and passed to each column's ``validate``.
        :param line_number: the optional line number, attached to the record
        and to any validation errors created here.
        :param validation_stringency: the optional validation stringency
        passed to the record.
        :param logger: the logger passed to ``record.validate``.
        :return: the parsed record, after ``record.validate`` has been called
        with ``reset_errors=False``.
        :raises ValueError: if both ``column_names`` and ``scheme`` are None.
        """
        # Build through ``cls`` so that calling this on a subclass yields an
        # instance of that subclass.
        record = cls(line_number=line_number,
                     validation_stringency=validation_stringency)

        if column_names is None:
            if scheme is None:
                raise ValueError("Either column_names or scheme must be given")
            column_names = scheme.column_names()

        column_values = line.rstrip("\r\n").split(cls.ColumnSeparator)

        if len(column_names) != len(column_values):
            # Names and values cannot be paired up reliably, so record the
            # mismatch and skip per-column parsing entirely.
            record.validation_errors.append(
                MafValidationError(
                    MafValidationErrorType.RECORD_MISMATCH_NUMBER_OF_COLUMNS,
                    "Found '%d' columns but expected '%d'" %
                    (len(column_values), len(column_names)),
                    line_number=line_number))
        else:
            for column_index, (column_name, column_value) in \
                    enumerate(zip(column_names, column_values)):
                column = None

                # Looked up exactly once per column; the result is reused
                # below.
                scheme_column_class = \
                    scheme.column_class(name=column_name) if scheme else None

                # A validation error will be found later if we don't find the
                # column name
                if scheme_column_class is None:
                    column = MafColumnRecord(key=column_name,
                                             value=column_value,
                                             column_index=column_index)
                else:
                    try:
                        column = scheme_column_class.build(
                            name=column_name,
                            value=column_value,
                            column_index=column_index)
                    except Exception as error:
                        # Deliberately broad: any failure to build the column
                        # is reported as a validation error on the record
                        # rather than aborting the parse of the whole line.
                        record.validation_errors.append(
                            MafValidationError(
                                MafValidationErrorType.
                                RECORD_INVALID_COLUMN_VALUE,
                                "Could not build column '%d' with name '%s' "
                                "with the scheme '%s': %s" %
                                (column_index + 1, column_name,
                                 scheme.version(), str(error)),
                                line_number=line_number,
                            ))

                if column is not None:
                    column_validation_errors = \
                        column.validate(scheme=scheme, line_number=line_number)
                    record.validation_errors.extend(column_validation_errors)
                    # Only columns that validated cleanly are stored.
                    if not column_validation_errors:
                        record[column_name] = column

        # process validation errors
        record.validate(logger=logger, reset_errors=False)

        return record