Beispiel #1
0
def main(args=None, extra_subparser=None):
    """Entry point for the maf tools command line.

    Builds the top-level argument parser, registers the ``Validate``,
    ``View`` and ``Sort`` subcommands (plus ``extra_subparser`` when one is
    supplied), parses the arguments, loads any custom schemes and finally
    calls ``options.func`` with the parsed options.

    :param args: the argument list handed to ``parse_args``
    :param extra_subparser: optional object whose ``add(subparsers=...)``
        method registers one more subcommand
    """
    Logger.setup_root_logger()

    cli = argparse.ArgumentParser()

    # Options that are given before the subcommand
    cli.add_argument(
        '-v',
        '--validation-stringency',
        action=StoreEnumAction,
        type=ValidationStringency,
        default=ValidationStringency.Strict,
    )
    cli.add_argument(
        '-s',
        '--schemes',
        action='append',
        help="One or more JSON files with custom scheme definitions",
    )

    # Register the subcommands; one of them must be given
    commands = cli.add_subparsers(dest="subcommand")
    commands.required = True
    for command in (Validate, View, Sort):
        command.add(subparsers=commands)
    if extra_subparser:
        extra_subparser.add(subparsers=commands)

    parsed = cli.parse_args(args=args)

    # Make any custom schemes part of the set of known schemes
    all_schemes(extra_filenames=parsed.schemes)

    parsed.func(parsed)
Beispiel #2
0
    def add(cls, subparsers):
        """Adds the given subcommand to the subparsers.  Will always add the
        version and annotation options.

        :param subparsers: the sub-parsers object on which ``add_parser`` is
            called to create the parser for this subcommand
        :return: the newly created sub-parser
        """
        subparser = subparsers.add_parser(
            name=cls.__get_name__(), description=cls.__get_description__())

        # Query the known schemes once and reuse the result for both options
        schemes = list(all_schemes())
        # Sorted so the choices and the help text come out in the same order
        # on every run; the iteration order of a set of strings is not stable
        # between interpreter invocations.
        versions = sorted({s.version() for s in schemes})
        annotations = [s.annotation_spec() for s in schemes]

        subparser.add_argument('-v',
                               '--version',
                               default=None,
                               choices=versions,
                               help="Use the given version when validating "
                               "rather than what's in the header.  "
                               "Choices: %s" % ", ".join(versions))
        subparser.add_argument('-a',
                               '--annotation',
                               type=str,
                               default=None,
                               choices=annotations,
                               help="Use the given annotation specification "
                               "when validating rather than what's in "
                               "the header.  "
                               "Choices: %s" % ", ".join(annotations))

        # Let the concrete subcommand add its own options, then route the
        # parsed options to its main method via ``options.func``
        cls.__add_arguments__(subparser)
        subparser.set_defaults(func=cls.main)
        return subparser
Beispiel #3
0
    def __validate_options__(cls, options):
        """
        Checks the custom command line options and stores the resolved scheme
        in ``options.scheme``.  Parsers are expected to call this method
        first (recursively), since it is what sets the "scheme" member on the
        given options object.

        When neither a version nor an annotation was given the scheme is set
        to ``None``.  Otherwise the scheme is looked up with ``find_scheme``
        and a ``ValueError`` listing the available schemes is raised if no
        scheme was found.
        """
        log = Logger.get_logger(cls.__name__)

        # Nothing requested on the command line: no scheme override
        if not (options.version or options.annotation):
            options.scheme = None
            return

        options.scheme = find_scheme(version=options.version,
                                     annotation=options.annotation)
        if not options.scheme:
            available = "\n".join(
                "\t%s %s" % (known.version(), known.annotation_spec())
                for known in all_schemes())
            raise ValueError(
                "Could not find a scheme with version '%s' "
                "and annotation '%s'.  Available schemes:\n%s" %
                (options.version, options.annotation, available))

        # Report whichever half of the (version, annotation) pair was inferred
        if options.version is None:
            log.info(
                "No version given, defaulting to version '%s' "
                "based on -a/--annotation", options.scheme.version())

        if options.annotation is not None:
            return

        if options.scheme.is_basic():
            log.info("The scheme is assumed to be the basic scheme")
        else:
            log.info(
                "No annotation given, defaulting to "
                "annotation '%s'", options.scheme.annotation_spec())
Beispiel #4
0
class MafHeader(MutableMapping):
    """
    A header for a MAF file storing zero or more header records.  Each record
    represents a single line from the original MAF file.

    Provides methods for accessing the records in the order they were added, as
    well as methods for returning the version
    (:func:`~header.MafHeader.version`), annotation specification
    (:func:`~header.MafHeader.annotation`),
    scheme (:func:`~header.MafHeader.scheme`), and sort order (
    :func:`~header.MafHeader.sort_order`).  Additionally, the
    :func:`~header.MafHeader.validate` method can be used
    to validate the format of the header as well as validate the contents
    relative to the given scheme. If no sort order is found in the header,
    the sort order will be "unsorted".

    The header behaves as a mutable mapping from record key to
    ``MafHeaderRecord``.
    """

    # Keys of the header records that get special treatment
    VersionKey = "version"

    AnnotationSpecKey = "annotation.spec"

    SortOrderKey = "sort.order"  # NOQA

    ContigKey = "contigs"

    # Values accepted by `validate`; computed once, when the class is defined
    SupportedVersions = [s.version() for s in all_schemes()]

    SupportedAnnotationSpecs = [s.annotation_spec() for s in all_schemes()]

    SupportedSortOrders = [so.name() for so in SortOrder.all()]

    # Prefix that marks a line as a header line
    HeaderLineStartSymbol = "#"

    def __init__(self, validation_stringency: ValidationStringency = None):
        """
        :param validation_stringency: the stringency used by `validate` when
        none is passed to it; defaults to Silent when `None`
        """
        self.validation_errors: List[MafValidationError] = []
        self.validation_stringency = (
            ValidationStringency.Silent
            if (validation_stringency is None)
            else validation_stringency
        )
        # Insertion-ordered so records are returned in the order they were added
        self.__records: Dict[str, MafHeaderRecord] = OrderedDict()
        self.__scheme = None

    def __getitem__(self, key: str) -> MafHeaderRecord:
        return self.__records[key]

    def __setitem__(self, key: str, value: MafHeaderRecord) -> None:
        # A record may only be stored under its own key
        assert key == value.key
        assert isinstance(value, MafHeaderRecord)
        self.__records[key] = value

    def __delitem__(self, key: str) -> None:
        del self.__records[key]

    def __iter__(self) -> Iterator[str]:
        return iter(self.__records)

    def __len__(self) -> int:
        return len(self.__records)

    def version(self) -> Optional[str]:
        """Gets the version or `None` if not present"""
        if MafHeader.VersionKey in self.__records:
            return str(self.__records[MafHeader.VersionKey].value)
        return None

    def annotation(self) -> Optional[str]:
        """Gets the annotation specification or `None` if not present"""
        if MafHeader.AnnotationSpecKey in self.__records:
            return str(self.__records[MafHeader.AnnotationSpecKey].value)
        return None

    def contigs(self) -> Optional[List[str]]:
        """Gets the contig list or `None` if not present"""
        if MafHeader.ContigKey in self.__records:
            return self.__records[MafHeader.ContigKey].value  # type: ignore
        return None

    def sort_order(self) -> SortOrderType:
        """Gets the sort order or `Unsorted` if not present"""
        if MafHeader.SortOrderKey in self.__records:
            return self.__records[MafHeader.SortOrderKey].value  # type: ignore
        return Unsorted()

    def scheme(self) -> Optional['MafScheme']:
        """Gets the scheme according to the version and annotation, None if
        no suitable scheme was found.
        """
        try:
            return find_scheme(version=self.version(), annotation=self.annotation())
        except ValueError:
            return None

    def validate(
        self,
        validation_stringency: ValidationStringency = None,
        logger: logging.Logger = Logger.RootLogger,
        reset_errors: bool = True,
    ) -> List[MafValidationError]:
        """Validates the header and returns a list of errors.
        Ensures that:
        - there is a version line in the header
        - the version is supported
        - the annotation specification is not in the header if the scheme is
          basic
        - the annotation specification is in the header if the scheme is not
          basic (or no scheme could be determined)
        - the annotation specification, when present, is supported

        :param validation_stringency: optionally the validation stringency to
        use, otherwise the stringency given at construction is used
        :param logger: the logger handed to the error processing
        :param reset_errors: when true, previously collected errors are
        discarded before validating; otherwise new errors are appended
        :return: the list of validation errors stored on this header
        """

        if reset_errors:
            self.validation_errors = []

        def add_error(error: MafValidationError) -> None:
            self.validation_errors.append(error)

        # get the scheme!
        scheme = self.scheme()

        # Same `is None` test as in __init__, so only an omitted stringency
        # falls back to the header's own setting
        if validation_stringency is None:
            validation_stringency = self.validation_stringency

        # ensure there's a version record
        if MafHeader.VersionKey not in self:
            add_error(
                MafValidationError(
                    MafValidationErrorType.HEADER_MISSING_VERSION,
                    "No version line found in the header",
                )
            )
        else:
            # ensure that the version is a supported version
            version = self[MafHeader.VersionKey].value
            if version not in MafHeader.SupportedVersions:
                add_error(
                    MafValidationError(
                        MafValidationErrorType.HEADER_UNSUPPORTED_VERSION,
                        "The version '%s' is not supported" % version,
                    )
                )

        # Check the annotation spec
        # 1. basic annotation specs should not be in the header
        # 2. non-basic annotation specs should be present (in the header) and
        # have a known value
        if scheme is not None and scheme.is_basic():
            if MafHeader.AnnotationSpecKey in self:
                add_error(
                    MafValidationError(
                        MafValidationErrorType.HEADER_UNSUPPORTED_ANNOTATION_SPEC,
                        "Unexpected annotation.spec line found in the header",
                    )
                )
        elif MafHeader.AnnotationSpecKey not in self:
            add_error(
                MafValidationError(
                    MafValidationErrorType.HEADER_MISSING_ANNOTATION_SPEC,
                    "No annotation.spec line found in the header",
                )
            )
        else:
            # ensure that the annotation spec is a supported annotation spec
            annotation = self[MafHeader.AnnotationSpecKey].value
            if annotation not in MafHeader.SupportedAnnotationSpecs:
                add_error(
                    MafValidationError(
                        MafValidationErrorType.HEADER_UNSUPPORTED_ANNOTATION_SPEC,
                        "The annotation.spec '%s' is not supported" % annotation,
                    )
                )

        # process validation errors
        MafValidationError.process_validation_errors(
            validation_errors=self.validation_errors,
            validation_stringency=validation_stringency,
            logger=logger,
        )

        return self.validation_errors

    def __str__(self) -> str:
        """gets the text representation of the header"""
        return "\n".join(str(record) for record in self.values())

    @staticmethod
    def _contigs_from_fasta_index(fasta_index: str) -> List[str]:
        """Reads the given FASTA index file and returns the first
        tab-separated column of every line."""
        # The context manager closes the file even if reading raises
        with open(fasta_index, "r") as handle:
            return [line.rstrip("\r\n").split("\t")[0] for line in handle]

    @classmethod
    def from_lines(
        cls,
        lines: List[str],
        validation_stringency: ValidationStringency = None,
        logger: logging.Logger = Logger.RootLogger,
    ) -> 'MafHeader':
        """
        Builds a header from the given lines and validates it.  Errors found
        while parsing the lines are kept alongside the validation errors.

        :param lines: a sequence of lines
        :param validation_stringency: optionally the validation stringency to
        use, otherwise use the default (Silent)
        :param logger: the logger to which to write errors
        :return: a MafHeader
        """

        header = cls(validation_stringency=validation_stringency)

        # Line numbers reported in errors are 1-based
        for line_number, line in enumerate(lines, start=1):
            record, error = MafHeaderRecord.from_line(line, line_number)
            if error:
                assert record is None
                header.validation_errors.append(error)
                continue

            assert record is not None
            if record.key in header:
                # The first record with a given key wins; later ones are
                # reported and dropped
                header.validation_errors.append(
                    MafValidationError(
                        MafValidationErrorType.HEADER_DUPLICATE_KEYS,
                        "Multiple header lines with key '%s' found" % record.key,
                        line_number=line_number,
                    )
                )
            else:
                header[record.key] = record

        # When contigs are present and the sort order is coordinate based,
        # rebuild the sort order record so that it is given the contig list
        contigs = header.contigs()
        if contigs:
            sort_order = header.sort_order()
            if sort_order and isinstance(sort_order, Coordinate):
                sokey = header[MafHeader.SortOrderKey].value.name()
                header[MafHeader.SortOrderKey] = MafHeaderSortOrderRecord(
                    value=sokey, contigs=contigs
                )

        # Keep the parse errors collected above
        header.validate(logger=logger, reset_errors=False)

        return header

    @classmethod
    def from_line_reader(
        cls,
        line_reader: LineReader,
        validation_stringency: ValidationStringency = None,
        logger: logging.Logger = Logger.RootLogger,
    ) -> 'MafHeader':
        """Reads a header from a line reader.  Lines are consumed while they
        start with the header line symbol; the first other line is left in the
        reader.

        :param line_reader: a line reader
        :param validation_stringency: optionally the validation stringency to
        use, otherwise use the default (Silent)
        :param logger: the logger to which to write errors
        :return: a MafHeader
        """
        lines: List[str] = []
        while True:
            # NOTE(review): assumes peek_line() returns a string (not None)
            # once the input is exhausted - confirm against LineReader
            line = line_reader.peek_line()
            if not line.startswith(MafHeader.HeaderLineStartSymbol):
                break
            lines.append(line_reader.read_line())

        return cls.from_lines(
            lines=lines, validation_stringency=validation_stringency, logger=logger
        )

    @classmethod
    def from_reader(
        cls,
        reader: 'MafReader',
        version: Optional[str] = None,
        annotation: Optional[str] = None,
        sort_order: SortOrderType = None,
        fasta_index: Optional[str] = None,
        contigs: Optional[list] = None,
    ) -> 'MafHeader':
        """Creates a header from a deep copy of the reader's header,
        overriding the records for which a value is given.

        :param reader: the reader whose header is copied
        :param version: optionally the version to set
        :param annotation: optionally the annotation specification to set
        :param sort_order: optionally the sort order to set
        :param fasta_index: optionally the path to a FASTA index file from
        which the contigs are read; takes precedence over `contigs`
        :param contigs: optionally the list of contigs to set
        :return: a MafHeader
        """
        header: 'MafHeader' = deepcopy(reader.header())
        if version:
            header[MafHeader.VersionKey] = MafHeaderVersionRecord(value=version)
        if annotation:
            header[MafHeader.AnnotationSpecKey] = MafHeaderAnnotationSpecRecord(
                value=annotation
            )
        if fasta_index:
            contigs = cls._contigs_from_fasta_index(fasta_index)
            header[MafHeader.ContigKey] = MafHeaderContigRecord(value=contigs)
        elif contigs:
            assert isinstance(
                contigs, list
            ), "contigs must be a list, but {0} found".format(type(contigs))
            header[MafHeader.ContigKey] = MafHeaderContigRecord(value=contigs)
        if sort_order:
            header[MafHeader.SortOrderKey] = MafHeaderSortOrderRecord(
                value=sort_order, fasta_index=fasta_index, contigs=contigs
            )
            # Without explicit contigs, fall back to the ones carried by the
            # sort order value, if it has any
            sort_order_contigs = getattr(
                header[MafHeader.SortOrderKey].value, "_contigs", None
            )
            if not contigs and sort_order_contigs:
                header[MafHeader.ContigKey] = MafHeaderContigRecord(
                    value=sort_order_contigs
                )
        return header

    @classmethod
    def from_defaults(
        cls,
        version: Optional[str] = None,
        annotation: Optional[str] = None,
        sort_order: SortOrderType = None,
        fasta_index: Optional[str] = None,
        contigs: Optional[list] = None,
    ) -> 'MafHeader':
        """Creates a new header containing only the records for which a value
        is given.

        :param version: optionally the version to set
        :param annotation: optionally the annotation specification to set
        :param sort_order: optionally the sort order to set
        :param fasta_index: optionally the path to a FASTA index file from
        which the contigs are read; takes precedence over `contigs`
        :param contigs: optionally the list of contigs to set
        :return: a MafHeader
        """
        header = MafHeader()
        if version:
            header[MafHeader.VersionKey] = MafHeaderVersionRecord(value=version)
        if annotation:
            header[MafHeader.AnnotationSpecKey] = MafHeaderAnnotationSpecRecord(
                value=annotation
            )
        if fasta_index:
            # NOTE(review): unlike from_reader, the `contigs` argument is not
            # replaced by the list read from the index, so the sort order
            # record below still receives the caller's `contigs` - confirm
            # whether this difference is intended
            _contigs = cls._contigs_from_fasta_index(fasta_index)
            header[MafHeader.ContigKey] = MafHeaderContigRecord(value=_contigs)
        elif contigs:
            assert isinstance(
                contigs, list
            ), "contigs must be a list, but {0} found".format(type(contigs))
            header[MafHeader.ContigKey] = MafHeaderContigRecord(value=contigs)
        if sort_order:
            header[MafHeader.SortOrderKey] = MafHeaderSortOrderRecord(
                value=sort_order, fasta_index=fasta_index, contigs=contigs
            )
            # Without explicit contigs, fall back to the ones carried by the
            # sort order value, if it has any
            sort_order_contigs = getattr(
                header[MafHeader.SortOrderKey].value, "_contigs", None
            )
            if not contigs and sort_order_contigs:
                header[MafHeader.ContigKey] = MafHeaderContigRecord(
                    value=sort_order_contigs
                )
        return header

    @classmethod
    def scheme_header_lines(cls, scheme: 'MafScheme') -> List[str]:
        """Gets the list of header lines as they would be printed in a
        MafHeader for the given scheme."""
        return [
            f"{MafHeader.HeaderLineStartSymbol}{MafHeader.VersionKey} {scheme.version()}",
            f"{MafHeader.HeaderLineStartSymbol}{MafHeader.AnnotationSpecKey} {scheme.annotation_spec()}",
        ]