def main(args=None, extra_subparser=None):
    """Command-line entry point for the maf tools.

    Sets up the root logger, builds the top-level argument parser with the
    global options and the ``Validate``, ``View`` and ``Sort`` subcommands,
    parses the arguments, registers any custom schemes and finally invokes
    the ``func`` attribute of the parsed options.

    :param args: the argument list handed to ``parse_args``
    :param extra_subparser: optional object whose ``add(subparsers=...)``
        method is called to register one more subcommand
    """
    Logger.setup_root_logger()

    cli = argparse.ArgumentParser()

    # Global options, given before the subcommand name
    cli.add_argument(
        '-v', '--validation-stringency',
        action=StoreEnumAction,
        type=ValidationStringency,
        default=ValidationStringency.Strict,
    )
    cli.add_argument(
        '-s', '--schemes',
        action='append',
        help="One or more JSON files with custom scheme definitions",
    )

    # A subcommand is mandatory
    commands = cli.add_subparsers(dest="subcommand")
    commands.required = True

    for tool in (Validate, View, Sort):
        tool.add(subparsers=commands)
    if extra_subparser:
        extra_subparser.add(subparsers=commands)

    parsed = cli.parse_args(args=args)

    # Make any custom schemes part of the set of known schemes
    all_schemes(extra_filenames=parsed.schemes)

    parsed.func(parsed)
def add(cls, subparsers):
    """Adds the given subcommand to the subparsers.

    Will always add the version (``-v/--version``) and annotation
    (``-a/--annotation``) options, then lets the subcommand add its own
    arguments via ``cls.__add_arguments__`` and sets ``cls.main`` as the
    ``func`` default.

    :param subparsers: the object returned by
        ``ArgumentParser.add_subparsers`` to which the subcommand is added
    :return: the newly created sub-parser
    """
    subparser = subparsers.add_parser(
        name=cls.__get_name__(),
        description=cls.__get_description__())

    # Look the schemes up once and reuse them for both options.
    schemes = list(all_schemes())
    # Sorted so that the choices (and the help text built from them) are
    # listed in the same order on every run; a bare set of strings has no
    # stable iteration order across interpreter invocations.
    versions = sorted(set(s.version() for s in schemes))
    annotations = [s.annotation_spec() for s in schemes]

    subparser.add_argument('-v', '--version', default=None,
                           choices=versions,
                           help="Use the given version when validating "
                                "rather than what's in the header.  "
                                "Choices: %s" % ", ".join(versions))
    subparser.add_argument('-a', '--annotation', type=str, default=None,
                           choices=annotations,
                           help="Use the given annotation specification "
                                "when validating rather than what's in "
                                "the header.  "
                                "Choices: %s" % ", ".join(annotations))

    cls.__add_arguments__(subparser)

    subparser.set_defaults(func=cls.main)
    return subparser
def __validate_options__(cls, options):
    """
    Validate the custom command line options.

    Sets the "scheme" member on the provided options object: the scheme
    matching the given version and/or annotation, or ``None`` when neither
    was given on the command line.  Parsers overriding this method should
    call it first.

    :raises ValueError: if a version or annotation was given but no
        matching scheme could be found
    """
    logger = Logger.get_logger(cls.__name__)

    # Nothing to resolve when neither option was supplied
    if not (options.version or options.annotation):
        options.scheme = None
        return

    options.scheme = find_scheme(version=options.version,
                                 annotation=options.annotation)

    if not options.scheme:
        available = "\n".join(
            ["\t%s %s" % (s.version(), s.annotation_spec())
             for s in all_schemes()]
        )
        raise ValueError(
            "Could not find a scheme with version '%s' "
            "and annotation '%s'.  Available schemes:\n%s"
            % (options.version, options.annotation, available))

    # Tell the user which half of the pair was filled in for them
    if options.version is None:
        logger.info(
            "No version given, defaulting to version '%s' "
            "based on -a/--annotation", options.scheme.version())

    if options.annotation is None:
        if options.scheme.is_basic():
            logger.info("The scheme is assumed to be the basic scheme")
        else:
            logger.info(
                "No annotation given, defaulting to "
                "annotation '%s'", options.scheme.annotation_spec())
class MafHeader(MutableMapping):
    """
    A header for a MAF file storing zero or more header records.  Each record
    represents a single line from the original MAF file.

    Provides methods for accessing the records in the order they were added,
    as well as methods for returning the version
    (:func:`~header.MafHeader.version`), annotation specification
    (:func:`~header.MafHeader.annotation`), scheme
    (:func:`~header.MafHeader.scheme`), and sort order
    (:func:`~header.MafHeader.sort_order`).

    Additionally, the :func:`~header.MafHeader.validate` method can be used to
    validate the format of the header as well as validate the contents
    relative to the given scheme.

    If no sort order is found in the header, the sort order will be
    "unsorted".
    """

    # Keys of the header records this class knows how to interpret
    VersionKey = "version"
    AnnotationSpecKey = "annotation.spec"
    SortOrderKey = "sort.order"  # NOQA
    ContigKey = "contigs"

    # Values accepted by validation; computed once, when the class is defined
    SupportedVersions = [s.version() for s in all_schemes()]
    SupportedAnnotationSpecs = [s.annotation_spec() for s in all_schemes()]
    SupportedSortOrders = [so.name() for so in SortOrder.all()]

    # Every header line starts with this symbol
    HeaderLineStartSymbol = "#"

    def __init__(self, validation_stringency: ValidationStringency = None):
        """
        :param validation_stringency: the stringency used by
            :func:`validate` when none is passed to it; defaults to Silent
        """
        self.validation_errors: List[MafValidationError] = []
        self.validation_stringency = (
            ValidationStringency.Silent
            if (validation_stringency is None)
            else validation_stringency
        )
        # Ordered so that records are iterated in insertion order
        self.__records: Dict[str, MafHeaderRecord] = OrderedDict()
        self.__scheme = None

    def __getitem__(self, key: str) -> MafHeaderRecord:
        return self.__records[key]

    def __setitem__(self, key: str, value: MafHeaderRecord) -> None:
        # A record must always be stored under its own key
        assert key == value.key
        assert isinstance(value, MafHeaderRecord)
        self.__records[key] = value

    def __delitem__(self, key: str) -> None:
        del self.__records[key]

    def __iter__(self) -> Iterator[str]:
        return iter(self.__records.keys())

    def __len__(self) -> int:
        return len(self.__records)

    def version(self) -> Optional[str]:
        """Gets the version or `None` if not present"""
        if MafHeader.VersionKey in self.__records:
            return str(self.__records[MafHeader.VersionKey].value)
        else:
            return None

    def annotation(self) -> Optional[str]:
        """Gets the annotation specification or `None` if not present"""
        if MafHeader.AnnotationSpecKey in self.__records:
            return str(self.__records[MafHeader.AnnotationSpecKey].value)
        else:
            return None

    def contigs(self) -> Optional[List[str]]:
        """Gets the contig list or `None` if not present"""
        if MafHeader.ContigKey in self.__records:
            return self.__records[MafHeader.ContigKey].value  # type: ignore
        else:
            return None

    def sort_order(self) -> SortOrderType:
        """Gets the sort order or `Unsorted` if not present"""
        if MafHeader.SortOrderKey in self.__records:
            return self.__records[MafHeader.SortOrderKey].value  # type: ignore
        else:
            return Unsorted()

    def scheme(self) -> Optional['MafScheme']:
        """Gets the scheme according to the version and annotation, None if
        no suitable scheme was found.
        """
        try:
            return find_scheme(version=self.version(),
                               annotation=self.annotation())
        except ValueError:
            return None

    def validate(
        self,
        validation_stringency: ValidationStringency = None,
        logger: logging.Logger = Logger.RootLogger,
        reset_errors: bool = True,
    ) -> List[MafValidationError]:
        """Validates the header and returns a list of errors.

        Ensures that:
        - there is a version line in the header
        - the version is supported
        - the annotation specification is not in the header if the scheme is
          basic
        - the annotation specification is in the header if the scheme is not
          basic
        - the annotation specification, when present, is supported

        :param validation_stringency: the stringency with which the errors
            are processed; falls back to the stringency of this header
        :param logger: the logger handed to the error processing
        :param reset_errors: when true, previously collected errors are
            discarded before validating
        :return: the list of validation errors stored on this header
        """
        if reset_errors:
            self.validation_errors = list()

        def add_error(error: MafValidationError) -> None:
            self.validation_errors.append(error)

        # get the scheme!
        scheme = self.scheme()

        if not validation_stringency:
            validation_stringency = self.validation_stringency

        # ensure there's a version record
        if MafHeader.VersionKey not in self:
            add_error(
                MafValidationError(
                    MafValidationErrorType.HEADER_MISSING_VERSION,
                    "No version line found in the header",
                )
            )
        else:
            # ensure that the version is a supported version
            version = self[MafHeader.VersionKey].value
            if version not in MafHeader.SupportedVersions:
                add_error(
                    MafValidationError(
                        MafValidationErrorType.HEADER_UNSUPPORTED_VERSION,
                        "The version '%s' is not supported" % version,
                    )
                )

        # Check the annotation spec
        # 1. basic annotation specs should not be in the header
        # 2. non-basic annotation specs should be present (in the header) and
        #    have a known value
        if scheme is not None and scheme.is_basic():
            if MafHeader.AnnotationSpecKey in self:
                add_error(
                    MafValidationError(
                        MafValidationErrorType.HEADER_UNSUPPORTED_ANNOTATION_SPEC,
                        "Unexpected annotation.spec line found in the header",
                    )
                )
        else:
            if MafHeader.AnnotationSpecKey not in self:
                add_error(
                    MafValidationError(
                        MafValidationErrorType.HEADER_MISSING_ANNOTATION_SPEC,
                        "No annotation.spec line found in the header",
                    )
                )
            else:
                # ensure that the annotation spec is a supported annotation
                # spec
                annotation = self[MafHeader.AnnotationSpecKey].value
                if annotation not in MafHeader.SupportedAnnotationSpecs:
                    add_error(
                        MafValidationError(
                            MafValidationErrorType.HEADER_UNSUPPORTED_ANNOTATION_SPEC,
                            "The annotation.spec '%s' is not supported" % annotation,
                        )
                    )

        # process validation errors
        MafValidationError.process_validation_errors(
            validation_errors=self.validation_errors,
            validation_stringency=validation_stringency,
            logger=logger,
        )

        return self.validation_errors

    def __str__(self) -> str:
        """gets the text representation of the header"""
        return "\n".join([str(record) for record in self.values()])

    @classmethod
    def from_lines(
        cls,
        lines: List[str],
        validation_stringency: ValidationStringency = None,
        logger: logging.Logger = Logger.RootLogger,
    ) -> 'MafHeader':
        """
        :param lines: a sequence of lines
        :param validation_stringency: optionally the validation stringency to
            use, otherwise use the default (Silent)
        :param logger: the logger to which to write errors
        :return: a MafHeader
        """
        header = cls(validation_stringency=validation_stringency)

        def add_error(error: MafValidationError) -> None:
            header.validation_errors.append(error)

        # line numbers reported in errors are 1-based
        for line_number, line in enumerate(lines, start=1):
            record, error = MafHeaderRecord.from_line(line, line_number)
            if error:
                assert record is None
                add_error(error)
            else:
                assert record is not None
                if record.key in header:
                    # keep the first record, report the later duplicates
                    add_error(
                        MafValidationError(
                            MafValidationErrorType.HEADER_DUPLICATE_KEYS,
                            "Multiple header lines with key '%s' found" % record.key,
                            line_number=line_number,
                        )
                    )
                else:
                    header[record.key] = record

        # When both contigs and a coordinate-based sort order are present,
        # rebuild the sort order record so that it is given the contigs.
        if header.contigs():
            if header.sort_order() and issubclass(
                header.sort_order().__class__, Coordinate
            ):
                sokey = header[MafHeader.SortOrderKey].value.name()
                header[MafHeader.SortOrderKey] = MafHeaderSortOrderRecord(
                    value=sokey, contigs=header.contigs()
                )

        # Parsing errors collected above must survive validation
        header.validate(logger=logger, reset_errors=False)

        return header

    @classmethod
    def from_line_reader(
        cls,
        line_reader: LineReader,
        validation_stringency: ValidationStringency = None,
        logger: logging.Logger = Logger.RootLogger,
    ) -> 'MafHeader':
        """Reads a header from a line reader.

        Lines are consumed from the reader for as long as the next line
        starts with the header line start symbol.

        :param line_reader: a line reader
        :param validation_stringency: optionally the validation stringency to
            use, otherwise use the default (Silent)
        :param logger: the logger to which to write errors
        :return: a MafHeader
        """
        lines = list()
        while True:
            line = line_reader.peek_line()
            # A falsy peek (empty string or None) is treated as the end of
            # the header rather than calling startswith on it.
            if not line or not line.startswith(MafHeader.HeaderLineStartSymbol):
                break
            lines.append(line_reader.read_line())
        return cls.from_lines(
            lines=lines, validation_stringency=validation_stringency, logger=logger
        )

    @classmethod
    def from_reader(
        cls,
        reader: 'MafReader',
        version: Optional[str] = None,
        annotation: Optional[str] = None,
        sort_order: SortOrderType = None,
        fasta_index: Optional[str] = None,
        contigs: Optional[list] = None,
    ) -> 'MafHeader':
        """Builds a header from a deep copy of the reader's header, replacing
        the records for which a value is given.

        :param reader: the reader whose header is copied
        :param version: optionally the version to set
        :param annotation: optionally the annotation specification to set
        :param sort_order: optionally the sort order to set
        :param fasta_index: optionally the path to a tab-separated FASTA
            index; the first column of each line is used as the contig list
            and takes precedence over ``contigs``
        :param contigs: optionally the list of contigs to set
        :return: a MafHeader
        """
        header: 'MafHeader' = deepcopy(reader.header())

        if version:
            header[MafHeader.VersionKey] = MafHeaderVersionRecord(value=version)
        if annotation:
            header[MafHeader.AnnotationSpecKey] = MafHeaderAnnotationSpecRecord(
                value=annotation
            )
        if fasta_index:
            # The context manager closes the index even if reading fails
            with open(fasta_index, "r") as handle:
                contigs = [line.rstrip("\r\n").split("\t")[0] for line in handle]
            header[MafHeader.ContigKey] = MafHeaderContigRecord(value=contigs)
        elif contigs:
            assert isinstance(
                contigs, list
            ), "contigs must be a list, but {0} found".format(type(contigs))
            header[MafHeader.ContigKey] = MafHeaderContigRecord(value=contigs)
        if sort_order:
            header[MafHeader.SortOrderKey] = MafHeaderSortOrderRecord(
                value=sort_order, fasta_index=fasta_index, contigs=contigs
            )
            # With no explicit contigs, fall back to the ones carried by the
            # sort order (if it has any).
            sort_value = header[MafHeader.SortOrderKey].value
            if not contigs and getattr(sort_value, "_contigs", None):
                header[MafHeader.ContigKey] = MafHeaderContigRecord(
                    value=sort_value._contigs
                )
        return header

    @classmethod
    def from_defaults(
        cls,
        version: Optional[str] = None,
        annotation: Optional[str] = None,
        sort_order: SortOrderType = None,
        fasta_index: Optional[str] = None,
        contigs: Optional[list] = None,
    ) -> 'MafHeader':
        """Builds a new header containing only the records for which a value
        is given.

        :param version: optionally the version to set
        :param annotation: optionally the annotation specification to set
        :param sort_order: optionally the sort order to set
        :param fasta_index: optionally the path to a tab-separated FASTA
            index; the first column of each line is used as the contig list
            and takes precedence over ``contigs``
        :param contigs: optionally the list of contigs to set
        :return: a MafHeader
        """
        header = MafHeader()

        if version:
            header[MafHeader.VersionKey] = MafHeaderVersionRecord(value=version)
        if annotation:
            header[MafHeader.AnnotationSpecKey] = MafHeaderAnnotationSpecRecord(
                value=annotation
            )
        if fasta_index:
            # The context manager closes the index even if reading fails
            with open(fasta_index, "r") as handle:
                _contigs = [line.rstrip("\r\n").split("\t")[0] for line in handle]
            header[MafHeader.ContigKey] = MafHeaderContigRecord(value=_contigs)
        elif contigs:
            assert isinstance(
                contigs, list
            ), "contigs must be a list, but {0} found".format(type(contigs))
            header[MafHeader.ContigKey] = MafHeaderContigRecord(value=contigs)
        if sort_order:
            header[MafHeader.SortOrderKey] = MafHeaderSortOrderRecord(
                value=sort_order, fasta_index=fasta_index, contigs=contigs
            )
            # With no explicit contigs, fall back to the ones carried by the
            # sort order (if it has any).
            sort_value = header[MafHeader.SortOrderKey].value
            if not contigs and getattr(sort_value, "_contigs", None):
                header[MafHeader.ContigKey] = MafHeaderContigRecord(
                    value=sort_value._contigs
                )
        return header

    @classmethod
    def scheme_header_lines(cls, scheme: 'MafScheme') -> List[str]:
        """Gets the list of header lines as they would be printed in a
        MafHeader for the given scheme."""
        return [
            f"{MafHeader.HeaderLineStartSymbol}{MafHeader.VersionKey} {scheme.version()}",
            f"{MafHeader.HeaderLineStartSymbol}{MafHeader.AnnotationSpecKey} {scheme.annotation_spec()}",
        ]