def validate(source: str, version: str, output_csv: bool, schemas: Path):
    """
    Validate a transit data file against a specified schema.
    """
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably surfaced as the command's help text (this function raises
    # ClickException) -- confirm before expanding it.
    #
    # source     -- path of the data set to validate (wrapped in a DataSet).
    # version    -- key used to look up the validator via Validators.
    # output_csv -- when true, violations are also written through CSVOutput.
    # schemas    -- optional path handed to _load_schemas before the lookup.
    #
    # Raises ClickException when the validator lookup fails with a
    # TransidateException.
    if schemas:
        _load_schemas(schemas)

    try:
        validator = Validators.get_validator(version)
    except TransidateException as exc:
        console.rule(HEADER + "Error")
        # Chain the original error so the root cause stays on the traceback.
        raise ClickException(str(exc)) from exc

    dataset = DataSet(Path(source))
    result = _validate(validator, dataset)

    if result.status == ValidationResult.OK:
        console.rule(HEADER + "Results")
        console.print("No issues found.")
    else:
        console.rule(HEADER + f"Results: {len(result.violations)} Issues found")
        ConsoleOutput(dataset=dataset, result=result).output()
        if output_csv:
            CSVOutput(dataset=dataset, result=result).output()
def output(self) -> None:
    """Write the result to a CSV file at ``self.get_output_path()``.

    A ``csv.DictWriter`` with the columns ``filename``, ``line`` and
    ``message`` is created on the opened file and handed to
    ``self._write_csv``, which is responsible for emitting the rows.
    """
    output_path = self.get_output_path()
    console.print(f"Outputing CSV to {output_path.as_posix()}")
    # The csv module does its own newline handling; without newline="" the
    # text layer translates line endings again, which produces "\r\r\n" on
    # Windows and mangles newlines embedded in quoted fields.
    with output_path.open("w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["filename", "line", "message"])
        self._write_csv(writer)
def list(schemas: Path):
    """
    Lists all the schemas that transidate can validate a data set against.
    """
    # Hand the user-supplied path to _load_schemas first, when one was given.
    if schemas:
        _load_schemas(schemas)

    console.rule(HEADER + "Available Schemas")
    # One "<name>: <url>" line per registered schema.
    for schema_name in Validators.registered_schemas:
        schema_url = Validators.get_validator(schema_name).url
        console.print(f"{schema_name}: {schema_url}")
def schema(self) -> etree.XMLSchema:
    """Return the XML schema for this validator, fetching it on first use.

    The schema archive is downloaded from ``self.url``, unpacked into a
    temporary directory and compiled with ``self.get_xsd``. The compiled
    schema is stored on ``self._schema`` and returned directly on later
    calls.

    Raises:
        requests.RequestException: if the download times out, fails, or
            the server answers with an HTTP error status.
        zipfile.BadZipFile: if the downloaded payload is not a zip archive.
    """
    if self._schema:
        return self._schema

    console.print(f"Fetching schema from {self.url}.", overflow="ellipsis")
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(self.url, timeout=60)
    # Fail on HTTP errors here instead of feeding an error page to zipfile.
    response.raise_for_status()

    # The schema must be compiled while the extracted files still exist,
    # i.e. before the temporary directory is cleaned up.
    with tempfile.TemporaryDirectory() as tempdir:
        with zipfile.ZipFile(io.BytesIO(response.content)) as zf:
            console.print(f"Extracting schema to {tempdir}.")
            zf.extractall(tempdir)
        self._schema = self.get_xsd(Path(tempdir))
    return self._schema
def get_xsd(self, schema_path: Path) -> etree.XMLSchema:
    """Locate ``self.root_path`` under *schema_path* and compile it.

    Args:
        schema_path: Directory that is searched recursively for a file
            matching ``self.root_path``; the first match is used.

    Returns:
        The compiled ``etree.XMLSchema``.

    Raises:
        NotSupported: if no matching file is found, the file cannot be
            read, or its content is not a valid XML schema.
    """
    # Take the first match lazily instead of materialising every hit.
    fullpath = next(schema_path.glob("**/" + self.root_path), None)
    if fullpath is None:
        raise NotSupported(
            f"Could not find {self.root_path!r} in schema directory")

    console.print(f"Parsing schema file {self.root_path}.")
    # Only the parse/compile step is guarded; the original cause is chained
    # so the underlying lxml/OS error stays visible.
    try:
        doc = etree.parse(fullpath.as_posix())
        schema = etree.XMLSchema(doc)
    except OSError as exc:
        raise NotSupported(
            f"Source {self.root_path!r} cannot be parsed.") from exc
    except etree.XMLSchemaParseError as exc:
        raise NotSupported(str(exc)) from exc
    return schema
def validate(self, dataset: DataSet) -> ValidationResult:
    """Validate every document of *dataset* against ``self.schema``.

    Each document yielded by ``dataset.documents()`` is checked in turn.
    Schema violations and XML syntax errors are collected as ``Violation``
    objects; any of them switches the overall status to
    ``ValidationResult.ERROR``.

    Returns:
        A ``ValidationResult`` carrying the final status and all collected
        violations.
    """
    collected = []
    outcome = ValidationResult.OK

    for document in dataset.documents():
        console.print(f"Validating {document.name}.")
        try:
            self.schema.assertValid(document.tree)
        except etree.DocumentInvalid:
            outcome = ValidationResult.ERROR
            # The entries describing the failure are read from the schema's
            # error log.
            log_entries = self.schema.error_log  # type: ignore
            collected.extend(
                [Violation.from_log_entry(entry) for entry in log_entries]
            )
        except etree.XMLSyntaxError as exc:
            outcome = ValidationResult.ERROR
            collected.append(Violation.from_syntax_error(exc))

    return ValidationResult(status=outcome, violations=collected)
def output(self) -> None:
    """Print each violation of ``self.result`` as ``filename:line: message``."""
    # NOTE(review): if ``console`` is a Rich console, bracketed text inside a
    # message may be interpreted as markup -- confirm whether that matters.
    for item in self.result.violations:
        location = f"{item.filename}:{item.line}"
        console.print(f"{location}: {item.message}")