Beispiel #1
0
    def test_validate(self):
        # Smoke test: validating distribution "125.1" of the sample
        # "data.json" catalog must complete without raising.
        data_json = DataJson(os.path.join(SAMPLES_DIR, "data.json"))
        distribution = data_json.get_distribution(identifier="125.1")

        # Load the CSV behind the distribution, parsing the time column as
        # dates and using it as the frame's index.
        time_column = "indice_tiempo"
        raw_frame = pd.read_csv(
            distribution["downloadURL"],
            parse_dates=[time_column],
        )
        series_frame = raw_frame.set_index(time_column)

        dataset = data_json.get_dataset(
            identifier=distribution["dataset_identifier"],
        )

        validate_distribution(series_frame, data_json, dataset, distribution)
Beispiel #2
0
    def test_repeated_field_id(self):
        # Runs the validator on distribution "125.1" of the
        # "repeated_field_id.json" sample catalog.
        # NOTE(review): the fixture name suggests validation is expected to
        # fail here; confirm how that expectation is asserted (nothing in
        # this body checks for an exception).
        data_json = DataJson(
            os.path.join(SAMPLES_DIR, "repeated_field_id.json"))

        distribution_meta = data_json.get_distribution(identifier="125.1")
        dataset_meta = data_json.get_dataset(
            identifier=distribution_meta["dataset_identifier"],
        )

        # Load the CSV with the time column parsed as dates and set as index.
        time_column = "indice_tiempo"
        loaded = pd.read_csv(
            distribution_meta["downloadURL"],
            parse_dates=[time_column],
        )
        frame = loaded.set_index(time_column)

        validate_distribution(frame, data_json, dataset_meta,
                              distribution_meta)
Beispiel #3
0
    def run(self, distribution_model: Distribution, catalog: DataJson):
        """
        Validate one time-series distribution against its catalog metadata.

        The data is loaded through ``self.init_df``, the matching dataset and
        distribution entries are looked up in ``catalog``, and everything is
        handed to ``validate_distribution``.

        Args:
            distribution_model: model of the distribution to validate; its
                ``identifier`` and ``dataset.identifier`` are used for the
                catalog lookups.
            catalog: catalog queried for the dataset and distribution
                metadata.

        Returns:
            bool: True once ``validate_distribution`` has returned. This
            method never returns False; a failure surfaces as whatever
            exception the calls below raise.

        Raises:
            ValueError: if the model's dataset identifier is None.
        """

        data = self.init_df(distribution_model)

        parent_id = distribution_model.dataset.identifier
        if parent_id is None:
            message = NO_DATASET_IDENTIFIER.format(
                distribution_model.identifier)
            raise ValueError(message)

        # Fetch both metadata entries from the catalog, dataset first.
        dataset_meta = catalog.get_dataset(parent_id)
        distribution_meta = catalog.get_distribution(
            distribution_model.identifier)

        validate_distribution(data, catalog, dataset_meta, distribution_meta)

        return True