def test_validate(self):
    """Run ``validate_distribution`` on distribution "125.1" of ``data.json``.

    Loads the sample catalog from ``SAMPLES_DIR``, reads the CSV referenced
    by the distribution's ``downloadURL`` with ``indice_tiempo`` parsed as
    dates and used as the index, and hands everything to
    ``validate_distribution``. There is no explicit assertion here: the
    test passes as long as nothing raises.
    """
    time_column = "indice_tiempo"

    catalog_path = os.path.join(SAMPLES_DIR, "data.json")
    catalog = DataJson(catalog_path)
    distribution = catalog.get_distribution(identifier="125.1")

    # Read first, then re-index, instead of chaining both calls.
    raw_frame = pd.read_csv(distribution["downloadURL"],
                            parse_dates=[time_column])
    series_frame = raw_frame.set_index(time_column)

    dataset = catalog.get_dataset(
        identifier=distribution["dataset_identifier"])

    validate_distribution(series_frame, catalog, dataset, distribution)
def test_repeated_field_id(self):
    """Run ``validate_distribution`` against ``repeated_field_id.json``.

    Uses distribution "125.1" of that sample catalog, together with its
    parent dataset and the CSV referenced by ``downloadURL`` (indexed by
    the parsed ``indice_tiempo`` column).

    NOTE(review): no assertion is visible in this block; whether the
    validation is expected to raise (e.g. through a decorator outside this
    view) cannot be determined from here -- confirm.
    """
    time_column = "indice_tiempo"
    distribution_id = "125.1"

    catalog = DataJson(os.path.join(SAMPLES_DIR, "repeated_field_id.json"))
    distribution = catalog.get_distribution(identifier=distribution_id)
    dataset = catalog.get_dataset(
        identifier=distribution["dataset_identifier"])

    # Read first, then re-index, instead of chaining both calls.
    raw_frame = pd.read_csv(distribution["downloadURL"],
                            parse_dates=[time_column])
    series_frame = raw_frame.set_index(time_column)

    validate_distribution(series_frame, catalog, dataset, distribution)
def run(self, distribution_model: Distribution, catalog: DataJson):
    """Validate one distribution model against its catalog metadata.

    Builds the data frame with ``self.init_df``, looks up the dataset and
    distribution entries in ``catalog`` and passes all of them to
    ``validate_distribution``.

    Args:
        distribution_model: model whose ``identifier`` and
            ``dataset.identifier`` drive the catalog lookups.
        catalog: catalog queried through ``get_dataset`` and
            ``get_distribution``.

    Returns:
        bool: ``True`` once ``validate_distribution`` has returned. This
        method never returns ``False``; presumably validation failures
        surface as exceptions from ``validate_distribution`` -- confirm.

    Raises:
        ValueError: if the model's dataset identifier is ``None``.
    """
    frame = self.init_df(distribution_model)

    parent_dataset_id = distribution_model.dataset.identifier
    if parent_dataset_id is None:
        message = NO_DATASET_IDENTIFIER.format(distribution_model.identifier)
        raise ValueError(message)

    # Arguments are evaluated left to right, so the dataset lookup still
    # happens before the distribution lookup.
    validate_distribution(
        frame,
        catalog,
        catalog.get_dataset(parent_dataset_id),
        catalog.get_distribution(distribution_model.identifier),
    )
    return True