def run(self):
    """Read the distribution described by ``self.distribution_metadata``.

    Builds a ``CSVReader`` from ``self.distribution_metadata`` and returns
    whatever its ``read()`` call produces. A debug message is logged on
    success and on failure.

    Raises:
        Exception: any exception raised while building the reader, reading,
            or logging the success message is logged at debug level and
            re-raised unchanged.
    """
    try:
        reader = CSVReader(self.distribution_metadata)
        distribution_df = reader.read()
        logging.debug('>>> Descargó la distribución <<<')
    except Exception:
        # Log only for traceability; the caller still sees the original error.
        logging.debug('>>> Falló la descarga de la distribución <<<')
        raise
    return distribution_df
def test_too_long_title(self):
    """Default title-length validation rejects the long-title fixture.

    Reads ``70_character_long_column_title.csv`` and expects
    ``FieldTitleTooLongError`` when validating with default options.
    """
    fixture = csv_path("70_character_long_column_title.csv")
    reader = CSVReader(self.distribution, file_source=fixture)
    frame = reader.read()

    with self.assertRaises(FieldTitleTooLongError):
        TitleLengthValidation(frame).validate()
def test_single_null_not_ok_with_custom_option(self):
    """The ``single_null.csv`` fixture fails with ``max_missing_proportion=0.2``.

    Expects ``FieldTooManyMissingsError`` from ``MissingValuesValidation``
    when the custom options are supplied.
    """
    fixture = csv_path("single_null.csv")
    frame = CSVReader(self.distribution, file_source=fixture).read()
    strict_options = ValidationOptions.create_with_defaults(
        max_missing_proportion=0.2,
    )

    with self.assertRaises(FieldTooManyMissingsError):
        MissingValuesValidation(frame, options=strict_options).validate()
def test_custom_length(self):
    """The long-title fixture validates when ``max_field_title_len`` is 71.

    The test passes as long as ``validate()`` does not raise.
    """
    fixture = csv_path("70_character_long_column_title.csv")
    reader = CSVReader(self.distribution, file_source=fixture)
    frame = reader.read()

    relaxed_options = ValidationOptions.create_with_defaults(
        max_field_title_len=71,
    )
    validation = TitleLengthValidation(frame, options=relaxed_options)
    validation.validate()
def test_validation_with_custom_max_too_small_proportion(self):
    """``few_values.csv`` validates when ``max_too_small_proportion`` is 1.01.

    The test passes as long as ``validate()`` does not raise.
    """
    fixture = csv_path("few_values.csv")
    frame = CSVReader(self.distribution, file_source=fixture).read()

    # Always valid with this threshold.
    always_valid = ValidationOptions.create_with_defaults(
        max_too_small_proportion=1.01,
    )
    validation = FieldViewValuesValidation(frame, options=always_valid)
    validation.validate()
def test_custom_null_proportion(self):
    """``all_null.csv`` validates when ``max_null_series_proportion`` is 1.1.

    The test passes as long as ``validate()`` does not raise.
    """
    fixture = csv_path("all_null.csv")
    frame = CSVReader(self.distribution, file_source=fixture).read()

    # 100% null is allowed with this threshold.
    permissive = ValidationOptions.create_with_defaults(
        max_null_series_proportion=1.1,
    )
    validation = DistributionNullSeriesValidation(
        frame, self.distribution, options=permissive
    )
    validation.validate()
def test_validation_with_custom_minimum_values(self):
    """``few_values.csv`` validates when ``minimum_values`` is set to 0.

    The test passes as long as ``validate()`` does not raise.
    """
    fixture = csv_path("few_values.csv")
    frame = CSVReader(self.distribution, file_source=fixture).read()

    no_minimum = ValidationOptions.create_with_defaults(minimum_values=0)
    # The two positional ``None`` arguments are passed through exactly as given.
    validation = FieldViewValuesValidation(frame, None, None, options=no_minimum)
    validation.validate()
def load_ts_distribution(
    self,
    identifier,
    catalog_id=None,
    is_text_file=None,
    is_excel_file=None,
    is_csv_file=None,
    file_source=None,
):
    """Load the time-series distribution identified by ``identifier``.

    The distribution is looked up with ``self.get_distribution`` and its
    generation method is obtained from ``get_distribution_generation_method``.
    The explicit ``is_*`` flags take precedence over the detected method.

    Args:
        identifier: passed to ``self.get_distribution`` and used in the
            error message.
        catalog_id: forwarded to the text-file generator.
        is_text_file: truthy to force the text-file path.
        is_excel_file: accepted but not read anywhere in this method.
        is_csv_file: truthy to force the CSV path.
        file_source: forwarded to the text-file generator or ``CSVReader``.

    Returns:
        The result of ``generate_ts_distribution_from_text_file`` or of
        ``CSVReader(...).read()``, depending on the selected path.

    Raises:
        NotImplementedError: when neither the text-file nor the CSV path
            applies.
    """
    distribution = self.get_distribution(identifier)
    generation_method = get_distribution_generation_method(distribution)

    # Generated from a text file with parameters.
    use_text_file = is_text_file or generation_method == "text_file"
    if use_text_file:
        return generate_ts_distribution_from_text_file(
            self, identifier, catalog_id, file_source=file_source
        )

    # Anything that is not a spec-compliant CSV cannot be read here.
    use_csv_file = is_csv_file or generation_method == "csv_file"
    if not use_csv_file:
        raise NotImplementedError("{} no se puede leer".format(identifier))

    # Read from a CSV that complies with the specification.
    reader = CSVReader(distribution, self.verify_ssl, file_source)
    return reader.read()
def test_all_null_distribution_is_invalid(self):
    """``all_null.csv`` is rejected by the null-series validation.

    Expects ``DistributionTooManyNullSeriesError`` when
    ``DistributionNullSeriesValidation`` is built with ``None`` as its
    third positional argument.
    """
    fixture = csv_path("all_null.csv")
    frame = CSVReader(self.distribution, file_source=fixture).read()

    with self.assertRaises(DistributionTooManyNullSeriesError):
        DistributionNullSeriesValidation(
            frame, self.distribution, None
        ).validate()
def test_full_serie_ok(self):
    """``sample_data.csv`` passes ``MissingValuesValidation`` with defaults.

    The test passes as long as ``validate()`` does not raise.
    """
    fixture = csv_path("sample_data.csv")
    reader = CSVReader(self.distribution, file_source=fixture)
    frame = reader.read()

    validation = MissingValuesValidation(frame)
    validation.validate()
def _read_csv(self, filename):
    """Read the first distribution of the data.json loaded from ``filename``.

    Returns the result of ``CSVReader(distribution).read()`` for the first
    item returned by ``get_distributions()``.
    """
    distributions = read_data_json(filename).get_distributions()
    first_distribution = distributions[0]
    return CSVReader(first_distribution).read()
def test_read_from_file_source(self):
    """Reading with an explicit ``file_source`` yields a ``title1`` column.

    The distribution comes from ``daily_periodicity_latin1.json`` and the
    data is read from the ``sample_data.csv`` path.
    """
    catalog = read_data_json("daily_periodicity_latin1.json")
    first_distribution = catalog.get_distributions()[0]
    local_csv = csv_path("sample_data.csv")

    reader = CSVReader(first_distribution, file_source=local_csv)
    frame = reader.read()

    column_names = list(frame.columns)
    self.assertIn("title1", column_names)
def test_validation_one_value_is_invalid(self):
    """``few_values.csv`` fails ``FieldViewValuesValidation`` with defaults.

    Expects ``FieldFewValuesError`` to be raised.
    """
    fixture = csv_path("few_values.csv")
    reader = CSVReader(self.distribution, file_source=fixture)
    frame = reader.read()

    with self.assertRaises(FieldFewValuesError):
        FieldViewValuesValidation(frame).validate()