def test_single_null_not_ok_with_custom_option(self):
    """Check that a custom max_missing_proportion rejects single_null.csv.

    With ``max_missing_proportion=0.2`` the missing-values validation is
    expected to raise ``FieldTooManyMissingsError``.
    """
    reader = CSVReader(
        self.distribution,
        file_source=csv_path("single_null.csv"),
    )
    frame = reader.read()
    custom_options = ValidationOptions.create_with_defaults(
        max_missing_proportion=0.2
    )

    # Build the validation inside the context manager so that an error
    # raised at construction time is also captured.
    with self.assertRaises(FieldTooManyMissingsError):
        validation = MissingValuesValidation(frame, options=custom_options)
        validation.validate()
def test_too_long_title(self):
    """Check that the default title-length validation rejects the fixture.

    Reads ``70_character_long_column_title.csv`` and expects
    ``FieldTitleTooLongError`` from ``TitleLengthValidation``.
    """
    reader = CSVReader(
        self.distribution,
        file_source=csv_path("70_character_long_column_title.csv"),
    )
    frame = reader.read()

    with self.assertRaises(FieldTitleTooLongError):
        validation = TitleLengthValidation(frame)
        validation.validate()
def test_custom_length(self):
    """Check that ``max_field_title_len=71`` accepts the long-title fixture.

    The test passes as long as ``validate()`` does not raise.
    """
    reader = CSVReader(
        self.distribution,
        file_source=csv_path("70_character_long_column_title.csv"),
    )
    frame = reader.read()
    custom_options = ValidationOptions.create_with_defaults(
        max_field_title_len=71
    )

    validation = TitleLengthValidation(frame, options=custom_options)
    validation.validate()
def test_validation_with_custom_max_too_small_proportion(self):
    """Check that ``max_too_small_proportion=1.01`` accepts few_values.csv.

    The test passes as long as ``validate()`` does not raise.
    """
    reader = CSVReader(
        self.distribution,
        file_source=csv_path("few_values.csv"),
    )
    frame = reader.read()

    # Always valid
    custom_options = ValidationOptions.create_with_defaults(
        max_too_small_proportion=1.01
    )

    validation = FieldViewValuesValidation(frame, options=custom_options)
    validation.validate()
def test_custom_null_proportion(self):
    """Check that ``max_null_series_proportion=1.1`` accepts all_null.csv.

    The test passes as long as ``validate()`` does not raise.
    """
    reader = CSVReader(
        self.distribution,
        file_source=csv_path("all_null.csv"),
    )
    frame = reader.read()

    # 100% null is allowed
    custom_options = ValidationOptions.create_with_defaults(
        max_null_series_proportion=1.1
    )

    validation = DistributionNullSeriesValidation(
        frame,
        self.distribution,
        options=custom_options,
    )
    validation.validate()
def test_validation_with_custom_minimum_values(self):
    """Check that ``minimum_values=0`` accepts few_values.csv.

    The test passes as long as ``validate()`` does not raise.
    """
    reader = CSVReader(
        self.distribution,
        file_source=csv_path("few_values.csv"),
    )
    frame = reader.read()
    custom_options = ValidationOptions.create_with_defaults(minimum_values=0)

    # The second and third positional arguments are deliberately None,
    # exactly as the validation is invoked elsewhere with no extra context.
    validation = FieldViewValuesValidation(
        frame, None, None, options=custom_options
    )
    validation.validate()
def test_all_null_distribution_is_invalid(self):
    """Check that all_null.csv is rejected by the null-series validation.

    Expects ``DistributionTooManyNullSeriesError`` from
    ``DistributionNullSeriesValidation``.
    """
    reader = CSVReader(
        self.distribution,
        file_source=csv_path("all_null.csv"),
    )
    frame = reader.read()

    with self.assertRaises(DistributionTooManyNullSeriesError):
        validation = DistributionNullSeriesValidation(
            frame, self.distribution, None
        )
        validation.validate()
def test_full_serie_ok(self):
    """Check that sample_data.csv passes the missing-values validation.

    The test passes as long as ``validate()`` does not raise.
    """
    reader = CSVReader(
        self.distribution,
        file_source=csv_path("sample_data.csv"),
    )
    frame = reader.read()

    validation = MissingValuesValidation(frame)
    validation.validate()
def test_read_from_file_source(self):
    """Check that reading via ``file_source`` yields a ``title1`` column.

    Uses the first distribution of ``daily_periodicity_latin1.json`` and
    reads ``sample_data.csv`` through ``CSVReader``.
    """
    catalog = read_data_json("daily_periodicity_latin1.json")
    first_distribution = catalog.get_distributions()[0]

    reader = CSVReader(
        first_distribution,
        file_source=csv_path("sample_data.csv"),
    )
    frame = reader.read()

    column_names = list(frame.columns)
    self.assertIn("title1", column_names)
def test_validation_one_value_is_invalid(self):
    """Check that few_values.csv is rejected under default options.

    Expects ``FieldFewValuesError`` from ``FieldViewValuesValidation``.
    """
    reader = CSVReader(
        self.distribution,
        file_source=csv_path("few_values.csv"),
    )
    frame = reader.read()

    with self.assertRaises(FieldFewValuesError):
        validation = FieldViewValuesValidation(frame)
        validation.validate()