Esempio n. 1
0
    def run(self):
        """Read the distribution described by ``self.distribution_metadata``.

        Returns:
            Whatever ``CSVReader(self.distribution_metadata).read()`` returns.

        Raises:
            Exception: any error raised while building the reader or reading
                is logged at debug level and re-raised unchanged.
        """
        # Only the read itself is guarded; the success message is logged
        # once the read is known to have completed.
        try:
            distribution_df = CSVReader(self.distribution_metadata).read()
        except Exception:
            logging.debug('>>> Falló la descarga de la distribución <<<')
            raise

        logging.debug('>>> Descargó la distribución <<<')
        return distribution_df
 def test_too_long_title(self):
     """Validating the long-column-title fixture raises FieldTitleTooLongError."""
     source = csv_path("70_character_long_column_title.csv")
     reader = CSVReader(self.distribution, file_source=source)
     frame = reader.read()

     # No custom options are passed to the validation here.
     with self.assertRaises(FieldTitleTooLongError):
         TitleLengthValidation(frame).validate()
Esempio n. 3
0
 def test_single_null_not_ok_with_custom_option(self):
     """With max_missing_proportion=0.2 the single-null fixture is rejected."""
     source = csv_path("single_null.csv")
     frame = CSVReader(self.distribution, file_source=source).read()

     custom_options = ValidationOptions.create_with_defaults(
         max_missing_proportion=0.2,
     )

     with self.assertRaises(FieldTooManyMissingsError):
         MissingValuesValidation(frame, options=custom_options).validate()
 def test_custom_length(self):
     """With max_field_title_len=71 the long-title fixture validates cleanly."""
     source = csv_path("70_character_long_column_title.csv")
     frame = CSVReader(self.distribution, file_source=source).read()

     custom_options = ValidationOptions.create_with_defaults(
         max_field_title_len=71,
     )

     # The test passes as long as validate() does not raise.
     validation = TitleLengthValidation(frame, options=custom_options)
     validation.validate()
Esempio n. 5
0
    def test_validation_with_custom_max_too_small_proportion(self):
        """The few-values fixture validates with max_too_small_proportion=1.01."""
        source = csv_path("few_values.csv")
        frame = CSVReader(self.distribution, file_source=source).read()

        # Always valid
        custom_options = ValidationOptions.create_with_defaults(
            max_too_small_proportion=1.01,
        )
        validation = FieldViewValuesValidation(frame, options=custom_options)
        validation.validate()
Esempio n. 6
0
    def test_custom_null_proportion(self):
        """The all-null fixture validates with max_null_series_proportion=1.1."""
        source = csv_path("all_null.csv")
        frame = CSVReader(self.distribution, file_source=source).read()

        # 100% null is allowed
        custom_options = ValidationOptions.create_with_defaults(
            max_null_series_proportion=1.1,
        )

        validation = DistributionNullSeriesValidation(
            frame, self.distribution, options=custom_options
        )
        validation.validate()
Esempio n. 7
0
    def test_validation_with_custom_minimum_values(self):
        """The few-values fixture validates when minimum_values is set to 0."""
        source = csv_path("few_values.csv")
        frame = CSVReader(self.distribution, file_source=source).read()

        custom_options = ValidationOptions.create_with_defaults(minimum_values=0)

        # The second and third positional arguments are deliberately None.
        validation = FieldViewValuesValidation(
            frame, None, None, options=custom_options
        )
        validation.validate()
Esempio n. 8
0
    def load_ts_distribution(
        self,
        identifier,
        catalog_id=None,
        is_text_file=None,
        is_excel_file=None,
        is_csv_file=None,
        file_source=None,
    ):
        """Load the time-series distribution identified by ``identifier``.

        The loading strategy is chosen from the explicit ``is_*`` flags or,
        failing that, from the generation method reported for the
        distribution.

        Args:
            identifier: identifier passed to ``self.get_distribution``.
            catalog_id: forwarded to the text-file generator.
            is_text_file: truthy to force generation from a text file.
            is_excel_file: accepted but not consulted by this method.
            is_csv_file: truthy to force reading through ``CSVReader``.
            file_source: forwarded to whichever loader is selected.

        Returns:
            The result of the selected loader.

        Raises:
            NotImplementedError: when neither strategy applies.
        """
        distribution = self.get_distribution(identifier)
        generation_method = get_distribution_generation_method(distribution)

        # Generated from a text file with parameters.
        use_text_file = is_text_file or generation_method == "text_file"
        if use_text_file:
            return generate_ts_distribution_from_text_file(
                self, identifier, catalog_id, file_source=file_source
            )

        # Read from a CSV that complies with the specification.
        use_csv_file = is_csv_file or generation_method == "csv_file"
        if use_csv_file:
            reader = CSVReader(distribution, self.verify_ssl, file_source)
            return reader.read()

        raise NotImplementedError("{} no se puede leer".format(identifier))
Esempio n. 9
0
 def test_all_null_distribution_is_invalid(self):
     """The all-null fixture raises DistributionTooManyNullSeriesError."""
     source = csv_path("all_null.csv")
     reader = CSVReader(self.distribution, file_source=source)
     frame = reader.read()

     # The third positional argument is explicitly None.
     with self.assertRaises(DistributionTooManyNullSeriesError):
         DistributionNullSeriesValidation(
             frame, self.distribution, None
         ).validate()
Esempio n. 10
0
 def test_full_serie_ok(self):
     """The sample-data fixture passes the missing-values validation."""
     source = csv_path("sample_data.csv")
     frame = CSVReader(self.distribution, file_source=source).read()

     # The test passes as long as validate() does not raise.
     validation = MissingValuesValidation(frame)
     validation.validate()
Esempio n. 11
0
 def _read_csv(self, filename):
     """Read the first distribution of the data.json loaded from ``filename``.

     Returns whatever ``CSVReader(...).read()`` returns for that distribution.
     """
     distributions = read_data_json(filename).get_distributions()
     first_distribution = distributions[0]
     return CSVReader(first_distribution).read()
Esempio n. 12
0
 def test_read_from_file_source(self):
     """Reading with an explicit file_source yields a "title1" column."""
     catalog = read_data_json("daily_periodicity_latin1.json")
     first_distribution = catalog.get_distributions()[0]

     source = csv_path("sample_data.csv")
     reader = CSVReader(first_distribution, file_source=source)
     frame = reader.read()

     column_names = list(frame.columns)
     self.assertIn("title1", column_names)
Esempio n. 13
0
 def test_validation_one_value_is_invalid(self):
     """The few-values fixture raises FieldFewValuesError with no options."""
     source = csv_path("few_values.csv")
     reader = CSVReader(self.distribution, file_source=source)
     frame = reader.read()

     with self.assertRaises(FieldFewValuesError):
         FieldViewValuesValidation(frame).validate()