class BiasFairnessMetadata(CopyToTable):
    """Copy bias/fairness metadata rows into the RDS metadata table."""

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()
    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    def requires(self):
        return BiasFairnessUnitTest(ingest_type=self.ingest_type, bucket=self.bucket)

    # Postgres connection settings read from the local credentials file
    credentials = get_postgres_credentials("conf/local/credentials.yaml")
    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']

    table = 'dpa_metadata.bias_fairness'

    ## Metadata columns saved in RDS file
    columns = [
        ("execution_time", "VARCHAR"),
        ("value_k", "VARCHAR"),
        ("v_group", "VARCHAR"),
        ("FOR_p", "VARCHAR"),
        ("FNR_p", "VARCHAR"),
    ]

    def rows(self):
        # NOTE(review): `csv_local_file` is not defined on this class —
        # presumably a module-level name (sibling tasks define it as a
        # class attribute); TODO confirm against the rest of the module.
        frame = pd.read_csv(csv_local_file, header=None)
        for record in frame.itertuples(index=False):
            yield record
class ExtractUnitTest(CopyToTable):
    """Copy the extract unit-test result into `dpa_unittest.extract`.

    Raises TypeError when the recorded result contains "FAILED"
    (i.e. the extract produced an empty dataframe).
    """

    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    def requires(self):
        return APIDataIngestion(self.ingest_type)

    # Postgres connection settings read from the local credentials file
    credentials = get_postgres_credentials("conf/local/credentials.yaml")
    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']

    table = 'dpa_unittest.extract'

    columns = [("Date", "VARCHAR"), ("Result", "VARCHAR")]

    def rows(self):
        # NOTE(review): `csv_local_file` is not defined on this class —
        # presumably a module-level name; TODO confirm.
        reader = pd.read_csv(csv_local_file, header=None)
        # Fail fast: the original raised only AFTER the generator was
        # exhausted, so every row had already been handed to the DB copy
        # before the failure surfaced. Check the result cell (column 1,
        # row index 1, with header=None) before emitting anything.
        if "FAILED" in reader[1][1]:
            raise TypeError("FAILED, you have an empty dataframe")
        for element in reader.itertuples(index=False):
            yield element
class Monitor(CopyToTable):
    """Copy model-prediction monitoring rows into `dpa_monitor.monitor`."""

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()
    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    def requires(self):
        return StorePredictionsApi(ingest_type=self.ingest_type, bucket=self.bucket)

    # Postgres connection settings read from the local credentials file
    credentials = get_postgres_credentials("conf/local/credentials.yaml")
    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']

    table = 'dpa_monitor.monitor'

    ## Metadata columns saved in RDS file
    columns = [
        ("id_client", "VARCHAR"),
        ("prediction_date", "VARCHAR"),
        ("model_label", "VARCHAR"),
        ("score_label_0", "VARCHAR"),
        ("score_label_1", "VARCHAR"),
    ]

    def rows(self):
        # NOTE(review): `csv_local_file` is not defined on this class —
        # presumably a module-level name; TODO confirm. The CSV carries a
        # header row (header=0), unlike the metadata tasks.
        frame = pd.read_csv(csv_local_file, header=0)
        for record in frame.itertuples(index=False):
            yield record
class ModelTrainingMetadata(CopyToTable):
    """Copy model-training metadata rows into `dpa_metadata.model_training`."""

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()
    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    # Local CSV produced by the upstream training step
    csv_local_file = "src/pipeline/luigi/luigi_tmp_files/models_training_metadata.csv"

    def requires(self):
        return ModelTrainingTest(ingest_type=self.ingest_type, bucket=self.bucket)

    # Postgres connection settings read from the local credentials file
    credentials = get_postgres_credentials("conf/local/credentials.yaml")
    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']

    table = 'dpa_metadata.model_training'

    ## TODO: adapt to the correct number of columns
    columns = [
        ("execution_time", "VARCHAR"),
        ("no_models_trained", "VARCHAR"),
        ("type_models_trained", "VARCHAR"),
    ]

    def rows(self):
        # Bug fix: the original referenced the bare name `csv_local_file`,
        # which does NOT resolve to the class attribute (class scope is
        # invisible inside methods) — it silently read a module-level name
        # instead. Use the attribute declared on this class.
        reader = pd.read_csv(self.csv_local_file, header=None)
        for element in reader.itertuples(index=False):
            yield element
class SaveS3Metadata(CopyToTable):
    """Copy S3-save metadata rows into `dpa_metadata.saveS3`."""

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()
    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    # Local CSV produced by the upstream S3-save step
    csv_local_file = "src/pipeline/luigi/luigi_tmp_files/saveS3_metadata.csv"

    def requires(self):
        return SaveS3UnitTest(ingest_type=self.ingest_type, bucket=self.bucket)

    # Postgres connection settings read from the local credentials file
    credentials = get_postgres_credentials("conf/local/credentials.yaml")
    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']

    table = 'dpa_metadata.saveS3'

    ## Postgres table layout
    columns = [
        ("save_time", "VARCHAR"),
        ("s3_bucket_name", "VARCHAR"),
        ("s3_key_name", "VARCHAR"),
        ("df_shape", "VARCHAR"),
    ]

    def rows(self):
        # Bug fix: the original referenced the bare name `csv_local_file`,
        # which does NOT resolve to the class attribute (class scope is
        # invisible inside methods) — it silently read a module-level name
        # instead. Use the attribute declared on this class.
        reader = pd.read_csv(self.csv_local_file, header=None)
        for element in reader.itertuples(index=False):
            yield element
class ModelTrainingTest(CopyToTable):
    """Copy the model-training unit-test result into `dpa_unittest.model_training`.

    Raises TypeError when the recorded result contains "FAILED"
    (i.e. X_train had fewer than 50 rows).
    """

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()
    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    def requires(self):
        return ModelTraining(ingest_type=self.ingest_type, bucket=self.bucket)

    # Postgres connection settings read from the local credentials file
    credentials = get_postgres_credentials("conf/local/credentials.yaml")
    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']

    table = 'dpa_unittest.model_training'

    columns = [("Date", "VARCHAR"), ("Result", "VARCHAR")]

    def rows(self):
        # NOTE(review): `csv_local_file` is not defined on this class —
        # presumably a module-level name; TODO confirm.
        reader = pd.read_csv(csv_local_file, header=None)
        # Fail fast: the original raised only AFTER the generator was
        # exhausted, so every row had already been handed to the DB copy
        # before the failure surfaced. Check the result cell (column 1,
        # row index 1, with header=None) before emitting anything.
        if "FAILED" in reader[1][1]:
            raise TypeError("FAILED, your X_train have less than 50 rows")
        for element in reader.itertuples(index=False):
            yield element