def test_ValueError_decode(self):
    """Decoding with an unsupported data encoding must raise ``ValueError``.

    The encoder is built from the fixture frame and the fixture encoding,
    then asked to decode using an encoding created with
    ``data_encoding='None'``.
    """
    encoder = Encoder(df=self.df, encoding=self.encoding)
    # assertRaises fails the test when nothing is raised; the previous
    # ``try/except ValueError: pass`` form passed in that case as well.
    with self.assertRaises(ValueError):
        encoder.decode(df=self.df, encoding=create_test_encoding(data_encoding='None'))
def test_encode(self):
    """Encoding a copy of the fixture frame must yield the expected frame.

    The copy is handed to ``Encoder.encode`` and afterwards compared,
    as a dict, with ``self.how_it_should_be``.
    """
    fitted = Encoder(df=self.df, encoding=self.encoding)
    # Work on a copy so the fixture frame itself is not the one passed to encode().
    frame = self.df.copy()
    fitted.encode(df=frame, encoding=self.encoding)

    expected = self.how_it_should_be.to_dict()
    actual = frame.to_dict()
    self.assertDictEqual(expected, actual)
def test_repeated_encode_decode(self):
    """Two encode/decode round trips must give back the original frame content."""
    codec = Encoder(df=self.df, encoding=self.encoding)
    working = self.df.copy()

    # Two full round trips: encode, decode, encode, decode.
    for _ in range(2):
        codec.encode(df=working, encoding=self.encoding)
        codec.decode(df=working, encoding=self.encoding)

    self.assertDictEqual(self.df.to_dict(), working.to_dict())
def test_NotImplementedException_decode(self):
    """Decoding with the one-hot data encoding must raise ``NotImplementedError``.

    The encoder is built from the fixture frame and the fixture encoding,
    then asked to decode using an encoding created with
    ``DataEncodings.ONE_HOT_ENCODER.value``.
    """
    encoder = Encoder(df=self.df, encoding=self.encoding)
    # assertRaises fails the test when nothing is raised; the previous
    # ``try/except NotImplementedError: pass`` form passed in that case too.
    with self.assertRaises(NotImplementedError):
        encoder.decode(
            df=self.df,
            encoding=create_test_encoding(
                data_encoding=DataEncodings.ONE_HOT_ENCODER.value))
def retrieve_proper_encoder(job: Job) -> Encoder:
    """Build the ``Encoder`` belonging to the root of an incremental-train chain.

    Follows ``incremental_train`` links starting at *job* until a job without
    one is reached, then builds an ``Encoder`` from that job's training frame
    (as returned by ``encode_label_logs`` called with ``encode=False``) and
    that job's encoding.

    :param job: job whose encoder is requested
    :return: encoder built from the root job's training data
    """
    # Walk up to the job that was not trained incrementally.
    root = job
    while root.incremental_train is not None:
        root = root.incremental_train

    training_log, test_log, additional_columns = get_train_test_log(root.split)
    training_df, _test_df = encode_label_logs(
        training_log,
        test_log,
        root,
        additional_columns=additional_columns,
        encode=False,
    )
    return Encoder(training_df, root.encoding)
def _data_encoder_encoder(job: Job, training_log, test_log) -> None:
    """Run the data encoder over the training and test logs of *job*.

    Nothing happens for labelling jobs, for boolean value encodings, or for
    time-series-prediction models. Otherwise a single ``Encoder`` is chosen:

    * incremental jobs reuse ``retrieve_proper_encoder(job.incremental_train)``;
    * regression models build it from ``training_log`` without its ``'label'`` column;
    * every other model builds it from ``training_log`` as is.

    The encoder is then applied to ``training_log`` and ``test_log``; the
    return values of ``encode`` are discarded (presumably it works in place).

    :param job: job providing type, encoding, predictive model and incremental link
    :param training_log: training data to encode
    :param test_log: test data to encode
    """
    # Guard clause; the checks short-circuit in the same order as before, so
    # later attributes are never touched when an earlier check already decides.
    if (job.type == JobTypes.LABELLING.value
            or job.encoding.value_encoding == ValueEncodings.BOOLEAN.value
            or job.predictive_model.predictive_model == PredictiveModels.TIME_SERIES_PREDICTION.value):
        return

    model_kind = job.predictive_model.predictive_model
    if job.incremental_train is not None:
        encoder = retrieve_proper_encoder(job.incremental_train)
    # NOTE: the time-series half of this test is already excluded by the guard above.
    elif model_kind not in (PredictiveModels.TIME_SERIES_PREDICTION.value,
                            PredictiveModels.REGRESSION.value):
        encoder = Encoder(training_log, job.encoding)
    elif model_kind == PredictiveModels.REGRESSION.value:
        # The label column is left out when building the encoder for regression.
        encoder = Encoder(training_log.drop('label', axis=1), job.encoding)

    encoder.encode(training_log, job.encoding)
    encoder.encode(test_log, job.encoding)
def encode_label_logs(training_log: EventLog, test_log: EventLog, job: Job, additional_columns=None,
                      encode: bool = True):
    """Encode and label the training and test logs of *job*.

    Steps, in order:

    1. Both logs go through ``_encode_log`` with the job's encoding and
       labelling; the columns returned for the training log are passed on
       to the test-log call.
    2. For mean/custom threshold types combined with attribute-number,
       duration or remaining-time labels, the ``'label'`` column of both
       results is replaced by the boolean ``label < threshold``. The
       threshold is the mean of the training labels or ``labelling.threshold``.
    3. If *encode* is true and the job is neither a labelling job, nor
       boolean-valued, nor a time-series prediction, an ``Encoder`` built
       from the training data is applied to both results.

    :param training_log: log used for training
    :param test_log: log used for testing
    :param job: job providing encoding, labelling, type and predictive model
    :param additional_columns: forwarded unchanged to ``_encode_log``
    :param encode: when false, step 3 is skipped; defaults to true, which
        keeps the behaviour callers had before this parameter existed
    :return: tuple ``(training_log, test_log)`` after the steps above
    """
    training_log, cols = _encode_log(training_log, job.encoding, job.labelling,
                                     additional_columns=additional_columns, cols=None)
    # TODO pass the columns of the training log
    print('\tDataset not found in cache, building..')
    test_log, _ = _encode_log(test_log, job.encoding, job.labelling,
                              additional_columns=additional_columns, cols=cols)

    labelling = job.labelling
    if (labelling.threshold_type in [
        ThresholdTypes.THRESHOLD_MEAN.value,
        ThresholdTypes.THRESHOLD_CUSTOM.value
    ]) and (labelling.type in [
        LabelTypes.ATTRIBUTE_NUMBER.value,
        LabelTypes.DURATION.value,
        LabelTypes.REMAINING_TIME.value
    ]):
        if labelling.threshold_type == ThresholdTypes.THRESHOLD_MEAN.value:
            # The threshold always comes from the training data, also for the test log.
            threshold = training_log['label'].astype(float).mean()
        else:
            # THRESHOLD_CUSTOM: the only other type the enclosing check lets through.
            threshold = float(labelling.threshold)
        training_log['label'] = training_log['label'].astype(float) < threshold
        test_log['label'] = test_log['label'].astype(float) < threshold

    if encode and job.type != JobTypes.LABELLING.value and \
            job.encoding.value_encoding != ValueEncodings.BOOLEAN.value and \
            job.predictive_model.predictive_model != PredictiveModels.TIME_SERIES_PREDICTION.value:
        # init nominal encode
        encoder = Encoder(training_log, job.encoding)
        encoder.encode(training_log, job.encoding)
        encoder.encode(test_log, job.encoding)

    return training_log, test_log
def test_ValueError_init_encoder(self):
    """Building an ``Encoder`` with an unsupported data encoding must raise ``ValueError``.

    The encoding is created with ``data_encoding='None'``.
    """
    # assertRaises fails the test when nothing is raised; the previous
    # ``try/except ValueError: pass`` form passed in that case as well.
    with self.assertRaises(ValueError):
        Encoder(df=self.df, encoding=create_test_encoding(data_encoding='None'))
def test_encoder(self):
    """A freshly built ``Encoder`` must have its three internal lookups set (not ``None``)."""
    built = Encoder(df=self.df, encoding=self.encoding)
    # Checked in the same order as before; the first failing attribute stops the test.
    for attribute in ('_encoder', '_label_dict', '_label_dict_decoder'):
        self.assertIsNotNone(getattr(built, attribute))