예제 #1
0
 def test_ValueError_decode(self):
     """Decoding with data_encoding='None' is expected to raise ValueError."""
     # Built outside the assertRaises block so that only decode() is the
     # call under test; a failure while constructing surfaces as an error.
     encoder = Encoder(df=self.df, encoding=self.encoding)
     # assertRaises fails the test when no ValueError is raised; the former
     # try/except-pass form passed in that case as well.
     with self.assertRaises(ValueError):
         encoder.decode(df=self.df,
                        encoding=create_test_encoding(data_encoding='None'))
예제 #2
0
    def test_encode(self):
        """Encode a copy of the fixture frame and compare it to the expected frame."""
        enc = Encoder(df=self.df, encoding=self.encoding)
        working_copy = self.df.copy()
        # The return value of encode() is not used; the assertion below
        # inspects the frame that was handed to it.
        enc.encode(df=working_copy, encoding=self.encoding)

        expected = self.how_it_should_be.to_dict()
        actual = working_copy.to_dict()
        self.assertDictEqual(expected, actual)
예제 #3
0
 def test_repeated_encode_decode(self):
     """Two encode/decode round trips must leave the frame equal to self.df."""
     enc = Encoder(df=self.df, encoding=self.encoding)
     round_tripped = self.df.copy()
     # Run the encode -> decode pair twice on the same frame.
     for _ in range(2):
         enc.encode(df=round_tripped, encoding=self.encoding)
         enc.decode(df=round_tripped, encoding=self.encoding)
     original = self.df.to_dict()
     self.assertDictEqual(original, round_tripped.to_dict())
예제 #4
0
 def test_NotImplementedException_decode(self):
     """Decoding with the one-hot data encoding is expected to raise NotImplementedError."""
     # Built outside the assertRaises block so that only decode() is the
     # call under test; a failure while constructing surfaces as an error.
     encoder = Encoder(df=self.df, encoding=self.encoding)
     one_hot_encoding = create_test_encoding(
         data_encoding=DataEncodings.ONE_HOT_ENCODER.value)
     # assertRaises fails the test when nothing is raised; the former
     # try/except-pass form passed in that case as well.
     with self.assertRaises(NotImplementedError):
         encoder.decode(df=self.df, encoding=one_hot_encoding)
예제 #5
0
def retrieve_proper_encoder(job: Job) -> Encoder:
    """Build the Encoder belonging to the root of a job's incremental-train chain.

    Follows ``incremental_train`` links until a job without one is reached,
    loads that job's train/test logs via ``get_train_test_log(job.split)``,
    and constructs an ``Encoder`` from the resulting training frame and that
    job's encoding.

    :param job: job whose chain of ``incremental_train`` ancestors is followed
    :return: an ``Encoder`` built from the root job's training data
    """
    # Walk up to the first job that was not incrementally trained.
    root = job
    while root.incremental_train is not None:
        root = root.incremental_train

    training_log, test_log, additional_columns = get_train_test_log(root.split)
    # encode=False is forwarded to encode_label_logs; presumably this keeps
    # the frame unencoded for the Encoder built below -- confirm against
    # encode_label_logs.
    training_df, _ = encode_label_logs(
        training_log,
        test_log,
        root,
        additional_columns=additional_columns,
        encode=False,
    )
    return Encoder(training_df, root.encoding)
예제 #6
0
def _data_encoder_encoder(job: Job, training_log, test_log) -> None:
    """Obtain an Encoder for *job* and apply it to the training and test logs.

    Nothing is done for labelling jobs, for boolean value encoding, or for
    time-series prediction models. Otherwise the encoder is either retrieved
    from the job this one was incrementally trained from, or constructed from
    ``training_log``, and ``encode`` is then called on both logs.

    :param job: job whose type, encoding and predictive model drive the choice
    :param training_log: frame the encoder is built from and applied to
    :param test_log: frame the same encoder is applied to
    """
    skip_encoding = (
        job.type == JobTypes.LABELLING.value
        or job.encoding.value_encoding == ValueEncodings.BOOLEAN.value
        or job.predictive_model.predictive_model == PredictiveModels.TIME_SERIES_PREDICTION.value
    )
    if skip_encoding:
        return

    if job.incremental_train is not None:
        encoder = retrieve_proper_encoder(job.incremental_train)
    elif job.predictive_model.predictive_model == PredictiveModels.REGRESSION.value:
        # For regression the 'label' column is dropped before the Encoder
        # is constructed.
        encoder = Encoder(training_log.drop('label', axis=1), job.encoding)
    else:
        # Time-series prediction was already excluded by the guard above,
        # so every remaining model uses the full training frame.
        encoder = Encoder(training_log, job.encoding)

    for log in (training_log, test_log):
        encoder.encode(log, job.encoding)
예제 #7
0
def encode_label_logs(training_log: EventLog,
                      test_log: EventLog,
                      job: Job,
                      additional_columns=None,
                      encode=True):
    """Encode and label a training/test log pair according to *job*.

    Both logs are passed through ``_encode_log`` with the job's encoding and
    labelling. When the labelling uses a mean or custom threshold on a
    numeric label type, the ``'label'`` column of both results is replaced by
    the boolean ``label < threshold``. Finally, unless disabled, an
    ``Encoder`` built from the training result is applied to both results.

    :param training_log: log used for training
    :param test_log: log used for testing
    :param job: job supplying ``encoding``, ``labelling``, ``type`` and
        ``predictive_model``
    :param additional_columns: forwarded unchanged to ``_encode_log``
    :param encode: when False, skip the final ``Encoder`` step so the caller
        receives the results without it (used by callers that construct
        their own ``Encoder`` from the returned training frame)
    :return: tuple ``(training_log, test_log)`` of the processed results
    """
    training_log, cols = _encode_log(training_log,
                                     job.encoding,
                                     job.labelling,
                                     additional_columns=additional_columns,
                                     cols=None)
    print('\tDataset not found in cache, building..')
    # The columns returned for the training log are handed to the test-log
    # call via ``cols``.
    test_log, _ = _encode_log(test_log,
                              job.encoding,
                              job.labelling,
                              additional_columns=additional_columns,
                              cols=cols)

    labelling = job.labelling
    uses_threshold = labelling.threshold_type in [
        ThresholdTypes.THRESHOLD_MEAN.value,
        ThresholdTypes.THRESHOLD_CUSTOM.value
    ]
    numeric_label = labelling.type in [
        LabelTypes.ATTRIBUTE_NUMBER.value, LabelTypes.DURATION.value,
        LabelTypes.REMAINING_TIME.value
    ]
    if uses_threshold and numeric_label:
        if labelling.threshold_type == ThresholdTypes.THRESHOLD_MEAN.value:
            # The mean is taken from the training labels only and then
            # applied to both logs.
            threshold = training_log['label'].astype(float).mean()
        else:
            # Only THRESHOLD_CUSTOM can reach this branch because of the
            # membership test above.
            threshold = float(labelling.threshold)
        training_log['label'] = training_log['label'].astype(float) < threshold
        test_log['label'] = test_log['label'].astype(float) < threshold

    if encode and job.type != JobTypes.LABELLING.value and \
        job.encoding.value_encoding != ValueEncodings.BOOLEAN.value and \
        job.predictive_model.predictive_model != PredictiveModels.TIME_SERIES_PREDICTION.value:
        # init nominal encode
        encoder = Encoder(training_log, job.encoding)
        encoder.encode(training_log, job.encoding)
        encoder.encode(test_log, job.encoding)

    return training_log, test_log
예제 #8
0
 def test_ValueError_init_encoder(self):
     """Constructing an Encoder with data_encoding='None' is expected to raise ValueError."""
     bad_encoding = create_test_encoding(data_encoding='None')
     # assertRaises fails the test when no ValueError is raised; the former
     # try/except-pass form passed in that case as well.
     with self.assertRaises(ValueError):
         Encoder(df=self.df, encoding=bad_encoding)
예제 #9
0
    def test_encoder(self):
        """A freshly constructed Encoder must have its three internal attributes set."""
        enc = Encoder(df=self.df, encoding=self.encoding)

        # Checked in the same order as listed; the first None aborts the test.
        for attr_name in ('_encoder', '_label_dict', '_label_dict_decoder'):
            self.assertIsNotNone(getattr(enc, attr_name))