Example #1
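Runs DODGE over a pool of classical learners on the issue-close-time datasets: for each project and each time threshold, it loads the CSV, applies CFS feature selection and SMOTE, tunes with DODGE, and logs the wall-clock time. The snippet needs imports along the following lines; the module paths are assumptions based on the raise-utils package and are not part of the original:

# Assumed imports (module paths may differ across raise-utils versions):
import time

from raise_utils.data import DataLoader
from raise_utils.hyperparams import DODGE
from raise_utils.learners import (DecisionTree, LogisticRegressionClassifier,
                                  NaiveBayes, RandomForest)
from raise_utils.transforms import Transform  # some versions: raise_utils.transform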
def main():
    # Time-to-close thresholds (one directory of CSVs per threshold).
    directories = [
        "1 day", "7 days", "14 days", "30 days", "90 days", "180 days",
        "365 days"
    ]
    # Project datasets (one CSV per project under each threshold directory).
    datasets = [
        "camel", "cloudstack", "cocoon", "hadoop", "deeplearning", "hive",
        "node", "ofbiz", "qpid"
    ]

    for dat in datasets:
        for time_ in directories:
            start = time.time()
            data = DataLoader.from_file(
                "/Users/ryedida/PycharmProjects/raise-package/issue_close_time/"
                + time_ + "/" + dat + ".csv",
                target="timeOpen",
                col_start=0)
            # Feature selection (CFS), then minority oversampling (SMOTE).
            Transform("cfs").apply(data)
            Transform("smote").apply(data)

            config = {
                "n_runs": 10,
                "transforms": ["normalize", "standardize", "robust",
                               "maxabs", "minmax"] * 30,
                "metrics": ["d2h", "f1", "pd", "pf", "prec"],
                "random": True,
                "learners": [
                    NaiveBayes(random=True, name='nb0'),
                    DecisionTree(random=True, name='dt0'),
                    LogisticRegressionClassifier(random=True, name='lr0'),
                    RandomForest(random=True, name='rf0')
                ],
                "log_path": "./dodge-log/",
                "data": [data],
                "name": dat + "-" + time_
            }
            # Add 50 more randomly-initialized copies of each learner.
            for i in range(50):
                config["learners"].extend([
                    NaiveBayes(random=True, name=f'nb{i+1}'),
                    DecisionTree(random=True, name=f'dt{i+1}'),
                    LogisticRegressionClassifier(random=True, name=f'lr{i+1}'),
                    RandomForest(random=True, name=f'rf{i+1}')
                ])

            dodge = DODGE(config)
            dodge.optimize()
            end = time.time()

            # Append the wall-clock time for this dataset/threshold pair.
            with open(f'./dodge-log/{dat}-{time_}.txt', 'a') as log:
                print(f'Completed in {end - start} seconds.', file=log)
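With the four initial learners plus 50 extra copies of each, every DODGE run here searches a pool of 204 learner configurations alongside 150 transform options.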
Example #2
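Apparently part of the GHOST defect-prediction pipeline (note the log path): the raw features are replaced by a trained autoencoder's encodings, and DODGE then searches over 30 feedforward deep learners whose width and depth are sampled from the `random` ranges. The snippet assumes `data` (a Data instance) and `autoencoder` (an Autoencoder) were constructed earlier, along with imports such as:

# Assumed context (not shown in the original):
import numpy as np
from tensorflow.keras import backend as K
from raise_utils.hyperparams import DODGE
from raise_utils.learners import FeedforwardDL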
# Fit the autoencoder, then replace the raw features with their
# encoded representations.
autoencoder.set_data(*data)
autoencoder.fit()

data.x_train = autoencoder.encode(K.constant(np.array(data.x_train)))
data.x_test = autoencoder.encode(K.constant(np.array(data.x_test)))

config = {
    'n_runs': 10,
    'data': [data],
    'metrics': ['f1', 'pd', 'prec'],
    'learners': [],
    'log_path': './ghost-log-defect',
    'transforms': ['standardize', 'normalize', 'minmax'] * 30,
    'random': True,
    'name': 'camel-ant'
}

# Thirty feedforward deep learners with randomly sampled architectures.
for _ in range(30):
    config['learners'].append(
        FeedforwardDL(weighted=True,
                      wfo=True,
                      smote=True,
                      random={
                          'n_units': (2, 6),
                          'n_layers': (2, 5)
                      },
                      n_epochs=100))

dodge = DODGE(config)
dodge.optimize()
Example #3
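Defines run(), which executes one experiment on a Data instance. Depending on the config flags, it optionally "ultrasamples" the data (WFO, label reversal, and autoencoding), then either tunes 30 feedforward learners with DODGE or trains a single untuned learner and returns its classification metrics. In addition to the imports sketched in Example #1, this needs Data, Autoencoder, FeedforwardDL, and ClassificationMetrics from raise_utils, plus NumPy.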
def run(data: Data, name: str, config: dict):
    '''
    Runs one experiment, given a Data instance.

    :param {Data} data - The dataset to run on, NOT preprocessed.
    :param {str} name - The name of the experiment.
    :param {dict} config - The config to use. Must be in the format used in `process_configs`.
    '''
    if config.get('ultrasample', False):
        # Apply WFO
        transform = Transform('wfo')
        transform.apply(data)

        # Reverse labels
        data.y_train = 1. - data.y_train
        data.y_test = 1. - data.y_test

        # Autoencode the inputs, refitting until the final training
        # loss falls below the threshold.
        loss = 1e4
        while loss > 1e3:
            ae = Autoencoder(n_layers=2, n_units=[10, 7], n_out=5)
            ae.set_data(*data)
            ae.fit()

            loss = ae.model.history.history['loss'][-1]

        data.x_train = ae.encode(np.array(data.x_train))
        data.x_test = ae.encode(np.array(data.x_test))

    if config.get('dodge', False):
        # Tune the hyperparameters with DODGE.
        dodge_config = {
            'n_runs': 10,
            'data': [data],
            'metrics': ['f1', 'd2h', 'pd', 'pf', 'prec'],
            'learners': [],
            'log_path': './ghost-log/',
            'transforms': ['standardize', 'normalize', 'minmax'] * 30,
            'random': True,
            'name': name
        }

        # The flags are constant across iterations; read them once.
        wfo = config.get('wfo', True)
        smote = config.get('smote', True)
        weighted = config.get('weighted_loss', True)

        for _ in range(30):
            dodge_config['learners'].append(
                FeedforwardDL(weighted=weighted,
                              wfo=wfo,
                              smote=smote,
                              random={
                                  'n_units': (2, 6),
                                  'n_layers': (2, 5)
                              },
                              n_epochs=50))

        dodge = DODGE(dodge_config)
        dodge.optimize()
        return

    # Otherwise, it's one of the untuned approaches.
    elif config.get('wfo', False):
        learner = FeedforwardDL(weighted=True,
                                wfo=True,
                                smote=True,
                                n_epochs=50)
        learner.set_data(*data)
        learner.fit()

    elif config.get('weighted_loss', False):
        learner = FeedforwardDL(weighted=True,
                                wfo=False,
                                smote=False,
                                n_epochs=50)
        learner.set_data(*data)
        learner.fit()

    else:
        learner = FeedforwardDL(weighted=False,
                                wfo=False,
                                smote=False,
                                n_epochs=50,
                                random={
                                    'n_layers': (1, 5),
                                    'n_units': (5, 20)
                                })
        learner.set_data(*data)
        learner.fit()

    # Get the results.
    preds = learner.predict(data.x_test)
    m = ClassificationMetrics(data.y_test, preds)
    m.add_metrics(['f1', 'd2h', 'pd', 'pf', 'prec'])
    results = m.get_metrics()
    return results
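A minimal usage sketch, assuming a loaded Data instance; the file name, target column, and config values below are hypothetical, but the keys are the ones run() reads:

# Hypothetical invocation (names and values assumed, not from the original):
data = DataLoader.from_file('./data/camel.csv', target='bug', col_start=3)
results = run(data, 'camel-weighted', {
    'ultrasample': False,
    'dodge': False,
    'wfo': False,
    'weighted_loss': True
})
print(results)  # f1, d2h, pd, pf, prec values from ClassificationMetrics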