Ejemplo n.º 1
0
 def __init__(self, test_run=None):
     """Load the results of a test run and build the visualizers for them.

     The results identifier is obtained from ``self._resolve_results_id``
     and the results themselves from ``self._grab_results``. One visualizer
     is then instantiated (with no arguments) for every entry in
     ``VISUALIZATIONS``, using the class returned by
     ``Registry.get_visualizer``.

     Args:
         test_run: Value forwarded to ``self._resolve_results_id``;
             defaults to ``None``.
     """
     run_id = self._resolve_results_id(test_run)
     self.results_id = run_id
     self.results = self._grab_results(run_id)

     # Look up and instantiate each visualizer in declaration order.
     built = []
     for visualization in VISUALIZATIONS:
         visualizer_cls = Registry.get_visualizer(visualization)
         built.append(visualizer_cls())
     self.visualizers = built
Ejemplo n.º 2
0
 def __init__(self, name=None):
     """Collect experiments, featurizers and metrics and create an empty results table.

     Experiments are stored exactly as returned by
     ``Registry.get_experiment`` (they are not called here), whereas
     featurizers and metrics are called with no arguments and the
     resulting objects are stored.

     Args:
         name: Stored unchanged on ``self.name``; defaults to ``None``.
     """
     self.name = name

     # Experiments: keep what the registry returns, without instantiating.
     self.experiments = list(map(Registry.get_experiment, EXPERIMENTS))

     # Featurizers and metrics: instantiate each registry entry.
     self.featurizers = [
         featurizer_cls()
         for featurizer_cls in map(Registry.get_featurizer, FEATURIZERS)
     ]
     self.metrics = [
         metric_cls() for metric_cls in map(Registry.get_metric, METRICS)
     ]

     # Column layout of the results table, which starts out empty.
     result_columns = [
         "Dataset", "Featurizer", "Experiment",
         "Metric", "TrainSize", "Sampler",
         "Resampler", "Result", "TrainResult",
     ]
     self.columns = result_columns
     self.results = pd.DataFrame(columns=result_columns)
Ejemplo n.º 3
0
    def _run_sub_experiment(self, experiment_cls, dataset, train, test, target,
                            current_setting):
        """Fit, evaluate and record one experiment for a single settings combination.

        The experiment is built from ``experiment_cls`` and the resampler
        looked up via ``Registry.get_resampler``. If
        ``self.experiment_has_been_run`` reports the combined settings as
        already run, the method logs that and returns early. Otherwise the
        experiment is fitted, predictions are made on the test and train
        rows, the outcome is measured with ``self._measure_experiment`` and
        written with ``self._dump_results``.

        Exceptions raised from data preparation through dumping the results
        are logged with a traceback and swallowed. Exceptions raised earlier
        (building the experiment, checking whether it already ran) propagate.

        Args:
            experiment_cls: Callable invoked with the object returned by
                ``Registry.get_resampler(current_setting["Resampler"])``.
            dataset: Indexable by ``"Features"`` and by ``target``; the
                selected columns must support ``.iloc`` (presumably a pandas
                DataFrame -- confirm against the caller).
            train: Indexer passed to ``.iloc`` to select the training rows.
            test: Indexer passed to ``.iloc`` to select the test rows.
            target: Key of the label column in ``dataset``.
            current_setting: Settings for this run; must contain
                ``"Resampler"``. Its entries are merged over the
                ``"Experiment"`` entry.

        Returns:
            None.
        """
        resampler = Registry.get_resampler(current_setting["Resampler"])
        experiment = experiment_cls(resampler)

        # Start from the experiment name, then layer the caller's settings on top.
        internal_setting = {"Experiment": experiment.name()}
        internal_setting.update(current_setting)

        if self.experiment_has_been_run(internal_setting):
            logging.info("Experiment has been run, skipping...")
            return

        logging.info("Training with settings {}".format(internal_setting))

        def rows(column, indexer):
            # Plain Python lists are handed to the experiment instead of
            # numpy/pandas objects; the original author did this deliberately
            # to work around pandas behaviour.
            return list(dataset[column].iloc[indexer])

        try:
            train_set = rows("Features", train)
            train_labels = rows(target, train)
            test_set = rows("Features", test)
            test_labels = rows(target, test)

            # Only resample the training data when the experiment asks for it.
            if experiment.auto_resample_:
                x, y = experiment.resample(train_set, train_labels)
            else:
                x, y = train_set, train_labels

            fit_started = time.time()
            experiment.fit(x, y)
            train_time = time.time() - fit_started

            # NOTE: the test-set prediction runs outside the timed section;
            # pred_time below covers the train-set prediction only.
            test_pred = experiment.predict(test_set, subset="TEST")

            predict_started = time.time()
            train_pred = experiment.predict(train_set, subset="TRAIN")
            pred_time = time.time() - predict_started

            experiment.cleanup()

            measured = self._measure_experiment(
                target=test_labels,
                result=test_pred,
                train_target=train_labels,
                train_result=train_pred,
                internal_setting=internal_setting,
                train_time=train_time,
                pred_time=pred_time,
            )
            self._dump_results(measured, experiment_name=self.name)
        except Exception:
            # Best effort: record the failure (with traceback) and carry on.
            logging.exception(
                "Failed to run experiment: {}".format(internal_setting))
Ejemplo n.º 4
0
def sample(sampler, data, train_labels, train_indices, train_size):
    """Build the sampler registered under ``sampler`` and return its sample.

    Args:
        sampler: Key passed to ``Registry.get_sampler`` to look up the
            sampler to construct.
        data: Forwarded as the first constructor argument.
        train_labels: Forwarded as the second constructor argument.
        train_indices: Forwarded as the third constructor argument.
        train_size: Forwarded as the fourth constructor argument.

    Returns:
        Whatever the constructed sampler's ``sample()`` method returns.
    """
    sampler_cls = Registry.get_sampler(sampler)
    sampler_instance = sampler_cls(data, train_labels, train_indices, train_size)
    return sampler_instance.sample()
Ejemplo n.º 5
0
 def __init__(self):
     """Instantiate one featurizer for every entry in ``FEATURIZERS``.

     Each entry is resolved through ``Registry.get_featurizer`` and the
     returned class is called with no arguments; the resulting objects are
     stored, in order, on ``self.featurizers``.
     """
     instances = []
     for featurizer_name in FEATURIZERS:
         featurizer_cls = Registry.get_featurizer(featurizer_name)
         instances.append(featurizer_cls())
     self.featurizers = instances