def __init__(self, test_run=None):
    """Load the results of a test run and build the configured visualizers.

    Args:
        test_run: Value handed to ``self._resolve_results_id`` to determine
            which results to load. Defaults to ``None``.
    """
    self.results_id = self._resolve_results_id(test_run)
    self.results = self._grab_results(self.results_id)

    # Each entry of VISUALIZATIONS is looked up in the registry and the
    # returned object is called with no arguments to create the visualizer.
    built = []
    for visualizer_key in VISUALIZATIONS:
        visualizer_factory = Registry.get_visualizer(visualizer_key)
        built.append(visualizer_factory())
    self.visualizers = built
def __init__(self, name=None):
    """Gather experiments, featurizers, and metrics; create an empty result table.

    Args:
        name: Stored unchanged on ``self.name``. Defaults to ``None``.
    """
    self.name = name

    # Experiments are stored exactly as the registry returns them, whereas
    # featurizers and metrics are called immediately and the results kept.
    experiments = []
    for experiment_key in EXPERIMENTS:
        experiments.append(Registry.get_experiment(experiment_key))
    self.experiments = experiments

    featurizers = []
    for featurizer_key in FEATURIZERS:
        featurizer_factory = Registry.get_featurizer(featurizer_key)
        featurizers.append(featurizer_factory())
    self.featurizers = featurizers

    metrics = []
    for metric_key in METRICS:
        metric_factory = Registry.get_metric(metric_key)
        metrics.append(metric_factory())
    self.metrics = metrics

    # Column layout of the (initially empty) results DataFrame.
    self.columns = [
        "Dataset",
        "Featurizer",
        "Experiment",
        "Metric",
        "TrainSize",
        "Sampler",
        "Resampler",
        "Result",
        "TrainResult",
    ]
    self.results = pd.DataFrame(columns=self.columns)
def _run_sub_experiment(self, experiment_cls, dataset, train, test, target,
                        current_setting):
    """Train and evaluate one experiment on one train/test split.

    The run is skipped when ``self.experiment_has_been_run`` returns a truthy
    value for the combined settings. Any exception raised while preparing the
    data, training, predicting, measuring, or dumping the result is logged
    with its traceback and not re-raised.

    Args:
        experiment_cls: Callable invoked with the object returned by
            ``Registry.get_resampler(current_setting["Resampler"])``. The
            returned experiment must provide ``name``, ``auto_resample_``,
            ``resample``, ``fit``, ``predict`` and ``cleanup``.
        dataset: Object indexable by column key; the ``"Features"`` and
            ``target`` columns must support ``.iloc``.
        train: Positional indexer selecting the training rows.
        test: Positional indexer selecting the test rows.
        target: Key of the label column in ``dataset``.
        current_setting: Mapping of settings for this run; must contain the
            key ``"Resampler"``. Its entries are merged over
            ``{"Experiment": <experiment name>}``.

    Returns:
        None. On success the measured result is passed to
        ``self._dump_results``.
    """
    experiment = experiment_cls(
        Registry.get_resampler(current_setting["Resampler"]))

    internal_setting = {"Experiment": experiment.name()}
    internal_setting.update(current_setting)

    if self.experiment_has_been_run(internal_setting):
        logging.info("Experiment has been run, skipping...")
        return

    logging.info("Training with settings %s", internal_setting)
    try:
        # Plain lists are used here on purpose instead of numpy arrays; the
        # original author chose this to sidestep pandas quirks.
        train_set = list(dataset["Features"].iloc[train])
        train_labels = list(dataset[target].iloc[train])
        test_set = list(dataset["Features"].iloc[test])
        test_labels = list(dataset[target].iloc[test])

        if experiment.auto_resample_:
            x, y = experiment.resample(train_set, train_labels)
        else:
            x, y = train_set, train_labels

        try:
            before_fit = time.time()
            experiment.fit(x, y)
            train_time = time.time() - before_fit

            test_pred = experiment.predict(test_set, subset="TEST")
            # NOTE(review): the timer starts after the TEST prediction, so
            # pred_time covers only the TRAIN prediction below. Left as-is
            # to keep recorded timings comparable; confirm this is intended.
            before_pred = time.time()
            train_pred = experiment.predict(train_set, subset="TRAIN")
            pred_time = time.time() - before_pred
        finally:
            # Always release whatever the experiment holds, even when fit or
            # predict fails; previously cleanup was skipped on any error.
            experiment.cleanup()

        result = self._measure_experiment(
            target=test_labels,
            result=test_pred,
            train_target=train_labels,
            train_result=train_pred,
            internal_setting=internal_setting,
            train_time=train_time,
            pred_time=pred_time,
        )
        self._dump_results(result, experiment_name=self.name)
    except Exception:
        # Deliberate best-effort boundary: log the failure (with traceback)
        # and return so the caller can continue with other settings.
        logging.exception("Failed to run experiment: %s", internal_setting)
def sample(sampler, data, train_labels, train_indices, train_size):
    """Build the requested sampler and return the result of its ``sample()``.

    Args:
        sampler: Key passed to ``Registry.get_sampler`` to obtain the sampler
            factory.
        data: First positional argument forwarded to the sampler factory.
        train_labels: Second positional argument forwarded to the factory.
        train_indices: Third positional argument forwarded to the factory.
        train_size: Fourth positional argument forwarded to the factory.

    Returns:
        Whatever the constructed sampler's ``sample()`` method returns.
    """
    sampler_factory = Registry.get_sampler(sampler)
    sampler_instance = sampler_factory(
        data, train_labels, train_indices, train_size
    )
    return sampler_instance.sample()
def __init__(self):
    """Instantiate every featurizer listed in the configuration.

    Each entry of ``FEATURIZERS`` is looked up through
    ``Registry.get_featurizer`` and the returned object is called with no
    arguments; the resulting instances are stored on ``self.featurizers``.
    """
    instances = []
    for featurizer_key in FEATURIZERS:
        featurizer_factory = Registry.get_featurizer(featurizer_key)
        instances.append(featurizer_factory())
    self.featurizers = instances