import os

from preprocess import Dataset
# NOTE(review): JsonMetricQueueWriter appears unused in this script — confirm before removing.
from global_utils import dump, JsonMetricQueueWriter
from reflexive_import import ReflexiveImporter

from .search_session import SearchSession
from .sklearn_args import SklearnSessionParser, SklearnSessionArgs


def main():
    """Run a scikit-learn hyper-parameter search session end to end.

    Parses command-line arguments, loads and prepares the dataset,
    dynamically resolves the model and its parameter distribution,
    runs the search, reports results, and pickles the search results
    to ``<output>/search-results.pkl``.
    """
    parser = SklearnSessionParser()
    args = SklearnSessionArgs(parser)

    # Load the dataset, then restrict to the requested labels,
    # optionally class-balance it, and downsample to the requested size.
    dataset = Dataset(args.datafile, args.dataroot)
    dataset.filter(args.labels)
    if args.balance:
        dataset.balance()
    dataset.sample(args.size)

    # Resolve the model class and its hyper-parameter search space
    # dynamically from the module named on the command line.
    importer = ReflexiveImporter(
        module_name=args.model,
        var_list=["model", "parameter_distribution"],
        alias_list=["model", "param"],
    )
    session = SearchSession(
        importer["model"], importer["param"], dataset, args.n_iter, args.cv)
    session.report_args()

    # tune (search for) hyper-parameters
    session.fit()
    session.report_best()
    session.report_result()

    dump(session.search_results,
         os.path.join(args.output, "search-results.pkl"))


if __name__ == '__main__':
    main()
class BaseSessionBuilder:
    """Lazily builds the components of a PyTorch training session.

    Each component (dataset, model, device, summary writer) is constructed
    on first access and cached, so repeated property reads return the same
    object. The model builder class, its arguments, and the dataset
    transformer are loaded dynamically from ``pytorch_models.<args.model>``
    via ReflexiveImporter.
    """

    def __init__(self, args: "TorchSessionArgs"):
        self.args = args
        if self.args.verbose:
            print(self.args)
        # Resolve the model package named on the command line.
        self.importer = ReflexiveImporter(
            module_name=self.args.model,
            var_list=[
                "builder_class",
                "model_args",
                "model_kwargs",
                "transformer"
            ],
            package_name="pytorch_models",
        )
        # Lazily-built components; each _set_* method fills its slot once.
        self._dataset = None
        self._model = None
        self._device = None
        self._writer = None
        self._session = None
        # The device is needed by static_model_kwargs below, so resolve it eagerly.
        self._set_device()
        self.static_model_kwargs = dict(
            pretrained_path=self.args.pretrained,
            device=self._device,
        )

    def _seed(self):
        """Seed numpy's global RNG when a seed was given, for reproducible sampling."""
        if self.args.seed is not None:
            np.random.seed(self.args.seed)
            if self.args.verbose:
                print("setting numpy random seed to {}".format(self.args.seed))
        elif self.args.verbose:
            print("no random seed specified for numpy")

    def _set_dataset(self):
        """Load, filter, optionally balance, and downsample the dataset (idempotent)."""
        if self._dataset is not None:
            return
        # Seed before any sampling so the downsample below is reproducible.
        self._seed()
        self._dataset = Dataset(
            filename=self.args.datafile,
            folder=self.args.dataroot,
            transformer=self.importer["transformer"],
            normalize=self.args.normalize,
        )
        if self.args.verbose:
            print("dataset loaded, {} classes in total".format(
                self._dataset.num_classes))
            print("train_shape = {}, test_shape = {}".format(
                self._dataset.train.X.shape, self._dataset.test.X.shape))
        self._dataset.filter(labels=self.args.labels)
        if self.args.balance:
            self._dataset.balance()
        self._dataset.sample(train_size=self.args.size,
                             test_size=self.args.size)
        if self.args.verbose:
            print("dataset downsampled, {} classes in total".format(
                self._dataset.num_classes))
            print("train_shape = {}, test_shape = {}".format(
                self._dataset.train.X.shape, self._dataset.test.X.shape))

    def _set_model(self):
        """Build the model via the dynamically-imported builder class (idempotent)."""
        if self._model is not None:
            return
        # The dataset must exist first: num_classes feeds the model kwargs.
        self._set_dataset()
        builder_class = self.importer["builder_class"]  # type: callable
        model_args = self.importer["model_args"]  # type: tuple
        model_kwargs = self.importer["model_kwargs"]  # type: dict
        model_kwargs.update(self.static_model_kwargs)
        model_kwargs.update(dict(num_classes=self._dataset.num_classes))
        model_builder = builder_class(*model_args, **model_kwargs)
        self._model = model_builder()
        if self.args.verbose:
            print("using model", self._model)

    def _set_device(self):
        """Pick the torch device (idempotent).

        CUDA is used only when it is both requested and available.
        (Bug fix: the original used ``or``, which selected "cuda" on
        machines with no GPU whenever the flag was passed, and ignored
        the flag whenever a GPU happened to be present.)
        """
        if self._device is not None:
            return
        self._device = torch.device(
            "cuda" if self.args.cuda and torch.cuda.is_available() else "cpu")
        if self.args.verbose:
            print("using device: {}".format(self._device))

    def _set_writer(self):
        """Open the TensorBoard summary writer at args.logdir (idempotent)."""
        if self._writer is not None:
            return
        self._writer = SummaryWriter(log_dir=self.args.logdir)
        if self.args.verbose:
            print("logging summaries at", self._writer.log_dir)

    def _set_session(self):
        """Materialize every component needed by a session (idempotent).

        NOTE(review): this base implementation never assigns
        ``self._session``, so the ``session`` property returns None unless
        a subclass overrides this method and sets it — confirm subclasses
        do so.
        """
        if self._session is not None:
            return
        self._set_dataset()
        self._set_model()
        self._set_device()
        self._set_writer()

    @property
    def dataset(self):
        """The prepared dataset, built on first access."""
        self._set_dataset()
        return self._dataset

    @property
    def model(self):
        """The constructed model, built on first access."""
        self._set_model()
        return self._model

    @property
    def device(self):
        """The torch device, resolved on first access."""
        self._set_device()
        return self._device

    @property
    def writer(self):
        """The summary writer, opened on first access."""
        self._set_writer()
        return self._writer

    @property
    def session(self):
        """The session object (see NOTE on _set_session)."""
        self._set_session()
        return self._session

    def __call__(self, *args, **kwargs):
        # Calling the builder is shorthand for accessing .session.
        return self.session