Ejemplo n.º 1
0
import os
from preprocess import Dataset
from global_utils import dump, JsonMetricQueueWriter
from .search_session import SearchSession
from .sklearn_args import SklearnSessionParser, SklearnSessionArgs
from reflexive_import import ReflexiveImporter

if __name__ == '__main__':
    # Command-line driver: load and trim the dataset, run a hyper-parameter
    # search over the model named by ``args.model``, print the reports and
    # dump the raw search results under ``args.output``.
    args = SklearnSessionArgs(SklearnSessionParser())

    # Prepare the data: restrict to the requested labels, optionally balance,
    # then sample down to the requested size.
    data = Dataset(args.datafile, args.dataroot)
    data.filter(args.labels)
    if args.balance:
        data.balance()
    data.sample(args.size)

    # Pull the estimator and its parameter distribution out of the model
    # module; "parameter_distribution" is exposed under the alias "param".
    imported = ReflexiveImporter(
        module_name=args.model,
        var_list=["model", "parameter_distribution"],
        alias_list=["model", "param"],
    )
    estimator = imported["model"]
    distribution = imported["param"]

    search = SearchSession(estimator, distribution, data, args.n_iter,
                           args.cv)
    search.report_args()

    # Tune (search for) hyper-parameters, then report what was found.
    search.fit()
    search.report_best()
    search.report_result()

    # Persist the search results next to the other outputs.
    results = search.search_results
    results_path = os.path.join(args.output, "search-results.pkl")
    dump(results, results_path)
Ejemplo n.º 2
0
class BaseSessionBuilder:
    """Lazily assemble the components of a training session from ``args``.

    Each component (dataset, model, device, summary writer, session) is
    created on first access through the matching property and cached on the
    instance, so repeated accesses return the same object.

    This base class prepares the dataset, model, device and writer but never
    assigns ``self._session`` itself; ``session`` therefore stays ``None``
    unless ``_set_session`` is extended (presumably by subclasses -- confirm
    against the concrete builders).
    """

    def __init__(self, args: "TorchSessionArgs"):
        """Store ``args``, import the model description and pick the device.

        Args:
            args: Parsed session arguments. The attributes read by this class
                are ``verbose``, ``model``, ``pretrained``, ``seed``,
                ``datafile``, ``dataroot``, ``normalize``, ``labels``,
                ``balance``, ``size``, ``cuda`` and ``logdir``.
        """
        self.args = args
        if self.args.verbose:
            print(self.args)

        # The model module is expected to expose these four names.
        self.importer = ReflexiveImporter(
            module_name=self.args.model,
            var_list=[
                "builder_class", "model_args", "model_kwargs", "transformer"
            ],
            package_name="pytorch_models",
        )

        # Lazily-built components; ``None`` means "not built yet".
        self._dataset = None
        self._model = None
        self._device = None
        self._writer = None
        self._session = None
        # The device is resolved eagerly because it is needed just below.
        self._set_device()

        # Keyword arguments passed to every model builder, on top of the
        # ones supplied by the model module.
        self.static_model_kwargs = dict(
            pretrained_path=self.args.pretrained,
            device=self._device,
        )

    def _seed(self):
        """Seed numpy's global RNG with ``args.seed`` when one is given."""
        if self.args.seed is not None:
            np.random.seed(self.args.seed)
            if self.args.verbose:
                print("setting numpy random seed to {}".format(self.args.seed))
        elif self.args.verbose:
            print("no random seed specified for numpy")

    def _set_dataset(self):
        """Load the dataset once, then filter, balance and sample it."""
        if self._dataset is not None:
            return

        # Seed before loading so that the sampling below is reproducible
        # (assuming Dataset draws from numpy's global RNG -- TODO confirm).
        self._seed()

        self._dataset = Dataset(
            filename=self.args.datafile,
            folder=self.args.dataroot,
            transformer=self.importer["transformer"],
            normalize=self.args.normalize,
        )

        if self.args.verbose:
            print("dataset loaded, {} classes in total".format(
                self._dataset.num_classes))
            print("train_shape = {}, test_shape = {}".format(
                self._dataset.train.X.shape, self._dataset.test.X.shape))

        self._dataset.filter(labels=self.args.labels)
        if self.args.balance:
            self._dataset.balance()
        # The same size is requested for the train and the test split.
        self._dataset.sample(train_size=self.args.size,
                             test_size=self.args.size)

        if self.args.verbose:
            print("dataset downsampled, {} classes in total".format(
                self._dataset.num_classes))
            print("train_shape = {}, test_shape = {}".format(
                self._dataset.train.X.shape, self._dataset.test.X.shape))

    def _set_model(self):
        """Build the model once from the imported builder description.

        The builder class is instantiated with the imported positional
        arguments and with the imported keyword arguments merged with
        ``static_model_kwargs`` and ``num_classes``; the resulting builder is
        then called with no arguments to obtain the model.
        """
        if self._model is not None:
            return

        # ``num_classes`` comes from the dataset, so it must exist first.
        self._set_dataset()

        builder_class = self.importer["builder_class"]  # type: callable
        model_args = self.importer["model_args"]  # type: tuple
        # Merge into a copy: the dict handed out by the importer is not owned
        # by this builder (it may be shared, e.g. a module-level dict), so
        # updating it in place would leak session-specific values into it.
        model_kwargs = dict(self.importer["model_kwargs"])  # type: dict
        model_kwargs.update(self.static_model_kwargs)
        model_kwargs["num_classes"] = self._dataset.num_classes

        model_builder = builder_class(*model_args, **model_kwargs)
        self._model = model_builder()

        if self.args.verbose:
            print("using model", self._model)

    def _set_device(self):
        """Resolve the torch device once: "cuda" or "cpu"."""
        if self._device is not None:
            return

        # NOTE(review): with ``or``, CUDA is selected when it is available
        # even if ``args.cuda`` is false, and also when ``args.cuda`` is true
        # on a machine without CUDA. Confirm whether ``and`` was intended.
        self._device = torch.device(
            "cuda" if self.args.cuda or torch.cuda.is_available() else "cpu")
        if self.args.verbose:
            print("using device: {}".format(self._device))

    def _set_writer(self):
        """Create the summary writer once, logging under ``args.logdir``."""
        if self._writer is not None:
            return

        self._writer = SummaryWriter(log_dir=self.args.logdir)
        if self.args.verbose:
            print("logging summaries at", self._writer.log_dir)

    def _set_session(self):
        """Make sure every component a session depends on has been built.

        Note that ``self._session`` is not assigned here.
        """
        if self._session is not None:
            return

        self._set_dataset()
        self._set_model()
        self._set_device()
        self._set_writer()

    @property
    def dataset(self):
        """The prepared dataset, built on first access."""
        self._set_dataset()
        return self._dataset

    @property
    def model(self):
        """The model instance, built on first access."""
        self._set_model()
        return self._model

    @property
    def device(self):
        """The torch device selected for this session."""
        self._set_device()
        return self._device

    @property
    def writer(self):
        """The summary writer, created on first access."""
        self._set_writer()
        return self._writer

    @property
    def session(self):
        """The session object; ``None`` unless ``_set_session`` assigns it."""
        self._set_session()
        return self._session

    def __call__(self, *args, **kwargs):
        """Return ``self.session``; any arguments are ignored."""
        return self.session