Esempio n. 1
0
    def set_train_mode(self):
        """
        Prepare everything needed for a training run.

        Steps, in order:
          1. log the current config
          2. read the raw data through the data reader
          3. count tokens, build vocabs and index the text columns
          4. convert the data to datasets and create the data loaders
          5. create the model and optimizer; when checkpoints are loaded from
             ``<trainer.log_dir>/checkpoint`` both are restored from them
          6. hand the model and optimizer over to ``set_trainer``

        Returns:
            tuple of (train_loader, valid_loader, optimizer). The test loader
            is created as well but is not returned.
        """
        config_dump = pretty_json_dumps(self.config_dict)
        logger.info("Config. \n" + config_dump + "\n")

        reader, token_makers = self._create_data_and_token_makers()
        datas, helpers = reader.read()

        # Vocabulary: count tokens over the filtered texts, build the vocabs,
        # then index the reader's text columns.
        handler = TextHandler(token_makers, lazy_indexing=True)
        texts = reader.filter_texts(datas)
        counters = handler.make_token_counters(texts, config=self.config)
        handler.build_vocabs(counters)
        handler.index(datas, reader.text_columns)

        # Datasets (with name) and their loaders.
        datasets = reader.convert_to_dataset(datas, helpers=helpers)

        self.config.iterator.cuda_devices = self.config.cuda_devices
        loaders = self._create_by_factory(
            DataLoaderFactory, self.config.iterator, param={"datasets": datasets}
        )
        train_loader, valid_loader, _test_loader = loaders

        # Checkpoints, when present, contain model and optimizer.
        checkpoint_dir = Path(self.config.trainer.log_dir) / "checkpoint"
        checkpoints = (
            self._load_exist_checkpoints(checkpoint_dir)
            if checkpoint_dir.exists()
            else None
        )
        resuming = checkpoints is not None

        if resuming:
            model = self._create_model(token_makers, checkpoint=checkpoints)
        else:
            model = self._create_model(token_makers, helpers=helpers)

        # The optimizer is built the same way in both cases; only a resumed
        # run additionally restores its state from the checkpoints.
        op_dict = self._create_by_factory(
            OptimizerFactory, self.config.optimizer, param={"model": model}
        )
        if resuming:
            utils.load_optimizer_checkpoint(op_dict["optimizer"], checkpoints)

        self.set_trainer(model, op_dict=op_dict)
        return train_loader, valid_loader, op_dict["optimizer"]
Esempio n. 2
0
    def _set_saved_config(self):
        """
        Replace the current config with the one stored in the model checkpoint.

        The checkpoint's "config" dict becomes ``self.config_dict`` and is
        loaded into a new NestedNamespace that becomes ``self.config``. The
        ``use_gpu`` value of the config being replaced is carried over.
        """
        checkpoint_config = self.model_checkpoint["config"]
        self.config_dict = checkpoint_config

        logger.info("Load saved_config ...")
        logger.info(pretty_json_dumps(checkpoint_config))

        restored = NestedNamespace()
        restored.load_from_json(checkpoint_config)

        # Read the flag from the outgoing config before it is replaced.
        current_use_gpu = self.config.use_gpu
        restored.use_gpu = current_use_gpu

        self.config = restored
Esempio n. 3
0
    def _set_saved_config(self, cuda_devices):
        """
        Replace the current config with the one stored in the model checkpoint.

        The saved ``iterator.batch_size`` is floor-divided by the number of
        entries in ``cuda_devices``. The division is applied to a copy of the
        saved config, so ``self.model_checkpoint["config"]`` keeps the value it
        was saved with and calling this method again does not divide the batch
        size a second time. When ``cuda_devices`` is empty or None the batch
        size is left as saved instead of raising.

        The adjusted dict becomes ``self.config_dict`` and is loaded into a new
        NestedNamespace that becomes ``self.config``. The ``use_gpu`` value of
        the config being replaced is carried over.

        Args:
            cuda_devices: collection of device identifiers (or None); stored
                unchanged on ``self.config.cuda_devices``.
        """
        checkpoint_config = self.model_checkpoint["config"]

        # Copy only the two levels that get modified; the remaining nested
        # values are still shared with the checkpoint.
        saved_config_dict = dict(checkpoint_config)
        iterator_config = dict(saved_config_dict["iterator"])
        saved_config_dict["iterator"] = iterator_config

        # No devices (presumably a CPU-only run) means nothing to split the
        # batch across; dividing would raise ZeroDivisionError / TypeError.
        num_devices = 0 if cuda_devices is None else len(cuda_devices)
        if num_devices > 0:
            iterator_config["batch_size"] = iterator_config["batch_size"] // num_devices

        self.config_dict = saved_config_dict

        logger.info("Load saved_config ...")
        logger.info(pretty_json_dumps(saved_config_dict))

        saved_config = NestedNamespace()
        saved_config.load_from_json(saved_config_dict)

        # Read the flag from the outgoing config before it is replaced.
        is_use_gpu = self.config.use_gpu

        self.config = saved_config
        self.config.use_gpu = is_use_gpu
        self.config.cuda_devices = cuda_devices