Example #1
0
File: tokens.py  Project: zzozzolev/claf
    def create(self, config):
        """Build the token-maker mapping described by ``config``.

        Returns a dict containing a shared ``"tokenizers"`` entry plus one
        token maker per name in ``config.names``, each instantiated from the
        registry entry ``token:<type>``.

        Raises:
            ValueError: if ``config.names`` and ``config.types`` differ in length.
        """
        tokenizer_section = getattr(config, "tokenizer", None)
        if tokenizer_section:
            tokenizers = make_all_tokenizers(convert_config2dict(tokenizer_section))
        else:
            tokenizers = {}

        names, types = config.names, config.types
        if len(names) != len(types):
            raise ValueError("token_names and token_types must be same length.")

        token_makers = {"tokenizers": tokenizers}
        # Iterate name/type pairs in a deterministic, name-sorted order.
        for name, token_type in sorted(zip(names, types)):
            raw = getattr(config, name, {})
            if raw != {}:
                raw = convert_config2dict(raw)

            # Each Token is assembled from tokenizer, indexer,
            # embedding and vocab configuration sections.
            maker_kwargs = {
                "tokenizers": tokenizers,
                "indexer_config": raw.get("indexer", {}),
                "embedding_config": raw.get("embedding", {}),
                "vocab_config": raw.get("vocab", {}),
            }
            token_makers[name] = self.registry.get(f"token:{token_type}")(**maker_kwargs)
        return token_makers
Example #2
0
    def common_setting(self, mode, config):
        """Apply settings shared by every mode: experiment config, GPU devices.

        Stores the experiment config as a plain dict, then fills in
        ``cuda_devices`` and ``slack_url`` on ``self.config``.
        NOTE(review): ``mode`` is not used in the visible body — presumably
        kept for interface symmetry with callers; confirm before removing.
        """
        self.config_dict = convert_config2dict(config)
        self.config.cuda_devices = self._get_cuda_devices()
        # Default slack_url to False when the config does not define one.
        self.config.slack_url = getattr(self.config, "slack_url", False)
Example #3
0
    def load(self):
        """Wire up tokenizers, the knowledge base, and the reasoning modules.

        Side effects: sets ``self.docs``, ``self.document_retrieval`` and
        ``self.rc_experiment``; prints a readiness message when done.
        """
        # Tokenizers built from the experiment config.
        tokenizers = make_all_tokenizers(
            convert_config2dict(self.config.tokenizers))

        # Knowledge base (e.g. Wiki documents).
        self.docs, doc_name = self._load_knowledge_base(self.config.knowledge_base)

        # Reasoning = document retrieval + reading-comprehension experiment.
        reasoning_config = self.config.reasoning
        self.document_retrieval = self._load_document_retrieval(
            reasoning_config.document_retrieval,
            tokenizers["word"],
            basename=doc_name,
        )
        self.rc_experiment = self.make_module(reasoning_config.reading_comprehension)
        print("Ready ..! \n")