def create(self, config):
    """Build a token maker for every configured (name, type) pair.

    Reads ``config.names`` / ``config.types`` in lockstep, instantiates a
    registered ``token:<type>`` maker for each name, and returns them in a
    dict that also carries the shared tokenizers under ``"tokenizers"``.

    Raises:
        ValueError: if ``config.names`` and ``config.types`` differ in length.
    """
    if getattr(config, "tokenizer", None):
        tokenizers = make_all_tokenizers(convert_config2dict(config.tokenizer))
    else:
        tokenizers = {}

    token_names, token_types = config.names, config.types
    if len(token_names) != len(token_types):
        raise ValueError("token_names and token_types must be same length.")

    token_makers = {"tokenizers": tokenizers}
    # Sorted so makers are created in a deterministic (name-ordered) sequence.
    for name, type_name in sorted(zip(token_names, token_types)):
        raw_config = getattr(config, name, {})
        if raw_config != {}:
            raw_config = convert_config2dict(raw_config)

        # Token sub-configs: tokenizer, indexer, embedding, vocab.
        maker_kwargs = {
            "tokenizers": tokenizers,
            "indexer_config": raw_config.get("indexer", {}),
            "embedding_config": raw_config.get("embedding", {}),
            "vocab_config": raw_config.get("vocab", {}),
        }
        token_makers[name] = self.registry.get(f"token:{type_name}")(**maker_kwargs)

    return token_makers
def common_setting(self, mode, config):
    """Apply settings shared by every run mode.

    Stores the experiment config as a plain dict, resolves the CUDA
    devices onto ``self.config``, and defaults ``slack_url`` to ``False``
    when it is not configured.

    NOTE(review): ``mode`` is not used here; it appears to be kept for
    interface compatibility with callers — confirm before removing.
    """
    self.config_dict = convert_config2dict(config)
    self.config.cuda_devices = self._get_cuda_devices()
    # Default to False so downstream checks can simply test truthiness.
    self.config.slack_url = getattr(self.config, "slack_url", False)
def load(self):
    """Prepare tokenizers, the knowledge base, and the reasoning modules.

    Side effects: sets ``self.docs``, ``self.document_retrieval`` and
    ``self.rc_experiment``; prints a readiness message when done.
    """
    # Tokenizers shared by the downstream components.
    tokenizers = make_all_tokenizers(convert_config2dict(self.config.tokenizers))

    # Knowledge base (Wiki).
    self.docs, doc_name = self._load_knowledge_base(self.config.knowledge_base)

    # Reasoning: document retrieval + reading-comprehension experiment.
    reasoning = self.config.reasoning
    self.document_retrieval = self._load_document_retrieval(
        reasoning.document_retrieval,
        tokenizers["word"],
        basename=doc_name,
    )
    self.rc_experiment = self.make_module(reasoning.reading_comprehension)

    print("Ready ..! \n")