コード例 #1
0
    def __init__(self,
                 model,
                 normalize=True,
                 embedding_file=None,
                 char_embedding_file=None,
                 num_workers=None):
        """Load a saved model and set up tokenization for prediction.

        Args:
            model: path to saved model file.
            normalize: squash output score to 0-1 probabilities with a softmax.
            embedding_file: if provided, will expand dictionary to use all
              available pretrained vectors in this file.
            char_embedding_file: if provided, will expand the character
              dictionary with the entries indexed from this file and load
              their embeddings.
            num_workers: number of CPU processes to use to preprocess batches.
              When None or positive, a ProcessPool is created and each worker
              is initialized through `init`; otherwise no pool is created and
              a single SpacyTokenizer is kept on the instance instead.
        """
        logger.info('Initializing model...')
        self.model = DocReader.load(model, normalize=normalize)

        if embedding_file:
            logger.info('Expanding dictionary...')
            # Keep the indexed words: they are what the dictionary is
            # expanded with on the next line.
            words = utils.index_embedding_words(embedding_file)
            added_words = self.model.expand_dictionary(words)
            self.model.load_embeddings(added_words, embedding_file)
        if char_embedding_file:
            logger.info('Expanding dictionary...')
            chars = utils.index_embedding_chars(char_embedding_file)
            added_chars = self.model.expand_char_dictionary(chars)
            self.model.load_char_embeddings(added_chars, char_embedding_file)

        logger.info('Initializing tokenizer...')
        annotators = get_annotators_for_model(self.model)

        if num_workers is None or num_workers > 0:
            # Tokenizers are built inside the worker processes by `init`,
            # so only the pool is stored here.
            self.workers = ProcessPool(
                num_workers,
                initializer=init,
                initargs=({
                    'annotators': annotators
                }, ),
            )
        else:
            # No worker pool: tokenize in this process.
            self.workers = None
            self.tokenizer = SpacyTokenizer(annotators=annotators)
コード例 #2
0
def init(options):
    """Create the module-level tokenizer and register its shutdown hook.

    Args:
        options: mapping of keyword arguments forwarded to SpacyTokenizer.
    """
    global TOK
    tokenizer = SpacyTokenizer(**options)
    TOK = tokenizer
    # Register the tokenizer's shutdown as a finalizer (priority 100).
    Finalize(tokenizer, tokenizer.shutdown, exitpriority=100)
コード例 #3
0
ファイル: preprocess.py プロジェクト: yucoian/MnemonicReader
def init():
    """Create the module-level tokenizer and register its shutdown hook.

    The tokenizer is built with the annotators held in the module-level
    ANNTOTORS name.
    """
    global TOK
    tokenizer = SpacyTokenizer(annotators=ANNTOTORS)
    TOK = tokenizer
    # Register the tokenizer's shutdown as a finalizer (priority 100).
    Finalize(tokenizer, tokenizer.shutdown, exitpriority=100)