Python BidirectionalLanguageModel Examples

Programming Language: Python

Namespace/Package Name: utilities.bilm.model

Examples at hotexamples.com: 3

Python BidirectionalLanguageModel - 3 examples found. These are the top-rated real-world Python examples of utilities.bilm.model.BidirectionalLanguageModel extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

BidirectionalLanguageModel(3)

Frequently Used Methods

BidirectionalLanguageModel (3)

Example #1

Show file

    def make_ELMo(self):
        """Set up the character-input ELMo objects for the 'elmo-en' resource.

        The resource description is fetched with
        ``self._get_description('elmo-en')``. When it is ``None`` nothing is
        built and no attribute is set. Otherwise the following attributes are
        assigned on ``self``: ``lang``, ``batcher``, ``bilm``,
        ``character_ids``, ``embeddings_op`` and ``elmo_input``.
        """
        # TBD check if ELMo language resources are present
        resource = self._get_description('elmo-en')
        if resource is None:
            return

        # Location of pretrained BiLM for the specified language.
        # Note that the weights key uses an underscore, unlike the others.
        self.lang = resource["lang"]
        vocab_path = resource["path-vocab"]
        config_path = resource["path-config"]
        weights_path = resource["path_weights"]

        print('init ELMo')

        # The same width is handed to the Batcher and used as the last
        # dimension of the placeholder (presumably the maximum number of
        # characters per token -- confirm against Batcher).
        char_width = 50

        # Create a Batcher to map text to character ids
        self.batcher = Batcher(vocab_path, char_width)

        # Build the biLM graph.
        self.bilm = BidirectionalLanguageModel(config_path, weights_path)

        # Input placeholder to the biLM; the first two dimensions are left
        # unspecified.
        placeholder_shape = (None, None, char_width)
        self.character_ids = tf.placeholder('int32', shape=placeholder_shape)
        self.embeddings_op = self.bilm(self.character_ids)

        # The root variable scope is opened with AUTO_REUSE before the
        # weighted layer combination is created.
        with tf.variable_scope('', reuse=tf.AUTO_REUSE):
            self.elmo_input = weight_layers(
                'input', self.embeddings_op, l2_coef=0.0)

Example #2

Show file

    def dump_ELMo_token_embeddings(self, x_train):
        """Dump ELMo token embeddings for the vocabulary found in ``x_train``.

        Does nothing except print a warning when ``self.use_ELMo`` is falsy,
        and does nothing at all when ``self._get_description('elmo-en')``
        returns ``None``. Otherwise it collects every token of ``x_train``
        (plus ``'<S>'`` and ``'</S>'``), writes them one per line to
        ``vocab_small.txt`` in the resource's dump directory, calls
        ``dump_token_embeddings`` to produce ``elmo_token_embeddings.hdf5``
        there, and finally assigns ``batcher_token_dump``,
        ``bilm_token_dump``, ``token_ids`` and ``embeddings_op_token_dump``
        on ``self``.

        Args:
            x_train: iterable of token sequences (the training data is
                expected to be tokenized already).
        """
        if not self.use_ELMo:
            print("Warning: ELMo embeddings dump requested but embeddings object wrongly initialised")
            return

        resource = self._get_description('elmo-en')
        if resource is None:
            return

        print("Building ELMo token dump")

        # Note that the weights key uses an underscore, unlike the others.
        self.lang = resource["lang"]
        config_path = resource["path-config"]
        weights_path = resource["path_weights"]
        dump_dir = resource["path-dump"]

        # Vocabulary = sentence boundary markers + every training token.
        # As it is training data, it is already tokenized.
        vocabulary = set(('<S>', '</S>'))
        for sentence in x_train:
            vocabulary.update(sentence)

        vocab_path = os.path.join(dump_dir, 'vocab_small.txt')
        with open(vocab_path, 'w') as vocab_out:
            vocab_out.write('\n'.join(vocabulary))

        embedding_path = os.path.join(dump_dir, 'elmo_token_embeddings.hdf5')

        # The default graph is reset both before and after the dump.
        tf.reset_default_graph()
        dump_token_embeddings(vocab_path, config_path, weights_path,
                              embedding_path)
        tf.reset_default_graph()

        self.batcher_token_dump = TokenBatcher(vocab_path)

        # biLM fed with token ids, using the embedding file written above
        # instead of character inputs.
        self.bilm_token_dump = BidirectionalLanguageModel(
            config_path,
            weights_path,
            use_character_inputs=False,
            embedding_weight_file=embedding_path)

        self.token_ids = tf.placeholder('int32', shape=(None, None))
        self.embeddings_op_token_dump = self.bilm_token_dump(self.token_ids)

        # The weighted layer combination over the token-dump embeddings is
        # currently disabled:
        # with tf.variable_scope('', reuse=tf.AUTO_REUSE):
        #     self.elmo_input_token_dump = weight_layers('input', self.embeddings_op_token_dump, l2_coef=0.0)

        print("ELMo token dump completed")

Example #3

Show file

    def make_ELMo(self):
        # Location of pretrained BiLM for the specified language
        # TBD check if ELMo language resources are present
        description = self._get_description('elmo-ko')
        if description is not None:
            self.lang = description["lang"]
            vocab_file = description["path-vocab"]
            options_file = description["path-config"]
            weight_file = description["path_weights"]

            print('init ELMo')

            # Create a Batcher to map text to character ids
            self.batcher = Batcher(vocab_file, 50)

            # Build the biLM graph.
            self.bilm = BidirectionalLanguageModel(options_file, weight_file)