Example #1
    def load_model(self, model_config, vocabulary_file, embedding_matrix_file,
                   checkpoint_path):
        """Loads a skip-thoughts model.

        Args:
          model_config: Object containing parameters for building the model.
          vocabulary_file: Path to vocabulary file containing a list of
            newline-separated words where the word id is the corresponding
            0-based index in the file.
          embedding_matrix_file: Path to a serialized numpy array of shape
            [vocab_size, embedding_dim].
          checkpoint_path: SkipThoughtsModel checkpoint file or a directory
            containing a checkpoint file.
        """
        tf.logging.info("Reading vocabulary from %s", vocabulary_file)
        with tf.gfile.GFile(vocabulary_file, mode="r") as f:
            lines = list(f.readlines())
        # In Python 2 the lines are bytes and need decoding; in Python 3 they
        # are already str, so .decode() raises AttributeError.
        try:
            reverse_vocab = [line.decode("utf-8").strip() for line in lines]
        except AttributeError:
            reverse_vocab = [line.strip() for line in lines]
        tf.logging.info("Loaded vocabulary with %d words.", len(reverse_vocab))

        tf.logging.info("Loading embedding matrix from %s",
                        embedding_matrix_file)
        # Note: tf.gfile.GFile doesn't work here because np.load() calls
        # f.seek() with 3 arguments. Opening the file in binary mode works
        # under both Python 2 and Python 3.
        with open(embedding_matrix_file, "rb") as f:
            embedding_matrix = np.load(f)
        tf.logging.info("Loaded embedding matrix with shape %s",
                        embedding_matrix.shape)

        word_embeddings = collections.OrderedDict(
            zip(reverse_vocab, embedding_matrix))

        g = tf.Graph()
        with g.as_default():
            encoder = skip_thoughts_encoder.SkipThoughtsEncoder(
                word_embeddings)
            restore_model = encoder.build_graph_from_config(
                model_config, checkpoint_path)

        sess = tf.Session(graph=g)
        restore_model(sess)

        self.encoders.append(encoder)
        self.sessions.append(sess)
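
This method matches the EncoderManager.load_model API from the TensorFlow research skip_thoughts package. Under that assumption, a minimal sketch of how a loaded model is typically driven looks like the following; all paths are placeholders.

from skip_thoughts import configuration
from skip_thoughts import encoder_manager

# Placeholder paths -- substitute artifacts from a trained model.
VOCAB_FILE = "/path/to/vocab.txt"
EMBEDDING_MATRIX_FILE = "/path/to/embeddings.npy"
CHECKPOINT_PATH = "/path/to/model.ckpt"

manager = encoder_manager.EncoderManager()
manager.load_model(configuration.model_config(),
                   vocabulary_file=VOCAB_FILE,
                   embedding_matrix_file=EMBEDDING_MATRIX_FILE,
                   checkpoint_path=CHECKPOINT_PATH)

# encode() maps a list of sentences to a [num_sentences, encoder_dim] array.
encodings = manager.encode(["the quick brown fox jumped ."])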
Example #2
    def load_model(self, model_config, vocab_file, embedding_matrix_file,
                   checkpoint_path):
        with open(vocab_file, 'r') as f:
            lines = list(f.readlines())
        reverse_vocab = [line.strip() for line in lines]
        embedding_matrix = np.load(embedding_matrix_file)
        word_embeddings = collections.OrderedDict(
            zip(reverse_vocab, embedding_matrix))

        encoder = skip_thoughts_encoder.SkipThoughtsEncoder(word_embeddings)
        restore_model = encoder.build_graph_from_config(model_config,
                                                        checkpoint_path)

        # Reuse the TensorFlow session managed by the Keras backend
        # (K = keras.backend) instead of creating a new one.
        tf_sess = K.get_session()
        restore_model(tf_sess)

        self.encoders.append(encoder)
        self.sessions.append(tf_sess)
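
Unlike Example #1, which builds the encoder inside its own tf.Graph and opens a dedicated tf.Session, this variant restores the checkpoint into the session already owned by the Keras backend via K.get_session(). The skip-thoughts encoder therefore shares a graph and session with any Keras model running in the same process, which is convenient when its encodings feed directly into a Keras pipeline.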