Example 1
def get_embeddings(hparams):
    if hparams.glove_path and hparams.vocab_path:
        tf.logging.info("Loading Glove embeddings...")
        vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
        glove_vectors, glove_dict = helpers.load_glove_vectors(
            hparams.glove_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
    else:
        tf.logging.info(
            "No GloVe/vocab path specified, starting with random embeddings.")
        initializer = tf.random_uniform_initializer(-0.25, 0.25)

    if hparams.glove_path and hparams.vocab_path:
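        # The GloVe matrix is a constant initializer, so it already carries
        # its shape; no shape argument is needed (see the note in Example 4).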
        return tf.get_variable("word_embeddings", initializer=initializer)
    elif hparams.vocab_path:
        vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
        return tf.get_variable("word_embeddings",
                               shape=[len(vocab_dict), hparams.embedding_dim],
                               initializer=initializer)
    else:
        return tf.get_variable(
            "word_embeddings",
            shape=[hparams.vocab_size, hparams.embedding_dim],
            initializer=initializer)
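All of these examples rely on a helpers module that is not shown. A minimal sketch of what load_vocab is assumed to do, inferred from how the callers use its return values (the one-token-per-line file format is an assumption):

import numpy as np

def load_vocab(filename):
    # Assumed format: one token per line; the line number becomes the id.
    vocab = [line.strip() for line in open(filename, encoding="utf-8")]
    vocab_dict = {word: idx for idx, word in enumerate(vocab)}
    return np.array(vocab), vocab_dict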
Example 2
def get_embeddings(hparams):
    vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
    print("vacab_array / dict loaded.")
    glove_vectors, glove_dict = helpers.load_glove_vectors(
        hparams.glove_path, vocab=set(vocab_array))
    print("glove_vectors / dict loaded.")
    W = helpers.build_initial_embedding_matrix(vocab_dict, glove_dict,
                                               glove_vectors,
                                               hparams.embedding_dim)
    print("Embedding matrix built.")
    return W
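Unlike Example 1, this version returns the raw matrix W rather than a tf.Variable. A rough sketch of what build_initial_embedding_matrix is assumed to compute, matching the call sites above (random rows for out-of-vocabulary words, pre-trained rows where GloVe covers the word; the uniform range mirrors the random fallback used in the other examples):

import numpy as np

def build_initial_embedding_matrix(vocab_dict, glove_dict, glove_vectors, embedding_dim):
    # Random uniform start, so out-of-vocabulary words keep random vectors.
    matrix = np.random.uniform(
        -0.25, 0.25, size=(len(vocab_dict), embedding_dim)).astype("float32")
    # Overwrite the rows of words that have a pre-trained GloVe vector.
    for word, glove_idx in glove_dict.items():
        matrix[vocab_dict[word], :] = glove_vectors[glove_idx]
    return matrix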
Example 3
def glove_init(embedding_size):
    """GloVe initialization"""
    glove_path = "data/glove.6B.100d.txt"
    vocab_path = "data/vocabulary.txt"
    tf.logging.info("Loading GloVe embeddings ...")
    vocab_array, vocab_dict = helpers.load_vocab(vocab_path)
    glove_vectors, glove_dict = helpers.load_glove_vectors(
        glove_path, vocab=set(vocab_array))
    initializer = helpers.build_initial_embedding_matrix(
        vocab_dict, glove_dict, glove_vectors, embedding_size)
    return initializer
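glove_init returns a constant matrix rather than an initializer object, so a caller would pass it to tf.get_variable without a shape argument (TF1 infers the shape from a constant initializer, as the comment in Example 4 notes). A hypothetical usage:

embedding_W = tf.get_variable(
    "word_embeddings", initializer=glove_init(embedding_size=100))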
Example 4
def get_embeddings(hparams):
    # Load the vocabulary contained in the training data
    vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
    if hparams.vector_type == 'word2vec':
        word2vec_vectors, word2vec_dict = helpers.load_word2vec_vectors(
            hparams.word2vec_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, word2vec_dict, word2vec_vectors, hparams.embedding_dim)
    elif hparams.vector_type == 'glove':
        # glove_vectors holds the vectors of the words that appear;
        # glove_dict maps each word to its row index
        glove_vectors, glove_dict = helpers.load_glove_vectors(
            hparams.glove_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
    elif hparams.vector_type == 'fastText':
        fastText_vectors, fastText_dict = helpers.load_fastText_vectors(
            hparams.fastText_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, fastText_dict, fastText_vectors, hparams.embedding_dim)

    else:
        tf.logging.info(
            "No pre-trained vectors specified, starting with random embeddings.")
        initializer = tf.random_uniform_initializer(-0.25, 0.25)  # random uniform

    # If initializer is a constant, do not specify shape.
    return tf.get_variable(
        "word_embeddings",
        # shape=[hparams.vocab_size, hparams.embedding_dim],
        initializer=initializer)
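A plausible sketch of load_glove_vectors as these examples use it: parse the standard GloVe text format (a word followed by its space-separated vector components), keep only words present in vocab, and return the stacked vectors plus a word-to-row index. The details are assumptions:

import numpy as np

def load_glove_vectors(filename, vocab):
    vectors, glove_dict = [], {}
    with open(filename, encoding="utf-8") as f:
        for line in f:
            tokens = line.rstrip().split(" ")
            word, values = tokens[0], tokens[1:]
            if word in vocab:
                glove_dict[word] = len(vectors)
                vectors.append([float(v) for v in values])
    return np.array(vectors, dtype="float32"), glove_dict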
Example 5
def get_embeddings(hparams):
  if hparams.glove_path and hparams.vocab_path:
    tf.logging.info("Loading Glove embeddings...")
    vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
    glove_vectors, glove_dict = helpers.load_glove_vectors(hparams.glove_path, vocab=set(vocab_array))
    initializer = helpers.build_initial_embedding_matrix(vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
  else:
    tf.logging.info("No glove/vocab path specificed, starting with random embeddings.") #当eval或者再次train的时候,dual encoder中的get_embedding仍然会触发这句话
    initializer = tf.random_uniform_initializer(-0.25, 0.25) #一个initializer object,因而无需指定形状(因为get_variable时会先指定一个shape)?

  return tf.get_variable(
    "word_embeddings",
    shape=[hparams.vocab_size, hparams.embedding_dim],
    initializer=initializer)
Example 6
def get_embeddings(hparams):
  if hparams.glove_path and hparams.vocab_path:
    tf.logging.info("Loading Glove embeddings...")
    vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
    glove_vectors, glove_dict = helpers.load_glove_vectors(hparams.glove_path, vocab=set(vocab_array))
    initializer = helpers.build_initial_embedding_matrix(vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
  else:
    tf.logging.info("No glove/vocab path specificed, starting with random embeddings.")
    initializer = tf.random_uniform_initializer(-0.25, 0.25)

  return tf.get_variable(
    "word_embeddings",
    shape=[hparams.vocab_size, hparams.embedding_dim],
    initializer=initializer)
Example 7
def get_embeddings(hparams):
    if hparams.w2v_path and hparams.vocab_path:
        tf.logging.info("Loading Glove embeddings...")
        vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
        w2v_vectors, w2v_dict = helpers.load_w2v_vectors(
            hparams.w2v_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, w2v_dict, w2v_vectors, hparams.embedding_dim)
    else:
        tf.logging.info(
            "No w2v/vocab path specified, starting with random embeddings.")
        initializer = tf.random_uniform_initializer(-0.25, 0.25)

    return tf.get_variable("word_embeddings",
                           shape=[hparams.vocab_size, hparams.embedding_dim],
                           initializer=initializer)
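The load_w2v_vectors helper is likewise not shown. One way such a helper could be written is with gensim's KeyedVectors; gensim and the binary GoogleNews-style file format are assumptions, and the original helper may well parse the file directly:

import numpy as np
from gensim.models import KeyedVectors

def load_w2v_vectors(filename, vocab):
    # Load pre-trained word2vec vectors (binary format assumed).
    kv = KeyedVectors.load_word2vec_format(filename, binary=True)
    w2v_dict, vectors = {}, []
    for word in vocab:
        if word in kv:
            w2v_dict[word] = len(vectors)
            vectors.append(kv[word])
    return np.array(vectors, dtype="float32"), w2v_dict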
Example 8
def get_embeddings(hparams, glove):
    if glove:
        tf.logging.info("Loading GloVe embedding ...")
        glove_path = "data/glove.6B.100d.txt"
        vocab_path = "data/vocabulary.txt"
        vocab_array, vocab_dict = helpers.load_vocab(vocab_path)
        glove_vectors, glove_dict = helpers.load_glove_vectors(
            glove_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
    else:
        tf.logging.info("Loading random embedding ...")
        initializer = tf.random_uniform_initializer(-0.25, 0.25)

    return tf.get_variable("word_embeddings",
                           shape=[hparams.vocab_size, hparams.embedding_dim],
                           initializer=initializer)
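Once created, the variable is typically consumed with tf.nn.embedding_lookup to map token-id tensors to dense vectors. A hypothetical TF1-style usage (the placeholder and its shape are made up for illustration):

with tf.variable_scope("embeddings"):
    embeddings_W = get_embeddings(hparams, glove=True)

# context_ids: integer token ids, e.g. shape [batch_size, max_len]
context_ids = tf.placeholder(tf.int64, shape=[None, None])
context_embedded = tf.nn.embedding_lookup(embeddings_W, context_ids)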