def get_embeddings(hparams):
    """Create and return the "word_embeddings" variable.

    When both ``hparams.glove_path`` and ``hparams.vocab_path`` are set,
    the variable is initialized from a GloVe-derived constant matrix.
    Otherwise a random-uniform initializer is used, sized from the
    vocabulary file when available, else from ``hparams.vocab_size``.

    The original evaluated the glove+vocab condition twice in two
    separate branch chains; this version branches once.
    """
    if hparams.glove_path and hparams.vocab_path:
        tf.logging.info("Loading Glove embeddings...")
        vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
        glove_vectors, glove_dict = helpers.load_glove_vectors(
            hparams.glove_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
        # The initializer here is a constant matrix, so no shape is given.
        return tf.get_variable("word_embeddings", initializer=initializer)

    tf.logging.info(
        "No glove/vocab path specified, starting with random embeddings.")
    initializer = tf.random_uniform_initializer(-0.25, 0.25)
    if hparams.vocab_path:
        # Size the variable from the vocabulary file.
        vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
        return tf.get_variable(
            "word_embeddings",
            shape=[len(vocab_dict), hparams.embedding_dim],
            initializer=initializer)
    return tf.get_variable(
        "word_embeddings",
        shape=[hparams.vocab_size, hparams.embedding_dim],
        initializer=initializer)
def get_embeddings(hparams):
    """Build the GloVe-initialized embedding matrix for the vocabulary.

    Requires ``hparams.vocab_path`` and ``hparams.glove_path`` to be set.

    Returns:
        The matrix produced by ``helpers.build_initial_embedding_matrix``,
        with one row per vocabulary entry of width ``hparams.embedding_dim``.
    """
    vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
    print("vocab_array / dict loaded.")  # fixed misspelling "vacab_array"
    glove_vectors, glove_dict = helpers.load_glove_vectors(
        hparams.glove_path, vocab=set(vocab_array))
    print("glove_vectors / dict loaded.")
    W = helpers.build_initial_embedding_matrix(
        vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
    print("Embedding matrix built.")
    return W
def glove_init(embedding_size,
               glove_path="data/glove.6B.100d.txt",
               vocab_path="data/vocabulary.txt"):
    """Build a GloVe-based initial embedding matrix.

    Args:
        embedding_size: Dimensionality of each word vector.
        glove_path: Path to the GloVe vectors file. The default keeps the
            previously hard-coded location, so existing callers are
            unaffected.
        vocab_path: Path to the vocabulary file (one token per line,
            as expected by ``helpers.load_vocab``).

    Returns:
        The initial embedding matrix from
        ``helpers.build_initial_embedding_matrix``.
    """
    tf.logging.info("Loading GloVe embeddings ...")
    vocab_array, vocab_dict = helpers.load_vocab(vocab_path)
    glove_vectors, glove_dict = helpers.load_glove_vectors(
        glove_path, vocab=set(vocab_array))
    initializer = helpers.build_initial_embedding_matrix(
        vocab_dict, glove_dict, glove_vectors, embedding_size)
    return initializer
def get_embeddings(hparams):
    """Create the "word_embeddings" variable, pretrained when configured.

    The pretrained source is selected by ``hparams.vector_type``
    ('word2vec', 'glove' or 'fastText'); any other value falls back to a
    random-uniform initializer.
    """
    # Load the vocabulary contained in the training data.
    vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
    if hparams.vector_type == 'word2vec':
        word2vec_vectors, word2vec_dict = helpers.load_word2vec_vectors(
            hparams.word2vec_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, word2vec_dict, word2vec_vectors,
            hparams.embedding_dim)
    elif hparams.vector_type == 'glove':
        # glove_vectors holds the vectors of every in-vocabulary word;
        # glove_dict maps each word to its row in glove_vectors.
        glove_vectors, glove_dict = helpers.load_glove_vectors(
            hparams.glove_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
    elif hparams.vector_type == 'fastText':
        fastText_vectors, fastText_dict = helpers.load_fastText_vectors(
            hparams.fastText_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, fastText_dict, fastText_vectors,
            hparams.embedding_dim)
    else:
        tf.logging.info(
            "No glove/vocab path specificed, starting with random embeddings.")
        # A function-style initializer needs an explicit shape; the
        # original omitted it here, which raises in tf.get_variable.
        initializer = tf.random_uniform_initializer(-0.25, 0.25)
        return tf.get_variable(
            "word_embeddings",
            shape=[len(vocab_dict), hparams.embedding_dim],
            initializer=initializer)
    # The pretrained initializer is a constant matrix — do not pass shape.
    return tf.get_variable("word_embeddings", initializer=initializer)
def get_embeddings(hparams):
    """Create the "word_embeddings" variable, GloVe-initialized if possible.

    Falls back to a random-uniform initializer when either the GloVe file
    or the vocabulary file is not configured.
    """
    if hparams.glove_path and hparams.vocab_path:
        tf.logging.info("Loading Glove embeddings...")
        vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
        glove_vectors, glove_dict = helpers.load_glove_vectors(
            hparams.glove_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
    else:
        # NOTE: during eval (or a second training run) get_embeddings in
        # the dual encoder still reaches this branch and logs this line.
        tf.logging.info(
            "No glove/vocab path specified, starting with random embeddings.")
        # This is an initializer object, so no shape is baked in here —
        # get_variable below supplies the shape.
        initializer = tf.random_uniform_initializer(-0.25, 0.25)
    return tf.get_variable(
        "word_embeddings",
        shape=[hparams.vocab_size, hparams.embedding_dim],
        initializer=initializer)
def get_embeddings(hparams):
    """Return the "word_embeddings" variable for the model.

    When both a GloVe file and a vocabulary file are configured, the
    variable starts from the pretrained GloVe matrix; otherwise it is
    initialized uniformly at random in [-0.25, 0.25).
    """
    use_glove = bool(hparams.glove_path) and bool(hparams.vocab_path)
    if use_glove:
        tf.logging.info("Loading Glove embeddings...")
        words, word_to_id = helpers.load_vocab(hparams.vocab_path)
        vectors, vector_index = helpers.load_glove_vectors(
            hparams.glove_path, vocab=set(words))
        initializer = helpers.build_initial_embedding_matrix(
            word_to_id, vector_index, vectors, hparams.embedding_dim)
    else:
        tf.logging.info("No glove/vocab path specificed, starting with random embeddings.")
        initializer = tf.random_uniform_initializer(-0.25, 0.25)
    return tf.get_variable(
        "word_embeddings",
        shape=[hparams.vocab_size, hparams.embedding_dim],
        initializer=initializer)
def get_embeddings(hparams):
    """Create the "word_embeddings" variable, word2vec-initialized if possible.

    Uses pretrained word2vec vectors when both ``hparams.w2v_path`` and
    ``hparams.vocab_path`` are set; otherwise starts from a
    random-uniform initializer.
    """
    if hparams.w2v_path and hparams.vocab_path:
        # Fixed log message: this branch loads word2vec, not GloVe.
        tf.logging.info("Loading word2vec embeddings...")
        vocab_array, vocab_dict = helpers.load_vocab(hparams.vocab_path)
        w2v_vectors, w2v_dict = helpers.load_w2v_vectors(
            hparams.w2v_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, w2v_dict, w2v_vectors, hparams.embedding_dim)
    else:
        tf.logging.info(
            "No w2v/vocab path specified, starting with random embeddings.")
        initializer = tf.random_uniform_initializer(-0.25, 0.25)
    return tf.get_variable(
        "word_embeddings",
        shape=[hparams.vocab_size, hparams.embedding_dim],
        initializer=initializer)
def get_embeddings(hparams, glove,
                   glove_path="data/glove.6B.100d.txt",
                   vocab_path="data/vocabulary.txt"):
    """Create the "word_embeddings" variable.

    Args:
        hparams: Hyperparameters; must provide ``embedding_dim`` and
            ``vocab_size``.
        glove: If truthy, initialize from pretrained GloVe vectors;
            otherwise use a random-uniform initializer.
        glove_path: Path to the GloVe vectors file. Defaults preserve the
            previously hard-coded locations, so existing callers are
            unaffected.
        vocab_path: Path to the vocabulary file.

    Returns:
        The "word_embeddings" variable of shape
        [hparams.vocab_size, hparams.embedding_dim].
    """
    if glove:
        tf.logging.info("Loading GloVe embedding ...")
        vocab_array, vocab_dict = helpers.load_vocab(vocab_path)
        glove_vectors, glove_dict = helpers.load_glove_vectors(
            glove_path, vocab=set(vocab_array))
        initializer = helpers.build_initial_embedding_matrix(
            vocab_dict, glove_dict, glove_vectors, hparams.embedding_dim)
    else:
        tf.logging.info("Loading random embedding ...")
        initializer = tf.random_uniform_initializer(-0.25, 0.25)
    return tf.get_variable(
        "word_embeddings",
        shape=[hparams.vocab_size, hparams.embedding_dim],
        initializer=initializer)