def my_initializer(shape=None, dtype=tf.float32, partition_info=None, glove_activation=False):
    """TF custom-initializer shim that returns pretrained GloVe embeddings.

    Parameters
    ----------
    shape : ignored — the matrix shape is fixed by the loaded embeddings.
    dtype : must be ``tf.float32``; anything else is a caller error.
    partition_info : unused; present only to satisfy the TF initializer signature.
    glove_activation : unused; kept for backward compatibility with callers.

    Returns
    -------
    The embedding matrix produced by ``preprocessing.load_glove_embeddings``.

    NOTE(review): relies on module-level ``word_index`` and ``model`` —
    presumably set up before TF builds the graph; confirm against callers.
    """
    # Raise instead of assert: asserts are stripped under `python -O`,
    # which would let a wrong dtype slip through silently.
    if dtype is not tf.float32:
        raise TypeError(f"my_initializer only supports tf.float32, got {dtype!r}")
    embedding_matrix = preprocessing.load_glove_embeddings(word_index, model)
    return embedding_matrix
def init_weights(self):
    """Initialize ``self.embedding`` with pretrained GloVe vectors.

    Builds a (vocab_size, input_size) weight matrix where:
    - every row starts as uniform(-0.25, 0.25) noise,
    - rows whose index appears in the GloVe dict are overwritten with the
      pretrained vector,
    - row 0 is zeroed (presumably the padding index — confirm with the
      vocabulary builder),
    then rebinds the matrix as the embedding layer's trainable parameter.
    """
    glove_embeddings = preprocessing.load_glove_embeddings()
    embedding_weights = torch.FloatTensor(self.vocab_size, self.input_size)
    # `init.uniform` is a deprecated alias; the in-place `uniform_` is the
    # supported API and does exactly the same fill.
    init.uniform_(embedding_weights, a=-0.25, b=0.25)
    for idx, vector in glove_embeddings.items():
        embedding_weights[idx] = torch.FloatTensor(vector)
    # Zero vector for the padding row (float32 zeros, same as the original
    # FloatTensor([0] * input_size)).
    embedding_weights[0] = torch.zeros(self.input_size)
    # Drop the old parameter before rebinding so nn.Module re-registers it.
    del self.embedding.weight
    self.embedding.weight = nn.Parameter(embedding_weights)
def train_embedding_to_sentiment():
    """Train a model that maps word-embedding indices to sentiment scores.

    The lexicon-word index array is cached on disk: on a cache miss it is
    built from the raw GloVe file and saved; on a cache hit both the index
    array and a precomputed embedding matrix are loaded instead.
    Fits for 200 epochs with best-weights checkpointing and CSV logging.
    """
    _, lexicon = load_sentiment_lexicon()
    if not os.path.exists('data/lexicon_words_indices.npy'):
        lexicon_pandas = pd.read_csv('lexicons/ts.lex.txt', sep=" ", usecols=[0], header=None)
        word2index, embedding_matrix = load_glove_embeddings(
            'data/glove.twitter.27B.100d.txt', embedding_dim=100, vocab_size=1193514)
        lexicon_pandas.columns = ['word']
        # Leading 0 keeps X aligned one-to-one with the lexicon rows used
        # below; out-of-vocabulary words map to index 0 as well.
        # dict.get replaces the double-lookup `w in d ... d[w]` pattern.
        X = np.array([0] + [word2index.get(word, 0)
                            for word in lexicon_pandas.word.values.tolist()])
        np.save('data/lexicon_words_indices', X)
    else:
        X = np.load('data/lexicon_words_indices.npy')
        # NOTE(review): the cache-hit path loads 'embeddings_matrix2.npy'
        # while the miss path builds the matrix from the GloVe file —
        # confirm these two artifacts are kept in sync.
        embedding_matrix = np.load('data/embeddings_matrix2.npy')
    # Keep row 0 plus every lexicon word actually found in the vocabulary.
    mask = [True] + (X[1:] > 0).tolist()
    y = lexicon[mask]
    X = X[mask].reshape(-1, 1)
    model = embedding_to_sentiment_model(X[0].shape, embedding_matrix, 1, y.shape[1])
    opt = k.optimizers.Adam(lr=0.001, amsgrad=True)
    model.compile(optimizer=opt, loss='mean_squared_error', metrics=['mse', 'mae'])
    # Checkpoint: keep only the best weights by validation loss.
    checkpoint = k.callbacks.ModelCheckpoint(
        'models/embedding_sentiment_model-{epoch:02d}-{val_loss:.2f}.h5',
        monitor='val_loss', verbose=1, save_best_only=True,
        save_weights_only=True, mode='min')
    csv_logger = k.callbacks.CSVLogger('logs/embedding_sentiment_model.log')
    model.fit(X, y, epochs=200, batch_size=5000, shuffle=True,
              callbacks=[checkpoint, csv_logger], validation_split=0.2)