Code example #1
0
 def my_initializer(shape=None,
                    dtype=tf.float32,
                    partition_info=None,
                    glove_activation=False):
     """TF custom-initializer callback that returns pretrained GloVe embeddings.

     `shape` and `partition_info` are unused but kept because TF invokes
     initializers with this exact signature — TODO confirm against the TF
     version in use. `glove_activation` is unused here as well.

     Returns:
         The embedding matrix produced by `preprocessing.load_glove_embeddings`.

     Raises:
         ValueError: if `dtype` is not `tf.float32` (the only supported dtype).
     """
     # `assert` is stripped when Python runs with -O; validate explicitly.
     if dtype is not tf.float32:
         raise ValueError("my_initializer only supports tf.float32, got %r" % dtype)
     # NOTE(review): `word_index` and `model` are module-level globals not
     # visible in this snippet — verify they are defined before use.
     embedding_matrix = preprocessing.load_glove_embeddings(
         word_index, model)
     return embedding_matrix
Code example #2
0
 def init_weights(self):
     """Initialize the embedding layer from pretrained GloVe vectors.

     Rows present in the GloVe dictionary get their pretrained vector;
     remaining rows are filled uniformly in [-0.25, 0.25); row 0 is zeroed
     (presumably the padding index — TODO confirm against the vocabulary).
     """
     glove_embeddings = preprocessing.load_glove_embeddings()
     embedding_weights = torch.FloatTensor(self.vocab_size, self.input_size)
     # BUG FIX: `init.uniform` was deprecated in torch 0.4 and later removed;
     # the in-place variant `uniform_` is the supported API.
     init.uniform_(embedding_weights, a=-0.25, b=0.25)
     for word_idx, vector in glove_embeddings.items():
         embedding_weights[word_idx] = torch.FloatTensor(vector)
     # Zero vector for index 0 (padding/unknown slot).
     embedding_weights[0] = torch.zeros(self.input_size)
     # Assigning a new nn.Parameter replaces the old one directly;
     # the previous `del self.embedding.weight` was unnecessary.
     self.embedding.weight = nn.Parameter(embedding_weights)
Code example #3
0
def train_embedding_to_sentiment():
    """Train a regression model mapping word-embedding indices to sentiment scores.

    On the first run, builds the lexicon-word index array from the GloVe
    vocabulary and caches both the indices and the embedding matrix to disk;
    subsequent runs load both from the cache. Trains with MSE loss, saving
    the best checkpoint (by validation loss) and a CSV training log.
    """
    _, lexicon = load_sentiment_lexicon()

    if not os.path.exists('data/lexicon_words_indices.npy'):
        lexicon_pandas = pd.read_csv('lexicons/ts.lex.txt',
                                     sep=" ",
                                     usecols=[0],
                                     header=None)
        word2index, embedding_matrix = load_glove_embeddings(
            'data/glove.twitter.27B.100d.txt',
            embedding_dim=100,
            vocab_size=1193514)
        lexicon_pandas.columns = ['word']
        # Prepend index 0 (padding slot); out-of-vocabulary words also map to 0.
        X = np.array([0] + [
            word2index.get(word, 0)
            for word in lexicon_pandas.word.values.tolist()
        ])
        np.save('data/lexicon_words_indices', X)
        # BUG FIX: the else-branch loads 'data/embeddings_matrix2.npy', but it
        # was never written here — the second run would crash if the indices
        # file existed without the matrix file. Save it alongside the indices.
        np.save('data/embeddings_matrix2', embedding_matrix)
    else:
        X = np.load('data/lexicon_words_indices.npy')
        embedding_matrix = np.load('data/embeddings_matrix2.npy')
    # Keep the leading padding row, drop lexicon words that fell back to 0 (OOV).
    mask = [True] + (X[1:] > 0).tolist()
    y = lexicon[mask]
    X = X[mask]
    X = X.reshape(-1, 1)

    model = embedding_to_sentiment_model(X[0].shape, embedding_matrix, 1,
                                         y.shape[1])

    # NOTE(review): `lr` was renamed `learning_rate` in newer Keras releases;
    # kept as-is for compatibility with the Keras version this project pins.
    opt = k.optimizers.Adam(lr=0.001, amsgrad=True)

    model.compile(optimizer=opt,
                  loss='mean_squared_error',
                  metrics=['mse', 'mae'])
    # Checkpoint only the best weights by validation loss.
    checkpoint = k.callbacks.ModelCheckpoint(
        'models/embedding_sentiment_model-{epoch:02d}-{val_loss:.2f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
        mode='min')
    csv_logger = k.callbacks.CSVLogger('logs/embedding_sentiment_model.log')
    model.fit(X,
              y,
              epochs=200,
              batch_size=5000,
              shuffle=True,
              callbacks=[checkpoint, csv_logger],
              validation_split=0.2)