Example #1
            w2v_model = pickle.load(f)
elif which_embedding == 'Google_W2V':
    if False:  # manual toggle: set to True to rebuild the cache from the raw GoogleNews binary
        print('loading google news word2vec...')
        FILENAME = "GoogleNews-vectors-negative300.bin.gz"
        w2v_model = KeyedVectors.load_word2vec_format(FILENAME,
                                                      binary=True,
                                                      limit=500000)
        corpus_vocab = set(word
                           for sentence in train_sentences + test_sentences
                           for word in sentence)
        # Words that occur in the corpus but are missing from the pretrained vocabulary.
        diff = list(corpus_vocab.difference(w2v_model.vocab))
        print(f'corpus vocabulary size: {len(corpus_vocab)}')
        print(f'lacking {len(diff)} words')
        # Give the missing words random vectors whose spread matches the
        # variance of the pretrained embeddings.
        a = np.var(w2v_model.vectors)
        w2v_model.add(entities=diff,
                      weights=np.random.uniform(-a, a, (len(diff), w2v_size)))
        del corpus_vocab
        del a

        with open('w2v_google.pkl', 'wb') as f:
            pickle.dump(w2v_model, f)

    else:
        print('loading saved google news word2vec...')
        with open('w2v_google.pkl', 'rb') as f:
            w2v_model = pickle.load(f)

print(w2v_model.wv.index2word[0])
# print(w2v_model.wv.vocab['</s>'].index)  # check that the dummy token '</s>' sits at index 0

pretrained_weights = w2v_model.wv.vectors
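
The `pretrained_weights` matrix is normally used to seed a Keras `Embedding` layer. The sketch below shows that step; the `keras.layers` import path and the choice to freeze the layer are assumptions for illustration, not part of the original example.

# Sketch (assumed usage): initialise an Embedding layer from the word2vec weights.
from keras.layers import Embedding

vocab_size, emb_dim = pretrained_weights.shape
embedding_layer = Embedding(input_dim=vocab_size,
                            output_dim=emb_dim,
                            weights=[pretrained_weights],
                            trainable=False)  # keep the pretrained vectors frozen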
Example #2
    if i < train_size:
        y_train[i, :] = [1.0, 0.0] if labels[index] == 1 else [0.0, 1.0]
    else:
        y_test[i - train_size, :] = [1.0, 0.0] if labels[index] == 1 else [0.0, 1.0]

print(x_train.shape, y_test.shape)
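
For reference, the same two-column one-hot encoding can be built in one vectorized step. This is only a sketch: it assumes `labels` is a 0/1 NumPy array whose rows are already in the train-then-test order used above, and `y_train_alt`/`y_test_alt` are illustrative names.

# Sketch: vectorized version of the one-hot loop above (assumes `labels` is a
# 0/1 numpy array already ordered as train rows followed by test rows).
import numpy as np

one_hot = np.zeros((len(labels), 2), dtype=np.float32)
one_hot[labels == 1, 0] = 1.0  # label 1 -> [1, 0]
one_hot[labels != 1, 1] = 1.0  # label 0 -> [0, 1]
y_train_alt, y_test_alt = one_hot[:train_size], one_hot[train_size:]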

# Neural Model
batch_size = 500
no_epochs = 20

# Conv1D feature extractor over the embedded token sequence, followed by a
# bidirectional LSTM and a dense classification head.
model = Sequential()
model.add(
    Conv1D(32,
           kernel_size=3,
           activation='elu',
           padding='same',
           input_shape=(max_no_tokens, vector_size)))
model.add(Conv1D(32, kernel_size=3, activation='elu', padding='same'))
model.add(Conv1D(32, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPooling1D(pool_size=3))
model.add(Bidirectional(LSTM(512, dropout=0.2, recurrent_dropout=0.3)))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.0001, decay=1e-6),