w2v_model = pickle.load(f) elif which_embedding == 'Google_W2V': if False: print('loading google news word2vec...') FILENAME = "GoogleNews-vectors-negative300.bin.gz" w2v_model = KeyedVectors.load_word2vec_format(FILENAME, binary=True, limit=500000) corpus_vocab = set(word for sentence in train_sentences + test_sentences for word in sentence) diff = list(corpus_vocab.difference(w2v_model.vocab)) print(len(corpus_vocab)) print(f'lacking {len(diff)} words') a = np.var(w2v_model.vectors) w2v_model.add(entities=diff, weights=np.random.uniform(-a, -a, (len(diff), w2v_size))) del corpus_vocab del a with open('w2v_google.pkl', 'wb') as f: pickle.dump(w2v_model, f) else: print('loading saved google news word2vec...') with open('w2v_google.pkl', 'rb') as f: w2v_model = pickle.load(f) print(w2v_model.wv.index2word[0]) # print(w2v_model.wv.vocab['</s>'].index) #dummy index 0인지 확인 pretrained_weights = w2v_model.wv.vectors
        # One-hot encode label for sample i: class 1 -> [1, 0], otherwise [0, 1].
        # First train_size samples go to y_train, the rest to y_test.
        # NOTE(review): loop header (for i / index) is outside this chunk — the
        # relationship between i and index cannot be confirmed from here.
        if i < train_size:
            y_train[i, :] = [1.0, 0.0] if labels[index] == 1 else [0.0, 1.0]
        else:
            y_test[i - train_size, :] = [1.0, 0.0] if labels[index] == 1 else [0.0, 1.0]

print(x_train.shape, y_test.shape)

# Neural Model
batch_size = 500
no_epochs = 20

# CNN feature extractor -> BiLSTM sequence encoder -> dense classifier head.
# Input is a padded token-vector sequence of shape (max_no_tokens, vector_size).
model = Sequential()
model.add(
    Conv1D(32, kernel_size=3, activation='elu', padding='same',
           input_shape=(max_no_tokens, vector_size)))
model.add(Conv1D(32, kernel_size=3, activation='elu', padding='same'))
model.add(Conv1D(32, kernel_size=3, activation='relu', padding='same'))
# Downsample the sequence 3x before the recurrent layer.
model.add(MaxPooling1D(pool_size=3))
model.add(Bidirectional(LSTM(512, dropout=0.2, recurrent_dropout=0.3)))
# Three sigmoid dense layers with dropout, then a 2-way softmax output
# matching the one-hot labels built above.
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(2, activation='softmax'))

# NOTE(review): this call is cut off at the end of the visible chunk — the
# remaining arguments (e.g. metrics=[...]) continue past this view.
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001, decay=1e-6),