# Prepare the data
X, y, classes = load_hotel_comment()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=7432
)
num_classes = len(classes)

# Convert text to character ids
tokenizer = SimpleTokenizer()
tokenizer.fit(X_train)
X_train = tokenizer.transform(X_train)
X_test = tokenizer.transform(X_test)

# Either estimate maxlen from the data or fix it manually
maxlen = find_best_maxlen(X_train, mode="max")
maxlen = 128

X_train = sequence.pad_sequences(
    X_train,
    maxlen=maxlen,
    dtype="int32",
    padding="post",
    truncating="post",
    value=0.0,
)
X_test = sequence.pad_sequences(
    X_test,
    maxlen=maxlen,
    dtype="int32",
    padding="post",
    truncating="post",
    value=0.0,
)
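`SimpleTokenizer` and `find_best_maxlen` are project-specific helpers rather than Keras utilities, so their implementation is not shown in this listing. As a rough, hypothetical sketch of what `find_best_maxlen` might do (the percentile choice and the `mode` handling below are assumptions, not the project's actual code):

```python
import numpy as np

def find_best_maxlen(X, mode="percentile"):
    # X: a list of token-id sequences produced by the tokenizer
    lengths = np.array([len(x) for x in X])
    if mode == "max":
        # Cover every sample; usually far longer than necessary
        return int(lengths.max())
    if mode == "mean":
        return int(lengths.mean())
    # Default: a high percentile, so most samples fit without truncation
    return int(np.percentile(lengths, 95))
```

Whatever the helper returns, the listing above immediately overrides it with a fixed `maxlen = 128`, which keeps the padded sequences short and the training cost predictable.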
        results = super().train_step(data)  # run the ordinary train_step
        embeddings.assign_sub(delta)        # remove the perturbation from the Embedding matrix
        return results

X, y, classes = load_hotel_comment()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.6, random_state=7384672
)
num_classes = len(classes)

tokenizer = SimpleTokenizer()
tokenizer.fit(X_train)

# Either estimate maxlen from the data or fix it manually
maxlen = find_best_maxlen(X_train)
# maxlen = 256

def create_dataset(X, y, maxlen=maxlen):
    X = tokenizer.transform(X)
    X = sequence.pad_sequences(
        X,
        maxlen=maxlen,
        dtype="int32",
        padding="post",
        truncating="post",
        value=0.0,
    )
    y = tf.keras.utils.to_categorical(y)
    return X, y
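The three indented lines at the top of this listing are the tail of a custom `train_step` that perturbs the Embedding matrix before the regular optimization step and restores it afterwards (FGM-style adversarial training). The rest of the method falls outside this excerpt; below is a minimal sketch of how such a `train_step` could look, assuming a functional Keras model whose embedding layer is named `"embedding"` (the class name, layer name, and `epsilon` value are illustrative assumptions, not the original code):

```python
import tensorflow as tf

class AdversarialModel(tf.keras.Model):
    """Illustrative FGM-style adversarial training on the Embedding matrix."""

    def train_step(self, data):
        x, y = data  # sketch: ignores optional sample weights
        embeddings = self.get_layer("embedding").embeddings  # the lookup matrix variable

        # 1. Gradient of the loss w.r.t. the Embedding matrix
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred)
        grads = tf.convert_to_tensor(tape.gradient(loss, embeddings))

        # 2. FGM perturbation along the gradient direction
        epsilon = 0.5
        delta = epsilon * grads / (tf.norm(grads) + 1e-8)
        embeddings.assign_add(delta)

        # 3. Ordinary train_step on the perturbed embeddings, then undo the perturbation
        results = super().train_step(data)  # run the ordinary train_step
        embeddings.assign_sub(delta)        # remove the perturbation from the Embedding matrix
        return results
```

Because the class only overrides `train_step`, it can be instantiated like a normal functional model, for example `model = AdversarialModel(inputs, outputs)`, then compiled and fit as usual.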
        value=0,
    )
    return X

def create_dataset(Xa, Xp, Xn, maxlen):
    Xa = tokenizer.transform(Xa)
    Xp = tokenizer.transform(Xp)
    Xn = tokenizer.transform(Xn)
    Xa = pad(Xa, maxlen)
    Xp = pad(Xp, maxlen)
    Xn = pad(Xn, maxlen)
    return Xa, Xp, Xn

# Either estimate maxlen from the data or fix it manually
maxlen = find_best_maxlen(Xa)
maxlen = 48

hdims = 128
epochs = 1
num_words = len(tokenizer)
embedding_dims = 128

x1_input = Input(shape=(maxlen,))
x2_input = Input(shape=(maxlen,))
x3_input = Input(shape=(maxlen,))

# Compute the global padding masks
x1_mask = Lambda(lambda x: tf.not_equal(x, 0))(x1_input)
x2_mask = Lambda(lambda x: tf.not_equal(x, 0))(x2_input)
x3_mask = Lambda(lambda x: tf.not_equal(x, 0))(x3_input)

embedding = Embedding(num_words,