# Imports used by the examples in this section
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import Input, Embedding, Lambda
from sklearn.model_selection import train_test_split

# load_hotel_comment, SimpleTokenizer and find_best_maxlen are project-local helpers
# (dataset loading, character-level tokenization, and sequence-length estimation)

# Prepare the data
X, y, classes = load_hotel_comment()
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    train_size=0.8,
                                                    random_state=7432)

num_classes = len(classes)
# Convert texts to character ids
tokenizer = SimpleTokenizer()
tokenizer.fit(X_train)
X_train = tokenizer.transform(X_train)
X_test = tokenizer.transform(X_test)
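# SimpleTokenizer is not shown here; the sketch below is an assumed, minimal
# character-level tokenizer with the same fit / transform / __len__ interface.
# Id 0 is reserved for padding so the pad value and masks used later stay valid.
from collections import Counter

class CharTokenizer:
    def __init__(self, min_freq=1):
        self.char2id = {}
        self.min_freq = min_freq

    def fit(self, texts):
        counts = Counter(ch for text in texts for ch in text)
        # ids start at 2: 0 = padding, 1 = unknown character
        for i, (ch, n) in enumerate(counts.most_common(), start=2):
            if n >= self.min_freq:
                self.char2id[ch] = i

    def transform(self, texts):
        return [[self.char2id.get(ch, 1) for ch in text] for text in texts]

    def __len__(self):
        # vocabulary size including the padding and unknown ids
        return len(self.char2id) + 2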

# maxlen = find_best_maxlen(X_train, mode="max")  # estimate from the training data
maxlen = 128  # fixed sequence length used here
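# find_best_maxlen is also project-local; a plausible, purely illustrative
# implementation picks a length that covers most samples (e.g. the 95th percentile),
# while mode="max" would simply take the longest sequence.
def estimate_maxlen(sequences, quantile=0.95):
    lengths = sorted(len(seq) for seq in sequences)
    return lengths[int(quantile * (len(lengths) - 1))]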

X_train = sequence.pad_sequences(X_train,
                                 maxlen=maxlen,
                                 dtype="int32",
                                 padding="post",
                                 truncating="post",
                                 value=0.0)

X_test = sequence.pad_sequences(X_test,
                                maxlen=maxlen,
                                dtype="int32",
                                padding="post",
                                truncating="post",
                                value=0.0)
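# The original example stops after padding. A minimal, illustrative classifier
# consuming these arrays could look like the following; the layer sizes, optimizer
# and epoch count are assumptions, not taken from the source.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(tokenizer), 128, mask_zero=True),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(num_classes, activation="softmax"),
])
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",  # y_train holds integer class ids
              metrics=["accuracy"])
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3)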
Example #2
        results = super().train_step(data)  # run the regular train_step
        embeddings.assign_sub(delta)  # remove the perturbation from the Embedding matrix
        return results
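# Only the tail of the adversarial train_step survives above. A fuller FGM-style
# sketch of such a method (for tf.keras 2.x) is given below; the class name, the
# epsilon value and the way the Embedding variable is located are assumptions.
class AdversarialModel(tf.keras.Model):
    def __init__(self, *args, epsilon=0.5, **kwargs):
        super().__init__(*args, **kwargs)
        self.epsilon = epsilon

    def train_step(self, data):
        x, y = data
        # locate the Embedding weight matrix
        embeddings = None
        for layer in self.layers:
            if isinstance(layer, tf.keras.layers.Embedding):
                embeddings = layer.embeddings
                break
        # gradient of the loss w.r.t. the Embedding matrix
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred)
        grads = tape.gradient(loss, embeddings)
        grads = tf.convert_to_tensor(grads)  # may come back as IndexedSlices
        delta = self.epsilon * grads / (tf.norm(grads) + 1e-6)
        embeddings.assign_add(delta)  # add the perturbation
        results = super().train_step(data)  # regular optimisation step on the perturbed embeddings
        embeddings.assign_sub(delta)  # remove the perturbation
        return results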


X, y, classes = load_hotel_comment()
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    train_size=0.6,
                                                    random_state=7384672)

num_classes = len(classes)
tokenizer = SimpleTokenizer()
tokenizer.fit(X_train)

maxlen = find_best_maxlen(X_train)
# maxlen = 256


def create_dataset(X, y, maxlen=maxlen):
    X = tokenizer.transform(X)
    X = sequence.pad_sequences(X,
                               maxlen=maxlen,
                               dtype="int32",
                               padding="post",
                               truncating="post",
                               value=0.0)
    y = tf.keras.utils.to_categorical(y)
    return X, y
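# Likely usage of the helper above (the *_pad / *_cat names are placeholders):
X_train_pad, y_train_cat = create_dataset(X_train, y_train)
X_test_pad, y_test_cat = create_dataset(X_test, y_test)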

Example #3


def pad(X, maxlen):
    # assumed to mirror the post-padding settings used in the earlier examples
    return sequence.pad_sequences(X, maxlen=maxlen, dtype="int32",
                                  padding="post", truncating="post", value=0)


def create_dataset(Xa, Xp, Xn, maxlen):
    Xa = tokenizer.transform(Xa)
    Xp = tokenizer.transform(Xp)
    Xn = tokenizer.transform(Xn)

    Xa = pad(Xa, maxlen)
    Xp = pad(Xp, maxlen)
    Xn = pad(Xn, maxlen)
    return Xa, Xp, Xn


# maxlen = find_best_maxlen(Xa)  # estimate from the data
maxlen = 48  # fixed sequence length used here
hdims = 128
epochs = 1
num_words = len(tokenizer)
embedding_dims = 128

x1_input = Input(shape=(maxlen, ))
x2_input = Input(shape=(maxlen, ))
x3_input = Input(shape=(maxlen, ))
# Compute the global padding masks (True at non-padding positions)
x1_mask = Lambda(lambda x: tf.not_equal(x, 0))(x1_input)
x2_mask = Lambda(lambda x: tf.not_equal(x, 0))(x2_input)
x3_mask = Lambda(lambda x: tf.not_equal(x, 0))(x3_input)
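# The masks above are typically fed to mask-aware downstream layers so that padded
# positions are ignored; the pooling layer below is an illustrative sketch of that
# pattern (its name and the small epsilon are assumptions, not part of the original).
class MaskedGlobalAveragePooling1D(tf.keras.layers.Layer):
    def call(self, inputs):
        x, mask = inputs  # x: (batch, time, dims), mask: boolean (batch, time)
        mask = tf.cast(mask, x.dtype)[:, :, tf.newaxis]
        return tf.reduce_sum(x * mask, axis=1) / (tf.reduce_sum(mask, axis=1) + 1e-9)

# e.g. pooled = MaskedGlobalAveragePooling1D()([encoded_x1, x1_mask])
# where encoded_x1 is a hypothetical (batch, time, hdims) feature tensor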

embedding = Embedding(num_words,