def get_autoencoder_feature(data, max_features, max_len, model, tokenizer=None):
    '''
    @description: Build autoencoder features for a data set
    @param {type}
    data, data set handed to format_data
    max_features, forwarded unchanged to format_data
    max_len, forwarded to format_data; also the number of output columns
    model, object whose predict() output is reduced into the features
    tokenizer, optional tokenizer forwarded to format_data
    @return: DataFrame with columns ae0 .. ae<max_len - 1>
    '''
    # Format the data; the second item returned by format_data is not needed.
    # NOTE(review): shuffle=True is passed here; if format_data reorders rows,
    # the returned features will not line up with `data` - confirm.
    model_input, _unused = format_data(
        data, max_features, max_len, tokenizer=tokenizer, shuffle=True
    )

    # Run the model, then take the maximum along axis 1 of its output.
    predictions = model.predict(model_input, batch_size=64, verbose=1)
    pooled = predictions.max(axis=1)

    # One column per index in range(max_len).
    # NOTE(review): assumes the pooled array is exactly max_len wide - confirm.
    column_names = ['ae' + str(idx) for idx in range(max_len)]
    return pd.DataFrame(pooled, columns=column_names)
def get_autoencoder_feature(data, max_features, max_len, model, tokenizer=None):
    '''
    @description: Build autoencoder features for a data set
    @param {type}
    data, data set handed to format_data
    max_features, forwarded unchanged to format_data
    max_len, forwarded to format_data; also the number of output columns
    model, object whose predict() output becomes the features
    tokenizer, optional tokenizer forwarded to format_data
    @return: DataFrame with columns ae0 .. ae<max_len - 1>
    '''
    # TODO: 1. return the autoencoder embedding
    # NOTE(review): shuffle=True is passed here; if format_data reorders rows,
    # the returned features will not line up with `data` - confirm.
    model_input, _unused = format_data(
        data, max_features, max_len, tokenizer=tokenizer, shuffle=True
    )

    # The prediction is used as-is (no reduction over any axis).
    # NOTE(review): assumes predict() returns a 2-D array that is max_len
    # wide - confirm against the model's output layer.
    predictions = model.predict(model_input, batch_size=64, verbose=1)
    column_names = ['ae' + str(idx) for idx in range(max_len)]
    return pd.DataFrame(predictions, columns=column_names)
def train(self, data, epochs=1):
    '''
    @description: Train the autoencoder model to reproduce its own input
    @param {type}
    data, training data handed to format_data
    epochs, number of epochs passed to model.fit
    @return: None
    '''
    # Prepare the data; the formatted input and the tokenizer returned by
    # format_data are both kept on the instance.
    self.X, self.tokenizer = format_data(
        data,
        self.max_features,
        self.max_len,
        shuffle=True,
    )

    # The same array is passed as both input and target.
    self.model.fit(
        self.X,
        self.X,
        epochs=epochs,
        batch_size=128,
        verbose=1,
    )
def train(self, data, epochs=10):
    '''
    @description: Train the autoencoder model to reproduce its own input
    @param {type}
    data, training data handed to format_data
    epochs, number of epochs passed to model.fit
    @return: None
    '''
    # NOTE(review): self.embedding_dim is passed in the position where
    # get_autoencoder_feature passes max_features - confirm this is intended.
    self.x, self.tokenizer = format_data(
        data,
        self.embedding_dim,
        self.max_len,
        shuffle=True,
    )

    # The same array is passed as both input and target.
    self.model.fit(
        self.x,
        self.x,
        epochs=epochs,
        batch_size=32,
    )
def get_autoencoder_feature(data, max_features, max_len, model, tokenizer=None):
    '''
    @description: Build autoencoder features for a data set
    @param {type}
    data, data set handed to format_data
    max_features, forwarded unchanged to format_data
    max_len, forwarded to format_data; also the number of output columns
    model, object whose predict() output is reduced into the features
    tokenizer, optional tokenizer forwarded to format_data
    @return: DataFrame with columns ae0 .. ae<max_len - 1>
    '''
    # Format the data; the second item returned by format_data is not needed.
    # NOTE(review): shuffle=True is passed here; if format_data reorders rows,
    # the returned features will not line up with `data` - confirm.
    x, _ = format_data(data, max_features, max_len, tokenizer=tokenizer, shuffle=True)

    # Predict on the locally formatted input `x`. An uppercase `X` is never
    # bound in this function, so referring to it would raise NameError (or
    # silently pick up an unrelated module-level variable).
    predictions = model.predict(x, batch_size=64)

    # Take the maximum along axis 1 and label one column per index in
    # range(max_len).
    # NOTE(review): assumes the reduced array is exactly max_len wide - confirm.
    data_ae = pd.DataFrame(predictions.max(axis=1),
                           columns=['ae' + str(i) for i in range(max_len)])
    return data_ae