Esempio n. 1
0
def get_autoencoder_feature(data,
                            max_features,
                            max_len,
                            model,
                            tokenizer=None):
    '''
    @description: Build a DataFrame of autoencoder-derived features for data.
    @param {type}
    data, data set forwarded to format_data
    max_features, forwarded unchanged to format_data
    max_len, forwarded to format_data; also the number of output columns
    model, object whose predict() is called on the formatted data
    tokenizer, forwarded to format_data as the tokenizer keyword
    @return: DataFrame with columns 'ae0' .. 'ae<max_len - 1>'
    '''
    # Format the raw data; the second value returned by format_data is unused.
    # NOTE(review): shuffle=True is requested while extracting features. If
    # format_data reorders rows, the output will not line up with `data` --
    # confirm against format_data.
    formatted, _ = format_data(data,
                               max_features,
                               max_len,
                               tokenizer=tokenizer,
                               shuffle=True)

    # Run the model, then keep the maximum along axis 1 of its output.
    predictions = model.predict(formatted, batch_size=64, verbose=1)
    pooled = predictions.max(axis=1)

    column_names = ['ae' + str(idx) for idx in range(max_len)]
    return pd.DataFrame(pooled, columns=column_names)
Esempio n. 2
0
def get_autoencoder_feature(data,
                            max_features,
                            max_len,
                            model,
                            tokenizer=None):
    '''
    @description: Return the autoencoder output for data as a DataFrame.
    @param {type}
    data, data set forwarded to format_data
    max_features, forwarded unchanged to format_data
    max_len, forwarded to format_data; also the number of output columns
    model, object whose predict() is called on the formatted data
    tokenizer, forwarded to format_data as the tokenizer keyword
    @return: DataFrame with columns 'ae0' .. 'ae<max_len - 1>'
    '''
    # Format the raw data; the second value returned by format_data is unused.
    # NOTE(review): shuffle=True is requested while extracting features. If
    # format_data reorders rows, the output will not line up with `data` --
    # confirm against format_data.
    formatted, _ = format_data(data,
                               max_features,
                               max_len,
                               tokenizer=tokenizer,
                               shuffle=True)

    # The raw prediction is used directly as the embedding (no pooling here).
    embedding = model.predict(formatted, batch_size=64, verbose=1)
    column_names = ['ae' + str(idx) for idx in range(max_len)]
    return pd.DataFrame(embedding, columns=column_names)
Esempio n. 3
0
 def train(self, data, epochs=1):
     '''
     @description: Fit self.model to reproduce its own formatted input.
     @param {type}
     data, training data forwarded to format_data
     epochs, value passed to model.fit as epochs
     @return: None; stores the formatted data on self.X and the second
              value returned by format_data on self.tokenizer
     '''
     # Prepare the data and keep both results on the instance.
     formatted = format_data(data,
                             self.max_features,
                             self.max_len,
                             shuffle=True)
     self.X, self.tokenizer = formatted

     # Input and target are the same object: the model learns to rebuild X.
     fit_options = dict(epochs=epochs, batch_size=128, verbose=1)
     self.model.fit(self.X, self.X, **fit_options)
Esempio n. 4
0
 def train(self, data, epochs=10):
     '''
     @description: Fit self.model to reproduce its own formatted input.
     @param {type}
     data, training data forwarded to format_data
     epochs, value passed to model.fit as epochs
     @return: None; stores the formatted data on self.x and the second
              value returned by format_data on self.tokenizer
     '''
     # NOTE(review): self.embedding_dim is passed as format_data's second
     # positional argument -- confirm this is the value that helper expects.
     formatted, fitted_tokenizer = format_data(
         data,
         self.embedding_dim,
         self.max_len,
         shuffle=True,
     )
     self.x = formatted
     self.tokenizer = fitted_tokenizer

     # Input and target are the same object: the model learns to rebuild x.
     self.model.fit(self.x, self.x, epochs=epochs, batch_size=32)
Esempio n. 5
0
def get_autoencoder_feature(data, max_features, max_len, model, tokenizer=None):
    '''
    @description: Build a DataFrame of autoencoder-derived features for data.
    @param {type}
    data, data set forwarded to format_data
    max_features, forwarded unchanged to format_data
    max_len, forwarded to format_data; also the number of output columns
    model, object whose predict() is called on the formatted data
    tokenizer, forwarded to format_data as the tokenizer keyword
    @return: DataFrame with columns 'ae0' .. 'ae<max_len - 1>'
    '''
    # The second value returned by format_data is not needed here.
    # NOTE(review): shuffle=True is requested while extracting features. If
    # format_data reorders rows, the output will not line up with `data` --
    # confirm against format_data.
    x, _ = format_data(data, max_features, max_len, tokenizer=tokenizer, shuffle=True)

    # Fix: predict on the local `x`. The previous code referenced `X`, which
    # is never bound in this function, so the call raised NameError (or
    # silently picked up an unrelated module-level `X`).
    pooled = model.predict(x, batch_size=64).max(axis=1)

    columns = ['ae' + str(i) for i in range(max_len)]
    return pd.DataFrame(pooled, columns=columns)