Example #1
0
def _read_and_decode(split, max_ngram_len, feature='n-gram'):
    """Load movie reviews and return one split encoded as model features.

    Args:
        split: 'train', 'valid', or any other value for the test split.
        max_ngram_len: maximum n-gram sequence length passed to FeatureLoader.
        feature: 'n-gram' for character n-gram features, otherwise word features.

    Returns:
        Tuple of (features dict with 'text'/'labels'/'recons_labels',
        number of users, feature-vocabulary size, number of samples).
    """
    voca = Vocabulary(ku.voca_root)
    userhelper = UserHelper()
    reviews = ReviewLoader(ku.Movie, product_num=50).get_data()

    users = userhelper.get_users(reviews)
    user2idx = userhelper.user2idx(users)
    if feature == 'n-gram':
        feature2idx = voca.character_n_gram_table(reviews, min_threshold=6)
    else:
        feature2idx = voca.word_table(reviews, min_threshold=5)
    print('--------------------feature2idx-----------------', len(feature2idx))
    feature_loader = FeatureLoader(user2idx=user2idx,
                                   max_ngram_len=max_ngram_len,
                                   ngram2idx=feature2idx)
    # First 80% of the reviews form train+valid; the last 20% of that
    # portion is held out for validation (same scheme as sibling loaders).
    training_split = int(len(reviews) * 0.8)
    valid_split = training_split - int(training_split * 0.2)
    if split == 'train':
        X, Y = feature_loader.load_n_gram_idx_feature_label(
            reviews[:valid_split])
    elif split == 'valid':
        # BUG FIX: previously this branch also used reviews[:valid_split],
        # making the validation set identical to the training set.
        X, Y = feature_loader.load_n_gram_idx_feature_label(
            reviews[valid_split:training_split])
    else:
        X, Y = feature_loader.load_n_gram_idx_feature_label(
            reviews[training_split:])
    # Keep the raw integer labels alongside the one-hot encoding.
    recons_Y = Y
    Y = keras.utils.to_categorical(Y, num_classes=len(user2idx))
    features = {'text': X, 'labels': Y, 'recons_labels': recons_Y}
    print('X.shape: ', X.shape)
    print('Y.shape: ', Y.shape)
    return features, len(user2idx), len(feature2idx), X.shape[0]
Example #2
0
 def feature_label(self):
     """Encode this loader's reviews into (features, labels) arrays.

     Builds a FeatureLoader from the instance's vocabulary settings and
     delegates the actual n-gram index encoding to it.
     """
     loader = FeatureLoader(max_ngram_len=self.max_len,
                            user2idx=self.user2idx,
                            ngram2idx=self.feature2idx)
     features, labels = loader.load_n_gram_idx_feature_label(self.reviews)
     return features, labels
Example #3
0
def get_feature(reviews):
    """Build the feature table for *reviews* and encode them.

    Returns (X, Y, vocabulary size). Relies on the module-level
    ``feature_name``, ``voca``, ``user2idx`` and ``max_len`` bindings.
    """
    if feature_name == 'n-gram':
        table = voca.character_n_gram_table(reviews, min_threshold=6)
    else:
        table = voca.word_table(reviews, min_threshold=5)
    loader = FeatureLoader(user2idx=user2idx,
                           max_ngram_len=max_len,
                           ngram2idx=table)
    X, Y = loader.load_n_gram_idx_feature_label(reviews)
    return X, Y, len(table)
Example #4
0
def get_feature(reviews, split):
    """Encode one split of *reviews* into one-hot-labelled features.

    Args:
        reviews: full review list; split boundaries are computed from its length.
        split: 'train', 'valid', or any other value for the test split.

    Returns:
        Tuple of (X, one-hot Y, feature-vocabulary size). Relies on the
        module-level ``feature_name``, ``voca``, ``user2idx`` and ``max_len``.
    """
    if feature_name == 'n-gram':
        feature2idx = voca.character_n_gram_table(reviews, min_threshold=6)
    else:
        feature2idx = voca.word_table(reviews, min_threshold=5)
    feature_loader = FeatureLoader(user2idx=user2idx,
                                   max_ngram_len=max_len,
                                   ngram2idx=feature2idx)
    # First 80% of the reviews form train+valid; the last 20% of that
    # portion is the validation set.
    training_split = int(len(reviews) * 0.8)
    valid_split = training_split - int(training_split * 0.2)
    if split == 'train':
        # CONSISTENCY FIX: train previously took reviews[:training_split -
        # valid_split] (the small ~16% chunk) while valid got the large one —
        # inverted relative to the other loaders in this module.
        X, Y = feature_loader.load_n_gram_idx_feature_label(
            reviews[:valid_split])
    elif split == 'valid':
        X, Y = feature_loader.load_n_gram_idx_feature_label(
            reviews[valid_split:training_split])
    else:
        X, Y = feature_loader.load_n_gram_idx_feature_label(
            reviews[training_split:])

    Y = keras.utils.to_categorical(Y, num_classes=len(user2idx))
    return X, Y, len(feature2idx)
Example #5
0
 def load_feature_label(self, split):
     """Return (x, y, vocab_size) long tensors for the requested split.

     The first 80% of samples form train+valid; the last 20% of that
     portion is validation, and the remaining samples are the test set.
     """
     feature2idx = self.feature2idx()
     loader = FeatureLoader(max_ngram_len=self.max_len,
                            user2idx=self.user2idx,
                            ngram2idx=feature2idx)
     x, y = loader.load_n_gram_idx_feature_label(self.reviews)
     train_end = int(x.shape[0] * 0.8)
     valid_start = train_end - int(train_end * 0.2)
     if split == 'train':
         sel = slice(None, valid_start)
     elif split == 'valid':
         sel = slice(valid_start, train_end)
     else:
         sel = slice(train_end, None)
     x, y = x[sel, :], y[sel]
     return (torch.tensor(x, dtype=torch.long),
             torch.tensor(y, dtype=torch.long),
             len(feature2idx))
Example #6
0
}
feature_loader = FeatureLoader(**data_params)

# TextCNN hyper-parameters; user/vocab counts come from the tables built above.
# NOTE(review): assumes `user2idx`, `max_ngram_len`, `ngram2idx` and `reviews`
# are bound earlier in this file — the opening of `data_params` is not visible here.
param = {
    'kernel_size': [3, 5, 7],
    'batch_size': 32,
    'epochs': 100,
    'loss': 'categorical_crossentropy',
    'embedding_dim': 100,
    'user_num': len(user2idx),
    'max_ngram_len': max_ngram_len,
    'feature_num': 300,
    'vocab_size': len(ngram2idx)
}
#
#
x, y = feature_loader.load_n_gram_idx_feature_label(reviews)

# 80/20 train/test split by sample index (no shuffling performed here).
training_split = int(0.8 * x.shape[0])
training_x, training_y = x[:training_split, :], y[:training_split]
testing_x, testing_y = x[training_split:, ], y[training_split:]

model = TextCNN(**param)
model.fit(training_x, training_y)
# Round-trip the trained weights through disk before evaluating.
model.save_weight(ku.CNN_AST_model)
model.load_weight(ku.CNN_AST_model)
res = model.evaluate(testing_x, testing_y)
testing_loss = res[0]
testing_acc = res[1]
print('testing_loss: {}, testing_acc: {}'.format(testing_loss, testing_acc))