# --- Dataset statistics and train/test preparation (script-level) ---
# Relies on names defined earlier in this file: text_data, word_vocab,
# feature_vocab, add_keyword_attention, keyword_num, seq_length,
# hidden_dim, test_rate, np (numpy), chain (itertools).

label = text_data.label
features = text_data.feature_names
num_features = len(features)
# Distinct label values define the number of classes.
num_classes = len(set(label))
print('num_classes = {}, num_features = {}'.format(num_classes, num_features))

# NOTE(review): 'creat_id_sentences' looks like a typo for 'create_id_sentences',
# but it is the method's actual name on text_data — left unchanged to avoid
# breaking the external API.
sentences = text_data.creat_id_sentences(word_vocab, feature_vocab)
sentences_length = [len(sentence) for sentence in sentences]
mean_seq_length = np.mean(sentences_length)
max_seq_length = np.max(sentences_length)
print('mean_seq_length = {}, max_seq_length = {}'.format(
    mean_seq_length, max_seq_length))

if add_keyword_attention:
    # NOTE(review): 'extrac_keywords' is likewise the (typo'd) external API name.
    keywords = text_data.extrac_keywords(keyword_num)
    # chain.from_iterable flattens the nested keyword lists lazily; the original
    # list(chain(*keywords)) built a throwaway intermediate list and unpacked
    # every sublist as a call argument for no benefit.
    keywords_id = [word_vocab[word] for word in chain.from_iterable(keywords)]
else:
    keywords_id = None

# Split id-encoded sentences into train/test inputs, targets, and masks.
train_x, train_y, test_x, test_y, mask_train, mask_test = text_data.data_split(
    text_data=sentences,
    seq_length=seq_length,
    hidden_dim=hidden_dim,
    test_rate=test_rate)
print(len(train_x), len(train_y), len(test_x), len(test_y),
      len(mask_train), len(mask_test))

# Convert the training-side splits to numpy arrays for downstream model code.
# (Test-side splits are intentionally left as-is — presumably converted later;
# verify against the rest of the file.)
train_x = np.array(train_x)
train_y = np.array(train_y)
mask_train = np.array(mask_train)