def test_batch_generator(batch_size=32):
    """Yield model-ready input batches, drawn half from positive and half from negative test reviews."""
    batch_size //= 2  # take batch_size // 2 files from each class per batch
    index = 0
    while index < test_num:
        sentences = []
        batch_pos = test_pos_files[index:min(index + batch_size, test_num)]
        batch_neg = test_neg_files[index:min(index + batch_size, test_num)]
        index += batch_size
        for file_name in batch_pos:
            with codecs.open(os.path.join(TEST_ROOT, 'pos', file_name), 'r',
                             'utf8') as reader:
                text = reader.read().strip()
                sentences.append(get_word_list_eng(text))
        for file_name in batch_neg:
            with codecs.open(os.path.join(TEST_ROOT, 'neg', file_name), 'r',
                             'utf8') as reader:
                text = reader.read().strip()
                sentences.append(get_word_list_eng(text))
        yield wc_embd.get_batch_input(sentences)
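
# Intended use of the generator above (a sketch, not executed here; it assumes
# the classification model built below has already been trained):
#
#     predicts = model.predict_generator(
#         generator=test_batch_generator(batch_size=32),
#         steps=(test_num + 15) // 16,  # ceil(test_num / 16) yields per pass
#     )
#
# Each yield holds up to 16 positive reviews followed by 16 negative ones, so
# predictions come back in that per-batch block order.
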
print('Train: %d  Validate: %d' % (train_num, val_num))

# Generate dictionaries for words and characters
print('Get dictionaries...')
wc_embd = WordCharEmbd(
    word_min_freq=5,
    char_min_freq=2,
    word_ignore_case=True,
    char_ignore_case=False,
)
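# Words seen fewer than 5 times and characters seen fewer than 2 times in the
# training data are excluded from the dictionaries (presumably mapped to the
# library's unknown-token index), which keeps rare-token noise out of the
# embeddings. Word lookup folds case while character lookup keeps it, since
# capitalization is itself a useful character-level signal.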
for file_name in train_pos_files:
    with codecs.open(os.path.join(TRAIN_ROOT, 'pos', file_name), 'r',
                     'utf8') as reader:
        text = reader.read().strip()
        wc_embd.update_dicts(get_word_list_eng(text))
for file_name in train_neg_files:
    with codecs.open(os.path.join(TRAIN_ROOT, 'neg', file_name), 'r',
                     'utf8') as reader:
        text = reader.read().strip()
        wc_embd.update_dicts(get_word_list_eng(text))
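
# The dictionaries now cover the full training vocabulary. A quick sanity check
# of what the test generator feeds the model (a sketch; the exact return
# structure is an assumption about the keras_word_char_embd API, which pads
# every sentence in a batch to a common length):
#
#     sample = wc_embd.get_batch_input([get_word_list_eng('A fine movie .')])
#     # expected: padded word-index and char-index arrays matching the two
#     # model inputs created below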

# Create model for classification
print('Create model...')
inputs, embd_layer = wc_embd.get_embedding_layer(word_embd_dim=150,
                                                 char_embd_dim=30,
                                                 char_hidden_dim=75,
                                                 char_hidden_layer_type='lstm')
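# `inputs` holds the word-index and char-index input tensors; `embd_layer`
# concatenates, per token, a 150-dim word embedding with a character-level
# summary built by an LSTM over 30-dim character vectors (75 hidden units per
# direction if the library's char encoder is bidirectional, giving roughly a
# 300-dim token representation; the exact width is an assumption about
# keras_word_char_embd internals).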
lstm_layer = keras.layers.Bidirectional(
    keras.layers.LSTM(units=50),
    name='Bi-LSTM',