# NOTE: the imports below are assumed from context: the Keras functional API,
# scikit-learn metrics, and the flipGradientTF gradient-reversal layer.
# Data helpers such as load_data_iter, load_domain_iter, data_gen,
# domain_data_gen, and data_loader are project-specific and defined elsewhere.
import numpy as np
import keras
from keras.layers import (Input, Embedding, Dense, Dropout, LSTM, Bidirectional,
                          Conv1D, MaxPool1D, Flatten)
from keras.models import Model
from sklearn.metrics import f1_score, classification_report

import flipGradientTF


def run_dnn(data_name, dkeys):
    print('Working on: ' + data_name)
    # parameters
    sent_len = 50 # the max length of sentence
    wt_path = '../../data/weight/'+ data_name + '.npy'
    epoch_num = 10
    dp_rate = 0.2

    """Preprocess"""
    # training data
    train_data = load_data_iter(
        data_name, suffix='train'
    )
    # domain data
    domain_data, dlabel_encoder = load_domain_iter(data_name, dkeys)

    # load weights
    weights = np.load(wt_path)

    """Model, share layers between domain inputs and sentiment inputs"""
    # embedding
    embedding = Embedding(
        weights.shape[0], weights.shape[1], # size of data embedding
        weights=[weights], input_length=sent_len,
        trainable=True,
        name='embedding'
    )
    
    # Bi-LSTM encoders: one shared across tasks plus task-specific encoders
    # for the sentiment and domain branches
    bilstm_shared = Bidirectional(LSTM(200, dropout=dp_rate), name='shared_lstm')
    bilstm_senti = Bidirectional(LSTM(200, dropout=dp_rate), name='senti_lstm')
    bilstm_domain = Bidirectional(LSTM(200, dropout=dp_rate), name='domain_lstm')

    '''for sentiment clfs'''
    # input
    text_input = Input(shape=(sent_len,), dtype='int32', name='text_input')
    # define sentiment task layers
    emb_senti = embedding(text_input)
    lstm_shared = bilstm_shared(emb_senti)
    lstm_senti = bilstm_senti(emb_senti)
    merge_lstm = keras.layers.concatenate([lstm_senti, lstm_shared], axis=-1)
    dense_1 = Dense(128, activation='relu')(merge_lstm)
    dense_dp = Dropout(dp_rate)(dense_1)
    senti_preds = Dense(1, activation='sigmoid', name='senti')(dense_dp) # binary

    # for domain prediction
    hp_lambda = 0.01
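    # flipGradientTF.GradientReversal is the identity on the forward pass and
    # reverses (negates) gradients on the backward pass, scaled by hp_lambda,
    # so the layers shared with the sentiment task are pushed toward
    # domain-invariant features (DANN-style adversarial training)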

    """Obtain the number of domain label, share layers with sentiment task"""
    domain_inputs = dict()
    for dkey in dkeys:
        domain_inputs[dkey] = [
            Input(
                shape=(sent_len,), dtype='int32', name='domain'+str(dkey)+'_input'
            )
        ]
        # shared layers start
        domain_inputs[dkey].append(
            embedding(domain_inputs[dkey][-1])
        )
        domain_inputs[dkey].append(
            bilstm_shared(domain_inputs[dkey][-1])
        )
        # shared layers end
        domain_inputs[dkey].append(
            bilstm_domain(domain_inputs[dkey][-2]) # embedding as input
        )
        domain_inputs[dkey].append(
            keras.layers.concatenate([domain_inputs[dkey][-2], domain_inputs[dkey][-1]], axis=-1)
        )
        domain_inputs[dkey].append(
            flipGradientTF.GradientReversal(
                hp_lambda, name='domain'+str(dkey)+'_flip'
            )(domain_inputs[dkey][-1])
        )
#        domain_inputs[dkey].append(
#            Dense(
#                128, activation='relu', name='domain'+str(dkey)+'_dense'
#            )(domain_inputs[dkey][-1])
#        )
#        domain_inputs[dkey].append(Dropout(dp_rate)(domain_inputs[dkey][-1]))

        dim_size = len(dlabel_encoder[dkey])
        print(dim_size)
        if dim_size == 2:
            dim_size = 1
        # check the label size
        if dim_size == 1:
            domain_inputs[dkey].append(
                Dense(
                    dim_size, activation='sigmoid', name='domain'+str(dkey)+'_pred'
                )(domain_inputs[dkey][-1])
            )
        else:
            domain_inputs[dkey].append(
                Dense(
                    dim_size, activation='softmax', name='domain'+str(dkey)+'_pred'
                )(domain_inputs[dkey][-1])
            )

    model_sentiment = Model(
        # the first element of each domain task is the input layer
        inputs=[text_input] + [domain_inputs[dkey][0] for dkey in sorted(dkeys)], 
        # the last layer of each domain task is the prediction layer        
        outputs=[senti_preds] + [domain_inputs[dkey][-1] for dkey in sorted(dkeys)],
    )

    # build loss (weight) for each domain
    loss_dict = {'senti': 'binary_crossentropy'}
    loss_w_dict = {'senti': 1}
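    # the domain heads share a total loss weight of 0.1, split evenly across
    # domains, so the adversarial objective stays auxiliary to the sentiment
    # loss (which keeps weight 1)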
    for dkey in dkeys:
        loss_w_dict['domain'+str(dkey)+'_pred'] = 0.1/len(dkeys)
        if len(dlabel_encoder[dkey]) > 2:
            loss_dict['domain'+str(dkey)+'_pred'] = 'categorical_crossentropy'
        else:
            loss_dict['domain'+str(dkey)+'_pred'] = 'binary_crossentropy'

    model_sentiment.compile(
        loss=loss_dict,
        loss_weights=loss_w_dict,
        optimizer='adam')
    model_sentiment.summary()

    # fit the model
    cls_w = {'senti': 'auto'}
    for dkey in dkeys:
        cls_w['domain'+str(dkey)+'_pred'] = 'auto'

    # load the development set
    dev_data = load_data_iter(data_name, suffix='dev')
    best_dev = 0
    # test data
    test_data = load_data_iter(data_name, suffix='test')

    for e in range(epoch_num):
        accuracy = 0.0
        loss = 0.0
        step = 1

        print('--------------Epoch: {}--------------'.format(e))

        train_iter = data_gen(train_data)
        # train sentiment
        # train on batches
        for x_train, y_labels in train_iter:
            batch_docs, batch_labels = domain_data_gen(domain_data, dkeys, len(x_train))
            batch_docs['text_input'] = x_train
            batch_labels['senti'] = y_labels

            # skip only 1 class in the training data
            if len(np.unique(batch_labels['senti'])) == 1:
                continue

            # train sentiment model
            tmp_senti = model_sentiment.train_on_batch(
                batch_docs,
                batch_labels,
                class_weight=cls_w,
            )
            # calculate loss and accuracy
            loss += tmp_senti[0]
            loss_avg = loss / step
            if step % 40 == 0:
                print('Step: {}'.format(step))
                print('\tLoss: {}.'.format(loss_avg))
                print('-------------------------------------------------')
            step += 1

        # validation process
        y_preds_dev = []
        y_devs = []
        dev_iter = data_gen(dev_data)

        for x_dev, y_dev in dev_iter:
            x_dev = np.asarray(x_dev)
            tmp_preds = model_sentiment.predict([x_dev for _ in range(len(dkeys) + 1)])
            for item_tmp in tmp_preds[0]:
                y_preds_dev.append(np.round(item_tmp[0]))
            for item_tmp in y_dev:
                y_devs.append(int(item_tmp))
        cur_dev = f1_score(y_true=y_devs, y_pred=y_preds_dev, average='weighted')

        # if we get better dev result, test
        if cur_dev > best_dev:
            best_dev = cur_dev
            test_iter = data_gen(test_data)
            y_preds = []
            y_tests = []

            for x_test, y_test in test_iter:
                x_test = np.asarray(x_test)
                tmp_preds = model_sentiment.predict([x_test for _ in range(len(dkeys) + 1)])
                for item_tmp in tmp_preds[0]:
                    y_preds.append(np.round(item_tmp[0]))
                for item_tmp in y_test:
                    y_tests.append(int(item_tmp))
            test_result = open('./DANN_keras_sample_single_domain_lstm3_' + str(dkeys) + '.txt', 'a')
            test_result.write(data_name + '\t' + ','.join(map(str, dkeys)) + '\t' + str(e) + '\n')
            test_result.write(str(f1_score(y_true=y_tests, y_pred=y_preds, average='weighted')) + '\n')
            test_result.write(classification_report(y_true=y_tests, y_pred=y_preds, digits=3))
            test_result.write('...............................................................\n\n')
            test_result.flush()
            test_result.close()
def run_dnn(data_pair):
    print('Working on: ' + data_pair[1])
    wt_path = './weights/' + data_pair[1] + '.npy'
    train_path = './data/' + data_pair[1] + '_source.txt'
    valid_path = './data/' + data_pair[1] + '_valid.txt'
    test_path = './data/' + data_pair[1] + '_target.txt'
    epoch_num = 15

    # parameters
    sent_len = 60  # the max length of sentence

    # load the data
    domain_data, train_data, valid_data, test_data, label_encoder, domain_encoder = data_loader(
        data_pair[1])

    label_encoder = list(sorted(label_encoder))
    domain_encoder = list(sorted(domain_encoder))
    """Preprocess"""
    # load weights
    weights = np.load(wt_path)

    # inputs
    text_input = Input(shape=(sent_len, ), dtype='int32', name='text_input')
    domain_input = Input(shape=(sent_len, ),
                         dtype='int32',
                         name='domain_input')

    # shared embedding
    embedding = Embedding(
        weights.shape[0],
        weights.shape[1],  # size of data embedding
        weights=[weights],
        input_length=sent_len,
        trainable=False,
        name='embedding')

    # shared CNN
    conv1 = Conv1D(
        filters=300,
        kernel_size=5,
        padding='valid',
        strides=1,
    )
    conv2 = Conv1D(
        filters=200,
        kernel_size=7,
        padding='valid',
        strides=1,
    )
    max_pool = MaxPool1D()
    flatten = Flatten()
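    # NOTE: the layer objects above (embedding, conv1, conv2, max_pool, flatten)
    # are applied to both the text input and the domain input below, so their
    # weights are shared between the sentiment and the domain branches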

    # start to share
    sent_embed = embedding(text_input)
    domain_embed = embedding(domain_input)

    sent_conv1 = conv1(sent_embed)
    domain_conv1 = conv1(domain_embed)

    sent_conv2 = conv2(sent_conv1)
    domain_conv2 = conv2(domain_conv1)

    sent_pool = max_pool(sent_conv2)
    domain_pool = max_pool(domain_conv2)

    sent_flat = flatten(sent_pool)
    domain_flat = flatten(domain_pool)

    # for sentiment clf
    dense_1 = Dense(128, activation='relu')(sent_flat)
    dense_dp = Dropout(0.2)(dense_1)

    # for domain prediction
    hp_lambda = 0.01

    flip = flipGradientTF.GradientReversal(hp_lambda)(domain_flat)
    dense_da = Dense(128, activation='relu')(flip)
    dense_da_dp = Dropout(0.2)(dense_da)
    da_preds = Dense(len(domain_encoder), activation='softmax',
                     name='domain')(dense_da_dp)  # multiple

    if 'dianping' in data_pair[1] or 'amazon' in data_pair[1] or 'yelp' in data_pair[1]:
        sentiment_preds = Dense(3, activation='softmax',
                                name='senti')(dense_dp)  # multi-class
        model_sent = Model(
            inputs=[text_input, domain_input],
            outputs=[sentiment_preds, da_preds],
        )
        model_sent.compile(loss={
            'senti': 'categorical_crossentropy',
            'domain': 'categorical_crossentropy'
        },
                           loss_weights={
                               'senti': 1,
                               'domain': 0.001
                           },
                           optimizer='adam')
    else:
        sentiment_preds = Dense(1, activation='sigmoid',
                                name='senti')(dense_dp)  # binary
        model_sent = Model(
            inputs=[text_input, domain_input],
            outputs=[sentiment_preds, da_preds],
        )
        model_sent.compile(loss={
            'senti': 'binary_crossentropy',
            'domain': 'categorical_crossentropy'
        },
                           loss_weights={
                               'senti': 1,
                               'domain': 0.001
                           },
                           optimizer='adam')

    model_sent.summary()
    best_valid_f1 = 0.0

    # fit the model
    for e in range(epoch_num):
        accuracy = 0.0
        loss = 0.0
        step = 1

        print('--------------Epoch: {}--------------'.format(e))

        train_iter = data_gen(train_data)
        # train sentiment
        # train on batches
        for x_train, y_train in train_iter:
            # skip only 1 class in the training data
            if len(np.unique(y_train)) == 1:
                continue

            batch_docs, batch_labels = domain_data_gen(domain_data,
                                                       len(x_train))
            batch_docs['text_input'] = x_train

            # one-hot encode the sentiment labels when there are more than two classes
            if len(label_encoder) > 2:
                y_train_tmp = []
                for idx in range(len(y_train)):
                    dlabel = [0] * len(label_encoder)
                    dlabel[label_encoder.index(y_train[idx])] = 1
                    y_train_tmp.append(dlabel)
                y_train = y_train_tmp

            dlabels = []
            for idx in range(len(batch_labels['domain'])):
                dlabel = [0] * len(domain_encoder)
                dlabel[domain_encoder.index(batch_labels['domain'][idx])] = 1
                dlabels.append(dlabel)

            batch_labels['domain'] = dlabels
            batch_labels['senti'] = y_train

            # convert to arrays
            for key in batch_docs:
                batch_docs[key] = np.asarray(batch_docs[key])
            for key in batch_labels:
                batch_labels[key] = np.asarray(batch_labels[key])

            # train sentiment model
            tmp_senti = model_sent.train_on_batch(batch_docs,
                                                  batch_labels,
                                                  class_weight={
                                                      'senti': 'auto',
                                                      'domain': 'auto'
                                                  })
            # calculate loss and accuracy
            loss += tmp_senti[0]
            loss_avg = loss / step
            if step % 40 == 0:
                print('Step: {}'.format(step))
                print('\tLoss: {}.'.format(loss_avg))
                print('-------------------------------------------------')
            step += 1

        # each epoch try the valid data, get the best valid-weighted-f1 score
        print('Validating....................................................')
        valid_iter = data_gen(valid_data)
        y_preds_valids = []
        y_valids = []
        for x_valid, y_valid in valid_iter:
            x_valid = np.asarray(x_valid)
            tmp_preds_valid = model_sent.predict([x_valid, x_valid])
            for item_tmp in tmp_preds_valid[0]:
                y_preds_valids.append(item_tmp)
            for item_tmp in y_valid:
                y_valids.append(int(item_tmp))

        if len(y_preds_valids[0]) > 2:
            y_preds_valids = np.argmax(y_preds_valids, axis=1)
        else:
            y_preds_valids = [np.round(item[0]) for item in y_preds_valids]

        f1_valid = f1_score(y_true=y_valids,
                            y_pred=y_preds_valids,
                            average='weighted')
        print('Validating f1-weighted score: ' + str(f1_valid))

        # if the validation f1 score is good, then test
        if f1_valid > best_valid_f1:
            best_valid_f1 = f1_valid
            test_iter = data_gen(test_data)
            y_preds = []
            y_tests = []
            for x_test, y_test in test_iter:
                x_test = np.asarray(x_test)
                tmp_preds = model_sent.predict([x_test, x_test])
                for item_tmp in tmp_preds[0]:
                    y_preds.append(item_tmp)
                for item_tmp in y_test:
                    y_tests.append(int(item_tmp))

            if len(y_preds[0]) > 2:
                y_preds = np.argmax(y_preds, axis=1)
            else:
                y_preds = [np.round(item[0]) for item in y_preds]

            test_result = open('./results_shared.txt', 'a')
            test_result.write(data_pair[1] + '\n')
            test_result.write(
                'Epoch ' + str(e) +
                '..................................................\n')
            test_result.write(
                str(
                    f1_score(
                        y_true=y_tests, y_pred=y_preds, average='weighted')) +
                '\n')
            test_result.write('#####\n\n')
            test_result.write(
                classification_report(y_true=y_tests, y_pred=y_preds,
                                      digits=3))
            test_result.write(
                '...............................................................\n\n'
            )
            test_result.flush()
            test_result.close()


# Example 3

def run_dnn(data_name, dkeys):
    print('Working on: ' + data_name)
    # parameters
    sent_len = 50  # the max length of sentence
    wt_path = '../../data/weight/' + data_name + '.npy'
    epoch_num = 20
    """Preprocess"""
    # training data
    train_data, dlabel_encoder = load_data_iter(data_name,
                                                dkeys,
                                                suffix='train')
    # load weights
    weights = np.load(wt_path)
    """Model"""
    # input
    text_input = Input(shape=(sent_len, ), dtype='int32', name='text_input')

    # embedding
    embedding = Embedding(
        weights.shape[0],
        weights.shape[1],  # size of data embedding
        weights=[weights],
        input_length=sent_len,
        trainable=True,
        name='embedding')(text_input)

    # CNN
    conv1 = Conv1D(
        filters=300,
        kernel_size=5,
        padding='valid',
        strides=1,
    )(embedding)
    conv2 = Conv1D(
        filters=200,
        kernel_size=7,
        padding='valid',
        strides=1,
    )(conv1)
    max_pool = MaxPool1D()(conv2)

    flatten = Flatten()(max_pool)

    # for sentiment clfs
    dense_1 = Dense(128, activation='relu')(flatten)
    dense_dp = Dropout(0.2)(dense_1)
    sentiment_preds = Dense(1, activation='sigmoid',
                            name='senti')(dense_dp)  # binary

    # for domain prediction
    hp_lambda = 0.01
    """Obtain the number of domain label"""
    domain_inputs = dict()
    for dkey in dkeys:
        domain_inputs[dkey] = []
        domain_inputs[dkey].append(
            flipGradientTF.GradientReversal(hp_lambda,
                                            name='domain' + str(dkey) +
                                            '_flip')(flatten))
        domain_inputs[dkey].append(
            Dense(128, activation='relu', name='domain' + str(dkey) +
                  '_dense')(domain_inputs[dkey][-1]))
        domain_inputs[dkey].append(Dropout(0.2)(domain_inputs[dkey][-1]))

        dim_size = len(dlabel_encoder[dkey])
        print(dim_size)
        if dim_size == 2:
            dim_size = 1
        # check the label size: a single output unit uses a sigmoid,
        # multi-class outputs use a softmax (matching the losses chosen below)
        if dim_size == 1:
            domain_inputs[dkey].append(
                Dense(dim_size,
                      activation='sigmoid',
                      name='domain' + str(dkey) + '_pred')(
                          domain_inputs[dkey][-1]))
        else:
            domain_inputs[dkey].append(
                Dense(dim_size,
                      activation='softmax',
                      name='domain' + str(dkey) + '_pred')(
                          domain_inputs[dkey][-1]))

    model_sentiment = Model(
        inputs=[text_input],
        # the last layer of each domain task is the prediction layer
        outputs=[sentiment_preds] +
        [domain_inputs[dkey][-1] for dkey in sorted(dkeys)],
    )

    # build loss (weight) for each domain
    loss_dict = {'senti': 'binary_crossentropy'}
    loss_w_dict = {'senti': 1}
    for dkey in dkeys:
        loss_w_dict['domain' + str(dkey) + '_pred'] = 0.1
        if len(dlabel_encoder[dkey]) > 2:
            loss_dict['domain' + str(dkey) +
                      '_pred'] = 'categorical_crossentropy'
        else:
            loss_dict['domain' + str(dkey) + '_pred'] = 'binary_crossentropy'

    model_sentiment.compile(loss=loss_dict,
                            loss_weights=loss_w_dict,
                            optimizer='adam')
    model_sentiment.summary()

    # fit the model
    cls_w = {'senti': 'auto'}
    for dkey in dkeys:
        cls_w['domain' + str(dkey) + '_pred'] = 'auto'

    # load the development set
    dev_data, _ = load_data_iter(data_name, dkeys, suffix='dev')
    best_dev = 0

    # test data
    test_data, _ = load_data_iter(data_name, dkeys, suffix='test')

    for e in range(epoch_num):
        # shuffle the data
        np.random.shuffle(train_data)

        accuracy = 0.0
        loss = 0.0
        step = 1

        print('--------------Epoch: {}--------------'.format(e))

        train_iter = data_gen(train_data, dkeys)
        # train sentiment
        # train on batches
        for x_train, train_labels in train_iter:
            # skip only 1 class in the training data
            if len(np.unique(train_labels['senti'])) == 1:
                continue

            # train sentiment model
            tmp_senti = model_sentiment.train_on_batch(
                x_train,
                train_labels,
                class_weight=cls_w,
            )
            # calculate loss and accuracy
            loss += tmp_senti[0]
            loss_avg = loss / step
            if step % 40 == 0:
                print('Step: {}'.format(step))
                print('\tLoss: {}.'.format(loss_avg))
                print('-------------------------------------------------')
            step += 1

        # validation process
        y_preds_dev = []
        y_devs = []
        dev_iter = data_gen(dev_data, dkeys)

        for x_dev, y_dev in dev_iter:
            x_dev = np.asarray(x_dev)
            tmp_preds = model_sentiment.predict(x_dev)
            for item_tmp in tmp_preds[0]:
                y_preds_dev.append(np.round(item_tmp[0]))
            for item_tmp in y_dev['senti']:
                y_devs.append(int(item_tmp))
        cur_dev = f1_score(y_true=y_devs,
                           y_pred=y_preds_dev,
                           average='weighted')

        # if we get better dev result, test
        if cur_dev > best_dev:
            best_dev = cur_dev
            test_iter = data_gen(test_data, dkeys)
            y_preds = []
            y_tests = []

            for x_test, y_test in test_iter:
                x_test = np.asarray(x_test)
                tmp_preds = model_sentiment.predict(x_test)
                for item_tmp in tmp_preds[0]:
                    y_preds.append(np.round(item_tmp[0]))
                for item_tmp in y_test['senti']:
                    y_tests.append(int(item_tmp))
            test_result = open('./results_dann.txt', 'a')
            test_result.write(data_name + '\t' + ','.join(map(str, dkeys)) +
                              '\n')
            test_result.write(
                str(
                    f1_score(
                        y_true=y_tests, y_pred=y_preds, average='weighted')) +
                '\n')
            test_result.write(
                classification_report(y_true=y_tests, y_pred=y_preds,
                                      digits=3))
            test_result.write(
                '...............................................................\n\n'
            )
            test_result.flush()
            test_result.close()


# Example 4

def run_dnn(data_pair):
    print('Working on: '+data_pair[1])
    wt_path = './weights/'+ data_pair[1] + '.npy'
    train_path = './data/'+ data_pair[1] + '_source.txt'
    valid_path = './data/' + data_pair[1] + '_valid.txt'
    test_path = './data/'+ data_pair[1] + '_target.txt'
    epoch_num = 15

    # parameters
    sent_len = 60 # the max length of sentence

    """Preprocess"""
    # load weights
    weights = np.load(wt_path)

    # input
    text_input = Input(shape=(sent_len,), dtype='int32', name='text_input')

    # embedding
    embedding = Embedding(
        weights.shape[0], weights.shape[1], # size of data embedding
        weights=[weights], input_length=sent_len,
        trainable=False,
        name='embedding'
    )(text_input)

    # CNN
    conv1 = Conv1D(
        filters=300,
        kernel_size=3,
        padding='valid',
        strides=1,
    )(embedding)
    conv2 = Conv1D(
        filters=200,
        kernel_size=5,
        padding='valid',
        strides=1,
    )(conv1)
    max_pool = MaxPool1D()(conv2)

    flatten = Flatten()(max_pool)

    # for sentiment clf
    dense_1 = Dense(128, activation='relu')(flatten)
    dense_dp = Dropout(0.2)(dense_1)

    # for domain prediction
    hp_lambda = 0.01

    """Obtain the number of domain label"""
    da_num = set()
    with open(train_path) as data_file:
        for line in data_file:
            da_num.add(line.strip().split('\t')[1]) # domain label position
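
    # the number of distinct domain labels found in the training file sets the
    # output size of the domain classifier's softmax below (here the domain
    # label appears to be a time label; cf. time_labels in the training loop)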

    flip = flipGradientTF.GradientReversal(hp_lambda)(flatten)
    dense_da = Dense(128, activation='relu')(flip)
    dense_da_dp = Dropout(0.2)(dense_da)
    da_preds = Dense(len(da_num), activation='softmax', name='domain')(dense_da_dp) # multiple

    if 'dianping' in data_pair[1] or 'amazon' in data_pair[1] or 'yelp' in data_pair[1]:
        sentiment_preds = Dense(3, activation='softmax', name='senti')(dense_dp) # multi-class
        model_sentiment = Model(
            inputs=[text_input], outputs=[sentiment_preds, da_preds],
        )
        model_sentiment.compile(
            loss={'senti': 'categorical_crossentropy', 'domain':'categorical_crossentropy'},
            loss_weights={'senti': 1, 'domain':0.01},
            optimizer='adam')
    else:
        sentiment_preds = Dense(1, activation='sigmoid', name='senti')(dense_dp) # binary
        model_sentiment = Model(
            inputs=[text_input], outputs=[sentiment_preds, da_preds],
        )
        model_sentiment.compile(
            loss={'senti': 'binary_crossentropy', 'domain':'categorical_crossentropy'},
            loss_weights={'senti': 1, 'domain':0.01},
            optimizer='adam')

    model_sentiment.summary()
    best_valid_f1 = 0.0

    # fit the model
    for e in range(epoch_num):
        accuracy = 0.0
        loss = 0.0
        step = 1

        print('--------------Epoch: {}--------------'.format(e))

        train_iter = load_data_iter(train_path)
        # train sentiment
        # train on batches
        for x_train, time_labels, y_train in train_iter:
            # skip only 1 class in the training data
            if len(np.unique(y_train)) == 1:
                continue

            if time_labels.shape[0] != y_train.shape[0]:
                continue

            # train sentiment model
            tmp_senti = model_sentiment.train_on_batch(
                x_train,
                {'senti': y_train, 'domain': time_labels},
                class_weight={'senti': 'auto', 'domain': 'auto'}
            )
            # calculate loss and accuracy
            loss += tmp_senti[0]
            loss_avg = loss / step
            if step % 40 == 0:
                print('Step: {}'.format(step))
                print('\tLoss: {}.'.format(loss_avg))
                print('-------------------------------------------------')
            step += 1

        # each epoch try the valid data, get the best valid-weighted-f1 score
        print('Validating....................................................')
        valid_iter = load_data_iter(valid_path, train=False)
        y_preds_valids = []
        y_valids = []
        for x_valid, y_valid in valid_iter:
            x_valid = np.asarray(x_valid)
            tmp_preds_valid = model_sentiment.predict(x_valid)
            for item_tmp in tmp_preds_valid[0]:
                y_preds_valids.append(item_tmp)
            for item_tmp in y_valid:
                y_valids.append(int(item_tmp))

        if len(y_preds_valids[0]) > 2:
            y_preds_valids = np.argmax(y_preds_valids, axis=1)
        else:
            y_preds_valids = [np.round(item[0]) for item in y_preds_valids]

        f1_valid = f1_score(y_true=y_valids, y_pred=y_preds_valids, average='weighted')
        print('Validating f1-weighted score: ' + str(f1_valid))

        # if the validation f1 score is good, then test
        if f1_valid > best_valid_f1:
            best_valid_f1 = f1_valid
            test_iter = load_data_iter(test_path, train=False)
            y_preds = []
            y_tests = []
            for x_test, y_test in test_iter:
                x_test = np.asarray(x_test)
                tmp_preds = model_sentiment.predict(x_test)
                for item_tmp in tmp_preds[0]:
                    y_preds.append(item_tmp)
                for item_tmp in y_test:
                    y_tests.append(int(item_tmp))

            if len(y_preds[0]) > 2:
                y_preds = np.argmax(y_preds, axis=1)
            else:
                y_preds = [np.round(item[0]) for item in y_preds]

            test_result = open('./results.txt', 'a')
            test_result.write(data_pair[1] + '\n')
            test_result.write('Epoch ' + str(e) + '..................................................\n')
            test_result.write(str(f1_score(y_true=y_tests, y_pred=y_preds, average='weighted')) + '\n')
            test_result.write('#####\n\n')
            test_result.write(classification_report(y_true=y_tests, y_pred=y_preds, digits=3))
            test_result.write('...............................................................\n\n')
            test_result.flush()
            test_result.close()