Code Example #1
File: run-qa-6.py Project: FlamingHorizon/VRRM
def test_model(simulator, story_int, story_mask, q_int, q_mask, ans_int,
               ans_mask, params, old_std, f_test, vocab_text, inv_vocab_text):

    ent_list = {
        '1': 'office',
        '2': 'hallway',
        '3': 'kitchen',
        '4': 'garden',
        '5': 'bedroom',
        '6': 'bathroom',
        '7': 'mary',
        '8': 'john',
        '9': 'daniel',
        '10': 'sandra',
        '11': 'milk',
        '12': 'football',
        '13': 'apple'
    }
    # form vocabs and data
    numpy.set_printoptions(precision=3)
    n_samples = len(story_int)
    acc = 0.

    sys.stdout = old_std
    print 'n_samples:'
    print n_samples

    story_test = numpy.zeros(
        (1, params['max_story_len_test'], params['max_sent_len']))
    story_mask_test = numpy.zeros(
        (1, params['max_story_len_test'], params['max_sent_len'],
         params['dim_emb_story']))
    q_test = numpy.zeros((1, params['max_q_num_test'], params['max_sent_len']))
    q_mask_test = numpy.zeros(
        (1, params['max_q_num_test'], params['max_sent_len'],
         params['dim_emb_story']))

    act_selected_test = numpy.zeros(
        (1, params['max_story_len_test'],
         (params['ent_range']) * 1 * params['relation_num']))
    reward_test = numpy.ones(
        (1, params['max_story_len_test'],
         (params['ent_range']) * 1 * params['relation_num']))

    reward_immediate_test = numpy.zeros((1, params['max_story_len_test']))

    ans_test = numpy.zeros((1, params['max_q_num_test'], params['vocab_size']))
    ans_mask_test = numpy.zeros((1, params['max_q_num_test']))
    fin_test = numpy.zeros(
        (1, params['max_q_num_test'], params['dim_emb_env']))
    fin_test_pred = numpy.zeros(
        (1, params['max_q_num_test'], params['dim_emb_env']))
    fin_one_hot_test = numpy.zeros(
        (1, params['max_q_num_test'],
         (params['ent_range']) * 1 * params['relation_num']))
    reward_fin_test = numpy.ones(
        (1, params['max_q_num_test'],
         (params['ent_range']) * 1 * params['relation_num']))

    working_memory = working_environment(params['enti_num'],
                                         params['relation_num_expand'])
    working_embed = envEmbedding(params['relation_num_expand'],
                                 params['enti_num'], params['dim_emb_env'])
    work_space_table = varTable()

    mask_sim = numpy.zeros((1, params['max_story_len_test'],
                            params['dim_emb_story'] * params['max_sent_len']))

    print 'test started.'
    sys.stdout = f_test
    train_trace_id = 0  # always == 0
    for story_id in range(n_samples):
        story_test[train_trace_id, :len(story_int[0]), :len(
            story_int[0][0])] = story_int[story_id]
        story_mask_test[train_trace_id, :len(story_int[0]), :len(
            story_int[0][0]), :] = story_mask[story_id]
        q_test[train_trace_id, :len(q_int[0]), :len(q_int[0][0]
                                                    )] = q_int[story_id]
        q_mask_test[train_trace_id, :len(q_int[0]), :len(
            q_int[0][0]), :] = q_mask[story_id]
        ans_test[train_trace_id, :len(ans_int[0]), :len(ans_int[0][0]
                                                        )] = ans_int[story_id]

        for time_slice in range(params['max_story_len_test']):
            mask_sim[0][time_slice][:] = numpy.ones(params['dim_emb_story'] *
                                                    params['max_sent_len'])

        action_probs, retrieve_probs = simulator.predict([
            story_test[numpy.newaxis,
                       train_trace_id], story_mask_test[numpy.newaxis,
                                                        train_trace_id],
            q_test[numpy.newaxis, train_trace_id], q_mask_test[numpy.newaxis,
                                                               train_trace_id],
            mask_sim[:], reward_test[numpy.newaxis, train_trace_id],
            reward_fin_test[numpy.newaxis, train_trace_id]
        ])
        for time_slice in range(params['max_story_len_test']):

            tupleList, adjGraph, temp_index = working_memory.returnEnv()

            action_selected, action_one_hot = select_action_hard(
                action_probs[:, time_slice, :], params['epsilon'])

            act_selected_test[train_trace_id, time_slice, :] = action_one_hot
            arg_1_ptr = action_selected // ((1) * params['relation_num'])
            arg_2_ptr = (
                action_selected - arg_1_ptr *
                (1) * params['relation_num']) // params['relation_num']
            arg_r_ptr = (
                action_selected - arg_1_ptr *
                (1) * params['relation_num']) % params['relation_num'] + 1
            arg_r = arg_r_ptr

            flag = 0
            # initialize so the "arg_1 > 0 and arg_2 > 0" guard below stays
            # safe when a sentence contains no entity from ent_list
            arg_1 = 0
            arg_2 = 0
            if time_slice < len(story_int[story_id]):
                for w_id in story_int[story_id][time_slice]:
                    # print inv_vocab_text[str(int(w_id))]
                    if inv_vocab_text[str(
                            int(w_id))] in ent_list.values() and flag == 0:
                        if arg_1_ptr == 0:
                            arg_1 = int(w_id)
                        else:
                            arg_2 = int(w_id)
                        flag = 1
                    elif inv_vocab_text[str(
                            int(w_id))] in ent_list.values() and flag == 1:
                        if arg_1_ptr == 0:
                            arg_2 = int(w_id)
                        else:
                            arg_1 = int(w_id)

            slice_reward = 0
            for tt in range(time_slice + 1):
                reward_immediate_test[train_trace_id][tt] += slice_reward * (
                    params['reward_decay']**(time_slice - tt))

            if arg_1 > 0 and arg_2 > 0 and story_mask_test[train_trace_id,
                                                           time_slice, 0,
                                                           0] > 0:
                # retrieve the table
                arg_1_int = work_space_table.retr_insert(arg_1, inv_vocab_text)
                arg_2_int = work_space_table.retr_insert(arg_2, inv_vocab_text)
                working_memory.modifyEnv((arg_1_int, arg_r, arg_2_int))

        # compute fin_train and fin_train_pred
        retrieved_relation_list = []
        reward_temp_list = []
        for q_idx in range(len(retrieve_probs[0])):
            retr_idx = numpy.argmax(retrieve_probs[0, q_idx, :])
            arg1_retr_ptr = retr_idx // ((1) * params['relation_num'])
            arg2_retr_ptr = (
                retr_idx - arg1_retr_ptr *
                (1) * params['relation_num']) // params['relation_num']
            relation_pred = (
                retr_idx - arg1_retr_ptr *
                (1) * params['relation_num']) % params['relation_num'] + 1

            flag = 0
            for q_w in q_int[story_id, 0]:
                if inv_vocab_text[str(
                        int(q_w))] in ent_list.values() and flag == 0:
                    if arg1_retr_ptr == 0:
                        arg1_retr = int(q_w)
                    else:
                        arg2_retr = int(q_w)
                    flag = 1
                elif inv_vocab_text[str(
                        int(q_w))] in ent_list.values() and flag == 1:
                    if arg1_retr_ptr == 0:
                        arg2_retr = int(q_w)
                    else:
                        arg1_retr = int(q_w)

            retrieve_reward_pre = 0
            reward_temp_list.append(retrieve_reward_pre)

            arg1_retr_int = work_space_table.retr(arg1_retr, inv_vocab_text)
            arg2_retr_int = work_space_table.retr(arg2_retr, inv_vocab_text)

            arg_retr_id = working_memory.retrieveRelation(
                arg1_retr_int, arg2_retr_int)

            retrieved_relation_list.append(arg_retr_id)

            one_hot_single = numpy.zeros(
                ((params['ent_range']) * 1 * params['relation_num']))
            one_hot_single[retr_idx] = 1
            fin_one_hot_test[train_trace_id, q_idx, :] = one_hot_single

        reward_q_total = 0.
        ans_shared = 0

        for q_idx in range(len(retrieve_probs[0])):
            ans_word_int = numpy.argmax(ans_test[train_trace_id][0]) + 1
            ans_word = inv_vocab_text[str(ans_word_int)]
            if retrieved_relation_list[q_idx] == 1:
                ans_pred = 'yes'
            else:
                ans_pred = 'no'

            reward_scalar_q, ans_q, label_q = compute_single_reward(
                ans_pred, ans_word)
            reward_scalar_q *= q_mask_test[train_trace_id, q_idx, 0, 0]

            if ans_q == label_q and q_idx == 0:
                acc += 1.

            reward_q_total += reward_scalar_q
        reward_q_total += reward_temp_list[0]

        for time_slice in range(params['max_story_len_test']):
            reward_immediate_test[train_trace_id,
                                  time_slice] += reward_q_total * (
                                      params['reward_decay']
                                      **(params['max_story_len'] - time_slice))
        for q_idx in range(len(retrieve_probs[0])):
            # pass
            reward_fin_test[
                train_trace_id,
                q_idx, :] *= reward_q_total  # used as input at the last softmax

        for time_slice in range(params['max_story_len_test']):
            # pass
            reward_test[train_trace_id,
                        time_slice, :] *= reward_immediate_test[
                            train_trace_id,
                            time_slice]  # used as input at the last softmax

        mask_sim *= 0
        working_memory.resetEnv()
        work_space_table.reset()

    sys.stdout = old_std
    print 'test result:'
    print acc / n_samples
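
Every listing on this page decodes the reasoner's flat action index with the same arg_1_ptr / arg_2_ptr / arg_r_ptr arithmetic (the excerpts also assume the module-level imports of their source files: numpy, sys, and the Keras backend K). Below is a minimal stand-alone sketch of that decoding, not code from the repository; relation_num and ent_range stand in for params['relation_num'] and params['ent_range'], and the "* 1" factor from the original is kept, which is why arg_2_ptr always comes out 0.

def decode_action(action_selected, relation_num):
    # mirrors the arg_1_ptr / arg_2_ptr / arg_r_ptr arithmetic used above
    arg_1_ptr = action_selected // (1 * relation_num)
    remainder = action_selected - arg_1_ptr * 1 * relation_num
    arg_2_ptr = remainder // relation_num   # always 0 because of the "* 1" factor
    arg_r = remainder % relation_num + 1    # relation ids start at 1
    return arg_1_ptr, arg_2_ptr, arg_r


def encode_action(arg_1_ptr, arg_r, relation_num):
    # inverse mapping, handy for checking the decode
    return arg_1_ptr * 1 * relation_num + (arg_r - 1)


if __name__ == '__main__':
    relation_num, ent_range = 4, 2          # toy sizes, not the real params
    for idx in range(ent_range * 1 * relation_num):
        a1, a2, r = decode_action(idx, relation_num)
        assert encode_action(a1, r, relation_num) == idx
        print('%d -> arg_1_ptr=%d, arg_2_ptr=%d, arg_r=%d' % (idx, a1, a2, r))
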
Code Example #2
File: run-qa-6.py Project: FlamingHorizon/VRRM
def train_two_phrase(train_file_names, test_file_names, params):
    K.set_epsilon(1e-4)
    ent_list = {
        '1': 'office',
        '2': 'hallway',
        '3': 'kitchen',
        '4': 'garden',
        '5': 'bedroom',
        '6': 'bathroom',
        '7': 'mary',
        '8': 'john',
        '9': 'daniel',
        '10': 'sandra',
        '11': 'milk',
        '12': 'football',
        '13': 'apple'
    }
    numpy.set_printoptions(precision=3)
    print 'loading data.'
    old_std = sys.stdout
    f_print = open('debug_print_phr1.txt', 'w')
    f_debug = open('debug_print_phr2.txt', 'w')
    f_test = open('debug_print_test.txt', 'w')

    sys.stdout = f_print
    lines = []
    for f_name in train_file_names:
        f = open(f_name, 'r')
        lines.extend(f.readlines())
        f.close()

    lines_test = []
    for f_name in test_file_names:
        f = open(f_name, 'r')
        lines_test.extend(f.readlines())
        f.close()

    vocab_text = {}
    inv_vocab_text = {}
    data = parse_stories(lines, vocab_text, inv_vocab_text, 1)
    data_test = parse_stories(lines_test, vocab_text, inv_vocab_text, 0)

    l_train = len(data)
    l_test = len(data_test)
    data = data[:min(params['train_set_size'], l_train)]
    data_test = data_test[:min(params['test_set_size'], l_test)]

    story_int, story_mask, q_int, q_mask, ans_int, ans_mask, sizes = int_stories(
        data, vocab_text)
    story_mask = repeatTensor(story_mask, params['dim_emb_story'])
    q_mask = repeatTensor(q_mask, params['dim_emb_story'])

    story_int_test, story_mask_test, q_int_test, q_mask_test, ans_int_test, ans_mask_test, sizes_test = int_stories(
        data_test, vocab_text)
    story_mask_test = repeatTensor(story_mask_test, params['dim_emb_story'])
    q_mask_test = repeatTensor(q_mask_test, params['dim_emb_story'])

    inv_vocab_text['0'] = 'dududu'

    params['max_story_len'] = sizes[0]
    params['max_sent_len'] = max(sizes[1], sizes[3])
    params['max_q_num'] = sizes[2]

    params['max_story_len_test'] = sizes_test[0]
    params['max_q_num_test'] = sizes_test[2]

    params['vocab_size'] = len(vocab_text)
    params['vocab_size_ans'] = len(vocab_text)
    n_samples = len(story_int)

    params['ent_range'] = 2
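    # ent_range == 2 here: arg_1_ptr just picks normal vs. inverted argument
    # order for a single tuple (the variant below uses 4 for two tuples)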

    print 'params:'
    print params
    print 'n_samples:'
    print n_samples
    print 'vocab_text:'
    print vocab_text
    sys.stdout = old_std

    story_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         params['max_sent_len']))
    story_mask_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         params['max_sent_len'], params['dim_emb_story']))
    q_train = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                           params['max_q_num'], params['max_sent_len']))
    q_mask_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
         params['max_sent_len'], params['dim_emb_story']))

    act_selected_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         (params['ent_range']) * 1 * params['relation_num']))
    reward_train = numpy.ones(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         (params['ent_range']) * 1 * params['relation_num']))

    reward_immediate_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len']))

    ans_train = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                             params['max_q_num'], params['vocab_size']))
    ans_mask_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num']))
    fin_train = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                             params['max_q_num'], params['dim_emb_env']))
    fin_train_pred = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                                  params['max_q_num'], params['dim_emb_env']))
    fin_one_hot_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
         (params['ent_range']) * 1 * params['relation_num']))
    reward_fin_train = numpy.ones(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
         (params['ent_range']) * 1 * params['relation_num']))

    print 'building model.'

    reasoner, simulator, debugger = DRL_Reasoner(params)
    test_simulator = DRL_Reasoner_Test(params)

    working_memory = working_environment(params['enti_num'],
                                         params['relation_num_expand'])
    working_embed = envEmbedding(params['relation_num_expand'],
                                 params['enti_num'], params['dim_emb_env'])
    work_space_table = varTable()

    mask_sim = numpy.zeros((1, params['max_story_len'],
                            params['dim_emb_story'] * params['max_sent_len']))

    all_sim_number = 0

    print 'two phrase training started.'

    for epoch_id in range(params['epoch_num']):
        train_trace_id = 0
        avg_reward_q_epoch = 0.
        avg_reward_action_epoch = numpy.zeros(params['max_story_len'])
        print 'epoch %d phrase 1' % epoch_id

        epoch_precision_rate = 0.
        sample_rate = numpy.zeros(n_samples)
        for story_id in range(n_samples):
            story_precision = 0.
            for sim_round in range(params['story_sims_per_epoch']):
                sys.stdout = f_print

                story_train[train_trace_id, :len(story_int[0]), :len(
                    story_int[0][0])] = story_int[story_id]
                story_mask_train[train_trace_id, :len(story_int[0]), :len(
                    story_int[0][0]), :] = story_mask[story_id]
                q_train[train_trace_id, :len(q_int[0]), :len(
                    q_int[0][0])] = q_int[story_id]
                q_mask_train[train_trace_id, :len(q_int[0]), :len(
                    q_int[0][0]), :] = q_mask[story_id]
                ans_train[train_trace_id, :len(ans_int[0]), :len(
                    ans_int[0][0])] = ans_int[story_id]

                for time_slice in range(params['max_story_len']):
                    mask_sim[0][time_slice][:] = numpy.ones(
                        params['dim_emb_story'] * params['max_sent_len'])

                action_probs, retrieve_probs = simulator.predict([
                    story_train[numpy.newaxis, train_trace_id],
                    story_mask_train[numpy.newaxis,
                                     train_trace_id], q_train[numpy.newaxis,
                                                              train_trace_id],
                    q_mask_train[numpy.newaxis, train_trace_id], mask_sim[:],
                    reward_train[numpy.newaxis, train_trace_id],
                    reward_fin_train[numpy.newaxis, train_trace_id]
                ])
                for time_slice in range(params['max_story_len']):

                    tupleList, adjGraph, temp_index = working_memory.returnEnv(
                    )

                    action_selected, action_one_hot = select_action(
                        action_probs[:, time_slice, :], params['epsilon'])

                    act_selected_train[train_trace_id,
                                       time_slice, :] = action_one_hot
                    arg_1_ptr = action_selected // (
                        (1) * params['relation_num']
                    )  # start from 0 (empty number)
                    arg_2_ptr = (
                        action_selected - arg_1_ptr *
                        (1) * params['relation_num']) // params['relation_num']
                    arg_r_ptr = (action_selected - arg_1_ptr *
                                 (1) * params['relation_num']
                                 ) % params['relation_num'] + 1
                    arg_r = arg_r_ptr

                    flag = 0
                    # initialize so the "arg_1 > 0 and arg_2 > 0" guard below
                    # stays safe when a sentence contains no entity from ent_list
                    arg_1 = 0
                    arg_2 = 0
                    if time_slice < len(story_int[story_id]):
                        for w_id in story_int[story_id][time_slice]:
                            if inv_vocab_text[str(int(
                                    w_id))] in ent_list.values() and flag == 0:
                                if arg_1_ptr == 0:
                                    arg_1 = int(w_id)
                                else:
                                    arg_2 = int(w_id)
                                flag = 1
                            elif inv_vocab_text[str(int(
                                    w_id))] in ent_list.values() and flag == 1:
                                if arg_1_ptr == 0:
                                    arg_2 = int(w_id)
                                else:
                                    arg_1 = int(w_id)

                    slice_reward = 0
                    for tt in range(time_slice + 1):
                        reward_immediate_train[train_trace_id][
                            tt] += slice_reward * (params['reward_decay']
                                                   **(time_slice - tt))

                    if arg_1 > 0 and arg_2 > 0 and story_mask_train[
                            train_trace_id, time_slice, 0, 0] > 0:
                        arg_1_int = work_space_table.retr_insert(
                            arg_1, inv_vocab_text)
                        arg_2_int = work_space_table.retr_insert(
                            arg_2, inv_vocab_text)
                        working_memory.modifyEnv((arg_1_int, arg_r, arg_2_int))

                retrieved_relation_list = []
                reward_temp_list = []
                for q_idx in range(len(retrieve_probs[0])):
                    retr_idx, action_one_hot_retr = select_action(
                        retrieve_probs[0, q_idx, :], params['epsilon'])
                    arg1_retr_ptr = retr_idx // ((1) * params['relation_num'])
                    arg2_retr_ptr = (
                        retr_idx - arg1_retr_ptr *
                        (1) * params['relation_num']) // params['relation_num']
                    relation_pred = (retr_idx - arg1_retr_ptr *
                                     (1) * params['relation_num']
                                     ) % params['relation_num'] + 1

                    flag = 0
                    for q_w in q_int[story_id, 0]:
                        if inv_vocab_text[str(
                                int(q_w))] in ent_list.values() and flag == 0:
                            if arg1_retr_ptr == 0:
                                arg1_retr = int(q_w)
                            else:
                                arg2_retr = int(q_w)
                            flag = 1
                        elif inv_vocab_text[str(
                                int(q_w))] in ent_list.values() and flag == 1:
                            if arg1_retr_ptr == 0:
                                arg2_retr = int(q_w)
                            else:
                                arg1_retr = int(q_w)

                    retrieve_reward_pre = 0
                    reward_temp_list.append(retrieve_reward_pre)

                    arg1_retr_int = work_space_table.retr(
                        arg1_retr, inv_vocab_text)
                    arg2_retr_int = work_space_table.retr(
                        arg2_retr, inv_vocab_text)

                    arg_retr_id = working_memory.retrieveRelation(
                        arg1_retr_int, arg2_retr_int)

                    retrieved_relation_list.append(arg_retr_id)

                    one_hot_single = numpy.zeros(
                        ((params['ent_range']) * 1 * params['relation_num']))
                    one_hot_single[retr_idx] = 1
                    fin_one_hot_train[train_trace_id,
                                      q_idx, :] = one_hot_single

                reward_q_total = 0.
                ans_shared = 0
                for q_idx in range(len(retrieve_probs[0])):
                    ans_word_int = numpy.argmax(
                        ans_train[train_trace_id][0]) + 1
                    ans_word = inv_vocab_text[str(ans_word_int)]
                    if retrieved_relation_list[q_idx] == 1:
                        ans_pred = 'yes'
                    else:
                        ans_pred = 'no'
                    reward_scalar_q, ans_q, label_q = compute_single_reward(
                        ans_pred, ans_word)
                    reward_scalar_q *= q_mask_train[train_trace_id, q_idx, 0,
                                                    0]

                    if ans_q == label_q and q_idx == 0:
                        epoch_precision_rate += 1.
                        story_precision += 1.

                    reward_q_total += reward_scalar_q
                reward_q_total += reward_temp_list[0]

                for time_slice in range(params['max_story_len']):
                    reward_immediate_train[train_trace_id,
                                           time_slice] += reward_q_total * (
                                               params['reward_decay']
                                               **(params['max_story_len'] -
                                                  time_slice))
                for q_idx in range(len(retrieve_probs[0])):
                    # pass
                    if q_idx == 0:
                        mask_reward = 1.
                    else:
                        mask_reward = 0.
                    reward_fin_train[train_trace_id,
                                     q_idx, :] *= (reward_q_total *
                                                   mask_reward)

                avg_reward_q_epoch = avg_reward_q_epoch * (train_trace_id) / (
                    train_trace_id + 1) + reward_q_total / (train_trace_id + 1)

                for time_slice in range(params['max_story_len']):
                    # pass
                    reward_train[train_trace_id, time_slice, :] *= (
                        reward_immediate_train[train_trace_id, time_slice] *
                        story_mask_train[train_trace_id, time_slice, 0, 0]
                    )  # used as input at the last softmax
                    avg_reward_action_epoch[
                        time_slice] = avg_reward_action_epoch[time_slice] * (
                            train_trace_id
                        ) / (train_trace_id + 1) + reward_immediate_train[
                            train_trace_id, time_slice] / (train_trace_id + 1)

                train_trace_id += 1
                all_sim_number += 1
                mask_sim *= 0
                working_memory.resetEnv()
                work_space_table.reset()

            sample_rate[
                story_id] = story_precision / params['story_sims_per_epoch']

        for q_idx in range(params['max_q_num']):
            if 0:
                # pass
                reward_fin_train[:,
                                 q_idx, :] -= avg_reward_q_epoch  # used as input at the last softmax

        for time_slice in range(params['max_story_len']):
            if 0:
                # pass
                reward_train[:, time_slice, :] -= avg_reward_action_epoch[
                    time_slice]  # used as input at the last softmax

        epoch_precision_rate = epoch_precision_rate / (
            n_samples * params['story_sims_per_epoch'])

        sys.stdout = old_std
        print 'precision of this epoch: %f' % epoch_precision_rate
        print 'epoch %d phrase 2' % (epoch_id)
        print 'sample_rate:'
        print sample_rate
        # phrase2: go batch train on the trace pool.
        mask_sim_2 = numpy.ones(
            (n_samples * params['story_sims_per_epoch'],
             params['max_story_len'],
             params['dim_emb_story'] * params['max_sent_len']))

        reasoner.fit(
            [
                story_train, story_mask_train, q_train, q_mask_train,
                mask_sim_2, reward_train, reward_fin_train
            ], {
                'action_probs_re': act_selected_train,
                'retrieve_probs_re': fin_one_hot_train
            },
            batch_size=params['batch_size_phrase2'],
            nb_epoch=10,
            verbose=2)

        sys.stdout = old_std

        # test the model
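        # sync the test-time simulator: copy weights layer by layer from the
        # training simulator into test_simulator (built by DRL_Reasoner_Test)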
        weights_train = [
            i for i in simulator.layers if len(i.get_weights()) != 0
        ]
        weights_test = [
            i for i in test_simulator.layers if len(i.get_weights()) != 0
        ]
        for (l1, l2) in zip(weights_train, weights_test):
            l2.set_weights(l1.get_weights())

        if (epoch_id + 1) % params['test_epoch_period'] == 0:
            test_model(test_simulator, story_int_test, story_mask_test,
                       q_int_test, q_mask_test, ans_int_test, ans_mask_test,
                       params, old_std, f_test, vocab_text, inv_vocab_text)

        sys.stdout = old_std
        story_train *= 0
        story_mask_train *= 0
        q_train *= 0
        q_mask_train *= 0
        act_selected_train *= 0
        reward_train = numpy.ones(
            (n_samples * params['story_sims_per_epoch'],
             params['max_story_len'], (params['ent_range']) * 1 *
             params['relation_num']))  # real number reward signal
        ans_train *= 0
        ans_mask_train *= 0
        fin_train *= 0
        fin_train_pred *= 0
        reward_fin_train = numpy.ones(
            (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
             params['ent_range'] * 1 * params['relation_num']))
        fin_one_hot_train *= 0
        reward_immediate_train *= 0

    f_print.close()
    f_debug.close()
    f_test.close()
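
At the end of each epoch, train_two_phrase syncs the separately built DRL_Reasoner_Test simulator by copying weights layer by layer from the training simulator (the weights_train / weights_test loop above), so test_model runs on the freshly trained parameters. Below is a minimal sketch of the same copy pattern using small stand-in models and the standalone Keras the file already imports; DRL_Reasoner and DRL_Reasoner_Test themselves are not reproduced here.

from keras.layers import Dense
from keras.models import Sequential


def copy_weights(src_model, dst_model):
    # keep only layers that actually own parameters, then copy them pairwise,
    # exactly as the weights_train / weights_test loop above does
    src_layers = [l for l in src_model.layers if len(l.get_weights()) != 0]
    dst_layers = [l for l in dst_model.layers if len(l.get_weights()) != 0]
    assert len(src_layers) == len(dst_layers)  # architectures must line up
    for l_src, l_dst in zip(src_layers, dst_layers):
        l_dst.set_weights(l_src.get_weights())


if __name__ == '__main__':
    # two structurally identical toy models standing in for the simulators
    train_net = Sequential([Dense(8, input_dim=4, activation='tanh'), Dense(2)])
    test_net = Sequential([Dense(8, input_dim=4, activation='tanh'), Dense(2)])
    copy_weights(train_net, test_net)
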
Code Example #3
def test_model(simulator, story_int, story_mask, q_int, q_mask, ans_int,
               ans_mask, params, old_std, f_test, vocab_text, inv_vocab_text):

    ent_list = {
        '1': 'office',
        '2': 'hallway',
        '3': 'kitchen',
        '4': 'garden',
        '5': 'bedroom',
        '6': 'bathroom',
        '7': 'mary',
        '8': 'john',
        '9': 'daniel',
        '10': 'sandra'
    }
    numpy.set_printoptions(precision=3)
    n_samples = len(story_int)
    acc = 0.

    sys.stdout = old_std
    print 'n_samples:'
    print n_samples

    story_test = numpy.zeros(
        (1, params['max_story_len'], params['max_sent_len']))
    story_mask_test = numpy.zeros(
        (1, params['max_story_len'], params['max_sent_len'],
         params['dim_emb_story']))
    q_test = numpy.zeros((1, params['max_q_num'], params['max_sent_len']))
    q_mask_test = numpy.zeros((1, params['max_q_num'], params['max_sent_len'],
                               params['dim_emb_story']))

    act_selected_test = numpy.zeros(
        (1, params['max_story_len'],
         (params['ent_range']) * 1 * params['relation_num']))  # one hot vector
    reward_test = numpy.ones(
        (1, params['max_story_len'], (params['ent_range']) * 1 *
         params['relation_num']))  # real number reward signal

    reward_immediate_test = numpy.zeros((1, params['max_story_len']))

    ans_test = numpy.zeros((1, params['max_q_num'], params['vocab_size']))
    ans_mask_test = numpy.zeros((1, params['max_q_num']))
    fin_test = numpy.zeros((1, params['max_q_num'], params['dim_emb_env']))
    fin_test_pred = numpy.zeros(
        (1, params['max_q_num'], params['dim_emb_env']))
    fin_one_hot_test = numpy.zeros(
        (1, params['max_q_num'], (1) * 1 * params['relation_num']))
    reward_fin_test = numpy.ones(
        (1, params['max_q_num'], (1) * 1 * params['relation_num']))

    working_memory = working_environment(params['enti_num'],
                                         params['relation_num_expand'])
    working_embed = envEmbedding(params['relation_num_expand'],
                                 params['enti_num'], params['dim_emb_env'])
    work_space_table = varTable()

    mask_sim = numpy.zeros((1, params['max_story_len'],
                            params['dim_emb_story'] * params['max_sent_len']))

    print 'test started.'
    sys.stdout = f_test
    train_trace_id = 0  # always == 0
    for story_id in range(n_samples):
        story_test[train_trace_id, :len(story_int[0]), :len(
            story_int[0][0])] = story_int[story_id]
        story_mask_test[train_trace_id, :len(story_int[0]), :len(
            story_int[0][0]), :] = story_mask[story_id]
        q_test[train_trace_id, :len(q_int[0]), :len(q_int[0][0]
                                                    )] = q_int[story_id]
        q_mask_test[train_trace_id, :len(q_int[0]), :len(
            q_int[0][0]), :] = q_mask[story_id]
        ans_test[train_trace_id, :len(ans_int[0]), :len(ans_int[0][0]
                                                        )] = ans_int[story_id]

        for time_slice in range(params['max_story_len']):
            mask_sim[0][time_slice][:] = numpy.ones(params['dim_emb_story'] *
                                                    params['max_sent_len'])

            # read and embed environment
            tupleList, adjGraph, temp_index = working_memory.returnEnv()

            action_probs, retrieve_probs = simulator.predict([
                story_test[numpy.newaxis, train_trace_id],
                story_mask_test[numpy.newaxis,
                                train_trace_id], q_test[numpy.newaxis,
                                                        train_trace_id],
                q_mask_test[numpy.newaxis, train_trace_id], mask_sim[:],
                reward_test[numpy.newaxis,
                            train_trace_id], reward_fin_test[numpy.newaxis,
                                                             train_trace_id]
            ])

            action_selected, action_one_hot = select_action_hard(
                action_probs[:, time_slice, :], params['epsilon'])

            act_selected_test[train_trace_id, time_slice, :] = action_one_hot
            arg_1_ptr = action_selected // ((1) * params['relation_num'])
            arg_2_ptr = (
                action_selected - arg_1_ptr *
                (1) * params['relation_num']) // params['relation_num']
            arg_r_ptr = (
                action_selected - arg_1_ptr *
                (1) * params['relation_num']) % params['relation_num'] + 1
            arg_r = arg_r_ptr

            order1 = arg_1_ptr // 2
            order2 = arg_1_ptr % 2

            flag = 0
            three_ents = []
            if story_mask_test[train_trace_id, time_slice, 0, 0] > 0:
                for w_id in story_int[story_id][time_slice]:
                    if inv_vocab_text[str(int(w_id))] in ent_list.values():
                        three_ents.append(int(w_id))
                if order1 == 0:
                    arg_1_first = three_ents[0]
                    arg_2_first = three_ents[2]
                else:
                    arg_1_first = three_ents[2]
                    arg_2_first = three_ents[0]

                if order2 == 0:
                    arg_1_second = three_ents[1]
                    arg_2_second = three_ents[2]
                else:
                    arg_1_second = three_ents[2]
                    arg_2_second = three_ents[1]

            slice_reward = 0
            for tt in range(time_slice + 1):
                reward_immediate_test[train_trace_id][tt] += slice_reward * (
                    params['reward_decay']**(time_slice - tt))

            if story_mask_test[train_trace_id, time_slice, 0, 0] > 0:
                # retrieve the table
                arg_1_int_first = work_space_table.retr_insert(
                    arg_1_first, inv_vocab_text)
                arg_2_int_first = work_space_table.retr_insert(
                    arg_2_first, inv_vocab_text)
                arg_1_int_second = work_space_table.retr_insert(
                    arg_1_second, inv_vocab_text)
                arg_2_int_second = work_space_table.retr_insert(
                    arg_2_second, inv_vocab_text)
                working_memory.modifyEnv(
                    (arg_1_int_first, arg_r, arg_2_int_first))
                working_memory.modifyEnv(
                    (arg_1_int_second, arg_r, arg_2_int_second))

        # compute fin_train and fin_train_pred
        retrieved_relation_list = []
        reward_temp_list = []
        for q_idx in range(len(retrieve_probs[0])):
            retr_idx = numpy.argmax(retrieve_probs[0, q_idx, :])
            arg1_retr_ptr = retr_idx // ((1) * params['relation_num'])
            arg2_retr_ptr = (
                retr_idx - arg1_retr_ptr *
                (1) * params['relation_num']) // params['relation_num']
            relation_pred = (
                retr_idx - arg1_retr_ptr *
                (1) * params['relation_num']) % params['relation_num'] + 1

            for q_w in q_int[story_id, 0]:
                if inv_vocab_text[str(int(q_w))] in ent_list.values():
                    arg1_retr = int(q_w)
                    break

            retrieve_reward_pre = 0
            reward_temp_list.append(retrieve_reward_pre)

            arg1_retr_int = work_space_table.retr(arg1_retr, inv_vocab_text)

            arg_retr_id = working_memory.retrieveArg(arg1_retr_int,
                                                     relation_pred)
            arg_retr_str = work_space_table.inv_retr(arg_retr_id)

            retrieved_relation_list.append(arg_retr_str)

            one_hot_single = numpy.zeros(((1) * 1 * params['relation_num']))
            one_hot_single[retr_idx] = 1
            fin_one_hot_test[train_trace_id, q_idx, :] = one_hot_single

        reward_q_total = 0.
        ans_shared = 0
        for q_idx in range(len(retrieve_probs[0])):
            ans_word_int = numpy.argmax(ans_test[train_trace_id][0]) + 1
            ans_word = inv_vocab_text[str(ans_word_int)]

            reward_scalar_q, ans_q, label_q = compute_single_reward(
                retrieved_relation_list[q_idx], ans_word)
            reward_scalar_q *= q_mask_test[train_trace_id, q_idx, 0, 0]

            if ans_q == label_q and q_idx == 0:
                acc += 1.

            reward_q_total += reward_scalar_q
        reward_q_total += reward_temp_list[0]

        for time_slice in range(params['max_story_len']):
            reward_immediate_test[train_trace_id,
                                  time_slice] += reward_q_total * (
                                      params['reward_decay']
                                      **(params['max_story_len'] - time_slice))
        for q_idx in range(len(retrieve_probs[0])):
            # pass
            reward_fin_test[train_trace_id, q_idx, :] *= reward_q_total

        for time_slice in range(params['max_story_len']):
            # pass
            reward_test[train_trace_id,
                        time_slice, :] *= reward_immediate_test[train_trace_id,
                                                                time_slice]

        mask_sim *= 0
        working_memory.resetEnv()
        work_space_table.reset()

    sys.stdout = old_std
    print 'test result:'
    print acc / n_samples
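
In this variant every story sentence mentions three entities and ent_range == 4, so arg_1_ptr encodes two independent orderings: order1 fixes the direction of the (three_ents[0], three_ents[2]) tuple and order2 the direction of the (three_ents[1], three_ents[2]) tuple; the same decoding reappears in the train_two_phrase listing below. A minimal stand-alone sketch with toy word ids (not repository code):

def decode_two_tuples(arg_1_ptr, three_ents):
    # mirrors the order1 / order2 logic above: arg_1_ptr runs over the
    # ent_range == 4 orderings of two (head, tail) tuples
    order1 = arg_1_ptr // 2
    order2 = arg_1_ptr % 2
    if order1 == 0:
        first = (three_ents[0], three_ents[2])
    else:
        first = (three_ents[2], three_ents[0])
    if order2 == 0:
        second = (three_ents[1], three_ents[2])
    else:
        second = (three_ents[2], three_ents[1])
    return first, second


if __name__ == '__main__':
    ents = [7, 8, 11]  # toy entity word ids found in one sentence
    for ptr in range(4):
        print('arg_1_ptr=%d -> %s' % (ptr, decode_two_tuples(ptr, ents)))
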
Code Example #4
def train_two_phrase(train_file_names, test_file_names, params):
    K.set_epsilon(1e-4)
    ent_list = {
        '1': 'office',
        '2': 'hallway',
        '3': 'kitchen',
        '4': 'garden',
        '5': 'bedroom',
        '6': 'bathroom',
        '7': 'mary',
        '8': 'john',
        '9': 'daniel',
        '10': 'sandra'
    }
    # form vocabs and data
    numpy.set_printoptions(precision=3)
    print 'loading data.'
    old_std = sys.stdout
    f_print = open('debug_print_phr1.txt', 'w')
    f_debug = open('debug_print_phr2.txt', 'w')
    f_test = open('debug_print_test.txt', 'w')

    sys.stdout = f_print
    lines = []
    for f_name in train_file_names:
        f = open(f_name, 'r')
        lines.extend(f.readlines())
        f.close()

    lines_test = []
    for f_name in test_file_names:
        f = open(f_name, 'r')
        lines_test.extend(f.readlines())
        f.close()

    vocab_text = {}
    inv_vocab_text = {}
    data = parse_stories(lines, vocab_text, inv_vocab_text)
    data_test = parse_stories(lines_test, vocab_text, inv_vocab_text)

    data = data[:params['train_set_size']]
    data_test = data_test[:params['test_set_size']]

    story_int, story_mask, q_int, q_mask, ans_int, ans_mask, sizes = int_stories(
        data, vocab_text)
    story_mask = repeatTensor(story_mask, params['dim_emb_story'])
    q_mask = repeatTensor(q_mask, params['dim_emb_story'])

    story_int_test, story_mask_test, q_int_test, q_mask_test, ans_int_test, ans_mask_test, sizes_test = int_stories(
        data_test, vocab_text)
    story_mask_test = repeatTensor(story_mask_test, params['dim_emb_story'])
    q_mask_test = repeatTensor(q_mask_test, params['dim_emb_story'])

    inv_vocab_text['0'] = 'dududu'

    params['max_story_len'] = max(sizes[2], sizes[0])
    params['max_sent_len'] = max(sizes[1], sizes[3])
    params['max_q_num'] = max(sizes[2], sizes[0])
    params['vocab_size'] = len(vocab_text)
    params['vocab_size_ans'] = len(vocab_text)
    n_samples = len(story_int)

    params['ent_range'] = 4  # normal order and inverse order for two tuples.

    print 'params:'
    print params
    print 'n_samples:'
    print n_samples
    print 'vocab_text:'
    print vocab_text
    sys.stdout = old_std

    story_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         params['max_sent_len']))
    story_mask_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         params['max_sent_len'], params['dim_emb_story']))
    q_train = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                           params['max_q_num'], params['max_sent_len']))
    q_mask_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
         params['max_sent_len'], params['dim_emb_story']))

    act_selected_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         (params['ent_range']) * 1 * params['relation_num']))
    reward_train = numpy.ones(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         (params['ent_range']) * 1 * params['relation_num']))

    reward_immediate_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len']))

    ans_train = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                             params['max_q_num'], params['vocab_size']))
    ans_mask_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num']))
    fin_train = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                             params['max_q_num'], params['dim_emb_env']))
    fin_train_pred = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                                  params['max_q_num'], params['dim_emb_env']))
    fin_one_hot_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
         (1) * 1 * params['relation_num']))
    reward_fin_train = numpy.ones(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
         (1) * 1 * params['relation_num']))

    print 'building model.'
    # simulate and generate final train data, do batch train and policy gd
    # build model
    reasoner, simulator, debugger, gradient_checker = DRL_Reasoner(params)

    working_memory = working_environment(params['enti_num'],
                                         params['relation_num_expand'])
    working_embed = envEmbedding(params['relation_num_expand'],
                                 params['enti_num'], params['dim_emb_env'])
    work_space_table = varTable()
    # for every story-question group, run simulations and record the env embeddings, actions taken, and total rewards.
    # run a fixed number of simulations per group, pool the traces as the training data, and do SGD / batch training.
    # repeat this process for many epochs.
    mask_sim = numpy.zeros((1, params['max_story_len'],
                            params['dim_emb_story'] * params['max_sent_len']))

    all_sim_number = 0

    print 'two phrase training started.'

    for epoch_id in range(params['epoch_num']):
        train_trace_id = 0
        avg_reward_q_epoch = 0.
        avg_reward_action_epoch = numpy.zeros(params['max_story_len'])
        print 'epoch %d phrase 1' % epoch_id
        # phrase 1: simulate and train data generating
        epoch_precision_rate = 0.
        sample_rate = numpy.zeros(n_samples)
        for story_id in range(n_samples):
            story_precision = 0.
            for sim_round in range(params['story_sims_per_epoch']):
                sys.stdout = f_print
                if (epoch_id + 1) % params['print_epoch_period'] == 0:
                    print '=======simulation====== epoch = %d, story_id = %d, sim_round = %d' % (
                        epoch_id, story_id, sim_round)
                story_train[train_trace_id, :len(story_int[0]), :len(
                    story_int[0][0])] = story_int[story_id]
                story_mask_train[train_trace_id, :len(story_int[0]), :len(
                    story_int[0][0]), :] = story_mask[story_id]
                q_train[train_trace_id, :len(q_int[0]), :len(
                    q_int[0][0])] = q_int[story_id]
                q_mask_train[train_trace_id, :len(q_int[0]), :len(
                    q_int[0][0]), :] = q_mask[story_id]
                ans_train[train_trace_id, :len(ans_int[0]), :len(
                    ans_int[0][0])] = ans_int[story_id]

                for time_slice in range(params['max_story_len']):
                    mask_sim[0][time_slice][:] = numpy.ones(
                        params['dim_emb_story'] * params['max_sent_len'])

                action_probs, retrieve_probs = simulator.predict([
                    story_train[numpy.newaxis, train_trace_id],
                    story_mask_train[numpy.newaxis,
                                     train_trace_id], q_train[numpy.newaxis,
                                                              train_trace_id],
                    q_mask_train[numpy.newaxis, train_trace_id], mask_sim[:],
                    reward_train[numpy.newaxis, train_trace_id],
                    reward_fin_train[numpy.newaxis, train_trace_id]
                ])

                for time_slice in range(params['max_story_len']):

                    # read and embed environment
                    tupleList, adjGraph, temp_index = working_memory.returnEnv(
                    )

                    action_selected, action_one_hot = select_action(
                        action_probs[:, time_slice, :], params['epsilon'])

                    act_selected_train[train_trace_id,
                                       time_slice, :] = action_one_hot
                    arg_1_ptr = action_selected // (
                        (1) * params['relation_num']
                    )  # start from 0 (empty number)
                    arg_2_ptr = (
                        action_selected - arg_1_ptr *
                        (1) * params['relation_num']) // params['relation_num']
                    arg_r_ptr = (action_selected - arg_1_ptr *
                                 (1) * params['relation_num']
                                 ) % params['relation_num'] + 1
                    arg_r = arg_r_ptr

                    order1 = arg_1_ptr // 2
                    order2 = arg_1_ptr % 2

                    flag = 0
                    three_ents = []
                    if story_mask_train[train_trace_id, time_slice, 0, 0] > 0:
                        for w_id in story_int[story_id][time_slice]:
                            if inv_vocab_text[str(
                                    int(w_id))] in ent_list.values():
                                three_ents.append(int(w_id))

                        if order1 == 0:
                            arg_1_first = three_ents[0]
                            arg_2_first = three_ents[2]
                        else:
                            arg_1_first = three_ents[2]
                            arg_2_first = three_ents[0]

                        if order2 == 0:
                            arg_1_second = three_ents[1]
                            arg_2_second = three_ents[2]
                        else:
                            arg_1_second = three_ents[2]
                            arg_2_second = three_ents[1]

                    slice_reward = 0
                    for tt in range(time_slice + 1):
                        reward_immediate_train[train_trace_id][
                            tt] += slice_reward * (params['reward_decay']
                                                   **(time_slice - tt))

                    if story_mask_train[train_trace_id, time_slice, 0, 0] > 0:
                        # retrieve the table
                        arg_1_int_first = work_space_table.retr_insert(
                            arg_1_first, inv_vocab_text)
                        arg_2_int_first = work_space_table.retr_insert(
                            arg_2_first, inv_vocab_text)
                        arg_1_int_second = work_space_table.retr_insert(
                            arg_1_second, inv_vocab_text)
                        arg_2_int_second = work_space_table.retr_insert(
                            arg_2_second, inv_vocab_text)

                        working_memory.modifyEnv(
                            (arg_1_int_first, arg_r, arg_2_int_first))
                        working_memory.modifyEnv(
                            (arg_1_int_second, arg_r, arg_2_int_second))

                # compute fin_train and fin_train_pred
                retrieved_relation_list = []
                # reward for every question, based on whether arg1/arg2
                # appear in the question or not
                reward_temp_list = []
                for q_idx in range(len(retrieve_probs[0])):
                    retr_idx, action_one_hot_retr = select_action(
                        retrieve_probs[0, q_idx, :], params['epsilon'])
                    arg1_retr_ptr = retr_idx // ((1) * params['relation_num'])
                    arg2_retr_ptr = (
                        retr_idx - arg1_retr_ptr *
                        (1) * params['relation_num']) // params['relation_num']
                    relation_pred = (retr_idx - arg1_retr_ptr *
                                     (1) * params['relation_num']
                                     ) % params['relation_num'] + 1

                    # convert from ptr to id:
                    for q_w in q_int[story_id, 0]:
                        if inv_vocab_text[str(int(q_w))] in ent_list.values():
                            arg1_retr = int(q_w)
                            break

                    retrieve_reward_pre = 0
                    reward_temp_list.append(retrieve_reward_pre)

                    arg1_retr_int = work_space_table.retr(
                        arg1_retr, inv_vocab_text)

                    # relation_pred_emb = working_embed.returnSingleEmb(relation_pred, 1, 1)
                    arg_retr_id = working_memory.retrieveArg(
                        arg1_retr_int, relation_pred)
                    arg_retr_str = work_space_table.inv_retr(arg_retr_id)

                    retrieved_relation_list.append(arg_retr_str)

                    one_hot_single = numpy.zeros(
                        ((1) * 1 * params['relation_num']))
                    one_hot_single[retr_idx] = 1
                    fin_one_hot_train[train_trace_id,
                                      q_idx, :] = one_hot_single

                reward_q_total = 0.
                ans_shared = 0
                for q_idx in range(len(retrieve_probs[0])):
                    ans_word_int = numpy.argmax(
                        ans_train[train_trace_id][0]) + 1
                    ans_word = inv_vocab_text[str(ans_word_int)]
                    reward_scalar_q, ans_q, label_q = compute_single_reward(
                        retrieved_relation_list[q_idx], ans_word)
                    reward_scalar_q *= q_mask_train[train_trace_id, q_idx, 0,
                                                    0]

                    if ans_q == label_q and q_idx == 0:
                        epoch_precision_rate += 1.
                        story_precision += 1.

                    reward_q_total += reward_scalar_q
                reward_q_total += reward_temp_list[0]

                for time_slice in range(params['max_story_len']):
                    reward_immediate_train[train_trace_id,
                                           time_slice] += reward_q_total * (
                                               params['reward_decay']
                                               **(params['max_story_len'] -
                                                  time_slice))
                for q_idx in range(len(retrieve_probs[0])):
                    # pass
                    if q_idx == 0:
                        mask_reward = 1.
                    else:
                        mask_reward = 0.
                    reward_fin_train[train_trace_id, q_idx, :] *= (
                        reward_q_total * mask_reward
                    )  # used as input at the last softmax

                avg_reward_q_epoch = avg_reward_q_epoch * (train_trace_id) / (
                    train_trace_id + 1) + reward_q_total / (
                        train_trace_id + 1)  # update E[r] for final q.

                for time_slice in range(params['max_story_len']):
                    # pass
                    reward_train[train_trace_id, time_slice, :] *= (
                        reward_immediate_train[train_trace_id, time_slice] *
                        story_mask_train[train_trace_id, time_slice, 0, 0]
                    )  # used as input at the last softmax
                    avg_reward_action_epoch[
                        time_slice] = avg_reward_action_epoch[time_slice] * (
                            train_trace_id
                        ) / (train_trace_id + 1) + reward_immediate_train[
                            train_trace_id, time_slice] / (train_trace_id + 1)

                # one simulation finished.

                train_trace_id += 1
                all_sim_number += 1
                mask_sim *= 0
                working_memory.resetEnv()
                work_space_table.reset()

            sample_rate[
                story_id] = story_precision / params['story_sims_per_epoch']

        for q_idx in range(params['max_q_num']):
            if 0:
                # pass
                reward_fin_train[:,
                                 q_idx, :] -= avg_reward_q_epoch  # used as input at the last softmax

        for time_slice in range(params['max_story_len']):
            if 0:
                # pass
                reward_train[:, time_slice, :] -= avg_reward_action_epoch[
                    time_slice]  # used as input at the last softmax

        epoch_precision_rate = epoch_precision_rate / (
            n_samples * params['story_sims_per_epoch'])
        print 'the total answer precision of this epoch:'
        print epoch_precision_rate

        sys.stdout = old_std
        print 'precision of this epoch: %f' % epoch_precision_rate
        print 'epoch %d phrase 2' % (epoch_id)
        print 'sample_rate:'
        print sample_rate
        # phrase2: go batch train on the trace pool.
        mask_sim_2 = numpy.ones(
            (n_samples * params['story_sims_per_epoch'],
             params['max_story_len'],
             params['dim_emb_story'] * params['max_sent_len']))

        reasoner.fit(
            [
                story_train, story_mask_train, q_train, q_mask_train,
                mask_sim_2, reward_train, reward_fin_train
            ], {
                'action_probs_re': act_selected_train,
                'retrieve_probs_re': fin_one_hot_train
            },
            batch_size=params['batch_size_phrase2'],
            nb_epoch=10,
            verbose=2)

        sys.stdout = old_std

        # test the model
        if (epoch_id + 1) % params['test_epoch_period'] == 0:
            test_model(simulator, story_int_test, story_mask_test, q_int_test,
                       q_mask_test, ans_int_test, ans_mask_test, params,
                       old_std, f_test, vocab_text, inv_vocab_text)

        sys.stdout = old_std
        story_train *= 0
        story_mask_train *= 0
        q_train *= 0
        q_mask_train *= 0
        act_selected_train *= 0
        reward_train = numpy.ones(
            (n_samples * params['story_sims_per_epoch'],
             params['max_story_len'], (params['ent_range']) * 1 *
             params['relation_num']))  # real number reward signal
        ans_train *= 0
        ans_mask_train *= 0
        fin_train *= 0
        fin_train_pred *= 0
        reward_fin_train = numpy.ones(
            (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
             1 * 1 * params['relation_num']))
        fin_one_hot_train *= 0
        reward_immediate_train *= 0

    f_print.close()
    f_debug.close()
    f_test.close()
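
Both train_two_phrase listings (Code Examples #2 and #4) convert the scalar question reward into per-time-slice signals: reward_q_total is discounted back over the story with reward_decay and multiplied into reward_train, which the model then consumes as an input at the final softmax. A minimal stand-alone sketch of that discounting step, with toy numbers in place of the params entries:

import numpy


def discount_back(reward_q_total, max_story_len, reward_decay):
    # mirrors the reward_immediate_train update: earlier time slices receive
    # a more heavily discounted share of the final question reward
    reward_immediate = numpy.zeros(max_story_len)
    for time_slice in range(max_story_len):
        reward_immediate[time_slice] += reward_q_total * (
            reward_decay ** (max_story_len - time_slice))
    return reward_immediate


if __name__ == '__main__':
    # earlier slices end up with the smaller, more heavily discounted values
    print(discount_back(1.0, max_story_len=5, reward_decay=0.9))
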
Code Example #5
File: run-qa-17.py Project: FlamingHorizon/VRRM
def test_model(simulator, story_int, story_mask, q_int, q_mask, ans_int,
               ans_mask, params, old_std, f_test, vocab_text, inv_vocab_text):
    relate_dict = {'left': 1, 'right': 2, 'above': 3, 'below': 4}
    adj_list = ['pink', 'blue', 'red', 'yellow']
    ent_list = ['triangle', 'rectangle', 'square', 'sphere']
    concat_ent_list = []
    for i in adj_list:
        for j in ent_list:
            concat_ent_list.append(i + j)
    concat_ent_list.extend(ent_list)

    # form vocabs and data
    numpy.set_printoptions(precision=3)
    n_samples = len(story_int)
    acc = 0.

    sys.stdout = old_std
    print 'n_samples:'
    print n_samples

    # initialize the env embeddings, actions taken, rewards for the whole train data of an epoch
    story_test = numpy.zeros(
        (1, params['max_story_len'], params['max_sent_len']))
    story_mask_test = numpy.zeros(
        (1, params['max_story_len'], params['max_sent_len'],
         params['dim_emb_story']))
    q_test = numpy.zeros((1, params['max_q_num'], params['max_sent_len']))
    q_mask_test = numpy.zeros((1, params['max_q_num'], params['max_sent_len'],
                               params['dim_emb_story']))

    # env_test = numpy.zeros((1, params['max_story_len'], params['dim_emb_env']))
    act_selected_test = numpy.zeros(
        (1, params['max_story_len'],
         (params['ent_range']) * 1 * params['relation_num']))
    reward_test = numpy.ones(
        (1, params['max_story_len'],
         (params['ent_range']) * 1 * params['relation_num']))

    reward_immediate_test = numpy.zeros((1, params['max_story_len']))

    ans_test = numpy.zeros((1, params['max_q_num'], params['vocab_size']))
    ans_mask_test = numpy.zeros((1, params['max_q_num']))
    fin_test = numpy.zeros((1, params['max_q_num'], params['dim_emb_env']))
    fin_test_pred = numpy.zeros(
        (1, params['max_q_num'], params['dim_emb_env']))
    fin_one_hot_test = numpy.zeros(
        (1, params['max_q_num'],
         (params['ent_range']) * 1 * params['relation_num']))
    reward_fin_test = numpy.ones(
        (1, params['max_q_num'],
         (params['ent_range']) * 1 * params['relation_num']))

    working_memory = working_environment(params['enti_num'],
                                         params['relation_num_expand'])
    working_embed = envEmbedding(params['relation_num_expand'],
                                 params['enti_num'], params['dim_emb_env'])
    work_space_table = varTable()

    mask_sim = numpy.zeros((1, params['max_story_len'],
                            params['dim_emb_story'] * params['max_sent_len']))

    print 'test started.'
    sys.stdout = f_test
    train_trace_id = 0  # always == 0
    sample_rate = numpy.zeros(n_samples)
    for story_id in range(n_samples):
        story_precision = 0.
        story_test[train_trace_id, :len(story_int[0]), :len(
            story_int[0][0])] = story_int[story_id]
        story_mask_test[train_trace_id, :len(story_int[0]), :len(
            story_int[0][0]), :] = story_mask[story_id]
        q_test[train_trace_id, :len(q_int[0]), :len(q_int[0][0]
                                                    )] = q_int[story_id]
        q_mask_test[train_trace_id, :len(q_int[0]), :len(
            q_int[0][0]), :] = q_mask[story_id]
        ans_test[train_trace_id, :len(ans_int[0]), :len(ans_int[0][0]
                                                        )] = ans_int[story_id]

        for time_slice in range(params['max_story_len']):
            mask_sim[0][time_slice][:] = numpy.ones(params['dim_emb_story'] *
                                                    params['max_sent_len'])

            tupleList, adjGraph, temp_index = working_memory.returnEnv()

            action_probs, retrieve_probs = simulator.predict([
                story_test[numpy.newaxis, train_trace_id],
                story_mask_test[numpy.newaxis,
                                train_trace_id], q_test[numpy.newaxis,
                                                        train_trace_id],
                q_mask_test[numpy.newaxis, train_trace_id], mask_sim[:],
                reward_test[numpy.newaxis,
                            train_trace_id], reward_fin_test[numpy.newaxis,
                                                             train_trace_id]
            ])

            action_selected, action_one_hot = select_action_hard(
                action_probs[:, time_slice, :], params['epsilon'])

            act_selected_test[train_trace_id, time_slice, :] = action_one_hot
            arg_1_ptr = action_selected // (
                (1) * params['relation_num'])  # start from 0 (empty number)
            arg_2_ptr = (
                action_selected - arg_1_ptr *
                (1) * params['relation_num']) // params['relation_num']
            arg_r_ptr = (
                action_selected - arg_1_ptr *
                (1) * params['relation_num']) % params['relation_num'] + 1
            arg_r = arg_r_ptr
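            # Decoding above (spelled out under the assumption relation_num == 4,
            # matching relate_dict, and ent_range == 2): action_selected lies in [0, 8);
            # arg_1_ptr = action_selected // 4 picks which of the two entity mentions in
            # the sentence is the first argument, arg_2_ptr is always 0 under this layout,
            # and arg_r = action_selected % 4 + 1 is the 1-based relation id.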

            flag = 0
            arg_1 = 0
            arg_2 = 0
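            # Scan the sentence for entity mentions: the first mention found becomes
            # arg_1 when arg_1_ptr == 0; otherwise the roles are swapped and the
            # second mention becomes arg_1.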
            if time_slice < len(story_int[story_id]):
                for w_id in story_int[story_id][time_slice]:
                    if inv_vocab_text[str(
                            int(w_id))] in concat_ent_list and flag == 0:
                        if arg_1_ptr == 0:
                            arg_1 = int(w_id)
                        else:
                            arg_2 = int(w_id)
                        flag = 1
                    elif inv_vocab_text[str(
                            int(w_id))] in concat_ent_list and flag == 1:
                        if arg_1_ptr == 0:
                            arg_2 = int(w_id)
                        else:
                            arg_1 = int(w_id)

            slice_reward = 0
            for tt in range(time_slice + 1):
                reward_immediate_test[train_trace_id][tt] += slice_reward * (
                    params['reward_decay']**(time_slice - tt))
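            # slice_reward is fixed at 0 here, so the discounted back-distribution above
            # is effectively a no-op; the signal comes from the question rewards added
            # after the whole story has been processed.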

            if arg_1 > 0 and arg_2 > 0 and story_mask_test[train_trace_id,
                                                           time_slice, 0,
                                                           0] > 0:
                # map the raw word ids to working-memory entity slots, then store the (arg_1, relation, arg_2) triple in the working environment
                arg_1_int = work_space_table.retr_insert(arg_1, inv_vocab_text)
                arg_2_int = work_space_table.retr_insert(arg_2, inv_vocab_text)
                working_memory.modifyEnv((arg_1_int, arg_r, arg_2_int))

        # retrieval stage: decode the retrieve head for each question and look up the stored relation in working memory
        retrieved_relation_list = []
        pred_relation_list = []
        reward_temp_list = []  # reward for every question, based on whether arg1/arg2 appear in the question or not
        for q_idx in range(len(retrieve_probs[0])):
            retr_idx = numpy.argmax(retrieve_probs[0, q_idx, :])
            arg1_retr_ptr = retr_idx // ((1) * params['relation_num'])
            arg2_retr_ptr = (
                retr_idx - arg1_retr_ptr *
                (1) * params['relation_num']) // params['relation_num']
            relation_pred = (
                retr_idx - arg1_retr_ptr *
                (1) * params['relation_num']) % params['relation_num'] + 1

            # convert from ptr to id:
            flag = 0
            arg1_retr = 0
            arg2_retr = 0

            for q_w in q_int[story_id, q_idx]:
                if inv_vocab_text[str(
                        int(q_w))] in concat_ent_list and flag == 0:
                    if arg1_retr_ptr == 0:
                        arg1_retr = int(q_w)
                    else:
                        arg2_retr = int(q_w)
                    flag = 1
                elif inv_vocab_text[str(
                        int(q_w))] in concat_ent_list and flag == 1:
                    if arg1_retr_ptr == 0:
                        arg2_retr = int(q_w)
                    else:
                        arg1_retr = int(q_w)

            retrieve_reward_pre = 0
            reward_temp_list.append(retrieve_reward_pre)

            arg1_retr_int = work_space_table.retr(arg1_retr, inv_vocab_text)
            arg2_retr_int = work_space_table.retr(arg2_retr, inv_vocab_text)

            relation_retr = working_memory.retrieveRelation(
                arg1_retr_int, arg2_retr_int)
            retrieved_relation_list.append(relation_retr)
            pred_relation_list.append(relation_pred)

            one_hot_single = numpy.zeros(
                ((params['ent_range']) * 1 * params['relation_num']))
            one_hot_single[retr_idx] = 1
            fin_one_hot_test[train_trace_id, q_idx, :] = one_hot_single

        reward_q_total = 0.
        reward_q_list = []
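        # Score each question: compute_single_reward_yn (not shown on this page)
        # presumably turns the retrieved vs. predicted relation into a yes/no answer
        # and a reward against the gold answer word; padded questions are masked out
        # via q_mask_test.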
        for q_idx in range(len(retrieve_probs[0])):
            ans_word_int = numpy.argmax(ans_test[train_trace_id][q_idx]) + 1
            ans_word = inv_vocab_text[str(ans_word_int)]
            reward_scalar_q, ans_q, label_q = compute_single_reward_yn(
                retrieved_relation_list[q_idx], pred_relation_list[q_idx],
                ans_word)
            reward_scalar_q *= q_mask_test[train_trace_id, q_idx, 0, 0]

            if ans_q == label_q:
                acc += 1.
                story_precision += 1.

            reward_q_total += reward_scalar_q
            reward_q_list.append(reward_scalar_q)

        sample_rate[story_id] = story_precision / params['max_story_len']

        for time_slice in range(params['max_story_len']):
            reward_immediate_test[train_trace_id,
                                  time_slice] += reward_q_total * (
                                      params['reward_decay']
                                      **(params['max_story_len'] - time_slice))
        for q_idx in range(len(retrieve_probs[0])):
            # pass
            reward_fin_test[train_trace_id, q_idx, :] *= reward_q_list[
                q_idx]  # used as input at the last softmax

        for time_slice in range(params['max_story_len']):
            # pass
            reward_test[train_trace_id, time_slice, :] *= (
                reward_immediate_test[train_trace_id, time_slice] *
                story_mask_test[train_trace_id, time_slice, 0, 0]
            )  # used as input at the last softmax

        mask_sim *= 0
        working_memory.resetEnv()
        work_space_table.reset()

    sys.stdout = old_std
    print 'test result:'
    print sample_rate
    print 'total accuracy:'
    print acc / (n_samples * params['max_story_len'])
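
Neither select_action_hard nor select_action is shown on this page. Judging from the call sites above (a row of action probabilities and an epsilon value in, a flat index plus its one-hot encoding out), a minimal sketch could look like the following; the greedy-vs-sampling split and both function bodies are assumptions, not the project's confirmed implementation.

import numpy


def select_action_hard(probs_row, epsilon):
    # Hypothetical greedy variant used at test time: always take the argmax;
    # epsilon is accepted only for interface compatibility.
    probs = numpy.asarray(probs_row).ravel()
    idx = int(numpy.argmax(probs))
    one_hot = numpy.zeros_like(probs)
    one_hot[idx] = 1.
    return idx, one_hot


def select_action(probs_row, epsilon):
    # Hypothetical exploratory variant used during phrase 1: with probability
    # epsilon pick a uniformly random action, otherwise sample from the policy.
    probs = numpy.asarray(probs_row).ravel()
    if numpy.random.rand() < epsilon:
        idx = int(numpy.random.randint(len(probs)))
    else:
        idx = int(numpy.random.choice(len(probs), p=probs / probs.sum()))
    one_hot = numpy.zeros_like(probs)
    one_hot[idx] = 1.
    return idx, one_hot
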
Code example #6
File: run-qa-17.py  Project: FlamingHorizon/VRRM
def train_two_phrase(train_file_names, test_file_names, params):
    K.set_epsilon(1e-4)
    relate_dict = {'left': 1, 'right': 2, 'above': 3, 'below': 4}
    adj_list = ['pink', 'blue', 'red', 'yellow']
    ent_list = ['triangle', 'rectangle', 'square', 'sphere']
    concat_ent_list = []
    for i in adj_list:
        for j in ent_list:
            concat_ent_list.append(i + j)
    concat_ent_list.extend(ent_list)

    # form vocabs and data
    numpy.set_printoptions(precision=3)
    print 'loading data.'
    old_std = sys.stdout
    f_print = open('debug_print_phr1.txt', 'w')
    f_debug = open('debug_print_phr2.txt', 'w')
    f_test = open('debug_print_test.txt', 'w')

    sys.stdout = f_print
    lines = []
    for f_name in train_file_names:
        f = open(f_name, 'r')
        lines.extend(f.readlines())
        f.close()

    lines_test = []
    for f_name in test_file_names:
        f = open(f_name, 'r')
        lines_test.extend(f.readlines())
        f.close()

    vocab_text = {}
    inv_vocab_text = {}
    data = parse_stories(lines, vocab_text, inv_vocab_text)
    data_test = parse_stories(lines_test, vocab_text, inv_vocab_text)

    story_int, story_mask, q_int, q_mask, ans_int, ans_mask, sizes = int_stories(
        data, vocab_text)
    story_mask = repeatTensor(story_mask, params['dim_emb_story'])
    q_mask = repeatTensor(q_mask, params['dim_emb_story'])

    story_int_test, story_mask_test, q_int_test, q_mask_test, ans_int_test, ans_mask_test, sizes_test = int_stories(
        data_test, vocab_text)
    story_mask_test = repeatTensor(story_mask_test, params['dim_emb_story'])
    q_mask_test = repeatTensor(q_mask_test, params['dim_emb_story'])

    inv_vocab_text['0'] = 'dududu'

    params['max_story_len'] = max(sizes[2], sizes[0])
    params['max_story_len_valid'] = sizes[0]
    params['max_sent_len'] = max(sizes[1], sizes[3])
    params['max_q_num'] = max(sizes[2], sizes[0])
    params['vocab_size'] = len(vocab_text)
    params['vocab_size_ans'] = len(vocab_text)
    n_samples = len(story_int)

    params['ent_range'] = 2
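    # ent_range == 2: each action/retrieval chooses which of the (at most) two
    # entity mentions in a sentence or question acts as the first argument.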

    print 'params:'
    print params
    print 'n_samples:'
    print n_samples
    print 'vocab_text:'
    print vocab_text
    sys.stdout = old_std
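    # allocate the phrase-1 trace pool (filled below): one row per (story, simulation)
    # pair, cleared and refilled every epoch before the phrase-2 batch update.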

    story_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         params['max_sent_len']))
    story_mask_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         params['max_sent_len'], params['dim_emb_story']))
    q_train = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                           params['max_q_num'], params['max_sent_len']))
    q_mask_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
         params['max_sent_len'], params['dim_emb_story']))

    act_selected_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         (params['ent_range']) * 1 * params['relation_num']))
    reward_train = numpy.ones(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len'],
         (params['ent_range']) * 1 * params['relation_num']))

    reward_immediate_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_story_len']))

    ans_train = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                             params['max_q_num'], params['vocab_size']))
    ans_mask_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num']))
    fin_train = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                             params['max_q_num'], params['dim_emb_env']))
    fin_train_pred = numpy.zeros((n_samples * params['story_sims_per_epoch'],
                                  params['max_q_num'], params['dim_emb_env']))
    fin_one_hot_train = numpy.zeros(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
         (params['ent_range']) * 1 * params['relation_num']))
    reward_fin_train = numpy.ones(
        (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
         (params['ent_range']) * 1 * params['relation_num']))

    print 'building model.'

    reasoner, simulator, debugger, gradient_checker = DRL_Reasoner(params)

    working_memory = working_environment(params['enti_num'],
                                         params['relation_num_expand'])
    working_embed = envEmbedding(params['relation_num_expand'],
                                 params['enti_num'], params['dim_emb_env'])
    work_space_table = varTable()

    mask_sim = numpy.zeros((1, params['max_story_len'],
                            params['dim_emb_story'] * params['max_sent_len']))

    all_sim_number = 0

    print 'two phrase training started.'

    for epoch_id in range(params['epoch_num']):
        train_trace_id = 0
        avg_reward_q_epoch = 0.
        avg_reward_action_epoch = numpy.zeros(params['max_story_len'])
        print 'epoch %d phrase 1' % epoch_id
        epoch_precision_rate = 0.
        sample_rate = numpy.zeros(n_samples)
        for story_id in range(n_samples):
            story_precision = 0.
            for sim_round in range(params['story_sims_per_epoch']):
                sys.stdout = f_print
                story_train[train_trace_id, :len(story_int[0]), :len(
                    story_int[0][0])] = story_int[story_id]
                story_mask_train[train_trace_id, :len(story_int[0]), :len(
                    story_int[0][0]), :] = story_mask[story_id]
                q_train[train_trace_id, :len(q_int[0]), :len(
                    q_int[0][0])] = q_int[story_id]
                q_mask_train[train_trace_id, :len(q_int[0]), :len(
                    q_int[0][0]), :] = q_mask[story_id]
                ans_train[train_trace_id, :len(ans_int[0]), :len(
                    ans_int[0][0])] = ans_int[story_id]

                for time_slice in range(params['max_story_len']):
                    mask_sim[0][time_slice][:] = numpy.ones(
                        params['dim_emb_story'] * params['max_sent_len'])

                    # read the current working-environment state (its embedding is not fed to the simulator inputs below)
                    tupleList, adjGraph, temp_index = working_memory.returnEnv(
                    )

                    action_probs, retrieve_probs = simulator.predict([
                        story_train[numpy.newaxis, train_trace_id],
                        story_mask_train[numpy.newaxis, train_trace_id],
                        q_train[numpy.newaxis,
                                train_trace_id], q_mask_train[numpy.newaxis,
                                                              train_trace_id],
                        mask_sim[:], reward_train[numpy.newaxis,
                                                  train_trace_id],
                        reward_fin_train[numpy.newaxis, train_trace_id]
                    ])

                    action_selected, action_one_hot = select_action(
                        action_probs[:, time_slice, :], params['epsilon'])

                    act_selected_train[train_trace_id,
                                       time_slice, :] = action_one_hot
                    arg_1_ptr = action_selected // (
                        (1) * params['relation_num']
                    )  # start from 0 (empty number)
                    arg_2_ptr = (
                        action_selected - arg_1_ptr *
                        (1) * params['relation_num']) // params['relation_num']
                    arg_r_ptr = (action_selected - arg_1_ptr *
                                 (1) * params['relation_num']
                                 ) % params['relation_num'] + 1
                    arg_r = arg_r_ptr

                    flag = 0
                    arg_1 = 0
                    arg_2 = 0
                    if time_slice < len(story_int[story_id]):
                        for w_id in story_int[story_id][time_slice]:
                            if inv_vocab_text[str(int(
                                    w_id))] in concat_ent_list and flag == 0:
                                if arg_1_ptr == 0:
                                    arg_1 = int(w_id)
                                else:
                                    arg_2 = int(w_id)
                                flag = 1
                            elif inv_vocab_text[str(int(
                                    w_id))] in concat_ent_list and flag == 1:
                                if arg_1_ptr == 0:
                                    arg_2 = int(w_id)
                                else:
                                    arg_1 = int(w_id)

                    slice_reward = 0
                    for tt in range(time_slice + 1):
                        reward_immediate_train[train_trace_id][
                            tt] += slice_reward * (params['reward_decay']
                                                   **(time_slice - tt))

                    if arg_1 > 0 and arg_2 > 0 and story_mask_train[
                            train_trace_id, time_slice, 0, 0] > 0:
                        # retrieve the table
                        arg_1_int = work_space_table.retr_insert(
                            arg_1, inv_vocab_text)
                        arg_2_int = work_space_table.retr_insert(
                            arg_2, inv_vocab_text)
                        working_memory.modifyEnv((arg_1_int, arg_r, arg_2_int))

                # retrieval stage: sample the retrieve head for each question and look up the stored relation in working memory
                retrieved_relation_list = []
                pred_relation_list = []
                reward_temp_list = []  # reward for every question, based on whether arg1/arg2 appear in the question or not
                for q_idx in range(len(retrieve_probs[0])):
                    retr_idx, action_one_hot_retr = select_action(
                        retrieve_probs[:, q_idx, :], params['epsilon'])
                    arg1_retr_ptr = retr_idx // ((1) * params['relation_num'])
                    arg2_retr_ptr = (
                        retr_idx - arg1_retr_ptr *
                        (1) * params['relation_num']) // params['relation_num']
                    relation_pred = (retr_idx - arg1_retr_ptr *
                                     (1) * params['relation_num']
                                     ) % params['relation_num'] + 1
                    flag = 0
                    arg1_retr = 0
                    arg2_retr = 0

                    for q_w in q_int[story_id, q_idx]:
                        if inv_vocab_text[str(
                                int(q_w))] in concat_ent_list and flag == 0:
                            if arg1_retr_ptr == 0:
                                arg1_retr = int(q_w)
                            else:
                                arg2_retr = int(q_w)
                            flag = 1
                        elif inv_vocab_text[str(
                                int(q_w))] in concat_ent_list and flag == 1:
                            if arg1_retr_ptr == 0:
                                arg2_retr = int(q_w)
                            else:
                                arg1_retr = int(q_w)

                    retrieve_reward_pre = 0
                    reward_temp_list.append(retrieve_reward_pre)

                    arg1_retr_int = work_space_table.retr(
                        arg1_retr, inv_vocab_text)
                    arg2_retr_int = work_space_table.retr(
                        arg2_retr, inv_vocab_text)

                    # relation_pred_emb = working_embed.returnSingleEmb(relation_pred, 1, 1)
                    relation_retr = working_memory.retrieveRelation(
                        arg1_retr_int, arg2_retr_int)
                    retrieved_relation_list.append(relation_retr)
                    pred_relation_list.append(relation_pred)

                    one_hot_single = numpy.zeros(
                        ((params['ent_range']) * 1 * params['relation_num']))
                    one_hot_single[retr_idx] = 1
                    fin_one_hot_train[train_trace_id,
                                      q_idx, :] = one_hot_single

                reward_q_total = 0.
                reward_q_list = []
                for q_idx in range(len(retrieve_probs[0])):
                    ans_word_int = numpy.argmax(
                        ans_train[train_trace_id][q_idx]) + 1
                    ans_word = inv_vocab_text[str(ans_word_int)]
                    reward_scalar_q, ans_q, label_q = compute_single_reward_yn(
                        retrieved_relation_list[q_idx],
                        pred_relation_list[q_idx], ans_word)
                    reward_scalar_q *= q_mask_train[train_trace_id, q_idx, 0,
                                                    0]

                    if ans_q == label_q:
                        epoch_precision_rate += 1.
                        story_precision += 1.

                    reward_q_total += reward_scalar_q
                    reward_q_list.append(reward_scalar_q)

                for time_slice in range(params['max_story_len']):
                    reward_immediate_train[
                        train_trace_id, time_slice] += reward_q_total * (
                            params['reward_decay']
                            **(params['max_story_len_valid'] - time_slice))
                for q_idx in range(len(retrieve_probs[0])):
                    # pass
                    reward_fin_train[train_trace_id,
                                     q_idx, :] *= reward_q_list[q_idx]

                avg_reward_q_epoch = avg_reward_q_epoch * (train_trace_id) / (
                    train_trace_id + 1) + reward_q_total / (train_trace_id + 1)
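                # the update above keeps a running mean of the per-trace total
                # question reward over the traces seen so far in this epoch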

                for time_slice in range(params['max_story_len']):
                    # pass
                    reward_train[train_trace_id, time_slice, :] *= (
                        reward_immediate_train[train_trace_id, time_slice] *
                        story_mask_train[train_trace_id, time_slice, 0, 0])
                    avg_reward_action_epoch[
                        time_slice] = avg_reward_action_epoch[time_slice] * (
                            train_trace_id
                        ) / (train_trace_id + 1) + reward_immediate_train[
                            train_trace_id, time_slice] / (train_trace_id + 1)

                train_trace_id += 1
                all_sim_number += 1
                mask_sim *= 0
                working_memory.resetEnv()
                work_space_table.reset()

            sample_rate[story_id] = story_precision / (
                params['story_sims_per_epoch'] * params['max_story_len'])
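        # (disabled) baseline subtraction: the two `if 0:` blocks below would subtract
        # the epoch-average rewards from the per-question and per-action reward inputs.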

        for q_idx in range(params['max_q_num']):
            if 0:
                # pass
                reward_fin_train[:,
                                 q_idx, :] -= avg_reward_q_epoch  # used as input at the last softmax

        for time_slice in range(params['max_story_len']):
            if 0:
                # pass
                reward_train[:, time_slice, :] -= avg_reward_action_epoch[
                    time_slice]  # used as input at the last softmax

        epoch_precision_rate = epoch_precision_rate / (
            n_samples * params['story_sims_per_epoch'] *
            params['max_story_len'])

        sys.stdout = old_std
        print 'precision of this epoch: %f' % epoch_precision_rate
        print 'epoch %d phrase 2' % (epoch_id)
        print 'sample_rate:'
        print sample_rate
        # phrase 2: batch-train the reasoner on the collected trace pool.
        mask_sim_2 = numpy.ones(
            (n_samples * params['story_sims_per_epoch'],
             params['max_story_len'],
             params['dim_emb_story'] * params['max_sent_len']))

        reasoner.fit(
            [
                story_train, story_mask_train, q_train, q_mask_train,
                mask_sim_2, reward_train, reward_fin_train
            ], {
                'action_probs_re': act_selected_train,
                'retrieve_probs_re': fin_one_hot_train
            },
            batch_size=params['batch_size_phrase2'],
            nb_epoch=10,
            verbose=2)
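        # The fit targets are the one-hot selected actions / retrievals; the reward
        # tensors enter as extra inputs ("used as input at the last softmax" per the
        # comments above). Note: nb_epoch is the Keras 1.x keyword, renamed to epochs
        # in Keras 2.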

        sys.stdout = old_std

        # test the model
        if (epoch_id + 1) % params['test_epoch_period'] == 0:
            test_model(simulator, story_int_test, story_mask_test, q_int_test,
                       q_mask_test, ans_int_test, ans_mask_test, params,
                       old_std, f_test, vocab_text, inv_vocab_text)

        sys.stdout = old_std
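        # clear the trace pool for the next epoch; the reward buffers are re-created
        # with numpy.ones because they were multiplied in place during phrase 1.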
        story_train *= 0
        story_mask_train *= 0
        q_train *= 0
        q_mask_train *= 0
        act_selected_train *= 0
        reward_train = numpy.ones(
            (n_samples * params['story_sims_per_epoch'],
             params['max_story_len'], (params['ent_range']) * 1 *
             params['relation_num']))  # real number reward signal
        ans_train *= 0
        ans_mask_train *= 0
        fin_train *= 0
        fin_train_pred *= 0
        reward_fin_train = numpy.ones(
            (n_samples * params['story_sims_per_epoch'], params['max_q_num'],
             (params['ent_range']) * 1 * params['relation_num']))
        fin_one_hot_train *= 0
        reward_immediate_train *= 0

    f_print.close()
    f_debug.close()
    f_test.close()
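
For orientation, a hypothetical invocation of train_two_phrase is sketched below. Every key in the dict is one the functions above actually read, but the values and file names are illustrative placeholders rather than the project's configuration, and DRL_Reasoner may require further keys that are not visible on this page (max_story_len, max_sent_len, max_q_num, vocab_size and ent_range are filled in by train_two_phrase itself).

params = {
    'dim_emb_story': 50,         # embedding width used to repeat the story/question masks
    'dim_emb_env': 50,           # width of the environment embedding
    'relation_num': 4,           # left / right / above / below
    'relation_num_expand': 8,    # expanded relation set for the working environment
    'enti_num': 20,              # entity slots in the working memory
    'epoch_num': 100,
    'story_sims_per_epoch': 5,   # phrase-1 simulations per training story
    'batch_size_phrase2': 32,
    'test_epoch_period': 10,     # run test_model every this many epochs
    'epsilon': 0.1,              # exploration rate passed to select_action
    'reward_decay': 0.9,         # discount when spreading question reward over time slices
}

train_two_phrase(['qa17_train.txt'], ['qa17_test.txt'], params)
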