Code Example #1
                            # 'r_sample' is the list of per-step output logits and 'actions' is the list of output tokens.
                            # The output tokens are sampled according to their probabilities by chain sampling.
                            r_sample, actions = net.decode_chain_sampling(
                                item_enc,
                                beg_embedding,
                                data.MAX_TOKENS,
                                context[idx],
                                stop_at_token=end_token)
                            total_samples += 1

                            # Skip duplicate action sequences to reduce computation time and to avoid
                            # redundantly and abnormally inflating the probability of the same sequence.
                            duplicate_flag = False
                            if len(action_memory) > 0:
                                for temp_list in action_memory:
                                    if utils.duplicate(temp_list, actions):
                                        duplicate_flag = True
                                        break
                            if not duplicate_flag:
                                action_memory.append(actions)
                            else:
                                skipped_samples += 1
                                continue
                            # Show what the output action sequence is.
                            action_tokens = []
                            for temp_idx in actions:
                                if temp_idx in rev_emb_dict and rev_emb_dict.get(temp_idx) != '#END':
                                    action_tokens.append(str(rev_emb_dict.get(temp_idx)).upper())
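The decode_chain_sampling call above returns the per-step logits together with the sampled token sequence. As a rough illustration of what "chain sampling" means here (not the actual network code), each step's token is drawn from the softmax distribution over the vocabulary and fed back in as the next input, stopping at the end token or the length limit. The sketch below is a minimal, self-contained approximation; decoder_step, emb_layer, and the tensor shapes are assumptions, not the real model's interface.

import torch
import torch.nn.functional as F

def chain_sampling_sketch(decoder_step, hidden, beg_embedding, emb_layer,
                          max_tokens, end_token):
    # Illustrative sketch only: sample one token per step from the softmax
    # distribution and feed it back in, stopping at end_token or max_tokens.
    out_logits, actions = [], []
    cur_emb = beg_embedding
    for _ in range(max_tokens):
        logits, hidden = decoder_step(hidden, cur_emb)   # hypothetical step function
        probs = F.softmax(logits, dim=-1)
        action = int(torch.multinomial(probs, num_samples=1).item())
        out_logits.append(logits)
        actions.append(action)
        if action == end_token:
            break
        cur_emb = emb_layer(torch.tensor([action]))      # embed the sampled token for the next step
    return out_logits, actions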
Code Example #2
                            # The output tokens are sampled according to their probabilities by chain sampling.
                            actions = action_sequence_list[sample_index]
                            r_sample = sample_logits_list[sample_index]
                        else:
                            r_sample, actions = net.decode_chain_sampling(
                                item_enc,
                                beg_embedding,
                                data.MAX_TOKENS,
                                context[idx],
                                stop_at_token=end_token)
                            # Skip duplicate action sequences to reduce computation time and to avoid
                            # redundantly and abnormally inflating the probability of the same sequence.
                            duplicate_flag = False
                            if len(chain_sampling_action_memory) > 0:
                                for temp_list in chain_sampling_action_memory:
                                    if utils.duplicate(temp_list, actions):
                                        duplicate_flag = True
                                        break
                            if not duplicate_flag:
                                chain_sampling_action_memory.append(actions)
                            else:
                                continue

                        # Show what the output action sequence is.
                        action_tokens = []
                        for temp_idx in actions:
                            if temp_idx in rev_emb_dict and rev_emb_dict.get(temp_idx) != '#END':
                                action_tokens.append(str(rev_emb_dict.get(temp_idx)).upper())
                        # If the last parameter is False, the 0-1 reward is used to calculate the accuracy.
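The comment above refers to a 0-1 reward, presumably an exact-match signal: a sampled action sequence earns reward 1 only if it matches the reference, and accuracy is the mean of those rewards. A minimal sketch under that assumption; calc_01_reward and accuracy_01 are hypothetical helpers for illustration, not functions from this code base.

def calc_01_reward(predicted_tokens, reference_tokens):
    # Hypothetical 0-1 reward: 1.0 only for an exact sequence match.
    return 1.0 if list(predicted_tokens) == list(reference_tokens) else 0.0

def accuracy_01(predictions, references):
    # Accuracy is then simply the mean of the per-sample 0-1 rewards.
    rewards = [calc_01_reward(p, r) for p, r in zip(predictions, references)]
    return sum(rewards) / len(rewards) if rewards else 0.0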