# NOTE(review): this line is a whitespace-collapsed fragment of a sampling-loop body. The
# enclosing loop/function header is not visible here, so the original indentation and
# nesting should be restored from the upstream file rather than inferred from this text.
# Summary of the visible statements: one action sequence is drawn with
# net.decode_chain_sampling and counted in total_samples; the sequence is compared with
# utils.duplicate against every entry of action_memory; a match appears to lead to
# `skipped_samples += 1` followed by `continue`, otherwise the sequence is appended to
# action_memory. Finally action_tokens collects the upper-cased string form of each id in
# `actions` that is present in rev_emb_dict and does not map to '#END'.
# 'r_sample' is the list of out_logits list and 'actions' is the list of output tokens. # The output tokens are sampled following probabilities by using chain_sampling. r_sample, actions = net.decode_chain_sampling( item_enc, beg_embedding, data.MAX_TOKENS, context[idx], stop_at_token=end_token) total_samples += 1 # Omit duplicate action sequence to decrease the computing time and to avoid the case that # the probability of such kind of duplicate action sequences would be increased redundantly and abnormally. duplicate_flag = False if len(action_memory) > 0: for temp_list in action_memory: if utils.duplicate(temp_list, actions): duplicate_flag = True break if not duplicate_flag: action_memory.append(actions) else: skipped_samples += 1 continue # Show what the output action sequence is. action_tokens = [] for temp_idx in actions: if temp_idx in rev_emb_dict and rev_emb_dict.get( temp_idx) != '#END': action_tokens.append( str(rev_emb_dict.get( temp_idx)).upper())
# NOTE(review): this line is a whitespace-collapsed fragment of a sampling-loop body. The
# `if` that pairs with the `else:` below is outside the visible text, and the extent of the
# else body cannot be determined from the collapsed form -- restore the nesting from the
# upstream file before editing.
# Summary of the visible statements: the first branch reads `actions` and `r_sample` from
# action_sequence_list[sample_index] and sample_logits_list[sample_index]; the else branch
# draws them with net.decode_chain_sampling instead. The sequence is then compared with
# utils.duplicate against every entry of chain_sampling_action_memory; a match appears to
# lead to `continue`, otherwise the sequence is appended to that memory. action_tokens
# collects the upper-cased string form of each id in `actions` that is present in
# rev_emb_dict and does not map to '#END'.
# NOTE(review): the trailing comment about "the last parameter" refers to a call that lies
# beyond the end of this fragment.
# The output tokens are sampled following probability by using chain_sampling. actions = action_sequence_list[sample_index] r_sample = sample_logits_list[sample_index] else: r_sample, actions = net.decode_chain_sampling( item_enc, beg_embedding, data.MAX_TOKENS, context[idx], stop_at_token=end_token) # Omit duplicate action sequence to decrease the computing time and to avoid the case that # the probability of such kind of duplicate action sequences would be increased redundantly and abnormally. duplicate_flag = False if len(chain_sampling_action_memory) > 0: for temp_list in chain_sampling_action_memory: if utils.duplicate(temp_list, actions): duplicate_flag = True break if not duplicate_flag: chain_sampling_action_memory.append(actions) else: continue # Show what the output action sequence is. action_tokens = [] for temp_idx in actions: if temp_idx in rev_emb_dict and rev_emb_dict.get( temp_idx) != '#END': action_tokens.append( str(rev_emb_dict.get(temp_idx)).upper()) # If the last parameter is false, it means that the 0-1 reward is used to calculate the accuracy.