def test_program_to_lispress_with_quotes_inside_string():
    """A string value containing a double quote is escaped when rendered
    and survives the trip back to a `Program`."""
    quoted_value_expr, _ = mk_value_op(
        value='i got quotes"', schema="String", idx=0
    )
    expected_program = Program(expressions=[quoted_value_expr])

    # Program -> lispress -> surface string: the inner quote must be escaped.
    sexp = program_to_lispress(expected_program)
    surface = render_pretty(sexp)
    assert surface == '(#(String "i got quotes\\""))'

    # surface string -> lispress -> Program: must give back the same program.
    reparsed = parse_lispress(surface)
    actual_program, _ = lispress_to_program(reparsed, 0)
    assert actual_program == expected_program
def to_canonical_form(tokenized_lispress: str) -> str:
    """Returns the canonical form of a space-tokenized lispress string.

    The tokens are turned back into lispress, converted to a `Program` and
    back again, then rendered compactly. The result is therefore
    un-tokenized and compact, with named arguments sorted alphabetically.
    """
    tokens = tokenized_lispress.split(" ")
    program, _ = lispress_to_program(seq_to_lispress(tokens), 0)
    return render_compact(program_to_lispress(program))
def test_surface_to_program_round_trips():
    """
    Round-trips every surface string through `Program`, which makes this
    stricter than `test_surface_to_sexp_round_trips`.
    """
    for expected_surface in (raw.strip() for raw in surface_strings):
        program, _ = lispress_to_program(parse_lispress(expected_surface), 0)
        actual_surface = render_pretty(
            program_to_lispress(program), max_width=60
        )
        assert actual_surface == expected_surface
def _try_round_trip(lispress_str: str) -> str:
    """
    Canonicalizes `lispress_str` by round-tripping it through `Program`,
    which puts named arguments in alphabetical order.

    If any step of the round trip raises, the input is treated as invalid
    lispress and returned unmodified.
    """
    try:
        # Every step stays inside the `try`: a failure anywhere in the
        # pipeline means "not valid lispress" and triggers the fallback.
        program, _ = lispress_to_program(parse_lispress(lispress_str), 0)
        canonical = render_compact(program_to_lispress(program))
    except Exception:  # pylint: disable=W0703
        return lispress_str
    return canonical
def program_to_seq(program: Program) -> List[str]:
    """Converts `program` to lispress and returns the sequence that
    `lispress_to_seq` produces for it."""
    return lispress_to_seq(program_to_lispress(program))
def create_programs_for_trade_dialogue(
    trade_dialogue: Dict[str, Any],
    keep_all_domains: bool,
    remove_none: bool,
    fill_none: bool,
    salience_model: SalienceModelBase,
    no_revise: bool,
    avoid_empty_plan: bool,
    utterance_tokenizer: UtteranceTokenizer,
) -> Tuple[Dialogue, int, List[Dict[str, Any]]]:
    """Creates programs for a TRADE dialogue.

    Each turn's belief state is flattened, converted to a program, rendered
    as compact lispress, and then executed again to verify that the program
    reproduces the belief state.

    Args:
        trade_dialogue: The TRADE dialogue. Read keys: "dialogue" (the list
            of turns, each with "system_transcript", "transcript",
            "belief_state" and "turn_idx") and "dialogue_idx".
        keep_all_domains: Forwarded to `flatten_belief_state`.
        remove_none: Forwarded to `flatten_belief_state`. Must not be
            combined with `fill_none`.
        fill_none: If True, every slot of an active domain (a domain with at
            least one slot in the flattened belief state) that is missing
            from the belief state is added with the value "none".
        salience_model: Forwarded to `create_program_for_trade_turn` and
            `execute_program_for_turn`.
        no_revise: Forwarded to `create_program_for_trade_turn`. When True,
            the previous belief state is not passed to
            `execute_program_for_turn`.
        avoid_empty_plan: Forwarded to `create_program_for_trade_turn`.
        utterance_tokenizer: Tokenizer used to build the user and agent
            utterances.

    Returns:
        A tuple of (Dialogue, the number of refer calls, refer call report at each turn).

    Raises:
        AssertionError: If both `remove_none` and `fill_none` are set, if
            the first turn's system transcript is not empty, or if executing
            a turn's program does not reproduce that turn's belief state.
    """
    assert not (
        remove_none and fill_none
    ), "remove_none and fill_none are mutually exclusive"

    # the execution trace of the program for the dialogue
    # updated at the end of each trade_turn
    last_execution_trace = ExecutionTrace(slot_values={})
    # the number of refer calls in the program
    num_refer_calls: int = 0
    # the flattened belief state at the previous turn
    last_belief_dict: Dict[str, str] = {}

    # state threaded through `create_program_for_trade_turn` across turns
    pointer_count: int = 0
    topic_pointers: Dict[str, Any] = {}
    topic_keys: DefaultDict[str, Set[str]] = defaultdict(set)
    last_nonempty_diff_dicts: List[Dict[str, str]] = []

    dataflow_turns: List[Turn] = []
    refer_call_reports: List[Dict[str, Any]] = []

    trade_turns = trade_dialogue["dialogue"]
    assert not trade_turns[0][
        "system_transcript"
    ].strip(), "the leading agent utterance should be empty"

    for turn_index, trade_turn in enumerate(trade_turns):
        curr_belief_dict = flatten_belief_state(
            belief_state=trade_turn["belief_state"],
            keep_all_domains=keep_all_domains,
            remove_none=remove_none,
        )

        if fill_none:
            # Sometimes the user may activate a domain without any constraint,
            # in this case, all slot values may be "none" (or "dontcare"?).
            # Only the slot names are needed to find the active domains, so
            # iterate over the keys directly.
            active_domains: Set[str] = {
                get_domain_and_slot_name(slot_fullname=slot_fullname)[0]
                for slot_fullname in curr_belief_dict
            }
            for domain in active_domains:
                # adds "none" to active domains so the model can get more supervision signals
                # note slots in inactive domains are not added
                for slot_name in DATAFLOW_SLOT_NAMES_FOR_DOMAIN[domain]:
                    slot_fullname = "{}-{}".format(domain, slot_name)
                    if slot_fullname not in curr_belief_dict:
                        curr_belief_dict[slot_fullname] = "none"

        (
            expressions,
            pointer_count,
            refer_call_report,
            last_nonempty_diff_dicts,
        ) = create_program_for_trade_turn(
            trade_turn=trade_turn,
            curr_belief_dict=curr_belief_dict,
            last_belief_dict=last_belief_dict,
            topic_pointers=topic_pointers,
            topic_keys=topic_keys,
            execution_trace=last_execution_trace,
            pointer_count=pointer_count,
            salience_model=salience_model,
            no_revise=no_revise,
            avoid_empty_plan=avoid_empty_plan,
            last_nonempty_diff_dicts=last_nonempty_diff_dicts,
        )
        program = Program(expressions=expressions)
        lispress = render_compact(program_to_lispress(program))

        dataflow_turn = Turn(
            turn_index=trade_turn["turn_idx"],
            user_utterance=build_user_utterance(
                text=trade_turn["transcript"],
                utterance_tokenizer=utterance_tokenizer,
            ),
            # NOTE: The agentUtterance should be the one following the user trade_turn.
            # In the original MultiWoZ data, there is an ending agent utterance. This agent utterance is
            # removed in the TRADE preprocessing script because it doesn't change the belief state.
            # For now, we use an empty string for the last trade_turn.
            agent_utterance=build_agent_utterance(
                text=trade_turns[turn_index + 1]["system_transcript"]
                if turn_index + 1 < len(trade_turns)
                else "",
                utterance_tokenizer=utterance_tokenizer,
                described_entities=[],
            ),
            lispress=lispress,
            skip=False,
            program_execution_oracle=ProgramExecutionOracle(
                # no exception for MultiWoZ
                has_exception=False,
                # all refer calls are correct since we use a best-effort conversion
                refer_are_correct=True,
            ),
        )
        dataflow_turns.append(dataflow_turn)

        refer_call_report["dialogueId"] = trade_dialogue["dialogue_idx"]
        refer_call_reports.append(refer_call_report)
        num_refer_calls += refer_call_report["numResolveCalls"]

        # update the execution trace
        reconstructed_belief_dict, execution_result = execute_program_for_turn(
            curr_turn=dataflow_turn,
            last_execution_trace=last_execution_trace,
            last_belief_dict=None if no_revise else last_belief_dict,
            salience_model=salience_model,
        )
        # makes sure the round-trip is successful
        assert (
            reconstructed_belief_dict == curr_belief_dict
        ), "turn {} in {} does not round-trip".format(
            trade_turn["turn_idx"], trade_dialogue["dialogue_idx"]
        )

        last_execution_trace = update_execution_trace(
            last_execution_trace=last_execution_trace,
            partial_execution_result=execution_result,
        )
        last_belief_dict = curr_belief_dict

    dataflow_dialogue = Dialogue(
        dialogue_id=trade_dialogue["dialogue_idx"], turns=dataflow_turns
    )
    return dataflow_dialogue, num_refer_calls, refer_call_reports