def test_program_to_lispress_with_quotes_inside_string():
    # a string with a double-quote in it
    v, _ = mk_value_op(value='i got quotes"', schema="String", idx=0)
    program = Program(expressions=[v])
    rendered_lispress = render_pretty(program_to_lispress(program))
    assert rendered_lispress == '(#(String "i got quotes\\""))'
    round_tripped, _ = lispress_to_program(parse_lispress(rendered_lispress), 0)
    assert round_tripped == program
def to_canonical_form(tokenized_lispress: str) -> str:
    """Returns canonical form of a tokenized lispress.

    The canonical form is un-tokenized and compact; it also sorts named arguments in alphabetical order.
    """
    lispress = seq_to_lispress(tokenized_lispress.split(" "))
    program, _ = lispress_to_program(lispress, 0)
    round_tripped = program_to_lispress(program)
    return render_compact(round_tripped)
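A sketch of one way this helper might be used when scoring predictions, so that tokenization and named-argument order do not affect the comparison; the function names inside the token strings (`foo`, `bar`, `baz`) are invented for illustration:
# hypothetical space-joined token strings
gold_tokens = "( foo :b ( bar ) :a ( baz ) )"
pred_tokens = "( foo :a ( baz ) :b ( bar ) )"
# both should canonicalize to the same compact string, with named
# arguments sorted alphabetically
exact_match = to_canonical_form(gold_tokens) == to_canonical_form(pred_tokens)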
def test_surface_to_program_round_trips():
    """
    Goes all the way to `Program` and so is stricter
    than `test_surface_to_sexp_round_trips`.
    """
    for surface_string in surface_strings:
        surface_string = surface_string.strip()
        sexp = parse_lispress(surface_string)
        program, _ = lispress_to_program(sexp, 0)
        round_tripped_sexp = program_to_lispress(program)
        round_tripped_surface_string = render_pretty(round_tripped_sexp, max_width=60)
        assert round_tripped_surface_string == surface_string
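For contrast, a minimal sketch of the weaker sexp-level check the docstring refers to; the actual body of `test_surface_to_sexp_round_trips` is not shown here, so this shape is an assumption:
def sexp_only_round_trip(surface_string: str) -> bool:
    # parse to an s-expression and render it back, without ever
    # building a `Program`
    sexp = parse_lispress(surface_string.strip())
    return render_pretty(sexp, max_width=60) == surface_string.strip()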
def _try_round_trip(lispress_str: str) -> str:
    """
    If `lispress_str` is valid lispress, round-trips it to and from `Program`.
    This puts named arguments in alphabetical order.
    If it is not valid, returns the original string unmodified.
    """
    try:
        # round-trip to canonicalize
        lispress = parse_lispress(lispress_str)
        program, _ = lispress_to_program(lispress, 0)
        round_tripped = program_to_lispress(program)
        return render_compact(round_tripped)
    except Exception:  # pylint: disable=W0703
        return lispress_str
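A hypothetical call site, showing that malformed predictions pass through unchanged instead of crashing the scoring pipeline:
# hypothetical model outputs; the second one is deliberately malformed
predictions = ["( foo :b ( bar ) :a ( baz ) )", "((( unbalanced"]
canonical_predictions = [_try_round_trip(p) for p in predictions]
# the valid string is canonicalized; the malformed one comes back as-is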
Example #5
def program_to_seq(program: Program) -> List[str]:
    lispress = program_to_lispress(program)
    return lispress_to_seq(lispress)
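The inverse direction, mirroring `to_canonical_form` above, could be sketched as follows (assuming `seq_to_lispress` and `lispress_to_program` from the same module):
def seq_to_program(seq: List[str]) -> Program:
    # invert `program_to_seq`: tokens -> lispress -> Program
    lispress = seq_to_lispress(seq)
    program, _ = lispress_to_program(lispress, 0)
    return program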
Example #6
def create_programs_for_trade_dialogue(
    trade_dialogue: Dict[str, Any],
    keep_all_domains: bool,
    remove_none: bool,
    fill_none: bool,
    salience_model: SalienceModelBase,
    no_revise: bool,
    avoid_empty_plan: bool,
    utterance_tokenizer: UtteranceTokenizer,
) -> Tuple[Dialogue, int, List[Dict[str, Any]]]:
    """Creates programs for a TRADE dialogue.

    Returns:
        A tuple of (the Dialogue, the number of refer calls, and the refer call report for each turn).
    """
    if remove_none:
        assert not fill_none

    # the execution trace of the program for the dialogue
    # updated at the end of each trade_turn
    last_execution_trace = ExecutionTrace(slot_values=dict())
    # the number of refer calls in the program
    num_refer_calls: int = 0
    # the flattened belief state at the previous turn
    last_belief_dict: Dict[str, str] = {}

    pointer_count: int = 0
    topic_pointers: Dict[str, Any] = {}
    topic_keys: DefaultDict[str, Set[str]] = defaultdict(set)

    dataflow_turns: List[Turn] = []
    refer_call_reports: List[Dict[str, Any]] = []
    last_nonempty_diff_dicts: List[Dict[str, str]] = []

    trade_turns = trade_dialogue["dialogue"]
    assert not trade_turns[0]["system_transcript"].strip(), (
        "the leading agent utterance should be empty"
    )

    for turn_index, trade_turn in enumerate(trade_turns):
        curr_belief_dict = flatten_belief_state(
            belief_state=trade_turn["belief_state"],
            keep_all_domains=keep_all_domains,
            remove_none=remove_none,
        )
        if fill_none:
            # Sometimes the user activates a domain without giving any
            # constraint; in that case, all slot values may be "none"
            # (or "dontcare"?).
            active_domains: Set[str] = {
                get_domain_and_slot_name(slot_fullname=slot_fullname)[0]
                for slot_fullname in curr_belief_dict
            }
            for domain in active_domains:
                # adds "none" to activate domains so the model can get more supervision signals
                # note slots in inactivate domains are not added
                for slot_name in DATAFLOW_SLOT_NAMES_FOR_DOMAIN[domain]:
                    slot_fullname = "{}-{}".format(domain, slot_name)
                    if slot_fullname not in curr_belief_dict:
                        curr_belief_dict[slot_fullname] = "none"

        (
            expressions,
            pointer_count,
            refer_call_report,
            last_nonempty_diff_dicts,
        ) = create_program_for_trade_turn(
            trade_turn=trade_turn,
            curr_belief_dict=curr_belief_dict,
            last_belief_dict=last_belief_dict,
            topic_pointers=topic_pointers,
            topic_keys=topic_keys,
            execution_trace=last_execution_trace,
            pointer_count=pointer_count,
            salience_model=salience_model,
            no_revise=no_revise,
            avoid_empty_plan=avoid_empty_plan,
            last_nonempty_diff_dicts=last_nonempty_diff_dicts,
        )

        program = Program(expressions=expressions)
        lispress = render_compact(program_to_lispress(program))

        dataflow_turn = Turn(
            turn_index=trade_turn["turn_idx"],
            user_utterance=build_user_utterance(
                text=trade_turn["transcript"],
                utterance_tokenizer=utterance_tokenizer,
            ),
            # NOTE: The agentUtterance should be the one *following* the user
            # trade_turn. The original MultiWoZ data ends each dialogue with an
            # agent utterance, which the TRADE preprocessing script removes
            # because it doesn't change the belief state. For the last
            # trade_turn, we therefore use an empty string.
            agent_utterance=build_agent_utterance(
                text=trade_turns[turn_index + 1]["system_transcript"]
                if turn_index + 1 < len(trade_turns) else "",
                utterance_tokenizer=utterance_tokenizer,
                described_entities=[],
            ),
            lispress=lispress,
            skip=False,
            program_execution_oracle=ProgramExecutionOracle(
                # no exception for MultiWoZ
                has_exception=False,
                # all refer calls are correct since we use a best-effort conversion
                refer_are_correct=True,
            ),
        )
        dataflow_turns.append(dataflow_turn)

        refer_call_report["dialogueId"] = trade_dialogue["dialogue_idx"]
        refer_call_reports.append(refer_call_report)
        num_refer_calls += refer_call_report["numResolveCalls"]

        # update the execution trace
        reconstructed_belief_dict, execution_result = execute_program_for_turn(
            curr_turn=dataflow_turn,
            last_execution_trace=last_execution_trace,
            last_belief_dict=None if no_revise else last_belief_dict,
            salience_model=salience_model,
        )
        # makes sure the round-trip is successful
        assert reconstructed_belief_dict == curr_belief_dict, (
            "turn {} in {} does not round-trip".format(
                trade_turn["turn_idx"], trade_dialogue["dialogue_idx"]
            )
        )

        last_execution_trace = update_execution_trace(
            last_execution_trace=last_execution_trace,
            partial_execution_result=execution_result,
        )

        last_belief_dict = curr_belief_dict

    dataflow_dialogue = Dialogue(dialogue_id=trade_dialogue["dialogue_idx"],
                                 turns=dataflow_turns)
    return dataflow_dialogue, num_refer_calls, refer_call_reports
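A hypothetical driver over a TRADE-preprocessed MultiWOZ file; the file name and the `salience_model` / `utterance_tokenizer` instances are assumptions, not names taken from the source:
import json

with open("train_dials.json") as fp:  # hypothetical TRADE output file
    trade_dialogues = json.load(fp)

salience_model = ...       # any SalienceModelBase implementation (assumed)
utterance_tokenizer = ...  # an UtteranceTokenizer instance (assumed)

total_refer_calls = 0
for trade_dialogue in trade_dialogues:
    dialogue, num_refer_calls, reports = create_programs_for_trade_dialogue(
        trade_dialogue=trade_dialogue,
        keep_all_domains=False,
        remove_none=True,
        fill_none=False,  # must be False whenever remove_none is True
        salience_model=salience_model,
        no_revise=False,
        avoid_empty_plan=False,
        utterance_tokenizer=utterance_tokenizer,
    )
    total_refer_calls += num_refer_calls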