def main(config):
    """
    Creates input files for y_exp mturk task from conversation/rating mturk task.

    input: file of logs (in ParlaiDialog format) from Mturk task 1 with turn-by-turn
        quality ratings 1-5
    output: file of logs (in ParlaiDialog format) sliced up to begin at the start of
        an episode or at a new topic request, and ending with a y_exp

    config keys read here:
        'infile': path of the logs to slice
        'outfile': path the sliced episodes are written to
    """
    # The prefixes are loop-invariant, so lowercase them once up front.
    slice_start_prefixes = (NEW_TOPIC_REQUEST.lower(), INITIAL_PROMPT.lower())
    slice_end_prefix = SUGGESTION_REQUEST.lower()

    new_episodes = []
    old_episodes = list(extract_parlai_episodes(config['infile']))
    for episode in old_episodes:
        # Every original episode starts a fresh slice. Without this reset, an
        # episode whose first parley is neither an initial prompt nor a new topic
        # request would hit an unbound `new_episode` (first episode) or keep
        # appending to the slice left over from the previous episode.
        new_episode = []
        for parley in episode:
            if parley.context.startswith(slice_start_prefixes):
                new_episode = []
            new_episode.append(parley)
            if parley.context.startswith(slice_end_prefix):
                # NOTE(review): the list object itself is stored, so a slice that
                # contained two suggestion requests would be listed twice.
                new_episodes.append(new_episode)

    # Create parlai dialog file for easy viewing
    with open(config['outfile'], 'w') as f:
        for episode in new_episodes:
            # A slice is only stored right after a parley was appended to it, so
            # it is never empty; flag its final parley as the end of the episode.
            episode[-1].episode_done = True
            for i, parley in enumerate(episode):
                f.write(f"{i}\t{parley.to_parlai()}\n")
    print(
        f"Extracted {len(new_episodes)} episodes out of {len(old_episodes)} "
        f"original episodes and wrote them to {config['outfile']}."
    )
def main(config):
    """Builds negative response classifier (NRC) training examples from Mturk logs

    input: file of logs (in ParlaiDialog format) from Mturk task 1 with turn-by-turn
        quality ratings 1-5
    output: file with one JSON-serialized example per line, each pairing a dialog
        context with a +1 (positive) or -1 (negative) label

    config keys read here:
        'infile', 'outfile': input and output paths
        'positives', 'negatives': comma-separated rating values
        'history_size': how many utterances of history go into each context
    """
    pos_ratings = config['positives'].split(',')
    neg_ratings = config['negatives'].split(',')
    # A rating value must not be listed as both positive and negative
    assert not (set(pos_ratings) & set(neg_ratings))
    relevant_ratings = pos_ratings + neg_ratings

    initial_prefix = INITIAL_PROMPT.lower()
    suggestion_prefix = SUGGESTION_REQUEST.lower()
    new_topic_prefix = NEW_TOPIC_REQUEST.lower()

    collected = []
    episode_count = 0
    parley_count = 0
    for episode in extract_parlai_episodes(config['infile']):
        episode_count += 1
        dialog = []
        for turn in episode:
            parley_count += 1
            ctx = turn.context

            # Track the dialog so far, leaving out stock control flow turns
            if ctx.startswith(initial_prefix):
                # Conversation prompt: the response is the first utterance
                dialog = [turn.response]
            elif not ctx.startswith(suggestion_prefix):
                # (a suggestion request / y_exp turn leaves the dialog untouched)
                if ctx.startswith(new_topic_prefix):
                    # New topic requested: restart from the response
                    dialog = [turn.response]
                else:
                    dialog.extend((ctx, turn.response))

            # Turns whose rating is in neither list produce no example
            if turn.reward not in relevant_ratings:
                continue

            # Pick the utterances that make up the classifier input:
            #   history_size < 0  -> the whole tracked dialog
            #   history_size == 0 -> only the '__null__' placeholder
            #   history_size > 0  -> the last history_size utterances
            size = config['history_size']
            if size < 0:
                window = dialog
            elif size == 0:
                window = ['__null__']
            else:
                window = dialog[-size:]

            # last_speaker=1: the utterance being classified should be tagged as
            # __p1__ (the human), not __p2__ (the bot) -- presumably handled by
            # add_person_tokens; confirm against its definition.
            tagged_context = add_person_tokens(window, last_speaker=1)
            if turn.reward in pos_ratings:
                label = 1
            else:
                label = -1
            collected.append(Parley(tagged_context, label))

    with open(config['outfile'], 'w') as outfile:
        outfile.writelines(json.dumps(ex.to_dict()) + '\n' for ex in collected)

    summary = (
        f"Extracted {len(collected)} ratings out of {episode_count} episodes "
        f"({parley_count} parleys) and wrote them to {config['outfile']} with "
        f"histsz == {config['history_size']}."
    )
    print(summary)