Example #1
0
 def read_kaggle_reviews(cls, path):
     """Read movie titles, genres and review templates from a Kaggle csv file.

     Every csv row describes one movie; the 0-based row index is used as its
     ``movie_id``. The ``genres`` and ``templates`` cells are evaluated as
     Python expressions (an empty cell means "no entries").

     Returns a ``(titles, genres, templates)`` tuple of lists of dicts:
       titles:    {'movie_id', 'title'} -- one per row.
       genres:    {'movie_id', 'genre'} -- one per genre entry of the row.
       templates: {'id', 'movie_id', 'template', 'tag', 'source',
                  'context_tag'} -- one per utterance with fewer than 20
                  whitespace-separated tokens.
     """
     # first sentence, last sentence, middle sentence
     # TODO: more complex tag (e.g. sentiment, plot, see Yoav's style paper)
     def position_tag(pos):
         # Index 0 -> 'first', index 1 -> 'last', any other index -> 'middle'.
         if pos == 0:
             return 'first'
         if pos == 1:
             return 'last'
         return 'middle'

     titles, genres, templates = [], [], []
     with open(path, 'r') as csvfile:
         rows = csv.DictReader(csvfile, delimiter=',')
         for movie_id, row in enumerate(rows):
             titles.append({'movie_id': movie_id, 'title': row['title']})

             # NOTE(review): eval() executes whatever expression the cell
             # contains -- only feed this trusted csv files.
             raw_genres = row['genres']
             movie_genres = eval(raw_genres) if raw_genres else []
             for genre in movie_genres:
                 genres.append({'movie_id': movie_id, 'genre': genre['name']})

             raw_templates = row['templates']
             utterances = eval(raw_templates) if raw_templates else []
             # The position is taken before filtering, so dropping a long
             # utterance does not shift the tags of the ones after it.
             for position, utterance in enumerate(utterances):
                 if len(utterance.split()) >= 20:
                     continue
                 templates.append({
                     'id': generate_uuid('T'),
                     'movie_id': movie_id,
                     'template': utterance,
                     'tag': 'inform-{}'.format(position_tag(position)),
                     'source': 'kaggle',
                     'context_tag': 'ask'
                 })
     return titles, genres, templates
Example #2
0
def generate_scenario(schema, base_price, price_unit, discounts, listings):
    """Yield one Scenario per generated price range of every usable listing.

    A listing is skipped when ``process_listing`` returns a falsy value or
    when its integer price is below ``price_unit``.

    Note: the ``base_price`` argument is never read; the price of each
    listing is used as the base price instead.
    """
    for raw_listing in listings:
        listing = process_listing(raw_listing)
        if not listing:
            continue

        listing_price = int(listing['price'])
        if listing_price < price_unit:
            continue

        price_ranges = generate_price_range(listing_price, price_unit,
                                            discounts)
        for ranges in price_ranges:
            # Fresh KBs per price range so each scenario gets its own facts.
            kbs = generate_kbs(schema, listing)
            for side in (BUYER, SELLER):
                kbs[side].facts['personal'].update(ranges[side])
            yield Scenario(
                generate_uuid('S'),
                listing['post_id'],
                listing['category'],
                listing['images'],
                schema.attributes,
                kbs,
            )
Example #3
0
    def read_templates(cls, path):
        """Read handcoded templates.

        csv header: tag, context_tag, template

        Each csv row becomes a dict keyed by the csv header, with empty cells
        stored as None. Every dict additionally gets a freshly generated 'id'
        and 'source' set to 'handcoded' (overriding csv columns of the same
        name, if any).

        Returns the list of template dicts in file order.
        """
        templates = []
        with open(path, 'r') as csvfile:
            reader = csv.DictReader(csvfile, delimiter=',')
            for row in reader:
                # items() instead of the Python 2-only iteritems(): works on
                # both Python 2 and 3.
                data = {k: None if v == '' else v for k, v in row.items()}
                data['id'] = generate_uuid('T')
                data['source'] = 'handcoded'
                templates.append(data)
        return templates
Example #4
0
    def simulate(self, max_turns=None, verbose=False):
        '''
        Simulate a dialogue between the sessions and return it as an Example.

        Sessions are polled round-robin via send(). The clock advances on
        every poll, including polls that produce no event; only actual events
        count as turns. Each event is timestamped, passed to event_callback,
        recorded in self.events, printed, and then forwarded to every other
        session -- except for the event that ends the dialogue, which is not
        forwarded.

        max_turns: stop after this many events; a falsy value means no limit.
        verbose: print full event dicts and the final outcome.
        '''
        self.events = []
        self.max_turns = max_turns
        self.describe_scenario()

        clock = 0
        turns_taken = 0
        finished = False
        while not finished:
            for agent, session in enumerate(self.sessions):
                event = session.send()
                clock += 1
                if not event:
                    # This session has nothing to say right now.
                    continue

                event.time = clock
                self.event_callback(event)
                self.events.append(event)

                if verbose:
                    print('agent=%s: session=%s, event=%s' %
                          (agent, type(session).__name__, event.to_dict()))
                else:
                    action, data = event.action, event.data
                    if action == 'message':
                        event_output = data
                    else:
                        event_output = "Action: {0}, Data: {1}".format(
                            action, data)
                    print('agent=%s, event=%s' % (agent, event_output))

                turns_taken += 1
                if self.game_over() or (max_turns and turns_taken >= max_turns):
                    finished = True
                    break

                # Deliver the event to everyone but its sender.
                for partner, listener in enumerate(self.sessions):
                    if partner == agent:
                        continue
                    listener.receive(event)

        example_id = generate_uuid('E')
        outcome = self.get_outcome()
        if verbose:
            print('outcome: %s' % outcome)
            print('----------------')
        # TODO: add configurable names to systems and sessions
        return Example(self.scenario, example_id, self.events, outcome,
                       example_id, None)
Example #5
0
    def read_rotten_reviews(cls, path):
        """Read movie titles and review templates from a JSON file.

        The file holds a list of movie objects; the code reads 'title',
        'source' and 'templates' (a mapping tag -> list of utterances) from
        each. The 0-based position of a movie in the list is its 'movie_id'.

        Returns a (titles, genres, templates) tuple of lists of dicts:
          titles:    {'movie_id', 'title'} -- one per movie.
          genres:    always empty; this method never fills it.
          templates: one per utterance with fewer than 28
                     whitespace-separated tokens, tagged 'inform-<tag>'.
        """
        titles = []
        genres = []
        templates = []
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(path, "r") as json_file:
            movies = json.load(json_file)
        for id_, row in enumerate(movies):
            titles.append({'movie_id': id_, 'title': row['title']})
            for tag, utterances in row['templates'].items():
                templates.extend([{
                    'id': generate_uuid('T'),
                    'movie_id': id_,
                    'template': x,
                    'tag': 'inform-{}'.format(tag),
                    'source': row['source'],
                    'context_tag': 'ask'
                } for x in utterances if len(x.split()) < 28])

        return titles, genres, templates
Example #6
0
 def retrieve_candidates(self, dialogue, json_dict=False):
     '''
     Retrieve candidates for every turn of a dialogue.

     dialogue: a Dialogue object
     json_dict: if True, return a list of dictionary containing kb, context etc.;
         otherwise just a list of candidates corresponding to each turn.
         NOTE: candidates are only available to 'agent speaking' turns.
     return a candidate list for each 'decoding' turn; turns spoken by the
         other agent get None as their candidates.

     The search uses the per-turn role from dialogue.roles, whereas the
     'role' field of a json dict is always dialogue.role.
     '''
     prev_turns = []
     prev_roles = []
     category = dialogue.kb.facts['item']['Category']
     title = dialogue.kb.facts['item']['Title']
     results = []
     # Builtin zip (instead of the Python 2-only izip) runs on Python 2 and 3.
     for turn_id, (agent, turn, role) in enumerate(
             zip(dialogue.agents, dialogue.token_turns, dialogue.roles)):
         if agent != dialogue.agent:
             candidates = None
         else:
             # prev_turns holds only the turns before the current one.
             candidates = self.search(role, category, title, prev_turns)
         if json_dict:
             r = {
                 'exid': generate_uuid('E'),
                 'uuid': dialogue.uuid,
                 'role': dialogue.role,
                 'kb': dialogue.kb.to_dict(),
                 'agent': agent,
                 'turn_id': turn_id,
                 # Copies, so later appends do not alter earlier results.
                 'prev_turns': list(prev_turns),
                 'prev_roles': list(prev_roles),
                 'target': turn,
                 'candidates': candidates,
             }
             if self.slot_detector:
                 r['kb_context'] = self.slot_detector.get_context_tokens(
                     dialogue.kb)
         else:
             r = candidates
         results.append(r)
         prev_turns.append(turn)
         prev_roles.append(role)
     return results
Example #7
0
parser.add_argument('--schema-path')
parser.add_argument(
    '--scenario-ints-file',
    help='Path to the file containing 6 integers per line '
         'that describes the scenario')
parser.add_argument('--output', help='Path to the output JSON scenario file')
args = parser.parse_args()

schema = Schema(args.schema_path)

# Every line of the ints file is turned into one KB. Two consecutive KBs with
# equal item counts form a scenario; on a mismatch the older KB is dropped and
# the newer one is kept to be paired with the next line.
item_names = ['book', 'hat', 'ball']
scenarios = []
pending_kbs = []
with open(args.scenario_ints_file) as ints_file:
    for raw_line in ints_file:
        counts = list(map(int, raw_line.strip().split()))
        pending_kbs.append(KB.from_ints(schema.attributes, item_names, counts))
        if len(pending_kbs) < 2:
            continue

        first_kb, second_kb = pending_kbs
        if first_kb.item_counts != second_kb.item_counts:
            pending_kbs.pop(0)
            continue
        assert first_kb.item_counts == second_kb.item_counts
        scenarios.append(
            Scenario(generate_uuid("FB"), schema.attributes, pending_kbs))
        # Rebind instead of clearing: the scenario keeps the old list.
        pending_kbs = []

scenario_db = ScenarioDB(scenarios)
write_json(scenario_db.to_dict(), args.output)