def read_kaggle_reviews(cls, path):
    """Load movie titles, genres and review templates from a Kaggle CSV.

    The CSV must provide ``title``, ``genres`` and ``templates`` columns.
    A row's zero-based position in the file is used as its ``movie_id``.
    Non-empty ``genres`` / ``templates`` cells are evaluated as Python
    expressions (a sequence of dicts with a ``name`` key, and a sequence of
    sentences, respectively); an empty cell contributes nothing.

    Every kept sentence is tagged from its index in the row's template
    list: index 0 -> ``inform-first``, index 1 -> ``inform-last``, any other
    index -> ``inform-middle`` (presumably the upstream data stores the
    first, last, then middle sentences in that order -- confirm against the
    data producer).  Sentences with 20 or more whitespace-separated words
    are skipped.

    Returns:
        A ``(titles, genres, templates)`` tuple; each element is a list of
        dicts.
    """
    # TODO: more complex tag (e.g. sentiment, plot, see Yoav's style paper)
    position_tags = {0: 'first', 1: 'last'}

    titles, genres, templates = [], [], []
    with open(path, 'r') as csvfile:
        for movie_id, row in enumerate(csv.DictReader(csvfile, delimiter=',')):
            titles.append({'movie_id': movie_id, 'title': row['title']})

            # NOTE(review): eval() runs arbitrary code found in the CSV cell.
            # Only feed this reader trusted files; ast.literal_eval would be
            # the safe choice if the cells are always plain literals.
            raw_genres = row['genres']
            movie_genres = eval(raw_genres) if raw_genres else []
            for genre in movie_genres:
                genres.append({'movie_id': movie_id, 'genre': genre['name']})

            raw_templates = row['templates']
            utterances = eval(raw_templates) if raw_templates else []
            for position, sentence in enumerate(utterances):
                # Keep only short sentences (fewer than 20 words).
                if len(sentence.split()) >= 20:
                    continue
                tag = position_tags.get(position, 'middle')
                templates.append({
                    'id': generate_uuid('T'),
                    'movie_id': movie_id,
                    'template': sentence,
                    'tag': 'inform-{}'.format(tag),
                    'source': 'kaggle',
                    'context_tag': 'ask',
                })
    return titles, genres, templates
def generate_scenario(schema, base_price, price_unit, discounts, listings):
    """Yield one Scenario per (usable listing, price range) combination.

    Each entry of ``listings`` is passed through ``process_listing``; falsy
    results are skipped.  The listing's own ``price`` (converted to int)
    replaces ``base_price`` -- the value passed in by the caller is never
    read -- and listings priced below ``price_unit`` are skipped as well.

    For every range produced by ``generate_price_range`` a fresh pair of KBs
    is built and the BUYER / SELLER ``personal`` facts are updated with that
    role's part of the range before the Scenario is yielded.
    """
    for raw_listing in listings:
        listing = process_listing(raw_listing)
        if not listing:
            continue

        base_price = int(listing['price'])
        if base_price < price_unit:
            continue

        for ranges in generate_price_range(base_price, price_unit, discounts):
            # KBs are rebuilt for every range so scenarios never share state.
            kbs = generate_kbs(schema, listing)
            for role in (BUYER, SELLER):
                kbs[role].facts['personal'].update(ranges[role])
            yield Scenario(
                generate_uuid('S'),
                listing['post_id'],
                listing['category'],
                listing['images'],
                schema.attributes,
                kbs,
            )
def read_templates(cls, path):
    """Read handcoded templates.

    csv header: tag, context_tag, template

    Every row becomes a dict keyed by the CSV header.  Empty cells are
    stored as ``None``.  Two extra keys are added to each row: ``id`` (a
    fresh ``generate_uuid('T')`` value) and ``source`` (always
    ``'handcoded'``).

    Returns:
        A list with one dict per CSV data row, in file order.
    """
    templates = []
    with open(path, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=',')
        for row in reader:
            # items() exists on both Python 2 and 3; iteritems() is
            # Python 2 only and raises AttributeError on Python 3.
            data = {k: None if v == '' else v for k, v in row.items()}
            data['id'] = generate_uuid('T')
            data['source'] = 'handcoded'
            templates.append(data)
    return templates
def simulate(self, max_turns=None, verbose=False):
    '''
    Simulate a dialogue.

    Sessions are polled round-robin via ``send()``.  Each non-empty event is
    time-stamped, passed to ``self.event_callback``, recorded in
    ``self.events``, printed, and then delivered to every other session via
    ``receive()``.  The loop stops once ``self.game_over()`` is true or,
    when ``max_turns`` is truthy, once that many events have been produced;
    the event that ends the dialogue is recorded but not forwarded.

    max_turns: optional cap on the number of events.
    verbose: if True, print the full event dict and the final outcome;
        otherwise print a short per-event summary.

    return an Example built from the scenario, a freshly generated uuid,
    the recorded events and the outcome.
    '''
    self.events = []
    self.max_turns = max_turns
    clock = 0
    turns_taken = 0
    finished = False
    self.describe_scenario()

    while not finished:
        for speaker, session in enumerate(self.sessions):
            event = session.send()
            # The clock advances on every poll, even when nothing is sent.
            clock += 1
            if not event:
                continue

            event.time = clock
            self.event_callback(event)
            self.events.append(event)

            if verbose:
                print('agent=%s: session=%s, event=%s' %
                      (speaker, type(session).__name__, event.to_dict()))
            else:
                action = event.action
                data = event.data
                if action == 'message':
                    event_output = data
                else:
                    event_output = "Action: {0}, Data: {1}".format(
                        action, data)
                print('agent=%s, event=%s' % (speaker, event_output))

            turns_taken += 1
            if self.game_over() or (max_turns and turns_taken >= max_turns):
                finished = True
                break

            # Deliver the event to everyone except its sender.
            for listener_id, listener in enumerate(self.sessions):
                if listener_id != speaker:
                    listener.receive(event)

    uuid = generate_uuid('E')
    outcome = self.get_outcome()
    if verbose:
        print('outcome: %s' % outcome)
        print('----------------')
    # TODO: add configurable names to systems and sessions
    return Example(self.scenario, uuid, self.events, outcome, uuid, None)
def read_rotten_reviews(cls, path):
    """Load movie titles and review templates from a JSON file.

    The file must contain a JSON sequence of movie objects, each with a
    ``title`` and a ``templates`` mapping of tag -> list of sentences.  A
    movie's zero-based position in the sequence is used as its ``movie_id``.
    Sentences with 28 or more whitespace-separated words are skipped; each
    kept sentence is tagged ``inform-<tag>`` and takes its ``source`` from
    the movie's ``source`` field.

    Returns:
        A ``(titles, genres, templates)`` tuple of lists.  ``genres`` is
        always empty here; this function never adds to it.
    """
    titles = []
    genres = []
    templates = []
    # Context manager so the file handle is closed deterministically instead
    # of being left for the garbage collector.
    with open(path, "r") as jsonfile:
        movies = json.load(jsonfile)
    for i, row in enumerate(movies):
        id_ = i
        titles.append({'movie_id': id_, 'title': row['title']})
        for tag, utterances in row['templates'].items():
            templates.extend([{
                'id': generate_uuid('T'),
                'movie_id': id_,
                'template': x,
                'tag': 'inform-{}'.format(tag),
                'source': row['source'],
                'context_tag': 'ask'
            } for x in utterances if len(x.split()) < 28])
    return titles, genres, templates
def retrieve_candidates(self, dialogue, json_dict=False):
    '''
    dialogue: a Dialogue object
    json_dict: if True, return a list of dictionary containing kb, context etc.;
        otherwise just a list of candidates corresponding to each turn.
        NOTE: candidates are only available to 'agent speaking' turns.
    return a candidate list for each 'decoding' turn.

    Turns whose agent differs from dialogue.agent get None as candidates.
    For the other turns, self.search is called with the role of that turn,
    the item category and title, and the turns seen so far (the current
    turn is not included).  When json_dict is True and self.slot_detector
    is truthy, each record also carries a 'kb_context' entry.
    '''
    prev_turns = []
    prev_roles = []
    category = dialogue.kb.facts['item']['Category']
    title = dialogue.kb.facts['item']['Title']
    results = []
    # Builtin zip works on both Python 2 and 3 (izip is Python 2 only).
    turns = zip(dialogue.agents, dialogue.token_turns, dialogue.roles)
    for turn_id, (agent, turn, role) in enumerate(turns):
        if agent != dialogue.agent:
            candidates = None
        else:
            candidates = self.search(role, category, title, prev_turns)
        if json_dict:
            r = {
                'exid': generate_uuid('E'),
                'uuid': dialogue.uuid,
                # The record stores the dialogue-level role, not the
                # per-turn role used for the search above.
                'role': dialogue.role,
                'kb': dialogue.kb.to_dict(),
                'agent': agent,
                'turn_id': turn_id,
                # Copies, so later appends do not alter earlier records.
                'prev_turns': list(prev_turns),
                'prev_roles': list(prev_roles),
                'target': turn,
                'candidates': candidates,
            }
            if self.slot_detector:
                r['kb_context'] = self.slot_detector.get_context_tokens(
                    dialogue.kb)
        else:
            r = candidates
        results.append(r)
        prev_turns.append(turn)
        prev_roles.append(role)
    return results
# Command-line options: schema location, input integer file, output JSON path.
parser.add_argument('--schema-path')
parser.add_argument(
    '--scenario-ints-file',
    help='Path to the file containing 6 integers per line that describes the scenario',
)
parser.add_argument(
    '--output',
    help='Path to the output JSON scenario file',
)
args = parser.parse_args()

schema = Schema(args.schema_path)

# Every input line is turned into one KB.  KBs are buffered until two are
# available; a pair with equal item_counts forms one scenario.  When the two
# buffered KBs disagree, the older one is dropped and the newer one waits to
# be paired with the KB from the next line.
scenarios = []
with open(args.scenario_ints_file) as ints_file:
    kbs = []
    names = ['book', 'hat', 'ball']
    for line in ints_file:
        values = [int(token) for token in line.split()]
        kbs.append(KB.from_ints(schema.attributes, names, values))
        if len(kbs) != 2:
            continue
        if kbs[0].item_counts != kbs[1].item_counts:
            del kbs[0]
            continue
        assert kbs[0].item_counts == kbs[1].item_counts
        scenarios.append(Scenario(generate_uuid("FB"), schema.attributes, kbs))
        # Start a fresh buffer; the list just used now belongs to the scenario.
        kbs = []

scenario_db = ScenarioDB(scenarios)
write_json(scenario_db.to_dict(), args.output)