def process_event(self, e, kb):
    '''Tokenize the event data and link entities.'''
    if e.action == 'message':
        # Lowercase, tokenize, and link price entities against the KB
        entity_tokens = self.lexicon.link_entity(tokenize(e.data), kb=kb, scale=True, price_clip=4.)
        return entity_tokens if entity_tokens else None
    elif e.action == 'offer':
        data = e.data['price']
        if data is None:
            return None
        price = PriceScaler._scale_price(kb, data)
        return [markers.OFFER, self.price_to_entity(price)]
    elif e.action == 'quit':
        return [markers.QUIT]
    elif e.action == 'accept':
        return [markers.ACCEPT]
    elif e.action == 'reject':
        return [markers.REJECT]
    else:
        raise ValueError('Unknown event action.')
def process_event(self, e, kb):
    '''Tokenize the event data and link entities. Also accepts a bare
    [intent, price] "semi-event" list in place of a full Event.'''
    from cocoa.core.event import Event
    # Process a semi-event: scale its price in place and return it
    if not isinstance(e, Event):
        if len(e) < 2 or e[1] is None:
            return e
        e[1] = PriceScaler._scale_price(kb, e[1])
        return e
    if e.action == 'message':
        # Lowercase, tokenize, and link price entities against the KB
        entity_tokens = self.lexicon.link_entity(tokenize(e.data), kb=kb, scale=True, price_clip=4.)
        return entity_tokens if entity_tokens else None
    elif e.action == 'offer':
        data = e.data['price']
        if data is None:
            return None
        price = PriceScaler._scale_price(kb, data)
        return [markers.OFFER, self.price_to_entity(price)]
    elif e.action == 'quit':
        return [markers.QUIT]
    elif e.action == 'accept':
        return [markers.ACCEPT]
    elif e.action == 'reject':
        return [markers.REJECT]
    else:
        raise ValueError('Unknown event action.')
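# Usage sketch for the semi-event branch above (hypothetical names and
# values; `preprocessor` stands in for whatever object owns process_event,
# and the exact scaled value depends on the KB's price range):
#
#     preprocessor.process_event(['offer', 9500], kb)
#     # -> ['offer', <9500 scaled into the KB's normalized price range>]
#     preprocessor.process_event(['offer', None], kb)
#     # -> ['offer', None]  (a missing price passes through unchanged)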
def _entity_to_str(self, entity_token, kb):
    if entity_token[0] is None:
        return None
    raw_price = PriceScaler.unscale_price(kb, entity_token)
    # unscale_price may hand back either an Entity or a bare token wrapper
    if isinstance(raw_price, Entity):
        price = raw_price.canonical.value
    else:
        price = raw_price.value
    return "${}".format(price)
def get_price_trend(cls, price_tracker, chat, agent=None):
    def _normalize_price(seen_price):
        # Maps the seller's target to 0 and the buyer's target to 1
        return (float(seller_target) - float(seen_price)) / (
            float(seller_target) - float(buyer_target))

    scenario = NegotiationScenario.from_dict(None, chat['scenario'])
    kbs = scenario.kbs
    roles = {
        kbs[0].facts['personal']['Role']: 0,
        kbs[1].facts['personal']['Role']: 1,
    }
    buyer_target = kbs[roles[utils.BUYER]].facts['personal']['Target']
    seller_target = kbs[roles[utils.SELLER]].facts['personal']['Target']
    prices = []
    for e in chat['events']:
        if e['action'] == 'message':
            if agent is not None and e['agent'] != agent:
                continue
            raw_tokens = tokenize(e['data'])
            # Link price entities against the speaker's KB
            linked_tokens = price_tracker.link_entity(raw_tokens, kb=kbs[e['agent']])
            for token in linked_tokens:
                if not isinstance(token, Entity):
                    continue
                try:
                    replaced = PriceScaler.unscale_price(kbs[e['agent']], token)
                except OverflowError:
                    print("Raw tokens:", raw_tokens)
                    print("Overflow error: {}".format(token))
                    print(kbs[e['agent']].facts)
                    print("-------")
                    continue
                norm_price = _normalize_price(replaced.canonical.value)
                # A number greater than the list price or significantly lower
                # than the buyer's target is probably not a price
                if 0. <= norm_price <= 2.:
                    prices.append(norm_price)
        elif e['action'] == 'offer':
            norm_price = _normalize_price(e['data']['price'])
            if 0. <= norm_price <= 2.:
                prices.append(norm_price)
    return prices
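# Self-contained sketch of the normalization above (hypothetical numbers,
# written as a standalone function rather than the closure used in
# get_price_trend): the seller's target maps to 0, the buyer's target to 1,
# so a midpoint offer lands at 0.5.
def _normalize_price_demo(price, seller_target, buyer_target):
    return (float(seller_target) - float(price)) / (float(seller_target) - float(buyer_target))

assert _normalize_price_demo(75, seller_target=100, buyer_target=50) == 0.5
assert _normalize_price_demo(100, seller_target=100, buyer_target=50) == 0.0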
def example_to_str(self, example, controller, rewards):
    from core.price_tracker import PriceScaler
    verbose_str = []
    for session_id, session in enumerate(controller.sessions):
        bottom, top = PriceScaler.get_price_range(session.kb)
        verbose_str.append('Agent[{}: {}], bottom ${}, top ${}'.format(
            session_id, session.kb.role, bottom, top))
    verbose_str.extend(example.to_text())
    verbose_str.append("reward: [0]{}\nreward: [1]{}".format(rewards[0], rewards[1]))
    return verbose_str
def lf_to_tokens(self, kb, lf):
    intent = lf['intent']
    if intent == 'accept':
        intent = markers.ACCEPT
    elif intent == 'reject':
        intent = markers.REJECT
    elif intent == 'quit':
        intent = markers.QUIT
    elif intent == 'offer':
        intent = markers.OFFER
    tokens = [intent]
    if lf.get('price') is not None:
        p = lf['price']
        price = Entity.from_elements(surface=p, value=p, type='price')
        tokens.append(PriceScaler.scale_price(kb, price))
    return tokens
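# Sketch of the mapping above (hypothetical logical forms): an LF dict with
# an intent and an optional raw price becomes a token sequence whose price
# entity is scaled into the KB's normalized range, e.g.
#
#     lf_to_tokens(kb, {'intent': 'offer', 'price': 9500})
#     # -> [markers.OFFER, <Entity(..., type='price') scaled by PriceScaler>]
#     lf_to_tokens(kb, {'intent': 'accept'})
#     # -> [markers.ACCEPT]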
def example_to_str(self, example, controller, rewards, sid=None):
    from core.price_tracker import PriceScaler
    verbose_str = []
    if sid is not None:
        verbose_str.append('[Scenario id: {}]'.format(sid))
    for session_id, session in enumerate(controller.sessions):
        bottom, top = PriceScaler.get_price_range(session.kb)
        verbose_str.append('Agent[{}: {}], bottom ${}, top ${}'.format(
            session_id, session.kb.role, bottom, top))
        verbose_str.append("They are negotiating for " + session.kb.facts['item']['Category'])
    verbose_str.extend(self.example_to_text(example))
    verbose_str.append("reward: [0]{}\nreward: [1]{}".format(rewards[0], rewards[1]))
    return verbose_str
def original_price(cls, kb, utterance):
    return [PriceScaler.unscale_price(kb, x) if is_entity(x) else x for x in utterance]
def scale_price(cls, kb, utterance):
    return [PriceScaler.scale_price(kb, x) if is_entity(x) else x for x in utterance]
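# The two helpers above are intended as inverses over an utterance's price
# entities: scale_price maps raw dollar values into the KB's normalized
# range, original_price maps them back. A hypothetical round trip:
#
#     scaled = cls.scale_price(kb, utterance)
#     restored = cls.original_price(kb, scaled)
#     # restored should match utterance up to floating-point rounding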
def get_price_number(self, entity, kb):
    raw_price = PriceScaler.unscale_price(kb, entity)
    return raw_price.canonical.value
def _entity_to_str(self, entity_token, kb):
    raw_price = PriceScaler.unscale_price(kb, entity_token)
    return "${}".format(raw_price.canonical.value)
def learn(self, args):
    if args.model_type == 'reinforce':
        train_policy = True
        train_critic = False
    elif args.model_type == 'critic':
        train_policy = False
        train_critic = True
    elif args.model_type == 'tom':
        train_policy = False
        train_critic = False

    rewards = [None] * 2
    s_rewards = [None] * 2
    critic_report_stats = RLStatistics()
    critic_stats = RLStatistics()
    last_time = time.time()
    tensorboard_every = 1
    history_train_losses = [[], []]

    for i in range(args.num_dialogues):
        # Rollout
        scenario = self._get_scenario()
        controller = self._get_controller(scenario, split='train')
        controller.sessions[0].set_controller(controller)
        controller.sessions[1].set_controller(controller)
        example = controller.simulate(args.max_turns, verbose=args.verbose)

        for session_id, session in enumerate(controller.sessions):
            # Compute the reward and standardize it against the running history
            reward = self.get_reward(example, session)
            all_rewards = self.all_rewards[session_id]
            all_rewards.append(reward)
            s_reward = (reward - np.mean(all_rewards)) / max(1e-4, np.std(all_rewards))
            rewards[session_id] = reward
            s_rewards[session_id] = s_reward

        for session_id, session in enumerate(controller.sessions):
            # Only train one agent
            if session_id != self.training_agent:
                continue
            batch_iter = session.iter_batches()
            T = next(batch_iter)  # advance past the first item yielded by the iterator
            if train_policy:
                loss = self.update(batch_iter, reward, self.model, discount=args.discount_factor)
                history_train_losses[session_id].append(loss)
            if train_critic:
                stats = self.update_critic(batch_iter, reward, self.critic, discount=args.discount_factor)
                critic_report_stats.update(stats)
                critic_stats.update(stats)

        if args.verbose and (train_policy or args.model_type == 'tom'):
            from core.price_tracker import PriceScaler
            for session_id, session in enumerate(controller.sessions):
                bottom, top = PriceScaler.get_price_range(session.kb)
                print('Agent[{}: {}], bottom ${}, top ${}'.format(
                    session_id, session.kb.role, bottom, top))
            for line in example.to_text():
                print(line)
            print("reward: [0]{}\nreward: [1]{}".format(
                self.all_rewards[0][-1], self.all_rewards[1][-1]))

        # Save logs on tensorboard
        if (i + 1) % tensorboard_every == 0:
            for j in range(2):
                self.writer.add_scalar('agent{}/reward'.format(j),
                                       np.mean(self.all_rewards[j][-tensorboard_every:]), i)
                if len(history_train_losses[j]) >= tensorboard_every:
                    tmp = np.concatenate(history_train_losses[j][-tensorboard_every:], axis=0)
                    tmp = np.mean(tmp, axis=0)
                    self.writer.add_scalar('agent{}/total_loss'.format(j), tmp[0], i)
                    self.writer.add_scalar('agent{}/logp_loss'.format(j), tmp[1], i)
                    self.writer.add_scalar('agent{}/intent_loss'.format(j), tmp[2], i)
                    self.writer.add_scalar('agent{}/price_loss'.format(j), tmp[3], i)

        if (i + 1) % args.report_every == 0:
            import seaborn as sns
            import matplotlib.pyplot as plt
            if args.histogram:
                sns.set_style('darkgrid')
            if train_policy:
                for j in range(2):
                    print('agent={}'.format(j), end=' ')
                    print('step:', i, end=' ')
                    print('reward:', rewards[j], end=' ')
                    print('scaled reward:', s_rewards[j], end=' ')
                    print('mean reward:', np.mean(self.all_rewards[j]))
                    if args.histogram:
                        self.agents[j].env.dialogue_generator.get_policyHistogram()
            if train_critic:
                critic_report_stats.output(i + 1, 0, 0, last_time)
                critic_report_stats = RLStatistics()
            print('-' * 10)
            if args.histogram:
                plt.show()
            last_time = time.time()

        # Save model
        if (i > 0 and i % 100 == 0) and not args.only_run:
            if train_policy:
                valid_stats = self.validate(args)
                self.drop_checkpoint(args, i, valid_stats,
                                     model_opt=self.agents[self.training_agent].env.model_args)
                self.update_opponent('policy')
            elif train_critic:
                # TODO: reverse!
                self.drop_checkpoint(args, i, critic_stats,
                                     model_opt=self.agents[self.training_agent].env.model_args)
                critic_stats = RLStatistics()
            else:
                valid_stats = self.validate(args)
                print('valid result: ', valid_stats.str_loss())
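# Self-contained sketch of the reward standardization used in learn() above
# (the helper name is assumed, not part of the original class): each new
# reward is z-scored against the running history, with the std floored at
# eps to avoid division by zero early in training.
import numpy as np

def standardize_reward(reward, all_rewards, eps=1e-4):
    all_rewards.append(reward)
    return (reward - np.mean(all_rewards)) / max(eps, np.std(all_rewards))

history = []
print(standardize_reward(1.0, history))  # the first reward z-scores to 0.0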
def get_price_number(self, entity, kb):
    raw_price = PriceScaler.unscale_price(kb, entity)
    return entity_to_value(raw_price)