Example #1
 def process_event(self, e, kb):
     '''
     Tokenize, link entities
     '''
     if e.action == 'message':
         # Lower, tokenize, link entity
         entity_tokens = self.lexicon.link_entity(tokenize(e.data),
                                                  kb=kb,
                                                  scale=True,
                                                  price_clip=4.)
         if entity_tokens:
             return entity_tokens
         else:
             return None
     elif e.action == 'offer':
         data = e.data['price']
         if data is None:
             return None
         price = PriceScaler._scale_price(kb, data)
         entity_tokens = [markers.OFFER, self.price_to_entity(price)]
         return entity_tokens
     elif e.action == 'quit':
         entity_tokens = [markers.QUIT]
         return entity_tokens
     elif e.action == 'accept':
         entity_tokens = [markers.ACCEPT]
         return entity_tokens
     elif e.action == 'reject':
         entity_tokens = [markers.REJECT]
         return entity_tokens
     else:
         raise ValueError('Unknown event action.')
Example #2
    def process_event(self, e, kb):
        '''
        Tokenize, link entities
        '''
        from cocoa.core.event import Event
        # Process semi-event
        if not isinstance(e, Event):
            if len(e) < 2 or e[1] is None:
                return e
            else:
                # print('e is:', e)
                e[1] = PriceScaler._scale_price(kb, e[1])
                return e

        if e.action == 'message':
            # Lower, tokenize, link entity
            entity_tokens = self.lexicon.link_entity(tokenize(e.data),
                                                     kb=kb,
                                                     scale=True,
                                                     price_clip=4.)
            if entity_tokens:
                return entity_tokens
            else:
                return None
        elif e.action == 'offer':
            data = e.data['price']
            if data is None:
                return None
            price = PriceScaler._scale_price(kb, data)
            entity_tokens = [markers.OFFER, self.price_to_entity(price)]
            return entity_tokens
        elif e.action == 'quit':
            entity_tokens = [markers.QUIT]
            return entity_tokens
        elif e.action == 'accept':
            entity_tokens = [markers.ACCEPT]
            return entity_tokens
        elif e.action == 'reject':
            entity_tokens = [markers.REJECT]
            return entity_tokens
        else:
            raise ValueError('Unknown event action.')
Example #3
    def _entity_to_str(self, entity_token, kb):
        if entity_token[0] is None:
            return None

        raw_price = PriceScaler.unscale_price(kb, entity_token)

        if isinstance(raw_price, Entity):
            price = raw_price.canonical.value
        else:
            price = raw_price.value
        human_readable_price = "${}".format(price)
        return human_readable_price
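
`_entity_to_str` in Example #3 has to handle `unscale_price` returning either a full `Entity` or only its canonical part. A minimal, self-contained sketch of that branch, with hypothetical stand-ins for the project's entity types (the real ones live in the cocoa package):

    from collections import namedtuple

    # Hypothetical stand-ins mirroring the raw_price.canonical.value access
    # pattern seen in Examples #3 and #10.
    CanonicalEntity = namedtuple('CanonicalEntity', ['value', 'type'])
    Entity = namedtuple('Entity', ['surface', 'canonical'])

    def entity_to_str(raw_price):
        # unscale_price may hand back a full Entity or only its canonical part.
        if isinstance(raw_price, Entity):
            price = raw_price.canonical.value
        else:
            price = raw_price.value
        return "${}".format(price)

    print(entity_to_str(Entity(120.0, CanonicalEntity(120.0, 'price'))))  # $120.0
    print(entity_to_str(CanonicalEntity(99.0, 'price')))                  # $99.0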
Example #4
    @classmethod
    def get_price_trend(cls, price_tracker, chat, agent=None):
        def _normalize_price(seen_price):
            return (float(seller_target) - float(seen_price)) / (
                float(seller_target) - float(buyer_target))

        scenario = NegotiationScenario.from_dict(None, chat['scenario'])
        # print chat['scenario']
        kbs = scenario.kbs
        roles = {
            kbs[0].facts['personal']['Role']: 0,
            kbs[1].facts['personal']['Role']: 1
        }

        buyer_target = kbs[roles[utils.BUYER]].facts['personal']['Target']
        seller_target = kbs[roles[utils.SELLER]].facts['personal']['Target']

        prices = []
        for e in chat['events']:
            if e['action'] == 'message':
                if agent is not None and e['agent'] != agent:
                    continue
                raw_tokens = tokenize(e['data'])
                # link entity
                linked_tokens = price_tracker.link_entity(raw_tokens,
                                                          kb=kbs[e['agent']])
                for token in linked_tokens:
                    if isinstance(token, Entity):
                        try:
                            replaced = PriceScaler.unscale_price(
                                kbs[e['agent']], token)
                        except OverflowError:
                            print "Raw tokens: ", raw_tokens
                            print "Overflow error: {:s}".format(token)
                            print kbs[e['agent']].facts
                            print "-------"
                            continue
                        norm_price = _normalize_price(replaced.canonical.value)
                        if 0. <= norm_price <= 2.:
                            # if the number is greater than the list price or significantly lower than the buyer's
                            # target it's probably not a price
                            prices.append(norm_price)
                # do some stuff here
            elif e['action'] == 'offer':
                norm_price = _normalize_price(e['data']['price'])
                if 0. <= norm_price <= 2.:
                    prices.append(norm_price)
                # prices.append(e['data']['price'])

        # print "Chat: {:s}".format(chat['uuid'])
        # print "Trend:", prices

        return prices
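
`_normalize_price` above maps a mentioned price onto a scale where the seller's target is 0 and the buyer's target is 1, and only values in [0, 2] are kept as plausible prices. A self-contained sketch with made-up targets:

    # Normalization from Example #4; the targets below are made-up numbers.
    def normalize_price(seen_price, seller_target, buyer_target):
        # 0.0 at the seller's target, 1.0 at the buyer's target
        return (float(seller_target) - float(seen_price)) / (
            float(seller_target) - float(buyer_target))

    seller_target, buyer_target = 200.0, 100.0
    for p in (200, 150, 100, 450):
        norm = normalize_price(p, seller_target, buyer_target)
        # Example #4 drops values outside [0, 2] as unlikely to be real prices.
        print(p, norm, 0. <= norm <= 2.)
    # 200 -> 0.0, 150 -> 0.5, 100 -> 1.0, 450 -> -2.5 (rejected)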
Example #5
    def example_to_str(self, example, controller, rewards):
        verbose_str = []
        from core.price_tracker import PriceScaler
        for session_id, session in enumerate(controller.sessions):
            bottom, top = PriceScaler.get_price_range(session.kb)
            s = 'Agent[{}: {}], bottom ${}, top ${}'.format(
                session_id, session.kb.role, bottom, top)
            verbose_str.append(s)

        verbose_str.extend(example.to_text())
        s = "reward: [0]{}\nreward: [1]{}".format(rewards[0], rewards[1])
        verbose_str.append(s)
        return verbose_str
Example #6
 def lf_to_tokens(self, kb, lf):
     intent = lf['intent']
     if intent == 'accept':
         intent = markers.ACCEPT
     elif intent == 'reject':
         intent = markers.REJECT
     elif intent == 'quit':
         intent = markers.QUIT
     elif intent == 'offer':
         intent = markers.OFFER
     tokens = [intent]
     if lf.get('price') is not None:
         p = lf['price']
         price = Entity.from_elements(surface=p, value=p, type='price')
         tokens.append(PriceScaler.scale_price(kb, price))
     return tokens
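
`lf_to_tokens` turns a logical form into a token sequence: the intent becomes a marker token, and a price, if present, is scaled and appended. A runnable sketch of the dispatch, with hypothetical marker values and an identity function standing in for `PriceScaler.scale_price`:

    # Marker strings here are hypothetical; the real ones come from `markers`.
    MARKERS = {'accept': '<accept>', 'reject': '<reject>',
               'quit': '<quit>', 'offer': '<offer>'}

    def lf_to_tokens(lf, scale=lambda p: p):
        # `scale` stands in for PriceScaler.scale_price (identity here).
        tokens = [MARKERS.get(lf['intent'], lf['intent'])]
        if lf.get('price') is not None:
            tokens.append(scale(lf['price']))
        return tokens

    print(lf_to_tokens({'intent': 'offer', 'price': 120.0}))  # ['<offer>', 120.0]
    print(lf_to_tokens({'intent': 'quit'}))                   # ['<quit>']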
Example #7
    def example_to_str(self, example, controller, rewards, sid=None):
        verbose_str = []
        from core.price_tracker import PriceScaler
        if sid is not None:
            verbose_str.append('[Scenario id: {}]'.format(sid))
        for session_id, session in enumerate(controller.sessions):
            bottom, top = PriceScaler.get_price_range(session.kb)
            s = 'Agent[{}: {}], bottom ${}, top ${}'.format(session_id, session.kb.role, bottom, top)
            verbose_str.append(s)
        verbose_str.append("They are negotiating for "+session.kb.facts['item']['Category'])

        verbose_str.extend(self.example_to_text(example))
        s = "reward: [0]{}\nreward: [1]{}".format(rewards[0], rewards[1])
        verbose_str.append(s)
        return verbose_str
Example #8
 @classmethod
 def original_price(cls, kb, utterance):
     s = [
         PriceScaler.unscale_price(kb, x) if is_entity(x) else x
         for x in utterance
     ]
     return s
Example #9
 @classmethod
 def scale_price(cls, kb, utterance):
     return [
         PriceScaler.scale_price(kb, x) if is_entity(x) else x
         for x in utterance
     ]
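
Examples #8 and #9 are mirror-image helpers: one maps every price entity in an utterance back to raw dollars, the other maps raw dollars into the scaled representation. A sketch of such a pair under an assumed linear mapping over the KB's price range; the real `PriceScaler` formula may differ:

    # Hypothetical linear scaling over a (bottom, top) range, cf. the
    # get_price_range call in Example #5. Illustration only.
    def scale_price(bottom, top, raw):
        return (raw - bottom) / (top - bottom)

    def unscale_price(bottom, top, scaled):
        return bottom + scaled * (top - bottom)

    bottom, top = 50.0, 250.0  # made-up range
    scaled = scale_price(bottom, top, 150.0)
    assert unscale_price(bottom, top, scaled) == 150.0  # the maps invert each other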
Example #10
 def get_price_number(self, entity, kb):
     raw_price = PriceScaler.unscale_price(kb, entity)
     return raw_price.canonical.value
Example #11
 def _entity_to_str(self, entity_token, kb):
     raw_price = PriceScaler.unscale_price(kb, entity_token)
     human_readable_price = "${}".format(raw_price.canonical.value)
     return human_readable_price
Example #12
    def learn(self, args):
        if args.model_type == 'reinforce':
            train_policy = True
            train_critic = False
        elif args.model_type == 'critic':
            train_policy = False
            train_critic = True
        elif args.model_type == 'tom':
            train_policy = False
            train_critic = False
        else:
            raise ValueError('Unknown model type.')

        rewards = [None] * 2
        s_rewards = [None] * 2

        critic_report_stats = RLStatistics()
        critic_stats = RLStatistics()
        last_time = time.time()

        tensorboard_every = 1
        history_train_losses = [[], []]

        for i in range(args.num_dialogues):
            # Rollout
            scenario = self._get_scenario()
            controller = self._get_controller(scenario, split='train')
            # print('set controller for{} {}.'.format(self.training_agent, controller))
            controller.sessions[0].set_controller(controller)
            controller.sessions[1].set_controller(controller)
            example = controller.simulate(args.max_turns, verbose=args.verbose)

            for session_id, session in enumerate(controller.sessions):
                # if args.only_run != True and session_id != self.training_agent:
                #     continue

                # Compute reward
                reward = self.get_reward(example, session)
                # Standardize the reward
                all_rewards = self.all_rewards[session_id]
                all_rewards.append(reward)
                s_reward = (reward - np.mean(all_rewards)) / max(
                    1e-4, np.std(all_rewards))

                rewards[session_id] = reward
                s_rewards[session_id] = s_reward

            for session_id, session in enumerate(controller.sessions):
                # Only train one agent
                if session_id != self.training_agent:
                    continue

                batch_iter = session.iter_batches()
                T = next(batch_iter)

                if train_policy:
                    loss = self.update(batch_iter,
                                       reward,
                                       self.model,
                                       discount=args.discount_factor)
                    history_train_losses[session_id].append(loss)

                if train_critic:
                    stats = self.update_critic(batch_iter,
                                               reward,
                                               self.critic,
                                               discount=args.discount_factor)
                    critic_report_stats.update(stats)
                    critic_stats.update(stats)

            # print('verbose: ', args.verbose)

            if args.verbose:
                if train_policy or args.model_type == 'tom':
                    from core.price_tracker import PriceScaler
                    for session_id, session in enumerate(controller.sessions):
                        bottom, top = PriceScaler.get_price_range(session.kb)
                        print('Agent[{}: {}], bottom ${}, top ${}'.format(
                            session_id, session.kb.role, bottom, top))

                    for line in example.to_text():
                        print(line)
                    print("reward: [0]{}\nreward: [1]{}".format(
                        self.all_rewards[0][-1], self.all_rewards[1][-1]))
                    # print("Standard reward: [0]{} [1]{}".format(s_rewards[0], s_rewards[1]))

            # Save logs on tensorboard
            if (i + 1) % tensorboard_every == 0:
                for j in range(2):
                    self.writer.add_scalar(
                        'agent{}/reward'.format(j),
                        np.mean(self.all_rewards[j][-tensorboard_every:]), i)
                    if len(history_train_losses[j]) >= tensorboard_every:
                        tmp = np.concatenate(
                            history_train_losses[j][-tensorboard_every:],
                            axis=0)
                        tmp = np.mean(tmp, axis=0)
                        self.writer.add_scalar('agent{}/total_loss'.format(j),
                                               tmp[0], i)
                        self.writer.add_scalar('agent{}/logp_loss'.format(j),
                                               tmp[1], i)
                        self.writer.add_scalar('agent{}/intent_loss'.format(j),
                                               tmp[2], i)
                        self.writer.add_scalar('agent{}/price_loss'.format(j),
                                               tmp[3], i)

            if ((i + 1) % args.report_every) == 0:
                import seaborn as sns
                import matplotlib.pyplot as plt
                if args.histogram:
                    sns.set_style('darkgrid')

                if train_policy:
                    for j in range(2):
                        print('agent={}'.format(j), end=' ')
                        print('step:', i, end=' ')
                        print('reward:', rewards[j], end=' ')
                        print('scaled reward:', s_rewards[j], end=' ')
                        print('mean reward:', np.mean(self.all_rewards[j]))
                        if args.histogram:
                            self.agents[j].env.dialogue_generator.get_policyHistogram()

                if train_critic:
                    critic_report_stats.output(i + 1, 0, 0, last_time)
                    critic_report_stats = RLStatistics()

                print('-' * 10)
                if args.histogram:
                    plt.show()

                last_time = time.time()

            # Save model
            if (i > 0 and i % 100 == 0) and not args.only_run:
                if train_policy:
                    valid_stats = self.validate(args)
                    self.drop_checkpoint(
                        args,
                        i,
                        valid_stats,
                        model_opt=self.agents[
                            self.training_agent].env.model_args)
                    self.update_opponent('policy')

                elif train_critic:
                    # TODO: reverse!
                    self.drop_checkpoint(
                        args,
                        i,
                        critic_stats,
                        model_opt=self.agents[
                            self.training_agent].env.model_args)
                    critic_stats = RLStatistics()
                else:
                    valid_stats = self.validate(args)
                    print('valid result: ', valid_stats.str_loss())
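
The training loop above standardizes each raw reward against that agent's running reward history, clamping the standard deviation at 1e-4 so the first dialogues do not divide by zero. A self-contained sketch of that step (the rewards are made up):

    import numpy as np

    all_rewards = []
    for reward in [1.0, 0.0, 2.0, 1.5]:
        all_rewards.append(reward)
        # std is 0 on the first dialogue; the 1e-4 floor keeps this finite.
        s_reward = (reward - np.mean(all_rewards)) / max(1e-4, np.std(all_rewards))
        print(reward, round(s_reward, 3))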
Example #13
 def get_price_number(self, entity, kb):
     raw_price = PriceScaler.unscale_price(kb, entity)
     return entity_to_value(raw_price)