def _episode_to_range(episode_number) -> Tuple[int, int]:
    """
    Translates an episode number into a (start, end) pair.

    Both bounds are multiples of the active configuration's ``episode_size``.
    For a negative episode number both bounds are shifted by the current number
    of stored sentences.

    :param episode_number: number of the episode, may be negative
    :return: tuple of (start, end)
    :raises NoDataException: if there are no sentences at all
    """
    sentence_total = Sentence.objects.count()
    if not sentence_total:
        raise NoDataException

    lower = episode_number * Configuration.get_active().episode_size
    upper = (episode_number + 1) * Configuration.get_active().episode_size

    # negative episodes are shifted by the sentence count
    shift = sentence_total if episode_number < 0 else 0
    return lower + shift, upper + shift
def on_message(self, sentence):
    """
    Reacts to a new sentence addressed to the bot.

    Chooses an action (by querying the bot, or the hard coded
    "didn't understand" action for the unknown intent), stores the response as
    Message and Sentence, updates the user profile of the dialogue and finally
    triggers a training run.

    :param sentence: the Sentence the bot should respond to
    """
    assert isinstance(sentence, Sentence)
    logger.debug(
        "Received notification on new message to bot, processing...")

    response_factory = self._get_and_update_factory(sentence)
    dialogue_context = self._create_context_from_sentence(sentence)
    current_state = State(sentence)

    # mapping unknown_intent -> "Sorry didn't understand that" is hard coded, to avoid confusion
    # NOTE(review): sentence.intent is dereferenced here without a None check,
    # although the log statement at the end guards against a missing intent.
    if sentence.intent.template.name == Configuration.get_active().unknown_intent.name:
        action_name = Configuration.get_active().didnt_understand_intent.name
    else:
        network_input = {
            'state_input': numpy.array([current_state.as_vector()]),
            'context_input': numpy.array([dialogue_context.as_matrix()])
        }
        action_name = self.bot.query(network_input)

    # store the raw message in the chat between the human user and the bot
    sender = User.objects.get(username=sentence.said_by)
    bot_chat = Chat.objects.get(initiator=sender, receiver=self._bot_user)
    response_text = factory_text = response_factory.create_response(action_name)
    raw_message = Message.objects.create(
        value=factory_text,
        sent_by=self._bot_user,
        sent_in=bot_chat)

    # store the response as a sentence of the same dialogue
    template = IntentTemplate.objects.get(name=action_name)
    response_intent = Intent.objects.create(template=template)
    response = Sentence.objects.create(
        value=response_text,
        said_in=sentence.said_in,
        said_by=self._bot_user,
        raw_sentence=raw_message,
        intent=response_intent,
        sentiment=0,
        created_by=self.bot.model_base_name)

    update_user_profile_for_single_dialogue(response.said_in)
    response.refresh_from_db()

    if sentence.intent is not None:
        intent_label = sentence.intent.template.name
    else:
        intent_label = 'Unknown intent'
    logger.info(
        'Responded to sentence {}(intent="{}") with action "{}". Done processing message!'
        .format(sentence, intent_label, action_name))

    self.bot.train()
def all_transitions_from_turns(turns: List["Turn"], context_length=None):
    """
    Creates transitions from a list of turns. Reverses that list of turns,
    so put in a list of turns with ascending "dates" of these turns.

    :param turns: List of Turns
    :param context_length: length of context for each turn; ``None`` (default)
        uses the ``context_length`` of the configuration active at call time
    :return: a list of Transitions
    """
    # Resolve the default per call: a default expression in the signature would
    # be evaluated only once, when the function is defined.
    if context_length is None:
        context_length = Configuration.get_active().context_length

    reversed_turns = turns[::-1]
    # one slice needs to have context_length turns for the context
    # and one for each State s_0 (current state) as well as State s_1 (future state)
    slice_size = context_length + 2

    # NOTE(review): the slices are cut from the reversed list while
    # single_transition_from_turns treats the LAST element of a slice as the
    # future turn -- confirm that this orientation is intended.
    transitions = []
    for start in range(len(reversed_turns)):
        # slicing already stops at the end of the list, no manual clamping needed
        turns_slice = reversed_turns[start:start + slice_size]
        if len(turns_slice) >= 2:
            # need at least 2 states for a proper transition
            transitions.append(
                Transition.single_transition_from_turns(
                    turns_slice, context_length))
    return transitions
def vector_from_sentence(sentence: Sentence):
    """
    Looks up the action vector belonging to the intent of a sentence.

    :param sentence: the sentence whose intent names the action
    :return: the result of ``Action.vector_from_name`` for the intent's template name
    :raises NoIntentError: if the sentence has no intent
    :raises NoActionIntentError: if the active configuration does not regard
        the intent as an action intent
    """
    if sentence.intent is None:
        raise NoIntentError(sentence)

    is_action = Configuration.get_active().is_action_intent(
        sentence.intent.template.name)
    if is_action:
        return Action.vector_from_name(sentence.intent.template.name)
    raise NoActionIntentError(sentence.intent, sentence)
def __init__(self):
    """
    Loads and prepares the response templates of the active configuration and
    sets up the lookup tables and the content interface of this instance.
    """
    templates = Configuration.get_active().response_templates.all()
    self._base_sentences = templates
    for response_template in templates:
        response_template.prepare()

    self._context = {}

    # intent name -> list of names associated with that intent
    # (presumably the entities to keep track of -- confirm against the users
    # of _tracking_table)
    tracking = {}
    for player_query in ('query.player.height',
                         'query.player.information.age',
                         'query.player.information.goals',
                         'query.player.information.shoe'):
        tracking[player_query] = ['player']
    tracking['query.player.news'] = ['content-type', 'player']
    tracking['query.player.news.more'] = ['player', 'content-type']
    tracking['userprofile.response.name'] = ['given-name']
    tracking['userprofile.response.favorite_player'] = ['player']
    tracking['userprofile.response.age'] = ['age']
    self._tracking_table = tracking

    # action/intent name -> bound method registered for that name
    builders = {
        'common.hi': self._greeting,
        'common.thanks': self._thanks,
        'common.how_are_you': self._how_are_you,
        'response.player.news': self._content,
        'response.player.information.age': self._player_age,
        'response.player.information.height': self._player_height,
        'response.player.information.goals': self._player_goals,
        'response.player.information.shoe': self._player_shoe,
        'offer.player.news': self._offer_content,
    }
    self._response_builders = builders

    self._content_interface = SimpleContentInterface()
def _init_callbacks(self):
    """
    Sets up the callbacks of this model.

    Override to change the callbacks in use. By default a single TensorBoard
    callback is registered which logs into a directory named after the model
    below the configured log directory.
    """
    log_root = Configuration.get_active().log_dir
    model_log_dir = os.path.join(log_root, self._model.name)
    tensorboard = TensorBoard(log_dir=model_log_dir)
    self._callbacks = [tensorboard]
def get_single_context(turns, context_length: int = None):
    """
    Creates a context object from turns, useful for training

    :param turns: the Turns that make up the context, e.g. the turns prior to
        the current Sentence/State; must not be longer than context_length
    :param context_length: number of steps into the past; ``None`` (default)
        uses the ``context_length`` of the configuration active at call time
    :return: a Context object see __init__
    :raises AssertionError: if more than context_length turns are given
    """
    # Resolve the default per call: a default expression in the signature would
    # be evaluated only once, when the function is defined.
    if context_length is None:
        context_length = Configuration.get_active().context_length

    assert len(turns) <= context_length
    # split every turn into its user part (state) and its bot part (action)
    states = [turn.user for turn in turns]
    actions = [turn.bot for turn in turns]
    return Context(states, actions, context_length)
def __init__(self, sentence: Sentence = None):
    """
    Builds a new state. Without a sentence no fields are set; with a sentence
    all fields are filled from it.

    :param sentence: an object of type Sentence used to populate the state
    """
    if sentence is None:
        return

    assert isinstance(sentence, Sentence)
    self.intent_name = sentence.intent.template.name
    self.intent_vector = State._intent_vector_from_sentence(sentence)

    try:
        intent_index = Configuration.get_active().state_index_for_name(
            self.intent_name)
    except ValueError:
        # not in list --> unknown intent
        intent_index = Configuration.get_active().state_index_for_name(
            Configuration.get_active().unknown_intent.name)
    self._intent_index = intent_index

    self.sentiment = float(sentence.sentiment)
    self.user_profile = sentence.user_profile
    self.user_profile_vector = State._convert_user_profile(
        sentence.user_profile)
def predict(self, inputs: Dict[str, numpy.ndarray]):
    """
    Thin wrapper around the predict function of the underlying model, executed
    inside this object's graph.

    :param inputs: input as dict of input_name->numpy.ndarray
    :return: whatever the model's predict returns for these inputs
        (unprocessed), using the configured batch size
    """
    graph_scope = self._graph.as_default()
    with graph_scope:
        prediction = self._model.predict(
            inputs, batch_size=Configuration.get_active().batch_size)
        return prediction
def single_transition_from_turns(turns: List["Turn"], context_length=None):
    """
    Creates a single Transition from a list of turns.

    The last turn provides the future state, the second to last turn provides
    the current state and action, and all turns before those form the context.

    :param turns: list of at least 2 Turns
    :param context_length: length of the context; ``None`` (default) uses the
        ``context_length`` of the configuration active at call time
    :return: the Transition built from these turns
    :raises AssertionError: if fewer than 2 turns are given
    """
    assert len(
        turns
    ) >= 2, 'You need at least 2 turns for a Transition (= s_0->s_1 with no context)'

    # Resolve the default per call: a default expression in the signature would
    # be evaluated only once, when the function is defined.
    if context_length is None:
        context_length = Configuration.get_active().context_length

    final_turn = turns[-1]
    current_turn = turns[-2]
    context = Context.get_single_context(turns[:-2], context_length)
    return Transition(current_turn.user, current_turn.bot, context,
                      final_turn.user)
def __init__(self, sentence: Sentence = None):
    """
    Builds an action from the intent, reward and terminal flag of a sentence.

    :param sentence: sentence to get the action from
    :raises NoIntentError: if the given sentence has no intent
    :raises NoActionIntentError: if the given sentence has an intent, that
        cannot be interpreted as action
    """
    # NOTE(review): the default of None is passed on unchecked to
    # vector_from_sentence -- a sentence is effectively required.
    action_vector = Action.vector_from_sentence(sentence)
    self._action_vector = action_vector

    intent_name = sentence.intent.template.name
    self._action_index = Configuration.get_active().action_index_for_name(
        intent_name)

    self.reward = float(sentence.reward)
    self.terminal = bool(sentence.terminal)
def run(self):
    """
    Endlessly scans the sentences returned by a per-dialogue query and marks
    those as terminal that are older than the configured number of seconds
    and not yet flagged as terminal.
    """
    # TODO: why?
    sleep(5)
    logger.info("Started TurnsTerminator.")
    logger.info("TurnsTerminator checking for unterminated sentences.")

    per_dialogue_query = 'SELECT * FROM turns_sentence GROUP BY said_in_id ORDER BY said_on DESC;'
    while True:
        candidates = Sentence.objects.raw(per_dialogue_query)
        for candidate in candidates:
            elapsed = timezone.now() - candidate.said_on
            # timedelta keeps days and seconds apart, merge them into seconds
            idle_seconds = elapsed.seconds + elapsed.days * Configuration.get_active(
            ).seconds_per_day
            timed_out = idle_seconds >= Configuration.get_active(
            ).seconds_for_terminal
            if timed_out and not candidate.terminal:
                logger.info(
                    "Got no new sentence after {} seconds, {} has to be a terminal sentence"
                    .format(idle_seconds, candidate.value))
                candidate.terminal = True
                candidate.save()
        # Nyquist frequency, so we don't miss terminals
        sleep(Configuration.get_active().seconds_for_terminal / 2)
def train(self, validate=True):
    """
    Trains for a single episode while holding the training lock.

    :param validate: if truthy, one additional batch is sampled after training
        and used to add 'val_loss', 'est_value' and 'true_value' to the
        episode logs
    :return: a bool indicating whether or not training was possible and
        successful.
    """
    if not self.training_allowed:
        return False

    logger.debug('Acquiring lock...')
    with self._training_lock:
        if not self._can_sample_batch(self.episodes_seen):
            logger.debug('No more episodes to train on are available!')
            return False

        with self._graph.as_default():
            logs = {}
            loss_sum = 0
            batch_count = 0
            logger.info(
                'Bot training on episode #{}'.format(self.episodes_seen + 1))

            for _ in range(Configuration.get_active().steps_per_episode):
                sample_count, inputs, targets = self._sample_batch(
                    self.episodes_seen)
                if sample_count <= 0:
                    # no batch available, give up on this episode
                    return False
                step_logs = {
                    'loss': self._perform_training_step((inputs, targets))
                }
                self._on_batch_end(batch_count, step_logs)
                # read the loss back from the logs, after the batch-end hook ran
                loss_sum += step_logs['loss']
                batch_count += 1
                self.samples_seen += sample_count

            logs['loss'] = loss_sum / batch_count

            if validate:
                sample_count, inputs, targets = self._sample_batch(
                    self.episodes_seen)
                if sample_count > 0:
                    logs['val_loss'] = self._model.test_on_batch(
                        inputs, targets)
                    estimates = self._model.predict_on_batch(inputs)
                    logs['est_value'] = estimates.sum() / sample_count
                    logs['true_value'] = targets['quality_output'].sum(
                    ) / sample_count

            self.episodes_seen += 1
            self._save_stats()
            self._save_weights()
            self._on_episode_end(self.episodes_seen, logs)
            return True
def __str__(self) -> str:
    """
    Returns a readable representation naming the action intent of this action.

    :return: a string of the form ``<Action NAME>``
    """
    # Go through .all() like _prediction_to_action_name does: a related manager
    # itself cannot be subscripted, the queryset it returns can.
    action_intents = Configuration.get_active().action_intents.all()
    return '<Action {}>'.format(action_intents[self._action_index].name)
def __init__(self):
    """
    Initializes the parent class with the epsilon of the active configuration,
    converted to float.
    """
    epsilon = float(Configuration.get_active().epsilon)
    super().__init__(epsilon)
def _sample_batch(self, episode_number) -> Tuple[int, Dict, Dict]:
    """
    Samples one training batch for the given episode and computes its
    quality targets with the target network.

    :param episode_number: number of the episode, translated into a sampling
        range via _episode_to_range
    :return: a tuple of (number of samples, input dict with 'state_input' and
        'context_input', output dict with 'quality_output'); (0, {}, {}) if
        fewer transitions than the configured batch size were built
    :raises IntentError: re-raised after logging, if a sampled sentence
        cannot be turned into a State/Action
    """
    start, stop = DeepMindNoContextBot._episode_to_range(episode_number)
    # draw batch_size sentences of the bot from the episode's range
    sentences = \
        [Sentence.sample_sentence_in_range(self._bot_user.username, start, stop)
         for _ in range(0, Configuration.get_active().batch_size)]
    transitions = []
    for sentence in sentences:
        context_sentences = []
        # at least 4 sentences are needed below (a1, s1, a0, s0)
        # NOTE(review): this loop only ends once a sample with enough context
        # was drawn -- it may spin forever if no such sample exists.
        while len(context_sentences) < 4:
            assert sentence.said_by == self._bot_user.username
            # newest sentences of the same dialogue up to the sampled one
            context_sentences = Sentence.objects.filter(
                said_in=sentence.said_in).filter(
                    said_on__lte=sentence.said_on).order_by(
                        '-said_on'
                    )[:Configuration.get_active().context_length * 2 + 4]
            # back to ascending order, so pop() yields the newest sentence
            context_sentences = list(reversed(context_sentences))
            if len(context_sentences) < 4:
                # re-sample, since this sample has not enough context
                sentence = Sentence.sample_sentence_in_range(
                    self._bot_user.username, start, stop)
        try:
            # the order of the pops matters: newest sentence first
            a1 = Action(context_sentences.pop())
            s1 = State(context_sentences.pop())
            turns = Turn.sentences_to_turns(context_sentences,
                                            self._bot_user)
            # context turns list should only be CONTEXT_LENGTH long
            raw_context_t1 = turns[0:Configuration.get_active().
                                   context_length]
            context_t1 = Context.get_single_context(
                raw_context_t1,
                Configuration.get_active().context_length)
            a0 = Action(context_sentences.pop())
            s0 = State(context_sentences.pop())
            turns = Turn.sentences_to_turns(context_sentences,
                                            self._bot_user)
            context_t0 = Context.get_single_context(
                turns,
                Configuration.get_active().context_length)
        except IntentError as e:
            logger.error(
                'Error occurred while processing sampled sentence {}. See below.'
                .format(sentence))
            raise e
        transition = Transition(s0, a0, context_t0, s1, a1, context_t1)
        transitions.append(transition)
    # NOTE(review): every loop iteration above either appends a transition or
    # raises, so this early return looks unreachable -- confirm.
    if len(transitions) < Configuration.get_active().batch_size:
        return 0, {}, {}
    assert len(transitions) == Configuration.get_active().batch_size
    actions = [transition.action_t0 for transition in transitions]
    states = numpy.array(
        [transition.state_t0.as_vector() for transition in transitions])
    contexts = numpy.array(
        [transition.context_t0.as_matrix() for transition in transitions])
    future_states = numpy.array(
        [transition.state_t1.as_vector() for transition in transitions])
    future_contexts = numpy.array(
        [transition.context_t1.as_matrix() for transition in transitions])
    # mask: 0 for terminal transitions, 1 otherwise
    terminals = numpy.array(
        [0 if transition.terminal else 1 for transition in transitions])
    rewards = numpy.array(
        [transition.action_t0.reward for transition in transitions])
    assert future_states.shape == (Configuration.get_active(
    ).batch_size, ) + Configuration.get_active().state_shape
    assert contexts.shape == (Configuration.get_active().batch_size,
                              ) + Configuration.get_active().context_shape
    assert rewards.shape == (Configuration.get_active().batch_size, )
    # qualities of the future states as predicted by the target network
    target_quality = self._target.predict({
        'state_input': future_states,
        'context_input': future_contexts
    })
    assert target_quality.shape == (
        Configuration.get_active().batch_size,
        Configuration.get_active().number_actions)
    # best future quality per sample, discounted and zeroed for terminals
    quality_batch = target_quality.max(axis=1).flatten()
    quality_batch *= self.discount
    quality_batch *= terminals
    logger.debug("Future qualities are {}".format(quality_batch))
    logger.debug('Rewards are {}'.format(rewards))
    quality_batch = rewards + quality_batch
    logger.debug("Working with qualities {}".format(quality_batch))
    # target matrix: all zeros except for the quality of the action taken
    target_quality = numpy.zeros(
        (Configuration.get_active().batch_size,
         Configuration.get_active().number_actions))
    for target, action, quality in zip(target_quality, actions,
                                       quality_batch):
        target[action.intent_index] = quality
    return (len(states), {
        'state_input': states,
        'context_input': contexts
    }, {
        'quality_output': target_quality
    })
def model_directory(self):
    """
    Builds the path of this model's directory: the configured weights
    directory joined with the name of the model.

    :return: the joined path
    """
    weights_root = Configuration.get_active().weights_dir
    model_name = self._model.name
    return os.path.join(weights_root, model_name)
def __init__(self):
    """
    Initializes the parent class with the discount of the active
    configuration, converted to float.
    """
    discount = float(Configuration.get_active().discount)
    super().__init__(discount)
def _prediction_to_action_name(self, prediction: numpy.ndarray) -> str:
    """
    Picks the action intent belonging to the highest value of a prediction.

    :param prediction: prediction array, only its first entry is looked at
    :return: name of the configured action intent at the argmax position
    """
    first_row = prediction[0]
    # materialize the intents as a list, so they can be indexed by the argmax
    action_intents = list(Configuration.get_active().action_intents.all())
    best_intent = action_intents[first_row.argmax()]
    return best_intent.name