def __setitem__(self, k, v):
    if k in self.__fields:
        super(Selector, self).__setitem__(U(k), U(v))
        super(Selector, self).__setitem__(
            self.__mask, self[self.__mask] | self.__fields[k][0])
    else:
        raise ReferenceError("%s is not allowed." % k)
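# A minimal, self-contained sketch of the same whitelist-plus-bitmask pattern
# (hypothetical field table and names, not the real Selector): allowed keys
# are stored and their bit is OR-ed into a running mask; unknown keys raise.
class _SelectorSketch(dict):
    _fields = {'text': (0b01,), 'className': (0b10,)}  # hypothetical bits
    _mask_key = 'mask'

    def __init__(self):
        super(_SelectorSketch, self).__init__()
        super(_SelectorSketch, self).__setitem__(self._mask_key, 0)

    def __setitem__(self, k, v):
        if k in self._fields:
            super(_SelectorSketch, self).__setitem__(k, v)
            super(_SelectorSketch, self).__setitem__(
                self._mask_key, self[self._mask_key] | self._fields[k][0])
        else:
            raise ReferenceError("%s is not allowed." % k)

# Usage: sel = _SelectorSketch(); sel['text'] = 'OK' stores the entry and
# flips bit 0b01 in sel['mask']; sel['bogus'] = 1 raises ReferenceError.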
def _respond_to(self, state, last_user_utt_dict, last_bot_utt, user_utt_dict):
    del last_user_utt_dict  # unused
    state.rounds += 1
    question = user_utt_dict['corefed_utt']
    if state.sentences is None:
        return state, 'Adrian, give me an article!', 0.0
    if self.was_i_last(state.my_previous_answer, last_bot_utt):
        state.position += 1
    if state.position >= len(state.sentences):
        return state, 'I have no more to say. Maybe we should finish now?', 0.01
    val, txt = state.sentences[state.position]
    val = 0.9 * val + 0.1 * trigger(question)
    val *= 0.95 ** state.position  # discount factor
    state.my_previous_answer = txt
    # Look for curse words
    if [t for t in tokenizer.tokenize(txt.lower(), correct_spelling=False)
            if t in profanity.badwords]:
        val = 0.0
    response = U(brainy_smurf_intro() + ' ' + txt)
    state.my_last_text = response
    state.my_last_score = val
    return state, response, val
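# Worked score sketch for _respond_to above: a sentence stored with val = 0.8
# at position 3, for a question with trigger(question) = 0.5, scores
# 0.9 * 0.8 + 0.1 * 0.5 = 0.77, then 0.77 * 0.95**3 ≈ 0.66 -- unless it
# contains profanity, in which case it is zeroed out.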
def async_respond_to_queued_utts(session_key, user_utt, talker_names):
    user_utt = U(user_utt)
    failsafe_user_utt = {
        'raw_utt': user_utt,
        'spelled_utt': user_utt,
        'spelled_tags': [],
        'corefed_utt': user_utt,
        'corefed_tags': []
    }
    failsafe_bot_response = {
        'talker_name': "failsafe",
        'utt': u'Sorry, could you say that again, please :)',
        'score': -1.0,
        'confidence': -1.0,
        'talker_weight': 1.0
    }
    pipe = db.pipeline()
    pipe.set(last_bot_utt_key(session_key), AsyncBot.none_pickle, nx=True)
    pipe.set(new_user_utt_key(session_key), AsyncBot.none_pickle, nx=True)
    pipe.rename(new_user_utt_key(session_key), last_user_utt_key(session_key))
    pipe.set(new_user_utt_key(session_key), pickle.dumps(failsafe_user_utt, -1))
    pipe.delete(new_bot_utts_key(session_key))
    pipe.zadd(new_bot_utts_key(session_key),
              pickle.dumps(failsafe_bot_response, -1),
              -failsafe_bot_response['score'])
    pipe.delete(new_bot_followups_key(session_key))
    pipe.execute()
    responders = [
        async_talkers[tn] for tn in talker_names
        if not getattr(async_talkers[tn].klass._respond_to, '_no_op', False)
    ]
    follow_uppers = [
        async_talkers[tn] for tn in talker_names
        if not getattr(async_talkers[tn].klass.follow_up, '_no_op', False)
    ]
    sa = dict(immutable=True)
    if config.celery_timeouts:
        sa['soft_time_limit'] = config.talker_respond_timeout
        sa['time_limit'] = config.talker_respond_timeout + 2
    job = (
        AsyncBot.preprocessor.preprocess.signature(
            (user_utt, session_key), **sa) |
        celery.group(
            t.respond_to.signature((session_key, ), **sa)
            for t in responders) |
        # add noop because of https://github.com/celery/celery/issues/3585
        AsyncBot.noop.si(session_key))
    if follow_uppers:
        job = (job | celery.group(
            t.follow_up.signature((session_key, ), **sa)
            for t in follow_uppers))
    job = job | AsyncBot.combine_responses_and_follow_ups.signature(
        (session_key, ), **sa)
    return job.delay(time_limit=3)
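# Shape of the Celery canvas built above (a sketch; not executable without a
# configured broker): a chain that fans out to every responder, re-joins
# through the no-op (the linked workaround for celery issue 3585), optionally
# fans out again for follow-ups, and ends in one combining task:
#
#   preprocess
#     -> group(respond_to for each responder)
#     -> noop
#     [-> group(follow_up for each follow-upper)]
#     -> combine_responses_and_follow_ups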
def spell_sentence(self, sent):
    tokens = tokenize(sent, correct_spelling=False)
    tokens = [self.spell_token(t) for t in tokens]
    ret = U(detokenize(tokens))
    if config.debug:
        print("Spelling '%s' with ew=%s gives '%s'" %
              (sent, self.extra_words, ret))
    return ret
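# Example flow (hypothetical spell_token corrections): "helo wrld" is split
# into ['helo', 'wrld'], corrected token by token to ['hello', 'world'], and
# detokenized back to "hello world".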
def dump(self, filename=None, compressed=True, pretty=True):
    '''Dump the device window hierarchy, optionally saving it to a local file.'''
    content = self.server.jsonrpc.dumpWindowHierarchy(compressed, None)
    if filename:
        with open(filename, "wb") as f:
            f.write(content.encode("utf-8"))
    if pretty and "\n " not in content:
        xml_text = xml.dom.minidom.parseString(content.encode("utf-8"))
        content = U(xml_text.toprettyxml(indent='  '))
    return content
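# Example use (assumes a connected uiautomator device object `d`):
#   xml_str = d.dump()            # pretty-printed hierarchy as a string
#   d.dump('window_dump.xml')     # additionally writes the XML to a file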
def main():
    global TASKS
    # Append re-executions
    TASKS += [t.get_re_execution() for t in TASKS]
    # Initialization
    gama = [t for t in TASKS if t.X == Criticality.HI]
    cee = [t for t in TASKS if t.X == Criticality.LO]
    U_HI_LO = U(TASKS, Criticality.HI, Criticality.LO)
    U_HI_HI = U(TASKS, Criticality.HI, Criticality.HI)
    U_LO_LO = U(TASKS, Criticality.LO, Criticality.LO)
    cee = sorted([t for t in cee if t.TYPE == TaskType.PRIMARY],
                 key=lambda t: t.u(Criticality.LO)) \
        + sorted([t for t in cee if t.TYPE == TaskType.RE_EXECUTION],
                 key=lambda t: t.u(Criticality.LO))
    x1 = U_HI_LO / (1 - U_LO_LO)
    x2 = (1 - U_HI_HI) / U_LO_LO
    x = x2
    if x2 < x1:
        raise AlgorithmException(f"Not Schedulable: x2({x2}) < x1({x1})")
    else:
        while len(cee) > 1:
            u_LO = cee[0].u(Criticality.LO)
            U_HI_LO += u_LO
            U_HI_HI += u_LO
            U_LO_LO -= u_LO
            x1 = U_HI_LO / (1 - U_LO_LO)
            x2 = (1 - U_HI_HI) / U_LO_LO
            if x1 <= x2:
                gama.append(cee[0])
                x = x2
                cee.pop(0)
            else:
                return x
        return x
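# Numeric sketch of the schedulability window checked above (pure arithmetic,
# no Task objects): with U_HI_LO = 0.3, U_LO_LO = 0.4 and U_HI_HI = 0.5,
#   x1 = 0.3 / (1 - 0.4) = 0.5
#   x2 = (1 - 0.5) / 0.4 = 1.25
# x1 <= x2, so a valid scaling factor x in [0.5, 1.25] exists; the loop then
# keeps moving LO tasks into gama while the inequality still holds.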
def async_set_article(session_key, article, talker_names):
    article = U(article)
    # set a failsafe version of the article
    db.set(article_key(session_key),
           pickle.dumps({'text': article, 'corefed_text': article}, -1))
    talkers = [
        async_talkers[tn] for tn in talker_names
        if not getattr(async_talkers[tn].klass.set_article, '_no_op', False)
    ]
    sa = dict(immutable=True)
    if config.celery_timeouts:
        sa['soft_time_limit'] = config.talker_article_timeout
        sa['time_limit'] = config.talker_article_timeout + 2
    job = (AsyncBot.preprocessor.set_article.signature(
        (article, session_key), **sa) |
        celery.group(
            t.set_article.signature((session_key, ), **sa)
            for t in talkers))
    return job.delay()
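# Canvas shape for article ingestion (sketch): the preprocessor's set_article
# runs first, then every talker's set_article fans out in parallel:
#
#   preprocessor.set_article -> group(set_article for each talker)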
def lower(s):
    return U(s).lower()
def set_article(self, state, article_dict):
    article = U(article_dict['text'])
    doc_zero = word_tokenize(article)
    article_lower = article.lower()
    doc_zero = [w.lower().encode('utf8') for w in doc_zero]
    titles = self.find_phrases(doc_zero)
    length_for_phrases = {}
    for t in titles:
        L = t.split()
        if len(L) > 1:
            print('FOUND PHRASE: ', t)
            length_for_phrases[t] = len(L)
        else:
            idf = self.TA.get_idf(t.lower())
            if idf > 7.5:
                print('FOUND PHRASE: ', t, idf)
                length_for_phrases[t] = 1
    print()
    term_score = self.TA.idf_term_score(doc_zero)
    new_query = []
    for i, (score, term) in enumerate(term_score):
        if i <= 5 or score >= 12.0 and not set(term) <= digits:
            new_query.append(term)
    print('QUERYING SIMPLE-WIKI:', new_query)
    res = self.TA.find_document_zero(new_query).items()
    S = []
    position = 0
    score_for_title = dd(lambda: 0)
    for t, search_score in sorted(res, key=lambda x: -x[1])[:12]:
        score_for_title[t] = 0.7 * search_score
    for t, length in length_for_phrases.items():
        if length == 1:
            sc = 0.3
        elif length == 2:
            sc = 0.7
        elif length == 3:
            sc = 1.0
        else:
            sc = 1.4
        score_for_title[t] += 0.6 * sc
    for t, search_score in score_for_title.items():
        print('TITLE:', search_score, t, t in self.sentences)
        if t in self.sentences:
            first = True
            for s in self.sentences[t]:
                A = document_score(t, article_lower, search_score, position, True)
                B = self.sentence_score(doc_zero, s, t)
                if first and t in length_for_phrases:
                    B += 0.2
                S.append(((1.5 * A + B) / 2.5, s))
                # print('  ', s)
                first = False
            position += 1
    S.sort()
    S.reverse()
    print()
    for v, s in S[:3]:
        print('BRAINY SENTENCE:', s, v)
    print('TOTAL NUMBER OF BRAINY SENTENCES:', len(S))
    print()
    state.sentences = S
    return state
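# Scoring sketch for set_article above: a title returned by search with
# search_score = 1.0 contributes 0.7 * 1.0 = 0.7; if it was also detected as
# a two-word phrase it gains 0.6 * 0.7 = 0.42, giving score_for_title = 1.12.
# Each of its sentences is then ranked by (1.5 * A + B) / 2.5, a blend of the
# document-level score A and the sentence-level score B.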
def lower_string(s):
    return U(s).lower().encode('utf8')
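# The split between the two helpers: lower() keeps a unicode string while
# lower_string() returns UTF-8 bytes (assuming U is the project's to-unicode
# coercion helper), e.g.:
#   lower(u'Łódź')        -> u'łódź'
#   lower_string(u'Łódź') -> b'\xc5\x82\xc3\xb3d\xc5\xba'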
def _respond_to(self, state, last_user_utt_dict, last_bot_utt, user_utt_dict):
    del last_user_utt_dict  # unused
    question = user_utt_dict['corefed_utt']
    original_question = question
    question = question.strip()
    # Precalculate score modifiers
    q_bonus = question_bonus(original_question)
    if USE_IDF_SCALING:
        question_tags = user_utt_dict['corefed_tags']
        score_idf_bonus = idf_score_modifier(question_tags)
        print('SWT idf_score: ', score_idf_bonus)
    if is_definition(question.lower()):
        def_bonus = DEFINITION_BONUS
    else:
        def_bonus = 0.0
    question = delete_def_prefix(question.lower())
    wiktionary_query = question
    # This doesn't do very much; the vast majority of these scenarios
    # is solved by idf scaling.
    if ((question in greetings or question in popular)
            and random.random() < GREETING_PROBABILITY):
        self.defult_answer_update_state(state)
        return state, U(self.dont_want + question), 0.01
    query_words = word_tokenize(lower_string(question))
    iwaslast = self.was_i_last(state.previous_answer, last_bot_utt)
    # If SWT talked, remember what it said
    if iwaslast:
        self.TA.ban(state.previous_title)
        state.what_i_was_defining[state.previous_word] += 1
        print('Increasing', state.previous_word)
    # Check if the user chose one of the followup topics
    if iwaslast:
        chosen_continuation = self.follow_up_continuation(state, query_words)
    else:
        chosen_continuation = ''
    # If no followup was chosen, search Wiki for a phrase to define
    if not chosen_continuation:
        vr = self.TA.query(query_words)
        if not vr:
            response_value = 0
            to_define = None
        else:
            response_value, to_define = vr
            response_value = max(response_value, 0.)**0.5
            # response_value is now approximately in [0, 1]
            response_value /= 4.0
            response_value = min(response_value, 1.)
            response_value *= (1 - DEFINITION_BONUS - 2 * FOLLOW_UP_BONUS)
        continuation_multiplier = 1.0
    # Otherwise define the chosen followup
    else:
        response_value = 1 - 2 * FOLLOW_UP_BONUS
        to_define = chosen_continuation
        continuation_multiplier = 4.0

    def apply_score_mods(sc):
        if USE_IDF_SCALING:
            sc *= score_idf_bonus
        sc += q_bonus
        sc *= continuation_multiplier
        return sc

    # Continue only if SWT has something to talk about
    if to_define is None:
        self.defult_answer_update_state(state)
        return state, U(self.def_resp), 0
    state.previous_title = to_define
    txt = get_definition(to_define)
    # Probably not needed, but I'm not sure. It doesn't hurt -- Maciek
    if not txt:
        self.defult_answer_update_state(state)
        return state, U(strange_topic(to_define)), 0.1
    if has_pronoun(question):
        response_value -= PRONOUN_PENALTY
    # Multiplier based on how many important words from the query
    # appear in the answer.
    query_words_set = set(query_words)
    answer_words_set = set(word_tokenize(lower_string(txt + ' ' + to_define)))
    total_mult = (
        (0.1 + self.TA.idf_sum(query_words_set & answer_words_set)) /
        (0.1 + self.TA.idf_sum(query_words_set)))
    response_value *= total_mult**0.5
    print('First response value=', response_value)
    resp_key = title_key(lower_string(to_define))
    state.followers.add(resp_key)
    # If the match is good enough, continue with it and try to find
    # followup topics.
    if response_value > 0.55:
        new_topics = follow_ups(resp_key)
    # Otherwise try to get a wiktionary definition
    else:
        new_topics = []
        wiktionary_answer = get_wiktionary_definition(wiktionary_query)
        if wiktionary_answer:
            wiktionary_answer = U(wiktionary_answer)
            state.previous_word = wiktionary_query
            state.previous_answer = wiktionary_answer
            num_defs = state.what_i_was_defining[wiktionary_query]
            value = (0.9 + def_bonus) * (WIKTIONARY_DISCOUNT**num_defs)
            if len(original_question.split()) == 1:
                length = len(original_question)
                if length <= 3:
                    value *= 0.5
                elif length <= 5:
                    value *= 0.7
            return state, wiktionary_answer, apply_score_mods(value)
        else:
            state.previous_word = None
            # Currently no followups are allowed after a Wiktionary answer
    # If the base answer is good enough and we have some followups, add them
    if new_topics and total_mult > FOLLOW_UP_THR:
        candidate_pairs = [
            (val, title) for (val, title) in new_topics[1:]
            if (':' not in title and title not in state.followers)][:2]
        subjects = [title for (val, title) in candidate_pairs]
        val = sum([val * FOLLOW_UP_BONUS for (val, title) in candidate_pairs])
        state.followers.update(subjects)
        if len(subjects) == 1:  # quite impossible
            txt += ' You will probably be interested in ' + subjects[0]
        elif len(subjects) > 1:
            txt += ' ' + follow_up_text(subjects[0], subjects[1])
        response_value += val
        state.previous_followup = [key_title(s) for s in subjects]
    else:
        state.previous_followup = None
    txt = U(txt)
    state.previous_answer = txt
    # Modify and normalize the score
    response_value += def_bonus
    response_value = min(1., max(0., response_value))
    return state, txt, apply_score_mods(response_value)
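# Overlap-multiplier sketch for _respond_to above: if the query words carry
# idf_sum = 20.0 and the words shared with the answer carry idf_sum = 12.0,
#   total_mult = (0.1 + 12.0) / (0.1 + 20.0) ≈ 0.602
# and the response value is scaled by 0.602**0.5 ≈ 0.776, so answers that
# echo the high-idf query words keep most of their score.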
import matplotlib.pyplot as plt
import numpy as np

from utils import U

if __name__ == "__main__":
    x = np.linspace(-1.5, 1.5, 1000)
    # plt.plot(x, Ur(x, 1, 0.2, 1))
    # plt.show()
    t = np.linspace(0, 10, 1000)
    # plt.plot(t, f(t, 1.0))
    # plt.show()
    X, T = np.meshgrid(x, t)
    plt.contourf(X, T, U(X, T, alpha=0.2, tau=1.0, flashing=True), levels=100)
    plt.colorbar()
    plt.show()