def warmup_nlp():
    """Populate the heavy NLP caches ahead of forking.

    Doing this work once before the fork lets the forked processes benefit
    from the kernel's Copy-on-Write optimization. The imported callables are
    also published as the module globals ``_extract_intentions`` and
    ``_classify_content``.
    """
    global _extract_intentions, _classify_content

    logger.info("warming up NLP code")

    # Imported lazily so io_workers do not pull this package into memory too.
    from solariat_nlp import (extract_intentions, classify_content)

    _extract_intentions = extract_intentions
    _classify_content = classify_content

    # Run a couple of sample posts through the pipeline to fill the caches.
    warmup_posts = (
        "Initializing and filling up the caches",
        "Processing multiple. Utterances! In one post >:)",
    )
    for post in warmup_posts:
        extract_intentions(post)

    logger.info('done')
def handle_reject(self, content):
    """Train on ``content`` with label 0 and store it as a 'rejected' item.

    The content is vectorised via ``make_post_vector`` (together with its
    extracted speech acts), fed to ``self.clf.train`` with label ``0``,
    persisted as a ``TextChannelFilterItem`` and finally ``self`` is saved.
    """
    vector = self.make_post_vector({
        'content': content,
        'speech_acts': extract_intentions(content),
    })
    self.clf.train([vector], [0])
    TextChannelFilterItem.objects.create(
        content=content,
        channel_filter=self,
        filter_type='rejected',
        vector=vector,
    )
    self.save()
def handle_accept(self, content):
    """Train on ``content`` with label 1 and store it as an 'accepted' item.

    The content is vectorised via ``make_post_vector`` (together with its
    extracted speech acts), fed to ``self.clf.train`` with label ``1``,
    persisted as a ``TextChannelFilterItem`` and finally ``self`` is saved.
    """
    vector = self.make_post_vector({
        'content': content,
        'speech_acts': extract_intentions(content),
    })
    self.clf.train([vector], [1])
    TextChannelFilterItem.objects.create(
        content=content,
        channel_filter=self,
        filter_type='accepted',
        vector=vector,
    )
    self.save()
def batch_predict(self, content_list):
    """Score every text in ``content_list``.

    Returns a list of ``{'text': ..., 'score': ...}`` dicts in input order.
    Texts that ``self.helper.match`` does not accept get a score of ``0``;
    the rest are scored by ``self._predict_fit`` on the text plus its
    extracted speech acts.
    """
    def score_for(text):
        # Skip the speech-act extraction for texts the helper does not match.
        if not self.helper.match(text, self):
            return 0
        features = {
            'content': text,
            'speech_acts': extract_intentions(text),
        }
        return self._predict_fit(features)

    return [{'text': text, 'score': score_for(text)} for text in content_list]
def create_by_user(self, user, **kw):
    """Create an event for an incoming query on behalf of ``user``.

    :param user: GSA user whose credentials are passed on when a web
        profile has to be created
    :param kw: event data. ``query`` is required (asserted) and
        ``channels`` must be a non-empty sequence whose first element is a
        ``Channel`` or a channel id. When ``actor_id`` is absent, the
        optional ``browser_signature``, ``browser_cookie``, ``user_id``
        and ``session_id`` values are used to create a ``WebProfile``.
        Keys that are not in ``FAQQueryEvent.fields`` are dropped before
        the event is created.
    :return: the event returned by ``FAQEventManager.create``
    """
    assert 'query' in kw, 'Missing required "query" parameter in kwargs=%s' % kw

    channel = kw['channels'][0]
    if not isinstance(channel, Channel):
        # A bare id was supplied; resolve it to the Channel document.
        channel = Channel.objects.get(id=channel)

    if 'actor_id' not in kw:
        browser_signature = kw.get('browser_signature')
        browser_cookie = kw.get('browser_cookie')
        user_id = kw.get('user_id')
        session_id = kw.get('session_id')
        # With no identifying data at all, fall back to the shared
        # anonymous session id.
        if not (session_id or user_id or browser_cookie or browser_signature):
            session_id = ANONYMOUS_FAQ_ID

        profile = WebProfile.objects.create_by_user(
            user,
            browser_signature=browser_signature,
            browser_cookie=browser_cookie,
            user_id=user_id,
            session_id=session_id)
        kw['actor_id'] = profile.id

        # Accounts with the journeys app use a customer profile wrapping the
        # web profile as the actor instead.
        if channel.account and APP_JOURNEYS in channel.account.available_apps:
            CustomerProfile = channel.account.get_customer_profile_class()
            customer_profile = CustomerProfile.objects.create(
                account_id=channel.account.id)
            customer_profile.add_profile(profile)
            kw['actor_id'] = customer_profile.id

    kw.pop('safe_create', None)
    kw['is_inbound'] = True

    try:
        lang = detect_prob(kw['query'])[0]
    except (DetectorSetupError, LanguageInconclusiveError):
        # Detection failed or was inconclusive: assume English.
        lang = Language(('en', 1.0))
    kw['speech_acts'] = extract_intentions(kw['query'], lang=lang.lang)

    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating its live key view raises RuntimeError on Python 3.
    for field in list(kw):
        if field not in FAQQueryEvent.fields:
            del kw[field]

    event = FAQEventManager.create(self, **kw)
    return event
def predict(self, content):
    """Score a single piece of ``content``.

    Returns ``0`` when ``self.helper.match`` does not accept the content;
    otherwise returns ``self._predict_fit`` applied to the content plus its
    extracted speech acts.
    """
    if self.helper.match(content, self):
        features = {
            'content': content,
            'speech_acts': extract_intentions(content),
        }
        return self._predict_fit(features)
    return 0