def lookup_history(self, event, lookback_window):
    """Return the stored events for *event*'s actor that occurred within
    `lookback_window` seconds before (and up to) the event's creation time.

    Event ids encode (actor_num, timestamp), so a per-actor time window
    maps directly onto an id range query.
    """
    actor, _ = unpack_event_id(event.id)
    window_start = event._created - timedelta(seconds=lookback_window)
    low = pack_event_id(actor, utc(window_start))
    high = pack_event_id(actor, utc(event._created))
    # Materialize the cursor with [:] so callers get a concrete sequence.
    return self.find(id__gte=low, id__lte=high)[:]
def events_for_actor(self, start, end, actor_num):
    """Fetch the events belonging to one actor between *start* and *end*
    (inclusive), using the packed (actor_num, timestamp) id encoding.
    """
    low = pack_event_id(actor_num, utc(start))
    high = pack_event_id(actor_num, utc(end))
    if low == high:
        # Degenerate window: a single instant maps to exactly one id,
        # so an equality lookup is enough.
        return self.find(id=high)
    return self.find(id__gte=low, id__lte=high)
def construct_feature_space(self, event, features_metadata=None):
    """Build the feature-vector space for *event* from the actor's recent
    event history.

    Looks back `self.lookback_window` seconds before the event's creation
    time, vectorizes each historical event, and returns the list of
    non-None vectors.

    :param event: the anchor event whose actor history is examined
    :param features_metadata: optional metadata forwarded to `vectorize`
    :return: list of event vectors (may be empty)
    """
    actor_num, _ = unpack_event_id(event.id)
    window_start = event._created - timedelta(seconds=self.lookback_window)
    id_lower_bound = pack_event_id(actor_num, utc(window_start))
    id_upper_bound = pack_event_id(actor_num, utc(event._created))
    event_sequence = Event.objects(id__lte=id_upper_bound, id__gte=id_lower_bound)[:]
    # BUG FIX: the original loop reused the name `event` for the loop
    # variable, clobbering the `event` parameter inside the loop body.
    # Renamed to `history_event` so the parameter stays intact.
    vector_space = []
    for history_event in event_sequence:
        event_vector = self.vectorize(history_event, features_metadata)
        if event_vector is not None:
            vector_space.append(event_vector)
    return vector_space
def gen_id(cls, is_inbound, actor_id, _created, in_reply_to_native_id, parent_event=None):
    """Generate a packed event id from the resolved actor number and the
    creation timestamp.

    NOTE(review): `is_inbound`, `in_reply_to_native_id` and `parent_event`
    are accepted for signature compatibility with sibling `gen_id`
    implementations but are ignored here; the actor is always resolved
    with `is_inbound=True` — confirm this is intentional.
    """
    actor = cls.get_actor(True, actor_id)
    return pack_event_id(actor.actor_num, _created)
def gen_id(cls, account, actor_id, _created, in_reply_to_native_id, parent_event=None):
    """Generate a packed event id for a customer of *account*.

    Resolves the account-specific customer profile class, reads the
    customer's actor number, and packs it with the creation timestamp.
    `in_reply_to_native_id` and `parent_event` are accepted for signature
    compatibility and not used here.
    """
    profile_cls = account.get_customer_profile_class()
    customer = profile_cls.objects.get(id=actor_id)
    return pack_event_id(customer.actor_num, _created)
def _get_id_query(from_dt, to_dt, model):
    """Build a raw Mongo query selecting all *model* documents whose packed
    `_id` falls between *from_dt* and *to_dt* (any actor).

    Although USER_NUM_WIDTH is defined as 30 in utils/id_encoder.py, a
    value >= 20 here would raise OverflowError: MongoDB can only handle
    up to 8-byte ints, so the actor-number span is capped at 20 bits.
    """
    USER_NUM_WIDTH = 20
    lowest_actor = 0
    highest_actor = (1 << USER_NUM_WIDTH) - 1
    # Force both endpoints to UTC before packing (naive datetimes assumed).
    start = from_dt.replace(tzinfo=pytz.UTC)
    end = to_dt.replace(tzinfo=pytz.UTC)
    return {
        '_id': {
            '$gte': pack_event_id(lowest_actor, start),
            '$lte': pack_event_id(highest_actor, end),
        },
        '_t': model.__name__,
    }
def create_by_user(self, user, **kw):
    """Create an NPS outcome post on behalf of *user*.

    Forces the post to be inbound, resolves the actor number (customer
    profile, user profile, or anonymous 0), rejects duplicates by both
    packed event id and native id, then delegates creation to the parent
    PostManager.

    :param user: acting user; supplies the account's profile classes
    :param kw: post fields; must contain 'case_number', '_created' and
        'native_id'
    :raises Exception: if an NPSOutcome with the same packed id or the
        same native id already exists
    """
    post_lang = get_language(kw)
    kw['lang'] = post_lang
    if 'content' not in kw:
        kw['content'] = 'No verbatim provided'
    assert kw.get('case_number'), kw
    kw['is_inbound'] = True
    kw['safe_create'] = True
    # We need to override posts for NPS,
    # so we need to check if post exist,
    # if post exist let remove it and re-create
    # NOTE(review): despite the comment above, the duplicate checks below
    # raise instead of removing/re-creating — confirm intended behavior.
    if kw.get('actor_id'):
        CustomerProfile = user.account.get_customer_profile_class()
        actor_num = CustomerProfile.objects.get(
            kw.get('actor_id')).actor_num
    elif kw.get('user_profile'):
        # TODO: [gsejop] create anonymous CustomerProfile?
        # actor_num = kw['user_profile'].customer_profile.actor_num
        actor_num = kw['user_profile'].actor_num
    else:
        actor_num = 0
    # NOTE(review): direct key access — raises KeyError when 'user_profile'
    # is absent (e.g. only 'actor_id' was supplied); kw.get('user_profile')
    # would be safer. Confirm all callers pass the key.
    if kw['user_profile']:
        kw['profile_data'] = kw['user_profile'].data
    # Packed id encodes (actor_num, creation time) — used as the dedup key.
    nps_event_id = pack_event_id(actor_num, kw['_created'])
    try:
        nps_post = self.get(id=nps_event_id)
        raise Exception('NPSOutcome with nps_event_id: %s exists already' % nps_event_id)
    except NPSOutcome.DoesNotExist:
        pass
    try:
        nps_post = NPSOutcome.get_by_native_id(kw['native_id'])
        raise Exception('NPSOutcome with native_id: %s exists already' % kw['native_id'])
    except NPSOutcome.DoesNotExist:
        pass
    normalize_post_params(user, kw)
    post = super(NPSOutcomeManager, self).create_by_user(user, **kw)
    # self._postprocess_new_post(user, post, sync)
    # _set_channel_and_tag_assignments(post)
    # post.compute_journey_information()
    # why we needed this? PostManager.create_by_user already did this
    return post
def gen_id(cls, is_inbound, actor_id, _created, in_reply_to_native_id, parent_event=None):
    """Generate a packed event id, keyed on the customer's actor number.

    The id is always (customer actor_num, timestamp) so that an event
    sequence can be fetched with a single id range query per customer.
    For an outbound event that replies to an inbound one, the parent's
    actor number is used instead of the agent's.

    NOTE: resolving the actor can trigger reads across several profile
    classes, which is expensive. TODO: pass actor_num directly / cache.
    """
    # Default: the event's own actor.
    actor_num = cls.get_actor(is_inbound, actor_id).actor_num
    if in_reply_to_native_id and not is_inbound:
        # Outbound reply: re-key on the inbound parent's actor so the
        # whole conversation shares one contiguous id range.
        try:
            parent = Event.get_by_native_id(in_reply_to_native_id)
        except cls.DoesNotExist:
            pass  # parent unknown — keep the actor's own number
        else:
            actor_num = parent.actor.actor_num
    elif parent_event:
        parent_actor = parent_event.actor
        assert isinstance(parent_actor, (UserProfile, DynamicImportedProfile)), parent_actor
        actor_num = parent_actor.actor_num
    return pack_event_id(actor_num, _created)
def process_event_batch(account_id, batch_size):
    """Process one batch of unprocessed events for *account_id*.

    Pipeline: (1) bulk-load events, customers, in-progress journeys and
    agents; (2) match every event against the account's journey types and
    stages, updating existing journeys or creating new ones in memory;
    (3) persist everything with unordered bulk upserts.

    NOTE(review): this file was reconstructed from a single collapsed
    line; block nesting was inferred from statement order — verify against
    version control before relying on edge-case behavior.

    :param account_id: account whose channels/journeys are processed
    :param batch_size: maximum number of events pulled in this run
    """
    start_time = datetime.now()
    # TODO: Once it works, break this up in specific sub-calls to make code more readable.
    account = Account.objects.get(account_id)
    CustomerProfile = account.get_customer_profile_class()
    AgentProfile = account.get_agent_profile_class()
    # TODO: account_id should be used for account specific collections
    customer_event_map = defaultdict(list)   # actor_num -> [(event, agent_id)]
    journey_event_map = defaultdict(list)    # customer_id -> [journeys]
    customer_profile_map = dict()            # actor_num -> customer profile
    agent_profile_map = dict()               # agent id -> agent profile
    journey_type_map = dict()                # display_name -> JourneyType
    num_to_id_customer = dict()              # actor_num -> customer id
    customer_id_to_num = dict()              # customer id -> actor_num
    for journey_type in JourneyType.objects.find(account_id=account_id):
        journey_type_map[journey_type.display_name] = journey_type
    agent_ids = set()
    # TODO: After account specific collection is done this should work just fine / uncomment
    # event_batch = Event.objects.find(_was_processed=False).sort(_created=1)[:batch_size]
    event_batch = Event.objects.find(
        channels__in=[c.id for c in account.get_current_channels()],
        _was_processed=False).sort(_created=1)[:batch_size]
    if not event_batch:
        print "No new events found"
        return
    # Group events by actor; outbound events also record the acting agent.
    for event in event_batch:
        actor_num, _ = unpack_event_id(event.id)
        agent_id = None
        if not event.is_inbound:
            agent_id = event.actor_id
            agent_ids.add(agent_id)
        customer_event_map[actor_num].append((event, agent_id))
    all_customers = CustomerProfile.objects.find(actor_num__in=customer_event_map.keys())[:]
    for customer in all_customers:
        customer_profile_map[customer.actor_num] = customer
        num_to_id_customer[customer.actor_num] = customer.id
        customer_id_to_num[customer.id] = customer.actor_num
    all_active_journeys = CustomerJourney.objects.find(
        account_id=account.id,
        customer_id__in=num_to_id_customer.values(),
        status=JourneyStageType.IN_PROGRESS)[:]
    # One id-range clause per active journey; ids pack (actor_num, time).
    event_sequence_query = []
    for journey in all_active_journeys:
        journey._event_sequence = []
        journey_event_map[journey.customer_id].append(journey)
        for agent in journey.agent_ids:
            agent_ids.add(agent)
        actor_num = customer_id_to_num[journey.customer_id]
        id_lower_bound = pack_event_id(actor_num, utc(journey.first_event_date))
        id_upper_bound = pack_event_id(actor_num, utc(journey.last_event_date))
        event_sequence_query.append({'_id': {'$gte': id_lower_bound, '$lte': id_upper_bound}})
    actor_id_events = defaultdict(list)  # customer_id -> events in time order
    if event_sequence_query:
        # Single $or query loads every journey's event history at once.
        all_required_events = Event.objects.find(**{'$or': event_sequence_query})[:]
        for event in sorted(all_required_events, key=lambda x: x.created_at):
            actor_num, _ = unpack_event_id(event.id)
            customer_id = num_to_id_customer[actor_num]
            for journey in journey_event_map[customer_id]:
                if utc(journey.first_event_date) <= utc(event.created_at) <= utc(journey.last_event_date):
                    journey._event_sequence.append(event)
            actor_id_events[customer_id].append(event)
    all_agents = AgentProfile.objects.find(id__in=agent_ids)[:]
    for agent in all_agents:
        agent_profile_map[agent.id] = agent
    print "Finished loading all the required data in " + str(datetime.now() - start_time)
    start_time = datetime.now()
    # All ongoing journeys for this customers are considered. For all of the
    # customers that don't have any active journeys we need to figure out
    # what new type to start.
    for customer_num, customer_events in customer_event_map.iteritems():
        # TODO: If we need to, this would be a point where we can split based on customer id
        if customer_num not in customer_profile_map:
            # Events from different account. Will be fixed by account specific collections
            continue
        customer = customer_profile_map[customer_num]
        for (event, agent) in customer_events:
            event._was_processed = True
            actor_num, _ = unpack_event_id(event.id)
            customer_id = num_to_id_customer[actor_num]
            journey_candidates = journey_event_map[customer.id]
            # Explicit 'JourneyType__Stage' mappings on the event win over
            # stage evaluation.
            direct_mappings = dict()
            for mapping in event.journey_mapping:
                journey_type_name, journey_stage_name = mapping.split('__')
                direct_mappings[journey_type_name] = journey_stage_name
            for journey_type in journey_type_map.values():
                found_journey_stage = None
                if journey_type.display_name in direct_mappings:
                    found_journey_stage = [stage for stage in journey_type.available_stages
                                           if stage.display_name == direct_mappings[journey_type.display_name]][0]
                else:
                    # Otherwise let each stage decide whether this event matches.
                    for journey_stage in journey_type.available_stages:
                        if journey_stage.evaluate_event(event, customer, actor_id_events.get(customer_id, [])):
                            found_journey_stage = journey_stage
                            break
                found_match = False
                # First step is to try and find it in existing journeys
                for journey in journey_candidates:
                    # All the currently in progress or completed journeys that are matched to same stage
                    if journey.journey_type_id == journey_type.id:
                        # and (journey.status == JourneyStageType.IN_PROGRESS or
                        # journey.f_current_stage == found_journey_stage):
                        found_match = True
                        journey.agents = [agent_profile_map[a_id] for a_id in journey.agent_ids]
                        journey.customer_profile = customer
                        journey.current_event = event
                        journey.journey_type = journey_type
                        journey.process_event(event, customer,
                                              agent_profile_map[agent] if agent else None,
                                              found_journey_stage)
                        journey.event_sequence = journey.event_sequence + [event]
                if found_journey_stage:
                    # If we didn't find any match in existing journeys, create a new one. We create it in memory
                    # So as to not do any extra mongo calls.
                    if not found_match:
                        journey = CustomerJourney(customer_id=customer.id,
                                                  journey_type_id=journey_type.id,
                                                  first_event_date=event.created_at,
                                                  account_id=account_id,
                                                  status=JourneyStageType.IN_PROGRESS,
                                                  node_sequence=[],
                                                  node_sequence_agr=[],
                                                  journey_attributes_schema=journey_type.journey_attributes_schema)
                        journey._event_sequence = []
                        journey_candidates.append(journey)
                        journey.agents = [agent_profile_map[a_id] for a_id in journey.agent_ids]
                        journey.customer_profile = customer
                        journey.current_event = event
                        journey.journey_type = journey_type
                        journey.process_event(event, customer,
                                              agent_profile_map[agent] if agent else None,
                                              found_journey_stage)
                        journey.event_sequence = journey.event_sequence + [event]
                        # TODO: As it is, it will still be one call per journey
                        journey_type.journeys_num += 1
    print "Finished computing journey info in " + str(datetime.now() - start_time)
    start_time = datetime.now()
    # Upsert all journeys, all customer profiles, all agent profiles, all events
    if all_agents:
        bulk_agents = AgentProfile.objects.coll.initialize_unordered_bulk_op()
        for agent in all_agents:
            bulk_agents.find({"_id": agent.id}).upsert().update({'$set': agent.data})
        bulk_agents.execute()
    if all_customers:
        bulk_customers = CustomerProfile.objects.coll.initialize_unordered_bulk_op()
        for customer in all_customers:
            bulk_customers.find({"_id": customer.id}).upsert().update({'$set': customer.data})
        bulk_customers.execute()
    if event_batch:
        bulk_events = Event.objects.coll.initialize_unordered_bulk_op()
        for event in event_batch:
            bulk_events.find({"_id": event.id}).upsert().update({'$set': event.data})
        bulk_events.execute()
    if journey_event_map.values():
        bulk_journeys = CustomerJourney.objects.coll.initialize_unordered_bulk_op()
        have_journeys = False
        for customer_journeys in journey_event_map.values():
            for journey in customer_journeys:
                have_journeys = True
                # New in-memory journeys have no id yet and must be inserted.
                if journey.id:
                    bulk_journeys.find({"_id": journey.id}).upsert().update({'$set': journey.data})
                else:
                    bulk_journeys.insert(journey.data)
        # A bulk op with zero queued operations raises on execute(), so guard.
        if have_journeys:
            bulk_journeys.execute()
        else:
            print "No journeys to upsert"
    print "Finished all the bulk inserts in " + str(datetime.now() - start_time)
    for journey_type in journey_type_map.values():
        journey_type.compute_cardinalities()
        journey_type.update(journeys_num=journey_type.journeys_num)
def create_by_user(self, user, **kw):
    """Create a chat post on behalf of *user*.

    Ensures a chat session id, resolves the author profile, packs the
    post `_id` from the conversation's customer actor number, then
    creates and post-processes the post.

    NOTE(review): reconstructed from a collapsed one-line source; block
    nesting was inferred from statement order — verify against history.

    :param user: acting user; supplies the account's profile classes
    :param kw: post fields; must include 'actor_id' and 'is_inbound',
        and 'safe_create' must be truthy
    :raises AppException: when called without safe_create=True
    """
    safe_create = kw.pop('safe_create', False)
    if not safe_create:
        raise AppException("Use db.post.utils.factory_by_user instead")
    add_to_queue = kw.pop('add_to_queue', False)
    sync = kw.pop('sync', False)  # We might consider dropping this entirely
    self.doc_class.patch_post_kw(kw)
    # handling extra_fields
    chat_data = kw.pop('chat_data', None)
    kw.setdefault("extra_fields", {})
    if chat_data:
        kw["extra_fields"].update({"chat": chat_data})
    kw["extra_fields"].setdefault("chat", {})
    # Session id may come from kw directly or from the chat extra fields;
    # generate a fresh one if neither is present.
    session_id = kw.get(
        "session_id", None) or kw["extra_fields"]["chat"].get("session_id")
    if not session_id:
        session_id = self.gen_session_id()
    kw["session_id"] = session_id
    # NOTE(review): falsy default is dict() rather than None — presumably
    # only truthiness is relied upon; confirm.
    chat_created_at = chat_data.get('created_at', None) if chat_data else dict()
    if chat_created_at:
        kw['_created'] = utc(parse_datetime(chat_created_at))
    assert 'actor_id' in kw, "No 'actor_id' provided with chat message, could not infer it based on " + str(
        kw)
    assert 'is_inbound' in kw, "No 'is_inbound' provided with chat message, could not infer it based on " + str(
        kw)
    CustomerProfile = user.account.get_customer_profile_class()
    AgentProfile = user.account.get_agent_profile_class()
    if 'user_profile' not in kw:
        # If we have customer id but no specific profile, try to find it in our system
        if kw['is_inbound']:
            customer_or_agent = CustomerProfile.objects.get(kw['actor_id'])
        else:
            customer_or_agent = AgentProfile.objects.get(kw['actor_id'])
        profile = customer_or_agent.get_profile_of_type(ChatProfile)
        if not profile:
            profile = ChatProfile.anonymous_profile(platform='Chat')
        kw['user_profile'] = profile
    if not kw['is_inbound']:
        # We know it's outbound post, we need to figure out actor id based
        # on parent from chat session
        try:
            parent = self.doc_class.objects.find(
                session_id=session_id, is_inbound=True).sort(_created=-1).limit(1)[:][0]
        # if we can't figure it out, let's put untracked post as a parent
        except IndexError:
            parent = UntrackedChatPost()
        kw['_id'] = pack_event_id(parent.actor.actor_num, kw['_created'])
    else:
        actor_num = self.doc_class.get_actor(True, kw['actor_id']).actor_num
        kw['_id'] = pack_event_id(actor_num, kw['_created'])
        # We know that it's inbound post, but may be the first post in conversation was outbound.
        # If that's the case, then this outbound post was fired by UntrackedProfile
        # Now we can encode id using current CustomerProfile instead of UntrackedProfile
        outbount_events = self.doc_class.objects.find(
            session_id=session_id, is_inbound=False)[:]
        for e in outbount_events:
            parent_actor_num, dt = unpack_event_id(e.id)
            if parent_actor_num == 0:
                # Re-key the orphaned outbound event onto this customer:
                # delete under the old id, then save under the new one.
                e.delete()
                e.id = pack_event_id(actor_num, dt)
                e.save()
    kw['force_create'] = True
    lang_data = kw.pop('lang', Language(('en', 1)))
    # creation
    post = self.create(**kw)
    # postprocess_new_post(user, post) - failing for now, something with tag assignments
    assert post.session_id, "ChatPost should have chat session_id"
    self._set_post_lang(post, lang_data)
    postprocess_new_post(user, post, add_to_queue)
    get_service_channel(post.channel).post_received(post)
    return post
def customer_history(self, customer, limit=10):
    """Disabled legacy accessor — always raises.

    :raises RuntimeError: unconditionally; callers must use
        Event.customer_history instead.
    """
    raise RuntimeError("call Event.customer_history")
    # NOTE(review): everything below the raise is unreachable dead code,
    # kept from the pre-deprecation implementation (last 120 seconds of the
    # customer's events, newest first, capped at `limit`). Consider deleting.
    assert isinstance(customer, DynamicImportedProfile)
    id_upper_bound = pack_event_id(customer.actor_num, now())
    id_lower_bound = pack_event_id(customer.actor_num, now() - timedelta(seconds=120))
    return self.find(id__lte=id_upper_bound, id__gte=id_lower_bound).limit(limit).sort(id=-1)[:limit]