def __init__(self, channel=None, from_ts=None, to_ts=None, agents=None,
             statuses=None, languages=None, group_by='agent', plot_by='time',
             plot_type=None, no_transform=False, **kwargs):
    self.channel = channel
    self.from_ts = Timeslot(from_ts).timeslot
    self.to_ts = Timeslot(to_ts or from_ts).timeslot
    self.agents = self.ensure_agents(agents, group_by, channel)
    self.statuses = self.ensure_statuses(statuses)
    self.languages = map(get_lang_id, languages or [])
    if group_by not in ('time', 'agent', 'lang'):
        # Ignore unexpected values that may come in from the UI
        group_by = None
    self.group_by = group_by
    self.plot_by = plot_by
    self.plot_type = plot_type
    self.no_transform = no_transform
def __get_conversations(data):
    """Prepare the query params and perform the first bulk query to get
    the conversations."""
    assert data.get('level') in ("hour", "day"), data.get('level')
    channel_ts_key = "channel_ts_day" if data.get('level') == "day" else "channel_ts_hour"
    channel_ts_lower_bound = make_channel_ts(
        data.get('channel_id'), Timeslot(data.get('from'), data.get('level')))
    channel_ts_upper_bound = make_channel_ts(
        data.get('channel_id'), Timeslot(data.get('to'), data.get('level')))
    query = {
        channel_ts_key + "__lte": to_binary(channel_ts_upper_bound),
        channel_ts_key + "__gte": to_binary(channel_ts_lower_bound),
        "is_closed": True}

    if data.get('categories'):
        categories_param = []
        for cat in data.get('categories'):
            if isinstance(cat, int):
                categories_param.append(cat)
            elif isinstance(cat, (str, unicode)):
                categories_param.append(ConversationQualityTrends.get_category_code(cat))
            else:
                raise Exception("Wrong type for category param; value: %s, type: %s"
                                % (cat, type(cat)))
        query["quality__in"] = categories_param

    conversations = (Conversation.objects(**query)
                     .limit(data.get('limit'))
                     .skip(data.get('offset')))
    if data.get('sort_by') == 'time':
        conversations = conversations.sort(last_modified=1)
    return conversations
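# Hedged usage sketch for __get_conversations: the keys below are the ones
# the function body reads; the channel id, dates and category name are
# hypothetical values for illustration only, and the call assumes the
# function is reachable at module level as listed here.
sample_data = {
    'channel_id': '51f0bc8fe1f8a10e5c000000',  # hypothetical channel id
    'level': 'day',                            # must be "hour" or "day"
    'from': '2013-04-01',
    'to': '2013-04-07',
    'categories': ['good', 2],                 # hypothetical: names map to codes, ints pass through
    'limit': 100,
    'offset': 0,
    'sort_by': 'time',
}
conversations = __get_conversations(sample_data)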
def postprocess_params(self, params):
    r = params
    if 'channel_id' in r:
        r['channel'] = get_channels(self.user, r['channel_id'])
    set_languages_param(r)
    if 'from' in r and 'to' in r:
        from_date = r['from']
        to_date = r['to'] or from_date
        from_dt, to_dt = parse_date_interval(from_date, to_date)
        r['from_ts'] = Timeslot(from_dt, r['level'])
        r['to_ts'] = Timeslot(to_dt, r['level'])
    r['agents'] = get_agents(self.user, r['agents'] or [])
    r['statuses'] = get_statuses(r['statuses'], r['plot_type'])
    if r['sentiments'] is not None:
        assert r['intentions'] is None, \
            'intentions and sentiments cannot be set together'
        r['intentions'] = translate_sentiments_to_intentions(r['sentiments'])
    # for some reports we show only problem posts
    if is_problem(r['plot_type']):
        r['intentions'] = [SATYPE_NAME_TO_ID_MAP['problem']]
    # -- cleanup --
    # channel_id is only set conditionally above, so pop rather than del
    r.pop('channel_id', None)
    r.pop('from', None)
    r.pop('to', None)
    del r['sentiments']
    del r['level']
    return params
def get_prev_timeslot_range(from_ts, to_ts):
    from_tsp = from_ts.timestamp
    to_tsp = to_ts.timestamp
    ONE_DAY_SEC = 24 * 60 * 60
    # the range is inclusive, so shift back by its full length plus one day
    delta = to_tsp - from_tsp + ONE_DAY_SEC
    return (Timeslot.from_timestamp(from_tsp - delta, level=from_ts.level),
            Timeslot.from_timestamp(to_tsp - delta, level=to_ts.level))
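# Worked example for get_prev_timeslot_range, assuming the Timeslot
# constructor accepts a date string and a level as used elsewhere in this
# listing: for the inclusive 7-day window Apr 8..Apr 14 the delta is 7 days,
# so the previous window is the adjacent Apr 1..Apr 7.
prev_from, prev_to = get_prev_timeslot_range(Timeslot('2013-04-08', 'day'),
                                             Timeslot('2013-04-14', 'day'))
# prev_from covers 2013-04-01, prev_to covers 2013-04-07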
def transform_data(self, data, features):
    """
    Transform data we got from mongodb into data we can plot in the UI,
    based on the features list.
    """
    level = Timeslot(self.from_ts).level
    assert level == Timeslot(self.to_ts).level
    if self.plot_by == 'time':
        results = self.transform_time_based_plot(data, features, level)
    elif self.plot_by == 'distribution':
        results = self.transform_distribution_plot(data, features, level)
    else:
        # guard against an unknown plot_by leaving `results` unbound
        raise ValueError("Unsupported plot_by: %r" % self.plot_by)
    return {"ok": True, "level": level, "list": results.values()}
def by_time_span(self, channel=None, parent_topic=None, intentions=None,
                 statuses=None, agents=None, languages=None,
                 from_ts=None, to_ts=None, limit=100):
    # Use the aggregation framework to resolve the counts:
    #   match on channel + slot + hashed_parents [+ status [+ intention_type]]
    #   group on topic, sum(leaf or node count?)
    #   sort(count, -1)
    #   limit(100)
    F = ChannelHotTopics.F
    from_ts = Timeslot(from_ts).timeslot
    to_ts = Timeslot(to_ts or from_ts).timeslot
    time_range = list(gen_timeslots(from_ts, to_ts, closed_range=False))
    assert len(time_range) <= 7, \
        "Max allowed range is 7 days, got %s %s" % (len(time_range), time_range)

    if len(time_range) == 1:
        time_query = {F("time_slot"): time_range[0]}
    else:
        time_query = {F("time_slot"): {"$in": time_range}}

    channel_num = get_channel_num(channel)
    if parent_topic is None:
        parents = []
    else:
        parents = get_topic_hash(parent_topic)

    intention_ids = set(intentions or [ALL_INTENTIONS_ID])
    intention_ids = map(get_intention_id, intention_ids)
    statuses = set(statuses or SpeechActMap.STATUS_NAME_MAP)
    statuses = map(get_status_code, statuses)
    languages = map(get_lang_id, languages or [])

    match_query_base = {
        F("channel_num"): channel_num,
        F("status"): {"$in": statuses},
        F("hashed_parents"): parents,
    }
    match_query_base.update(time_query)

    agent_ids = [a.agent_id for a in (agents or [])] or [ALL_AGENTS]
    match_query_filters = {
        "es.at": {"$in": agent_ids},
        "es.in": {"$in": intention_ids}
    }
    match_query_filters.update(make_lang_query(languages))
    return self.execute_pipeline(match_query_base, match_query_filters, limit)
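# Hedged usage sketch for ChannelHotTopics.by_time_span: top 10 "problem"
# topics under a parent topic over a range of at most 7 days. The channel
# object and parent topic are hypothetical; 'problem' is an intention name
# used elsewhere in this listing.
hot_topics = ChannelHotTopics.objects.by_time_span(
    channel=channel,
    parent_topic='laptop',
    intentions=['problem'],
    from_ts=Timeslot('2013-04-08', 'day'),
    to_ts=Timeslot('2013-04-14', 'day'),
    limit=10)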
def test_pack_unpack_stats_id(self):
    original = (
        1234,     # channel_num
        987654,   # topic_hash
        2,        # status code
        1122334   # timeslot
    )
    st_id = pack_stats_id(*original)
    unpacked = unpack_stats_id(st_id)
    self.assertEqual(original, unpacked)

    ts = Timeslot('2013-04-22 21:40')
    original = (
        self.channel,    # channel
        "Hello World",   # topic
        'accepted',      # status
        ts               # timeslot
    )
    st_id = pack_stats_id(*original)
    unpacked = unpack_stats_id(st_id)
    self.assertEqual(original[0].counter, unpacked[0])
    self.assertEqual(get_topic_hash(original[1]), unpacked[1])
    self.assertEqual(get_status_code(original[2]), unpacked[2])
    self.assertEqual(original[3].timeslot, unpacked[3])
def _get_stats(self, closing_time, level, category=None):
    time_slot = Timeslot(closing_time, level)
    data = {"time_slot": time_slot.timeslot}
    if category is not None:
        category = ConversationQualityTrends.get_category_code(category)
        data["category"] = category
    stats = list(ConversationQualityTrends.objects(**data))
    return stats
def test_purge_none(self):
    TWO_DAYS_AGO = now() - timedelta(days=2)
    self._make_laptops_and_icecream(TWO_DAYS_AGO)

    stats = purge_stats(self.channel)
    last_purged = stats["last_purged"]
    days = stats["purge_days"]
    months = stats["purge_months"]
    self.channel.reload()
    self.assertEqual(
        datetime_to_timeslot(self.channel.last_purged, 'hour'),
        datetime_to_timeslot(last_purged, 'hour'))
    # Should have purged over 15 days of time slots since we never purged before
    self.assertEqual(len(days), 15)
    # Months purged depends on how far into the month we are when the test runs
    self.assertTrue(len(months) in [2, 3])

    import solariat_bottle.utils.purging

    class MockLocaltime(object):
        tm_mday = 6

    solariat_bottle.utils.purging.localtime = MockLocaltime
    stats = purge_stats(self.channel)
    last_purged = stats["last_purged"]
    days = stats["purge_days"]
    months = stats["purge_months"]
    self.assertEqual(len(days), 1)
    self.assertEqual(days[0], decode_timeslot(Timeslot(level='day').timeslot))
    self.assertEqual(len(months), 0)

    class MockLocaltime(object):
        tm_mday = 8

    solariat_bottle.utils.purging.localtime = MockLocaltime
    stats = purge_stats(self.channel)
    last_purged = stats["last_purged"]
    days = stats["purge_days"]
    months = stats["purge_months"]
    self.assertEqual(len(days), 1)
    self.assertEqual(len(months), 1)
    self.assertEqual(months[0], decode_timeslot(Timeslot(level='month').timeslot))
def get_stats(self, channel, agents):
    statuses = [
        SpeechActMap.POTENTIAL, SpeechActMap.ACTIONABLE,
        SpeechActMap.ACTUAL, SpeechActMap.REJECTED
    ]
    stats = ChannelTopicTrends.objects.by_time_span(channel=channel,
                                                    from_ts=Timeslot(0),
                                                    to_ts=Timeslot(None),
                                                    statuses=statuses,
                                                    group_by='status',
                                                    plot_by='distribution',
                                                    plot_type='topics',
                                                    agents=agents,
                                                    no_transform=True)
    result = {}
    for status in statuses:
        result.setdefault(status, 0)
    for s in stats:
        result[int(s['_id']['grp'])] = s['count']
    return result
def test_select_by_time_span(self):
    content = "I need a mac laptop"
    post = self._create_db_post(content)
    intention_title = post.speech_acts[0]['intention_type']
    intention_id = get_sa_type_id(intention_title)
    leafs = ["mac laptop"]
    nodes = leafs + ["laptop"]

    for level in ('hour', 'day'):
        for topic in leafs:
            res = ChannelTopicTrends.objects.by_time_span(
                channel=self.channel,
                topic_pairs=[[topic, True]],
                from_ts=Timeslot(level=level))
            self.assertEqual(len(res), 1)
            embed = res[0].filter(is_leaf=True, intention=int(intention_id))[0]
            self.assertEqual(embed.topic_count, 1)
            embed = res[0].filter(is_leaf=True, intention=int(ALL_INTENTIONS.oid))[0]
            self.assertEqual(embed.topic_count, 1)

        for topic in nodes:
            res = ChannelTopicTrends.objects.by_time_span(
                channel=self.channel,
                topic_pairs=[[topic, False]],
                from_ts=Timeslot(level=level))
            self.assertEqual(len(res), 1)
            embed = res[0].filter(is_leaf=False, intention=int(intention_id))[0]
            self.assertEqual(embed.topic_count, 1)
            embed = res[0].filter(is_leaf=False, intention=int(ALL_INTENTIONS.oid))[0]
            self.assertEqual(embed.topic_count, 1)
def postprocess_params(self, params):
    params = super(TrendsView, self).postprocess_params(params)
    get_pair = lambda x: (x['topic'], x['topic_type'] != 'node')
    to_ts = params['to_ts']
    date_now = now()
    # clamp the upper bound of the interval to the current moment
    if to_ts.timestamp_ms > datetime_to_timestamp_ms(date_now):
        params['to_ts'] = Timeslot(date_now, to_ts.level)
    params['topic_pairs'] = map(get_pair, params['topics'])
    del params['topics']
    return params
def test_select_by_time_span_2(self):
    '''
    Create multiple posts and make sure the slots for terms get aggregated.
    '''
    content = "I need a mac laptop"
    leafs = ["mac laptop"]
    nodes = leafs + ["laptop"]
    N = 5
    for i in range(N):
        self._create_db_post(content)
        time.sleep(0.01)

    for level in ('hour', 'day'):
        for topic in leafs:
            stats = ChannelTopicTrends.objects.by_time_span(
                channel=self.channel,
                topic_pairs=[[topic, True]],
                from_ts=Timeslot(level=level),
                to_ts=Timeslot(level=level))
            self.assertEqual(len(stats), 1)
            embed_stat = stats[0].filter(intention=int(ALL_INTENTIONS.oid), is_leaf=True)
            self.assertEqual(embed_stat[0].topic_count, N)

        for topic in nodes:
            stats = ChannelTopicTrends.objects.by_time_span(
                channel=self.channel,
                topic_pairs=[[topic, False]],
                from_ts=Timeslot(level=level),
                to_ts=Timeslot(level=level))
            self.assertEqual(len(stats), 1)
            embed_stat = stats[0].filter(intention=int(ALL_INTENTIONS.oid), is_leaf=False)
            self.assertEqual(embed_stat[0].topic_count, N)
def setUp(self):
    super(ConversationTest, self).setUp()
    # settings.DEBUG_STAT_UPDATE = False
    self.start_date = now()
    self.i = self.sc.inbound_channel
    self.o = self.sc.outbound_channel
    self.sc.add_username('@test')
    # Create 2 smart tags with different keywords
    self.laptop_tag = self._create_smart_tag(self.i, 'Laptops Tag',
                                             status='Active',
                                             keywords=['laptop'])
    self.display_tag = self._create_smart_tag(self.i, 'Other Tag',
                                              status='Active',
                                              keywords=['display'])
    self.from_ts_hour = Timeslot(point=self.start_date, level='hour')
def _check_account_volume(user, account):
    '''
    Handles the email warnings for volume thresholding.

    Returns a boolean flag to indicate that the monthly volume threshold
    has been exceeded.
    '''
    if not account.package or account.package.name == "Internal":
        return False
    if account.is_threshold_surpassed_sent:
        return True

    from solariat_bottle.db.account import (account_stats,
                                            VOLUME_NOTIFICATION_THRESHOLD,
                                            THRESHOLD_WARNING,
                                            THRESHOLD_SURPASSED_WARNING)

    volume_limit = account.package.volume
    month_start, month_end = Timeslot(level='month').interval
    posts = account_stats(account, user, start_date=month_start, end_date=month_end)
    number_posts = posts.get('number_of_posts')

    warning_limit = account.volume_warning_limit
    surpassed_limit = account.volume_surpassed_limit

    send_warning = False
    if warning_limit <= number_posts < surpassed_limit:
        if not account.is_threshold_warning_sent:
            # Send warning email
            send_warning = True
            percentage = str(VOLUME_NOTIFICATION_THRESHOLD["Warning"]) + "%"
            warning = THRESHOLD_WARNING
    elif number_posts >= surpassed_limit:
        if not account.is_threshold_surpassed_sent:
            # Send surpassed email
            send_warning = True
            percentage = str(VOLUME_NOTIFICATION_THRESHOLD["Surpassed"]) + "%"
            warning = THRESHOLD_SURPASSED_WARNING

    if send_warning:
        from solariat_bottle.utils.mailer import send_account_posts_limit_warning
        account.set_threshold_warning(warning)
        for admin in account.admins:
            send_account_posts_limit_warning(admin, percentage, volume_limit)
    return False
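# Worked sketch of the thresholding above with hypothetical limits: with
# warning_limit=8000 and surpassed_limit=10000, a month with 8500 posts
# triggers the "Warning" email once, a month with 10200 posts triggers the
# "Surpassed" email once, and anything below 8000 sends nothing.
def _expected_warning(number_posts, warning_limit=8000, surpassed_limit=10000):
    if warning_limit <= number_posts < surpassed_limit:
        return 'warning'
    if number_posts >= surpassed_limit:
        return 'surpassed'
    return None

assert _expected_warning(7500) is None
assert _expected_warning(8500) == 'warning'
assert _expected_warning(10200) == 'surpassed'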
def _json_trial(user, item, stats_by_account=None, with_stats=False):
    result = {
        "account_name": item.name or "Unnamed Trial",
        "account_id": str(item.id),
        "start_date": js_ts(item.start_date),
        "end_date": js_ts(item.end_date),
        "created_at": js_ts(item.created),
        "status": item.status
    }
    if stats_by_account:
        result["stats"] = stats_by_account[item]
    elif with_stats:
        month_start, month_end = Timeslot(level='month').interval
        result["stats"] = account_stats(item, user,
                                        start_date=month_start,
                                        end_date=month_end)
    return result
def _json_account(acct, user=None, with_stats=False,
                  start_date=None, end_date=None, cache=None):
    if cache is None:
        cache = {}
    if not acct:
        return None

    if (session.get('sf_oauthToken', False)
            and acct.access_token is not None
            and acct.account_type == 'Salesforce'):
        is_sf_auth = True
    else:
        is_sf_auth = False

    package = "Internal"
    if acct.package is not None:
        package = acct.package.name

    csm = acct.customer_success_manager
    if csm is not None:
        csm = csm.email

    adm = {'first': None, 'last': None, 'email': None}
    try:
        # Note: Taking the first element might not be best
        if 'admins' in cache:
            admins = cache['admins']
        else:
            admins = [admin_user for admin_user in acct.admins
                      if not admin_user.is_staff]
        admin = admins[0]
    except IndexError:
        # only staff are admins
        pass
    else:
        adm['first'] = admin.first_name
        adm['last'] = admin.last_name
        adm['email'] = admin.email

    a_dict = {
        'id': str(acct.id),
        'name': acct.name,
        'channels_count': acct.get_current_channels(status__ne='Archived').count(),
        'account_type': acct.account_type,
        'package': package,
        'created_at': datetime_to_timestamp_ms(acct.created),
        'is_current': user and user.current_account and user.current_account.id == acct.id,
        'is_admin': user and (acct.can_edit(user) or user.is_superuser),
        'is_super': user and user.is_superuser,
        'is_staff': user and user.is_staff,
        'is_analyst': user and user.is_analyst,
        'is_only_agent': user and user.is_only_agent,
        'signature': user and user.signature_suffix,
        'is_sf_auth': is_sf_auth,
        'end_date': acct.end_date and datetime_to_timestamp_ms(acct.end_date),
        'configured_apps': acct.available_apps.keys(),
        'available_apps': CONFIGURABLE_APPS.keys(),
        'selected_app': acct.selected_app,
        'customer_success_manager': csm,
        'notes': acct.notes,
        'admin': adm,
        'is_active': acct.is_active,
        'status': acct.status,
        'monthly_volume': 0,
        'is_locked': acct.is_locked,
        'updated_at': datetime_to_timestamp_ms(acct.updated_at) if acct.updated_at else None,
        'recovery_days': acct.recovery_days,
        'event_processing_lock': acct.event_processing_lock,
    }

    if 'users_count' in cache:
        a_dict['users_count'] = cache['users_count']
    else:
        a_dict['users_count'] = len(
            [u for u in acct.get_users() if not u.is_system])

    if 'all_users_count' in cache:
        a_dict['all_users_count'] = cache['all_users_count']
    else:
        a_dict['all_users_count'] = len(
            [u for u in acct.get_all_users() if not u.is_system])

    if user and user.is_admin:
        a_dict['gse_api_key'] = acct.gse_api_key

    if with_stats and user:
        a_dict['stats'] = account_stats(acct, user, start_date, end_date)
        today = dt.datetime.now()
        start_of_month = dt.datetime(today.year, today.month, 1)
        a_dict['monthly_volume'] = account_stats(acct, user, start_of_month, today)
        today_start, today_end = Timeslot(level='day').interval
        a_dict['daily_volume'] = account_stats(acct, user, today_start, today_end)
        a_dict['daily_volume_notification_emails'] = \
            acct.daily_post_volume_notification.alert_emails

    return a_dict
def by_time_span(self, channel=None, from_ts=None, to_ts=None,
                 topic_pairs=None, intentions=None, statuses=None,
                 agents=None, languages=None, group_by='topic',
                 plot_by='time', plot_type=None, no_transform=False):
    """
    :param channel: can be a string or a sequence
    :param from_ts: starting timeslot
    :param to_ts: end timeslot
    :param topic_pairs: list of pairs (<topic:str>, <is_leaf:bool>)
    :param statuses: list of <status:int|str>
    :param agents: list of <User>, where each user should have .agent_id != 0
    :param languages: list of language codes or ids
    :param group_by: the type of grouping used for aggregation,
                     <str:"topic"|"intention"|"status"|"agent">
    :returns: stats by time span
    """
    agents = self.preprocess_agents(agents, group_by, channel)
    if statuses:
        statuses = is_iterable(statuses) and statuses or [statuses]
        statuses = map(get_status_code, statuses)
    else:
        statuses = SpeechActMap.STATUS_NAME_MAP.keys()

    intention_ids = map(get_intention_id, intentions or []) or [ALL_INTENTIONS_INT]
    topic_pairs = topic_pairs or [[ALL_TOPICS, False]]
    languages = map(get_lang_id, languages or [])

    from_ts = Timeslot(from_ts).timeslot
    to_ts = Timeslot(to_ts or from_ts).timeslot

    or_query = []
    for (topic, _), status in product(topic_pairs, statuses):
        # channel can be a string or a sequence
        if isinstance(channel, seq_types):
            for c in channel:
                from_id, to_id = self.get_id_intervals(
                    c, from_ts, to_ts, topic, status)
                or_query.append({"_id": {"$gte": from_id, "$lte": to_id}})
        else:
            from_id, to_id = self.get_id_intervals(channel, from_ts, to_ts,
                                                   topic, status)
            or_query.append({"_id": {"$gte": from_id, "$lte": to_id}})

    if len(or_query) == 1:
        indexed_match_query = or_query[0]
    else:
        indexed_match_query = {"$or": or_query}

    initial_pipeline = [{"$match": indexed_match_query}]

    match_query = {}
    if plot_type:
        match_query = {
            "$and": [
                self.filter_topics(topic_pairs),
                self.construct_filter_query(intention_ids, statuses,
                                            agents, languages)
            ]
        }

    pipeline = self.assemble_pipeline(initial_pipeline, match_query,
                                      plot_type, plot_by, group_by)
    res = self.execute_pipeline(pipeline)

    if not res['ok']:
        error_msg = "Aggregate error=%s" % res
        # LOGGER.error("%s pipeline=%s", error_msg, pformat(pipeline))
        return {'ok': False, 'error': error_msg}

    features = {
        'agent': [(u.agent_id, u) for u in (agents or [])],
        'intention': intention_ids,
        'topic': topic_pairs,
        'status': statuses,
        'lang': make_lang_features(languages),
        'time': None
    }[group_by]

    return self.postprocess_results(res, pipeline, no_transform, plot_type,
                                    from_ts, to_ts, group_by, plot_by,
                                    features)
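# Hedged usage sketch for ChannelTopicTrends.by_time_span: hourly trend for
# the "laptop" leaf topic grouped by intention. The channel object is
# hypothetical; the intention names are the ones used in the tests below.
trends = ChannelTopicTrends.objects.by_time_span(
    channel=channel,
    topic_pairs=[('laptop', True)],
    intentions=['needs', 'problem'],
    from_ts=Timeslot('2013-04-08 00:00', 'hour'),
    to_ts=Timeslot('2013-04-08 23:00', 'hour'),
    group_by='intention',
    plot_by='time',
    plot_type='topics')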
def by_time_span(self, channel=None, from_ts=None, to_ts=None, agents=None,
                 statuses=None, languages=None, group_by='agent',
                 plot_by='time', plot_type=None, no_transform=False):
    """
    :param channel: can be a string or a sequence
    :param from_ts: starting timeslot
    :param to_ts: end timeslot
    :param group_by: the type of grouping we are doing for aggregation
    :returns: stats by time span
    """
    agents = self.preprocess_agents(agents, group_by, channel)
    if statuses:
        statuses = is_iterable(statuses) and statuses or [statuses]
        statuses = map(get_status_code, statuses)
    else:
        statuses = SpeechActMap.STATUS_NAME_MAP.keys()

    from_ts = Timeslot(from_ts).timeslot
    to_ts = Timeslot(to_ts or from_ts).timeslot

    or_query = []
    for status in statuses:
        # channel can be a string or a sequence
        if isinstance(channel, seq_types):
            for c in channel:
                from_id, to_id = self.get_id_intervals(c, status, from_ts, to_ts)
                or_query.append({"_id": {"$gte": from_id, "$lte": to_id}})
        else:
            from_id, to_id = self.get_id_intervals(channel, status, from_ts, to_ts)
            or_query.append({"_id": {"$gte": from_id, "$lte": to_id}})

    if len(or_query) == 1:
        indexed_match_query = or_query[0]
    else:
        indexed_match_query = {"$or": or_query}

    initial_pipeline = [{"$match": indexed_match_query}]

    match_query = {}
    if plot_type:
        match_query = self.construct_filter_query(statuses, agents, languages)

    pipeline = self.assemble_pipeline(initial_pipeline, match_query,
                                      plot_type, plot_by, group_by)
    res = self.execute_pipeline(pipeline)

    if group_by is None:
        features = None
    else:
        features = {
            'agent': [(u.agent_id, u) for u in (agents or [])],
            'lang': make_lang_features(languages),
            'time': None
        }[group_by]

    return self.postprocess_results(res, pipeline, no_transform, plot_type,
                                    from_ts, to_ts, group_by, plot_by,
                                    features)
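# Hedged usage sketch for ChannelTrends.by_time_span: per-agent distribution
# over a single day, returning raw aggregation rows (the channel and agents
# list are hypothetical; the call shape mirrors get_stats above).
agent_stats = ChannelTrends.objects.by_time_span(
    channel=channel,
    agents=agents,
    from_ts=Timeslot('2013-04-08', 'day'),
    group_by='agent',
    plot_by='distribution',
    plot_type='response-time',
    no_transform=True)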
def get_term_stats(term, level):
    # note: relies on `self` from the enclosing test method's scope
    stats = ChannelTopicTrends.objects.by_time_span(
        self.channel,
        topic_pairs=[[term, False]],
        from_ts=Timeslot(level=level))
    return tuple(stats)
def _get_stats(self, level):
    stats = ChannelTrends.objects.by_time_span(
        self.channel, from_ts=Timeslot(level=level))
    return tuple(stats)
def transform(data, from_ts=None, to_ts=None, group_by='topic',
              plot_by='time', plot_type='topics', features=None):
    """ Transforms aggregation data to plot data """

    def group_by_timeslot_label(data):
        by_timeslot_label = defaultdict(dict)
        for item in data:
            time_slot = item['_id'].get('ts', 0)
            label = item['_id'].get('grp', 'count')
            by_timeslot_label[time_slot][label] = item
        return by_timeslot_label

    def _get_count(stats_data, stat_type='count'):
        return stats_data.get(stat_type, 0)

    def get_feature_key(feature):
        if group_by in ('topic', 'agent', 'lang'):
            try:
                return feature[0]
            except (TypeError, IndexError):
                return feature
        elif group_by in ('intention', 'status'):
            return int(feature)
        return 'count'

    def to_client_tz_offset(js_timestamp, tz_offset):
        if tz_offset:
            js_timestamp -= 1000.0 * tz_offset * 60
        return js_timestamp

    def get_time_data(groups, y_axis):
        total_counts = defaultdict(int)
        total_items = defaultdict(int)
        data = defaultdict(list)
        for slot in gen_timeslots(from_ts, to_ts):
            timestamp = timeslot_to_timestamp_ms(slot)
            features_data = groups.get(slot, {})
            for feature in y_axis:
                feature_key = get_feature_key(feature)
                if features_data.get(feature_key):
                    count = _get_count(features_data[feature_key])
                    total_counts[feature_key] += count
                    total_items[feature_key] += 1
                    data[feature_key].append([timestamp, count])
                else:
                    data[feature_key].append([timestamp, 0])

        if plot_type == 'response-time':
            # return the average as the result; use float division so the
            # average is not silently truncated on Python 2
            result_counts = defaultdict(float)
            for key, value in total_counts.iteritems():
                if total_items.get(key):
                    result_counts[key] = round(float(value) / total_items[key], 2)
                else:
                    result_counts[key] = 0
        else:
            result_counts = total_counts
        return data, result_counts

    results = {}
    level = Timeslot(from_ts).level
    assert level == Timeslot(to_ts).level

    if plot_by == 'time':
        groups = group_by_timeslot_label(data)
        y_axis = features or ['count']
        data, counts = get_time_data(groups, y_axis)
        for f in y_axis:
            feature = get_feature_key(f)
            if not counts.get(feature):
                continue
            data_series = {
                "label": translate_label(f, group_by),
                "data": data.get(feature, []),
                "level": level,
                "count": counts.get(feature, 0)
            }
            if group_by == 'topic':
                data_series['topic_type'] = f[1] and 'leaf' or 'node'
            results[feature] = data_series
    elif plot_by == 'distribution':
        # distribution rows carry no timeslot, so they all land under the
        # default 0 key produced by group_by_timeslot_label
        groups = group_by_timeslot_label(data)[0]
        y_axis = features or groups.keys()
        idx = 0
        for f in y_axis:
            feature = get_feature_key(f)
            idx += 1
            if feature not in groups:
                continue
            count = _get_count(groups[feature])
            data_series = {
                "label": translate_label(f, group_by),
                "data": [[idx * 2, count]]
            }
            if group_by == 'topic':
                data_series['topic_type'] = f[1] and 'leaf' or 'node'
            results[feature] = data_series

    return {"ok": True, "level": level, "list": results.values()}
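# Hedged sketch of the shapes transform() deals with. For plot_by='time'
# each aggregation row looks like
#     {'_id': {'grp': <feature>, 'ts': <timeslot>}, 'count': <int>}
# and the result is {'ok': True, 'level': ..., 'list': [<series>, ...]}.
# The timeslot and feature values below are hypothetical and would have to
# fall inside the from_ts..to_ts range to produce non-zero points.
agg_rows = [
    {'_id': {'grp': 1, 'ts': 1122334}, 'count': 3},
    {'_id': {'grp': 1, 'ts': 1122335}, 'count': 5},
]
plot = transform(agg_rows,
                 from_ts=Timeslot('2013-04-08 00:00', 'hour'),
                 to_ts=Timeslot('2013-04-08 01:00', 'hour'),
                 group_by='status', plot_by='time',
                 plot_type='topics', features=[1])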
def test_topic_counts(self):
    base_filters = dict(
        channel=self.channel,
        from_ts=Timeslot(self.start_date, self.level),
        to_ts=Timeslot(self.end_date, self.level),
        topic_pairs=[('laptop', True), ('display', True)],
        intentions=['likes', 'needs', 'recommendation', 'problem'],
        statuses=[0, 1, 2, 3],
        plot_type='topics',
        no_transform=True)

    # Expected list of dicts
    # For a time line:
    #   {'_id': {'grp': <group_by>, 'ts': <timestamp>}, 'count': 1}
    # For a distribution:
    #   {'_id': {'grp': <group_by>}, 'count': <total_count>}

    # plot_by = time, group_by = topic
    data = ChannelTopicTrends.objects.by_time_span(group_by='topic',
                                                   plot_by='time',
                                                   **base_filters)
    # verify time_slots
    self.assertEqual(set(map(lambda x: x['_id']['ts'], data)),
                     set(self.time_slots))
    self._assert_time(data, ['laptop', 'display'])

    # plot_by = distribution, group_by = topic
    data = ChannelTopicTrends.objects.by_time_span(group_by='topic',
                                                   plot_by='distribution',
                                                   **base_filters)
    self._assert_distribution(data, ['laptop', 'display'])

    # plot_by = time, group_by = intention
    data = ChannelTopicTrends.objects.by_time_span(group_by='intention',
                                                   plot_by='time',
                                                   **base_filters)
    # verify time_slots
    self.assertEqual(set(map(lambda x: x['_id']['ts'], data)),
                     set(self.time_slots))
    self._assert_time(data, [need, problem])

    # plot_by = distribution, group_by = intention
    data = ChannelTopicTrends.objects.by_time_span(group_by='intention',
                                                   plot_by='distribution',
                                                   **base_filters)
    self._assert_distribution(data, [need, problem])

    # plot_by = time, group_by = status
    data = ChannelTopicTrends.objects.by_time_span(group_by='status',
                                                   plot_by='time',
                                                   **base_filters)
    status = 1
    # verify time_slots
    self.assertEqual(set(map(lambda x: x['_id']['ts'], data)),
                     set(self.time_slots))
    self._assert_time(data, [status])

    # plot_by = distribution, group_by = status
    data = ChannelTopicTrends.objects.by_time_span(group_by='status',
                                                   plot_by='distribution',
                                                   **base_filters)
    self._assert_distribution(data, [status])

    # test the __ALL_topics__ case
    base_filters.pop('topic_pairs')

    # plot_by = time, group_by = topic
    data = ChannelTopicTrends.objects.by_time_span(group_by='topic',
                                                   plot_by='time',
                                                   **base_filters)
    # verify time_slots
    self.assertEqual(set(map(lambda x: x['_id']['ts'], data)),
                     set(self.time_slots))
    self._assert_time(data, [ALL_TOPICS])

    # plot_by = distribution, group_by = topic
    data = ChannelTopicTrends.objects.by_time_span(group_by='topic',
                                                   plot_by='distribution',
                                                   **base_filters)
    self._assert_distribution(data, [ALL_TOPICS])

    # plot_by = time, group_by = intention
    data = ChannelTopicTrends.objects.by_time_span(group_by='intention',
                                                   plot_by='time',
                                                   **base_filters)
    # verify time_slots
    self.assertEqual(set(map(lambda x: x['_id']['ts'], data)),
                     set(self.time_slots))
    self._assert_time(data, [need, problem])

    # plot_by = distribution, group_by = intention
    data = ChannelTopicTrends.objects.by_time_span(group_by='intention',
                                                   plot_by='distribution',
                                                   **base_filters)
    self._assert_distribution(data, [need, problem])

    # plot_by = time, group_by = status
    data = ChannelTopicTrends.objects.by_time_span(group_by='status',
                                                   plot_by='time',
                                                   **base_filters)
    status = 1
    # verify time_slots
    self.assertEqual(set(map(lambda x: x['_id']['ts'], data)),
                     set(self.time_slots))
    self._assert_time(data, [status])

    # plot_by = distribution, group_by = status
    data = ChannelTopicTrends.objects.by_time_span(group_by='status',
                                                   plot_by='distribution',
                                                   **base_filters)
    self._assert_distribution(data, [status])
def more_like_post(post, channel):
    """
    Returns a queryset of similar posts in a given channel.
    Similarity is determined by the list of topics and intentions of the
    initial post. Note that we are looking for posts that are similar,
    but with the opposite status, since we want to re-label.
    """
    from solariat_bottle.db.post.base import Post
    from solariat_bottle.db.speech_act import SpeechActMap
    from solariat_bottle.db.channel.base import Channel
    from solariat_bottle.db.conversation import Conversation
    from solariat.utils.timeslot import Timeslot, DURATION_DAY

    topics = []
    intention_ids = []
    channel = Channel.objects.ensure_channels([channel])[0]
    assignment = post.get_assignment(channel)
    if channel.is_smart_tag:
        # for smart tags lookup similar posts in parent channel
        parent_channel = Channel.objects.get(channel.parent_channel)
        status = [SpeechActMap.POTENTIAL, SpeechActMap.ACTIONABLE,
                  SpeechActMap.ACTUAL, SpeechActMap.REJECTED]
    else:
        parent_channel = channel
        status = [SpeechActMap.POTENTIAL]
        if assignment in SpeechActMap.ASSIGNED:
            # A positive assignment could cause a more precise classification
            # of a Potential post and could revert the assignment for
            # Rejected posts
            status.append(SpeechActMap.REJECTED)
        elif assignment in {'rejected', 'discarded'}:
            # Conversely, may reject Potential posts and may cause a reversion
            # of a prior allocation for Actionable
            status.append(SpeechActMap.ACTIONABLE)
        else:
            raise AppException("An internal state is not expected: %s. "
                               "Please contact support for assistance." % assignment)

    for sa in post.speech_acts:
        topics.extend(sa['intention_topics'])
        intention_ids.append(sa['intention_type_id'])

    # The basic post lookup that just searches for the latest objects
    res, more_posts_available = Post.objects.by_time_point(
        parent_channel,
        ['__ALL__'],
        from_ts=Timeslot(post.created_at - DURATION_DAY),
        to_ts=Timeslot(post.created_at + timedelta(hours=1)),
        status=status,
        intention=intention_ids,
        languages=[post.language],
        limit=10)
    res = set(res)

    if channel.is_smart_tag:
        # Part of new re-labeling. If a tag for a post is rejected, we should
        # go through all posts from the post's conversation and through the
        # first RESPONSE_DEPTH_FACTOR responses containing the tag
        service_channel = get_service_channel(channel)
        if service_channel:
            conversations = Conversation.objects.lookup_conversations(service_channel, [post])
            if len(conversations) == 1:
                # First extend with all other posts from this conversation
                # that have the tag assigned to them
                res |= set([p for p in Post.objects(id__in=list(conversations[0].posts))
                            if (str(p.id) != str(post.id)
                                and str(channel.id) in p.tag_assignments)])
                # Now go through the first RESPONSE_DEPTH_FACTOR responses
                # which have that tag assigned
    elif (not channel.is_smart_tag
          and SpeechActMap.STATUS_MAP[post.get_assignment(channel)] in
              [SpeechActMap.ACTIONABLE, SpeechActMap.REJECTED]):
        # In case we reject a post, go through all the posts for the first
        # RESPONSE_DEPTH_FACTOR responses from the same service channel
        channels = [channel]
        if channel.parent_channel is not None:
            service_channel = Channel.objects.get(id=channel.parent_channel)
            channels.append(service_channel)
        channel_filter = [c.id for c in channels]
        channel_filter_refs = [DBRef('Channel', ch) for ch in channel_filter]
        if SpeechActMap.STATUS_MAP[post.get_assignment(channel)] == SpeechActMap.REJECTED:
            target_status = [SpeechActMap.POTENTIAL, SpeechActMap.ACTIONABLE]
        else:
            target_status = [SpeechActMap.POTENTIAL, SpeechActMap.REJECTED]

    return list(res)