def test_extract_signature(self):
    up = self._make_agent('*****@*****.**', '^IZ', None)

    content = "You don't need an appointment.\n^IZ"
    self.assertEqual(
        Post(content=content,
             actor_id=up.id,
             is_inbound=False,
             _native_id='1').extract_signature(),
        '^IZ')

    content = "You don't need an appointment. ^IZ"
    self.assertEqual(
        Post(content=content,
             actor_id=up.id,
             is_inbound=False,
             _native_id='2').extract_signature(),
        '^IZ')
def test_nonmatching_sentiments(self):
    """ Posts, based on setup:
        (u'I need a bike. I like Honda.', Positive),
        (u'Can somebody recommend a sturdy laptop?', Neutral),
        (u'I need an affordabl laptop. And a laptop bag', Neutral),
        (u'Whatever you buy, let it be an Apple laptop', Neutral),
        (u'I would like to have a thin and lightweight laptop.', Neutral),
        (u'Thank you very much!', Positive),
        (u"You're gonna end up with a broken laptop", Negative)
    """
    from solariat_nlp.sentiment import extract_sentiment
    from solariat_bottle.db.post.base import Post

    print [(p.content, extract_sentiment(p.content)['sentiment'])
           for p in Post.objects()]

    trends = self.get_trends(
        **{'channel_id': str(self.channel.id),
           'from': self.one_day_before_str,
           'to': self.one_day_after_str,
           'level': 'hour',
           'topics': [{'topic': 'laptop bag', 'topic_type': 'leaf'}],
           'sentiments': ['neutral'],
           'plot_type': 'sentiment',
           'group_by': 'status'})
    self.assertTrue(trends)  # we have some positive and negative examples
def test_select_by_time_span_3(self):
    past_dt = now() - relativedelta(months=1)  # big enough for all levels
    post1 = self._create_db_post(_created=past_dt, content='i need some carrot')
    post2 = self._create_db_post(content='i need some carrot')
    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 2)

    for level in ('hour', 'day'):
        result = ChannelTopicTrends.objects.by_time_span(
            channel=self.channel,
            topic_pairs=[['carrot', True]],
            from_ts=datetime_to_timeslot(past_dt, level),
            to_ts=datetime_to_timeslot(None, level))
        self.assertEqual(len(result), 2)

        result = ChannelTopicTrends.objects.by_time_span(
            channel=self.channel,
            topic_pairs=[['carrot', True]],
            from_ts=datetime_to_timeslot(
                past_dt + relativedelta(**{level + 's': 1}), level),
            to_ts=datetime_to_timeslot(None, level))
        self.assertEqual(len(result), 1)
def su_queue_view(user, channel_id):
    query = {}
    if channel_id != 'all':
        try:
            from solariat_bottle.utils.post import get_service_channel
            service_channel = get_service_channel(Channel.objects.get(channel_id))
            dispatch_channel = service_channel.get_outbound_channel(user)
        except Exception as exc:
            return jsonify(ok=False, channel_id=channel_id, error=str(exc))
        else:
            channel_ids = []
            if service_channel:
                channel_ids.append(str(service_channel.id))
            if dispatch_channel:
                channel_ids.append(str(dispatch_channel.id))
            query = dict(channel_id__in=channel_ids)

    limit = int(request.args.get('limit', 20))
    offset = int(request.args.get('offset', 0))

    from solariat_bottle.db.queue_message import QueueMessage
    from solariat_bottle.db.post.base import Post

    messages = []
    for message in QueueMessage.objects(**query).limit(limit).skip(offset):
        post_data = post_to_data(Post(message.post_data))
        post_data['message_id'] = str(message.id)
        post_data['reserved_until'] = str(message.reserved_until)
        messages.append(post_data)

    return jsonify(channel_id=channel_id,
                   limit=limit,
                   offset=offset,
                   result=messages,
                   total=QueueMessage.objects(**query).count())
def get_posts_by_id(post_id):
    """ Returns a QuerySet of posts.

    :param post_id: native or tango id
    :return: Post queryset
    """
    from solariat.db.fields import Binary
    from solariat_bottle.db.post.facebook import FacebookEventMap
    from solariat_bottle.db.post.twitter import TwitterEventMap
    from solariat_bottle.db.post.base import Post

    post_ids = []
    for em in FacebookEventMap.objects(native_id=post_id):
        post_ids.append(em.event_id)
    for em in TwitterEventMap.objects(native_id=post_id):
        post_ids.append(em.event_id)
    try:
        post_ids.append(Binary(post_id.decode('base64')))
    except Exception:
        pass
    try:
        post_ids.append(long(post_id))
    except Exception:
        pass
    return Post.objects(id__in=post_ids)
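# A minimal usage sketch of the lookup above. The id value is made up for
# illustration, and `plaintext_content` is assumed to exist on Post (it is
# used elsewhere in this code); this is not part of the original module.
def _example_native_lookup(native_id='562915797744640000'):
    # Resolves every internal post that maps to one external/native id,
    # whether it arrived via the Facebook/Twitter event maps or was stored
    # directly under that id.
    for post in get_posts_by_id(native_id):
        print post.id, post.plaintext_content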
def test_impressions(self):
    "Test impressions stats"
    pl1 = self._create_db_matchable('foo')
    pl2 = self._create_db_matchable('bar')
    pl3 = self._create_db_matchable('baz')

    response = self.do_post('posts',
                            version='v1.2',
                            channel=str(self.channel.id),
                            content='i need a foo for bar but not baz')
    post_dict = response['item']
    #matchables = post_dict['matchables']

    response = self.do_post('postmatches',
                            version='v1.2',
                            post=post_dict['id'],
                            impressions=[str(pl1.id), str(pl2.id)],
                            rejects=[str(pl3.id)])
    self.assertEqual(response['item']['rejects'][0], str(pl3.id))

    time_slot = timeslot.datetime_to_timeslot(Post.objects()[0].created)
    response = self.do_get('channelstats',
                           version='v1.2',
                           channel=str(self.channel.id),
                           time_slot=time_slot)
    # month stats object
    stats = response['list'][0]
    self.assertEqual(stats['number_of_impressions'], 2)
def test_api_case(self):
    """ Verify that when a reply post is submitted to the system via the API
    endpoint, the channel filter classifier is updated (adaptive learning is
    enabled on the inbound channel). """
    self.inbound.adaptive_learning_enabled = True
    self.inbound.save()
    self.inbound.reload()
    original_clf_hash = sha1(self.inbound.channel_filter.clf.packed_model)

    token = self.get_token()
    dummy_id = 'dummy_id'
    data = {'content': 'Test post',
            'lang': 'en',
            'channel': str(self.inbound.id),
            'token': token,
            'twitter': {'id': dummy_id}}
    resp = self.client.post('/api/v2.0/posts',
                            data=json.dumps(data),
                            content_type='application/json',
                            base_url='https://localhost')
    post_data = json.loads(resp.data)
    self.assertEqual(resp.status_code, 200)
    self.assertTrue(post_data['ok'])

    post = Post.objects(channels=self.inbound.id)[0]
    reply_data = {'content': 'Reply post',
                  'lang': 'en',
                  'channel': str(self.outbound.id),
                  'token': token,
                  'user_profile': {'screenname': 'random_screenname'},
                  'twitter': {'in_reply_to_status_id': dummy_id,
                              'id': 'reply_dummy_id'}}
    resp = self.client.post('/api/v2.0/posts',
                            data=json.dumps(reply_data),
                            content_type='application/json',
                            base_url='https://localhost')
    post.reload()
    post_data = json.loads(resp.data)
    self.assertEqual(resp.status_code, 200)
    self.assertTrue(post_data['ok'])

    self.inbound.channel_filter.reload()
    latest_clf_hash = sha1(self.inbound.channel_filter.clf.packed_model)
    self.assertNotEqual(original_clf_hash.hexdigest(),
                        latest_clf_hash.hexdigest())
def select_and_reserve(self, channel, limit=DEFAULT_LIMIT, reserve_time=DEFAULT_RESERVE_TIME):
    """ Query a batch of messages from the database and reserve them until
    a successful pull callback. """
    from solariat_bottle.db.post.base import Post

    log_enabled = is_enabled(channel)
    query = {'channel_id': str(channel),
             'reserved_until': {'$lt': datetime.utcnow()}}
    messages = self.find(**query).limit(limit)

    result = []
    duplicate_count = 0
    queue_messages = []
    salt_length = 5
    batch_token = None
    deadline = datetime.utcnow() + timedelta(seconds=reserve_time)
    expired_tokens = set([])

    for message in messages:
        if batch_token is None:
            batch_token = '%s%s%s' % (datetime.utcnow().__hash__(),
                                      message.id,
                                      Random().getrandbits(salt_length))
        if message.batch_token:
            duplicate_count += 1
            # If we re-added these posts, then the token has expired
            expired_tokens.add(message.batch_token)
        message.reserved_until = deadline
        message.batch_token = batch_token
        message.save()
        if log_enabled:
            queue_messages.append(Post(message.post_data).plaintext_content)
        result.append(message)

    if expired_tokens:
        self.coll.update({'batch_token': {'$in': list(expired_tokens)}},
                         {'$set': {'batch_token': None}},
                         multi=True)

    if log_enabled:
        get_logger(channel).info(
            u"QMD: Pulling / Reserving from queue messages: %s",
            unicode(queue_messages))
    return result, duplicate_count
def test_channel_stats(self):
    content = 'I need a bike. I like Honda .'
    # self._create_db_matchable(url='google.com',
    #                           creative='search for bike here')
    post = self._create_db_post(content)
    self.assertTrue(post.id is not None)
    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 1)

    for stats in get_levels(self.channel):
        stats.reload()
        self.assertEqual(stats.number_of_posts, 1)
        self.assertEqual(stats.feature_counts['2'], 1)
        self.assertEqual(stats.feature_counts['4'], 1)
def test_crud(self):
    post = self._create_db_post(content='I need a new moto bike')
    self.assertTrue(post.user_tag.startswith('unknown')
                    or post.user_tag.startswith('anonymous'))

    count = Post.objects.find(channels=str(self.channel.id)).count()
    self.assertEqual(count, 1)
    self.assertEqual(
        Post.objects.get(post.id)['content'], 'I need a new moto bike')

    post.delete()
    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 0)
def _create_posts(self):
    past_created = now() - timedelta(minutes=7 * 24 * 60)
    post1 = self._create_db_post(_created=past_created,
                                 content='i need some carrot')
    past_created = now() - timedelta(minutes=7 * 24 * 60 + 10)
    post2 = self._create_db_post(_created=past_created,
                                 content='Where I can buy a carrot?')

    self._create_db_post(content='i need some carrot')
    self._create_db_post(content='Where I can buy a carrot?')

    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 4)
def test_post_stats(self):
    self.assertEqual(ChannelStats.objects().count(), 0)
    response = self.do_post('posts',
                            version='v1.2',
                            channel=str(self.channel.id),
                            content='i need a foo for bar but not baz')
    # Should have allocated stats for each level
    self.assertEqual(ChannelStats.objects().count(), 3)

    post = Post.objects()[0]
    for stats in get_levels(self.channel, post.created):
        stats.reload()
        #print stats.to_dict()
        self.assertEqual(stats.number_of_posts, 1)
        self.assertEqual(stats.feature_counts, {'0': 1, '2': 1})
def su_print_conversation(user, conv_id):
    try:
        conv = Conversation.objects.get(long(conv_id))
    except (Conversation.DoesNotExist, ValueError):
        return jsonify(ok=False, error='Does not exist')

    from solariat_bottle.db.post.base import Post

    data = {'conversation_data': bson_safe(conv.data),
            'posts': []}
    for post in Post.objects(id__in=conv.posts):
        data['posts'].append(post_to_data(post))
    return jsonify(data)
def clear_reserved_id_based(self, post_ids):
    reserver = self.find(id__in=post_ids)[:]
    channel_ids = []
    for message in reserver:
        channel_ids.extend(list(message.channel_id))

    if is_enabled(channel_ids):
        from solariat_bottle.db.post.base import Post

        queue_messages = []
        for message in reserver:
            queue_messages.append(Post(message.post_data).plaintext_content)
        get_logger(channel_ids).info(
            u"QMD: Confirming / Clearing from queue messages: %s",
            unicode(queue_messages))
    return self.remove(id__in=post_ids)
def _do_test(self, ds_bot, posts_before=0):
    self.assertEqual(Post.objects.count(), posts_before)
    if not ds_bot.is_running():
        self.start_bot(ds_bot)

    post_data = patch_created_at(SAMPLE_VALID_DATA, now())
    ds_bot.post_received(json.dumps(post_data))
    self.wait_bot(ds_bot)

    self.assertEqual(Post.objects.count(), posts_before + 1)
    created_post = Post.objects().sort(_created=-1).limit(1)[0]
    u_p = UserProfile.objects.get(user_name='user1_solariat')

    # Check fields that are required on user profile
    self.assertDictEqual(
        u_p.platform_data,
        {u'lang': u'en',
         u'statuses_count': 1905,
         u'screen_name': u'user1_solariat',
         u'friends_count': 13,
         u'name': u'user1_solariat',
         u'created_at': u'Tue, 07 May 2013 19:35:50 +0000',
         u'profile_image_url': u'http://pbs.twimg.com/profile_images/468781442852339712/69CJihsO_normal.jpeg',
         u'id': 1411050992,
         u'followers_count': 8,
         u'id_str': u'1411050992',
         u'location': u'San Francisco',
         u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/468781442852339712/69CJihsO_normal.jpeg',
         u'description': u'Teacher'})

    self.assertEqual(created_post.content,
                     post_data['data']['twitter']['text'])
    self.assertTrue(
        str(self.channel.inbound_channel.id) in created_post.channel_assignments)
    self.assertEqual(
        created_post.channel_assignments[str(self.channel.inbound_channel.id)],
        'highlighted')

    # Check that we actually hold everything in wrapped data
    print(created_post.wrapped_data)
    print(post_data['data'])
    self.assertDictEqual(created_post.wrapped_data, post_data['data'])
def remove_reserved(self, batch_token):
    ''' Remove all records from the database with the provided batch_token '''
    reserver = self.find(**{'batch_token': batch_token})[:]
    channel_ids = []
    for message in reserver:
        channel_ids.extend(list(message.channel_id))

    if is_enabled(channel_ids):
        from solariat_bottle.db.post.base import Post

        queue_messages = []
        for message in reserver:
            queue_messages.append(Post(message.post_data).plaintext_content)
        get_logger(channel_ids).info(
            u"QMD: Confirming / Clearing from queue messages: %s",
            unicode(queue_messages))
    return self.remove(**{'batch_token': batch_token})
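# A minimal sketch of the pull/confirm cycle implemented by select_and_reserve
# and remove_reserved above. It assumes the manager is reachable as
# QueueMessage.objects (as the queries in su_queue_view suggest); the channel
# argument, limit and reserve_time values are illustrative, not prescriptive.
def _example_pull_and_confirm(channel):
    from solariat_bottle.db.queue_message import QueueMessage

    reserved, duplicate_count = QueueMessage.objects.select_and_reserve(
        channel, limit=100, reserve_time=60)
    # ... hand the reserved batch to the consumer here ...
    if reserved:
        # A successful pull callback clears the whole batch by its token;
        # messages that are never confirmed simply become eligible again
        # once their reserved_until deadline passes.
        QueueMessage.objects.remove_reserved(reserved[0].batch_token)
    return duplicate_count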
def setUp(self):
    MainCase.setUp(self)

    past_id = ObjectId.from_datetime(
        datetime.now() - timedelta(minutes=7 * 24 * 60))
    post1 = self._create_db_post(id=past_id,
                                 channel=self.channel,
                                 content='i need some foo')
    #Post.objects.insert(post1.data)

    past_id = ObjectId.from_datetime(
        datetime.now() - timedelta(minutes=7 * 24 * 60 + 10))
    post2 = self._create_db_post(id=past_id,
                                 channel=self.channel,
                                 content='where i can find a foo?')
    #Post.objects.insert(post2.data)

    post3 = self._create_db_post(channel=self.channel,
                                 content='i need some foo')
    post4 = self._create_db_post(channel=self.channel,
                                 content='where i can find a foo?')
    post5 = self._create_db_post(channel=self.channel, content='LOL')
    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 5)
def setUp(self):
    UICase.setUp(self)
    self.login()

    first_date = utc(datetime(2012, 1, 1))
    post1 = self._create_db_post(_created=first_date,
                                 content='i need some carrot')
    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 1)

    # 1 jan + 10 minutes
    second_date = first_date + timedelta(minutes=10)
    post2 = self._create_db_post(_created=second_date,
                                 content='where i can buy a carrot?')
    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 2)

    # 1 jan + 7 days
    third_date = first_date + timedelta(minutes=7 * 60 * 24)
    post3 = self._create_db_post(_created=third_date,
                                 content='i need some carrot')
    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 3)

    forth_date = third_date + timedelta(minutes=10)
    post4 = self._create_db_post(_created=forth_date,
                                 content='where i can buy a carrot?')
    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 4)

    # This will not be created, only for stats
    post5 = Post(channels=[self.channel.id],
                 content='LOL',
                 actor_id=post4.user_profile.id,
                 is_inbound=True,
                 _native_id='1',
                 _created=post4._created)
    self.assertEqual(
        Post.objects(channels__in=[self.channel.id]).count(), 4)
    no_post_created(post5, utc(forth_date + timedelta(minutes=10)))

    self.now = now()
def fetch_posts(channels, start_ts, end_ts, topics, statuses, intentions,
                min_conf, agents, sort_by='time', limit=100, message_type=None,
                create_date_limit=None, languages=None):
    from solariat_bottle.db.post.utils import get_platform_class
    from solariat_bottle.db.channel.base import Channel
    from solariat.db.fields import BytesField

    # --- Preliminary range query for the core matching elements ---
    topics = [t if isinstance(t, dict) else dict(topic=t, topic_type='leaf')
              for t in topics]
    to_binary = BytesField().to_mongo

    match_query_base = []
    for channel in channels:
        for status in statuses:
            # compute id bounds for all posts for this slot
            id_lower_bound = pack_speech_act_map_id(channel, status, start_ts, 0)
            id_upper_bound = pack_speech_act_map_id(channel, status, end_ts,
                                                    BIGGEST_POST_VALUE)
            # add an id-constraining query
            assert start_ts <= end_ts
            assert id_upper_bound >= id_lower_bound
            match_query_base.append(
                {'_id': {"$gte": to_binary(id_lower_bound),
                         "$lte": to_binary(id_upper_bound)}})

    primary_filter = {"$or": match_query_base}
    # Add intention restrictions, which operate in the main fields
    primary_filter["ic"] = {"$gte": min_conf}
    if intentions:
        primary_filter["ii"] = {"$in": intentions}
    if message_type is not None:
        primary_filter["mtp"] = {"$in": message_type}
    # Constrain for agents, again, at the primary level
    if agents:
        primary_filter["at"] = {"$in": agents}
    if languages:
        from solariat_bottle.db.channel_trends import make_lang_query
        primary_filter = {
            "$and": [primary_filter,
                     make_lang_query(languages, SpeechActMap.language.db_field)]}

    pipeline = [{"$match": primary_filter}]

    # Generate the secondary filter only if we have topic constraints.
    topics_match_query = []
    for topic in topics:
        if topic['topic'] != ALL_TOPICS:
            topics_match_query.append({'tt.l': topic['topic_type'] == 'leaf',
                                       'tt.t': topic['topic']})
    if topics_match_query:
        pipeline.append({"$unwind": "$tt"})
        if len(topics_match_query) == 1:
            pipeline.append({"$match": topics_match_query[0]})
        else:
            pipeline.append({"$match": {"$or": topics_match_query}})

    # First impose a limit because we cannot spend all day fetching data, and
    # in the worst case the data could be huge. This limit is a reasonable
    # ceiling for searching posts; the input param may override it if larger.
    pipeline.append({"$limit": max(10000, limit)})

    # We want the data in sorted order in general.
    pipeline.append({"$sort": {"ca": -1}})

    # Now throttle the results to a workable page, where specified
    platform = None
    for ch in channels:
        if not isinstance(ch, Channel):
            ch = Channel.objects.get(ch)
        channel_platform = ch.platform
        if platform and platform != channel_platform:
            # TODO: Is this the correct approach, or should we just return a
            # bunch of base post objects in this case?
            raise AppException(
                "Trying to fetch posts over multiple platforms!")
        else:
            platform = channel_platform

    # Use the correct class depending on the platform we are searching for
    Post = get_platform_class(platform)

    are_more_speech_acts_fetched = True
    len_res_result = 0
    # we start with this limit because there are
    # ~2 speech acts per post on average
    sa_limit = 2 * limit
    posts = set([])
    # posts are created from speech acts (SA); there may be several SAs for
    # one post. We keep increasing `sa_limit` for the SA query until n=limit
    # posts are fetched, or until no more SAs are fetched.
    while len(posts) < limit and are_more_speech_acts_fetched:
        pipeline.append({"$limit": sa_limit})
        res = SpeechActMap.objects.coll.aggregate(pipeline)
        new_posts = Post.objects(
            id__in=list(set([r['pt'] for r in res['result']])))
        if create_date_limit:
            new_posts = [p for p in new_posts
                         if p.created_at < create_date_limit]
        posts.update(set(new_posts))
        if len_res_result < len(res['result']):
            len_res_result = len(res['result'])
            sa_limit = 2 * sa_limit
        else:
            are_more_speech_acts_fetched = False
        # the new limit is appended to the pipeline at the top of the loop
        del pipeline[-1]

    posts = list(posts)
    posts.sort(key=lambda p: p.created_at, reverse=True)

    # start_time = datetime.now()
    #LOGGER.debug("PostManager.by_time_point Aggregated and retrieved in %s sec. Result=%d",
    #             datetime.now() - start_time,
    #             len(posts))
    #LOGGER.debug("PostManager.by_time_point Pipeline=\n%s", pprint.pformat(pipeline))
    return posts
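# The while loop above implements a simple "grow until satisfied" fetch:
# because each post expands to roughly two speech acts, the speech-act limit
# starts at 2 * limit and doubles whenever the aggregation returned more rows
# than the previous round but still not enough distinct posts. A stand-alone
# sketch of the same pattern follows; `fetch_fn` and `dedupe_fn` are
# hypothetical callables used only for illustration.
def _grow_until_satisfied(fetch_fn, dedupe_fn, wanted, start_limit):
    items = set()
    previous_rows = 0
    cur_limit = start_limit
    while len(items) < wanted:
        rows = fetch_fn(cur_limit)          # e.g. run the aggregation with $limit=cur_limit
        items.update(dedupe_fn(rows))       # e.g. collapse speech acts into posts
        if len(rows) <= previous_rows:
            break                           # source exhausted, asking for more is pointless
        previous_rows = len(rows)
        cur_limit *= 2
    return list(items)[:wanted]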
def purge_channel_outdated_posts_and_sas(channel, now_date=None, run_in_prod_mod=False):
    """ Purges outdated posts and SAs based on the CHANNEL_ENTITIES_KEEP_DAYS setting """
    today_dt = now_date if now_date else utc(now())
    delta = relativedelta(days=get_var("CHANNEL_ENTITIES_KEEP_DAYS"))
    to_dt = today_dt - delta

    # counting chunks
    CHUNK_SIZE = 100
    post_number = Post.objects(channels=str(channel.id),
                               _created__lt=to_dt).count()
    res = {'post_total': 0, 'sas_total': 0}
    if not post_number:
        LOGGER.info("purge_outdated_posts:: %s: no posts to purge" % channel.title)
        return res

    chunks_number = post_number / CHUNK_SIZE + 1
    start_dt = datetime.now()

    # handling posts and sas chunk by chunk
    for i in range(chunks_number):
        offset = i * CHUNK_SIZE
        t0 = datetime.now()

        # getting posts for removal
        if run_in_prod_mod:
            post_query = Post.objects(channels=str(channel.id),
                                      _created__lt=to_dt).limit(CHUNK_SIZE)
        else:
            post_query = Post.objects(channels=str(channel.id),
                                      _created__lt=to_dt).limit(CHUNK_SIZE).skip(offset)
        posts = [p for p in post_query]
        post_query = None
        LOGGER.info(
            'purge_outdated_posts:: %s: chunk #%s of %s chunks (%s posts_number; post query timedelta: %s',
            channel.title, i, chunks_number, post_number, datetime.now() - t0)

        post_ids = [long(p.id)
                    if isinstance(p.id, (str, unicode)) and p.id.isdigit()
                    else p.id
                    for p in posts]

        if run_in_prod_mod:
            # perform actual removal
            t0 = datetime.now()
            sas_res = SpeechActMap.objects.coll.remove(
                SpeechActMap.objects.get_query(post__in=post_ids))
            post_res = Post.objects.coll.remove(
                Post.objects.get_query(id__in=post_ids))
            LOGGER.info(
                'purge_outdated_posts:: %s: post removed: %s; sas removed: %s;'
                ' chunk #%s of %s chunks; sas and post'
                ' remove queries timedelta: %s',
                channel.title, post_res['n'], sas_res['n'], i, chunks_number,
                datetime.now() - t0)
            res['post_total'] += post_res['n']
            res['sas_total'] += sas_res['n']
        else:
            t0 = datetime.now()
            # getting sas for removal
            sas = [s for s in SpeechActMap.objects(post__in=post_ids)]
            LOGGER.info(
                'purge_outdated_posts:: %s: chunk #%s of %s chunks; sas count: %s; sas query timedelta: %s',
                channel.title, i, chunks_number, len(sas), datetime.now() - t0)
            res['post_total'] += len(posts)
            res['sas_total'] += len(sas)

    LOGGER.info('purge_outdated_posts:: %s: total timedelta: %s; stats: %s',
                channel.title, datetime.now() - start_dt, res)
    return res
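# Usage sketch for the purge above (illustrative only, not part of the
# original module): run a dry pass first to see how many posts/SAs would be
# touched, then run the destructive pass with run_in_prod_mod=True.
def _example_purge(channel):
    dry_run = purge_channel_outdated_posts_and_sas(channel)
    LOGGER.info("Would purge %(post_total)s posts and %(sas_total)s SAs", dry_run)
    return purge_channel_outdated_posts_and_sas(channel, run_in_prod_mod=True)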
def test_queue_integration(self):
    """ Covers full integration from starting Subscriber, through
    TwitterTimelineRequest's fetchers, HistoricLoader, until
    PostCreator.create_post(). """
    from solariat_bottle.settings import LOGGER
    from solariat_bottle.db.channel.twitter import TwitterServiceChannel
    from solariat_bottle.db.historic_data import QueuedHistoricData
    from solariat_bottle.db.post.base import Post
    from solariat_bottle.daemons.twitter.historics.timeline_request import \
        DirectMessagesRequest, SentDirectMessagesRequest, SearchRequest, UserTimelineRequest
    from solariat_bottle.db.user_profiles.user_profile import UserProfile

    # reduce amount of data for long-running integration test
    FakeTwitterApi.SEARCH_DATA_LENGTH = 50
    FakeTwitterApi.TIMELINE_DATA_LENGTH = 50
    FakeTwitterApi.DM_DATA_LENGTH = 50
    FakeTwitterApi.DM_SENT_DATA_LENGTH = 50
    FakeTwitterApi.ALL_DATA_LENGTH = 200
    FakeTwitterApi.CREATED_FROM = FakeTwitterApi.CREATED_TO - timedelta(days=1)
    FakeTwitterApi.init_next_params()
    SearchRequest.SEARCH_LIMIT = 10
    UserTimelineRequest.FETCH_LIMIT = 20
    DirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 20
    SentDirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 20

    profile = UserProfile.objects.upsert(
        'Twitter', profile_data=dict(user_name='jarvis', user_id='99188210'))
    channel = TwitterServiceChannel.objects.create_by_user(self.user, title='SC')
    channel.add_username(profile.user_name)
    channel.add_keyword(u'keywörd')

    def get_id_date_pair(post_data):
        if 'twitter' in post_data:
            post_data = post_data['twitter']
        return int(post_data['id']), post_data['created_at']

    fetched_data = []

    def _save_tweets(fn):
        def decorated(tweets, *args, **kwargs):
            LOGGER.debug('PUSH_POSTS, len:%s', len(tweets))
            fetched_data.extend([get_id_date_pair(t) for t in tweets])
            return fn(tweets, *args, **kwargs)
        return decorated

    queued_data = []

    def _save_queued_data(method):
        def _method(*args, **kwargs):
            queued_data[:] = [
                get_id_date_pair(i.solariat_post_data)
                for i in QueuedHistoricData.objects(subscription=subscription)
            ]
            LOGGER.debug('QUEUED_POSTS, len: %s', len(queued_data))
            self.assertTrue(len(queued_data) == FakeTwitterApi.ALL_DATA_LENGTH,
                            msg="len=%d %s" % (len(queued_data), queued_data))
            self.assertEqual(set(queued_data), set(fetched_data),
                             msg=u"\nqueued =%s\nfetched=%s" % (queued_data, fetched_data))
            return method(*args, **kwargs)
        return _method

    subscription = TwitterRestHistoricalSubscription.objects.create(
        created_by=self.user,
        channel_id=channel.id,
        from_date=FakeTwitterApi.CREATED_FROM,
        to_date=FakeTwitterApi.CREATED_TO)
    subscriber = TwitterHistoricsSubscriber(subscription)
    subscriber.push_posts = _save_tweets(subscriber.push_posts)
    subscriber.historic_loader.load = _save_queued_data(subscriber.historic_loader.load)

    subscriber.start_historic_load()
    self.assertEqual(subscriber.get_status(), SUBSCRIPTION_FINISHED)

    self.assertEqual(
        Post.objects(channels__in=[subscription.channel.inbound,
                                   subscription.channel.outbound]).count(),
        FakeTwitterApi.ALL_DATA_LENGTH)

    SearchRequest.SEARCH_LIMIT = 100
    UserTimelineRequest.FETCH_LIMIT = 200
    DirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 200
    SentDirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 200
def get_post_by_content(self, content):
    # since content is encrypted, scan all posts
    post = [p for p in Post.objects() if p.content == content][0]
    return post
def test_multi_post(self):
    contents = [
        'Any recommendations for a basketball scholarship? I need a basketball scholarship.',
        'Any recommendations for a basketball scholarship? I need a basketball scholarship.',
        'I love my display!',
        'My display is just not working out for me :-(',
        'Any recommendations for a display?',
        'I like my display'
    ]
    for content in contents:
        post = self._create_db_post(content, channel=self.channel)

    from solariat_bottle.db.speech_act import SpeechActMap
    from solariat_bottle.db.post.base import Post

    # Calculating stats by iterating through the SAM
    stats_by_topic_intention = {}
    for post in Post.objects(channels__in=[self.channel.id]):
        for sa in post.speech_acts:
            topics = sa['intention_topics']
            int_id = sa['intention_type_id']
            topics.append('__ALL__')
            for topic in topics:
                if topic in stats_by_topic_intention:
                    if str(int_id) in stats_by_topic_intention[topic]:
                        stats_by_topic_intention[topic][str(int_id)] += 1
                    else:
                        stats_by_topic_intention[topic][str(int_id)] = 1
                else:
                    stats_by_topic_intention[topic] = {str(int_id): 1}

    expected_stats_from_sam = {
        u'basketball scholarship': {'1': 2, '2': 2},
        u'display': {'1': 1, '3': 1, '4': 2},
        '__ALL__': {'1': 3, '3': 1, '2': 2, '4': 2}
    }
    self.assertDictEqual(stats_by_topic_intention, expected_stats_from_sam)

    time_slot = datetime_to_timeslot(
        Post.objects(channels__in=[self.channel.id]).limit(1)[0].created_at,
        'hour')
    status = SpeechActMap.ACTIONABLE

    # Now verify SAM stats correspond to ChannelTopicTrends stats
    for topic, sa_stats in stats_by_topic_intention.iteritems():
        if topic == '__ALL__':
            continue
        stat = ChannelTopicTrends(channel=self.channel,
                                  time_slot=time_slot,
                                  topic=topic,
                                  status=status)
        stat.reload()
        ctt_by_int = {}
        filtered = stat.filter(is_leaf=True, intention__ne=0)
        for s in filtered:
            ctt_by_int[str(s.intention)] = s.topic_count
        self.assertDictEqual(ctt_by_int, sa_stats)
def more_like_post(post, channel):
    """ Returns a queryset of similar posts in a given channel.
        Similarity is determined by the list of topics and intentions of the
        initial post. Note that we are looking for posts that are similar but
        with the opposite status, since we want to re-label them.
    """
    from solariat_bottle.db.post.base import Post
    from solariat_bottle.db.speech_act import SpeechActMap
    from solariat_bottle.db.channel.base import Channel
    from solariat_bottle.db.conversation import Conversation
    from solariat.utils.timeslot import Timeslot, DURATION_DAY

    topics = []
    intention_ids = []
    channel = Channel.objects.ensure_channels([channel])[0]
    assignment = post.get_assignment(channel)
    if channel.is_smart_tag:
        # for smart tags lookup similar posts in parent channel
        parent_channel = Channel.objects.get(channel.parent_channel)
        status = [SpeechActMap.POTENTIAL, SpeechActMap.ACTIONABLE,
                  SpeechActMap.ACTUAL, SpeechActMap.REJECTED]
    else:
        parent_channel = channel
        status = [SpeechActMap.POTENTIAL]
        if assignment in SpeechActMap.ASSIGNED:
            # A positive assignment could cause a more precise classification
            # of a Potential post and could revert the assignment for
            # Rejected posts
            status.append(SpeechActMap.REJECTED)
        elif assignment in {'rejected', 'discarded'}:
            # Conversely, it may reject potential posts and may cause a
            # reversion of a prior allocation for Actionable posts
            status.append(SpeechActMap.ACTIONABLE)
        else:
            raise AppException("An internal state is not expected: %s. "
                               "Please contact support for assistance." % assignment)

    for sa in post.speech_acts:
        topics.extend(sa['intention_topics'])
        intention_ids.append(sa['intention_type_id'])

    # The basic post lookup that just searches for the latest objects
    res, more_posts_available = Post.objects.by_time_point(
        parent_channel,
        ['__ALL__'],
        from_ts=Timeslot(post.created_at - DURATION_DAY),
        to_ts=Timeslot(post.created_at + timedelta(hours=1)),
        status=status,
        intention=intention_ids,
        languages=[post.language],
        limit=10)
    res = set(res)

    if channel.is_smart_tag:
        # Part of new re-labeling. If a tag for a post is rejected, we should
        # go through all posts from the post's conversation and through the
        # first RESPONSE_DEPTH_FACTOR responses containing the tag
        service_channel = get_service_channel(channel)
        if service_channel:
            conversations = Conversation.objects.lookup_conversations(service_channel, [post])
            if len(conversations) == 1:
                # First extend with all other posts from this conversation
                # that have that tag assigned to them
                res |= set([p for p in Post.objects(id__in=list(conversations[0].posts))
                            if (str(p.id) != str(post.id)
                                and str(channel.id) in p.tag_assignments)])
                # Now go through the first RESPONSE_DEPTH_FACTOR responses
                # which have that tag assigned
    elif (not channel.is_smart_tag
          and SpeechActMap.STATUS_MAP[post.get_assignment(channel)] in [
              SpeechActMap.ACTIONABLE, SpeechActMap.REJECTED]):
        # In case we reject a post, go through all the posts for the first
        # RESPONSE_DEPTH_FACTOR responses from the same service channel
        channels = [channel]
        if channel.parent_channel is not None:
            service_channel = Channel.objects.get(id=channel.parent_channel)
            channels.append(service_channel)
        channel_filter = [c.id for c in channels]
        channel_filter_refs = [DBRef('Channel', ch) for ch in channel_filter]
        if SpeechActMap.STATUS_MAP[post.get_assignment(channel)] == SpeechActMap.REJECTED:
            target_status = [SpeechActMap.POTENTIAL, SpeechActMap.ACTIONABLE]
        else:
            target_status = [SpeechActMap.POTENTIAL, SpeechActMap.REJECTED]

    return list(res)
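# A minimal usage sketch (illustrative only; assumes `post` and `smart_tag`
# are already-loaded documents): gather re-label candidates for a post whose
# tag assignment was just changed.
def _example_relabel_candidates(post, smart_tag):
    similar = more_like_post(post, smart_tag)
    # Exclude the seed post defensively; the conversation-expansion branch of
    # more_like_post already skips it, but the basic lookup may not.
    return [p for p in similar if str(p.id) != str(post.id)]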