def test_facebook_post(self):
    updated_at = now().replace(microsecond=0)
    created_at = TIMESLOT_EPOCH - timedelta(days=300)
    content = 'post image caption'
    status_id = "12345678901_123456789101"

    from solariat_bottle.db.channel.facebook import FacebookServiceChannel
    data = {
        'content': content,
        'channels': [FacebookServiceChannel.objects.create_by_user(self.user, title='Fb')],
        'user_profile': {'user_name': 'fb_test_user'},
        'facebook': {
            'facebook_post_id': status_id,
            'text': content,
            'created_at': str(created_at),
            '_wrapped_data': {
                'type': 'status',
                'source_id': 'fake',
                'source_type': 'event',
                'created_at': str(created_at),
                'updated_time': str(updated_at)
            }
        }
    }
    post1 = self._create_db_post(**data)
    actor_id, creation_time = unpack_event_id(post1.id)
    # the pre-epoch created_at is discarded in favor of the wrapped updated_time
    self.assertEqual(post1.created_at.replace(microsecond=0), updated_at)
    self.assertEqual(creation_time.replace(microsecond=0), updated_at)
def setUp(self):
    super(TopicCountPlotDataCase, self).setUp()
    self.login()
    self.start_date = timeslot.parse_date('04/24/2013')
    self.end_date = timeslot.parse_date('04/25/2013')
    self.level = 'hour'

    timeline = []
    start_date = timeslot.parse_date('04/24/2013')
    while start_date < self.end_date:
        timeline.append(start_date)
        start_date += timeslot.timedelta(seconds=60 * 60 * 2)  # every 2 hours
    self.time_slots = map(lambda d: datetime_to_timeslot(d, 'hour'), timeline)

    contents = cycle([
        'I need a laptop',                         # laptop, need (intention id=2)
        'My laptop is not working out for me:(',   # laptop, problem (intention id=3)
        'I need a display',                        # display, need
        'My display is not working out for me:(',  # display, problem
    ])

    posts = []
    for _created in timeline:
        post = self._create_db_post(contents.next(), _created=_created)
        posts.append(post)
def get_method_params(self):
    get = self.filters.get
    params = super(SearchRequest, self).get_method_params()
    params.update({
        "include_entities": True,
        "count": self.SEARCH_LIMIT,
        "q": self.build_query(get('keywords')),
        "lang": get('language', get('lang'))
    })
    if not params['q']:
        return None

    start_date = get('start_date')
    end_date = get('end_date')
    if start_date:
        start_date_str = start_date.strftime(self.DATE_FORMAT)
        params['since'] = start_date_str
    if end_date:
        # push a mid-day end date to the next day so the last (partial)
        # day is not cut off by the date-only 'until' bound
        if (end_date.hour, end_date.minute) != (0, 0):
            end_date = end_date + timedelta(days=1)
        end_date_str = end_date.strftime(self.DATE_FORMAT)
        params['until'] = end_date_str
    return params
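# A worked example of the end-date bump above, assuming DATE_FORMAT is a
# day-granularity format such as '%Y-%m-%d' (an assumption; the constant is
# defined elsewhere): a 15:30 end date is pushed to the next day so the
# date-only 'until' bound still covers that afternoon's results.
from datetime import datetime, timedelta

end_date = datetime(2013, 4, 24, 15, 30)
if (end_date.hour, end_date.minute) != (0, 0):
    end_date += timedelta(days=1)
print end_date.strftime('%Y-%m-%d')  # 2013-04-25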
def test_fetch_by_date(self):
    # check start_date: let the fetcher make 2 requests (max response length: 200)
    one_sec = timedelta(seconds=1)
    FakeTwitterApi.restore_settings()
    api = FakeTwitterApi()
    _, start_date = api.DM[300]
    start_date += one_sec
    LOGGER.debug('++++++ DATES: (%s)', (start_date, api.DM[200][1], api.DM[100][1]))
    res = DirectMessagesFetcher(api, **{"start_date": start_date})
    statuses = list(res.fetch())
    self.assertEqual(len(statuses), 300)

    # check the end_date filter
    FakeTwitterApi.restore_settings()
    api = FakeTwitterApi()
    _, start_date = api.DM[300]
    _, end_date = api.DM[150]
    start_date += one_sec
    res = DirectMessagesFetcher(api, **{'start_date': start_date, 'end_date': end_date})
    statuses = list(res.fetch())
    self.assertEqual(len(statuses), 150)

    from solariat.utils.timeslot import parse_datetime
    self.assertTrue(all(start_date <= parse_datetime(s['created_at']) <= end_date
                        for s in statuses))
def test_export_analytics(self):
    with mail.record_messages() as outbox:
        resp = self.do_export_request({
            'channel_id': str(self.channel.id),
            'from': self.created_at_str,
            'to': (self.created_at + timedelta(hours=1)).strftime('%Y-%m-%d %H:%M:%S'),
            'topics': [{'topic': 'laptop', 'topic_type': 'node'}],
            'intentions': [],
            'thresholds': dict(intention=.1),
            'statuses': ['actionable']
        })
        self.assertEqual(resp['message'], ExportPostsView.SUCCESS_MSG_TPL % self.user.email)
        self.assertTrue({'id', 'input_filter_hash', 'state', 'created_at'} < set(resp['task']), resp)

        msg = outbox[0]
        at = msg.attachments[0]
        self.assertTrue(at)

        self.assertEqual(DataExport.objects.count(), 1)
        export_item = DataExport.objects.get()
        self.assertEqual(export_item.state, DataExport.State.SUCCESS)
def setup_posts(self):
    self.created_at = now()
    self.created_at_str = self.created_at.strftime('%Y-%m-%d %H:%M:%S')
    base_tweet = self._create_tweet(
        'parent tweet',
        _created=self.created_at - timedelta(milliseconds=1))
    n = 0
    for content in ('I need a bike. I like Honda.',
                    'Can somebody recommend a sturdy laptop?',
                    'I need an affordabl laptop. And a laptop bag',
                    'Whatever you buy, let it be an Apple laptop',
                    'I would like to have a thin and lightweight laptop.'):
        self._create_tweet(content=content,
                           _created=self.created_at + timedelta(milliseconds=n),
                           in_reply_to=base_tweet)
        n += 1
def gen_id(padding=0, **kw):
    facebook_data = kw.pop('facebook_data', None)
    p_id = FacebookPost.gen_id(
        is_inbound=kw['is_inbound'],
        actor_id=kw['actor_id'],
        in_reply_to_native_id=facebook_data.get('in_reply_to_status_id') if facebook_data else None,
        _created=kw['_created'] + timedelta(milliseconds=padding))
    return p_id
def __getitem__(self, index):
    """Return the data item at :index.

    index=0 is the most recent data item; the bigger the index,
    the older the data.
    """
    id = self._last_id - index * self.id_inc
    created_at = self._to_date - timedelta(seconds=(index * self.created_at_inc_sec))
    return id, created_at
def __init__(self, init_id, data_len, from_date, to_date):
    self.data_len = data_len
    self._to_date = to_date
    self.created_at = to_date
    self.created_at_inc_sec = (to_date - from_date).total_seconds() / data_len
    self.created_at_inc = timedelta(seconds=self.created_at_inc_sec)
    self._last_id = init_id
    self.id = init_id
    self.id_inc = 11
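# For context, a minimal worked example of the indexing scheme implemented by
# __getitem__/__init__ above. The concrete numbers are hypothetical (500 items
# over 5 days, mirroring restore_settings below; init_id is made up), not taken
# from the real class.
from datetime import datetime, timedelta

to_date = datetime(2013, 4, 25)
from_date = to_date - timedelta(days=5)
data_len, init_id, id_inc = 500, 10 ** 6, 11
inc_sec = (to_date - from_date).total_seconds() / data_len  # 864 s between items

def item(index):
    # same arithmetic as __getitem__: index 0 is the newest item
    return init_id - index * id_inc, to_date - timedelta(seconds=index * inc_sec)

assert item(0) == (init_id, to_date)                            # most recent
assert item(300)[1] == to_date - timedelta(seconds=300 * 864)   # 3 days older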
@classmethod
def restore_settings(cls):
    cls.SEARCH_DATA_LENGTH = 500
    cls.TIMELINE_DATA_LENGTH = 500
    cls.DM_DATA_LENGTH = 500
    cls.DM_SENT_DATA_LENGTH = 500
    cls.ALL_DATA_LENGTH = sum([
        cls.SEARCH_DATA_LENGTH,
        cls.TIMELINE_DATA_LENGTH,
        cls.DM_DATA_LENGTH,
        cls.DM_SENT_DATA_LENGTH
    ])
    cls.CREATED_TO = now()
    cls.CREATED_FROM = cls.CREATED_TO - timedelta(days=5)
    cls.RAISE_EXCEPTION = False  # if True, return the first response, fail on the second
    cls.init_next_params()
def handle_rate_limit_error(self, error, path, failed_request_time, log_item):
    manager = FacebookRateLimitInfo.objects
    error_code = self._parse_fb_error_code(error)
    if error_code not in FB_RATE_LIMIT_ERRORS:
        return None

    last_rate_limit_info = manager.get_last_rate_limit_info(
        self.access_token, error_code, path)
    back_off_config = FacebookRateLimitInfo.LIMITS_CONFIG[error_code]
    if last_rate_limit_info:
        # exponential back-off: grow the previous wait time by a factor,
        # capped at the configured maximum
        last_wait_time = last_rate_limit_info.wait_time
        wait_time = timedelta(seconds=min(
            back_off_config.end,
            last_wait_time * back_off_config.factor))
        if utc(last_rate_limit_info.failed_request_time + wait_time) > utc(failed_request_time):
            wait_until = last_rate_limit_info.failed_request_time + wait_time
        else:
            wait_until = timedelta(seconds=back_off_config.start) + failed_request_time
    else:
        wait_until = timedelta(seconds=back_off_config.start) + failed_request_time

    after = last_rate_limit_info and last_rate_limit_info.wait_until
    return manager.add_rate_limit_info(
        self.access_token, error_code, utc(failed_request_time), path,
        utc(wait_until), str(self._channel), after, log_item)
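# To make the back-off arithmetic above concrete: each repeated rate-limit
# error multiplies the previous wait by back_off_config.factor, capped at
# back_off_config.end. The start/factor/end values here are hypothetical,
# not taken from FacebookRateLimitInfo.LIMITS_CONFIG.
start, factor, end = 60, 2, 900  # seconds

wait, schedule = start, []
while wait < end:
    schedule.append(wait)
    wait = min(end, wait * factor)
schedule.append(end)
print schedule  # [60, 120, 240, 480, 900]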
def test_twitter_post(self):
    created_at = TIMESLOT_EPOCH - timedelta(minutes=1)
    content = 'tweet content'
    status_id = fake_status_id()
    data = {
        'content': content,
        'channels': [self.channel],
        'user_profile': {'user_name': 'test_user'},
        'twitter': {
            'id': status_id,
            'created_at': str(created_at),
            'text': content
        }
    }
    post1 = self._create_db_post(**data)
    actor_id, creation_time = unpack_event_id(post1.id)
    # a created_at before TIMESLOT_EPOCH is discarded; the post is stamped with the current time
    self.assertEqual(post1.created_at.replace(microsecond=0), now().replace(microsecond=0))
    self.assertTrue(
        now() - timedelta(seconds=1) <= creation_time <= now() + timedelta(seconds=1),
        creation_time)
def test_integer_id(self):
    def make_id_ts_left(time_slot, dummy):
        components = (
            (time_slot, 22),
            (dummy, 42),
        )
        return pack_components(*components)

    def make_id_ts_right(time_slot, dummy):
        components = ((dummy, 42), (time_slot, 22))
        return pack_components(*components)

    TimeSlotIntegerId.objects.coll.remove()

    from solariat.utils.timeslot import (
        datetime_to_timeslot, parse_date_interval, timedelta, timeslot_to_datetime)

    start_date, end_date = parse_date_interval('02/21/2013', '05/21/2013')
    step = timedelta(hours=24)
    dates = []
    while start_date < end_date:
        dates.append(start_date)
        start_date += step
    assert len(dates) == 90

    data = enumerate(dates[::-1], start=100)
    for dummy, date in data:
        dummy %= 5
        time_slot = datetime_to_timeslot(date)
        id_ = make_id_ts_left(time_slot, dummy)
        doc = TimeSlotIntegerId(id=id_, time_slot=time_slot, dummy=dummy)
        doc.save()

    # fetch an interval
    start_date, end_date = parse_date_interval('03/21/2013', '04/21/2013')
    start_id = make_id_ts_left(datetime_to_timeslot(start_date, 'hour'), 0)
    end_id = make_id_ts_left(datetime_to_timeslot(end_date, 'hour'), 0)
    for doc in TimeSlotIntegerId.objects(id__gte=start_id, id__lte=end_id):
        print timeslot_to_datetime(doc.time_slot)
        self.assertGreaterEqual(doc.time_slot, datetime_to_timeslot(start_date, 'hour'))
        self.assertLessEqual(doc.time_slot, datetime_to_timeslot(end_date, 'hour'))
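# pack_components is used above as a black box; a minimal sketch of the idea
# (an assumed equivalent, not the library's actual implementation): each
# (value, bits) pair is shifted in from the left, so the first component
# lands in the most significant bits.
def pack(*components):
    packed = 0
    for value, bits in components:
        packed = (packed << bits) | (value & ((1 << bits) - 1))
    return packed

# With the 22-bit time slot first (make_id_ts_left), ids sort by time slot,
# which is what lets the test bound time slots with a plain id range query.
a = pack((100, 22), (3, 42))
b = pack((101, 22), (0, 42))
assert a < b  # the later time slot dominates regardless of the low 42 bits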
def test_conversation_recovery_throttled(self):
    from solariat.utils.timeslot import now, timedelta

    post_native_data, expected_conv_id = self.posts_native_data[2]
    fbs = FacebookServiceChannel.objects.create_by_user(
        self.user, title='FBS', posts_tracking_enabled=True)
    post = self._create_db_post(post_native_data['_wrapped_data']['message'],
                                channel=fbs, facebook=post_native_data)
    conv_id = fbs.get_conversation_id(post)
    self.assertEqual(conv_id, expected_conv_id)
    conv = Conversation.objects.get()

    self.login(user=self.user)
    requests = mock.MagicMock()
    params = {'channel': str(fbs.id),
              'limit': 10,
              'reserve_time': 30,
              'mode': 'conversation',
              'token': self.auth_token}

    def assert_requests_get_called_once():
        from solariat_bottle.settings import FBOT_URL, FB_DEFAULT_TOKEN
        url = FBOT_URL + '/json/restore-conversation?token=%s&conversation=%s' % (
            FB_DEFAULT_TOKEN, conv.id)
        requests.request.assert_called_once_with('get', url, verify=False, timeout=None)

    with mock.patch.dict('sys.modules', {'requests': requests}):
        response = self.client.get(get_api_url('queue/fetch'),
                                   data=json.dumps(params),
                                   content_type='application/json',
                                   base_url='https://localhost')
        assert_requests_get_called_once()

        # a second recovery attempt within the throttle window is a no-op
        self.assertFalse(conv.mark_corrupted())
        assert_requests_get_called_once()

        # resetting or aging last_recovery_ts re-enables recovery exactly once
        for update in [dict(unset__last_recovery_ts=True),
                       dict(last_recovery_ts=None),
                       dict(last_recovery_ts=now() - timedelta(hours=1))]:
            conv.update(**update)
            requests.request.reset_mock()
            self.assertTrue(conv.mark_corrupted())
            assert_requests_get_called_once()
            self.assertFalse(conv.mark_corrupted())
            assert_requests_get_called_once()
def last_online(self, reference):
    """Return the last event for the stream referenced by username or
    stream ref, unless that event is SUSPEND; otherwise return None.

    If the last event is ONLINE or KEEP_ALIVE, the bot was probably
    killed with SIGKILL.
    """
    def event_tuple(item):
        if not item:
            return None
        return item['m'], item['u'], item['t'], item['s']

    last_event = None
    ref_id = self.get_ref_id(reference)
    query = {"t": {"$gt": now() - timedelta(hours=1)}, "u": ref_id}
    for item in self.events_coll.find(query).sort([("t", -1), ("_id", -1)]).limit(1):
        if item['m'] != Events.EVENT_SUSPEND:
            last_event = item
    return event_tuple(last_event)
def test_queue_integration(self):
    """Covers the full integration from starting the Subscriber, through
    TwitterTimelineRequest's fetchers and HistoricLoader, up to
    PostCreator.create_post().
    """
    from solariat_bottle.settings import LOGGER
    from solariat_bottle.db.channel.twitter import TwitterServiceChannel
    from solariat_bottle.db.historic_data import QueuedHistoricData
    from solariat_bottle.db.post.base import Post
    from solariat_bottle.daemons.twitter.historics.timeline_request import (
        DirectMessagesRequest, SentDirectMessagesRequest, SearchRequest, UserTimelineRequest)
    from solariat_bottle.db.user_profiles.user_profile import UserProfile

    # reduce the amount of data for this long-running integration test
    FakeTwitterApi.SEARCH_DATA_LENGTH = 50
    FakeTwitterApi.TIMELINE_DATA_LENGTH = 50
    FakeTwitterApi.DM_DATA_LENGTH = 50
    FakeTwitterApi.DM_SENT_DATA_LENGTH = 50
    FakeTwitterApi.ALL_DATA_LENGTH = 200
    FakeTwitterApi.CREATED_FROM = FakeTwitterApi.CREATED_TO - timedelta(days=1)
    FakeTwitterApi.init_next_params()
    SearchRequest.SEARCH_LIMIT = 10
    UserTimelineRequest.FETCH_LIMIT = 20
    DirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 20
    SentDirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 20

    profile = UserProfile.objects.upsert(
        'Twitter', profile_data=dict(user_name='jarvis', user_id='99188210'))
    channel = TwitterServiceChannel.objects.create_by_user(self.user, title='SC')
    channel.add_username(profile.user_name)
    channel.add_keyword(u'keywörd')

    def get_id_date_pair(post_data):
        if 'twitter' in post_data:
            post_data = post_data['twitter']
        return int(post_data['id']), post_data['created_at']

    fetched_data = []

    def _save_tweets(fn):
        def decorated(tweets, *args, **kwargs):
            LOGGER.debug('PUSH_POSTS, len:%s', len(tweets))
            fetched_data.extend([get_id_date_pair(t) for t in tweets])
            return fn(tweets, *args, **kwargs)
        return decorated

    queued_data = []

    def _save_queued_data(method):
        def _method(*args, **kwargs):
            queued_data[:] = [get_id_date_pair(i.solariat_post_data)
                              for i in QueuedHistoricData.objects(subscription=subscription)]
            LOGGER.debug('QUEUED_POSTS, len: %s', len(queued_data))
            self.assertTrue(len(queued_data) == FakeTwitterApi.ALL_DATA_LENGTH,
                            msg="len=%d %s" % (len(queued_data), queued_data))
            self.assertEqual(set(queued_data), set(fetched_data),
                             msg=u"\nqueued =%s\nfetched=%s" % (queued_data, fetched_data))
            return method(*args, **kwargs)
        return _method

    subscription = TwitterRestHistoricalSubscription.objects.create(
        created_by=self.user,
        channel_id=channel.id,
        from_date=FakeTwitterApi.CREATED_FROM,
        to_date=FakeTwitterApi.CREATED_TO)
    subscriber = TwitterHistoricsSubscriber(subscription)
    subscriber.push_posts = _save_tweets(subscriber.push_posts)
    subscriber.historic_loader.load = _save_queued_data(subscriber.historic_loader.load)
    subscriber.start_historic_load()

    self.assertEqual(subscriber.get_status(), SUBSCRIPTION_FINISHED)
    self.assertEqual(
        Post.objects(channels__in=[subscription.channel.inbound,
                                   subscription.channel.outbound]).count(),
        FakeTwitterApi.ALL_DATA_LENGTH)

    # restore the request limits reduced above
    SearchRequest.SEARCH_LIMIT = 100
    UserTimelineRequest.FETCH_LIMIT = 200
    DirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 200
    SentDirectMessagesRequest.DIRECT_MESSAGES_LIMIT = 200
def event_stream(self, n=10, data_format='json', event_type='twitter'):
    from solariat_bottle.db.post.chat import ChatProfile

    templates = {
        'twitter': json.dumps({'content': 'i am a tweet',
                               'user_profile': {'user_name': 'test'},
                               'twitter': {'id': '%(id)s', 'created_at': '%(now)s'}}),
        'facebook': json.dumps({'content': 'i am a tweet',
                                'user_profile': {'user_name': 'test'},
                                'facebook': {'facebook_post_id': '%(id)s',
                                             '_wrapped_data': {'type': 'status',
                                                               'source_type': 'status',
                                                               'source_id': 'somepageid'},
                                             'created_at': '%(now)s'}}),
        'web': json.dumps({'content': '',
                           '_platform': 'Web',
                           'url': 'g-tel.com',
                           'element_html': 'click'}),
        'faq': json.dumps({'content': '',
                           'query': 'How do I dispute an item in my bill?'}),
        'branch': json.dumps({'content': '', '_platform': 'Branch', 'is_inbound': True}),
        'chat': json.dumps({'content': 'Hello. Can anyone tell me which iPhone is best for me?',
                            'chat_data': {'created_at': '%(now)s'}}),
        'voice': json.dumps({'content': 'What is my account balance?',
                             'chat_data': {'created_at': '%(now)s'}}),
        'email': json.dumps({'content': 'Hi by email, what is my account balance?',
                             'email_data': {'cc': [],
                                            'sender': '*****@*****.**',
                                            'recipients': ['*****@*****.**'],
                                            'subject': 'Test',
                                            'created_at': '%(now)s'}}),
        # TODO: NPSOutcome can't be created in a generic way
        # 'voc': json.dumps({'content': 'Good',
        #                    'score': 8,
        #                    'response_type': 'Passive',
        #                    'case_number': 'a1-1',
        #                    'user_profile': None,
        #                    '_created': '%(now)s'}),
    }

    from six import StringIO
    from solariat_bottle.scripts.data_load.demo_helpers import CsvPrinter

    class JsonPrinter(object):
        def __init__(self, stream):
            self.stream = stream

        def write_data(self, data):
            self.stream.write(json.dumps(data))
            self.stream.write('\n')

    stream = StringIO()
    printer = {'json': JsonPrinter, 'csv': CsvPrinter}[data_format](stream)
    for idx in range(n):
        ctx = {'id': idx + 1e6,
               'now': now() - timedelta(seconds=random.choice(xrange(100)))}
        printer.write_data(json.loads(templates[event_type] % ctx))
    stream.seek(0)
    return stream