def test_create_event_message(self):
    """Check that create_event_message fills in the header and body fields.

    One message is built per supported service name, each carrying a
    single event link, and every field of the result is compared with the
    value that was passed in.
    """
    author_id = 1234
    user_id = 'service_user_id'
    event_id = '987654321'
    link_service = 'instagram'
    link_event_id = '543216789'
    payload = {'key': 'value'}

    for service in ('facebook', 'twitter', 'instagram', 'foursquare', 'linkedin'):
        # a fresh link is created for every message, as a caller would do
        link = messages.create_event_link(link_service, link_event_id)
        message = messages.create_event_message(
            service,
            author_id,
            messages.CURRENT_STATE,
            user_id,
            event_id,
            payload,
            [link])

        # header carries the service-qualified message type
        self.assertEqual(message['header']['type'], service + '.event')

        # body echoes the identifying fields and the raw JSON
        body = message['message']
        self.assertEqual(body['tim_author_id'], author_id)
        self.assertEqual(body['state'], messages.CURRENT_STATE)
        self.assertEqual(body['service_author_id'], user_id)
        self.assertEqual(body['service_event_id'], event_id)
        self.assertEqual(body['service_event_json'], payload)

        # the single link is passed through unchanged
        first_link = body['links'][0]
        self.assertEqual(first_link['service_id'], link_service)
        self.assertEqual(first_link['service_event_id'], link_event_id)
def fetch(self, service_id, service_author_id, service_event_id, callback):
    """Fetch the latest version of one Instagram event and hand it to callback.

    Args:
        service_id: accepted for interface compatibility; not used here.
        service_author_id: author id on the service, used to look up the
            author/service mapping.
        service_event_id: id of the media item to re-fetch.
        callback: called once with the event message that was built.
    """
    asm = self.get_author_service_map(service_author_id)

    # use the author's own token when there is one, else the configured one
    token = asm.access_token if asm.access_token else self.oauth_config['user1_access_token']
    query = urllib.urlencode({'access_token': token})

    # fetch latest version of event
    media_url = '{0}{1}{2}?{3}'.format(
        self.oauth_config['endpoint'],
        self.MEDIA_INFO,
        service_event_id,
        query)
    response = json_serializer.load(urllib2.urlopen(media_url))
    media = response['data']

    interpreter = InstagramEventInterpreter(media, asm, self.oauth_config)
    event_id = interpreter.get_id()

    # TODO - unclear if/why the link meta data should be included -- included here because
    #        relationships are not being properly maintained
    links = [
        create_event_link(
            data_access.service.name_to_id('instagram'),
            '_{0}@{1}'.format(self.service_name, asm.author_id)),
    ]

    callback(create_instagram_event(
        asm.author_id,
        CURRENT_STATE,
        service_author_id,
        event_id,
        media,
        links))
def fetch(self, service_author_id, callback):
    """Collect the author's Facebook feed posts, albums, photos and checkins.

    Three collections are walked page by page and ``callback`` is called
    with one event message per accepted item:

    1. feed posts written by the author (photo and checkin posts are
       skipped here because the later passes pick them up);
    2. albums, and the photos inside each album;
    3. checkins made by the author.

    Feed posts and checkins are limited to ``self.MAX_EVENTS`` accepted
    events each and are filtered through ``self.screen_event``. Albums and
    photos are forwarded without screening.

    Args:
        service_author_id: the author's id on Facebook.
        callback: callable that receives each event message.
    """
    super(FacebookEventCollector, self).fetch(service_author_id, callback)

    state = self.fetch_begin(service_author_id)
    self.fetch_log_info(state)

    asm = state['asm']
    endpoint = self.oauth_config['endpoint']

    def next_page_url(page_obj, current_url):
        """Return the next page's URL, or None if absent or unchanged."""
        next_url = None
        if 'paging' in page_obj and 'next' in page_obj['paging']:
            next_url = page_obj['paging']['next']
        # refusing to revisit the URL just fetched guards against looping
        return next_url if next_url and next_url != current_url else None

    def to_epoch(timestamp):
        """Convert a '%Y-%m-%dT%H:%M:%S+0000' string to epoch seconds."""
        parsed = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S+0000")
        return calendar.timegm(parsed.utctimetuple())

    args = {'access_token': asm.access_token}

    # get only events since last update or past year depending on if this
    # is the first collection or not
    if asm.most_recent_event_timestamp:
        since = calendar.timegm((asm.most_recent_event_timestamp - self.MOST_RECENT_OVERLAP).utctimetuple())
    else:
        since = calendar.timegm((datetime.utcnow() - self.NEW_LOOKBACK_WINDOW).utctimetuple())
    args['since'] = since

    # ---- feed posts -------------------------------------------------------
    posts_url = unicode('{0}{1}?{2}').format(endpoint,
                                             self.FEED_COLLECTION,
                                             urllib.urlencode(args))

    total_accepted = 0
    # TODO loop termination on various constraints is not exact
    while posts_url and total_accepted < self.MAX_EVENTS:

        logging.debug('requesting: "%s"', posts_url)
        posts_obj = json_serializer.load(urllib2.urlopen(posts_url))

        for post in posts_obj['data']:

            # currently only interested in posts from the user
            if post['from']['id'] != service_author_id:
                continue

            post_type = post.get('type', None)

            # if this is a status update and there is an action or the
            # user is tagged in the story keep it
            # TODO: check for user in story_tags is experimental
            if post_type == 'status':
                tagged = any(int(entity['id']) == int(service_author_id)
                             for story_tag in post.get('story_tags', {}).itervalues()
                             for entity in story_tag)
                if not post.get('actions') and not tagged:
                    continue

            # skip photo and checkin posts. they will get picked-up by
            # their respective processing below
            if post_type in ('photo', 'checkin'):
                continue

            interpreter = FacebookEventInterpreter(post, asm, self.oauth_config)
            if self.screen_event(interpreter, state):
                total_accepted += 1
                callback(create_facebook_event(asm.author_id,
                                               CURRENT_STATE,
                                               service_author_id,
                                               interpreter.get_id(),
                                               post))

        posts_url = next_page_url(posts_obj, posts_url)

    # ---- albums and photos ------------------------------------------------
    # collect photos for all time if this is the first update; otherwise
    # only collect photos since the last update. Setting since to None
    # and removing the 'since' property from the query args will collect
    # all photos
    if not asm.most_recent_event_timestamp:
        since = None
        del args['since']

    albums_url = unicode('{0}{1}?{2}').format(endpoint,
                                              self.ALBUMS_COLLECTION,
                                              urllib.urlencode({'access_token': asm.access_token}))

    while albums_url:

        albums_obj = json_serializer.load(urllib2.urlopen(albums_url))

        for album in albums_obj.get('data', []):

            # skip photos posted to friend's walls
            if album['type'] == 'friends_walls':
                continue

            created_time = to_epoch(album['created_time'])
            updated_time = to_epoch(album['updated_time'])

            if since is None or created_time >= since or updated_time >= since:

                # set the type to 'album' so it will match what you get when
                # it's directly queried; also makes it easier for the event
                # process to identify it
                album['type'] = 'album'

                # interpret the album itself, not the last feed post seen
                interpreter = FacebookEventInterpreter(album, asm, self.oauth_config)

                # send event message
                callback(create_facebook_event(asm.author_id,
                                               CURRENT_STATE,
                                               service_author_id,
                                               interpreter.get_id(),
                                               album))

            album_id = album['id']

            # check for any new photos in the album (done for every album,
            # whether or not the album itself was reported above)
            photos_url = unicode('{0}{1}{2}?{3}').format(endpoint,
                                                         album_id,
                                                         self.PHOTOS_COLLECTION,
                                                         urllib.urlencode(args))
            while photos_url:

                photos_obj = json_serializer.load(urllib2.urlopen(photos_url))

                for photo in photos_obj.get('data', []):

                    photo['type'] = 'photo'

                    # interpret the photo itself, not the last feed post seen
                    interpreter = FacebookEventInterpreter(photo, asm, self.oauth_config)

                    # event message, linked back to the containing album
                    callback(create_facebook_event(
                        asm.author_id,
                        CURRENT_STATE,
                        service_author_id,
                        interpreter.get_id(),
                        photo,
                        [create_event_link(data_access.service.name_to_id('facebook'), album_id)]))

                photos_url = next_page_url(photos_obj, photos_url)

        albums_url = next_page_url(albums_obj, albums_url)

    # ---- checkins ---------------------------------------------------------
    checkins_url = unicode('{0}{1}?{2}').format(endpoint,
                                                self.CHECKIN_COLLECTION,
                                                urllib.urlencode(args))

    total_accepted = 0
    # TODO loop termination on various constraints is not exact
    while checkins_url and total_accepted < self.MAX_EVENTS:

        checkins_obj = json_serializer.load(urllib2.urlopen(checkins_url))

        for checkin_obj in checkins_obj['data']:

            # filter checkins not directly from this user
            if checkin_obj['from']['id'] != service_author_id:
                continue

            # set the type to checkin. When querying for checkins the
            # type property is missing
            checkin_obj['type'] = 'checkin'

            # interpret and forward the checkin, not the last feed post seen
            interpreter = FacebookEventInterpreter(checkin_obj, asm, self.oauth_config)
            if self.screen_event(interpreter, state):
                total_accepted += 1
                callback(create_facebook_event(asm.author_id,
                                               CURRENT_STATE,
                                               service_author_id,
                                               interpreter.get_id(),
                                               checkin_obj))

        # compare against the checkins URL, not the feed URL
        checkins_url = next_page_url(checkins_obj, checkins_url)

    # terminate the fetch
    self.fetch_end(state)
def fetch(self, service_author_id, callback):
    """Collect the author's recent Instagram media and report each accepted item.

    Pages of media are requested until no next page is advertised or
    ``self.MAX_EVENTS`` items have passed ``self.screen_event``; every
    accepted item is sent to ``callback`` as an event message.

    Args:
        service_author_id: the author's id on Instagram.
        callback: callable that receives each event message.
    """
    super(InstagramEventCollector, self).fetch(service_author_id, callback)

    state = self.fetch_begin(service_author_id)
    self.fetch_log_info(state)

    asm = state["asm"]

    # with the author's own token the media is addressed as "self";
    # otherwise the configured token is used with the explicit author id
    if asm.access_token:
        token = asm.access_token
        media_path = self.USER_MEDIA.format("self")
    else:
        token = self.oauth_config["user1_access_token"]
        media_path = self.USER_MEDIA.format(asm.service_author_id)

    query = {"access_token": token, "count": self.PAGE_SIZE}

    # get only events since last update or within the look-back window,
    # depending on whether this is the first collection or not
    if asm.most_recent_event_timestamp:
        window_start = asm.most_recent_event_timestamp - self.MOST_RECENT_OVERLAP
    else:
        window_start = datetime.utcnow() - self.NEW_LOOKBACK_WINDOW
    query["min_timestamp"] = calendar.timegm(window_start.utctimetuple())

    # url for fetching the first page of posts
    page_url = "{0}{1}?{2}".format(self.oauth_config["endpoint"], media_path, urllib.urlencode(query))

    accepted = 0
    while page_url and accepted < self.MAX_EVENTS:

        page = json_serializer.load(urllib2.urlopen(page_url))

        for media in page.get("data", []):

            interpreter = InstagramEventInterpreter(media, asm, self.oauth_config)
            if not self.screen_event(interpreter, state):
                continue

            accepted += 1

            event_id = interpreter.get_id()
            links = [
                create_event_link(
                    data_access.service.name_to_id("instagram"),
                    "_{0}@{1}".format(self.service_name, asm.author_id),
                )
            ]
            callback(
                create_instagram_event(
                    asm.author_id,
                    CURRENT_STATE,
                    service_author_id,
                    event_id,
                    media,
                    links,
                )
            )

        # setup for the next page (if any)
        if "pagination" in page and "next_url" in page["pagination"]:
            page_url = page["pagination"]["next_url"]
        else:
            page_url = None

    # terminate the fetch
    self.fetch_end(state)