def fromJSONFields(self, fromJSON, auxData=None):
    super(LinkedInConnectEvent, self).fromJSONFields(fromJSON, auxData)

    # query for the connection's profile image and bio
    connection = self.raw_json['updateContent']['person']['connections']['values'][0]
    if connection['id'] != 'private':
        url = 'http://api.linkedin.com/v1/people/id=%s:(headline,summary,picture-url)' % connection['id']
        try:
            # request the connection's headline, summary, and picture
            contentJSON = make_request(auxData, url, {'x-li-format': 'json'})
            contentObj = json_serializer.load_string(contentJSON)
            self.headline = contentObj.get('headline')
            self.summary = contentObj.get('summary')
            self.photo = contentObj.get('pictureUrl')
        except urllib2.URLError:
            # contentJSON is undefined when the request itself fails, so log the URL instead
            self.log.error('***ERROR*** request for connection profile failed: %s' % url)

    return self
def get_author_profile(self, afm, db_session, oauth_config):
    # Create our OAuth consumer instance
    consumer = oauth.Consumer(oauth_config['key'], oauth_config['secret'])
    token = oauth.Token(key=afm.access_token, secret=afm.access_token_secret)
    client = oauth.Client(consumer, token)

    # request the user's profile
    content = make_request(client, 'https://api.twitter.com/1/account/verify_credentials.json')
    str_content = content.decode('utf-8')
    respJSON = json.loads(str_content)
    # print json.dumps(respJSON, sort_keys=True, indent=2)

    profileJSON = {}
    if 'name' in respJSON:
        profileJSON['name'] = respJSON['name']
    if 'location' in respJSON:
        profileJSON['location'] = respJSON['location']
    if 'profile_image_url' in respJSON:
        profileJSON['picture_url'] = respJSON['profile_image_url']
    if 'description' in respJSON:
        profileJSON['headline'] = respJSON['description']
    profileJSON['public_profile_url'] = 'https://twitter.com/#!/%s' % respJSON['screen_name']

    return profileJSON
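# --- assumed helper: make_request ------------------------------------------------
# `make_request(client, url, headers)` is used by nearly every snippet in this
# section but is not defined here. This is only a minimal sketch of what it
# plausibly does, assuming it issues a GET through the signed oauth2 client and
# returns the raw response body; the real helper in the original codebase may
# behave differently (retries, logging, etc.).
import StringIO
import urllib2


def make_request(client, url, headers=None):
    # oauth2.Client subclasses httplib2.Http, so request() returns (response, content)
    resp, content = client.request(url, 'GET', headers=headers or {})
    if resp['status'] != '200':
        # surface failures as urllib2.HTTPError (a subclass of urllib2.URLError),
        # which matches the except clauses used by the callers in this section
        raise urllib2.HTTPError(url, int(resp['status']), 'request failed', resp,
                                StringIO.StringIO(content))
    return content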
def fetch(self, service_id, service_author_id, service_event_id, callback):
    asm = self.get_author_service_map(service_author_id)

    # TODO - temporary until we figure out a better solution for
    # not over-driving Twitter with un-authenticated events
    if not asm.access_token:
        return

    if asm.access_token:
        consumer = oauth.Consumer(self.oauth_config['key'], self.oauth_config['secret'])
        token = oauth.Token(asm.access_token, asm.access_token_secret)
        client = oauth.Client(consumer, token)

    args = {'id': service_event_id, 'include_entities': '1', 'trim_user': '******'}

    # if not authenticated provide the user_id query arg
    if not asm.access_token:
        args['user_id'] = asm.service_author_id

    url = TWEET_STATUS % (self.oauth_config['endpoint'], urllib.urlencode(args))

    # TODO - remove the try/except once we figure out a better solution for not
    # exceeding Twitter's rate limits
    try:
        json_obj = json_serializer.load_string(make_request(client, url)) if asm.access_token \
            else json_serializer.load(urllib2.urlopen(url))
    except urllib2.URLError, e:
        logging.error('ERROR REQUEST URL: {0}'.format(url))
        logging.error('ERROR REASON: {0}, {1}'.format(e.code, e.read()))
        raise
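# --- assumed helper: json_serializer ----------------------------------------------
# `json_serializer` is another project-local module referenced throughout this
# section but not shown. A minimal sketch, assuming it is a thin wrapper over the
# stdlib json module: load_string() for string payloads (as returned by
# make_request) and load() for file-like objects (e.g. urllib2.urlopen(url)).
# The real module may add error handling or custom decoders.
import json


def load_string(s):
    """Parse a JSON document from a string."""
    return json.loads(s)


def load(fp):
    """Parse a JSON document from a file-like object."""
    return json.load(fp)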
def linkedin_callback(request):
    error_msg = None
    author_id = unauthenticated_userid(request)

    # the oauth_token is returned as a query arg; the oauth_token_secret is in the session store
    oauth_token = request.params['oauth_token']
    oauth_token_secret = request.session['oauth_token_secret']
    oauth_verifier = request.params['oauth_verifier']

    # Step 3: Once the consumer has redirected the user back to the oauth_callback
    # URL you can request the access token the user has approved. You use the
    # request token to sign this request. After this is done you throw away the
    # request token and use the access token returned. You should store this
    # access token somewhere safe, like a database, for future use.
    consumer = oauth.Consumer(tim_config['oauth'][SERVICE]['key'], tim_config['oauth'][SERVICE]['secret'])
    token = oauth.Token(oauth_token, oauth_token_secret)
    token.set_verifier(oauth_verifier)
    client = oauth.Client(consumer, token)

    resp, content = client.request(tim_config['oauth'][SERVICE]['access_token_url'], "POST")
    if resp['status'] != '200':
        raise Exception("Invalid response %s (%s)." % (resp['status'], content))

    access_dict = dict(urlparse.parse_qsl(content))

    # these are the real deal and need to be stored securely in the DB
    access_token = access_dict['oauth_token']
    access_token_secret = access_dict['oauth_token_secret']

    # Create our OAuth client with the access token
    token = oauth.Token(key=access_token, secret=access_token_secret)
    client = oauth.Client(consumer, token)

    url = '{endpoint}{resource}'.format(endpoint=tim_config['oauth'][SERVICE]['endpoint'],
                                        resource='people/~:(id)')
    response = make_request(client, url, {'x-li-format': 'json'})
    json_dict = json.loads(response)
    linkedin_id = json_dict['id']

    url = '{endpoint}/v1/authors/{author}/services'.format(endpoint=tim_config['api']['endpoint'],
                                                           author=author_id)
    payload = {'name': SERVICE,
               'access_token': access_token,
               'access_token_secret': access_token_secret,
               'service_author_id': linkedin_id}
    headers = {'content-type': 'application/json; charset=utf-8'}
    cookies = request.cookies

    try:
        r = requests.post(url, data=json.dumps(payload), headers=headers, cookies=cookies)
        r.raise_for_status()
    except requests.exceptions.RequestException, e:
        log.error(e.message)
        if e.response.status_code == 409:
            error_msg = 'Service already exists for this author ({message})'.format(message=e.message)
def fetch(self, service_id, service_author_id, service_event_id, callback):
    asm = self.get_author_service_map(service_author_id)

    consumer = oauth.Consumer(self.oauth_config['key'], self.oauth_config['secret'])
    token = oauth.Token(asm.access_token, asm.access_token_secret)
    client = oauth.Client(consumer, token)

    # check if this event isCommentable or isLikable
    event_json, = db.Session().query(ServiceEvent.json). \
        filter(and_(ServiceEvent.author_service_map_id == asm.id,
                    ServiceEvent.event_id == service_event_id)).one()
    event_obj = json_serializer.load_string(event_json)

    update_obj = None
    if event_obj.get("isCommentable", False):
        url = UPDATE_COMMENTS % (self.oauth_config['endpoint'], service_event_id)
        update_obj = json_serializer.load_string(make_request(client, url, {'x-li-format': 'json'}))

    likes_obj = None
    if event_obj.get("isLikable", False):
        url = UPDATE_LIKES % (self.oauth_config['endpoint'], service_event_id)
        likes_obj = json_serializer.load_string(make_request(client, url, {'x-li-format': 'json'}))

    # merge update and likes together into one object
    if update_obj or likes_obj:
        if update_obj:
            event_obj['updateComments'] = update_obj
        if likes_obj:
            event_obj['isLiked'] = likes_obj['_total'] > 0
            event_obj['numLikes'] = likes_obj['_total']
            event_obj['likes'] = likes_obj

    interpreter = LinkedinEventInterpreter(event_obj, asm, self.oauth_config)

    callback(create_linkedin_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), event_obj))
def linkedin_callback(request):
    # the oauth_token is returned as a query arg; the oauth_token_secret is in the session store
    oauth_token = request.params['oauth_token']
    oauth_token_secret = request.session['oauth_token_secret']
    oauth_verifier = request.params['oauth_verifier']

    # Step 3: Once the consumer has redirected the user back to the oauth_callback
    # URL you can request the access token the user has approved. You use the
    # request token to sign this request. After this is done you throw away the
    # request token and use the access token returned. You should store this
    # access token somewhere safe, like a database, for future use.
    consumer = oauth.Consumer(oauth_config[FEATURE]['key'], oauth_config[FEATURE]['secret'])
    token = oauth.Token(oauth_token, oauth_token_secret)
    token.set_verifier(oauth_verifier)
    client = oauth.Client(consumer, token)

    resp, content = client.request(oauth_config[FEATURE]['access_token_url'], "POST")
    access_token = dict(urlparse.parse_qsl(content))

    # these are the real deal and need to be stored securely in the DB
    accessToken = access_token['oauth_token']
    accessTokenSecret = access_token['oauth_token_secret']

    # Create our OAuth client with the access token
    token = oauth.Token(key=accessToken, secret=accessTokenSecret)
    client = oauth.Client(consumer, token)

    response = make_request(client, 'http://api.linkedin.com/v1/people/~:(id)', {'x-li-format': 'json'})
    respJSON = json.loads(response)
    linkedinId = respJSON['id']

    json_payload = json.dumps({'access_token': accessToken,
                               'access_token_secret': accessTokenSecret,
                               'service_author_id': linkedinId})
    headers = {'Content-Type': 'application/json; charset=utf-8'}
    url = '{0}/v1/authors/{1}/services/{2}'.format(request.registry.settings['mi.api.endpoint'],
                                                   authenticated_userid(request),
                                                   FEATURE)
    log.info(url)

    req = RequestWithMethod(url, 'PUT', json_payload, headers)
    res = urllib2.urlopen(req)
    resJSON = json.loads(res.read())

    try:
        request.session['linkedin_access_token'] = accessToken
        request.session['linkedin_access_token_secret'] = accessTokenSecret
    except Exception, e:
        print e
def twitter_callback(request):
    # the oauth_token is returned as a query arg; the oauth_token_secret is in the session store
    oauth_token = request.params['oauth_token']
    oauth_token_secret = request.session['oauth_token_secret']
    oauth_verifier = request.params['oauth_verifier']

    # Step 3: Once the consumer has redirected the user back to the oauth_callback
    # URL you can request the access token the user has approved. You use the
    # request token to sign this request. After this is done you throw away the
    # request token and use the access token returned. You should store this
    # access token somewhere safe, like a database, for future use.
    consumer_key = oauth_config[FEATURE]['key']
    consumer_secret = oauth_config[FEATURE]['secret']
    consumer = oauth.Consumer(consumer_key, consumer_secret)
    token = oauth.Token(oauth_token, oauth_token_secret)
    client = oauth.Client(consumer, token)
    token.set_verifier(oauth_verifier)

    resp, content = client.request(oauth_config[FEATURE]['access_token_url'], "POST")
    access_token = dict(urlparse.parse_qsl(content))

    # these are the real deal and need to be stored securely in the DB
    oauth_token = access_token['oauth_token']
    oauth_token_secret = access_token['oauth_token_secret']

    token = oauth.Token(oauth_token, oauth_token_secret)
    client = oauth.Client(consumer, token)

    userInfoJSON = json.loads(make_request(client, 'https://api.twitter.com/1/account/verify_credentials.json').decode('utf-8'))

    json_payload = json.dumps({'access_token': oauth_token,
                               'access_token_secret': oauth_token_secret,
                               'service_author_id': userInfoJSON['id']})
    headers = {'Content-Type': 'application/json; charset=utf-8'}
    req = RequestWithMethod('%s/v1/authors/%s/services/%s' % (request.registry.settings['mi.api.endpoint'],
                                                              authenticated_userid(request),
                                                              FEATURE),
                            'PUT', json_payload, headers)
    try:
        res = urllib2.urlopen(req)
        resJSON = json.loads(res.read())
    except HTTPError, e:
        # TODO: handle errors more gracefully here (caused by, e.g., API S3 bucket not existing)
        print e.read()
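# --- assumed helper: RequestWithMethod --------------------------------------------
# The two callbacks above use `RequestWithMethod` to issue PUT requests with
# urllib2, but its definition is not part of this section. This sketch follows the
# common urllib2 pattern of overriding get_method(); the original implementation
# may differ.
import urllib2


class RequestWithMethod(urllib2.Request):

    def __init__(self, url, method, data=None, headers={}):
        self._method = method
        # urllib2.Request is an old-style class in Python 2, so call the base directly
        urllib2.Request.__init__(self, url, data=data, headers=headers)

    def get_method(self):
        # fall back to urllib2's default (GET/POST) when no explicit method is given
        return self._method if self._method else urllib2.Request.get_method(self)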
def init_connection(self):
    connection = self.json["updateContent"]["person"]["connections"]["values"][0]
    if connection["id"] != "private" and self._client is None:
        url = "%speople/id=%s:(headline,summary,picture-url)" % (self.oauth_config["endpoint"], connection["id"])

        # request the connection's headline, summary, and picture
        json_str = make_request(self.get_oauth_client(), url, {"x-li-format": "json"})
        json_obj = json_serializer.load_string(json_str)

        self.headline = json_obj.get("headline")
        self.summary = json_obj.get("summary")
        self.photo = json_obj.get("pictureUrl")
    else:
        self.headline = None
        self.summary = None
        self.photo = None
def get_author_profile(self, service_author_id, asm):
    asm = self.fetch_begin(service_author_id, asm)

    args = {'user_id': asm.service_author_id, 'include_entities': True}

    # Create our OAuth consumer instance
    if asm.access_token:
        consumer = oauth.Consumer(self.oauth_config['key'], self.oauth_config['secret'])
        token = oauth.Token(key=asm.access_token, secret=asm.access_token_secret)
        client = oauth.Client(consumer, token)

    url = '%s%s?%s' % (self.oauth_config['endpoint'], USER_INFO, urllib.urlencode(args))

    # request the user's profile
    json_obj = json_serializer.load_string(make_request(client, url)) if asm.access_token \
        else json_serializer.load(urllib2.urlopen(url))

    profile_json = {}
    if 'name' in json_obj:
        profile_json['name'] = json_obj['name']
    if 'location' in json_obj:
        profile_json['location'] = json_obj['location']
    if 'profile_image_url' in json_obj:
        profile_json['picture_url'] = json_obj['profile_image_url']
    if 'description' in json_obj:
        profile_json['headline'] = json_obj['description']
    profile_json['public_profile_url'] = 'https://twitter.com/#!/%s' % json_obj['screen_name']

    return profile_json
def twitter_callback(request):
    error_msg = None
    author_id = unauthenticated_userid(request)

    # the oauth_token is returned as a query arg; the oauth_token_secret is in the session store
    oauth_token = request.params["oauth_token"]
    oauth_token_secret = request.session["oauth_token_secret"]
    oauth_verifier = request.params["oauth_verifier"]

    # Step 3: Once the consumer has redirected the user back to the oauth_callback
    # URL you can request the access token the user has approved. You use the
    # request token to sign this request. After this is done you throw away the
    # request token and use the access token returned. You should store this
    # access token somewhere safe, like a database, for future use.
    consumer_key = tim_config["oauth"][SERVICE]["key"]
    consumer_secret = tim_config["oauth"][SERVICE]["secret"]
    consumer = oauth.Consumer(consumer_key, consumer_secret)
    token = oauth.Token(oauth_token, oauth_token_secret)
    client = oauth.Client(consumer, token)
    token.set_verifier(oauth_verifier)

    resp, content = client.request(tim_config["oauth"][SERVICE]["access_token_url"], "POST")
    if resp["status"] != "200":
        raise Exception("Invalid response {status} ({message}).".format(status=resp["status"], message=content))

    access_token = dict(urlparse.parse_qsl(content))

    # these tokens are the real deal and need to be passed to the API
    oauth_token = access_token["oauth_token"]
    oauth_token_secret = access_token["oauth_token_secret"]

    token = oauth.Token(oauth_token, oauth_token_secret)
    client = oauth.Client(consumer, token)

    # get some information about the user from twitter
    url = "{endpoint}account/verify_credentials.json".format(endpoint=tim_config["oauth"][SERVICE]["endpoint"])
    json_dict = json.loads(make_request(client, url).decode("utf-8"))

    url = "{endpoint}/v1/authors/{author}/services".format(endpoint=tim_config["api"]["endpoint"], author=author_id)
    payload = {
        "name": SERVICE,
        "access_token": oauth_token,
        "access_token_secret": oauth_token_secret,
        "service_author_id": json_dict["id"],
    }
    headers = {"content-type": "application/json; charset=utf-8"}
    cookies = request.cookies

    try:
        r = requests.post(url, data=json.dumps(payload), headers=headers, cookies=cookies)
        r.raise_for_status()
    except requests.exceptions.RequestException, e:
        log.error(e.message)
        if e.response.status_code == 409:
            error_msg = "Service already exists for this author ({message})".format(message=e.message)
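# --- context: Steps 1 and 2 of the three-legged OAuth 1.0a flow --------------------
# The "Step 3" comments in the callbacks above come from the standard python-oauth2
# flow; Steps 1 and 2 happen in a separate view that is not part of this section.
# A hedged sketch of what that view might do (names like request_token_url and
# authorize_url are assumptions), showing where request.session['oauth_token_secret']
# comes from before the provider redirects the user back to the callback:
import urlparse

import oauth2 as oauth


def begin_oauth(request, consumer_key, consumer_secret, request_token_url, authorize_url):
    consumer = oauth.Consumer(consumer_key, consumer_secret)
    client = oauth.Client(consumer)

    # Step 1: get a request token; the provider will later redirect the user to the
    # oauth_callback registered for this consumer (e.g. twitter_callback/linkedin_callback)
    resp, content = client.request(request_token_url, "GET")
    if resp['status'] != '200':
        raise Exception("Invalid response %s (%s)." % (resp['status'], content))
    request_token = dict(urlparse.parse_qsl(content))

    # stash the token secret so the callback (Step 3) can sign the access-token request
    request.session['oauth_token_secret'] = request_token['oauth_token_secret']

    # Step 2: send the user to the provider's authorization page
    return '%s?oauth_token=%s' % (authorize_url, request_token['oauth_token'])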
def get_author_profile(self, service_author_id, asm):
    asm = self.fetch_begin(service_author_id, asm)

    # setup what we need for oauth
    consumer = oauth.Consumer(self.oauth_config['key'], self.oauth_config['secret'])
    if asm.access_token:
        token = oauth.Token(key=asm.access_token, secret=asm.access_token_secret)
    else:
        token = oauth.Token(self.oauth_config['user1_access_token'],
                            self.oauth_config['user1_access_token_secret'])
    client = oauth.Client(consumer, token)

    url_path = self.PROFILE_INFO if asm.access_token \
        else self.PUBLIC_PROFILE_INFO.format(urllib.quote(asm.service_author_id, ''))
    url = '%s%s' % (self.oauth_config['endpoint'], url_path)

    # request the user's profile
    json_obj = json_serializer.load_string(make_request(client, url, {'x-li-format': 'json'}))

    profile_json = {}

    firstName = lastName = ''
    if 'firstName' in json_obj:
        firstName = profile_json['first_name'] = json_obj['firstName']
    if 'lastName' in json_obj:
        lastName = profile_json['last_name'] = json_obj['lastName']

    # if we have a non-empty string add it to the json
    name = ('%s %s' % (firstName, lastName)).strip()
    if len(name) > 0:
        profile_json['name'] = name

    if 'industry' in json_obj:
        profile_json['industry'] = json_obj['industry']
    if 'headline' in json_obj:
        profile_json['headline'] = json_obj['headline']
    if 'pictureUrl' in json_obj:
        profile_json['picture_url'] = json_obj['pictureUrl']
    if 'location' in json_obj and 'name' in json_obj['location']:
        profile_json['location'] = json_obj['location']['name']
    if 'summary' in json_obj:
        profile_json['summary'] = json_obj['summary']
    if 'specialties' in json_obj:
        profile_json['specialties'] = json_obj['specialties']
    if 'publicProfileUrl' in json_obj:
        profile_json['public_profile_url'] = json_obj['publicProfileUrl']

    if 'positions' in json_obj and 'values' in json_obj['positions']:
        positions = []
        for position in json_obj['positions']['values']:
            position_json = {}
            if 'company' in position:
                if 'name' in position['company']:
                    position_json['company'] = position['company']['name']
                if 'industry' in position['company']:
                    position_json['industry'] = position['company']['industry']
            if 'summary' in position:
                position_json['summary'] = position['summary']
            if 'title' in position:
                position_json['title'] = position['title']
            positions.append(position_json)
        profile_json['positions'] = positions

    return profile_json
def build_one(self, afm, dbSession, oauthConfig, incremental):
    super(TwitterFullCollector, self).build_one(afm, dbSession, oauthConfig, incremental)

    # get the name of the author
    authorName = dbSession.query(Author.author_name).filter_by(id=afm.author_id).one()

    consumer = oauth.Consumer(oauthConfig['key'], oauthConfig['secret'])
    token = oauth.Token(afm.access_token, afm.access_token_secret)
    client = oauth.Client(consumer, token)

    auxData = json.loads(afm.auxillary_data)
    userId = int(auxData['id'])

    try:
        # API endpoint for querying user info
        url = '%s%s' % (oauthConfig['endpoint'], USER_INFO)
        userInfoJSON = json.loads(make_request(client, url))
        twitterUserId = userInfoJSON['id']
        if twitterUserId != userId:
            raise Exception("Bad state - mis-matched twitter user ids")

        profileImageUrl = userInfoJSON['profile_image_url'] if 'profile_image_url' in userInfoJSON else None

        traversal = self.beginTraversal(dbSession, afm, profileImageUrl)

        page = 1
        # API endpoint for getting the user timeline
        url = '%s%s?%s' % (oauthConfig['endpoint'], USER_TIMELINE,
                           urllib.urlencode({'include_rts': '1', 'include_entities': '1', 'count': '200', 'page': page}))
        while url and traversal.totalAccepted < 200:
            content = make_request(client, url)
            try:
                rawJSON = json.loads(content)
            except:
                self.log.error('***ERROR*** parse error')
                self.log.error(content)
                continue

            if len(rawJSON) == 0:
                url = None
                continue

            for post in rawJSON:
                # process the item
                #print json.dumps(post, sort_keys=True, indent=2)
                event = Event.TwitterEvent(afm.author_id).fromJSON(post)
                self.writeEvent(event, traversal)

            # setup for the next page (if any)
            page = page + 1
            url = '%s%s?%s' % (oauthConfig['endpoint'], USER_TIMELINE,
                               urllib.urlencode({'include_rts': '1', 'include_entities': '1', 'count': '200', 'page': page}))

        self.endTraversal(traversal, authorName)
    except Exception, e:
        self.log.error('****ERROR****')
        self.log.error(e)
        dbSession.rollback()
        raise
        #continue
def fetch(self, service_author_id, callback):
    super(TwitterEventCollector, self).fetch(service_author_id, callback)

    state = self.fetch_begin(service_author_id)

    self.fetch_log_info(state)

    asm = state['asm']

    args = {'include_rts': 1, 'include_entities': 1, 'trim_user': 1, 'count': 200}

    # use authenticated access if we can
    if asm.access_token:
        consumer = oauth.Consumer(self.oauth_config['key'], self.oauth_config['secret'])
        token = oauth.Token(asm.access_token, asm.access_token_secret)
        client = oauth.Client(consumer, token)
    else:
        args['user_id'] = asm.service_author_id

    if asm.most_recent_event_id:
        args['since_id'] = asm.most_recent_event_id

    # API endpoint for getting user timeline
    url = '%s%s?%s' % (self.oauth_config['endpoint'], USER_TIMELINE, urllib.urlencode(args))

    min_age = datetime.utcnow() - self.NEW_LOOKBACK_WINDOW

    last_id = None
    while True:
        try:
            raw_json = json_serializer.load_string(make_request(client, url)) if asm.access_token \
                else json_serializer.load(urllib2.urlopen(url))
        except urllib2.URLError, e:
            logging.error('ERROR REQUEST URL: {0}'.format(url))
            logging.error('ERROR REASON: {0}, {1}'.format(e.code, e.read()))
            raise

        # check if nothing returned and terminate loop if so
        if len(raw_json) == 0:
            break

        for post in raw_json:
            # process the item
            #print json.dumps(post, sort_keys=True, indent=2)
            interpreter = TwitterEventInterpreter(post, asm, self.oauth_config)
            last_id = interpreter.get_id()

            # terminate fetching any more events if we've gone beyond the lookback window
            if interpreter.get_create_time() < min_age:
                url = None
                break

            if self.screen_event(interpreter, state):
                callback(create_twitter_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), post))

        if not url:
            break

        # setup for the next page (if any)
        args['max_id'] = long(last_id) - 1
        url = '%s%s?%s' % (self.oauth_config['endpoint'], USER_TIMELINE, urllib.urlencode(args))
def build_one(self, afm, dbSession, oauthConfig, incremental):
    super(LinkedInFullCollector, self).build_one(afm, dbSession, oauthConfig, incremental)

    # get the name of the author
    authorName = dbSession.query(Author.author_name).filter_by(id=afm.author_id).one()

    auxData = json.loads(afm.auxillary_data)
    userId = auxData['id']

    # setup what we need for oauth
    consumer = oauth.Consumer(oauthConfig['key'], oauthConfig['secret'])
    token = oauth.Token(key=afm.access_token, secret=afm.access_token_secret)
    client = oauth.Client(consumer, token)

    try:
        # request the user's profile
        response = make_request(client, 'http://api.linkedin.com/v1/people/~:(picture-url)', {'x-li-format': 'json'})
        respJSON = json.loads(response)
        profileImageURL = respJSON['pictureUrl'] if 'pictureUrl' in respJSON else None

        traversal = self.beginTraversal(dbSession, afm, profileImageURL)

        # optimization to request only those since we've last updated
        args = {'scope': 'self',
                'type': ['APPS', 'CMPY', 'CONN', 'JOBS', 'JGRP', 'PICT', 'PRFX', 'RECU', 'PRFU', 'QSTN', 'SHAR', 'VIRL'],
                'count': PAGE_SIZE}
        # incremental
        if traversal.baselineLastUpdateTime:
            # since a little before the last update time
            args['after'] = '%s000' % int(mktime(traversal.baselineLastUpdateTime.timetuple()))
        # full
        else:
            # limit to only one year of data
            args['after'] = '%s000' % int(mktime((traversal.now - FULL_LOOKBACK_WINDOW).timetuple()))

        offset = 0
        # args['start'] = offset
        url = '%s?%s' % ('http://api.linkedin.com/v1/people/~/network/updates', urllib.urlencode(args, True))
        while url and traversal.totalAccepted < 200:
            # request the user's updates
            content = make_request(client, url, {'x-li-format': 'json'})
            try:
                rawJSON = json.loads(content)
            except:
                self.log.error('***ERROR*** parse error')
                self.log.error(content)
                continue

            # print json.dumps(rawJSON, sort_keys=True, indent=2)
            if rawJSON.get('_total', 0) == 0:
                url = None
                continue

            LinkedInEvent.eventsFromJSON(self, rawJSON, traversal, afm.author_id, userId, client)

            # setup for the next page (if any)
            if rawJSON['_total'] < PAGE_SIZE:
                url = None
            else:
                offset = offset + PAGE_SIZE
                # args['start'] = offset
                url = '%s?%s' % ('http://api.linkedin.com/v1/people/~/network/updates', urllib.urlencode(args, True))

        self.endTraversal(traversal, authorName)
    except Exception, e:
        self.log.error('****ERROR****')
        self.log.error(e)
        dbSession.rollback()
        raise
        # continue
def get_author_profile(self, afm, db_session, oauth_config):
    # Create our OAuth consumer instance
    consumer = oauth.Consumer(oauth_config['key'], oauth_config['secret'])
    token = oauth.Token(key=afm.access_token, secret=afm.access_token_secret)
    client = oauth.Client(consumer, token)

    # request the user's profile
    response = make_request(client,
                            'http://api.linkedin.com/v1/people/~:(first-name,last-name,headline,public-profile-url,picture-url,location:(name),industry,summary,specialties,associations,honors,interests,positions:(title,summary,company))',
                            {'x-li-format': 'json'})
    respJSON = json.loads(response)
    # print json.dumps(respJSON, sort_keys=True, indent=2)

    profileJSON = {}

    firstName = lastName = ''
    if 'firstName' in respJSON:
        firstName = profileJSON['first_name'] = respJSON['firstName']
    if 'lastName' in respJSON:
        lastName = profileJSON['last_name'] = respJSON['lastName']

    # if we have a non-empty string add it to the json
    name = ('%s %s' % (firstName, lastName)).strip()
    if len(name) > 0:
        profileJSON['name'] = name

    if 'industry' in respJSON:
        profileJSON['industry'] = respJSON['industry']
    if 'headline' in respJSON:
        profileJSON['headline'] = respJSON['headline']
    if 'pictureUrl' in respJSON:
        profileJSON['picture_url'] = respJSON['pictureUrl']
    if 'location' in respJSON and 'name' in respJSON['location']:
        profileJSON['location'] = respJSON['location']['name']
    if 'summary' in respJSON:
        profileJSON['summary'] = respJSON['summary']
    if 'specialties' in respJSON:
        profileJSON['specialties'] = respJSON['specialties']
    if 'publicProfileUrl' in respJSON:
        profileJSON['public_profile_url'] = respJSON['publicProfileUrl']

    if 'positions' in respJSON and 'values' in respJSON['positions']:
        positions = []
        for position in respJSON['positions']['values']:
            positionJSON = {}
            if 'company' in position:
                if 'name' in position['company']:
                    positionJSON['company'] = position['company']['name']
                if 'industry' in position['company']:
                    positionJSON['industry'] = position['company']['industry']
            if 'summary' in position:
                positionJSON['summary'] = position['summary']
            if 'title' in position:
                positionJSON['title'] = position['title']
            positions.append(positionJSON)
        profileJSON['positions'] = positions

    return profileJSON
def fetch(self, service_author_id, callback):
    super(LinkedinEventCollector, self).fetch(service_author_id, callback)

    state = self.fetch_begin(service_author_id)

    self.fetch_log_info(state)

    asm = state['asm']

    # if this author has no access_token they are unauthorized and we
    # don't collect LinkedIn events for them
    if not asm.access_token:
        return

    service_author_id = asm.service_author_id
    min_age = datetime.utcnow() - self.NEW_LOOKBACK_WINDOW

    # setup what we need for oauth
    consumer = oauth.Consumer(self.oauth_config['key'], self.oauth_config['secret'])
    token = oauth.Token(key=asm.access_token, secret=asm.access_token_secret)
    client = oauth.Client(consumer, token)

    args = {'scope': 'self', 'count': self.PAGE_SIZE}

    # get only events since the last update, or within the lookback window, depending
    # on whether this is the first collection or not
    if asm.most_recent_event_timestamp:
        after = calendar.timegm((asm.most_recent_event_timestamp - self.MOST_RECENT_OVERLAP).utctimetuple()) * 1000
    else:
        after = calendar.timegm((datetime.utcnow() - self.NEW_LOOKBACK_WINDOW).utctimetuple()) * 1000
    args['after'] = after

    offset = 0
    args['start'] = offset
    url = '%s%s?%s' % (self.oauth_config['endpoint'], UPDATE_RESOURCE, urllib.urlencode(args, True))

    total_count = 0
    while url:
        # request the user's updates
        raw_json = json_serializer.load_string(make_request(client, url, {'x-li-format': 'json'}))
        if raw_json is None or raw_json.get('_total', 0) == 0:
            url = None
            break

        for post in raw_json.get('values', []):
            update_type = post['updateType']
            if update_type in self.SUPPORTED_TYPES:
                if update_type == 'CONN' and post['updateContent']['person']['id'] == service_author_id:
                    # the response can contain multiple connections that the member has made. We'll
                    # separate them into individual responses
                    postClone = copy.deepcopy(post)
                    for connection in post['updateContent']['person']['connections']['values']:
                        postClone['updateContent']['person']['connections'] = {"_total": 1, "values": [copy.deepcopy(connection)]}
                        interpreter = LinkedinEventInterpreter(postClone, asm, self.oauth_config)
                        if interpreter.get_create_time() < min_age:
                            url = None
                            break
                        if self.screen_event(interpreter, state):
                            callback(create_linkedin_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), postClone))
                elif (update_type == 'PREC' or update_type == 'SVPR') and post['updateContent']['person']['id'] == service_author_id:
                    interpreter = LinkedinEventInterpreter(post, asm, self.oauth_config)
                    if interpreter.get_create_time() < min_age:
                        url = None
                        break
                    if self.screen_event(interpreter, state):
                        callback(create_linkedin_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), post))
                elif update_type == 'SHAR':
                    interpreter = LinkedinEventInterpreter(post, asm, self.oauth_config)
                    if interpreter.get_create_time() < min_age:
                        url = None
                        break
                    if self.screen_event(interpreter, state):
                        callback(create_linkedin_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), post))
                elif update_type == 'MSFC' and post['updateContent']['companyPersonUpdate']['person']['id'] == service_author_id:
                    interpreter = LinkedinEventInterpreter(post, asm, self.oauth_config)
                    if interpreter.get_create_time() < min_age:
                        url = None
                        break
                    if self.screen_event(interpreter, state):
                        callback(create_linkedin_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), post))
                elif update_type == 'JOBP' and post['updateContent']['job']['jobPoster']['id'] == service_author_id:
                    interpreter = LinkedinEventInterpreter(post, asm, self.oauth_config)
                    if interpreter.get_create_time() < min_age:
                        url = None
                        break
                    if self.screen_event(interpreter, state):
                        callback(create_linkedin_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), post))
                elif update_type == 'JGRP' and post['updateContent']['person']['id'] == service_author_id:
                    # the response can contain multiple groups that the member has joined. We'll
                    # separate them into individual responses
                    postClone = copy.deepcopy(post)
                    for group in post['updateContent']['person']['memberGroups']['values']:
                        postClone['updateContent']['person']['memberGroups'] = {"_total": 1, "values": [copy.deepcopy(group)]}
                        interpreter = LinkedinEventInterpreter(postClone, asm, self.oauth_config)
                        if interpreter.get_create_time() < min_age:
                            url = None
                            break
                        if self.screen_event(interpreter, state):
                            callback(create_linkedin_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), postClone))
                elif update_type == 'STAT' and post['updateContent']['person']['id'] == service_author_id:
                    interpreter = LinkedinEventInterpreter(post, asm, self.oauth_config)
                    if interpreter.get_create_time() < min_age:
                        url = None
                        break
                    if self.screen_event(interpreter, state):
                        callback(create_linkedin_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), post))
            else:
                if not update_type in self.IGNORED_TYPES:
                    logging.warning('???? skipping linkedIn event: %s' % update_type)

            # if the url has been cleared stop processing this page
            if not url:
                break

        # if the url has been cleared stop paging
        if not url:
            break

        total_count = total_count + raw_json['_count'] if '_count' in raw_json else raw_json['_total']
        if raw_json['_total'] == total_count:
            url = None
            break

        offset = offset + self.PAGE_SIZE
        args['start'] = offset
        url = '%s%s?%s' % (self.oauth_config['endpoint'], UPDATE_RESOURCE, urllib.urlencode(args, True))

    print total_count

    # terminate the fetch
    self.fetch_end(state)