def get_company_profile(cookie, company_id, keyword):
    """Request the first page of a company's people search and return its body.

    Args:
        cookie: Passed through unchanged to ``http.connect``.
        company_id: Interpolated into the ``facetCurrentCompany`` guide of
            the search URL.
        keyword: Optional search keyword. When ``None`` the ``keywords``
            query parameter is left out of the URL entirely.

    Returns:
        The ``text`` attribute of the response object returned by
        ``http.connect``.

    Raises:
        SystemExit: If ``http.connect`` returns ``None``.
    """
    import sys

    # NOTE(review): keyword is interpolated verbatim -- presumably callers
    # pass a value that is already safe to embed in a query string; confirm.
    keyword_param = '' if keyword is None else '&keywords=%s' % keyword
    url = (
        'https://www.linkedin.com/voyager/api/search/cluster?count=40'
        '&guides=List(v->PEOPLE,facetCurrentCompany->%s)%s'
        '&origin=OTHER&q=guided&start=0' % (company_id, keyword_param)
    )

    data = http.connect(url, cookie)
    if data is None:
        logger.red('Unable to authenticate to LinkedIn')
        # sys.exit() instead of the site-provided quit(): quit() is meant for
        # the interactive shell, and a failure should not exit with status 0.
        sys.exit(1)
    return data.text
def company_profile(cookie, company_id, keyword):
    """Request the company's profile search page and return the raw data.

    The URL that is about to be requested is written to the debug log first.

    Args:
        cookie: Passed through unchanged to ``http.connect``.
        company_id: Interpolated into the ``facetCurrentCompany`` guide of
            the search URL.
        keyword: Optional search keyword. When ``None`` the ``keywords``
            query parameter is left out of the URL entirely.

    Returns:
        The ``text`` attribute of the response object returned by
        ``http.connect``.

    Raises:
        SystemExit: If ``http.connect`` returns ``None``.
    """
    import sys

    # NOTE(review): keyword is interpolated verbatim -- presumably callers
    # pass a value that is already safe to embed in a query string; confirm.
    if keyword is None:
        keyword_param = ''
    else:
        keyword_param = '&keywords=%s' % keyword

    url = (
        'https://www.linkedin.com/voyager/api/search/cluster?count=40'
        '&guides=List(v->PEOPLE,facetCurrentCompany->%s)%s'
        '&origin=OTHER&q=guided&start=0' % (company_id, keyword_param)
    )
    # Same message for both URL shapes, so it is logged once here.
    logger.debug('Requesting %s from company_profile()' % url)

    data = http.connect(url, cookie)
    if data is None:
        logger.red('Unable to authenticate to LinkedIn')
        # sys.exit() instead of the site-provided quit(): quit() is meant for
        # the interactive shell, and a failure should not exit with status 0.
        sys.exit(1)
    return data.text
def user_data(results, pages, cookie, company_id, domain, email_format):
    """Fetch a company's people-search pages and extract the users from each.

    Pages are requested 40 results at a time, starting at offset 0, until
    the offset reaches ``results``, page index ``pages`` has been fetched,
    or 24 pages have been fetched -- whichever comes first.

    Args:
        results: Total number of results available for the company.
        pages: Highest page index to attempt (page indexes start at 0).
        cookie: Passed through unchanged to ``http.connect``.
        company_id: Interpolated into the ``facetCurrentCompany`` guide of
            the search URL.
        domain: Passed through to ``extract_data``.
        email_format: Passed through to ``extract_data``.

    Returns:
        A list holding one ``extract_data`` result per fetched page, in page
        order. Empty when nothing was fetched.

    Raises:
        SystemExit: If ``http.connect`` returns ``None`` or the response
            body cannot be parsed as JSON.
    """
    import sys

    # The loop used to bail out when it reached page index 24, so at most
    # 24 pages (indexes 0-23) are ever requested.
    max_pages = 24
    # The API is asked for 40 results per page; with fewer than 40 available
    # the page size shrinks to match so the second offset already hits the
    # "nothing left" check below.
    per_page = results if results < 40 else 40

    users_per_page = []
    for page in range(min(pages + 1, max_pages)):
        # Offsets advance 0, 40, 80, ...
        start = per_page * page
        # Never request an offset at or beyond the number of available results.
        if start >= results:
            break

        url = (
            'https://www.linkedin.com/voyager/api/search/cluster?count=40'
            '&guides=List(v->PEOPLE,facetCurrentCompany->%s)'
            '&origin=OTHER&q=guided&start=%s' % (company_id, start)
        )
        logger.blue('Pulling from page %s' % logger.BLUE(page))

        response = http.connect(url, cookie)
        if response is None:
            # The sibling profile requests treat a None response as an
            # authentication failure; do the same instead of crashing on
            # ``response.text``.
            logger.red('Unable to authenticate to LinkedIn')
            sys.exit(1)

        body = response.text.encode('UTF-8')
        try:
            payload = json.loads(body)
        except ValueError as err:
            # json decoding errors are ValueError subclasses.
            logger.red(err)
            sys.exit(1)

        users_per_page.append(extract_data(payload, domain, email_format))

    return users_per_page
def get_users(data, pages, total_employees, keyword):
    """Download the people-search pages for a company and parse the users.

    Pages are requested 40 results at a time, starting at offset 0, until
    the offset reaches ``total_employees``, page index ``pages`` has been
    fetched, or 24 pages have been fetched -- whichever comes first. The
    decoded JSON of every page is collected and handed to ``parse_users``.

    Args:
        data: Object exposing ``cookie``, ``company_id`` and ``keyword``
            attributes; it is also passed on to ``parse_users``.
        pages: Highest page index to attempt (page indexes start at 0).
        total_employees: Total number of results available for the company.
        keyword: Ignored -- ``data.keyword`` decides whether the keyword
            search URL is used. Kept so existing callers keep working.

    Returns:
        Whatever ``parse_users`` returns for the collected pages.

    Raises:
        SystemExit: If ``http.connect`` returns ``None`` or a response body
            cannot be parsed as JSON.
    """
    import sys

    cookie = data.cookie
    company_id = data.company_id
    # NOTE(review): the ``keyword`` argument has always been overwritten by
    # data.keyword here; confirm callers never expect the argument to win.
    keyword = data.keyword

    # NOTE(review): this writes every attribute of ``data`` (the session
    # cookie included) to the debug log.
    logger.debug(str(vars(data)))

    # The page size does not change between iterations, so decide it once.
    if total_employees < 40:
        logger.debug('Locking users per page to match total_employees')
        per_page = total_employees
    else:
        logger.debug('Locking users per page to 40')
        per_page = 40

    # Running total of people seen across all pages fetched so far.
    people_pulled = 0
    # Decoded JSON of every fetched page, in page order.
    userdata_per_page = []

    for page in range(0, pages + 1):
        if page + 1 == 25:
            logger.debug('Breaking, pages exceed 25')
            break

        # Offsets advance 0, 40, 80, ...
        start = per_page * page
        # Never request an offset at or beyond the number of available results.
        if start >= total_employees:
            break

        # NOTE(review): keyword is interpolated verbatim -- presumably it is
        # already safe to embed in a query string; confirm.
        if keyword is None:
            logger.debug('No keyword set [getting user per page]')
            url = (
                'https://www.linkedin.com/voyager/api/search/cluster?count=40'
                '&guides=List(v->PEOPLE,facetCurrentCompany->%s)'
                '&origin=OTHER&q=guided&start=%s' % (company_id, start)
            )
        else:
            logger.debug('Using keyword %s' % logger.MAGENTA(keyword))
            url = (
                'https://www.linkedin.com/voyager/api/search/cluster?count=40'
                '&guides=List(v->PEOPLE,facetCurrentCompany->%s)'
                '&keywords=%s&origin=OTHER&q=guided&start=%s'
                % (company_id, keyword, start)
            )
        logger.debug('Requesting %s from get_users()' % url)
        logger.blue('Pulling from page %s' % logger.BLUE(page + 1))

        api_response = http.connect(url, cookie)
        if api_response is None:
            # The profile requests treat a None response as an authentication
            # failure; do the same instead of crashing on ``.text``.
            logger.red('Unable to authenticate to LinkedIn')
            sys.exit(1)

        raw = api_response.text.encode('UTF-8')
        try:
            result = json.loads(raw)  # contains data for ~40 people
        except ValueError as err:
            # json decoding errors are ValueError subclasses.
            logger.red(err)
            sys.exit(1)

        # NOTE(review): assumes every page carries at least one cluster under
        # 'elements'; an empty list here raises IndexError -- confirm.
        people_pulled += len(result['elements'][0]['elements'])
        if people_pulled > 0:
            logger.green('Successfully pulled %s users' % logger.GREEN(str(people_pulled)))
        userdata_per_page.append(result)

    # This part could do with threading
    logger.debug('Sending list of json objects to parse_users()')
    return parse_users(data, userdata_per_page, total_employees)