def linkedin(self):
    """Collect LinkedIn data for the nickname stored in ``self.data``.

    The login and password are read from the ``LINKEDIN_LOGIN`` and
    ``LINKEDIN_PASSWORD`` environment variables. The profile, contact-info
    and network-info dictionaries are merged in that order (later keys
    overwrite earlier ones); the skills list and the stored profile URL are
    then added under ``'skills'`` and ``'url'``.

    Returns:
        dict: the merged LinkedIn data.
    """
    site = self.data['sites']['linkedin.com']
    url = site['url']
    nickname = site['nickname']

    client = Linkedin(
        os.environ.get('LINKEDIN_LOGIN'),
        os.environ.get('LINKEDIN_PASSWORD'),
    )

    merged = {}
    merged.update(client.get_profile(nickname))
    merged.update(client.get_profile_contact_info(nickname))
    merged.update(client.get_profile_network_info(nickname))
    merged['skills'] = client.get_profile_skills(nickname)
    merged['url'] = url
    return merged
def getName(account, link, password='Europa007'):
    """Return ``"<firstName> <lastName>"`` for the profile behind ``link``.

    Args:
        account: login of the LinkedIn account used to authenticate.
        link: profile URL. Newline characters are removed and the fifth
            ``/``-separated segment is used as the profile identifier.
        password: password for ``account``. The default is the value that
            used to be hard-coded here, so existing two-argument callers
            behave as before.
            NOTE(review): move this secret out of the source (environment
            variable or config) and pass it in explicitly.

    Returns:
        str: first and last name separated by a single space.

    Raises:
        IndexError: if ``link`` has fewer than five ``/``-separated segments.
    """
    api = Linkedin(account, password)
    # Avoid shadowing the ``id`` builtin, as the original local name did.
    profile_id = link.replace("\n", "").split("/")[4]
    profile = api.get_profile(profile_id)
    return "{} {}".format(profile['firstName'], profile['lastName'])
class LinkedinEngine:
    """Small facade over an authenticated ``Linkedin`` client."""

    def __init__(self):
        # Credentials come from the module-level constants.
        self.api = Linkedin(LINKEDIN_USER_NAME, LINKEDIN_PASSWORD)

    def search_company(self, company_name):
        """Run a people search using ``company_name`` as the keywords."""
        return self.api.search_people(keywords=company_name)

    def get_profile(self, user_name):
        """Return contact info and profile data merged into one mapping.

        Sleeps a random whole number of seconds (0 to 3) before issuing the
        requests. ``user_name`` is passed through ``url_parse`` first. On
        key collisions the profile values win, because the profile mapping
        is merged last.
        """
        pause = random.randint(0, 3)
        time.sleep(pause)

        identifier = url_parse(user_name)
        contact_info = self.api.get_profile_contact_info(identifier)
        profile = self.api.get_profile(identifier)
        return toolz.merge(contact_info, profile)
def postJsonHandler():
    """Return name and profile link of up to five first-degree connections.

    Reads ``email`` and ``password`` from the JSON request body, logs in,
    searches people with ``network_depth='F'`` and ``limit=5``, and fetches
    each hit's profile by its ``urn_id``.

    Returns:
        A JSON response holding a list of ``{'name': ..., 'link': ...}``.
    """
    payload = request.get_json()
    client = Linkedin(payload['email'], payload['password'])

    def summarise(hit):
        # One extra profile request per search hit.
        details = client.get_profile(hit['urn_id'])
        return {
            'name': details['firstName'] + ' ' + details['lastName'],
            'link': 'https://www.linkedin.com/in/' + hit['public_id'] + '/',
        }

    hits = client.search_people(network_depth='F', limit=5)
    return jsonify([summarise(hit) for hit in hits])
def do(self):
    """Fetch the LinkedIn profile of every user that has a ``linkedin_url``.

    Each fetched profile is handed to ``getProfile(username, profile)``,
    followed by a two second pause. The job is best effort: a failed login
    ends the run, and a failure for one user is reported and skipped.
    """
    users = User.objects.all()
    try:
        api = Linkedin('*****@*****.**', 'marvm123')
    except Exception as error:
        # Without a client every lookup below would fail, so stop here
        # instead of looping over all users with a dummy ``api`` value.
        print("Unable to handle linkedin api check for network connections...")
        print(error)
        return

    for user in users:
        if not user.linkedin_url:
            continue
        username = user.username
        # Reduce the stored URL to the public profile identifier.
        profile_id = user.linkedin_url.replace(
            'https://www.linkedin.com/in/', '').replace('/', '')
        try:
            profile = api.get_profile(profile_id)
            getProfile(username, profile)
            time.sleep(2)
        except Exception as error:
            # ``Exception`` (not a bare except) keeps Ctrl-C and SystemExit
            # working while still skipping a single bad profile.
            print("Skipping linkedin profile of {}: {}".format(username, error))
def main():
    """Search employees of a company and write guessed e-mail addresses.

    Uses the module-level ``email`` / ``password`` credentials and the
    options returned by ``get_options()`` (``keyword``, ``emailformat``,
    ``seperator``). Every address has the form
    ``firstname<seperator>lastname@<emailformat>`` (lower-cased) and is
    written to ``mails_output.txt``.
    """
    # NOTE(review): this prints the account password to stdout.
    print(email)
    print(password)
    options = get_options()
    keyword = options.keyword
    emailformat = options.emailformat
    seperator = options.seperator
    print("[+] emailformat : ", emailformat)
    print("[+] seperator : ", seperator)
    print("[+] keyword : ", keyword)
    print("[+] output example : firstname" + seperator + "lastname@" +
          emailformat)

    # Authenticate using any Linkedin account credentials
    api = Linkedin(email, password)
    people = api.search_people(keyword_company=keyword)
    print(people)
    # A leftover ``exit()`` used to stop the script here, which made the
    # harvesting loop and the output file below unreachable.

    results = []
    for person in people:
        try:
            user = api.get_profile(person['public_id'])
            # Use the configured separator, as the printed example promises.
            local_part = user['firstName'] + seperator + user['lastName']
        except Exception as e:
            # Skip this person; previously execution fell through with an
            # unbound or stale ``user`` from the preceding iteration.
            print(e)
            continue
        # A distinct local name keeps the global ``email`` credential intact.
        address = local_part + "@" + emailformat + "\n"
        results.append(address.lower())
        print(local_part)

    with open("mails_output.txt", "w+") as output:
        for address in results:
            output.write(address)
            output.write("\n")
import json

from linkedin_api import Linkedin

# Load the account credentials from the local JSON file.
with open('credentials.json') as f:
    credentials = json.loads(f.read())

# Nothing is fetched when the credentials file holds an empty value.
if credentials:
    linkedin = Linkedin(credentials['username'], credentials['password'])

    # Fetch the profile, then attach its contact information to it.
    profile = linkedin.get_profile('ACoAABQ11fIBQLGQbB1V1XPBZJsRwfK5r1U2Rzw')
    profile.update(
        contact_info=linkedin.get_profile_contact_info(
            'ACoAABQ11fIBQLGQbB1V1XPBZJsRwfK5r1U2Rzw'))

    # The connections lookup is keyed by the profile_id found in the profile.
    connections = linkedin.get_profile_connections(profile['profile_id'])
def linkedinextract(id):
    """Show LinkedIn data next to the submitted CV data, or register a login.

    On every request the LinkedIn profile and contact info are fetched and
    the e-mail stored for user ``id`` is read from the ``user`` table. On
    POST the submitted ``uemail`` / ``upassword`` are inserted into
    ``cv_reg`` and the client is redirected to ``clogin``; otherwise
    ``linkedin.html`` is rendered.

    Args:
        id: primary key of the row in the ``user`` table.
    """
    args = request.args
    url11 = args.get('url11')
    anna = args.get('aname')
    anem = args.get('aemail')
    anad = args.get('aaddress')
    anph = args.get('aphone')
    anni = args.get('anic')
    skills1 = args.get('dskills')
    name1 = args.get('dname')
    projects1 = args.get('dproject')
    degree1 = args.get('ddeg')
    univercity1 = args.get('duni')
    experience11 = args.get('dexp')
    phone1 = args.get('dmobile')
    address1 = args.get('daddress')
    # NOTE(review): this reads 'daddress' again; it looks like a copy/paste
    # slip for the NIC field. Confirm the intended query key.
    nic1 = args.get('daddress')
    print(url11)

    # NOTE(review): the requested profile is overridden by a fixed one and
    # the account credentials are hard-coded; confirm both are intended.
    url11 = 'ravindu-landekumbura-19950214'
    linkedin1 = Linkedin('*****@*****.**', 'net@telecom')
    linkprofile = linkedin1.get_profile(url11)
    print(linkprofile)
    contact = linkedin1.get_profile_contact_info(url11)
    print(contact)

    # The template receives JSON-encoded strings for most LinkedIn fields.
    lname = [json.dumps(linkprofile['firstName'])]
    lskills = [json.dumps(skill['name']) for skill in linkprofile['skills']]
    lexperience = [
        json.dumps((ex['companyName'], ex['title']))
        for ex in linkprofile['experience']
    ]

    university1 = linkprofile['education']
    print(university1)
    luniversity = []
    ldegree = []
    for sch in university1:
        school = sch['school']
        print(school)
        luniversity.append(school['schoolName'])
        ldegree.append(sch['degreeName'])

    lluniversity = json.dumps(luniversity)
    lldegree = json.dumps(ldegree)
    llskills = json.dumps(lskills)
    lemail1 = json.dumps(contact['email_address'])
    lmobile = json.dumps(contact['phone_numbers'])

    mycursor.execute("SELECT email from user where id=%s;", [id])
    rows = mycursor.fetchall()
    # Default to an empty string so an unknown id no longer raises NameError
    # when the template is rendered.
    email1 = ''
    for ele in rows:
        email1 = json.dumps(ele[0]).replace('[]', "")

    if request.method == 'POST':
        uid = id
        uemail = request.form['uemail']
        upassword = request.form['upassword']
        # NOTE(review): the password is stored in plain text and then placed
        # in the redirect URL; it should be hashed and kept out of the URL.
        sql2 = "INSERT INTO cv_reg (id,email,password) VALUES (%s, %s, %s)"
        mycursor.execute(sql2, (uid, uemail, upassword))
        mydb.commit()
        return redirect(
            url_for('clogin', id1=uid, nameu=uemail, passu=upassword))

    return render_template('linkedin.html',
                           urlx=url11,
                           skillsx=skills1,
                           namex=name1,
                           emailx=email1,
                           projectsx=projects1,
                           degreesx=degree1,
                           universityx=univercity1,
                           experiencex=experience11,
                           mobilex=phone1,
                           addressx=address1,
                           linkedinx=url11,
                           nicx=nic1,
                           namexx=lname,
                           skillsxx=llskills,
                           experiencexx=lexperience,
                           unversityxx=lluniversity,
                           degreexx=lldegree,
                           emailxx=lemail1,
                           mobilexx=lmobile,
                           aname=anna,
                           aemail=anem,
                           aaddress=anad,
                           aphone=anph,
                           anic=anni,
                           ski=lskills,
                           len=len(lskills))
email = '*****@*****.**'
password = '******'

# In[4]:

# Authenticate using any Linkedin account credentials
api = Linkedin(email, password)

# In[5]:

username = '******'

# In[6]:

# Get a profile
profile = api.get_profile(username)

# # Extract Experience

# In[7]:

for a in profile['experience']:
    print('Company Name :', a['companyName'])
    print('Title :', a['title'])
    print('TimePeriod :', str(a['timePeriod']['startDate']['year']))
    # Not every experience entry has a description. Test for the key
    # explicitly instead of hiding every possible error with a bare except.
    if 'description' in a:
        print('Description :', a['description'])
    print()
network_depth='F', limit=100) first_degree = [] for item in connections: # if (item['distance'] == 'DISTANCE_1'): # Increment the existing user's count. first_degree.append(item) querry = [] for item in first_degree: data = {} value = item['urn_id'] retrieve = api.get_profile(value) name = retrieve['firstName'] + ' ' + retrieve['lastName'] experience = retrieve['experience'] skills = retrieve['skills'] link = 'https://www.linkedin.com/in/' + item['public_id'] + '/' data['name'] = name data['link'] = link data['skills'] = skills data['experience'] = experience querry.append(data) with open('connections.txt', 'w') as file: file.write(json.dumps(first_degree)) with open('profileDetails.txt', 'w') as file: file.write(json.dumps(querry))
def getConnections(req: func.HttpRequest) -> func.HttpResponse:
    """Return up to five cached connections matching a tag/keyword filter.

    The JSON body supplies ``tag``, ``keyword``, ``email`` and ``password``.
    If ``<email>.txt`` does not exist yet, first-degree connections are
    fetched from LinkedIn, summarised and written to that file. The file is
    then read back and filtered by ``tag`` (``location``, ``company``,
    ``title`` or ``skills``) using a case-insensitive substring match.

    Returns:
        An HTTP response whose body is the JSON list of matches, each
        produced by ``populateData``.
    """
    results = []
    content = req.get_json()
    # Log the request without the caller's LinkedIn password.
    logging.info(
        {key: value for key, value in content.items() if key != 'password'})
    tag = content['tag'].lower()
    keyword = content['keyword'].lower()
    email = content['email']
    # NOTE(review): the cache file name is built from caller-supplied input;
    # validate or hash the e-mail before using it as a path.
    cache_file = email + '.txt'

    if not path.exists(cache_file):
        api = Linkedin(content['email'], content['password'])
        logging.info("Starting Search People")
        connections = api.search_people()

        filteredConnections = []
        for connection in connections:
            public_id = connection['public_id']
            # ``<= 5`` lets up to six distinct first-degree ids through.
            if (public_id not in filteredConnections
                    and connection['distance'] == 'DISTANCE_1'
                    and len(filteredConnections) <= 5):
                filteredConnections.append(public_id)

        for public_id in filteredConnections:
            retrieve = api.get_profile(public_id)
            name = retrieve['firstName'] + ' ' + retrieve['lastName']
            data = {
                'name': name,
                'link': 'https://www.linkedin.com/in/' + public_id + '/',
                'initials': ''.join(
                    letter[0].upper() for letter in name.split()),
            }
            if 'skills' in retrieve:
                data['skills'] = retrieve['skills']
            data['headline'] = retrieve['headline']
            if 'experience' in retrieve and len(retrieve['experience']) > 0:
                data['company'] = retrieve['experience'][0]['companyName']
                data['title'] = retrieve['experience'][0]['title']
            if 'locationName' in retrieve:
                data['location'] = retrieve['locationName']
            elif 'geoLocationName' in retrieve:
                data['location'] = retrieve['geoLocationName']
            # NOTE(review): ``profilesInfo`` is not defined in this function.
            # If it is a module-level object, entries accumulate across
            # requests and end up in other callers' cache files - confirm.
            profilesInfo['people'].append(data)

        with open(cache_file, 'w') as file:
            file.write(json.dumps(profilesInfo))

    with open(cache_file, 'r') as file:
        profiles = json.load(file)

    for profile in profiles['people']:
        if len(results) >= 5:
            break
        if tag in ('location', 'company', 'title'):
            # These three filters are plain substring checks on one field.
            matched = tag in profile and keyword in profile[tag].lower()
        elif tag == 'skills' and 'skills' in profile:
            # NOTE(review): the first skill is skipped by ``[1:]``; kept as
            # in the original, confirm that this is intended.
            matched = any(
                keyword in s['name'].lower() for s in profile['skills'][1:])
        else:
            matched = False
        if matched:
            results.append(populateData(profile))

    return func.HttpResponse(json.dumps(results))
class Session:
    """Application state for the scraper: login, log, config and sheet.

    ``config.json`` in the working directory stores saved logins under
    ``users``, the scrape history under ``history`` and the GUI ``theme``.
    Scraped profiles are appended to ``sheet_path`` as CSV or Excel,
    depending on ``sheet_type``.
    """

    def __init__(self):
        self.version = '1.3.1'
        self.username = None
        self.password = None
        self.authenticated = False

        # sheet properties
        self.sheet_path = None
        self.sheet_type = None
        self.default_sheet_type = 'excel'

        # keep track of parse counts in memory
        self.total_parsed = 0
        self.parsed = 0

        # additional options
        self.log_filename = 'liscrape-log.log'
        self.ignore_duplicates = False
        self.debug = False

        # gui
        self.gui = GUI(self)

        # history, load validity
        self.history = History(self)
        self.history.history = self.history.load()
        self.history.check_validity()

    # ------------------------------------------------------------------ log

    def start_log(self):
        """Configure the root logger to write to ``self.log_filename``."""
        logging.basicConfig(filename=self.log_filename,
                            level=logging.DEBUG,
                            format='%(asctime)s %(message)s',
                            datefmt='%d/%m/%Y %H:%M:%S')

    def get_log_length(self):
        """Return the number of lines in the log file (0 if it is missing)."""
        if not os.path.isfile(self.log_filename):
            return 0
        with open(self.log_filename, 'r') as log_file:
            return sum(1 for _ in log_file)

    def load_log(self):
        """Return the log file contents, or a placeholder when it is empty."""
        if self.get_log_length() == 0:
            return '-- Log is empty --\n'
        # Read this instance's log; the original used a global ``session``.
        with open(self.log_filename, 'r') as log_file:
            return log_file.read()

    def clear_log(self):
        """Delete the log file, restart logging and refresh the GUI."""
        try:
            logging.shutdown()
        except Exception as e:
            sg.popup(traceback.format_exc())
            logging.exception(f'Exception attempting to shutdown logging: {e}')
            return

        if not os.path.isfile(self.log_filename):
            sg.popup('Nothing to remove!')
            return

        os.remove(self.log_filename)
        sg.popup(f'Log file {self.log_filename} successfully removed!')
        # restart log, refresh log length
        self.start_log()
        self.gui.window['log_length'].update(
            f'Log file length: {self.get_log_length()} lines')
        self.gui.window['output_window'].update(self.load_log())

    # --------------------------------------------------------------- config

    @staticmethod
    def _read_config():
        """Return the parsed ``config.json``, or a fresh default if missing."""
        if not os.path.isfile('config.json'):
            return {'users': {}, 'history': {}, 'theme': None}
        with open('config.json', 'r') as config_file:
            return json.load(config_file)

    @staticmethod
    def _write_config(config):
        """Write ``config`` to ``config.json`` with 4-space indentation."""
        with open('config.json', 'w') as config_file:
            json.dump(config, config_file, indent=4)

    @staticmethod
    def _read_config_or_discard():
        """Return the parsed config, or ``None`` after deleting a corrupt one.

        The file is removed only after it has been closed, which also works
        on platforms that refuse to delete an open file.
        """
        try:
            with open('config.json', 'r') as config_file:
                return json.load(config_file)
        except Exception as error:
            logging.exception(error)
            os.remove('config.json')
            return None

    def remove_contacts(self):
        """Delete the default Excel and CSV contact files if they exist."""
        if os.path.isfile('linkedin_scrape.xlsx'):
            os.remove('linkedin_scrape.xlsx')
            sg.popup('Contacts file linkedin_scrape.xlsx removed!')
        if os.path.isfile('linkedin_scrape.csv'):
            os.remove('linkedin_scrape.csv')
            # The message used to name the .xlsx file for the .csv removal.
            sg.popup('Contacts file linkedin_scrape.csv removed!')

    def clear_config(self):
        """Reset history and theme in ``config.json``, keeping saved users."""
        self.history.history = {}
        if os.path.isfile('config.json'):
            users = self._read_config()['users']
            self._write_config({'users': users, 'history': {}, 'theme': None})
        sg.popup('Configuration file cleared!')

    def load_sheet_length(self):
        """Count the stored profiles in the sheet and cache the result.

        Returns:
            int: number of data rows; 0 when the sheet does not exist. When
            ``sheet_type`` is neither ``'csv'`` nor ``'excel'`` the previous
            ``total_parsed`` is returned unchanged.
        """
        if not os.path.isfile(self.sheet_path):
            logging.info(
                f'Sheet {self.sheet_path} does not exist: returning total_parsed=0'
            )
            self.total_parsed = 0
        else:
            logging.info(f'Sheet {self.sheet_path} exists: getting length.')
            if self.sheet_type == 'csv':
                with open(self.sheet_path, 'r', newline='') as csv_file:
                    rows = sum(1 for _ in csv.reader(csv_file))
                # The first row is the header written by store_profile, so it
                # is not a profile (the Excel count never included it).
                self.total_parsed = max(rows - 1, 0)
            elif self.sheet_type == 'excel':
                df = pd.read_excel(self.sheet_path)
                self.total_parsed = len(df.index)
        return self.total_parsed

    def load_configuration(self):
        """Return the saved usernames.

        Returns:
            ``[]`` when there is no config file, otherwise a tuple of
            usernames (empty when none are stored or the file was corrupt
            and has been removed).
        """
        if not os.path.isfile('config.json'):
            return []
        config = self._read_config_or_discard()
        if config is None:
            return ()
        return tuple(config['users'].keys())

    def load_theme(self):
        """Return the saved theme name, falling back to ``'SystemDefault'``."""
        if not os.path.isfile('config.json'):
            return 'SystemDefault'
        config = self._read_config_or_discard()
        if config is None:
            # A corrupt config used to yield ``()`` here, which is not a
            # theme name.
            return 'SystemDefault'
        if 'theme' not in config:
            # Older config files lack the key: add it so later reads find it.
            config['theme'] = None
            self._write_config(config)
        theme = config['theme']
        return theme if theme is not None else 'SystemDefault'

    def save_theme(self, theme):
        """Store ``theme`` in ``config.json``; always returns ``True``."""
        config = self._read_config()
        config['theme'] = theme
        self._write_config(config)
        return True

    def load_password_from_config(self, username):
        """Return the stored password for ``username``.

        Raises:
            Exception: if the password cannot be found in the configuration.
        """
        with open('config.json', 'r') as config_file:
            config = json.load(config_file)
        try:
            return config['users'][username]
        except Exception:
            sg.popup('Error finding password from configuration!',
                     title='Error',
                     keep_on_top=True)
            raise Exception('Error finding password from configuration!')

    def store_login(self, username, password):
        """Save ``password`` for ``username``; always returns ``True``.

        NOTE(review): the password is written to ``config.json`` in plain
        text.
        """
        config = self._read_config()
        config['users'][username] = password
        self._write_config(config)
        return True

    # ----------------------------------------------------------------- auth

    def sign_in(self, username, password, remember_login, refresh_cookies):
        """Authenticate and optionally remember the login.

        Returns:
            bool: whether authentication succeeded.
        """
        self.username = username
        self.password = password
        auth_success = self.authenticate(refresh_cookies)
        if self.authenticated and remember_login:
            if self.store_login(username, password):
                print('Login stored into config file successfully!')
        return auth_success

    def authenticate(self, refresh_cookies):
        """Create the LinkedIn client from the stored credentials.

        Failures are logged and shown in a popup.

        Returns:
            bool: ``True`` on success, ``False`` otherwise.
        """
        try:
            self.application = Linkedin(self.username,
                                        self.password,
                                        debug=True,
                                        refresh_cookies=refresh_cookies)
        except Exception as error:
            # Reset the flag so an earlier successful login cannot cause
            # sign_in to store credentials that just failed.
            self.authenticated = False
            logging.exception(error)
            if 'BAD_EMAIL' in error.args:
                sg.popup('Incorrect email: try again.',
                         title='Incorrect email',
                         keep_on_top=True)
            elif 'CHALLENGE' in error.args:
                sg.popup('Error: LinkedIn requires a sign-in challenge.',
                         title='Linkedin error',
                         keep_on_top=True)
            elif 'Expecting value: line 1 column 1 (char 0)' in error.args:
                sg.popup(
                    'Linkedin is refusing to sign in. Please try again later.',
                    title='Unable to sign in',
                    keep_on_top=True)
            else:
                sg.popup(
                    f'Error arguments: {error.args}\n{traceback.format_exc()}',
                    title='Unhandled exception',
                    keep_on_top=True)
            return False
        self.authenticated = True
        return True

    # ------------------------------------------------------------- scraping

    # perform the API calls
    def linkedin_api_call(self, queue, event):
        """Fetch and store every profile taken from ``queue``.

        Runs while ``event`` is not set or the queue still holds items. A
        failed profile request ends the worker; a failed contact-info
        request only results in empty contact data. In debug mode a fixed
        sample profile is stored and no request is made.
        """
        while not event.is_set() or not queue.empty():
            profile_url = queue.get()
            if not self.debug:
                try:
                    # two API requests: profile and contact info
                    profile = self.application.get_profile(profile_url)
                except Exception as error:
                    logging.exception(f'Error loading profile: {error}')
                    logging.info(traceback.format_exc())
                    return None
                try:
                    contact_info = self.application.get_profile_contact_info(
                        profile_url)
                except Exception as error:
                    logging.exception(f'Error loading contact info: {error}')
                    logging.info(traceback.format_exc())
                    contact_info = {}
            else:
                # a sample profile for debugging purposes
                profile = {
                    'lastName': 'SquarePants',
                    'firstName': 'SpongeBob',
                    'industryName': 'Restaurants',
                    'profile_id': f'DEBUG-{random.randint(0,99999)}'
                }
                contact_info = {
                    'email_address': '*****@*****.**',
                    'websites': ['*****@*****.**'],
                    'twitter': '@pants',
                    'phone_numbers': ['+001']
                }
            self.store_profile(profile, contact_info)

    @staticmethod
    def _format_languages(languages):
        """Join language entries (``{'name': ...}``) into ``'A, B'``.

        Values that are not lists are returned unchanged.
        """
        if not isinstance(languages, list):
            return languages
        try:
            return ', '.join(entry['name'] for entry in languages)
        except Exception as e:
            logging.exception(f'Error setting language: {e}')
            logging.info(traceback.format_exc())
            return ''

    @staticmethod
    def _format_phone_numbers(numbers):
        """Join phone entries into ``'<number> (<type>), ...'``.

        Returns ``''`` for an empty list or entries of an unexpected shape.
        """
        try:
            return ', '.join(
                f'{entry["number"]} ({entry["type"]})' for entry in numbers)
        except Exception:
            return ''

    def store_profile(self, profile, contact_info):
        """Append one profile to the sheet unless it is a duplicate.

        Args:
            profile: profile mapping from the API; missing keys become ``''``.
            contact_info: contact mapping from the API; missing keys become
                ``''``.

        The inputs are not modified. ``parsed`` and ``total_parsed`` are
        incremented once the row has been handed to the writer.
        """
        # keys read from the profile; everything else comes from contact_info
        profile_keys_full = {
            'firstName', 'lastName', 'profile_id', 'headline', 'summary',
            'industryName', 'geoCountryName', 'languages'
        }

        # map profile keys to CRM-compatible column names; this dictionary
        # also fixes the column order, so header and rows stay aligned even
        # when they are written by different runs (set order is not stable).
        column_map = {
            'firstName': 'First name',
            'lastName': 'Last name',
            'profile_id': 'Linkedin profile ID',
            'headline': 'Linkedin headline',
            'summary': 'Linkedin summary',
            'industryName': 'Industry',
            'geoCountryName': 'Location',
            'languages': 'Languages',
            'birthdate': 'Birthday',
            'email_address': 'Email address',
            'phone_numbers': 'Phone number'
        }

        # generate the profile_dict: map API response keys to column names
        # and use '' for anything the API did not return
        profile_dict = {}
        for key, column in column_map.items():
            source = profile if key in profile_keys_full else contact_info
            if key not in source:
                value = ''
            elif key == 'languages':
                value = self._format_languages(source[key])
            elif key == 'phone_numbers':
                # The previous loop replaced the list while iterating over
                # it, so two or more numbers always collapsed to ''.
                value = self._format_phone_numbers(source[key])
            else:
                value = source[key]
            profile_dict[column] = value

        logging.info(f'profile_dict generated: {profile_dict}')
        profile_id = profile_dict['Linkedin profile ID']

        # if this contact is not a duplicate, or we are ignoring duplicates, continue: else, return
        if not self.history.add(profile_id, self.ignore_duplicates):
            print(f'⚠️ Duplicate detected ({profile_id})\n')
            return

        if self.sheet_type == 'csv':
            field_names = list(profile_dict.keys())
            if not os.path.isfile(self.sheet_path):
                with open(self.sheet_path, 'w', newline='') as csv_file:
                    csv.DictWriter(csv_file,
                                   fieldnames=field_names).writeheader()
                print(f'Created file: {self.sheet_path}')
            with open(self.sheet_path, 'a', newline='') as csv_file:
                csv.DictWriter(csv_file,
                               fieldnames=field_names).writerow(profile_dict)
        elif self.sheet_type == 'excel':
            # convert dictionary to a single-row dataframe; a separate
            # mapping keeps profile_dict itself holding plain values
            try:
                df = pd.DataFrame(
                    {column: [value]
                     for column, value in profile_dict.items()},
                    columns=list(column_map.values()))
            except Exception as error:
                logging.exception(f'Exception creating df: {error}')
                logging.info(traceback.format_exc())
                # Without a dataframe there is nothing to write.
                return

            if os.path.isfile(self.sheet_path):
                # store (file exists): append below the last used row
                # NOTE(review): assigning ``writer.book`` / ``writer.sheets``
                # is, as far as I know, rejected by recent pandas releases;
                # consider ``mode='a', if_sheet_exists='overlay'`` - confirm
                # against the pandas version in use.
                try:
                    book = load_workbook(self.sheet_path)
                    with pd.ExcelWriter(self.sheet_path,
                                        engine='openpyxl') as writer:
                        writer.book = book
                        writer.sheets = {
                            ws.title: ws
                            for ws in book.worksheets
                        }
                        for sheetname in writer.sheets:
                            df.to_excel(
                                writer,
                                sheet_name=sheetname,
                                startrow=writer.sheets[sheetname].max_row,
                                index=False,
                                header=False)
                except Exception as e:
                    logging.exception(f'Error storing profile in file: {e}')
                    logging.info(traceback.format_exc())
            else:
                try:
                    with pd.ExcelWriter(self.sheet_path,
                                        engine='openpyxl') as writer:
                        df.to_excel(writer,
                                    sheet_name='Sheet1',
                                    index=False,
                                    header=True)
                except Exception as e:
                    logging.exception(
                        f'Error storing first profile in file: {e}')
                    logging.info(traceback.format_exc())

        print(f'✅ Stored profile {profile_id} to {self.sheet_path}\n')
        logging.info(f'Stored profile {profile_id} to {self.sheet_path}')
        self.parsed += 1
        self.total_parsed += 1
import urllib
import requests
import json
from linkedin_api import Linkedin

if __name__ == '__main__':
    # NOTE(review): the account credentials are hard-coded; move them to an
    # environment variable or an untracked config file.
    api = Linkedin('*****@*****.**', 'DelhiBelly11#Snatch00')

    # Fetch profile, contact info and connections of one public profile.
    profile = api.get_profile('venkata-ratnadeep-suri')
    contact_info = api.get_profile_contact_info('venkata-ratnadeep-suri')
    connections = api.get_profile_connections('venkata-ratnadeep-suri')

    with open('venkata-ratnadeep-suri.txt', 'w') as p:
        json.dump(profile, p)
    # Contact info and connections used to be written to the same path, so
    # the contact info was always overwritten. It now gets its own file; the
    # connections keep the original path.
    with open('venkata-ratnadeep-suri-contact.txt', 'w') as c:
        json.dump(contact_info, c)
    with open('venkata-ratnadeep-suri', 'w') as connect:
        json.dump(connections, connect)
# # LinkedIn

# In[9]:

from linkedin_api import Linkedin

api = Linkedin("*****@*****.**", "psosm1234")

# In[2]:

username = "******"

# # Extract Education

# In[3]:

# get_profile returns a dictionary; its "education" entry is a list of
# dictionaries, one per school.
profile = api.get_profile(username)
edu = profile["education"]

# In[4]:

# Fields of each education entry that matter for the event timeline.
req = ["schoolName", "degreeName", "timePeriod", "fieldOfStudy"]

for entity in edu:
    for requirement in req:
        # Entries do not always carry every field; skip the missing ones.
        if requirement not in entity:
            continue
        print(requirement, ":", " ", entity[requirement])
    print()

# # Extract Experience
#https://github.com/tomquirk/linkedin-api/blob/master/DOCS.md
#https://github.com/tomquirk/linkedin-api/blob/master/linkedin_api/linkedin.py
#https://github.com/tomquirk/linkedin-api/
from linkedin_api import Linkedin

api = Linkedin('login email', 'Password123$$')
search = api.search_people(
    keywords='company name,sales manager,account executive',
    industries=['43'])

# Collect the public ids of every hit that is not a third-degree contact.
array = []
for result in search:
    try:
        if result['distance'] == "DISTANCE_3":
            continue
        public_id = result['public_id']
    except KeyError:
        # Hits without a distance or public id are skipped; only the missing
        # key is tolerated, not every possible error.
        continue
    print(public_id)
    array.append(public_id)

# Fetch the full profile of the first ten ids.
array2 = [api.get_profile(public_id) for public_id in array[:10]]

for person in array2:
    # A profile may have no experience entries; print an empty company name
    # instead of failing with IndexError.
    experience = person.get("experience") or [{}]
    print(person["firstName"], person["lastName"],
          experience[0].get("companyName", ""))
    #i["headline"])
def _profile_matches(retrieve, tag, keyword):
    """Tell whether a profile passes the ``tag`` filter for ``keyword``.

    ``keyword`` is expected in lower case. Any tag other than ``location``,
    ``company``, ``title`` or ``skills`` matches every profile.
    """
    if tag == 'location':
        return ('locationName' in retrieve
                and keyword in retrieve['locationName'].lower())
    if tag in ('company', 'title'):
        # Only the first experience entry is considered; a profile without
        # any entry simply does not match (it used to raise IndexError).
        experience = retrieve.get('experience') or [{}]
        field = 'companyName' if tag == 'company' else 'title'
        return keyword in experience[0].get(field, '').lower()
    if tag == 'skills':
        # The lookup used the key 'Skills' and treated entries as strings, so
        # it could not match profiles whose skills are stored under 'skills'
        # as {'name': ...} dictionaries. Both spellings and both entry shapes
        # are accepted now.
        skillist = retrieve.get('skills') or retrieve.get('Skills') or []
        # NOTE(review): the first skill is skipped by ``[1:]``; kept as in
        # the original, confirm that this is intended.
        return any(
            keyword in (s['name'] if isinstance(s, dict) else s).lower()
            for s in skillist[1:])
    return True


def _profile_entry(retrieve, public_id):
    """Build the response entry (name, link, initials, info) for a profile."""
    name = retrieve['firstName'] + ' ' + retrieve['lastName']
    data = {
        'name': name,
        'link': 'https://www.linkedin.com/in/' + public_id + '/',
        'initials': ''.join(letter[0].upper() for letter in name.split()),
    }
    # The summary is optional; the tagged filters used to fail with KeyError
    # for profiles without one.
    if 'summary' in retrieve:
        data['info'] = retrieve['summary']
    return data


def postJsonHandler():
    """Return up to five first-degree connections matching a filter.

    The JSON body supplies ``email``, ``password``, ``tag`` and ``keyword``.
    At most ten connections are searched and the first five that pass the
    filter are returned.

    Returns:
        A JSON response holding a list of entries with ``name``, ``link``,
        ``initials`` and, when the profile has a summary, ``info``.
    """
    content = request.get_json()
    api = Linkedin(content['email'], content['password'])
    tag = content['tag'].lower()
    keyword = content['keyword'].lower()
    connections = api.search_people(network_depth='F', limit=10)

    querry = []
    for item in connections:
        if len(querry) >= 5:
            break
        public_id = item['public_id']
        retrieve = api.get_profile(public_id)
        if _profile_matches(retrieve, tag, keyword):
            querry.append(_profile_entry(retrieve, public_id))
    return jsonify(querry)
# Show what was found for every search key.
for k in range(lenSearch):
    print("k,key, profile",k,search_keys[k],profiles[k])

api = Linkedin(usern, passw, refresh_cookies=True)

for k in range(lenSearch):
    #for k in range(4,5):
    # Start a new row: every column gets an empty cell that is filled below.
    for dictk in out_dict.keys():
        out_dict[dictk].append("")

    if len(profiles[k]) == 0:
        # Nothing found for this person: keep the name and code only.
        # Backticks are removed since they mess up exporting to stata format.
        out_dict["firstName"][-1] = " ".join(fullnames[k]).replace('`', '')
        out_dict["personCode"][-1] = personcodes[k]
        out_dict["nkeys"][-1] = 0
        continue

    print(fullnames[k],profiles[k])
    best_match = profiles[k][0]
    profile = api.get_profile(best_match["public_id"])
    # print(profile)

    out_dict["firstName"][-1] = profile.get("firstName", "")
    out_dict["lastName"][-1] = profile.get("lastName", "")
    out_dict["personCode"][-1] = personcodes[k]
    out_dict["linkedinProfile"][-1] = (
        "/www.linkedin.com/in/" + best_match["public_id"] + "/")
    out_dict["location"][-1] = profile.get("locationName", "")
    out_dict["nkeys"][-1] = best_match["nkeys"]

    #removing all backticks since they mess up exporting to stata format
    for column in ("firstName", "lastName", "personCode"):
        out_dict[column][-1] = out_dict[column][-1].replace('`', '')
import json
from traitlets import link
import os
from linkedin_api import Linkedin
import pandas as pd
import time

linkedin_api = Linkedin("*****@*****.**", "password",
                        refresh_cookies=True, debug=True)

# The numeric company id is the last path segment of the company URL.
comapany = linkedin_api.get_company(public_id="google")
comapanyid = int(comapany['url'].split('/')[-1])

results = linkedin_api.search_people1(start=0,
                                      limit=10,
                                      current_company=comapanyid,
                                      regions="us:49",
                                      keywords="Software Engineer")
print(len(results))

search_results = pd.DataFrame()
for result in results:
    contact_info = linkedin_api.get_profile_contact_info(
        public_id=result['public_id'])
    profile = linkedin_api.get_profile(urn_id=result['urn_id'])

    data_firstname = profile['firstName']
    data_lastname = profile['lastName']
    data_url = "https://www.linkedin.com/in/%s" % result['public_id']
    # Optional profile fields fall back to a single space.
    data_location = profile.get('locationName', " ")
    data_country = profile.get('geoCountryName', " ")
    data_jobpost = profile.get('headline', " ")

    # Each experience entry is flattened into a "[location|company|..."
    # fragment; a missing piece contributes a single space.
    data_exp = ""
    for exp in profile['experience']:
        period = exp.get('timePeriod', {})
        start_date = period.get('startDate', {})
        end_date = period.get('endDate', {})

        data_exp += "["
        data_exp += (exp['locationName'] + "|") if "locationName" in exp else " "
        data_exp += (exp['companyName'] + "|") if "companyName" in exp else " "
        data_exp += (str(start_date['month']) + " ") if "month" in start_date else " "
        data_exp += (str(start_date['year']) + "|") if "year" in start_date else " "
        data_exp += (str(end_date['month']) + " ") if "month" in end_date else " "
# Uses linkedin-api to grab profile info from public profile ids.
# A public id looks like www.linkedin.com/in/***your_profile_name_11101***
from random import randint
from time import sleep

import pandas as pd
from linkedin_api import Linkedin

api = Linkedin('your@email', 'your_pass!')

# Get profiles from public ids; private ids will be included in the results.
df = pd.read_csv('linkedin_input.csv')
ids = list(df['id'])

profile_data = []
for i in ids:
    profile_info = api.get_profile(i)
    print('getting', i)
    profile_data.append(profile_info)
    # Wait a random 2-6 seconds between requests.
    sleep(randint(2, 6))

#keys returned: ['summary', 'industryName', 'lastName', 'locationName',
#'student', 'elt', 'industryUrn', 'firstName', 'entityUrn',
#'location', 'headline', 'displayPictureUrl', 'profile_id',
#'experience', 'skills', 'education']

# Empty per-field columns (``ids`` is reset and reused here).
ids, summaries, locations = [], [], []
headlines, current_company, industry = [], [], []
continue title.add(row[1]) csvfile.close() p_list = [] for t in title: p_list.append(t.split('/')[-2]) # Authenticate using any Linkedin account credentials api = Linkedin('*****@*****.**', 'Hello!11') pro_list = [] n = 1 for p in p_list: profile = api.get_profile(p) pro_list.append(profile) print(n, ' ', p) n += 1 p_list = [] with open('profile_list_remains.txt', 'r') as f: lines = f.readlines() for line in lines: # GET a profile p_list.append(line.strip()) import json with open('143dataanother146.json', 'w') as f:
"education1_schoolName": [], "education1_degreeName": [], "education1_fieldOfStudy": [], "education1_grade": [], "education1_startDate": [], "education1_endDate": [], "education1_description": [], "education2_schoolName": [], "education2_degreeName": [], "education2_fieldOfStudy": [], "education2_grade": [], "education2_startDate": [], "education2_endDate": [], "education2_description": [], "education3_schoolName": [], "education3_degreeName": [], "education3_fieldOfStudy": [], "education3_grade": [], "education3_startDate": [], "education3_endDate": [], "education3_description": [], "education4_schoolName": [], "education4_degreeName": [], "education4_fieldOfStudy": [], "education4_grade": [], "education4_startDate": [], "education4_endDate": [], "education4_description": [], "education5_schoolName": [], "education5_degreeName": [], "education5_fieldOfStudy": [], "education5_grade": [], "education5_startDate": [], "education5_endDate": [], "education5_description": [], } ###linkedinProfile="/www.linkedin.com/in/"+profiles[k][0]["public_id"]+"/" lenSearch=len(indexes) api = Linkedin(usern, passw, refresh_cookies=True) for k in range(lenSearch): for dictk in out_dict.keys(): out_dict[dictk].append("") profile = api.get_profile(profileurls[k].split('/')[4]) #get public_id from profile url if len(profile)==0: out_dict["firstName"][-1] = (" ".join(fullnames[k])) out_dict["personCode"][-1] = personcodes[k] out_dict["firstName"][-1] = out_dict["firstName"][-1] .replace('`','') out_dict["nkeys"][-1] = 0 out_dict["index"][-1] = indexes[k] continue out_dict["firstName"][-1] = profile["firstName"] if "firstName" in profile.keys() else "" out_dict["lastName"][-1] = profile["lastName"] if "lastName" in profile.keys() else "" out_dict["personCode"][-1] = personcodes[k] out_dict["linkedinProfile"][-1] = profileurls[k] out_dict["location"][-1] = profile["locationName"] if "locationName" in profile.keys() else "" out_dict["nkeys"][-1] = -1 out_dict["index"][-1] = indexes[k]