def person_profile(url: str) -> dict:
    """Fetch a person's profile based on their LI URL."""
    driver = ChromeDriver.get()
    person = Person(url, driver)
    person.scrape(close_on_complete=False, timeout=10)
    sleep(2 + 5 * random())
    # should be safe to go back once; needed for search and profile to work
    driver.execute_script("window.history.go(-1)")
    return person.to_dict()

def get_profile_linkedin(driver, url):
    """Scrapes a person."""
    person = Person(linkedin_url=url,
                    name=None,
                    about=[],
                    experiences=[],
                    educations=[],
                    interests=[],
                    accomplishments=[],
                    company=None,
                    job_title=None,
                    driver=driver,
                    scrape=False)
    person.scrape(close_on_complete=False)
    return person

def for_single_url(url):
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    path_webdriver = "/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
    driver = webdriver.Chrome(executable_path=path_webdriver, chrome_options=chrome_options)
    # email = ""
    # password = ""
    # actions.login(driver, email, password)
    # if email and password aren't given, it'll prompt in the terminal
    actions.login(driver)
    person = Person(url, driver=driver)
    person_details = {
        "name": person.name,
        "experiences": person.experiences,
        "education": person.educations,
        "company": person.company,
        "job_title": person.job_title,
        "about": person.about,
        "linkdin_url": person.linkedin_url,
    }
    # person.scrape(close_on_complete=True)
    # driver.close()
    return person_details

def get_linkedin_user_profile(url):
    browser = webdriver.Chrome(ChromeDriverManager().install())
    email = "*****@*****.**"
    password = "******"
    # if email and password aren't given, it'll prompt in the terminal
    actions.login(browser, email, password)
    person = Person(url, driver=browser)
    return person

def handle(self, *args, **options):
    driver = driver_factory()
    for user in tqdm(
            User.objects.filter(profile__linkedin__isnull=False).exclude(
                profile__linkedin__in=["", " ", "\n"])):
        user_data = {
            "name": "",
            "about": "",
            "experiences": [],
            "educations": [],
            "interests": [],
        }
        try:
            person = Person(
                user.profile.linkedin,
                contacts=[],
                driver=driver,
                close_on_complete=False,
            )
            user_data["name"] = person.name
            user_data["about"] = person.about
            for experience in person.experiences:
                user_data["experiences"].append({
                    "description": experience.description,
                    "position_title": experience.position_title.replace("Nome da empresa\n", ""),
                    "duration": experience.duration,
                })
            for education in person.educations:
                user_data["educations"].append({
                    "from_date": education.from_date,
                    "to_date": education.to_date,
                    "degree": education.degree,
                    "company": education.company,
                })
            user_data["interests"] = [interest.title for interest in person.interests]
            ProfilerData.objects.get_or_create(user=user, linkedin_data=user_data)
        except Exception:
            pass
    driver.close()

def main():
    # get image
    # actions.login(driver, email, password)
    # if email and password aren't given, it'll prompt in the terminal
    person = Person("https://www.linkedin.com/in/namachi", driver=driver)
    # a = Person("https://www.linkedin.com/in/sornam-arumugam-48943715", driver=driver)
    with open("output_data.csv", "w") as out_file:
        print_person(person)
        out_file.write(str(person))

def main():
    driver = webdriver.Firefox()
    email = "*****@*****.**"
    password = "******"
    # if email and password aren't given, it'll prompt in the terminal
    actions.login(driver, email, password)
    # person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)
    # person = Person("https://www.linkedin.com/in/ahmet-aydin-41a37a111", driver=driver, get=True,
    #                 close_on_complete=False)
    person = Person("https://www.linkedin.com/in/alican-d%C3%B6nmez-00549098/", driver=driver,
                    get=True, close_on_complete=False)
    print(person)

def get_information_bulk(data):
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    path_webdriver = "/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
    driver = webdriver.Chrome(executable_path=path_webdriver, chrome_options=chrome_options)
    # email = ""
    # password = ""
    # actions.login(driver, email, password)
    # if email and password aren't given, it'll prompt in the terminal
    actions.login(driver)
    sheet_obj = data.active
    m_row = sheet_obj.max_row
    name = []
    experiences = []
    education = []
    company = []
    job_title = []
    about = []
    linkdin_url = []
    for i in range(1, m_row + 1):
        cell_obj = sheet_obj.cell(row=i, column=1)
        url = cell_obj.value
        person = Person(str(url).strip(), driver=driver)
        name.append(person.name)
        experiences.append(person.experiences)
        education.append(person.educations)
        company.append(person.company)
        job_title.append(person.job_title)
        about.append(person.about)
        linkdin_url.append(person.linkedin_url)
    # driver.close()
    return (name, experiences, education, company, job_title, about, linkdin_url, driver)

def scrape(proxy_ip, lista, username=Settings.username, password=Settings.password):
    first = True
    lista_result = []
    for persona in lista:
        print("Starting " + proxy_ip)
        try:
            for element in lista:
                PROXY = str(proxy_ip)
                if Settings.Proxy_request:
                    webdriver.DesiredCapabilities.CHROME['proxy'] = {
                        "httpProxy": PROXY,
                        "ftpProxy": PROXY,
                        "sslProxy": PROXY,
                        "noProxy": None,
                        "proxyType": "MANUAL",
                        "autodetect": False,
                    }
                if first:
                    first = False
                    driver = webdriver.Chrome(ChromeDriverManager().install())
                    # if email and password aren't given, it'll prompt in the terminal
                    actions.login(driver, username, password)
                person = Person(persona, driver=driver, close_on_complete=False)
                print(person)
                lista_result.append(person)
        except Exception as e:
            print(e)
            try:
                driver.quit()
            except Exception as e:
                print(e)
    print("FINE DEL THREADS")
    return lista_result

def processProfile(driver, data, index, pause_cnt):
    if pause_cnt and index % pause_cnt == 0:
        pause(driver)
    csv_output = []
    try:
        print('Processing profile ' + str(index) + ' ... ' + str(data[0]))
        profile = Person(data[0], driver=driver, scrape=False)
        profile.experiences = []  # Needed due to a bug in the library
        profile.educations = []   # Needed due to a bug in the library
        profile.scrape(close_on_complete=False)
        for ed in profile.educations:
            new_row = data[1:]  # Carry over any pre-existing extra columns
            new_row.extend([
                'education',
                ed.institution_name,
                ed.from_date[38:],  # Needed due to a bug in the library
                ed.to_date,
            ])
            csv_output.append(new_row)
        for w in profile.experiences:
            new_row = data[1:]  # Carry over any pre-existing extra columns
            new_row.extend([
                'work',
                w.institution_name,
                w.position_title,
                w.description,
                w.from_date[15:],  # Needed due to a bug in the library
                w.to_date,
            ])
            csv_output.append(new_row)
    except:
        e = sys.exc_info()[0]
        print('Error processing ' + str(data[0]) + ' error: ' + str(e))
        csv_output.append([data[0], 'error', e])
    return csv_output

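# A minimal usage sketch for processProfile above, not from the original source:
# the input rows, the output file name, the pause interval, and the driver setup
# are assumptions for illustration (each input row is expected to start with a
# profile URL, followed by any extra columns).
import csv
from selenium import webdriver

driver = webdriver.Chrome()
rows = [["https://www.linkedin.com/in/example", "existing-col"]]  # hypothetical input rows

with open("profiles_out.csv", "a", newline="") as f:  # hypothetical output file
    writer = csv.writer(f)
    for index, data in enumerate(rows, start=1):
        for out_row in processProfile(driver, data, index, pause_cnt=10):
            writer.writerow(out_row)
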
from linkedin_scraper import actions, Person
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
# options.binary_location = '/usr/bin/google-chrome-stable'
options.binary_location = '/usr/bin/brave-browser-stable'
driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver', options=options)

email = ""
password = ""
actions.login(driver, email, password)

person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)
# person.scrape(close_on_complete=False)
print(person)

profileUrl = []
check = 'https://www.linkedin.com/'
with open('profiler.csv', newline='', encoding="mbcs") as f:
    reader = csv.reader(f, delimiter=';')
    for row in reader:
        res = [idx for idx in row if idx.lower().startswith(check.lower())]
        profileUrl.append(res)

conn = sqlite3.connect('profiler.db')
c = conn.cursor()

count = 1
while True:
    urls = ''.join(profileUrl[count])
    person = Person(urls, driver=driver, scrape=False)
    person.scrape(close_on_complete=False)
    count = count + 1
    name = ''.join(person.name)
    about = ' '.join(person.about)
    personUrl = ''.join(person.linkedin_url)
    personExperience = '; '.join(map(str, person.experiences))
    personSkills = '; '.join(person.skills)
    if person.job_title in ("Utvecklare", "Testare", "Testautomatiserare",
                            "Software Tester", "Automationsutvecklare",
                            "Automations developer"):
        c.execute('INSERT INTO persons VALUES (?, ?, ?, ?, ?)',
                  (name, about, personUrl, personExperience, personSkills))
        conn.commit()

import os

from linkedin_scraper import Person, actions
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome("./chromedriver", options=chrome_options)

email = os.getenv("LINKEDIN_USER")
password = os.getenv("LINKEDIN_PASSWORD")
# if email and password aren't given, it'll prompt in the terminal
actions.login(driver, email, password)

person = Person("https://www.linkedin.com/in/adrian0350", contacts=[], driver=driver)

print("Person: " + person.name)
print("Person contacts: ")
for contact in person.contacts:
    print("Contact: " + contact.name + " - " + contact.occupation + " -> " + contact.url)

import csv, os, json
import requests
from time import sleep

import scrapper_lib

companyurls = ['https://www.linkedin.com/company/tata-consultancy-services']
extracted_data = []
# for url in companyurls:
#     extracted_data.append(scrapper_lib.linkedin_companies_parser(url))
#     f = open('data.json', 'w')
#     json.dump(extracted_data, f, indent=4)
#
# import urllib2 as URL
# urlopener = URL.build_opener()
# urlopener.addheaders = [('User-agent', 'Mozilla/5.0')]
# html = urlopener.open('https://www.linkedin.com/in/manuwhs').read()

# Make the chromedriver location visible to the library; os.system("export ...")
# only affects a child shell, so set it on this process's environment instead.
os.environ["CHROMEDRIVER"] = os.path.expanduser("~/chromedriver")

from selenium import webdriver
driver = webdriver.Chrome('./chromedriver')

from linkedin_scraper import Person
person = Person("https://www.linkedin.com/in/manuwhs")

from linkedin_scraper import Person

rick_fox = Person("https://www.linkedin.com/in/rifox?trk=pub-pbmap")
iggy = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5")

with open(outfilepath, 'a+', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    writer.writerow([person])

# loop through the URLs
people = []
failed_urls = []
invalid_urls = []
person = None
for url in urls:
    try:
        person = Person(url, driver=driver, close_on_complete=False)
        name = person.name
        # Picking experience
        if len(person.experiences) > 0:
            title = person.experiences[0].position_title.decode('utf8') \
                if person.experiences[0].position_title is not None else ''
            company = person.experiences[0].institution_name.decode('utf8') \
                if person.experiences[0].institution_name is not None else ''
        else:
            title = ""
            company = ""
        # Picking education

from linkedin_scraper import Person

rick_fox = Person("https://www.linkedin.com/in/rifox?trk=pub-pbmap")
iggy = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5")
Anirudra = Person("https://in.linkedin.com/in/anirudra-choudhury-109635b1")

# Rather than doing the scraping ourselves, let the library do the work.
# We just need to get the link to the profile, which should be an href link
# in the HTML.
src = browser.page_source
soup = BeautifulSoup(src, 'lxml')

# Use BeautifulSoup to get the LinkedIn profile link (may not work);
# needs some more testing to make sure it works consistently.
link = soup.find(
    'a', {'class': 'app-aware-link ember-view search-result__result-link'}
).get('href')
print(link)

# Use the linkedin_scraper API to do the scraping
linkedin_person = Person(link, driver=browser, scrape=False)
linkedin_person.scrape(close_on_complete=False)
print(linkedin_person)

# Testing keyword search.
# Keyword search works, but we need to convert linkedin_person into a string,
# and keyword search is case sensitive, so the entire scraped profile and the
# keywords must be converted to lower case.
profile = str(linkedin_person).lower()
if "black" in profile:
    print("keyword search works")
else:
    print("does not work")

# For a list of people, use a dictionary with name + company as key and
# profile results as value.

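# A minimal sketch of the dictionary pattern described in the comment above,
# not from the original source: name + company as the key, the lower-cased
# scraped profile as the value. The people list and the find_profile_link
# search helper are hypothetical placeholders.
people = [("Jane Doe", "Acme Corp"), ("John Smith", "Globex")]
profiles = {}
for name, company in people:
    link = find_profile_link(browser, name, company)  # hypothetical search helper
    scraped = Person(link, driver=browser, scrape=False)
    scraped.scrape(close_on_complete=False)
    profiles[name + " + " + company] = str(scraped).lower()
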
email = ""
password = ""
file_p = open("table.csv", "a+")
driver = webdriver.Chrome("/usr/bin/chromedriver", chrome_options=chrome_options)
# if email and password aren't given, it'll prompt in the terminal
actions.login(driver, email, password)

# people_jaana.txt is a text file which has the links for each profile
with open("people_jaana.txt") as fp:
    lines = fp.readlines()
    for line in lines:
        try:
            person = Person(line, driver=driver)
            file_str = person.name
            if len(person.experiences):
                file_str += "," + person.experiences[0].position_title.decode("utf-8") \
                    + "," + person.experiences[0].institution_name.decode("utf-8")
            else:
                file_str += "," + ","
            file_str += "," + line
            print(file_str)
            file_p.write(file_str)
        except:
            print(line)

file_p.close()
driver.close()

def scrap(self, url):
    self.person = Person(url, driver=self.driver)

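# A minimal sketch, not from the original source, of a wrapper class the scrap()
# method above could belong to; the class name, driver setup, and login step are
# assumptions for illustration.
from selenium import webdriver
from linkedin_scraper import Person, actions

class ProfileScraper:
    def __init__(self, email=None, password=None):
        self.driver = webdriver.Chrome()
        # if email and password aren't given, actions.login prompts in the terminal
        actions.login(self.driver, email, password)
        self.person = None

    def scrap(self, url):
        self.person = Person(url, driver=self.driver)
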
"https://www.linkedin.com/in/melinda-brown/"] ''' for pizza in linkedin_urls: person = Person(linkedin_url = pizza, driver=driver, scrape = False) person.scrape(close_on_complete=False) dict_row={'person':person} dict_array.append(dict_row) sleep(randrange(5, 10)) ''' victoria = Person(linkedin_url = "https://www.linkedin.com/in/victoriakythai/", driver=driver, scrape = False) victoria.scrape(close_on_complete=False) pizza1 = " ".join(str(x) for x in [victoria]) dict_row={'person':pizza1} dict_array.append(dict_row) driver.delete_all_cookies() driver.get("https://www.linkedin.com") email_button = driver.find_element_by_xpath('//input[@id="login-email"]') email_button.send_keys(user_email) sleep(randrange(5,9)) pass_button = driver.find_element_by_xpath('//input[@id="login-password"]') pass_button.send_keys(password) sleep(randrange(5,9))
print(" Capturing coordinates - Region: " + linha['address'] +
      " lat:" + str(lat) + " long:" + str(lng))
sql = "update customers set address_latitude=%s,address_longitude=%s where userId=%s"
cursor.execute(sql, (lat, lng, linha['userId']))
conexao.commit()

print('')
print("Fetching social network information...")
print('')

# Analyze location and social network profile.
for linha in resultado:
    print('')
    print(' Capturing profile: ' + linha['linkedin'])
    person = Person(linha['linkedin'], driver=driver, scrape=False)
    person.scrape(close_on_complete=False)
    experiences = str(person.experiences[0])
    inicio = experiences.find(" at ")
    token_end = experiences.find(" from ")
    companyCrawled = experiences[inicio + 4:token_end]
    r = requests.get(
        'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input='
        + companyCrawled
        + '&inputtype=textquery&fields=photos,formatted_address,name,rating,opening_hours,geometry&key='
        + GOOGLE_MAPS_KEY)

browser.get(url)

email = browser.find_element_by_xpath('//*[@id="session_key-login"]')
email.send_keys('*****@*****.**')
time.sleep(2)
password = browser.find_element_by_xpath('//*[@id="session_password-login"]')
password.send_keys('marigi@98')
time.sleep(2)
button = browser.find_element_by_xpath('//*[@id="btn-primary"]')
button.click()
time.sleep(5)

person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=browser, scrape=False)
person.scrape(close_on_complete=False)
browser.close()

# browser.get('https://www.linkedin.com/search/results/people/?facetNetwork=%5B%22F%22%5D&origin=MEMBER_PROFILE_CANNED_SEARCH')
# connections = browser.find_element_by_xpath('//*[@id="ember5061"]/span[1]/span')
# time.sleep(10)
# print(connections)
# browser.close()
# person = Person('https://www.linkedin.com/in/brandeddavid/', driver=browser, scrape=True)

from linkedin_scraper import Person, actions
from selenium import webdriver

path = "/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
driver = webdriver.Chrome(executable_path=path)
# email = "*****@*****.**"
# password = "******"
# if email and password aren't given, it'll prompt in the terminal
actions.login(driver)
person = Person(driver=driver, scrape=False)  # the profile URL to scrape is normally passed as the first argument
person.scrape(close_on_complete=True)

if profile_link is None:
    members_dict[person] = "None"
    continue

# get the link
profile_link = profile_link.get('href')

# Need an exception block because sometimes, when no search results appear,
# it was still able to get a link, which gives an error to the web scraper
# because there are no fields it can extract.
try:
    # Must set the fields as empty lists because otherwise each web-scraped result
    # will just append to the previous results, and by the end you will just have
    # a really long and overlapping profile of everyone you have scraped.
    scraping = Person(profile_link, about=[], experiences=[], educations=[],
                      interests=[], accomplishments=[], driver=browser, scrape=False)
    scraping.scrape(close_on_complete=False)
    results = str(scraping).lower()
    # give it some time to process
    time.sleep(1)
    # Create a list and, for each keyword that is in the scraped results, append it;
    # at the end, add it as the value with the name of the individual as the key.
    dict_list = []
    for keyword in keywords:
        if keyword in results:
            dict_list.append(keyword)
    members_dict[person] = dict_list

# Finds the person through search.
# Search and Lookup result documentation:
# https://github.com/rocketreach/rocketreach_python/blob/master/rocketreach/person.py
rr = rocketreach.Gateway(rocketreach.GatewayConfig('apikey'))

# Code for searches (does not return the LinkedIn link)
s = rr.person.search().filter(name="")
result = s.execute()
for person in result.people:
    print(person)

# Code for lookups
f = open("info.txt", "w")
result = rr.person.lookup(person_id=id)
if result.is_success:
    person = result.person
    f.write("ID: " + person.id)
    f.write("Name: " + person.name)
    f.write("Employer: " + person.current_employer)
    f.write("LinkedIn: " + person.linkedin_url)
    # linkedin_urls.append(person.linkedin_url)

# LinkedIn Scraper
driver = webdriver.Chrome()
linkedin_urls = ["https://www.linkedin.com/in/teri-williams-cohee-99811029"]
actions.login(driver, "username", "password")
linkedin_person = Person(linkedin_urls[0], driver=driver, scrape=False)
linkedin_person.scrape(close_on_complete=False)
print(linkedin_person)

def main():
    linkedin_urls = read_csv('only_linkedin_urls.csv')
    driver = browse('https://www.linkedin.com')
    connect(driver, '*****@*****.**', '123tototorres&')

    # for normal user behavior on the website
    driver.execute_script(
        "(function(){try{for(i in document.getElementsByTagName('a')){let el = document.getElementsByTagName('a')[i]; "
        "if(el.innerHTML.includes('Contact info')){el.click();}}}catch(e){}})()"
    )
    # Wait a few seconds for the page to load
    time.sleep(3)

    list_of_profiles = []
    # get profile information
    for elems in linkedin_urls:
        for elem in elems:
            profile = []
            driver.get(elem)
            driver.implicitly_wait(10)
            time.sleep(3)
            person = Person(linkedin_url=str(elem), driver=driver, scrape=False)
            person.scrape(close_on_complete=False)

            # number of connections
            try:
                connections = driver.find_element_by_xpath(
                    '//*[@id="ember56"]/div[2]/div[2]/div[1]/ul[2]/li[2]/span'
                ).text
            except:
                connections = []

            print('\n')
            # url
            print('url : ', person.linkedin_url)
            profile.append(person.linkedin_url)
            # name
            print('name : ', person.name)
            profile.append(person.name)
            # position
            print('position : ', person.job_title)
            profile.append(person.job_title)
            # location
            print('location : ', person.location)
            profile.append(person.location)
            # connections
            print('connections : ', connections)
            profile.append(connections)
            # experiences
            print('experiences : ', person.experiences)
            profile.append(person.experiences)
            # educations
            print('education : ', person.educations)
            profile.append(person.educations)
            # interests
            print('interests : ', person.interests)
            profile.append(person.interests)
            print('\n')

            write_profile_info_in_file(profile, 'profiles.csv')
            list_of_profiles.append(profile)

            person.experiences.clear()
            person.educations.clear()
            person.interests.clear()
            time.sleep(5)

    driver.quit()

from linkedin_scraper import Person, actions
from selenium import webdriver

driver = webdriver.Chrome('C:/chromedriver')
email = "*****@*****.**"
password = "******"
# if email and password aren't given, it'll prompt in the terminal
actions.login(driver, email, password)
person = Person("https://www.linkedin.com/in/rodriguescajetan", driver=driver)
print(person)