def for_single_url(url):
    """Scrape one LinkedIn profile URL and return its details as a dict.

    Starts its own headless Chrome instance; actions.login is called without
    credentials, so it prompts for them in the terminal.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    path_webdriver = "/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
    driver = webdriver.Chrome(executable_path=path_webdriver, chrome_options=chrome_options)
    # No email/password given: actions.login prompts in the terminal.
    actions.login(driver)
    person = Person(url, driver=driver)
    person_details = {
        "name": person.name,
        "experiences": person.experiences,
        "education": person.educations,
        "company": person.company,
        "job_title": person.job_title,
        "about": person.about,
        "linkdin_url": person.linkedin_url,
    }
    return person_details
def get_linkedin_user_profile(url):
    """Log in to LinkedIn and return a scraped Person for *url*."""
    browser = webdriver.Chrome(ChromeDriverManager().install())
    email = "*****@*****.**"
    password = "******"
    # If email and password were not given, actions.login would prompt in the terminal.
    actions.login(browser, email, password)
    return Person(url, driver=browser)
def main():
    """Scrape a hard-coded profile and dump its string form to output_data.csv."""
    # NOTE(review): relies on a `driver` defined outside this function — confirm
    # it is created (and logged in) at module level before main() runs.
    person = Person("https://www.linkedin.com/in/namachi", driver=driver)
    with open("output_data.csv", "w") as out_file:
        print_person(person)
        out_file.write(str(person))
def li_login_scrape(self):
    """Log in, open self.url and print/return the experience-section elements."""
    driver = webdriver.Chrome('/path/to/chromedriver.exe')
    email = '*****@*****.**'
    password = '******'
    actions.login(driver, email, password)
    driver.get(self.url)
    # NOTE(review): find_elements_by_css_selector was removed in Selenium 4;
    # kept as-is to match the Selenium 3 API this code targets — confirm version.
    experience = driver.find_elements_by_css_selector(
        '#experience-section .pv-profile-section')
    for item in experience:
        print(item.text)
    return experience
def __init__(self):
    """Start a headless Chrome, log in, open a fixed profile and scroll to bottom."""
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--start-maximized")
    self.driver = webdriver.Chrome("./chromedriver", chrome_options=options)
    # NOTE(review): `email` and `password` are not defined in this method —
    # presumably module-level globals; confirm they exist before construction.
    actions.login(self.driver, email, password)
    self.driver.get(
        "https://www.linkedin.com/in/mahesh-k-software-engineer/")
    self.driver.execute_script(
        "window.scrollTo(0, document.body.scrollHeight);")
    print("Scrolled to Bottom")
def main():
    """Log in through Firefox and print one scraped profile."""
    driver = webdriver.Firefox()
    email = "*****@*****.**"
    password = "******"
    # If email and password were not given, actions.login would prompt in the terminal.
    actions.login(driver, email, password)
    person = Person(
        "https://www.linkedin.com/in/alican-d%C3%B6nmez-00549098/",
        driver=driver,
        get=True,
        close_on_complete=False,
    )
    print(person)
def get_information_bulk(data):
    """Scrape every profile URL listed in column 1 of an openpyxl workbook.

    Parameters
    ----------
    data : an openpyxl workbook; its active sheet holds one profile URL per row
        in column 1 (assumption based on the cell access below — confirm).

    Returns
    -------
    tuple
        Seven parallel lists (name, experiences, education, company, job_title,
        about, linkdin_url) plus the still-open driver so callers can close it.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    path_webdriver = "/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
    driver = webdriver.Chrome(executable_path=path_webdriver, chrome_options=chrome_options)
    # No email/password given: actions.login prompts in the terminal.
    actions.login(driver)

    sheet_obj = data.active
    name = []
    experiences = []
    education = []
    company = []
    job_title = []
    about = []
    linkdin_url = []
    for i in range(1, sheet_obj.max_row + 1):
        url = sheet_obj.cell(row=i, column=1).value
        # Fix: skip blank cells — str(None).strip() previously produced the
        # literal URL "None", which was then handed to the scraper.
        if url is None or not str(url).strip():
            continue
        person = Person(str(url).strip(), driver=driver)
        name.append(person.name)
        experiences.append(person.experiences)
        education.append(person.educations)
        company.append(person.company)
        job_title.append(person.job_title)
        about.append(person.about)
        linkdin_url.append(person.linkedin_url)
    # Driver intentionally left open; it is returned to the caller.
    return (name, experiences, education, company, job_title, about,
            linkdin_url, driver)
def scrape(proxy_ip, lista, username=Settings.username, password=Settings.password):
    """Scrape each LinkedIn profile URL in *lista* through one (optionally proxied) driver.

    Fixes vs. the previous version:
    - the redundant inner `for element in lista` loop scraped and appended each
      profile len(lista) times (O(n^2) duplicated work and duplicate results);
    - `driver` could be referenced before assignment in the cleanup path when
      driver startup itself failed.

    Returns the list of scraped Person objects (possibly partial on error).
    """
    lista_result = []
    driver = None
    print("Starting " + proxy_ip)
    try:
        if Settings.Proxy_request:
            PROXY = str(proxy_ip)
            webdriver.DesiredCapabilities.CHROME['proxy'] = {
                "httpProxy": PROXY,
                "ftpProxy": PROXY,
                "sslProxy": PROXY,
                "noProxy": None,
                "proxyType": "MANUAL",
                "autodetect": False,
            }
        driver = webdriver.Chrome(ChromeDriverManager().install())
        # If username/password were empty, actions.login would prompt in the terminal.
        actions.login(driver, username, password)
        for persona in lista:
            person = Person(persona, driver=driver, close_on_complete=False)
            print(person)
            lista_result.append(person)
    except Exception as e:
        # Best-effort: report and fall through so the driver is still cleaned up.
        print(e)
    if driver is not None:
        try:
            driver.quit()
        except Exception as e:
            print(e)
    print("FINE DEL THREADS")
    return lista_result
def crawler(request, query):
    """Find LinkedIn profile URLs for *query* via Google CSE, scrape and persist them.

    Queries a Google Custom Search Engine for candidate links, scrapes each one
    with get_individual_result_from_linkedin(), and saves a Profile row per URL.
    Always returns 1 (the HttpResponse return was already commented out).

    Fixes vs. the previous version:
    - get_individual_result_from_linkedin() was called twice per URL, scraping
      every profile twice;
    - `p = Profile(); p.objects.all()` raised, since Django managers are only
      accessible from the model class, not instances (the result was unused).
    """
    print(
        "*************************************************************************************************************************************************************"
    )
    print(query)
    email = "*****@*****.**"
    password = "******"
    # NOTE(review): relies on a `driver` defined outside this function — confirm.
    actions.login(driver, email, password)
    print(request.GET)

    # Collect candidate LinkedIn URLs from the Google Custom Search Engine.
    custom_search_engine_url = "https://www.googleapis.com/customsearch/v1"
    linkedin_url_list = set()
    for i in range(0, 1):
        print(str(i) + "loop info")
        PARAMS = {
            'key': 'AIzaSyByUxDR0YO701YOETlSJZn6bfFNWIjtQBM',
            'cx': '009462381166450434430:ecyvn9zudgu',
            'q': query,
            'start': i * 10,
        }
        r = requests.get(url=custom_search_engine_url, params=PARAMS)
        custom_search_engine_data = r.json()
        for item in custom_search_engine_data['items']:
            linkedin_url_list.add(item['link'])
    print(linkedin_url_list)

    dicty = {}
    for linkedin in linkedin_url_list:
        # Scrape once per URL and reuse the result for both the dict and the model.
        temp = get_individual_result_from_linkedin(linkedin, driver)
        dicty[linkedin] = temp
        print(temp)
        profile_model = Profile()
        URL = list(temp.keys())[0]
        profile_model.name = URL.split("/")[-1]
        profile_model.linkedin_url = URL
        profile_model.companies = json.dumps(temp[URL]['companies'])
        profile_model.current_job = temp[URL]['current_employer']
        profile_model.certifications = json.dumps(temp[URL]['certifications'])
        profile_model.is_updated = False
        profile_model.save()
    return 1
from linkedin_scraper import (
    actions,
    Person,
)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
# options.binary_location = '/usr/bin/google-chrome-stable'
options.binary_location = '/usr/bin/brave-browser-stable'
driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver',
                          options=options)

# Empty credentials: actions.login prompts for them in the terminal.
email = ""
password = ""
actions.login(driver, email, password)

person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5",
                driver=driver)
print(person)
import os
from linkedin_scraper import person, actions
from selenium import webdriver
from samples import score_calculator

driver = webdriver.Chrome(
    r"C:\Users\Admin\PycharmProjects\LinkedInScrapingWithQualification\Windows\chromedriver.exe"
)
email = "*****@*****.**"
password = ""  # enter password

# Fix: close the URL file deterministically (it was left open before).
with open("LinkedIn_URL.txt", "r") as url_file:
    text = url_file.read()

# If email and password were not given, actions.login would prompt in the terminal.
actions.login(driver, email, password)
data = person.Person(text, driver=driver)
print("")
score_calculator.calc_score()
def __init__(self, email, password):
    """Open a Chrome driver via webdriver-manager and log in to LinkedIn."""
    self.driver = webdriver.Chrome(ChromeDriverManager().install())
    # If email/password were not given, actions.login would prompt in the terminal.
    actions.login(self.driver, email, password)
    self.person = None
def driver_factory():
    """Return a headless Chrome driver already logged in to LinkedIn."""
    opts = Options()
    opts.add_argument("--headless")
    browser = webdriver.Chrome(options=opts)
    actions.login(browser, settings.LINKEDIN_EMAIL, settings.LINKEDIN_PASSWORD)
    return browser
# Finds the person through search, then scrapes their LinkedIn profile.
# Search and Lookup result documentation:
# https://github.com/rocketreach/rocketreach_python/blob/master/rocketreach/person.py
rr = rocketreach.Gateway(rocketreach.GatewayConfig('apikey'))

# Searches (these do not return a LinkedIn link).
s = rr.person.search().filter(name="")
result = s.execute()
for person in result.people:
    print(person)

# Lookups: write the matched person's details to info.txt.
# NOTE(review): `person_id=id` passes the `id` builtin — presumably a real
# person id was intended here; confirm before running.
result = rr.person.lookup(person_id=id)
# Fix: close the file deterministically (it was left open) and separate the
# fields with newlines (they were previously run together on one line).
with open("info.txt", "w") as f:
    if result.is_success:
        person = result.person
        f.write("ID: " + person.id + "\n")
        f.write("Name: " + person.name + "\n")
        f.write("Employer: " + person.current_employer + "\n")
        f.write("LinkedIn: " + person.linkedin_url + "\n")

# LinkedIn scraper.
driver = webdriver.Chrome()
linkedin_urls = ["https://www.linkedin.com/in/teri-williams-cohee-99811029"]
actions.login(driver, "username", "password")
linkedin_person = Person(linkedin_urls[0], driver=driver, scrape=False)
linkedin_person.scrape(close_on_complete=False)
print(linkedin_person)
def authenticate(self, login, password):
    """Log this crawler's driver in to LinkedIn with the given credentials."""
    print('Logging in')
    actions.login(self.driver, login, password)
    print('Crawler has logged in')