예제 #1
0
def for_single_url(url):
    """Scrape a single LinkedIn profile and return its main fields.

    Starts a headless Chrome session, logs in via ``actions.login`` and
    builds a ``Person`` for ``url``.  No credentials are passed to the login
    helper; according to the original author's note it then prompts for them
    in the terminal.

    Args:
        url: Address of the LinkedIn profile to scrape.

    Returns:
        dict: The keys ``name``, ``experiences``, ``education``, ``company``,
        ``job_title``, ``about`` and ``linkdin_url`` (spelling kept for
        existing callers), filled from the scraped ``Person``.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    path_webdriver = "/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
    driver = webdriver.Chrome(executable_path=path_webdriver,
                              chrome_options=chrome_options)

    try:
        actions.login(driver)
        person = Person(url, driver=driver)

        return {
            "name": person.name,
            "experiences": person.experiences,
            "education": person.educations,
            "company": person.company,
            "job_title": person.job_title,
            "about": person.about,
            "linkdin_url": person.linkedin_url,
        }
    finally:
        # The driver is local to this function, so release the browser here
        # even when login or scraping raises; otherwise every failed call
        # would leave a headless Chrome process behind.
        driver.quit()
def get_linkedin_user_profile(url):
    """Log in to LinkedIn in a new Chrome session and return the profile.

    Args:
        url: Address of the LinkedIn profile to load.

    Returns:
        The ``Person`` object built for ``url`` with the logged-in browser.
    """
    chrome = webdriver.Chrome(ChromeDriverManager().install())
    credentials = ("*****@*****.**", "******")
    # Per the original note: without email and password the login helper
    # prompts for them in the terminal.
    actions.login(chrome, *credentials)
    return Person(url, driver=chrome)
예제 #3
0
def main():
    """Log in, scrape one fixed profile, print it and dump it to a file.

    Uses ``driver``, ``email`` and ``password`` from the enclosing module
    scope and writes ``str(person)`` to ``output_data.csv``.
    """
    # Per the original note: without email and password the login helper
    # prompts for them in the terminal.
    actions.login(driver, email, password)

    profile_url = "https://www.linkedin.com/in/namachi"
    scraped = Person(profile_url, driver=driver)

    with open("output_data.csv", "w") as csv_handle:
        print_person(scraped)
        csv_handle.write(str(scraped))
예제 #4
0
 def li_login_scrape(self):
     """Log in, open ``self.url`` and print the experience section entries.

     Returns:
         list: The elements matched by
         ``#experience-section .pv-profile-section``.  The browser is left
         open on purpose: the returned elements belong to the live session.
     """
     # Imported locally so this block does not rely on a module-level import.
     from selenium.webdriver.common.by import By

     driver = webdriver.Chrome('/path/to/chromedriver.exe')
     email = '*****@*****.**'
     password = '******'
     actions.login(driver, email, password)
     driver.get(self.url)
     # find_elements(By..., ...) replaces the find_elements_by_* helpers,
     # which are no longer available in current Selenium releases.
     experience = driver.find_elements(
         By.CSS_SELECTOR, '#experience-section .pv-profile-section')
     for item in experience:
         print(item.text)
     return experience
예제 #5
0
 def __init__(self):
     """Start headless Chrome, log in and scroll one fixed profile page.

     Stores the browser on ``self.driver``.  ``email`` and ``password`` are
     read from the enclosing module scope.
     """
     chrome_flags = webdriver.ChromeOptions()
     for flag in ("--headless", "--start-maximized"):
         chrome_flags.add_argument(flag)

     self.driver = webdriver.Chrome("./chromedriver",
                                    chrome_options=chrome_flags)
     actions.login(self.driver, email, password)

     browser = self.driver
     browser.get("https://www.linkedin.com/in/mahesh-k-software-engineer/")
     # Jump to the end of the page.
     browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
     print("Scrolled to Bottom")
예제 #6
0
def main():
    """Log in with Firefox, scrape one fixed profile and print it.

    The ``Person`` is created with ``close_on_complete=False``, so the
    request is made with the browser meant to stay usable afterwards.
    """
    firefox = webdriver.Firefox()

    # Per the original note: without email and password the login helper
    # prompts for them in the terminal.
    actions.login(firefox, "*****@*****.**", "******")

    profile = Person(
        "https://www.linkedin.com/in/alican-d%C3%B6nmez-00549098/",
        driver=firefox,
        get=True,
        close_on_complete=False,
    )
    print(profile)
예제 #7
0
def get_information_bulk(data):
    """Scrape every LinkedIn profile listed in the first column of a sheet.

    Starts one headless Chrome session, logs in once via ``actions.login``
    (no credentials are passed; per the original author's note the helper
    then prompts in the terminal) and reuses that session for every row.

    Args:
        data: Workbook-like object whose ``active`` sheet exposes ``max_row``
            and ``cell(row=..., column=...)`` (presumably an openpyxl
            workbook; confirm against the caller).

    Returns:
        tuple: ``(name, experiences, education, company, job_title, about,
        linkdin_url, driver)``.  The first seven items are parallel lists
        with one entry per scraped row; ``driver`` is the still-open browser,
        which the caller is responsible for closing.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    path_webdriver = "/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
    driver = webdriver.Chrome(executable_path=path_webdriver,
                              chrome_options=chrome_options)

    actions.login(driver)

    sheet_obj = data.active

    name = []
    experiences = []
    education = []
    company = []
    job_title = []
    about = []
    linkdin_url = []

    for i in range(1, sheet_obj.max_row + 1):
        url = sheet_obj.cell(row=i, column=1).value

        # An empty cell yields None; str(None) would otherwise be scraped as
        # the literal URL "None".
        if url is None:
            continue
        url = str(url).strip()
        if not url:
            continue

        # The same driver serves every row and is returned to the caller, so
        # it must not be closed once a single profile has been scraped.
        person = Person(url, driver=driver, close_on_complete=False)

        name.append(person.name)
        experiences.append(person.experiences)
        education.append(person.educations)
        company.append(person.company)
        job_title.append(person.job_title)
        about.append(person.about)
        linkdin_url.append(person.linkedin_url)

    return (name, experiences, education, company, job_title, about,
            linkdin_url, driver)
예제 #8
0
 def scrape(proxy_ip,
            lista,
            username=Settings.username,
            password=Settings.password):
     """Scrape every profile in ``lista`` with one shared Chrome session.

     The browser is started and logged in lazily on the first profile and
     reused afterwards.  A failure on one profile is printed and does not
     stop the remaining ones.

     Args:
         proxy_ip: Proxy address; used for the Chrome proxy capability when
             ``Settings.Proxy_request`` is truthy and echoed in the log line.
         lista: Iterable of profile URLs handed to ``Person``.
         username: Login name passed to ``actions.login``.
         password: Password passed to ``actions.login``.

     Returns:
         list: The ``Person`` objects that were scraped successfully.
     """
     PROXY = str(proxy_ip)
     # None until the browser has been started; this also lets a failed
     # start-up be retried on the next profile instead of leaving the name
     # unbound for the rest of the run.
     driver = None
     lista_result = []
     for persona in lista:
         print("Starting " + proxy_ip)
         try:
             if Settings.Proxy_request:
                 webdriver.DesiredCapabilities.CHROME['proxy'] = {
                     "httpProxy": PROXY,
                     "ftpProxy": PROXY,
                     "sslProxy": PROXY,
                     "noProxy": None,
                     "proxyType": "MANUAL",
                     "autodetect": False
                 }
             if driver is None:
                 driver = webdriver.Chrome(ChromeDriverManager().install())
                 # Per the original note: without email and password the
                 # login helper prompts for them in the terminal.
                 actions.login(driver, username, password)
             # close_on_complete=False keeps the shared browser available for
             # the next profile.
             person = Person(persona,
                             driver=driver,
                             close_on_complete=False)
             print(person)
             lista_result.append(person)
         except Exception as e:
             # Best effort: report the failure and continue with the next one.
             print(e)
     if driver is not None:
         try:
             driver.quit()
         except Exception as e:
             print(e)
     print("FINE DEL THREADS")
     return lista_result
예제 #9
0
def crawler(request, query):
    """Search for LinkedIn profiles matching ``query`` and store each one.

    Logs in with the module-level ``driver``, asks the Google Custom Search
    endpoint for result links, scrapes every link with
    ``get_individual_result_from_linkedin`` and saves one ``Profile`` per
    link.

    Args:
        request: Incoming request object; only ``request.GET`` is printed.
        query: Search text sent to the custom search engine.

    Returns:
        int: Always ``1``.

    Raises:
        requests.HTTPError: If the search endpoint answers with an error
            status.
    """
    print(
        "*************************************************************************************************************************************************************"
    )
    print(query)
    email = "*****@*****.**"
    password = "******"
    actions.login(driver, email, password)

    name_list = []

    print(request.GET)

    # making a request to a google custom search engine
    custom_search_engine_url = "https://www.googleapis.com/customsearch/v1"

    linkedin_url_list = set()

    for i in range(0, 1):
        print(str(i) + "loop info")
        # NOTE(review): the API key is hard-coded in source; consider loading
        # it from configuration instead.
        # NOTE(review): 'start' is 0 on the first page here; confirm the
        # offset convention the API expects.
        PARAMS = {
            'key': 'AIzaSyByUxDR0YO701YOETlSJZn6bfFNWIjtQBM',
            'cx': '009462381166450434430:ecyvn9zudgu',
            'q': query,
            'start': i * 10
        }

        # sending get request and saving the response as response object
        r = requests.get(url=custom_search_engine_url, params=PARAMS)
        # Surface HTTP errors instead of treating an error body as results.
        r.raise_for_status()

        # extracting data in json format
        custom_search_engine_data = r.json()

        # A page without hits may carry no 'items' entry at all.
        for item in custom_search_engine_data.get('items', []):
            linkedin_url_list.add(item['link'])

    print(linkedin_url_list)

    dicty = {}

    for linkedin in linkedin_url_list:
        # Scrape each profile once and reuse the result for both the lookup
        # table and the stored model.
        temp = get_individual_result_from_linkedin(linkedin, driver)
        dicty[linkedin] = temp
        print(temp)

        profile_model = Profile()

        URL = list(temp.keys())[0]
        profile_model.name = URL.split("/")[-1]
        profile_model.linkedin_url = URL
        profile_model.companies = json.dumps(temp[URL]['companies'])
        profile_model.current_job = temp[URL]['current_employer']
        profile_model.certifications = json.dumps(temp[URL]['certifications'])
        profile_model.is_updated = False
        profile_model.save()

    return 1

    # return HttpResponse(
    #     json.dumps(
    #         {
    #             'linkedin_url-list': str(linkedin_url_list),
    #             'post_name': str(name_list),
    #             'post_content': 1,
    #             'dict': dicty
    O
예제 #10
0
from linkedin_scraper import (
    actions,
    Person
)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


# Drive the Brave browser through chromedriver by pointing the Chrome options
# at Brave's binary.
options = Options()
# options.binary_location = '/usr/bin/google-chrome-stable'
options.binary_location = '/usr/bin/brave-browser-stable'
driver = webdriver.Chrome(
    options=options,
    executable_path='/usr/local/bin/chromedriver',
)

# Credentials are left blank here.
# NOTE(review): presumably the login helper then asks for them interactively;
# confirm against the linkedin_scraper documentation.
email, password = "", ""
actions.login(driver, email, password)

person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5",
                driver=driver)

# person.scrape(close_on_complete=False)

print(person)
import os
from linkedin_scraper import person, actions
from selenium import webdriver
from samples import score_calculator
driver = webdriver.Chrome(
    r"C:\Users\Admin\PycharmProjects\LinkedInScrapingWithQualification\Windows\chromedriver.exe"
)

email = "*****@*****.**"
password = ""  #enter password

# Read the profile URL; the context manager closes the file again, and
# strip() removes the trailing newline a text file usually ends with so it
# does not become part of the URL.
with open("LinkedIn_URL.txt", "r") as f:
    text = f.read().strip()
#print(text)

# Per the original note: without email and password the login helper prompts
# for them in the terminal.
actions.login(driver, email, password)
data = person.Person(text, driver=driver)
print("")
score_calculator.calc_score()
예제 #12
0
 def __init__(self, email, password):
     """Start Chrome, log in to LinkedIn and initialise an empty person slot.

     Args:
         email: Login name passed to ``actions.login``.
         password: Password passed to ``actions.login``.
     """
     # The chromedriver binary is resolved by ChromeDriverManager rather than
     # taken from a fixed local path.
     driver_path = ChromeDriverManager().install()
     self.driver = webdriver.Chrome(driver_path)
     # Per the original note: without email and password the login helper
     # prompts for them in the terminal.
     actions.login(self.driver, email, password)
     self.person = None
예제 #13
0
def driver_factory():
    """Return a headless Chrome driver that has been logged in to LinkedIn.

    Credentials are read from ``settings.LINKEDIN_EMAIL`` and
    ``settings.LINKEDIN_PASSWORD``.
    """
    headless_opts = Options()
    headless_opts.add_argument("--headless")

    browser = webdriver.Chrome(options=headless_opts)
    actions.login(browser, settings.LINKEDIN_EMAIL, settings.LINKEDIN_PASSWORD)
    return browser
예제 #14
0
# Finds the person through search
# Search and Lookup result documentation:
# https://github.com/rocketreach/rocketreach_python/blob/master/rocketreach/person.py
rr = rocketreach.Gateway(rocketreach.GatewayConfig('apikey'))

# Code for searches (the results do not include a LinkedIn link)
s = rr.person.search().filter(name="")
result = s.execute()
for person in result.people:
    print(person)

# Code for lookups
# The context manager closes info.txt even if the lookup raises.
with open("info.txt", "w") as f:
    # NOTE(review): `id` is the Python builtin here, not a RocketReach person
    # id; replace it with the id of the person to look up.
    result = rr.person.lookup(person_id=id)
    if result.is_success:
        person = result.person
        # str() keeps the write from failing on non-string fields; one field
        # per line keeps the file readable.
        f.write("ID: " + str(person.id) + "\n")
        f.write("Name: " + str(person.name) + "\n")
        f.write("Employer: " + str(person.current_employer) + "\n")
        f.write("LinkedIn: " + str(person.linkedin_url) + "\n")
        # linkedin_urls.append(person.linkedin_url)


# LinkedIn Scraper
driver = webdriver.Chrome()
linkedin_urls = ["https://www.linkedin.com/in/teri-williams-cohee-99811029"]
actions.login(driver, "username", "password")
# scrape=False defers scraping to the explicit scrape() call below, which
# asks for the browser to stay open afterwards.
linkedin_person = Person(linkedin_urls[0], driver=driver, scrape=False)
linkedin_person.scrape(close_on_complete=False)
print(linkedin_person)
예제 #15
0
 def authenticate(self, login, password):
     """Log ``self.driver`` in to LinkedIn, printing progress around the call.

     Args:
         login: Account name passed to ``actions.login``.
         password: Password passed to ``actions.login``.
     """
     print('Logging in')
     browser = self.driver
     actions.login(browser, login, password)
     print('Crawler has logged in')