Example #1
def person_profile(url: str) -> dict:
    """Fetch a person's profile based on their LI URL."""
    driver = ChromeDriver.get()
    person = Person(url, driver)
    person.scrape(close_on_complete=False, timeout=10)
    sleep(2 + 5 * random())
    # should be safe to go back once; needed for search and profile to work.
    driver.execute_script("window.history.go(-1)")
    return person.to_dict()
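ChromeDriver.get() above is a project-local helper, not part of linkedin_scraper or Selenium; a minimal sketch of what such a singleton might look like (an assumption, since the original helper isn't shown):

# Hypothetical helper assumed by the example above, not a real library API.
from selenium import webdriver

class ChromeDriver:
    _driver = None

    @classmethod
    def get(cls):
        # Reuse a single browser session across profile fetches.
        if cls._driver is None:
            cls._driver = webdriver.Chrome()
        return cls._driver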
Example #2
def get_profile_linkedin(driver, url):
    """Scrapes a person"""
    person = Person(linkedin_url=url,
                    name=None,
                    about=[],
                    experiences=[],
                    educations=[],
                    interests=[],
                    accomplishments=[],
                    company=None,
                    job_title=None,
                    driver=driver,
                    scrape=False)
    person.scrape(close_on_complete=False)
    return person
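Constructing Person with scrape=False and then calling scrape() separates setup from fetching. A hedged call site for get_profile_linkedin (the profile URL below is a placeholder):

from linkedin_scraper import Person, actions
from selenium import webdriver

driver = webdriver.Chrome()  # assumes chromedriver is on PATH
actions.login(driver)        # prompts for email and password in the terminal
profile = get_profile_linkedin(driver, "https://www.linkedin.com/in/some-profile")  # placeholder URL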
Example #3
def for_single_url(url):

    chrome_options = Options()
    chrome_options.add_argument("--headless")

    path_webdriver = "/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
    driver = webdriver.Chrome(executable_path=path_webdriver,
                              chrome_options=chrome_options)

    #email = ""
    #password = ""
    #actions.login(driver, email, password)  # if email and password aren't given, it prompts in the terminal
    actions.login(driver)

    person = Person(url, driver=driver)

    person_details = {
        "name": person.name,
        "experiences": person.experiences,
        "education": person.educations,
        "company": person.company,
        "job_title": person.job_title,
        "about": person.about,
        "linkdin_url": person.linkedin_url
    }

    #person.scrape(close_on_complete=True)
    #driver.close()

    return person_details
Example #4
def get_linkedin_user_profile(url):
    browser = webdriver.Chrome(ChromeDriverManager().install())
    email = "*****@*****.**"
    password = "******"
    actions.login(
        browser, email,
        password)  # if email and password aren't given, it prompts in the terminal
    person = Person(url, driver=browser)
    return person
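ChromeDriverManager in this example comes from the webdriver_manager package; the import, not shown in the excerpt, would be:

from webdriver_manager.chrome import ChromeDriverManager  # pip install webdriver-manager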
Example #5
    def handle(self, *args, **options):
        driver = driver_factory()

        for user in tqdm(
                User.objects.filter(profile__linkedin__isnull=False).exclude(
                    profile__linkedin__in=["", " ", "\n"])):
            user_data = {
                "name": "",
                "about": "",
                "experiences": [],
                "education": [],
                "interests": [],
            }
            try:
                person = Person(
                    user.profile.linkedin,
                    contacts=[],
                    driver=driver,
                    close_on_complete=False,
                )
                user_data["name"] = person.name
                user_data["about"] = person.about

                for experience in person.experiences:
                    user_data["experiences"].append({
                        "description": experience.description,
                        "position_title": experience.position_title.replace(
                            "Nome da empresa\n", ""),
                        "duration": experience.duration,
                    })

                for education in person.educations:
                    user_data["education"].append({
                        "from_date": education.from_date,
                        "to_date": education.to_date,
                        "degree": education.degree,
                        "company": education.company,
                    })

                user_data["interests"] = [
                    interest.title for interest in person.interests
                ]

                ProfilerData.objects.get_or_create(user=user,
                                                   linkedin_data=user_data)
            except Exception:
                continue  # skip profiles that fail to scrape

        driver.close()
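driver_factory() and ProfilerData are project-specific and not shown; a minimal driver_factory consistent with this command might look like the following sketch (headless Chrome is an assumption):

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def driver_factory():
    # Hypothetical stand-in for the project's helper: a headless Chrome session.
    options = Options()
    options.add_argument("--headless")
    return webdriver.Chrome(options=options)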
Example #6
def main():
    #
    # get image
    #
    actions.login(driver, email, password)  # if email and password aren't given, it prompts in the terminal
    person = Person("https://www.linkedin.com/in/namachi", driver=driver)
    # a = Person("https://www.linkedin.com/in/sornam-arumugam-48943715", driver=driver)
    with open("output_data.csv", "w") as out_file:
        print_person(person)
        out_file.write(str(person))
Example #7
def main():
    driver = webdriver.Firefox()

    email = "*****@*****.**"
    password = "******"
    actions.login(
        driver, email,
        password)  # if email and password aren't given, it prompts in the terminal
    # person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)
    # person = Person("https://www.linkedin.com/in/ahmet-aydin-41a37a111", driver=driver, get=True,
    #                 close_on_complete=False)

    person = Person("https://www.linkedin.com/in/alican-d%C3%B6nmez-00549098/",
                    driver=driver,
                    get=True,
                    close_on_complete=False)
    print(person)
Example #8
def get_information_bulk(data):

    chrome_options = Options()
    chrome_options.add_argument("--headless")

    path_webdriver = "/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
    driver = webdriver.Chrome(executable_path=path_webdriver,
                              chrome_options=chrome_options)

    #email = ""
    #password = ""
    #actions.login(driver, email, password)  # if email and password aren't given, it prompts in the terminal
    actions.login(driver)

    sheet_obj = data.active
    m_row = sheet_obj.max_row

    name = []
    experiences = []
    education = []
    company = []
    job_title = []
    about = []
    linkdin_url = []

    for i in range(1, m_row + 1):
        cell_obj = sheet_obj.cell(row=i, column=1)
        url = cell_obj.value

        person = Person((str(url)).strip(), driver=driver)

        name.append(person.name)
        experiences.append(person.experiences)
        education.append(person.educations)
        company.append(person.company)
        job_title.append(person.job_title)
        about.append(person.about)
        linkdin_url.append(person.linkedin_url)

    #driver.close()

    return (name, experiences, education, company, job_title, about,
            linkdin_url, driver)
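get_information_bulk expects an openpyxl workbook whose first column holds profile URLs; a hedged call site (the filename is a placeholder):

from openpyxl import load_workbook

workbook = load_workbook("profiles.xlsx")  # placeholder filename
name, experiences, education, company, job_title, about, urls, driver = \
    get_information_bulk(workbook)
driver.quit()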
Example #9
def scrape(proxy_ip,
           lista,
           username=Settings.username,
           password=Settings.password):
    first = True
    lista_result = []
    for persona in lista:
        print("Starting " + proxy_ip)
        try:
            PROXY = str(proxy_ip)
            if Settings.Proxy_request:
                webdriver.DesiredCapabilities.CHROME['proxy'] = {
                    "httpProxy": PROXY,
                    "ftpProxy": PROXY,
                    "sslProxy": PROXY,
                    "noProxy": None,
                    "proxyType": "MANUAL",
                    "autodetect": False
                }
            if first:
                first = False
                driver = webdriver.Chrome(ChromeDriverManager().install())
                actions.login(
                    driver, username, password
                )  # if email and password aren't given, it prompts in the terminal
            person = Person(persona,
                            driver=driver,
                            close_on_complete=False)
            print(person)
            lista_result.append(person)
        except Exception as e:
            print(e)
    try:
        driver.quit()
    except Exception as e:
        print(e)
    print("THREAD FINISHED")
    return lista_result
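Mutating webdriver.DesiredCapabilities.CHROME, as above, changes module-level state and is a legacy Selenium pattern; a sketch of the same per-session proxy with ChromeOptions (assuming proxy_ip is a host:port string):

from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument("--proxy-server=http://" + str(proxy_ip))  # proxy_ip as host:port
driver = webdriver.Chrome(options=options)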
Example #10
def processProfile(driver, data, index, pause_cnt):
    if pause_cnt and index % pause_cnt == 0:
        pause(driver)
    csv_output = []
    try:
        print('Processing profile ' + str(index) + ' ... ' + str(data[0]))
        profile = Person(data[0], driver=driver, scrape=False)
        profile.experiences = []  # Needed due to a bug in the library
        profile.educations = []  # Needed due to a bug in the library
        profile.scrape(close_on_complete=False)
        for ed in profile.educations:
            new_row = data[1:]  # Dump any eventual pre-existing cols
            new_row.extend([
                'education',
                ed.institution_name,
                ed.from_date[38:],  # Needed due to a bug in the library
                ed.to_date
            ])
            csv_output.append(new_row)
        for w in profile.experiences:
            new_row = data[1:]  # Dump any eventual pre-existing cols
            new_row.extend([
                'work',
                w.institution_name,
                w.position_title,
                w.description,
                w.from_date[15:],  # Needed due to a bug in the library
                w.to_date
            ])
            csv_output.append(new_row)
    except Exception as e:
        print('Error processing ' + str(data[0]) + ' error: ' + str(e))
        csv_output.append([data[0], 'error', e])
    return csv_output
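The fixed-offset slices above (from_date[38:], from_date[15:]) work around label text the library leaves in those fields; a less brittle cleanup would strip a known prefix instead. The label strings would have to be checked against real output, so treat this as a sketch:

def strip_label(value, label):
    # Drop a leading UI label (e.g. "Dates Employed\n") if it is present.
    if value and value.startswith(label):
        return value[len(label):].strip()
    return value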
Example #11
from linkedin_scraper import (
    actions,
    Person
)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


options = Options()
# options.binary_location = '/usr/bin/google-chrome-stable'
options.binary_location = '/usr/bin/brave-browser-stable'
driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver',
                          options=options)

email = ""
password = ""
actions.login(driver, email, password)
person = Person(
    "https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)

# person.scrape(close_on_complete=False)

print(person)
Example #12
profileUrl = []

check = 'https://www.linkedin.com/'

with open('profiler.csv', newline='', encoding="mbcs") as f:
    reader = csv.reader(f, delimiter=';')
    for row in reader:
        res = [idx for idx in row if idx.lower().startswith(check.lower())]
        profileUrl.append(res)

conn = sqlite3.connect('profiler.db')
c = conn.cursor()

count = 1
while count < len(profileUrl):
    urls = ''.join(profileUrl[count])
    person = Person(urls, driver=driver, scrape=False)
    person.scrape(close_on_complete=False)
    count = count + 1

    name = ''.join(person.name)
    about = ' '.join(person.about)
    personUrl = ''.join(person.linkedin_url)
    personExperience = '; '.join(map(str, person.experiences))
    personSkills = '; '.join(person.skills)

    if person.job_title in ("Utvecklare", "Testare", "Testautomatiserare",
                            "Software Tester", "Automationsutvecklare",
                            "Automations developer"):
        c.execute('INSERT INTO persons VALUES (?, ?, ?, ?, ?)',
                  (name, about, personUrl, personExperience, personSkills))
        conn.commit()
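The INSERT above assumes a persons table with five columns; a matching schema (an assumption, since the example never shows it) could be created once up front:

c.execute("CREATE TABLE IF NOT EXISTS persons "
          "(name TEXT, about TEXT, url TEXT, experiences TEXT, skills TEXT)")
conn.commit()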
Example #13
import os
from linkedin_scraper import Person, actions
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome("./chromedriver", options=chrome_options)

email = os.getenv("LINKEDIN_USER")
password = os.getenv("LINKEDIN_PASSWORD")
actions.login(driver, email, password) # if email and password aren't given, it prompts in the terminal
person = Person("https://www.linkedin.com/in/adrian0350", contacts=[], driver=driver)

print("Person: " + person.name)
print("Person contacts: ")

for contact in person.contacts:
	print("Contact: " + contact.name + " - " + contact.occupation + " -> " + contact.url)
Example #14
import csv, os, json
import requests
from time import sleep

import scrapper_lib

companyurls = ['https://www.linkedin.com/company/tata-consultancy-services']
extracted_data = []

#for url in companyurls:
#    extracted_data.append(scrapper_lib.linkedin_companies_parser(url))
#    f = open('data.json', 'w')
#    json.dump(extracted_data, f, indent=4)
#

#import urllib2 as URL
#
#urlopener= URL.build_opener()
#urlopener.addheaders = [('User-agent', 'Mozilla/5.0')]
#html= urlopener.open('https://www.linkedin.com/in/manuwhs').read()

# os.system("export ...") only affects a child shell; set the variable in this process
os.environ["CHROMEDRIVER"] = os.path.expanduser("~/chromedriver")
from selenium import webdriver
driver = webdriver.Chrome('./chromedriver')

from linkedin_scraper import Person
person = Person("https://www.linkedin.com/in/manuwhs")
Example #15
from linkedin_scraper import Person

rick_fox = Person("https://www.linkedin.com/in/rifox?trk=pub-pbmap")
iggy = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5") 
Example #16
    with open(outfilepath, 'a+', newline='') as csvfile:
        writer = csv.writer(csvfile,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_ALL)
        writer.writerow([person])


# loop through the URLs
people = []
failed_urls = []
invalid_urls = []
person = None
for url in urls:
    try:
        person = Person(url, driver=driver, close_on_complete=False)
        name = person.name

        # Picking experience
        if len(person.experiences) > 0:
            # position_title and institution_name are str in Python 3; no .decode() needed
            title = person.experiences[0].position_title or ''
            company = person.experiences[0].institution_name or ''
        else:
            title = ""
            company = ""

        # Picking education
Example #17
from linkedin_scraper import Person

rick_fox = Person("https://www.linkedin.com/in/rifox?trk=pub-pbmap")
iggy = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5")
Anirudra = Person("https://in.linkedin.com/in/anirudra-choudhury-109635b1")
Example #18
# Rather than doing the scraping ourselves, let the library do the work
# We just need to get the link to the profile, which should be a href link
# in the html.
src = browser.page_source
soup = BeautifulSoup(src, 'lxml')

# Use BeautifulSoup to get the linkedin profile link (may not work)
# Need some more testing to make sure it works consistently
link = soup.find(
    'a', {
        'class': 'app-aware-link ember-view search-result__result-link'
    }).get('href')
print(link)

# Using the linkedin_scraper api to do the scraping
linkedin_person = Person(link, driver=browser, scrape=False)
linkedin_person.scrape(close_on_complete=False)
print(linkedin_person)

# testing keyword search
# keyword search works, but linkedin_person must be converted
# to a string, and the match is case sensitive, so the scraped
# profile and the keywords are both lowercased
profile = str(linkedin_person).lower()
if ("black" in profile):
    print("keyword search works")
else:
    print("does not work")

# For a list of people use a dictionary with name + company as key and
# profile results as value
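A minimal sketch of that dictionary pattern (candidates and its tuple layout are hypothetical):

results_by_person = {}
for name, company, url in candidates:  # hypothetical (name, company, url) tuples
    p = Person(url, driver=browser, scrape=False)
    p.scrape(close_on_complete=False)
    # Key on name + company so two people with the same name don't collide.
    results_by_person[(name, company)] = str(p).lower()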
Example #19
email = ""
password = ""
file_p = open("table.csv", "a+")
driver = webdriver.Chrome("/usr/bin/chromedriver",
                          chrome_options=chrome_options)
actions.login(
    driver, email,
    password)  # if email and password aren't given, it prompts in the terminal

# people_jaana.txt is a text file which has the links for each profile
with open("people_jaana.txt") as fp:
    lines = fp.readlines()
    for line in lines:
        try:
            person = Person(line, driver=driver)
            file_str = person.name
            if person.experiences:
                # position_title and institution_name are str in Python 3; no .decode() needed
                file_str += ("," + person.experiences[0].position_title +
                             "," + person.experiences[0].institution_name)
            else:
                file_str += ",,"
            file_str += "," + line
            print(file_str)
            file_p.write(file_str)
        except Exception:
            print(line)
file_p.close()
driver.close()
Example #20
    def scrap(self, url):
        self.person = Person(url, driver=self.driver)
                 "https://www.linkedin.com/in/melinda-brown/"]
'''
for pizza in linkedin_urls:

    person = Person(linkedin_url = pizza, driver=driver, scrape = False)

    person.scrape(close_on_complete=False)

    dict_row={'person':person}

    dict_array.append(dict_row)

    sleep(randrange(5, 10))
'''

victoria = Person(linkedin_url = "https://www.linkedin.com/in/victoriakythai/", driver=driver, scrape = False)
victoria.scrape(close_on_complete=False)

pizza1 = " ".join(str(x) for x in [victoria])

dict_row={'person':pizza1}
dict_array.append(dict_row)

driver.delete_all_cookies()
driver.get("https://www.linkedin.com")
email_button = driver.find_element_by_xpath('//input[@id="login-email"]')
email_button.send_keys(user_email)
sleep(randrange(5,9))
pass_button = driver.find_element_by_xpath('//input[@id="login-password"]')
pass_button.send_keys(password)
sleep(randrange(5,9))
Example #22
        print("     Capturando coordenadas - Região: " + linha['address'] +
              " lat:" + str(lat) + " long:" + str(lng))
        sql = "update customers set address_latitude=%s,address_longitude=%s where userId=%s"
        cursor.execute(sql, (lat, lng, linha['userId']))
        conexao.commit()

print('')
print("Fetching social media information...")
print('')

# Analyze location and social media profile.
for linha in resultado:
    print('')
    print('     Capturing profile: ' + linha['linkedin'])

    person = Person(linha['linkedin'], driver=driver, scrape=False)
    person.scrape(close_on_complete=False)

    experiences = str(person.experiences[0])

    inicio = experiences.find(" at ")
    token_end = experiences.find(" from ")

    companyCrawled = experiences[inicio + 4:token_end]

    r = requests.get(
        'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input='
        + companyCrawled +
        '&inputtype=textquery&fields=photos,formatted_address,name,rating,opening_hours,geometry&key='
        + GOOGLE_MAPS_KEY)
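Concatenating companyCrawled straight into the query string breaks on spaces and special characters; letting requests encode the parameters (same endpoint and fields as above) is safer:

r = requests.get(
    'https://maps.googleapis.com/maps/api/place/findplacefromtext/json',
    params={
        'input': companyCrawled,
        'inputtype': 'textquery',
        'fields': 'photos,formatted_address,name,rating,opening_hours,geometry',
        'key': GOOGLE_MAPS_KEY,
    })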
Example #23
browser.get(url)

email = browser.find_element_by_xpath('//*[@id="session_key-login"]')
email.send_keys('*****@*****.**')
time.sleep(2)

password = browser.find_element_by_xpath('//*[@id="session_password-login"]')
password.send_keys('******')
time.sleep(2)

button = browser.find_element_by_xpath('//*[@id="btn-primary"]')
button.click()
time.sleep(5)

person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5",
                driver=browser,
                scrape=False)

person.scrape(close_on_complete=False)

browser.close()

# browser.get('https://www.linkedin.com/search/results/people/?facetNetwork=%5B%22F%22%5D&origin=MEMBER_PROFILE_CANNED_SEARCH')
#
# connections = browser.find_element_by_xpath('//*[@id="ember5061"]/span[1]/span')
# time.sleep(10)
#
# print(connections)
#
# browser.close()
# person = Person('https://www.linkedin.com/in/brandeddavid/', driver=browser, scrape=True)
Example #24
from linkedin_scraper import Person, actions
from selenium import webdriver

path="/home/ravishekhartiwari/Desktop/LINKDIN_SCRAPP/chromedriver_linux64/chromedriver"
driver = webdriver.Chrome(executable_path=path)


#email = "*****@*****.**"
#password = "******"
actions.login(driver)  # if email and password aren't given, it prompts in the terminal
# A profile URL is required here; url is a placeholder for the profile to scrape
person = Person(url, driver=driver, scrape=False)


person.scrape(close_on_complete=True)
Example #25
    if (profile_link is None):
        members_dict[person] = "None"
        continue
    
    # get the link
    profile_link = profile_link.get('href')

    # Need an exception block because sometimes, when no search
    # results appear, a link is still extracted, and the scraper
    # then fails because there are no fields it can extract
    try:
        # The fields must be passed as empty lists; otherwise each scraped
        # result appends to the previous ones and, by the end, you get one
        # long, overlapping profile of everyone scraped so far
        scraping = Person(profile_link, about=[], experiences=[], educations=[],
                          interests=[], accomplishments=[], driver=browser,
                          scrape=False)
        scraping.scrape(close_on_complete=False)
        results = str(scraping).lower()


        # gives it some time to process
        time.sleep(1)

        # Create a list and for each keyword that is in the
        # scraped results, append it, and at the end add it
        # as the value with the name of the individual as the key
        dict_list = []
        for keyword in keywords:
            if (keyword in results):
                dict_list.append(keyword)
        members_dict[person] = dict_list
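The empty-list workaround in this example (and the .clear() calls elsewhere in this collection) guards against the library accumulating list state across profiles; a small helper makes the intent explicit (the name fresh_person is hypothetical):

def fresh_person(url, driver):
    # Pass fresh lists so scraped results never leak between profiles.
    return Person(url, about=[], experiences=[], educations=[],
                  interests=[], accomplishments=[], driver=driver, scrape=False)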
Example #26
# Finds the person through search
# Search and Lookup result documentation:
# https://github.com/rocketreach/rocketreach_python/blob/master/rocketreach/person.py
rr = rocketreach.Gateway(rocketreach.GatewayConfig('apikey'))

# Code for searches (does not return the LinkedIn link)
s = rr.person.search().filter(name="")
result = s.execute()
for person in result.people:
    print(person)

# Code for lookups
f = open("info.txt","w")
result = rr.person.lookup(person_id=id)
if result.is_success:
	person = result.person
    f.write("ID: "+ person.id)
    f.write("Name: "+ person.name)
    f.write("Employer: "+ person.current_employer)
    f.write("LinkedIn: "+ person.linkedin_url)
    # linkedin_urls.append(person.linkedin_url)


# LinkedIn Scraper
driver = webdriver.Chrome()
linkedin_urls = ["https://www.linkedin.com/in/teri-williams-cohee-99811029"]
actions.login(driver, "username", "password")
linkedin_person = Person(linkedin_urls[0], driver=driver,scrape=False)
linkedin_person.scrape(close_on_complete=False)
print(linkedin_person)
Example #27
def main():

    linkedin_urls = read_csv('only_linkedin_urls.csv')
    driver = browse('https://www.linkedin.com')
    connect(driver, '*****@*****.**', '******')

    # for normal user behavior in the website
    driver.execute_script(
        "(function(){try{for(i in document.getElementsByTagName('a')){let el = document.getElementsByTagName('a')[i]; "
        "if(el.innerHTML.includes('Contact info')){el.click();}}}catch(e){}})()"
    )

    # Wait a few seconds for the page to load
    time.sleep(3)
    list_of_profiles = []

    # get profiles information
    for elems in linkedin_urls:
        for elem in elems:

            profile = []
            driver.get(elem)
            driver.implicitly_wait(10)
            time.sleep(3)

            person = Person(linkedin_url=str(elem),
                            driver=driver,
                            scrape=False)

            person.scrape(close_on_complete=False)

            #number of connection
            try:
                connections = driver.find_element_by_xpath(
                    '//*[@id="ember56"]/div[2]/div[2]/div[1]/ul[2]/li[2]/span'
                ).text
            except:
                connections = []

            print('\n')
            ## url
            print('url : ', person.linkedin_url)
            profile.append(person.linkedin_url)
            #name
            print('name : ', person.name)
            profile.append(person.name)
            ## position
            print('position : ', person.job_title)
            profile.append(person.job_title)

            ## location
            print('location : ', person.location)
            profile.append(person.location)
            ## connections
            print('connections : ', connections)
            profile.append(connections)
            ## experiences
            print('experiences : ', person.experiences)
            profile.append(person.experiences)
            ## educations
            print('education : ', person.educations)
            profile.append(person.educations)
            ## interests
            print('interests : ', person.interests)
            profile.append(person.interests)
            print('\n')

            write_profile_info_in_file(profile, 'profiles.csv')
            list_of_profiles.append(profile)

            # Reset lists so the library doesn't accumulate results across profiles
            person.experiences.clear()
            person.educations.clear()
            person.interests.clear()

    time.sleep(5)
    driver.quit()
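write_profile_info_in_file() is not shown in this example; a minimal CSV-append sketch consistent with its call site (an assumption about the real helper):

import csv

def write_profile_info_in_file(profile, filename):
    # Append one scraped profile as a single CSV row.
    with open(filename, 'a', newline='') as f:
        csv.writer(f).writerow([str(field) for field in profile])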
Example #28
from linkedin_scraper import Person, actions
from selenium import webdriver
driver = webdriver.Chrome('C:/chromedriver')

email = "*****@*****.**"
password = "******"
actions.login(
    driver, email,
    password)  # if email and password aren't given, it prompts in the terminal
person = Person("https://www.linkedin.com/in/rodriguescajetan", driver=driver)
print(person)