Example #1
from random import random
from time import sleep


def person_profile(url: str) -> dict:
    """Fetch a person's profile based on their LinkedIn URL."""
    driver = ChromeDriver.get()  # ChromeDriver and Person come from the surrounding project
    person = Person(url, driver)
    person.scrape(close_on_complete=False, timeout=10)
    sleep(2 + 5 * random())
    # Should be safe to go back once; needed for search and profile to work.
    driver.execute_script("window.history.go(-1)")
    return person.to_dict()
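# A minimal, self-contained sketch of the same pattern with a plain Selenium
# driver, since ChromeDriver, Person and to_dict() above come from the
# surrounding project. The URL and credentials are placeholders; actions.login
# is linkedin_scraper's login helper.
from selenium import webdriver
from linkedin_scraper import Person, actions

driver = webdriver.Chrome()
actions.login(driver, "user@example.com", "secret")  # placeholder credentials

person = Person("https://www.linkedin.com/in/example", driver=driver, scrape=False)
person.scrape(close_on_complete=False)
print(person.name, person.job_title)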
Example #2
from linkedin_scraper import Person


def get_profile_linkedin(driver, url):
    """Scrape a person's LinkedIn profile without closing the driver."""
    person = Person(linkedin_url=url,
                    name=None,
                    about=[],
                    experiences=[],
                    educations=[],
                    interests=[],
                    accomplishments=[],
                    company=None,
                    job_title=None,
                    driver=driver,
                    scrape=False)
    person.scrape(close_on_complete=False)
    return person
def processProfile(driver, data, index, pause_cnt):
    """Scrape one profile URL (data[0]) and return its education/work rows for CSV output."""
    if pause_cnt and index % pause_cnt == 0:
        pause(driver)  # project-specific helper, presumably used to slow down scraping
    csv_output = []
    try:
        print('Processing profile ' + str(index) + ' ... ' + str(data[0]))
        profile = Person(data[0], driver=driver, scrape=False)
        profile.experiences = []  # Needed due to a bug in the library
        profile.educations = []  # Needed due to a bug in the library
        profile.scrape(close_on_complete=False)
        for ed in profile.educations:
            new_row = data[1:]  # Dump any eventual pre-existing cols
            new_row.extend([
                'education',
                ed.institution_name,
                ed.from_date[38:],  # Needed due to a bug in the library
                ed.to_date
            ])
            csv_output.append(new_row)
        for w in profile.experiences:
            new_row = data[1:]  # Dump any eventual pre-existing cols
            new_row.extend([
                'work',
                w.institution_name,
                w.position_title,
                w.description,
                w.from_date[15:],  # Needed due to a bug in the library
                w.to_date
            ])
            csv_output.append(new_row)
    except Exception as e:
        print('Error processing ' + str(data[0]) + ' error: ' + str(e))
        csv_output.append([data[0], 'error', e])
    return csv_output
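# The code that feeds rows into processProfile and writes its output is not
# shown; a hypothetical sketch of that plumbing, assuming each input row holds
# the profile URL in column 0 and using an illustrative output file name.
import csv

def process_all(driver, rows, out_path='profiles_out.csv', pause_cnt=10):
    all_rows = []
    for index, data in enumerate(rows, start=1):
        # data[0] is expected to hold the profile URL, as in processProfile above
        all_rows.extend(processProfile(driver, data, index, pause_cnt))
    with open(out_path, 'w', newline='') as f:
        csv.writer(f).writerows(all_rows)
    return all_rows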
def main():

    # read_csv, browse and connect are project-specific helpers (presumably:
    # load the URL list from CSV, open a Selenium driver, and log in).
    linkedin_urls = read_csv('only_linkedin_urls.csv')
    driver = browse('https://www.linkedin.com')
    connect(driver, '*****@*****.**', '123tototorres&')

    # Click the 'Contact info' link (if present), mimicking normal user behaviour on the page
    driver.execute_script(
        "(function(){try{for(i in document.getElementsByTagName('a')){let el = document.getElementsByTagName('a')[i]; "
        "if(el.innerHTML.includes('Contact info')){el.click();}}}catch(e){}})()"
    )

    # Wait a few seconds for the page to load
    time.sleep(3)
    list_of_profiles = []

    # get profiles information
    for elems in linkedin_urls:
        for elem in elems:

            profile = []
            driver.get(elem)
            driver.implicitly_wait(10)
            time.sleep(3)

            person = Person(linkedin_url=str(elem),
                            driver=driver,
                            scrape=False)

            person.scrape(close_on_complete=False)

            # number of connections (the ember id in this XPath is brittle and may change)
            try:
                connections = driver.find_element_by_xpath(
                    '//*[@id="ember56"]/div[2]/div[2]/div[1]/ul[2]/li[2]/span'
                ).text
            except Exception:
                connections = []

            print('\n')
            ## url
            print('url : ', person.linkedin_url)
            profile.append(person.linkedin_url)
            ## name
            print('name : ', person.name)
            profile.append(person.name)
            ## position
            print('position : ', person.job_title)
            profile.append(person.job_title)

            ## location
            print('location : ', person.location)
            profile.append(person.location)
            ## connections
            print('connections : ', connections)
            profile.append(connections)
            ## experiences
            print('experiences : ', person.experiences)
            profile.append(person.experiences)
            ## educations
            print('education : ', person.educations)
            profile.append(person.educations)
            ## interests
            print('interests : ', person.interests)
            profile.append(person.interests)
            print('\n')

            write_profile_info_in_file(profile, 'profiles.csv')
            list_of_profiles.append(profile)

            person.experiences.clear()
            person.educations.clear()
            person.interests.clear()

    time.sleep(5)
    driver.quit()
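# The excerpt defines main() but never calls it; presumably the script ends
# with the usual entry-point guard.
if __name__ == '__main__':
    main()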
Example #5
# The email input field was located earlier in the source script.
email.send_keys('*****@*****.**')
time.sleep(2)

password = browser.find_element_by_xpath('//*[@id="session_password-login"]')
password.send_keys('marigi@98')
time.sleep(2)

button = browser.find_element_by_xpath('//*[@id="btn-primary"]')
button.click()
time.sleep(5)

person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5",
                driver=browser,
                scrape=False)

person.scrape(close_on_complete=False)

browser.close()

# browser.get('https://www.linkedin.com/search/results/people/?facetNetwork=%5B%22F%22%5D&origin=MEMBER_PROFILE_CANNED_SEARCH')
#
# connections = browser.find_element_by_xpath('//*[@id="ember5061"]/span[1]/span')
# time.sleep(10)
#
# print(connections)
#
# browser.close()
# person = Person('https://www.linkedin.com/in/brandeddavid/', driver=browser, scrape=True)
# time.sleep(10)
#
# print(person.scrape())
Example #6
# We just need to get the link to the profile, which should be an href
# attribute on an anchor in the HTML.
src = browser.page_source
soup = BeautifulSoup(src, 'lxml')

# Use BeautifulSoup to get the linkedin profile link (may not work)
# Need some more testing to make sure it works consistently
link = soup.find(
    'a', {
        'class': 'app-aware-link ember-view search-result__result-link'
    }).get('href')
print(link)

# Using the linkedin_scraper api to do the scraping
linkedin_person = Person(link, driver=browser, scrape=False)
linkedin_person.scrape(close_on_complete=False)
print(linkedin_person)

# Testing keyword search.
# Keyword search works, but linkedin_person must first be converted into a
# string; the search is also case sensitive, so the scraped profile and the
# keywords must both be converted to lower case.
profile = str(linkedin_person).lower()
if ("black" in profile):
    print("keyword search works")
else:
    print("does not work")

# For a list of people, use a dictionary with name + company as key and the
# profile results as value.
# Wrap the code in lines 54 - 74 into a loop when there are multiple results
# (a sketch of this follows the commented-out loop below).
'''
for pizza in linkedin_urls:

    person = Person(linkedin_url = pizza, driver=driver, scrape = False)

    person.scrape(close_on_complete=False)

    dict_row={'person':person}

    dict_array.append(dict_row)

    sleep(randrange(5, 10))
'''
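# A hypothetical sketch of the dictionary the comment above describes, keyed
# by name + company. The results_by_person name, the linkedin_urls list and
# the logged-in driver are assumptions, and person.company may be empty if
# scraping does not fill it in.
from random import randrange
from time import sleep

from linkedin_scraper import Person

results_by_person = {}
for url in linkedin_urls:
    person = Person(linkedin_url=url, driver=driver, scrape=False)
    person.scrape(close_on_complete=False)
    key = '{} | {}'.format(person.name, person.company)
    results_by_person[key] = str(person)
    sleep(randrange(5, 10))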

victoria = Person(linkedin_url = "https://www.linkedin.com/in/victoriakythai/", driver=driver, scrape = False)
victoria.scrape(close_on_complete=False)

pizza1 = " ".join(str(x) for x in [victoria])

dict_row={'person':pizza1}
dict_array.append(dict_row)

driver.delete_all_cookies()
driver.get("https://www.linkedin.com")
email_button = driver.find_element_by_xpath('//input[@id="login-email"]')
email_button.send_keys(user_email)
sleep(randrange(5,9))
pass_button = driver.find_element_by_xpath('//input[@id="login-password"]')
pass_button.send_keys(password)
sleep(randrange(5,9))
submit_button = driver.find_element_by_xpath('//input[@id="login-submit"]')
        members_dict[person] = "None"
        continue
    
    # get the link
    profile_link = profile_link.get('href')

    # Need an exception block because sometimes, even when no search results
    # appear, a link is still found; scraping it then fails because there are
    # no fields to extract.
    try:
        # The fields must be set to empty lists because otherwise each scraped
        # result just appends to the previous ones and, by the end, you are left
        # with one long, overlapping profile of everyone scraped so far.
        scraping = Person(profile_link, about=[], experiences=[], educations=[],
                          interests=[], accomplishments=[], driver=browser, scrape=False)
        scraping.scrape(close_on_complete=False)
        results = str(scraping).lower()


        # gives it some time to process
        time.sleep(1)

        # Create a list and for each keyword that is in the
        # scraped results, append it, and at the end add it
        # as the value with the name of the individual as the key
        dict_list = []
        for keyword in keywords:
            if (keyword in results):
                dict_list.append(keyword)
        members_dict[person] = dict_list
    except NoSuchElementException: