def test_profile_scraper():
    """End-to-end check that ProfileScraper returns a fully populated profile."""
    with ProfileScraper(driver_options=HEADLESS_OPTIONS) as ps:
        profile = ps.scrape(user='******')

    profile_info = profile.to_dict()

    # Every declared top-level attribute must be present and truthy.
    assert all(profile_info[attr] for attr in profile.attributes)

    # Skills: each entry is named and has at least one endorsement.
    for skill in profile_info['skills']:
        assert skill['name'] is not None
        assert len(skill['endorsements']) > 0

    # Personal info: known display name, at least one website, no empty fields.
    personal_info = profile_info['personal_info']
    assert personal_info['name'] == "Austin O'Boyle"
    assert len(personal_info['websites']) > 0
    for field in ('headline', 'company', 'school', 'summary', 'location',
                  'followers', 'email', 'image'):
        assert personal_info[field]

    # Contacts cap out at LinkedIn's "500+" display value.
    assert personal_info['contacts'] == '500+'

    # Accomplishments: all tracked categories are non-empty.
    accomplishments = profile_info['accomplishments']
    for field in ('certifications', 'courses', 'honors', 'projects',
                  'languages'):
        assert accomplishments[field]

    # Interests
    assert profile_info['interests']

    # Experiences: jobs, education and volunteering all carry key fields.
    experiences = profile_info['experiences']

    assert experiences['jobs']
    for job in experiences['jobs']:
        assert job['date_range'] and job['company'] and job['title']

    assert experiences['education']
    for school in experiences['education']:
        assert school['name'] and school['date_range']

    assert experiences['volunteering']
    for entry in experiences['volunteering']:
        assert entry['title'] and entry['date_range']
Example #2
0
def test_profile_scraper():
    """Smoke test: a scraped profile exposes every expected section."""
    with ProfileScraper(driver_options=HEADLESS_OPTIONS) as ps:
        profile = ps.scrape(user="******")

    info = profile.to_dict()

    # All declared attributes must be truthy.
    for attr in profile.attributes:
        assert info[attr]

    # Skills must be named and endorsed at least once.
    for skill in info["skills"]:
        assert skill["name"] is not None
        assert len(skill["endorsements"]) > 0

    # Personal info.
    personal = info["personal_info"]
    assert personal["name"] == "Austin O'Boyle"
    assert len(personal["websites"]) > 0
    for key in ("headline", "company", "school", "summary", "location",
                "image"):
        assert personal[key]

    # Accomplishments.
    accomplishments = info["accomplishments"]
    for key in ("courses", "honors", "projects", "languages"):
        assert accomplishments[key]

    # Interests.
    assert info["interests"]

    # Experiences: every job/school/volunteering entry carries its key fields.
    experiences = info["experiences"]

    jobs = experiences["jobs"]
    assert jobs
    for job in jobs:
        assert job["date_range"] and job["company"] and job["title"]

    education = experiences["education"]
    assert education
    for school in education:
        assert school["name"] and school["date_range"]

    volunteering = experiences["volunteering"]
    assert volunteering
    for role in volunteering:
        assert role["title"] and role["date_range"]
Example #3
0
def scrape_user(user_id):
    """Scrape a single LinkedIn profile and return it as a plain dict.

    :param user_id: vanity id of the profile to scrape.
    :return: the profile serialized via ``Profile.to_dict()``.
    """
    with ProfileScraper() as ps:
        result = ps.scrape(user=user_id)
    return result.to_dict()
def scrape_person(url):
    """Scrape the LinkedIn profile behind a full profile *url* and print it.

    :param url: profile URL of the form ``.../in/<vanity-name>/...``.
    """
    # Pull the vanity user name out of the URL path segment after "in/".
    user_name = url.split('in/')[1].split('/')[0]
    print(user_name)
    # NOTE(review): session cookie is hardcoded — move it to env/config and rotate.
    with ProfileScraper(driver_options=HEADLESS_OPTIONS,cookie='AQEDATCqAAsEnwLsAAABceUL55UAAAFyCRhrlVYAHF3D2I07SBdYzkXulfZyZSL6M5Y_Ap17KE5qIXPGP5MiebSzuJFFIiQNI6Gj3LREGMgwtZdTtQk09LHenXAOIC9zEkedjhbHxoZDGC2ejC0MfNwS') as scraper:
        profile = scraper.scrape(user=user_name)
    print(profile.to_dict())
Example #5
0
def job(connections, output_file=None):
    """Scrape each connection's profile and mutual connections, dump to JSON.

    Mutates every dict in *connections* in place, adding a ``'profile'`` key
    (the scraped profile as a dict) and a ``'mutual_connections'`` key (a
    list of connection ids), then writes the whole list to *output_file*.

    :param connections: iterable of dicts, each carrying an ``'id'`` key.
    :param output_file: path of the JSON file to write; required.
    :raises ValueError: if *output_file* is not supplied.
    """
    if output_file is None:
        # `assert` is stripped under `python -O`; validate explicitly instead.
        raise ValueError("output_file must be provided")
    with ProfileScraper() as ps:
        for connection in connections:
            connection['profile'] = ps.scrape(user=connection['id']).to_dict()
            connection['mutual_connections'] = [
                mutual['id'] for mutual in ps.get_mutual_connections()
            ]
    with open(output_file, 'w') as f:
        json.dump(connections, f)
def test(title, loc, li_at):
    """Search Google for profile ids matching *title*/*loc* and scrape each.

    Ids are read back from the ``result.csv`` file that ProfileIdsGoogle
    writes. On success returns the list of profile dicts (also handed to
    ``CreateDoc().create``); returns ``None`` when no ids were found; on any
    error returns the caught exception (existing best-effort contract).

    :param title: job title used for the Google id search.
    :param loc: location used for the Google id search.
    :param li_at: LinkedIn "li_at" session cookie.
    """
    try:
        with ProfileIdsGoogle(cookie=li_at) as idsScrap:
            idsScrap.getIds(jobTitle=title, location=loc)
        data = []
        with open('result.csv', 'r') as csvFile:
            reader = csv.reader(csvFile, delimiter=',')
            # Flatten every CSV cell into one list of profile ids.
            # (The `with` block closes the file; the old explicit
            # csvFile.close() afterwards was redundant and was removed.)
            users = [cell for row in reader for cell in row]
        if users:
            with ProfileScraper(cookie=li_at) as scraper:
                for i in users:
                    profile = scraper.scrape(user=i)
                    if profile is not None:
                        data.append(profile.to_dict())
            CreateDoc().create(data=data)
            return data

    except Exception as e:
        # NOTE(review): broad catch that *returns* the exception preserves the
        # existing best-effort contract; prefer logging + re-raising long term.
        print(e)
        return e
from scrape_linkedin import ProfileScraper

import pandas as pd
import json

outDir = "/Users/ruiliu/Desktop/"  # destination directory for the CSV export
# Collect the first-degree connections of the seed account.

# NOTE(review): hardcoded LinkedIn session cookie — move to env/config and rotate.
with ProfileScraper(
        cookie=
        'AQEDASgc_sYBUdX-AAABb2foSO0AAAFwfNSGnE4AVgMRkHBIlGE--Dsqwz4X-V4q_8YRMYCzXpOdVQqu6_LDEtnjvIuPauzu7JZ81QiPa5r2nCEWimHSJzLqaxt3uYBAuCQG6c2nNus2Lnt0GSsdh2iN'
) as temp:
    # Mutual connections of the (redacted) seed user, fed straight into pandas.
    conn = temp.get_mutual_connections(user='******')
    connections_1 = pd.DataFrame.from_dict(conn)
    connections = connections_1

# Persist the connection list for later processing.
connections.to_csv(outDir + "ruiliu.csv", index=False)

# Get the connections of your connection
# for ID in connections['id']:
#         with ProfileScraper(
#                 cookie='AQEDAQCkIfkBtuB8AAABae9XtXEAAAFrTE831lYAs8NLYorXlE-H0kDIW2UHDUlg-Slp0T7k-wzwLO1dvsZUl9d9kKS3lxjQFbnV0UBgIioEulyIL6Mu_fdzJGdkFYyjjjPvB23rNUlu7IcjDy3h-JOR') as temp:
#                 conn = temp.get_mutual_connections(user=ID)
#                 connections_3 = pd.DataFrame.from_dict(conn)

# Collect profile

#
#
# ids = open("/Users/ruiliu/Desktop/resume_connection/xuyang.txt", "r").readlines()
#
Example #8
0
# --- Scrape profiles listed in data.csv and persist them to MySQL ---
# NOTE(review): this excerpt appears truncated — the `try:` below has no
# matching `except`/`finally` in view, and `pro` is appended to without being
# initialized here (only the commented-out sample below hints at its shape).
mydb = mysql.connector.connect(host="localhost",
                               user="******",
                               password="",
                               database="profiles")
# pro = ["andrew-fintel-599b7230/"]
with open('data.csv', 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        print(row[2])
        # Third column holds the full profile URL; strip it down to the vanity id.
        p = row[2].replace('https://www.linkedin.com/in/', '')
        if (p != ''):
            pro.append(p)
res = {}  # vanity id -> scraped profile dict

# NOTE(review): hardcoded LinkedIn session cookie — move to env/config and rotate.
with ProfileScraper(
        cookie=
        'AQEDATGl_YYFYSDUAAABc8MO0OUAAAFz5xtU5VYAGfdDM0R7QwECX1qXZ8vx03Qf6ptXeSlkoN-8gF5xkdNXRHQO2J8B9y6prcEP6PG70pfgSmWEaUgPUbVZz_BgyTU2FNioIKAjnJ7ZCPWqXSAdZIeh'
) as scraper:
    for a in pro:
        print(a)
        try:
            profile = scraper.scrape(user=a)
            res[a] = profile.to_dict()
            mycursor = mydb.cursor()
            # sql = "INSERT INTO linkedin (name, json) VALUES ("+res[a]["personal_info"]["name"]+","+str(json.dumps(res[a]))+")";
            # Parameterized insert — safe against SQL injection, unlike the
            # string-concatenated version commented out above.
            sql = "INSERT INTO linkedin (name,profile_name ,json) VALUES (%s,%s,%s)"
            val = (str(res[a]["personal_info"]["name"]), a,
                   str(json.dumps(res[a])))
            print(sql)
            mycursor.execute(sql, val)
            mydb.commit()
            print(mycursor.rowcount, "record inserted.")
def test_profile_scraper_connections():
    """The scraped contact count for this profile should read '500+'."""
    with ProfileScraper(driver_options=HEADLESS_OPTIONS) as ps:
        result = ps.scrape(user='******')
    contacts = result.to_dict()['personal_info']['contacts']
    assert contacts == '500+'
import json
from scrape_linkedin import ProfileScraper
import pandas as pd

names = []
linkedin_urls = []

# Load the profile URLs (first CSV column), skipping the header row.
with open('ProfileLinks/DataScienceTresNoName.csv') as csvDataFile:
    csvReader = csv.reader(csvDataFile)
    next(csvReader, None)  # discard the header
    linkedin_urls.extend(record[0] for record in csvReader)

# Scrape each collected profile URL and flatten its sections into DataFrames.
# NOTE(review): this excerpt looks truncated — the DataFrames built below are
# never written out within the visible lines; confirm the remainder elsewhere.
for linkedin_url in linkedin_urls:
    # A fresh scraper session (new driver) is opened for every profile URL.
    with ProfileScraper() as scraper:
        profile = scraper.scrape(user=linkedin_url)

        # Personal info: one dict -> single-row DataFrame (keys become columns
        # after the transpose).
        personalinfo = pd.DataFrame.from_dict(profile.personal_info,
                                              orient='index')
        personalinfo = personalinfo.transpose()

        # Skills: presumably a list of records — one row per skill.
        skills = pd.DataFrame.from_dict(profile.skills)

        # Experiences: same dict-to-row reshaping as personal info.
        experiences = pd.DataFrame.from_dict(profile.experiences,
                                             orient='index')
        experiences = experiences.transpose()