Python Spiderの例

プログラミング言語: Python

名前空間/パッケージ名: juniorguru.scrapers.spiders.linkedin

メソッド/関数: Spider

hotexamples.comのコード掲載数: 11

Python Spider - 11件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのjuniorguru.scrapers.spiders.linkedin.Spiderの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def test_spider_parse_job():
    """Check the single job that ``parse_job`` yields for the ``job.html`` fixture.

    Verifies the exact set of keys on the job, the value of each plain
    field, the relative posting date, and a snippet of the description HTML.
    """
    fixture_path = Path(FIXTURES_DIR / 'job.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    parsed = list(linkedin.Spider().parse_job(response))

    # Exactly one job is expected from this fixture.
    assert len(parsed) == 1

    job = parsed[0]

    expected_keys = [
        'title',
        'link',
        'company_name',
        'company_link',
        'location',
        'employment_types',
        'posted_at',
        'description_html',
        'experience_levels',
    ]
    assert sorted(job.keys()) == sorted(expected_keys)

    # Plain field comparisons, checked in declaration order.
    expected_values = {
        'title': 'Start kariéry jako Junior C++ Programátor/ka',
        'link': 'https://example.com/example/',
        'company_name': 'Experis Czech Republic',
        'company_link': 'https://cz.linkedin.com/company/experis-czech-republic?trk=public_jobs_topcard_org_name',
        'location': 'Prague, Czech Republic',
        'employment_types': ['full-time'],
        'experience_levels': ['entry level'],
    }
    for field, expected in expected_values.items():
        assert job[field] == expected

    # The posting date is compared relative to the day the test runs.
    assert job['posted_at'].date() == date.today() - timedelta(weeks=3)
    assert '<li>3 Sick days ročně' in job['description_html']

コード例 #2

ファイルを表示

def test_spider_parse_job_applicants():
    """Check ``posted_at`` of the first job parsed from ``job_applicants.html``.

    The date part of ``posted_at`` must be exactly two weeks before today.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_applicants.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    results = linkedin.Spider().parse_job(response)
    job = next(results)

    assert job['posted_at'].date() == date.today() - timedelta(weeks=2)

コード例 #3

ファイルを表示

def test_spider_parse_job_description_doesnt_include_criteria_list():
    """Check that criteria-list texts are absent from ``description_html``.

    Uses the first job that ``parse_job`` yields for the ``job.html`` fixture.
    """
    fixture_path = Path(FIXTURES_DIR / 'job.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    job = next(linkedin.Spider().parse_job(response))

    # Neither of these texts may appear in the description HTML.
    unwanted_snippets = (
        'Employment type',
        'Information Technology and Services',
    )
    for snippet in unwanted_snippets:
        assert snippet not in job['description_html']

コード例 #4

ファイルを表示

ファイル: test_linkedin.py プロジェクト: miiila/junior.guru

def test_spider_parse_job_applicants():
    """Check ``posted_at`` of the item parsed from ``job_applicants.html``.

    The item is read from ``cb_kwargs['item']`` of the first object that
    ``parse_job`` yields; its ``posted_at`` must equal today's date.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_applicants.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    first_yielded = next(linkedin.Spider().parse_job(response))
    job = first_yielded.cb_kwargs['item']

    assert job['posted_at'] == date.today()

コード例 #5

ファイルを表示

ファイル: test_linkedin.py プロジェクト: benabraham/junior.guru

def test_spider_parse_job():
    """Check the single job that ``parse_job`` yields for the ``job.html`` fixture.

    Verifies the exact set of keys on the job, each field value, the
    relative posting date, and a snippet of the description HTML.
    """
    fixture_path = Path(FIXTURES_DIR / 'job.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    parsed = list(linkedin.Spider().parse_job(response))

    # Exactly one job is expected from this fixture.
    assert len(parsed) == 1

    job = parsed[0]

    expected_keys = [
        'title', 'link', 'company_name', 'company_link', 'locations_raw',
        'employment_types', 'posted_at', 'description_html',
        'experience_levels', 'company_logo_urls', 'remote',
    ]
    assert sorted(job.keys()) == sorted(expected_keys)

    # Plain equality checks, performed in declaration order.
    expected_values = {
        'title': 'Software Engineer',
        'link': 'https://ca.linkedin.com/jobs/view/software-engineer-at-adaptavist-2230926500',
        'company_name': 'Adaptavist',
        'company_link': 'https://uk.linkedin.com/company/adaptavist',
        'locations_raw': ['Toronto, Ontario, Canada'],
    }
    for field, expected in expected_values.items():
        assert job[field] == expected

    # Identity check: the value must be the ``False`` singleton itself.
    assert job['remote'] is False
    assert job['employment_types'] == ['full-time']
    assert job['experience_levels'] == ['entry level']
    # The posting date is compared relative to the day the test runs.
    assert job['posted_at'] == date.today() - timedelta(weeks=3)

    expected_logo_urls = ['https://media-exp1.licdn.com/dms/image/C4D0BAQHhfg0SSuymNA/company-logo_100_100/0?e=1612396800&v=beta&t=GoeZ9Wui3hJSaLrewZdVNpWFm3YCMOSsmte2maE7S3o']
    assert job['company_logo_urls'] == expected_logo_urls
    assert '<li>ReactJS, Webpack</li>' in job['description_html']

コード例 #6

ファイルを表示

ファイル: test_linkedin.py プロジェクト: benabraham/junior.guru

def test_spider_parse_job_no_company_link():
    """Check the item parsed from the ``job_no_company_link.html`` fixture.

    The item is read from ``cb_kwargs['item']`` of the first object that
    ``parse_job`` yields. It must have a company name and raw locations,
    but no ``company_link`` key.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_no_company_link.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    first_yielded = next(linkedin.Spider().parse_job(response))
    job = first_yielded.cb_kwargs['item']

    assert job['company_name'] == 'Grafton Temporary Staffing'
    assert 'company_link' not in job
    assert job['locations_raw'] == ['Praha 4']

コード例 #7

ファイルを表示

def test_spider_parse_job_no_company_link():
    """Check the first job parsed from the ``job_no_company_link.html`` fixture.

    The job must have a company name and a location, but no
    ``company_link`` key.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_no_company_link.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    results = linkedin.Spider().parse_job(response)
    job = next(results)

    assert job['company_name'] == 'Ubiquiti'
    assert 'company_link' not in job
    assert job['location'] == 'Pilsen, Plzeň, Czech Republic'

コード例 #8

ファイルを表示

ファイル: test_linkedin.py プロジェクト: miiila/junior.guru

def test_spider_parse_job_apply_on_company_website():
    """Check ``link`` of the item parsed from ``job_apply_on_company_website.html``.

    The item is read from ``cb_kwargs['item']`` of the first object that
    ``parse_job`` yields; its ``link`` must be the expected jobs.cisco.com URL.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_apply_on_company_website.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    first_yielded = next(linkedin.Spider().parse_job(response))
    job = first_yielded.cb_kwargs['item']

    expected_link = 'https://jobs.cisco.com/jobs/ProjectDetail/Software-Engineer/1304909?source=juniorguru'
    assert job['link'] == expected_link

コード例 #9

ファイルを表示

ファイル: test_linkedin.py プロジェクト: benabraham/junior.guru

def test_spider_parse_end():
    """Check what ``parse`` yields for the ``more_end.html`` fixture.

    Yielded objects are split by URL substring: 21 must contain
    ``/jobPosting/`` and none may contain ``/seeMoreJobPostings/``.
    """
    fixture_path = Path(FIXTURES_DIR / 'more_end.html')
    response = HtmlResponse('https://example.com/seeMoreJobPostings/',
                            body=fixture_path.read_bytes())
    requests = list(linkedin.Spider().parse(response))

    # Partition the yielded objects by the kind of URL they target.
    job_requests = [req for req in requests if '/jobPosting/' in req.url]
    more_requests = [req for req in requests
                     if '/seeMoreJobPostings/' in req.url]

    assert len(job_requests) == 21
    assert len(more_requests) == 0

コード例 #10

ファイルを表示

ファイル: test_linkedin.py プロジェクト: benabraham/junior.guru

def test_spider_parse():
    """Check what ``parse`` yields for the ``more.html`` fixture.

    Expects 25 objects whose URL contains ``/jobPosting/`` (the first with
    a specific URL) and exactly one whose URL contains
    ``/seeMoreJobPostings/`` together with ``start=25``.
    """
    fixture_path = Path(FIXTURES_DIR / 'more.html')
    response = HtmlResponse('https://example.com/seeMoreJobPostings/',
                            body=fixture_path.read_bytes())
    requests = list(linkedin.Spider().parse(response))

    # Partition the yielded objects by the kind of URL they target.
    job_requests = [req for req in requests if '/jobPosting/' in req.url]
    more_requests = [req for req in requests
                     if '/seeMoreJobPostings/' in req.url]

    assert len(job_requests) == 25
    first_job_url = job_requests[0].url
    assert first_job_url == 'https://cz.linkedin.com/jobs-guest/jobs/api/jobPosting/1846698040'

    assert len(more_requests) == 1
    assert 'start=25' in more_requests[0].url

コード例 #11

ファイルを表示

def test_spider_parse():
    """Check what ``parse`` yields for the ``more.html`` fixture.

    Expects 25 objects whose URL contains ``/jobs/view/``, the first of
    which has neither ``position`` nor ``pageNum`` in its URL, and exactly
    one whose URL contains ``/seeMoreJobPostings/`` together with ``start=25``.
    """
    fixture_path = Path(FIXTURES_DIR / 'more.html')
    response = HtmlResponse('https://example.com/seeMoreJobPostings/',
                            body=fixture_path.read_bytes())
    requests = list(linkedin.Spider().parse(response))

    # Partition the yielded objects by the kind of URL they target.
    job_requests = [req for req in requests if '/jobs/view/' in req.url]
    more_requests = [req for req in requests
                     if '/seeMoreJobPostings/' in req.url]

    assert len(job_requests) == 25
    # Neither of these texts may appear in the first job URL.
    for unwanted in ('position', 'pageNum'):
        assert unwanted not in job_requests[0].url

    assert len(more_requests) == 1
    assert 'start=25' in more_requests[0].url