Python Spider 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: juniorguru.scrapers.spiders.stackoverflow

메소드/함수: Spider

hotexamples.com에서의 예제들: 5

Python Spider - 5개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 juniorguru.scrapers.spiders.stackoverflow.Spider의 Python 사용 예제 중 가장 높은 평가를 받은 실제 예제들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

def test_spider_parse_job_posted_at_as_li():
    """Check ``posted_at`` for the ``job_posted_at_as_li.html`` fixture.

    The first item yielded by ``Spider.parse_job`` must carry a
    ``posted_at`` value equal to today's date minus 29 days.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_posted_at_as_li.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    spider = stackoverflow.Spider()
    job = next(spider.parse_job(response))

    # The expectation is relative to the day the test runs.
    expected_posted_at = date.today() - timedelta(days=29)
    assert job['posted_at'] == expected_posted_at

예제 #2

파일 보기

def test_spider_parse_job():
    """Check the single item ``Spider.parse_job`` yields for ``job.html``.

    Verifies that exactly one job is produced, that it exposes exactly the
    expected set of keys, and that the individual field values match the
    fixture.
    """
    fixture_path = Path(FIXTURES_DIR / 'job.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    spider = stackoverflow.Spider()
    jobs = list(spider.parse_job(response))

    assert len(jobs) == 1

    job = jobs[0]

    # Both sides are sorted, so only the set of keys matters, not its order.
    expected_keys = [
        'title',
        'link',
        'company_name',
        'company_link',
        'locations_raw',
        'employment_types',
        'posted_at',
        'description_html',
        'experience_levels',
        'company_logo_urls',
        'remote',
    ]
    assert sorted(job.keys()) == sorted(expected_keys)

    assert job['title'] == 'Solution Engineer (M/F/X)'
    assert job['link'] == 'https://example.com/example/'
    assert job['company_name'] == 'QUAJOO GmbH'
    assert (job['company_link']
            == 'https://example.com/jobs/companies/quajoo-gmbh')
    assert job['locations_raw'] == ['Leipzig, Deutschland']
    assert job['employment_types'] == ['Full-time']
    assert job['experience_levels'] == ['junior', 'mid-level']

    # The expectation is relative to the day the test runs.
    expected_posted_at = date.today() - timedelta(days=27)
    assert job['posted_at'] == expected_posted_at

    assert job['company_logo_urls'] == ['https://i.stack.imgur.com/kUWEv.png']
    assert 'what QUAJOO offers you:</strong>' in job['description_html']

예제 #3

파일 보기

def test_spider_parse_job_via():
    """Check company and location fields for the ``job_via.html`` fixture.

    The first item yielded by ``Spider.parse_job`` must report the expected
    company name, company link and raw locations.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_via.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    spider = stackoverflow.Spider()
    job = next(spider.parse_job(response))

    assert job['company_name'] == 'CBOE Global Markets'
    assert job['company_link'] == 'https://www.cboe.com/'
    assert job['locations_raw'] == ['London, UK']

예제 #4

파일 보기

def test_spider_parse():
    """Check the requests ``Spider.parse`` yields for ``jobs.html``.

    Requests are classified by URL substring: ``'/jobs/'`` counts as a job
    request and ``'/jobs?'`` as a pagination request. The fixture must
    produce 25 of the former and 7 of the latter.
    """
    fixture_path = Path(FIXTURES_DIR / 'jobs.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    spider = stackoverflow.Spider()
    requests = list(spider.parse(response))

    job_requests = [req for req in requests if '/jobs/' in req.url]
    pagination_requests = [req for req in requests if '/jobs?' in req.url]

    assert len(job_requests) == 25
    assert len(pagination_requests) == 7

예제 #5

파일 보기

def test_spider_parse_job_remote():
    """Check remote-job fields for the ``job_remote.html`` fixture.

    The first item yielded by ``Spider.parse_job`` must have the expected
    company name and link, ``remote`` set to ``True``, and no value under
    ``locations_raw``.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_remote.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())
    spider = stackoverflow.Spider()
    job = next(spider.parse_job(response))

    assert job['company_name'] == 'Hummingbot'
    assert (job['company_link']
            == 'https://example.com/jobs/companies/hummingbot')
    assert job['remote'] is True
    # ``.get`` passes whether the key is absent or explicitly None.
    assert job.get('locations_raw') is None