예제 #1
0
def test_spider_parse_job_posted_at_as_li():
    """Check ``posted_at`` for the ``job_posted_at_as_li.html`` fixture.

    The first item yielded by ``parse_job`` must carry a ``posted_at``
    value that is 29 days before today's date.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_posted_at_as_li.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())

    spider = stackoverflow.Spider()
    job = next(spider.parse_job(response))

    # The expectation is relative to the day the test runs.
    expected_posted_at = date.today() - timedelta(days=29)
    assert job['posted_at'] == expected_posted_at
예제 #2
0
def test_spider_parse_job():
    """Check every field ``parse_job`` extracts from the ``job.html`` fixture.

    Exactly one item must be yielded, it must expose exactly the expected
    set of keys, and the individual field values must match the fixture.
    """
    fixture_path = Path(FIXTURES_DIR / 'job.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())

    spider = stackoverflow.Spider()
    jobs = list(spider.parse_job(response))

    assert len(jobs) == 1

    job = jobs[0]

    # Compared in sorted order so the key order of the item does not matter.
    expected_keys = [
        'title',
        'link',
        'company_name',
        'company_link',
        'locations_raw',
        'employment_types',
        'posted_at',
        'description_html',
        'experience_levels',
        'company_logo_urls',
        'remote',
    ]
    assert sorted(job.keys()) == sorted(expected_keys)

    assert job['title'] == 'Solution Engineer (M/F/X)'
    assert job['link'] == 'https://example.com/example/'
    assert job['company_name'] == 'QUAJOO GmbH'

    expected_company_link = 'https://example.com/jobs/companies/quajoo-gmbh'
    assert job['company_link'] == expected_company_link

    assert job['locations_raw'] == ['Leipzig, Deutschland']
    assert job['employment_types'] == ['Full-time']
    assert job['experience_levels'] == ['junior', 'mid-level']

    # The expectation is relative to the day the test runs.
    expected_posted_at = date.today() - timedelta(days=27)
    assert job['posted_at'] == expected_posted_at

    assert job['company_logo_urls'] == ['https://i.stack.imgur.com/kUWEv.png']
    assert 'what QUAJOO offers you:</strong>' in job['description_html']
예제 #3
0
def test_spider_parse_job_via():
    """Check company and location fields for the ``job_via.html`` fixture.

    The first item yielded by ``parse_job`` must have the expected company
    name, company link and raw locations.
    """
    fixture_path = Path(FIXTURES_DIR / 'job_via.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())

    spider = stackoverflow.Spider()
    job = next(spider.parse_job(response))

    assert job['company_name'] == 'CBOE Global Markets'
    assert job['company_link'] == 'https://www.cboe.com/'
    assert job['locations_raw'] == ['London, UK']
예제 #4
0
def test_spider_parse():
    """Check the requests ``parse`` produces for the ``jobs.html`` fixture.

    Requests are split by URL substring: ``'/jobs/'`` for job requests and
    ``'/jobs?'`` for pagination requests. The fixture must produce 25 of
    the former and 7 of the latter.
    """
    fixture_path = Path(FIXTURES_DIR / 'jobs.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())

    spider = stackoverflow.Spider()
    requests = list(spider.parse(response))

    job_requests = [req for req in requests if '/jobs/' in req.url]
    pagination_requests = [req for req in requests if '/jobs?' in req.url]

    assert len(job_requests) == 25
    assert len(pagination_requests) == 7
예제 #5
0
def test_spider_parse_job_remote():
    """Check the remote-job fields for the ``job_remote.html`` fixture.

    The first item yielded by ``parse_job`` must have the expected company
    name and link, ``remote`` set to ``True``, and no ``locations_raw``
    value (``get`` returns ``None``).
    """
    fixture_path = Path(FIXTURES_DIR / 'job_remote.html')
    response = HtmlResponse('https://example.com/example/',
                            body=fixture_path.read_bytes())

    spider = stackoverflow.Spider()
    job = next(spider.parse_job(response))

    assert job['company_name'] == 'Hummingbot'

    expected_company_link = 'https://example.com/jobs/companies/hummingbot'
    assert job['company_link'] == expected_company_link

    assert job['remote'] is True
    assert job.get('locations_raw') is None