Ejemplo n.º 1
0
def test_recover_scraped_data(valid_config, scrape_data):
    """Pickle ``scrape_data``, run the recovery step and compare job ids.

    The first row returned by ``load_csv`` after ``recover_scraped_data``
    must carry the same ``job_id`` as the first entry of ``scrape_data``.
    """
    scraper = ScraperBase(valid_config)

    saved = scraper.save_pickle(scrape_data)
    assert saved is True, "Should save the pickle successfully"

    scraper.recover_scraped_data()
    recovered_rows = scraper.load_csv()

    recovered_job_id = recovered_rows[0].get('job_id')
    original_job_id = scrape_data[0].get('job_id')
    assert recovered_job_id == original_job_id, \
        "The recovered data must be equal to the one in scrape_data"
Ejemplo n.º 2
0
def scraper_base_instance(valid_config):
    """Build and return a ``ScraperBase`` configured with ``valid_config``."""
    instance = ScraperBase(valid_config)
    return instance
Ejemplo n.º 3
0
 def save_to_csv():
     """Save ``scrape_data`` through a new ``ScraperBase`` and report success.

     ``valid_config`` and ``scrape_data`` are not parameters; they are
     resolved from a scope outside this function (not visible here).

     Returns:
         True only when ``save_csv`` returned exactly ``True``.
     """
     scraper = ScraperBase(valid_config)
     outcome = scraper.save_csv(scrape_data)
     return outcome is True
Ejemplo n.º 4
0
def save_pickle(valid_config):
    """Pickle ``scrape_data`` via ``ScraperBase`` and assert it succeeded."""
    scraper = ScraperBase(valid_config)
    # NOTE(review): scrape_data is not a parameter of this function; it is
    # looked up from a surrounding scope that is not visible here - confirm
    # it refers to the intended data.
    saved = scraper.save_pickle(scrape_data)
    assert saved is True
Ejemplo n.º 5
0
def test_send_requests_pass():
    """A 'get' request to a job search URL returns content with 'Developer'.

    Presumably performs a real HTTP request, so it needs network access.
    """
    # Adjacent literals are concatenated into the single search URL.
    search_url = (
        "https://www.glassdoor.com/Job/jobs.htm"
        "?suggestCount=0&suggestChosen=false&clickSource=searchBtn"
        "&typedKeyword=Software+developer&sc.keyword=Software+developer"
        "&locT=N&locId=130&jobType="
    )
    result = ScraperBase.send_request(search_url, 'get')
    assert result is not None
    assert 'Developer' in result
Ejemplo n.º 6
0
def test_run_pre_scrape_filters(job_links, saved_csv, valid_config):
    """Pre-scrape filtering of ``job_links`` for "glassdoor" leaves two links.

    ``saved_csv`` is called first and must return ``True`` before the
    filters are run.
    """
    assert saved_csv() is True

    scraper = ScraperBase(valid_config)
    remaining_links = scraper.run_pre_scrape_filters(job_links, "glassdoor")

    assert len(remaining_links) == 2, \
        "Should filter out existing job_ids from the returned job_links"
Ejemplo n.º 7
0
def test_load_pickle(valid_config, scrape_data):
    """After a successful ``save_pickle``, ``load_pickle`` must not equal []."""
    writer = ScraperBase(valid_config)
    saved = writer.save_pickle(scrape_data)
    assert saved is True

    # A separate instance is used for loading, as in the save step above.
    reader = ScraperBase(valid_config)
    loaded = reader.load_pickle()
    assert loaded != []
Ejemplo n.º 8
0
def test_load_csv(valid_config, scrape_data):
    """After a successful ``save_csv``, ``load_csv`` must not equal []."""
    writer = ScraperBase(valid_config)
    saved = writer.save_csv(scrape_data)
    assert saved is True

    # A separate instance is used for loading, as in the save step above.
    reader = ScraperBase(valid_config)
    loaded = reader.load_csv()
    assert loaded != []
Ejemplo n.º 9
0
def test_send_requests_fail():
    """``send_request`` is expected to return None for the method 'geT'."""
    faq_url = "https://www.glassdoor.com/about/faq"
    result = ScraperBase.send_request(faq_url, 'geT')
    assert result is None