def test_read_search_results(self):
    """Check read_search_results() error handling and its return types."""
    # Asking for 'test.html' must fail with IOError (the original author
    # treats it as a non-existing file).
    with self.assertRaises(IOError):
        read_search_results('test.html')

    # Called without arguments, both returned values must be str instances.
    page, charset = read_search_results()
    for value in (page, charset):
        self.assertIsInstance(value, str)
def test_add_location():
    """add_location() needs a listing and adds exactly one 'location' entry.

    The listing under test is the first item produced by extract_listings()
    on the parsed output of read_search_results().
    """
    # A listing argument is mandatory, and None is not accepted as one.
    with pytest.raises(TypeError):
        add_location()
    with pytest.raises(TypeError):
        add_location(None)

    response, _ = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    # Builtin next() rather than the Python-2-only generator method .next().
    listing = next(listing_generator)

    # Shallow snapshot taken before add_location() touches the listing, so
    # the before/after contents can be compared.
    olddict = copy(listing)
    add_location(listing)
    newdict = listing

    assert len(newdict) == len(olddict) + 1  # Added a single entry, "location"
    for key in olddict:
        assert olddict[key] == newdict[key]
    # The snapshot must not have had a 'location' key beforehand.
    with pytest.raises(KeyError):
        olddict['location']

    assert isinstance(newdict['location'], dict)
    locationdict = newdict['location']
    assert 'data-latitude' in locationdict
    assert 'data-longitude' in locationdict

    # The following lines should not raise exceptions
    float(locationdict['data-latitude'])
    float(locationdict['data-longitude'])
def test_ask_google_for_address():
    """ask_google_for_address() returns google's response as a dictionary.

    The function is given a listing that already carries location data
    (added by add_location()).
    """
    with pytest.raises(TypeError):
        ask_google_for_address()  # listing required

    response, _ = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    # Builtin next() rather than the Python-2-only generator method .next().
    listing = next(listing_generator)
    add_location(listing)

    goog_data = ask_google_for_address(listing)
    assert isinstance(goog_data, dict)
    assert 'results' in goog_data
    assert isinstance(goog_data['results'], list)

    first_result = goog_data['results'][0]
    assert isinstance(first_result, dict)

    # We are implicitly testing find_best_address() here, too
    assert 'types' in first_result
    assert 'street_address' in first_result['types']
    # I'm more or less assuming the above is the case;
    # google seems to reliably do this
    assert 'formatted_address' in first_result
    assert isinstance(first_result['formatted_address'], unicode)
def test_extract_listings():
    """extract_listings() must return a generator whose items are dicts."""
    html, charset = read_search_results()
    listings = extract_listings(parse_source(html, charset))
    assert isinstance(listings, GeneratorType)
    # Every produced item has to be a dict; stops at the first offender.
    assert all(isinstance(entry, dict) for entry in listings)
def test_fetch(scrape_craigslist):
    """The scrape_craigslist fixture must match read_search_results()."""
    from scraper import read_search_results

    fixture_body, fixture_encoding = scrape_craigslist
    read_body, read_encoding = read_search_results()

    # Body and encoding are compared separately.
    assert read_body == fixture_body
    assert fixture_encoding == read_encoding
def test_parse_source():
    """parse_source() rejects missing/None input and returns BeautifulSoup."""
    # Both "no argument" and "None" must be refused with TypeError.
    for bad_args in ((), (None,)):
        with pytest.raises(TypeError):
            parse_source(*bad_args)

    html, _encoding = read_search_results()
    soup = parse_source(html)
    assert isinstance(soup, BeautifulSoup)
    # The pretty-printed document is expected to mention "Queen Anne".
    assert "Queen Anne" in soup.prettify()
def test_extract_listing(scrape_craigslist):
    """Listings from the fixture must equal listings from read_search_results().

    Both bodies are parsed with the fixture's encoding and passed through
    extract_listings(); the resulting items are then compared.
    """
    from scraper import parse_source, extract_listings, read_search_results

    body, encoding = scrape_craigslist
    parsed_body = parse_source(body, encoding)
    apartment = parse_source(read_search_results()[0], encoding)

    # Materialise both results before comparing: if extract_listings() returns
    # a generator, `==` on the raw objects compares identity and can never be
    # true for two separate calls. list() is harmless if it returns a list.
    extracted = list(extract_listings(parsed_body))
    apartment2 = list(extract_listings(apartment))
    assert extracted == apartment2
def test_format_google_request_parameters():
    """format_google_request_parameters() returns request parameters as a dict.

    The parameters are meant to be sent to google and are built from the
    location dictionary that add_location() stores on a listing.
    """
    with pytest.raises(TypeError):
        format_google_request_parameters()  # listing required

    response, _ = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    # Builtin next() rather than the Python-2-only generator method .next().
    listing = next(listing_generator)
    add_location(listing)

    request_params = format_google_request_parameters(listing['location'])
    assert isinstance(request_params, dict)
    assert 'latlng' in request_params
    assert request_params['sensor'] == 'false'
def test_extract_listings():
    """extract_listings() returns a generator of listing dictionaries.

    Checks argument validation, the generator type, and the fields of the
    first produced listing ('size', 'description', 'link', 'price').
    """
    with pytest.raises(TypeError):
        extract_listings()
    # Advance with builtin next(): on Python 3 a generator has no .next()
    # method, so calling it would raise AttributeError for the wrong reason
    # and this check would pass without exercising extract_listings().
    with pytest.raises(AttributeError):
        next(extract_listings(None))

    response, _ = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    assert isinstance(listing_generator, types.GeneratorType)

    testdict = next(listing_generator)
    assert isinstance(testdict, dict)

    assert isinstance(testdict['size'], unicode)
    assert "br" in testdict['size']

    assert isinstance(testdict['description'], unicode)

    assert isinstance(testdict['link'], unicode)
    assert "html" in testdict['link']

    # Price is a unicode string containing '$'; with '$' stripped from both
    # ends, only digits may remain.
    assert isinstance(testdict['price'], unicode)
    assert '$' in testdict['price']
    assert testdict['price'].strip("$").isdigit()
def testRead(self):
    """read_search_results("testresults.html") must equal self.contents."""
    self.assertEqual(
        scraper.read_search_results("testresults.html"),
        self.contents,
    )
def test_read_search_results():
    """read_search_results() returns a unicode body and the 'utf-8' encoding."""
    body, charset = read_search_results()
    assert charset == "utf-8"
    assert isinstance(body, unicode)
    # "Queen Anne" was the search used to make the file, so it must appear.
    assert "Queen Anne" in body
def test_parse_source():
    """parse_source(body, encoding) must return a bs4.BeautifulSoup object."""
    body, charset = read_search_results()
    soup = parse_source(body, charset)
    assert isinstance(soup, bs4.BeautifulSoup)
def test_read_search_result():
    """read_search_results() body contains the craigslist span; encoding is utf-8."""
    body, charset = read_search_results()
    # Markup fragment that must be present in the returned body.
    expected_markup = "<span class=\"desktop\">craigslist</span>"
    assert expected_markup in body
    assert charset == 'utf-8'
def test_parse_source(self):
    """parse_source() on the read_search_results() body gives BeautifulSoup."""
    page, _encoding = read_search_results()
    soup = parse_source(page)
    # The parsed document must be a bs4.BeautifulSoup instance.
    self.assertIsInstance(soup, bs4.BeautifulSoup)
def test_read_results():
    """read_search_results() gives a str at index 0 and 'utf-8' at index 1."""
    result = read_search_results()
    # Encoding is checked first, then the type of the body.
    encoding = result[1]
    assert encoding == 'utf-8'
    body = result[0]
    assert isinstance(body, str)