def test_parse_source(self):
    # parse_source() must raise TypeError both when called with no
    # arguments and when handed None.

    # Needs args to work.
    self.assertRaises(TypeError, scraper.parse_source)
    # Needs _particular kinds_ of args to work!
    self.assertRaises(TypeError, scraper.parse_source, None)
def test_parse_source():
    """Check parse_source() argument validation and its parsed output.

    The empty call and a None argument must both raise TypeError.  The body
    returned by read_search_results() must parse into a BeautifulSoup object
    whose prettified text contains "Queen Anne".
    """
    for bad_args in ((), (None,)):
        with pytest.raises(TypeError):
            parse_source(*bad_args)

    # Only the body is used here; the encoding is unpacked and ignored.
    markup, _encoding = read_search_results()
    soup = parse_source(markup)
    assert isinstance(soup, BeautifulSoup)
    assert "Queen Anne" in soup.prettify()
def test_extract_listing(scrape_craigslist):
    """Listings from ``scrape_craigslist`` must equal those from read_search_results().

    ``scrape_craigslist`` is unpacked as a ``(body, encoding)`` pair
    (presumably a pytest fixture -- confirm against conftest).
    """
    from scraper import parse_source, extract_listings, read_search_results

    body, encoding = scrape_craigslist
    from_fixture = extract_listings(parse_source(body, encoding))

    # The second body is parsed with the *fixture's* encoding.
    second_body = read_search_results()[0]
    from_second = extract_listings(parse_source(second_body, encoding))

    # NOTE(review): this equality is only meaningful if extract_listings()
    # returns a value-comparable container such as a list; two generator
    # objects would compare by identity -- confirm.
    assert from_fixture == from_second
def test_add_location():
    """add_location() should add exactly one key, 'location', to a listing in place.

    Checks that:
    * calling with no argument or with None raises TypeError;
    * every pre-existing entry of the listing is left unchanged;
    * the new 'location' value is a dict holding 'data-latitude' and
      'data-longitude' entries that can be converted to float.
    """
    with pytest.raises(TypeError):
        add_location()
    with pytest.raises(TypeError):
        add_location(None)

    response, _encoding = read_search_results()
    listings = extract_listings(parse_source(response))
    # Built-in next() instead of the Python-2-only generator method .next();
    # the builtin behaves the same on Python 2.6+ and also works on Python 3.
    listing = next(listings)

    # Snapshot the listing before add_location() mutates it.
    before = copy(listing)
    add_location(listing)

    assert len(listing) == len(before) + 1  # Added a single entry, "location"
    for key, value in before.items():
        assert value == listing[key]
    with pytest.raises(KeyError):
        before['location']

    location = listing['location']
    assert isinstance(location, dict)
    assert 'data-latitude' in location
    assert 'data-longitude' in location
    # The following lines should not raise exceptions
    float(location['data-latitude'])
    float(location['data-longitude'])
def test_ask_google_for_address():
    """ask_google_for_address() should return Google's response (a dict)
    when given a listing that has location data.

    The response must hold a 'results' list whose first element is a dict
    with a 'types' entry containing 'street_address' and a text
    'formatted_address'.
    """
    with pytest.raises(TypeError):
        ask_google_for_address()  # listing required

    response, _encoding = read_search_results()
    listings = extract_listings(parse_source(response))
    # Built-in next() instead of the Python-2-only generator method .next().
    listing = next(listings)
    add_location(listing)

    goog_data = ask_google_for_address(listing)
    assert isinstance(goog_data, dict)
    assert 'results' in goog_data
    assert isinstance(goog_data['results'], list)

    first_result = goog_data['results'][0]
    assert isinstance(first_result, dict)
    # We are implicitly testing find_best_address() here, too
    assert 'types' in first_result
    assert 'street_address' in first_result['types']
    # The above is more or less assumed to be the case;
    # google seems to reliably do this
    assert 'formatted_address' in first_result
    # type(u"") is `unicode` on Python 2 and `str` on Python 3, so the check
    # is unchanged on Python 2 without naming the Python-2-only builtin.
    text_type = type(u"")
    assert isinstance(first_result['formatted_address'], text_type)
def test_extract_listings():
    """extract_listings() should return a generator that yields only dicts."""
    body, encoding = read_search_results()
    listings = extract_listings(parse_source(body, encoding))
    assert isinstance(listings, GeneratorType)
    # Consumes the generator, stopping at the first non-dict item.
    assert all(isinstance(entry, dict) for entry in listings)
def test_format_google_request_parameters():
    """format_google_request_parameters() should return valid request
    parameters (a dict) to send to google, given a scraped location
    dictionary.

    The result must contain a 'latlng' entry and have 'sensor' set to the
    string 'false'.
    """
    with pytest.raises(TypeError):
        format_google_request_parameters()  # listing required

    response, _encoding = read_search_results()
    listings = extract_listings(parse_source(response))
    # Built-in next() instead of the Python-2-only generator method .next();
    # the builtin behaves the same on Python 2.6+ and also works on Python 3.
    listing = next(listings)
    add_location(listing)

    request_params = format_google_request_parameters(listing['location'])
    assert isinstance(request_params, dict)
    assert 'latlng' in request_params
    assert request_params['sensor'] == 'false'
def setUp(self):
    # Fetch search results for "Queen Anne" and keep the raw content, its
    # encoding and the parsed HTML on the test case.
    #
    # Queen Anne was picked because it is a memorably-named part of town
    # and was likely to have 1-bedrooms in the (wide) range used as the
    # default values.
    content, encoding = scraper.return_apartment_search_results(
        query="Queen Anne")
    self.content = content
    self.encoding = encoding
    self.parsed_html = scraper.parse_source(content, encoding)

    # Some tests also run when setting up...
    # Should be just like an actual query:
    assert encoding == 'utf-8'
    assert "Queen Anne" in content
def test_extract_listings():
    """extract_listings() should be a generator of listing dicts with text fields.

    Checks that:
    * calling with no argument raises TypeError;
    * pulling the first item for a None argument raises AttributeError;
    * the first listing is a dict whose 'size', 'description', 'link' and
      'price' values are text, with 'size' containing "br", 'link'
      containing "html", and 'price' being "$" followed by digits only.
    """
    with pytest.raises(TypeError):
        extract_listings()
    with pytest.raises(AttributeError):
        # Built-in next() instead of the Python-2-only .next() method.
        next(extract_listings(None))

    response, _encoding = read_search_results()
    listings = extract_listings(parse_source(response))
    assert isinstance(listings, types.GeneratorType)

    first = next(listings)
    assert isinstance(first, dict)

    # type(u"") is `unicode` on Python 2 and `str` on Python 3, so the check
    # is unchanged on Python 2 without naming the Python-2-only builtin.
    text_type = type(u"")
    for field in ('size', 'description', 'link', 'price'):
        assert isinstance(first[field], text_type)

    assert "br" in first['size']
    assert "html" in first['link']
    assert '$' in first['price']
    assert first['price'].strip("$").isdigit()
def test_add_address(self):
    # This test function inspired by and partially paraphrased from:
    # https://github.com/jbbrokaw/basic-scraper/blob/master/test_scraper.py
    # Also tests extract_listings(), parse_source()
    # and return_data_from_file().
    with self.assertRaises(TypeError):
        scraper.add_address()
    with self.assertRaises(TypeError):
        scraper.add_address(None)

    content = scraper.return_data_from_file('search_results.html')
    encoding = 'utf-8'
    parsed_html = scraper.parse_source(content, encoding)
    # Built-in next() instead of the Python-2-only generator method .next();
    # the builtin behaves the same on Python 2.6+ and also works on Python 3.
    apartment_listing = next(scraper.extract_listings(parsed_html))

    # unittest assertions (matching the assertRaises calls above) report the
    # missing key and the container on failure and are not stripped by -O.
    location = apartment_listing['location']
    self.assertIn('data-latitude', location)
    self.assertIn('data-longitude', location)
def testExtract(self):
    # extract_listings(), applied to the parse of self.contents, should
    # return a list.  self.contents is unpacked as the positional arguments
    # of parse_source() (presumably set in setUp -- confirm).
    parsed = scraper.parse_source(*self.contents)
    listings = scraper.extract_listings(parsed)
    # assertIsInstance replaces the type(...) == list comparison: it is the
    # idiomatic type check, reports the offending object on failure, and
    # also accepts list subclasses.
    self.assertIsInstance(listings, list)
def testParse(self):
    # parse_source() applied to self.contents should produce a BeautifulSoup
    # object.  self.contents is unpacked as the positional arguments of
    # parse_source() (presumably set in setUp -- confirm).
    results = scraper.parse_source(*self.contents)
    # assertIsInstance replaces the type(...) == BeautifulSoup comparison:
    # it is the idiomatic type check and reports the offending object on
    # failure.
    self.assertIsInstance(results, scraper.BeautifulSoup)
def test_parse_source():
    """parse_source() should turn read_search_results() output into a BeautifulSoup object."""
    body, encoding = read_search_results()
    assert isinstance(parse_source(body, encoding), bs4.BeautifulSoup)
def test_parse_source(self):
    # Test that the returned instance type is bs4.BeautifulSoup.
    # Only the markup is passed on; the encoding is unpacked and ignored.
    markup, _encoding = read_search_results()
    soup = parse_source(markup)
    self.assertIsInstance(soup, bs4.BeautifulSoup)
def test_extract_listings():
    """A document holding only a price <span> should produce zero listings."""
    soup = parse_source("<span class='price'>$1450</span>")
    listings = extract_listings(soup)
    # len() is kept deliberately: it requires a sized result, unlike a
    # truthiness check.
    assert len(listings) == 0
def test_parse_source():
    """Parsing the bare text "Hello" should serialize as a complete HTML document."""
    expected = u'<html><head></head><body>Hello</body></html>'
    rendered = str(parse_source("Hello"))
    assert rendered == expected