예제 #1
0
    def test_parse_source(self):
        """parse_source() must raise TypeError for missing or None input."""
        # The empty tuple exercises the call with no arguments at all; the
        # one-element tuple passes None, which is not a usable source.
        for bad_args in ((), (None,)):
            with self.assertRaises(TypeError):
                scraper.parse_source(*bad_args)
예제 #2
0
def test_parse_source():
    """Check parse_source() with invalid calls and with real search results."""
    # Calling with no argument, or with None, must be rejected.
    for bad_args in ((), (None,)):
        with pytest.raises(TypeError):
            parse_source(*bad_args)

    # read_search_results() returns a pair; only its first item is handed
    # to the parser here.
    body, _encoding = read_search_results()
    soup = parse_source(body)
    assert isinstance(soup, BeautifulSoup)
    assert "Queen Anne" in soup.prettify()
예제 #3
0
def test_extract_listing(scrape_craigslist):
    """Compare listings from the supplied body with those from read_search_results().

    Both bodies are parsed using the encoding that comes with the
    ``scrape_craigslist`` argument.
    """
    from scraper import parse_source, extract_listings, read_search_results

    supplied_body, encoding = scrape_craigslist
    from_supplied = extract_listings(parse_source(supplied_body, encoding))

    reference_body = read_search_results()[0]
    from_reference = extract_listings(parse_source(reference_body, encoding))

    assert from_supplied == from_reference
예제 #4
0
def test_add_location():
    """Verify that add_location() adds exactly one 'location' entry to a listing.

    The test fails if add_location() accepts a missing or None argument,
    changes any pre-existing key, or stores coordinates that cannot be
    converted to float.
    """
    with pytest.raises(TypeError):
        add_location()
    with pytest.raises(TypeError):
        add_location(None)

    response, _ = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    # The built-in next() works with both Python 2.6+ and Python 3
    # iterators, unlike the Python-2-only ``.next()`` method.
    listing = next(listing_generator)
    olddict = copy(listing)  # snapshot taken before the listing is updated
    add_location(listing)
    newdict = listing
    assert len(newdict) == len(olddict) + 1  # Added a single entry, "location"
    for key in olddict:
        assert olddict[key] == newdict[key]

    # The snapshot must not have gained the new key.
    with pytest.raises(KeyError):
        olddict['location']

    assert isinstance(newdict['location'], dict)
    locationdict = newdict['location']
    assert 'data-latitude' in locationdict
    assert 'data-longitude' in locationdict
    # The following lines should not raise exceptions
    float(locationdict['data-latitude'])
    float(locationdict['data-longitude'])
예제 #5
0
def test_ask_google_for_address():
    """Check the shape of the response from ask_google_for_address().

    Given a listing that has been through add_location(), the function is
    expected to return a dictionary with a non-empty 'results' list whose
    first entry describes a street address.
    """
    with pytest.raises(TypeError):
        ask_google_for_address()  # listing required

    response, _ = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    # The built-in next() works with both Python 2.6+ and Python 3
    # iterators, unlike the Python-2-only ``.next()`` method.
    listing = next(listing_generator)
    add_location(listing)

    goog_data = ask_google_for_address(listing)
    assert isinstance(goog_data, dict)
    assert 'results' in goog_data
    assert isinstance(goog_data['results'], list)
    # Fail with a clear assertion rather than an IndexError when the
    # response carries no results.
    assert goog_data['results'], "expected at least one result"
    first_result = goog_data['results'][0]
    assert isinstance(first_result, dict)

    # We are implicitly testing find_best_address() here, too
    assert 'types' in first_result
    assert 'street_address' in first_result['types']
    # I'm more or less assuming the above is the case;
    # google seems to reliably do this
    assert 'formatted_address' in first_result
    # NOTE: ``unicode`` is a Python 2 builtin; this check needs adapting
    # before the test can run on Python 3.
    assert isinstance(first_result['formatted_address'], unicode)
예제 #6
0
def test_extract_listings():
    """extract_listings() should return a generator whose items are all dicts."""
    body, encoding = read_search_results()
    listings = extract_listings(parse_source(body, encoding))
    assert isinstance(listings, GeneratorType)
    # Consumes the generator, stopping at the first non-dict item.
    assert all(isinstance(entry, dict) for entry in listings)
예제 #7
0
def test_format_google_request_parameters():
    """Check the request parameters built from a listing's location data.

    format_google_request_parameters() is expected to return a dictionary
    containing a 'latlng' entry and a 'sensor' entry equal to 'false'.
    """
    with pytest.raises(TypeError):
        format_google_request_parameters()  # listing required

    response, _ = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    # The built-in next() works with both Python 2.6+ and Python 3
    # iterators, unlike the Python-2-only ``.next()`` method.
    listing = next(listing_generator)
    add_location(listing)

    request_params = format_google_request_parameters(listing['location'])
    assert isinstance(request_params, dict)
    assert 'latlng' in request_params
    assert request_params['sensor'] == 'false'
예제 #8
0
    def setUp(self):
        """Load and parse the "Queen Anne" search results used by the tests."""
        # "Queen Anne" was chosen as a memorably-named part of town that is
        # likely to have 1-bedrooms in the (wide) default range.
        search_results = scraper.return_apartment_search_results(
            query="Queen Anne")
        self.content, self.encoding = search_results

        self.parsed_html = scraper.parse_source(self.content, self.encoding)

        # Sanity checks that run during setup: the results should look
        # just like those of an actual query.
        assert self.encoding == 'utf-8'
        assert "Queen Anne" in self.content
예제 #9
0
def test_extract_listings():
    """Check extract_listings() argument handling and the first listing it yields.

    The first listing must be a dict whose 'size', 'description', 'link'
    and 'price' entries are unicode strings in the expected format.
    """
    with pytest.raises(TypeError):
        extract_listings()
    with pytest.raises(AttributeError):
        # Built-in next() instead of ``.next()``: on Python 3 a generator has
        # no ``.next`` attribute, which would satisfy this check for the
        # wrong reason.
        next(extract_listings(None))

    response, _ = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    assert isinstance(listing_generator, types.GeneratorType)
    testdict = next(listing_generator)
    assert isinstance(testdict, dict)
    # NOTE: ``unicode`` is a Python 2 builtin; these checks need adapting
    # before the test can run on Python 3.
    assert isinstance(testdict['size'], unicode)
    assert "br" in testdict['size']
    assert isinstance(testdict['description'], unicode)
    assert isinstance(testdict['link'], unicode)
    assert "html" in testdict['link']
    assert isinstance(testdict['price'], unicode)
    assert '$' in testdict['price']
    # With the dollar sign stripped, only digits should remain.
    assert testdict['price'].strip("$").isdigit()
예제 #10
0
    def test_add_address(self):
        """Check add_address() argument handling and listing location data.

        add_address() must raise TypeError when called without arguments or
        with None. The first listing extracted from 'search_results.html'
        must carry latitude and longitude entries under 'location'.
        """
        # This test function inspired by and partially paraphrased from:
        # https://github.com/jbbrokaw/basic-scraper/blob/master/test_scraper.py

        # Also tests extract_listings(), parse_source()
        # and return_data_from_file().

        with self.assertRaises(TypeError):
            scraper.add_address()
        with self.assertRaises(TypeError):
            scraper.add_address(None)

        content = scraper.return_data_from_file('search_results.html')
        encoding = 'utf-8'
        parsed_html = scraper.parse_source(content, encoding)
        # The built-in next() works with both Python 2.6+ and Python 3
        # iterators, unlike the Python-2-only ``.next()`` method.
        apartment_listing = next(scraper.extract_listings(parsed_html))

        assert 'data-latitude' in apartment_listing['location']
        assert 'data-longitude' in apartment_listing['location']
예제 #11
0
 def testExtract(self):
     """extract_listings() on the parsed contents must return exactly a list."""
     parsed = scraper.parse_source(*self.contents)
     listings = scraper.extract_listings(parsed)
     self.assertEqual(type(listings), list)
예제 #12
0
 def testParse(self):
     """parse_source() on the stored contents must return a BeautifulSoup."""
     soup = scraper.parse_source(*self.contents)
     # Exact type comparison: subclasses of BeautifulSoup are not accepted.
     self.assertEqual(type(soup), scraper.BeautifulSoup)
예제 #13
0
def test_parse_source():
    """parse_source() should build a BeautifulSoup from read_search_results()."""
    body, encoding = read_search_results()
    soup = parse_source(body, encoding)
    assert isinstance(soup, bs4.BeautifulSoup)
예제 #14
0
 def test_parse_source(self):
     """parse_source() must return a bs4.BeautifulSoup for the search results."""
     # Only the first item returned by read_search_results() is parsed.
     markup, _encoding = read_search_results()
     soup = parse_source(markup)
     self.assertIsInstance(soup, bs4.BeautifulSoup)
예제 #15
0
def test_extract_listings():
    """A lone price <span> should produce no listings."""
    markup = "<span class='price'>$1450</span>"
    listings = extract_listings(parse_source(markup))
    assert len(listings) == 0
예제 #16
0
def test_parse_source():
    """Bare text should come back wrapped in an html/head/body document."""
    expected = u'<html><head></head><body>Hello</body></html>'
    rendered = str(parse_source("Hello"))
    assert rendered == expected