Ejemplo n.º 1
0
 def test_read_search_results(self):
     """Check read_search_results() with a bad file name and with defaults.

     Asking for 'test.html' is expected to raise IOError, while the
     no-argument call is expected to return a pair of str values.
     """
     # The non-existing file must surface as an IOError.
     self.assertRaises(IOError, read_search_results, 'test.html')

     page, charset = read_search_results()
     # Both members of the returned pair must be str instances.
     for value in (page, charset):
         self.assertIsInstance(value, str)
Ejemplo n.º 2
0
def test_add_location():
    """Check that add_location() adds a single 'location' entry in place.

    The listing is expected to gain exactly one new key, 'location', whose
    value is a dict holding 'data-latitude' and 'data-longitude' entries
    that can be converted to float. All pre-existing entries must be left
    unchanged.
    """
    # A listing argument is mandatory, and None is not an acceptable one.
    with pytest.raises(TypeError):
        add_location()
    with pytest.raises(TypeError):
        add_location(None)

    response, encoding = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    # Use the built-in next() rather than the Python-2-only .next() method.
    listing = next(listing_generator)
    # Snapshot the listing before add_location() is given the chance to
    # modify it, so the two states can be compared afterwards.
    olddict = copy(listing)
    add_location(listing)
    newdict = listing
    assert len(newdict) == len(olddict) + 1  # Added a single entry, "location"
    for key in olddict:
        assert olddict[key] == newdict[key]

    # The snapshot must not contain the new key.
    with pytest.raises(KeyError):
        olddict['location']

    assert isinstance(newdict['location'], dict)
    locationdict = newdict['location']
    assert 'data-latitude' in locationdict
    assert 'data-longitude' in locationdict
    # The following lines should not raise exceptions
    float(locationdict['data-latitude'])
    float(locationdict['data-longitude'])
Ejemplo n.º 3
0
def test_ask_google_for_address():
    """Check the structure of the response from ask_google_for_address().

    Given a listing that carries location data, the function should return
    google's response as a dictionary with a 'results' list whose first
    entry is a dictionary describing a street address.
    """
    with pytest.raises(TypeError):
        ask_google_for_address()  # listing required

    response, encoding = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    # Use the built-in next() rather than the Python-2-only .next() method.
    listing = next(listing_generator)
    add_location(listing)

    goog_data = ask_google_for_address(listing)
    assert isinstance(goog_data, dict)
    assert 'results' in goog_data
    assert isinstance(goog_data['results'], list)
    first_result = goog_data['results'][0]
    assert isinstance(first_result, dict)

    # We are implicitly testing find_best_address() here, too
    assert 'types' in first_result
    assert 'street_address' in first_result['types']
    # I'm more or less assuming the above is the case;
    # google seems to reliably do this
    assert 'formatted_address' in first_result
    assert isinstance(first_result['formatted_address'], unicode)
Ejemplo n.º 4
0
def test_extract_listings():
    """Check that extract_listings() returns a generator that yields dicts."""
    source, charset = read_search_results()
    listings = extract_listings(parse_source(source, charset))
    assert isinstance(listings, GeneratorType)
    # Draining the generator checks every item it produces.
    for entry in listings:
        assert isinstance(entry, dict)
Ejemplo n.º 5
0
def test_fetch(scrape_craigslist):
    """Compare the scrape_craigslist value with read_search_results().

    scrape_craigslist is presumably a pytest fixture supplying a
    (body, encoding) pair -- confirm against the fixture definition.
    Both members must match what read_search_results() returns.
    """
    from scraper import read_search_results

    fetched_body, fetched_encoding = scrape_craigslist
    stored_body, stored_encoding = read_search_results()

    assert stored_body == fetched_body
    assert fetched_encoding == stored_encoding
Ejemplo n.º 6
0
def test_parse_source():
    """Check parse_source() argument validation and its return value.

    Calling it without an argument or with None is expected to raise
    TypeError. With the stored search results it is expected to return a
    BeautifulSoup object whose prettified text mentions "Queen Anne".
    """
    # An empty argument list and a lone None are both expected to be rejected.
    for bad_args in ((), (None,)):
        with pytest.raises(TypeError):
            parse_source(*bad_args)

    markup, _charset = read_search_results()
    soup = parse_source(markup)
    assert isinstance(soup, BeautifulSoup)
    assert "Queen Anne" in soup.prettify()
Ejemplo n.º 7
0
def test_extract_listing(scrape_craigslist):
    """Check that both sources produce the same extracted listings.

    The body supplied by scrape_craigslist and the body returned by
    read_search_results() are each parsed and run through
    extract_listings(); the two sets of listings must be equal.
    """
    from scraper import parse_source, extract_listings, read_search_results

    body, encoding = scrape_craigslist
    parsed_body = parse_source(body, encoding)

    extracted = extract_listings(parsed_body)

    apartment = parse_source(read_search_results()[0], encoding)
    apartment2 = extract_listings(apartment)

    # Materialise both results before comparing. If extract_listings()
    # returns a lazy iterator such as a generator (TODO confirm against the
    # scraper module), `==` on the raw objects would only compare identity
    # and could never succeed. list() leaves a list result unchanged.
    assert list(extracted) == list(apartment2)
Ejemplo n.º 8
0
def test_format_google_request_parameters():
    """Check the request parameters built from a scraped location.

    Given a scraped location dictionary, the function should return valid
    request parameters (a dictionary) to send to google: it must contain
    'latlng' and have 'sensor' set to the string 'false'.
    """
    with pytest.raises(TypeError):
        format_google_request_parameters()  # listing required

    response, encoding = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    # Use the built-in next() rather than the Python-2-only .next() method.
    listing = next(listing_generator)
    add_location(listing)

    request_params = format_google_request_parameters(listing['location'])
    assert isinstance(request_params, dict)
    assert 'latlng' in request_params
    assert request_params['sensor'] == 'false'
Ejemplo n.º 9
0
def test_extract_listings():
    """Check extract_listings() argument handling and its first listing.

    The call must return a generator. Its first item must be a dict whose
    'size', 'description', 'link' and 'price' values are unicode strings
    with the expected markers ("br", "html", "$") in them.
    """
    with pytest.raises(TypeError):
        extract_listings()
    # Passing None is expected to fail only once the result is advanced.
    # Use the built-in next() rather than the Python-2-only .next() method.
    with pytest.raises(AttributeError):
        next(extract_listings(None))

    response, encoding = read_search_results()
    parsed_page = parse_source(response)
    listing_generator = extract_listings(parsed_page)
    assert isinstance(listing_generator, types.GeneratorType)
    testdict = next(listing_generator)
    assert isinstance(testdict, dict)
    assert isinstance(testdict['size'], unicode)
    assert "br" in testdict['size']
    assert isinstance(testdict['description'], unicode)
    assert isinstance(testdict['link'], unicode)
    assert "html" in testdict['link']
    assert isinstance(testdict['price'], unicode)
    assert '$' in testdict['price']
    # Apart from the dollar sign the price must consist of digits only.
    assert testdict['price'].strip("$").isdigit()
Ejemplo n.º 10
0
 def testRead(self):
     """Check that reading "testresults.html" gives back self.contents."""
     actual = scraper.read_search_results("testresults.html")
     expected = self.contents
     self.assertEqual(actual, expected)
Ejemplo n.º 11
0
def test_read_search_results():
    """Check the body and encoding returned by read_search_results().

    The encoding must be "utf-8" and the body must be a unicode string
    containing "Queen Anne".
    """
    body, charset = read_search_results()
    assert charset == "utf-8"
    assert isinstance(body, unicode)
    # The stored file was produced by searching for Queen Anne.
    assert "Queen Anne" in body
Ejemplo n.º 12
0
def test_parse_source():
    """Check that parse_source() returns a bs4.BeautifulSoup instance."""
    markup, charset = read_search_results()
    soup = parse_source(markup, charset)
    assert isinstance(soup, bs4.BeautifulSoup)
Ejemplo n.º 13
0
def test_read_search_result():
    """Check the read_search_results() body marker and reported encoding."""
    marker = '<span class="desktop">craigslist</span>'
    body, charset = read_search_results()
    assert marker in body
    assert charset == 'utf-8'
Ejemplo n.º 14
0
 def test_parse_source(self):
     """Check that parse_source() returns a bs4.BeautifulSoup instance."""
     markup, _charset = read_search_results()
     soup = parse_source(markup)
     self.assertIsInstance(soup, bs4.BeautifulSoup)
Ejemplo n.º 15
0
def test_read_results():
    """Check the encoding and body type returned by read_search_results().

    The second element of the result must equal 'utf-8' and the first
    element must be a str instance.
    """
    outcome = read_search_results()
    charset = outcome[1]
    assert charset == 'utf-8'
    body = outcome[0]
    assert isinstance(body, str)