import glob
import sqlite3
import sys
from itertools import groupby

import requests

from dps import parser

# If someone tries to import this file, exit. This file can only be run as a
# script.
if __name__ != '__main__':
    print 'Can only be used as a script'
    sys.exit(1)

# All dps data is assumed to have already been downloaded and extracted to the
# following directory.
data = []
for filename in glob.glob('data/*.html'):
    with open(filename, 'r') as f:
        data.extend(parser.parse_page(f.read()))

with open('.database', 'r') as f:
    connection = sqlite3.connect(f.read().strip())

# Keep track of requests made; stop after 2000 because Google starts denying
# at around 2500.
reqs = 0

# Sort and group by location so we can reduce the number of requests made to
# the database and insert in bulk.
data = sorted(data, key=lambda x: x[2])
for location, group in groupby(data, key=lambda x: x[2]):
    group = list(group)
    location = parser.normalize_address(location)
    print location, len(group)
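
# The loop above only prints each grouped location; the geocoding step that
# `reqs` is meant to throttle is not shown. The sketch below is one hedged
# way it could look. The Google Geocoding endpoint is real (newer versions of
# the API also require a `key` parameter), but the `crimes` table name, its
# column layout, and the assumption that each parsed row is
# (date, crime, location) are guesses, not the author's actual schema.
GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'

def geocode(address):
    # Resolve an address to (lat, lng), or None when Google finds nothing.
    resp = requests.get(GEOCODE_URL, params={'address': address})
    results = resp.json()['results']
    if not results:
        return None
    loc = results[0]['geometry']['location']
    return loc['lat'], loc['lng']

for location, group in groupby(data, key=lambda x: x[2]):
    if reqs >= 2000:
        break  # stay safely under the ~2500/day limit noted above
    rows = list(group)
    address = parser.normalize_address(location)
    coords = geocode(address)
    reqs += 1
    if coords is None:
        continue
    lat, lng = coords
    # Bulk-insert every event that shares this address in one statement.
    connection.executemany(
        'INSERT INTO crimes (date, crime, location, lat, lng) '
        'VALUES (?, ?, ?, ?, ?)',
        [(row[0], row[1], address, lat, lng) for row in rows])
connection.commit()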
def test_not_listed(self):
    data = parser.parse_page(get_testdata('not-listed.htm'))
    last_crime = data[-1][1]
    self.assertEqual('Not listed', last_crime)
def test_event_length(self):
    data = parser.parse_page(get_testdata('events.htm'))
    self.assertEqual(14, len(data))
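
# Both tests rely on a `get_testdata` helper that is not shown here. A
# minimal sketch, assuming the saved HTML fixtures live in a `testdata/`
# directory next to the test module (the directory name is a guess):
import os

TESTDATA_DIR = os.path.join(os.path.dirname(__file__), 'testdata')

def get_testdata(filename):
    # Return the raw contents of a saved HTML fixture.
    with open(os.path.join(TESTDATA_DIR, filename)) as f:
        return f.read()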