Example #1
import glob
import sqlite3
import sys

import requests
from dps import parser
from itertools import groupby

# If this file is imported rather than run as a script, exit immediately.
if __name__ != '__main__':
    print('Can only be used as a script')
    sys.exit(1)

# All DPS data is assumed to have already been downloaded and extracted to
# the data/ directory.
data = []
for path in glob.glob('data/*.html'):
    with open(path, 'r') as f:
        data.extend(parser.parse_page(f.read()))

# The .database file holds the path to the SQLite database.
with open('.database', 'r') as f:
    connection = sqlite3.connect(f.read().strip())

# Track the number of geocoding requests made; stop at 2,000 because Google
# starts denying requests around 2,500.
reqs = 0

# Sort and group by location so each unique address is handled once, reducing
# the number of requests and letting us insert into the database in bulk.
data = sorted(data, key=lambda x: x[2])
for location, group in groupby(data, key=lambda x: x[2]):
    group = list(group)
    location = parser.normalize_address(location)
    print(location, len(group))
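The snippet stops before the geocoding and bulk-insert steps its comments describe. A minimal sketch of how the loop body might continue, assuming a hypothetical geocode() helper and an events(lat, lng, crime) table, neither of which appears in the source:

    # Hypothetical continuation of the loop body above.
    if reqs >= 2000:
        # Stay well under Google's ~2,500 request limit.
        break
    lat, lng = geocode(location)  # assumed helper wrapping the geocoding API
    reqs += 1
    # Insert every event for this location in a single bulk statement;
    # event[1] is the crime description, per the tests below.
    connection.executemany(
        'INSERT INTO events (lat, lng, crime) VALUES (?, ?, ?)',
        [(lat, lng, event[1]) for event in group])
    connection.commit()

Grouping first means one geocoding request per unique address instead of one per event, which is what keeps the counter under the limit.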
Example #2
def test_not_listed(self):
    # Events without a listed crime should parse as the string 'Not listed'.
    data = parser.parse_page(get_testdata('not-listed.htm'))
    last_crime = data[-1][1]
    self.assertEqual('Not listed', last_crime)
Example #3
def test_event_length(self):
    # The events.htm fixture contains exactly 14 events.
    data = parser.parse_page(get_testdata('events.htm'))
    self.assertEqual(14, len(data))
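Both tests depend on a get_testdata() helper that is not shown in these examples. A minimal sketch, assuming the HTML fixtures live in a testdata/ directory beside the test module (the directory name is a guess):

import os

def get_testdata(filename):
    # Return the contents of an HTML fixture as a string.
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, 'testdata', filename)) as f:
        return f.read()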