def test_parse_000038(self):
    """Parse 000038.html and check the report count and category mix."""
    p = parsepolicelog.PoliceLogParser(TEST_DATA_IN)
    reports = p.parse_log("000038.html", 2009)

    # should be 70 crime reports in 000038
    self.assertEqual(len(reports), 70)

    # tally the parsed crimes per category
    distrib = {}
    for crime in reports:
        category = crime['category']
        distrib[category] = distrib.get(category, 0) + 1

    # The expected counts add up to 70, the total asserted above, so
    # comparing the whole tally is equivalent to checking each category
    # individually -- and a failure shows every mismatch at once.
    expected = {
        'auto burglary': 12,
        'grand theft': 5,
        'suspicious circumstances': 10,
        'domestic disturbance': 4,
        'battery': 5,
        'commercial burglary': 2,
        'residential burglary': 5,
        'vandalism': 13,
        'robbery': 4,
        'assault': 1,
        'stolen vehicle': 9,
    }
    self.assertEqual(distrib, expected)
def test_parse_short_log(self):
    """Parse short_log.html and verify every field of its two reports."""
    p = parsepolicelog.PoliceLogParser(source_dir=TEST_DATA_IN)
    reports = p.parse_log("short_log.html", 2009)

    # two reports in short_log.html
    self.assertEqual(len(reports), 2)

    # verify first report, parsed from:
    #   </table>Auto Burglary <P></p>
    #   <p class=story_text>2100 block Creeden Way, 1/23<P></p>
    first = reports[0]
    self.assertEqual(first['category'], 'auto burglary')
    self.assertEqual(first['date_day'], 23)
    self.assertEqual(first['date_month'], 1)
    self.assertEqual(first['date_year'], 2009)
    self.assertEqual(first['address'].lower(), '2100 block creeden way')
    self.assertEqual(first['map_scale'], mapscale.BLOCK)

    # verify second report, parsed from:
    #   <p class=story_text>Commercial Burglary<P></p>
    #   <p class=story_text>California St./S. Rengstorff Ave., 2/26<P></p>
    second = reports[1]
    self.assertEqual(second['category'], 'commercial burglary')
    self.assertEqual(second['date_day'], 26)
    self.assertEqual(second['date_month'], 2)
    self.assertEqual(second['date_year'], 2009)
    self.assertEqual(second['address'].lower(),
                     'california st./s. rengstorff ave.')
    self.assertEqual(second['map_scale'], mapscale.INTERSECTION)
def test_map_scale_id(self):
    """Check the map scale and date parsed from each address style.

    One report line is fed to parse_report_line() for each of the four
    map scales: BLOCK, EXACT, INTERSECTION and OTHER.
    """
    p = parsepolicelog.PoliceLogParser()

    # "<number> block <street>" -> BLOCK
    line = "<p class=story_text>200 block Rock St., 1/29<P></p>"
    report = p.parse_report_line(line)
    self.assertEqual(report['map_scale'], mapscale.BLOCK)
    self.assertEqual(report['date_month'], 1)
    self.assertEqual(report['date_day'], 29)

    # "<number> <street>" -> EXACT
    line = "<p class=story_text>2500 West Middlefield Rd., 12/30<P></p>"
    report = p.parse_report_line(line)
    self.assertEqual(report['map_scale'], mapscale.EXACT)
    self.assertEqual(report['date_month'], 12)
    self.assertEqual(report['date_day'], 30)

    # "<street>/<street>" -> INTERSECTION
    line = ("<p class=story_text>Crittenden Lane/N. Shoreline Blvd., "
            "1/30<P></p>")
    report = p.parse_report_line(line)
    self.assertEqual(report['map_scale'], mapscale.INTERSECTION)
    self.assertEqual(report['date_month'], 1)
    self.assertEqual(report['date_day'], 30)

    # "<place name>, <street>" -> OTHER
    line = ("<p class=story_text>Safeway, North Shoreline Blvd., "
            "1/25 <P></p>")
    report = p.parse_report_line(line)
    self.assertEqual(report['map_scale'], mapscale.OTHER)
    self.assertEqual(report['date_month'], 1)
    self.assertEqual(report['date_day'], 25)
def setUp(self):
    """Load issue #39 into the database the first time a test runs.

    The class-level flag TestParsePoliceLog.database_populated records
    that the load has been done, so later calls return without touching
    the database again.
    """
    # Single parenthesised argument: prints the same text whether this
    # is run as a Python 2 print statement or a Python 3 print() call.
    print('setUp: database_populated=%s' % self.database_populated)

    if TestParsePoliceLog.database_populated:
        return

    # set up database
    issue_num = 39
    police_log = PoliceLog(issue_number=issue_num,
                           issue_exists=True,
                           pub_date=datetime.date(2006, 3, 10))
    police_log.save()

    # associate db entry with log file in test directory
    test_dir = TEST_DATA_IN
    findpolicelog.sync_download_issue_with_db(issue_num, test_dir)

    # parse crime log and load crimes into db
    p = parsepolicelog.PoliceLogParser(test_dir)
    p.parse_log_and_populate_db(issue_num, issue_num)

    # remember that we've performed setup; this is only reached when
    # every step above succeeded
    TestParsePoliceLog.database_populated = True
def test_crimereport_unique_hash(self):
    """Verify the crime report hash prevents duplicate db entries.

    Issue #39 is parsed and loaded a second time; the number of crime
    reports linked to it must not change.
    """
    # NOTE(review): nothing here loads issue 39 for the first time --
    # presumably setUp has already done so; confirm. If no reports were
    # loaded beforehand, the comparison below is much weaker.
    issue_num = 39
    test_dir = TEST_DATA_IN

    # count entries in db
    crime_report_list = CrimeReport.objects.filter(
        policelog__issue_number__exact=issue_num)
    num_reports_before = len(crime_report_list)

    # parse crime log and load crimes into db AGAIN
    p = parsepolicelog.PoliceLogParser(test_dir)
    p.parse_log_and_populate_db(issue_num, issue_num)

    crime_report_list = CrimeReport.objects.filter(
        policelog__issue_number__exact=issue_num)
    num_reports_after = len(crime_report_list)

    # verify that the second parse_log_and_populate_db didn't result
    # in additional db entries
    self.assertEqual(num_reports_before, num_reports_after)
def test_parse_000039_01(self):
    """A 'street/street' address line is classified as INTERSECTION."""
    line = '''<p class=story_text>Moffett Blvd./Stevens Creek, 1/30<P></p>'''
    p = parsepolicelog.PoliceLogParser()
    crime_report = p.parse_report_line(line)
    self.assertEqual(crime_report['map_scale'], mapscale.INTERSECTION)
def test_parse_000039_00(self):
    """A 'NNN block <street>' address line is classified as BLOCK."""
    line = '''<p class=story_text>700 block San Pablo, 1/30<P></p>'''
    p = parsepolicelog.PoliceLogParser()
    crime_report = p.parse_report_line(line)
    self.assertEqual(crime_report['map_scale'], mapscale.BLOCK)
def test_populate_db_issue_000038(self):
    """Parse the issue #38 police log, load it into the database, and
    check the stored report count, category mix and map-scale mix."""
    # set up database
    issue_num = 38
    police_log = PoliceLog(issue_number=issue_num,
                           issue_exists=True,
                           pub_date=datetime.date(2006, 3, 10))
    police_log.save()

    # associate db entry with log file in test directory
    test_dir = TEST_DATA_IN
    findpolicelog.sync_download_issue_with_db(issue_num, test_dir)

    # parse crime log and load crimes into db
    p = parsepolicelog.PoliceLogParser(test_dir)
    p.parse_log_and_populate_db(issue_num, issue_num)

    # should be 70 crime reports associated with 000038
    # the following query set is equivalent to:
    #   police_log=PoliceLog.objects.get(issue_number__exact=issue_num)
    #   crime_report_list=police_log.crimereport_set.all()
    crime_report_list = CrimeReport.objects.filter(
        policelog__issue_number__exact=issue_num)
    self.assertEqual(len(crime_report_list), 70)

    # parse_and_populate_db shouldn't have created an issue 39
    # NOTE(review): this lookup asserts nothing -- the test passes
    # whether or not issue 39 exists. It is left unchanged because
    # other fixtures may legitimately create issue 39; decide whether
    # a real assertion is wanted here.
    try:
        PoliceLog.objects.get(issue_number__exact=issue_num + 1)
    except PoliceLog.DoesNotExist:
        pass

    # check distribution of parsed crimes
    category_distrib = get_crime_type_distribution(crime_report_list)
    scale_distrib = get_mapscale_distribution(crime_report_list)

    self.assertEqual(category_distrib['auto burglary'], 12)
    self.assertEqual(category_distrib['grand theft'], 5)
    self.assertEqual(category_distrib['suspicious circumstances'], 10)
    self.assertEqual(category_distrib['domestic disturbance'], 4)
    self.assertEqual(category_distrib['battery'], 5)
    self.assertEqual(category_distrib['commercial burglary'], 2)
    self.assertEqual(category_distrib['residential burglary'], 5)
    self.assertEqual(category_distrib['vandalism'], 13)
    self.assertEqual(category_distrib['robbery'], 4)
    self.assertEqual(category_distrib['stolen vehicle'], 9)
    self.assertEqual(category_distrib['assault'], 1)

    self.assertEqual(scale_distrib[mapscale.BLOCK], 56)
    self.assertEqual(scale_distrib[mapscale.INTERSECTION], 1)
    self.assertEqual(scale_distrib[mapscale.EXACT], 2)
    self.assertEqual(scale_distrib[mapscale.OTHER], 11)