Exemple #1
0
    def test_parse_000038(self):
        """Parse 000038.html and check the report count and category tallies."""
        parser = parsepolicelog.PoliceLogParser(TEST_DATA_IN)
        reports = parser.parse_log("000038.html", 2009)

        # should be 70 crime reports in 000038
        self.assertEqual(len(reports), 70)

        # Tally the parsed crimes per category.  dict.get() is used instead
        # of dict.has_key(), which no longer exists in Python 3.
        distrib = {}
        for crime in reports:
            category = crime['category']
            distrib[category] = distrib.get(category, 0) + 1

        # expected number of reports per category
        expected = {
            'auto burglary': 12,
            'grand theft': 5,
            'suspicious circumstances': 10,
            'domestic disturbance': 4,
            'battery': 5,
            'commercial burglary': 2,
            'residential burglary': 5,
            'vandalism': 13,
            'robbery': 4,
            'assault': 1,
            'stolen vehicle': 9,
        }
        for category, count in sorted(expected.items()):
            # a category that was never parsed counts as 0, so it is reported
            # as an ordinary assertion failure rather than a KeyError
            found = distrib.get(category, 0)
            self.assertEqual(
                found, count,
                '%s: expected %d reports, found %d' % (category, count, found))
Exemple #2
0
    def test_parse_short_log(self):
        """Parse short_log.html and verify both reports field by field."""
        parser = parsepolicelog.PoliceLogParser(source_dir=TEST_DATA_IN)
        reports = parser.parse_log("short_log.html", 2009)

        # two reports in short_log.html
        self.assertEqual(len(reports), 2)

        # Expected (category, day, month, year, lower-cased address, map scale)
        # for each report, in file order.
        #
        # first report comes from:
        #   </table>Auto Burglary <P></p>
        #   <p class=story_text>2100 block Creeden Way, 1/23<P></p>
        # second report comes from:
        #   <p class=story_text>Commercial Burglary<P></p>
        #   <p class=story_text>California St./S. Rengstorff Ave., 2/26<P></p>
        expected_reports = [
            ('auto burglary', 23, 1, 2009,
             '2100 block creeden way', mapscale.BLOCK),
            ('commercial burglary', 26, 2, 2009,
             'california st./s. rengstorff ave.', mapscale.INTERSECTION),
        ]

        for report, expected in zip(reports, expected_reports):
            category, day, month, year, address, scale = expected
            self.assertEqual(report['category'], category)
            self.assertEqual(report['date_day'], day)
            self.assertEqual(report['date_month'], month)
            self.assertEqual(report['date_year'], year)
            # address comparison is case-insensitive
            self.assertEqual(report['address'].lower(), address)
            self.assertEqual(report['map_scale'], scale)
Exemple #3
0
    def test_map_scale_id(self):
        """Check the map scale and month/day parsed from sample report lines."""
        parser = parsepolicelog.PoliceLogParser()

        # (report line, expected map scale, expected month, expected day)
        cases = [
            ("<p class=story_text>200 block Rock St., 1/29<P></p>",
             mapscale.BLOCK, 1, 29),
            ("<p class=story_text>2500 West Middlefield Rd., 12/30<P></p>",
             mapscale.EXACT, 12, 30),
            ("<p class=story_text>Crittenden Lane/N. Shoreline Blvd., 1/30<P></p>",
             mapscale.INTERSECTION, 1, 30),
            ("<p class=story_text>Safeway, North Shoreline Blvd., 1/25 <P></p>",
             mapscale.OTHER, 1, 25),
        ]

        for line, scale, month, day in cases:
            report = parser.parse_report_line(line)
            parsed = (report['map_scale'],
                      report['date_month'],
                      report['date_day'])
            wanted = (scale, month, day)
            # the message names the offending line, since all cases share
            # this single assertion
            self.assertEqual(
                parsed, wanted,
                '%r: parsed (scale, month, day) %r, expected %r'
                % (line, parsed, wanted))
Exemple #4
0
    def setUp(self):
        """Load issue 39 into the database the first time setUp runs.

        The class attribute ``TestParsePoliceLog.database_populated`` is set
        to True after the first load, so later calls only print the flag and
        return.
        """
        # Parenthesised single-argument form prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print('setUp: database_populated=%s' % self.database_populated)

        # NOTE(review): assumes database_populated is a plain bool.
        if TestParsePoliceLog.database_populated:
            return

        # set up database
        issue_num = 39
        police_log = PoliceLog(issue_number=issue_num,
                               issue_exists=True,
                               pub_date=datetime.date(2006, 3, 10))
        police_log.save()

        # associate db entry with log file in test directory
        test_dir = TEST_DATA_IN
        findpolicelog.sync_download_issue_with_db(issue_num, test_dir)

        # parse crime log and load crimes into db
        parser = parsepolicelog.PoliceLogParser(test_dir)
        parser.parse_log_and_populate_db(issue_num, issue_num)

        # remember that we've performed setup
        TestParsePoliceLog.database_populated = True
Exemple #5
0
    def test_crimereport_unique_hash(self):
        """Verify that parsing a log again adds no duplicate crime reports.

        NOTE(review): this test does not load issue 39 itself; it presumably
        relies on setUp having populated the database beforehand -- confirm.
        """
        issue_num = 39
        test_dir = TEST_DATA_IN

        def count_reports():
            # number of crime reports currently stored for issue_num
            return len(CrimeReport.objects.filter(
                policelog__issue_number__exact=issue_num))

        # count entries in db before the extra parse
        reports_before = count_reports()

        # parse crime log and load crimes into db AGAIN
        parser = parsepolicelog.PoliceLogParser(test_dir)
        parser.parse_log_and_populate_db(issue_num, issue_num)
        reports_after = count_reports()

        # verify that the extra parse_log_and_populate_db didn't result
        # in additional db entries
        self.assertEqual(reports_before, reports_after)
Exemple #6
0
    def test_parse_000039_01(self):
        """A "street/street" address line is parsed as an intersection."""
        line = "<p class=story_text>Moffett Blvd./Stevens Creek, 1/30<P></p>"

        parser = parsepolicelog.PoliceLogParser()
        report = parser.parse_report_line(line)

        # assertEqual shows both values on failure, unlike assert_(a == b)
        self.assertEqual(report['map_scale'], mapscale.INTERSECTION)
Exemple #7
0
    def test_parse_000039_00(self):
        """A "<number> block <street>" address line is parsed as a block."""
        line = "<p class=story_text>700 block San Pablo, 1/30<P></p>"

        parser = parsepolicelog.PoliceLogParser()
        report = parser.parse_report_line(line)

        # assertEqual shows both values on failure, unlike assert_(a == b)
        self.assertEqual(report['map_scale'], mapscale.BLOCK)
Exemple #8
0
    def test_populate_db_issue_000038(self):
        """Parse the issue #38 police log into the database and check it.

        Verifies the number of stored crime reports and their distribution
        by crime category and by map scale.
        """
        # set up database
        issue_num = 38
        police_log = PoliceLog(issue_number=issue_num,
                               issue_exists=True,
                               pub_date=datetime.date(2006, 3, 10))
        police_log.save()

        # associate db entry with log file in test directory
        test_dir = TEST_DATA_IN
        findpolicelog.sync_download_issue_with_db(issue_num, test_dir)

        # parse crime log and load crimes into db
        parser = parsepolicelog.PoliceLogParser(test_dir)
        parser.parse_log_and_populate_db(issue_num, issue_num)

        # should be 70 crime reports associated with 000038
        # the following query set is equivalent to:
        #   police_log=PoliceLog.objects.get(issue_number__exact=issue_num)
        #   crime_report_list=police_log.crimereport_set.all()
        crime_report_list = CrimeReport.objects.filter(
            policelog__issue_number__exact=issue_num)
        self.assertEqual(len(crime_report_list), 70)

        # parse_and_populate_db shouldn't have created an issue 39
        # NOTE(review): this lookup asserts nothing -- the test passes whether
        # or not issue 39 exists.  It is left non-failing on purpose: a setUp
        # elsewhere in this file saves an issue 39 itself, so a strict
        # assertion here may fail for unrelated reasons.  Confirm the intent
        # before tightening.
        try:
            PoliceLog.objects.get(issue_number__exact=issue_num + 1)
        except PoliceLog.DoesNotExist:
            pass

        # check distribution of parsed crimes
        category_distrib = get_crime_type_distribution(crime_report_list)
        scale_distrib = get_mapscale_distribution(crime_report_list)

        expected_categories = [
            ('auto burglary', 12),
            ('grand theft', 5),
            ('suspicious circumstances', 10),
            ('domestic disturbance', 4),
            ('battery', 5),
            ('commercial burglary', 2),
            ('residential burglary', 5),
            ('vandalism', 13),
            ('robbery', 4),
            ('stolen vehicle', 9),
            ('assault', 1),
        ]
        for category, count in expected_categories:
            self.assertEqual(
                category_distrib[category], count,
                'category %r: expected %r, found %r'
                % (category, count, category_distrib[category]))

        expected_scales = [
            (mapscale.BLOCK, 56),
            (mapscale.INTERSECTION, 1),
            (mapscale.EXACT, 2),
            (mapscale.OTHER, 11),
        ]
        for scale, count in expected_scales:
            self.assertEqual(
                scale_distrib[scale], count,
                'map scale %r: expected %r, found %r'
                % (scale, count, scale_distrib[scale]))