Python CrimeLookup Examples

Programming Language: Python

Namespace/Package Name: CrimeLookup

Class/Type: CrimeLookup

Examples at hotexamples.com: 2

Python CrimeLookup - 2 examples found. These are the top rated real world Python examples of CrimeLookup.CrimeLookup extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

generate(1)

Example #1

Show file

File: Generator.py Project: janmoravec/czech-crime

    def __init__(self, year, month, omitZeroValues, onCompleteCallback):
        """Load the lookups and run every enabled generation step.

        All work happens in the constructor: the area and time lookups are
        read from their workbooks, the optional lookup CSVs are exported, and
        (when ``self.generateCrimeData`` is set) the spreadsheets of the
        requested year/month are processed and written to one CSV file.

        Args:
            year: name of the year folder under ``../files`` to process
                (compared as text).
            month: name of the month folder inside the year folder
                (compared as text).
            omitZeroValues: stored on the instance; when falsy the output
                file name gets a ``:with-zeros`` suffix.
            onCompleteCallback: optional callable, invoked without arguments
                after the crime-data CSV has been written and closed.

        Raises:
            KeyError: if a district code from the transport table below was
                not produced by the processed files.
        """
        # reset the per-run state
        self.areaLookup = ""
        self.crimeLookup = ""
        self.timeLookup = ""
        self.writer = ""
        self.areaWriter = ""
        self.crimeWriter = ""
        self.timeWriter = ""
        self.recordId = 1

        # Fresh containers for every instance, so district sheets collected
        # by an earlier instance are never reused by this one.
        self.districtCrimeDataSheets = []
        self.districtCrimeDataSheetsByCode = {}

        self.year = year
        self.month = month
        self.omitZeroValues = omitZeroValues
        self.onCompleteCallback = onCompleteCallback

        # 1) a) area lookup
        with xlrd.open_workbook("areaLookup.xls") as wb:
            self.areaLookup = AreaLookup(wb.sheet_by_index(0))

        # b) time lookup
        with xlrd.open_workbook("timeLookup.xls") as wb:
            self.timeLookup = TimeLookup(wb.sheet_by_index(0))

        # 2) optional lookup exports

        # area lookup
        if self.generateAreaLookup:
            with open("../generated/AreaLookup.csv", "wb") as f:
                rows = self.areaLookup.generate()
                self.areaWriter = UnicodeWriter(f)
                for row in rows:
                    self.areaWriter.writerow(row)

        # crime lookup - is generated manually
        if self.generateCrimeLookup:
            # The second positional parameter of open_workbook is the log
            # file object, not a file mode, so no "wb" is passed here.
            with xlrd.open_workbook("../files/a______.xls") as wb:
                self.crimeLookup = CrimeLookup(wb.sheet_by_index(0))

                # NOTE: writing the rows is disabled, so this only
                # creates/truncates the CSV and builds the lookup rows.
                with open("../generated/CrimeLookup.csv", "wb"):
                    self.crimeLookup.generate()

        # time lookup
        if self.generateTimeLookup:
            with open("../generated/TimeLookup.csv", "wb") as f:
                rows = self.timeLookup.generate()
                self.timeWriter = UnicodeWriter(f)
                for row in rows:
                    self.timeWriter.writerow(row)

        # 3) walk ../files/<year>/<month> and process every spreadsheet
        if self.generateCrimeData:

            directory = "../files"

            for yearFolder in listdir(directory):
                # only the year we're interested in
                if str(yearFolder) != str(self.year):
                    continue

                for monthFolder in listdir(directory + "/" + yearFolder):
                    # only the month we're interested in
                    if str(monthFolder) != str(self.month):
                        continue

                    monthPath = directory + "/" + yearFolder + "/" + monthFolder
                    entries = listdir(monthPath)

                    # get time period id
                    periodId = self.timeLookup.getTimeIdByYearAndMonth(int(yearFolder), int(monthFolder))

                    for entry in entries:
                        # Excel files only; ".xls" also matches ".xlsx"
                        if ".xls" not in entry:
                            continue
                        # omit the __L / __R / __X variants
                        if "__L" in entry or "__R" in entry or "__X" in entry:
                            continue

                        districtSheet = self.processFile(monthPath + "/" + entry, periodId)
                        if districtSheet:
                            self.districtCrimeDataSheetsByCode[districtSheet.code] = districtSheet

            # 4) add airport ("Letiste") and train-station sheets to their
            #    respective districts
            transports = [
                {"from": "x004110", "to": "0011"},
                {"from": "x064160", "to": "0602"},
                {"from": "x074170", "to": "0704"},
                {"from": "x174150", "to": "1706"},
                {"from": "x194130", "to": "1903"},
                # train stations
                {"from": "x060050", "to": "0602"},
                {"from": "x070050", "to": "0707"},
            ]

            for transport in transports:
                baseDistrictSheet = self.districtCrimeDataSheetsByCode[transport["to"]]
                addingDistrictSheet = self.districtCrimeDataSheetsByCode[transport["from"]]
                baseDistrictSheet.addDistrictCrimeDataSheet(addingDistrictSheet)

            # 5) generate all rows from the district crime data
            rows = self.generate()

            # don't need all the objects any more
            self.clear()

            # 6) write to csv file
            fileName = str(self.year) + ":" + "01-" + str(self.month)
            if not self.omitZeroValues:
                fileName = fileName + ":with-zeros"

            with open("../generated/crimeData-" + fileName + ".csv", "wb") as f:
                # single-argument parenthesised print: same output under the
                # Python 2 print statement and the Python 3 function
                print("start writing file " + unicode(fileName))
                self.writer = UnicodeWriter(f)

                # no header row is written, only the generated rows
                for row in rows:
                    self.writer.writerow(row)

            # Notify only after the with-block has closed the file, so the
            # callback sees the complete CSV on disk.
            if self.onCompleteCallback:
                self.onCompleteCallback()

Example #2

Show file

File: Generator.py Project: janmoravec/czech-crime

class Generator:
    """Convert the crime spreadsheets of one year/month into CSV output.

    Constructing an instance performs the whole run: it loads the area and
    time lookups, optionally exports the lookup CSVs, processes every
    spreadsheet found in ``../files/<year>/<month>`` and writes the generated
    rows to ``../generated/crimeData-<name>.csv``.

    The code targets Python 2 (``unicode``, binary-mode csv files). Print
    calls use the single-argument parenthesised form, which produces the same
    output under the Python 2 print statement.
    """

    # Column names; not written to the CSV by this class.
    # NOTE(review): presumably the column order of the generated rows - confirm.
    COLUMN_NAMES = [
        "id",
        "time",
        "area",
        "crime",
        "found",
        "found-end",
        "found-total",
        "solved",
        "solved-perc",
        "solved-additionally",
        "commited-drugged",
        "commited-alcohol",
        "commited-recidivst",
        "commited-under-15",
        "comitted-15-17",
        "comitted-under-18",
        "charged-total",
        "charged-recidivist",
        "charged-under-15",
        "charged-15-17",
        "charged-women",
        "damage-total",
        "damage-found",
    ]

    # Class-level defaults, kept so existing attribute access keeps working.
    # __init__ replaces the two district containers with per-instance
    # objects; as class attributes they would be shared by every instance.
    areaLookup = ""
    crimeLookup = ""
    timeLookup = ""
    writer = ""
    areaWriter = ""
    crimeWriter = ""
    timeWriter = ""
    recordId = 1
    files = []  # not used inside this class
    districtCrimeDataSheetsByCode = {}
    districtCrimeDataSheets = []

    # store areasheets with keys, so we can find them retrospectively to add another areasheet
    areaSheets = {}  # not used inside this class

    # switches selecting which generation steps __init__ runs
    generateAreaLookup = False
    generateCrimeLookup = False
    generateTimeLookup = False
    generateCrimeData = True

    # (from, to) district codes: airport ("Letiste") sheets followed by
    # train-station sheets that are added to their respective districts
    _TRANSPORTS = (
        ("x004110", "0011"),
        ("x064160", "0602"),
        ("x074170", "0704"),
        ("x174150", "1706"),
        ("x194130", "1903"),
        # train stations
        ("x060050", "0602"),
        ("x070050", "0707"),
    )

    def __init__(self, year, month, omitZeroValues, onCompleteCallback):
        """Load the lookups and run every enabled generation step.

        Args:
            year: name of the year folder under ``../files`` to process
                (compared as text).
            month: name of the month folder inside the year folder
                (compared as text).
            omitZeroValues: passed on to every ``AreaCrimeDataSheet``; when
                falsy the output file name gets a ``:with-zeros`` suffix.
            onCompleteCallback: optional callable, invoked without arguments
                after the crime-data CSV has been written and closed.

        Raises:
            KeyError: if a district code from ``_TRANSPORTS`` was not
                produced by the processed files.
        """
        self._resetState()

        self.year = year
        self.month = month
        self.omitZeroValues = omitZeroValues
        self.onCompleteCallback = onCompleteCallback

        # 1) a) area lookup
        with xlrd.open_workbook("areaLookup.xls") as wb:
            self.areaLookup = AreaLookup(wb.sheet_by_index(0))

        # b) time lookup
        with xlrd.open_workbook("timeLookup.xls") as wb:
            self.timeLookup = TimeLookup(wb.sheet_by_index(0))

        # 2) optional lookup exports
        if self.generateAreaLookup:
            self.areaWriter = self._writeLookupCsv("../generated/AreaLookup.csv", self.areaLookup)

        # crime lookup - is generated manually
        if self.generateCrimeLookup:
            # The second positional parameter of open_workbook is the log
            # file object, not a file mode, so no "wb" is passed here.
            with xlrd.open_workbook("../files/a______.xls") as wb:
                self.crimeLookup = CrimeLookup(wb.sheet_by_index(0))

                # NOTE: writing the rows is disabled, so this only
                # creates/truncates the CSV and builds the lookup rows.
                with open("../generated/CrimeLookup.csv", "wb"):
                    self.crimeLookup.generate()

        if self.generateTimeLookup:
            self.timeWriter = self._writeLookupCsv("../generated/TimeLookup.csv", self.timeLookup)

        if self.generateCrimeData:
            # 3) process all files of the requested year and month
            self._collectDistrictSheets("../files")

            # 4) add airports and train stations to their districts
            self._mergeTransportSheets()

            # 5) generate all rows from the district crime data
            rows = self.generate()

            # don't need all the objects any more
            self.clear()

            # 6) write to csv file
            self._writeCrimeData(rows)

            # Notify only after the CSV has been closed, so the callback
            # sees the complete file on disk.
            if self.onCompleteCallback:
                self.onCompleteCallback()

    def _resetState(self):
        """Reset the per-run attributes and give this instance its own containers."""
        self.areaLookup = ""
        self.crimeLookup = ""
        self.timeLookup = ""
        self.writer = ""
        self.areaWriter = ""
        self.crimeWriter = ""
        self.timeWriter = ""
        self.recordId = 1

        # Without these assignments processFile would append to the
        # class-level list, and a later instance would generate the sheets
        # of every earlier run again.
        self.districtCrimeDataSheets = []
        self.districtCrimeDataSheetsByCode = {}

    def _writeLookupCsv(self, path, lookup):
        """Write all rows of ``lookup.generate()`` to ``path``; return the writer used."""
        with open(path, "wb") as f:
            rows = lookup.generate()
            writer = UnicodeWriter(f)
            for row in rows:
                writer.writerow(row)
        return writer

    def _collectDistrictSheets(self, directory):
        """Process every spreadsheet in ``<directory>/<year>/<month>``.

        Expected layout: ``directory/2003/1``, ``directory/2003/2``, ...
        Each truthy result of ``processFile`` is stored in
        ``districtCrimeDataSheetsByCode`` under its ``code``.
        """
        for yearFolder in listdir(directory):
            # only the year we're interested in
            if str(yearFolder) != str(self.year):
                continue

            for monthFolder in listdir(directory + "/" + yearFolder):
                # only the month we're interested in
                if str(monthFolder) != str(self.month):
                    continue

                monthPath = directory + "/" + yearFolder + "/" + monthFolder
                entries = listdir(monthPath)

                # get time period id
                periodId = self.timeLookup.getTimeIdByYearAndMonth(int(yearFolder), int(monthFolder))

                for entry in entries:
                    # Excel files only; ".xls" also matches ".xlsx"
                    if ".xls" not in entry:
                        continue
                    # omit the __L / __R / __X variants
                    if "__L" in entry or "__R" in entry or "__X" in entry:
                        continue

                    districtSheet = self.processFile(monthPath + "/" + entry, periodId)
                    if districtSheet:
                        self.districtCrimeDataSheetsByCode[districtSheet.code] = districtSheet

    def _mergeTransportSheets(self):
        """Add each ``_TRANSPORTS`` source sheet to its target district sheet."""
        for fromCode, toCode in self._TRANSPORTS:
            # same lookup order as before: target first, then source
            baseDistrictSheet = self.districtCrimeDataSheetsByCode[toCode]
            addingDistrictSheet = self.districtCrimeDataSheetsByCode[fromCode]
            baseDistrictSheet.addDistrictCrimeDataSheet(addingDistrictSheet)

    def _writeCrimeData(self, rows):
        """Write ``rows`` to ``../generated/crimeData-<year>:01-<month>[:with-zeros].csv``."""
        fileName = str(self.year) + ":" + "01-" + str(self.month)
        if not self.omitZeroValues:
            fileName = fileName + ":with-zeros"

        with open("../generated/crimeData-" + fileName + ".csv", "wb") as f:
            print("start writing file " + unicode(fileName))
            self.writer = UnicodeWriter(f)

            # no header row is written, only the generated rows
            for row in rows:
                self.writer.writerow(row)

    def addRecordArray(self, sourceArr, arrToAdd):
        """Add the numeric columns of ``arrToAdd`` onto ``sourceArr``.

        The first three columns only identify the record and are left
        untouched; every later column is replaced, in place, by the float
        sum of both records rendered with ``str``.

        Returns:
            ``sourceArr`` after the in-place update, or a shallow copy of
            ``arrToAdd`` when ``sourceArr`` is empty.
        """
        sourceLen = len(sourceArr)
        if sourceLen == 0:
            return copy(arrToAdd)

        for i in range(3, sourceLen):
            sourceArr[i] = str(float(sourceArr[i]) + float(arrToAdd[i]))
        return sourceArr

    def processFile(self, fileUrl, timeId):
        """Read one workbook into a ``DistrictCrimeDataSheet``.

        The district name and area name are taken from the first sheet
        (rows 4 and 5, column 2). Every sheet of the workbook is wrapped in
        an ``AreaCrimeDataSheet`` and added to the district sheet, which is
        also appended to ``self.districtCrimeDataSheets``.

        Args:
            fileUrl: path of the workbook to open.
            timeId: time period id handed to the created sheets.

        Returns:
            The populated ``DistrictCrimeDataSheet``.
        """
        with xlrd.open_workbook(fileUrl) as wb:

            firstSheet = wb.sheet_by_index(0)
            districtName = firstSheet.row(4)[2].value
            areaName = firstSheet.row(5)[2].value
            districtCode = str(self.areaLookup.getAreaCodeByName(districtName))

            # if airport ("Letiste") or train station, the area name is
            # appended to make the district name more specific
            if districtCode == "-1" or firstSheet.name in ("a060050", "a070050"):
                districtName = districtName + "-" + areaName
                districtCode = str(self.areaLookup.getAreaCodeByName(districtName))

            # data records for the district
            districtCrimeDataSheet = DistrictCrimeDataSheet(districtCode, districtName, timeId)
            self.districtCrimeDataSheets.append(districtCrimeDataSheet)

            print(unicode(districtName) + " - " + unicode(districtCode))

            # districtCode is a string, so it must be compared with "-1";
            # the former comparison with the integer -1 could never match.
            if districtCode == "-1":
                print(districtCode)
                print(districtName)

            for sheetIndex in range(wb.nsheets):
                sheet = wb.sheet_by_index(sheetIndex)
                areaSheet = AreaCrimeDataSheet(sheet, timeId, self.omitZeroValues)
                districtCrimeDataSheet.addAreaCrimeDataSheet(areaSheet)

                # testing correct naming
                areaName = areaSheet.name
                # code as exposed by the area sheet
                areaCode = areaSheet.code

                if areaCode == -1:
                    Logger.throwError("Unknown Area: " + unicode(areaCode) + "," + unicode(areaName))

                print(unicode(areaCode) + "," + unicode(areaName))

            return districtCrimeDataSheet

    def generate(self):
        """Return the rows of all collected district sheets, in collection order."""
        rows = []
        for districtSheet in self.districtCrimeDataSheets:
            rows.extend(districtSheet.generate())
        return rows

    def clear(self):
        """Drop all collected district sheets."""
        self.districtCrimeDataSheets = []
        self.districtCrimeDataSheetsByCode.clear()