def GetZipData():
    '''
    Load the zip-code data file and return its labelled contents.

    Builds a ParseCSV over the module-level dataFile and hands back whatever
    getLabelledData() produces. (The original note describes this as
    zipCodes.csv read into memory and returned as a dict.)
    '''
    return ParseCSV(dataFile).getLabelledData()
def GetZipsBorough():
    '''
    Build a { zip code: borough } lookup from the boroughs file.

    Reads the raw columns of boroughsFile through ParseCSV. The first column
    (data[0]) is used as the zip codes and the second (data[1]) as the
    matching boroughs, paired up by position. When a zip code appears more
    than once, the borough from its first occurrence is kept.
    '''
    _, columns = ParseCSV(boroughsFile).getRawData()

    boroughByZip = {}
    for position, code in enumerate(columns[0]):
        # setdefault only stores the value when the key is new, so later
        # duplicates of a zip code never overwrite the first borough seen.
        boroughByZip.setdefault(code, columns[1][position])
    return boroughByZip
def GetZipIncidentsFromFile():
    '''
    Returns a dict of { (Zip Code): incidents }.

    Reads incidentFile through ParseCSV (constructed with the (1, 2)
    argument) and walks the two returned columns in step: data[0] is used as
    the zip code and data[1] as that row's incident count. Counts for the
    same zip code are summed.

    Clean-up applied to each row:
      * every character of the count that is not a digit or '.' is removed;
      * a count that is blank after that is treated as 1;
      * rows with a blank zip code are skipped;
      * a zip code longer than five characters whose first five characters
        are all digits is cut down to those five characters;
      * a count that still cannot be converted with int() (for example one
        containing '.') is reported on stdout and the row is skipped.
    '''
    parser = ParseCSV(incidentFile, (1, 2))
    (_, data) = parser.getRawData()

    # Not all of the zip codes and counts are well-formed.
    # Using this regex to try and only find numbers in them.
    numberParser = re.compile(r'[^\d.]+')

    zipsDict = {}
    for i, zipCode in enumerate(data[0]):
        zipCount = numberParser.sub('', data[1][i])

        # I'm not actually sure if this is a count. It looks like it might
        # be, but there's no description anywhere.
        # I am likewise going to assume that any empty entry is just "1".
        # (Truthiness instead of `is ''`: identity against a literal depends
        # on string interning and is never true for unicode values.)
        if not zipCount:
            zipCount = 1

        if not zipCode:
            continue

        # It seems like there are a bunch of errors in the formatting.
        # This is attempting to deal with that noise.
        # NOTE: an earlier extra branch for "longer than 5 and all digits"
        # could never run, because this condition already matches every
        # such value; it has been dropped without changing behavior.
        if len(zipCode) > 5 and zipCode[0:5].isdigit():
            zipCode = zipCode[0:5]

        # Only the int() conversion can raise ValueError, so keep the try
        # limited to it.
        try:
            increment = int(zipCount)
        except ValueError:
            # Single pre-formatted argument so the output is the same text
            # under both the Python 2 print statement and Python 3 print().
            print("Value Error occured for ZipCode %s  and count %s"
                  % (zipCode, zipCount))
            continue

        zipsDict[zipCode] = zipsDict.get(zipCode, 0) + increment

    return zipsDict