Esempio n. 1
0
def GetZipData():
    '''
    Loads the CSV named by dataFile through ParseCSV and hands back
    whatever getLabelledData() produces.

    NOTE(review): the original note describes this as zipCodes.csv being
    read into memory and returned as a dict -- confirm against ParseCSV.
    '''
    return ParseCSV(dataFile).getLabelledData()
Esempio n. 2
0
def GetZipsBorough():
    '''
    Builds a { zip code: borough } dict from boroughsFile.

    The first two columns returned by ParseCSV.getRawData() are paired by
    position: column 0 supplies the zip codes and column 1 the boroughs.
    When a zip code appears more than once, the borough from its first
    occurrence is kept.
    '''
    (_, columns) = ParseCSV(boroughsFile).getRawData()

    boroughByZip = {}
    for row, zipCode in enumerate(columns[0]):
        borough = columns[1][row]
        # setdefault leaves an already-recorded zip code untouched.
        boroughByZip.setdefault(zipCode, borough)

    return boroughByZip
Esempio n. 3
0
def GetZipIncidentsFromFile():
    '''
    Tallies incident counts per zip code from incidentFile.

    returns a dict of
    { (Zip Code):incidents }
    where incidents is the integer sum of every count seen for that zip.

    Cleaning applied to each row:
      * rows with an empty zip code are skipped;
      * a zip code longer than 5 characters whose first 5 characters are
        all digits (e.g. a ZIP+4 value) is cut down to those 5 digits;
      * everything except digits and '.' is stripped from the count, and
        an empty result is treated as 1;
      * a count that still cannot be parsed by int() (e.g. "1.5") is
        reported on stdout and the row is skipped.
    '''
    # NOTE(review): (1, 2) presumably selects which CSV columns to load --
    # confirm against ParseCSV.
    parser = ParseCSV(incidentFile, (1, 2))
    (_, data) = parser.getRawData()

    # Not all of the zip codes and counts are well-formed.
    # This regex removes everything that is not a digit or a dot.
    nonNumeric = re.compile(r'[^\d.]+')

    zipsDict = {}
    # data[0] holds the zip codes, data[1] the matching counts; zip() pairs
    # them by position and stops at the shorter column.
    for zipCode, rawCount in zip(data[0], data[1]):
        # Compare by value: identity checks against '' only work by
        # accident of string interning.
        if zipCode == '':
            continue

        # There is a fair amount of formatting noise in the zip codes;
        # keep only the leading 5 digits of over-long entries.
        if len(zipCode) > 5 and zipCode[:5].isdigit():
            zipCode = zipCode[:5]

        # It is not documented whether this column really is a count; it
        # looks like one, so it is treated as such.
        zipCount = nonNumeric.sub('', rawCount)

        try:
            # A blank entry is assumed to stand for a single incident.
            incidents = int(zipCount) if zipCount else 1
        except ValueError:
            # Single-argument form prints the same text whether print is a
            # statement or a function.
            print("Value Error occured for ZipCode %s  and count %s"
                  % (zipCode, zipCount))
            continue

        zipsDict[zipCode] = zipsDict.get(zipCode, 0) + incidents

    return zipsDict