Esempio n. 1
0
def saveCrimeSubsetCsv(infile, start_time, end_time, crime_list, loc_list=None, outfile=None, nrec=None, chicago_side=None):
    """Write a filtered subset of a Chicago crime CSV file to a new CSV file.

    The input is loaded through ``chicago.load(..., type="all",
    withcsvrows=True)``, optionally restricted to one side of the city, and
    then filtered row by row on location (column 7) and date (column 2).
    The header row is always written; matching rows follow in the order the
    loader returned them.

    Args:
        infile: Path of the CSV file to read.
        start_time: Inclusive lower bound for a row's date. It is compared
            against ``np.datetime64`` values, so it must be comparable with
            them.
        end_time: Exclusive upper bound for a row's date (same type
            requirement as ``start_time``).
        crime_list: Crime types to keep. Passed to ``chicago.load`` and also
            checked against column 5 of every returned row. When ``outfile``
            is not given, the entries are joined with "-" to form part of
            the generated file name, so they must be strings.
        loc_list: Optional container of accepted values for column 7 of a
            row. ``None`` disables the location filter.
        outfile: Path of the CSV file to write. When ``None``, a name of the
            form ``chi_all_<side>_<crimes>[_RES]_<start>_<end>[_<nrec>].csv``
            is generated in the same directory as ``infile``.
        nrec: Optional maximum number of data rows to write. ``None`` means
            no limit.
        chicago_side: Optional name of a Chicago side understood by
            ``chicago.get_side`` (e.g. "South"). ``None`` keeps every point.

    Returns:
        None. The result is the file written to ``outfile``.

    Raises:
        SystemExit: With status 1 if a loaded row has a crime type (column 5)
            that is not in ``crime_list``.
    """
    # The chicago module needs its data directory set before it is used.
    datadir = os.path.join("..", "..", "Data")
    chicago.set_data_directory(datadir)

    if outfile is None:
        crime_str = "-".join(crime_list)
        if loc_list is not None:
            crime_str += "_RES"
        # "s" marks the South side; "a" is used for anything else, including
        # the case where no side was requested at all.
        chicago_side_short = "a"
        if chicago_side is not None and chicago_side.upper() == "SOUTH":
            chicago_side_short = "s"
        outfilebase = "_".join(["chi", "all", chicago_side_short, crime_str, getSixDigitDate(start_time), getSixDigitDate(end_time)])
        if nrec is not None:
            outfilebase += "_" + str(nrec)
        outfilebase += ".csv"
        outfile = os.path.join(os.path.dirname(infile), outfilebase)
    print(outfile)

    # Load everything BEFORE opening the output file: opening with "w"
    # truncates immediately, which would leave an empty file behind if the
    # load failed and would destroy the input if outfile == infile.
    # Obtain data as TimedPoints object, and corresponding csv rows
    points_crime, csv_rows, csv_header = chicago.load(infile, crime_list, type="all", withcsvrows=True)
    # NOTE(review): rows are looked up by point tuple, so points whose tuples
    # compare equal share a single CSV row (the last one zipped) -- confirm
    # that duplicate events are acceptable here.
    points_csv_dict = dict(zip([tuple(p) for p in points_crime], csv_rows))

    # If a region of Chicago has been specified ("South"), do that here
    points_crime_region = points_crime
    if chicago_side is not None:
        region_polygon = chicago.get_side(chicago_side)
        points_crime_region = open_cp.geometry.intersect_timed_points(points_crime, region_polygon)
    csv_rows_region = [points_csv_dict[tuple(p)] for p in points_crime_region]

    row_ctr = 0
    # newline="" is what the csv module asks for; the writer's own
    # lineterminator then decides the line ending on every platform.
    with open(outfile, "w", newline="") as outf:
        outf_writer = csv.writer(outf, lineterminator='\n')
        outf_writer.writerow(csv_header)

        for row in csv_rows_region:
            # Stop as soon as the cap is reached; checking before the write
            # also makes nrec=0 produce a header-only file.
            if nrec is not None and row_ctr >= nrec:
                break

            # Sanity check: the loader was asked for crime_list only, so any
            # other value in column 5 means something is badly wrong.
            if row[5] not in crime_list:
                print("Wait how'd that happen?")
                print(row)
                print(row[5])
                print(crime_list)
                sys.exit(1)

            if loc_list is not None and row[7] not in loc_list:
                continue

            # Column 2 starts with "MM/DD/YYYY"; anything after the first
            # whitespace (presumably the time of day) is ignored.
            row_m, row_d, row_y = row[2].split()[0].split("/")
            row_date = np.datetime64("-".join([row_y, row_m, row_d]))
            # Half-open interval: start_time <= date < end_time.
            if start_time <= row_date and row_date < end_time:
                outf_writer.writerow(row)
                row_ctr += 1
    print(f"Wrote header and {row_ctr} rows to {outfile}")
    return
Esempio n. 2
0
def trialLoadGenericDataOLD(filepath):
    """Compare a hand-written CSV loader against ``chicago.load``, then exit.

    A fixed Chicago burglary file is loaded with ``chicago.load``; the file
    at ``filepath`` is parsed manually into a ``TimedPoints`` object; a series
    of equality checks between the two is printed; and the process is then
    terminated with ``sys.exit(0)``.

    Args:
        filepath: Path of a CSV file whose header contains the columns
            'Primary Type', 'X Coordinate', 'Y Coordinate' and 'Date'.
            Dates must match the format "%m/%d/%Y %I:%M:%S %p".

    Raises:
        SystemExit: Always, with status 0, once the comparisons are printed.
            The function therefore never returns normally.
    """
    sys.path.insert(0, os.path.abspath(".."))
    # Elements from PredictCode's custom "open_cp" package
    import open_cp
    import open_cp.geometry
    import open_cp.sources.chicago as chicago
    from open_cp.data import TimedPoints

    crime_type_set = {"BURGLARY"}

    # Reference data set. Earlier trials used "chicago_all_old.csv" and
    # "chi_all_s_BURGLARY_010101_190101.csv" instead.
    datadir = os.path.join("..", "..", "Data")
    chicago_file_name = "chi_all_s_BURGLARY_RES_010101_190101.csv"
    chicago_side = "South"
    # Files with "all" in their name are loaded with type "all"; anything
    # else is loaded with type "snapshot".
    chicago_load_type = "all" if "all" in chicago_file_name else "snapshot"
    chicago_file_path = os.path.join(datadir, chicago_file_name)
    # Chicago module requires this line to access some data
    chicago.set_data_directory(datadir)

    points_crime = chicago.load(
        chicago_file_path, crime_type_set, type=chicago_load_type)

    # Internal field name -> column header used in the CSV file.
    field_name_map = {
        "_DESC_FIELD": 'Primary Type',
        "_X_FIELD": 'X Coordinate',
        "_Y_FIELD": 'Y Coordinate',
        "_TIME_FIELD": 'Date',
    }

    date_format_csv = "%m/%d/%Y %I:%M:%S %p"

    records = []

    with open(filepath) as f:
        reader = csv.reader(f)
        header = next(reader)
        column_of = {title: position for position, title in enumerate(header)}
        field_num_map = {
            field: column_of[title] for field, title in field_name_map.items()
        }
        desc_col = field_num_map["_DESC_FIELD"]
        x_col = field_num_map["_X_FIELD"]
        y_col = field_num_map["_Y_FIELD"]
        time_col = field_num_map["_TIME_FIELD"]

        for row in reader:
            # Skip every row whose crime type is not of interest.
            if row[desc_col].strip() not in crime_type_set:
                continue
            x_text = row[x_col].strip()
            y_text = row[y_col].strip()
            time_text = row[time_col].strip()

            when = datetime.datetime.strptime(time_text, date_format_csv)
            records.append((when, float(x_text), float(y_text)))

    # Order chronologically (stable sort on the timestamp only).
    records = sorted(records, key=lambda record: record[0])
    times = [record[0] for record in records]
    xcoords = np.array([record[1] for record in records], dtype=float)
    ycoords = np.array([record[2] for record in records], dtype=float)

    to_meters = True
    _FEET_IN_METERS = 3937 / 1200

    if to_meters:
        xcoords = xcoords / _FEET_IN_METERS
        ycoords = ycoords / _FEET_IN_METERS

    timedpoints = TimedPoints.from_coords(times, xcoords, ycoords)

    # Size and type of each data set, reference first.
    for point_set in (points_crime, timedpoints):
        print(len(point_set.timestamps))
        print(type(point_set))

    # Object-level and timestamp comparisons.
    print(points_crime == timedpoints)
    print(points_crime.timestamps == timedpoints.timestamps)
    print(all(points_crime.timestamps == timedpoints.timestamps))

    # Coordinate comparisons, element-wise and then reduced.
    print(points_crime.xcoords == timedpoints.xcoords)
    print(points_crime.ycoords == timedpoints.ycoords)
    print(all(points_crime.xcoords == timedpoints.xcoords))
    print(all(points_crime.ycoords == timedpoints.ycoords))

    print(points_crime.xcoords == timedpoints.xcoords)

    print(points_crime.bounding_box == timedpoints.bounding_box)
    print(points_crime.coords == timedpoints.coords)
    print(all((points_crime.coords == timedpoints.coords).flatten()))

    sys.exit(0)

    # NOTE: everything below is unreachable because of the sys.exit(0) above.

    ### OBTAIN GRIDDED REGION

    # Obtain polygon shapely object for region of interest
    region_polygon = chicago.get_side(chicago_side)

    # Obtain data set within relevant region
    points_crime_region = open_cp.geometry.intersect_timed_points(
        points_crime, region_polygon)

    print(len(points_crime_region.timestamps))
    print(type(points_crime_region))

    sys.exit(0)

    return points_crime_region
Esempio n. 3
0
def get_side(side="South"):
    """Return whatever ``chicago.get_side`` yields for ``side``.

    Args:
        side: Name of the side to look up; defaults to "South".
    """
    side_region = chicago.get_side(side)
    return side_region
Esempio n. 4
0
# Script fragment: load the crime data, cut it down to one region of
# Chicago, and build a grid masked to that region. It relies on names
# defined outside this fragment (chicago_file_path, crime_type_set,
# chicago_side, cell_width, cell_height, getRegionCells).

# Start the wall-clock timer for the data-loading step.
clock_time = time.time()

# Earlier variant without an explicit load type, kept for reference:
#points_crime = chicago.load(chicago_file_path, crime_type_set)
points_crime = chicago.load(chicago_file_path, crime_type_set, type="all")


# Report how many seconds the load took.
print("...loaded data.\nTime taken: {}".format(time.time() - clock_time))


### OBTAIN GRIDDED REGION

# Restart the timer for the region/grid step.
# NOTE(review): no matching "Time taken" print is visible in this fragment.
clock_time = time.time()
print("Loading region and data subset...")

# Obtain polygon shapely object for region of interest
region_polygon = chicago.get_side(chicago_side)

# Obtain data set within relevant region
points_crime_region = open_cp.geometry.intersect_timed_points(points_crime, region_polygon)

# Obtain grid with cells only overlaid on relevant region.
# The grid uses cell_width x cell_height cells with zero x/y offset.
masked_grid_region = open_cp.geometry.mask_grid_by_intersection(
        region_polygon, 
        open_cp.data.Grid(
                xsize=cell_width, 
                ysize=cell_height, 
                xoffset=0, 
                yoffset=0))

# Get a list/tuple of all cellcoords in the region
cellcoordlist_region = getRegionCells(masked_grid_region)