def saveCrimeSubsetCsv(infile, start_time, end_time, crime_list, loc_list=None, outfile=None, nrec=None, chicago_side=None):
    """Write a filtered subset of a Chicago crime CSV file to a new CSV file.

    The rows of ``infile`` are obtained through ``chicago.load`` (restricted
    to ``crime_list``), optionally limited to one side of Chicago, and then
    written to ``outfile`` when they pass the location and date filters.

    Parameters
    ----------
    infile
        Path of the CSV file to read.
    start_time, end_time
        Bounds of the half-open date interval ``[start_time, end_time)``.
        They are compared against ``numpy.datetime64`` day values built from
        each row's date column, and passed to ``getSixDigitDate`` when a
        default output name is generated.
    crime_list
        Collection of crime type strings. Passed to ``chicago.load`` and
        joined with "-" for the default output name.
    loc_list
        Optional collection; when given, only rows whose column 7 value is
        in it are kept (presumably the location description -- confirm
        against the CSV header). Its presence also adds "_RES" to the
        default output name.
    outfile
        Path of the CSV file to write. When ``None``, a name of the form
        ``chi_all_<side>_<crimes>_<start>_<end>[_<nrec>].csv`` is generated
        in the directory of ``infile``.
    nrec
        Optional maximum number of data rows to write.
    chicago_side
        Optional side name passed to ``chicago.get_side``; when ``None`` no
        regional filtering is applied.

    Returns
    -------
    None

    Notes
    -----
    Terminates the process with ``sys.exit(1)`` if a loaded row has a crime
    type (column 5) that is not in ``crime_list``.
    """
    # Column positions used within the rows returned by chicago.load.
    date_col, crime_col, loc_col = 2, 5, 7

    datadir = os.path.join("..", "..", "Data")
    chicago.set_data_directory(datadir)

    if outfile is None:
        crime_str = "-".join(crime_list)
        if loc_list is not None:
            crime_str += "_RES"
        # chicago_side defaults to None, so guard before calling .upper();
        # anything other than "South" (including None) is labelled "a".
        is_south = chicago_side is not None and chicago_side.upper() == "SOUTH"
        chicago_side_short = "s" if is_south else "a"
        name_parts = [
            "chi",
            "all",
            chicago_side_short,
            crime_str,
            getSixDigitDate(start_time),
            getSixDigitDate(end_time),
        ]
        if nrec is not None:
            name_parts.append(str(nrec))
        outfile = os.path.join(os.path.dirname(infile), "_".join(name_parts) + ".csv")
    print(outfile)

    # Load everything BEFORE opening the output file for writing: a failed
    # load then cannot leave an empty output file behind, and an output path
    # equal to the input path is not truncated before it has been read.
    # Obtain data as TimedPoints object, and corresponding csv rows
    points_crime, csv_rows, csv_header = chicago.load(
        infile, crime_list, type="all", withcsvrows=True)

    # Map each point (as a hashable tuple) to its csv row so the rows can be
    # recovered after the points have been filtered geometrically.
    # NOTE: points that compare equal as tuples share one key, so each of
    # them resolves to the row of the last such point.
    points_csv_dict = dict(zip((tuple(p) for p in points_crime), csv_rows))

    # If a region of Chicago has been specified ("South"), do that here
    points_crime_region = points_crime
    if chicago_side is not None:
        region_polygon = chicago.get_side(chicago_side)
        points_crime_region = open_cp.geometry.intersect_timed_points(
            points_crime, region_polygon)
    csv_rows_region = [points_csv_dict[tuple(p)] for p in points_crime_region]

    row_ctr = 0
    with open(outfile, "w") as outf:
        outf_writer = csv.writer(outf, lineterminator='\n')
        outf_writer.writerow(csv_header)
        for row in csv_rows_region:
            # Checked before each row so that nrec=0 writes no data rows.
            if nrec is not None and row_ctr >= nrec:
                break
            # Sanity check: chicago.load was asked for crime_list only.
            if row[crime_col] not in crime_list:
                print("Wait how'd that happen?")
                print(row)
                print(row[crime_col])
                print(crime_list)
                sys.exit(1)
            if loc_list is not None and row[loc_col] not in loc_list:
                continue
            # The date column starts with "MM/DD/YYYY"; rebuild it as
            # "YYYY-MM-DD" so numpy can parse it.
            row_m, row_d, row_y = row[date_col].split()[0].split("/")
            row_date = np.datetime64("-".join([row_y, row_m, row_d]))
            if start_time <= row_date < end_time:
                outf_writer.writerow(row)
                row_ctr += 1
    print(f"Wrote header and {row_ctr} rows to {outfile}")
    return
def trialLoadGenericDataOLD(filepath):
    """Compare ``chicago.load`` with a hand-written CSV reader, then exit.

    A hard-coded Chicago data file is loaded through ``chicago.load``.
    Separately, ``filepath`` is read with the ``csv`` module, keeping only
    "BURGLARY" rows, and turned into a ``TimedPoints`` object. Sizes, types
    and element-wise comparisons of the two results are printed.

    Parameters
    ----------
    filepath
        Path of the CSV file to parse by hand. Its header must contain the
        columns "Primary Type", "X Coordinate", "Y Coordinate" and "Date".

    Notes
    -----
    This function never returns: it always ends the process with
    ``sys.exit(0)`` after printing the comparison. It also prepends the
    parent directory to ``sys.path`` so that ``open_cp`` can be imported.
    """
    sys.path.insert(0, os.path.abspath(".."))
    # Elements from PredictCode's custom "open_cp" package
    import open_cp.sources.chicago as chicago
    from open_cp.data import TimedPoints

    crime_type_set = {"BURGLARY"}

    datadir = os.path.join("..", "..", "Data")
    # Other files previously tried here: "chicago_all_old.csv",
    # "chi_all_s_BURGLARY_010101_190101.csv"
    chicago_file_name = "chi_all_s_BURGLARY_RES_010101_190101.csv"
    # File names containing "all" are loaded with type="all".
    chicago_load_type = "all" if "all" in chicago_file_name else "snapshot"
    chicago_file_path = os.path.join(datadir, chicago_file_name)
    # Chicago module requires this line to access some data
    chicago.set_data_directory(datadir)
    points_crime = chicago.load(chicago_file_path, crime_type_set,
                                type=chicago_load_type)

    # Standard field role -> column name used in this particular CSV file.
    field_name_map = {
        "_DESC_FIELD": 'Primary Type',
        "_X_FIELD": 'X Coordinate',
        "_Y_FIELD": 'Y Coordinate',
        "_TIME_FIELD": 'Date',
    }

    date_format_csv = "%m/%d/%Y %I:%M:%S %p"

    def dt_convert(date_string, date_format=date_format_csv):
        """Parse a CSV date string into a ``datetime.datetime``."""
        return datetime.datetime.strptime(date_string, date_format)

    data = []
    # newline="" lets the csv module handle line endings inside quoted
    # fields itself, as the csv documentation requires.
    with open(filepath, newline="") as f:
        reader = csv.reader(f)
        header = next(reader)
        header_num_map = {name: idx for idx, name in enumerate(header)}
        field_num_map = {
            std_name: header_num_map[csv_name]
            for std_name, csv_name in field_name_map.items()
        }
        desc_col = field_num_map["_DESC_FIELD"]
        x_col = field_num_map["_X_FIELD"]
        y_col = field_num_map["_Y_FIELD"]
        time_col = field_num_map["_TIME_FIELD"]
        for row in reader:
            desc = row[desc_col].strip()
            if desc not in crime_type_set:
                continue
            x = row[x_col].strip()
            y = row[y_col].strip()
            t = row[time_col].strip()
            data.append((dt_convert(t), float(x), float(y)))

    # Order the events chronologically before splitting into columns.
    data.sort(key=lambda triple: triple[0])
    times = [triple[0] for triple in data]
    xcoords = np.array([triple[1] for triple in data], dtype=float)
    ycoords = np.array([triple[2] for triple in data], dtype=float)

    to_meters = True
    # Coordinates are divided by this factor when converting feet to metres.
    _FEET_IN_METERS = 3937 / 1200
    if to_meters:
        xcoords /= _FEET_IN_METERS
        ycoords /= _FEET_IN_METERS

    timedpoints = TimedPoints.from_coords(times, xcoords, ycoords)

    # Report how the library loader and the hand-written loader compare.
    print(len(points_crime.timestamps))
    print(type(points_crime))
    print(len(timedpoints.timestamps))
    print(type(timedpoints))
    print(points_crime == timedpoints)
    print(points_crime.timestamps == timedpoints.timestamps)
    print(all(points_crime.timestamps == timedpoints.timestamps))
    print(points_crime.xcoords == timedpoints.xcoords)
    print(points_crime.ycoords == timedpoints.ycoords)
    print(all(points_crime.xcoords == timedpoints.xcoords))
    print(all(points_crime.ycoords == timedpoints.ycoords))
    print(points_crime.xcoords == timedpoints.xcoords)
    print(points_crime.bounding_box == timedpoints.bounding_box)
    print(points_crime.coords == timedpoints.coords)
    print(all((points_crime.coords == timedpoints.coords).flatten()))

    # The trial ends here unconditionally; nothing is returned to the caller.
    sys.exit(0)
def get_side(side="South"):
    """Look up a side of Chicago through ``chicago.get_side``.

    Parameters
    ----------
    side
        Name of the side to look up; defaults to "South".

    Returns
    -------
    Whatever ``chicago.get_side`` returns for ``side`` (elsewhere in this
    file that value is used as a region polygon).
    """
    region = chicago.get_side(side)
    return region
clock_time = time.time() #points_crime = chicago.load(chicago_file_path, crime_type_set) points_crime = chicago.load(chicago_file_path, crime_type_set, type="all") print("...loaded data.\nTime taken: {}".format(time.time() - clock_time)) ### OBTAIN GRIDDED REGION clock_time = time.time() print("Loading region and data subset...") # Obtain polygon shapely object for region of interest region_polygon = chicago.get_side(chicago_side) # Obtain data set points_crime_region = open_cp.geometry.intersect_timed_points(points_crime, region_polygon) # Obtain grid with cells only overlaid on relevant region masked_grid_region = open_cp.geometry.mask_grid_by_intersection( region_polygon, open_cp.data.Grid( xsize=cell_width, ysize=cell_height, xoffset=0, yoffset=0)) # Get a list/tuple of all cellcoords in the region cellcoordlist_region = getRegionCells(masked_grid_region)