def test_get_gfs_val(test_dict_loc="data/temp_dict.pkl"):
    """
    Sanity-check get_gfs_val against a pickled GFS dict.

    Loads the dict stored at test_dict_loc, queries it at (lat=89.5, lon=50.0) and
    at a point shifted by -0.1 in both coordinates, and asserts that both results
    are within TOL of gfs_dict[(1, 1)][1, 100].

    :param test_dict_loc: Path to a pickled GFS dict. Only point this at trusted
        files, since unpickling can execute arbitrary code.
    """
    # Pickles are binary data; open in binary mode so loading does not depend on
    # the platform's text-mode newline handling.
    with open(test_dict_loc, 'rb') as fpkl:
        gfs_dict = cPickle.load(fpkl)

    lat = 89.5
    lon = 50.0
    expected = gfs_dict[(1, 1)][1, 100]

    # NOTE(review): get_gfs_val is called with four arguments here, while the other
    # callers in this file pass (lat, lon, day, month, gfs_dict, year) -- confirm
    # this test still matches the current signature.
    val = get_gfs_val(lat, lon, 0, gfs_dict)
    assert (np.abs(expected - val) <= TOL)

    # The slightly shifted query is expected to resolve to the same stored value.
    val2 = get_gfs_val(lat - .1, lon - .1, 0, gfs_dict)
    assert (np.abs(expected - val2) <= TOL)
def get_feat_df(year, outfile=None, fire_df_loc='/extra/zbutler0/data/west_coast.pkl',
                gfs_locs=('/extra/zbutler0/data/temp_dict.pkl',
                          '/extra/zbutler0/data/hum_dict.pkl',
                          '/extra/zbutler0/data/vpd_dict.pkl'),
                gfs_names=('temp', 'humidity', 'vpd'), clust_thresh=10):
    """
    Load one year of fire detections and attach a GFS (weather) value per layer to each row
    :param year: Year to keep; rows of the fire DataFrame with a different year are dropped
    :param outfile: Optional path. If truthy, the resulting DataFrame is pickled there
    :param fire_df_loc: Path to a pickled fire DataFrame. Must contain the columns year, lat,
            long, day and month
    :param gfs_locs: Paths to pickled GFS dicts, paired positionally with gfs_names
    :param gfs_names: Column names under which each GFS layer is stored
    :param clust_thresh: Unused in this function; kept so existing callers keep working
    :return: the filtered DataFrame with one extra column per entry of gfs_names. A value is
            NaN wherever get_gfs_val raised KeyError or IndexError for that row

    All input files are unpickled, so only pass paths to trusted files.
    """
    # Pickles are binary data: read (and below, write) them in binary mode.
    with open(fire_df_loc, 'rb') as ffire:
        fire_df = cPickle.load(ffire)

    # Work on a copy so the columns added below are not written into a view of
    # the unpickled frame.
    fire_df = fire_df[fire_df.year == year].copy()
    if "dayofyear" not in fire_df:
        fire_df = add_daymonth(fire_df)
    # If no XYs, create them, assuming we're in Alaska
    # NOTE(review): the default fire_df_loc is west_coast.pkl while ak_bb is used
    # here -- confirm the bounding box is the intended one.
    if "x" not in fire_df:
        fire_df = append_xy(fire_df, ak_bb)

    gfs_dict_dict = dict()
    for loc, name in zip(gfs_locs, gfs_names):
        with open(loc, 'rb') as fpkl:
            gfs_dict_dict[name] = cPickle.load(fpkl)

    # One NaN-initialised vector per layer; entries stay NaN when a lookup fails.
    n_events = len(fire_df)
    gfs_vecs = dict()
    for name in gfs_dict_dict:
        gfs_vecs[name] = np.full(n_events, np.nan)

    for i, fire_event in enumerate(fire_df.index):
        # These do not depend on the layer, so read them once per detection.
        lat = fire_df.lat[fire_event]
        lon = fire_df.long[fire_event]
        day = fire_df.day[fire_event]
        month = fire_df.month[fire_event]
        for name, gfs_dict in gfs_dict_dict.iteritems():
            try:
                gfs_vecs[name][i] = get_gfs_val(lat, lon, day, month, gfs_dict, year)
            except (KeyError, IndexError):
                # Best effort: no value available for this detection, leave NaN.
                pass

    for name, vec in gfs_vecs.iteritems():
        fire_df[name] = pd.Series(vec, index=fire_df.index)

    if outfile:
        # HIGHEST_PROTOCOL is a binary protocol, so the file must be opened 'wb'.
        with open(outfile, 'wb') as fout:
            cPickle.dump(fire_df, fout, cPickle.HIGHEST_PROTOCOL)
    return fire_df
def compute_feat_df(year, fire_df, clusts, gfs_dict_dict):
    """
    Get a DataFrame to make active fire prediction easy
    :param year: Year we want to look at
    :param fire_df: DataFrame of active fires. Must contain the columns year, dayofyear, lat,
            long, x and y
    :param clusts: Cluster assignments for each detection. Used as `clusts == clust` to mask
            the rows of fire_df belonging to year (presumably an array aligned with those
            rows -- verify against caller)
    :param gfs_dict_dict: Dict of dicts, each inner dict representing a GFS (weather) layer
    :return: a DataFrame for prediction with one row per cluster per day, for every day from
            the cluster's first to its last detection (inclusive). Fields are fire_id, day,
            day_cent, n_det, n_det_cum, lat, lon, x, y, gfs... where we have as many gfs
            fields as the len of gfs_dict_dict. A gfs value is NaN wherever get_gfs_val
            raised KeyError or IndexError
    """
    detections = fire_df[fire_df.year == year]
    clust_vals = np.unique(clusts)

    base_cols = ('fire_id', 'day', 'day_cent', 'n_det', 'n_det_cum', 'lat', 'lon', 'x', 'y')
    df_dict = dict()
    for col in base_cols:
        df_dict[col] = []
    for name in gfs_dict_dict.keys():
        df_dict[name] = []
    # NOTE: convex-hull features (hull_size, hull_size_cum) are currently not computed.

    # The previous `leapyear=(year % 4)` was truthy for NON-leap years and falsy for
    # leap years. Pass a real boolean instead.
    # NOTE(review): assumes day2monthday treats `leapyear` as a boolean flag -- confirm.
    is_leap = bool(year % 4 == 0 and (year % 100 != 0 or year % 400 == 0))

    for clust in clust_vals:
        clust_dets = detections[clusts == clust]
        days = clust_dets.dayofyear
        # Plain ints so the day range works even if dayofyear has a non-int dtype.
        min_day = int(np.min(days))
        max_day = int(np.max(days))
        center_lat = np.mean(clust_dets.lat)
        center_lon = np.mean(clust_dets.long)
        center_x = np.mean(clust_dets.x)
        center_y = np.mean(clust_dets.y)

        for day in xrange(min_day, max_day + 1):
            # We'll have exactly one entry in our DataFrame for this cluster on this day
            df_dict['fire_id'].append(clust)
            df_dict['day'].append(day)
            df_dict['day_cent'].append(day - min_day)
            df_dict['lat'].append(center_lat)
            df_dict['lon'].append(center_lon)
            df_dict['x'].append(center_x)
            df_dict['y'].append(center_y)

            # Count detections on this day / up to this day directly from the mask;
            # no need to build the filtered sub-frames just to take their length.
            df_dict['n_det'].append(int((days == day).sum()))
            df_dict['n_det_cum'].append(int((days <= day).sum()))

            month, dayofmonth = day2monthday(day, leapyear=is_leap)
            for name, gfs_dict in gfs_dict_dict.iteritems():
                try:
                    gfs_val = get_gfs_val(center_lat, center_lon, dayofmonth, month,
                                          gfs_dict, year)
                except (KeyError, IndexError):
                    # Best effort: record NaN so every column keeps one entry per row.
                    gfs_val = np.nan
                df_dict[name].append(gfs_val)

    return pd.DataFrame(df_dict)