Exemple #1
0
def test_get_gfs_val(test_dict_loc="data/temp_dict.pkl"):
    """ Check get_gfs_val against values read directly from a pickled GFS dict
    :param test_dict_loc: Path to the pickled GFS dict used as the test fixture
    """
    # Pickles are binary data: open in binary mode so the stream is not
    # mangled on platforms that translate line endings in text mode.
    with open(test_dict_loc, 'rb') as fpkl:
        gfs_dict = cPickle.load(fpkl)
    lat = 89.5
    lon = 50.0
    # NOTE(review): other callers in this file invoke get_gfs_val with six
    # arguments (lat, lon, day, month, gfs_dict, year) -- confirm that this
    # four-argument form still matches the current signature.
    val = get_gfs_val(lat, lon, 0, gfs_dict)
    assert (np.abs(gfs_dict[(1, 1)][1, 100] - val) <= TOL)
    # A query nudged by 0.1 degrees is expected to return the same stored value
    val2 = get_gfs_val(lat - .1, lon - .1, 0, gfs_dict)
    assert (np.abs(gfs_dict[(1, 1)][1, 100] - val2) <= TOL)
def get_feat_df(year,
                outfile=None,
                fire_df_loc='/extra/zbutler0/data/west_coast.pkl',
                gfs_locs=('/extra/zbutler0/data/temp_dict.pkl',
                          '/extra/zbutler0/data/hum_dict.pkl',
                          '/extra/zbutler0/data/vpd_dict.pkl'),
                gfs_names=('temp', 'humidity', 'vpd'),
                clust_thresh=10):
    """ Attach one GFS (weather) value per layer to every fire detection of a year
    :param year: Year to keep; rows of the fire DataFrame with a different year are dropped
    :param outfile: If given, path the resulting DataFrame is pickled to
    :param fire_df_loc: Path to the pickled fire DataFrame
    :param gfs_locs: Paths to the pickled GFS dicts, one per layer
    :param gfs_names: Column name to use for each entry of gfs_locs (paired by position)
    :param clust_thresh: Not used by this function; kept so existing callers keep working
    :return: the filtered fire DataFrame with one extra column per GFS layer; a value is
                NaN wherever get_gfs_val raised KeyError or IndexError for that row
    """
    # Pickles are binary data, so read and write them in binary mode.
    with open(fire_df_loc, 'rb') as ffire:
        fire_df = cPickle.load(ffire)
    # Work on an explicit copy so adding columns below does not write to a
    # slice of the loaded frame (avoids pandas' SettingWithCopy warning).
    fire_df = fire_df[fire_df.year == year].copy()
    if "dayofyear" not in fire_df:
        fire_df = add_daymonth(fire_df)
    # If no XYs, create them, assuming we're in Alaska
    # NOTE(review): the default fire_df_loc points at a west-coast file --
    # confirm ak_bb is the right bounding box for it.
    if "x" not in fire_df:
        fire_df = append_xy(fire_df, ak_bb)

    gfs_dict_dict = dict()
    for loc, name in zip(gfs_locs, gfs_names):
        with open(loc, 'rb') as fpkl:
            gfs_dict_dict[name] = cPickle.load(fpkl)

    # One NaN-filled vector per layer; entries stay NaN when no value is found
    n_rows = len(fire_df)
    gfs_vecs = dict((name, np.full(n_rows, np.nan)) for name in gfs_dict_dict)

    # Read the per-row inputs positionally, once per row rather than once per
    # layer; this also stays correct if the index has duplicate labels.
    rows = zip(fire_df.lat.values, fire_df.long.values,
               fire_df.day.values, fire_df.month.values)
    for i, (lat, lon, day, month) in enumerate(rows):
        for name, gfs_dict in gfs_dict_dict.items():
            try:
                gfs_vecs[name][i] = get_gfs_val(lat, lon, day, month, gfs_dict,
                                                year)
            except (KeyError, IndexError):
                # Deliberate best effort: no GFS data for this date/location,
                # so the NaN placeholder is left in place.
                pass
    for name, vec in gfs_vecs.items():
        fire_df[name] = pd.Series(vec, index=fire_df.index)

    if outfile:
        with open(outfile, 'wb') as fout:
            cPickle.dump(fire_df, fout, cPickle.HIGHEST_PROTOCOL)
    return fire_df
def compute_feat_df(year, fire_df, clusts, gfs_dict_dict):
    """ Get a DataFrame to make active fire prediction easy
    :param year: Year we want to look at
    :param fire_df: DataFrame of active fires. Should contain fields year, dayofyear, lat, long, x, and y
    :param clusts: Cluster assignments for each detection; used as a mask over the
                detections of the requested year, so it must line up with those rows
    :param gfs_dict_dict: Dict of dicts, each inner dict representing a GFS (weather) layer
    :return: a DataFrame for prediction with one row per cluster per day between the
                cluster's first and last detection day, with fields fire_id, day, day_cent,
                n_det, n_det_cum, lat, lon, x, y, gfs...  where we have as many gfs fields
                as the len of gfs_dict_dict. A gfs field is NaN where get_gfs_val raised
                KeyError or IndexError.
    """
    detections = fire_df[fire_df.year == year]
    clust_vals = np.unique(clusts)

    # NOTE: convex-hull features (hull_size, hull_size_cum) are currently
    # disabled and therefore not part of the output.
    base_cols = ('fire_id', 'day', 'day_cent', 'n_det', 'n_det_cum',
                 'lat', 'lon', 'x', 'y')
    df_dict = dict((col, []) for col in base_cols)
    for name in gfs_dict_dict:
        df_dict[name] = []

    # The original passed `year % 4`, which is 0 (falsy) exactly for leap
    # years, i.e. the flag was inverted. Pass a real boolean instead.
    # NOTE(review): assumes day2monthday's `leapyear` is a boolean flag -- confirm.
    is_leap = (year % 4 == 0) and (year % 100 != 0 or year % 400 == 0)

    for clust in clust_vals:
        clust_dets = detections[clusts == clust]
        days = clust_dets.dayofyear
        # Cast so the day loop works even if dayofyear is stored as floats
        min_day = int(np.min(days))
        max_day = int(np.max(days))
        # The cluster centre is computed over all of its detections and is
        # repeated on every row emitted for this cluster.
        center_lat = np.mean(clust_dets.lat)
        center_lon = np.mean(clust_dets.long)
        center_x = np.mean(clust_dets.x)
        center_y = np.mean(clust_dets.y)
        for day in range(min_day, max_day + 1):
            # We'll have exactly one entry in our DataFrame for this cluster on this day
            n_day = len(clust_dets[clust_dets.dayofyear == day])
            n_cum = len(clust_dets[clust_dets.dayofyear <= day])
            df_dict['fire_id'].append(clust)
            df_dict['day'].append(day)
            df_dict['day_cent'].append(day - min_day)
            df_dict['n_det'].append(n_day)
            df_dict['n_det_cum'].append(n_cum)
            df_dict['lat'].append(center_lat)
            df_dict['lon'].append(center_lon)
            df_dict['x'].append(center_x)
            df_dict['y'].append(center_y)

            month, dayofmonth = day2monthday(day, leapyear=is_leap)
            for name, gfs_dict in gfs_dict_dict.items():
                try:
                    gfs_val = get_gfs_val(center_lat, center_lon, dayofmonth,
                                          month, gfs_dict, year)
                except (KeyError, IndexError):
                    # No GFS data for this date/location: keep the row, mark missing
                    gfs_val = np.nan
                df_dict[name].append(gfs_val)

    return pd.DataFrame(df_dict)