def make_gif(df, gfs_dict, name="Temp"):
    # Build one frame per summer day (June 1 - Sept 1, 2013): GFS temperature
    # field over Alaska with that day's fire detections in red and previous
    # detections in black, then stitch the frames into a GIF with ImageMagick.
    ak_bb = [55, 71, -165, -138]  # Alaska bounding box: [lat_min, lat_max, lon_min, lon_max]
    lats = gfs_dict['lats']
    lons = gfs_dict['lons']
    plot_bb_0 = np.where(lats[:, 0] <= ak_bb[0])[0][0]
    plot_bb_1 = np.where(lats[:, 0] <= ak_bb[1])[0][0]
    plot_bb_2 = np.where(lons[0, :] >= (ak_bb[2] % 360))[0][0]
    plot_bb_3 = np.where(lons[0, :] >= (ak_bb[3] % 360))[0][0]
    mp = Basemap(projection="merc", lat_0=55, lon_0=-165,
                 llcrnrlat=55, llcrnrlon=-165,
                 urcrnrlat=71, urcrnrlon=-138, resolution='i')
    start_day = monthday2day(6, 1)
    end_day = monthday2day(9, 1)
    min_temp = gfs_dict['min']
    max_temp = gfs_dict['max']
    prev_lats = []
    prev_lons = []
    for dayy in xrange(start_day, end_day):
        if len(prev_lats):
            mp.plot(np.array(prev_lons), np.array(prev_lats), 'ko')
        monthday = day2monthday(dayy)
        today_fires = df[(df.year == 2013) & (df.month == monthday[0]) &
                         (df.day == monthday[1])]
        if len(today_fires):
            mp_lons, mp_lats = mp(np.array(today_fires.lon),
                                  np.array(today_fires.lat))
            mp.plot(mp_lons, mp_lats, 'ro')
            prev_lats += list(mp_lats)
            prev_lons += list(mp_lons)
        temp_vals = gfs_dict[monthday]
        mp.imshow(temp_vals[plot_bb_0 - 1:plot_bb_1 - 1:-1, plot_bb_2:plot_bb_3],
                  vmin=min_temp, vmax=max_temp)
        plt.title("%s for %d/%d" % (name, monthday[0], monthday[1]))
        mp.drawcoastlines()
        mp.colorbar()
        plt.savefig('gifmaking/day%d.png' % dayy)
        plt.close()
    os.system('convert -delay 100 -loop 0 gifmaking/day*.png gifmaking/%s_loop_2013.gif' % name)

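# The sketch below is a hypothetical illustration (not part of the original
# module) of the gfs_dict layout that make_gif expects, inferred from the
# lookups above: 2-D 'lats'/'lons' grids, scalar 'min'/'max' color limits,
# and one 2-D temperature grid keyed by each (month, day) tuple. The
# `daily_temp_grids` argument is an assumed name for that per-day mapping.
def _example_make_gif_call(df, lats_grid, lons_grid, daily_temp_grids):
    gfs_dict = {'lats': lats_grid, 'lons': lons_grid,
                'min': min(np.min(v) for v in daily_temp_grids.values()),
                'max': max(np.max(v) for v in daily_temp_grids.values())}
    gfs_dict.update(daily_temp_grids)  # (month, day) -> 2-D temperature array
    make_gif(df, gfs_dict, name="Temp")
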
def color_fire_by_time(modis, fire_bb, year, outfi='pics/small_fire_over_time'):
    # fire_bb = [lat_min, lat_max, lon_min, lon_max]
    my_fires = modis[(modis.lat < fire_bb[1]) & (modis.lat > fire_bb[0]) &
                     (modis.lon < fire_bb[3]) & (modis.lon > fire_bb[2]) &
                     (modis.year == year)]
    min_month = np.min(my_fires.month)
    min_dayy = np.min(my_fires[my_fires.month == min_month].day)
    min_day = monthday2day(min_month, min_dayy, leapyear=(year % 4) == 0)
    max_month = np.max(my_fires.month)
    # Take the max day within the last month (the original looked in min_month).
    max_dayy = np.max(my_fires[my_fires.month == max_month].day)
    max_day = monthday2day(max_month, max_dayy, leapyear=(year % 4) == 0)
    lats = []
    longs = []
    colors = []
    month = min_month
    day = min_dayy
    while month < max_month or day < max_dayy:
        dayofyear = monthday2day(month, day, leapyear=(year % 4) == 0)
        todays_fires = my_fires[(my_fires.month == month) & (my_fires.day == day)]
        today_longs, today_lats = np.array(todays_fires.lon), np.array(todays_fires.lat)
        lats += list(today_lats)
        longs += list(today_longs)
        colors += [float(dayofyear)] * len(todays_fires)  # / (max_day - min_day)]*len(todays_fires)
        year, month, day = increment_day(year, month, day)
    plt.scatter(longs, lats, c=colors)
    frame = plt.gca()
    frame.axes.get_xaxis().set_visible(False)
    frame.axes.get_yaxis().set_visible(False)
    plt.colorbar()
    plt.title("%d Alaska fire over time (color=day of year)" % year)
    plt.savefig(outfi)

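# Hypothetical usage sketch (not from the original code): plot one fire's
# progression given a bounding box in the same [lat_min, lat_max, lon_min,
# lon_max] order the filtering above assumes. The coordinates and year here
# are made up for illustration.
def _example_color_fire_by_time(modis):
    fire_bb = [65.0, 65.5, -150.5, -150.0]  # illustrative values only
    color_fire_by_time(modis, fire_bb, 2013, outfi='pics/example_fire_over_time')
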
def get_time_series(df, clusts, n_CCs, zero_centered=False):
    time_series = []
    for clust in xrange(n_CCs):
        clust_fires = df.iloc[np.where(clusts == clust)]
        time_arr = np.zeros(len(clust_fires))
        for i, (month, day) in enumerate(zip(clust_fires.month, clust_fires.day)):
            my_day = monthday2day(month, day, leapyear=False)
            time_arr[i] = my_day
        sorted_times = np.sort(time_arr)
        if zero_centered:
            min_day = sorted_times[0]
            time_series.append(sorted_times - min_day)
        else:
            time_series.append(sorted_times)
    return time_series

def get_summary_stats(df, clusts, n_CCs):
    # Summary stats we want to collect
    len_arr = np.zeros(n_CCs)  # Total detections in fire
    hull_area_arr = np.zeros(n_CCs)  # Area of the convex hull of detections
    mean_dist_from_center_arr = np.zeros(n_CCs)  # Mean distance from centroid of detections
    fire_duration_arr = np.zeros(n_CCs)  # How long each fire lasts
    for clust in xrange(n_CCs):
        clust_fires = df.iloc[np.where(clusts == clust)]
        len_arr[clust] = len(clust_fires)
        mean_x = np.mean(clust_fires.x)
        mean_y = np.mean(clust_fires.y)
        dist_from_center_arr = []
        for y, x in zip(clust_fires.y, clust_fires.x):
            dist_from_center_arr.append(np.sqrt((x - mean_x) ** 2 + (y - mean_y) ** 2))
        xy_mat = np.column_stack((clust_fires.x, clust_fires.y))
        if len(clust_fires) >= 3:
            # For a 2-D point set, scipy's ConvexHull.volume is the enclosed area.
            hull_area_arr[clust] = ConvexHull(xy_mat).volume
        else:
            hull_area_arr[clust] = 0
        mean_dist_from_center_arr[clust] = np.mean(dist_from_center_arr)
        min_day = np.inf
        max_day = -np.inf
        for i, (month, day) in enumerate(zip(clust_fires.month, clust_fires.day)):
            my_day = monthday2day(month, day, leapyear=False)
            if my_day < min_day:
                min_day = my_day
            if my_day > max_day:
                max_day = my_day
        fire_duration_arr[clust] = max_day - min_day
    ret_dict = dict()
    ret_dict['len'] = len_arr
    ret_dict['area'] = hull_area_arr
    ret_dict['dist_from_center'] = mean_dist_from_center_arr
    ret_dict['duration'] = fire_duration_arr
    return ret_dict

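# Hypothetical sketch (not in the original module) showing how the two helpers
# above might be combined. `clusts` is assumed to be an array of cluster labels
# (one per detection in df) produced by the clustering step elsewhere in the
# repo, with n_CCs distinct clusters.
def _example_cluster_report(df, clusts, n_CCs):
    stats = get_summary_stats(df, clusts, n_CCs)
    series = get_time_series(df, clusts, n_CCs, zero_centered=True)
    for clust in xrange(n_CCs):
        print "fire %d: %d detections, hull area %.4f, duration %d days" % (
            clust, int(stats['len'][clust]), stats['area'][clust],
            int(stats['duration'][clust]))
    return stats, series
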
def add_daymonth(df):
    # Note: the leap-year test must compare against 0; a bare (z % 4) is truthy
    # for non-leap years.
    days = map(lambda x, y, z: monthday2day(x, y, leapyear=(z % 4) == 0),
               df.month, df.day, df.year)
    df.loc[:, 'dayofyear'] = days
    return df

def station_csv_to_pandas(csv_file, outfi):
    # Parse an hourly weather-station CSV into one row per day: keep the
    # observation closest to noon for most fields, take the daily rain total
    # from the last reading of each day ("T" = trace, counted as 0.0), and
    # pickle the resulting DataFrame to outfi.
    with open(csv_file) as fcsv:
        header = fcsv.readline().strip().split(",")
        name2col_num = dict()
        for key, val in fields_of_interest.iteritems():
            name2col_num[val] = header.index(key)
        res_dict = dict()
        for key in name2col_num.keys():
            if key == "date":
                res_dict["day"] = []
                res_dict["month"] = []
                res_dict["year"] = []
                res_dict["dayofyear"] = []
                continue
            res_dict[key] = []
        # first_time = 1
        prev_spl = []
        prev_hour = 23
        prev_day = 1
        prev_time = datetime.strptime("23:00", DT_FMT)
        for line in fcsv:
            spl = line.strip().split(",")
            # if first_time:
            #     res_dict['lat'] = float(spl[name2col_num['lat']])
            #     res_dict['lon'] = float(spl[name2col_num['lon']])
            #     first_time = 0
            dt = spl[name2col_num["date"]]
            year = int(dt.split(" ")[0].split("-")[0])
            month = int(dt.split(" ")[0].split("-")[1])
            day = int(dt.split(" ")[0].split("-")[2])
            hour = int(dt.split(" ")[1].split(":")[0])
            minute = int(dt.split(" ")[1].split(":")[1])
            if prev_spl:
                print prev_spl[name2col_num["rain"]]  # debug output
            if hour >= 12 and prev_hour == 11:
                # We just crossed noon; keep whichever reading is closer to it.
                prev_delta = NOON - prev_time
                current_delta = datetime.strptime("%d:%d" % (hour, minute), DT_FMT) - NOON
                if abs(prev_delta.total_seconds()) < abs(current_delta.total_seconds()):
                    my_spl = prev_spl
                else:
                    my_spl = spl
                for key in name2col_num.keys():
                    if key == "date":
                        res_dict["day"].append(day)
                        res_dict["month"].append(month)
                        res_dict["year"].append(year)
                        res_dict["dayofyear"].append(
                            monthday2day(month, day, year % 4 == 0))
                        continue
                    if key == "lat" or key == "lon" or key == "rain":
                        continue
                    try:
                        res_dict[key].append(float(my_spl[name2col_num[key]]))
                    except ValueError:
                        res_dict[key].append(np.nan)
            elif prev_day != day:
                # the last timestamp was the last of that day and thus has rain
                rain_str = prev_spl[name2col_num["rain"]]
                if rain_str.startswith("T"):
                    res_dict["rain"].append(0.0)
                else:
                    res_dict["rain"].append(float(rain_str))
            prev_day = day
            prev_hour = hour
            prev_time = datetime.strptime("%d:%d" % (hour, minute), DT_FMT)
            prev_spl = spl
        # Gotta do rain one last time:
        rain_str = prev_spl[name2col_num["rain"]]
        if rain_str.startswith("T"):
            res_dict["rain"].append(0.0)
        else:
            res_dict["rain"].append(float(rain_str))
    for key, val in res_dict.iteritems():
        print "%s: %d" % (key, len(val))
    res_df = pd.DataFrame(res_dict)
    with open(outfi, 'w') as fout:
        cPickle.dump(res_df, fout, protocol=cPickle.HIGHEST_PROTOCOL)

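# Minimal usage sketch (hypothetical file paths, not part of the original
# module). It assumes the module-level fields_of_interest, DT_FMT, and NOON
# constants used above are defined, and that the CSV has the hourly station
# format the parser expects.
def _example_station_conversion():
    station_csv_to_pandas("data/station_hourly.csv", "data/station_daily.pkl")
    with open("data/station_daily.pkl") as fin:
        station_df = cPickle.load(fin)
    return station_df
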
def convert_to_pd_batch(my_dir, outfi=None, beginning=2007, ending=2016, verbose=False):
    """
    Take CSV files with MODIS active fire data and convert them to a DataFrame

    :param my_dir: Directory with MODIS files
    :param outfi: Optional file to dump output DataFrame to
    :param beginning: beginning year to use
    :param ending: ending year to use
    :return: DataFrame with MODIS active fire data
    """
    # First, build up lists with each column
    year_list = []
    month_list = []
    day_list = []
    dayofyear_list = []
    hour_list = []
    minute_list = []
    lat_list = []
    lon_list = []
    frp_list = []
    satellite_list = []
    confidence_list = []
    for i, fname in enumerate(os.listdir(my_dir)):
        year = int(fname.split(".")[1][0:4])
        if not beginning <= year <= ending:
            continue
        with open(os.path.join(my_dir, fname)) as fin:
            fin.readline()  # Ignore header
            for line in fin:
                yyyymmdd = line.split()[0]
                year = int(yyyymmdd[0:4])
                month = int(yyyymmdd[4:6])
                day = int(yyyymmdd[6:])
                year_list.append(year)
                month_list.append(month)
                day_list.append(day)
                dayofyear_list.append(
                    monthday2day(month, day, leapyear=(year % 4 == 0)))
                hhmm = line.split()[1]
                hour_list.append(int(hhmm[0:2]))
                minute_list.append(int(hhmm[2:]))
                lat_list.append(float(line.split()[3]))
                lon_list.append(float(line.split()[4]))
                frp_list.append(float(line.split()[8]))
                satellite_list.append(line.split()[2])
                confidence_list.append(float(line.split()[9]) / 100.)
        if verbose:
            print "finished reading file %d" % i
    # Now, make a dictionary that we will then cast to a DataFrame
    pd_dict = dict()
    pd_dict['year'] = year_list
    pd_dict['month'] = month_list
    pd_dict['day'] = day_list
    pd_dict['dayofyear'] = dayofyear_list
    pd_dict['hour'] = hour_list
    pd_dict['minute'] = minute_list
    pd_dict['lat'] = lat_list
    pd_dict['lon'] = lon_list
    pd_dict['frp'] = frp_list
    pd_dict['confidence'] = confidence_list
    pd_dict['satellite'] = satellite_list
    df = pd.DataFrame(pd_dict)
    print "created DataFrame of size %d" % (len(df))
    if outfi:
        with open(outfi, 'w') as fout:
            cPickle.dump(df, fout, protocol=cPickle.HIGHEST_PROTOCOL)
    return df

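# Hypothetical end-to-end sketch (the directory and pickle paths are made up):
# build the MODIS DataFrame and reuse add_daymonth from above. Note that
# convert_to_pd_batch already fills 'dayofyear', so the add_daymonth call is
# shown only for illustration.
def _example_modis_pipeline():
    modis_df = convert_to_pd_batch("data/modis_active_fire",
                                   outfi="data/modis_df.pkl",
                                   beginning=2007, ending=2016, verbose=True)
    modis_df = add_daymonth(modis_df)
    return modis_df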