def create_crops(merged_boxes,
                 hyperspectral_pool=None,
                 rgb_pool=None,
                 sensor="hyperspectral",
                 expand=0,
                 hyperspectral_savedir="."):
    """Crop sensor data for every bounding box in a dataframe.

    Args:
        merged_boxes: geopandas dataframe iterated row by row. Each row must
            provide "geometry" (an object exposing ``.bounds``), "plotID",
            "elevation", "height" and "taxonID".
        hyperspectral_pool: glob string for looking up matching hyperspectral
            tiles (only used when sensor == "hyperspectral")
        rgb_pool: glob string for looking up matching RGB tiles (used for
            both sensors)
        sensor: "rgb" or "hyperspectral"
        expand: units in meters to add to crops to give context around
            deepforest box
        hyperspectral_savedir: location to save converted .tif from .h5 files
    Returns:
        crops: list of cropped sensor data
        labels: list of the "taxonID" value of each row
        sites: list of site names, taken as the text before the first "_"
            in "plotID"
        heights: list of the "height" value of each row
        elevations: list of the "elevation" value of each row, cast to int
        box_index: list of "<plotID>_<row index>" strings
    Raises:
        ValueError: if a sensor tile cannot be found for a box, or if
            ``sensor`` is not one of the supported values.
    """
    crops = []
    labels = []
    sites = []
    box_index = []
    elevations = []
    heights = []

    for index, row in merged_boxes.iterrows():
        box = row["geometry"]
        plot_name = row["plotID"]
        site = plot_name.split("_")[0]
        elevation = int(row["elevation"])
        height = row["height"]

        # Resolve the raster that the crop will be taken from.
        if sensor == "rgb":
            try:
                sensor_path = find_sensor_path(bounds=box.bounds,
                                               lookup_pool=rgb_pool)
            except Exception as e:
                raise ValueError(
                    "Cannot find RGB data path for box bounds {} for plot_name {}"
                    .format(box.bounds, plot_name)) from e
        elif sensor == "hyperspectral":
            # The RGB tile is handed to convert_h5 together with the .h5 tile.
            try:
                rgb_path = find_sensor_path(bounds=box.bounds,
                                            lookup_pool=rgb_pool)
            except Exception as e:
                raise ValueError(
                    "Cannot find RGB data path for box bounds {} for plot_name {}"
                    .format(box.bounds, plot_name)) from e

            try:
                hyperspectral_h5_path = find_sensor_path(
                    bounds=box.bounds, lookup_pool=hyperspectral_pool)
            except Exception as e:
                raise ValueError(
                    "Cannot find hyperspectral data path for box bounds {} for plot_name {}"
                    .format(box.bounds, plot_name)) from e

            sensor_path = convert_h5(hyperspectral_h5_path,
                                     rgb_path,
                                     savedir=hyperspectral_savedir)
        else:
            # Without this guard sensor_path would be unbound below.
            raise ValueError(
                'Unknown sensor "{}", expected "rgb" or "hyperspectral"'.format(
                    sensor))

        crop = crop_image(sensor_path=sensor_path, box=box, expand=expand)

        crops.append(crop)
        sites.append(site)
        labels.append(row["taxonID"])
        elevations.append(elevation)
        heights.append(height)
        box_index.append("{}_{}".format(plot_name, index))

    return crops, labels, sites, heights, elevations, box_index
def create_crops(merged_boxes,
                 hyperspectral_pool=None,
                 rgb_pool=None,
                 sensor="hyperspectral",
                 expand=0,
                 hyperspectral_savedir="."):
    """Crop sensor data based on a dataframe of geopandas bounding boxes

    Args:
        merged_boxes: geopandas dataframe iterated row by row. Each row must
            provide "geometry" (an object exposing ``.bounds``), "plotID"
            and "taxonID".
        hyperspectral_pool: glob string for looking up matching hyperspectral
            tiles (only used when sensor == "hyperspectral")
        rgb_pool: glob string for looking up matching RGB tiles (used for
            both sensors)
        sensor: "rgb" or "hyperspectral"
        expand: units in meters to add to crops to give context around
            deepforest box
        hyperspectral_savedir: location to save converted .tif from .h5 files
    Returns:
        crops: list of cropped sensor data
        labels: list of the "taxonID" value of each row
        box_index: list of "<plotID>_<row index>" strings
    Raises:
        ValueError: if ``sensor`` is not one of the supported values.
    """
    crops = []
    labels = []
    box_index = []

    for index, row in merged_boxes.iterrows():
        box = row["geometry"]
        plot_name = row["plotID"]

        # Resolve the raster that the crop will be taken from.
        if sensor == "rgb":
            sensor_path = find_sensor_path(bounds=box.bounds,
                                           lookup_pool=rgb_pool,
                                           sensor="rgb")
        elif sensor == "hyperspectral":
            # The RGB tile is handed to convert_h5 together with the .h5 tile.
            rgb_path = find_sensor_path(bounds=box.bounds,
                                        lookup_pool=rgb_pool,
                                        sensor="rgb")
            hyperspectral_h5_path = find_sensor_path(
                bounds=box.bounds,
                lookup_pool=hyperspectral_pool,
                sensor="hyperspectral")
            sensor_path = convert_h5(hyperspectral_h5_path,
                                     rgb_path,
                                     savedir=hyperspectral_savedir)
        else:
            # Without this guard sensor_path would be unbound below.
            raise ValueError(
                'Unknown sensor "{}", expected "rgb" or "hyperspectral"'.format(
                    sensor))

        crop = crop_image(sensor_path=sensor_path, box=box, expand=expand)

        crops.append(crop)
        labels.append(row["taxonID"])
        box_index.append("{}_{}".format(plot_name, index))

    return crops, labels, box_index
def process_plot(plot_data, rgb_pool, deepforest_model):
    """Predict tree boxes for one NEON plot and attach the field records to them.

    Args:
        plot_data: geopandas dataframe in a utm projection; must carry the
            "plotID" and "indvdID" columns
        rgb_pool: lookup pool handed to find_sensor_path to locate the RGB tile
        deepforest_model: deepforest model used for prediction
    Returns:
        merged_boxes: geodataframe with one selected box per "indvdID" and
            the "xmin", "xmax", "ymin", "ymax" columns removed
    Raises:
        ValueError: if the model predicts no boxes for the plot
    """
    # Locate the RGB tile covering the plot and run the detector on it.
    rgb_tile = find_sensor_path(bounds=plot_data.total_bounds,
                                lookup_pool=rgb_pool,
                                sensor="rgb")
    predictions = predict_trees(deepforest_model=deepforest_model,
                                rgb_path=rgb_tile,
                                bounds=plot_data.total_bounds)

    if predictions.empty:
        raise ValueError("No trees predicted in plot: {}, skipping.".format(plot_data.plotID.unique()[0]))

    # Spatially join predictions with the field data.
    joined = gpd.sjoin(predictions, plot_data)

    # Nothing matched: fall back to boxes built directly from the field data.
    if joined.empty:
        joined = create_boxes(plot_data)

    # An individual may match several boxes; choose_box keeps one per group.
    per_individual = [
        choose_box(candidates, plot_data)
        for _, candidates in joined.groupby("indvdID")
    ]

    selected = gpd.GeoDataFrame(pd.concat(per_individual), crs=joined.crs)
    return selected.drop(columns=["xmin", "xmax", "ymin", "ymax"])
def postprocess_CHM(df, lookup_pool):
    """Add a canopy-height column and use it to fill missing field heights.

    For every geometry in ``df`` the "q99" zonal statistic (computed by
    non_zero_99_quantile) is extracted from the CHM raster and stored in a
    new "CHM_height" column. Rows whose "height" is null then receive the
    "CHM_height" value. ``df`` is modified in place and also returned.

    Args:
        df: geopandas dataframe with "geometry", "plotID" and "height" columns
        lookup_pool: lookup pool handed to find_sensor_path to locate the
            CHM raster
    Returns:
        df: the same dataframe with "CHM_height" added and "height" filled
    Raises:
        ValueError: if no CHM raster can be found for the dataframe bounds
    """
    # Locate the CHM tile covering the whole dataframe.
    try:
        CHM_path = find_sensor_path(lookup_pool=lookup_pool,
                                    bounds=df.total_bounds)
    except Exception as e:
        raise ValueError(
            "Cannot find CHM path for {} from plot {} in lookup_pool: {}".
            format(df.total_bounds, df.plotID.unique(), e)) from e

    # Extract zonal stats
    draped_boxes = rasterstats.zonal_stats(
        df.geometry.__geo_interface__,
        CHM_path,
        add_stats={'q99': non_zero_99_quantile})
    df["CHM_height"] = [x["q99"] for x in draped_boxes]

    # If height is null, assign it from the CHM. Assign the column explicitly:
    # an inplace fillna on the df.height accessor is a chained operation that
    # is not guaranteed to write back to df.
    df["height"] = df["height"].fillna(df["CHM_height"])

    return df
def process_plot(plot_data, rgb_pool, deepforest_model):
    """Predict tree boxes for one NEON plot and join them with the field data.

    Args:
        plot_data: geopandas dataframe in a utm projection
        rgb_pool: lookup pool handed to find_sensor_path to locate the RGB tile
        deepforest_model: deepforest model used for prediction
    Returns:
        merged_boxes: geodataframe of predicted boxes spatially joined with
            plot_data, with the "xmin", "xmax", "ymin", "ymax" columns removed
    """
    # Locate the RGB tile covering the plot and run the detector on it.
    rgb_tile = find_sensor_path(bounds=plot_data.total_bounds,
                                lookup_pool=rgb_pool,
                                sensor="rgb")
    predictions = predict_trees(deepforest_model=deepforest_model,
                                rgb_path=rgb_tile,
                                bounds=plot_data.total_bounds)

    # Attach field records to the predictions, then discard pixel coordinates.
    joined = gpd.sjoin(predictions, plot_data)
    return joined.drop(columns=["xmin", "xmax", "ymin", "ymax"])
def extract_features(df,
                     x,
                     model_class,
                     hyperspectral_pool,
                     site_label_dict,
                     domain_label_dict,
                     HSI_size=20,
                     k_neighbors=5):
    """Generate neighbor features for one target individual

    Args:
        df: a geopandas dataframe with "individual", "siteID" and "domainID"
            columns
        x: individual id to use as the target
        model_class: A deeptreeattention model class to extract layer features;
            its ``sites``, ``domains`` and ``ensemble_model`` attributes are read
        hyperspectral_pool: glob dir to search for sensor files
        site_label_dict: dictionary of numeric site labels
        domain_label_dict: dictionary of numeric domain labels
        HSI_size: size of HSI crop
        k_neighbors: number of neighbors to extract
    Returns:
        feature_array: a feature matrix of encoded bottleneck layer
        distances: second value returned by predict_neighbors
            (presumably the distance to each neighbor - confirm against
            predict_neighbors)
    """
    # Due to resampling, there will be multiple rows of the same point,
    # all are identical.
    target = df[df.individual == x].head(1)
    target = target.reset_index(drop=True)
    sensor_path = find_sensor_path(bounds=target.total_bounds,
                                   lookup_pool=hyperspectral_pool)

    # Encode metadata
    site = target.siteID.values[0]
    numeric_site = site_label_dict[site]
    one_hot_sites = tf.one_hot(numeric_site, model_class.sites)

    domain = target.domainID.values[0]
    numeric_domain = domain_label_dict[domain]
    one_hot_domains = tf.one_hot(numeric_domain, model_class.domains)

    # TODO: read the elevation from the h5 tile
    # (elevation_from_tile(sensor_path) / 1000); until then a fixed
    # placeholder value is used.
    elevation = 100 / 1000
    metadata = [elevation, one_hot_sites, one_hot_domains]

    # Every other individual is a candidate neighbor.
    neighbor_pool = df[~(df.individual == x)].reset_index(drop=True)

    # Open the raster in a context manager so the file handle is released
    # once the features have been extracted.
    with rasterio.open(sensor_path) as raster:
        feature_array, distances = predict_neighbors(
            target,
            metadata=metadata,
            HSI_size=HSI_size,
            raster=raster,
            neighbor_pool=neighbor_pool,
            model=model_class.ensemble_model,
            k_neighbors=k_neighbors)

    return feature_array, distances
def process_plot(plot_data, rgb_pool, deepforest_model, debug=False):
    """For a given NEON plot, find the correct sensor data, predict trees and
    associate bounding boxes with field data

    Args:
        plot_data: geopandas dataframe in a utm projection; must carry the
            "plotID", "individual" and "height" columns
        rgb_pool: lookup pool handed to find_sensor_path to locate the RGB tile
        deepforest_model: deepforest model used for prediction
        debug: if True, write the raw predicted boxes to
            "<ROOT>/data/interim/<plotID>_boxes_raw.shp"
    Returns:
        merged_boxes: geodataframe of bounding box predictions with species
            labels, reduced to one box per individual and one point per box
    Raises:
        ValueError: if the RGB tile cannot be found or no trees are predicted
    """
    # DeepForest prediction
    try:
        rgb_sensor_path = find_sensor_path(bounds=plot_data.total_bounds,
                                           lookup_pool=rgb_pool)
    except Exception as e:
        raise ValueError("cannot find RGB sensor for {}".format(
            plot_data.plotID.unique())) from e

    boxes = predict_trees(deepforest_model=deepforest_model,
                          rgb_path=rgb_sensor_path,
                          bounds=plot_data.total_bounds)

    if boxes.empty:
        raise ValueError("No trees predicted in plot: {}, skipping.".format(
            plot_data.plotID.unique()[0]))

    if debug:
        interim_dir = os.path.abspath(ROOT)
        boxes.to_file("{}/data/interim/{}_boxes_raw.shp".format(
            interim_dir,
            plot_data.plotID.unique()[0]))

    # Merge results with field data, buffer on edge
    merged_boxes = gpd.sjoin(boxes, plot_data)

    # Individuals that did not fall in any predicted box get a box created
    # directly from the field data.
    missing_ids = plot_data[~plot_data.individual.isin(merged_boxes.individual)]

    if not missing_ids.empty:
        created_boxes = create_boxes(missing_ids)
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        merged_boxes = pd.concat([merged_boxes, created_boxes])

    # If there are multiple boxes per point, take the center box
    cleaned_boxes = []
    for value, group in merged_boxes.groupby("individual"):
        choosen_box = choose_box(group, plot_data)
        cleaned_boxes.append(choosen_box)

    merged_boxes = gpd.GeoDataFrame(pd.concat(cleaned_boxes),
                                    crs=merged_boxes.crs)
    merged_boxes = merged_boxes.drop(columns=["xmin", "xmax", "ymin", "ymax"])

    # If there are multiple points per box, take the tallest point.
    # NOTE(review): rows tied at the maximum height are all kept, and a group
    # whose heights are all null yields no rows - confirm this is intended.
    cleaned_points = []
    for value, group in merged_boxes.groupby("box_id"):
        if group.shape[0] > 1:
            print("removing {} points for within a deepforest box".format(
                group.shape[0] - 1))
            cleaned_points.append(group[group.height == group.height.max()])
        else:
            cleaned_points.append(group)

    merged_boxes = gpd.GeoDataFrame(pd.concat(cleaned_points),
                                    crs=merged_boxes.crs)

    return merged_boxes
def extract_features(df,
                     x,
                     model_class,
                     hyperspectral_pool,
                     site_label_dict,
                     domain_label_dict,
                     HSI_size=20,
                     k_neighbors=5):
    """Generate neighbor features for one target individual

    Args:
        df: a geopandas dataframe with "individual", "siteID" and "domainID"
            columns
        x: individual id to use as the target
        model_class: A deeptreeattention model class to extract layer features;
            its ``sites``, ``domains`` and ``ensemble_model`` attributes are read
        hyperspectral_pool: glob dir to search for sensor files
        site_label_dict: dictionary of numeric site labels
        domain_label_dict: dictionary of numeric domain labels
        HSI_size: size of HSI crop
        k_neighbors: number of neighbors to extract
    Returns:
        feature_array: a feature matrix of encoded bottleneck layer; all
            zeros with k_neighbors rows when the neighbor pool is empty
        distances: second value returned by predict_neighbors; an array of
            9999 repeated k_neighbors times when the neighbor pool is empty
    """
    # Due to resampling, there will be multiple rows of the same point,
    # all are identical.
    target = df[df.individual == x].head(1)
    target = target.reset_index(drop=True)
    sensor_path = find_sensor_path(bounds=target.total_bounds,
                                   lookup_pool=hyperspectral_pool)

    # Encode metadata
    site = target.siteID.values[0]
    numeric_site = site_label_dict[site]
    one_hot_sites = tf.one_hot(numeric_site, model_class.sites)

    domain = target.domainID.values[0]
    numeric_domain = domain_label_dict[domain]
    one_hot_domains = tf.one_hot(numeric_domain, model_class.domains)

    # For tests, fall back to a dummy elevation when the tile cannot be read.
    # Deliberately best-effort, but only ordinary errors are swallowed so
    # KeyboardInterrupt/SystemExit still propagate.
    try:
        elevation = elevation_from_tile(sensor_path) / 1000
    except Exception:
        print("Dummy variable for elevation debug")
        elevation = 100 / 1000

    metadata = [elevation, one_hot_sites, one_hot_domains]

    # The pool is the full dataframe, so it still contains the target row
    # (per the original note, the target is always picked as neighbor 1).
    neighbor_pool = df

    # If there are no neighbors, return 0's
    if neighbor_pool.empty:
        feature_array = np.zeros(
            (k_neighbors, model_class.ensemble_model.output.shape[1]))
        distances = np.repeat(9999, k_neighbors)
    else:
        # Open the raster in a context manager so the file handle is
        # released once the features have been extracted.
        with rasterio.open(sensor_path) as raster:
            feature_array, distances = predict_neighbors(
                target,
                metadata=metadata,
                HSI_size=HSI_size,
                raster=raster,
                neighbor_pool=neighbor_pool,
                model=model_class.ensemble_model,
                k_neighbors=k_neighbors)

    return feature_array, distances