def find_csvs(overwrite=True):
    """Find training csvs in each site's configured path.

    Args:
        overwrite: when False, csvs whose geographic index already has a
            completed .csv in the site's h5 directory are skipped.

    Returns:
        dict mapping site name -> list of csv paths still to be processed.
    """
    DeepForest_config = config.load_config()
    data_paths = {}

    for site in DeepForest_config['training_csvs']:
        file_path = DeepForest_config[site]["training_csvs"]
        search_path = os.path.join(file_path, "*.csv")
        found_csvs = glob.glob(search_path)

        if not overwrite:
            #find completed sites: completed tiles leave a .csv next to the h5s
            completed = glob.glob(
                os.path.join(DeepForest_config[site]["h5"], "*.csv"))

            #get geographic index (e.g. "123_456") and store
            # (raw strings: "\d" in a plain string is a deprecated escape)
            p = re.compile(r"(\d+_\d+)_image")
            completed_geo_index = [p.findall(x)[0] for x in completed]
            print("There are {} completed files".format(len(completed_geo_index)))

            #For each found csv, has it been completed?
            # Rebuild the list instead of remove()-while-iterating (O(n^2)).
            p2 = re.compile(r"(\d+_\d+)_c")
            completed_set = set(completed_geo_index)
            remaining = []
            for x in found_csvs:
                geo_index = p2.findall(x)[0]
                if geo_index in completed_set:
                    print("{} already run".format(geo_index))
                else:
                    remaining.append(x)
            found_csvs = remaining

        data_paths[site] = found_csvs

    return data_paths
def run_HPC(data_paths):
    """Process every site's csv list on a dask cluster backed by SLURM jobs."""
    #################
    # Setup dask cluster
    #################
    from dask_jobqueue import SLURMCluster
    from dask.distributed import Client, wait

    DeepForest_config = config.load_config()
    worker_count = DeepForest_config["num_hipergator_workers"]

    # Extra arguments handed to each SLURM worker job
    slurm_args = [
        "--error=/home/b.weinstein/logs/dask-worker-%j.err",
        "--account=ewhite",
        "--output=/home/b.weinstein/logs/dask-worker-%j.out"
    ]

    cluster = SLURMCluster(processes=1,
                           queue='hpg2-compute',
                           cores=1,
                           memory='13GB',
                           walltime='24:00:00',
                           job_extra=slurm_args,
                           local_directory="/home/b.weinstein/logs/",
                           death_timeout=300)

    print(cluster.job_script())
    cluster.adapt(minimum=worker_count, maximum=worker_count)

    client = Client(cluster)

    # Start the dask tunnel on the scheduler node
    client.run_on_scheduler(start_tunnel)

    # Fan each site's csvs out to the workers and block until done
    for site in data_paths:
        site_futures = client.map(Generate.run,
                                  data_paths[site],
                                  site=site,
                                  DeepForest_config=DeepForest_config)
        wait(site_futures)
        print("{} complete".format(site))

    print("All sites complete")
def test_multigpu_training():
    """Smoke test: build a multi-GPU retinanet and run one training fit."""
    experiment = Experiment(api_key="ypQZhYfs3nSyKzOfz13iuJpj2",
                            project_name='deeplidar',
                            log_code=True)

    DeepForest_config = config.load_config(dir="..")
    DeepForest_config["save_image_path"] = "../snapshots/"

    # Build h5 generators from the retraining annotations
    retraining_data = generators.load_retraining_data(DeepForest_config)
    train_generator, validation_generator = generators.create_h5_generators(
        retraining_data, DeepForest_config=DeepForest_config)

    #imagenet pretraining weights for the backbone
    backbone = models.backbone(DeepForest_config["backbone"])
    pretrained_weights = backbone.download_imagenet()

    model, training_model, prediction_model = create_models(
        backbone_retinanet=backbone.retinanet,
        num_classes=train_generator.num_classes(),
        weights=pretrained_weights,
        multi_gpu=DeepForest_config["num_GPUs"],
        freeze_backbone=False,
        nms_threshold=DeepForest_config["nms_threshold"],
        input_channels=DeepForest_config["input_channels"])

    #start training
    history = training_model.fit_generator(
        generator=train_generator,
        steps_per_epoch=train_generator.size() / DeepForest_config["batch_size"],
        epochs=DeepForest_config["epochs"],
        verbose=2,
        shuffle=False,
        workers=DeepForest_config["workers"],
        use_multiprocessing=DeepForest_config["use_multiprocessing"],
        max_queue_size=DeepForest_config["max_queue_size"],
        experiment=experiment)
from memory_profiler import profile
import matplotlib.pyplot as plt

# Decimal places reported by memory_profiler
precision = 8
# Memory profile output is streamed to this log file
fp = open('h5_memory.log', 'w+')

#Path hack: make the parent DeepForest package importable when running
#this script directly from its own directory.
# NOTE(review): `os` and `sys` are used below but not imported in this
# chunk — presumably imported earlier in the file; confirm.
dir_path = os.path.dirname(os.path.realpath(__file__))
parent_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(parent_path)
print(parent_path)

from DeepForest import h5_generator, preprocess, config
from DeepForest.utils import generators, image_utils

DeepForest_config = config.load_config(dir="..")

#Load xml annotations and split tiles into train/test
tiles = generators.load_retraining_data(DeepForest_config)
train, test = generators.split_tiles(tiles, DeepForest_config)


@profile(precision=precision, stream=fp)
def test_h5_generator(train, DeepForest_config):
    """Iterate every batch of the h5 generator under memory profiling.

    Args:
        train: tile set used to build the generator.
        DeepForest_config: loaded configuration dict (currently unused here).
    """
    #Training Generator
    generator = h5_generator.H5generator(train)
    for i in range(len(generator)):
        inputs, targets = generator.__getitem__(i)
        # Each batch is expected to yield (regression, classification) targets
        assert len(targets) == 2, "targets has incorrect length"
import h5py
import glob
import os

from DeepForest import config

#Load config
DeepForest_config = config.load_config()

# All generated h5 crops for the TEAK site
pattern = os.path.join(DeepForest_config["TEAK"]["h5"], "*.h5")
files = glob.glob(pattern)

# Number of h5 files that failed to open/read
counter = 0
for f in files:
    try:
        hf = h5py.File(f, 'r')
        shape = hf['train_imgs'][0, ].shape
        print("{t} has a shape {s}".format(t=f, s=shape))
    except Exception as e:
        print("{f} failed with error message {e}".format(f=f, e=e))
        counter += 1
        # A failed h5 has a sibling csv with the same stem
        filpath = os.path.splitext(f)[0]
        to_delete_csv = filpath + ".csv"
        # NOTE(review): the `pass` before each os.remove looks like the
        # deletions were meant to be disabled or guarded — confirm intent
        # before running this against real data.
        try:
            pass
            os.remove(to_delete_csv)
        except Exception as e:
            print(e)
        try:
            pass
            os.remove(f)
            # NOTE(review): chunk appears truncated here — the matching
            # except clause for this try is outside the visible region.
# NOTE(review): `mode_parser` is used here but not defined in this chunk —
# presumably constructed earlier in the file; confirm.
mode = mode_parser.parse_args()

import os
import pandas as pd
import glob
import numpy as np
from datetime import datetime

from DeepForest.config import load_config
from DeepForest import preprocess

#save time for logging
dirname = datetime.now().strftime("%Y%m%d_%H%M%S")

#Load DeepForest_config and data file based on training or retraining mode
DeepForest_config = load_config("train")
data = preprocess.load_data(DeepForest_config["training_csvs"],
                            DeepForest_config["rgb_res"],
                            DeepForest_config["lidar_path"])

#Log site
site = DeepForest_config["evaluation_site"]

##Preprocess Filters##
# Drop annotations with zero area when the config asks for it
if DeepForest_config['preprocess']['zero_area']:
    data = preprocess.zero_area(data)

#pass an args object instead of using command line
args = [
    "--epochs", str(DeepForest_config["epochs"]),
    "--batch-size",
    # NOTE(review): chunk truncated — the remainder of this args list is
    # outside the visible region.
if __name__ == '__main__': import argparse #Set training or training mode_parser = argparse.ArgumentParser( description='Retinanet training or finetuning?') mode_parser.add_argument('--saved_model', help='train or retrain?') mode = mode_parser.parse_args() import pandas as pd import numpy as np from DeepForest.config import load_config DeepForest_config = load_config("..") DeepForest_config["evaluation_site"] = ["OSBS"] trained_models = { #"SJER":"/orange/ewhite/b.weinstein/retinanet/20190715_133239/resnet50_30.h5", #"TEAK":"/orange/ewhite/b.weinstein/retinanet/20190713_230957/resnet50_40.h5", #"NIWO":"/orange/ewhite/b.weinstein/retinanet/20190712_055958/resnet50_40.h5", #"MLBS":"/orange/ewhite/b.weinstein/retinanet/20190712_035528/resnet50_40.h5", "All": [ "/orange/ewhite/b.weinstein/retinanet/20190715_123358/resnet50_40.h5" ] } results = [] for training_site in trained_models: trained_model_list = trained_models[training_site]
def sample(n=50):
    """ Grab n random images from across the site.

    Writes a .tif crop and the matching clipped .laz point cloud for each
    sampled window into data/<evaluation_site>/samples.

    Args:
        n: number of distinct (tile, window) pairs to sample.
    """
    #Load config
    DeepForest_config = config.load_config()

    #Read in data
    data = preprocess.load_data(data_dir=DeepForest_config['training_csvs'],
                                res=0.1,
                                lidar_path=DeepForest_config["lidar_path"])

    #Create windows
    windows = preprocess.create_windows(
        data, DeepForest_config,
        base_dir=DeepForest_config["evaluation_tile_dir"])
    selected_windows = windows[["tile", "window"]].drop_duplicates().sample(n=n)

    generator = onthefly_generator.OnTheFlyGenerator(
        data=data,
        windowdf=selected_windows,
        DeepForest_config=DeepForest_config)

    folder_dir = os.path.join("data", DeepForest_config["evaluation_site"],
                              "samples")
    # makedirs also creates missing parents (mkdir fails if data/<site> is absent)
    if not os.path.exists(folder_dir):
        os.makedirs(folder_dir)

    for i in range(generator.size()):
        #Load image - done for side effects, allow to skip bad tiles.
        try:
            three_channel = generator.load_image(i)
        except Exception:
            continue

        #load lidar
        generator.load_lidar_tile()
        generator.clip_las()
        # `is None` — identity check, not overloaded equality
        if generator.clipped_las is None:
            continue

        # Common stem: <tile>_<window>
        base = os.path.splitext(generator.image_data[i]["tile"])[0]
        stem = base + "_" + str(generator.image_data[i]["window"])

        #Write RGB crop
        cv2.imwrite(os.path.join(folder_dir, stem + ".tif"), generator.image)

        #Write clipped .laz point cloud
        generator.clipped_las.write(os.path.join(folder_dir, stem + ".laz"))
# NOTE(review): this chunk begins mid-function — the enclosing `def` and the
# call this first fragment completes are outside the visible region.
            '{}.png'.format(fname)), name=fname)

        # copy detections to all_detections, one slot per class label
        for label in range(generator.num_classes()):
            all_detections[i][label] = image_detections[
                image_detections[:, -1] == label, :-1]

    return all_detections


# set the modified tf session as backend in keras
keras.backend.tensorflow_backend.set_session(get_session())

#load config
DeepForest_config = config.load_config("../")

# Trained snapshot path per training site
trained_models = {
    "SJER":
        "/orange/ewhite/b.weinstein/retinanet/20190715_133239/resnet50_30.h5",
    "TEAK":
        "/orange/ewhite/b.weinstein/retinanet/20190713_230957/resnet50_40.h5",
    "NIWO":
        "/orange/ewhite/b.weinstein/retinanet/20190712_055958/resnet50_40.h5",
    "MLBS":
        "/orange/ewhite/b.weinstein/retinanet/20190712_035528/resnet50_40.h5",
    "All":
        "/orange/ewhite/b.weinstein/retinanet/20190715_123358/resnet50_40.h5"
}

for trained_model in trained_models:
    # NOTE(review): chunk truncated — this loop's body is outside the
    # visible region.
def run_local(data_paths):
    """Serially run Generate.run on every csv of every configured site."""
    DeepForest_config = config.load_config()

    for site in DeepForest_config['training_csvs']:
        site_csvs = data_paths[site]
        for csv_path in site_csvs:
            Generate.run(csv_path,
                         DeepForest_config=DeepForest_config,
                         site=site)
def run_test(data_paths):
    """Run Generate.run once, on the first csv of the first configured site."""
    DeepForest_config = config.load_config()

    first_site = DeepForest_config['training_csvs'][0]
    first_csv = data_paths[first_site][0]
    Generate.run(first_csv,
                 DeepForest_config=DeepForest_config,
                 site=first_site)
from datetime import datetime

from DeepForest.config import load_config
from DeepForest import preprocess

#Set training or retraining mode from the command line
# NOTE(review): `argparse`, `os`, `Experiment`, `load_training_data` and
# `load_retraining_data` are used below but not imported in this chunk —
# presumably imported earlier in the file; confirm.
mode_parser = argparse.ArgumentParser(description='Retinanet training or finetuning?')
mode_parser.add_argument('--mode', help='train or retrain?')
mode_parser.add_argument('--dir', help='destination dir')
mode_parser.add_argument('--saved_model', help='train or retrain?')
mode = mode_parser.parse_args()

#set experiment and log configs
experiment = Experiment(api_key="ypQZhYfs3nSyKzOfz13iuJpj2", project_name='deeplidar', log_code=True)
DeepForest_config = load_config()

#Log parameters
experiment.log_parameter("Start Time", mode.dir)
experiment.log_parameter("Training Mode", mode.mode)
experiment.log_parameters(DeepForest_config)
DeepForest_config["mode"] = mode.mode

if mode.mode == "train":
    data = load_training_data(DeepForest_config)

if mode.mode == "retrain":
    data = load_retraining_data(DeepForest_config)
    # Hand-annotated h5s live in a "hand_annotations" subfolder per site
    for x in DeepForest_config["evaluation_site"]:
        DeepForest_config[x]["h5"] = os.path.join(DeepForest_config[x]["h5"], "hand_annotations")
from comet_ml import Experiment

import sys
import os
from datetime import datetime
import glob
import pandas as pd
import copy

#insert path
from DeepForest.config import load_config
from DeepForest.utils.generators import load_retraining_data
from train import main as training_main
from eval import main as eval_main

#load config - clean copy so each run can start from unmodified settings
original_DeepForest_config = load_config()

# Pretraining snapshot per site combination
pretraining_models = {
    "SJER": "/orange/ewhite/b.weinstein/retinanet/20190318_144257/resnet50_02.h5",
    "TEAK": "/orange/ewhite/b.weinstein/retinanet/20190315_150652/resnet50_02.h5",
    "All": "/orange/ewhite/b.weinstein/retinanet/20190314_150323/resnet50_03.h5"
}
#pretraining_models = {"SJER" : "/Users/ben/Documents/DeepLidar/snapshots/TEAK_20190125_125012_fullmodel.h5"}

# Site combinations to sweep over
sites = [["SJER"], ["TEAK"], ["SJER", "TEAK"]]

#For each site, match the hand annotations with the pretraining model
results = []
if __name__ == '__main__':
    import numpy as np

    from DeepForest import preprocess
    from DeepForest.config import load_config

    #Set training or retraining mode from the command line
    # NOTE(review): `argparse` is used below but not imported in this chunk —
    # presumably imported earlier in the file; confirm.
    mode_parser = argparse.ArgumentParser(
        description='Retinanet training or finetuning?')
    mode_parser.add_argument('--mode', help='train or retrain?')
    mode = mode_parser.parse_args()

    #Load DeepForest_config file for the chosen mode
    DeepForest_config = load_config(mode.mode)

    if mode.mode == "retrain":
        #Load hand annotated data
        data = preprocess.load_xml(DeepForest_config["hand_annotations"],
                                   DeepForest_config["rgb_res"])

    if mode.mode == "train":
        #Load psuedo-labels
        data = preprocess.load_data(DeepForest_config["training_csvs"],
                                    DeepForest_config["rgb_res"])

    ##Preprocess Filters##
    if DeepForest_config['preprocess']['zero_area']:
        # NOTE(review): chunk truncated — the body of this filter branch is
        # outside the visible region.