def generate_test(config, params, split_name="train", seed=None, sobol_generator=None):
    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    data_dirpath = os.path.expanduser(data_dirpath)
    print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    alpha, x, density, gt, noise, curvature = generate_data(
        root_dir, params, split_name=split_name, seed=seed,
        sobol_generator=sobol_generator)
    noisy_gt = gt + noise

    import matplotlib.pyplot as plt
    f = plt.figure()
    f.set_tight_layout({"pad": .0})
    ax = f.gca()
    # plt.scatter(alpha, noisy_gt, s=10)
    ax.plot(alpha, noisy_gt)
    ax.set_xlabel("alpha")
    ax.set_ylabel("y")
    # plt.title("Sinusoid, freq = {}".format(params["f"]))
    plt.show()
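# For context, a minimal sketch of the synthetic signal the params describe:
# "n" samples of a sinusoid of frequency "f" plus Gaussian noise of std "s",
# with alpha drawn from the "d" distribution ("uniform" here). This only
# illustrates the parameter meanings; it is not the repository's generate_data.
import numpy as np

def generate_data_sketch(params, seed=None):
    rng = np.random.default_rng(seed)
    alpha = np.sort(rng.uniform(0.0, 1.0, params["n"]))  # assumes "d" == "uniform"
    gt = np.sin(2 * np.pi * params["f"] * alpha)
    noise = rng.normal(0.0, params["s"], params["n"])
    return alpha, gt, noise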
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()

    distribution = "uniform"
    params = {
        "n": args.sample_count,
        "f": args.frequency,
        "s": args.noise_std,
        "d": distribution,
    }
    sobol_generator = rand_utils.SobolGenerator()
    # sobol_generator = None

    generate_test(config, params, split_name="train", seed=0,
                  sobol_generator=sobol_generator)
    generate_test(config, params, split_name="val", seed=1,
                  sobol_generator=sobol_generator)
    generate_test(config, params, split_name="test", seed=2,
                  sobol_generator=sobol_generator)
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()
    if args.batch_size is not None:
        config["batch_size"] = args.batch_size

    distribution = "uniform"
    dataset_params = {
        "n": args.sample_count,
        "f": args.frequency,
        "s": args.noise_std,
        "d": distribution,
    }

    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    data_dirpath = os.path.expanduser(data_dirpath)
    print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    sobol_generator = rand_utils.SobolGenerator()

    train_ds = Synthetic1DDataset(root_dir=root_dir,
                                  params=dataset_params,
                                  split_name="train",
                                  sobol_generator=sobol_generator,
                                  transform=torchvision.transforms.Compose([
                                      transforms.ToTensor(),
                                      transforms.ToDevice(device="cuda")
                                  ]))
    train_dl = DataLoader(train_ds,
                          batch_size=config["batch_size"],
                          shuffle=True,
                          num_workers=4)
    for i_batch, sample_batched in enumerate(train_dl):
        print(
            i_batch,
            sample_batched['density'].max(),
            # sample_batched['gt'],
            # sample_batched['noise'],
        )
def get_termlangs(repositories, all_languages):
    print_info("Searching for manifests ...")
    termlangs = {}
    for m in find_files(repositories, ".manifest"):
        print_info("Found manifest: " + str(m))
        try:
            term = parse_manifest(m)
            if term.language not in termlangs:
                termlangs[term.language] = []
            termlangs[term.language].append(term)
        except Exception as e:
            import traceback
            traceback.print_exc()
            print_error("Failed: " + str(e))
    return termlangs
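# find_files is assumed to recursively scan the repository directories for
# files ending with the given suffix; a minimal sketch of such a helper,
# under that assumption (not necessarily the codebase's own implementation):
import os

def find_files_sketch(dirs, suffix):
    for d in dirs:
        for root, _, filenames in os.walk(d):
            for filename in filenames:
                if filename.endswith(suffix):
                    yield os.path.join(root, filename)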
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()
    if args.runs_dirpath is not None:
        config["runs_dirpath"] = args.runs_dirpath

    if args.mode == "image":
        plot_stats.plot_stats(config, args.run_name, args.source_idx_list)
    elif args.mode == "1d":
        plot_stats_1d.plot(config, args.run_name)
def compute_grads(config, run_params, dataset_params, split_name):
    # print("# --- Compute grads --- #")
    working_dir = os.path.dirname(os.path.abspath(__file__))

    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    # print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    # Setup run directory:
    runs_dir = os.path.join(working_dir, config["runs_dirpath"])
    run_dirpath = None
    try:
        run_dirpath = run_utils.setup_run_dir(runs_dir, run_params["run_name"])
    except ValueError:
        print_utils.print_error(
            "Run name {} was not found. Aborting...".format(
                run_params["run_name"]))
        exit()

    # Choose device
    dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Instantiate dataset
    ds = Synthetic1DDataset(root_dir=root_dir,
                            params=dataset_params,
                            split_name=split_name,
                            transform=torchvision.transforms.Compose([
                                transforms.ToTensor(),
                                transforms.ToDevice(device=dev)
                            ]))
    dl = DataLoader(ds, batch_size=1)  # batch_size=1: one gradient per sample

    model = Simple1DInputNet(config, run_params["capacity"])
    model.to(dev)

    analyzer = Analyzer(config, model, run_dirpath)
    analyzer.compute_and_save_grads(dl)
def compute_and_save_grads(self, dl):
    # Load model from the last checkpoint
    if self.load_checkpoint(self.checkpoints_dirpath) is None:
        print_utils.print_error(
            "Checkpoint {} could not be loaded. Aborting...".format(
                self.checkpoints_dirpath))
        exit()
    self.model.train()

    pbar = tqdm(dl, desc="Compute grads: ")
    for batch_index, batch in enumerate(pbar):
        grads, pred = self.model.compute_grads(batch, return_pred=True)
        grads = grads.cpu().detach().numpy()
        pred = pred.cpu().detach().numpy()

        # Save grads in run_dirpath
        grads_filename = "grads.{:05d}.npy".format(batch_index)
        pred_filename = "pred.{:05d}.npy".format(batch_index)
        grads_filepath = os.path.join(self.grads_dirpath, grads_filename)
        pred_filepath = os.path.join(self.grads_dirpath, pred_filename)
        np.save(grads_filepath, grads)
        np.save(pred_filepath, pred)
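# A minimal sketch of what the model-side compute_grads could look like:
# one backward pass per batch, returning the flattened parameter gradient.
# The batch keys "x"/"gt" and the loss function are assumptions; this is an
# illustration, not the repository's actual implementation.
import torch

def compute_grads_sketch(model, batch, loss_func, return_pred=True):
    model.zero_grad()
    pred = model(batch["x"])
    loss = loss_func(pred, batch["gt"])
    loss.backward()
    # Flatten and concatenate the gradients of all parameters into one vector
    grads = torch.cat([p.grad.flatten() for p in model.parameters()
                       if p.grad is not None])
    return (grads, pred) if return_pred else grads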
def build_from_resource(resource, term, term_dir, language, theme):
    try:
        print_info("Project building..\t" + str(resource.filename))
    except UnicodeEncodeError:
        # str(resource.filename) failed, so use repr() in the error message
        print_error("Project failed due to filename encoding: " +
                    repr(resource.filename))
        return None

    project = Project.parse_project_meta(resource)
    project_dir = os.path.join(term_dir, "%.02d" % project.number)
    makedirs(project_dir)
    try:
        built_project = Project.build_project(term, project, language, theme,
                                              project_dir)
    except Exception:
        print_error("Project failed while building: " + str(resource.filename))
        return None

    print_info("Project done!\t\t" + str(resource.filename))
    return built_project
def plot_stats(config, run_name, source_idx_list):
    print("# --- Plot stats --- #")
    working_dir = os.path.dirname(os.path.abspath(__file__))

    # Setup run directory:
    runs_dir = os.path.join(working_dir, config["runs_dirpath"])
    run_dirpath = None
    try:
        run_dirpath = run_utils.setup_run_dir(runs_dir, run_name)
    except ValueError:
        print_utils.print_error(
            "Run name {} was not found. Aborting...".format(run_name))
        exit()

    stats_dirpath = os.path.join(run_dirpath, "stats")
    stats_list = load_stats_list(stats_dirpath, source_idx_list)

    plot_k_nearest(stats_list)
    for stats in stats_list:
        plot_hist(stats)
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()
    if args.batch_size is not None:
        config["batch_size"] = args.batch_size
    if args.exps_dirpath is not None:
        config["exps_dirpath"] = args.exps_dirpath

    distribution = "uniform"
    params = {
        "run_count": args.run_count,
        "sample_count": args.sample_count,
        "frequency": args.frequency,
        "noise_std": args.noise_std,
        "distribution": distribution,
    }
    stats_params = {
        "neighbors_t": args.neighbors_t,
        "neighbors_n": args.neighbors_n,
    }

    working_dir = os.path.dirname(os.path.abspath(__file__))

    # Setup exp directory:
    exps_dir = os.path.join(working_dir, config["exps_dirpath"])
    exp_dirpath = run_utils.setup_run_dir(exps_dir, args.exp_name, args.new_exp)

    # Launch experiments
    launch_experiments(config, exp_dirpath, args.new_exp, args.recompute_stats,
                       params, stats_params)

    # Aggregate results
    aggregate_results(exp_dirpath, params, stats_params)
def restore_checkpoint(self, sess, saver, checkpoints_dir):
    """
    Restores the latest checkpoint found in checkpoints_dir, if any.

    :param sess: TensorFlow session to restore the variables into
    :param saver: tf.train.Saver used to restore the checkpoint
    :param checkpoints_dir: directory to search for checkpoints
    :return: True if a checkpoint was found and restored, False if no
        checkpoint was found
    """
    checkpoint = tf.train.get_checkpoint_state(checkpoints_dir)
    if checkpoint and checkpoint.model_checkpoint_path:
        # The model has a checkpoint: restore it
        print_utils.print_info("Restoring {} checkpoint {}".format(
            self.model_name, checkpoint.model_checkpoint_path))
        try:
            saver.restore(sess, checkpoint.model_checkpoint_path)
        except tf.errors.InvalidArgumentError:
            print_utils.print_error(
                "ERROR: could not load checkpoint {}.\n"
                "\tThis is likely because the model graph definition has "
                "changed since the checkpoint was saved, so the weights "
                "no longer match.".format(checkpoints_dir))
            exit()
        return True
    else:
        return False
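# Hedged usage sketch for restore_checkpoint when resuming training; the
# Saver construction and the checkpoints_dir argument below are assumptions,
# not code from this repository.
import tensorflow as tf

def resume_sketch(model, checkpoints_dir):
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        if not model.restore_checkpoint(sess, saver, checkpoints_dir):
            print_utils.print_info("No checkpoint found, starting from scratch.")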
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()

    print_utils.print_info("Using downscaling factors: {}".format(args.ds_fac))
    run_name_list = [RUN_NAME_FORMAT.format(ds_fac) for ds_fac in args.ds_fac]

    # --- Read image --- #
    print_utils.print_info("Reading image...")
    image_filepath = get_abs_path(args.image)
    image, image_metadata = read_image(image_filepath, args.pixelsize)
    image = clip_image(image, 0, 255)
    # hist = np.histogram(image)
    # print_hist(hist)
    im_min, im_max = get_min_max(image, std_factor=3)
    # print("min: {}, max: {}".format(im_min, im_max))
    image = stretch_image(image, im_min, im_max, 0, 255)
    image = clip_image(image, 0, 255)
    # hist = np.histogram(image)
    # print_hist(hist)

    print("Image stats:")
    print("\tShape: {}".format(image.shape))
    print("\tMin: {}".format(image.min()))
    print("\tMax: {}".format(image.max()))

    # --- Read shapefile if it exists --- #
    if args.shapefile is not None:
        shapefile_filepath = get_abs_path(args.shapefile)
        gt_polygons = get_shapefile_annotations(image_filepath,
                                                shapefile_filepath)
    else:
        # --- Load or fetch OSM building data --- #
        gt_polygons = get_osm_annotations(image_filepath)

    # --- Print polygon info --- #
    print("Polygons stats:")
    print("\tCount: {}".format(len(gt_polygons)))
    print("\tMin: {}".format(min([polygon.min() for polygon in gt_polygons])))
    print("\tMax: {}".format(max([polygon.max() for polygon in gt_polygons])))

    if not check_polygons_in_image(image, gt_polygons):
        print_utils.print_error(
            "ERROR: polygons are not inside the image. This is most likely "
            "due to using the wrong projection when reading the input "
            "shapefile. Aborting...")
        exit()

    print_utils.print_info("Aligning building annotations...")
    aligned_polygons = test.test_align_gt(args.runs_dirpath, image,
                                          image_metadata, gt_polygons,
                                          args.batch_size, args.ds_fac,
                                          run_name_list,
                                          config["disp_max_abs_value"],
                                          output_shapefiles=False)

    print_utils.print_info("Saving aligned building annotations...")
    save_annotations(args.image, aligned_polygons)
def train(config, run_params, dataset_params):
    # print("# --- Starting training --- #")
    run_name = run_params["run_name"]
    new_run = run_params["new_run"]
    init_run_name = run_params["init_run_name"]

    working_dir = os.path.dirname(os.path.abspath(__file__))

    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    # print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    # Setup init checkpoints directory path if one is specified:
    if init_run_name is not None:
        init_run_dirpath = run_utils.setup_run_dir(config["runs_dirpath"],
                                                   init_run_name)
        _, init_checkpoints_dirpath = run_utils.setup_run_subdirs(
            init_run_dirpath)
    else:
        init_checkpoints_dirpath = None

    # Setup run directory:
    runs_dir = os.path.join(working_dir, config["runs_dirpath"])
    run_dirpath = run_utils.setup_run_dir(runs_dir, run_name, new_run)

    # Save config in logs directory
    run_utils.save_config(config, run_dirpath)

    # Save args
    args_filepath = os.path.join(run_dirpath, "args.json")
    python_utils.save_json(
        args_filepath, {
            "run_name": run_name,
            "new_run": new_run,
            "init_run_name": init_run_name,
            "batch_size": config["batch_size"],
        })

    # Choose device
    # dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    dev = "cpu"  # For small networks and experiments, cpu is much faster

    # Instantiate datasets
    # sobol_generator = rand_utils.SobolGenerator()
    sobol_generator = None
    train_ds = Synthetic1DDataset(root_dir=root_dir,
                                  params=dataset_params,
                                  split_name="train",
                                  sobol_generator=sobol_generator,
                                  transform=torchvision.transforms.Compose([
                                      transforms.ToTensor(),
                                      transforms.ToDevice(device=dev)
                                  ]))
    val_ds = Synthetic1DDataset(root_dir=root_dir,
                                params=dataset_params,
                                split_name="val",
                                sobol_generator=sobol_generator,
                                transform=torchvision.transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.ToDevice(device=dev)
                                ]))
    # print(train_ds.alpha)
    # print(val_ds.alpha)
    # exit()
    # Generate the test dataset here as well because if using Sobol numbers,
    # all datasets should share the same SobolGenerator so that they do not
    # generate the same samples.
    test_ds = Synthetic1DDataset(root_dir=root_dir,
                                 params=dataset_params,
                                 split_name="test",
                                 sobol_generator=sobol_generator,
                                 transform=torchvision.transforms.Compose([
                                     transforms.ToTensor(),
                                     transforms.ToDevice(device=dev)
                                 ]))
    train_dl = DataLoader(train_ds, batch_size=config["batch_size"],
                          shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=config["batch_size"])

    success = False
    while not success:
        try:
            model = Simple1DInputNet(config)
            model.to(dev)
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=config["lr"],
                                         weight_decay=config["weight_decay"])
            loss_func = measures.l1_loss
            trainer = Trainer(config, model, optimizer, loss_func,
                              init_checkpoints_dirpath, run_dirpath)
            trainer.fit(config, train_dl, val_dl)
            success = True
        except ValueError:  # Catches NaN errors
            # Wipe the run and try again
            run_utils.wipe_run_subdirs(run_dirpath)
            print("\nTry again\n")
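# The retry loop in train() expects Trainer.fit to raise ValueError when the
# loss becomes NaN. A minimal sketch of such a guard inside a training step
# (hypothetical helper, not the repository's Trainer code):
import torch

def check_finite_loss(loss):
    # Raising ValueError lets train() wipe the run subdirs and restart
    if not torch.isfinite(loss).all():
        raise ValueError("Non-finite loss encountered")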
from html_builders import *
import os
import os.path
import sys
import shutil
import collections
import json
import codecs
import tempfile
import string

try:
    import yaml
except ImportError:
    print_error("You need to install pyyaml using pip or easy_install, sorry")
    sys.exit(-10)

# todo: real classes
Term = collections.namedtuple(
    'Term', 'id manifest title description language number projects extras')
Extra = collections.namedtuple('Extra', 'name materials note')


# Process files within project and resource containers
def build_extra(term, extra, language, theme, output_dir):
    note = []
    if extra.note:
        note.extend(process_file(extra.note,
                                 note_style,
                                 language,
                                 theme,
                                 output_dir))
def similarity_stats_1d(config, run_name, dataset_params, split_name,
                        stats_params):
    # print("# --- Similarity Stats --- #")
    working_dir = os.path.dirname(os.path.abspath(__file__))

    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    # print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    # Setup run directory:
    runs_dir = os.path.join(working_dir, config["runs_dirpath"])
    run_dirpath = None
    try:
        run_dirpath = run_utils.setup_run_dir(runs_dir, run_name)
    except ValueError:
        print_utils.print_error(
            "Run name {} was not found. Aborting...".format(run_name))
        exit()

    # Instantiate dataset
    # ds = Synthetic1DDataset(root_dir=root_dir, params=dataset_params,
    #                         split_name="test", distribution="triangular")
    ds = Synthetic1DDataset(root_dir=root_dir,
                            params=dataset_params,
                            split_name=split_name,
                            transform=None)
    sample_count = len(ds)

    # Load grads and pred
    grads_dirpath = os.path.join(run_dirpath, "grads")
    grads_filepath_list = python_utils.get_filepaths(grads_dirpath,
                                                     endswith_str=".npy",
                                                     startswith_str="grads.")
    grads_list = [
        np.load(grads_filepath)
        for grads_filepath in tqdm(grads_filepath_list, desc="Loading grads")
    ]
    # print("Grads shape: {}".format(grads_list[0].shape))
    pred_filepath_list = python_utils.get_filepaths(grads_dirpath,
                                                    endswith_str=".npy",
                                                    startswith_str="pred.")
    pred_list = [
        np.load(pred_filepath)
        for pred_filepath in tqdm(pred_filepath_list, desc="Loading pred")
    ]

    # Create stats dir
    stats_dirpath = os.path.join(run_dirpath, "stats_1d")
    os.makedirs(stats_dirpath, exist_ok=True)

    # import time
    # t1 = time.clock()
    neighbor_count, neighbor_count_no_normalization = \
        netsimilarity_utils.compute_soft_neighbor_count(grads_list)
    neighbors_filepath = os.path.join(stats_dirpath, "neighbors_soft.npy")
    np.save(neighbors_filepath, neighbor_count)
    neighbors_filepath = os.path.join(stats_dirpath,
                                      "neighbors_soft_no_normalization.npy")
    np.save(neighbors_filepath, neighbor_count_no_normalization)

    if not COMPUTE_ONLY_NEIGHBORS_SOFT:
        # Compute similarity matrix
        similarity_mat = netsimilarity_utils.compute_similarity_mat_1d(
            grads_list)

        # Compute number of neighbors
        # Hard-thresholding:
        for t in stats_params["neighbors_t"]:
            neighbor_count = netsimilarity_utils.compute_neighbor_count(
                similarity_mat, "hard", t=t)
            neighbors_filepath = os.path.join(
                stats_dirpath, "neighbors_hard_t_{}.npy".format(t))
            np.save(neighbors_filepath, neighbor_count)

        # # Soft estimate
        # neighbor_count = netsimilarity_utils.compute_neighbor_count(
        #     similarity_mat, "soft")
        # neighbors_filepath = os.path.join(stats_dirpath, "neighbors_soft.npy")
        # np.save(neighbors_filepath, neighbor_count)

        # Mix
        for n in stats_params["neighbors_n"]:
            neighbor_count = netsimilarity_utils.compute_neighbor_count(
                similarity_mat, "less_soft", n=n)
            neighbors_filepath = os.path.join(
                stats_dirpath, "neighbors_less_soft_n_{}.npy".format(n))
            np.save(neighbors_filepath, neighbor_count)
    # print("Time to compute number of neighbors:")
    # print(time.clock() - t1)

    # Save inputs
    for key in ["alpha", "x", "density", "gt", "noise", "curvature"]:
        filepath = os.path.join(stats_dirpath, "{}.npy".format(key))
        values = [sample[key] for sample in ds]
        np.save(filepath, values)

    # Save outputs
    pred_filepath = os.path.join(stats_dirpath, "pred.npy")
    pred = [pred[0] for pred in pred_list]
    np.save(pred_filepath, pred)

    # Error
    error_filepath = os.path.join(stats_dirpath, "error.npy")
    error = [
        np.abs(sample["gt"] - pred[0])
        for sample, pred in zip(ds, pred_list)
    ]
    np.save(error_filepath, error)

    # Losses
    logs_dirpath = os.path.join(run_dirpath, "logs")
    final_losses = python_utils.load_json(
        os.path.join(logs_dirpath, "final_losses.json"))
    train_loss_filepath = os.path.join(stats_dirpath, "train_loss.npy")
    np.save(train_loss_filepath, final_losses["train_loss"])
    val_loss_filepath = os.path.join(stats_dirpath, "val_loss.npy")
    np.save(val_loss_filepath, final_losses["val_loss"])
    loss_ratio_filepath = os.path.join(stats_dirpath, "loss_ratio.npy")
    np.save(loss_ratio_filepath,
            final_losses["val_loss"] / final_losses["train_loss"])
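# A hedged sketch of the kind of quantities the netsimilarity_utils calls
# above compute: cosine similarity between per-sample gradient vectors, and a
# soft neighbor count obtained by summing each row of that matrix. This
# mirrors the usage above but is an assumption, not the actual implementation.
import numpy as np

def cosine_similarity_mat_sketch(grads_list):
    g = np.stack([grad.ravel() for grad in grads_list])  # (n_samples, n_params)
    g = g / np.linalg.norm(g, axis=1, keepdims=True)     # L2-normalize each row
    return g @ g.T                                       # (n_samples, n_samples)

def soft_neighbor_count_sketch(similarity_mat):
    # Soft estimate: sum of each sample's similarities to all samples
    return similarity_mat.sum(axis=1)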