def _get_hyperparameter_space(project_parameters):
    """Build a Ray Tune search space from the hyperparameter YAML config.

    The config maps each parameter type ('int', 'float', 'choice') to a dict
    of parameter names and their candidate values / bounds.

    Returns:
        dict: parameter name -> ray.tune sampling domain.
    """
    space_config = load_yaml(
        filepath=project_parameters.hyperparameter_config_path)
    assert space_config is not None, 'the hyperparameter space config has not any content.'
    hyperparameter_space = {}
    for param_type, params in space_config.items():
        assert param_type in ('int', 'float', 'choice'), \
            'the type is wrong, please check it. the type: {}'.format(param_type)
        for param_name, param_value in params.items():
            # The cutoff frequency bounds are derived parameters and are not
            # required to pre-exist in the project parameter namespace.
            if param_name not in ('cutoff_freq_low', 'cutoff_freq_high'):
                assert param_name in project_parameters, \
                    'the parameter name is wrong, please check it. the parameter name: {}'.format(param_name)
            if param_type == 'int':
                hyperparameter_space[param_name] = ray.tune.randint(
                    lower=min(param_value), upper=max(param_value))
            elif param_type == 'float':
                hyperparameter_space[param_name] = ray.tune.uniform(
                    lower=min(param_value), upper=max(param_value))
            else:  # param_type == 'choice', guaranteed by the assert above
                hyperparameter_space[param_name] = ray.tune.choice(
                    categories=param_value)
    return hyperparameter_space
def __init__(self, path, *args, **kwargs):
    """Load dataset metadata and build per-task inverse label lookups.

    Accepts either an explicit Path or a dataset name (str), which is
    resolved under metadata/datasets/<name>.yaml.
    """
    if isinstance(path, str):
        path = Path('metadata', 'datasets', f'{path}.yaml')
    assert path.exists()
    super(DatasetMeta, self).__init__(path=path, *args, **kwargs)
    # 'fs' (sampling frequency) is mandatory; NOTE(review): this only logs the
    # KeyError, execution continues — confirm that is intentional.
    if 'fs' not in self.meta:
        logger.exception(
            KeyError(f'The file {path} does not contain the key "fs"'))
    self.inv_lookup = dict()
    for task_name in self.meta['tasks']:
        task_label_file = metadata_path('tasks', f'{task_name}.yaml')
        task_labels = load_yaml(task_label_file)
        dataset_labels = self.meta['tasks'][task_name]['target_transform']
        # Every dataset label must be declared in the task's label file.
        unknown = set(dataset_labels.keys()).difference(task_labels.keys())
        if unknown:
            logger.exception(
                ValueError(
                    f'The following labels from dataset {path} are not accounted for in {task_label_file}: '
                    f'{unknown}'
                ))
        # Invert the target transform: transformed label -> original key.
        self.inv_lookup[task_name] = {
            label: key
            for key, label in dataset_labels.items()
        }
def main(args):
    """Evaluate predicted patches against groundtruth with bandwise IQA metrics.

    Walks every predicted patch directory under ``--root``, compares each
    date's predicted bands to the matching Landsat groundtruth under
    ``--target``, accumulates bandwise PSNR/SSIM/SAM, and dumps the averages
    as JSON into ``--o``.
    """
    root = args['--root']
    experiment = build_experiment(load_yaml(args['--cfg']))
    bar = Bar("Patch directory", max=len(experiment.test_set))
    iqa_metrics = defaultdict(list)

    for patch_idx in patches_subset_from(experiment.test_set):
        patch_directory = os.path.join(root, patch_idx)
        if not os.path.isdir(patch_directory):
            # Some patches aren't predicted by ESTARFM as it requires a sample before and one after
            continue
        for date in os.listdir(patch_directory):
            # Load predicted bands
            date_directory = os.path.join(patch_directory, date)
            files_paths = [os.path.join(date_directory, band) for band in os.listdir(date_directory)]
            predicted_bands = load_in_multiband_raster(files_paths)

            # Load groundtruth bands
            target_directory = os.path.join(args['--target'], patch_idx, 'landsat', date)
            target_files_paths = [os.path.join(target_directory, band) for band in os.listdir(target_directory)]
            target_bands = load_in_multiband_raster(target_files_paths)

            # Compute PSNR and SSIM by band; both rasters are rescaled to a
            # common data range (clipped away from zero) before comparison.
            patch_bands_iqa = defaultdict(list)
            for src, tgt in zip(predicted_bands, target_bands):
                data_range = np.max([src, tgt])
                src = src.clip(min=np.finfo(np.float16).eps) / data_range
                tgt = tgt.clip(min=np.finfo(np.float16).eps) / data_range
                patch_bands_iqa['psnr'] += [metrics.psnr(tgt, src)]
                patch_bands_iqa['ssim'] += [metrics.ssim(tgt, src)]

            # Record bandwise value
            iqa_metrics['psnr'] += [patch_bands_iqa['psnr']]
            iqa_metrics['ssim'] += [patch_bands_iqa['ssim']]

            # Compute bandwise spectral angle mapper
            predicted_patch = np.dstack(predicted_bands).astype(np.float32)
            target_patch = np.dstack(target_bands).astype(np.float32)
            sam = metrics.sam(target_patch, predicted_patch).mean(axis=(0, 1))
            iqa_metrics['sam'] += [sam]

        # Log running averages
        avg_psnr, avg_ssim, avg_sam = np.mean(iqa_metrics['psnr']), np.mean(iqa_metrics['ssim']), np.mean(iqa_metrics['sam'])
        bar.suffix = "PSNR = {:.2f} | SSIM = {:.4f} | SAM = {:.6f}".format(avg_psnr, avg_ssim, avg_sam)
        bar.next()
    # Fix: finalize the progress bar so the terminal line is properly closed.
    bar.finish()

    # Make bandwise average output dictionary
    bandwise_avg_psnr = np.asarray(iqa_metrics['psnr']).mean(axis=0).astype(np.float64)
    bandwise_avg_ssim = np.asarray(iqa_metrics['ssim']).mean(axis=0).astype(np.float64)
    bandwise_avg_sam = np.asarray(iqa_metrics['sam']).mean(axis=0).astype(np.float64)
    avg_iqa_metrics = {'test_psnr': bandwise_avg_psnr.tolist(),
                       'test_ssim': bandwise_avg_ssim.tolist(),
                       'test_sam': bandwise_avg_sam.tolist()}

    # Dump scores (plain string — the former f-string had no placeholders).
    os.makedirs(args['--o'], exist_ok=True)
    dump_path = os.path.join(args['--o'], "test_scores_starfm.json")
    save_json(dump_path, avg_iqa_metrics)
def lambda_handler(event, context):
    """AWS Lambda entry point: print the configured secret and return 200.

    Args:
        event: Lambda event payload (unused).
        context: Lambda runtime context (unused).

    Returns:
        dict: API-Gateway-style response with a JSON body.
    """
    # Renamed local (was ``yaml``) to avoid shadowing the common module name.
    config = load_yaml('src/config.yml')
    print(get_secret(config['secret_name']))
    response_body = json.dumps({
        "status": 'processed',
    })
    return {
        "statusCode": 200,
        "body": response_body,
    }
def main(args):
    """Merge MODIS band rasters into scenes and reproject them.

    Args:
        args (dict): docopt-style CLI arguments ('--root', '--o',
            '--scenes_specs').
    """
    # Reader over raw band files, writer for the merged scenes.
    bands_reader = readers.MODISBandReader(root=args['--root'])
    scene_writer = writers.MODISSceneWriter(root=args['--o'])

    # Scenes specification drives which bands to merge and the target CRS.
    scenes_specs = load_yaml(args['--scenes_specs'])

    logging.info(f"Merging bands {scenes_specs['bands']} of MODIS and reprojecting on CRS:EPSG {scenes_specs['EPSG']}")
    load_stack_and_reproject_scenes(reader=bands_reader,
                                    writer=scene_writer,
                                    scenes_specs=scenes_specs)
def main(args):
    """Register Landsat/MODIS scenes and dump aligned patch pairs per date.

    Args:
        args (dict): docopt-style CLI arguments ('--landsat_root',
            '--modis_root', '--o', '--scenes_specs').
    """
    # Readers over both sensors plus the patch exporter.
    landsat_reader = readers.LandsatSceneReader(root=args['--landsat_root'])
    modis_reader = readers.MODISSceneReader(root=args['--modis_root'])
    export = PatchExport(output_dir=args['--o'])
    logging.info("Loaded scenes readers")

    # Scenes specifications: dates, patch size, validity threshold, ...
    scenes_specs = load_yaml(args['--scenes_specs'])

    # Alignment features (intersection bbox + finest resolution) derived from
    # the landsat rasters; all rasters are registered onto these.
    intersecting_bbox, max_resolution = compute_registration_features(
        scenes_specs=scenes_specs, reader=landsat_reader)
    logging.info("Computed registration features")

    for date in scenes_specs['dates']:
        # Load the three rasters for this date.
        landsat_raster, modis_raster, qa_raster = load_rasters(
            date=date, landsat_reader=landsat_reader, modis_reader=modis_reader)

        logging.info(f"Date {date} : Aligning rasters")
        landsat_raster = align_raster(landsat_raster, intersecting_bbox, max_resolution)
        qa_raster = align_raster(qa_raster, intersecting_bbox, max_resolution)
        modis_raster = align_modis_raster(modis_raster, intersecting_bbox, max_resolution)

        # Valid pixel map comes from the landsat quality assessment raster.
        logging.info(f"Date {date} : Computing valid pixel map")
        valid_pixels = compute_landsat_raster_valid_pixels_map(qa_raster)

        # Iterate windows that pass the validity threshold.
        windows_iterator = make_windows_iterator(
            image_size=(landsat_raster.height, landsat_raster.width),
            window_size=scenes_specs['patch_size'],
            valid_pixels=valid_pixels,
            validity_threshold=scenes_specs['validity_threshold'])

        bar = Bar(f"Date {date} : Extracting patches from rasters")
        for patch_idx, window in windows_iterator:
            extract_and_dump_patch(landsat_raster=landsat_raster,
                                   modis_raster=modis_raster,
                                   window=window,
                                   patch_idx=patch_idx,
                                   date=date,
                                   export=export)
            bar.next()
def load_data_eval():
    """Load and preprocess the evaluation split into ``cfg.testloader``.

    Selects test patients either by a fresh random patient-level split or
    from the persisted train/test split YAML, applies the pickled
    preprocessing pipeline, and builds the evaluation DataLoader.
    """
    print('==================')
    print('Loading data ...')
    start = time()

    # Read the ROI dataset matching the configured crop type.
    data = pd.read_csv(all_paths['rois_{}_dataset_csv'.format(
        cfg.args['ROI_CROP_TYPE'])])

    if cfg.cli_args.random_split:
        # Split at patient level so slices of one patient stay together.
        all_patients = data.patient_id.unique()
        random.shuffle(all_patients)
        pivot_idx = int(cfg.TRAIN_TEST_SPLIT * len(all_patients))
        test_patients = all_patients[pivot_idx:]
    else:
        patients_split = u.load_yaml(all_paths['train_test_split_yaml'])
        test_patients = patients_split['EVAL']

    test_data = data.loc[data.patient_id.isin(test_patients)]
    print(f'{len(test_patients)} patients and {len(test_data)} slices')

    load_preprocessing.load_transform_image()
    _, testset = get_train_test_datasets(
        test_data=test_data, transform_test=cfg.transform_image_test)

    print('==================')
    print('Preprocessing data ...')
    # Reuse the preprocessing pipeline pickled at training time.
    cfg.preprocessing = u.load_pickle(
        join(cfg.cli_args.tensorboard_path, 'preprocessing.pkl'))
    testset.data = cfg.preprocessing(testset.data)
    cfg.testloader = DataLoader(testset,
                                batch_size=cfg.BATCH_SIZE,
                                shuffle=False)
    print("Dataset process time: ", time() - start)
def _get_optimizer(model_parameters, project_parameters):
    """Instantiate a ``torch.optim`` optimizer from the optimizer YAML config.

    The config maps one optimizer class name to either ``None`` (defaults)
    or a dict of extra keyword arguments. The learning rate is always taken
    from ``project_parameters.lr``.

    Args:
        model_parameters: iterable of model parameters to optimize.
        project_parameters: namespace providing ``optimizer_config_path``
            and ``lr``.

    Returns:
        torch.optim.Optimizer: the configured optimizer.
    """
    optimizer_config = load_yaml(
        filepath=project_parameters.optimizer_config_path)
    optimizer_name = list(optimizer_config.keys())[0]
    assert optimizer_name in dir(optim), \
        'please check the optimizer. the optimizer config: {}'.format(
            optimizer_config)
    # Fix: resolve the class with getattr and pass kwargs directly instead of
    # building a source string and eval-ing it — the eval-based construction
    # was fragile (string values broke the generated code) and unsafe if the
    # config file is not trusted.
    optimizer_class = getattr(optim, optimizer_name)
    value = optimizer_config[optimizer_name]
    if value is None:
        # No extra arguments: only the learning rate is overridden.
        return optimizer_class(params=model_parameters, lr=project_parameters.lr)
    elif type(value) is dict:
        return optimizer_class(params=model_parameters,
                               lr=project_parameters.lr,
                               **value)
    else:
        assert False, '{}: {}'.format(optimizer_name, value)
def __init__(self, path, *args, **kwargs):
    """Load a metadata YAML file into ``self.meta``.

    ``self.meta`` defaults to an empty dict when the path is falsy, the
    file is missing, or the file has no content.
    """
    self.path = Path(path)
    self.name = self.path.stem
    self.meta = dict()
    if not path:
        return
    try:
        meta = load_yaml(path)
    except FileNotFoundError:
        # Tolerated: keep the empty dict.
        # logger.warn(f'The metadata file for "{self.name}" was not found.')
        return
    if meta is None:
        logger.info(
            f'The content metadata module "{self.name}" from {path} is empty. Assigning empty dict'
        )
        meta = dict()
    else:
        assert isinstance(meta, dict)
    self.meta = meta
from src.abstractFeaturegenerator import (
    Feature,
    create_memo,
    get_arguments,
    generate_features,
    save_column,
)
from src.utils import mkdir, load_yaml

# Point the feature store at the directory declared in the project config.
config = load_yaml()
Feature.dir = config["path"]["feature"]

# NOTE(review): the triple-quoted string below is a commented-out example
# feature class (Neighborhood ordinal encoding); kept verbatim.
"""
import category_encoders as ce


class NeighborhoodOrdinal(Feature):
    def create_features(self):
        # self.columns には特徴量生成に必要な列名を書く
        self.columns = ["Neighborhood"]
        self.load(self.columns)
        oe = ce.ordinal.OrdinalEncoder()
        self.train["Neighborhood_ordinal"] = oe.fit_transform(
            self.train["Neighborhood"]
        )
        self.test["Neighborhood_ordinal"] = oe.transform(self.test["Neighborhood"])
        create_memo("Neighborhood_ordinal", "Neighborhood をラベル化した")
"""

if __name__ == "__main__":
    # Split train / test column by column and persist each column to disk.
    save_column()
# fast_dev_run=True, ) trainer.fit(exif_trainer, datamodule=dm) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--config", help="path to the config file", default="configs/train/exif_sc.yaml", ) parser.add_argument( "--checkpoints_dir", help="directory to save checkpoint weights", default="checkpoints", ) parser.add_argument("--gpu", help="which gpu id to use", type=int, default=0) parser.add_argument( "--wandb", action="store_true", help="whether to log to Weights & Biases", ) args = parser.parse_args() # Load config file config = load_yaml(args.config) main(config, args)
cfg.mean_norm = [0.485, 0.456, 0.406] # mean and std of ImageNet cfg.std_norm = [0.229, 0.224, 0.225] # mean and std of ImageNet # cfg.mean_norm = [0.5054398602192114, 0.5054398602192114, 0.5055198023370465] # cfg.std_norm = [0.2852900917900417, 0.2852900917900417, 0.2854451397158079] # Set device if torch.cuda.is_available(): cfg.device = torch.device('cuda') else: cfg.device = torch.device('cpu') print(cfg.device) # load args args_dict = u.load_yaml(all_paths['classification_args']) args_list = u.dict_cross(args_dict) if not cfg.DEBUG: res = u.load_or_create_df(PATH_RESULTS) else: res = pd.DataFrame() # Train and test for i, args in enumerate(args_list): print('==================') print('==================') print('Args number {} / {}'.format(i+1, len(args_list))) print('Time since beginning: {} '.format(u.format_time(time() - start_all))) res = res.append(main(args=args), sort=True)
trainer = pl.Trainer(**params) # Run testing trainer.test(experiment) def make_logger(args, cfg): """Build logger instance pointing to specified output directory """ save_dir = os.path.dirname(args['--o']) name = os.path.basename(args['--o']) version = os.path.basename( os.path.dirname(os.path.dirname(cfg['testing']['chkpt']))) logger = Logger(save_dir=save_dir, name=name, version=version, test=True) return logger if __name__ == "__main__": # Read input args args = docopt(__doc__) # Load configuration file cfg = load_yaml(args["--cfg"]) # Update args if necessary if args['--chkpt']: cfg['testing']['chkpt'] = args['--chkpt'] # Run testing main(args, cfg)
def parse(self):
    """Parse CLI arguments and post-process them into project parameters.

    Resolves relative paths to absolute, derives dataset/class bookkeeping,
    CUDA availability, and mode-specific settings (train / predict /
    evaluate / tune).

    NOTE(review): nesting below is reconstructed from whitespace-collapsed
    source — branch attachment (outer vs inner else, statements inside ifs)
    should be confirmed against the original file.

    Returns:
        argparse.Namespace: the fully post-processed project parameters.
    """
    project_parameters = self._parser.parse_args()
    if project_parameters.parameters_config_path is not None:
        # A YAML parameters file replaces the CLI-provided values entirely.
        project_parameters = argparse.Namespace(**self._get_new_dict(
            old_dict=vars(project_parameters),
            yaml_dict=load_yaml(filepath=abspath(
                project_parameters.parameters_config_path))))
    else:
        del project_parameters.parameters_config_path

    # base
    project_parameters.data_path = abspath(
        path=project_parameters.data_path)
    if project_parameters.predefined_dataset is not None and project_parameters.mode != 'predict':
        # Predefined datasets live in a dedicated subdirectory of data_path.
        project_parameters.data_path = join(
            project_parameters.data_path,
            project_parameters.predefined_dataset)
        makedirs(project_parameters.data_path, exist_ok=True)
    project_parameters.use_cuda = torch.cuda.is_available(
    ) and not project_parameters.no_cuda
    project_parameters.gpus = project_parameters.gpus if project_parameters.use_cuda else 0

    # data preparation
    if project_parameters.predefined_dataset is not None:
        if project_parameters.predefined_dataset == 'SPEECHCOMMANDS':
            # Fixed label set and native sampling rate of SPEECHCOMMANDS.
            project_parameters.classes = sorted([
                'backward', 'bed', 'bird', 'cat', 'dog', 'down', 'eight',
                'five', 'follow', 'forward', 'four', 'go', 'happy', 'house',
                'learn', 'left', 'marvin', 'nine', 'no', 'off', 'on', 'one',
                'right', 'seven', 'sheila', 'six', 'stop', 'three', 'tree',
                'two', 'up', 'visual', 'wow', 'yes', 'zero'
            ])
            project_parameters.sample_rate = 16000
            # One second of audio, expressed in samples.
            project_parameters.max_waveform_length = 1 * project_parameters.sample_rate
    else:
        project_parameters.classes = sorted(project_parameters.classes)
        # Convert the user-supplied length from seconds to samples.
        project_parameters.max_waveform_length *= project_parameters.sample_rate
    project_parameters.class_to_idx = {
        c: idx
        for idx, c in enumerate(project_parameters.classes)
    }
    project_parameters.num_classes = len(project_parameters.classes)
    # Cutoff frequencies must not exceed the Nyquist bound.
    assert not any(
        (np.array(project_parameters.cutoff_freq) /
         project_parameters.sample_rate) > 1
    ), "please check the cutoff_freq whether it satisfies Nyquist's theorem."
    # Class balancing only applies to user-provided (non-predefined) datasets.
    project_parameters.use_balance = not project_parameters.no_balance and project_parameters.predefined_dataset is None
    if project_parameters.transform_config_path is not None:
        project_parameters.transform_config_path = abspath(
            project_parameters.transform_config_path)
    if project_parameters.sox_effect_config_path is not None:
        project_parameters.sox_effect_config_path = abspath(
            project_parameters.sox_effect_config_path)

    # model
    project_parameters.optimizer_config_path = abspath(
        project_parameters.optimizer_config_path)
    if isfile(project_parameters.backbone_model):
        project_parameters.backbone_model = abspath(
            project_parameters.backbone_model)
    if project_parameters.checkpoint_path is not None and isfile(
            project_parameters.checkpoint_path):
        project_parameters.checkpoint_path = abspath(
            project_parameters.checkpoint_path)
    if not 0. <= project_parameters.alpha <= 1.:
        assert False, 'please check the alpha value, the alpha value is limit from 0 to 1. input alpha value is {}'.format(
            project_parameters.alpha)

    # train
    if project_parameters.val_iter is None:
        project_parameters.val_iter = project_parameters.train_iter
    project_parameters.use_early_stopping = not project_parameters.no_early_stopping
    if project_parameters.use_early_stopping:
        # because the PyTorch lightning needs to get validation loss in every training epoch.
        project_parameters.val_iter = 1

    # predict
    project_parameters.use_gui = project_parameters.gui

    # evaluate
    if project_parameters.mode == 'evaluate':
        # Timestamped scratch directory for the k-fold dataset copies.
        project_parameters.k_fold_data_path = './k_fold_dataset{}'.format(
            datetime.now().strftime('%Y%m%d%H%M%S'))

    # tune
    if project_parameters.tune_gpu is None:
        # Fractional GPU share per tuning trial.
        project_parameters.tune_gpu = torch.cuda.device_count(
        ) / project_parameters.tune_cpu
    if project_parameters.mode == 'tune':
        project_parameters.num_workers = project_parameters.tune_cpu
        project_parameters.hyperparameter_config_path = abspath(
            project_parameters.hyperparameter_config_path)
    return project_parameters