def verify_paths(self, config, p):
    keep_temp = config.get('keep_temp', False)
    # clean out WORKING_DIR if we're not keeping temp files:
    if os.path.exists(p.WORKING_DIR) and not \
            (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR):
        gfdl_util.rmtree_wrapper(p.WORKING_DIR)

    try:
        for dir_name, create_ in (
            ('CODE_ROOT', False), ('OBS_DATA_REMOTE', False),
            ('OBS_DATA_ROOT', True), ('MODEL_DATA_ROOT', True),
            ('WORKING_DIR', True)
        ):
            util.check_dir(p, dir_name, create=create_)
    except Exception as exc:
        _log.fatal((f"Input settings for {dir_name} mis-specified "
                    f"(caught {repr(exc)})."))
        util.exit_handler(code=1)

    # Use GCP to create OUTPUT_DIR on a volume that may be read-only
    if not os.path.exists(p.OUTPUT_DIR):
        gfdl_util.make_remote_dir(p.OUTPUT_DIR, self.timeout, self.dry_run,
                                  log=_log)
def parse_flags(self, cli_obj):
    if cli_obj.config.get('dry_run', False):
        cli_obj.config['test_mode'] = True

    if cli_obj.config.get('disable_preprocessor', False):
        _log.warning(("User disabled metadata checks and unit conversion in "
                      "preprocessor."), tags=util.ObjectLogTag.BANNER)
    if cli_obj.config.get('overwrite_file_metadata', False):
        _log.warning(("User chose to overwrite input file metadata with "
                      "framework values (convention = '%s')."),
                     cli_obj.config.get('convention', ''),
                     tags=util.ObjectLogTag.BANNER)

    # check this here, otherwise the error raised about a missing caselist
    # is not informative
    try:
        if cli_obj.config.get('CASE_ROOT_DIR', ''):
            util.check_dir(cli_obj.config['CASE_ROOT_DIR'], 'CASE_ROOT_DIR',
                           create=False)
    except Exception as exc:
        _log.fatal((
            f"Mis-specified input for CASE_ROOT_DIR (received "
            f"'{cli_obj.config.get('CASE_ROOT_DIR', '')}', caught {repr(exc)})."
        ))
        util.exit_handler(code=1)
def download(self, path_output, path_forecast, start_date='2020-05-01',
             end_date='2020-05-10', parameter_list=['PM25'],
             forecast_day_list=[0, 1, 2, 3, 4]):
    assert isinstance(parameter_list, list), \
        'parameter_list should be of "list" type, given: {}'.format(type(parameter_list))
    assert isinstance(forecast_day_list, list), \
        'forecast_day_list should be of "list" type, given: {}'.format(type(forecast_day_list))

    for parameter in parameter_list:
        if parameter not in self.paramter:
            raise ValueError('Invalid parameter passed: {}, possible parameters: {}'.format(
                parameter, self.paramter))

    for forecast_day in forecast_day_list:
        if forecast_day not in self.forecast_day:
            raise ValueError('Invalid forecast_day passed: {}, possible forecast_day: {}'.format(
                forecast_day, self.forecast_day))

    start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')

    # Number of days after 2000-01-01
    start_day = (start_date - self.base_date).days
    end_day = (end_date - self.base_date).days

    # end_day + 1 so that the last day is also included
    for day_interval in range(start_day, end_day + 1):
        temp_date = self.base_date + datetime.timedelta(days=day_interval)
        temp_year = temp_date.year
        temp_month = temp_date.month
        temp_day = temp_date.day

        temp_path_output = os.path.join(path_output, str(temp_year),
                                        str(temp_month), str(temp_day))
        temp_path_forecast = os.path.join(path_forecast, str(temp_year),
                                          str(temp_month), str(temp_day))
        util.check_dir(temp_path_output)
        util.check_dir(temp_path_forecast)

        for parameter in parameter_list:
            for forecast_day in forecast_day_list:
                path_file = self._date_to_silam_path(
                    day_interval, parameter=parameter, forecast_day=forecast_day)

                # Day-0 data goes to the output dir; later days are forecasts
                if forecast_day == 0:
                    _temp_path_output = os.path.join(
                        temp_path_output, os.path.basename(path_file))
                else:
                    _temp_path_output = os.path.join(
                        temp_path_forecast, os.path.basename(path_file))

                logging.info('Downloading {}'.format(path_file))
                if os.path.isfile(_temp_path_output):
                    logging.warning('NC file already present, skipping.')
                    continue
                if os.path.isfile(util.get_file_name(_temp_path_output) + '.tif'):
                    logging.warning('TIF file already present, skipping.')
                    continue
                try:
                    wget.download(path_file, _temp_path_output)
                except Exception as e:
                    logging.warning('Unable to download data. Error: {}'.format(e))
                    continue
def set_dataset(self, anno_dir, img_dir, img_ext="JPG"):
    """Prepare the dataset used for training.

    Args:
        anno_dir (str or pathlib): Directory containing the annotation data.
        img_dir (str or pathlib): Directory containing the images.
        img_ext (str, optional): Image file extension. Defaults to "JPG".

    Raises:
        ValueError: If the classes have not been defined.

    Returns:
        OrderedDict: Training dataset.
            keys: Image ID, i.e. the image file name without its extension.
            value: dict{keys=["image", "bboxs", "obj_names", "obj_ids"]}
    """
    if self.classes is None:
        raise ValueError(
            "Classes is None. You should set classes (use set_classes).")

    path_anno = util.check_dir(anno_dir)
    path_img = util.check_dir(img_dir)
    self.path_anno = path_anno
    self.path_img = path_img
    self.logger.info(f"annotation_file_path: {anno_dir}")
    self.logger.info(f"image_file_path: {img_dir}")

    anno_files = self.get_annotation_files(anno_dir)
    self.annotation_files = anno_files
    self.logger.info(f"annotation_file_size: {len(anno_files)}")

    image_ids = []
    dataset = OrderedDict()
    for anno_file in tqdm(anno_files):
        bboxs, obj_names, meta_info = self.get_bounding_box_data(
            xml_file=anno_file)
        obj_ids = self.get_object_ids(obj_names=obj_names)
        img_file = path_img / meta_info["filename"]
        try:
            img_arr = self.read_image(img_file=img_file)
        except FileNotFoundError:
            self.logger.warning(
                f"Image file does not exist for the given xml_file: {str(img_file)}"
            )
            continue
        image_id = img_file.stem
        dataset[image_id] = {
            "image": img_arr,
            "bboxs": bboxs,
            "obj_names": obj_names,
            "obj_ids": obj_ids,
        }
        image_ids.append(image_id)
    self.dataset = dataset
    self.image_ids = image_ids
    return dataset
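# A hedged usage sketch for set_dataset(); "VOCDataset" and the labels below
# are placeholders for the actual wrapper class, which is not shown here.
# As the ValueError above indicates, set_classes() must run first.
builder = VOCDataset()  # hypothetical class name
builder.set_classes(["car", "person"])  # hypothetical labels
dataset = builder.set_dataset(anno_dir="data/annotations",
                              img_dir="data/images", img_ext="JPG")
print(len(dataset))  # number of images with a readable annotation/image pair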
def setup_pod_directories(self):
    """Check and create directories specific to this POD."""
    util.check_dir(self, 'POD_CODE_DIR', create=False)
    util.check_dir(self, 'POD_OBS_DATA', create=False)
    util.check_dir(self, 'POD_WK_DIR', create=True)

    dirs = ('model/PS', 'model/netCDF', 'obs/PS', 'obs/netCDF')
    for d in dirs:
        util.check_dir(os.path.join(self.POD_WK_DIR, d), create=True)
def verify_paths(self, config, p):
    # needs to be here, instead of PathManager, because we subclass it in
    # NOAA_GFDL
    keep_temp = config.get('keep_temp', False)
    # clean out WORKING_DIR if we're not keeping temp files:
    if os.path.exists(p.WORKING_DIR) and not \
            (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR):
        shutil.rmtree(p.WORKING_DIR)
    try:
        for dir_name, create_ in (
            ('CODE_ROOT', False), ('OBS_DATA_ROOT', False),
            ('MODEL_DATA_ROOT', True), ('WORKING_DIR', True)
        ):
            util.check_dir(p, dir_name, create=create_)
    except Exception as exc:
        _log.fatal((f"Input settings for {dir_name} mis-specified "
                    f"(caught {repr(exc)})."))
        util.exit_handler(code=1)
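# The two verify_paths() variants above call util.check_dir() either with an
# object plus an attribute name (check_dir(p, dir_name, create=...)) or with
# a path string plus a label (check_dir(path, 'CASE_ROOT_DIR', create=False)).
# A minimal sketch of a helper with that dual behavior (an assumed signature,
# not the framework's actual implementation):
import os


def check_dir(dir_path, attr_name="", create=False):
    # Accept a path string directly, or an object whose named attribute
    # holds the path (e.g. check_dir(paths, 'WORKING_DIR', create=True)).
    if not isinstance(dir_path, str):
        dir_path = getattr(dir_path, attr_name)
    if not os.path.isdir(dir_path):
        if create:
            os.makedirs(dir_path, exist_ok=True)
        else:
            raise FileNotFoundError(
                f"Directory {dir_path} not found (setting: {attr_name or 'path'}).")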
def get_annotation_files(self, dir, ext="xml"):
    """Get the list of annotation files as pathlib.Path objects.

    Args:
        dir (str): Path of the directory containing the annotation data.
        ext (str, optional): Extension of the annotation files. Defaults to "xml".

    Raises:
        FileNotFoundError: No annotation file was found.

    Returns:
        list(pathlib.Path): List of annotation files.
    """
    path_anno = util.check_dir(dir)
    files = list(path_anno.glob(f"**/*.{ext}"))
    if len(files) < 1:
        raise FileNotFoundError("Annotation file does not exist.")
    return files
import os
import logging

import numpy as np

import config
from src import model
from src import dataGenerator
from src import util
from src import pre_processing

# physical_devices = tf.config.experimental.list_physical_devices('GPU')
# assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
# _config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Checking for logging folder and creating logging file
util.check_dir(config.path_log)
util.set_logger(os.path.join(config.path_log, 'train.log'))

# Saving model parameters to the param json file
param_dict = config.model_param
util.save_json(param_dict, config.path_param)

logging.info('Loading Parameters')
params = util.Params(config.path_param)

# Training Data to Data Generator format
logging.info('Training Data to Data Generator format')
data = util.load_txt(config.path_training) + util.load_txt(config.path_testing)
index = np.arange(len(data))
np.random.shuffle(index)
import os

from src import util

experiment = 1
path_training = 'data/train.txt'
path_testing = 'data/test.txt'
path_output = 'experiments/e1'
path_log = 'log'

model_param = {
    'dimension_size': 100,
    'max_subword': 1,
    'batch_size': 1024,
    'learning_rate': 10e-4,
    'window_size': 3,
    'epochs': 20000,
    'valid_size': 16,     # Random set of words to evaluate similarity on.
    'valid_window': 100,  # Only pick dev samples in the head of the distribution.
    'vocab_size': 5000
}

# ###################### DO NOT CHANGE FROM HERE #########################
path_param = os.path.join(path_output, 'param.json')
path_output_model = os.path.join(path_output, 'model')

util.check_dir(path_output)
util.check_dir(path_log)
util.check_dir(path_output_model)
import os
import argparse

import tensorflow as tf
import keras
from keras.optimizers import Adam
from keras import backend as K
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.models import load_model

from src import metric, model, io, util, dataGenerator, loss
from src.bf_grid import bf_grid
import config

physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
_config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

timing = {}
util.check_dir(config.path_logs)
util.set_logger(os.path.join(config.path_logs, 'train.log'))

parser = argparse.ArgumentParser(
    description='See description below to see all available options')

parser.add_argument('-pt', '--pretrained',
                    help='Continuing training from the given model. '
                         '[Default] is no model given',
                    default=None,
                    type=str,
                    required=False)

parser.add_argument('-w', '--weight',
def set_output_directory(self, out):
    _ = util.check_dir(out, mkdir=True)
    self.out = out
    self.logger.info(f"set output: {out}")
import os
import logging

import gdal

import config
from src import util

util.check_dir(config.path_logs)
util.set_logger(os.path.join(config.path_logs, 'generateMutiRes.log'))


def drange(start: float, stop: float, step: float):
    r = start
    while r < stop:
        yield round(r, 1)
        r += step


def getRes(path_tif: str) -> float:
    """Reading the resolution of the input image

    Input:
        path_tif: path of the input to be processed

    Output:
        resolution rounded to one decimal place
    """
    ds = gdal.Open(path_tif)
    resolution = round(ds.GetGeoTransform()[1], 1)
    return resolution


logging.info('Iterating Training data in folder: {}'.format(config.path_image))
def test_check_dir_exist_dir(self):
    dir_path = "tools"
    actual = util.check_dir(dir=dir_path, mkdir=False)
    assert actual
def test_check_dir_not_exist(self):
    dir_path = "test/not_exist_dir"
    with pytest.raises(FileNotFoundError):
        _ = util.check_dir(dir=dir_path, mkdir=False)
import os
import logging

import config
from src import io, util

util.check_dir(config.path_logs)
util.set_logger(os.path.join(config.path_logs, 'generateDataset.log'))

# Checking directories
util.check_dir(config.path_model)
util.check_dir(config.path_weight)
util.check_dir(config.path_prediction)
util.check_dir(config.path_tiled)
util.check_dir(config.path_tiled_image)
util.check_dir(config.path_tiled_label)

# Validation
util.check_dir(config.path_vali_tiled_image)
util.check_dir(config.path_vali_tiled_label)

path_image = config.path_image_vrt
path_label = config.path_label_vrt
path_vali_image = config.path_vali_image_vrt
path_vali_label = config.path_vali_label_vrt

logging.info(
    'path_image: {}, path_label: {}, path_vali_image: {}, path_vali_label: {}'.format(
        path_image, path_label, path_vali_image, path_vali_label))

logging.info('Tiling Training Images...')
def prep_test(p4_name, p4_dir=P4_DIR):
    p4_file = p4_dir.joinpath(p4_name)
    target_dir = TARGET_DIR.joinpath(p4_file.stem)
    util.del_dir(target_dir)
    util.check_dir(target_dir)
    return p4_file, target_dir
def test_check_dir_mkdir(self, clear_temporary_dir):
    with pytest.raises(FileNotFoundError):
        _ = util.check_dir(dir=TMP_DIR, mkdir=False)

    dir_path = util.check_dir(dir=TMP_DIR, mkdir=True)
    assert dir_path.exists()
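# The check_dir tests above, together with the pathlib usage in
# get_annotation_files() and set_dataset(), pin down a contract for the
# keyword-style variant: it returns a pathlib.Path, raises FileNotFoundError
# when the directory is missing and mkdir=False, and creates it when
# mkdir=True. A minimal sketch consistent with that contract (an assumption,
# not the project's actual implementation):
from pathlib import Path


def check_dir(dir, mkdir=False):  # "dir" shadows the builtin to match the tests' keyword
    # Normalize to a Path so callers can use .glob(), the "/" operator, etc.
    path = Path(dir)
    if not path.is_dir():
        if not mkdir:
            raise FileNotFoundError(f"Directory does not exist: {path}")
        path.mkdir(parents=True, exist_ok=True)
    return path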
import os
import argparse

from src import postprocess
from src import metric
from src import io
from src import util
from src import bf_grid
from src import dataGenerator
from src import model
import config

# physical_devices = tf.config.experimental.list_physical_devices('GPU')
# assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
# _config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

util.check_dir(config.path_logs)
util.set_logger(os.path.join(config.path_logs, 'testing.log'))

parser = argparse.ArgumentParser(
    description='See description below to see all available options')

parser.add_argument('-sg', '--skipGridding',
                    help='If skipping gridding while testing. [Default] False',
                    type=bool,
                    default=False,
                    required=False)

parser.add_argument('-d', '--data',
import os
import logging

import numpy as np
import cv2
from tqdm import tqdm

import config
from src import util

paramDict = config.modelParam
util.save_json(paramDict, config.pathParam)
params = util.Params(config.pathParam)

# Checking for logging folder and creating logging file
util.check_dir(config.pathLog)
util.set_logger(os.path.join(config.pathLog, 'generateAugmentation.log'))

logging.info('Reading all the image files present in the training data folder')

# Reading all the image files present in the training data folder
pathTrainingData = []
for root, dirs, files in os.walk(config.pathTraining):
    for file in files:
        if file.endswith(tuple(config.imageFormat)):
            pathTrainingData.append(os.path.join(root, file))

# Checking for image augmentations
xflip = params.xflip
yflip = params.yflip
if xflip and yflip:
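# The truncated flag check above gates flip augmentation. A minimal,
# self-contained sketch of what that step might look like with cv2.flip (an
# assumption; the project's actual augmentation code is not shown):
import cv2
import numpy as np


def augment_flips(img: np.ndarray, xflip: bool, yflip: bool) -> list:
    """Return the image plus whichever flipped variants are enabled."""
    # cv2.flip flipCode: 1 = around the y-axis (horizontal flip),
    # 0 = around the x-axis (vertical flip), -1 = both axes.
    out = [img]
    if xflip:
        out.append(cv2.flip(img, 1))
    if yflip:
        out.append(cv2.flip(img, 0))
    if xflip and yflip:
        out.append(cv2.flip(img, -1))
    return out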