Example #1
    def verify_paths(self, config, p):
        keep_temp = config.get('keep_temp', False)
        # clean out WORKING_DIR if we're not keeping temp files:
        if os.path.exists(p.WORKING_DIR) and not \
            (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR):
            gfdl_util.rmtree_wrapper(p.WORKING_DIR)

        try:
            for dir_name, create_ in (('CODE_ROOT', False),
                                      ('OBS_DATA_REMOTE', False),
                                      ('OBS_DATA_ROOT', True),
                                      ('MODEL_DATA_ROOT', True),
                                      ('WORKING_DIR', True)):
                util.check_dir(p, dir_name, create=create_)
        except Exception as exc:
            _log.fatal(f"Input settings for {dir_name} mis-specified "
                       f"(caught {repr(exc)}).")
            util.exit_handler(code=1)

        # Use GCP to create OUTPUT_DIR on a volume that may be read-only
        if not os.path.exists(p.OUTPUT_DIR):
            gfdl_util.make_remote_dir(p.OUTPUT_DIR,
                                      self.timeout,
                                      self.dry_run,
                                      log=_log)
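
Note: util.check_dir here takes a paths object, the attribute naming the directory, and a create flag. The framework's implementation is not shown on this page; the following is a minimal sketch inferred from these call sites, not the actual library code:

import os

def check_dir(obj, attr_name, create=False):
    # Hypothetical reconstruction from the call sites above, not the real util.check_dir.
    path = getattr(obj, attr_name)  # e.g. p.WORKING_DIR
    if os.path.isdir(path):
        return
    if create:
        os.makedirs(path, exist_ok=True)  # create missing directories on request
    else:
        raise FileNotFoundError(f"Directory '{path}' ({attr_name}) does not exist")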
Example #2
    def parse_flags(self, cli_obj):
        if cli_obj.config.get('dry_run', False):
            cli_obj.config['test_mode'] = True

        if cli_obj.config.get('disable_preprocessor', False):
            _log.warning(
                ("User disabled metadata checks and unit conversion in "
                 "preprocessor."),
                tags=util.ObjectLogTag.BANNER)
        if cli_obj.config.get('overwrite_file_metadata', False):
            _log.warning(("User chose to overwrite input file metadata with "
                          "framework values (convention = '%s')."),
                         cli_obj.config.get('convention', ''),
                         tags=util.ObjectLogTag.BANNER)
        # check this here, otherwise the error raised about a missing caselist is not informative
        try:
            if cli_obj.config.get('CASE_ROOT_DIR', ''):
                util.check_dir(cli_obj.config['CASE_ROOT_DIR'],
                               'CASE_ROOT_DIR',
                               create=False)
        except Exception as exc:
            _log.fatal((
                f"Mis-specified input for CASE_ROOT_DIR (received "
                f"'{cli_obj.config.get('CASE_ROOT_DIR', '')}', caught {repr(exc)}.)"
            ))
            util.exit_handler(code=1)
Example #3
    def download(self, path_output, path_forecast, start_date='2020-05-01',
                 end_date='2020-05-10', parameter_list=['PM25'],
                 forecast_day_list=[0, 1, 2, 3, 4]):

        assert isinstance(parameter_list, list), \
            'parameter_list should be of "list" type, given: {}'.format(type(parameter_list))
        assert isinstance(forecast_day_list, list), \
            'forecast_day_list should be of "list" type, given: {}'.format(type(forecast_day_list))

        for parameter in parameter_list:
            if parameter not in self.paramter:
                raise ValueError('Invalid parameter passed: {}, possible parameters: {}'
                                 .format(parameter, self.paramter))

        for forecast_day in forecast_day_list:
            if forecast_day not in self.forecast_day:
                raise ValueError('Invalid forecast_day passed: {}, possible forecast_day: {}'
                                 .format(forecast_day, self.forecast_day))

        start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
        end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')

        # Number of days after 2000-01-01
        start_day = (start_date - self.base_date).days
        end_day = (end_date - self.base_date).days

        # Since we want to include last day also
        for day_interval in range(start_day, end_day + 1):
            temp_date = self.base_date + datetime.timedelta(days=day_interval)
            temp_year = temp_date.year
            temp_month = temp_date.month
            temp_day = temp_date.day

            temp_path_output = os.path.join(path_output, str(temp_year), str(temp_month), str(temp_day))
            temp_path_forecast = os.path.join(path_forecast, str(temp_year), str(temp_month), str(temp_day))

            util.check_dir(temp_path_output)
            util.check_dir(temp_path_forecast)

            for parameter in parameter_list:
                for forecast_day in forecast_day_list:
                    path_file = self._date_to_silam_path(day_interval, parameter=parameter, forecast_day=forecast_day)

                    if forecast_day == 0:
                        _temp_path_output = os.path.join(temp_path_output, os.path.basename(path_file))
                    else:
                        _temp_path_output = os.path.join(temp_path_forecast, os.path.basename(path_file))

                    logging.info('Downloading {}'.format(path_file))
                    if os.path.isfile(_temp_path_output):
                        logging.warning('NC file already present; skipping.')
                        continue

                    if os.path.isfile(util.get_file_name(_temp_path_output) + '.tif'):
                        logging.warning('TIF file already present; skipping.')
                        continue

                    try:    
                        wget.download(path_file, _temp_path_output)
                    except Exception as e:
                        logging.warning('Unable to download data. Error: {}'.format(e))
                        continue
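
A hedged usage sketch for the method above; the class name SilamDownloader and its constructor are assumptions, since only the method body is shown:

# Hypothetical usage; the class name and argument values are assumptions.
downloader = SilamDownloader()
downloader.download(path_output='data/analysis',
                    path_forecast='data/forecast',
                    start_date='2020-05-01',
                    end_date='2020-05-03',
                    parameter_list=['PM25'],
                    forecast_day_list=[0, 1])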
Example #4
    def set_dataset(self, anno_dir, img_dir, img_ext="JPG"):
        """学習用のデータセットを用意する

        Args:
            anno_dir (str or pathlib): アノテーションデータのディレクトリ
            img_dir (str or pathlib): 画像ディレクトリ
            img_ext (str, optional): 画像ファイルの拡張子. Defaults to "JPG".

        Raises:
            ValueError: class定義がされていない場合

        Returns:
            OrderedDict: 学習データセット
                        keys: 画像ID. 画像ファイル名の拡張子なしの文字列.
                        value: dict{keys=["image", "bboxs", "obj_names", "obj_ids"]}
        """
        if self.classes is None:
            raise ValueError(
                "Classes is None. You should set classes (use set_classes).")
        path_anno = util.check_dir(anno_dir)
        path_img = util.check_dir(img_dir)
        self.path_anno = path_anno
        self.path_img = path_img
        self.logger.info(f"annotation_file_path: {anno_dir}")
        self.logger.info(f"image_file_path: {img_dir}")

        anno_files = self.get_annotation_files(anno_dir)
        self.annotation_files = anno_files
        self.logger.info(f"annotation_file_size: {len(anno_files)}")

        image_ids = []
        dataset = OrderedDict()
        for anno_file in tqdm(anno_files):
            bboxs, obj_names, meta_info = self.get_bounding_box_data(
                xml_file=anno_file)
            obj_ids = self.get_object_ids(obj_names=obj_names)
            img_file = path_img / meta_info["filename"]
            try:
                img_arr = self.read_image(img_file=img_file)
            except FileNotFoundError:
                self.logger.warning(
                    f"Image file does not exist for the given xml_file: {str(img_file)}"
                )
                continue
            image_id = img_file.stem
            dataset[image_id] = {
                "image": img_arr,
                "bboxs": bboxs,
                "obj_names": obj_names,
                "obj_ids": obj_ids,
            }
            image_ids.append(image_id)
        self.dataset = dataset
        self.image_ids = image_ids
        return dataset
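
A possible call sequence for the method above; the class name VocDataset and the directory paths are assumptions (set_classes is implied by the ValueError message):

# Hypothetical usage; class name and paths are assumptions.
loader = VocDataset()
loader.set_classes(["car", "person"])  # classes must be set first, or ValueError is raised
dataset = loader.set_dataset(anno_dir="data/annotations", img_dir="data/images")
for image_id, record in dataset.items():
    print(image_id, record["obj_names"], record["bboxs"])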
Example #5
    def setup_pod_directories(self):
        """Check and create directories specific to this POD.
        """
        util.check_dir(self, 'POD_CODE_DIR', create=False)
        util.check_dir(self, 'POD_OBS_DATA', create=False)
        util.check_dir(self, 'POD_WK_DIR', create=True)

        dirs = ('model/PS', 'model/netCDF', 'obs/PS', 'obs/netCDF')
        for d in dirs:
            util.check_dir(os.path.join(self.POD_WK_DIR, d), create=True)
Example #6
    def verify_paths(self, config, p):
        # needs to be here, instead of PathManager, because we subclass it in
        # NOAA_GFDL
        keep_temp = config.get('keep_temp', False)
        # clean out WORKING_DIR if we're not keeping temp files:
        if os.path.exists(p.WORKING_DIR) and not \
            (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR):
            shutil.rmtree(p.WORKING_DIR)

        try:
            for dir_name, create_ in (('CODE_ROOT', False),
                                      ('OBS_DATA_ROOT', False),
                                      ('MODEL_DATA_ROOT', True),
                                      ('WORKING_DIR', True)):
                util.check_dir(p, dir_name, create=create_)
        except Exception as exc:
            _log.fatal(f"Input settings for {dir_name} mis-specified "
                       f"(caught {repr(exc)}).")
            util.exit_handler(code=1)
Example #7
    def get_annotation_files(self, dir, ext="xml"):
        """アノテーションデータファイルのリストをPathlib.Path形式で取得する

        Args:
            dir (str): アノテーションデータの格納されているディレクトリのパス
            ext (str, optional): アノテーションデータファイルの拡張子.
                    Defaults to "xml".

        Raises:
            FileNotFoundError: アノテーションデータファイルが見つからなかった

        Returns:
            list(pathlib.Path): アノテーションデータファイルのリスト
        """
        path_anno = util.check_dir(dir)
        files = list(path_anno.glob(f"**/*.{ext}"))
        if len(files) < 1:
            raise FileNotFoundError("Annotation file does not exist.")
        return files
Example #8
import logging
import os

import numpy as np

import config

from src import model
from src import dataGenerator
from src import util
from src import pre_processing

# physical_devices = tf.config.experimental.list_physical_devices('GPU')
# assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
# _config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Checking for logging folder and creating logging file
util.check_dir(config.path_log)
util.set_logger(os.path.join(config.path_log, 'train.log'))

# Saving model parameters to the param json file
param_dict = config.model_param
util.save_json(param_dict, config.path_param)

logging.info('Loading Parameters')
params = util.Params(config.path_param)

# Training Data to Data Generator format
logging.info('Training Data to Data Generator format')
data = util.load_txt(config.path_training) + util.load_txt(config.path_testing)
index = np.arange(len(data))
np.random.shuffle(index)
Example #9
import os

from src import util

experiment = 1

path_training = 'data/train.txt'
path_testing = 'data/test.txt'

path_output = 'experiments/e1'
path_log = 'log'

model_param = {
    'dimension_size': 100,
    'max_subword': 1,
    'batch_size': 1024,
    'learning_rate': 10e-4,
    'window_size': 3,
    'epochs': 20000,
    'valid_size': 16,  # Random set of words to evaluate similarity on.
    'valid_window': 100,  # Only pick dev samples in the head of the distribution.
    'vocab_size': 5000
}

####################### DO NOT CHANGE FROM HERE #########################
path_param = os.path.join(path_output, 'param.json')
path_output_model = os.path.join(path_output, 'model')

util.check_dir(path_output)
util.check_dir(path_log)
util.check_dir(path_output_model)
Example #10
import argparse
import os

import tensorflow as tf
import keras
from keras.optimizers import Adam
from keras import backend as K
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.models import load_model
from src import metric, model, io, util, dataGenerator, loss
from src.bf_grid import bf_grid
import config

physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
_config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

timing = {}
util.check_dir(config.path_logs)
util.set_logger(os.path.join(config.path_logs, 'train.log'))

parser = argparse.ArgumentParser(
    description='See description below to see all available options')

parser.add_argument('-pt',
                    '--pretrained',
                    help='Continue training from the given model. '
                         '[Default] is no model given',
                    default=None,
                    type=str,
                    required=False)

parser.add_argument('-w',
                    '--weight',
Example #11
    def set_output_directory(self, out):
        _ = util.check_dir(out, mkdir=True)
        self.out = out
        self.logger.info(f"set output: {out}")
Example #12
import gdal
import os
import config
from src import util
import logging

util.check_dir(config.path_logs)
util.set_logger(os.path.join(config.path_logs, 'generateMutiRes.log'))


def drange(start: float, stop: float, step: float):
    r = start
    while r < stop:
        yield round(r, 1)
        r += step


def getRes(path_tif: str) -> float:
    """
    Reading resolution of input image
    Input:
        path_tif: path of input to be processed
    Output:
        resolution: pixel resolution rounded to one decimal place
    """
    ds = gdal.Open(path_tif)
    resolution = round(ds.GetGeoTransform()[1], 1)
    return resolution


logging.info('Iterating Training data in folder: {}'.format(config.path_image))
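
The helpers above suggest this script resamples each training image over a range of resolutions. A hedged continuation of the loop, using gdal.Warp; the resolution range, the output naming, and config.path_output are assumptions:

# Hypothetical continuation; resolution range and output naming are assumptions.
for root, dirs, files in os.walk(config.path_image):
    for name in files:
        if not name.endswith('.tif'):
            continue
        path_tif = os.path.join(root, name)
        native_res = getRes(path_tif)
        # resample from the native resolution up to 4x coarser, in native-res steps
        for res in drange(native_res, native_res * 4, native_res):
            path_out = os.path.join(config.path_output,
                                    '{}_{}.tif'.format(os.path.splitext(name)[0], res))
            gdal.Warp(path_out, path_tif, xRes=res, yRes=res)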
Example #13
    def test_check_dir_exist_dir(self):
        dir_path = "tools"
        actual = util.check_dir(dir=dir_path, mkdir=False)
        assert actual
Example #14
    def test_check_dir_not_exist(self):
        dir_path = "test/not_exist_dir"
        with pytest.raises(FileNotFoundError):
            _ = util.check_dir(dir=dir_path, mkdir=False)
Example #15
import logging
import config
import os
from src import io, util

util.check_dir(config.path_logs)
util.set_logger(os.path.join(config.path_logs, 'generateDataset.log'))

# Checking directories
util.check_dir(config.path_model)
util.check_dir(config.path_weight)
util.check_dir(config.path_prediction)
util.check_dir(config.path_tiled)
util.check_dir(config.path_tiled_image)
util.check_dir(config.path_tiled_label)

# Validation
util.check_dir(config.path_vali_tiled_image)
util.check_dir(config.path_vali_tiled_label)

path_image = config.path_image_vrt
path_label = config.path_label_vrt

path_vali_image = config.path_vali_image_vrt
path_vali_label = config.path_vali_label_vrt

logging.info(
    'path_image: {}, path_label: {}, path_vali_image: {}, path_vali_label: {}'.
    format(path_image, path_label, path_vali_image, path_vali_label))

logging.info('Tiling Training Images...')
Example #16
def prep_test(p4_name, p4_dir=P4_DIR):
    p4_file = p4_dir.joinpath(p4_name)
    target_dir = TARGET_DIR.joinpath(p4_file.stem)
    util.del_dir(target_dir)
    util.check_dir(target_dir)
    return p4_file, target_dir
Example #17
    def test_check_dir_mkdir(self, clear_temporary_dir):
        with pytest.raises(FileNotFoundError):
            _ = util.check_dir(dir=TMP_DIR, mkdir=False)
        dir_path = util.check_dir(dir=TMP_DIR, mkdir=True)
        assert dir_path.exists()
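
Together, Examples #13, #14, and #17 pin down this project's check_dir variant: return an existing directory as a pathlib.Path, create it when mkdir=True, and raise FileNotFoundError otherwise. A minimal sketch satisfying those tests, not the project's actual code:

from pathlib import Path

def check_dir(dir, mkdir=False):
    # Sketch inferred from the tests above, not the real util.check_dir.
    path = Path(dir)
    if path.is_dir():
        return path
    if mkdir:
        path.mkdir(parents=True, exist_ok=True)  # create missing parents as needed
        return path
    raise FileNotFoundError(f"Directory '{path}' does not exist")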
Example #18
import argparse
import os

from src import postprocess
from src import metric
from src import io
from src import util
from src import bf_grid
from src import dataGenerator
from src import model

import config

# physical_devices = tf.config.experimental.list_physical_devices('GPU')
# assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
# _config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

util.check_dir(config.path_logs)
util.set_logger(os.path.join(config.path_logs, 'testing.log'))

parser = argparse.ArgumentParser(
    description='See description below to see all available options')

parser.add_argument(
    '-sg',
    '--skipGridding',
    help='If skipping gridding while testing. [Default] False',
    type=bool,
    default=False,
    required=False)

parser.add_argument('-d',
                    '--data',
Example #19
import logging
import os

import cv2
import numpy as np
from tqdm import tqdm

import config
from src import util

paramDict = config.modelParam
util.save_json(paramDict, config.pathParam)
params = util.Params(config.pathParam)

# Checking for logging folder and creating logging file
util.check_dir(config.pathLog)
util.set_logger(os.path.join(config.pathLog, 'generateAugmentation.log'))

logging.info('Reading all the image files present in the training data folder')
# Reading all the image files present in the training data folder
pathTrainingData = []
for root, dirs, files in os.walk(config.pathTraining):
    for file in files:
        if file.endswith(tuple(config.imageFormat)):
            pathTrainingData.append(os.path.join(root, file))

# Checking for Image augmentations
xflip = params.xflip
yflip = params.yflip

if xflip and yflip: