def copy_log_from_where(rundate, pipeline, args):
    kwargs = dict()
    for a in args.split():
        if "=" in a:
            a = a.split("=", maxsplit=1)
            kwargs[a[0].lstrip("-")] = a[1]
    file_vars = dict(**config.program_vars(rundate, pipeline, use_options=False, **kwargs), **config.date_vars(rundate))
    log_level = config.where.runner.log_level.str
    current_level = "none"
    try:
        with config.files.open("log", file_vars=file_vars) as fid:
            for line in fid:
                line_level, _, text = line.partition(" ")
                line_level = line_level.strip().lower()
                current_level = line_level if line_level else current_level
                text = text.strip()
                if getattr(LogLevel, current_level) >= getattr(LogLevel, log_level) and text:
                    # Strip the 20 date characters from the text
                    log.log(text[20:], current_level)
    except FileNotFoundError as err:
        log.warn(f"'{err}'")
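
# Self-contained sketch of the log filtering done in copy_log_from_where, assuming Where log
# lines look like "LEVEL 2018-05-01 12:00:00 message" (a level word, a 20-character timestamp
# including the trailing space, then the text). The IntEnum and sample lines below are
# illustrative assumptions, not Where's own LogLevel or log format.
from enum import IntEnum


class _LogLevelSketch(IntEnum):
    none = 0
    debug = 1
    info = 2
    warn = 3
    error = 4
    fatal = 5


def _filter_log_lines(lines, threshold="info"):
    """Yield (level, text) for log lines at or above the given threshold"""
    current_level = "none"
    for line in lines:
        line_level, _, text = line.partition(" ")
        line_level = line_level.strip().lower()
        # Lines without a level word (e.g. starting with a space) inherit the previous level
        current_level = line_level if line_level else current_level
        text = text.strip()
        if getattr(_LogLevelSketch, current_level) >= getattr(_LogLevelSketch, threshold) and text:
            yield current_level, text[20:]  # drop the 20 timestamp characters


# Example:
_lines = ["INFO 2018-05-01 12:00:00 Reading data", "DEBUG 2018-05-01 12:00:01 Some detail"]
assert list(_filter_log_lines(_lines)) == [("info", "Reading data")]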
def list_datasets(rundate, tech, session, stage, **kwargs):
    """List datasets in a given dataset file

    Args:
        rundate:  Datetime, the model run date.
        tech:     String, the technique.
        session:  String, the session name.
        stage:    String, the stage.
        kwargs:   Other arguments are passed to files.open.

    Returns:
        List of strings describing the datasets.
    """
    file_vars = dict(
        config.program_vars(rundate, tech, session=session, stage=stage, **kwargs), **config.date_vars(rundate)
    )
    try:
        with files.open("dataset_json", file_vars=file_vars) as fid:
            json_data = json.load(fid)
    except FileNotFoundError:
        # No dataset file found: warn and return an empty list as documented
        log.warn(f"No data found for {tech.upper()} {stage} {rundate.strftime(config.FMT_date)}")
        return list()

    return sorted(k for k in json_data.keys() if not k.startswith("_") and "/" in k)
def parse_dataset_id(rundate, tech, stage, dataset_name, dataset_id, **kwargs):
    """Allow for some advanced handling of dataset_id

    In addition to using regular numbers as dataset_id, some text keywords can be used:

    + 'last': Use the last dataset_id written to file, default 0 if no file is previously written.
    + 'all':  Return a list of all dataset_ids in the file.
    """
    if isinstance(dataset_id, (float, int)):
        return dataset_id

    # Use the JSON-file to find information about the dataset ids
    file_vars = dict(
        config.program_vars(rundate, tech, session=dataset_name, stage=stage, **kwargs), **config.date_vars(rundate)
    )
    try:
        with files.open("dataset_json", file_vars=file_vars) as fid:
            json_data = json.load(fid)
    except FileNotFoundError:
        json_data = dict()

    if dataset_id == "last":
        # If _last_dataset_id is not given, use dataset_id=0 as default
        return json_data.get(dataset_name, dict()).get("_last_dataset_id", 0)

    if dataset_id == "all":
        return [int(k.split("/")[-1]) for k in json_data.keys() if k.startswith("{}/".format(dataset_name))]
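
# Hedged, self-contained sketch of the dataset JSON layout assumed by list_datasets and
# parse_dataset_id: each dataset is stored under a "<name>/<id>" key, per-name bookkeeping
# (such as "_last_dataset_id") lives under the bare name, and other "_"-prefixed keys are
# metadata. The sample dictionary and its values are illustrative assumptions only.
_sample_json_data = {
    "_version": "sample",            # illustrative metadata entry, skipped by list_datasets
    "XA": {"_last_dataset_id": 1},   # per-name bookkeeping used by dataset_id="last"
    "XA/0000": {},
    "XA/0001": {},
}

# list_datasets-style filtering: keep only the "<name>/<id>" keys
_datasets = sorted(k for k in _sample_json_data if not k.startswith("_") and "/" in k)
assert _datasets == ["XA/0000", "XA/0001"]

# parse_dataset_id-style resolution of the text keywords
_last_id = _sample_json_data.get("XA", dict()).get("_last_dataset_id", 0)              # -> 1
_all_ids = [int(k.split("/")[-1]) for k in _sample_json_data if k.startswith("XA/")]   # -> [0, 1]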
def __init__(self, rundate, tech, stage, dataset_name, dataset_id, empty=False, **kwargs):
    """Create a new Dataset or read an existing one

    Note:
        Be aware that the implementation is dependent on ``self._fields`` being the first attribute to be set. See
        :func:`__setattr__` for more information.

    Args:
        rundate:       Date, the model run date.
        tech:          String, the technique.
        stage:         String, the stage.
        dataset_name:  String, the name of the dataset.
        dataset_id:    Int, id of the dataset.
        empty:         Boolean, if False (default) will read dataset from disk if available.
    """
    self._fields = dict()
    self._data = dict()
    self._num_obs = 0
    self._default_field_suffix = None
    self._kwargs = kwargs
    self._kwargs.setdefault("session", dataset_name)  # TODO: Can this be removed?
    self.vars = dict(
        config.program_vars(
            **dict(
                kwargs,
                rundate=rundate,
                tech_name=tech,
                stage=stage,
                dataset_name=dataset_name,
                dataset_id=str(dataset_id),
            )
        )
    )
    self.vars.update(**kwargs)
    self.rundate = rundate
    dataset_id = _data.parse_dataset_id(rundate, tech, stage, dataset_name, dataset_id)
    self.name = "{name}/{id:04d}".format(name=dataset_name, id=dataset_id)
    self.meta = dict()

    # Try to read dataset from disk unless explicitly told to create an empty dataset
    if not empty:
        try:
            self.read()
        except FileNotFoundError:
            pass
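
# Minimal sketch of two small patterns used in __init__ above. The literal values are
# arbitrary examples, not taken from the library.
#
# 1) dict(kwargs, key=value, ...) builds a new dict where the explicit keyword arguments
#    override anything already present in kwargs:
_kwargs = {"session": "XA", "stage": "read"}
_vars = dict(_kwargs, stage="calculate")
assert _vars == {"session": "XA", "stage": "calculate"}

# 2) The dataset name joins the dataset name and a zero-padded dataset id:
_name = "{name}/{id:04d}".format(name="XA", id=2)
assert _name == "XA/0002"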
def copy_log_from_where(rundate, pipeline, session):
    file_vars = dict(**config.program_vars(rundate, pipeline, session), **config.date_vars(rundate))
    log_level = config.where.runner.log_level.str
    current_level = "none"
    try:
        with files.open("log", file_vars=file_vars) as fid:
            for line in fid:
                line_level, _, text = line.partition(" ")
                line_level = line_level.strip().lower()
                current_level = line_level if line_level else current_level
                text = text.strip()
                if getattr(LogLevel, current_level) >= getattr(LogLevel, log_level) and text:
                    log.log(text, current_level)
    except FileNotFoundError as err:
        log.warn(f"'{err}'")
def _concatenate_datasets(from_date: date, to_date: date, dset_vars: Dict[str, str], only_for_rundate: bool) -> "data.Dataset":
    """Concatenate datasets

    Args:
        from_date:         Start date for reading Dataset.
        to_date:           End date for reading Dataset.
        dset_vars:         Common Dataset variables.
        only_for_rundate:  Concatenate only data for given rundate.

    Returns:
        Dataset with data concatenated day by day from from_date to to_date.
    """
    merged_vars = config.program_vars(rundate=from_date, tech_name=dset_vars["tech"], **dset_vars)
    merged_vars["id"] += "_concatenated"
    dset_merged = data.Dataset(**dict(merged_vars, rundate=from_date, empty=True))

    date_to_read = from_date
    while date_to_read <= to_date:
        dset = data.Dataset(rundate=date_to_read, **dset_vars)

        current_date = date_to_read
        date_to_read += timedelta(days=1)

        if dset.num_obs == 0:
            log.info(f"No data to read for {current_date}")
            continue

        if only_for_rundate:
            _keep_data_only_for_rundate(dset)

            if dset.num_obs == 0:
                log.info(f"No data to read for {current_date}")
                continue

        log.info(f"Reading data for {current_date}")
        if not dset_merged:
            dset_merged.copy_from(dset)
        else:
            dset_merged.extend(dset)

    return dset_merged
def concatenate_datasets(from_date, to_date, dset_vars):
    merged_vars = config.program_vars(rundate=from_date, tech_name=dset_vars["tech"], **dset_vars)
    merged_vars["id"] += "_concatenated"
    dset_merged = data.Dataset(**dict(merged_vars, rundate=from_date, empty=True))

    date_to_read = from_date
    while date_to_read <= to_date:
        dset = data.Dataset(rundate=date_to_read, **dset_vars)

        current_date = date_to_read
        date_to_read += timedelta(days=1)

        if dset.num_obs == 0:
            log.info(f"No data to read for {current_date}")
            continue

        log.info(f"Reading data for {current_date}")
        if not dset_merged:
            dset_merged.copy_from(dset)
        else:
            dset_merged.extend(dset)

    return dset_merged
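
# Hedged sketch of the day-by-day concatenation loop used in the two functions above, with
# plain Python lists standing in for data.Dataset. The read_one_day callable and the sample
# dates are illustrative assumptions; the real code reads one Dataset per rundate and extends
# the merged Dataset with it.
from datetime import date, timedelta


def _concatenate_sketch(from_date, to_date, read_one_day):
    """Collect per-day results from from_date to to_date (inclusive) into one list"""
    merged = []
    date_to_read = from_date
    while date_to_read <= to_date:
        day_data = read_one_day(date_to_read)
        date_to_read += timedelta(days=1)

        if not day_data:
            # Mirrors the `dset.num_obs == 0` check: skip days without data
            continue

        merged.extend(day_data)
    return merged


# Example with a fake reader that has no data on the middle day:
_fake = {date(2018, 5, 1): [1, 2], date(2018, 5, 2): [], date(2018, 5, 3): [3]}
assert _concatenate_sketch(date(2018, 5, 1), date(2018, 5, 3), _fake.get) == [1, 2, 3]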