def saveAnomaly(data, new, compute=True):
    """Save the anomaly (deviation from the monthly mean) of *data* to the
    processed directory as a netCDF file.

    :param data: the data array whose anomaly should be saved
    :param new: if True, overwrite an already existing anomaly file
    :param compute: if True, compute the anomaly first; otherwise *data* is
        assumed to already be an anomaly and is saved unchanged
    """
    filename = generateFileName(data.name, dataset=data.dataset,
                                processed='anom', suffix='nc')
    path = join(processeddir, filename)

    # Reuse an existing file unless the caller explicitly asks for a new one.
    if exists(path) and not new:
        print(f"{data.name} anomaly already computed")
        return

    print(f"Compute {data.name} anomaly")
    if compute:
        print(f"Compute and save {data.name} anomaly")
        anom = computeAnomaly(data)
    else:
        print(f"Save {data.name} anomaly")
        anom = data

    # Name the result after the source variable, e.g. 'sst' -> 'sstAnom',
    # and carry over the original metadata with a note on the statistic.
    anom.name = ''.join([data.name, 'Anom'])
    anom.attrs = data.attrs.copy()
    anom.attrs['statistic'] = 'Substracted the monthly Mean.'
    anom.attrs = _delete_some_attributes(anom.attrs)
    anom.to_netcdf(path)
def computeMeanClimatology(data):
    """Return the mean climatology of *data*, grouped by its time period.

    The climatology is cached in the processed directory: it is computed and
    written to netCDF on the first call, then read back from disk on later
    calls.

    Fix: the user-facing messages previously misspelled "climatology" as
    "climatetology".

    :param data: the data array for which the climatology is computed
    :return: the mean climatology as an xarray DataArray
    """
    filename = generateFileName(data.name, dataset=data.dataset,
                                processed='meanclim', suffix='nc')
    path = join(processeddir, filename)

    if not exists(path):
        print(f"- Compute {data.name} climatology")
        period = _get_period(data)
        print(f"- Data has {period} period")

        if reference_period:
            # Standard WMO reference period 1981-2010.
            meanclim = data.loc['1981-01-01':'2010-12-31']. \
                groupby(f'time.{period}').mean(dim="time")
        else:
            print("Use the entire time series for the Mean Climatology")
            meanclim = data.groupby(f'time.{period}').mean(dim="time")
        meanclim.to_netcdf(path)
    else:
        print(f"- Read {data.name} climatology")
        meanclim = xr.open_dataarray(path)
    return meanclim
def save(self):
    """Assemble the computed network metrics into a DataFrame and write them
    to a csv file in the processed directory.
    """
    # Column names are kept byte-identical to earlier versions so existing
    # consumers of the csv files still find them (note that the column
    # 'avelocal_transmissivity' historically differs in spelling from the
    # attribute 'avglocal_transitivity' it is filled from).
    metrics = {
        'global_transitivity': self.global_transitivity,
        'avelocal_transmissivity': self.avglocal_transitivity,
        'fraction_clusters_size_2': self.frac_cluster_size2,
        'fraction_clusters_size_3': self.frac_cluster_size3,
        'fraction_clusters_size_5': self.frac_cluster_size5,
        'fraction_giant_component': self.frac_giant,
        'average_path_length': self.avg_path_length,
        'hamming_distance': self.hamming_distance,
        'corrected_hamming_distance': self.corrected_hamming_distance,
        'threshold': self.threshold_value,
        'edge_density': self.edge_density_value,
    }
    self.data = pd.DataFrame(metrics)

    base_name = generateFileName(self.variable, self.dataset,
                                 processed=self.processed, suffix='csv')
    filename = '-'.join(['network_metrics', base_name])

    if self.threshold is not None:
        # TODO: dynamic naming depending on the methods used
        pass
    elif self.edge_density is not None:
        pass

    self.data.to_csv(join(processeddir, filename))
def save(self, extension='', filename=None, n_components=3):
    """Save the leading principal components to a csv file.

    Generalized: the number of saved components is now a parameter instead
    of being hard-coded to three (the default preserves the old behaviour,
    producing columns 'pca1', 'pca2', 'pca3').

    :param extension: appended to the 'processed' part of the generated file
        name (only used when *filename* is None)
    :param filename: explicit file name without extension; when None the name
        is generated from variable/dataset/processed
    :param n_components: number of leading components to save (default 3)
    """
    # Index by the time axis; presumably these are first-of-month
    # timestamps — confirm against the upstream data loading.
    save_index = self.time.to_index()

    # Project the EOF array onto each leading component, one column per
    # component, named 'pca1', 'pca2', ...
    pcs = {
        f'pca{i + 1}': pd.Series(
            np.matmul(self.EOFarr, self.components_[i, :]),
            index=save_index)
        for i in range(n_components)
    }
    self.df = pd.DataFrame(pcs)

    if filename is None:
        filename = generateFileName(self.variable, self.dataset,
                                    ''.join((self.processed, extension)),
                                    suffix='csv')
    else:
        filename = '.'.join((filename, 'csv'))

    filename = '-'.join(['pca', filename])
    self.df.to_csv(join(processeddir, filename))
def read_statistic(self, statistic, variable, dataset='', processed=''):
    """Read a statistic csv file from the processed directory and return it
    restricted to the reader's time window.

    :param statistic: name of the statistic (used as file name prefix)
    :param variable: the name of the variable
    :param dataset: the name of the dataset
    :param processed: the postprocessing that was applied
    :return: the statistic as a DataFrame, sliced to [startdate, enddate]
    """
    base_name = generateFileName(variable, dataset,
                                 processed=processed, suffix="csv")
    filename = '-'.join([statistic, base_name])

    data = pd.read_csv(join(processeddir, filename),
                       index_col=0, parse_dates=True)
    # Validate that the file covers the requested period before slicing.
    self._check_dates(data, f"{variable} - {statistic}")
    return data.loc[self.startdate:self.enddate]
def toProcessedDir(data, new):
    """Write the basic data array to the processed directory as netCDF.

    :param data: the data array to save
    :param new: if True, overwrite an already existing file
    """
    filename = generateFileName(data.name, dataset=data.dataset, suffix='nc')
    path = join(processeddir, filename)

    # Only write when no file exists yet, or when the caller forces it.
    if not exists(path) or new:
        print(f"save {data.name} in post directory")
        data.to_netcdf(path)
    else:
        print(f"{data.name} already saved in post directory")
def read_netcdf(self, variable, dataset='', processed='', chunks=None):
    """wrapper for xarray.open_dataarray.

    :param variable: the name of the variable
    :param dataset: the name of the dataset
    :param processed: the postprocessing that was applied
    :param chunks: same as for xarray.open_dataarray
    """
    filename = generateFileName(variable, dataset,
                                processed=processed, suffix="nc")
    data = xr.open_dataarray(join(processeddir, filename), chunks=chunks)

    # Climatologies are not indexed by a plain time axis, so no
    # date checking or regional slicing is applied.
    if processed == 'meanclim':
        return data

    self._check_dates(data, f'{filename[:-3]}')

    regrided = ['GODAS', 'ERSSTv5', 'ORAS4', 'NODC', 'NCAR']

    if dataset == 'ORAP5':
        # ORAP5 comes on a curvilinear grid (nav_lat/nav_lon), so the
        # region is selected with a boolean mask rather than label slices.
        window = data.loc[self.startdate:self.enddate, :, :]
        return window.where((data.nav_lat > self.lat_min)
                            & (data.nav_lat < self.lat_max)
                            & (data.nav_lon > self.lon_min)
                            & (data.nav_lon < self.lon_max),
                            drop=True)

    if dataset in regrided or dataset == 'GFDL-CM3':
        # These datasets are sliced lat_min -> lat_max (ascending labels).
        return data.loc[self.startdate:self.enddate,
                        self.lat_min:self.lat_max,
                        self.lon_min:self.lon_max]

    # Remaining datasets are sliced lat_max -> lat_min — apparently their
    # latitude coordinate is stored in descending order.
    return data.loc[self.startdate:self.enddate,
                    self.lat_max:self.lat_min,
                    self.lon_min:self.lon_max]
def saveNormAnomaly(data, new):
    """Save the normalized anomaly of *data* to the processed directory.

    The normalized anomaly is the deviation from the monthly mean divided by
    the monthly standard deviation (see computeNormAnomaly). The result is
    written as a netCDF file; an existing file is reused unless *new* is True.

    :param data: the data array whose normalized anomaly should be saved
    :param new: if True, recompute and overwrite an already existing file
    """
    filename = generateFileName(data.name, dataset=data.dataset, processed='normanom', suffix='nc')
    path = join(processeddir, filename)
    if exists(path) and not new:
        # File already present and the caller did not force a recompute.
        print(f"{data.name} normed anomaly already computed")
    else:
        print(f"Compute {data.name} normed anomaly")
        normanom = computeNormAnomaly(data)
        # Name the result after the source variable, e.g. 'sst' -> 'sstNormAnom'.
        normanom.name = ''.join([data.name, 'NormAnom'])
        # Carry over the original metadata and record the applied statistic
        # (spelling of the 'statistic' string kept as-is for compatibility).
        normanom.attrs = data.attrs.copy()
        normanom.attrs['statistic'] = 'Substracted the monthly Mean.\ Divided by the Monthly standard deviation'
        normanom.attrs = _delete_some_attributes(normanom.attrs)
        normanom.to_netcdf(path)