import os

import xarray as xr

import utils


def main_func():
    # Select input folder
    fldr_in = utils.get_folder_path('Select input folder')
    if not fldr_in:
        raise Exception('Input folder selection aborted')
    fldr_in = os.path.join(fldr_in, '*.nc')

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # Open and concatenate all the files into a single Dataset
    ds = xr.open_mfdataset(fldr_in, engine='netcdf4', mask_and_scale=False)

    # Convert calendar to the standard one
    utils.convert_calendar(ds)

    # Add to file history
    utils.add_to_history(ds=ds,
                         txt='Drozdowski: concatenation of multiple files',
                         prepend=True)
    utils.add_to_history(ds=ds,
                         txt='Drozdowski: set calendar to standard',
                         prepend=True)

    # Get default encodings for use with the Dataset.to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

    # Save Dataset to file with encodings
    ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

    # open_mfdataset() manages the underlying file handles itself, so there
    # is no need to close the files explicitly
    print('Done!!!')
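# `utils.get_to_netcdf_encodings` is used above but not defined in this file.
# A minimal sketch of what such a helper presumably returns, assuming zlib
# compression per data variable (the project's actual implementation may
# differ):
def get_to_netcdf_encodings_sketch(ds, comp_level=4):
    """Build a per-variable encoding dict for Dataset.to_netcdf()."""
    return {name: {'zlib': True, 'complevel': comp_level}
            for name in ds.data_vars}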
import os
import time

from utils import derypt_file
from utils import list_files_in_path
from utils import get_password
from utils import get_folder_path
from utils import delete_folder
from utils import get_file_sha256sum
from utils import get_hash_sum_path

if __name__ == "__main__":
    local_path, data_path = get_folder_path()
    hash_sum_path = get_hash_sum_path()
    failed_file = []
    for data_file in list_files_in_path(data_path):
        # Path of the file relative to data_path, used to mirror the tree
        # under local_path and hash_sum_path
        rel_path = os.path.relpath(data_file, data_path)
        target_file = os.path.join(local_path, rel_path)
        hash_file = os.path.join(hash_sum_path, rel_path)
        folder = os.path.dirname(target_file)
        hash_folder = os.path.dirname(hash_file)
        os.makedirs(folder, exist_ok=True)
        os.makedirs(hash_folder, exist_ok=True)
        try:
            start_time = time.time()
            ret = derypt_file(data_file, target_file, get_password())
            end_time = time.time()
            if ret.exit == 0:
                print("Decrypt %s ok.... %ss" % (data_file, end_time - start_time))
                # os.remove(data_file)
                sha256sum = get_file_sha256sum(target_file)
                with open(hash_file, "w") as f:
                    # Assumption: the original excerpt is truncated at the
                    # open() above; writing the checksum and the failure
                    # bookkeeping below follow the failed_file pattern
                    # declared earlier
                    f.write(sha256sum)
            else:
                failed_file.append(data_file)
        except Exception as exc:
            print("Decrypt %s failed: %s" % (data_file, exc))
            failed_file.append(data_file)
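# `get_file_sha256sum` is imported from utils above. A minimal sketch of such
# a helper using hashlib, reading in chunks so large files don't fill memory
# (the project's actual implementation may differ):
import hashlib

def get_file_sha256sum_sketch(path, chunk_size=1 << 20):
    """Return the hex SHA-256 digest of the file at `path`."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()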
parser.add_argument("--gpu_idx", type=str, default='0', help="") parser.add_argument("--epochs", type=int, default=1000, help="") parser.add_argument("--opt", type=str, default='adam', help="") parser.add_argument("--loss", type=str, default='mse', help="") parser.add_argument("--batch_size", type=int, default=4, help="") parser.add_argument("--lr", type=float, default=1e-5, help="") parser.add_argument("--weight_decay", type=float, default=10e-3, help="") parser.add_argument("--early_stopping", type=int, default=40, help="") # Recommender params parser.add_argument("--num_recs", type=int, default=10, help="") args = parser.parse_args() # Setup data and weights file path data_folder, weights_folder, logger_folder = get_folder_path(args.dataset + args.dataset_name) # Setup device if not torch.cuda.is_available() or args.device == 'cpu': device = 'cpu' else: device = 'cuda:{}'.format(args.gpu_idx) # Setup args dataset_args = { 'root': data_folder, 'dataset': args.dataset, 'name': args.dataset_name, 'num_core': args.num_core, 'num_feat_core': args.num_feat_core, 'seed': args.seed,
def __init__(self,
             test_groups,
             data=None,
             groups=None,
             feature=None,
             data_source=None,
             data_query_path=None,
             time_period=None,
             time_indicator=None,
             time_schedule=None,
             exporting_data=True,
             export_path=None,
             connector=None,
             confidence_level=None,
             boostrap_sample_ratio=None,
             boostrap_iteration=None):
    self.test_groups = test_groups
    self.data = data
    self.groups = groups
    self.feature = feature
    self.data_source = data_source
    self.data_query_path = data_query_path
    self.time_period = time_period
    self.time_indicator = time_indicator
    self.time_schedule = time_schedule
    # Only export when an export path has actually been supplied
    self.exporting_data = False if export_path is None else exporting_data
    self.export_path = export_path
    self.connector = connector
    self.confidence_level = confidence_level
    self.boostrap_sample_ratio = boostrap_sample_ratio
    self.boostrap_iteration = boostrap_iteration
    self.arguments = {
        "data": data,
        "test_groups": test_groups,
        "groups": groups,
        "feature": feature,
        "data_source": data_source,
        "data_query_path": data_query_path,
        "time_period": time_period,
        "time_indicator": time_indicator,
        "export_path": export_path,
        "exporting_data": exporting_data,
        "parameters": None
    }
    # Short command-line flags for each argument
    self.arg_terminal = {
        "test_groups": "TG",
        "groups": "G",
        "date": "D",
        "feature": "F",
        "data_source": "DS",
        "data_query_path": "DQP",
        "time_period": "TP",
        "time_indicator": "TI",
        "export_path": "EP",
        "parameters": "P"
    }
    self.args_str = ""
    self.ab_test = None
    self.path = get_folder_path()
    self.mandetory_arguments = [
        "data_source", "data_query_path", "test_groups",
        "groups", "feature", "export_path"
    ]
    self.schedule_arg = "TS"
    self.params = None
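# Hypothetical usage sketch: the enclosing class name is not shown in this
# excerpt, so `ABTest` below is a placeholder, and the argument values are
# illustrative only:
#
#   ab = ABTest(test_groups='groups',
#               groups='campaign',
#               feature='purchase_amount',
#               data_source='postgresql',
#               data_query_path='SELECT * FROM events',
#               export_path='./outputs')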
import glob
import os

import xarray as xr

import utils


def main_func():
    # Select input folder
    fldr_in = utils.get_folder_path('Select input folder')
    if not fldr_in:
        raise Exception('Input folder selection aborted')
    fldr_in = os.path.join(fldr_in, '*.nc')

    # Select output folder
    fldr_out = utils.get_folder_path('Select output folder')
    if not fldr_out:
        raise Exception('Output folder selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # Iterate over all *.nc files in the input folder
    files = glob.glob(fldr_in)
    for path in files:
        # Get only the file name, without the folder
        file = os.path.basename(path)

        # Extract the date from the file name into a pandas datetime.
        # Skip the file, with a printout, if its name has no parsable
        # date component.
        pdt = dt_from_filename(file)
        if pdt is None:
            print(f'File: {file} is missing a date component in its name. Skipped')
            continue

        # Open the full-path file into a Dataset
        ds = xr.open_dataset(path, engine='netcdf4', mask_and_scale=False)

        # Add a time dimension/variable based on the extracted datetime
        ds = ds.expand_dims({'time': [pdt]})

        # Add some attributes to the time variable
        ds.time.attrs['long_name'] = 'time'
        ds.time.attrs['standard_name'] = 'time'
        ds.time.attrs['axis'] = 'T'
        ds.time.attrs['Descript'] = 'Time'
        # About time units: the units will be `days since ...`. This is
        # correct because CF Conventions assign special meanings to
        # `months since ...` and `years since ...`. The time units are
        # generated automatically.

        # Convert calendar to the standard one
        utils.convert_calendar(ds)

        # Add to file history
        utils.add_to_history(
            ds=ds,
            txt='Drozdowski: add time dimension based on file name',
            prepend=True)
        utils.add_to_history(ds=ds,
                             txt='Drozdowski: set calendar to standard',
                             prepend=True)

        # Get default encodings for use with the Dataset.to_netcdf() method
        encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

        # Give the output file the same name as the current input file,
        # but in the output folder
        file_out = os.path.join(fldr_out, file)

        # Save Dataset to file with encodings
        ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

        # Close the Dataset file
        ds.close()

    print('Done!!!')
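# `dt_from_filename` is called above but not defined in this file. A minimal
# sketch, assuming file names embed a YYYYMMDD (or YYYYMM) date component;
# the regex and parsing rules are assumptions, not the project's actual
# helper:
import re

import pandas as pd


def dt_from_filename(file):
    """Extract a pandas Timestamp from a file name, or None if no date is found."""
    match = re.search(r'(\d{8}|\d{6})', file)  # YYYYMMDD or YYYYMM
    if match is None:
        return None
    txt = match.group(1)
    fmt = '%Y%m%d' if len(txt) == 8 else '%Y%m'
    try:
        return pd.to_datetime(txt, format=fmt)
    except ValueError:
        return None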
import os

import xarray as xr
import xclim as xc

import utils


def do_generate_indices():
    # Select the folder containing all the *.nc files to be included in
    # the calculations
    fldr_in = utils.get_folder_path('Select input folder')
    if not fldr_in:
        raise Exception('Folder selection aborted')
    fldr_in = os.path.join(fldr_in, '*.nc')

    # Select the file to save as. Don't overwrite an input file, and
    # don't save into the input folder!
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Save file selection aborted')

    # Setting xarray to keep attributes for DataArrays and Datasets
    # causes problems with unit conversions, so don't use it here
    #xr.set_options(keep_attrs=True)

    # Open and concatenate all *.nc files in the folder into a Dataset
    # to be used in the calculations
    ds = xr.open_mfdataset(fldr_in, engine='netcdf4')
    ds = ds.resample(time='D').mean(keep_attrs=True)

    # Output info about the Dataset so you can see the available variables
    #print(ds)

    # You could calculate based on a selection of the data by lat and/or
    # lon and/or time, as long as the ranges are in the *.nc files. If you
    # do this, you need to replace references to `ds` with `ds_sel` in
    # the calculations.
    #ds_sel = ds.sel(lat=slice(45, 50), lon=slice(150, 175))  #, time=slice('2090', '2100'))

    # Define an output Dataset with attributes from the input Dataset
    ds_out = xr.Dataset(attrs=ds.attrs)

    # Calculations of `Indicators`, which are xclim wrappers around indice
    # functions, providing additional functionality over the underlying
    # indice function. List of available Indicators:
    # https://xclim.readthedocs.io/en/stable/indicators.html
    # The following calculations assume the `pr`, `tasmax`, and `tasmin`
    # variables exist.

    # Precipitation indicators based on the `pr` variable

    # R1mm: Number of days per year when precipitation ≥ 1 mm
    da = xc.atmos.wetdays(ds.pr, thresh='1 mm/day', freq='YS')
    ds_out[da.name] = da

    # CDD: Maximum number of consecutive days with daily precipitation < 1 mm
    da = xc.atmos.maximum_consecutive_dry_days(ds.pr, thresh='1 mm/day', freq='YS')
    ds_out[da.name] = da

    # CWD: Maximum number of consecutive days with daily precipitation ≥ 1 mm
    da = xc.atmos.maximum_consecutive_wet_days(ds.pr, thresh='1 mm/day', freq='YS')
    ds_out[da.name] = da

    # PRCPTOT: Annual total precipitation in wet days (daily precipitation ≥ 1 mm)
    da = xc.atmos.precip_accumulation(ds.pr, freq='YS')
    ds_out[da.name] = da

    # SDII: Annual total precipitation divided by the number of wet days
    da = xc.atmos.daily_pr_intensity(ds.pr, thresh='1 mm/day', freq='YS')
    ds_out[da.name] = da

    # RX1day: Annual maximum 1-day precipitation
    da = xc.atmos.max_1day_precipitation_amount(ds.pr, freq='YS')
    ds_out[da.name] = da

    # RX5day: Annual maximum 5-day precipitation
    da = xc.atmos.max_n_day_precipitation_amount(ds.pr, window=5, freq='YS')
    ds_out[da.name] = da

    # Temperature indicators based on the `tasmax` and `tasmin` variables

    # TXx: Annual maximum daily maximum temperature
    da = xc.atmos.tx_max(ds.tasmax, freq='YS')
    ds_out[da.name] = da

    # TNx: Annual maximum daily minimum temperature
    da = xc.atmos.tn_max(ds.tasmin, freq='YS')
    ds_out[da.name] = da

    # TXn: Annual minimum daily maximum temperature
    da = xc.atmos.tx_min(ds.tasmax, freq='YS')
    ds_out[da.name] = da

    # TNn: Annual minimum daily minimum temperature
    da = xc.atmos.tn_min(ds.tasmin, freq='YS')
    ds_out[da.name] = da

    # FD: Number of days per year when daily minimum temperature < 0°C
    da = xc.atmos.frost_days(ds.tasmin, freq='YS')
    ds_out[da.name] = da

    # ID: Number of days per year when daily maximum temperature < 0°C
    da = xc.atmos.ice_days(ds.tasmax, freq='YS')
    ds_out[da.name] = da

    # SU: Number of days per year when daily maximum temperature > 25°C
    da = xc.atmos.tx_days_above(ds.tasmax, thresh='25 degC', freq='YS')
    ds_out[da.name] = da

    # TR: Number of days per year when daily minimum temperature > 20°C
    da = xc.atmos.tropical_nights(ds.tasmin, thresh='20 degC', freq='YS')
    ds_out[da.name] = da

    # Local DataArrays (not part of the Dataset) to be used below
    tas = xc.indices.tas(ds.tasmin, ds.tasmax)
    t10 = xc.core.calendar.percentile_doy(tas, per=10)
    t90 = xc.core.calendar.percentile_doy(tas, per=90)
    tn10 = xc.core.calendar.percentile_doy(ds.tasmin, per=10)

    # TX10p: Percentage of days with daily maximum temperature < 10th
    # percentile of the base period
    da = xc.atmos.tx10p(ds.tasmax, t10, freq='YS')
    ds_out[da.name] = da

    # TX90p: Percentage of days with daily maximum temperature > 90th
    # percentile of the base period
    da = xc.atmos.tx90p(ds.tasmax, t90, freq='YS')
    ds_out[da.name] = da

    # TN10p: Percentage of nights with daily minimum temperature < 10th
    # percentile of the base period
    da = xc.atmos.tn10p(ds.tasmin, tn10, freq='YS')
    ds_out[da.name] = da

    # TN90p: Percentage of nights with daily minimum temperature > 90th
    # percentile of the base period
    da = xc.atmos.tn90p(ds.tasmin, t90, freq='YS')
    ds_out[da.name] = da

    # WSDI: Number of days per year with at least 6 consecutive days when
    # daily maximum temperature > 90th percentile of the base period
    # (not computed in this script; see the sketch below)

    # CSDI: Number of days per year with at least 6 consecutive days when
    # daily minimum temperature < 10th percentile of the base period
    da = xc.atmos.cold_spell_duration_index(ds.tasmin, tn10, window=6, freq='YS')
    ds_out[da.name] = da

    # Done with calculations

    # Get default encodings for use with the Dataset.to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds_out, comp_level=4)

    # Save Dataset to file with encodings
    ds_out.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)
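# The WSDI indicator is listed in the comments above but has no matching
# computation in do_generate_indices(). A minimal, hedged sketch of how it
# could be added, assuming xclim's warm_spell_duration_index indicator and a
# day-of-year 90th percentile of tasmax (the helper name `add_wsdi` is
# illustrative, not from the project):
def add_wsdi(ds, ds_out):
    # Day-of-year 90th percentile of daily maximum temperature
    tx90 = xc.core.calendar.percentile_doy(ds.tasmax, per=90)
    # WSDI: days per year in runs of ≥ 6 consecutive days with tasmax above
    # the 90th percentile
    da = xc.atmos.warm_spell_duration_index(ds.tasmax, tx90, window=6, freq='YS')
    ds_out[da.name] = da
    return ds_out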
def path(self):
    return get_folder_path(self.raw_url)