Example #1
import xarray as xr

import utils


def main_func():
    # Select input folder
    fldr_in = utils.get_folder_path('Select input folder')
    if not fldr_in:
        raise Exception('Input folder selection aborted')
    fldr_in += r'*.nc'

    # Select output file
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Output file selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # This concatenates the files into a Dataset
    ds = xr.open_mfdataset(fldr_in, engine='netcdf4', mask_and_scale=False)

    # Convert calendar to standard one
    utils.convert_calendar(ds)

    # Add to file history
    utils.add_to_history(ds=ds,
                         txt='Drozdowski concatenation of multiple files',
                         prepend=True)
    utils.add_to_history(ds=ds,
                         txt='Drozdowski: set calendar to standard',
                         prepend=True)

    # Get default encodings for use with Dataset::to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

    # Save Dataset to file with encodings
    ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

    # No need to close files!

    print('Done!!!')
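The `utils.get_to_netcdf_encodings` helper is not shown in this snippet. A minimal sketch of what such a helper might return, assuming it simply enables zlib compression for every data variable (`zlib` and `complevel` are standard netCDF4 encoding keys in xarray):

def get_to_netcdf_encodings(ds, comp_level=4):
    # Build a per-variable encoding dict for Dataset.to_netcdf(),
    # enabling zlib compression at the requested level.
    return {name: {'zlib': True, 'complevel': comp_level}
            for name in ds.data_vars}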
Example #2
import os
import time
from utils import derypt_file
from utils import list_files_in_path
from utils import get_password
from utils import get_folder_path
from utils import delete_folder
from utils import get_file_sha256sum
from utils import get_hash_sum_path

if __name__ == "__main__":
    local_path, data_path = get_folder_path()
    hash_sum_path = get_hash_sum_path()
    failed_file = []
    for data_file in list_files_in_path(data_path):
        # Path of the data file relative to the data root (str.lstrip would
        # strip a set of characters, not a prefix)
        rel_path = os.path.relpath(data_file, data_path)
        target_file = os.path.join(local_path, rel_path)
        hash_file = os.path.join(hash_sum_path, rel_path)
        folder = os.path.dirname(target_file)
        hash_folder = os.path.dirname(hash_file)
        os.makedirs(folder, exist_ok=True)
        os.makedirs(hash_folder, exist_ok=True)
        try:
            start_time = time.time()
            ret = derypt_file(data_file, target_file, get_password())
            end_time = time.time()
            if ret.exit == 0:
                print("Decrypt %s ok.... %ss" %
                      (data_file, end_time - start_time))
                # os.remove(data_file)
                sha256sum = get_file_sha256sum(target_file)
                with open(hash_file, "w") as f:
                    f.write(sha256sum)
            else:
                failed_file.append(data_file)
        except Exception as err:
            print("Decrypt %s failed: %s" % (data_file, err))
            failed_file.append(data_file)
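The `get_file_sha256sum` helper imported from `utils` is not shown here. A minimal sketch of one way it could be written, assuming it reads the file in chunks and returns the hex digest:

import hashlib

def get_file_sha256sum(path, chunk_size=1 << 20):
    # Hash the file in chunks so large files don't need to fit in memory.
    sha = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha.update(chunk)
    return sha.hexdigest()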
Example #3
parser.add_argument("--gpu_idx", type=str, default='0', help="")
parser.add_argument("--epochs", type=int, default=1000, help="")
parser.add_argument("--opt", type=str, default='adam', help="")
parser.add_argument("--loss", type=str, default='mse', help="")
parser.add_argument("--batch_size", type=int, default=4, help="")
parser.add_argument("--lr", type=float, default=1e-5, help="")
parser.add_argument("--weight_decay", type=float, default=10e-3, help="")
parser.add_argument("--early_stopping", type=int, default=40, help="")

# Recommender params
parser.add_argument("--num_recs", type=int, default=10, help="")

args = parser.parse_args()

# Setup data and weights file path
data_folder, weights_folder, logger_folder = get_folder_path(args.dataset +
                                                             args.dataset_name)

# Setup device
if not torch.cuda.is_available() or args.device == 'cpu':
    device = 'cpu'
else:
    device = 'cuda:{}'.format(args.gpu_idx)

# Setup args
dataset_args = {
    'root': data_folder,
    'dataset': args.dataset,
    'name': args.dataset_name,
    'num_core': args.num_core,
    'num_feat_core': args.num_feat_core,
    'seed': args.seed,
}
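The `device` string chosen above is the usual input to `torch.device`, which is then used to place models and tensors. A brief sketch of that pattern; the `Net` module here is a hypothetical placeholder, not part of the snippet:

import torch
import torch.nn as nn

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


class Net(nn.Module):
    # Hypothetical placeholder model, only to illustrate .to(device).
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 1)

    def forward(self, x):
        return self.fc(x)


model = Net().to(device)               # move parameters to the selected device
x = torch.randn(4, 8, device=device)   # allocate the input on the same device
y = model(x)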
Example #4
 def __init__(self,
              test_groups,
              data=None,
              groups=None,
              feature=None,
              data_source=None,
              data_query_path=None,
              time_period=None,
              time_indicator=None,
              time_schedule=None,
              exporting_data=True,
              export_path=None,
              connector=None,
              confidence_level=None,
              boostrap_sample_ratio=None,
              boostrap_iteration=None):
     self.test_groups = test_groups
     self.data = data
     self.groups = groups
     self.feature = feature
     self.data_source = data_source
     self.data_query_path = data_query_path
     self.time_period = time_period
     self.time_indicator = time_indicator
     self.time_schedule = time_schedule
     self.exporting_data = False if export_path is None else exporting_data
     self.export_path = export_path
     self.connector = connector
     self.confidence_level = confidence_level
     self.boostrap_sample_ratio = boostrap_sample_ratio
     self.boostrap_iteration = boostrap_iteration
     self.arguments = {
         "data": data,
         "test_groups": test_groups,
         "groups": groups,
         "feature": feature,
         "data_source": data_source,
         "data_query_path": data_query_path,
         "time_period": time_period,
         "time_indicator": time_indicator,
         "export_path": export_path,
         "exporting_data": exporting_data,
         "parameters": None
     }
     self.arg_terminal = {
         "test_groups": "TG",
         "groups": "G",
         "date": "D",
         "feature": "F",
         "data_source": "DS",
         "data_query_path": "DQP",
         "time_period": "TP",
         "time_indicator": "TI",
         "export_path": "EP",
         "parameters": "P"
     }
     self.args_str = ""
     self.ab_test = None
     self.path = get_folder_path()
     self.mandetory_arguments = [
         "data_source", "data_query_path", "test_groups", "groups",
         "feature", "export_path"
     ]
     self.schedule_arg = "TS"
     self.params = None
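The constructor stores both the supplied `arguments` and a `mandetory_arguments` list, which suggests the inputs are validated before the A/B test is built. A minimal sketch of such a check; the method name `check_mandatory_arguments` is an assumption, not taken from the original class:

def check_mandatory_arguments(self):
    # Collect the names of required arguments that were not supplied.
    missing = [name for name in self.mandetory_arguments
               if self.arguments.get(name) is None]
    if missing:
        print("Missing mandatory arguments: %s" % ", ".join(missing))
    return missing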
Example #5
import glob
import os

import xarray as xr
import xclim as xc

import utils


def main_func():
    # Select input folder
    fldr_in = utils.get_folder_path('Select input folder')
    if not fldr_in:
        raise Exception('Input folder selection aborted')
    fldr_in += r'*.nc'

    # Select output folder
    fldr_out = utils.get_folder_path('Select output folder')
    if not fldr_out:
        raise Exception('Output folder selection aborted')

    # Set xarray to keep attributes for DataArrays and Datasets
    xr.set_options(keep_attrs=True)

    # Iterate all *.NC files in input folder
    files = glob.glob(fldr_in)
    for path in files:
        # Get only file name, no folder
        file = os.path.basename(path)

        # Extract date from file name into a Pandas DateTime
        # Skip with printout if not parsable date component
        pdt = dt_from_filename(file)
        if pdt is None:
            print(
                f'File: {file} is missing date component in file name. Skipped'
            )
            continue

        # Open full path file into a Dataset
        ds = xr.open_dataset(path, engine='netcdf4', mask_and_scale=False)

        # Add a time dimension/variable based on the extracted DateTime
        ds = ds.expand_dims({'time': [pdt]})

        # Add some attributes to time variable
        ds.time.attrs['long_name'] = 'time'
        ds.time.attrs['standard_name'] = 'time'
        ds.time.attrs['axis'] = 'T'
        ds.time.attrs['Descript'] = 'Time'

        # About time units:
        # The time units will be `days since ...`.
        # This is correct because CF Conventions assign special meanings
        # to `months since ...` and `years since ...`.
        # Time units are generated automatically.

        # Convert calendar to standard one
        utils.convert_calendar(ds)

        # Add to file history
        utils.add_to_history(
            ds=ds,
            txt='Drozdowski: add time dimension based on file name',
            prepend=True)
        utils.add_to_history(ds=ds,
                             txt='Drozdowski: set calendar to standard',
                             prepend=True)

        # Get default encodings for use with Dataset::to_netcdf() method
        encodings = utils.get_to_netcdf_encodings(ds=ds, comp_level=4)

        # Use the same file name as the current input file, but in the output folder.
        file_out = fldr_out + file

        # Save Dataset to file with encodings
        ds.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)

        # Close Dataset file
        ds.close()

    print('Done!!!')
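# The `dt_from_filename` helper used above is not shown in this snippet.
# A minimal sketch of how it might work, assuming the file name contains a
# YYYYMM or YYYYMMDD date component (the exact pattern is an assumption).
import re

import pandas as pd


def dt_from_filename(file):
    # Find the first 8- or 6-digit run in the file name and parse it.
    match = re.search(r'(\d{8}|\d{6})', file)
    if match is None:
        return None  # no parsable date component
    digits = match.group(1)
    fmt = '%Y%m%d' if len(digits) == 8 else '%Y%m'
    return pd.to_datetime(digits, format=fmt)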
def do_generate_indices():
    # Select folder containing all *.NC files to be included in calculations
    fldr_in = utils.get_folder_path('Select input folder')
    if not fldr_in:
        raise Exception('Folder selection aborted')
    fldr_in += r'*.nc'

    # Select the file to save as; it can't overwrite an input file!
    # Don't save to input folder!
    file_out = utils.get_save_path('Select output file')
    if not file_out:
        raise Exception('Save file selection aborted')


    # Set xarray to keep attributes for DataArrays and Datasets
    # Causes problems with unit conversions, don't use
    #xr.set_options(keep_attrs=True)


    # Open and concatenate all *.NC files in folder into a Dataset to be
    # used in calculations
    ds = xr.open_mfdataset(fldr_in, engine='netcdf4')

    ds = ds.resample(time='D').mean(keep_attrs=True)

    # Output info about Dataset so you can see the available variables
    #print(ds)

    # You could calculate based on a selection of data based on lat and/or
    # lon and/or time as long as the ranges are in the *.NC files. If you
    # do this, you need to replace references to `ds` with `ds_sel` in
    # the calculations.
    #ds_sel = ds.sel(lat=slice(45, 50), lon=slice(150, 175)) #, time=slice('2090', '2100'))

    # Define an output Dataset with attributes from input Dataset
    ds_out = xr.Dataset(attrs=ds.attrs)

    # Calculations of `Indicators`, which are xclim wrappers around indice
    # functions that provide additional functionality over the underlying
    # indice functions.
    # List of available Indicators:
    # https://xclim.readthedocs.io/en/stable/indicators.html


    # The following calculations assume the `pr`, `tasmax`, and `tasmin` variables exist
    
    # Precipitation indicators based on `pr` variable
    # R1mm; Number of days per year when precipitation ≥ 1 mm
    da = xc.atmos.wetdays(ds.pr, thresh='1 mm/day', freq='YS')
    ds_out[da.name] = da
    # CDD; Maximum number of consecutive days with daily precipitation < 1 mm
    da = xc.atmos.maximum_consecutive_dry_days(ds.pr, thresh='1 mm/day', freq='YS')
    ds_out[da.name] = da
    # CWD; Maximum number of consecutive days with daily precipitation ≥ 1 mm
    da = xc.atmos.maximum_consecutive_wet_days(ds.pr, thresh='1 mm/day', freq='YS')
    ds_out[da.name] = da
    # PRCPTOT; Annual total precipitation in wet days (daily precipitation ≥ 1 mm)
    da = xc.atmos.precip_accumulation(ds.pr, freq='YS')
    ds_out[da.name] = da
    # SDII; Annual total precipitation divided by the number of wet days
    da = xc.atmos.daily_pr_intensity(ds.pr, thresh='1 mm/day', freq='YS')
    ds_out[da.name] = da
    # RX1day; Annual maximum 1-day precipitation
    da = xc.atmos.max_1day_precipitation_amount(ds.pr, freq='YS')
    ds_out[da.name] = da
    # RX5day; Annual maximum 5-day precipitation
    da = xc.atmos.max_n_day_precipitation_amount(ds.pr, window=5, freq='YS')
    ds_out[da.name] = da
    # TXx Annual maximum daily maximum temperature
    da = xc.atmos.tx_max(ds.tasmax, freq='YS')
    ds_out[da.name] = da
    # TNx Annual maximum daily minimum temperature
    da = xc.atmos.tn_max(ds.tasmin, freq='YS')
    ds_out[da.name] = da
    # TXn Annual minimum daily maximum temperature
    da = xc.atmos.tx_min(ds.tasmax, freq='YS')
    ds_out[da.name] = da
    # TNn Annual minimum daily minimum temperature
    da = xc.atmos.tn_min(ds.tasmin, freq='YS')
    ds_out[da.name] = da
    #FD Number of days per year when daily minimum temperature < 0°C
    da = xc.atmos.frost_days(ds.tasmin, freq='YS')
    ds_out[da.name] = da
    #ID Number of days per year when daily maximum temperature < 0°C
    da = xc.atmos.ice_days(ds.tasmax, freq='YS')
    ds_out[da.name] = da
    # SU Number of days per year when daily maximum temperature > 25°C
    da = xc.atmos.tx_days_above(ds.tasmax, thresh='25 degC', freq='YS')
    ds_out[da.name] = da
    # TR Number of days per year when daily minimum temperature > 20°C
    da = xc.atmos.tropical_nights(ds.tasmin, thresh='20 degC', freq='YS')
    ds_out[da.name] = da


    # Local DataArrays (not part of Dataset) to be used below
    tas = xc.indices.tas(ds.tasmin, ds.tasmax)
    t10 = xc.core.calendar.percentile_doy(tas, per=10)
    t90 = xc.core.calendar.percentile_doy(tas, per=90)
    tn10 = xc.core.calendar.percentile_doy(ds.tasmin, per=10)

 
    # TX10p Percentage of days with daily maximum temperature < 10th percentile of the base period
    da = xc.atmos.tx10p(ds.tasmax, t10, freq='YS')
    ds_out[da.name] = da

    # TX90p Percentage of days with daily maximum temperature > 90th percentile of the base period
    da = xc.atmos.tx90p(ds.tasmax, t90, freq='YS')
    ds_out[da.name] = da

    # TN10p Percentage of nights with daily minimum temperature < 10th percentile of the base period
    da = xc.atmos.tn10p(ds.tasmin, t10, freq='YS')
    ds_out[da.name] = da

    # TN90p Percentage of nights with daily minimum temperature > 90th percentile of the base period
    da = xc.atmos.tn90p(ds.tasmin, t90, freq='YS')
    ds_out[da.name] = da

    # WSDI Number of days per year with at least 6 consecutive days when daily maximum temperature > 90th percentile of the base period

    # CSDI Number of days per year with at least 6 consecutive days when daily minimum temperature < 10th percentile of the base period
    da = xc.atmos.cold_spell_duration_index(ds.tasmin, tn10, window=1, freq='YS')
    ds_out[da.name] = da

    # Done with calculations

    # Get default encodings for use with Dataset::to_netcdf() method
    encodings = utils.get_to_netcdf_encodings(ds=ds_out, comp_level=4)

    # Save Dataset to file with encodings
    ds_out.to_netcdf(path=file_out, engine='netcdf4', encoding=encodings)
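The percentile thresholds in `do_generate_indices` above are computed over the whole record that was loaded. The ETCCDI-style comments refer to a base period, so a common variant is to compute the percentiles on a fixed reference slice first. A minimal sketch, reusing `tas`, `ds`, and `ds_out` from the function body and assuming a 1981-2010 base period exists in the data:

tas_base = tas.sel(time=slice('1981', '2010'))
t90_base = xc.core.calendar.percentile_doy(tas_base, per=90)

# TX90p computed against the base-period percentile instead
da = xc.atmos.tx90p(ds.tasmax, t90_base, freq='YS')
ds_out[da.name] = da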
Example #7
 def path(self):
     return get_folder_path(self.raw_url)