def read_swan(filename, dirorder=True, as_site=None):
    """Read Spectra from SWAN ASCII file.

    Args:
        - filename (str): path of the SWAN ASCII spectra file to read.
        - dirorder (bool): If True reorder spectra so that directions are sorted.
        - as_site (bool): If True locations are defined by 1D site dimension.

    Returns:
        - dset (SpecDataset): spectra dataset object read from file.

    Note:
        - If the file has no timestamp, the current time (truncated to the
          minute) is used as a single fake time.
        - Winds and depth are appended only when the associated tab file can
          be parsed and has as many records as there are times in the spectra
          file; otherwise a warning is issued and they are left out.
        - The underlying SwanSpecFile is always closed before returning, also
          when an error is raised.

    """
    swanfile = SwanSpecFile(filename, dirorder=dirorder)
    try:
        times = swanfile.times
        lons = swanfile.x
        lats = swanfile.y
        if len(lons) == 1:
            # Single location: name the site after the file (without extension)
            sites = [os.path.splitext(os.path.basename(filename))[0]]
        else:
            sites = np.arange(len(lons)) + 1
        freqs = swanfile.freqs
        dirs = swanfile.dirs
        tab = None

        if as_site:
            swanfile.is_grid = False

        spec_list = list(swanfile.readall())

        # Create fake time if no timestamp
        # NOTE(review): evaluated after readall() on purpose, presumably
        # swanfile.times is filled in while reading - keep this ordering.
        times = times or [datetime.datetime.now().replace(second=0, microsecond=0)]

        if swanfile.is_tab:
            try:
                tab = read_tab(swanfile.tabfile)
                if len(swanfile.times) == tab.index.size:
                    if 'X-wsp' in tab and 'Y-wsp' in tab:
                        tab[attrs.WSPDNAME], tab[attrs.WDIRNAME] = uv_to_spddir(
                            tab['X-wsp'], tab['Y-wsp'], coming_from=True
                        )
                else:
                    warnings.warn(
                        "Times in {} and {} not consistent, not appending winds and depth"
                        .format(swanfile.filename, swanfile.tabfile)
                    )
                    tab = None
            except Exception as exc:
                warnings.warn(
                    "Cannot parse depth and winds from {}:\n{}".format(swanfile.tabfile, exc)
                )
                # Do not append anything from a table that failed half-way
                tab = None

        has_wind = tab is not None and attrs.WSPDNAME in tab
        has_dep = tab is not None and 'dep' in tab

        if swanfile.is_grid:
            lons = sorted(np.unique(lons))
            lats = sorted(np.unique(lats))
            arr = np.array(spec_list).reshape(
                len(times), len(lons), len(lats), len(freqs), len(dirs)
            )
            grid_dims = [attrs.TIMENAME, attrs.LATNAME, attrs.LONNAME]
            dset = xr.DataArray(
                # Spectra are reshaped as (time, lon, lat, ...); swap to (time, lat, lon, ...)
                data=np.swapaxes(arr, 1, 2),
                coords=OrderedDict((
                    (attrs.TIMENAME, times),
                    (attrs.LATNAME, lats),
                    (attrs.LONNAME, lons),
                    (attrs.FREQNAME, freqs),
                    (attrs.DIRNAME, dirs),
                )),
                dims=(attrs.TIMENAME, attrs.LATNAME, attrs.LONNAME, attrs.FREQNAME, attrs.DIRNAME),
                name=attrs.SPECNAME,
            ).to_dataset()

            if has_wind:
                dset[attrs.WSPDNAME] = xr.DataArray(
                    data=tab[attrs.WSPDNAME].values.reshape(-1, 1, 1), dims=grid_dims
                )
                dset[attrs.WDIRNAME] = xr.DataArray(
                    data=tab[attrs.WDIRNAME].values.reshape(-1, 1, 1), dims=grid_dims
                )
            if has_dep:
                dset[attrs.DEPNAME] = xr.DataArray(
                    data=tab['dep'].values.reshape(-1, 1, 1), dims=grid_dims
                )
        else:
            arr = np.array(spec_list).reshape(len(times), len(sites), len(freqs), len(dirs))
            site_dims = [attrs.TIMENAME, attrs.SITENAME]
            dset = xr.DataArray(
                data=arr,
                coords=OrderedDict((
                    (attrs.TIMENAME, times),
                    (attrs.SITENAME, sites),
                    (attrs.FREQNAME, freqs),
                    (attrs.DIRNAME, dirs),
                )),
                dims=(attrs.TIMENAME, attrs.SITENAME, attrs.FREQNAME, attrs.DIRNAME),
                name=attrs.SPECNAME,
            ).to_dataset()

            if has_wind:
                dset[attrs.WSPDNAME] = xr.DataArray(
                    data=tab[attrs.WSPDNAME].values.reshape(-1, 1), dims=site_dims
                )
                dset[attrs.WDIRNAME] = xr.DataArray(
                    data=tab[attrs.WDIRNAME].values.reshape(-1, 1), dims=site_dims
                )
            if has_dep:
                dset[attrs.DEPNAME] = xr.DataArray(
                    data=tab['dep'].values.reshape(-1, 1), dims=site_dims
                )

            # Site locations are stored as variables along the site dimension
            dset[attrs.LATNAME] = xr.DataArray(
                data=lats, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME]
            )
            dset[attrs.LONNAME] = xr.DataArray(
                data=lons, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME]
            )

        set_spec_attributes(dset)
        if 'dir' in dset and len(dset.dir) > 1:
            dset[attrs.SPECNAME].attrs.update(
                {'_units': 'm^{2}.s.degree^{-1}', '_variable_name': 'VaDens'}
            )
        else:
            dset[attrs.SPECNAME].attrs.update(
                {'units': 'm^{2}.s', '_units': 'm^{2}.s', '_variable_name': 'VaDens'}
            )

        return dset
    finally:
        # Release the file handle whatever happened above
        swanfile.close()

def read_swans(fileglob, ndays=None, int_freq=True, int_dir=False, dirorder=True, ntimes=None):
    """Read multiple swan files into single Dataset.

    Args:
        - fileglob (str, list): glob pattern specifying files to read.
        - ndays (float): number of days to keep from each file, choose None to
          keep entire period.
        - int_freq (ndarray, bool): frequency array for interpolating onto:
            - ndarray: 1d array specifying frequencies to interpolate onto.
            - True: logarithm array is constructed such that fmin=0.04118 Hz,
              fmax=0.71856 Hz, df=0.1f.
            - False: No interpolation performed in frequency space.
        - int_dir (ndarray, bool): direction array for interpolating onto:
            - ndarray: 1d array specifying directions to interpolate onto.
            - True: circular array is constructed such that dd=10 degrees.
            - False: No interpolation performed in direction space.
        - dirorder (bool): if True ensures directions are sorted.
        - ntimes (int): use it to read only specific number of times, useful
          for checking headers only.

    Returns:
        - dset (SpecDataset): spectra dataset object read from file with
          different sites and cycles concatenated along the 'site' and 'time'
          dimensions.

    Raises:
        - AssertionError: if no file is identified from fileglob.
        - IOError: if a file does not extend for ndays, if files of the same
          cycle have different time lengths, if sites are inconsistent across
          cycles, or if tab files are inconsistent across spectra files.

    Note:
        - If multiple cycles are provided, 'time' coordinate is replaced by
          'cycletime' multi-index coordinate.
        - If more than one cycle is prescribed from fileglob, each cycle must
          have same number of sites.
        - Either all or none of the spectra in fileglob must have tabfile
          associated to provide wind/depth data.
        - Concatenation is done with numpy arrays for efficiency.
        - The cycle of a file is the first time found in that file; the times
          of a cycle are taken from the first file read for that cycle.

    """
    swans = sorted(fileglob) if isinstance(fileglob, list) else sorted(glob.glob(fileglob))
    # Kept as an assert so that the exception type seen by callers is unchanged
    assert swans, 'No SWAN file identified with fileglob %s' % (fileglob)

    # Default spectral basis for interpolating
    # Only scalars are compared against True / False: comparing an ndarray is
    # elementwise and cannot be used as an if-condition.
    if np.ndim(int_freq) == 0 and int_freq == True:
        int_freq = [0.04118 * 1.1**n for n in range(31)]
    elif np.ndim(int_freq) == 0 and int_freq == False:
        int_freq = None
    if np.ndim(int_dir) == 0 and int_dir == True:
        int_dir = np.arange(0, 360, 10)
    elif np.ndim(int_dir) == 0 and int_dir == False:
        int_dir = None

    # Everything is keyed by cycle and kept sorted by cycle
    dsets = SortedDict()
    tabs = SortedDict()
    all_times = SortedDict()
    all_sites = SortedDict()
    all_lons = SortedDict()
    all_lats = SortedDict()
    deps = SortedDict()
    wspds = SortedDict()
    wdirs = SortedDict()

    tab_columns = (attrs.DEPNAME, attrs.WSPDNAME, attrs.WDIRNAME)

    for filename in swans:
        swanfile = SwanSpecFile(filename, dirorder=dirorder)
        try:
            times = swanfile.times
            lons = list(swanfile.x)
            lats = list(swanfile.y)
            if len(lons) == 1:
                # Single location: name the site after the file (without extension)
                sites = [os.path.splitext(os.path.basename(filename))[0]]
            else:
                sites = np.arange(len(lons)) + 1
            freqs = swanfile.freqs
            dirs = swanfile.dirs

            if ntimes is None:
                spec_list = list(swanfile.readall())
            else:
                spec_list = [swanfile.read() for _ in range(ntimes)]

            # Read tab files for winds / depth
            # An empty table means "no wind / depth for this file"
            tab = pd.DataFrame()
            if swanfile.is_tab:
                try:
                    tab = read_tab(swanfile.tabfile).rename(columns={'dep': attrs.DEPNAME})
                    if len(swanfile.times) == tab.index.size:
                        if 'X-wsp' in tab and 'Y-wsp' in tab:
                            tab[attrs.WSPDNAME], tab[attrs.WDIRNAME] = uv_to_spddir(
                                tab['X-wsp'], tab['Y-wsp'], coming_from=True
                            )
                    else:
                        warnings.warn(
                            "Times in {} and {} not consistent, not appending winds and depth"
                            .format(swanfile.filename, swanfile.tabfile)
                        )
                        tab = pd.DataFrame()
                    # Only keep the columns used further down
                    tab = tab[list(set(tab.columns).intersection(tab_columns))]
                except Exception as exc:
                    warnings.warn(
                        "Cannot parse depth and winds from {}:\n{}".format(swanfile.tabfile, exc)
                    )
                    # Never carry over a half-parsed table or the one from a previous file
                    tab = pd.DataFrame()

            # Shrinking times
            if ndays is not None:
                tend = times[0] + datetime.timedelta(days=ndays)
                if tend > times[-1]:
                    raise IOError('Times in %s does not extend for %0.2f days' % (filename, ndays))
                # Index of the time closest to the requested end
                iend = times.index(min(times, key=lambda d: abs(d - tend)))
                times = times[0:iend+1]
                spec_list = spec_list[0:iend+1]
                if not tab.empty:
                    tab = tab.loc[times[0]:tend]

            spec_list = flatten_list(spec_list, [])

            # Interpolate spectra
            if int_freq is not None or int_dir is not None:
                spec_list = [
                    interp_spec(spec, freqs, dirs, int_freq, int_dir) for spec in spec_list
                ]
                freqs = int_freq if int_freq is not None else freqs
                dirs = int_dir if int_dir is not None else dirs

            # Appending
            arr = np.array(spec_list).reshape(len(times), len(sites), len(freqs), len(dirs))
            cycle = times[0]
            if cycle not in dsets:
                dsets[cycle] = [arr]
                tabs[cycle] = [tab]
                # Copy to a list so sites of further files can be appended
                all_sites[cycle] = list(sites)
                all_lons[cycle] = lons
                all_lats[cycle] = lats
                all_times[cycle] = times
            else:
                # Files of a cycle are concatenated along sites, so they must
                # all have the same number of times
                ntime_cycle = dsets[cycle][0].shape[0]
                if arr.shape[0] != ntime_cycle:
                    raise IOError(
                        'Time length in %s (%i) does not match previous files (%i), cannot concatenate'
                        % (filename, arr.shape[0], ntime_cycle)
                    )
                dsets[cycle].append(arr)
                tabs[cycle].append(tab)
                all_sites[cycle].extend(sites)
                all_lons[cycle].extend(lons)
                all_lats[cycle].extend(lats)
        finally:
            # Release the file handle also when something went wrong
            swanfile.close()

    cycles = list(dsets.keys())

    # Ensuring sites are consistent across cycles
    # Every cycle is compared against the same reference, so any cycle will do
    ref_cycle = cycles[0]
    sites = all_sites[ref_cycle]
    lons = all_lons[ref_cycle]
    lats = all_lats[ref_cycle]
    for site, lon, lat in zip(all_sites.values(), all_lons.values(), all_lats.values()):
        if (list(site) != list(sites)) or (list(lon) != list(lons)) or (list(lat) != list(lats)):
            raise IOError('Inconsistent sites across cycles in glob pattern provided')

    # Ensuring consistent tabs
    cols = {frozenset(tab.columns) for cycle_tabs in tabs.values() for tab in cycle_tabs}
    if len(cols) > 1:
        raise IOError('Inconsistent tab files, ensure either all or none of the spectra have associated tabfiles and columns are consistent')

    # Columns are identical in all tables at this point
    ref_tab = tabs[ref_cycle][0]
    has_dep = attrs.DEPNAME in ref_tab
    has_wspd = attrs.WSPDNAME in ref_tab
    has_wdir = attrs.WDIRNAME in ref_tab

    # Concat sites
    for cycle in cycles:
        dsets[cycle] = np.concatenate(dsets[cycle], axis=1)
        cycle_tabs = tabs[cycle]
        # One row per file stacked, then transposed so that time comes first
        deps[cycle] = (
            np.vstack([tab[attrs.DEPNAME].values for tab in cycle_tabs]).T if has_dep else None
        )
        wspds[cycle] = (
            np.vstack([tab[attrs.WSPDNAME].values for tab in cycle_tabs]).T if has_wspd else None
        )
        wdirs[cycle] = (
            np.vstack([tab[attrs.WDIRNAME].values for tab in cycle_tabs]).T if has_wdir else None
        )

    time_sizes = [dsets[cycle].shape[0] for cycle in cycles]

    # Concat cycles
    if len(cycles) > 1:
        # Views are turned into lists, np.concatenate needs a real sequence
        specs = np.concatenate(list(dsets.values()), axis=0)
        deps = np.concatenate(list(deps.values()), axis=0) if has_dep else None
        wspds = np.concatenate(list(wspds.values()), axis=0) if has_wspd else None
        wdirs = np.concatenate(list(wdirs.values()), axis=0) if has_wdir else None
    else:
        specs = dsets[ref_cycle]
        deps = deps[ref_cycle] if has_dep else None
        wspds = wspds[ref_cycle] if has_wspd else None
        wdirs = wdirs[ref_cycle] if has_wdir else None

    # Creating dataset
    # Times are taken in cycle order, the same order used to concatenate the data
    times = flatten_list(list(all_times.values()), [])
    dsets = xr.DataArray(
        data=specs,
        coords=OrderedDict((
            (attrs.TIMENAME, times),
            (attrs.SITENAME, sites),
            (attrs.FREQNAME, freqs),
            (attrs.DIRNAME, dirs),
        )),
        dims=(attrs.TIMENAME, attrs.SITENAME, attrs.FREQNAME, attrs.DIRNAME),
        name=attrs.SPECNAME,
    ).to_dataset()

    dsets[attrs.LATNAME] = xr.DataArray(
        data=lats, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME]
    )
    dsets[attrs.LONNAME] = xr.DataArray(
        data=lons, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME]
    )

    time_site_dims = [attrs.TIMENAME, attrs.SITENAME]
    if wspds is not None:
        dsets[attrs.WSPDNAME] = xr.DataArray(
            data=wspds,
            dims=time_site_dims,
            coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))),
        )
        dsets[attrs.WDIRNAME] = xr.DataArray(
            data=wdirs,
            dims=time_site_dims,
            coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))),
        )
    if deps is not None:
        dsets[attrs.DEPNAME] = xr.DataArray(
            data=deps,
            dims=time_site_dims,
            coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))),
        )

    # Setting multi-index
    if len(cycles) > 1:
        dsets = dsets.rename({attrs.TIMENAME: 'cycletime'})
        # Each cycle repeated once for every time it holds
        cycle_labels = [c for c, size in zip(cycles, time_sizes) for _ in range(size)]
        cycletime = list(zip(cycle_labels, dsets.cycletime.values))
        dsets['cycletime'] = pd.MultiIndex.from_tuples(
            cycletime, names=[attrs.CYCLENAME, attrs.TIMENAME]
        )
        dsets['cycletime'].attrs = attrs.ATTRS[attrs.TIMENAME]

    set_spec_attributes(dsets)
    if 'dir' in dsets and len(dsets.dir) > 1:
        dsets[attrs.SPECNAME].attrs.update(
            {'_units': 'm^{2}.s.degree^{-1}', '_variable_name': 'VaDens'}
        )
    else:
        dsets[attrs.SPECNAME].attrs.update(
            {'units': 'm^{2}.s', '_units': 'm^{2}.s', '_variable_name': 'VaDens'}
        )

    return dsets