def search_data_files(self, **kwargs):
    dt_fr = self.dt_fr
    dt_to = self.dt_to
    diff_days = dttool.get_diff_days(dt_fr, dt_to)
    dt0 = dttool.get_start_of_the_day(dt_fr)
    for i in range(diff_days + 1):
        thisday = dt0 + datetime.timedelta(days=i)
        initial_file_dir = kwargs.pop('initial_file_dir', None)
        if initial_file_dir is None:
            initial_file_dir = self.data_root_dir / thisday.strftime("%Y") / thisday.strftime('%Y%m%d')
        file_patterns = [
            self.data_file_type.replace('-', '_'),
            thisday.strftime("%Y%m%d"),
            self.data_file_version,
        ]
        # Remove empty strings from the pattern list
        file_patterns = [pattern for pattern in file_patterns if str(pattern)]
        search_pattern = '*' + '*'.join(file_patterns) + '*'
        done = super().search_data_files(
            initial_file_dir=initial_file_dir,
            search_pattern=search_pattern
        )
        # Validate the file paths; if nothing is found locally, try the online database
        if not done and self.allow_download:
            done = self.download_data()
            if done:
                done = super().search_data_files(
                    initial_file_dir=initial_file_dir,
                    search_pattern=search_pattern
                )
            else:
                print('Cannot find files from the online database!')
    return done
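
# --- Example: how the wildcard search pattern matches ---
# A minimal, self-contained sketch (not part of the loader): the pattern joins
# every non-empty token with '*', so a file name only has to contain the tokens
# in order. File names and tokens below are made up for illustration.
import fnmatch

candidates = [
    'grd_mapex_20160301_v01.hdf5',
    'grd_mapex_20160302_v01.hdf5',
    'readme.txt',
]
file_patterns = ['grd_mapex', '20160301', 'v01']        # hypothetical tokens
file_patterns = [p for p in file_patterns if str(p)]    # drop empty tokens
search_pattern = '*' + '*'.join(file_patterns) + '*'    # '*grd_mapex*20160301*v01*'
print([fn for fn in candidates if fnmatch.fnmatch(fn, search_pattern)])
# -> ['grd_mapex_20160301_v01.hdf5']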
def search_data_files(self, recursive=True, **kwargs):
    dt_fr = self.dt_fr
    dt_to = self.dt_to
    diff_days = dttool.get_diff_days(dt_fr, dt_to)
    day0 = dttool.get_start_of_the_day(dt_fr)
    for i in range(diff_days + 1):
        thisday = day0 + datetime.timedelta(days=i)
        initial_file_dir = self.data_root_dir / thisday.strftime('%Y') / thisday.strftime('%Y%m%d')
        file_patterns = [
            'MillstoneHill',
            self.data_file_type.replace(' ', '_'),
            thisday.strftime('%Y%m%d'),
        ]
        # Remove empty strings from the pattern list
        file_patterns = [pattern for pattern in file_patterns if str(pattern)]
        search_pattern = '*' + '*'.join(file_patterns) + '*'
        if str(self.exp_name_pattern):
            # Scope the search to the experiment folder; the folder is addressed
            # explicitly in the pattern, so recursion is no longer needed.
            search_pattern = '*' + self.exp_name_pattern.replace(' ', '-') + '*/' + search_pattern
            recursive = False
        done = super().search_data_files(
            initial_file_dir=initial_file_dir,
            search_pattern=search_pattern,
            recursive=recursive
        )
        # Validate the file paths; if nothing is found locally, try the online database
        if not done and self.allow_download:
            done = self.download_data()
            if done:
                done = super().search_data_files(
                    initial_file_dir=initial_file_dir,
                    search_pattern=search_pattern
                )
            else:
                print('The requested experiment does not exist in the online database!')
        if len(done) > 1:
            if str(self.exp_name_pattern):
                mylog.StreamLogger.error("Multiple data files detected! Check the files:")
            else:
                mylog.StreamLogger.error(
                    "Multiple data files detected! "
                    "Specify the experiment name by the keyword 'exp_name_pattern' if possible."
                )
            for fp in done:
                mylog.simpleinfo.info(fp)
            raise KeyError("Multiple data files detected!")
    return done
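
# --- Example: directory-scoped search pattern ---
# A sketch of why the trailing '/' and recursive=False work together above:
# with pathlib, a '/' inside the glob addresses the experiment folder directly,
# so no recursive walk is needed. Paths and names are invented for illustration.
from pathlib import Path
import tempfile

root = Path(tempfile.mkdtemp())
exp_dir = root / '2016' / '20160301' / 'gridded-ion-velocities-exp1'
exp_dir.mkdir(parents=True)
(exp_dir / 'MillstoneHill_basic_20160301.hdf5').touch()

exp_name_pattern = 'gridded ion velocities'
search_pattern = ('*' + exp_name_pattern.replace(' ', '-') + '*/'
                  + '*MillstoneHill*20160301*')
print([p.name for p in (root / '2016' / '20160301').glob(search_pattern)])
# -> ['MillstoneHill_basic_20160301.hdf5']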
def download(self):
    diff_days = dttool.get_diff_days(self.dt_fr, self.dt_to)
    for i in range(diff_days + 1):
        dt1 = self.dt_fr + datetime.timedelta(days=i)
        fn = '_'.join(['SuperDARN', 'POTMAP', str(self.data_res) + 'min',
                       dt1.strftime('%Y%m%d'), self.pole]) + '.dat'
        file_path = self.data_file_root_dir / dt1.strftime('%Y') / fn
        self.save_to_netcdf(file_path)
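
# --- Example: SuperDARN potential-map file naming ---
# A small sketch of the name assembled above; data_res and pole stand in for
# the instance attributes and are assumed values, for illustration only.
import datetime

data_res, pole = 2, 'N'
dt1 = datetime.datetime(2016, 3, 1)
fn = '_'.join(['SuperDARN', 'POTMAP', str(data_res) + 'min',
               dt1.strftime('%Y%m%d'), pole]) + '.dat'
print(fn)   # -> SuperDARN_POTMAP_2min_20160301_N.dat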
def search_data_files(self, **kwargs):
    dt_fr = self.dt_fr
    if self.dt_to.hour > 22:
        # Extend the search by one day when the range ends after 22:00,
        # as a late pass is likely filed under the next day.
        dt_to = self.dt_to + datetime.timedelta(days=1)
    else:
        dt_to = self.dt_to
    diff_days = dttool.get_diff_days(dt_fr, dt_to)
    dt0 = dttool.get_start_of_the_day(dt_fr)
    for i in range(diff_days + 1):
        thisday = dt0 + datetime.timedelta(days=i)
        initial_file_dir = kwargs.pop('initial_file_dir', None)
        if initial_file_dir is None:
            initial_file_dir = self.data_root_dir / self.sat_id.lower() / thisday.strftime("%Y%m%d")
        file_patterns = [
            self.sat_id.upper(),
            self.product.upper(),
            thisday.strftime("%Y%m%d"),
        ]
        if self.orbit_id is not None:
            file_patterns.append(self.orbit_id)
        # Remove empty strings from the pattern list
        file_patterns = [pattern for pattern in file_patterns if str(pattern)]
        search_pattern = '*' + '*'.join(file_patterns) + '*'
        if self.orbit_id is not None:
            multiple_files = False
        else:
            # Without an orbit ID, all orbits of the day are loaded. The marker
            # file '<PRODUCT>.full.log' records that the full day has been fetched.
            fp_log = initial_file_dir / (self.product.upper() + '.full.log')
            if not fp_log.is_file():
                self.download_data(dt_fr=thisday, dt_to=thisday)
            multiple_files = True
        done = super().search_data_files(
            initial_file_dir=initial_file_dir,
            search_pattern=search_pattern,
            allow_multiple_files=multiple_files,
        )
        if done and self.orbit_id is not None:
            return True
        # Validate the file paths; if nothing is found locally, try the online database
        if not done and self.allow_download:
            done = self.download_data(dt_fr=thisday, dt_to=thisday)
            if done:
                done = super().search_data_files(
                    initial_file_dir=initial_file_dir,
                    search_pattern=search_pattern,
                    allow_multiple_files=multiple_files)
    return done
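
# --- Example: the '.full.log' marker file ---
# A sketch of the caching idea above: once all orbits of a day have been
# fetched, an empty '<PRODUCT>.full.log' is left in the day folder, so later
# searches skip the network round trip. 'day_already_fetched' is a hypothetical
# helper, not part of the loader.
from pathlib import Path
import tempfile

def day_already_fetched(day_dir: Path, product: str) -> bool:
    return (day_dir / (product.upper() + '.full.log')).is_file()

day_dir = Path(tempfile.mkdtemp()) / 'f18' / '20160301'
day_dir.mkdir(parents=True)
print(day_already_fetched(day_dir, 'edr-aur'))   # -> False
(day_dir / 'EDR-AUR.full.log').touch()
print(day_already_fetched(day_dir, 'edr-aur'))   # -> True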
def get_dt_range_str(self, style='title'):
    dt_fr = self._xlim[0]
    dt_to = self._xlim[1]
    if style == 'title':
        diff_days = dttool.get_diff_days(dt1=dt_fr, dt2=dt_to)
        if diff_days == 0:
            fmt1 = "%Y-%m-%dT%H:%M:%S"
            fmt2 = "%H:%M:%S"    # same day: omit the date in the end time
        else:
            fmt1 = "%Y-%m-%dT%H:%M:%S"
            fmt2 = fmt1
        dt_range_str = dt_fr.strftime(fmt1) + ' - ' + dt_to.strftime(fmt2)
    elif style == 'filename':
        diff_days = dttool.get_diff_days(dt1=dt_fr, dt2=dt_to)
        if diff_days == 0:
            fmt1 = "%Y%m%d-%H%M%S"
            fmt2 = "%H%M%S"
        else:
            fmt1 = "%Y%m%d-%H%M%S"
            fmt2 = fmt1
        dt_range_str = dt_fr.strftime(fmt1) + '-' + dt_to.strftime(fmt2)
    else:
        # Avoid a NameError on dt_range_str for an unrecognized style
        raise NotImplementedError("Unknown style: {}".format(style))
    return dt_range_str
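
# --- Example: title-style range strings ---
# A sketch of the two 'title' branches above: the end time drops the date part
# only when both ends fall on the same day.
import datetime

dt_fr = datetime.datetime(2016, 3, 1, 10, 0, 0)
same_day = datetime.datetime(2016, 3, 1, 14, 30, 0)
next_day = datetime.datetime(2016, 3, 2, 14, 30, 0)

print(dt_fr.strftime("%Y-%m-%dT%H:%M:%S") + ' - ' + same_day.strftime("%H:%M:%S"))
# -> 2016-03-01T10:00:00 - 14:30:00
print(dt_fr.strftime("%Y-%m-%dT%H:%M:%S") + ' - ' + next_day.strftime("%Y-%m-%dT%H:%M:%S"))
# -> 2016-03-01T10:00:00 - 2016-03-02T14:30:00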
def search_data_files(self, **kwargs):
    dt_fr = self.dt_fr
    dt_to = self.dt_to
    diff_days = dttool.get_diff_days(dt_fr, dt_to)
    day0 = dttool.get_start_of_the_day(dt_fr)
    for i in range(diff_days + 1):
        thisday = day0 + datetime.timedelta(days=i)
        initial_file_dir = self.data_root_dir / self.site / thisday.strftime('%Y')
        file_patterns = []
        if self.data_file_type == 'eiscat-hdf5':
            file_patterns.append('EISCAT')
        elif self.data_file_type == 'madrigal-hdf5':
            file_patterns.append('MAD6400')
        elif self.data_file_type == 'eiscat-mat':
            pass    # no leading token for .mat experiment folders
        file_patterns.append(thisday.strftime('%Y-%m-%d'))
        file_patterns.append(self.modulation)
        file_patterns.append(self.antenna.lower())
        # Remove empty strings from the pattern list
        file_patterns = [pattern for pattern in file_patterns if str(pattern)]
        search_pattern = '*' + '*'.join(file_patterns) + '*'
        if self.data_file_type == 'eiscat-mat':
            # A trailing slash makes the pattern match a directory
            search_pattern = search_pattern + '/'
        done = super().search_data_files(
            initial_file_dir=initial_file_dir,
            search_pattern=search_pattern)
        # Validate the file paths; if nothing is found locally, try the online database
        if not done and self.allow_download:
            done = self.download_data()
            if done:
                done = super().search_data_files(
                    initial_file_dir=initial_file_dir,
                    search_pattern=search_pattern)
            else:
                print('Cannot find files from the online database!')
    return done
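
# --- Example: pattern construction per file type ---
# A sketch equivalent to the if/elif chain above, with the type-to-prefix
# mapping made explicit as a dict; 'build_search_pattern' is a hypothetical
# helper, and the prefix values are taken from the branches above.
PREFIX_BY_TYPE = {
    'eiscat-hdf5': 'EISCAT',
    'madrigal-hdf5': 'MAD6400',
    'eiscat-mat': '',            # empty strings are dropped from the tokens
}

def build_search_pattern(data_file_type, date_str, modulation, antenna):
    tokens = [PREFIX_BY_TYPE[data_file_type], date_str, modulation, antenna.lower()]
    tokens = [t for t in tokens if str(t)]
    pattern = '*' + '*'.join(tokens) + '*'
    if data_file_type == 'eiscat-mat':
        pattern += '/'           # match a directory instead of a file
    return pattern

print(build_search_pattern('eiscat-hdf5', '2016-03-01', 'ant', 'UHF'))
# -> *EISCAT*2016-03-01*ant*uhf*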
def search_data_files(self, **kwargs):
    dt_fr = self.dt_fr
    dt_to = self.dt_to
    diff_days = dttool.get_diff_days(dt_fr, dt_to)
    dt0 = dttool.get_start_of_the_day(dt_fr)
    for i in range(diff_days + 1):
        this_day = dt0 + datetime.timedelta(days=i)
        initial_file_dir = kwargs.pop('initial_file_dir', self.data_root_dir)
        file_patterns = [
            'EFI' + self.sat_id.upper(),
            self.product.upper(),
            this_day.strftime('%Y%m%d') + 'T',
        ]
        # Remove empty strings from the pattern list
        file_patterns = [pattern for pattern in file_patterns if str(pattern)]
        search_pattern = '*' + '*'.join(file_patterns) + '*'
        done = super().search_data_files(
            initial_file_dir=initial_file_dir,
            search_pattern=search_pattern,
            allow_multiple_files=True,
        )
        # Validate the file paths; re-download when forced or nothing was found
        if (not done and self.allow_download) or self.force_download:
            done = self.download_data()
            if done:
                initial_file_dir = self.data_root_dir
                done = super().search_data_files(
                    initial_file_dir=initial_file_dir,
                    search_pattern=search_pattern,
                    allow_multiple_files=True)
    return done
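
# --- Example: why the date token ends with 'T' ---
# Swarm-style product names embed start/stop timestamps as 'YYYYMMDDThhmmss';
# appending 'T' to the date presumably anchors the match to a timestamp field
# rather than some other digit run. The file names below are invented for
# illustration.
import fnmatch

candidates = [
    'SW_EXPT_EFIA_TCT02_20160301T000000_20160301T235959_0201.cdf',
    'SW_EXPT_EFIA_TCT02_20160302T000000_20160302T235959_0201.cdf',
]
pattern = '*' + '*'.join(['EFIA', 'TCT02', '20160301' + 'T']) + '*'
print([fn for fn in candidates if fnmatch.fnmatch(fn, pattern)])
# -> ['SW_EXPT_EFIA_TCT02_20160301T000000_20160301T235959_0201.cdf']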
def load_data(self):
    dataset = netCDF4.Dataset(self.file_path)
    variables = {}
    metadata = {}

    if self.pole == 'N':
        pole = self.pole
        pole_str = 'NORTH'
    elif self.pole == 'S':
        pole = self.pole
        pole_str = 'SOUTH'
    else:
        raise ValueError("pole must be either 'N' or 'S'!")

    # Time and position
    # sectime = int(np.array(dataset.variables['TIME']).flatten()[0])
    # doy = int(np.array(dataset.variables['DOY']).flatten()[0])
    # year = int(np.array(dataset.variables['YEAR']).flatten()[0])
    # dt0 = dttool.convert_doy_to_datetime(year, doy)
    starting_time = datetime.datetime.strptime(dataset.STARTING_TIME, "%Y%j%H%M%S")
    variables['STARTING_TIME'] = starting_time
    stopping_time = datetime.datetime.strptime(dataset.STOPPING_TIME, "%Y%j%H%M%S")
    variables['STOPPING_TIME'] = stopping_time
    dt0 = dttool.get_start_of_the_day(starting_time)

    variables['SC_LAT'] = np.array(dataset.variables['LATITUDE'])
    variables['SC_LON'] = np.array(dataset.variables['LONGITUDE'])
    variables['SC_ALT'] = np.array(dataset.variables['ALTITUDE'])

    variables['GRID_MLAT'] = np.array(dataset.variables['LATITUDE_GEOMAGNETIC_GRID_MAP'])
    variables['GRID_MLON'] = np.array(dataset.variables['LONGITUDE_GEOMAGNETIC_' + pole_str + '_GRID_MAP'])
    variables['GRID_MLT'] = np.array(dataset.variables['MLT_GRID_MAP'])
    if self.pole == 'S':
        variables['GRID_MLAT'] = -variables['GRID_MLAT']
    variables['GRID_UT'] = np.array(dataset.variables['UT_' + pole])

    # Use the grid cell closest to the pole to estimate the epoch of the map
    lat = np.array(variables['GRID_MLAT'])
    ut = np.array(variables['GRID_UT'])
    lat = np.where(ut == 0, np.nan, lat)
    if self.pole == 'N':
        ind_mid_t = np.where(lat == np.nanmax(lat.flatten()))
    else:
        ind_mid_t = np.where(lat == np.nanmin(lat.flatten()))
    sectime0 = variables['GRID_UT'][ind_mid_t][0] * 3600
    diff_days = dttool.get_diff_days(starting_time, stopping_time)
    if diff_days > 0 and sectime0 < 0.5 * 86400.:
        # The pass crosses midnight: a small UT belongs to the next day
        dt = dt0 + datetime.timedelta(seconds=int(sectime0 + 86400))
    else:
        dt = dt0 + datetime.timedelta(seconds=int(sectime0))
    variables['DATETIME'] = dt
    invalid_ut_inds = np.where(ut == 0)

    # Auroral maps. Colors: 0: '1216', 1: '1304', 2: '1356', 3: 'LBHS', 4: 'LBHL'.
    variables['EMISSION_SPECTRA'] = ['1216', '1304', '1356', 'LBHS', 'LBHL']
    disk_aur = np.array(dataset.variables['DISK_RADIANCEDATA_INTENSITY_' + pole_str])
    # disk_aur[:, invalid_ut_inds] = np.nan
    disk_aur[disk_aur <= 0] = 0.1
    variables['GRID_AUR_1216'] = disk_aur[0, ::]
    variables['GRID_AUR_1216'][invalid_ut_inds] = np.nan
    variables['GRID_AUR_1304'] = disk_aur[1, ::]
    variables['GRID_AUR_1304'][invalid_ut_inds] = np.nan
    variables['GRID_AUR_1356'] = disk_aur[2, ::]
    variables['GRID_AUR_1356'][invalid_ut_inds] = np.nan
    variables['GRID_AUR_LBHS'] = disk_aur[3, ::]
    variables['GRID_AUR_LBHS'][invalid_ut_inds] = np.nan
    variables['GRID_AUR_LBHL'] = disk_aur[4, ::]
    variables['GRID_AUR_LBHL'][invalid_ut_inds] = np.nan

    # Auroral oval boundary (AOB_*: observed, MAOB_*: modeled)
    variables['AOB_EQ_MLAT'] = np.array(dataset.variables[pole_str + '_GEOMAGNETIC_LATITUDE'])
    variables['AOB_EQ_MLON'] = np.array(dataset.variables[pole_str + '_GEOMAGNETIC_LONGITUDE'])
    variables['AOB_EQ_MLT'] = np.array(dataset.variables[pole_str + '_MAGNETIC_LOCAL_TIME'])
    variables['AOB_PL_MLAT'] = np.array(dataset.variables[pole_str + '_POLAR_GEOMAGNETIC_LATITUDE'])
    variables['AOB_PL_MLON'] = np.array(dataset.variables[pole_str + '_POLAR_GEOMAGNETIC_LONGITUDE'])
    variables['AOB_PL_MLT'] = np.array(dataset.variables[pole_str + '_POLAR_MAGNETIC_LOCAL_TIME'])
    variables['MAOB_EQ_MLAT'] = np.array(dataset.variables['MODEL_' + pole_str + '_GEOMAGNETIC_LATITUDE'])
    variables['MAOB_EQ_MLON'] = np.array(dataset.variables['MODEL_' + pole_str + '_GEOMAGNETIC_LONGITUDE'])
    variables['MAOB_EQ_MLT'] = np.array(dataset.variables['MODEL_' + pole_str + '_MAGNETIC_LOCAL_TIME'])
    variables['MAOB_PL_MLAT'] = np.array(dataset.variables['MODEL_' + pole_str + '_POLAR_GEOMAGNETIC_LATITUDE'])
    variables['MAOB_PL_MLON'] = np.array(dataset.variables['MODEL_' + pole_str + '_POLAR_GEOMAGNETIC_LONGITUDE'])
    variables['MAOB_PL_MLT'] = np.array(dataset.variables['MODEL_' + pole_str + '_POLAR_MAGNETIC_LOCAL_TIME'])

    metadata.setdefault('ORBIT_ID', dataset.STARTING_ORBIT_NUMBER)
    dataset.close()
    self.variables = variables
    self.metadata = metadata
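
# --- Example: day-wrap handling for the map epoch ---
# A standalone sketch of the midnight-crossing logic above: when the pass spans
# two days, a small UT at the near-pole cell belongs to the second day.
# 'grid_epoch' is a hypothetical helper, not part of the loader.
import datetime

def grid_epoch(dt0, sectime0, crosses_midnight):
    if crosses_midnight and sectime0 < 0.5 * 86400.:
        sectime0 += 86400
    return dt0 + datetime.timedelta(seconds=int(sectime0))

dt0 = datetime.datetime(2016, 3, 1)
print(grid_epoch(dt0, 0.4 * 3600, crosses_midnight=True))    # -> 2016-03-02 00:24:00
print(grid_epoch(dt0, 23.5 * 3600, crosses_midnight=True))   # -> 2016-03-01 23:30:00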
def download_files(self):
    """
    Get a list of file URLs for the requested date and data type,
    download the files, and move them into the corresponding folders.
    """
    import natsort

    diff_days = dttool.get_diff_days(self.dt_fr, self.dt_to)
    dt0 = dttool.get_start_of_the_day(self.dt_fr)
    for iday in range(diff_days + 1):
        thisday = dt0 + datetime.timedelta(days=iday)
        # Construct the day of year from the date
        doy = thisday.timetuple().tm_yday
        doy_str = "{:03d}".format(doy)

        if self.file_type in ['l1b', 'edr-aur', 'edr-iono']:
            payload_type = self.file_type
        elif self.file_type in ['sdr-limb', 'sdr-disk', 'sdr2-disk']:
            payload_type = 'sdr'
        elif self.file_type in ['edr-night-disk', 'edr-day-disk']:
            payload_type = 'edr-disk'
        elif self.file_type in ['edr-night-limb', 'edr-day-limb']:
            payload_type = 'edr-limb'
        elif self.file_type in ['edr-gaim-disk', 'edr-gaim-lim']:
            payload_type = 'edr-gaim'
        else:
            raise NotImplementedError

        payload = {
            "spc": self.sat_id,
            "type": payload_type,
            "Doy": doy_str,
            "year": "{:d}".format(thisday.year),
        }

        # Get a list of the files from the DMSP SSUSI website
        # based on the data type and date
        r = requests.get(self.url_base + "data_retriver/", params=payload, verify=True)
        soup = bs4.BeautifulSoup(r.text, 'html.parser')
        div_filelist = soup.find("div", {"id": "filelist"})
        href_list = div_filelist.find_all(href=True)
        url_list = [self.url_base + href["href"] for href in href_list]
        url_list = natsort.natsorted(url_list, reverse=False)

        for f_url in url_list:
            # Only data files ending with .NC are needed
            if ".NC" not in f_url:
                continue
            # If working with SDR data, use only sdr-disk files
            if self.file_type.upper() not in f_url:
                continue
            if self.orbit_id is not None:
                if len(self.orbit_id) != 5:
                    raise ValueError
                if self.orbit_id not in f_url:
                    continue
            file_dir = self.data_file_root_dir / self.sat_id.lower() / thisday.strftime("%Y%m%d")
            file_dir.mkdir(parents=True, exist_ok=True)
            file_name = f_url.split('/')[-1]
            file_path = file_dir / file_name
            if file_path.is_file():
                self.done = True
                mylog.simpleinfo.info("The file {} exists.".format(file_name))
                continue
            mylog.simpleinfo.info("Downloading {} from the online database ...".format(file_name))
            rf = requests.get(f_url, verify=True)
            file_name = rf.url.split("/")[-1]
            with open(file_path, "wb") as ssusi_data:
                ssusi_data.write(rf.content)
            mylog.simpleinfo.info("Done. The file has been saved to {}".format(file_dir))
            self.done = True
            if self.orbit_id is not None:
                return
        # self.file_dir = file_dir
        # self.file_name = file_name

        if self.orbit_id is None and self.done:
            # Leave an empty marker file so later searches know the full day
            # has already been fetched.
            fp_log = file_dir / (self.file_type.upper() + '.full.log')
            fp_log.touch()
        if not self.done:
            mylog.StreamLogger.warning(
                "Cannot find the requested data on {} from the online database!".format(
                    thisday.strftime("%Y-%m-%d")))
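
# --- Example: filtering and ordering the scraped URL list ---
# A sketch of the selection applied above: keep only '.NC' data files, honor an
# optional 5-character orbit ID, and let natsort order the numeric orbit field
# naturally. URLs are invented for illustration.
import natsort

url_list = [
    'https://example.org/data/SSUSI_EDR-AUR_2016061_10010.NC',
    'https://example.org/data/SSUSI_EDR-AUR_2016061_10002.NC',
    'https://example.org/data/SSUSI_EDR-AUR_2016061_10009.NC',
    'https://example.org/data/index.html',
]
url_list = natsort.natsorted(url_list, reverse=False)

orbit_id = '10009'
selected = [u for u in url_list
            if '.NC' in u and (orbit_id is None or orbit_id in u)]
print(selected)
# -> ['https://example.org/data/SSUSI_EDR-AUR_2016061_10009.NC']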