def _extract_timeseries_from_video(vid, scalars, channels):
    res = [[] for k in range(len(scalars))]
    sample_time = []
    tf = drms.to_datetime(vid.attrs['end_time'])
    first_frame = None
    # Frames are named 'frame<N>'; sort them by their numeric suffix.
    for frame_key in sorted(list(vid.keys()), key=lambda fk: float(fk[5:])):
        if ('channels' in vid[frame_key].keys()
                and len(vid[frame_key]['channels'].shape) == 3):
            ti = drms.to_datetime(vid[frame_key].attrs['T_REC'])
            sample_time += [(tf - ti).total_seconds() / 60]
            if (first_frame is None):
                first_frame = Data_Gen._extract_frame(
                    vid[frame_key]['channels'],
                    vid[frame_key].attrs['SEGS'], channels)
            i = 0
            for scalar in scalars:
                if (scalar == 'RMS'):
                    # Despite the name, this accumulates a mean absolute (L1)
                    # difference with respect to the first frame.
                    l1_err = 0
                    try:
                        for c in range(first_frame.shape[2]):
                            if (vid[frame_key].attrs['SEGS'][c].decode() in channels):
                                this_frame = Data_Gen._extract_frame(
                                    vid[frame_key]['channels'],
                                    vid[frame_key].attrs['SEGS'], channels)
                                l1_err += np.sum(np.abs(
                                    sk.resize(this_frame[:, :, c],
                                              first_frame.shape[:2],
                                              preserve_range=True)
                                    - first_frame[:, :, c]))
                        # Normalize by the number of pixels and channels.
                        res[i] += [l1_err / (np.prod(first_frame.shape[0:2])
                                             * len(channels))]
                    except Exception:
                        sample_time = sample_time[:-1]
                        print('Frame {} not extracted.'.format(frame_key))
                        print(traceback.format_exc())
                else:
                    res[i] += [vid[frame_key].attrs[scalar]]
                i += 1
    return np.array(res), np.array(sample_time)
def _get_frames_key_from_query(ar_nb, peak_time, keys):
    # Keep only the frames that belong to the requested NOAA active region,
    # lie within +/-68 deg in both latitude and longitude, and were taken
    # before the peak time.
    list_keys = []
    for k in range(len(keys.NOAA_AR)):
        if (keys.NOAA_AR[k] == ar_nb
                and abs(keys.LAT_FWT[k]) <= 68
                and abs(keys.LON_FWT[k]) <= 68
                and drms.to_datetime(keys.T_REC[k]) <= drms.to_datetime(peak_time)):
            list_keys += [k]
    return list_keys
def extract_B_flares_from_goes(goes_data_path, output_path, time_window):
    with open(goes_data_path, 'r', newline='') as file:
        reader = csv.reader(file, delimiter=',')
        counter_init_B = 0
        counter_final_B = 0
        counter_M_X = 0
        dict_M_X = {}
        dict_B = {}
        # First, store the M- and X-class flares in a dictionary
        [date, noaa] = [2, 1]  # assumed column positions of date and NOAA number
        for event in reader:
            if (re.match(r'(M|X)[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*',
                         str.join(',', event))):
                counter_M_X += 1
                event_date = drms.to_datetime(event[date])
                if (event_date in dict_M_X):
                    dict_M_X[event_date] += [event[noaa]]
                else:
                    dict_M_X[event_date] = [event[noaa]]
        file.seek(0)
        # Then analyze all B-class flares and store them
        with open(output_path, 'w', newline='') as out:
            writer = csv.writer(out, delimiter=',')
            for event in reader:
                if (re.match(r'B[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*',
                             str.join(',', event))):
                    counter_init_B += 1
                    event_date = drms.to_datetime(event[date])
                    exclude_event = False
                    for time_delta in range(-time_window, time_window + 1):
                        event_date_window = event_date + timedelta(days=time_delta)
                        if (event_date_window in dict_M_X
                                and event[noaa] in dict_M_X[event_date_window]):
                            # print('Event {} ignored because of M-X flare {}'.format(event, event_date_window))
                            exclude_event = True
                            break
                        elif (event_date_window in dict_B
                              and event[noaa] in dict_B[event_date_window]):
                            # print('Event {} ignored because of B flare {}'.format(event, event_date_window))
                            exclude_event = True
                            break
                    if (not exclude_event):
                        writer.writerow(event)
                        counter_final_B += 1
                        if (event_date in dict_B):
                            dict_B[event_date] += [event[noaa]]
                        else:
                            dict_B[event_date] = [event[noaa]]
                elif (not re.match(r'(B|C|M|X)[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*',
                                   str.join(',', event))):
                    writer.writerow(event)
    print('Total number of M-X flares: {}'.format(counter_M_X))
    print('Total number of B flares: {}'.format(counter_init_B))
    print('Number of output B flares: {}'.format(counter_final_B))
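# A hypothetical call to extract_B_flares_from_goes above; the file names are
# placeholders and the two-day window is an arbitrary illustrative choice.
extract_B_flares_from_goes('GOES_data.csv', 'GOES_B_flares_filtered.csv',
                           time_window=2)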
def unduh_fits(j, z):
    time = (tanggal_jsoc(j)[1] - timedelta(hours=int(z))).strftime('%Y.%m.%d_%H:%M:%S_TAI')
    k = c.query('%s[%d][%s]' % (series, sharpnum, time), key=kwlist, rec_index=True)
    # Find the record that is closest to the central meridian, by using the
    # minimum of the patch's absolute longitude:
    rec_cm = k.LON_FWT.abs().idxmin()
    t_cm = drms.to_datetime(k.T_REC[rec_cm])
    # print(rec_cm, '@', k.LON_FWT[rec_cm], 'deg')
    t_cm_str = t_cm.strftime('%Y%m%d_%H%M%S_TAI')
    os.chdir(wdir)
    k.to_csv(path('meta', time) + '/k_' + t_cm_str[9:15] + '.csv', index_label='query')
    os.chdir(path('Fits', time))
    fname_mask = '{series}.{sharpnum}.{tstr}.{segment}.fits'
    fnames = {
        s: fname_mask.format(series=series, sharpnum=sharpnum, tstr=t_cm_str, segment=s)
        for s in segments}
    download_segments = []
    for w, v in fnames.items():
        if not os.path.exists(v):
            os.chdir(wdir)
            download_segments.append(w)
            print('{} will be downloaded.'.format(v))
        else:
            print('{} already exists in the folder.'.format(v))
    if download_segments:
        exp_query = '%s{%s}' % (rec_cm, ','.join(download_segments))
        r = c.export(exp_query)
        r.download(path('Fits', time))
def _get_urls_hmi_b720(client, mag_time):
    """Return the hmi.B_720s URLs needed by #download_magnetogram_hmi.

    Args:
        client (drms.Client): To query and return urls.
        mag_time (datetime.datetime): To find nearest magnetogram.

    Returns:
        generator that yields (datetime.datetime, str): Time of magnetogram,
            suffix url of magnetogram
    """
    import drms
    query_string = 'hmi.B_720s'
    query_string += f'[{mag_time.year}.'
    query_string += f'{str(mag_time.month).zfill(2)}.'
    query_string += f'{str(mag_time.day).zfill(2)}_'
    query_string += f'{str(mag_time.hour).zfill(2)}'
    query_string += '/1h]'
    data = client.query(query_string, key='T_REC', seg='field')
    times = drms.to_datetime(data[0].T_REC)
    nearest_time = _nearest(mag_time, times)
    # Generator that keeps only the record matching the nearest time
    urls = ((data_time, mag_url)
            for (data_time, mag_url) in zip(times, data[1].field)
            if data_time == nearest_time)
    return urls
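# The _nearest helper used above is not shown; a minimal sketch, assuming it
# simply returns the timestamp with the smallest absolute offset from the
# target, could look like this (hypothetical implementation):
def _nearest(target, times):
    return min(times, key=lambda t: abs(t - target))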
def convert_time_2015(k):
    # Change T_REC to datetime type
    k.T_REC = drms.to_datetime(k.T_REC)
    # Convert TAI times to UTC
    t1 = Time(k.T_REC, format='datetime64', scale='tai')
    t2 = t1.utc
    t3 = t2.iso
    k.T_REC = t3
    k.T_REC = pd.to_datetime(k.T_REC)
    # Drop the rows carried over from the previous year (2014)
    k = k[(k['T_REC'].dt.year != 2014)]
    return (k)
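# A standalone sketch of the same TAI-to-UTC step for a single timestamp (the
# date is an arbitrary example); drms.to_datetime only strips the '_TAI'
# suffix, so the scale correction itself is done by astropy.
import drms
from astropy.time import Time

t_rec = drms.to_datetime('2015.01.01_00:00:00_TAI')
t_utc = Time(t_rec.to_pydatetime(), format='datetime', scale='tai').utc
print(t_utc.iso)  # UTC lags TAI by 35 s at this epoch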
def _in_time_window(time, start_time, end_time):
    if (start_time is None and end_time is None):
        return True
    try:
        if (start_time is None):
            before_end = (drms.to_datetime(time) <= drms.to_datetime(end_time))
            return before_end
        elif (end_time is None):
            after_start = (drms.to_datetime(time) >= drms.to_datetime(start_time))
            return after_start
        else:
            after_start = (drms.to_datetime(time) >= drms.to_datetime(start_time))
            before_end = (drms.to_datetime(time) <= drms.to_datetime(end_time))
            return (after_start and before_end)
    except Exception:
        print('Impossible to determine if time {} is in [{}, {}]'.format(
            time, start_time, end_time))
        return False
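# A quick illustration of the window check with made-up values (assuming
# _in_time_window is reachable as a plain function or staticmethod).
print(_in_time_window('2014-03-29 17:48:00', '2014-01-01', '2014-12-31'))  # True
print(_in_time_window('2014-03-29 17:48:00', None, None))                  # True: no bounds given
print(_in_time_window('2016-03-29 17:48:00', '2014-01-01', '2014-12-31'))  # False: after end_time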
def fnames(i, j, m):
    os.chdir(wdir)
    os.chdir(path_folder('meta', i, j))
    k = pd.read_csv(os.listdir()[m], index_col='query')
    rec_cm = k.LON_FWT.idxmin()
    k_cm = k.loc[rec_cm]
    t_cm = drms.to_datetime(k.T_REC[rec_cm])
    t_cm_str = t_cm.strftime('%Y%m%d_%H%M%S_TAI')
    os.chdir(wdir)
    os.chdir(path_folder('Fits', i, j))
    fname_mask = '{series}.{sharpnum}.{tstr}.{segment}.fits'
    fname = {
        s: fname_mask.format(series=series,
                             sharpnum=ar_sharpnum(i)[1],
                             tstr=t_cm_str,
                             segment=s)
        for s in segments
    }
    # os.chdir(wdir)
    return fname, k_cm, t_cm_str, t_cm
def get_Hmi_sharp():
    c = drms.Client()
    startDate = datetime.strptime('2010.05.01 00:00:00', "%Y.%m.%d %H:%M:%S")
    while startDate.year < 2020:
        startDateString = startDate.strftime('%Y.%m.%d_%H:%M:%S')
        startDate = startDate + relativedelta(weeks=+1)
        endDateString = startDate.strftime('%Y.%m.%d_%H:%M:%S')
        dateString = startDateString + '-' + endDateString
        print(dateString)
        # variable = 'T_REC,HARPNUM,TOTUSJH,TOTPOT,TOTUSJZ,ABSNJZH,SAVNCPP,USFLUX,AREA_ACR,MEANPOT,R_VALUE,SHRGT45,NOAA_AR,NOAA_NUM,NOAA_ARS,QUALITY'
        variable = 'T_REC,HARPNUM,TOTUSJH,TOTPOT,TOTUSJZ,ABSNJZH,SAVNCPP,USFLUX,AREA_ACR,MEANPOT,R_VALUE,SHRGT45,MEANSHR,MEANGAM,MEANGBT,MEANGBZ,MEANGBH,MEANJZH,MEANJZD,MEANALP,NOAA_AR,NOAA_NUM,NOAA_ARS,QUALITY'
        df = c.query('hmi.sharp_720s[][' + dateString + ']', key=variable)
        if (df.size == 0):
            continue
        df.T_REC = drms.to_datetime(df.T_REC)
        conn = sqlite3.connect('HMI_SHARP_SWPC_FINAL.db')
        df.to_sql('02_HMI_SHARP', conn, if_exists='append', index=False)
def SHARPtime(sharpnum, method='cm', maxlon=90):
    """
    For a given SHARP, identify a single frame with (if possible) a longitude
    centroid within +-maxlon of Central Meridian.
    - If method='cm', use the frame with timestamp closest to central meridian.
    - If method='maxflux', use the frame with maximum unsigned flux.
    Returns (1) the timestamp of the chosen frame, (2) the corresponding
    emergence time (next noon) as a datetime object, and (3) an ivalid flag
    that is True if a valid frame exists and False if the frame is outside
    +-maxlon.
    """
    c = drms.Client()

    # Get time series of unsigned fluxes and longitudes of this SHARP
    # (0 is central meridian):
    k = c.query('hmi.sharp_cea_720s[%i][]' % sharpnum,
                key='HARPNUM, T_REC, USFLUXL, LON_FWT')

    # Find individual record:
    if (method == 'cm'):
        rec_cm = k.LON_FWT.abs().idxmin()
        k_cm = k.loc[rec_cm]
        if (np.abs(k.LON_FWT[rec_cm]) <= maxlon):
            ivalid = True
        else:
            ivalid = False
    if (method == 'maxflux'):
        usfluxl = k.USFLUXL.where(np.abs(k.LON_FWT) <= maxlon, other=0)
        if (usfluxl.max() > 0):
            rec_cm = usfluxl.abs().idxmax()
            k_cm = k.loc[rec_cm]
            ivalid = True
        else:
            rec_cm = 0
            ivalid = False
    t_cm = drms.to_datetime(k.T_REC[rec_cm])

    # Identify emergence (completion) time - next noon:
    twelve_hrs = datetime.timedelta(hours=12)
    t_em = t_cm + twelve_hrs
    t_em = t_em.replace(hour=12, minute=0, second=0)

    return k.T_REC[rec_cm], t_em, ivalid
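# A short usage sketch for SHARPtime above; SHARP 377 is an arbitrary example.
t_rec, t_em, ok = SHARPtime(377, method='cm', maxlon=90)
if ok:
    print('Chosen frame:', t_rec, '-> emergence time:', t_em)
else:
    print('No frame within +/-90 deg of central meridian.')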
def get_cgemLorentz():
    c = drms.Client()
    startDate = datetime.strptime('2010.05.01 00:00:00', "%Y.%m.%d %H:%M:%S")
    while startDate.year < 2020:
        startDateString = startDate.strftime('%Y.%m.%d_%H:%M:%S')
        startDate = startDate + relativedelta(weeks=+1)
        endDateString = startDate.strftime('%Y.%m.%d_%H:%M:%S')
        dateString = startDateString + '-' + endDateString
        print(dateString)
        variable = 'HARPNUM, T_REC, TOTBSQ, TOTFZ, EPSZ, TOTFY, TOTFX, EPSY, EPSX, QUALITY, NOAA_ARS, NOAA_AR, NOAA_NUM'
        df = c.query('cgem.lorentz[][' + dateString + ']', key=variable)
        if (df.size == 0):
            continue
        df.T_REC = drms.to_datetime(df.T_REC)
        conn = sqlite3.connect('HMI_SHARP_SWPC_FINAL.db')
        df.to_sql('02_CGEM_LORENTZ', conn, if_exists='append', index=False)
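# Both loaders above append weekly query results to SQLite tables; a minimal
# sketch of reading one of those tables back into a DataFrame (database and
# table names taken from the code above) could be:
import sqlite3
import pandas as pd

conn = sqlite3.connect('HMI_SHARP_SWPC_FINAL.db')
df = pd.read_sql('SELECT * FROM "02_HMI_SHARP"', conn, parse_dates=['T_REC'])
print(df.head())
conn.close()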
# entries from aia.lev1 in this case.
print('Querying series info...')
si = c.info(series)
si_lev1 = c.info(series_lev1)
for k in keys:
    linkinfo = si.keywords.loc[k].linkinfo
    if linkinfo is not None and linkinfo.startswith('lev1->'):
        note_str = si_lev1.keywords.loc[k].note
    else:
        note_str = si.keywords.loc[k].note
    print('%10s : %s' % (k, note_str))

# Get keyword values for the selected timespan and wavelength
print('Querying keyword data...\n -> %s' % qstr)
res = c.query(qstr, key=keys)
print(' -> %d lines retrieved.' % len(res))

# Only use entries with QUALITY==0
res = res[res.QUALITY == 0]
print(' -> %d lines after QUALITY selection.' % len(res))

# Convert T_REC strings to datetime and use it as index for the series
res.index = drms.to_datetime(res.T_REC)

# Create some simple plots
ax = res[['DATAMIN', 'DATAMAX', 'DATAMEAN', 'DATARMS', 'DATASKEW']].plot(
    figsize=(8, 10), subplots=True)
ax[0].set_title(qstr, fontsize='medium')
plt.tight_layout()
plt.show()
tsel = '2010.05.01_TAI-2016.04.01_TAI@12h'

# DRMS query string
qstr = '%s[%s]' % (series, tsel)

# Create DRMS JSON client, use debug=True to see the query URLs
c = drms.Client()

# Send request to the DRMS server
print('Querying keyword data...\n -> %s' % qstr)
res = c.query(qstr, key=['T_REC', 'CAPN2', 'CAPS2'])
print(' -> %d lines retrieved.' % len(res))

# Convert T_REC strings to datetime and use it as index for the series
res.index = drms.to_datetime(res.pop('T_REC'))

# Determine smallest timestep
dt = np.diff(res.index.to_pydatetime()).min()

# Make sure the time series contains all time steps (fills gaps with NaNs)
# Note: This does not seem to work with old pandas versions (e.g. v0.14.1)
a = res.asfreq(dt)

# Compute 30d moving average and standard deviation using a boxcar window
win_size = int(30*24*3600/dt.total_seconds())
if tuple(map(int, pd.__version__.split('.')[:2])) >= (0, 18):
    a_avg = a.rolling(win_size, min_periods=1, center=True).mean()
    a_std = a.rolling(win_size, min_periods=1, center=True).std()
else:
    # this is deprecated since pandas v0.18.0
import drms

date0 = '2012-07-12T16:00:00Z'

# Convert a DRMS time to an astropy Time object.
# This can then be converted to TAI.
from astropy.time import Time

td = drms.to_datetime(date0)
print(td)
ta = Time(td, format='datetime', scale='utc')
print(ta)
tt = Time(ta, format='datetime', scale='tai')
print(tt)

from sunpy.time import parse_time
print(parse_time(date0.split('Z')[0]))
print(ta.tai)
print(parse_time(ta.isot))

# Test out time differencing
d0 = '2012-07-12T11:00:00.000'
d1 = '2012-07-12T13:00:00.000'
t0 = Time(d0, scale='tai')
t1 = Time(d1, scale='tai')
delta_t = t1 - t0
print()
print('-----------------------')
print(t0)
print(t1)
print(delta_t.sec)
    'USFLUX', 'ERRVF', 'CRPIX1', 'CRPIX2', 'CDELT1', 'CDELT2', 'CRVAL1',
    'CRVAL2',
]

# Create DRMS client, use debug=True to see the query URLs.
c = drms.Client(verbose=True)

print('Querying metadata...')
kw = c.query(f'{series}[{int(sharpnum)}]', key=kwlist, rec_index=True)
t = drms.to_datetime(kw.T_REC)

print('Finding central meridian crossing...')
rec_cm = kw.LON_FWT.abs().idxmin()
k_cm = kw.loc[rec_cm]
t_cm = drms.to_datetime(kw.T_REC[rec_cm])
print('-> rec_cm:', rec_cm, '@', kw.LON_FWT[rec_cm], 'deg')

# Check if any files were already downloaded.
fnames = {}
download_segments = []
t_cm_str = t_cm.strftime('%Y%m%d_%H%M%S_TAI')
for s in segments:
    fnames[s] = fname_fmt_str.format(series=series,
                                     sharpnum=sharpnum,
                                     tstr=t_cm_str,
from matplotlib import dates
import drms
import numpy

numpy.set_printoptions(threshold=1600)
file = open('testfile.txt', 'w')

series = 'hmi.sharp_cea_720s'
sharpnum = 5298  # NOAA12297
kwlist = ['T_REC', 'LON_FWT', 'TOTPOT', 'TOTUSJH', 'TOTUSJZ', 'AREA_ACR']

c = drms.Client()
k = c.query('%s[%d]' % (series, sharpnum), key=kwlist, n='none')
file.write(str(k))
file.close()

k.index = drms.to_datetime(k.T_REC)
t_cm = k.LON_FWT.abs().argmin()
print(k)

plt.rc('axes', titlesize='medium')
plt.rc('axes.formatter', use_mathtext=True)
plt.rc('mathtext', default='regular')
plt.rc('legend', fontsize='medium')

fig, ax = plt.subplots(2, 2, sharex=True, figsize=(10, 6))
axi = ax[0, 0]
axi.plot(k.index, k.TOTPOT, '.', ms=2, label='TOTPOT')
axi.set_title('Total Photospheric Magnetic Free Energy')
axi.set_ylabel(r'Ergs $cm^{-1}$', size=15)
axi = ax[0, 1]
tsel = '2010.05.01_TAI-2016.04.01_TAI@6h'

# DRMS query string
qstr = '%s[%s]' % (series, tsel)

# Create DRMS JSON client, use debug=True to see the query URLs
c = drms.Client()

# Send request to the DRMS server
print('Querying keyword data...\n -> %s' % qstr)
res = c.query(qstr, key=['T_REC', 'DATAMEAN', 'DATARMS'])
print(' -> %d lines retrieved.' % len(res))

# Convert T_REC strings to datetime and use it as index for the series
res.index = drms.to_datetime(res.pop('T_REC'))

# Note: DATARMS contains the standard deviation, not the RMS!
t = res.index
avg = res.DATAMEAN/1e3
std = res.DATARMS/1e3

# Create plot
fig, ax = plt.subplots(1, 1, figsize=(15, 7))
ax.set_title(qstr, fontsize='medium')
ax.fill_between(
    t, avg+std, avg-std, edgecolor='none', facecolor='b', alpha=0.3,
    interpolate=True)
ax.plot(t, avg, color='b')
ax.set_xlabel('Time')
ax.set_ylabel('Disk-averaged continuum intensity [kDN/s]')
def test_corner_case_series(time_series, expected): assert pd.isnull(drms.to_datetime(time_series)).equals(expected)
def test_force_string(time_string, expected): assert drms.to_datetime(time_string, force=True) == expected
def test_z_leap_string(time_string, expected): assert drms.to_datetime(time_string) == expected
def main(image_size_output, path_to_mag_cube, mag_cube_name, base, mission): warnings.simplefilter(action="ignore", category=FutureWarning) warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) ### This put all the FITS header keywords, for all the MDI or HMI data products available in a pandas dataframe called mag_keys. ### ### This part takes about 10 minutes ### client = drms.Client() query_mag = 'mdi.fd_M_96m_lev182[]' ### or ('hmi.M_720s') if want SDO HMI instead of SOHO MDI mag_keys = client.query(query_mag, key=drms.const.all) print('len(mag_keys):', len(mag_keys)) print('image_size_output:', image_size_output) print('path_to_mag_cube:', path_to_mag_cube) print('mag_cube_name:', mag_cube_name) print('base:', base) print('mission:', mission) mag_keys_list = list( client.keys('mdi.fd_M_96m_lev182' )) #or ('hmi.M_720s') if want SDO HMI instead of SOHO MDI print('mag_keys_list:', mag_keys_list) cube_orig = h5py.File(f'{path_to_mag_cube}{mag_cube_name}', 'r') print(list(cube_orig.keys())) cube_orig_data = cube_orig[list(cube_orig.keys( ))[0]][:] #cube_orig[f'{base}_{mission}_{image_size_output}'][:] print('np.shape(cube_orig_data):', np.shape(cube_orig_data)) times_list = csv_times_reader(path_to_mag_cube, pattern=f'*{base}*{mission}*[!sync].csv') print('times_list[0:10]:', times_list[0:10]) print('times_list[-10:]:', times_list[-10:]) print('np.shape(times_list):', np.shape(times_list)) ### creat cube copy with data from original cube and add the metadata via attributes which can now write ### full_mag_cube_name = f'{path_to_mag_cube}{mag_cube_name}' mag_cube_name_new = full_mag_cube_name.split( '.')[0] + '_retroactivemetadata.h5' print(mag_cube_name_new) data_cube_new = h5py.File(mag_cube_name_new, 'w') data_cube_new.create_dataset(f'{base}_{mission}_{image_size_output}', data=cube_orig_data, compression="gzip") counter = 0 meta_data_dict = {} for t_pre in tqdm(times_list): #[0:2] saftey check t_drms_split = str(drms.to_datetime(t_pre)).split(' ') t_tai = '_'.join( (t_drms_split[0].replace('-', '.'), t_drms_split[1])) + '_TAI' ### this original method line below never completes on JSOC as starts fast but after 800 files starts to slow down exponentially ### #query = client.query(f'mdi.fd_M_96m_lev182[{t_tai}]', key = client.keys('mdi.fd_M_96m_lev182')) with client = drms.Client(email,verbose=False) query_pre = mag_keys.loc[mag_keys['T_REC'] == t_tai] query = mag_keys.loc[query_pre.index[0]] query_metadata_update = downsample_header_local( mission, image_size_output, query, mag_keys) for j, key in enumerate(mag_keys): if (key == 'COMMENT') or (key == 'HISTORY'): key1 = f'{key}{counter}' ##########data_cube_new.attrs[f'{key1}_{counter}'] = query_metadata_update[key] #[0] meta_data_dict[f'{key1}_{counter}'] = query_metadata_update[ key] else: ##########data_cube_new.attrs[f'{key}_{counter}'] = query_metadata_update[key] #[0] meta_data_dict[f'{key}_{counter}'] = query_metadata_update[key] #########data_cube_new.attrs[f'COMMENT_{counter}'] = f'Zeros outside solar disk for {base}' meta_data_dict[ f'COMMENT_{counter}'] = f'Zeros outside solar disk for {base}' counter += 1 ########data_cube_new.attrs.update(meta_data_dict) data_cube_new.create_dataset( f'{base}_{mission}_{image_size_output}_metadata', data=json.dumps(meta_data_dict, cls=NpEncoder)) data_cube_new.attrs['NOTE'] = 'JSON serialization' data_cube_new.close()
def test_time_series(time_series, expected): assert drms.to_datetime(time_series).equals(expected)
def download_jsoc_data( self, files_core_name='jsoc_data', directory=None, goes_data_path=None, goes_row_pattern='(B|C|M|X)[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*', start_time=None, end_time=None, hours_before_event=24, sample_time='@1h', limit=400): if (directory is None and not os.path.isdir(os.path.join(self.main_path, 'JSOC-Data'))): os.mkdir(os.path.join(self.main_path, 'JSOC-Data')) os.chdir(os.path.join(self.main_path, 'JSOC-Data')) elif (os.path.isdir(os.path.join(self.main_path, directory))): os.chdir(os.path.join(self.main_path, directory)) else: print('The path {} does not exist.'.format( os.path.join(self.main_path, directory))) return False essential_ar_attrs = {'NOAA_AR', 'HARPNUM', 'LAT_FWT', 'LON_FWT'} essential_goes_attrs = { 'start_time', 'peak_time', 'noaa_active_region', 'event_class' } jsoc_serie = 'hmi.sharp_cea_720s[1-7256]' # Verifications of the path to GOES data and the format of the .csv if (goes_data_path is None): if (os.path.exists(os.path.join(self.main_path, 'GOES_data.csv'))): goes_data_path = os.path.join(self.main_path, 'GOES_data.csv') else: print('Please enter a valid path to the GOES data.') return False if (not os.path.exists(goes_data_path)): print('Please enter a valid path to the GOES data.') return False missing_goes_attrs = self._check_essential_attributes( set(self.goes_attrs), essential_goes_attrs) if (len(missing_goes_attrs) > 0): print('Missing attributes in GOES file : {}.'.format( missing_goes_attrs)) return False [start, peak, noaa_ar] = [ self.goes_attrs.index('start_time'), self.goes_attrs.index('peak_time'), self.goes_attrs.index('noaa_active_region') ] total_length = sum(1 for line in open(goes_data_path, 'r')) self.ar_attrs += self._check_essential_attributes( set(self.ar_attrs), essential_ar_attrs) # Estimation of the number of solar eruption videos considered. # Limit the number of videos if 'limit' is reached. 
nb_positive = 0 considered_events = [] with open(goes_data_path, 'r', newline='') as file: reader = csv.reader(file, delimiter=',') counter = 0 for event in reader: counter += 1 if (self._in_time_window(event[start], start_time, end_time) and re.match(goes_row_pattern, str.join(',', event)) and int(event[noaa_ar]) > 0): nb_positive += 1 considered_events += [counter] if (limit is not None and nb_positive > limit): events_really_considered = np.random.choice( considered_events, limit) # Summary print('Nb of videos to download: {}/{}'.format(nb_positive, counter)) print('Look up of pictures until {}h before an event.'.format( hours_before_event)) with open(goes_data_path, 'r', newline='') as file: reader = csv.reader(file, delimiter=',') client = drms.Client() mem = 0 # Set a counter for the current cache memory (in bytes) used by videos part_counter = 0 vid_counter = 0 counter = 0 current_save_file = h5py.File( '{}_part_{}.hdf5'.format(files_core_name, part_counter), 'w') for event in reader: counter += 1 if (re.match(goes_row_pattern, str.join(',', event)) and (limit is None or nb_positive <= limit or (counter in events_really_considered)) and self._in_time_window(event[start], start_time, end_time)): ar_nb = int(event[noaa_ar]) # We process only numbered flares if (ar_nb > 0): peak_time = drms.to_datetime(event[peak]) start_time = peak_time - timedelta( hours=hours_before_event) # Change the date format peak_time = self._UTC2JSOC_time(str(peak_time)) start_time = self._UTC2JSOC_time(str(start_time)) # Do the request to JSOC database query = '{}[{}-{}{}]'.format(jsoc_serie, start_time, peak_time, sample_time) if (len(self.ar_segs) == 0): keys = client.query(query, key=self.ar_attrs) else: keys, segments = client.query(query, key=self.ar_attrs, seg=self.ar_segs) try: # Downloads the video of this solar flare and construct # the HDF5 file. nb_frame = len(keys.NOAA_AR) - 1 dumping = False current_vid = current_save_file.create_group( 'video{}'.format(vid_counter)) vid_counter += 1 for k in range(len(self.goes_attrs)): current_vid.attrs[ self.goes_attrs[k]] = event[k] print( 'Trying to extract data for video {} corresponding to event {}' .format(vid_counter, event[peak])) frame_counter = 0 while (nb_frame > -1 and not dumping): right_pic = (keys.NOAA_AR[nb_frame] == ar_nb)\ and abs(keys.LAT_FWT[nb_frame]) <= 68\ and abs(keys.LON_FWT[nb_frame]) <= 68 #Creates a new frame and add it to the video if (right_pic): current_frame = current_vid.create_group( 'frame{}'.format(frame_counter)) frame_counter += 1 for k in range(len(self.ar_attrs)): current_frame.attrs[ self.ar_attrs[k]] = keys[ self.ar_attrs[k]][nb_frame] current_frame.attrs['SEGS'] = np.string_( list(self.ar_segs)) data_shape = None # unknown frame = None seg_counter = 0 for seg in self.ar_segs: url = 'http://jsoc.stanford.edu' + segments[ seg][nb_frame] data = np.array(fits.getdata( url, cache=False), dtype=np.float32) if (data_shape is None): data_shape = data.shape frame = np.zeros( data_shape + (len(self.ar_segs), ), dtype=np.float32) frame[:, :, seg_counter] = data seg_counter += 1 mem += data.nbytes current_frame.create_dataset('channels', data=frame) if (mem / (1024 * 1024) > 2 * self.mem_limit): print( 'Memory usage > {}MB. Dumping...'. 
format(3 * self.mem_limit)) dumping = True nb_frame -= 1 if (frame_counter == 0): # del current_save_file['video{}'.format(vid_counter)] # vid_counter -=1 print('No frame downloaded, video erased.') else: print( 'Video {} associated to event {} extracted ({} frames)' .format(vid_counter, event[peak], frame_counter)) except: print( 'Impossible to extract data for event {0} (nb {1})' .format(event[peak], counter)) print(traceback.format_exc()) else: # if the row pattern does not match print('Row ignored: ' + str.join(',', event)) if (int(counter * 100.0 / total_length) % 5 == 0): print( str(counter * 100.0 / total_length) + '% of GOES data set analyzed') # Save the current HDF5 file. Reset vid_counter for the next HDF5 file. if (mem / (1024 * 1024) > self.mem_limit): current_save_file.close() part_counter += 1 vid_counter = 0 mem = 0 current_save_file = h5py.File( '{}_part_{}.hdf5'.format(files_core_name, part_counter), 'w') # After the downloading, close the last file ! current_save_file.close() print('Data base has been downloaded successfully !') return True
import numpy as n
# %matplotlib inline

series = 'hmi.sharp_cea_720s'
sharpnum = 377  # sharp number
segments = ['magnetogram', 'continuum']
kwlist = ['T_REC', 'LON_FWT', 'OBS_VR', 'CROTA2', 'CRPIX1', 'CRPIX2',
          'CDELT1', 'CDELT2', 'CRVAL1', 'CRVAL2']

c = drms.Client(email='*****@*****.**', verbose=True)  # Use your own email address.
k = c.query('%s[%d]' % (series, sharpnum), key=kwlist, rec_index=True)

rec_cm = k.LON_FWT.abs().idxmin()
k_cm = k.loc[rec_cm]
t_cm = drms.to_datetime(k.T_REC[rec_cm])
print(rec_cm, '@', k.LON_FWT[rec_cm], 'deg')
print('Timestamp:', t_cm)
t_cm_str = t_cm.strftime('%Y%m%d_%H%M%S_TAI')

fname_mask = '{series}.{sharpnum}.{tstr}.{segment}.fits'
fnames = {
    s: fname_mask.format(
        series=series, sharpnum=sharpnum, tstr=t_cm_str, segment=s)
    for s in segments}

download_segments = []
for k, v in fnames.items():
    if not os.path.exists(v):
        download_segments.append(k)
        note_str = series_info_lev1.keywords.loc[key].note
    else:
        note_str = series_info.keywords.loc[key].note
    print(f'{key:>10} : {note_str}')

###############################################################################
# Construct the DRMS query string: "Series[timespan][wavelength]"

qstr = 'aia.lev1_euv_12s[2014-01-01T00:00:01Z/365d@1d][335]'

# Get keyword values for the selected timespan and wavelength
print(f'Querying keyword data...\n -> {qstr}')
result = client.query(qstr, key=keys)
print(f' -> {len(result)} lines retrieved.')

# Only use entries with QUALITY==0
result = result[result.QUALITY == 0]
print(f' -> {len(result)} lines after QUALITY selection.')

# Convert T_REC strings to datetime and use it as index for the series
result.index = drms.to_datetime(result.T_REC)

###############################################################################
# Create some simple plots

ax = result[['DATAMIN', 'DATAMAX', 'DATAMEAN', 'DATARMS',
             'DATASKEW']].plot(figsize=(8, 10), subplots=True)
ax[0].set_title(qstr, fontsize='medium')
plt.tight_layout()
plt.show()
def test_corner_case(time_string, expected):
    assert pd.isnull(drms.to_datetime(time_string)) == expected
    assert isinstance(drms.to_datetime([]), pd.Series)
    assert drms.to_datetime([]).empty
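# For context, a minimal illustration of the behaviour these tests exercise
# (example values, not the actual test fixtures): list input yields a pandas
# Series, and the trailing '_TAI' scale identifier is stripped during parsing
# rather than converted.
import drms
import pandas as pd

times = drms.to_datetime(['2010.05.01_00:00:00_TAI', '2010.05.01_12:00:00_TAI'])
assert isinstance(times, pd.Series)
print(times)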
def download_jsoc_data( self, files_core_name='jsoc_data', directory=None, goes_data_path=None, goes_row_pattern='(B|C|M|X)[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*', start_time=None, end_time=None, nb_frames_before_event=24, sample_time=1, # in hours limit=400): if (directory is None and not os.path.isdir(os.path.join(self.main_path, 'JSOC-Data'))): os.mkdir(os.path.join(self.main_path, 'JSOC-Data')) os.chdir(os.path.join(self.main_path, 'JSOC-Data')) elif (os.path.isdir(os.path.join(self.main_path, directory))): os.chdir(os.path.join(self.main_path, directory)) else: print('The path {} does not exist.'.format( os.path.join(self.main_path, directory))) return False essential_ar_attrs = { 'NOAA_AR', 'HARPNUM', 'LAT_FWT', 'LON_FWT', 'T_REC' } essential_goes_attrs = { 'start_time', 'peak_time', 'noaa_active_region', 'event_class' } jsoc_serie = 'hmi.sharp_cea_720s[1-7256]' # Verifications of the path to GOES data and the format of the .csv if (goes_data_path is None): if (os.path.exists(os.path.join(self.main_path, 'GOES_data.csv'))): goes_data_path = os.path.join(self.main_path, 'GOES_data.csv') else: print('Please enter a valid path to the GOES data.') return False if (not os.path.exists(goes_data_path)): print('Please enter a valid path to the GOES data.') return False missing_goes_attrs = self._check_essential_attributes( set(self.goes_attrs), essential_goes_attrs) if (len(missing_goes_attrs) > 0): print('Missing attributes in GOES file : {}.'.format( missing_goes_attrs)) return False [start, peak, noaa_ar] = [ self.goes_attrs.index('start_time'), self.goes_attrs.index('peak_time'), self.goes_attrs.index('noaa_active_region') ] total_length = sum(1 for line in open(goes_data_path, 'r')) self.ar_attrs += self._check_essential_attributes( set(self.ar_attrs), essential_ar_attrs) # Estimation of the number of solar eruption videos considered. # Limit the number of videos if 'limit' is reached. 
nb_positive = 0 considered_events = [] with open(goes_data_path, 'r', newline='') as file: reader = csv.reader(file, delimiter=',') counter = 0 for event in reader: counter += 1 if (self._in_time_window(event[start], start_time, end_time) and re.match(goes_row_pattern, str.join(',', event)) and int(event[noaa_ar]) > 0): nb_positive += 1 considered_events += [counter] if (limit is not None and nb_positive > limit): events_really_considered = np.random.choice( considered_events, limit) # Summary print('Nb of videos to download: {}/{}'.format(nb_positive, counter)) print('Look up of pictures until {}h before an event.'.format( sample_time * nb_frames_before_event)) with open(goes_data_path, 'r', newline='') as file: reader = csv.reader(file, delimiter=',') client = drms.Client() mem = 0 # Set a counter for the current cache memory (in bytes) used by videos part_counter = 0 vid_counter = 0 counter = 0 current_save_file = h5py.File( '{}_part_{}.hdf5'.format(files_core_name, part_counter), 'w') # Get the delta time for the look up in the JSOC data base (with a marge) dt = timedelta(hours=sample_time * (nb_frames_before_event + 1)) # Change the sampling rate format sample_time = '@{}h'.format(sample_time) for event in reader: counter += 1 if (re.match(goes_row_pattern, str.join(',', event)) and (limit is None or nb_positive <= limit or (counter in events_really_considered)) and self._in_time_window(event[start], start_time, end_time)): ar_nb = int(event[noaa_ar]) # We process only numbered flares if (ar_nb > 0): peak_time = drms.to_datetime(event[peak]) start_time = peak_time - dt # Change the date format peak_time = self._UTC2JSOC_time(str(peak_time)) start_time = self._UTC2JSOC_time(str(start_time)) # Do the request to JSOC database query = '{}[{}-{}{}]'.format(jsoc_serie, start_time, peak_time, sample_time) if (len(self.ar_segs) == 0): keys = client.query(query, key=self.ar_attrs) else: keys, segments = client.query(query, key=self.ar_attrs, seg=self.ar_segs) try: # Get only the frames that are: # * related to our AR (same NOAA) # * within +/- 68deg from the central meridian # * before the peak time frames_keys = self._get_frames_key_from_query( ar_nb, peak_time, keys) # Do not download videos with missing data if (len(frames_keys) < nb_frames_before_event): print( 'Only {} (< {}) frames found for the SF produced on {}. Ignored.' 
.format(len(frames_keys), nb_frames_before_event, event[peak])) else: current_vid = current_save_file.create_group( 'video{}'.format(vid_counter)) vid_counter += 1 for k in range(len(self.goes_attrs)): current_vid.attrs[ self.goes_attrs[k]] = event[k] if (len(frames_keys) > nb_frames_before_event): print( '{} frames are found for the SF produced on {}, only the last {} are considered' .format(len(frames_keys), event[peak], nb_frames_before_event)) frames_keys = frames_keys[ len(frames_keys) - nb_frames_before_event:] # We download each video with the LAST frame corresponding to the eruption for i in range(nb_frames_before_event): current_frame = current_vid.create_group( 'frame{}'.format(i)) # Includes the specific attributes to the frame current_frame.attrs['SEGS'] = np.string_( list(self.ar_segs)) for a in self.ar_attrs: current_frame.attrs[a] = keys[a][ frames_keys[i]] # Downloads the specific segments data_frame = [] for seg in self.ar_segs: url = 'http://jsoc.stanford.edu' + segments[ seg][frames_keys[i]] data = np.array(fits.getdata( url, cache=False), dtype=np.float32) data_frame += [data] mem += data.nbytes data_frame = np.array(data_frame, dtype=np.float32) # Creates the actual data set in the hdf5 file current_frame.create_dataset( 'channels', data=data_frame) except: print('Impossible to extract data for event {0}.'. format(event[peak])) print(traceback.format_exc()) else: # if the row pattern does not match print('Row ignored: ' + str.join(',', event)) if (counter % 20 == 0): print('{:0.2f}% of GOES data set analyzed'.format( counter * 100.0 / total_length)) # Save the current HDF5 file. Reset vid_counter for the next HDF5 file. if (mem / (1024 * 1024) > self.mem_limit): current_save_file.close() part_counter += 1 vid_counter = 0 mem = 0 current_save_file = h5py.File( '{}_part_{}.hdf5'.format(files_core_name, part_counter), 'w') # After the downloading, close the last file ! current_save_file.close() print('The data base has been downloaded successfully !') return True
def _extract_timeseries_from_video(vid, scalars, channels, time_event_last_frame=True): res = [[] for k in range(len(scalars))] sample_time = [] tf = drms.to_datetime(vid.attrs['end_time']) last_frame = None for frame_key in sorted(list(vid.keys()), key=lambda frame_key: float(frame_key[5:])): if ('channels' in vid[frame_key].keys() and len(vid[frame_key]['channels'].shape) == 3): ti = drms.to_datetime(vid[frame_key].attrs['T_REC']) sample_time += [(tf - ti).total_seconds() / 60] i = 0 for scalar in scalars: if (scalar == 'l1_err' or scalar == 'TV'): l1_err = 0 TV = 0 try: this_frame = Data_Gen._extract_frame( vid[frame_key]['channels'], vid[frame_key].attrs['SEGS'], channels) if (last_frame is None): last_frame = this_frame for c in range(last_frame.shape[2]): if (channels is None or vid[frame_key].attrs['SEGS'] [c].decode() in channels): if (scalar == 'l1_err'): l1_err += np.sum( np.abs( sk.resize(this_frame[:, :, c], last_frame.shape[:2], preserve_range=True) - last_frame[:, :, c])) else: # only valid total variation TV += np.sum( np.sqrt( np.square( np.diff(this_frame[:, :, c], axis=0)[:, 1:])) + np.square( np.diff(this_frame[:, :, c], axis=1)[1:, :])) if (channels is None): nb_channels = last_frame.shape[2] else: nb_channels = len(channels) normalization = np.product( last_frame.shape[0:2]) * nb_channels if (scalar == 'TV'): res[i] += [TV / normalization] else: res[i] += [l1_err / normalization] except: sample_time = sample_time[:-1] print('Frame {} not extracted.'.format(frame_key)) print(traceback.format_exc()) else: res[i] += [vid[frame_key].attrs[scalar]] i += 1 last_frame = this_frame if (time_event_last_frame): return np.flip(np.array(res), axis=1), np.flip(np.array(sample_time), axis=0) return np.array(res), np.array(sample_time)
def _download_images(self, fits_directory: str, records: List[Tuple[str, str]]): fits_directory = os.path.join(fits_directory, "_fits_temp") os.makedirs(fits_directory, exist_ok=True) logger.debug( f'Downloading {len(records)} FITS files into {fits_directory}...') for record, url, extra_keys in records: record_match = self.RECORD_PARSE_REGEX.match(record) if record_match is None: raise Exception(f"Invalid record format '{record}'") record_date_raw, record_wavelength = record_match.groups() record_date = dt.datetime.strptime( record_date_raw, self.RECORD_DATE_FORMAT_HMI if len(record_wavelength) == 1 else self.RECORD_DATE_FORMAT) if len(record_wavelength) == 1: record_wavelength = self.HMI_PARSE_REGEX.match( record).groups()[0] output_file_name = f"{record_date:%Y-%m-%dT%H%M%S}_{record_wavelength}.fits" fp = os.path.join(fits_directory, output_file_name) if not os.path.isfile( fp): #TODO: Check for corruption, incomplete files retries = 0 while True: try: urllib.request.urlretrieve(url, fp) except Exception as e: retries += 1 if retries % 100 == 0: logger.info( f'Failed fetching FITS %s after {retries} retries: %s', url, e) if isinstance(e, URLError) and isinstance( e.reason, ConnectionRefusedError): logger.info('waiting for a while longer...') else: break time.sleep(0.5) else: logger.info(f'{retries} retries') # extend HMI Fits with extra keys if extra_keys is not None: try: data, header = fits.getdata(fp, header=True) if header['BITPIX'] == -32 or header[ 'BITPIX'] == -64: del header[ 'BLANK'] # https://github.com/astropy/astropy/issues/7253 for k in extra_keys.iteritems(): if k[1] == 'Invalid KeyLink': logger.warning( f'Invalid KeyLink for {k[0]}, {fp}' ) continue if k[0].upper() not in self.DATE_KEYS: header[k[0]] = k[1] else: pdt = drms.to_datetime( k[1]).to_pydatetime() if pdt is not pd.NaT: header[k[0]] = pdt.strftime( "%Y-%m-%dT%H%M%S") fits.writeto(fp, data, header, overwrite=True) except Exception as e: logger.error( f"Unable to extend HMI file {fp}, removing & skipping... {e}" ) try: os.remove(fp) except Exception as e2: logger.error( f'Was unable to delete file {fp}, {e2}' ) continue break else: logger.debug(f'Already found {fp}') logger.debug("Downloaded %d files to %s", len(records), fits_directory)