def _load(probe, starttime, endtime, instrument, product_id, cdfkeys):
    daylist = daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        year = str(date.year)
        month = str(date.month).zfill(2)
        day = str(date.day).zfill(2)
        local_dir = os.path.join(cluster_dir, 'c' + probe, instrument, year)
        local_fname = 'C' + probe + '_' + product_id + '__' +\
            year + month + day + '.cdf'
        # If we don't have the local file, download it
        if not os.path.exists(os.path.join(local_dir, local_fname)):
            thisstart = datetime.combine(date, time.min)
            thisend = datetime.combine(date, time.max)
            try:
                _download(probe, thisstart, thisend, instrument, product_id)
            except Exception as err:
                print(str(err))
                continue

        cdf = pycdf.CDF(os.path.join(local_dir, local_fname))
        # Work out which cdf variable holds the time index
        for key, value in cdfkeys.items():
            if value == 'Time':
                index_key = key
                break
        data.append(cdf2df(cdf, index_key, cdfkeys))
    if len(data) == 0:
        raise RuntimeError('No data available to download during requested '
                           'times')
    return timefilter(data, starttime, endtime)
def mitplasma_h0(probe, starttime, endtime):
    """
    Import MIT h0 plasma data.

    Parameters
    ----------
    probe : string
        Probe number.
    starttime : datetime
        Start of interval.
    endtime : datetime
        End of interval.

    Returns
    -------
    data : DataFrame
        Requested data.
    """
    data = []
    dtimes = spacetime.daysplitinterval(starttime, endtime)
    # Loop through days
    for dtime in dtimes:
        date = dtime[0]
        intervalstring = str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2)
        filename = 'i' + probe + '_h0_mitplasma_' + intervalstring + '_v01.cdf'
        # Location of file relative to local directory or remote url
        relative_loc = 'imp' + probe + '/plasma_mit/mitplasma_h0/' +\
            str(date.year)

        local_dir = os.path.join(imp_dir, relative_loc)
        remote_url = imp_url + relative_loc

        cdf = helper.load(filename, local_dir, remote_url)
        keys = {'EW_flowangle_best': 'EW_flowangle_best',
                'EW_flowangle_mom': 'EW_flowangle_mom',
                'Epoch': 'Time',
                'Flow_elevation_thresh': 'Flow_elevation_thresh',
                'Flow_elevation_threshsp': 'Flow_elevation_threshsp',
                'Region': 'Region',
                'V_fit': 'V_fit',
                'V_mom': 'V_mom',
                'mode': 'mode',
                'protonV_thermal_fit': 'protonV_thermal_fit',
                'protonV_thermal_mom': 'protonV_thermal_mom',
                'proton_density_fit': 'proton_density_fit',
                'proton_density_mom': 'proton_density_mom',
                'xyzgse': ['x_gse', 'y_gse', 'z_gse'],
                'ygsm': 'ygsm',
                'zgsm': 'zgsm'}
        thisdata = helper.cdf2df(cdf, 'Epoch', keys)
        data.append(thisdata)

    data = pd.concat(data)
    data = data[(data['Time'] > starttime) & (data['Time'] < endtime)]
    return data
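
# A minimal usage sketch for mitplasma_h0. The probe number and dates are
# illustrative assumptions; actual availability depends on the IMP archive.
def _example_mitplasma_h0():
    from datetime import datetime
    # IMP 8 MIT plasma data for a single (assumed) day
    return mitplasma_h0('8', datetime(1976, 1, 17), datetime(1976, 1, 18))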
def mag_rtn(starttime, endtime):
    """
    Import magnetic field in RTN coordinates from MESSENGER.

    Parameters
    ----------
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
    """
    # Directory relative to main MESSENGER data directory
    relative_dir = 'rtn'
    keys = {'B_normal': 'Bn',
            'B_radial': 'Br',
            'B_tangential': 'Bt',
            'Epoch': 'Time',
            'azimuth_ecliptic': 'sc_Az',
            'latitude_ecliptic': 'sc_Lat',
            'radialDistance': 'sc_r'}

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        this_relative_dir = os.path.join(relative_dir, str(date.year))
        hdffile = 'messenger_mag_rtn_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v01.hdf'
        hdfloc = os.path.join(mess_dir, this_relative_dir, hdffile)
        # Try to load hdf file
        if os.path.isfile(hdfloc):
            df = pd.read_hdf(hdfloc)
            data.append(df)
            continue

        filename = hdffile[:-4] + '.cdf'
        # Absolute path to local directory for this data file
        local_dir = os.path.join(mess_dir, this_relative_dir)
        helper.checkdir(local_dir)

        remote_url = os.path.join(remote_mess_dir, this_relative_dir)
        cdf = helper.load(filename, local_dir, remote_url, guessversion=True)
        df = helper.cdf2df(cdf, index_key='Epoch', keys=keys)

        if use_hdf:
            df.to_hdf(hdfloc, key='data', mode='w')
        data.append(df)
    return helper.timefilter(data, starttime, endtime)
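
# A minimal usage sketch for mag_rtn. The cruise-phase dates are illustrative
# assumptions; availability depends on the MESSENGER archive.
def _example_mag_rtn():
    from datetime import datetime
    # RTN magnetic field for a single (assumed) day
    return mag_rtn(datetime(2010, 1, 1), datetime(2010, 1, 2))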
def threedp_pm(starttime, endtime):
    """
    Import 'pm' proton and alpha moment data product from WIND 3DP.

    Parameters
    ----------
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
    """
    # Directory relative to main WIND data directory
    relative_dir = os.path.join('3dp', '3dp_pm')

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        this_relative_dir = os.path.join(relative_dir, str(date.year))
        # Absolute path to local directory for this data file
        local_dir = os.path.join(wind_dir, this_relative_dir)
        filename = 'wi_pm_3dp_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v05.cdf'
        hdfname = filename[:-4] + '.hdf'
        hdfloc = os.path.join(local_dir, hdfname)
        if os.path.isfile(hdfloc):
            df = pd.read_hdf(hdfloc)
            data.append(df)
            continue

        helper.checkdir(local_dir)
        remote_url = remote_wind_dir + this_relative_dir
        cdf = helper.load(filename, local_dir, remote_url, guessversion=True)

        keys = {'A_DENS': 'n_a',
                'A_TEMP': 'T_a',
                'A_VELS': ['va_x', 'va_y', 'va_z'],
                'P_DENS': 'n_p',
                'P_TEMP': 'T_p',
                'P_VELS': ['vp_x', 'vp_y', 'vp_z'],
                'Epoch': 'Time'}
        df = helper.cdf2df(cdf, index_key='Epoch', keys=keys)
        if use_hdf:
            df.to_hdf(hdfloc, 'pm', mode='w', format='fixed')
        data.append(df)
    return helper.timefilter(data, starttime, endtime)
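
# A minimal usage sketch for threedp_pm. The dates are illustrative
# assumptions; availability depends on the WIND 3DP archive.
def _example_threedp_pm():
    from datetime import datetime
    data = threedp_pm(datetime(2000, 1, 1), datetime(2000, 1, 2))
    # Proton number density column, as named in the keys mapping above
    return data['n_p']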
def fgm_hires(starttime, endtime):
    """
    Import high resolution fluxgate magnetometer data.

    Parameters
    ----------
    starttime : datetime
        Start of interval
    endtime : datetime
        End of interval

    Returns
    -------
    data : DataFrame
        Requested data
    """
    fgm_options = url_options
    readargs = {'names': ['year', 'doy', 'hour', 'minute', 'second',
                          'Bx', 'By', 'Bz', '|B|'],
                'delim_whitespace': True}

    data = []
    dtimes = heliotime.daysplitinterval(starttime, endtime)
    # Loop through days
    for dtime in dtimes:
        date = dtime[0]
        yearstr = date.strftime('%Y')
        fgm_options['FILE_NAME'] = ('U' + yearstr[-2:] +
                                    date.strftime('%j') + 'SH.ASC')
        # Local location to download to
        local_dir = os.path.join(ulysses_dir, 'fgm', 'hires', yearstr)
        local_file = os.path.join(local_dir, fgm_options['FILE_NAME'])
        local_hdf = local_file[:-4] + '.hdf'
        # If we have already saved a hdf file
        if os.path.exists(local_hdf):
            thisdata = pd.read_hdf(local_hdf)
        else:
            # Put together remote url
            fgm_options['FILE_PATH'] = '/ufa/HiRes/VHM-FGM/' + yearstr
            remote_url = ulysses_url
            for key in fgm_options:
                remote_url += key + '=' + fgm_options[key] + '&'
            f = helper.load(fgm_options['FILE_NAME'], local_dir, remote_url)

            # Read in data
            thisdata = pd.read_table(f, **readargs)
            # Process date/time
            thisdata = _convert_ulysses_time(thisdata)
            if use_hdf:
                thisdata.to_hdf(local_hdf, 'fgm_hires')
        data.append(thisdata)
    return helper.timefilter(data, starttime, endtime)
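
# A minimal usage sketch for fgm_hires. The dates are illustrative
# assumptions; availability depends on the Ulysses VHM-FGM archive.
def _example_fgm_hires():
    from datetime import datetime
    data = fgm_hires(datetime(1995, 1, 1), datetime(1995, 1, 2))
    # Field magnitude column, as named in readargs above
    return data['|B|']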
def fgm_survey(probe, starttime, endtime):
    """
    Import FGM survey mode data.

    Parameters
    ----------
    probe : string
        Probe number, must be '1', '2', '3', or '4'.
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
        Imported data.
    """
    # Directory relative to main MMS data directory
    relative_dir = os.path.join('mms' + probe, 'fgm', 'srvy', 'l2')

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        this_relative_dir = os.path.join(relative_dir, str(date.year),
                                         str(date.month).zfill(2))
        filename = 'mms' + probe + '_fgm_srvy_l2_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v4.18.0.cdf'

        # Absolute path to local directory for this data file
        local_dir = os.path.join(mms_dir, this_relative_dir)
        helper.checkdir(local_dir)

        remote_url = remote_mms_dir + this_relative_dir
        # Load cdf file
        cdf = helper.load(filename, local_dir, remote_url)

        # Convert cdf to dataframe. The fourth component of the field
        # variable is the field magnitude.
        keys = {'mms' + probe + '_fgm_b_gsm_srvy_l2':
                ['Bx', 'By', 'Bz', '|B|'],
                'Epoch': 'Time'}
        df = helper.cdf2df(cdf, 'Epoch', keys)
        data.append(df)

    data = pd.concat(data)
    data = data[(data['Time'] > starttime) & (data['Time'] < endtime)]
    return data
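
# A minimal usage sketch for fgm_survey. Probe and dates are illustrative
# assumptions; availability depends on the MMS archive.
def _example_fgm_survey():
    from datetime import datetime
    return fgm_survey('1', datetime(2016, 1, 1), datetime(2016, 1, 2))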
def swe_h3(starttime, endtime):
    """
    Import 'h3' solar wind electron data product from WIND.

    Parameters
    ----------
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
    """
    # Directory relative to main WIND data directory
    relative_dir = os.path.join('swe', 'swe_h3')

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        filename = 'wi_h3_swe_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v01.cdf'
        this_relative_dir = os.path.join(relative_dir, str(date.year))
        # Absolute path to local directory for this data file
        local_dir = os.path.join(wind_dir, this_relative_dir)
        helper.checkdir(local_dir)

        remote_url = remote_wind_dir + this_relative_dir
        cdf = helper.load(filename, local_dir, remote_url)

        # Distribution function variables, one per energy bin
        distkeys = []
        for i in range(0, 13):
            distkeys.append('f_pitch_E' + str(i).zfill(2))
        # Pitch angle bin centres (radians)
        anglelabels = []
        for i in range(0, 30):
            anglelabels.append((i + 0.5) * np.pi / 30)
        timekey = 'Epoch'
        energykey = 'Ve'

        df = helper.pitchdist_cdf2df(cdf, distkeys, energykey, timekey,
                                     anglelabels)
        data.append(df)

    data = pd.concat(data)
    data = data[(data.index.get_level_values('Time') > starttime) &
                (data.index.get_level_values('Time') < endtime)]
    return data
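
# A minimal usage sketch for swe_h3. The dates are illustrative assumptions.
# The returned DataFrame is multi-indexed, with 'Time' as one index level
# (as used in the filtering step above).
def _example_swe_h3():
    from datetime import datetime
    data = swe_h3(datetime(2000, 1, 1), datetime(2000, 1, 2))
    return data.index.get_level_values('Time')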
def mfi_h0(starttime, endtime):
    """
    Import 'mfi_h0' magnetic field data product from ACE. This data set has
    16 second cadence.

    Parameters
    ----------
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
    """
    # Directory relative to main ACE data directory
    relative_dir = os.path.join('mag', 'level_2_cdaweb', 'mfi_h0')

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        filename = 'ac_h0_mfi_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v06.cdf'
        this_relative_dir = os.path.join(relative_dir, str(date.year))
        # Absolute path to local directory for this data file
        local_dir = os.path.join(ace_dir, this_relative_dir)
        helper.checkdir(local_dir)

        remote_url = remote_ace_dir + this_relative_dir
        cdf = helper.load(filename, local_dir, remote_url, guessversion=True)

        keys = {'BGSEc': ['Bx_gse', 'By_gse', 'Bz_gse'],
                'Magnitude': '|B|',
                'SC_pos_GSE': ['sc_gse_x', 'sc_gse_y', 'sc_gse_z'],
                'Epoch': 'Time'}
        badvalues = {}
        df = helper.cdf2df(cdf, index_key='Epoch', keys=keys,
                           badvalues=badvalues)
        data.append(df)

    data = pd.concat(data)
    data = data[(data['Time'] > starttime) & (data['Time'] < endtime)]
    return data
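
# A minimal usage sketch for the ACE mfi_h0 loader. Dates are illustrative
# assumptions; availability depends on the ACE archive.
def _example_ace_mfi_h0():
    from datetime import datetime
    data = mfi_h0(datetime(2005, 1, 1), datetime(2005, 1, 2))
    # GSE field components, as named in the keys mapping above
    return data[['Bx_gse', 'By_gse', 'Bz_gse']]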
def mfi_h0(starttime, endtime):
    """
    Import 'mfi_h0' magnetic field data product from WIND.

    Parameters
    ----------
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
    """
    # Directory relative to main WIND data directory
    relative_dir = os.path.join('mfi', 'mfi_h0')

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        this_relative_dir = os.path.join(relative_dir, str(date.year))
        # Absolute path to local directory for this data file
        local_dir = os.path.join(wind_dir, this_relative_dir)
        filename = 'wi_h0_mfi_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v05.cdf'
        hdfname = filename[:-4] + '.hdf'
        hdfloc = os.path.join(local_dir, hdfname)
        if os.path.isfile(hdfloc):
            df = pd.read_hdf(hdfloc)
            data.append(df)
            continue

        helper.checkdir(local_dir)
        remote_url = remote_wind_dir + this_relative_dir
        cdf = helper.load(filename, local_dir, remote_url, guessversion=True)

        keys = {'B3GSE': ['Bx_gse', 'By_gse', 'Bz_gse'],
                'Epoch3': 'Time'}
        badvalues = {'Bx_gse': -1e+31,
                     'By_gse': -1e+31,
                     'Bz_gse': -1e+31}
        df = helper.cdf2df(cdf, index_key='Epoch3', keys=keys,
                           badvalues=badvalues)
        if use_hdf:
            df.to_hdf(hdfloc, 'mag', mode='w', format='fixed')
        data.append(df)
    return helper.timefilter(data, starttime, endtime)
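
# A minimal usage sketch for the WIND mfi_h0 loader defined above. Note it
# shares a name with the ACE loader, which presumably lives in a separate
# module. Dates are illustrative assumptions.
def _example_wind_mfi_h0():
    from datetime import datetime
    return mfi_h0(datetime(2000, 1, 1), datetime(2000, 1, 2))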
def mag320ms(probe, starttime, endtime):
    """
    Import 320ms cadence magnetic field data.

    Parameters
    ----------
    probe : string
        Probe number.
    starttime : datetime
        Start of interval.
    endtime : datetime
        End of interval.

    Returns
    -------
    data : DataFrame
        Requested data.
    """
    data = []
    dtimes = spacetime.daysplitinterval(starttime, endtime)
    # Loop through days
    for dtime in dtimes:
        date = dtime[0]
        intervalstring = str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2)
        filename = 'i' + probe + '_320msec_mag_' + intervalstring + '_v01.cdf'
        # Location of file relative to local directory or remote url
        relative_loc = 'imp' + probe + '/mag/mag_320msec_cdaweb/' +\
            str(date.year)

        local_dir = os.path.join(imp_dir, relative_loc)
        remote_url = imp_url + relative_loc

        cdf = helper.load(filename, local_dir, remote_url)
        keys = {'B': '|B|',
                'BX': 'Bx',
                'BY': 'By',
                'BZ': 'Bz',
                'Epoch': 'Time'}
        thisdata = helper.cdf2df(cdf, 'Epoch', keys)
        data.append(thisdata)

    data = pd.concat(data)
    data = data[(data['Time'] > starttime) & (data['Time'] < endtime)]
    return data
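
# A minimal usage sketch for mag320ms. Probe and dates are illustrative
# assumptions; IMP 8 is the probe for which this product exists in the
# original file naming.
def _example_mag320ms():
    from datetime import datetime
    return mag320ms('8', datetime(1976, 1, 17), datetime(1976, 1, 18))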
def _download(probe, starttime, endtime, instrument, product_id):
    if cda_cookie == 'none':
        raise RuntimeError('Cluster download cookie not set')
    daylist = daysplitinterval(starttime, endtime)
    for day in daylist:
        date = day[0]
        start = datetime.combine(date, time.min)
        end = datetime.combine(date, time.max)
        # Add start and end time to request dictionary
        request_dict = generic_dict
        request_dict['START_DATE'] = start.strftime(cda_time_fmt)
        request_dict['END_DATE'] = end.strftime(cda_time_fmt)

        # Create request string
        request_str = ''
        request_str += 'DATASET_ID' + '='
        request_str += 'C' + probe + '_' + product_id
        for item in request_dict:
            request_str += '&'
            request_str += item
            request_str += '='
            request_str += request_dict[item]

        # Create request url
        request_str += '&NON_BROWSER'
        request_url = csa_url + request_str

        # Work out local directory to download to
        year = str(date.year)
        month = str(date.month).zfill(2)
        day = str(date.day).zfill(2)
        local_dir = os.path.join(cluster_dir, 'c' + probe, instrument, year)
        # Work out local filename to download to
        filename = 'C' + probe + '_' + product_id + '__' + year + month +\
            day + '.tar.gz'
        print(request_url)

        # Download data
        checkdir(local_dir)
        urlretrieve(request_url,
                    filename=os.path.join(local_dir, filename),
                    reporthook=reporthook)

        # Extract tar.gz file
        tar = tarfile.open(os.path.join(local_dir, filename))
        tar.extractall(local_dir)
        # Delete tar.gz file
        os.remove(os.path.join(local_dir, filename))
        # The CSA timestamps the downloaded file by when it is downloaded,
        # so manually list and retrieve the folder name
        dirlist = os.listdir(local_dir)
        for d in dirlist:
            if d[:13] == 'CSA_Download_':
                download_dir = os.path.join(local_dir, d,
                                            'C' + probe + '_' + product_id)
                break

        # Remove request times from filenames
        dirlist = os.listdir(download_dir)
        # Move files to the data folder
        cutoff = 3 + len(product_id) + 10
        for f in dirlist:
            os.rename(os.path.join(download_dir, f),
                      os.path.join(local_dir, f[:cutoff] + '.cdf'))
        # Delete extra folders created by tar.gz file
        os.rmdir(download_dir)
        os.rmdir(os.path.join(local_dir, d))
def swoops_ions(starttime, endtime):
    """
    Import SWOOPS ion data.

    Parameters
    ----------
    starttime : datetime
        Start of interval
    endtime : datetime
        End of interval

    Returns
    -------
    data : DataFrame
        Requested data
    """
    swoops_options = url_options
    readargs = {'names': ['year', 'doy', 'hour', 'minute', 'second',
                          'r', 'hlat', 'hlon', 'n_p', 'n_a',
                          'T_p_large', 'T_p_small',
                          'v_r', 'v_t', 'v_n', 'iqual'],
                'delim_whitespace': True}

    data = []
    months_loaded = []
    dtimes = heliotime.daysplitinterval(starttime, endtime)
    # Loop through individual days
    for dtime in dtimes:
        thisdate = dtime[0]
        # Get first day of the month
        first_day = date(thisdate.year, thisdate.month, 1)
        # Check if this month's data is already loaded
        if first_day in months_loaded:
            continue
        doy = first_day.strftime('%j')

        swoops_options['FILE_NAME'] = ('u' + first_day.strftime('%y') +
                                       doy + 'bam.dat')
        swoops_options['FILE_PATH'] =\
            ('/ufa/stageIngestArea/swoops/ions/bamion' +
             first_day.strftime('%y') + '.zip_files')

        # Put together url for this day's data
        remote_url = ulysses_url
        for key in swoops_options:
            remote_url += key + '=' + swoops_options[key] + '&'
        # Local location to download to
        local_dir = os.path.join(ulysses_dir, 'swoops', 'ions',
                                 first_day.strftime('%Y'))
        # Load data
        try:
            f = helper.load(swoops_options['FILE_NAME'], local_dir,
                            remote_url)
        except HTTPError:
            print('No SWOOPS ion data available for date %s' % first_day)
            continue

        # Read in data
        thisdata = pd.read_table(f, **readargs)
        # Process date/time
        thisdata = _convert_ulysses_time(thisdata)
        data.append(thisdata)
        months_loaded.append(first_day)
    return helper.timefilter(data, starttime, endtime)
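
# A minimal usage sketch for swoops_ions. Dates are illustrative assumptions;
# note that files are fetched one month at a time, so a short interval still
# downloads a whole month's file.
def _example_swoops_ions():
    from datetime import datetime
    data = swoops_ions(datetime(1995, 1, 1), datetime(1995, 1, 3))
    # Radial solar wind speed column, as named in readargs above
    return data['v_r']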
def merged(probe, starttime, endtime, verbose=True, try_download=True):
    """
    Read in merged data set.

    Parameters
    ----------
    probe : int, string
        Helios probe to import data from. Must be 1 or 2.
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.
    verbose : bool
        If True, print more information as data is loading.
    try_download : bool
        If True, try to download data that is not available locally.

    Returns
    -------
    data : DataFrame
        Merged data set.
    """
    probe = _check_probe(probe)
    startdate = starttime.date()
    enddate = endtime.date()

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    floc = os.path.join(helios_dir, 'helios' + probe, 'merged',
                        'he' + probe + '_40sec')
    for day in daylist:
        this_date = day[0]
        # Check that data for this day exists
        if probe == '1':
            if this_date < date(1974, 12, 12) or this_date > date(1985, 9, 4):
                continue
        if probe == '2':
            if this_date < date(1976, 1, 17) or this_date > date(1980, 3, 8):
                continue

        doy = int(this_date.strftime('%j'))
        year = this_date.year
        hdfloc = os.path.join(
            floc,
            'H' + probe + str(year - 1900) + '_' + str(doy).zfill(3) + '.h5')
        if not os.path.isfile(hdfloc):
            # Data not processed yet, try to process and load it
            try:
                data.append(_merged_fromascii(probe, year, doy,
                                              try_download=try_download))
                if verbose:
                    print(year, doy, 'Processed ascii file')
            except (FileNotFoundError, URLError) as err:
                if verbose:
                    print(str(err))
                    print(year, doy, 'No raw merged data')
        else:
            # Load data from already processed file
            data.append(pd.read_hdf(hdfloc, 'table'))
            if verbose:
                print(year, doy)

    if data == []:
        fmt = '%d-%m-%Y'
        raise ValueError('No data to import for probe ' + probe +
                         ' between ' + startdate.strftime(fmt) +
                         ' and ' + enddate.strftime(fmt))

    return helper.timefilter(data, starttime, endtime)
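
# A minimal usage sketch for merged. Probe and dates are illustrative
# assumptions, chosen inside the documented Helios 1 coverage window.
def _example_merged():
    from datetime import datetime
    return merged('1', datetime(1976, 1, 10), datetime(1976, 1, 12),
                  verbose=False)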
def fgm(probe, rate, coords, starttime, endtime):
    """
    Import fgm magnetic field data from THEMIS.

    Parameters
    ----------
    probe : string
        Allowed values are [a, b, c, d, e].
    rate : string
        Data rate to return. Allowed values are [e, h, l, s].
    coords : string
        Magnetic field co-ordinate system. Allowed values are
        [dsl, gse, gsm, ssl]. NOTE: Add link to co-ordinate system
        descriptions.
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
    """
    valid_rates = ['e', 'h', 'l', 's']
    valid_coords = ['dsl', 'gse', 'gsm', 'ssl']

    _validate_probe(probe)
    if rate not in valid_rates:
        raise ValueError(('rate argument %s is not in list of allowed '
                          'rates: %s') % (rate, valid_rates))
    if coords not in valid_coords:
        raise ValueError(('coords argument %s is not in list of allowed '
                          'co-ordinate systems: %s') % (coords, valid_coords))

    # Directory relative to main THEMIS data directory
    relative_dir = os.path.join('th' + probe, 'l2', 'fgm')

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        this_relative_dir = os.path.join(relative_dir, str(date.year))
        filename = 'th' + probe + '_l2_fgm_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v01.cdf'

        # Absolute path to local directory for this data file
        local_dir = os.path.join(themis_dir, this_relative_dir)
        helper.checkdir(local_dir)

        remote_url = remote_themis_dir + this_relative_dir
        cdf = helper.load(filename, local_dir, remote_url)

        probestr = 'th' + probe
        ratestr = '_fg' + rate + '_'
        keys = {probestr + ratestr + 'btotal': '|B|',
                probestr + ratestr + coords: ['Bx_' + coords,
                                              'By_' + coords,
                                              'Bz_' + coords],
                probestr + ratestr + 'time': 'Time'}
        df = helper.cdf2df(cdf, probestr + ratestr + 'time', keys,
                           dtimeindex=False)
        # Times are stored as seconds since the unix epoch; convert to a
        # datetime index
        df = df.set_index(pd.to_datetime(df.index.values, unit='s'))
        df['Time'] = df.index.values
        data.append(df)

    data = pd.concat(data)
    data = data[(data['Time'] > starttime) & (data['Time'] < endtime)]
    return data
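
# A minimal usage sketch for the THEMIS fgm loader. Probe, rate, coords, and
# dates are illustrative assumptions drawn from the allowed values above.
def _example_themis_fgm():
    from datetime import datetime
    data = fgm('a', 'l', 'gse', datetime(2010, 1, 1), datetime(2010, 1, 2))
    return data[['Bx_gse', 'By_gse', 'Bz_gse']]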
def fpi_dis_moms(probe, mode, starttime, endtime):
    """
    Import FPI ion distribution moment data.

    Parameters
    ----------
    probe : string
        Probe number, must be '1', '2', '3', or '4'.
    mode : string
        Data mode, must be 'fast' or 'brst'.
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
        Imported data.
    """
    valid_modes = ['fast', 'brst']
    if mode not in valid_modes:
        raise RuntimeError('Mode must be either fast or brst')
    # Directory relative to main MMS data directory
    relative_dir = os.path.join('mms' + probe, 'fpi', mode, 'l2', 'dis-moms')

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        starthour = day[1].hour
        endhour = day[2].hour + 1
        # The FPI fast data product has files every two hours, so round to
        # the nearest two-hour stamps
        starthour -= np.mod(starthour, 2)
        endhour += np.mod(endhour, 2)
        for h in range(starthour, endhour, 2):
            this_relative_dir = os.path.join(relative_dir, str(date.year),
                                             str(date.month).zfill(2))
            filename = 'mms' + probe + '_fpi_' + mode + '_l2_dis-moms_' +\
                str(date.year) +\
                str(date.month).zfill(2) +\
                str(date.day).zfill(2) +\
                str(h).zfill(2) + '0000' +\
                '_v3.1.1.cdf'

            # Absolute path to local directory for this data file
            local_dir = os.path.join(mms_dir, this_relative_dir)
            helper.checkdir(local_dir)

            remote_url = remote_mms_dir + this_relative_dir
            # Load cdf file
            try:
                cdf = helper.load(filename, local_dir, remote_url)
            except urllib.error.HTTPError as e:
                if str(e) == 'HTTP Error 404: Not Found':
                    print('No data available for hours',
                          str(h) + '-' + str(h + 2),
                          'on', date.strftime('%d/%m/%Y'))
                    continue
                else:
                    raise

            probestr = 'mms' + probe + '_'
            # Convert cdf to dataframe. The cdf variable names carry the data
            # mode as a suffix.
            keys = {'Epoch': 'Time',
                    probestr + 'dis_bulkv_gse_' + mode:
                        ['bulkv_x', 'bulkv_y', 'bulkv_z'],
                    probestr + 'dis_heatq_gse_' + mode:
                        ['heatq_x', 'heatq_y', 'heatq_z'],
                    probestr + 'dis_numberdensity_' + mode: 'n',
                    probestr + 'dis_temppara_' + mode: 'T_par',
                    probestr + 'dis_tempperp_' + mode: 'T_perp'}
            df = helper.cdf2df(cdf, 'Epoch', keys)
            data.append(df)

    data = pd.concat(data)
    data = data[(data['Time'] > starttime) & (data['Time'] < endtime)]
    return data
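
# A minimal usage sketch for fpi_dis_moms. Probe, mode, and dates are
# illustrative assumptions; missing two-hour files are skipped with a message.
def _example_fpi_dis_moms():
    from datetime import datetime
    data = fpi_dis_moms('1', 'fast',
                        datetime(2016, 1, 1), datetime(2016, 1, 2))
    # Ion number density column, as named in the keys mapping above
    return data['n']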