def _load(probe, starttime, endtime, instrument, product_id, cdfkeys):
    daylist = daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        year = str(date.year)
        month = str(date.month).zfill(2)
        day = str(date.day).zfill(2)
        local_dir = os.path.join(cluster_dir, 'c' + probe, instrument, year)
        local_fname = 'C' + probe + '_' + product_id + '__' +\
            year + month + day + '.cdf'
        # If we don't have the local file, download it
        if not os.path.exists(os.path.join(local_dir, local_fname)):
            thisstart = datetime.combine(date, time.min)
            thisend = datetime.combine(date, time.max)
            try:
                _download(probe, thisstart, thisend, instrument, product_id)
            except Exception as err:
                print(str(err))
                continue

        cdf = pycdf.CDF(os.path.join(local_dir, local_fname))
        # Find the CDF key that maps to the time index
        for key, value in cdfkeys.items():
            if value == 'Time':
                index_key = key
                break
        data.append(cdf2df(cdf, index_key, cdfkeys))

    if len(data) == 0:
        raise RuntimeError('No data available to download during requested '
                           'times')
    return timefilter(data, starttime, endtime)
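# A minimal sketch of how a public loader might call _load. The product ID
# and cdfkeys mapping below are illustrative, not a documented product; note
# that one cdfkeys value must be 'Time' so _load can find the index key:
#
#     >>> from datetime import datetime
#     >>> cdfkeys = {'B_vec_xyz_gse__C1_CP_FGM_FULL': ['Bx', 'By', 'Bz'],
#     ...            'time_tags__C1_CP_FGM_FULL': 'Time'}
#     >>> data = _load('1', datetime(2004, 6, 18), datetime(2004, 6, 19),
#     ...              'fgm', 'C1_CP_FGM_FULL', cdfkeys)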
def distparams(probe, starttime, endtime):
    """
    Read in distribution parameters found in the header of distribution
    files.

    Parameters
    ----------
    probe : int, string
        Helios probe to import data from. Must be 1 or 2.
    starttime : datetime
        Start of interval
    endtime : datetime
        End of interval

    Returns
    -------
    distinfo : Series
        Information stored in the top of distribution function files.
    """
    probe = _check_probe(probe)
    extensions = ['hdm.0', 'hdm.1', 'ndm.0', 'ndm.1']
    paramlist = []
    # The loop below advances starttime day by day; keep the original value
    # for the final time filter
    starttime_orig = starttime
    # Loop through each day
    while starttime < endtime:
        year = starttime.year
        doy = starttime.strftime('%j')
        # Directory for today's distribution files
        dist_dir = _dist_file_dir(probe, year, doy)
        # Location of hdf file to save to/load from
        hdffile = 'h' + probe + str(year) + str(doy).zfill(3) +\
            'distparams.hdf'
        hdffile = os.path.join(dist_dir, hdffile)
        if os.path.isfile(hdffile):
            todays_params = pd.read_hdf(hdffile)
        else:
            todays_params = []
            # Get every distribution function file present for this day
            for f in os.listdir(dist_dir):
                path = os.path.join(dist_dir, f)
                # Check for distribution function
                if path[-5:] in extensions:
                    # year = int(path[-21:-19]) + 1900
                    # doy = int(path[-18:-15])
                    hour = int(path[-14:-12])
                    minute = int(path[-11:-9])
                    second = int(path[-8:-6])
                    p = distparams_single(probe, year, doy, hour, minute,
                                          second)
                    todays_params.append(p)
            todays_params = pd.concat(todays_params,
                                      ignore_index=True,
                                      axis=1).T
            todays_params = todays_params.set_index('Time', drop=False)
            if use_hdf:
                todays_params.to_hdf(hdffile, key='distparams', mode='w')
        paramlist.append(todays_params)
        starttime += timedelta(days=1)

    return helper.timefilter(paramlist, starttime_orig, endtime)
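# Usage sketch (assumes the local Helios data directory is configured; the
# dates are illustrative):
#
#     >>> from datetime import datetime
#     >>> params = distparams(1, datetime(1976, 1, 10),
#     ...                     datetime(1976, 1, 12))
#     >>> params.index  # one entry per distribution function file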
def trajectory(probe, starttime, endtime):
    """
    Read in trajectory data.

    Parameters
    ----------
    probe : int, string
        Helios probe to import data from. Must be 1 or 2.
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
        Trajectory data set.
    """
    probe = _check_probe(probe)
    data = []
    headings = ['Year', 'doy', 'Hour', 'Carrot', 'r', 'selat', 'selon',
                'hellat', 'hellon', 'hilon', 'escang', 'code']
    colspecs = [(0, 3), (4, 7), (8, 10), (11, 15), (16, 22), (23, 30),
                (31, 37), (38, 44), (45, 51), (52, 58), (59, 65), (66, 67)]

    # Loop through years
    for i in range(starttime.year, endtime.year + 1):
        floc = os.path.join(helios_dir, 'helios' + probe, 'traj')
        fname = 'he' + probe + 'trj' + str(i - 1900) + '.asc'

        # Read in data
        try:
            thisdata = pd.read_fwf(os.path.join(floc, fname),
                                   names=headings,
                                   header=None,
                                   colspecs=colspecs)
        except OSError:
            continue
        thisdata['Year'] += 1900

        # Convert date info to datetime
        thisdata['Time'] = pd.to_datetime(thisdata['Year'], format='%Y') + \
            pd.to_timedelta(thisdata['doy'] - 1, unit='d') + \
            pd.to_timedelta(thisdata['Hour'], unit='h')
        thisdata['ordinal'] = dtime2ordinal(thisdata['Time'])

        # Calculate cartesian positions
        thisdata['x'] = thisdata['r'] * np.cos(thisdata['selat']) *\
            np.cos(thisdata['selon'])
        thisdata['y'] = thisdata['r'] * np.cos(thisdata['selat']) *\
            np.sin(thisdata['selon'])
        thisdata['z'] = thisdata['r'] * np.sin(thisdata['selat'])

        thisdata = thisdata.drop(['Year', 'doy', 'Hour'], axis=1)
        data.append(thisdata)

    return helper.timefilter(data, starttime, endtime)
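# Usage sketch: the returned DataFrame carries both the spherical columns
# ('r', 'selat', 'selon', ...) and the derived cartesian positions ('x',
# 'y', 'z'). Dates are illustrative:
#
#     >>> from datetime import datetime
#     >>> traj = trajectory(2, datetime(1976, 1, 1), datetime(1976, 6, 1))
#     >>> traj[['r', 'x', 'y', 'z']].head()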
def mag_rtn(starttime, endtime):
    """
    Import magnetic field in RTN coordinates from Messenger.

    Parameters
    ----------
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
    """
    # Directory relative to main MESSENGER data directory
    relative_dir = 'rtn'

    keys = {'B_normal': 'Bn',
            'B_radial': 'Br',
            'B_tangential': 'Bt',
            'Epoch': 'Time',
            'azimuth_ecliptic': 'sc_Az',
            'latitude_ecliptic': 'sc_Lat',
            'radialDistance': 'sc_r'}

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        this_relative_dir = os.path.join(relative_dir, str(date.year))
        hdffile = 'messenger_mag_rtn_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v01.hdf'
        hdfloc = os.path.join(mess_dir, this_relative_dir, hdffile)
        # Try to load hdf file
        if os.path.isfile(hdfloc):
            df = pd.read_hdf(hdfloc)
            data.append(df)
            continue

        filename = hdffile[:-4] + '.cdf'
        # Absolute path to local directory for this data file
        local_dir = os.path.join(mess_dir, this_relative_dir)
        helper.checkdir(local_dir)

        remote_url = os.path.join(remote_mess_dir, this_relative_dir)
        cdf = helper.load(filename, local_dir, remote_url, guessversion=True)
        df = helper.cdf2df(cdf, index_key='Epoch', keys=keys)

        if use_hdf:
            hdffile = filename[:-4] + '.hdf'
            df.to_hdf(hdfloc, key='data', mode='w')
        data.append(df)

    return helper.timefilter(data, starttime, endtime)
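# Usage sketch (dates illustrative):
#
#     >>> from datetime import datetime
#     >>> mag = mag_rtn(datetime(2010, 1, 1), datetime(2010, 1, 3))
#     >>> mag[['Br', 'Bt', 'Bn']].describe()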
def mag_ness(probe, starttime, endtime, verbose=True):
    """
    Read in 6 second magnetic field data.

    Parameters
    ----------
    probe : int, string
        Helios probe to import data from. Must be 1 or 2.
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.
    verbose : bool, optional
        If ``True``, print more information as data is loading. Default is
        ``True``.

    Returns
    -------
    data : DataFrame
        6 second magnetic field data set
    """
    probe = _check_probe(probe)
    startdate = starttime.date()
    enddate = endtime.date()

    data = []
    # Loop through years
    for year in range(startdate.year, enddate.year + 1):
        floc = os.path.join(helios_dir, 'helios' + probe, 'mag', '6sec_ness',
                            str(year))
        # Calculate start day
        startdoy = 1
        if year == startdate.year:
            startdoy = int(startdate.strftime('%j'))
        # Calculate end day
        enddoy = 366
        if year == enddate.year:
            enddoy = int(enddate.strftime('%j'))

        # Loop through days of year
        for doy in range(startdoy, enddoy + 1):
            hdfloc = os.path.join(floc, 'h' + probe + str(year - 1900) +
                                  str(doy).zfill(3) + '.h5')
            if os.path.isfile(hdfloc):
                # Load data from already processed file
                data.append(pd.read_hdf(hdfloc, 'table'))
                continue

            # Data not processed yet, try to process and load it
            try:
                data.append(_mag_ness_fromascii(probe, year, doy))
            except ValueError:
                if verbose:
                    print(year, doy, 'No raw mag data')

    return helper.timefilter(data, starttime, endtime)
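# Usage sketch (dates illustrative):
#
#     >>> from datetime import datetime
#     >>> mag = mag_ness(1, datetime(1975, 3, 1), datetime(1975, 3, 5),
#     ...                verbose=False)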
def threedp_pm(starttime, endtime):
    """
    Import 'pm' data product from WIND 3DP.

    Parameters
    ----------
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
    """
    # Directory relative to main WIND data directory
    relative_dir = os.path.join('3dp', '3dp_pm')

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        this_relative_dir = os.path.join(relative_dir, str(day[0].year))
        # Absolute path to local directory for this data file
        local_dir = os.path.join(wind_dir, this_relative_dir)
        filename = 'wi_pm_3dp_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v05.cdf'
        hdfname = filename[:-4] + '.hdf'
        hdfloc = os.path.join(local_dir, hdfname)
        if os.path.isfile(hdfloc):
            df = pd.read_hdf(hdfloc)
            data.append(df)
            continue

        helper.checkdir(local_dir)
        remote_url = remote_wind_dir + this_relative_dir
        cdf = helper.load(filename, local_dir, remote_url, guessversion=True)

        keys = {'A_DENS': 'n_a',
                'A_TEMP': 'T_a',
                'A_VELS': ['va_x', 'va_y', 'va_z'],
                'P_DENS': 'n_p',
                'P_TEMP': 'T_p',
                'P_VELS': ['vp_x', 'vp_y', 'vp_z'],
                'Epoch': 'Time'}
        df = helper.cdf2df(cdf, index_key='Epoch', keys=keys)
        if use_hdf:
            df.to_hdf(hdfloc, 'pm', mode='w', format='f')
        data.append(df)

    return helper.timefilter(data, starttime, endtime)
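# Usage sketch: proton and alpha moments come back in columns such as
# 'n_p', 'T_p' and 'vp_x' (dates illustrative):
#
#     >>> from datetime import datetime
#     >>> pm = threedp_pm(datetime(2000, 1, 1), datetime(2000, 1, 2))
#     >>> pm['n_p'].mean()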
def fgm_hires(starttime, endtime):
    """
    Import high resolution fluxgate magnetometer data.

    Parameters
    ----------
    starttime : datetime
        Start of interval
    endtime : datetime
        End of interval

    Returns
    -------
    data : DataFrame
        Requested data
    """
    fgm_options = url_options
    readargs = {'names': ['year', 'doy', 'hour', 'minute', 'second',
                          'Bx', 'By', 'Bz', '|B|'],
                'delim_whitespace': True}

    data = []
    dtimes = heliotime.daysplitinterval(starttime, endtime)
    # Loop through days
    for dtime in dtimes:
        date = dtime[0]
        yearstr = date.strftime('%Y')
        fgm_options['FILE_NAME'] = ('U' + yearstr[-2:] +
                                    date.strftime('%j') + 'SH.ASC')
        # Local location to download to
        local_dir = os.path.join(ulysses_dir, 'fgm', 'hires', yearstr)
        local_file = os.path.join(local_dir, fgm_options['FILE_NAME'])
        local_hdf = local_file[:-4] + '.hdf'
        # If we have already saved a hdf file
        if os.path.exists(local_hdf):
            thisdata = pd.read_hdf(local_hdf)
        else:
            # Put together remote url
            fgm_options['FILE_PATH'] = '/ufa/HiRes/VHM-FGM/' + yearstr
            remote_url = ulysses_url
            for key in fgm_options:
                remote_url += key + '=' + fgm_options[key] + '&'
            f = helper.load(fgm_options['FILE_NAME'], local_dir, remote_url)

            # Read in data
            thisdata = pd.read_table(f, **readargs)
            # Process data/time
            thisdata = _convert_ulysses_time(thisdata)
            if use_hdf:
                thisdata.to_hdf(local_hdf, 'fgm_hires')
        data.append(thisdata)
    return helper.timefilter(data, starttime, endtime)
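# Usage sketch: a quick sanity check of the magnitude column against the
# components (assumes '|B|' was computed from 'Bx', 'By', 'Bz' in the source
# files; dates illustrative):
#
#     >>> import numpy as np
#     >>> from datetime import datetime
#     >>> fgm = fgm_hires(datetime(1995, 1, 1), datetime(1995, 1, 2))
#     >>> np.allclose(fgm['|B|'],
#     ...             np.sqrt(fgm['Bx']**2 + fgm['By']**2 + fgm['Bz']**2),
#     ...             rtol=1e-2)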
def mfi_h0(starttime, endtime):
    """
    Import 'mfi_h0' magnetic field data product from WIND.

    Parameters
    ----------
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.

    Returns
    -------
    data : DataFrame
    """
    # Directory relative to main WIND data directory
    relative_dir = os.path.join('mfi', 'mfi_h0')

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    for day in daylist:
        date = day[0]
        this_relative_dir = os.path.join(relative_dir, str(day[0].year))
        # Absolute path to local directory for this data file
        local_dir = os.path.join(wind_dir, this_relative_dir)
        filename = 'wi_h0_mfi_' +\
            str(date.year) +\
            str(date.month).zfill(2) +\
            str(date.day).zfill(2) +\
            '_v05.cdf'
        hdfname = filename[:-4] + '.hdf'
        hdfloc = os.path.join(local_dir, hdfname)
        if os.path.isfile(hdfloc):
            df = pd.read_hdf(hdfloc)
            data.append(df)
            continue

        helper.checkdir(local_dir)
        remote_url = remote_wind_dir + this_relative_dir
        cdf = helper.load(filename, local_dir, remote_url, guessversion=True)

        keys = {'B3GSE': ['Bx_gse', 'By_gse', 'Bz_gse'],
                'Epoch3': 'Time'}
        badvalues = {'Bx_gse': -1e+31,
                     'By_gse': -1e+31,
                     'Bz_gse': -1e+31}
        df = helper.cdf2df(cdf,
                           index_key='Epoch3',
                           keys=keys,
                           badvalues=badvalues)
        if use_hdf:
            df.to_hdf(hdfloc, 'mag', mode='w', format='f')
        data.append(df)

    return helper.timefilter(data, starttime, endtime)
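# Usage sketch (dates illustrative):
#
#     >>> from datetime import datetime
#     >>> mag = mfi_h0(datetime(2000, 1, 1), datetime(2000, 1, 2))
#     >>> mag[['Bx_gse', 'By_gse', 'Bz_gse']].head()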
def swoops_ions(starttime, endtime):
    """
    Import SWOOPS ion data.

    Parameters
    ----------
    starttime : datetime
        Start of interval
    endtime : datetime
        End of interval

    Returns
    -------
    data : DataFrame
        Requested data
    """
    swoops_options = url_options
    readargs = {'names': ['year', 'doy', 'hour', 'minute', 'second',
                          'r', 'hlat', 'hlon', 'n_p', 'n_a',
                          'T_p_large', 'T_p_small',
                          'v_r', 'v_t', 'v_n', 'iqual'],
                'delim_whitespace': True}

    data = []
    months_loaded = []
    dtimes = heliotime.daysplitinterval(starttime, endtime)
    # Loop through individual days
    for dtime in dtimes:
        thisdate = dtime[0]
        # Get first day of the month
        first_day = date(thisdate.year, thisdate.month, 1)
        # Check if this month's data has already been loaded
        if first_day in months_loaded:
            continue

        doy = first_day.strftime('%j')
        swoops_options['FILE_NAME'] = ('u' + first_day.strftime('%y') +
                                       doy + 'bam.dat')
        swoops_options['FILE_PATH'] =\
            ('/ufa/stageIngestArea/swoops/ions/bamion' +
             first_day.strftime('%y') + '.zip_files')

        # Put together url for this day's data
        remote_url = ulysses_url
        for key in swoops_options:
            remote_url += key + '=' + swoops_options[key] + '&'
        # Local location to download to
        local_dir = os.path.join(ulysses_dir, 'swoops', 'ions',
                                 first_day.strftime('%Y'))
        # Load data
        try:
            f = helper.load(swoops_options['FILE_NAME'], local_dir,
                            remote_url)
        except HTTPError:
            print('No SWOOPS ion data available for date %s' % first_day)
            continue

        # Read in data
        thisdata = pd.read_table(f, **readargs)
        # Process data/time
        thisdata = _convert_ulysses_time(thisdata)
        data.append(thisdata)
        months_loaded.append(first_day)

    return helper.timefilter(data, starttime, endtime)
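# Usage sketch: the remote files are organised by month, so requesting any
# part of a month loads the whole month before time filtering (dates
# illustrative):
#
#     >>> from datetime import datetime
#     >>> ions = swoops_ions(datetime(1995, 3, 10), datetime(1995, 3, 12))
#     >>> ions[['n_p', 'v_r']].head()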
def merged(probe, starttime, endtime, verbose=True, try_download=True):
    """
    Read in merged data set.

    Parameters
    ----------
    probe : int, string
        Helios probe to import data from. Must be 1 or 2.
    starttime : datetime
        Interval start time.
    endtime : datetime
        Interval end time.
    verbose : bool, optional
        If ``True``, print more information as data is loading. Default is
        ``True``.
    try_download : bool, optional
        If ``True``, try to download data that is not available locally.
        Default is ``True``.

    Returns
    -------
    data : DataFrame
        Merged data set.
    """
    probe = _check_probe(probe)
    startdate = starttime.date()
    enddate = endtime.date()

    daylist = spacetime.daysplitinterval(starttime, endtime)
    data = []
    floc = os.path.join(helios_dir, 'helios' + probe, 'merged',
                        'he' + probe + '_40sec')
    for day in daylist:
        this_date = day[0]
        # Check that data for this day exists
        if probe == '1':
            if this_date < date(1974, 12, 12) or this_date > date(1985, 9, 4):
                continue
        if probe == '2':
            if this_date < date(1976, 1, 17) or this_date > date(1980, 3, 8):
                continue

        doy = int(this_date.strftime('%j'))
        year = this_date.year
        hdfloc = os.path.join(floc, 'H' + probe + str(year - 1900) + '_' +
                              str(doy).zfill(3) + '.h5')
        if not os.path.isfile(hdfloc):
            # Data not processed yet, try to process and load it
            try:
                data.append(_merged_fromascii(probe, year, doy,
                                              try_download=try_download))
                if verbose:
                    print(year, doy, 'Processed ascii file')
            except (FileNotFoundError, URLError) as err:
                if verbose:
                    print(str(err))
                    print(year, doy, 'No raw merged data')
        else:
            # Load data from already processed file
            data.append(pd.read_hdf(hdfloc, 'table'))
            if verbose:
                print(year, doy)

    if data == []:
        fmt = '%d-%m-%Y'
        raise ValueError('No data to import for probe ' + probe +
                         ' between ' + startdate.strftime(fmt) +
                         ' and ' + enddate.strftime(fmt))

    return helper.timefilter(data, starttime, endtime)
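# Usage sketch: try_download=False restricts the read to files already on
# disk (dates illustrative, within the Helios 1 coverage checked above):
#
#     >>> from datetime import datetime
#     >>> data = merged(1, datetime(1976, 1, 10), datetime(1976, 1, 12),
#     ...               verbose=False, try_download=False)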
def ion_dists(probe, starttime, endtime, remove_advect=False):
    """
    Read in ion distribution functions.

    Parameters
    ----------
    probe : int, string
        Helios probe to import data from. Must be 1 or 2.
    starttime : datetime
        Start of interval
    endtime : datetime
        End of interval
    remove_advect : bool, optional
        If *False*, the distribution is returned in the spacecraft frame.
        If *True*, the distribution is returned in the solar wind frame, by
        subtracting the spacecraft velocity from the velocity of each bin.
        Note this significantly slows down reading in the distribution.

    Returns
    -------
    dists : DataFrame
        Ion distribution functions.
    """
    probe = _check_probe(probe)
    extensions = ['hdm.0', 'hdm.1', 'ndm.0', 'ndm.1']
    distlist = []
    # The loop below advances starttime day by day; keep the original value
    # for the final time filter
    starttime_orig = starttime
    # Loop through each day
    while starttime < endtime:
        year = starttime.year
        doy = starttime.strftime('%j')
        # Directory for today's distribution files
        dist_dir = _dist_file_dir(probe, year, doy)
        # Location of hdf file to save to/load from
        hdffile = 'h' + probe + str(year) + str(doy).zfill(3) +\
            'ion_dists.hdf'
        hdffile = os.path.join(dist_dir, hdffile)
        if os.path.isfile(hdffile):
            todays_dist = pd.read_hdf(hdffile)
        else:
            todays_dist = []
            # Get every distribution function file present for this day
            for f in os.listdir(dist_dir):
                path = os.path.join(dist_dir, f)
                # Check for distribution function
                if path[-5:] in extensions:
                    # year = int(path[-21:-19]) + 1900
                    # doy = int(path[-18:-15])
                    hour = int(path[-14:-12])
                    minute = int(path[-11:-9])
                    second = int(path[-8:-6])
                    try:
                        # Forward remove_advect to the single-file reader
                        d = ion_dist_single(probe, year, doy,
                                            hour, minute, second,
                                            remove_advect=remove_advect)
                    except RuntimeError as err:
                        strerr = 'No ion distribution function data in file'
                        if str(err) == strerr:
                            continue
                        raise err

                    t = datetime.combine(starttime.date(),
                                         time(hour, minute, second))
                    d['Time'] = t
                    todays_dist.append(d)
            todays_dist = pd.concat(todays_dist)
            todays_dist = todays_dist.set_index('Time', append=True)
            if use_hdf:
                todays_dist.to_hdf(hdffile, key='ion_dist', mode='w')
        distlist.append(todays_dist)
        starttime += timedelta(days=1)

    return helper.timefilter(distlist, starttime_orig, endtime)
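# Usage sketch: remove_advect=True returns distributions in the solar wind
# frame, at the cost of slower reads (dates illustrative):
#
#     >>> from datetime import datetime
#     >>> dists = ion_dists(1, datetime(1976, 1, 10), datetime(1976, 1, 11),
#     ...                   remove_advect=True)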