def load(cosmicFiles, tag=None, sat_id=None, altitude_bin=None):
    """cosmic data load routine, called by pysat"""
    num = len(cosmicFiles)
    # make sure there are files to read
    if num != 0:
        # call separate load_files routine, segmented for possible
        # multiprocessor load, not included and only benefits about 20%
        output = pysat.DataFrame(
            load_files(cosmicFiles, tag=tag, sat_id=sat_id,
                       altitude_bin=altitude_bin))
        output.index = pysat.utils.create_datetime_index(
            year=output.year, month=output.month, day=output.day,
            uts=output.hour * 3600. + output.minute * 60. + output.second)
        # make sure UTS strictly increasing
        output.sort_index(inplace=True)
        # use the first available file to pick out meta information
        profile_meta = pysat.Meta()
        meta = pysat.Meta()
        ind = 0
        repeat = True
        while repeat:
            try:
                data = netcdf_file(cosmicFiles[ind], mode='r', mmap=False)
                keys = data.variables.keys()
                for key in keys:
                    profile_meta[key] = {
                        'units': data.variables[key].units,
                        'long_name': data.variables[key].long_name}
                # ncattrsList = data.ncattrs()
                ncattrsList = data._attributes.keys()
                for d in ncattrsList:
                    meta[d] = {'units': '', 'long_name': d}
                repeat = False
            except RuntimeError:
                # file was empty, try the next one by incrementing ind
                ind += 1
        meta['profiles'] = profile_meta
        return output, meta
    else:
        # no data
        return pysat.DataFrame(None), pysat.Meta()
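# Hedged usage sketch (not part of the original source): pysat normally
# invokes the load routine above itself, but it can be exercised directly
# with a list of COSMIC netCDF files.  The filenames below are illustrative
# assumptions, not real data products on disk.
#
#     cosmicFiles = ['ionPrf_C001.2009.001.00.01.G04_2013.3520_nc',
#                    'ionPrf_C001.2009.001.00.05.G17_2013.3520_nc']
#     output, meta = load(cosmicFiles, tag='ionprf', altitude_bin=5)
#     print(output.index[:5])
#     print(meta['profiles'])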
def load(fnames, tag=None, sat_id=None):
    # create an artificial satellite data set
    parts = fnames[0].split('/')
    yr = int('20' + parts[-1][0:2])
    month = int(parts[-3])
    day = int(parts[-2])
    date = pysat.datetime(yr, month, day)
    num = 86400  # int(tag)
    uts = np.arange(num)
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    time_delta = date - pysat.datetime(2009, 1, 1)
    uts_root = np.mod(time_delta.total_seconds(), 5820)
    mlt = np.mod(uts_root + np.arange(num), 5820) * (24. / 5820.)
    data['mlt'] = mlt
    # do slt, 20 second offset from mlt
    uts_root = np.mod(time_delta.total_seconds() + 20, 5820)
    data['slt'] = np.mod(uts_root + np.arange(num), 5820) * (24. / 5820.)
    index = pds.date_range(date,
                           date + pds.DateOffset(hours=23, minutes=59,
                                                 seconds=59),
                           freq='S')
    data.index = index
    data.index.name = 'time'
    return data, meta.copy()
def custom1(inst):
    out = pysat.DataFrame({'doubleMLT': inst.data.mlt * 2,
                           'tripleMLT': inst.data.mlt * 3},
                          index=inst.index)
    return {'data': out,
            'long_name': ['doubleMLTlong', 'tripleMLTlong'],
            'units': ['hours1', 'hours2']}
def custom1(inst):
    out = pysat.DataFrame({'doubleMLT': inst.data.mlt * 2,
                           'tripleMLT': inst.data.mlt * 3},
                          index=inst.index)
    return out
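# Hedged usage sketch: in the pysat 2.x-era API these snippets target,
# custom functions were attached with Instrument.custom.add.  The first
# custom1 variant ('add' kind) returns a dict so pysat can capture units and
# long_name metadata; the second returns a bare DataFrame.  Instrument names
# below are illustrative.
#
#     inst = pysat.Instrument('pysat', 'testing')
#     inst.custom.add(custom1, 'add')
#     inst.load(2009, 1)
#     print(inst['doubleMLT'].head())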
def load(fnames, tag=None, sat_id=None,
         fake_daily_files_from_monthly=False,
         flatten_twod=True):
    """Load NASA CDAWeb CDF files

    Parameters
    ----------
    fnames : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)
    fake_daily_files_from_monthly : bool
        Some CDAWeb instrument data files are stored by month, interfering
        with pysat's functionality of loading by day. This flag, when True,
        parses out the daily dates that the list_files routine appended
        internally to the monthly filenames. These dates are used here to
        provide data by day.
    flatten_twod : bool
        If True, flatten 2D data into columns of the root DataFrame rather
        than produce a Series of DataFrames

    Returns
    -------
    data : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units

    """
    import pysatCDF

    if len(fnames) <= 0:
        return pysat.DataFrame(None), None
    else:
        # going to use pysatCDF to load the CDF and format
        # data and metadata for pysat using some assumptions.
        # Depending upon your needs the resulting pandas DataFrame may
        # need modification
        # currently only loads one file, which handles more situations
        # via pysat than you may initially think
        if fake_daily_files_from_monthly:
            # parse out date from filename
            fname = fnames[0][0:-11]
            date = pysat.datetime.strptime(fnames[0][-10:], '%Y-%m-%d')
            with pysatCDF.CDF(fname) as cdf:
                # convert data to pysat format
                data, meta = cdf.to_pysat(flatten_twod=flatten_twod)
                # select data from monthly
                data = data.ix[date:date + pds.DateOffset(days=1)
                               - pds.DateOffset(microseconds=1), :]
            return data, meta
        else:
            # basic data return
            with pysatCDF.CDF(fnames[0]) as cdf:
                return cdf.to_pysat(flatten_twod=flatten_twod)
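# Minimal sketch of the filename convention the fake_daily_files_from_monthly
# branch above assumes: list_files appends '_YYYY-MM-DD' (11 characters) to
# each monthly filename, so load recovers both pieces by slicing.  The
# filename below is an illustrative assumption.
import datetime as dt

fake_fname = 'instrument_200901_v01.cdf_2009-01-15'
monthly_fname = fake_fname[0:-11]   # 'instrument_200901_v01.cdf'
day = dt.datetime.strptime(fake_fname[-10:], '%Y-%m-%d')  # 15 Jan 2009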
def load_files(files, tag=None, sat_id=None, altitude_bin=None):
    '''Loads a list of COSMIC data files, supplied by user.

    Returns a list of dicts, a dict for each file.
    '''
    output = [None] * len(files)
    drop_idx = []
    for (i, file) in enumerate(files):
        try:
            # data = netCDF4.Dataset(file)
            data = netcdf_file(file, mode='r', mmap=False)
            # build up dictionary with all ncattrs
            new = {}
            # get list of file attributes
            # ncattrsList = data.ncattrs()
            ncattrsList = data._attributes.keys()
            for d in ncattrsList:
                new[d] = data._attributes[d]  # data.getncattr(d)
            # load all of the variables in the netCDF
            loadedVars = {}
            keys = data.variables.keys()
            for key in keys:
                if data.variables[key][:].dtype.byteorder != '=':
                    loadedVars[key] = \
                        data.variables[key][:].byteswap().newbyteorder()
                else:
                    loadedVars[key] = data.variables[key][:]
            new['profiles'] = pysat.DataFrame(loadedVars)
            output[i] = new
            data.close()
        except RuntimeError:
            # some of the files have zero bytes, which causes a read error
            # this stores the index of these zero byte files so I can drop
            # the Nones the gappy file leaves behind
            drop_idx.append(i)

    # drop anything that came from the zero byte files
    drop_idx.reverse()
    for i in drop_idx:
        del output[i]

    if tag == 'ionprf':
        if altitude_bin is not None:
            for out in output:
                out['profiles'].index = \
                    (out['profiles']['MSL_alt'] /
                     altitude_bin).round().values * altitude_bin
                out['profiles'] = out['profiles'].groupby(
                    out['profiles'].index.values).mean()
        else:
            for out in output:
                out['profiles'].index = out['profiles']['MSL_alt']
    return output
def load(fnames, tag='survey', sat_id=''):
    """Load DEMETER IAP data

    Parameters
    ----------
    fnames : (list)
        List of file names
    tag : (string)
        Denotes type of file to load.  Accepted types are 'survey'; 'burst'
        will be added in the future. (default='survey')
    sat_id : (string or NoneType)
        Specifies the satellite ID for a constellation.  Not used.
        (default='')

    Returns
    -------
    data : (pds.DataFrame)
        DataFrame of DEMETER satellite data
    meta : Metadata object

    """
    if len(fnames) == 0:
        print('need list of filenames')
        return pysat.DataFrame(None), None

    # Load the desired data and cast as a DataFrame
    data = list()
    for fname in fnames:
        fdata, fmeta = demeter_methods.load_binary_file(fname,
                                                        load_experiment_data)
        data.extend(fdata)
    data = np.vstack(data)
    data = pysat.DataFrame(data, index=data[:, 3],
                           columns=fmeta['data names'])

    # Assign metadata
    if len(data.columns) > 0:
        meta = demeter_methods.set_metadata(name, fmeta)
    else:
        meta = pysat.Meta(None)

    return data, meta
def load_orig(fnames, tag=None):
    import pydarn

    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)
    elif len(fnames) == 1:
        b = pydarn.sdio.sdDataOpen(pysat.datetime(1980, 1, 1),
                                   src='local',
                                   eTime=pysat.datetime(2050, 1, 1),
                                   fileName=fnames[0])
        data_list = pydarn.sdio.sdDataReadAll(b)
        sys.stdout.flush()
        in_dict = []
        for info in data_list:
            arr = np.arange(len(info.stid))
            drift_frame = pds.DataFrame(
                info.vector.__dict__,
                # index=[info.vector.mlon, info.vector.mlat])
                index=info.vector.index)
            drift_frame.index.name = 'index'
            drift_frame.sort(inplace=True)
            # drift_frame.index.names = ['mlon', 'mlat']
            for i in arr:
                nvec = info.nvec[i]
                in_frame = drift_frame.iloc[0:nvec]
                drift_frame = drift_frame.iloc[nvec:]
                in_dict.append({'stid': info.stid[i],
                                'channel': info.channel[i],
                                'noisemean': info.noisemean[i],
                                'noisesd': info.noisesd[i],
                                'gsct': info.gsct[i],
                                'nvec': info.nvec[i],
                                'pmax': info.pmax[i],
                                'vector': in_frame,
                                'start_time': info.sTime,
                                'end_time': info.eTime,
                                'vemax': info.vemax[i],
                                'vemin': info.vemin[i],
                                'pmin': info.pmin[i],
                                'programid': info.programid[i],
                                'wmax': info.wmax[i],
                                'wmin': info.wmin[i],
                                'freq': info.freq[i]})
        output = pds.DataFrame(in_dict)
        output.index = output.start_time
        output.drop('start_time', axis=1, inplace=True)
        return output, pysat.Meta()
    else:
        raise ValueError('Only one filename currently supported.')
def load(fnames, tag=None, sat_id=None):
    import pysatCDF

    if len(fnames) <= 0:
        return pysat.DataFrame(None), None
    else:
        # pull out date appended to filename
        fname = fnames[0][0:-11]
        date = pysat.datetime.strptime(fnames[0][-10:], '%Y-%m-%d')
        with pysatCDF.CDF(fname) as cdf:
            data, meta = cdf.to_pysat()
            # pick out data for date
            data = data.ix[date:date + pds.DateOffset(days=1)
                           - pds.DateOffset(microseconds=1)]
            return data, meta
def load(fnames, tag=None, inst_id=None):
    """Load CHAIN GPS Files

    Parameters
    ----------
    fnames : (list or array-like)
        series of filenames to be loaded
    tag : (string or NoneType)
        Denotes type of file to load. (default=None)
    inst_id : (string or NoneType)
        Specifies the satellite ID for a constellation. Not used.
        (default=None)

    """
    if not fnames:
        warnings.warn('no filenames provided, returning empty data')
        return pysat.DataFrame(None), pysat.Meta(None)
    elif len(fnames) == 1:
        meta = pysat.Meta()
        signal_meta = pysat.Meta()
        # load the rinex
        data = gr.load(fnames[0])
        # get the metadata from the xarray.Dataset
        xr_attrs = data.attrs
        # format the metadata
        # for at in xr_attrs:
        #     meta[at] = {'units': '', 'long_name': at}
        # keys = data.data_vars.keys()
        # for key in keys:
        #     # find the xarray equivalents of units and long_name
        #     signal_meta[key] = {meta.labels.units: var_units[key][0],
        #                         meta.labels.name: var_units[key][1]}
        # # format the data
        # meta['signals'] = signal_meta
        # output = data.variables
        return data, meta
    else:
        raise ValueError('Only one filename currently supported')
def load(fnames, tag=None, sat_id=None):
    # create an artificial satellite data set
    parts = fnames[0].split('/')
    yr = int('20' + parts[-1][0:2])
    month = int(parts[-3])
    day = int(parts[-2])
    date = pysat.datetime(yr, month, day)
    num = 864  # int(tag)
    uts = np.arange(num)
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    time_delta = date - pysat.datetime(2009, 1, 1)
    uts_root = np.mod(time_delta.total_seconds(), 5820)
    mlt = np.mod(uts_root + np.arange(num), 5820) * (24. / 5820.)
    data['mlt'] = mlt
    # do slt, 20 second offset from mlt
    uts_root = np.mod(time_delta.total_seconds() + 20, 5820)
    data['slt'] = np.mod(uts_root + np.arange(num), 5820) * (24. / 5820.)
    index = pds.date_range(date,
                           date + pds.DateOffset(hours=23, minutes=59,
                                                 seconds=59),
                           freq='100S')
    data.index = index
    data.index.name = 'epoch'

    profiles = []
    frame = pds.DataFrame({'density': data.ix[0:50, 'mlt'].values.copy(),
                           'dummy_str': ['test'] * 50,
                           'dummy_ustr': [u'test'] * 50},
                          index=data.index[0:50],
                          columns=['density', 'dummy_str', 'dummy_ustr'])
    for time in data.index:
        profiles.append(frame)
    data['profiles'] = pds.Series(profiles, index=data.index)
    return data, meta.copy()
def load_files(files, tag=None, sat_id=None):
    '''Loads a list of COSMIC data files, supplied by user.

    Returns a list of dicts, a dict for each file.
    '''
    output = [None] * len(files)
    drop_idx = []
    for (i, file) in enumerate(files):
        try:
            data = netCDF4.Dataset(file)
            # build up dictionary with all ncattrs
            new = {}
            # get list of file attributes
            ncattrsList = data.ncattrs()
            for d in ncattrsList:
                new[d] = data.getncattr(d)
            # load all of the variables in the netCDF
            loadedVars = {}
            keys = data.variables.keys()
            for key in keys:
                loadedVars[key] = data.variables[key][:]
            new['profiles'] = pysat.DataFrame(loadedVars)
            if tag == 'ionprf':
                new['profiles'].index = new['profiles']['MSL_alt']
            output[i] = new
            data.close()
        except RuntimeError:
            # some of the S4 files have zero bytes, which causes a read
            # error; this stores the index of these zero byte files so I
            # can drop the Nones the gappy file leaves behind
            drop_idx.append(i)

    # drop anything that came from the zero byte files
    drop_idx.reverse()
    for i in drop_idx:
        del output[i]
    return output
def load(fnames, tag=None, sat_id=None, sim_multi_file_right=False,
         sim_multi_file_left=False, root_date=None, file_date_range=None,
         malformed_index=False, mangle_file_dates=False):
    """Loads the test files

    Parameters
    ----------
    fnames : list
        List of filenames
    tag : str or NoneType
        Instrument tag (accepts '')
    sat_id : str or NoneType
        Instrument satellite ID (accepts '' or a number (i.e., '10'), which
        specifies the number of data points to include in the test
        instrument)
    sim_multi_file_right : boolean
        Adjusts date range to be 12 hours in the future or twelve hours
        beyond root_date (default=False)
    sim_multi_file_left : boolean
        Adjusts date range to be 12 hours in the past or twelve hours
        before root_date (default=False)
    root_date : NoneType
        Optional central date, uses _test_dates if not specified.
        (default=None)
    file_date_range : pds.date_range or NoneType
        Range of dates for files or None, if this optional argument is not
        used. Shift actually performed by the init function. (default=None)
    malformed_index : boolean
        If True, time index will be non-unique and non-monotonic.
    mangle_file_dates : bool
        If True, the loaded file list time index is shifted by 5-minutes.
        This shift is actually performed by the init function.

    Returns
    -------
    data : pds.DataFrame
        Testing data
    meta : pysat.Meta
        Metadata

    """
    # create an artificial satellite data set
    iperiod = mm_test.define_period()
    drange = mm_test.define_range()
    uts, index, date = mm_test.generate_times(fnames, sat_id, freq='1S')

    # Specify the date tag locally and determine the desired date range
    pds_offset = pds.DateOffset(hours=12)
    if sim_multi_file_right:
        root_date = root_date or _test_dates[''][''] + pds_offset
    elif sim_multi_file_left:
        root_date = root_date or _test_dates[''][''] - pds_offset
    else:
        root_date = root_date or _test_dates['']['']

    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit default
    # to 1 Jan 2009, 00:00 UT. 14.84 orbits per day
    time_delta = date - root_date
    data['mlt'] = mm_test.generate_fake_data(time_delta.total_seconds(), uts,
                                             period=iperiod['lt'],
                                             data_range=drange['lt'])
    # do slt, 20 second offset from mlt
    data['slt'] = mm_test.generate_fake_data(time_delta.total_seconds() + 20,
                                             uts, period=iperiod['lt'],
                                             data_range=drange['lt'])
    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra
    # time to go around full longitude
    data['longitude'] = mm_test.generate_fake_data(time_delta.total_seconds(),
                                                   uts,
                                                   period=iperiod['lon'],
                                                   data_range=drange['lon'])
    # create latitude area for testing polar orbits
    angle = mm_test.generate_fake_data(time_delta.total_seconds(), uts,
                                       period=iperiod['angle'],
                                       data_range=drange['angle'])
    data['latitude'] = 90.0 * np.cos(angle)

    # fake orbit number
    fake_delta = date - (_test_dates[''][''] - pds.DateOffset(years=1))
    data['orbit_num'] = mm_test.generate_fake_data(fake_delta.total_seconds(),
                                                   uts, period=iperiod['lt'],
                                                   cyclic=False)

    # create some fake data to support testing of averaging routines
    mlt_int = data['mlt'].astype(int)
    long_int = (data['longitude'] / 15.0).astype(int)
    if tag == 'ascend':
        data['dummy1'] = [i for i in range(len(data['mlt']))]
    elif tag == 'descend':
        data['dummy1'] = [-i for i in range(len(data['mlt']))]
    elif tag == 'plus10':
        data['dummy1'] = [i + 10 for i in range(len(data['mlt']))]
    elif tag == 'fives':
        data['dummy1'] = [5 for i in range(len(data['mlt']))]
    elif tag == 'mlt_offset':
        data['dummy1'] = mlt_int + 5
    else:
        data['dummy1'] = mlt_int
    data['dummy2'] = long_int
    data['dummy3'] = mlt_int + long_int * 1000.0
    data['dummy4'] = uts
    data['string_dummy'] = ['test'] * len(data)
    data['unicode_dummy'] = [u'test'] * len(data)
    data['int8_dummy'] = np.ones(len(data), dtype=np.int8)
    data['int16_dummy'] = np.ones(len(data), dtype=np.int16)
    data['int32_dummy'] = np.ones(len(data), dtype=np.int32)
    data['int64_dummy'] = np.ones(len(data), dtype=np.int64)

    if malformed_index:
        index = index.tolist()
        # nonmonotonic
        index[0:3], index[3:6] = index[3:6], index[0:3]
        # non unique
        index[6:9] = [index[6]] * 3

    data.index = index
    data.index.name = 'Epoch'
    return data, meta.copy()
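# Standalone numpy sketch of the periodic-signal technique these test
# loaders rely on (mm_test.generate_fake_data is external to this listing;
# this reproduces only the basic idea under that assumption): seconds since
# a root date are folded into an orbital period and rescaled onto the
# requested data range, giving a signal continuous across day boundaries.
import numpy as np


def fake_periodic_signal(root_seconds, uts, period=5820,
                         data_range=(0.0, 24.0)):
    """Periodic signal, e.g. MLT over a 5820 second (97 minute) orbit."""
    phase = np.mod(root_seconds + uts, period) / period
    return data_range[0] + phase * (data_range[1] - data_range[0])


mlt_like = fake_periodic_signal(0.0, np.arange(86400))  # one day at 1 Hz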
def load(fnames, tag=None, sat_id=None):
    """Load CHAMP STAR files

    Parameters
    ----------
    fnames : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)

    Returns
    -------
    data : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units

    """
    import re

    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)

    if isinstance(fnames, str):
        fnames = [fnames]

    # Define the CHAMP STAR data types by column
    champ_labels = {
        'Two-digit Year (years)': "year",
        'Day of the Year (days)': "doy",
        'Second of the Day (GPS time,sec)': "sod",
        'Center Latitude of 3-degree Bin (deg)': "bin_lat",
        'Satellite Geodetic Latitude (deg)': "sat_glat",
        'Satellite Longitude (deg)': "sat_lon",
        'Satellite Height (km)': "sat_h",
        'Satellite Local Time (hours)': "sat_lt",
        'Satellite Quasi-Dipole Latitude (deg)': "sat_qdlat",
        'Satellite Magnetic Longitude (deg)': "sat_mlon",
        'Satellite Magnetic Local Time (hours)': "sat_mlt",
        'Neutral Density (kg/m^3)': "ndens",
        'Neutral Density Normalized to 400km using NRLMSISe00': "ndens400",
        'Neutral Density Normalized to 410km using NRLMSISe00': "ndens410",
        'NRLMSISe00 Neutral Density at Satellite Height': "nrlmsis_ndens",
        'Uncertainty in Neutral Density (kg/m^3)': "ndens_err",
        'Number of Data Points in Current Averaging Bin': "npnts",
        ' '.join(('Number of Points in Current Averaging Bin that',
                  'Required Interpolation')): "npnts_interp",
        ' '.join(('Average Coefficient of Drag Used in Current',
                  'Averaging Bin')): "avg_drag_coeff"}

    champ_dtypes = {'year': np.int32, 'doy': np.int32, 'sod': float,
                    'bin_lat': float, 'sat_glat': float, 'sat_lon': float,
                    'sat_h': float, 'sat_lt': float, 'sat_qdlat': float,
                    'sat_mlon': float, 'sat_mlt': float, 'ndens': float,
                    'ndens400': float, 'ndens410': float,
                    'nrlmsis_ndens': float, 'ndens_err': float,
                    'npnts': int, 'npnts_interp': float,
                    'avg_drag_coeff': float}

    champ_units = {'year': "2-digit years", 'doy': "day of year",
                   'sod': "seconds of day", 'bin_lat': "degrees",
                   'sat_glat': "degrees", 'sat_lon': "degrees",
                   'sat_h': "km", 'sat_lt': "hours", 'sat_qdlat': "degrees",
                   'sat_mlon': "degrees", 'sat_mlt': "hours",
                   'ndens': "kg m^{-3}", 'ndens400': "kg m^{-3}",
                   'ndens410': "kg m^{-3}", 'nrlmsis_ndens': "kg m^{-3}",
                   'ndens_err': "kg m^{-3}", 'npnts': "number",
                   'npnts_interp': "number", 'avg_drag_coeff': "unitless"}

    # Define the routine needed to create datetime object from the
    # CHAMP time (YY DDD SSSSS)
    def parse_champdate(y, d, s):
        '''parse CHAMP date string (YY DDD SSSSS) into a datetime object
        '''
        import datetime as dt

        t = dt.datetime.strptime("{:02d} {:03d}".format(int(y), int(d)),
                                 "%y %j")
        fsec = float(s)
        isec = np.floor(fsec)
        microsec = (fsec - isec) * 1.0e6
        t += dt.timedelta(seconds=isec, microseconds=microsec)
        return t

    # The header is formatted differently from the rest of the file, read
    # it in first to obtain the necessary meta data
    with open(fnames[0], "r") as f:
        hdata = re.split(";|\n", f.readline())

    try:
        hdata.pop(hdata.index(''))
    except ValueError:
        pass

    # If there are files, read in the data
    data = pds.read_csv(fnames[0], delim_whitespace=True, skiprows=2,
                        header=None, names=[champ_labels[h] for h in hdata],
                        keep_date_col=True, index_col='datetime',
                        parse_dates={'datetime': [0, 1, 2]},
                        date_parser=parse_champdate)

    # Initialize the meta data
    meta = pysat.Meta()

    # Because the native dtype declaration interfered with datetime
    # indexing, define the data types here.  Also set the meta data
    for h in hdata:
        col = champ_labels[h]
        data[col] = data[col].astype(champ_dtypes[col])
        meta[col] = {"units": champ_units[col], "long_name": h}

    # Return data frame and metadata object
    return data, meta
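# Quick check of the parse_champdate helper defined above (input values are
# illustrative): year '09', day-of-year '032', and 3661.5 seconds of day
# should yield 2009-02-01 01:01:01.500000.
#
#     parse_champdate('09', '032', '3661.5')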
def load(fnames, tag=None, sat_id=None, malformed_index=False):
    """Loads the test files

    Parameters
    ----------
    fnames : (list)
        List of filenames
    tag : (str or NoneType)
        Instrument tag (accepts '' or a number (i.e., '10'), which
        specifies the number of times to include in the test instrument)
    sat_id : (str or NoneType)
        Instrument satellite ID (accepts '')
    malformed_index : bool (False)
        If True, the time index will be non-unique and non-monotonic.

    Returns
    -------
    data : (pds.DataFrame)
        Testing data
    meta : (pysat.Meta)
        Metadata

    """
    # create an artificial satellite data set
    parts = os.path.split(fnames[0])[-1].split('-')
    yr = int(parts[0])
    month = int(parts[1])
    day = int(parts[2][0:2])
    date = pysat.datetime(yr, month, day)

    # scalar divisor below used to reduce the number of time samples
    # covered by the simulation per day. The higher the number the lower
    # the number of samples (86400/scalar)
    scalar = 100
    num = 86400 // scalar
    # basic time signal in UTS
    uts = np.arange(num) * scalar
    num_array = np.arange(num) * scalar

    # seed DataFrame with UT array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    # figure out how far in time from the root start
    # use that info to create a signal that is continuous from that start
    # going to presume there are 5820 seconds per orbit (97 minute period)
    time_delta = date - pysat.datetime(2009, 1, 1)

    # mlt runs 0-24 each orbit.
    data['mlt'] = test.generate_fake_data(time_delta.total_seconds(),
                                          np.arange(num) * scalar,
                                          period=5820,
                                          data_range=[0.0, 24.0])
    # do slt, 20 second offset from mlt
    data['slt'] = test.generate_fake_data(time_delta.total_seconds() + 20,
                                          np.arange(num) * scalar,
                                          period=5820,
                                          data_range=[0.0, 24.0])
    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra
    # time to go around full longitude
    data['longitude'] = test.generate_fake_data(time_delta.total_seconds(),
                                                num_array, period=6240,
                                                data_range=[0.0, 360.0])
    # create latitude signal for testing polar orbits
    angle = test.generate_fake_data(time_delta.total_seconds(), num_array,
                                    period=5820,
                                    data_range=[0.0, 2.0 * np.pi])
    data['latitude'] = 90.0 * np.cos(angle)

    # create real UTC time signal
    index = pds.date_range(date,
                           date + pds.DateOffset(hours=23, minutes=59,
                                                 seconds=59),
                           freq=str(scalar) + 'S')
    if malformed_index:
        index = index[0:num].tolist()
        # nonmonotonic
        index[0:3], index[3:6] = index[3:6], index[0:3]
        # non unique
        index[6:9] = [index[6]] * 3

    data.index = index
    data.index.name = 'epoch'

    # higher rate time signal (for scalar >= 2)
    # this time signal used for 2D profiles associated with each time in
    # main DataFrame
    high_rate_template = pds.date_range(date,
                                        date + pds.DateOffset(hours=0,
                                                              minutes=1,
                                                              seconds=39),
                                        freq='2S')

    # create a few simulated profiles
    # DataFrame at each time with mixed variables
    profiles = []
    # DataFrame at each time with numeric variables only
    alt_profiles = []
    # Series at each time, numeric data only
    series_profiles = []
    # frame indexed by date times
    frame = pds.DataFrame({'density': data.loc[data.index[0:50],
                                               'mlt'].values.copy(),
                           'dummy_str': ['test'] * 50,
                           'dummy_ustr': [u'test'] * 50},
                          index=data.index[0:50],
                          columns=['density', 'dummy_str', 'dummy_ustr'])
    # frame indexed by float
    dd = np.arange(50) * 1.2
    ff = np.arange(50) / 50.
    ii = np.arange(50) * 0.5
    frame_alt = pds.DataFrame({'density': dd, 'fraction': ff},
                              index=ii,
                              columns=['density', 'fraction'])
    # series version of storage
    series_alt = pds.Series(dd, index=ii, name='series_profiles')

    for time in data.index:
        frame.index = high_rate_template + (time - data.index[0])
        profiles.append(frame)
        alt_profiles.append(frame_alt)
        series_profiles.append(series_alt)

    # store multiple data types into main frame
    data['profiles'] = pds.Series(profiles, index=data.index)
    data['alt_profiles'] = pds.Series(alt_profiles, index=data.index)
    data['series_profiles'] = pds.Series(series_profiles, index=data.index)
    return data, meta.copy()
def load(fnames, tag=None, sat_id=None, malformed_index=False):
    """Loads the test files

    Parameters
    ----------
    fnames : list
        List of filenames
    tag : str or NoneType
        Instrument tag (accepts '')
    sat_id : str or NoneType
        Instrument satellite ID (accepts '' or a number (i.e., '10'), which
        specifies the number of data points to include in the test
        instrument)
    malformed_index : bool
        If True, the time index will be non-unique and non-monotonic.
        (default=False)

    Returns
    -------
    data : pds.DataFrame
        Testing data
    meta : pysat.Meta
        Metadata

    """
    # create an artificial satellite data set
    iperiod = mm_test.define_period()
    drange = mm_test.define_range()

    # Using 100s frequency for compatibility with seasonal analysis unit
    # tests
    uts, index, date = mm_test.generate_times(fnames, sat_id, freq='100S')

    # seed DataFrame with UT array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    # figure out how far in time from the root start
    # use that info to create a signal that is continuous from that start
    # going to presume there are 5820 seconds per orbit (97 minute period)
    time_delta = date - pysat.datetime(2009, 1, 1)

    # mlt runs 0-24 each orbit.
    data['mlt'] = mm_test.generate_fake_data(time_delta.total_seconds(), uts,
                                             period=iperiod['lt'],
                                             data_range=drange['lt'])
    # do slt, 20 second offset from mlt
    data['slt'] = mm_test.generate_fake_data(time_delta.total_seconds() + 20,
                                             uts, period=iperiod['lt'],
                                             data_range=drange['lt'])
    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra
    # time to go around full longitude
    data['longitude'] = mm_test.generate_fake_data(time_delta.total_seconds(),
                                                   uts,
                                                   period=iperiod['lon'],
                                                   data_range=drange['lon'])
    # create latitude signal for testing polar orbits
    angle = mm_test.generate_fake_data(time_delta.total_seconds(), uts,
                                       period=iperiod['angle'],
                                       data_range=drange['angle'])
    data['latitude'] = 90.0 * np.cos(angle)

    if malformed_index:
        index = index.tolist()
        # nonmonotonic
        index[0:3], index[3:6] = index[3:6], index[0:3]
        # non unique
        index[6:9] = [index[6]] * 3

    data.index = index
    data.index.name = 'epoch'

    # higher rate time signal (for scalar >= 2)
    # this time signal used for 2D profiles associated with each time in
    # main DataFrame
    high_rate_template = pds.date_range(date,
                                        date + pds.DateOffset(hours=0,
                                                              minutes=1,
                                                              seconds=39),
                                        freq='2S')

    # create a few simulated profiles
    # DataFrame at each time with mixed variables
    profiles = []
    # DataFrame at each time with numeric variables only
    alt_profiles = []
    # Series at each time, numeric data only
    series_profiles = []
    # frame indexed by date times
    frame = pds.DataFrame({'density': data.loc[data.index[0:50],
                                               'mlt'].values.copy(),
                           'dummy_str': ['test'] * 50,
                           'dummy_ustr': [u'test'] * 50},
                          index=data.index[0:50],
                          columns=['density', 'dummy_str', 'dummy_ustr'])
    # frame indexed by float
    dd = np.arange(50) * 1.2
    ff = np.arange(50) / 50.
    ii = np.arange(50) * 0.5
    frame_alt = pds.DataFrame({'density': dd, 'fraction': ff},
                              index=ii,
                              columns=['density', 'fraction'])
    # series version of storage
    series_alt = pds.Series(dd, index=ii, name='series_profiles')

    for time in data.index:
        frame.index = high_rate_template + (time - data.index[0])
        profiles.append(frame)
        alt_profiles.append(frame_alt)
        series_profiles.append(series_alt)

    # store multiple data types into main frame
    data['profiles'] = pds.Series(profiles, index=data.index)
    data['alt_profiles'] = pds.Series(alt_profiles, index=data.index)
    data['series_profiles'] = pds.Series(series_profiles, index=data.index)
    return data, meta.copy()
def load_files(files, tag=None, sat_id=None, altitude_bin=None):
    """Load COSMIC data files directly from a given list.

    May be directly called by user, but in general is called by load.  This
    is separate from the main load function for future support of
    multiprocessor loading.

    Parameters
    ----------
    files : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)
    altitude_bin : integer
        Number of kilometers to bin altitude profiles by when loading.
        Currently only supported for tag='ionprf'.

    Returns
    -------
    output : (list of dicts, one per file)
        Object containing satellite data

    """
    output = [None] * len(files)
    drop_idx = []
    for (i, file) in enumerate(files):
        try:
            data = netCDF4.Dataset(file)
            # build up dictionary with all ncattrs
            new = {}
            # get list of file attributes
            ncattrsList = data.ncattrs()
            for d in ncattrsList:
                new[d] = data.getncattr(d)
            # load all of the variables in the netCDF
            loadedVars = {}
            keys = data.variables.keys()
            for key in keys:
                if data.variables[key][:].dtype.byteorder != '=':
                    loadedVars[key] = \
                        data.variables[key][:].byteswap().newbyteorder()
                else:
                    loadedVars[key] = data.variables[key][:]
            new['profiles'] = pysat.DataFrame(loadedVars)
            output[i] = new
            data.close()
        except RuntimeError:
            # some of the files have zero bytes, which causes a read error
            # this stores the index of these zero byte files so I can drop
            # the Nones the gappy file leaves behind
            drop_idx.append(i)

    # drop anything that came from the zero byte files
    drop_idx.reverse()
    for i in drop_idx:
        del output[i]

    if tag == 'ionprf':
        if altitude_bin is not None:
            for out in output:
                rval = (out['profiles']['MSL_alt'] /
                        altitude_bin).round().values
                out['profiles'].index = rval * altitude_bin
                out['profiles'] = \
                    out['profiles'].groupby(out['profiles'].index.values).mean()
        else:
            for out in output:
                out['profiles'].index = out['profiles']['MSL_alt']
    return output
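# Standalone sketch of the altitude-binning trick used above: dividing by
# the bin width, rounding, and multiplying back snaps each altitude to the
# nearest bin center; groupby then averages the profile within each bin.
import pandas as pds

alt = pds.Series([101.2, 103.9, 106.1], name='MSL_alt')
altitude_bin = 5
binned = (alt / altitude_bin).round().values * altitude_bin
# binned -> array([100., 105., 105.])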
def load(fnames, tag=None, sat_id=None, sim_multi_file_right=False,
         sim_multi_file_left=False, root_date=None):
    # create an artificial satellite data set
    parts = fnames[0].split('/')
    yr = int('20' + parts[-1][0:2])
    month = int(parts[-3])
    day = int(parts[-2])
    date = pysat.datetime(yr, month, day)
    if sim_multi_file_right:
        root_date = root_date or pysat.datetime(2009, 1, 1, 12)
        data_date = date + pds.DateOffset(hours=12)
    elif sim_multi_file_left:
        root_date = root_date or pysat.datetime(2008, 12, 31, 12)
        data_date = date - pds.DateOffset(hours=12)
    else:
        root_date = root_date or pysat.datetime(2009, 1, 1)
        data_date = date
    num = 86400 if tag == '' else int(tag)
    num_array = np.arange(num)
    uts = num_array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    time_delta = date - root_date
    uts_root = np.mod(time_delta.total_seconds(), 5820)
    mlt = np.mod(uts_root + num_array, 5820) * (24. / 5820.)
    data['mlt'] = mlt

    # fake orbit number
    fake_delta = date - pysat.datetime(2008, 1, 1)
    fake_uts_root = fake_delta.total_seconds()
    data['orbit_num'] = ((fake_uts_root + num_array) / 5820.).astype(int)

    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra
    # time to go around full longitude
    long_uts_root = np.mod(time_delta.total_seconds(), 6240)
    longitude = np.mod(long_uts_root + num_array, 6240) * (360. / 6240.)
    data['longitude'] = longitude

    # create latitude area for testing polar orbits
    latitude = 90. * np.cos(np.mod(uts_root + num_array, 5820) *
                            (2. * np.pi / 5820.))
    data['latitude'] = latitude

    # do slt, 20 second offset from mlt
    uts_root = np.mod(time_delta.total_seconds() + 20, 5820)
    data['slt'] = np.mod(uts_root + num_array, 5820) * (24. / 5820.)

    # create some fake data to support testing of averaging routines
    dummy1 = []
    for i in range(len(data['mlt'])):
        dummy1.append(i)
    long_int = (data['longitude'] / 15.).astype(int)
    data['dummy1'] = dummy1
    data['string_dummy'] = ['test'] * len(data)
    data['unicode_dummy'] = [u'test'] * len(data)
    data['int8_dummy'] = np.array([1] * len(data), dtype=np.int8)
    data['int16_dummy'] = np.array([1] * len(data), dtype=np.int16)
    data['int32_dummy'] = np.array([1] * len(data), dtype=np.int32)
    data['int64_dummy'] = np.array([1] * len(data), dtype=np.int64)

    index = pds.date_range(data_date,
                           data_date + pds.DateOffset(seconds=num - 1),
                           freq='S')
    data.index = index[0:num]
    data.index.name = 'time'
    return data, meta.copy()
def load(fnames, tag=None, sat_id=None, altitude_bin=None):
    """Load COSMIC GPS files.

    Parameters
    ----------
    fnames : pandas.Series
        Series of filenames
    tag : str or NoneType
        tag or None (default=None)
    sat_id : str or NoneType
        satellite id or None (default=None)
    altitude_bin : integer
        Number of kilometers to bin altitude profiles by when loading.
        Currently only supported for tag='ionprf'. (default=None)

    Returns
    -------
    output : pandas.DataFrame
        Object containing satellite data
    meta : pysat.Meta
        Object containing metadata such as column names and units

    """
    # input check
    if altitude_bin is not None:
        if tag != 'ionprf':
            estr = 'altitude_bin keyword only supported for "tag=ionprf"'
            raise ValueError(estr)

    num = len(fnames)
    # make sure there are files to read
    if num != 0:
        # call separate load_files routine, segmented for possible
        # multiprocessor load, not included and only benefits about 20%
        output = pysat.DataFrame(load_files(fnames, tag=tag, sat_id=sat_id,
                                            altitude_bin=altitude_bin))
        utsec = output.hour * 3600. + output.minute * 60. + output.second

        # make times unique by adding a unique amount of time less than
        # a second
        # FIXME: need to switch to xarray so unique time stamps not needed
        if tag != 'scnlv1':
            # add 1E-5 seconds to time based upon occulting_sat_id
            # additional 1E-6 seconds added based upon cosmic ID
            # get cosmic satellite ID
            c_id = np.array([snip[3] for snip in output.fileStamp]).astype(int)
            # time offset
            utsec += output.occulting_sat_id * 1.e-5 + c_id * 1.e-6
        else:
            # construct time out of three different parameters
            # duration must be less than 10,000
            # prn_id is allowed two characters
            # antenna_id gets one
            # prn_id and antenna_id are not sufficient for a unique time
            utsec += output.prn_id * 1.e-2 + output.duration.astype(int) * 1.E-6
            utsec += output.antenna_id * 1.E-7

        # move to Index
        output.index = \
            pysat.utils.time.create_datetime_index(year=output.year,
                                                   month=output.month,
                                                   day=output.day,
                                                   uts=utsec)
        if not output.index.is_unique:
            raise ValueError('Datetimes returned by load_files not unique.')
        # make sure UTS strictly increasing
        output.sort_index(inplace=True)

        # use the first available file to pick out meta information
        profile_meta = pysat.Meta()
        meta = pysat.Meta()
        ind = 0
        repeat = True
        while repeat:
            try:
                data = netCDF4.Dataset(fnames[ind])
                ncattrsList = data.ncattrs()
                for d in ncattrsList:
                    meta[d] = {'units': '', 'long_name': d}
                keys = data.variables.keys()
                for key in keys:
                    if 'units' in data.variables[key].ncattrs():
                        profile_meta[key] = {
                            'units': data.variables[key].units,
                            'long_name': data.variables[key].long_name}
                repeat = False
            except RuntimeError:
                # file was empty, try the next one by incrementing ind
                ind += 1
        meta['profiles'] = profile_meta
        return output, meta
    else:
        # no data
        return pysat.DataFrame(None), pysat.Meta()
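# Minimal sketch of the sub-second de-duplication performed above:
# occultation profiles can share the same integer second, so small
# deterministic offsets keyed to the satellite IDs keep the resulting
# DatetimeIndex unique (values below are illustrative).
import numpy as np

utsec = np.array([60.0, 60.0, 60.0])
occulting_sat_id = np.array([2, 7, 12])
c_id = np.array([1, 1, 2])
utsec = utsec + occulting_sat_id * 1.e-5 + c_id * 1.e-6
assert len(np.unique(utsec)) == len(utsec)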
def load(fnames, tag=None, sat_id=None):
    import davitpy

    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)
    elif len(fnames) == 1:
        myPtr = davitpy.pydarn.sdio.sdDataPtr(sTime=pysat.datetime(1980, 1, 1),
                                              fileType='grdex',
                                              eTime=pysat.datetime(2250, 1, 1),
                                              hemi=tag,
                                              fileName=fnames[0])
        myPtr.open()

        in_list = []
        in_dict = {'stid': [], 'channel': [], 'noisemean': [],
                   'noisesd': [], 'gsct': [], 'nvec': [], 'pmax': [],
                   'start_time': [], 'end_time': [], 'vemax': [],
                   'vemin': [], 'pmin': [], 'programid': [], 'wmax': [],
                   'wmin': [], 'freq': []}
        while True:
            info = myPtr.readRec()
            if info is None:
                myPtr.close()
                break
            drift_frame = pds.DataFrame.from_records(info.vector.__dict__,
                                                     nrows=len(info.pmax),
                                                     index=info.vector.index)
            drift_frame['partial'] = 1
            drift_frame.drop('index', axis=1, inplace=True)
            drift_frame.index.name = 'index'
            sum_vec = 0
            for nvec in info.nvec:
                in_list.append(drift_frame.iloc[sum_vec:sum_vec + nvec])
                sum_vec += nvec
            in_dict['stid'].extend(info.stid)
            in_dict['channel'].extend(info.channel)
            in_dict['noisemean'].extend(info.noisemean)
            in_dict['noisesd'].extend(info.noisesd)
            in_dict['gsct'].extend(info.gsct)
            in_dict['nvec'].extend(info.nvec)
            in_dict['pmax'].extend(info.pmax)
            in_dict['start_time'].extend([info.sTime] * len(info.pmax))
            in_dict['end_time'].extend([info.eTime] * len(info.pmax))
            in_dict['vemax'].extend(info.vemax)
            in_dict['vemin'].extend(info.vemin)
            in_dict['pmin'].extend(info.pmin)
            in_dict['programid'].extend(info.programid)
            in_dict['wmax'].extend(info.wmax)
            in_dict['wmin'].extend(info.wmin)
            in_dict['freq'].extend(info.freq)
        output = pds.DataFrame(in_dict)
        output['vector'] = in_list
        output.index = output.start_time
        output.drop('start_time', axis=1, inplace=True)
        return output, pysat.Meta()
    else:
        raise ValueError('Only one filename currently supported.')
def load(fnames, tag=None, sat_id=None, sim_multi_file_right=False,
         sim_multi_file_left=False, root_date=None, file_date_range=None,
         malformed_index=False, **kwargs):
    """Loads the test files

    Parameters
    ----------
    fnames : (list)
        List of filenames
    tag : (str or NoneType)
        Instrument tag (accepts '')
    sat_id : (str or NoneType)
        Instrument satellite ID (accepts '' or a number (i.e., '10'), which
        specifies the number of times to include in the test instrument)
    sim_multi_file_right : (boolean)
        Adjusts date range to be 12 hours in the future or twelve hours
        beyond root_date (default=False)
    sim_multi_file_left : (boolean)
        Adjusts date range to be 12 hours in the past or twelve hours
        before root_date (default=False)
    root_date : (NoneType)
        Optional central date, uses test_dates if not specified.
        (default=None)
    file_date_range : (pds.date_range or NoneType)
        Range of dates for files or None, if this optional argument is not
        used (default=None)
    malformed_index : bool (default=False)
        If True, time index for simulation will be non-unique and
        non-monotonic.
    **kwargs : Additional keywords
        Additional keyword arguments supplied at pysat.Instrument
        instantiation are passed here

    Returns
    -------
    data : (pds.DataFrame)
        Testing data
    meta : (pysat.Meta)
        Metadata

    """
    # create an artificial satellite data set
    parts = os.path.split(fnames[0])[-1].split('-')
    yr = int(parts[0])
    month = int(parts[1])
    day = int(parts[2][0:2])

    # Specify the date tag locally and determine the desired date range
    date = pysat.datetime(yr, month, day)
    pds_offset = pds.DateOffset(hours=12)
    if sim_multi_file_right:
        root_date = root_date or test_dates[''][''] + pds_offset
        data_date = date + pds_offset
    elif sim_multi_file_left:
        root_date = root_date or test_dates[''][''] - pds_offset
        data_date = date - pds_offset
    else:
        root_date = root_date or test_dates['']['']
        data_date = date

    # The sat_id can be used to specify the number of indexes to load for
    # any of the testing objects
    num = 86400 if sat_id == '' else int(sat_id)
    num_array = np.arange(num)
    uts = num_array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit default
    # to 1 Jan 2009, 00:00 UT. 14.84 orbits per day
    time_delta = date - root_date
    data['mlt'] = test.generate_fake_data(time_delta.total_seconds(),
                                          num_array, period=5820,
                                          data_range=[0.0, 24.0])
    # do slt, 20 second offset from mlt
    data['slt'] = test.generate_fake_data(time_delta.total_seconds() + 20,
                                          num_array, period=5820,
                                          data_range=[0.0, 24.0])
    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra
    # time to go around full longitude
    data['longitude'] = test.generate_fake_data(time_delta.total_seconds(),
                                                num_array, period=6240,
                                                data_range=[0.0, 360.0])
    # create latitude area for testing polar orbits
    angle = test.generate_fake_data(time_delta.total_seconds(), num_array,
                                    period=5820,
                                    data_range=[0.0, 2.0 * np.pi])
    data['latitude'] = 90.0 * np.cos(angle)

    # fake orbit number
    fake_delta = date - (test_dates[''][''] - pds.DateOffset(years=1))
    data['orbit_num'] = test.generate_fake_data(fake_delta.total_seconds(),
                                                num_array, period=5820,
                                                cyclic=False)

    # create some fake data to support testing of averaging routines
    mlt_int = data['mlt'].astype(int)
    long_int = (data['longitude'] / 15.0).astype(int)
    if tag == 'ascend':
        data['dummy1'] = [i for i in range(len(data['mlt']))]
    elif tag == 'descend':
        data['dummy1'] = [-i for i in range(len(data['mlt']))]
    elif tag == 'plus10':
        data['dummy1'] = [i + 10 for i in range(len(data['mlt']))]
    elif tag == 'fives':
        data['dummy1'] = [5 for i in range(len(data['mlt']))]
    elif tag == 'mlt_offset':
        data['dummy1'] = mlt_int + 5
    else:
        data['dummy1'] = mlt_int
    data['dummy2'] = long_int
    data['dummy3'] = mlt_int + long_int * 1000.0
    data['dummy4'] = num_array
    data['string_dummy'] = ['test'] * len(data)
    data['unicode_dummy'] = [u'test'] * len(data)
    data['int8_dummy'] = np.ones(len(data), dtype=np.int8)
    data['int16_dummy'] = np.ones(len(data), dtype=np.int16)
    data['int32_dummy'] = np.ones(len(data), dtype=np.int32)
    data['int64_dummy'] = np.ones(len(data), dtype=np.int64)

    index = pds.date_range(data_date,
                           data_date + pds.DateOffset(seconds=num - 1),
                           freq='S')
    if malformed_index:
        index = index[0:num].tolist()
        # nonmonotonic
        index[0:3], index[3:6] = index[3:6], index[0:3]
        # non unique
        index[6:9] = [index[6]] * 3

    data.index = index[0:num]
    data.index.name = 'Epoch'
    return data, meta.copy()
def load(fnames, tag=None, sat_id=None, obs_long=0., obs_lat=0., obs_alt=0.,
         TLE1=None, TLE2=None):
    """Returns data and metadata in the format required by pysat.

    Finds position of satellite in both ECI and ECEF co-ordinates.  Routine
    is directly called by pysat and not the user.

    Parameters
    ----------
    fnames : list-like collection
        File name that contains date in its name.
    tag : string
        Identifies a particular subset of satellite data
    sat_id : string
        Satellite ID
    obs_long : float
        Longitude of the observer on the Earth's surface
    obs_lat : float
        Latitude of the observer on the Earth's surface
    obs_alt : float
        Altitude of the observer on the Earth's surface
    TLE1 : string
        First string for Two Line Element. Must be in TLE format
    TLE2 : string
        Second string for Two Line Element. Must be in TLE format

    Example
    -------
    inst = pysat.Instrument('pysat', 'sgp4',
                            TLE1='1 25544U 98067A   18135.61844383  .00002728  00000-0  48567-4 0  9998',
                            TLE2='2 25544  51.6402 181.0633 0004018  88.8954  22.2246 15.54059185113452')
    inst.load(2018, 1)

    """
    import sgp4
    # wgs72 is the most commonly used gravity model in the satellite
    # tracking community
    from sgp4.earth_gravity import wgs72
    from sgp4.io import twoline2rv
    import ephem
    import pysatMagVect

    # TLEs (Two Line Elements for ISS)
    # format of TLEs is fixed and available from wikipedia...
    # lines encode list of orbital elements of an Earth-orbiting object
    # for a given point in time
    line1 = ('1 25544U 98067A   18135.61844383  '
             '.00002728  00000-0  48567-4 0  9998')
    line2 = ('2 25544  51.6402 181.0633 0004018  '
             '88.8954  22.2246 15.54059185113452')
    # use ISS defaults if not provided by user
    if TLE1 is not None:
        line1 = TLE1
    if TLE2 is not None:
        line2 = TLE2

    # create satellite from TLEs and assuming a gravity model
    # according to module webpage, wgs72 is common
    satellite = twoline2rv(line1, line2, wgs72)

    # grab date from filename
    parts = os.path.split(fnames[0])[-1].split('-')
    yr = int(parts[0])
    month = int(parts[1])
    day = int(parts[2][0:2])
    date = pysat.datetime(yr, month, day)

    # create timing at 1 Hz (for 1 day)
    times = pds.date_range(start=date,
                           end=date + pds.DateOffset(seconds=86399),
                           freq='1S')
    # reduce requirements if on testing server
    # TODO Remove this when testing resources are higher
    on_travis = os.environ.get('ONTRAVIS') == 'True'
    if on_travis:
        times = times[0:100]

    # create list to hold satellite position, velocity
    position = []
    velocity = []
    for time in times:
        # orbit propagator - computes x,y,z position and velocity
        pos, vel = satellite.propagate(time.year, time.month, time.day,
                                       time.hour, time.minute, time.second)
        position.extend(pos)
        velocity.extend(vel)

    # put data into DataFrame
    data = pysat.DataFrame({'position_eci_x': position[::3],
                            'position_eci_y': position[1::3],
                            'position_eci_z': position[2::3],
                            'velocity_eci_x': velocity[::3],
                            'velocity_eci_y': velocity[1::3],
                            'velocity_eci_z': velocity[2::3]},
                           index=times)
    data.index.name = 'Epoch'

    # add position and velocity in ECEF
    # add call for GEI/ECEF translation here
    # instead, since available, I'll use an orbit predictor from another
    # package that outputs in ECEF
    # it also supports ground station calculations

    # the observer's (ground station) position on the Earth surface
    site = ephem.Observer()
    site.lon = str(obs_long)
    site.lat = str(obs_lat)
    site.elevation = obs_alt

    # The first parameter in readtle() is the satellite name
    sat = ephem.readtle('pysat', line1, line2)
    output_params = []
    for time in times:
        lp = {}
        site.date = time
        sat.compute(site)
        # parameters relative to the ground station
        lp['obs_sat_az_angle'] = ephem.degrees(sat.az)
        lp['obs_sat_el_angle'] = ephem.degrees(sat.alt)
        # total distance away
        lp['obs_sat_slant_range'] = sat.range
        # satellite location
        # sub latitude point
        lp['glat'] = np.degrees(sat.sublat)
        # sublongitude point
        lp['glong'] = np.degrees(sat.sublong)
        # elevation of sat in m, stored as km
        lp['alt'] = sat.elevation / 1000.
        # get ECEF position of satellite
        lp['x'], lp['y'], lp['z'] = pysatMagVect.geodetic_to_ecef(lp['glat'],
                                                                  lp['glong'],
                                                                  lp['alt'])
        output_params.append(lp)

    output = pds.DataFrame(output_params, index=times)
    # modify input object to include calculated parameters
    data[['glong', 'glat', 'alt']] = output[['glong', 'glat', 'alt']]
    data[['position_ecef_x', 'position_ecef_y', 'position_ecef_z']] = \
        output[['x', 'y', 'z']]
    data['obs_sat_az_angle'] = output['obs_sat_az_angle']
    data['obs_sat_el_angle'] = output['obs_sat_el_angle']
    data['obs_sat_slant_range'] = output['obs_sat_slant_range']
    return data, meta.copy()
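# Hedged sketch of the sgp4 1.x call pattern used in the loop above (same
# ISS TLE defaults as the function; exact field spacing matters to the TLE
# parser, and the epoch passed to propagate is illustrative):
#
#     from sgp4.earth_gravity import wgs72
#     from sgp4.io import twoline2rv
#     sat = twoline2rv(line1, line2, wgs72)
#     pos, vel = sat.propagate(2018, 5, 15, 12, 0, 0)  # ECI km and km/s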
def to_pysat(self, flatten_twod=True, units_label='UNITS',
             name_label='long_name', fill_label='FILLVAL',
             plot_label='FieldNam', min_label='ValidMin',
             max_label='ValidMax', notes_label='Var_Notes',
             desc_label='CatDesc', axis_label='LablAxis'):
    """Exports loaded CDF data into data, meta for pysat module

    Notes
    -----
    The *_labels should be set to the values in the file, if present.
    Note that once the meta object returned from this function is attached
    to a pysat.Instrument object then the *_labels on the Instrument are
    assigned to the newly attached Meta object.

    The pysat Meta object will use data with labels that match the patterns
    in *_labels even if the case does not match.

    Parameters
    ----------
    flatten_twod : bool (True)
        If True, then two dimensional data is flattened across columns.
        Name mangling is used to group data, first column is 'name', last
        column is 'name_end'. In between, numbers are appended: 'name_1',
        'name_2', etc. All data for a given 2D array may be accessed via
        data.ix[:, 'item':'item_end'].
        If False, then 2D data is stored as a series of DataFrames, indexed
        by Epoch: data.ix[0, 'item']
    units_label : str
        Identifier within metadata for units. Defaults to CDAWeb standard.
    name_label : str
        Identifier within metadata for variable name. Defaults to
        'long_name', not normally present within CDAWeb files. If not, will
        use values from the variable name in the file.
    fill_label : str
        Identifier within metadata for Fill Values. Defaults to CDAWeb
        standard.
    plot_label : str
        Identifier within metadata for variable name used when plotting.
        Defaults to CDAWeb standard.
    min_label : str
        Identifier within metadata for minimum variable value. Defaults to
        CDAWeb standard.
    max_label : str
        Identifier within metadata for maximum variable value. Defaults to
        CDAWeb standard.
    notes_label : str
        Identifier within metadata for notes. Defaults to CDAWeb standard.
    desc_label : str
        Identifier within metadata for a variable description. Defaults to
        CDAWeb standard.
    axis_label : str
        Identifier within metadata for axis name used when plotting.
        Defaults to CDAWeb standard.

    Returns
    -------
    pandas.DataFrame, pysat.Meta
        Data and Metadata suitable for attachment to a pysat.Instrument
        object.

    """
    import pysat
    import pandas

    # copy data
    cdata = self.data.copy()

    # create pysat.Meta object using data above
    # and utilizing the attribute labels provided by the user
    meta = pysat.Meta(pysat.DataFrame.from_dict(self.meta, orient='index'),
                      units_label=units_label, name_label=name_label,
                      fill_label=fill_label, plot_label=plot_label,
                      min_label=min_label, max_label=max_label,
                      notes_label=notes_label, desc_label=desc_label,
                      axis_label=axis_label)

    # account for different possible cases for Epoch, epoch, EPOCH, epOch
    lower_names = [name.lower() for name in meta.keys()]
    for name, true_name in zip(lower_names, meta.keys()):
        if name == 'epoch':
            meta.data.rename(index={true_name: 'Epoch'}, inplace=True)
            epoch = cdata.pop(true_name)
            cdata['Epoch'] = epoch

    # ready to format data, iterate over all of the data names
    # and put into a pandas DataFrame
    two_d_data = []
    drop_list = []
    for name in cdata.keys():
        temp = np.shape(cdata[name])
        # treat 2 dimensional data differently
        if len(temp) == 2:
            if not flatten_twod:
                # put 2D data into a Frame at each time
                # remove data from dict when adding to the DataFrame
                frame = pysat.DataFrame(cdata[name].flatten(),
                                        columns=[name])
                drop_list.append(name)

                step = temp[0]
                new_list = []
                new_index = np.arange(step)
                for i in np.arange(len(epoch)):
                    new_list.append(frame.iloc[i * step:(i + 1) * step, :])
                    new_list[-1].index = new_index
                new_frame = pandas.Series(new_list, index=epoch, name=name)
                two_d_data.append(new_frame)
            else:
                # flatten 2D into series of 1D columns
                new_names = [name + '_{i}'.format(i=i)
                             for i in np.arange(temp[0] - 2)]
                new_names.append(name + '_end')
                new_names.insert(0, name)
                # remove data from dict when adding to the DataFrame
                drop_list.append(name)
                frame = pysat.DataFrame(cdata[name].T, index=epoch,
                                        columns=new_names)
                two_d_data.append(frame)

    for name in drop_list:
        _ = cdata.pop(name)

    # all of the data left over is 1D, add as Series
    data = pysat.DataFrame(cdata, index=epoch)
    two_d_data.append(data)
    data = pandas.concat(two_d_data, axis=1)
    data.drop('Epoch', axis=1, inplace=True)
    return data, meta
def load(fnames, tag=None, sat_id=None):
    """Load COSMIC GPS files.

    Parameters
    ----------
    fnames : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)

    Returns
    -------
    output : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units

    """
    num = len(fnames)
    # make sure there are files to read
    if num != 0:
        # call separate load_files routine, segmented for possible
        # multiprocessor load, not included and only benefits about 20%
        output = pysat.DataFrame(load_files(fnames, tag=tag, sat_id=sat_id))
        utsec = output.hour * 3600. + output.minute * 60. + output.second
        output.index = \
            pysat.utils.time.create_datetime_index(year=output.year,
                                                   month=output.month,
                                                   day=output.day,
                                                   uts=utsec)
        # make sure UTS strictly increasing
        output.sort_index(inplace=True)

        # use the first available file to pick out meta information
        profile_meta = pysat.Meta()
        meta = pysat.Meta()
        ind = 0
        repeat = True
        while repeat:
            try:
                data = netCDF4.Dataset(fnames[ind])
                ncattrsList = data.ncattrs()
                for d in ncattrsList:
                    meta[d] = {'units': '', 'long_name': d}
                keys = data.variables.keys()
                for key in keys:
                    profile_meta[key] = {
                        'units': data.variables[key].units,
                        'long_name': data.variables[key].long_name}
                repeat = False
            except RuntimeError:
                # file was empty, try the next one by incrementing ind
                ind += 1
        meta['profiles'] = profile_meta
        return output, meta
    else:
        # no data
        return pysat.DataFrame(None), pysat.Meta()
def load(fnames, tag=None, sat_id=None):
    import pydarn

    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)
    elif len(fnames) == 1:
        # b = pydarn.sdio.sdDataOpen(pysat.datetime(1980, 1, 1),
        #                            src='local',
        #                            eTime=pysat.datetime(2050, 1, 1),
        #                            fileName=fnames[0])
        myPtr = pydarn.sdio.sdDataPtr(sTime=pysat.datetime(1980, 1, 1),
                                      eTime=pysat.datetime(2250, 1, 1),
                                      hemi=tag)
        myPtr.fType, myPtr.dType = 'grdex', 'dmap'
        myPtr.ptr = open(fnames[0], 'r')

        # data_list = pydarn.sdio.sdDataReadAll(myPtr)
        in_list = []
        in_dict = {'stid': [], 'channel': [], 'noisemean': [],
                   'noisesd': [], 'gsct': [], 'nvec': [], 'pmax': [],
                   'start_time': [], 'end_time': [], 'vemax': [],
                   'vemin': [], 'pmin': [], 'programid': [], 'wmax': [],
                   'wmin': [], 'freq': []}
        # for info in data_list:
        while True:
            info = pydarn.dmapio.readDmapRec(myPtr.ptr)
            info = pydarn.sdio.sdDataTypes.gridData(dataDict=info)
            if info.channel is None:
                break
            drift_frame = pds.DataFrame.from_records(info.vector.__dict__,
                                                     nrows=len(info.pmax),
                                                     index=info.vector.index)
            drift_frame['partial'] = 1
            drift_frame.drop('index', axis=1, inplace=True)
            drift_frame.index.name = 'index'
            sum_vec = 0
            for nvec in info.nvec:
                in_list.append(drift_frame.iloc[sum_vec:sum_vec + nvec])
                sum_vec += nvec
            in_dict['stid'].extend(info.stid)
            in_dict['channel'].extend(info.channel)
            in_dict['noisemean'].extend(info.noisemean)
            in_dict['noisesd'].extend(info.noisesd)
            in_dict['gsct'].extend(info.gsct)
            in_dict['nvec'].extend(info.nvec)
            in_dict['pmax'].extend(info.pmax)
            in_dict['start_time'].extend([info.sTime] * len(info.pmax))
            in_dict['end_time'].extend([info.eTime] * len(info.pmax))
            in_dict['vemax'].extend(info.vemax)
            in_dict['vemin'].extend(info.vemin)
            in_dict['pmin'].extend(info.pmin)
            in_dict['programid'].extend(info.programid)
            in_dict['wmax'].extend(info.wmax)
            in_dict['wmin'].extend(info.wmin)
            in_dict['freq'].extend(info.freq)
        output = pds.DataFrame(in_dict)
        output['vector'] = in_list
        output.index = output.start_time
        output.drop('start_time', axis=1, inplace=True)
        myPtr.ptr.close()
        return output, pysat.Meta()
    else:
        raise ValueError('Only one filename currently supported.')
def load(fnames, tag='', sat_id=None):
    """Load the SuperMAG files

    Parameters
    ----------
    fnames : (list)
        List of filenames
    tag : (str)
        Denotes type of file to load.  Accepted types are 'indices', 'all',
        'stations', and '' (for just magnetometer measurements).
        (default='')
    sat_id : (str or NoneType)
        Satellite ID for constellations, not used. (default=None)

    Returns
    -------
    data : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units

    """
    # Ensure that there are files to load
    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)

    # Ensure that the files are in a list
    if isinstance(fnames, str):
        fnames = [fnames]

    # Initialise the output data
    data = pds.DataFrame()
    baseline = list()

    # Cycle through the files
    for fname in fnames:
        fname = fname[:-11]  # Remove date index from end of filename
        file_type = path.splitext(fname)[1].lower()

        # Open and load the files for each file type
        if file_type == ".csv":
            if tag != "indices":
                temp = load_csv_data(fname, tag)
        else:
            temp, bline = load_ascii_data(fname, tag)

            if bline is not None:
                baseline.append(bline)

        # Save the loaded data in the output data structure
        if len(temp.columns) > 0:
            data = pds.concat([data, temp], sort=True, axis=0)
        del temp

    # If data was loaded, update the meta data
    if len(data.columns) > 0:
        meta = pysat.Meta()
        for cc in data.columns:
            meta[cc] = update_smag_metadata(cc)
        meta.info = {'baseline': format_baseline_list(baseline)}
    else:
        meta = pysat.Meta(None)

    return data, meta
def load_files(files, tag=None, sat_id=None, altitude_bin=None):
    """Load COSMIC data files directly from a given list.

    May be directly called by user, but in general is called by load.  This
    is separate from the main load function for future support of
    multiprocessor loading.

    Parameters
    ----------
    files : pandas.Series
        Series of filenames
    tag : str or NoneType
        tag or None (default=None)
    sat_id : str or NoneType
        satellite id or None (default=None)
    altitude_bin : integer
        Number of kilometers to bin altitude profiles by when loading.
        Currently only supported for tag='ionprf'. (default=None)

    Returns
    -------
    output : list of dicts
        Object containing satellite data, one dict per file

    """
    output = [None] * len(files)
    drop_idx = []
    main_dict = {}
    main_dict_len = {}
    safe_keys = []

    for (i, fname) in enumerate(files):
        try:
            data = netCDF4.Dataset(fname)
            # build up a dictionary with all of the file attributes;
            # these include information about where the profile was observed
            new = {}
            for d in data.ncattrs():
                new[d] = data.getncattr(d)

            # collect variable names from the first readable file
            # (checking `not safe_keys` rather than i == 0, since the
            # first file in the list may itself be empty)
            if not safe_keys:
                safe_keys = list(data.variables.keys())
                for key in safe_keys:
                    main_dict[key] = []
                    main_dict_len[key] = []

            # load all of the variables in the netCDF
            for key in safe_keys:
                # grab data
                t_list = data.variables[key][:]
                # reverse byte order if needed
                if t_list.dtype.byteorder != '=':
                    main_dict[key].append(t_list.byteswap().newbyteorder())
                else:
                    main_dict[key].append(t_list)
                # store lengths
                main_dict_len[key].append(len(main_dict[key][-1]))

            output[i] = new
            data.close()
        except RuntimeError:
            # Some of the files have zero bytes, which causes a read error.
            # Store the index of each empty file so the corresponding None
            # entries can be dropped from output afterwards.
            drop_idx.append(i)

    # drop anything that came from the zero-byte files
    drop_idx.reverse()
    for i in drop_idx:
        del output[i]

    # combine the different sub-lists in main_dict into one array per key
    for key in safe_keys:
        main_dict[key] = np.hstack(main_dict[key])
        main_dict_len[key] = np.cumsum(main_dict_len[key])

    if tag == 'atmprf':
        # This file type has three groups of variable lengths; each goes
        # into its own DataFrame.  Two are processed here, the last is
        # processed like the other file types (see the code just after
        # this if block for the more general explanation).
        p_keys = ['OL_vec2', 'OL_vec1', 'OL_vec3', 'OL_vec4']
        p_dict = {}
        # get indices needed to parse data
        plengths = main_dict_len['OL_vec1']
        max_p_length = np.max(plengths)
        plengths, plengths2 = _process_lengths(plengths)
        # collect data
        for key in p_keys:
            p_dict[key] = main_dict.pop(key)
            _ = main_dict_len.pop(key)
        psub_frame = pysat.DataFrame(p_dict)

        # the variables in this file type change depending upon the
        # processing applied at UCAR
        if 'ies' in main_dict.keys():
            q_keys = ['OL_ipar', 'OL_par', 'ies', 'hes', 'wes']
        else:
            q_keys = ['OL_ipar', 'OL_par']
        q_dict = {}
        # get indices needed to parse data
        qlengths = main_dict_len['OL_par']
        max_q_length = np.max(qlengths)
        qlengths, qlengths2 = _process_lengths(qlengths)
        # collect data
        for key in q_keys:
            q_dict[key] = main_dict.pop(key)
            _ = main_dict_len.pop(key)
        qsub_frame = pysat.DataFrame(q_dict)

        max_length = np.max([max_p_length, max_q_length])
        length_arr = np.arange(max_length)
        # small sub DataFrames
        for i in np.arange(len(output)):
            output[i]['OL_vecs'] = psub_frame.iloc[plengths[i]:plengths[i+1], :]
            output[i]['OL_vecs'].index = \
                length_arr[:plengths2[i+1]-plengths2[i]]
            output[i]['OL_pars'] = qsub_frame.iloc[qlengths[i]:qlengths[i+1], :]
            output[i]['OL_pars'].index = \
                length_arr[:qlengths2[i+1]-qlengths2[i]]

    # create a single data frame with all bits, then break into smaller
    # frames using views
    main_frame = pysat.DataFrame(main_dict)
    # get indices needed to parse data
    lengths = main_dict_len[list(main_dict.keys())[0]]
    # get largest length and create a numpy array with it,
    # used to speed up reindexing below
    max_length = np.max(lengths)
    length_arr = np.arange(max_length)
    # process lengths for ease of parsing
    lengths, lengths2 = _process_lengths(lengths)
    # break main profile data into each individual profile
    for i in np.arange(len(output)):
        output[i]['profiles'] = main_frame.iloc[lengths[i]:lengths[i+1], :]
        output[i]['profiles'].index = length_arr[:lengths2[i+1]-lengths2[i]]

    if tag == 'ionprf':
        if altitude_bin is not None:
            for out in output:
                rval = (out['profiles']['MSL_alt']/altitude_bin).round().values
                out['profiles'].index = rval * altitude_bin
                out['profiles'] = \
                    out['profiles'].groupby(out['profiles'].index.values).mean()
        else:
            for out in output:
                out['profiles'].index = out['profiles']['MSL_alt']

    return output
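# _process_lengths is used above but not defined in this excerpt.  Given how
# its return values are consumed (lengths[i]:lengths[i+1] slices out file i's
# rows, and lengths2[i+1] - lengths2[i] sizes that file's integer index), a
# minimal sketch consistent with that usage would prepend a zero to the
# cumulative row counts.  This is an assumption about the helper inferred
# from its call sites, not the library's actual implementation.


def _process_lengths(lengths):
    """Prep cumulative per-file row counts for profile parsing (sketch)."""
    # `lengths` arrives as cumulative row counts (output of np.cumsum);
    # prepending zero makes lengths[i]:lengths[i+1] select file i's rows
    lengths = np.hstack(([0], lengths))
    # second copy whose pairwise differences give the per-file row counts,
    # used to trim the shared integer index array to each profile's length
    lengths2 = lengths.copy()
    return lengths, lengths2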
def load(fnames, tag=None, sat_id=None,
         fake_daily_files_from_monthly=False,
         flatten_twod=True):
    """Load NASA CDAWeb CDF files.

    This routine is intended to be used by pysat instrument modules
    supporting a particular NASA CDAWeb dataset.

    Parameters
    ------------
    fnames : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)
    fake_daily_files_from_monthly : bool
        Some CDAWeb instrument data files are stored by month, interfering
        with pysat's functionality of loading by day.  When True, this flag
        uses the daily dates that the list_files routine appended to the
        monthly filenames to return data one day at a time.
    flatten_twod : bool
        Flattens 2D data into different columns of the root DataFrame rather
        than produce a Series of DataFrames

    Returns
    ---------
    data : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units

    Examples
    --------
    ::

        # within the new instrument module, at the top level define
        # a new variable named load, and set it equal to this load method
        # code below taken from cnofs_ivm.py

        # support load routine
        # use the default CDAWeb method
        load = cdw.load

    """
    import pysatCDF

    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)
    else:
        # going to use pysatCDF to load the CDF and format
        # data and metadata for pysat using some assumptions.
        # Depending upon your needs the resulting pandas DataFrame may
        # need modification.
        # currently only loads one file, which handles more situations
        # via pysat than you may initially think
        if fake_daily_files_from_monthly:
            # parse out date from filename
            fname = fnames[0][0:-11]
            date = pysat.datetime.strptime(fnames[0][-10:], '%Y-%m-%d')
            with pysatCDF.CDF(fname) as cdf:
                # convert data to pysat format
                data, meta = cdf.to_pysat(flatten_twod=flatten_twod)
                # select the requested day from the monthly data
                data = data.loc[date:date + pds.DateOffset(days=1)
                                - pds.DateOffset(microseconds=1), :]
                return data, meta
        else:
            # basic data return
            with pysatCDF.CDF(fnames[0]) as cdf:
                return cdf.to_pysat(flatten_twod=flatten_twod)
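# As the docstring shows, an instrument module usually exposes this generic
# loader by plain assignment (load = cdw.load).  When the keyword flags need
# non-default values, functools.partial can bind them at module level.  A
# minimal sketch, assuming a monthly-file dataset and the `cdw` alias used
# in the docstring's cnofs_ivm.py example:

import functools

load = functools.partial(cdw.load,
                         fake_daily_files_from_monthly=True,
                         flatten_twod=False)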
def load(fnames, tag=None, sat_id=None):
    # create an artificial satellite data set
    parts = os.path.split(fnames[0])[-1].split('-')
    yr = int(parts[0])
    month = int(parts[1])
    day = int(parts[2][0:2])
    date = pysat.datetime(yr, month, day)

    # scalar divisor below used to reduce the number of time samples
    # covered by the simulation per day. The higher the number the lower
    # the number of samples (86400/scalar)
    scalar = 100
    # integer division so num can size the arrays below under Python 3
    num = 86400 // scalar

    # basic time signal in UTS
    uts = np.arange(num) * scalar
    num_array = np.arange(num) * scalar

    # seed DataFrame with UT array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day.
    # figure out how far in time from the root start and use that info to
    # create a signal that is continuous from that start.
    # going to presume there are 5820 seconds per orbit (97 minute period)
    time_delta = date - pysat.datetime(2009, 1, 1)  # root start
    uts_root = np.mod(time_delta.total_seconds(), 5820)
    # mlt runs 0-24 each orbit
    mlt = np.mod(uts_root + np.arange(num) * scalar, 5820) * (24. / 5820.)
    data['mlt'] = mlt
    # do slt, 20 second offset from mlt
    uts_root = np.mod(time_delta.total_seconds() + 20, 5820)
    data['slt'] = np.mod(uts_root + np.arange(num) * scalar,
                         5820) * (24. / 5820.)

    # create a fake longitude, resets every 6240 seconds.
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, so it takes
    # extra time to go around a full longitude
    long_uts_root = np.mod(time_delta.total_seconds(), 6240)
    longitude = np.mod(long_uts_root + num_array, 6240) * (360. / 6240.)
    data['longitude'] = longitude

    # create latitude signal for testing polar orbits
    latitude = 90. * np.cos(np.mod(uts_root + num_array, 5820)
                            * (2. * np.pi / 5820.))
    data['latitude'] = latitude

    # create real UTC time signal
    index = pds.date_range(date,
                           date + pds.DateOffset(hours=23, minutes=59,
                                                 seconds=59),
                           freq=str(scalar) + 'S')
    data.index = index
    data.index.name = 'epoch'

    # higher rate time signal (for scalar >= 2); this time signal is used
    # for the 2D profiles associated with each time in the main DataFrame
    high_rate_template = pds.date_range(
        date,
        date + pds.DateOffset(hours=0, minutes=1, seconds=39),
        freq='2S')

    # create a few simulated profiles
    # DataFrame at each time with mixed variables
    profiles = []
    # DataFrame at each time with numeric variables only
    alt_profiles = []
    # Series at each time, numeric data only
    series_profiles = []
    # frame indexed by date times; .loc replaces the deprecated .ix here
    frame = pds.DataFrame({'density': data.loc[data.index[0:50],
                                               'mlt'].values.copy(),
                           'dummy_str': ['test'] * 50,
                           'dummy_ustr': [u'test'] * 50},
                          index=data.index[0:50],
                          columns=['density', 'dummy_str', 'dummy_ustr'])
    # frame indexed by float
    dd = np.arange(50) * 1.2
    ff = np.arange(50) / 50.
    ii = np.arange(50) * 0.5
    frame_alt = pds.DataFrame({'density': dd, 'fraction': ff},
                              index=ii,
                              columns=['density', 'fraction'])
    # series version of storage
    series_alt = pds.Series(dd, index=ii, name='series_profiles')

    for time in data.index:
        frame.index = high_rate_template + (time - data.index[0])
        # append a copy so each stored profile keeps its own time index
        # rather than all sharing the final loop iteration's index
        profiles.append(frame.copy())
        alt_profiles.append(frame_alt)
        series_profiles.append(series_alt)

    # store multiple data types into main frame
    data['profiles'] = pds.Series(profiles, index=data.index)
    data['alt_profiles'] = pds.Series(alt_profiles, index=data.index)
    data['series_profiles'] = pds.Series(series_profiles, index=data.index)
    return data, meta.copy()
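# A quick self-contained check of the orbit arithmetic above: with a 5820 s
# (97 minute) period anchored at the 2009-01-01 root start, the MLT signal
# should be continuous across day boundaries.  The sketch below (names
# hypothetical) evaluates the phase at the last sample of one day and the
# first sample of the next, using the same mod construction as the loader.

import datetime as dt

import numpy as np


def orbit_phase(date, seconds_into_day, period=5820.):
    # offset of this date from the root start, wrapped to one orbit
    time_delta = date - dt.datetime(2009, 1, 1)
    uts_root = np.mod(time_delta.total_seconds(), period)
    # phase in MLT hours, 0-24 per orbit
    return np.mod(uts_root + seconds_into_day, period) * (24. / period)


# the phases at 23:59:59 on Jan 2 and 00:00:00 on Jan 3 differ by exactly
# one second's worth of phase, i.e. the signal is continuous across the day
end_of_day = orbit_phase(dt.datetime(2009, 1, 2), 86399.)
start_next = orbit_phase(dt.datetime(2009, 1, 3), 0.)
assert abs(start_next - end_of_day - 24. / 5820.) < 1e-9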
def to_pysat(self, flatten_twod=True):
    """
    Exports loaded CDF data into data, meta for pysat module

    Parameters
    ----------
    flatten_twod : bool (True)
        If True, then two dimensional data is flattened across columns.
        Name mangling is used to group data: the first column is 'name',
        the last column is 'name_end', and numbered names 'name_0',
        'name_1', etc. fill the columns in between.  All data for a given
        2D array may be accessed via data.loc[:, 'item':'item_end'].
        If False, then 2D data is stored as a Series of DataFrames,
        indexed by Epoch, e.g. data.loc[data.index[0], 'item'].

    Returns
    -------
    data, meta

    """
    import pandas
    import pysat

    # copy data
    cdata = self.data.copy()
    meta = pysat.Meta(pysat.DataFrame.from_dict(self.meta, orient='index'))

    # all column names should be lower case
    lower_names = [name.lower() for name in meta.data.columns]
    meta.data.columns = lower_names

    # replace standard CDAWeb terms with more pysat-friendly versions
    if 'lablaxis' in meta.data.columns:
        meta.data.drop('long_name', inplace=True, axis=1)
        meta.data.rename(columns={'lablaxis': 'long_name'}, inplace=True)
    if 'catdesc' in meta.data.columns:
        meta.data.rename(columns={'catdesc': 'description'}, inplace=True)

    # account for different possible cases for Epoch, epoch, EPOCH, epOch
    lower_names = [name.lower() for name in meta.data.index.values]
    for name, true_name in zip(lower_names, meta.data.index.values):
        if name == 'epoch':
            meta.data.rename(index={true_name: 'Epoch'}, inplace=True)
            epoch = cdata.pop(true_name)
            cdata['Epoch'] = epoch

    # ready to format data, iterate over all of the data names
    # and put into a pandas DataFrame
    two_d_data = []
    drop_list = []
    for name in cdata.keys():
        temp = np.shape(cdata[name])
        # treat 2 dimensional data differently
        if len(temp) == 2:
            if not flatten_twod:
                # put 2D data into a Frame at each time;
                # remove data from dict when adding to the DataFrame
                frame = pysat.DataFrame(cdata[name].flatten(),
                                        columns=[name])
                drop_list.append(name)

                step = temp[0]
                new_list = []
                new_index = np.arange(step)
                for i in np.arange(len(epoch)):
                    new_list.append(frame.iloc[i*step:(i+1)*step, :])
                    new_list[-1].index = new_index
                new_frame = pandas.Series(new_list, index=epoch, name=name)
                two_d_data.append(new_frame)
            else:
                # flatten 2D into a series of 1D columns
                new_names = [name + '_{i}'.format(i=i)
                             for i in np.arange(temp[0] - 2)]
                new_names.append(name + '_end')
                new_names.insert(0, name)
                # remove data from dict when adding to the DataFrame
                drop_list.append(name)
                frame = pysat.DataFrame(cdata[name].T,
                                        index=epoch,
                                        columns=new_names)
                two_d_data.append(frame)

    for name in drop_list:
        _ = cdata.pop(name)

    # all of the data left over is 1D, add as Series
    data = pysat.DataFrame(cdata, index=epoch)
    two_d_data.append(data)
    data = pandas.concat(two_d_data, axis=1)
    data.drop('Epoch', axis=1, inplace=True)
    return data, meta
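# Usage sketch for the flattened 2D layout produced above, with a toy array
# standing in for a real CDF variable (the name 'item' and the data are
# illustrative assumptions).  It mimics the flattened branch: transpose so
# rows are epochs, then mangle column names as 'item', 'item_0', ...,
# 'item_end', after which a label slice recovers the full 2D block.

import numpy as np
import pandas as pds

# toy (depth, n_epochs) array, as a 2D variable would arrive from a CDF
epoch = pds.date_range('2009-01-01', periods=3, freq='S')
arr = np.arange(12).reshape(4, 3)
names = ['item', 'item_0', 'item_1', 'item_end']
data = pds.DataFrame(arr.T, index=epoch, columns=names)

# label-based slicing recovers the full 2D block or a single epoch's row
block = data.loc[:, 'item':'item_end']        # all epochs, shape (3, 4)
row = data.loc[epoch[0], 'item':'item_end']   # first epoch as a Series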