def plot_selections_in_sroi(sc, tstart, tstop=None, outdir=None):
    '''
    Plot burst selections within the SROIs of each orbit in a time range.

    For every orbit between the orbit containing `tstart` and the orbit
    containing `tstop`, the SROI intervals are looked up and a burst
    selection figure is created per SROI via `plot_burst_selections`.
    A failure on one SROI is reported on stdout and skipped so that the
    remaining SROIs are still processed.

    Parameters
    ----------
    sc : str
        Spacecraft identifier
    tstart : `datetime.datetime`
        Time within the first orbit to plot
    tstop : `datetime.datetime`, optional
        Time within the last orbit to plot. If None (default), the
        current time at the moment of the call is used.
    outdir : str or `pathlib.Path`, optional
        Directory in which to save the figures. It is created if it does
        not exist. If None (default), figures are created and closed
        without being saved.
    '''
    # A `dt.datetime.now()` default in the signature would be evaluated only
    # once, when the module is imported. Resolve "now" at call time instead.
    if tstop is None:
        tstop = dt.datetime.now()

    # Get orbit range
    start_orbit = time_to_orbit(tstart, sc=sc)
    stop_orbit = time_to_orbit(tstop, sc=sc)

    # Saving is optional: only touch the file system when a directory is given
    if outdir is not None:
        outdir = pathlib.Path(outdir)
        outdir.mkdir(parents=True, exist_ok=True)
    fname_fmt = 'burst_selections_orbit-{0}_sroi-{1}.png'

    # Step through each orbit
    for orbit in range(start_orbit, stop_orbit + 1):
        # Get the SROI start and end times
        sroi = sdc.mission_events('sroi', int(orbit), int(orbit), sc=sc)

        # NOTE(review): only indices 0 and 2 (labelled SROI-1 and SROI-3) are
        # visited; if every SROI was intended this should be range(3) -- confirm.
        for i in (0, 2):
            try:
                fig, axes = plot_burst_selections(sc, sroi['tstart'][i],
                                                  sroi['tend'][i])
            except Exception as e:
                # Best effort: report the failure and move on to the next SROI
                print('Failed on orbit-{0} SROI-{1}'.format(orbit, i + 1))
                print(e)
                continue

            plt.subplots_adjust(left=0.15, right=0.85, top=0.93)
            if outdir is not None:
                plt.savefig(outdir / fname_fmt.format(orbit, i + 1))
            plt.close(fig)
def get_sroi(start, sc='mms1'):
    '''
    Look up the start and end times of the sub-regions of interest
    (SROIs) of a single orbit.

    Parameters
    ----------
    start : `datetime.datetime` or int
        Either an orbit number or a time that falls within the orbit.
        When a time is given it is first converted to an orbit number.
        Because the SROIs cover less than the full orbit, the given time
        does not necessarily lie inside any of the returned intervals.
    sc : str
        Spacecraft identifier

    Returns
    -------
    tstart, tend : `datetime.datetime`
        Start and end times of the SROIs
    '''
    # A time stamp has to be mapped onto its orbit number first
    if isinstance(start, dt.datetime):
        orbit = time_to_orbit(start, sc=sc)
    else:
        orbit = start

    # Query the SROIs of exactly that one orbit
    events = sdc.mission_events('sroi', orbit, orbit, sc=sc)
    return events['tstart'], events['tend']
def __init__(self, sc, level, start, end,
             include_selections=True, include_partials=True, verbose=False):
    '''
    Set up the data interval and the interface to the SDC.

    Parameters
    ----------
    sc : str
        Spacecraft identifier
    level : str
        Data level. Must be either 'sitl' or 'l2'.
    start, end : int or date-like
        If `start` is an int, `start` and `end` are passed to
        `api.mission_events` as the orbit range of an 'sroi' query and the
        interval runs from the first returned SROI start to the last
        returned SROI end. Otherwise both are passed through
        `validate_date`.
    include_selections : bool
        Stored on the instance. Must be True when `include_partials`
        is True.
    include_partials : bool
        Stored on the instance.
    verbose : bool
        Stored on the instance.

    Raises
    ------
    ValueError
        If `include_partials` is True while `include_selections` is
        False, or if `level` is not 'sitl' or 'l2'.
    '''
    # Validate the cheap arguments first so that a bad call fails before
    # any request is made to the SDC.
    if include_partials and not include_selections:
        raise ValueError(
            "Include_selections must be true in order to include partial selections in the combined dataframe."
        )

    # This script works only for 'sitl' and 'l2' data
    if level not in ('sitl', 'l2'):
        raise ValueError('Level must be either "sitl" or "l2".')

    self.sc = sc
    self.level = level
    self.include_selections = include_selections
    self.include_partials = include_partials
    self.verbose = verbose

    if isinstance(start, int):
        # Orbit numbers: span from the first SROI start to the last SROI end
        sroi = api.mission_events('sroi', start, end, sc=sc)
        self.start_date = sroi['tstart'][0]
        self.end_date = sroi['tend'][-1]
    else:
        self.start_date = validate_date(start)
        self.end_date = validate_date(end)

    # SITL data is available in the fast-survey region of the orbit.
    # For many instruments, fast- and slow-survey data are combined into
    # a single survey product
    self.mode = 'srvy'

    # Create an interface to the SDC
    self.mms = api.MrMMS_SDC_API(sc=sc, mode=self.mode,
                                 start_date=self.start_date,
                                 end_date=self.end_date)

    # Ensure that the log-in information is there.
    #   - If the config file was already set, this step is redundant.
    self.mms._data_root = pymms.config['data_root']
    # NOTE(review): self.mode is always 'srvy' at this point, so this branch
    # never runs as written. Presumably `self.level == 'sitl'` was intended;
    # confirm the auth call's behavior before changing the condition, since
    # doing so would start executing it.
    if self.mode == 'sitl':
        self.mms._session.auth(pymms.config['username'], pymms.config['password'])
def time_to_orbit(time, sc='mms1', delta=10):
    '''
    Identify the orbit in which a time falls.

    Parameters
    ----------
    time : `datetime.datetime`
        Time within the orbit
    sc : str
        Spacecraft identifier
    delta : int
        Number of days around the time of interest in which
        to search for the orbit. Should be the duration of at least
        one orbit.

    Returns
    -------
    orbit : int
        Orbit during which `time` occurs

    Raises
    ------
    ValueError
        If none of the orbits returned for the search window contains `time`.
    '''
    # sdc.mission_events filters by date, and the dates are right-exclusive:
    # [tstart, tstop). Search a window of `delta` days on either side of the
    # date of `time`, aligned to midnight, so that the orbit containing
    # `time` is among those returned.
    tstop = dt.datetime.combine(time.date() + dt.timedelta(days=delta),
                                dt.time(0, 0, 0))
    tstart = tstop - dt.timedelta(days=2 * delta)
    orbits = sdc.mission_events('orbit', tstart, tstop, sc=sc)

    # Keep the last matching orbit. The start is inclusive so that a time
    # exactly at the beginning of an orbit is assigned to that orbit.
    orbit = None
    for orbit_start, orbit_end, orbit_num in zip(orbits['tstart'],
                                                 orbits['tend'],
                                                 orbits['start_orbit']):
        if orbit_start <= time < orbit_end:
            orbit = orbit_num

    # Fail loudly instead of handing None back to the caller
    if orbit is None:
        raise ValueError('Did not find correct orbit!')

    return orbit
Normalization (here, standardization with `StandardScaler`) rescales each feature of the data so that it is centered on zero with unit variance (zero-mean and unit variance); most values then fall close to the range from negative one to one, although they are not strictly confined to it. Normalization improves the speed and performance of training neural networks because it puts all features on a common scale without changing the relative differences within each feature. scaler = preprocessing.StandardScaler() mms_data = scaler.fit_transform(mms_data) mms_data = pd.DataFrame(mms_data, index, column_names) mms_data = mms_data.join(selections) Next, we calculate class weights for our data classes (selected data points and non-selected data points). Since the distribution of our data is heavily skewed towards non-selected data points (just 1.9% of all data points in our training data were selected), it's important to give the class of selected data points a higher weight when training. In fact, without establishing these class weights our model would quickly achieve 98% accuracy by naively leaving all data points unselected. false_weight = len(mms_data)/(2*np.bincount(mms_data['selected'].values)[0]) true_weight = len(mms_data)/(2*np.bincount(mms_data['selected'].values)[1]) Our entire dataset is not contiguous, and it contains time intervals with no observations. Therefore, we break it up into contiguous chunks. We can do so by breaking up the data into the windows that the SITLs used to review the data. sitl_windows = mms.mission_events('sroi', mms_data.index[0].to_pydatetime(), mms_data.index[-1].to_pydatetime(), sc='mms1') windows = [] for start, end in zip(sitl_windows['tstart'], sitl_windows['tend']): window = mms_data[start:end] if not window.empty and len(window[window['selected']==True])>1: windows.append(window) windows Finally, we break up our data into individual sequences that will be fed to our neural network. We define a SEQ_LEN variable that will determine the length of our sequences. 
This variable will also be passed to our network so that it knows how long a data sequence to expect while training. The choice of sequence length is largely arbitrary. SEQ_LEN = 250 For each window, we assemble two sequences: an X_sequence containing individual data points from our training data and a y_sequence containing the truth values for those data points (whether or not those data points were selected by a SITL).
from scipy.stats import binned_statistic
import datetime as dt
from pymms.sdc import mrmms_sdc_api as api
from pymms.data import edp, fpi, util as mms_util
from matplotlib import pyplot as plt, dates as mdates
import util

# Event parameters: spacecraft, data rate mode and the time interval of the
# event. The interval ends one microsecond before midnight.
sc, mode = 'mms1', 'fast'
t0 = dt.datetime(2016, 11, 5, 20)
t1 = dt.datetime(2016, 11, 6) - dt.timedelta(microseconds=1)

# Determine the start and end times of the orbit during which the event
# occurs: query a window from 20 hours before to 2 days after the event
# start and take the first orbit that is returned.
orbit = api.mission_events(
    'orbit',
    t0 - dt.timedelta(hours=20),
    t0 + dt.timedelta(days=2),
    sc=sc,
)
orbit_num, orbit_t0, orbit_t1 = (
    orbit['start_orbit'][0],
    orbit['tstart'][0],
    orbit['tend'][0],
)

# Sunlit area of the spacecraft
#   - Roberts JGR 2012 https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JA027854
A_sunlit = 34  # m^2

# Physical constants and conversions
# NOTE(review): `constants` is not imported in the visible part of this
# script; presumably `from scipy import constants` appears elsewhere -- confirm.
eV2K = constants.value('electron volt-kelvin relationship')
me, e, kB = constants.m_e, constants.e, constants.k  # kg, C, J/K