def init(): """Initialize the jedi catalog: load the data Inputs: None. Optional Inputs: None Outputs: All outputs are globals accessible by doing import jedi_config logger [JpmLogger]: A configurable log that can optionally also print to console. all_minutes_since_last_flare [numpy float array]: The amount of time between each flare. preflare_indices [numpy int array]: The indices where flares are considered time-independent. Optional Outputs: None Example: jedi_config.init() """ global logger, all_minutes_since_last_flare, preflare_indices # Initialize logger logger = JpmLogger(filename=logger_filename, path=output_path, console=False) logger.info('Logger initialized.') # Set up folders init_folders() # Set up filenames init_filenames() # Load the EVE data load_eve_data() # Get GOES flare events above C1 within date range corresponding to EVE data load_goes_flare_event_data() # Compute the amount of time between all flares [minutes] peak_time = goes_flare_events['peak_time'] all_minutes_since_last_flare = (peak_time[1:] - peak_time[0:-1]).sec / 60.0 # Figure out which flares are independent, store those indices is_flare_independent = all_minutes_since_last_flare > threshold_time_prior_flare_minutes preflare_indices = np.where( is_flare_independent )[0] + 1 # Add 1 to map back to event index and not to the differentiated vector logger.info( 'Found {0} independent flares of {1} total flares given a time separation of {2} minutes.' .format(len(preflare_indices), len(is_flare_independent), threshold_time_prior_flare_minutes))
def get_goes_flare_events(start_time, end_time, minimum_flare_size='C1', verbose=False):
    """Get a list of flare events from NOAA's GOES/XRS. Just a wrapper around sunpy.instr.goes get_goes_event_list.

    Inputs:
        start_time [metatime or string]: The beginning of the time window of interest.
                                         See jpm_time_conversions.py
                                         (https://github.com/jmason86/python_convenience_functions/blob/master/jpm_time_conversions.py)
                                         for allowed metatime formats if not using an iso or human-like time string.
        end_time [metatime or string]:   Same as start_time but for the end of the time window.

    Optional Inputs:
        minimum_flare_size [string]: The minimum flare size to search for. Default is 'C1'.
        verbose [bool]:              Set to log the processing messages to disk and console. Default is False.

    Outputs:
        goes_events [list]: The list of GOES flare events corresponding to the input search criteria.

    Optional Outputs:
        None.

    Example:
        goes_events = get_goes_flare_events(pd.Timestamp('2010-05-01 00:00:00'),
                                            pd.Timestamp('2018-01-12 00:00:00'),
                                            verbose=True)
    """
    # Prepare the logger for verbose
    if verbose:
        # TODO: Update the path
        logger = JpmLogger(filename='get_goes_flare_events_log', path='/Users/jmason86/Desktop/')
        logger.info("Getting > {0} flares from {1} to {2}.".format(minimum_flare_size, start_time, end_time))

    if not isinstance(start_time, str):
        start_time = metatimes_to_human(np.array([start_time]))[0]
    if not isinstance(end_time, str):
        end_time = metatimes_to_human(np.array([end_time]))[0]

    time_range = TimeRange(start_time, end_time)
    goes_events = get_goes_event_list(time_range, goes_class_filter=minimum_flare_size)

    if verbose:
        logger.info("Found {0} events.".format(len(goes_events)))

    # Return the flare events
    return goes_events
def light_curve_peak_match_subtract(light_curve_to_subtract_from_df, light_curve_to_subtract_with_df, estimated_time_of_peak, max_seconds_shift=1800, plot_path_filename=None, verbose=False, logger=None): """Align the peak of a second light curve to the first, scale its magnitude to match, and subtract it off. Inputs: light_curve_to_subtract_from_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex and a column for irradiance. light_curve_to_subtract_with_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex and a column for irradiance. estimated_time_of_peak [metatime]: The estimated time that the peak should occur. This could come from, e.g., GOES/XRS. Optional Inputs: max_seconds_shift [int]: The maximum allowed time shift in seconds to get the peaks to match. plot_path_filename [str]: Set to a path and filename in order to save the summary plot to disk. Default is None, meaning the plot will not be saved to disk. verbose [bool]: Set to log the processing messages to disk and console. Default is False. logger [JpmLogger]: A configured logger from jpm_logger.py. If set to None, will generate a new one. Default is None. Outputs: light_curve_corrected_df [pd DataFrame]: A pandas DataFrame with the same format as light_curve_to_subtract_from_df but with the resultant peak match and subtraction performed. Returns np.nan if the peaks couldn't be found. seconds_shift [float]: The number of seconds that light_curve_to_subtract_with_df was shifted to get its peak to match light_curve_to_subtract_from_df. Returns np.nan if the peaks couldn't be found. scale_factor [float]: The multiplicative factor applied to light_curve_to_subtract_with_df to get its peak to match light_curve_to_subtract_from_df. Returns np.nan if the peaks couldn't be found. Optional Outputs: None Example: light_curve_corrected_df, seconds_shift, scale_factor = light_curve_peak_match_subtract(light_curve_to_subtract_from_df, light_curve_to_subtract_with_df, estimated_time_of_peak, plot_path_filename='./', verbose=True) """ # Prepare the logger for verbose if verbose: if not logger: logger = JpmLogger(filename='light_curve_peak_match_subtract_log', path='/Users/jmason86/Desktop/') logger.info("Running on event with light curve start time of {0}.".format(light_curve_to_subtract_from_df.index[0])) # Drop NaNs since peakutils can't handle them light_curve_to_subtract_from_df = light_curve_to_subtract_from_df.dropna() light_curve_to_subtract_with_df = light_curve_to_subtract_with_df.dropna() # Detrend and find the peaks that are >= 95% of the max irradiance within if verbose: logger.info("Detrending light curves.") if (light_curve_to_subtract_from_df['irradiance'].values < 0).all(): light_curve_to_subtract_from_df.iloc[0] = 1 # Else can crash peakutils.baseline base_from = peakutils.baseline(light_curve_to_subtract_from_df) detrend_from = light_curve_to_subtract_from_df - base_from indices_from = peakutils.indexes(detrend_from.values.squeeze(), thres=0.95) if (light_curve_to_subtract_with_df['irradiance'].values < 0).all(): light_curve_to_subtract_with_df.iloc[0] = 1 # Else can crash peakutils.baseline base_with = peakutils.baseline(light_curve_to_subtract_with_df) detrend_with = light_curve_to_subtract_with_df - base_with indices_with = peakutils.indexes(detrend_with.values.squeeze(), thres=0.95) if len(indices_from) == 0: if verbose: logger.warning('Could not find peak in light curve to subtract from.') return np.nan, np.nan, np.nan if len(indices_with) == 0: if verbose: logger.warning('Could not find peak in light curve 
to subtract with.') return np.nan, np.nan, np.nan # Identify the peak closest to the input estimated peak time (e.g., from GOES/XRS) if verbose: logger.info("Identifying peaks closest to initial guess in light curves.") peak_index_from = indices_from[closest(light_curve_to_subtract_from_df.index[indices_from], estimated_time_of_peak)] peak_index_with = indices_with[closest(light_curve_to_subtract_with_df.index[indices_with], estimated_time_of_peak)] index_shift = peak_index_from - peak_index_with # Compute how many seconds the time shift corresponds to seconds_shift = (light_curve_to_subtract_from_df.index[peak_index_from] - light_curve_to_subtract_with_df.index[peak_index_with]).total_seconds() # Fail if seconds_shift > max_seconds_shift isTimeShiftValid = True if abs(seconds_shift) > max_seconds_shift: if verbose: logger.warning("Cannot do peak match. Time shift of {0} seconds is greater than max allowed shift of {1} seconds.".format(seconds_shift, max_seconds_shift)) isTimeShiftValid = False # Shift the subtract_with light curve in time to align its peak to the subtract_from light curve if isTimeShiftValid: if verbose: logger.info("Shifting and scaling the light curve to subtract with.") shifted_with = light_curve_to_subtract_with_df.shift(index_shift) # Scale the subtract_with light curve peak irradiance to match the subtract_from light curve peak irradiance scale_factor = (detrend_from.values[peak_index_from] / shifted_with.values[peak_index_with + index_shift])[0] shifted_scaled_with = shifted_with * scale_factor light_curve_corrected_df = light_curve_to_subtract_from_df - shifted_scaled_with if verbose: if light_curve_corrected_df.isnull().values.sum() > 1: logger.warning("%s points were shifted to become NaN."
% light_curve_corrected_df.isnull().values.sum()) logger.info("Light curve peak matching and subtraction complete.") if plot_path_filename: from jpm_number_printing import latex_float seconds_shift_string = '+' if seconds_shift >= 0 else '' seconds_shift_string += str(int(seconds_shift)) if isTimeShiftValid: scale_factor_string = latex_float(scale_factor) plt.style.use('jpm-transparent-light') from matplotlib import dates plt.clf() fig, ax = plt.subplots() plt.plot(light_curve_to_subtract_from_df.index.values, light_curve_to_subtract_from_df.values, c='limegreen') plt.tick_params(axis='x', which='minor', labelbottom='off') plt.xlabel(estimated_time_of_peak) plt.ylabel('Irradiance [%]') fmtr = dates.DateFormatter("%H:%M:%S") ax.xaxis.set_major_formatter(fmtr) ax.xaxis.set_major_locator(dates.HourLocator()) if isTimeShiftValid: plt.title('I: $\\times$' + scale_factor_string + ', t: ' + seconds_shift_string + ' s', color='tomato') shifted_scaled_with.plot(c='tomato', label='subtract with', ax=ax) light_curve_corrected_df.plot(c='darkgrey', label='result', ax=ax) else: plt.title('t: ' + seconds_shift_string + ' s > max allowed {0} s'.format(max_seconds_shift), color='tomato') plt.plot(light_curve_to_subtract_with_df.index.values, light_curve_to_subtract_with_df.values, c='tomato') plt.scatter(light_curve_to_subtract_from_df.index[peak_index_from], light_curve_to_subtract_from_df.values[peak_index_from], c='black') if isTimeShiftValid: plt.scatter(shifted_scaled_with.index[peak_index_with + index_shift], shifted_scaled_with.values[peak_index_with + index_shift], c='black') ax.legend(['subtract from', 'subtract with', 'result'], loc='best') else: plt.scatter(light_curve_to_subtract_with_df.index[peak_index_with], light_curve_to_subtract_with_df.values[peak_index_with], c='black') ax.legend(['subtract from', 'subtract with'], loc='best') path = os.path.dirname(plot_path_filename) if not os.path.exists(path): os.makedirs(path) plt.savefig(plot_path_filename) if verbose: logger.info("Summary plot saved to %s" % plot_path_filename) if isTimeShiftValid: return light_curve_corrected_df, seconds_shift, scale_factor else: return np.nan, seconds_shift, np.nan
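# --- Usage sketch (illustrative only) ---
# A synthetic demonstration of the inputs light_curve_peak_match_subtract expects: two
# single-column 'irradiance' DataFrames on a DatetimeIndex, plus a rough estimate of the peak
# time (in practice the GOES/XRS flare peak). The peak shapes, amplitudes, and times below are
# made up; the call mirrors the docstring example.
def _example_peak_match_subtract():  # pragma: no cover
    times = pd.date_range('2012-04-15 17:00', periods=240, freq='1min')
    seconds = np.arange(240) * 60.0
    # Light curve of interest: a single peak ~60 minutes in
    peak_from = 5.0 * np.exp(-((seconds - 3600.0) / 900.0) ** 2)
    # Light curve to subtract with: same shape, weaker and peaking ~10 minutes later
    peak_with = 2.0 * np.exp(-((seconds - 4200.0) / 900.0) ** 2)
    subtract_from_df = pd.DataFrame({'irradiance': peak_from}, index=times)
    subtract_with_df = pd.DataFrame({'irradiance': peak_with}, index=times)
    corrected_df, seconds_shift, scale_factor = light_curve_peak_match_subtract(
        subtract_from_df, subtract_with_df, pd.Timestamp('2012-04-15 18:00'), verbose=False)
    # Expect a roughly flat residual, a negative seconds_shift (~-600 s), and a scale factor > 1
    return corrected_df, seconds_shift, scale_factor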
def automatic_fit_light_curve(light_curve_df, minimum_score=0.3, plots_save_path=None, verbose=False, logger=None): """Automatically fit the best support vector machine regression (SVR) model for the input light curve. Inputs: light_curve_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex, and columns for irradiance and uncertainty. Optional Inputs: minimum_score [float]: Set this to the minimum explained variance score (0 - 1) acceptable for fits. If the best fit score is < minimum_score, this function will return np.nan for light_curve_fit. Default value is 0.3. plots_save_path [str]: Set to a path in order to save the validation curve and best fit overplot on the data to disk. Default is None, meaning no plots will be saved to disk. verbose [bool]: Set to log the processing messages to disk and console. Default is False. logger [JpmLogger]: A configured logger from jpm_logger.py. If set to None, will generate a new one. Default is None. Outputs: light_curve_fit_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex, and columns for fitted irradiance and uncertainty. best_fit_gamma [float]: The best found gamma hyper parameter for the SVR. best_fit_score [float]: The best explained variance score. Optional Outputs: None Example: light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve(light_curve_df, verbose=True) """ # Prepare the logger for verbose if verbose: if not logger: logger = JpmLogger(filename='automatic_fit_light_curve_log', path='/Users/jmason86/Desktop/') logger.info("Running on event with light curve start time of {0}.".format(light_curve_df.index[0])) # Pull data out of the DataFrame for compatibility formatting X = metatimes_to_seconds_since_start(light_curve_df.index) y = light_curve_df['irradiance'].values # Check for NaNs and issue warning that they are being removed from the dataset if verbose: if np.isnan(y).any(): logger.warning("There are NaN values in light curve. Dropping them.") finite_irradiance_indices = np.isfinite(y) X = X[finite_irradiance_indices] X = X.reshape(len(X), 1) # Format to be compatible with validation_curve and SVR.fit() uncertainty = light_curve_df.uncertainty[np.isfinite(y)] y = y[finite_irradiance_indices] if verbose: logger.info("Fitting %s points." % len(y)) # Helper function for compatibility with validation_curve def jpm_svr(gamma=1e-6, **kwargs): return make_pipeline(SVR(kernel='rbf', C=1e3, gamma=gamma, **kwargs)) # Hyper parameter for SVR is gamma, so generate values of it to try gamma = np.logspace(-7, 1, num=20, base=10) # Overwrite the default scorer (R^2) with explained variance score evs = make_scorer(explained_variance_score) # Split the data between training/testing 50/50 but across the whole time range rather than the default consecutive Kfolds import time t0 = time.time() shuffle_split = ShuffleSplit(n_splits=20, train_size=0.5, test_size=0.5, random_state=None) # Generate the validation curve -- test all them gammas! 
# Parallelized to speed it up (n_jobs = # of parallel threads) train_score, val_score = validation_curve(jpm_svr(), X, y, 'svr__gamma', gamma, cv=shuffle_split, n_jobs=7, scoring=evs) t1 = time.time() if verbose: logger.info('Validation curve took {0} seconds to run.'.format(t1 - t0)) if verbose: logger.info("Validation curve complete.") if plots_save_path: plt.clf() plt.style.use('jpm-transparent-light') plt.plot(gamma, np.median(train_score, 1), label='training score') plt.plot(gamma, np.median(val_score, 1), label='validation score') ax = plt.axes() plt.legend(loc='best') plt.title("t$_0$ = " + datetimeindex_to_human(light_curve_df.index)[0]) ax.set_xscale('log') plt.xlabel('gamma') plt.ylabel('score') plt.ylim(0, 1) filename = plots_save_path + 'Validation Curve t0 ' + datetimeindex_to_human(light_curve_df.index)[0] + '.png' plt.savefig(filename) if verbose: logger.info("Validation curve saved to %s" % filename) # Identify the best score scores = np.median(val_score, axis=1) best_fit_score = np.max(scores) best_fit_gamma = gamma[np.argmax(scores)] if verbose: logger.info('Scores: ' + str(scores)) logger.info('Best score: ' + str(best_fit_score)) logger.info('Best fit gamma: ' + str(best_fit_gamma)) # Return np.nan if only got bad fits if best_fit_score < minimum_score: if verbose: logger.warning("Uh oh. Best fit score {0:.2f} is < user-defined minimum score {1:.2f}".format(best_fit_score, minimum_score)) return np.nan, best_fit_gamma, best_fit_score # Otherwise train and fit the best model sample_weight = 1 / uncertainty model = SVR(kernel='rbf', C=1e3, gamma=best_fit_gamma).fit(X, y, sample_weight) y_fit = model.predict(X) if verbose: logger.info("Best model trained and fitted.") if plots_save_path: plt.clf() plt.errorbar(X.ravel(), y, yerr=uncertainty, color='black', fmt='o', label='Input light curve') plt.plot(X.ravel(), y_fit, linewidth=6, label='Fit') plt.title("t$_0$ = " + datetimeindex_to_human(light_curve_df.index)[0]) plt.xlabel('time [seconds since start]') plt.ylabel('irradiance [%]') plt.legend(loc='best') filename = plots_save_path + 'Fit t0 ' + datetimeindex_to_human(light_curve_df.index)[0] + '.png' plt.savefig(filename) if verbose: logger.info("Fitted curve saved to %s" % filename) # TODO: Get uncertainty of fit at each point... if that's even possible # Placeholder for now just so that the function can complete: output uncertainty = input uncertainty fit_uncertainty = uncertainty # Construct a pandas DataFrame with DatetimeIndex, y_fit, and fit_uncertainty light_curve_fit_df = pd.DataFrame({'irradiance': y_fit, 'uncertainty': fit_uncertainty}) light_curve_fit_df.index = light_curve_df.index[finite_irradiance_indices] if verbose: logger.info("Created output DataFrame") return light_curve_fit_df, best_fit_gamma, best_fit_score
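# --- Usage sketch (illustrative only) ---
# A synthetic example of the input automatic_fit_light_curve expects: a DataFrame with a
# DatetimeIndex and 'irradiance'/'uncertainty' columns. The dimming shape and noise level are
# made up; verbose is left False so no logger or plot paths are required.
def _example_automatic_fit():  # pragma: no cover
    times = pd.date_range('2012-04-15 18:00', periods=240, freq='1min')
    seconds = np.arange(240) * 60.0
    np.random.seed(0)
    irradiance = -2.0 * np.exp(-((seconds - 7200.0) / 3600.0) ** 2) + np.random.normal(0.0, 0.2, 240)
    light_curve_df = pd.DataFrame({'irradiance': irradiance, 'uncertainty': np.full(240, 0.2)}, index=times)
    light_curve_fit_df, best_fit_gamma, best_fit_score = automatic_fit_light_curve(light_curve_df, verbose=False)
    return light_curve_fit_df, best_fit_gamma, best_fit_score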
def determine_dimming_slope(light_curve_df, earliest_allowed_time=None, latest_allowed_time=None, smooth_points=0, plot_path_filename=None, verbose=False, logger=None): """Find the slope of dimming in a light curve, if any. Inputs: light_curve_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex and a column for irradiance. Optional Inputs: earliest_allowed_time [metatime]: The function won't return a slope determined any earlier than this. It is recommended that this be the peak time of the flare. Default is None, meaning the beginning of the light_curve_df. latest_allowed_time [metatime]: The function won't return a slope determined any later than this. It is recommended that this be the identified time of dimming depth. Default is None, meaning the end of the light_curve_df. smooth_points [integer]: Used to apply a rolling mean with the number of points (indices) specified. Default is 0, meaning no smoothing will be performed. plot_path_filename [str]: Set to a path and filename in order to save the summary plot to disk. Default is None, meaning the plot will not be saved to disk. verbose [bool]: Set to log the processing messages to disk and console. Default is False. logger [JpmLogger]: A configured logger from jpm_logger.py. If set to None, will generate a new one. Default is None. Outputs: slope_min [float]: The minimum slope of dimming in percent/second terms. slope_max [float]: The maximum slope of dimming in percent/second terms. slope_mean [float]: The mean slope of dimming in percent/second terms. Optional Outputs: None Example: slope_min, slope_max, slope_mean = determine_dimming_slope(light_curve_df, plot_path_filename='./determine_dimming_slope_summary.png', verbose=True) """ # Prepare the logger for verbose if verbose: if not logger: logger = JpmLogger(filename='determine_dimming_slope_log', path='/Users/jmason86/Desktop/') logger.info( "Running on event with light curve start time of {0}.".format( light_curve_df.index[0])) # If no earliest_allowed_time set, then set it to beginning of light_curve_df if not earliest_allowed_time: earliest_allowed_time = light_curve_df.index[0] logger.info( "No earliest allowed time provided. Setting to beginning of light curve: {0}" .format(earliest_allowed_time)) # If no latest_allowed_time set, then set it to end of light_curve_df if not latest_allowed_time: latest_allowed_time = light_curve_df.index[-1] logger.info( "No latest allowed time provided. 
Setting to end of light curve: {0}" .format(latest_allowed_time)) # Optionally smooth the light curve with a rolling mean if smooth_points: light_curve_df['irradiance'] = light_curve_df.rolling( smooth_points, center=True).mean() if verbose: logger.info('Applied {0} point smooth.'.format(smooth_points)) first_non_nan = light_curve_df['irradiance'].first_valid_index() nan_indices = np.isnan(light_curve_df['irradiance']) light_curve_df['irradiance'][nan_indices] = light_curve_df['irradiance'][ first_non_nan] # Find the max in the allowed window max_time = light_curve_df[earliest_allowed_time:latest_allowed_time][ 'irradiance'].idxmax() max_irradiance = light_curve_df['irradiance'].loc[max_time] if verbose: logger.info( 'Maximum in allowed window found with value of {0:.2f} at time {1}' .format(max_irradiance, max_time)) # Compute the derivative in the time window of interest (inverting sign so that we describe "downward slope") derivative = -light_curve_df[max_time:latest_allowed_time][ 'irradiance'].diff( ) / light_curve_df[max_time:latest_allowed_time].index.to_series( ).diff().dt.total_seconds() if verbose: logger.info( "Computed derivative of light curve within time window of interest." ) # Get the min, max, and mean slope slope_min = derivative.min() slope_max = derivative.max() slope_mean = derivative.mean() slope_min_str = latex_float(slope_min) slope_max_str = latex_float(slope_max) slope_mean_str = latex_float(slope_mean) if verbose: logger.info( "Computed min ({0}), max ({1}), and mean ({2}) %/s slope.".format( slope_min_str, slope_max_str, slope_mean_str)) # Do a few sanity checks for the log if verbose: if slope_min < 0: logger.warning( "Minimum slope of {0} is unexpectedly < 0.".format(slope_min)) if slope_max < 0: logger.warning( "Maximum slope of {0} is unexpectedly < 0.".format(slope_max)) if slope_mean < 0: logger.warning( "Mean slope of {0} is unexpectedly < 0.".format(slope_mean)) # Produce a summary plot if plot_path_filename: plt.style.use('jpm-transparent-light') from matplotlib import dates p = plt.plot(light_curve_df['irradiance']) p = plt.plot( light_curve_df[max_time:latest_allowed_time]['irradiance'], label='slope region') ax = plt.gca() plt.axvline(x=earliest_allowed_time, linestyle='dashed', color='grey') plt.axvline(x=latest_allowed_time, linestyle='dashed', color='black') plt.axvline(x=max_time, linestyle='dashed', color='black') plt.title('Identified Slope') start_date = light_curve_df.index.values[0] start_date_string = pd.to_datetime(str(start_date)) plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S')) fmtr = dates.DateFormatter("%H:%M:%S") ax.xaxis.set_major_formatter(fmtr) ax.xaxis.set_major_locator(dates.HourLocator()) ax.xaxis.grid(b=True, which='minor') plt.ylabel('Irradiance [%]') inverse_str = '$^{-1}$' plt.annotate('slope_min={0} % sec{1}'.format(slope_min_str, inverse_str), xy=(0.98, 0.12), xycoords='axes fraction', ha='right', size=12, color=p[0].get_color()) plt.annotate('slope_max={0} % sec{1}'.format(slope_max_str, inverse_str), xy=(0.98, 0.08), xycoords='axes fraction', ha='right', size=12, color=p[0].get_color()) plt.annotate('slope_mean={0} % sec{1}'.format(slope_mean_str, inverse_str), xy=(0.98, 0.04), xycoords='axes fraction', ha='right', size=12, color=p[0].get_color()) ax.legend(loc='best') plt.savefig(plot_path_filename) if verbose: logger.info("Summary plot saved to %s" % plot_path_filename) # Return the slopes return slope_min, slope_max, slope_mean
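# --- Usage sketch (illustrative only) ---
# A synthetic dimming light curve, already normalized to percent with pre-flare = 0, for
# determine_dimming_slope. Explicit window bounds are passed so the function does not need a
# logger; latex_float is assumed to be imported at module scope, as the code above implies.
def _example_determine_dimming_slope():  # pragma: no cover
    times = pd.date_range('2012-04-15 18:00', periods=240, freq='1min')
    seconds = np.arange(240) * 60.0
    irradiance = -3.0 * (1.0 - np.exp(-seconds / 5400.0))  # smooth decline toward -3 %
    light_curve_df = pd.DataFrame({'irradiance': irradiance}, index=times)
    slope_min, slope_max, slope_mean = determine_dimming_slope(light_curve_df,
                                                               earliest_allowed_time=times[0],
                                                               latest_allowed_time=times[-1],
                                                               verbose=False)
    return slope_min, slope_max, slope_mean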
def generate_jedi_catalog( threshold_time_prior_flare_minutes=240.0, dimming_window_relative_to_flare_minutes_left=0.0, dimming_window_relative_to_flare_minutes_right=240.0, threshold_minimum_dimming_window_minutes=120.0, flare_index_range=range(0, 5052), output_path='/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/', verbose=True): """Wrapper code for creating James's Extreme Ultraviolet Variability Experiment (EVE) Dimming Index (JEDI) catalog. Inputs: None. Optional Inputs: threshold_time_prior_flare_minutes [float]: How long before a particular event the previous one must have occurred for the event to be considered independent. If the previous one was too recent, that event's pre-flare irradiance will be used. Default is 240 (4 hours). dimming_window_relative_to_flare_minutes_left [float]: Defines the left side of the time window to search for dimming relative to the GOES/XRS flare peak. Negative numbers mean minutes prior to the flare peak. Default is 0.0. dimming_window_relative_to_flare_minutes_right [float]: Defines the right side of the time window to search for dimming relative to the GOES/XRS flare peak. If another flare occurs before this, that time will define the end of the window instead. Default is 240 (4 hours). threshold_minimum_dimming_window_minutes [float]: The smallest allowed time window in which to search for dimming. Default is 120. flare_index_range [range]: The range of GOES flare indices to process. Default is range(0, 5052). output_path [str]: Set to a path for saving the JEDI catalog table and processing summary plots. Default is '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/'. verbose [bool]: Set to log the processing messages to disk and console. Default is True. Outputs: No direct return, but writes a csv to disk with the dimming parameterization results. Subroutines also optionally save processing plots to disk in output_path. 
Optional Outputs: None Example: generate_jedi_catalog(output_path='/Users/jmason86/Dropbox/Research/Postdoc_NASA/Analysis/Coronal Dimming Analysis/JEDI Catalog/', verbose=True) """ # Prepare the logger for verbose if verbose: logger = JpmLogger(filename='generate_jedi_catalog', path=output_path, console=False) logger.info("Starting JEDI processing pipeline.") logger.info("Processing events {0} - {1}".format( flare_index_range[0], flare_index_range[-1])) else: logger = None # Get EVE level 2 extracted emission lines data # TODO: Replace this shortcut method with the method I'm building into sunpy from scipy.io.idl import readsav eve_readsav = readsav( '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/eve_lines_2010121-2014146 MEGS-A Mission Bare Bones.sav' ) if verbose: logger.info('Loaded EVE data') # Create metadata dictionary # TODO: Replace this shortcut method with the method I'm building into sunpy from sunpy.util.metadata import MetaDict metadata = MetaDict() metadata['ion'] = eve_readsav['name'] metadata['temperature_ion_peak_formation'] = np.power( 10.0, eve_readsav['logt']) * u.Kelvin metadata['extracted_wavelength_center'] = eve_readsav['wavelength'] * u.nm metadata['extracted_wavelength_min'] = metadata[ 'extracted_wavelength_center'] metadata['extracted_wavelength_max'] = metadata[ 'extracted_wavelength_center'] metadata['emission_line_blends'] = ['none', 'yay', 'poop', 'Fe vi'] # etc metadata[ 'exposure_time'] = 60.0 * u.second # These example EVE data are already binned down to 1 minute metadata['precision'] = ['Not implemented in prototype'] metadata['accuracy'] = ['Not implemented in prototype'] metadata['flags'] = ['Not implemented in prototype'] metadata['flags_description'] = '1 = MEGS-A data is missing, ' \ '2 = MEGS-B data is missing, ' \ '4 = ESP data is missing, ' \ '8 = MEGS-P data is missing, ' \ '16 = Possible clock adjust in MEGS-A, ' \ '32 = Possible clock adjust in MEGS-B, ' \ '64 = Possible clock adjust in ESP, ' \ '128 = Possible clock adjust in MEGS-P' metadata['flags_spacecraft'] = ['Not implemented in prototype'] metadata['flags_spacecraft_description'] = '0 = No obstruction, ' \ '1 = Warm up from Earth eclipse, ' \ '2 = Obstruction atmosphere penumbra, ' \ '3 = Obstruction atmosphere umbra, ' \ '4 = Obstruction penumbra of Mercury, ' \ '5 = Obstruction penumbra of Mercury, ' \ '6 = Obstruction penumbra of Venus, ' \ '7 = Obstruction umbra of Venus, ' \ '8 = Obstruction penumbra of Moon, ' \ '9 = Obstruction umbra of Moon, ' \ '10 = Obstruction penumbra of solid Earth, ' \ '11 = Obstruction umbra of solid Earth, ' \ '16 = Observatory is off-pointed by more than 1 arcmin' metadata['data_version'] = ['Not implemented in prototype'] metadata['data_reprocessed_revision'] = ['Not implemented in prototype'] metadata['filename'] = ['Not implemented in prototype'] # Load up the actual irradiance data into a pandas DataFrame # TODO: Replace this shortcut method with the method I'm building into sunpy irradiance = eve_readsav['irradiance'].byteswap().newbyteorder( ) # pandas doesn't like big endian irradiance[irradiance == -1] = np.nan wavelengths = eve_readsav['wavelength'] wavelengths_str = [] [ wavelengths_str.append('{0:1.1f}'.format(wavelength)) for wavelength in wavelengths ] eve_lines = pd.DataFrame(irradiance, columns=wavelengths_str) eve_lines.index = pd.to_datetime(eve_readsav.iso.astype(str)) eve_lines = eve_lines.drop_duplicates() # slice out only columns needed by Shawn # eve_selected_lines = 
eve_lines.drop(columns=['9.4', '13.1', '13.3', '25.6', '28.4', '30.4', '33.5', '36.1', '36.8', '44.6', '46.5', '49.9', '52.1', '52.6', '53.7', '55.4', '56.8', '58.4', '59.2', '60.0', '61.0', '62.5', '63.0', '71.9', '72.2', '77.0', '79.0', '83.6', '95.0', '97.3', '97.7', '102.6', '103.2']) # eve_selected_lines.info() # eve_selected_lines.to_csv('/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/eve_selected_lines_forreal.csv') # Get GOES flare events above C1 within date range corresponding to EVE data # flares = get_goes_flare_events(eve_lines.index[0], eve_lines.index[-1], verbose=verbose) # TODO: The method in sunpy needs fixing, issue 2434 # Load GOES events from IDL saveset instead of directly through sunpy goes_flare_events = readsav( '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/GoesEventsMegsAEra.sav' ) goes_flare_events['class'] = goes_flare_events['class'].astype(str) goes_flare_events['event_peak_time_human'] = goes_flare_events[ 'event_peak_time_human'].astype(str) goes_flare_events['event_start_time_human'] = goes_flare_events[ 'event_start_time_human'].astype(str) goes_flare_events['peak_time'] = Time( goes_flare_events['event_peak_time_jd'], format='jd', scale='utc') goes_flare_events['start_time'] = Time( goes_flare_events['event_start_time_jd'], format='jd', scale='utc') if verbose: logger.info('Loaded GOES flare events.') # Define the columns of the JEDI catalog jedi_row = pd.DataFrame([ OrderedDict([('Event #', np.nan), ('GOES Flare Start Time', np.nan), ('GOES Flare Peak Time', np.nan), ('GOES Flare Class', np.nan), ('Pre-Flare Start Time', np.nan), ('Pre-Flare End Time', np.nan), ('Flare Interrupt', np.nan)]) ]) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Pre-Flare Irradiance [W/m2]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Start Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope End Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Min [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Max [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Mean [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Slope Uncertainty [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Depth Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Depth [%]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Depth Uncertainty [%]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Duration Start Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Duration End Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Duration [s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Fitting Gamma')) jedi_row = jedi_row.join( pd.DataFrame(columns=eve_lines.columns + ' Fitting Score')) ion_tuples = list(itertools.permutations(eve_lines.columns.values, 2)) ion_permutations = pd.Index( [' by '.join(ion_tuples[i]) for i in range(len(ion_tuples))]) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope Start Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope End Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope Min [%/s]')) jedi_row = jedi_row.join( 
pd.DataFrame(columns=ion_permutations + ' Slope Max [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope Mean [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Slope Uncertainty [%/s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Depth Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Depth [%]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Depth Uncertainty [%]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Duration Start Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Duration End Time')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Duration [s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Correction Time Shift [s]')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Correction Scale Factor')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Fitting Gamma')) jedi_row = jedi_row.join( pd.DataFrame(columns=ion_permutations + ' Fitting Score')) csv_filename = output_path + 'jedi_{0}.csv'.format(Time.now().iso) jedi_row.to_csv(csv_filename, header=True, index=False, mode='w') if verbose: logger.info('Created JEDI row definition.') # Start a progress bar widgets = [ progressbar.Percentage(), progressbar.Bar(), progressbar.Timer(), ' ', progressbar.AdaptiveETA() ] progress_bar = progressbar.ProgressBar( widgets=[progressbar.FormatLabel('Flare Event Loop: ')] + widgets, min_value=flare_index_range[0], max_value=flare_index_range[-1]).start() # Prepare a hold-over pre-flare irradiance value, # which will normally have one element for each of the 39 emission lines preflare_irradiance = np.nan # Start loop through all flares for flare_index in flare_index_range: # Skip event 0 to avoid problems with referring to earlier indices if flare_index == 0: continue # Reset jedi_row jedi_row[:] = np.nan # Reset the flare interrupt flag flare_interrupt = False # Fill the GOES flare information into the JEDI row jedi_row['Event #'] = flare_index jedi_row['GOES Flare Start Time'] = goes_flare_events['start_time'][ flare_index].iso jedi_row['GOES Flare Peak Time'] = goes_flare_events['peak_time'][ flare_index].iso jedi_row['GOES Flare Class'] = goes_flare_events['class'][flare_index] if verbose: logger.info( "Event {0} GOES flare details stored to JEDI row.".format( flare_index)) # If haven't already done all pre-parameterization processing processed_jedi_non_params_filename = output_path + 'Processed Pre-Parameterization Data/Event {0} Pre-Parameterization.h5'.format( flare_index) processed_lines_filename = output_path + 'Processed Lines Data/Event {0} Lines.h5'.format( flare_index) if not os.path.isfile(processed_lines_filename) or not os.path.isfile( processed_jedi_non_params_filename): # Determine pre-flare irradiance minutes_since_last_flare = ( goes_flare_events['peak_time'][flare_index] - goes_flare_events['peak_time'][flare_index - 1]).sec / 60.0 if minutes_since_last_flare > threshold_time_prior_flare_minutes: # Clip EVE data from threshold_time_prior_flare_minutes prior to flare up to peak flare time preflare_window_start = ( goes_flare_events['peak_time'][flare_index] - (threshold_time_prior_flare_minutes * u.minute)).iso preflare_window_end = ( goes_flare_events['peak_time'][flare_index]).iso eve_lines_preflare_time = eve_lines[ preflare_window_start:preflare_window_end] # Loop through the emission lines and get pre-flare irradiance 
for each preflare_irradiance = [] for column in eve_lines_preflare_time: eve_line_preflare_time = pd.DataFrame( eve_lines_preflare_time[column]) eve_line_preflare_time.columns = ['irradiance'] preflare_irradiance.append( determine_preflare_irradiance( eve_line_preflare_time, pd.Timestamp(goes_flare_events['start_time'] [flare_index].iso), plot_path_filename=output_path + 'Preflare Determination/Event {0} {1}.png'.format( flare_index, column), verbose=verbose, logger=logger)) plt.close('all') else: logger.info( "This flare at {0} will use the pre-flare irradiance from flare at {1}." .format( goes_flare_events['peak_time'][flare_index].iso, goes_flare_events['peak_time'][flare_index - 1].iso)) jedi_row["Pre-Flare Start Time"] = preflare_window_start jedi_row["Pre-Flare End Time"] = preflare_window_end preflare_irradiance_cols = [ col for col in jedi_row.columns if 'Pre-Flare Irradiance' in col ] jedi_row[preflare_irradiance_cols] = preflare_irradiance if verbose: logger.info( "Event {0} pre-flare determination complete.".format( flare_index)) # Clip EVE data to dimming window bracket_time_left = ( goes_flare_events['peak_time'][flare_index] - (dimming_window_relative_to_flare_minutes_left * u.minute)) next_flare_time = Time( (goes_flare_events['peak_time'][flare_index + 1]).iso) user_choice_time = ( goes_flare_events['peak_time'][flare_index] + (dimming_window_relative_to_flare_minutes_right * u.minute)) bracket_time_right = min(next_flare_time, user_choice_time) # If flare is shortening the window, set the flare_interrupt flag if bracket_time_right == next_flare_time: flare_interrupt = True if verbose: logger.info( 'Flare interrupt for event at {0} by flare at {1}'. format(goes_flare_events['peak_time'][flare_index].iso, next_flare_time)) # Write flare_interrupt to JEDI row jedi_row['Flare Interrupt'] = flare_interrupt # Skip event if the dimming window is too short if ((bracket_time_right - bracket_time_left).sec / 60.0) < threshold_minimum_dimming_window_minutes: # Leave all dimming parameters as NaN and write this null result to the CSV on disk jedi_row.to_csv(csv_filename, header=False, index=False, mode='a') # Log message if verbose: logger.info( 'The dimming window duration of {0} minutes is shorter than the minimum threshold of {1} minutes. Skipping this event ({2})' .format(((bracket_time_right - bracket_time_left).sec / 60.0), threshold_minimum_dimming_window_minutes, goes_flare_events['peak_time'][flare_index])) # Skip the rest of the processing in the flare_index loop continue else: eve_lines_event = eve_lines[bracket_time_left. iso:bracket_time_right.iso] if verbose: logger.info( "Event {0} EVE data clipped to dimming window.".format( flare_index)) # Convert irradiance units to percent # (in place, don't care about absolute units from this point forward) eve_lines_event = (eve_lines_event - preflare_irradiance ) / preflare_irradiance * 100.0 if verbose: logger.info( "Event {0} irradiance converted from absolute to percent units." 
.format(flare_index)) # Do flare removal in the light curves and add the results to the DataFrame progress_bar_correction = progressbar.ProgressBar( widgets=[progressbar.FormatLabel('Peak Match Subtract: ')] + widgets, max_value=len(ion_tuples)).start() for i in range(len(ion_tuples)): light_curve_to_subtract_from_df = pd.DataFrame( eve_lines_event[ion_tuples[i][0]]) light_curve_to_subtract_from_df.columns = ['irradiance'] light_curve_to_subtract_with_df = pd.DataFrame( eve_lines_event[ion_tuples[i][1]]) light_curve_to_subtract_with_df.columns = ['irradiance'] if (light_curve_to_subtract_from_df.isnull().all().all()) or ( light_curve_to_subtract_with_df.isnull().all().all()): if verbose: logger.info( 'Event {0} {1} correction skipped because all irradiances are NaN.' .format(flare_index, ion_permutations[i])) else: light_curve_corrected, seconds_shift, scale_factor = light_curve_peak_match_subtract( light_curve_to_subtract_from_df, light_curve_to_subtract_with_df, pd.Timestamp( (goes_flare_events['peak_time'][flare_index]).iso), plot_path_filename=output_path + 'Peak Subtractions/Event {0} {1}.png'.format( flare_index, ion_permutations[i]), verbose=verbose, logger=logger) eve_lines_event[ ion_permutations[i]] = light_curve_corrected jedi_row[ion_permutations[i] + ' Correction Time Shift [s]'] = seconds_shift jedi_row[ion_permutations[i] + ' Correction Scale Factor'] = scale_factor plt.close('all') if verbose: logger.info( 'Event {0} flare removal correction complete'. format(flare_index)) progress_bar_correction.update(i) progress_bar_correction.finish() # TODO: Update calculate_eve_fe_line_precision to compute for all emission lines, not just selected uncertainty = np.ones(len(eve_lines_event)) * 0.002545 # TODO: Propagate uncertainty through light_curve_peak_match_subtract and store in eve_lines_event # Fit the light curves to reduce influence of noise on the parameterizations to come later progress_bar_fitting = progressbar.ProgressBar( widgets=[progressbar.FormatLabel('Light curve fitting: ')] + widgets, max_value=len(eve_lines_event.columns)).start() for i, column in enumerate(eve_lines_event): if eve_lines_event[column].isnull().all().all(): if verbose: logger.info( 'Event {0} {1} fitting skipped because all irradiances are NaN.' 
.format(flare_index, column)) else: eve_line_event = pd.DataFrame(eve_lines_event[column]) eve_line_event.columns = ['irradiance'] eve_line_event['uncertainty'] = uncertainty fitting_path = output_path + 'Fitting/' if not os.path.exists(fitting_path): os.makedirs(fitting_path) plt.close('all') light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve( eve_line_event, plots_save_path='{0} Event {1} {2} '.format( fitting_path, flare_index, column), verbose=verbose, logger=logger) eve_lines_event[column] = light_curve_fit jedi_row[column + ' Fitting Gamma'] = best_fit_gamma jedi_row[column + ' Fitting Score'] = best_fit_score if verbose: logger.info( 'Event {0} {1} light curves fitted.'.format( flare_index, column)) progress_bar_fitting.update(i) progress_bar_fitting.finish() # # Save the dimming event data to disk for quicker restore # jedi_row.to_hdf(processed_jedi_non_params_filename, 'jedi_row') # eve_lines_event.to_hdf(processed_lines_filename, 'eve_lines_event') # else: # jedi_row = pd.read_hdf(processed_jedi_non_params_filename, 'jedi_row') # eve_lines_event = pd.read_hdf(processed_lines_filename, 'eve_lines_event') # if verbose: # logger.info('Loading files {0} and {1} rather than processing again.'.format(processed_jedi_non_params_filename, processed_lines_filename)) # # # Parameterize the light curves for dimming # for column in eve_lines_event: # # # Null out all parameters # depth_percent, depth_time = np.nan, np.nan # slope_start_time, slope_end_time = np.nan, np.nan # slope_min, slope_max, slope_mean = np.nan, np.nan, np.nan # duration_seconds, duration_start_time, duration_end_time = np.nan, np.nan, np.nan # # # Determine whether to do the parameterizations or not # if eve_lines_event[column].isnull().all().all(): # if verbose: # logger.info('Event {0} {1} parameterization skipped because all irradiances are NaN.'.format(flare_index, column)) # else: # eve_line_event = pd.DataFrame(eve_lines_event[column]) # eve_line_event.columns = ['irradiance'] # # # Determine dimming depth (if any) # depth_path = output_path + 'Depth/' # if not os.path.exists(depth_path): # os.makedirs(depth_path) # # plt.close('all') # depth_percent, depth_time = determine_dimming_depth(eve_line_event, # plot_path_filename='{0} Event {1} {2} Depth.png'.format(depth_path, flare_index, column), # verbose=verbose, logger=logger) # # jedi_row[column + ' Depth [%]'] = depth_percent # # jedi_row[column + ' Depth Uncertainty [%]'] = depth_uncertainty # TODO: make determine_dimming_depth return the propagated uncertainty # jedi_row[column + ' Depth Time'] = depth_time # # # Determine dimming slope (if any) # slope_path = output_path + 'Slope/' # if not os.path.exists(slope_path): # os.makedirs(slope_path) # # slope_start_time = pd.Timestamp((goes_flare_events['peak_time'][flare_index]).iso) # slope_end_time = depth_time # # if (pd.isnull(slope_start_time)) or (pd.isnull(slope_end_time)): # if verbose: # logger.warning('Cannot compute slope or duration because slope bounding times NaN.') # else: # plt.close('all') # slope_min, slope_max, slope_mean = determine_dimming_slope(eve_line_event, # earliest_allowed_time=slope_start_time, # latest_allowed_time=slope_end_time, # plot_path_filename='{0} Event {1} {2} Slope.png'.format(slope_path, flare_index, column), # verbose=verbose, logger=logger) # # jedi_row[column + ' Slope Min [%/s]'] = slope_min # jedi_row[column + ' Slope Max [%/s]'] = slope_max # jedi_row[column + ' Slope Mean [%/s]'] = slope_mean # # jedi_row[column + ' Slope Uncertainty [%]'] = 
slope_uncertainty # TODO: make determine_dimming_depth return the propagated uncertainty # jedi_row[column + ' Slope Start Time'] = slope_start_time # jedi_row[column + ' Slope End Time'] = slope_end_time # # # Determine dimming duration (if any) # duration_path = output_path + 'Duration/' # if not os.path.exists(duration_path): # os.makedirs(duration_path) # # plt.close('all') # duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(eve_line_event, # earliest_allowed_time=slope_start_time, # plot_path_filename='{0} Event {1} {2} Duration.png'.format(duration_path, flare_index, column), # verbose=verbose, logger=logger) # # jedi_row[column + ' Duration [s]'] = duration_seconds # jedi_row[column + ' Duration Start Time'] = duration_start_time # jedi_row[column + ' Duration End Time'] = duration_end_time # # if verbose: # logger.info("Event {0} {1} parameterizations complete.".format(flare_index, column)) # # # Produce a summary plot for each light curve # plt.style.use('jpm-transparent-light') # # ax = eve_line_event['irradiance'].plot(color='black') # plt.axhline(linestyle='dashed', color='grey') # start_date = eve_line_event.index.values[0] # start_date_string = pd.to_datetime(str(start_date)) # plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S')) # plt.ylabel('Irradiance [%]') # fmtr = dates.DateFormatter("%H:%M:%S") # ax.xaxis.set_major_formatter(fmtr) # ax.xaxis.set_major_locator(dates.HourLocator()) # plt.title('Event {0} {1} nm Parameters'.format(flare_index, column)) # # if not np.isnan(depth_percent): # plt.annotate('', xy=(depth_time, -depth_percent), xycoords='data', # xytext=(depth_time, 0), textcoords='data', # arrowprops=dict(facecolor='limegreen', edgecolor='limegreen', linewidth=2)) # mid_depth = -depth_percent / 2.0 # plt.annotate('{0:.2f} %'.format(depth_percent), xy=(depth_time, mid_depth), xycoords='data', # ha='right', va='center', rotation=90, size=18, color='limegreen') # # if not np.isnan(slope_mean): # if pd.isnull(slope_start_time) or pd.isnull(slope_end_time): # import pdb # pdb.set_trace() # p = plt.plot(eve_line_event[slope_start_time:slope_end_time]['irradiance'], c='tomato') # # inverse_str = '$^{-1}$' # plt.annotate('slope_min={0} % s{1}'.format(latex_float(slope_min), inverse_str), # xy=(0.98, 0.12), xycoords='axes fraction', ha='right', # size=12, color=p[0].get_color()) # plt.annotate('slope_max={0} % s{1}'.format(latex_float(slope_max), inverse_str), # xy=(0.98, 0.08), xycoords='axes fraction', ha='right', # size=12, color=p[0].get_color()) # plt.annotate('slope_mean={0} % s{1}'.format(latex_float(slope_mean), inverse_str), # xy=(0.98, 0.04), xycoords='axes fraction', ha='right', # size=12, color=p[0].get_color()) # # if not np.isnan(duration_seconds): # plt.annotate('', xy=(duration_start_time, 0), xycoords='data', # xytext=(duration_end_time, 0), textcoords='data', # arrowprops=dict(facecolor='dodgerblue', edgecolor='dodgerblue', linewidth=5, arrowstyle='<->')) # mid_time = duration_start_time + (duration_end_time - duration_start_time) / 2 # plt.annotate(str(duration_seconds) + ' s', xy=(mid_time, 0), xycoords='data', ha='center', va='bottom', size=18, color='dodgerblue') # # summary_path = output_path + 'Summary Plots/' # if not os.path.exists(summary_path): # os.makedirs(summary_path) # summary_filename = '{0} Event {1} {2} Parameter Summary.png'.format(summary_path, flare_index, column) # plt.savefig(summary_filename) # if verbose: # logger.info("Summary plot saved to %s" % summary_filename) # # # Write to the 
JEDI catalog on disk # jedi_row.to_csv(csv_filename, header=False, index=False, mode='a') # if verbose: # logger.info('Event {0} JEDI row written to {1}.'.format(flare_index, csv_filename)) # Update progress bar progress_bar.update(flare_index) progress_bar.finish()
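# --- Invocation sketch (illustrative only) ---
# A minimal way to run the catalog generator over a small slice of flare indices. The output
# directory below is a placeholder, and the hard-coded EVE/GOES save-set paths inside
# generate_jedi_catalog must point at real files for this to run.
if __name__ == '__main__':
    generate_jedi_catalog(flare_index_range=range(1, 50),
                          output_path='./jedi_catalog_output/',  # placeholder path
                          verbose=True)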
def determine_dimming_duration(light_curve_df, earliest_allowed_time=None, smooth_points=0, plot_path_filename=None, verbose=False, logger=None): """Find the duration of dimming in a light curve, if any. Assumes light curve is normalized such that pre-flare = 0%. Inputs: light_curve_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex and a column for irradiance. Optional Inputs: earliest_allowed_time [metatime]: The function won't return a duration if the only 0 crossings are earlier than this. Default is None, meaning the beginning of the light_curve_df. smooth_points [integer]: Used to apply a rolling mean with the number of points (indices) specified. Default is 0, meaning no smoothing will be performed. plot_path_filename [str]: Set to a path and filename in order to save the summary plot to disk. Default is None, meaning the plot will not be saved to disk. verbose [bool]: Set to log the processing messages to disk and console. Default is False. logger [JpmLogger]: A configured logger from jpm_logger.py. If set to None, will generate a new one. Default is None. Outputs: duration_seconds [integer]: The duration of dimming in seconds. duration_start_time [pd.Timestamp]: The time the duration starts (downward 0 crossing). duration_end_time [pd.Timestamp]: The time the duration ends (upward 0 crossing). Optional Outputs: None Example: duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(light_curve_df, plot_path_filename='./bla.png', verbose=True) """ # If no earliest_allowed_time set, then set it to beginning of light_curve_df if not earliest_allowed_time: earliest_allowed_time = pd.Timestamp(light_curve_df.index.values[0]) # Prepare the logger for verbose if verbose: if not logger: logger = JpmLogger(filename='determine_dimming_duration_log', path='/Users/jmason86/Desktop/') logger.info("Running on event with light curve start time of {0}.".format(light_curve_df.index[0])) # Set up a successful processing flag found_duration = True # Optionally smooth the light curve with a rolling mean if smooth_points: light_curve_df['smooth'] = light_curve_df.rolling(smooth_points, center=True).mean() else: light_curve_df['smooth'] = light_curve_df['irradiance'] first_non_nan = light_curve_df['smooth'].first_valid_index() nan_indices = np.isnan(light_curve_df['smooth']) light_curve_df['smooth'][nan_indices] = light_curve_df['smooth'][first_non_nan] # Find the indices where the light curve is closest to 0 zero_crossing_indices = np.where(np.diff(np.signbit(light_curve_df['smooth'])))[0] zero_crossing_times = light_curve_df.index[zero_crossing_indices] # Discard any indices prior to the user-provided earliest_allowed_time, else cannot compute zero_crossing_indices = zero_crossing_indices[zero_crossing_times > earliest_allowed_time] if zero_crossing_indices.size == 0: if verbose: logger.warning('No zero crossings detected after earliest allowed time of %s' % earliest_allowed_time) found_duration = False # Figure out which way the light curve is sloping if found_duration: light_curve_df['diff'] = light_curve_df['smooth'].diff() # Find the first negative slope zero crossing time if found_duration: neg_zero_crossing_indices = np.where(light_curve_df['diff'][zero_crossing_indices + 1] < 0)[0] if len(neg_zero_crossing_indices) > 0: first_neg_zero_crossing_index = neg_zero_crossing_indices[0] first_neg_zero_crossing_time = light_curve_df.index[zero_crossing_indices[first_neg_zero_crossing_index]] else: if verbose: logger.warning('No negative slope 0-crossing found. 
Duration cannot be defined.') found_duration = False # Find the first postiive slope zero crossing if found_duration: pos_zero_crossing_indices = np.where(light_curve_df['diff'][zero_crossing_indices + 1] > 0)[0] if len(pos_zero_crossing_indices) > 0: first_pos_zero_crossing_index = pos_zero_crossing_indices[0] first_pos_zero_crossing_time = light_curve_df.index[zero_crossing_indices[first_pos_zero_crossing_index]] else: if verbose: logger.warning('No positive slope 0-crossing found. Duration cannot be defined.') found_duration = False # If the first negative slope zero crossing isn't earlier than the positive one, return null if (found_duration) and (first_neg_zero_crossing_time > first_pos_zero_crossing_time): if verbose: logger.warning('Dimming light curve may be misaligned in window. Negative slope 0-crossing detected after positive one.') found_duration = False # Return the time difference in seconds between the selected zero crossings if found_duration: duration_seconds = int((first_pos_zero_crossing_time - first_neg_zero_crossing_time).total_seconds()) if plot_path_filename: plt.style.use('jpm-transparent-light') from matplotlib import dates if found_duration: light_curve_df = light_curve_df.drop('diff', 1) ax = light_curve_df['irradiance'].plot() start_date = light_curve_df.index.values[0] start_date_string = pd.to_datetime(str(start_date)) plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S')) plt.ylabel('Irradiance [%]') fmtr = dates.DateFormatter("%H:%M:%S") ax.xaxis.set_major_formatter(fmtr) ax.xaxis.set_major_locator(dates.HourLocator()) plt.title('Dimming Duration') if found_duration: plt.scatter([zero_crossing_times[first_neg_zero_crossing_index], zero_crossing_times[first_pos_zero_crossing_index]], [light_curve_df['smooth'][zero_crossing_indices[first_neg_zero_crossing_index]], light_curve_df['smooth'][zero_crossing_indices[first_pos_zero_crossing_index]]], c='black', s=300, zorder=3) plt.annotate('', xy=(first_neg_zero_crossing_time, 0), xycoords='data', xytext=(first_pos_zero_crossing_time, 0), textcoords='data', arrowprops=dict(facecolor='black', linewidth=5, arrowstyle='<->')) mid_time = first_neg_zero_crossing_time + (first_pos_zero_crossing_time - first_neg_zero_crossing_time) / 2 plt.annotate(str(duration_seconds) + ' s', xy=(mid_time, 0), xycoords='data', ha='center', va='bottom', size=18) plt.savefig(plot_path_filename) if verbose: logger.info("Summary plot saved to %s" % plot_path_filename) if not found_duration: duration_seconds = np.nan first_neg_zero_crossing_time = np.nan first_pos_zero_crossing_time = np.nan return duration_seconds, first_neg_zero_crossing_time, first_pos_zero_crossing_time
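# --- Usage sketch (illustrative only) ---
# A synthetic dimming light curve, normalized so the pre-flare level is 0 %, that dips below
# zero and recovers. determine_dimming_duration should find the downward and upward zero
# crossings and report the time between them. The shape and timing are made up.
def _example_determine_dimming_duration():  # pragma: no cover
    times = pd.date_range('2012-04-15 18:00', periods=240, freq='1min')
    seconds = np.arange(240) * 60.0
    irradiance = 0.5 - 3.0 * np.exp(-((seconds - 7200.0) / 3600.0) ** 2)  # dips below 0 % mid-window
    light_curve_df = pd.DataFrame({'irradiance': irradiance}, index=times)
    duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(light_curve_df, verbose=False)
    return duration_seconds, duration_start_time, duration_end_time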
def determine_preflare_irradiance(light_curve_df, estimated_time_of_peak_start, max_median_diff_threshold=1.5, std_threshold=1.0, plot_path_filename=None, verbose=False, logger=None): """Determine pre-flare irradiance level in a solar light curve. Or, more generally, find the pre-peak level in a time series. Inputs: light_curve_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex and a column for irradiance. estimated_time_of_peak_start [metatime]: The estimated time that the dramatic increase starts. This could come from, e.g., GOES/XRS. Optional Inputs: max_median_diff_threshold [float]: The maximum allowed difference in medians between the 3 pre-flare windows in percent terms. This value gets multiplied by the mean of the stds from each sub-window and is then compared to the max_median_diff. The default is 1.5. std_threshold [float]: The maximum allowed standard deviation in the pre-flare windows in percent terms. The default is 0.5. plot_path_filename [str]: Set to a path and filename in order to save the summary plot to disk. Default is None, meaning the plot will not be saved to disk. verbose [bool]: Set to log the processing messages to disk and console. Default is False. logger [JpmLogger]: A configured logger from jpm_logger.py. If set to None, will generate a new one. Default is None. Outputs: preflare_irradiance [float]: The identified pre-flare irradiance level in the same units as light_curve_df.irradiance. Optional Outputs: None Example: preflare_irradiance = determine_preflare_irradiance(light_curve_df, pd.Timestamp('2012-04-15 17:52:20.0'), plot_path_filename='./bla.png', verbose=True) """ # Prepare the logger for verbose if verbose: if not logger: logger = JpmLogger(filename='determine_preflare_irradiance_log', path='/Users/jmason86/Desktop/') logger.info("Running on event with peak start time of {0}.".format( estimated_time_of_peak_start)) # Verify that not all values are nan if light_curve_df.isna().all().all(): if verbose: logger.warning("All irradiance values are NaN. Returning.") return np.nan # Convert irradiance to percent if not already present if 'irradiance_percent' not in light_curve_df.columns: median_irradiance = light_curve_df['irradiance'].median() light_curve_df['irradiance_percent'] = ( light_curve_df['irradiance'].values - median_irradiance) / median_irradiance * 100. if verbose: logger.info( "Converted irradiance to percent, baselining median in entire pre-flare window." ) # Divide the pre-flare period into 3 equal-length windows windows = np.array_split(light_curve_df[:estimated_time_of_peak_start], 3) if verbose: logger.info("Divided pre-flare period into 3 equal-length windows.") # Compute median and standard deviation in each window medians = [ windowed_df['irradiance_percent'].median() for windowed_df in windows ] medians_abs = [ windowed_df['irradiance'].median() for windowed_df in windows ] stds = np.array( [windowed_df['irradiance_percent'].std() for windowed_df in windows]) if verbose: logger.info("Computed medians and standard deviations in each window.") # Compute max difference between the medians max_median_diff = np.max( np.abs(np.append(np.diff(medians), medians[2] - medians[0]))) # Compare medians and standard deviations in each window to thresholds failed_median_threshold = False failed_std_threshold = False if np.all(np.isnan(stds)): if verbose: logger.warning( 'Cannot compute pre-flare irradiance. All standard deviations are nan.' 
) failed_std_threshold = True else: if max_median_diff > max_median_diff_threshold * np.mean(stds): if verbose: logger.warning( 'Cannot compute pre-flare irradiance. Maximum difference in window medians ({0}) exceeded threshold ({1}).' .format(max_median_diff, max_median_diff_threshold * np.mean(stds))) failed_median_threshold = True if (stds < std_threshold).sum() < 2: if verbose: logger.warning( 'Cannot compute pre-flare irradiance. Standard deviation in more than 1 window is larger than threshold ({0}).' .format(std_threshold)) failed_std_threshold = True # Compute pre-flare irradiance (mean of the medians in absolute units) if failed_median_threshold or failed_std_threshold: preflare_irradiance = np.nan else: preflare_irradiance = np.mean([windowed_df['irradiance'].median() for windowed_df in windows]) if verbose: logger.info("Computed pre-flare irradiance: {0}".format(preflare_irradiance)) # Produce summary plot if plot_path_filename: plt.style.use('jpm-transparent-light') from matplotlib import dates from matplotlib.patches import Rectangle light_curve_df = light_curve_df.drop(columns='irradiance_percent') ax = light_curve_df[:estimated_time_of_peak_start].plot(legend=False, c='grey') # plt.plot(light_curve_df[:estimated_time_of_peak_start].irradiance, c='grey') # using matplotlib instead of pandas # ax = plt.gca() start_date = light_curve_df.index.values[0] start_date_string = pd.to_datetime(str(start_date)) plt.title('Pre-flare Windows') plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S')) plt.ylabel('Irradiance [W m$^{-2}$]') fmtr = dates.DateFormatter("%H:%M:%S") ax.xaxis.set_major_formatter(fmtr) ax.xaxis.set_major_locator(dates.HourLocator()) ax2 = ax.twinx() light_curve_df[:estimated_time_of_peak_start].plot(ax=ax2, legend=False, c='grey') # ax2.plot(light_curve_df[:estimated_time_of_peak_start].irradiance, color='grey') vals = ax2.get_yticks() ax2.set_yticklabels(['{:3.2f}%'.format((x - median_irradiance) / median_irradiance * 100) for x in vals]) # First window start = dates.date2num(light_curve_df.index[0]) end = dates.date2num(windows[0].index[-1]) width = end - start rect = Rectangle((start, 0), width, 1, color='deepskyblue', alpha=0.2) ax.add_patch(rect) plt.plot([windows[0].index[0], windows[0].index[-1]], [medians_abs[0], medians_abs[0]], linestyle='dashed', c='dimgrey') ax.text(start + width / 2.0, np.min(light_curve_df[:estimated_time_of_peak_start].irradiance), 'median$_1$ = ' + latex_float(medians[0]) + '% \n' + r'$\sigma_1$ = ' + latex_float(stds[0]) + '%', fontsize=11, ha='center', va='bottom') # Second window start = dates.date2num(windows[1].index[0]) end = dates.date2num(windows[1].index[-1]) width = end - start rect = Rectangle((start, 0), width, 1, color='slateblue', alpha=0.2) ax.add_patch(rect) plt.plot([windows[1].index[0], windows[1].index[-1]], [medians_abs[1], medians_abs[1]], linestyle='dashed', c='dimgrey') ax.text(start + width / 2.0, np.min(light_curve_df[:estimated_time_of_peak_start].irradiance), 'median$_2$ = ' + latex_float(medians[1]) + '% \n' + r'$\sigma_2$ = ' + latex_float(stds[1]) + '%', fontsize=11, ha='center', va='bottom') if not np.isnan(preflare_irradiance): ax.axes.axhline(y=preflare_irradiance, linewidth=2, color='tomato', linestyle='dashed') ax.text( start + width / 2.0, np.max( light_curve_df[:estimated_time_of_peak_start].irradiance), 'pre-flare I = ' + latex_float(preflare_irradiance) + ' W m$^{-2}$', fontsize=11, ha='center', va='top', color='tomato') else: ax.text( start + width / 2.0, np.max(
light_curve_df[:estimated_time_of_peak_start].irradiance), 'pre-flare I = N/A \n' + 'median condition ok: ' + str(not failed_median_threshold) + '\n' + r'$\sigma$ condition ok: ' + str(not failed_std_threshold), fontsize=11, ha='center', va='top', color='tomato') # Third window start = dates.date2num(windows[2].index[0]) end = dates.date2num(windows[2].index[-1]) width = end - start rect = Rectangle((start, 0), width, 1, color='violet', alpha=0.2) ax.add_patch(rect) plt.plot([windows[2].index[0], windows[2].index[-1]], [medians_abs[2], medians_abs[2]], linestyle='dashed', c='dimgrey') ax.text(start + width / 2.0, np.min(light_curve_df[:estimated_time_of_peak_start].irradiance), 'median$_3$ = ' + latex_float(medians[2]) + '% \n' + r'$\sigma_3$ = ' + latex_float(stds[2]) + '%', fontsize=11, ha='center', va='bottom') ax.text(end, np.max(light_curve_df[:estimated_time_of_peak_start].irradiance), 'median diff = ' + latex_float(max_median_diff) + '% \n' + r'thresh $\times \mu_{\sigma n}$ = ' + latex_float(max_median_diff_threshold * np.mean(stds)) + '%', fontsize=11, ha='right', va='top') # Increase border so y-axes don't get cut off in savefig, even though they don't in plt.show() plt.gcf().subplots_adjust(left=0.22) plt.savefig(plot_path_filename) if verbose: logger.info("Summary plot for event with start time {0} saved to {1}".format(estimated_time_of_peak_start, plot_path_filename)) return preflare_irradiance
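# Condensed sketch of the acceptance criteria implemented above, on synthetic data: split the
# pre-flare interval into 3 windows, require the window medians (in percent) to agree to within
# max_median_diff_threshold * mean(std), and require at least 2 of the 3 windows to have a
# standard deviation below std_threshold. The data and threshold values below are illustrative only.
import numpy as np
import pandas as pd

times = pd.date_range('2012-04-15 16:00', periods=180, freq='1min')
rng = np.random.default_rng(0)
irradiance = 1.0e-3 * (1.0 + rng.normal(0.0, 0.002, 180))  # roughly constant pre-flare level
preflare_df = pd.DataFrame({'irradiance': irradiance}, index=times)

median_irradiance = preflare_df['irradiance'].median()
preflare_df['irradiance_percent'] = (preflare_df['irradiance'] - median_irradiance) / median_irradiance * 100.0

windows = np.array_split(preflare_df, 3)
medians = np.array([w['irradiance_percent'].median() for w in windows])
stds = np.array([w['irradiance_percent'].std() for w in windows])

max_median_diff = np.max(np.abs(np.append(np.diff(medians), medians[2] - medians[0])))
medians_agree = max_median_diff <= 1.5 * np.mean(stds)  # max_median_diff_threshold = 1.5
enough_quiet_windows = (stds < 1.0).sum() >= 2          # std_threshold = 1.0

# Pre-flare irradiance is the mean of the absolute window medians, or NaN if either check fails
preflare_irradiance = np.mean([w['irradiance'].median() for w in windows]) if (medians_agree and enough_quiet_windows) else np.nan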
def correlationCoefficientScan( output_path='/Users/tyleralbee/Desktop/StealthCME', eve_data_path='/Users/tyleralbee/Desktop/savesets/eve_selected_lines.csv', cme_signature='/Users/tyleralbee/Desktop/savesets/eve_lines_event_percents_fitted.csv', verbose=True): """Slide a time window across the EVE emission line data and correlate each window against a known CME dimming signature. Inputs: None. Optional Inputs: output_path [str]: Directory for the log file and the output csv of candidate windows. eve_data_path [str]: Path to the csv of selected EVE line irradiances. cme_signature [str]: Path to the csv of fitted CME dimming light curves to correlate against. verbose [bool]: Set to log the processing messages to disk and console. Default is True. Outputs: None returned; windows whose summed correlation coefficient exceeds the acceptance threshold are written to a csv in output_path. Optional Outputs: None Example: correlationCoefficientScan(verbose=True) """ eve_lines = pd.read_csv(eve_data_path, index_col=0) eve_lines.index = pd.to_datetime(eve_lines.index) wholeDfLength = len(eve_lines) cme_event = pd.read_csv(cme_signature, index_col=0) cme_event.index = pd.to_datetime(cme_event.index) cmeEventLength = len(cme_event) if verbose: logger = JpmLogger(filename='do_correlation_coefficient_scan', path=output_path, console=True) logger.info("Starting Stealth CME search pipeline!") else: logger = None if verbose: logger.info('Loaded EVE and CME data') # Define the columns of the output catalog output_table = pd.DataFrame(columns=['Event #', 'Start Time', 'End Time', 'Correlation Coefficient']) csv_filename = os.path.join(output_path, 'cc_output_{0}.csv'.format(Time.now().iso)) output_table.to_csv(csv_filename, header=True, index=False, mode='w') if verbose: logger.info('Created output table definition.') # Start a progress bar widgets = [progressbar.Percentage(), progressbar.Bar(), progressbar.Timer(), ' ', progressbar.AdaptiveETA()] startRow = 0 endRow = cmeEventLength numSlices = int(wholeDfLength / cmeEventLength) output_row = 1 progress_bar_sliding_window = progressbar.ProgressBar( widgets=[progressbar.FormatLabel('Correlation Coefficient Analysis ')] + widgets, max_value=numSlices).start() # ----------Loop through data set using a sliding time window------------------------------------------------------- for i in range(1, numSlices): # ----------Clip dataset to time slice window------------------------------------------------------------------- event_time_slice = eve_lines.iloc[startRow:endRow] # ---------Convert irradiance values to percentages------------------------------------------------------------- preflare_irradiance = event_time_slice.iloc[0] event_time_slice_percentages = (event_time_slice - preflare_irradiance) / preflare_irradiance * 100.0 if verbose: logger.info("Event {0} irradiance converted from absolute to percent units.".format(i)) # ---------Fit light curves to reduce noise--------------------------------------------------------------------- uncertainty = np.ones(len(event_time_slice_percentages)) * 0.002545 # got this line from James's code progress_bar_fitting = progressbar.ProgressBar( widgets=[progressbar.FormatLabel('Light curve fitting: ')] + widgets, max_value=len(event_time_slice_percentages.columns)).start() for j, column in enumerate(event_time_slice_percentages): if event_time_slice_percentages[column].isnull().all(): if verbose: logger.info( 'Event {0} {1} fitting skipped because all irradiances are NaN.'
.format(j, column)) else: eve_line_event_percentages = pd.DataFrame(event_time_slice_percentages[column]) eve_line_event_percentages.columns = ['irradiance'] eve_line_event_percentages['uncertainty'] = uncertainty fitting_path = os.path.join(output_path, 'Fitting/') if not os.path.exists(fitting_path): os.makedirs(fitting_path) plt.close('all') light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve( eve_line_event_percentages, plots_save_path='{0} Event {1} {2} '.format(fitting_path, j, column), verbose=verbose, logger=logger) event_time_slice_percentages[column] = light_curve_fit event_time_slice_fitted = event_time_slice_percentages # Keep our variable names explicit if verbose: logger.info('Event {0} {1} light curves fitted.'.format(j, column)) progress_bar_fitting.update(j) progress_bar_fitting.finish() if verbose: logger.info("Event {0} Light curves fitted".format(i)) # ---------Compute Correlation Coefficients--------------------------------------------------------------------- totalCorrelationCoefficient = 0.0 ds1 = event_time_slice_fitted ds2 = cme_event # Gather stats for correlation for k, column in enumerate(ds1): dsColumn1 = ds1[column] dsColumn2 = ds2[column] dsColumn1.reset_index(drop=True, inplace=True) # prevent NaNs from appearing in join dsColumn2.reset_index(drop=True, inplace=True) # prevent NaNs from appearing in join # TODO: assert that both columns have same count? n = int(dsColumn1.count()) meanA = float(dsColumn1.mean()) meanB = float(dsColumn2.mean()) stdA = float(dsColumn1.std(ddof=0)) stdB = float(dsColumn2.std(ddof=0)) # Generate correlation output dsJoined = pd.DataFrame({'a': dsColumn1, 'b': dsColumn2}) # Avoids ambiguity when attr names are the same numerator = 0.0 # Stores summation of (a_i - meanA)(b_i - meanB) denominator = n * stdA * stdB for index, row in dsJoined.iterrows(): a = row['a'] b = row['b'] numerator = numerator + (a - meanA) * (b - meanB) correlationCoefficient = numerator / denominator totalCorrelationCoefficient = totalCorrelationCoefficient + correlationCoefficient # ---------Output Results--------------------------------------------------------------------------------------- eventStartTime = event_time_slice.iloc[0].name eventEndTime = event_time_slice.iloc[-1].name if not math.isnan(totalCorrelationCoefficient) and totalCorrelationCoefficient >= 4.2: # acceptance threshold on the correlation summed over all emission lines output_table.loc[output_row] = [output_row, eventStartTime, eventEndTime, totalCorrelationCoefficient] csv_filename = os.path.join(output_path, 'cc_output_{0}.csv'.format(Time.now().iso)) output_table.to_csv(csv_filename, header=True, index=False, mode='w') output_row = output_row + 1 startRow = startRow + 60 # advance time window by 1 hour endRow = endRow + 60 # advance time window by 1 hour progress_bar_sliding_window.update(i) # advance progress bar
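# The per-line correlation above is a Pearson coefficient computed by hand (the summed
# (a_i - meanA)(b_i - meanB) divided by n * stdA * stdB). For reference, a sketch of an
# equivalent formulation using pandas; summed_pearson is a hypothetical helper, not part of
# this pipeline, and it assumes the two DataFrames share column names (as the loop above does).
import pandas as pd

def summed_pearson(event_df: pd.DataFrame, template_df: pd.DataFrame) -> float:
    """Sum the per-column Pearson correlation between an event window and the CME template."""
    total = 0.0
    for column in event_df.columns:
        a = event_df[column].reset_index(drop=True)
        b = template_df[column].reset_index(drop=True)
        # Series.corr computes the Pearson coefficient over pairwise-complete observations,
        # so NaN handling can differ slightly from the manual loop above.
        total += a.corr(b)
    return total

# Example usage with the names from the loop above (assumed):
# total_cc = summed_pearson(event_time_slice_fitted, cme_event)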