def init():
    """Initialize the jedi catalog: load the data

        Inputs:
            None.

        Optional Inputs:
            None

        Outputs:
            All outputs are globals accessible by doing import jedi_config
            logger [JpmLogger]:                               A configurable log that can optionally also print to console.
            all_minutes_since_last_flare [numpy float array]: The number of minutes between each flare and the previous one.
            preflare_indices [numpy int array]:               The indices where flares are considered time-independent.

        Optional Outputs:
             None

        Example:
            jedi_config.init()
    """
    global logger, all_minutes_since_last_flare, preflare_indices

    # Initialize logger
    logger = JpmLogger(filename=logger_filename,
                       path=output_path,
                       console=False)
    logger.info('Logger initialized.')

    # Set up folders
    init_folders()

    # Set up filenames
    init_filenames()

    # Load the EVE data
    load_eve_data()

    # Get GOES flare events above C1 within date range corresponding to EVE data
    load_goes_flare_event_data()

    # Compute the amount of time between all flares [minutes]
    peak_time = goes_flare_events['peak_time']
    all_minutes_since_last_flare = (peak_time[1:] - peak_time[0:-1]).sec / 60.0

    # Figure out which flares are independent, store those indices
    is_flare_independent = all_minutes_since_last_flare > threshold_time_prior_flare_minutes
    preflare_indices = np.where(
        is_flare_independent
    )[0] + 1  # Add 1 to map back to event index and not to the differentiated vector
    logger.info(
        'Found {0} independent flares of {1} total flares given a time separation of {2} minutes.'
        .format(len(preflare_indices), len(is_flare_independent),
                threshold_time_prior_flare_minutes))
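
# Illustrative sketch (editor's addition, not part of the original pipeline):
# the independence thresholding above, reduced to a self-contained numpy demo
# with hypothetical flare peak times given in minutes.
def _demo_independence_threshold():
    import numpy as np
    peak_minutes = np.array([0.0, 30.0, 400.0, 410.0, 900.0])  # hypothetical peak times [minutes]
    minutes_since_last = peak_minutes[1:] - peak_minutes[:-1]  # [30, 370, 10, 490]
    independent = minutes_since_last > 240.0  # same role as threshold_time_prior_flare_minutes
    return np.where(independent)[0] + 1  # [2, 4]; +1 maps the diffs back to event indices
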
def get_goes_flare_events(start_time,
                          end_time,
                          minimum_flare_size='C1',
                          verbose=False):
    """Get a list of flare events from NOAA's GOES/XRS. Just a wrapper around sunpy.instr.goes get_goes_event_list.

    Inputs:
        start_time [metatime or string]: The beginning of the time window of interest. See jpm_time_conversions.py
                                         (https://github.com/jmason86/python_convenience_functions/blob/master/jpm_time_conversions.py)
                                         for allowed metatime formats if not using an iso or human like time string.
        end_time [metatime or string]:   Same as start_time but for the end of the time window.

    Optional Inputs:
        minimum_flare_size [string]: The minimum flare size to search for. Default is 'C1'.
        verbose [bool]:              Set to log the processing messages to disk and console. Default is False.

    Outputs:
        goes_events [list]: The list of GOES flare events corresponding to the input search criteria.

    Optional Outputs:
        None

    Example:
        goes_events = get_goes_flare_events(pd.Timestamp('2010-05-01 00:00:00'),
                                            pd.Timestamp('2018-01-12 00:00:00'),
                                            verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        # TODO: Update the path
        logger = JpmLogger(filename='get_goes_flare_events_log',
                           path='/Users/jmason86/Desktop/')
        logger.info("Getting > {0} flares from {1} to {2}.".format(
            minimum_flare_size, start_time, end_time))

    if not isinstance(start_time, str):
        start_time = metatimes_to_human(np.array([start_time]))[0]
    if not isinstance(end_time, str):
        end_time = metatimes_to_human(np.array([end_time]))[0]
    time_range = TimeRange(start_time, end_time)
    goes_events = get_goes_event_list(time_range, goes_class_filter=minimum_flare_size)

    if verbose:
        logger.info("Found {0} events.".format(len(goes_events)))

    # Return the GOES flare events
    return goes_events
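
# Illustrative usage sketch (editor's addition): querying a few months of
# >= M1 flares. Assumes network access for sunpy's event list query; the
# returned list follows sunpy's get_goes_event_list format.
def _demo_get_goes_flare_events():
    import pandas as pd
    goes_events = get_goes_flare_events(pd.Timestamp('2010-05-01 00:00:00'),
                                        pd.Timestamp('2010-08-01 00:00:00'),
                                        minimum_flare_size='M1',
                                        verbose=False)
    return len(goes_events)
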
def light_curve_peak_match_subtract(light_curve_to_subtract_from_df, light_curve_to_subtract_with_df, estimated_time_of_peak,
                                    max_seconds_shift=1800,
                                    plot_path_filename=None, verbose=False, logger=None):
    """Align the peak of a second light curve to the first, scale its magnitude to match, and subtract it off.

    Inputs:
        light_curve_to_subtract_from_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex and a column for irradiance.
        light_curve_to_subtract_with_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex and a column for irradiance.
        estimated_time_of_peak [metatime]: The estimated time that the peak should occur. This could come from, e.g., GOES/XRS.

    Optional Inputs:
        max_seconds_shift [int]:  The maximum allowed time shift in seconds to get the peaks to match.
        plot_path_filename [str]: Set to a path and filename in order to save the summary plot to disk.
                                  Default is None, meaning the plot will not be saved to disk.
        verbose [bool]:           Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:       A configured logger from jpm_logger.py. If set to None, will generate a
                                  new one. Default is None.

    Outputs:
        light_curve_corrected_df [pd DataFrame]: A pandas DataFrame with the same format as light_curve_to_subtract_from_df but
                                                 with the resultant peak match and subtraction performed. Returns np.nan if
                                                 the peaks couldn't be found.
        seconds_shift [float]:                   The number of seconds that light_curve_to_subtract_with_df was shifted to get
                                                 its peak to match light_curve_to_subtract_from_df. Returns np.nan if
                                                 the peaks couldn't be found.
        scale_factor [float]:                    The multiplicative factor applied to light_curve_to_subtract_with_df to get
                                                 its peak to match light_curve_to_subtract_from_df. Returns np.nan if
                                                 the peaks couldn't be found.

    Optional Outputs:
        None

    Example:
        light_curve_corrected_df, seconds_shift, scale_factor = light_curve_peak_match_subtract(light_curve_to_subtract_from_df,
                                                                                                light_curve_to_subtract_with_df,
                                                                                                estimated_time_of_peak,
                                                                                                plot_path_filename='./',
                                                                                                verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='light_curve_peak_match_subtract_log', path='/Users/jmason86/Desktop/')
        logger.info("Running on event with light curve start time of {0}.".format(light_curve_to_subtract_from_df.index[0]))

    # Drop NaNs since peakutils can't handle them
    light_curve_to_subtract_from_df = light_curve_to_subtract_from_df.dropna()
    light_curve_to_subtract_with_df = light_curve_to_subtract_with_df.dropna()

    # Detrend the light curves and find the peaks that are >= 95% of the max detrended irradiance
    if verbose:
        logger.info("Detrending light curves.")
    if (light_curve_to_subtract_from_df['irradiance'].values < 0).all():
        light_curve_to_subtract_from_df.iloc[0] = 1  # Else can crash peakutils.baseline
    base_from = peakutils.baseline(light_curve_to_subtract_from_df)
    detrend_from = light_curve_to_subtract_from_df - base_from
    indices_from = peakutils.indexes(detrend_from.values.squeeze(), thres=0.95)

    if (light_curve_to_subtract_with_df['irradiance'].values < 0).all():
        light_curve_to_subtract_with_df.iloc[0] = 1  # Else can crash peakutils.baseline
    base_with = peakutils.baseline(light_curve_to_subtract_with_df)
    detrend_with = light_curve_to_subtract_with_df - base_with
    indices_with = peakutils.indexes(detrend_with.values.squeeze(), thres=0.95)

    if len(indices_from) == 0:
        if verbose:
            logger.warning('Could not find peak in light curve to subtract from.')
        return np.nan, np.nan, np.nan
    if len(indices_with) == 0:
        if verbose:
            logger.warning('Could not find peak in light curve to subtract with.')
        return np.nan, np.nan, np.nan

    # Identify the peak closest to the input estimated peak time (e.g., from GOES/XRS)
    if verbose:
        logger.info("Identifying peaks closest to initial guess in light curves.")
    peak_index_from = indices_from[closest(light_curve_to_subtract_from_df.index[indices_from], estimated_time_of_peak)]
    peak_index_with = indices_with[closest(light_curve_to_subtract_with_df.index[indices_with], estimated_time_of_peak)]
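
    # A positive index_shift means the "subtract from" peak occurs later in the
    # time series, so the "subtract with" curve must be shifted forward to match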
    index_shift = peak_index_from - peak_index_with

    # Compute how many seconds the time shift corresponds to
    seconds_shift = (light_curve_to_subtract_from_df.index[peak_index_from] -
                     light_curve_to_subtract_with_df.index[peak_index_with]).total_seconds()

    # Fail if seconds_shift > max_seconds_shift
    isTimeShiftValid = True
    if abs(seconds_shift) > max_seconds_shift:
        if verbose:
            logger.warning("Cannot do peak match. Time shift of {0} seconds is greater than max allowed shift of {1} seconds.".format(seconds_shift, max_seconds_shift))
        isTimeShiftValid = False

    # Shift the subtract_with light curve in time to align its peak to the subtract_from light curve
    if isTimeShiftValid:
        if verbose:
            logger.info("Shifting and scaling the light curve to subtract with.")
        shifted_with = light_curve_to_subtract_with_df.shift(index_shift)

        # Scale the subtract_with light curve peak irradiance to match the subtract_from light curve peak irradiance
        scale_factor = (detrend_from.values[peak_index_from] / shifted_with.values[peak_index_with + index_shift])[0]
        shifted_scaled_with = shifted_with * scale_factor
        light_curve_corrected_df = light_curve_to_subtract_from_df - shifted_scaled_with

        if verbose:
            if light_curve_corrected_df.isnull().values.sum() > 0:
                logger.warning("%s points were shifted to become NaN." % light_curve_corrected_df.isnull().values.sum())
            logger.info("Light curve peak matching and subtraction complete.")

    if plot_path_filename:
        from jpm_number_printing import latex_float
        seconds_shift_string = '+' if seconds_shift >= 0 else ''
        seconds_shift_string += str(int(seconds_shift))
        if isTimeShiftValid:
            scale_factor_string = latex_float(scale_factor)

        plt.style.use('jpm-transparent-light')
        from matplotlib import dates

        plt.clf()
        fig, ax = plt.subplots()
        plt.plot(light_curve_to_subtract_from_df.index.values, light_curve_to_subtract_from_df.values, c='limegreen')
        plt.tick_params(axis='x', which='minor', labelbottom=False)
        plt.xlabel(estimated_time_of_peak)
        plt.ylabel('Irradiance [%]')
        fmtr = dates.DateFormatter("%H:%M:%S")
        ax.xaxis.set_major_formatter(fmtr)
        ax.xaxis.set_major_locator(dates.HourLocator())

        if isTimeShiftValid:
            plt.title('I: $\\times$' + scale_factor_string + ', t: ' + seconds_shift_string + ' s', color='tomato')
            shifted_scaled_with.plot(c='tomato', label='subtract with', ax=ax)
            light_curve_corrected_df.plot(c='darkgrey', label='result', ax=ax)
        else:
            plt.title('t: ' + seconds_shift_string + ' s > max allowed {0} s'.format(max_seconds_shift), color='tomato')
            plt.plot(light_curve_to_subtract_with_df.index.values, light_curve_to_subtract_with_df.values, c='tomato')
        plt.scatter(light_curve_to_subtract_from_df.index[peak_index_from], light_curve_to_subtract_from_df.values[peak_index_from], c='black')

        if isTimeShiftValid:
            plt.scatter(shifted_scaled_with.index[peak_index_with + index_shift], shifted_scaled_with.values[peak_index_with + index_shift], c='black')
            ax.legend(['subtract from', 'subtract with', 'result'], loc='best')
        else:
            plt.scatter(light_curve_to_subtract_with_df.index[peak_index_with], light_curve_to_subtract_with_df.values[peak_index_with], c='black')
            ax.legend(['subtract from', 'subtract with'], loc='best')

        path = os.path.dirname(plot_path_filename)
        if path and not os.path.exists(path):
            os.makedirs(path)
        plt.savefig(plot_path_filename)

        if verbose:
            logger.info("Summary plot saved to %s" % plot_path_filename)

    if isTimeShiftValid:
        return light_curve_corrected_df, seconds_shift, scale_factor
    else:
        return np.nan, seconds_shift, np.nan
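
# Illustrative sketch (editor's addition, not part of the original pipeline):
# the core shift/scale/subtract logic above, demonstrated on two synthetic
# Gaussian "light curves" using pandas only (no peak finding or plotting).
def _demo_peak_match_subtract():
    import numpy as np
    import pandas as pd
    t = pd.date_range('2010-05-01', periods=200, freq='min')  # 1-minute cadence
    x = np.arange(200)
    curve_from = pd.DataFrame({'irradiance': np.exp(-0.5 * ((x - 120) / 10.0) ** 2)}, index=t)
    curve_with = pd.DataFrame({'irradiance': 0.5 * np.exp(-0.5 * ((x - 100) / 10.0) ** 2)}, index=t)
    peak_from = int(curve_from['irradiance'].values.argmax())  # 120
    peak_with = int(curve_with['irradiance'].values.argmax())  # 100
    index_shift = peak_from - peak_with  # +20 rows
    shifted = curve_with.shift(index_shift)  # align the peaks in index space
    scale = (curve_from['irradiance'].iloc[peak_from] /
             shifted['irradiance'].iloc[peak_with + index_shift])  # 2.0
    return curve_from - shifted * scale  # peak-matched subtraction; ~0 at the peak
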
def automatic_fit_light_curve(light_curve_df, minimum_score=0.3, plots_save_path=None,
                              verbose=False, logger=None):
    """Automatically fit the best support vector machine regression (SVR) model for the input light curve.

    Inputs:
        light_curve_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex, and columns for irradiance and uncertainty.

    Optional Inputs:
        minimum_score [float]: Set this to the minimum explained variance score (0 - 1) acceptable for fits. If the
                               best fit score is < minimum_score, this function will return np.nan for light_curve_fit.
                               Default value is 0.3.
        plots_save_path [str]: Set to a path in order to save the validation curve and best fit overplot on the data to disk.
                               Default is None, meaning no plots will be saved to disk.
        verbose [bool]:        Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:    A configured logger from jpm_logger.py. If set to None, will generate a
                               new one. Default is None.

    Outputs:
        light_curve_fit_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex, and columns for fitted irradiance and uncertainty.
        best_fit_gamma [float]:            The best-found gamma hyperparameter for the SVR.
        best_fit_score [float]:            The best explained variance score.

    Optional Outputs:
        None

    Example:
        light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve(light_curve_df, verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='automatic_fit_light_curve_log', path='/Users/jmason86/Desktop/')
        logger.info("Running on event with light curve start time of {0}.".format(light_curve_df.index[0]))

    # Pull data out of the DataFrame for compatibility formatting
    X = metatimes_to_seconds_since_start(light_curve_df.index)
    y = light_curve_df['irradiance'].values

    # Check for NaNs and issue warning that they are being removed from the dataset
    if verbose:
        if np.isnan(y).any():
            logger.warning("There are NaN values in light curve. Dropping them.")
    finite_irradiance_indices = np.isfinite(y)
    X = X[finite_irradiance_indices]
    X = X.reshape(len(X), 1)  # Format to be compatible with validation_curve and SVR.fit()
    uncertainty = light_curve_df['uncertainty'][finite_irradiance_indices]
    y = y[finite_irradiance_indices]

    if verbose:
        logger.info("Fitting %s points." % len(y))

    # Helper function for compatibility with validation_curve
    def jpm_svr(gamma=1e-6, **kwargs):
        return make_pipeline(SVR(kernel='rbf', C=1e3, gamma=gamma, **kwargs))

    # Hyper parameter for SVR is gamma, so generate values of it to try
    gamma = np.logspace(-7, 1, num=20, base=10)

    # Overwrite the default scorer (R^2) with explained variance score
    evs = make_scorer(explained_variance_score)

    # Split the data between training/testing 50/50 but across the whole time range rather than the default consecutive Kfolds
    import time
    t0 = time.time()
    shuffle_split = ShuffleSplit(n_splits=20, train_size=0.5, test_size=0.5, random_state=None)

    # Generate the validation curve -- test all them gammas!
    # Parallelized to speed it up (n_jobs = # of parallel threads)
    train_score, val_score = validation_curve(jpm_svr(), X, y,
                                              param_name='svr__gamma', param_range=gamma,
                                              cv=shuffle_split, n_jobs=7, scoring=evs)
    t1 = time.time()
    if verbose:
        logger.info('The validation curve took {0} seconds to run.'.format(t1 - t0))

    if verbose:
        logger.info("Validation curve complete.")

    if plots_save_path:
        plt.clf()
        plt.style.use('jpm-transparent-light')
        plt.plot(gamma, np.median(train_score, 1), label='training score')
        plt.plot(gamma, np.median(val_score, 1), label='validation score')
        ax = plt.gca()
        plt.legend(loc='best')
        plt.title("t$_0$ = " + datetimeindex_to_human(light_curve_df.index)[0])
        ax.set_xscale('log')
        plt.xlabel('gamma')
        plt.ylabel('score')
        plt.ylim(0, 1)
        filename = plots_save_path + 'Validation Curve t0 ' + datetimeindex_to_human(light_curve_df.index)[0] + '.png'
        plt.savefig(filename)
        if verbose:
            logger.info("Validation curve saved to %s" % filename)

    # Identify the best score
    scores = np.median(val_score, axis=1)
    best_fit_score = np.max(scores)
    best_fit_gamma = gamma[np.argmax(scores)]
    if verbose:
        logger.info('Scores: ' + str(scores))
        logger.info('Best score: ' + str(best_fit_score))
        logger.info('Best fit gamma: ' + str(best_fit_gamma))

    # Return np.nan if only got bad fits
    if best_fit_score < minimum_score:
        if verbose:
            logger.warning("Uh oh. Best fit score {0:.2f} is < user-defined minimum score {1:.2f}".format(best_fit_score, minimum_score))
        return np.nan, best_fit_gamma, best_fit_score

    # Otherwise train and fit the best model
    sample_weight = 1 / uncertainty
    model = SVR(kernel='rbf', C=1e3, gamma=best_fit_gamma).fit(X, y, sample_weight)
    y_fit = model.predict(X)

    if verbose:
        logger.info("Best model trained and fitted.")

    if plots_save_path:
        plt.clf()
        plt.errorbar(X.ravel(), y, yerr=uncertainty, color='black', fmt='o', label='Input light curve')
        plt.plot(X.ravel(), y_fit, linewidth=6, label='Fit')
        plt.title("t$_0$ = " + datetimeindex_to_human(light_curve_df.index)[0])
        plt.xlabel('time [seconds since start]')
        plt.ylabel('irradiance [%]')
        plt.legend(loc='best')
        filename = plots_save_path + 'Fit t0 ' + datetimeindex_to_human(light_curve_df.index)[0] + '.png'
        plt.savefig(filename)
        if verbose:
            logger.info("Fitted curve saved to %s" % filename)

    # TODO: Get uncertainty of fit at each point... if that's even possible
    # Placeholder for now just so that the function can complete: output uncertainty = input uncertainty
    fit_uncertainty = uncertainty

    # Construct a pandas DataFrame with DatetimeIndex, y_fit, and fit_uncertainty
    light_curve_fit_df = pd.DataFrame({'irradiance': y_fit,
                                       'uncertainty': fit_uncertainty})
    light_curve_fit_df.index = light_curve_df.index[finite_irradiance_indices]
    if verbose:
        logger.info("Created output DataFrame")

    return light_curve_fit_df, best_fit_gamma, best_fit_score
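
# Illustrative sketch (editor's addition, not part of the original pipeline):
# a minimal validation-curve sweep over the SVR gamma hyperparameter on
# synthetic dimming-like data, mirroring the model selection above.
def _demo_svr_gamma_sweep():
    import numpy as np
    from sklearn.svm import SVR
    from sklearn.pipeline import make_pipeline
    from sklearn.metrics import make_scorer, explained_variance_score
    from sklearn.model_selection import ShuffleSplit, validation_curve
    rng = np.random.default_rng(0)
    X = np.linspace(0, 6000, 200).reshape(-1, 1)  # seconds since start
    y = -5.0 * np.exp(-((X.ravel() - 2000.0) / 1500.0) ** 2) + rng.normal(0, 0.3, 200)
    gammas = np.logspace(-7, 1, num=10)
    splitter = ShuffleSplit(n_splits=5, train_size=0.5, test_size=0.5, random_state=0)
    train_score, val_score = validation_curve(
        make_pipeline(SVR(kernel='rbf', C=1e3)), X, y,
        param_name='svr__gamma', param_range=gammas,
        cv=splitter, scoring=make_scorer(explained_variance_score))
    return gammas[np.argmax(np.median(val_score, axis=1))]  # the best-scoring gamma
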
def determine_dimming_slope(light_curve_df,
                            earliest_allowed_time=None,
                            latest_allowed_time=None,
                            smooth_points=0,
                            plot_path_filename=None,
                            verbose=False,
                            logger=None):
    """Find the slope of dimming in a light curve, if any.

    Inputs:
        light_curve_df [pd DataFrame]:    A pandas DataFrame with a DatetimeIndex and a column for irradiance.

    Optional Inputs:
        earliest_allowed_time [metatime]: The function won't return a slope determined any earlier than this.
                                          It is recommended that this be the peak time of the flare.
                                          Default is None, meaning the beginning of the light_curve_df.
        latest_allowed_time [metatime]:   The function won't return a slope determined any later than this.
                                          It is recommended that this be the identified time of dimming depth.
                                          Default is None, meaning the end of the light_curve_df.
        smooth_points [integer]:          Used to apply a rolling mean with the number of points (indices) specified.
                                          Default is 0, meaning no smoothing will be performed.
        plot_path_filename [str]:         Set to a path and filename in order to save the summary plot to disk.
                                          Default is None, meaning the plot will not be saved to disk.
        verbose [bool]:                   Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:               A configured logger from jpm_logger.py. If set to None, will generate a
                                          new one. Default is None.

    Outputs:
        slope_min [float]:  The minimum slope of dimming in percent/second terms.
        slope_max [float]:  The maximum slope of dimming in percent/second terms.
        slope_mean [float]: The mean slope of dimming in percent/second terms.

    Optional Outputs:
        None

    Example:
        slope_min, slope_max, slope_mean = determine_dimming_slope(light_curve_df,
                                                                   plot_path_filename='./determine_dimming_slope_summary.png',
                                                                   verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='determine_dimming_slope_log',
                               path='/Users/jmason86/Desktop/')
        logger.info(
            "Running on event with light curve start time of {0}.".format(
                light_curve_df.index[0]))

    # If no earliest_allowed_time set, then set it to beginning of light_curve_df
    if not earliest_allowed_time:
        earliest_allowed_time = light_curve_df.index[0]
        if verbose:
            logger.info(
                "No earliest allowed time provided. Setting to beginning of light curve: {0}"
                .format(earliest_allowed_time))

    # If no latest_allowed_time set, then set it to end of light_curve_df
    if not latest_allowed_time:
        latest_allowed_time = light_curve_df.index[-1]
        if verbose:
            logger.info(
                "No latest allowed time provided. Setting to end of light curve: {0}"
                .format(latest_allowed_time))

    # Optionally smooth the light curve with a rolling mean
    if smooth_points:
        light_curve_df['irradiance'] = light_curve_df['irradiance'].rolling(
            smooth_points, center=True).mean()
        if verbose:
            logger.info('Applied {0} point smooth.'.format(smooth_points))

    # Replace any NaNs (e.g., introduced at the edges by the centered rolling mean) with the first valid value
    first_non_nan = light_curve_df['irradiance'].first_valid_index()
    nan_indices = np.isnan(light_curve_df['irradiance'])
    light_curve_df.loc[nan_indices, 'irradiance'] = light_curve_df['irradiance'].loc[first_non_nan]

    # Find the max in the allowed window
    max_time = light_curve_df[earliest_allowed_time:latest_allowed_time][
        'irradiance'].idxmax()
    max_irradiance = light_curve_df['irradiance'].loc[max_time]
    if verbose:
        logger.info(
            'Maximum in allowed window found with value of {0:.2f} at time {1}'
            .format(max_irradiance, max_time))

    # Compute the derivative in the time window of interest (inverting sign so that we describe "downward slope")
    window = light_curve_df[max_time:latest_allowed_time]
    derivative = (-window['irradiance'].diff() /
                  window.index.to_series().diff().dt.total_seconds())
    if verbose:
        logger.info(
            "Computed derivative of light curve within time window of interest."
        )

    # Get the min, max, and mean slope
    slope_min = derivative.min()
    slope_max = derivative.max()
    slope_mean = derivative.mean()
    slope_min_str = latex_float(slope_min)
    slope_max_str = latex_float(slope_max)
    slope_mean_str = latex_float(slope_mean)
    if verbose:
        logger.info(
            "Computed min ({0}), max ({1}), and mean ({2}) %/s slope.".format(
                slope_min_str, slope_max_str, slope_mean_str))

    # Do a few sanity checks for the log
    if verbose:
        if slope_min < 0:
            logger.warning(
                "Minimum slope of {0} is unexpectedly < 0.".format(slope_min))
        if slope_max < 0:
            logger.warning(
                "Maximum slope of {0} is unexpectedly < 0.".format(slope_max))
        if slope_mean < 0:
            logger.warning(
                "Mean slope of {0} is unexpectedly < 0.".format(slope_mean))

    # Produce a summary plot
    if plot_path_filename:
        plt.style.use('jpm-transparent-light')
        from matplotlib import dates

        p = plt.plot(light_curve_df['irradiance'])
        p = plt.plot(
            light_curve_df[max_time:latest_allowed_time]['irradiance'],
            label='slope region')
        ax = plt.gca()
        plt.axvline(x=earliest_allowed_time, linestyle='dashed', color='grey')
        plt.axvline(x=latest_allowed_time, linestyle='dashed', color='black')
        plt.axvline(x=max_time, linestyle='dashed', color='black')
        plt.title('Identified Slope')

        start_date = light_curve_df.index.values[0]
        start_date_string = pd.to_datetime(str(start_date))
        plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S'))
        fmtr = dates.DateFormatter("%H:%M:%S")
        ax.xaxis.set_major_formatter(fmtr)
        ax.xaxis.set_major_locator(dates.HourLocator())
        ax.xaxis.grid(True, which='minor')
        plt.ylabel('Irradiance [%]')

        inverse_str = '$^{-1}$'
        plt.annotate('slope_min={0} % sec{1}'.format(slope_min_str,
                                                     inverse_str),
                     xy=(0.98, 0.12),
                     xycoords='axes fraction',
                     ha='right',
                     size=12,
                     color=p[0].get_color())
        plt.annotate('slope_max={0} % sec{1}'.format(slope_max_str,
                                                     inverse_str),
                     xy=(0.98, 0.08),
                     xycoords='axes fraction',
                     ha='right',
                     size=12,
                     color=p[0].get_color())
        plt.annotate('slope_mean={0} % sec{1}'.format(slope_mean_str,
                                                      inverse_str),
                     xy=(0.98, 0.04),
                     xycoords='axes fraction',
                     ha='right',
                     size=12,
                     color=p[0].get_color())

        ax.legend(loc='best')

        plt.savefig(plot_path_filename)
        if verbose:
            logger.info("Summary plot saved to %s" % plot_path_filename)

    # Return the slopes
    return slope_min, slope_max, slope_mean
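
# Illustrative sketch (editor's addition, not part of the original pipeline):
# the slope computation above on a synthetic, steadily dimming light curve:
# the negative time-derivative in %/second terms.
def _demo_dimming_slope():
    import numpy as np
    import pandas as pd
    t = pd.date_range('2010-08-07 18:00', periods=120, freq='min')
    irradiance = pd.Series(-np.linspace(0.0, 2.0, 120), index=t)  # 2% dimming over ~2 hours
    dt_seconds = irradiance.index.to_series().diff().dt.total_seconds()
    derivative = -irradiance.diff() / dt_seconds  # sign flipped so a "downward" slope is positive
    return derivative.min(), derivative.max(), derivative.mean()  # each ~2.8e-4 %/s
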
def generate_jedi_catalog(
        threshold_time_prior_flare_minutes=240.0,
        dimming_window_relative_to_flare_minutes_left=0.0,
        dimming_window_relative_to_flare_minutes_right=240.0,
        threshold_minimum_dimming_window_minutes=120.0,
        flare_index_range=range(0, 5052),
        output_path='/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/',
        verbose=True):
    """Wrapper code for creating James's Extreme Ultraviolet Variability Experiment (EVE) Dimming Index (JEDI) catalog.

    Inputs:
        None.

    Optional Inputs:
        threshold_time_prior_flare_minutes [float]:             How long before a particular event does the last one need to have
                                                                occurred to be considered independent. If the previous one was too
                                                                recent, will use that event's pre-flare irradiance.
                                                                Default is 240 (4 hours).
        dimming_window_relative_to_flare_minutes_left [float]:  Defines the left side of the time window to search for dimming
                                                                relative to the GOES/XRS flare peak. Negative numbers mean
                                                                minutes prior to the flare peak. Default is 0.0.
        dimming_window_relative_to_flare_minutes_right [float]: Defines the right side of the time window to search for dimming
                                                                relative to the GOES/XRS flare peak. If another flare
                                                                occurs before this, that time will define the end of the
                                                                window instead. Default is 240 (4 hours).
        threshold_minimum_dimming_window_minutes [float]:       The smallest allowed time window in which to search for dimming.
                                                                Default is 120.
        flare_index_range [range]:                              The range of GOES flare indices to process. Default is range(0, 5052).
        output_path [str]:                                      Set to a path for saving the JEDI catalog table and processing
                                                                summary plots. Default is '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/'.
        verbose [bool]:                                         Set to log the processing messages to disk and console. Default is True.

    Outputs:
        No direct return, but writes a (csv? sql table? hdf5?) to disk with the dimming parameterization results.
        Subroutines also optionally save processing plots to disk in output_path.

    Optional Outputs:
        None

    Example:
        generate_jedi_catalog(output_path='/Users/jmason86/Dropbox/Research/Postdoc_NASA/Analysis/Coronal Dimming Analysis/JEDI Catalog/',
                              verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        logger = JpmLogger(filename='generate_jedi_catalog',
                           path=output_path,
                           console=False)
        logger.info("Starting JEDI processing pipeline.")
        logger.info("Processing events {0} - {1}".format(
            flare_index_range[0], flare_index_range[-1]))
    else:
        logger = None

    # Get EVE level 2 extracted emission lines data
    # TODO: Replace this shortcut method with the method I'm building into sunpy
    from scipy.io.idl import readsav
    eve_readsav = readsav(
        '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/eve_lines_2010121-2014146 MEGS-A Mission Bare Bones.sav'
    )
    if verbose:
        logger.info('Loaded EVE data')

    # Create metadata dictionary
    # TODO: Replace this shortcut method with the method I'm building into sunpy
    from sunpy.util.metadata import MetaDict
    metadata = MetaDict()
    metadata['ion'] = eve_readsav['name']
    metadata['temperature_ion_peak_formation'] = np.power(
        10.0, eve_readsav['logt']) * u.Kelvin
    metadata['extracted_wavelength_center'] = eve_readsav['wavelength'] * u.nm
    metadata['extracted_wavelength_min'] = metadata[
        'extracted_wavelength_center']
    metadata['extracted_wavelength_max'] = metadata[
        'extracted_wavelength_center']
    metadata['emission_line_blends'] = ['Not implemented in prototype']
    metadata['exposure_time'] = 60.0 * u.second  # These example EVE data are already binned down to 1 minute
    metadata['precision'] = ['Not implemented in prototype']
    metadata['accuracy'] = ['Not implemented in prototype']
    metadata['flags'] = ['Not implemented in prototype']
    metadata['flags_description'] = '1 = MEGS-A data is missing, ' \
                                    '2 = MEGS-B data is missing, ' \
                                    '4 = ESP data is missing, ' \
                                    '8 = MEGS-P data is missing, ' \
                                    '16 = Possible clock adjust in MEGS-A, ' \
                                    '32 = Possible clock adjust in MEGS-B, ' \
                                    '64 = Possible clock adjust in ESP, ' \
                                    '128 = Possible clock adjust in MEGS-P'
    metadata['flags_spacecraft'] = ['Not implemented in prototype']
    metadata['flags_spacecraft_description'] = '0 = No obstruction, ' \
                                               '1 = Warm up from Earth eclipse, ' \
                                               '2 = Obstruction atmosphere penumbra, ' \
                                               '3 = Obstruction atmosphere umbra, ' \
                                               '4 = Obstruction penumbra of Mercury, ' \
                                               '5 = Obstruction umbra of Mercury, ' \
                                               '6 = Obstruction penumbra of Venus, ' \
                                               '7 = Obstruction umbra of Venus, ' \
                                               '8 = Obstruction penumbra of Moon, ' \
                                               '9 = Obstruction umbra of Moon, ' \
                                               '10 = Obstruction penumbra of solid Earth, ' \
                                               '11 = Obstruction umbra of solid Earth, ' \
                                               '16 = Observatory is off-pointed by more than 1 arcmin'
    metadata['data_version'] = ['Not implemented in prototype']
    metadata['data_reprocessed_revision'] = ['Not implemented in prototype']
    metadata['filename'] = ['Not implemented in prototype']

    # Load up the actual irradiance data into a pandas DataFrame
    # TODO: Replace this shortcut method with the method I'm building into sunpy
    irradiance = eve_readsav['irradiance'].byteswap().newbyteorder()  # pandas doesn't like big endian
    irradiance[irradiance == -1] = np.nan
    wavelengths = eve_readsav['wavelength']
    wavelengths_str = ['{0:1.1f}'.format(wavelength) for wavelength in wavelengths]
    eve_lines = pd.DataFrame(irradiance, columns=wavelengths_str)
    eve_lines.index = pd.to_datetime(eve_readsav.iso.astype(str))
    eve_lines = eve_lines.drop_duplicates()
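    # eve_lines is now a time-indexed DataFrame with one column per extracted
    # emission line, keyed by wavelength string (e.g. eve_lines['9.4'])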

    # slice out only columns needed by Shawn
    # eve_selected_lines = eve_lines.drop(columns=['9.4', '13.1', '13.3', '25.6', '28.4', '30.4', '33.5', '36.1', '36.8', '44.6', '46.5', '49.9', '52.1', '52.6', '53.7', '55.4', '56.8', '58.4', '59.2', '60.0', '61.0', '62.5', '63.0', '71.9', '72.2', '77.0', '79.0', '83.6', '95.0', '97.3', '97.7', '102.6', '103.2'])
    # eve_selected_lines.info()
    # eve_selected_lines.to_csv('/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/eve_selected_lines_forreal.csv')

    # Get GOES flare events above C1 within date range corresponding to EVE data
    # flares = get_goes_flare_events(eve_lines.index[0], eve_lines.index[-1], verbose=verbose)  # TODO: The method in sunpy needs fixing, issue 2434

    # Load GOES events from IDL saveset instead of directly through sunpy
    goes_flare_events = readsav(
        '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/GoesEventsMegsAEra.sav'
    )
    goes_flare_events['class'] = goes_flare_events['class'].astype(str)
    goes_flare_events['event_peak_time_human'] = goes_flare_events[
        'event_peak_time_human'].astype(str)
    goes_flare_events['event_start_time_human'] = goes_flare_events[
        'event_start_time_human'].astype(str)
    goes_flare_events['peak_time'] = Time(
        goes_flare_events['event_peak_time_jd'], format='jd', scale='utc')
    goes_flare_events['start_time'] = Time(
        goes_flare_events['event_start_time_jd'], format='jd', scale='utc')
    if verbose:
        logger.info('Loaded GOES flare events.')

    # Define the columns of the JEDI catalog
    jedi_row = pd.DataFrame([
        OrderedDict([('Event #', np.nan), ('GOES Flare Start Time', np.nan),
                     ('GOES Flare Peak Time', np.nan),
                     ('GOES Flare Class', np.nan),
                     ('Pre-Flare Start Time', np.nan),
                     ('Pre-Flare End Time', np.nan),
                     ('Flare Interrupt', np.nan)])
    ])
    per_line_suffixes = [' Pre-Flare Irradiance [W/m2]',
                         ' Slope Start Time', ' Slope End Time',
                         ' Slope Min [%/s]', ' Slope Max [%/s]', ' Slope Mean [%/s]',
                         ' Slope Uncertainty [%/s]',
                         ' Depth Time', ' Depth [%]', ' Depth Uncertainty [%]',
                         ' Duration Start Time', ' Duration End Time', ' Duration [s]',
                         ' Fitting Gamma', ' Fitting Score']
    for suffix in per_line_suffixes:
        jedi_row = jedi_row.join(pd.DataFrame(columns=eve_lines.columns + suffix))

    ion_tuples = list(itertools.permutations(eve_lines.columns.values, 2))
    ion_permutations = pd.Index(
        [' by '.join(ion_tuples[i]) for i in range(len(ion_tuples))])
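    # Each entry reads like '9.4 by 13.1': the first line is corrected by
    # subtracting the (time-shifted, scaled) second line from it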

    per_permutation_suffixes = [' Slope Start Time', ' Slope End Time',
                                ' Slope Min [%/s]', ' Slope Max [%/s]', ' Slope Mean [%/s]',
                                ' Slope Uncertainty [%/s]',
                                ' Depth Time', ' Depth [%]', ' Depth Uncertainty [%]',
                                ' Duration Start Time', ' Duration End Time', ' Duration [s]',
                                ' Correction Time Shift [s]', ' Correction Scale Factor',
                                ' Fitting Gamma', ' Fitting Score']
    for suffix in per_permutation_suffixes:
        jedi_row = jedi_row.join(pd.DataFrame(columns=ion_permutations + suffix))

    csv_filename = output_path + 'jedi_{0}.csv'.format(Time.now().iso)
    jedi_row.to_csv(csv_filename, header=True, index=False, mode='w')

    if verbose:
        logger.info('Created JEDI row definition.')

    # Start a progress bar
    widgets = [
        progressbar.Percentage(),
        progressbar.Bar(),
        progressbar.Timer(), ' ',
        progressbar.AdaptiveETA()
    ]
    progress_bar = progressbar.ProgressBar(
        widgets=[progressbar.FormatLabel('Flare Event Loop: ')] + widgets,
        min_value=flare_index_range[0],
        max_value=flare_index_range[-1]).start()

    # Prepare hold-over pre-flare values, which are reused when successive flares are too close
    # together to be independent. The irradiance will normally hold one element for each of the
    # 39 emission lines; the window bounds stay NaN until the first independent event is processed.
    preflare_irradiance = np.nan
    preflare_window_start = np.nan
    preflare_window_end = np.nan

    # Start loop through all flares
    for flare_index in flare_index_range:

        # Skip event 0 to avoid problems with referring to earlier indices
        if flare_index == 0:
            continue

        # Reset jedi_row
        jedi_row[:] = np.nan

        # Reset the flare interrupt flag
        flare_interrupt = False

        # Fill the GOES flare information into the JEDI row
        jedi_row['Event #'] = flare_index
        jedi_row['GOES Flare Start Time'] = goes_flare_events['start_time'][
            flare_index].iso
        jedi_row['GOES Flare Peak Time'] = goes_flare_events['peak_time'][
            flare_index].iso
        jedi_row['GOES Flare Class'] = goes_flare_events['class'][flare_index]
        if verbose:
            logger.info(
                "Event {0} GOES flare details stored to JEDI row.".format(
                    flare_index))

        # If haven't already done all pre-parameterization processing
        processed_jedi_non_params_filename = output_path + 'Processed Pre-Parameterization Data/Event {0} Pre-Parameterization.h5'.format(
            flare_index)
        processed_lines_filename = output_path + 'Processed Lines Data/Event {0} Lines.h5'.format(
            flare_index)
        if not os.path.isfile(processed_lines_filename) or not os.path.isfile(
                processed_jedi_non_params_filename):
            # Determine pre-flare irradiance
            minutes_since_last_flare = (
                goes_flare_events['peak_time'][flare_index] -
                goes_flare_events['peak_time'][flare_index - 1]).sec / 60.0
            if minutes_since_last_flare > threshold_time_prior_flare_minutes:
                # Clip EVE data from threshold_time_prior_flare_minutes prior to flare up to peak flare time
                preflare_window_start = (
                    goes_flare_events['peak_time'][flare_index] -
                    (threshold_time_prior_flare_minutes * u.minute)).iso
                preflare_window_end = (
                    goes_flare_events['peak_time'][flare_index]).iso
                eve_lines_preflare_time = eve_lines[
                    preflare_window_start:preflare_window_end]

                # Loop through the emission lines and get pre-flare irradiance for each
                preflare_irradiance = []
                for column in eve_lines_preflare_time:
                    eve_line_preflare_time = pd.DataFrame(
                        eve_lines_preflare_time[column])
                    eve_line_preflare_time.columns = ['irradiance']
                    preflare_irradiance.append(
                        determine_preflare_irradiance(
                            eve_line_preflare_time,
                            pd.Timestamp(goes_flare_events['start_time']
                                         [flare_index].iso),
                            plot_path_filename=output_path +
                            'Preflare Determination/Event {0} {1}.png'.format(
                                flare_index, column),
                            verbose=verbose,
                            logger=logger))
                    plt.close('all')
            else:
                if verbose:
                    logger.info(
                        "This flare at {0} will use the pre-flare irradiance from flare at {1}."
                        .format(
                            goes_flare_events['peak_time'][flare_index].iso,
                            goes_flare_events['peak_time'][flare_index - 1].iso))

            jedi_row["Pre-Flare Start Time"] = preflare_window_start
            jedi_row["Pre-Flare End Time"] = preflare_window_end
            preflare_irradiance_cols = [
                col for col in jedi_row.columns
                if 'Pre-Flare Irradiance' in col
            ]
            jedi_row[preflare_irradiance_cols] = preflare_irradiance

            if verbose:
                logger.info(
                    "Event {0} pre-flare determination complete.".format(
                        flare_index))

            # Clip EVE data to dimming window
            bracket_time_left = (
                goes_flare_events['peak_time'][flare_index] -
                (dimming_window_relative_to_flare_minutes_left * u.minute))
            next_flare_time = Time(
                (goes_flare_events['peak_time'][flare_index + 1]).iso)
            user_choice_time = (
                goes_flare_events['peak_time'][flare_index] +
                (dimming_window_relative_to_flare_minutes_right * u.minute))
            bracket_time_right = min(next_flare_time, user_choice_time)

            # If flare is shortening the window, set the flare_interrupt flag
            if bracket_time_right == next_flare_time:
                flare_interrupt = True
                if verbose:
                    logger.info(
                        'Flare interrupt for event at {0} by flare at {1}'.
                        format(goes_flare_events['peak_time'][flare_index].iso,
                               next_flare_time))

            # Write flare_interrupt to JEDI row
            jedi_row['Flare Interrupt'] = flare_interrupt

            # Skip event if the dimming window is too short
            if ((bracket_time_right - bracket_time_left).sec /
                    60.0) < threshold_minimum_dimming_window_minutes:
                # Leave all dimming parameters as NaN and write this null result to the CSV on disk
                jedi_row.to_csv(csv_filename,
                                header=False,
                                index=False,
                                mode='a')

                # Log message
                if verbose:
                    logger.info(
                        'The dimming window duration of {0} minutes is shorter than the minimum threshold of {1} minutes. Skipping this event ({2})'
                        .format(((bracket_time_right - bracket_time_left).sec /
                                 60.0),
                                threshold_minimum_dimming_window_minutes,
                                goes_flare_events['peak_time'][flare_index]))

                # Skip the rest of the processing in the flare_index loop
                continue
            else:
                eve_lines_event = eve_lines[bracket_time_left.iso:bracket_time_right.iso]

            if verbose:
                logger.info(
                    "Event {0} EVE data clipped to dimming window.".format(
                        flare_index))

            # Convert irradiance units to percent
            # (in place, don't care about absolute units from this point forward)
            eve_lines_event = (eve_lines_event - preflare_irradiance
                               ) / preflare_irradiance * 100.0

            if verbose:
                logger.info(
                    "Event {0} irradiance converted from absolute to percent units."
                    .format(flare_index))

            # Do flare removal in the light curves and add the results to the DataFrame
            progress_bar_correction = progressbar.ProgressBar(
                widgets=[progressbar.FormatLabel('Peak Match Subtract: ')] +
                widgets,
                max_value=len(ion_tuples)).start()
            for i in range(len(ion_tuples)):
                light_curve_to_subtract_from_df = pd.DataFrame(
                    eve_lines_event[ion_tuples[i][0]])
                light_curve_to_subtract_from_df.columns = ['irradiance']
                light_curve_to_subtract_with_df = pd.DataFrame(
                    eve_lines_event[ion_tuples[i][1]])
                light_curve_to_subtract_with_df.columns = ['irradiance']

                if (light_curve_to_subtract_from_df.isnull().all().all()) or (
                        light_curve_to_subtract_with_df.isnull().all().all()):
                    if verbose:
                        logger.info(
                            'Event {0} {1} correction skipped because all irradiances are NaN.'
                            .format(flare_index, ion_permutations[i]))
                else:
                    light_curve_corrected, seconds_shift, scale_factor = light_curve_peak_match_subtract(
                        light_curve_to_subtract_from_df,
                        light_curve_to_subtract_with_df,
                        pd.Timestamp(
                            (goes_flare_events['peak_time'][flare_index]).iso),
                        plot_path_filename=output_path +
                        'Peak Subtractions/Event {0} {1}.png'.format(
                            flare_index, ion_permutations[i]),
                        verbose=verbose,
                        logger=logger)

                    eve_lines_event[
                        ion_permutations[i]] = light_curve_corrected
                    jedi_row[ion_permutations[i] +
                             ' Correction Time Shift [s]'] = seconds_shift
                    jedi_row[ion_permutations[i] +
                             ' Correction Scale Factor'] = scale_factor

                    plt.close('all')

                    if verbose:
                        logger.info(
                            'Event {0} flare removal correction complete'.
                            format(flare_index))
                    progress_bar_correction.update(i)

            progress_bar_correction.finish()

            # TODO: Update calculate_eve_fe_line_precision to compute for all emission lines, not just selected
            uncertainty = np.ones(len(eve_lines_event)) * 0.002545

            # TODO: Propagate uncertainty through light_curve_peak_match_subtract and store in eve_lines_event

            # Fit the light curves to reduce influence of noise on the parameterizations to come later
            progress_bar_fitting = progressbar.ProgressBar(
                widgets=[progressbar.FormatLabel('Light curve fitting: ')] +
                widgets,
                max_value=len(eve_lines_event.columns)).start()
            for i, column in enumerate(eve_lines_event):
                if eve_lines_event[column].isnull().all().all():
                    if verbose:
                        logger.info(
                            'Event {0} {1} fitting skipped because all irradiances are NaN.'
                            .format(flare_index, column))
                else:
                    eve_line_event = pd.DataFrame(eve_lines_event[column])
                    eve_line_event.columns = ['irradiance']
                    eve_line_event['uncertainty'] = uncertainty

                    fitting_path = output_path + 'Fitting/'
                    if not os.path.exists(fitting_path):
                        os.makedirs(fitting_path)

                    plt.close('all')
                    light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve(
                        eve_line_event,
                        plots_save_path='{0} Event {1} {2} '.format(
                            fitting_path, flare_index, column),
                        verbose=verbose,
                        logger=logger)
                    eve_lines_event[column] = light_curve_fit
                    jedi_row[column + ' Fitting Gamma'] = best_fit_gamma
                    jedi_row[column + ' Fitting Score'] = best_fit_score

                    if verbose:
                        logger.info(
                            'Event {0} {1} light curves fitted.'.format(
                                flare_index, column))
                    progress_bar_fitting.update(i)

            progress_bar_fitting.finish()

        #     # Save the dimming event data to disk for quicker restore
        #     jedi_row.to_hdf(processed_jedi_non_params_filename, 'jedi_row')
        #     eve_lines_event.to_hdf(processed_lines_filename, 'eve_lines_event')
        # else:
        #     jedi_row = pd.read_hdf(processed_jedi_non_params_filename, 'jedi_row')
        #     eve_lines_event = pd.read_hdf(processed_lines_filename, 'eve_lines_event')
        #     if verbose:
        #         logger.info('Loading files {0} and {1} rather than processing again.'.format(processed_jedi_non_params_filename, processed_lines_filename))
        #
        # # Parameterize the light curves for dimming
        # for column in eve_lines_event:
        #
        #     # Null out all parameters
        #     depth_percent, depth_time = np.nan, np.nan
        #     slope_start_time, slope_end_time = np.nan, np.nan
        #     slope_min, slope_max, slope_mean = np.nan, np.nan, np.nan
        #     duration_seconds, duration_start_time, duration_end_time = np.nan, np.nan, np.nan
        #
        #     # Determine whether to do the parameterizations or not
        #     if eve_lines_event[column].isnull().all().all():
        #         if verbose:
        #             logger.info('Event {0} {1} parameterization skipped because all irradiances are NaN.'.format(flare_index, column))
        #     else:
        #         eve_line_event = pd.DataFrame(eve_lines_event[column])
        #         eve_line_event.columns = ['irradiance']
        #
        #         # Determine dimming depth (if any)
        #         depth_path = output_path + 'Depth/'
        #         if not os.path.exists(depth_path):
        #             os.makedirs(depth_path)
        #
        #         plt.close('all')
        #         depth_percent, depth_time = determine_dimming_depth(eve_line_event,
        #                                                             plot_path_filename='{0} Event {1} {2} Depth.png'.format(depth_path, flare_index, column),
        #                                                             verbose=verbose, logger=logger)
        #
        #         jedi_row[column + ' Depth [%]'] = depth_percent
        #         # jedi_row[column + ' Depth Uncertainty [%]'] = depth_uncertainty  # TODO: make determine_dimming_depth return the propagated uncertainty
        #         jedi_row[column + ' Depth Time'] = depth_time
        #
        #         # Determine dimming slope (if any)
        #         slope_path = output_path + 'Slope/'
        #         if not os.path.exists(slope_path):
        #             os.makedirs(slope_path)
        #
        #         slope_start_time = pd.Timestamp((goes_flare_events['peak_time'][flare_index]).iso)
        #         slope_end_time = depth_time
        #
        #         if (pd.isnull(slope_start_time)) or (pd.isnull(slope_end_time)):
        #             if verbose:
        #                 logger.warning('Cannot compute slope or duration because slope bounding times NaN.')
        #         else:
        #             plt.close('all')
        #             slope_min, slope_max, slope_mean = determine_dimming_slope(eve_line_event,
        #                                                                        earliest_allowed_time=slope_start_time,
        #                                                                        latest_allowed_time=slope_end_time,
        #                                                                        plot_path_filename='{0} Event {1} {2} Slope.png'.format(slope_path, flare_index, column),
        #                                                                        verbose=verbose, logger=logger)
        #
        #             jedi_row[column + ' Slope Min [%/s]'] = slope_min
        #             jedi_row[column + ' Slope Max [%/s]'] = slope_max
        #             jedi_row[column + ' Slope Mean [%/s]'] = slope_mean
        #             # jedi_row[column + ' Slope Uncertainty [%]'] = slope_uncertainty  # TODO: make determine_dimming_slope return the propagated uncertainty
        #             jedi_row[column + ' Slope Start Time'] = slope_start_time
        #             jedi_row[column + ' Slope End Time'] = slope_end_time
        #
        #             # Determine dimming duration (if any)
        #             duration_path = output_path + 'Duration/'
        #             if not os.path.exists(duration_path):
        #                 os.makedirs(duration_path)
        #
        #             plt.close('all')
        #             duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(eve_line_event,
        #                                                                                                   earliest_allowed_time=slope_start_time,
        #                                                                                                   plot_path_filename='{0} Event {1} {2} Duration.png'.format(duration_path, flare_index, column),
        #                                                                                                   verbose=verbose, logger=logger)
        #
        #             jedi_row[column + ' Duration [s]'] = duration_seconds
        #             jedi_row[column + ' Duration Start Time'] = duration_start_time
        #             jedi_row[column + ' Duration End Time'] = duration_end_time
        #
        #         if verbose:
        #             logger.info("Event {0} {1} parameterizations complete.".format(flare_index, column))
        #
        #         # Produce a summary plot for each light curve
        #         plt.style.use('jpm-transparent-light')
        #
        #         ax = eve_line_event['irradiance'].plot(color='black')
        #         plt.axhline(linestyle='dashed', color='grey')
        #         start_date = eve_line_event.index.values[0]
        #         start_date_string = pd.to_datetime(str(start_date))
        #         plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S'))
        #         plt.ylabel('Irradiance [%]')
        #         fmtr = dates.DateFormatter("%H:%M:%S")
        #         ax.xaxis.set_major_formatter(fmtr)
        #         ax.xaxis.set_major_locator(dates.HourLocator())
        #         plt.title('Event {0} {1} nm Parameters'.format(flare_index, column))
        #
        #         if not np.isnan(depth_percent):
        #             plt.annotate('', xy=(depth_time, -depth_percent), xycoords='data',
        #                          xytext=(depth_time, 0), textcoords='data',
        #                          arrowprops=dict(facecolor='limegreen', edgecolor='limegreen', linewidth=2))
        #             mid_depth = -depth_percent / 2.0
        #             plt.annotate('{0:.2f} %'.format(depth_percent), xy=(depth_time, mid_depth), xycoords='data',
        #                          ha='right', va='center', rotation=90, size=18, color='limegreen')
        #
        #         if not np.isnan(slope_mean):
        #             if pd.isnull(slope_start_time) or pd.isnull(slope_end_time):
        #                 import pdb
        #                 pdb.set_trace()
        #             p = plt.plot(eve_line_event[slope_start_time:slope_end_time]['irradiance'], c='tomato')
        #
        #             inverse_str = '$^{-1}$'
        #             plt.annotate('slope_min={0} % s{1}'.format(latex_float(slope_min), inverse_str),
        #                          xy=(0.98, 0.12), xycoords='axes fraction', ha='right',
        #                          size=12, color=p[0].get_color())
        #             plt.annotate('slope_max={0} % s{1}'.format(latex_float(slope_max), inverse_str),
        #                          xy=(0.98, 0.08), xycoords='axes fraction', ha='right',
        #                          size=12, color=p[0].get_color())
        #             plt.annotate('slope_mean={0} % s{1}'.format(latex_float(slope_mean), inverse_str),
        #                          xy=(0.98, 0.04), xycoords='axes fraction', ha='right',
        #                          size=12, color=p[0].get_color())
        #
        #         if not np.isnan(duration_seconds):
        #             plt.annotate('', xy=(duration_start_time, 0), xycoords='data',
        #                          xytext=(duration_end_time, 0), textcoords='data',
        #                          arrowprops=dict(facecolor='dodgerblue', edgecolor='dodgerblue', linewidth=5, arrowstyle='<->'))
        #             mid_time = duration_start_time + (duration_end_time - duration_start_time) / 2
        #             plt.annotate(str(duration_seconds) + ' s', xy=(mid_time, 0), xycoords='data', ha='center', va='bottom', size=18, color='dodgerblue')
        #
        #         summary_path = output_path + 'Summary Plots/'
        #         if not os.path.exists(summary_path):
        #             os.makedirs(summary_path)
        #         summary_filename = '{0} Event {1} {2} Parameter Summary.png'.format(summary_path, flare_index, column)
        #         plt.savefig(summary_filename)
        #         if verbose:
        #             logger.info("Summary plot saved to %s" % summary_filename)
        #
        # # Write to the JEDI catalog on disk
        # jedi_row.to_csv(csv_filename, header=False, index=False, mode='a')
        # if verbose:
        #     logger.info('Event {0} JEDI row written to {1}.'.format(flare_index, csv_filename))

        # Update progress bar
        progress_bar.update(flare_index)

    progress_bar.finish()
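
The commented-out block above preserves the full depth -> slope -> duration parameterization pipeline. As a minimal sketch (assuming this module's imports, the determine_dimming_* helpers from this repository, and the loop variables eve_line_event, jedi_row, goes_flare_events, flare_index, and column already in scope), the flow reduces to:

# Minimal sketch of the parameterization flow preserved in the comments above;
# the plot_path_filename arguments are omitted here for brevity.
depth_percent, depth_time = determine_dimming_depth(eve_line_event, verbose=verbose, logger=logger)
jedi_row[column + ' Depth [%]'] = depth_percent
jedi_row[column + ' Depth Time'] = depth_time

slope_start_time = pd.Timestamp(goes_flare_events['peak_time'][flare_index].iso)
if not (pd.isnull(slope_start_time) or pd.isnull(depth_time)):
    slope_min, slope_max, slope_mean = determine_dimming_slope(eve_line_event,
                                                               earliest_allowed_time=slope_start_time,
                                                               latest_allowed_time=depth_time,
                                                               verbose=verbose, logger=logger)
    duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(eve_line_event,
                                                                                          earliest_allowed_time=slope_start_time,
                                                                                          verbose=verbose, logger=logger)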
Example #7
0
def determine_dimming_duration(light_curve_df,
                               earliest_allowed_time=None, smooth_points=0,
                               plot_path_filename=None, verbose=False, logger=None):
    """Find the duration of dimming in a light curve, if any.
    Assumes light curve is normalized such that pre-flare = 0%.

    Inputs:
        light_curve_df [pd DataFrame]:    A pandas DataFrame with a DatetimeIndex and a column for irradiance.

    Optional Inputs:
        earliest_allowed_time [metatime]: The function won't return a duration if the only 0 crossings are earlier than this.
                                          Default is None, meaning the beginning of the light_curve_df.
        smooth_points [integer]:          Used to apply a rolling mean with the number of points (indices) specified.
                                          Default is 0, meaning no smoothing will be performed.
        plot_path_filename [str]:         Set to a path and filename in order to save the summary plot to disk.
                                          Default is None, meaning the plot will not be saved to disk.
        verbose [bool]:                   Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:               A configured logger from jpm_logger.py. If set to None, will generate a
                                          new one. Default is None.

    Outputs:
        duration_seconds [integer]:         The duration of dimming in seconds.
        duration_start_time [pd.Timestamp]: The time the duration starts (downward 0 crossing).
        duration_end_time [pd.Timestamp]:   The time the duration ends (upward 0 crossing).

    Optional Outputs:
        None

    Example:
        duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(light_curve_df,
                                                                                              plot_path_filename='./bla.png',
                                                                                              verbose=True)
    """

    # If no earliest_allowed_time set, then set it to beginning of light_curve_df
    if not earliest_allowed_time:
        earliest_allowed_time = pd.Timestamp(light_curve_df.index.values[0])

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='determine_dimming_duration_log', path='/Users/jmason86/Desktop/')
        logger.info("Running on event with light curve start time of {0}.".format(light_curve_df.index[0]))

    # Set up a successful processing flag
    found_duration = True

    # Optionally smooth the light curve with a rolling mean
    if smooth_points:
        light_curve_df['smooth'] = light_curve_df['irradiance'].rolling(smooth_points, center=True).mean()
    else:
        light_curve_df['smooth'] = light_curve_df['irradiance']

    # Fill NaNs with the first valid value so the sign-change detection below is well defined
    first_non_nan = light_curve_df['smooth'].first_valid_index()
    nan_indices = np.isnan(light_curve_df['smooth'])
    light_curve_df.loc[nan_indices, 'smooth'] = light_curve_df.loc[first_non_nan, 'smooth']

    # Find the indices where the light curve crosses zero (sign changes between consecutive points)
    zero_crossing_indices = np.where(np.diff(np.signbit(light_curve_df['smooth'])))[0]
    zero_crossing_times = light_curve_df.index[zero_crossing_indices]

    # Discard any crossings prior to the user-provided earliest_allowed_time; if none remain, no duration can be computed.
    # Filter the times as well so they stay aligned with zero_crossing_indices for the summary plot below.
    after_earliest = zero_crossing_times > earliest_allowed_time
    zero_crossing_indices = zero_crossing_indices[after_earliest]
    zero_crossing_times = zero_crossing_times[after_earliest]
    if zero_crossing_indices.size == 0:
        if verbose:
            logger.warning('No zero crossings detected after earliest allowed time of %s' % earliest_allowed_time)
        found_duration = False

    # Figure out which way the light curve is sloping
    if found_duration:
        light_curve_df['diff'] = light_curve_df['smooth'].diff()

    # Find the first negative slope zero crossing time
    if found_duration:
        neg_zero_crossing_indices = np.where(light_curve_df['diff'].iloc[zero_crossing_indices + 1] < 0)[0]
        if len(neg_zero_crossing_indices) > 0:
            first_neg_zero_crossing_index = neg_zero_crossing_indices[0]
            first_neg_zero_crossing_time = light_curve_df.index[zero_crossing_indices[first_neg_zero_crossing_index]]
        else:
            if verbose:
                logger.warning('No negative slope 0-crossing found. Duration cannot be defined.')
            found_duration = False

    # Find the first positive slope zero crossing
    if found_duration:
        pos_zero_crossing_indices = np.where(light_curve_df['diff'].iloc[zero_crossing_indices + 1] > 0)[0]
        if len(pos_zero_crossing_indices) > 0:
            first_pos_zero_crossing_index = pos_zero_crossing_indices[0]
            first_pos_zero_crossing_time = light_curve_df.index[zero_crossing_indices[first_pos_zero_crossing_index]]
        else:
            if verbose:
                logger.warning('No positive slope 0-crossing found. Duration cannot be defined.')
            found_duration = False

    # If the first negative slope zero crossing isn't earlier than the positive one, return null
    if (found_duration) and (first_neg_zero_crossing_time > first_pos_zero_crossing_time):
        if verbose:
            logger.warning('Dimming light curve may be misaligned in window. Negative slope 0-crossing detected after positive one.')
        found_duration = False

    # Return the time difference in seconds between the selected zero crossings
    if found_duration:
        duration_seconds = int((first_pos_zero_crossing_time - first_neg_zero_crossing_time).total_seconds())

    if plot_path_filename:
        plt.style.use('jpm-transparent-light')
        from matplotlib import dates

        if found_duration:
            light_curve_df = light_curve_df.drop(columns=['diff'])

        ax = light_curve_df['irradiance'].plot()
        start_date = light_curve_df.index.values[0]
        start_date_string = pd.to_datetime(str(start_date))
        plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S'))
        plt.ylabel('Irradiance [%]')
        fmtr = dates.DateFormatter("%H:%M:%S")
        ax.xaxis.set_major_formatter(fmtr)
        ax.xaxis.set_major_locator(dates.HourLocator())
        plt.title('Dimming Duration')

        if found_duration:
            plt.scatter([zero_crossing_times[first_neg_zero_crossing_index], zero_crossing_times[first_pos_zero_crossing_index]],
                        [light_curve_df['smooth'].iloc[zero_crossing_indices[first_neg_zero_crossing_index]],
                         light_curve_df['smooth'].iloc[zero_crossing_indices[first_pos_zero_crossing_index]]],
                        c='black', s=300, zorder=3)
            plt.annotate('', xy=(first_neg_zero_crossing_time, 0), xycoords='data',
                         xytext=(first_pos_zero_crossing_time, 0), textcoords='data',
                         arrowprops=dict(facecolor='black', linewidth=5, arrowstyle='<->'))
            mid_time = first_neg_zero_crossing_time + (first_pos_zero_crossing_time - first_neg_zero_crossing_time) / 2
            plt.annotate(str(duration_seconds) + ' s', xy=(mid_time, 0), xycoords='data', ha='center', va='bottom', size=18)

        plt.savefig(plot_path_filename)
        if verbose:
            logger.info("Summary plot saved to %s" % plot_path_filename)

    if not found_duration:
        duration_seconds = np.nan
        first_neg_zero_crossing_time = np.nan
        first_pos_zero_crossing_time = np.nan

    return duration_seconds, first_neg_zero_crossing_time, first_pos_zero_crossing_time
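
A quick synthetic check of determine_dimming_duration (hypothetical data; assumes numpy and pandas are imported as in this module):

# Hypothetical smoke test: a V-shaped dip that crosses zero on the way down and
# back up should yield a duration roughly equal to the time spent below zero.
times = pd.date_range('2012-04-15 17:00', periods=121, freq='1min')
irradiance = np.concatenate([np.linspace(1, -2, 61), np.linspace(-2, 1, 60)])  # percent units, pre-flare = 0
light_curve_df = pd.DataFrame({'irradiance': irradiance}, index=times)
duration_seconds, t_start, t_end = determine_dimming_duration(light_curve_df)
# Expect duration_seconds of about 4800: the curve sits below zero for roughly 80 minutes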
Example #8
0
def determine_preflare_irradiance(light_curve_df,
                                  estimated_time_of_peak_start,
                                  max_median_diff_threshold=1.5,
                                  std_threshold=1.0,
                                  plot_path_filename=None,
                                  verbose=False,
                                  logger=None):
    """Determine pre-flare irradiance level in a solar light curve.
    Or, more generally, find the pre-peak level in a time series.

    Inputs:
        light_curve_df [pd DataFrame]:           A pandas DataFrame with a DatetimeIndex and a column for irradiance.
        estimated_time_of_peak_start [metatime]: The estimated time that the dramatic increase starts.
                                                 This could come from, e.g., GOES/XRS.

    Optional Inputs:
        max_median_diff_threshold [float]: The maximum allowed difference between the medians of the 3 pre-flare
                                           windows, in percent terms. This value is multiplied by the mean of the
                                           per-window standard deviations and the result is compared against
                                           max_median_diff. The default is 1.5.
        std_threshold [float]:             The maximum allowed standard deviation in the pre-flare windows in percent
                                           terms. The default is 1.0.
        plot_path_filename [str]:          Set to a path and filename in order to save the summary plot to disk.
                                           Default is None, meaning the plot will not be saved to disk.
        verbose [bool]:                    Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:                A configured logger from jpm_logger.py. If set to None, will generate a
                                           new one. Default is None.

    Outputs:
        preflare_irradiance [float]: The identified pre-flare irradiance level in the same units as light_curve_df.irradiance.

    Optional Outputs:
        None

    Example:
        preflare_irradiance = determine_preflare_irradiance(light_curve_df, pd.Timestamp('2012-04-15 17:52:20.0'),
                                                            plot_path_filename='./bla.png',
                                                            verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='determine_preflare_irradiance_log',
                               path='/Users/jmason86/Desktop/')
        logger.info("Running on event with peak start time of {0}.".format(
            estimated_time_of_peak_start))

    # Verify that not all values are nan
    if light_curve_df.isna().all().all():
        if verbose:
            logger.warning("All irradiance values are NaN. Returning.")
        return np.nan

    # Convert irradiance to percent about the median, if not already present.
    # median_irradiance is computed unconditionally because the plot's percent axis needs it later.
    median_irradiance = light_curve_df['irradiance'].median()
    if 'irradiance_percent' not in light_curve_df.columns:
        light_curve_df['irradiance_percent'] = (
            light_curve_df['irradiance'].values -
            median_irradiance) / median_irradiance * 100.
        if verbose:
            logger.info(
                "Converted irradiance to percent, baselining median in entire pre-flare window."
            )

    # Divide the pre-flare period into 3 equal-length windows
    windows = np.array_split(light_curve_df[:estimated_time_of_peak_start], 3)
    if verbose:
        logger.info("Divided pre-flare period into 3 equal-length windows.")

    # Compute median and standard deviation in each window
    medians = [
        windowed_df['irradiance_percent'].median() for windowed_df in windows
    ]
    medians_abs = [
        windowed_df['irradiance'].median() for windowed_df in windows
    ]
    stds = np.array(
        [windowed_df['irradiance_percent'].std() for windowed_df in windows])
    if verbose:
        logger.info("Computed medians and standard deviations in each window.")

    # Compute max difference between the medians
    max_median_diff = np.max(
        np.abs(np.append(np.diff(medians), medians[2] - medians[0])))
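    # The expression above takes np.diff(medians), i.e. medians[1] - medians[0] and
    # medians[2] - medians[1], and appends medians[2] - medians[0] to cover the
    # remaining pair, so after abs/max this is the largest absolute pairwise
    # difference among the three window medians.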

    # Compare medians and standard deviations in each window to thresholds
    failed_median_threshold = False
    failed_std_threshold = False
    if np.all(np.isnan(stds)):
        if verbose:
            logger.warning(
                'Cannot compute pre-flare irradiance. All standard deviations are nan.'
            )
        failed_std_threshold = True
    else:
        if max_median_diff > max_median_diff_threshold * np.mean(stds):
            if verbose:
                logger.warning(
                    'Cannot compute pre-flare irradiance. Maximum difference in window medians ({0}) exceeded threshold ({1}).'
                    .format(max_median_diff,
                            max_median_diff_threshold * np.mean(stds)))
            failed_median_threshold = True
        if (stds < std_threshold).sum() < 2:
            if verbose:
                logger.warning(
                    'Cannot compute pre-flare irradiance. Standard deviation in more than 1 window is larger than threshold ({0}).'
                    .format(std_threshold))
            failed_std_threshold = True

    # Compute pre-flare irradiance (mean of the medians in absolute units)
    if failed_median_threshold or failed_std_threshold:
        preflare_irradiance = np.nan
    else:
        preflare_irradiance = np.mean(medians_abs)  # mean of the window medians in absolute units
        if verbose:
            logger.info("Computed pre-flare irradiance: {0}".format(
                preflare_irradiance))

    # Produce summary plot
    if plot_path_filename:
        plt.style.use('jpm-transparent-light')
        from matplotlib import dates
        from matplotlib.patches import Rectangle

        light_curve_df = light_curve_df.drop(columns=['irradiance_percent'])
        ax = light_curve_df[:estimated_time_of_peak_start].plot(legend=False,
                                                                c='grey')
        #  plt.plot(light_curve_df[:estimated_time_of_peak_start].irradiance, c='grey') #  using matplotlib instead of pandas
        #  ax = plt.gca()
        start_date = light_curve_df.index.values[0]
        start_date_string = pd.to_datetime(str(start_date))
        plt.title('Pre-flare Windows')
        plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S'))
        plt.ylabel('Irradiance [W m$^{-2}$]')
        fmtr = dates.DateFormatter("%H:%M:%S")
        ax.xaxis.set_major_formatter(fmtr)
        ax.xaxis.set_major_locator(dates.HourLocator())
        ax2 = ax.twinx()
        light_curve_df[:estimated_time_of_peak_start].plot(ax=ax2,
                                                           legend=False,
                                                           c='grey')
        #  ax2.plot(light_curve_df[:estimated_time_of_peak_start].irradiance, color='grey')
        vals = ax2.get_yticks()
        ax2.set_yticklabels([
            '{:3.2f}%'.format(
                (x - median_irradiance) / median_irradiance * 100)
            for x in vals
        ])

        # First window
        start = dates.date2num(light_curve_df.index[0])
        end = dates.date2num(windows[0].index[-1])
        width = end - start
        rect = Rectangle((start, 0), width, 1, color='deepskyblue', alpha=0.2)
        ax.add_patch(rect)
        plt.plot([windows[0].index[0], windows[0].index[-1]],
                 [medians_abs[0], medians_abs[0]],
                 linestyle='dashed',
                 c='dimgrey')
        ax.text(start + width / 2.0,
                np.min(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'median$_1$ = ' + latex_float(medians[0]) + '% \n' +
                r'$\sigma_1$ = ' + latex_float(stds[0]) + '%',
                fontsize=11,
                ha='center',
                va='bottom')

        # Second window
        start = dates.date2num(windows[1].index[0])
        end = dates.date2num(windows[1].index[-1])
        width = end - start
        rect = Rectangle((start, 0), width, 1, color='slateblue', alpha=0.2)
        ax.add_patch(rect)
        plt.plot([windows[1].index[0], windows[1].index[-1]],
                 [medians_abs[1], medians_abs[1]],
                 linestyle='dashed',
                 c='dimgrey')
        ax.text(start + width / 2.0,
                np.min(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'median$_2$ = ' + latex_float(medians[1]) + '% \n' +
                r'$\sigma_2$ = ' + latex_float(stds[1]) + '%',
                fontsize=11,
                ha='center',
                va='bottom')

        if not np.isnan(preflare_irradiance):
            ax.axes.axhline(y=preflare_irradiance,
                            linewidth=2,
                            color='tomato',
                            linestyle='dashed')
            ax.text(
                start + width / 2.0,
                np.max(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'pre-flare I = ' + latex_float(preflare_irradiance) +
                ' W m$^{-2}$',
                fontsize=11,
                ha='center',
                va='top',
                color='tomato')
        else:
            ax.text(
                start + width / 2.0,
                np.max(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'pre-flare I = N/A \n' + 'median condition ok: ' +
                str(not failed_median_threshold) + '\n' +
                r'$\sigma$ condition ok: ' + str(not failed_std_threshold),
                fontsize=11,
                ha='center',
                va='top',
                color='tomato')

        # Third window
        start = dates.date2num(windows[2].index[0])
        end = dates.date2num(windows[2].index[-1])
        width = end - start
        rect = Rectangle((start, 0), width, 1, color='violet', alpha=0.2)
        ax.add_patch(rect)
        plt.plot([windows[2].index[0], windows[2].index[-1]],
                 [medians_abs[2], medians_abs[2]],
                 linestyle='dashed',
                 c='dimgrey')
        ax.text(start + width / 2.0,
                np.min(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'median$_3$ = ' + latex_float(medians[2]) + '% \n' +
                r'$\sigma_3$ = ' + latex_float(stds[2]) + '%',
                fontsize=11,
                ha='center',
                va='bottom')
        ax.text(end,
                np.max(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'median diff = ' + latex_float(max_median_diff) + '% \n' +
                r'thresh $\times \mu_{\sigma n}$ = ' +
                latex_float(max_median_diff_threshold * np.mean(stds)) + '%',
                fontsize=11,
                ha='right',
                va='top')

        # Increase border so y-axes don't get cut off in savefig, even though they don't in plt.show()
        plt.gcf().subplots_adjust(left=0.22)

        plt.savefig(plot_path_filename)
        if verbose:
            logger.info(
                "Summary plot for event with start time {0} saved to {1}".
                format(estimated_time_of_peak_start, plot_path_filename))

    return preflare_irradiance
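
A quick synthetic check of determine_preflare_irradiance (hypothetical data; assumes numpy and pandas are imported as in this module):

# Hypothetical smoke test: a flat, low-noise pre-flare window should pass both
# the median-difference and standard-deviation thresholds and return
# approximately the median irradiance.
rng = np.random.default_rng(0)
times = pd.date_range('2012-04-15 15:00', periods=180, freq='1min')
irradiance = 1e-3 * (1 + 0.001 * rng.standard_normal(180))  # W m^-2 with ~0.1% noise
light_curve_df = pd.DataFrame({'irradiance': irradiance}, index=times)
preflare_irradiance = determine_preflare_irradiance(light_curve_df, pd.Timestamp('2012-04-15 17:52:20'))
# Expect roughly 1e-3; a trending or noisy window would return np.nan instead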
Example #9
0
def correlationCoefficientScan(
        output_path='/Users/tyleralbee/Desktop/StealthCME',
        eve_data_path='/Users/tyleralbee/Desktop/savesets/eve_selected_lines.csv',
        cme_signature='/Users/tyleralbee/Desktop/savesets/eve_lines_event_percents_fitted.csv',
        verbose=True):
    """Slide a window the length of the CME dimming signature through the EVE emission line data,
    fit each windowed light curve, and write every window whose summed per-line Pearson correlation
    against the signature meets the threshold to a CSV catalog.

    Optional Inputs:
        output_path [str]:   Path for the log, fit plots, and the output CSV catalog.
        eve_data_path [str]: Path to the CSV of selected EVE emission line irradiances.
        cme_signature [str]: Path to the CSV containing the fitted CME dimming signature to correlate against.
        verbose [bool]:      Set to log the processing messages to disk and console. Default is True.
    """

    eve_lines = pd.read_csv(eve_data_path, index_col=0)
    eve_lines.index = pd.to_datetime(eve_lines.index)
    wholeDfLength = len(eve_lines)

    cme_event = pd.read_csv(cme_signature, index_col=0)
    cme_event.index = pd.to_datetime(cme_event.index)
    cmeEventLength = len(cme_event)

    if verbose:
        logger = JpmLogger(filename='do_correlation_coefficient_scan',
                           path=output_path,
                           console=True)
        logger.info("Starting Stealth CME search pipeline!")
    else:
        logger = None

    if verbose:
        logger.info('Loaded EVE and CME data')

    # Define the columns of the output catalog
    output_table = pd.DataFrame(columns=[
        'Event #', 'Start Time', 'End Time', 'Correlation Coefficient'
    ])
    csv_filename = output_path + 'cc_output_{0}.csv'.format(Time.now().iso)
    output_table.to_csv(csv_filename, header=True, index=False, mode='w')

    if verbose:
        logger.info('Created output table definition.')

    # Start a progress bar
    widgets = [
        progressbar.Percentage(),
        progressbar.Bar(),
        progressbar.Timer(), ' ',
        progressbar.AdaptiveETA()
    ]

    startRow = 0
    endRow = cmeEventLength
    numSlices = int(wholeDfLength / cmeEventLength)
    output_row = 1

    progress_bar_sliding_window = progressbar.ProgressBar(
        widgets=[progressbar.FormatLabel('Correlation Coefficient Analysis ')
                 ] + widgets,
        max_value=numSlices).start()

    # ----------Loop through data set using a sliding time window-------------------------------------------------------

    for i in range(1, numSlices):

        # ----------Clip dataset to time slice window-------------------------------------------------------------------

        event_time_slice = eve_lines.iloc[startRow:endRow]

        # ---------Convert irradiance values to percentages-------------------------------------------------------------

        preflare_irradiance = event_time_slice.iloc[0]
        event_time_slice_percentages = (event_time_slice - preflare_irradiance
                                        ) / preflare_irradiance * 100.0

        if verbose:
            logger.info(
                "Event {0} irradiance converted from absolute to percent units."
                .format(i))

        # ---------Fit light curves to reduce noise---------------------------------------------------------------------

        uncertainty = np.ones(len(event_time_slice_percentages)
                              ) * 0.002545  # got this line from James's code

        progress_bar_fitting = progressbar.ProgressBar(
            widgets=[progressbar.FormatLabel('Light curve fitting: ')] +
            widgets,
            max_value=len(event_time_slice_percentages.columns)).start()

        # Alias up front so the fitted DataFrame is defined even if every column is skipped as all-NaN;
        # the fitted values overwrite event_time_slice_percentages in place below
        event_time_slice_fitted = event_time_slice_percentages
        for j, column in enumerate(event_time_slice_percentages):
            if event_time_slice_percentages[column].isnull().all().all():
                if verbose:
                    logger.info(
                        'Event {0} {1} fitting skipped because all irradiances are NaN.'
                        .format(j, column))
            else:
                eve_line_event_percentages = pd.DataFrame(
                    event_time_slice_percentages[column])
                eve_line_event_percentages.columns = ['irradiance']
                eve_line_event_percentages['uncertainty'] = uncertainty

                fitting_path = output_path + 'Fitting/'
                if not os.path.exists(fitting_path):
                    os.makedirs(fitting_path)

                plt.close('all')
                light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve(
                    eve_line_event_percentages,
                    plots_save_path='{0} Event {1} {2} '.format(
                        fitting_path, j, column),
                    verbose=verbose,
                    logger=logger)
                event_time_slice_percentages[column] = light_curve_fit

                if verbose:
                    logger.info('Event {0} {1} light curves fitted.'.format(
                        j, column))
                progress_bar_fitting.update(j)

        progress_bar_fitting.finish()

        if verbose:
            logger.info("Event {0} Light curves fitted".format(i))

        # ---------Compute Correlation Coefficients---------------------------------------------------------------------

        totalCorrelationCoefficient = 0.0
        ds1 = event_time_slice_fitted
        ds2 = cme_event

        # Gather stats for correlation
        for column in ds1:
            dsColumn1 = ds1[column]
            dsColumn2 = ds2[column]

            dsColumn1.reset_index(
                drop=True, inplace=True)  # prevent NaNs from appearing in join
            dsColumn2.reset_index(
                drop=True, inplace=True)  # prevent NaNs from appearing in join

            # TODO: assert that both columns have same count?
            n = int(dsColumn1.count())
            meanA = float(dsColumn1.mean())
            meanB = float(dsColumn2.mean())
            stdA = float(dsColumn1.std(ddof=0))
            stdB = float(dsColumn2.std(ddof=0))

            # Generate correlation output
            dsJoined = pd.DataFrame({
                'a': dsColumn1,
                'b': dsColumn2
            })  # Avoids ambiguity when attr names are the same
            numerator = 0.0  # Stores summation of (a_i - meanA)(b_i - meanB)
            denominator = n * stdA * stdB

            for index, row in dsJoined.iterrows():
                a = row['a']
                b = row['b']
                numerator = numerator + (a - meanA) * (b - meanB)

            correlationCoefficient = numerator / denominator
            totalCorrelationCoefficient = totalCorrelationCoefficient + correlationCoefficient
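            # Note: the loop above reproduces the Pearson correlation coefficient,
            #     r = sum((a_i - meanA) * (b_i - meanB)) / (n * stdA * stdB),
            # using population standard deviations (ddof=0); pandas' built-in
            # dsColumn1.corr(dsColumn2) would yield the same r. The per-line r
            # values are summed into a single match score for this time slice.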

        # ---------Output Results---------------------------------------------------------------------------------------

        eventStartTime = event_time_slice.iloc[0].name
        eventEndTime = event_time_slice.iloc[-1].name

        if not math.isnan(totalCorrelationCoefficient
                          ) and totalCorrelationCoefficient >= 4.2:
            output_table.loc[output_row] = [
                output_row, eventStartTime, eventEndTime,
                totalCorrelationCoefficient
            ]
            csv_filename = output_path + 'cc_output_{0}.csv'.format(
                Time.now().iso)
            output_table.to_csv(csv_filename,
                                header=True,
                                index=False,
                                mode='w')
            output_row = output_row + 1

        startRow = startRow + 60  # advance time window by 1 hour
        endRow = endRow + 60  # advance time window by 1 hour
        progress_bar_sliding_window.update(i)  # advance progress bar