def check_labels(els_file_name, labels_file_name):
    """Check whether at least one labelled change-point falls inside the
    time span covered by the given ELS data file.

    Parameters
    ----------
    els_file_name: path to the ELS data file.
    labels_file_name: path to the YAML labels file with a 'change_points' key.

    Returns
    -------
    True if the labels file exists and at least one change-point lies within
    the ELS file's time range, False otherwise.
    """

    # No labels file means no valid labels.
    if not os.path.exists(labels_file_name):
        return False

    # Load labelled change-points.
    with open(labels_file_name, 'r') as labels_file_object:
        labels = yaml.safe_load(labels_file_object)
        crossings = labels['change_points']

    # Convert to float (unit days).
    crossing_floats = datestring_to_float(crossings)

    # Load the ELS timestamps ONCE. This call does not depend on the
    # crossing being checked, so hoisting it out of the loop avoids
    # re-reading the data file for every labelled crossing.
    try:
        times = get_ELS_data(els_file_name, quantity='anode5',
                             start_time=datetime.min, end_time=datetime.max)[2]
    except ValueError:
        # Data file could not be read into a valid time range:
        # no label can be validated against it.
        return False

    # At least one label must fall within the file's time span.
    return any(times[0] <= crossing_float <= times[-1]
               for crossing_float in crossing_floats)
# Example #2 (scrape artifact; original marker: 예제 #2, votes: 0)
# source file: plot_els.py, project: JPLMLIA/libeos
def main(els_data_file, outputfile, quantity, start_time, end_time, colorbar_range, colorbar_orientation, title, interpolated, show_labels, **kwargs):
    """Plot ELS data from a file, optionally annotated with labelled events.

    Parameters
    ----------
    els_data_file: path to the ELS data file (must exist).
    outputfile: path to save the figure to, or None to show it interactively.
    quantity: ELS quantity to plot.
    start_time, end_time: optional '%d-%m-%Y/%H:%M' datestrings bounding the plot.
    colorbar_range, colorbar_orientation: colorbar settings for the plotter.
    title: optional axes title.
    interpolated: if True, plot interpolated data; otherwise raw data.
    show_labels: if True, overlay labelled events from the crossings files.
    kwargs: forwarded to the underlying plotting function.

    Raises
    ------
    OSError if els_data_file does not exist.
    ValueError if start_time or end_time is a malformed datestring.
    """

    # Check input arguments - data file should exist.
    if not os.path.exists(els_data_file):
        raise OSError('Could not find %s.' % els_data_file)

    # Create figure and axes.
    fig, ax = plt.subplots()

    # Check input arguments - start and end times should be valid.
    # strptime raises ValueError on malformed input; the previous
    # 'except ValueError: raise' wrapper was a no-op, so let it propagate.
    if start_time is not None:
        start_time = datetime.strptime(start_time, '%d-%m-%Y/%H:%M')
    else:
        start_time = datetime.min

    if end_time is not None:
        # Extend to the last microsecond of the given minute so the end
        # bound is inclusive of that whole minute.
        end_time = datetime.strptime(end_time, '%d-%m-%Y/%H:%M').replace(second=59, microsecond=999999)
    else:
        end_time = datetime.max

    # Pass all parameters and plot.
    if interpolated:
        plot_interpolated_ELS_data(fig, ax, els_data_file, quantity, start_time, end_time, colorbar_range, verbose=True, **kwargs)
    else:
        plot_raw_ELS_data(fig, ax, els_data_file, quantity, start_time, end_time, colorbar_range, colorbar_orientation, verbose=True, **kwargs)

    # Add title.
    if title is not None:
        ax.set_title(title)

    # Plot the events occurring in this file.
    if show_labels:
        from compute_labelled_events import list_of_events
        from data_utils import datestring_to_float

        labels = list_of_events(os.path.basename(os.path.splitext(els_data_file)[0]), './')

        # How large is the width of the rectangle around each labelled event?
        # Float literal guards against Python 2 integer division yielding 0.
        days_per_minute = 1.0/(24 * 60)
        window_size = 1*days_per_minute

        # Annotate plot with labelled events.
        # print() function form: valid in both Python 2 and 3, unlike the
        # original print statements, which are syntax errors under Python 3.
        print('Labelled events:')
        for label_type, crossing_timestring in labels:
            print('- Event at %s of type %s.' % (crossing_timestring, label_type))
            crossing_time = datestring_to_float(crossing_timestring)
            ax.axvspan(crossing_time - window_size/2, crossing_time + window_size/2, facecolor=crossing_color(label_type), alpha=1)

    # Save to file if given.
    if outputfile is None:
        plt.show()
    else:
        plt.savefig(outputfile, bbox_inches='tight')
# Example #3 (scrape artifact; original marker: 예제 #3, votes: 0)
def load_anomalies(labels_file, params):
    """Load labelled anomalies of the requested type from a YAML file.

    Parameters
    ----------
    labels_file: path to the YAML labels file.
    params: object providing start_time, end_time (float days) and
        anomaly_type (key into the YAML document).

    Returns
    -------
    Sorted numpy array of anomaly times (float days) restricted to the
    [params.start_time, params.end_time] window.

    Raises
    ------
    OSError if the labels file does not exist.
    """

    # The labels file must exist before we try to read it.
    if not os.path.exists(labels_file):
        raise OSError('Could not find labels file %s.' % labels_file)

    # Read the anomalies of the requested type out of the YAML document.
    with open(labels_file, 'r') as f:
        anomalies = yaml.safe_load(f)[params.anomaly_type]

    # Datestrings -> floats (unit days), sorted chronologically.
    anomalies = np.sort(datestring_to_float(anomalies))

    # Keep only the anomalies that fall inside the requested time window.
    in_window = np.logical_and(anomalies >= params.start_time,
                               anomalies <= params.end_time)
    return anomalies[in_window]
# Example #4 (scrape artifact; original marker: 예제 #4, votes: 0)
    # NOTE(review): this is the tail of a function whose 'def' line is not
    # visible in this chunk -- 'folders', 'labels_files' and 'algorithm'
    # come from the missing enclosing scope.

    # Initialize running statistics over all score files.
    num_timesteps = 0
    scores_sum = 0
    scores_min = 1e9    # sentinel: any real score should be smaller
    scores_max = -1e9   # sentinel: any real score should be larger

    # Iterate over each folder, filling up the two lists above.
    for folder, labels_file in zip(folders, labels_files):

        # Load the labels for this file.
        with open(labels_file, 'r') as labels_file_object:
            crossing_datestrings = yaml.safe_load(labels_file_object)['change_points']

        # Convert to float (units as days).
        # (not used further in this visible fragment -- presumably consumed
        # later in the missing part of the function)
        crossing_times = datestring_to_float(crossing_datestrings)

        # Load the scores for this file.
        try:
            file_full_path = folder + '/' + algorithm + '.hdf5'
            with h5py.File(file_full_path, 'r') as filedata:
                scores = filedata['scores'][()]
                times = filedata['times'][()]
        except IOError:
            raise IOError('File %s cannot be found. Have you run these algorithms on the training set?' % (file_full_path))

        # Update scores stats: count, sum, running min and max.
        num_timesteps += len(scores)
        scores_sum += np.sum(scores)
        scores_min = min(scores_min, np.min(scores))
        scores_max = max(scores_max, np.max(scores))
def plot_all(algorithms, title, suffix, savefile):
    """Scatter-plot crossing scores over time, one subplot per algorithm.

    For each algorithm, loads its crossings-with-scores file and scores
    summary from ERROR_ANALYSIS_DIR, draws a scatter of score vs. time
    with min/mean/max reference lines, then saves the combined figure
    to ERROR_ANALYSIS_DIR.

    Parameters
    ----------
    algorithms: iterable of algorithm names.
    title: figure title format string, filled with LABELS_SUBDIR.
    suffix: filename suffix of each algorithm's crossings .npy file.
    savefile: output filename within ERROR_ANALYSIS_DIR.
    """
    fig, axs = plt.subplots(nrows=len(algorithms),
                            sharex=True,
                            figsize=(10, 18))
    color_cycle = plt.rcParams['axes.prop_cycle']()

    for name, subplot in zip(algorithms, axs):

        # Crossings with their scores: column 0 is the score, column 1 the
        # crossing datestring.
        crossings = np.load(ERROR_ANALYSIS_DIR + name + suffix)
        crossing_scores = np.array(crossings[:, 0], dtype=float)
        crossing_times = datestring_to_float(crossings[:, 1])

        # One scatter series per algorithm, each with its own cycle color.
        subplot.scatter(crossing_times,
                        crossing_scores,
                        label=name,
                        s=10,
                        alpha=0.8,
                        **next(color_cycle))

        # Min, mean and max of the 'actual' scores as horizontal guides.
        summary = np.load(ERROR_ANALYSIS_DIR + name + '_scores_summary.npy')
        low, mean, high = summary
        subplot.axhline(y=low, linestyle='-', c='gray', alpha=0.5)
        subplot.axhline(y=mean, linestyle='--', c='black')
        subplot.axhline(y=high, linestyle='-', c='gray', alpha=0.5)

    # With sharex=True, configuring the last axis configures the shared
    # x-axis for every subplot.
    subplot.set_xlim(datestring_to_float('01-01-2004/00:00:00'),
                     datestring_to_float('01-01-2005/00:00:00'))
    subplot.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y/%H:%M'))
    subplot.xaxis.set_tick_params(labelsize=8)

    # Tilt dates to the left for easier reading.
    plt.setp(subplot.get_xticklabels(), rotation=30, ha='right')

    # Set title.
    fig.suptitle(title % LABELS_SUBDIR, y=0.92, fontweight='bold')

    # Axis labels.
    subplot.set_xlabel('Datetime')
    fig.text(x=0.02, y=0.5, s='Scores')

    # Common legend across all subplots: dedupe label -> handle pairs.
    labels_handles = {}
    for axis in fig.axes:
        handles, axis_labels = axis.get_legend_handles_labels()
        for handle, label in zip(handles, axis_labels):
            labels_handles[label] = handle

    fig.legend(
        labels_handles.values(),
        labels_handles.keys(),
        loc='center right',
        title='$\\bf{Algorithm}$',
        fancybox=True,
        shadow=True,
    )

    # Fix dimensions.
    fig.subplots_adjust(left=0.15, bottom=0.15, right=0.7)

    # Save to file.
    fig.savefig(ERROR_ANALYSIS_DIR + savefile,
                dpi=fig.dpi,
                bbox_inches='tight')
def plot_worst(algorithms, title, suffix, savefile, num_samples=5):
    """Plot the ELS data around each algorithm's worst detections.

    For each algorithm, loads its detections file (column 0: score,
    column 1: detection timestring) from ERROR_ANALYSIS_DIR, plots the
    interpolated ELS data in a time window around each of the first
    num_samples detections (one figure per algorithm, one subplot per
    detection), overlays labelled events falling inside the window, and
    saves each figure to ERROR_ANALYSIS_DIR.

    Parameters
    ----------
    algorithms: iterable of algorithm names.
    title: figure title format string, filled with (LABELS_SUBDIR, algorithm).
    suffix: filename suffix of each algorithm's detections .npy file.
    savefile: output filename format string, filled with the algorithm name.
    num_samples: number of detections to plot per algorithm.
    """

    for algorithm in algorithms:

        # Each algorithm gets its own figure.
        fig, axs = plt.subplots(ncols=num_samples, figsize=(25, 5))

        # Load crossings with scores.
        # NOTE(review): assumes the file is sorted worst-first, so the
        # leading num_samples rows are the worst -- confirm with the writer.
        worst_detections_file = ERROR_ANALYSIS_DIR + algorithm + suffix
        worst_detections = np.load(worst_detections_file)[:num_samples]

        # Parse each column: scores (float) and detection timestrings.
        worst_detections_scores = np.array(worst_detections[:, 0], dtype=float)
        worst_detections_times = worst_detections[:, 1]

        # Plot each of the worst crossings.
        for detection_time, detection_score, ax in zip(
                worst_detections_times, worst_detections_scores, axs):

            # Base ELS file name (without the .DAT extension) for this detection.
            els_basename = get_ELS_file_name(detection_time,
                                             remove_extension=True)

            # The time of the detection as a datetime object, and the size of the window used for plotting.
            detection_time_dt = convert_to_dt(detection_time)
            time_diff = timedelta(minutes=TIME_TOLERANCE // 2)

            # Plot ELS data first, restricted to the window around the detection.
            els_data_file = DATA_DIR + els_basename + '.DAT'
            plot_interpolated_ELS_data(fig,
                                       ax,
                                       els_data_file,
                                       start_time=detection_time_dt -
                                       time_diff,
                                       end_time=detection_time_dt + time_diff,
                                       colorbar_orientation='horizontal',
                                       quantity='anode5',
                                       blur_sigma=BLUR_SIGMA,
                                       bin_selection=BIN_SELECTION,
                                       filter=FILTER,
                                       filter_size=FILTER_SIZE)

            # Obtain the list of events occurring in this file.
            labels = list_of_events(els_basename, CROSSINGS_DIR)

            # How large is the width of the rectangle around each labelled event?
            days_per_minute = 1 / (24 * 60)
            window_size = 1 * days_per_minute

            # Annotate plot with labelled events that fall inside the
            # plotted time window.
            for label_type, crossing_timestring in labels:
                if detection_time_dt - time_diff <= convert_to_dt(
                        crossing_timestring) <= detection_time_dt + time_diff:
                    crossing_time = datestring_to_float(crossing_timestring)
                    ax.axvspan(crossing_time - window_size / 2,
                               crossing_time + window_size / 2,
                               facecolor=crossing_color(label_type),
                               alpha=1)

            # Set title as the score.
            ax.set_title('Score %0.2f' % detection_score, pad=55)

        # Set title.
        fig.suptitle(title % (LABELS_SUBDIR, algorithm),
                     x=0.45,
                     y=0.92,
                     fontweight='bold')

        # Fix dimensions.
        fig.subplots_adjust(left=0.15,
                            bottom=0.15,
                            right=0.7,
                            top=0.65,
                            wspace=0.4)

        # Save to file.
        fig.savefig(ERROR_ANALYSIS_DIR + savefile % algorithm,
                    dpi=fig.dpi,
                    bbox_inches='tight')
    # NOTE(review): this is the body of a function whose 'def' line is not
    # visible in this chunk -- 'algorithm_files', 'els_data_file',
    # 'els_labels_file' and 'els_dir_full_path' come from the missing scope.

    # One subplot for the ELS data plus one per algorithm's score file.
    fig, axs = plt.subplots(nrows=len(algorithm_files) + 1, figsize=(10, 40), sharex=True)

    # Plot ELS data.
    plot_interpolated_ELS_data(fig, axs[0], els_data_file, 'anode5', colorbar_orientation='horizontal', blur_sigma=BLUR_SIGMA, bin_selection=BIN_SELECTION, filter=FILTER, filter_size=FILTER_SIZE)

    # Load labelled change-points from the YAML labels file.
    with open(els_labels_file, 'r') as labels_file_object:
        labels = yaml.safe_load(labels_file_object)
        crossings = labels['change_points']

    # Mark each crossing in the first plot with a window of the appropriate color.
    color = crossing_color(LABELS_SUBDIR)
    # NOTE(review): evaluates to 0 under Python 2 integer division unless
    # 'from __future__ import division' is in effect at file top -- confirm.
    days_per_minute = 1/(24 * 60)
    window_size = 5*days_per_minute
    for crossing in crossings:
        crossing_time = datestring_to_float(crossing)
        axs[0].axvspan(crossing_time - window_size/2, crossing_time + window_size/2, facecolor=color, alpha=1)

    # Fill the remaining subplots with scores from each algorithm.
    plotter = StatsPlotter()
    for index, algorithm_file in enumerate(algorithm_files, start=1):
        with h5py.File(els_dir_full_path + algorithm_file, 'r') as filedata:
            scores = filedata['scores'][()]
            times = filedata['times'][()]
        plotter.plot_scores(fig, axs[index], times, scores)
        axs[index].set_xlabel(algorithm_file)

    # Create plots directory, if it doesn't exist.
    # (mkdir with exist_ok=True already tolerates an existing directory;
    # the os.path.exists guard is redundant but harmless.)
    if not os.path.exists(RANDOM_PLOTS_DIR):
        Path(RANDOM_PLOTS_DIR).mkdir(parents=True, exist_ok=True)