예제 #1
0
def hcc_time_plot(Ts, Ds, start, end, datastart):
    '''
    Plot time series of Temperature and Dewpoints 
    showing which points have been flagged from the tests

    The flagged period [start, end) is drawn in red on top of the
    surrounding observations, with up to 48 extra array positions shown on
    either side.  The plotting window is clipped to the extent of the data
    so that a flagged period close to either end of the record still has a
    time axis that matches the data slices.

    :param array Ts: Temperatures
    :param array Ds: Dewpoints
    :param int start: start of flag period
    :param int end: end of flag period
    :param datetime datastart: start of data set

    :returns: None
    '''

    extra = 48

    # clip the window: a negative slice start would wrap round to the end of
    # the arrays, and a stop beyond the data would leave the data slices
    # shorter than the time axis built from np.arange
    window_start = max(start - extra, 0)
    window_end = min(end + extra, len(Ts))

    times = utils.times_hours_to_datetime(np.arange(window_start, window_end, 1), datastart)

    # position of the flagged period within the plotting window
    flag_start = start - window_start
    flag_end = end - window_start

    import matplotlib.pyplot as plt
    plt.clf()

    # all observations in the window
    plt.plot(times, Ts[window_start: window_end], 'ko', ls = '-', label = "Temperature")
    plt.plot(times, Ds[window_start: window_end], 'bs', ls = '-', label = "Dewpoints")

    # flagged observations over-plotted in red
    plt.plot(times[flag_start: flag_end], Ts[start: end], 'ro')
    plt.plot(times[flag_start: flag_end], Ds[start: end], 'rs')
    plt.ylabel("(Dewpoint) Temperature (C)")
    plt.legend(loc='lower center', ncol=2, frameon=False,prop={'size':13})
    plt.show()

    return # hcc_time_plot
예제 #2
0
파일: humidity.py 프로젝트: rjhd2/HadISD_v2
def hcc_time_plot(Ts, Ds, start, end, datastart):
    """
    Plot time series of Temperature and Dewpoints 
    showing which points have been flagged from the tests

    The flagged period [start, end) is drawn in red on top of the
    surrounding observations, with up to 48 extra array positions shown on
    either side.  The plotting window is clipped to the extent of the data
    so that a flagged period close to either end of the record still has a
    time axis that matches the data slices.

    :param array Ts: Temperatures
    :param array Ds: Dewpoints
    :param int start: start of flag period
    :param int end: end of flag period
    :param datetime datastart: start of data set

    :returns: None
    """

    extra = 48

    # clip the window: a negative slice start would wrap round to the end of
    # the arrays, and a stop beyond the data would leave the data slices
    # shorter than the time axis built from np.arange
    window_start = max(start - extra, 0)
    window_end = min(end + extra, len(Ts))

    times = utils.times_hours_to_datetime(np.arange(window_start, window_end, 1), datastart)

    # position of the flagged period within the plotting window
    flag_start = start - window_start
    flag_end = end - window_start

    import matplotlib.pyplot as plt

    plt.clf()

    # all observations in the window
    plt.plot(times, Ts[window_start:window_end], "ko", ls="-", label="Temperature")
    plt.plot(times, Ds[window_start:window_end], "bs", ls="-", label="Dewpoints")

    # flagged observations over-plotted in red
    plt.plot(times[flag_start:flag_end], Ts[start:end], "ro")
    plt.plot(times[flag_start:flag_end], Ds[start:end], "rs")
    plt.ylabel("(Dewpoint) Temperature (C)")
    plt.legend(loc="lower center", ncol=2, frameon=False, prop={"size": 13})
    plt.show()

    return  # hcc_time_plot
예제 #3
0
파일: spike.py 프로젝트: rjhd2/HadISD_v2
def sc_diagnostics_and_plots(times, indata, start, end, datastart, title, plots = True):
    '''
    Plot each set of values highlighted by Spike Check

    Always prints the time of the first flagged observation; the plot itself
    is only drawn when `plots` is set.  Up to 48 array positions either side
    of the flagged period are shown, clipped at the start of the record.

    :param array times: array of times (hours since)
    :param array indata: data to plot
    :param int start: start of flagging period
    :param int end : end of flagging period
    :param datetime datastart: start of dataset
    :param string title: title of plot
    :param bool plots: do the plot

    :returns: None
    '''

    YLABELS = {"temperatures":"Temperature (C)", "dewpoints":"Dewpoints (C)", "slp":"SLP (hPa)", "windspeeds":"Wind Speed (m/s)"}

    extra = 48

    # clip the lower edge: a negative slice start would wrap round to the end
    # of the arrays.  Slicing past the end of the data simply truncates, and
    # does so identically for the times and the data.
    window_start = max(start - extra, 0)
    window_end = end + extra

    plot_times = utils.times_hours_to_datetime(times[window_start:window_end], datastart)

    # position of the flagged period within the plotting window
    spike_start = start - window_start
    spike_end = end - window_start

    # single parenthesised argument prints identically under Python 2 and 3
    print("Spike at %s" % dt.datetime.strftime(plot_times[spike_start], "%Y %m %d %H:%M"))

    if plots:

        import matplotlib.pyplot as plt
        plt.clf()
        plt.plot(plot_times, indata[window_start:window_end], 'bo', ls='-')
        # flagged observations over-plotted in red
        plt.plot(plot_times[spike_start:spike_end], indata[start:end], 'ro', markersize=10)
        plt.title("Spike "+title.capitalize())
        plt.ylabel(YLABELS[title])
        plt.show()

    return # sc_diagnostics_and_plots
예제 #4
0
def oc_plots(station, cluster, time, start, indata, variable):
    '''
    Plot each odd cluster highlighted against surrounding data

    The window runs from 10*24 array positions before the first cluster
    location to 10*24 positions after `time`, clipped at the start of the
    record.  The cluster members are over-plotted in red.

    :param MetVar station: station object
    :param OddCluster cluster: cluster object
    :param int time: timestamp
    :param datetime start: start of dataseries
    :param masked array indata: input data
    :param string variable: variable name
    
    :returns: None
    '''


    import matplotlib.pyplot as plt
    YLABELS = {"temperatures":"Temperature (C)", "dewpoints":"Dewpoints (C)", "slp":"SLP (hPa)", "windspeeds":"Wind Speed (m/s)"}
    
    plot_start, plot_end = cluster.locations[0] - 10*24 , time + 10*24
    # a negative slice start would wrap round to the end of the arrays
    if plot_start < 0 : plot_start = 0
    
    plot_times = utils.times_hours_to_datetime(station.time.data[plot_start: plot_end], start)
    
    plt.clf()
    plt.plot(plot_times, indata[plot_start: plot_end], 'bo')
    # use the function's own arguments (cluster, indata) rather than names
    # that only exist in the caller's scope
    plt.plot(plot_times[np.array(cluster.locations) - plot_start], indata[cluster.locations], 'ro')

    plt.ylim(utils.sort_ts_ylim(indata[plot_start: plot_end]))
    plt.ylabel(YLABELS[variable])
    plt.show()

    return # oc_plots
예제 #5
0
def sc_diagnostics_and_plots(times,
                             indata,
                             start,
                             end,
                             datastart,
                             title,
                             plots=True):
    '''
    Plot each set of values highlighted by Spike Check

    Always prints the time of the first flagged observation; the plot itself
    is only drawn when `plots` is set.  Up to 48 array positions either side
    of the flagged period are shown, clipped at the start of the record.

    :param array times: array of times (hours since)
    :param array indata: data to plot
    :param int start: start of flagging period
    :param int end : end of flagging period
    :param datetime datastart: start of dataset
    :param string title: title of plot
    :param bool plots: do the plot

    :returns: None
    '''

    YLABELS = {
        "temperatures": "Temperature (C)",
        "dewpoints": "Dewpoints (C)",
        "slp": "SLP (hPa)",
        "windspeeds": "Wind Speed (m/s)"
    }

    extra = 48

    # clip the lower edge: a negative slice start would wrap round to the end
    # of the arrays.  Slicing past the end of the data simply truncates, and
    # does so identically for the times and the data.
    window_start = max(start - extra, 0)
    window_end = end + extra

    plot_times = utils.times_hours_to_datetime(
        times[window_start:window_end], datastart)

    # position of the flagged period within the plotting window
    spike_start = start - window_start
    spike_end = end - window_start

    # single parenthesised argument prints identically under Python 2 and 3
    print("Spike at %s" % dt.datetime.strftime(plot_times[spike_start],
                                               "%Y %m %d %H:%M"))

    if plots:

        import matplotlib.pyplot as plt
        plt.clf()
        plt.plot(plot_times, indata[window_start:window_end], 'bo', ls='-')
        # flagged observations over-plotted in red
        plt.plot(plot_times[spike_start:spike_end],
                 indata[start:end],
                 'ro',
                 markersize=10)
        plt.title("Spike " + title.capitalize())
        plt.ylabel(YLABELS[title])
        plt.show()

    return  # sc_diagnostics_and_plots
예제 #6
0
def plot_outlier(station, variable, outlier_locs, all_data, datastart):
    '''
    Plot the outlier location (either to flag or unflag) with the target and all neighbours

    One figure is shown per outlier location, covering up to 48 array
    positions either side of it (clipped at the start of the record).  The
    target series is drawn in blue, the outlier in red and every row of
    `all_data` in grey.

    :param MetVar station: station object
    :param str variable: variable to process
    :param array outlier_locs: locations which are outliers to plot
    :param array all_data: all data from neighbours for plotting
    :param datetime datastart: start of dataset

    :returns: None
    '''

    import matplotlib.pyplot as plt
    import datetime as dt

    YLABELS = {
        "temperatures": "Temperature (C)",
        "dewpoints": "Dewpoints (C)",
        "slp": "SLP (hPa)",
        "windspeeds": "Wind Speed (m/s)"
    }
    extra = 48  # hours

    indata = getattr(station, variable).data

    for location in outlier_locs:

        # clip the lower edge: a negative slice start would wrap round to the
        # end of the arrays; slicing past the end just truncates
        window_start = max(location - extra, 0)
        window_end = location + extra

        # position of the outlier within the plotting window
        outlier_pos = location - window_start

        plot_times = utils.times_hours_to_datetime(
            station.time.data[window_start:window_end], datastart)

        plt.clf()
        plt.plot(plot_times,
                 indata[window_start:window_end],
                 'bo',
                 ls='-')
        plt.plot(plot_times[outlier_pos],
                 indata[location],
                 'ro',
                 markersize=10)

        # neighbour series for context
        for nn in range(all_data.shape[0]):
            plt.plot(plot_times,
                     all_data[nn, window_start:window_end],
                     c='0.5',
                     ls='-')

        plt.ylabel(YLABELS[variable])
        plt.title("{:s} {:s}".format(
            station.id,
            dt.datetime.strftime(plot_times[outlier_pos], "%d/%m/%Y")))
        plt.show()

    return  # plot_outlier
예제 #7
0
def rsc_diagnostics_and_plot(time, data, flags, title, start, plots = False):
    ''' plots time series of data with flagged streaks highlighted

    Always prints the time of the first flagged observation and the number
    of flagged observations; the plot itself is only drawn when `plots` is
    set.
    
    :param array time: time stamps in hours since
    :param array data: data to be plotted
    :param list flags: locations of obs to be flagged
    :param string title: title of plot (parameter)
    :param datetime start: dataset start date
    :param bool plots: do the plot

    :returns: None
    '''


    YLABELS = {"temperatures":"Temperature (C)", "dewpoints":"Dewpoints (C)", "slp":"SLP (hPa)", "windspeeds":"Wind Speed (m/s)", "winddirs":"Degrees"}

    # get period to plot and convert times
    extra = 48
    # clip the lower edge: a negative slice start would wrap round to the end
    # of the arrays
    min_t = max(flags[0] - extra, 0)
    max_t = flags[-1] + extra

    plot_times = utils.times_hours_to_datetime(time[min_t:max_t], start)

    # the first flagged observation sits at flags[0] - min_t within the
    # window; this only equals `extra` when the window was not clipped
    streak_start = flags[0] - min_t

    # single parenthesised argument prints identically under Python 2 and 3
    print("Streak at %s, %i observations" % (dt.datetime.strftime(plot_times[streak_start], "%Y %m %d %H:%M"), len(flags)))


    if plots:
        import matplotlib.pyplot as plt
        plt.clf()
        plt.plot(plot_times, data[min_t:max_t], 'bo', ls = '-')
        
        # flagged observations, re-based onto the plotting window
        flag_time = np.array(flags) - min_t
        plt.plot(plot_times[flag_time], data[flags], 'ro', markersize = 10)
        plt.title(title.capitalize())
        plt.ylabel(YLABELS[title])
        
        plt.show()

    return # rsc_diagnostics_and_plot
예제 #8
0
파일: streaks.py 프로젝트: rjhd2/HadISD_v2
def rsc_diagnostics_and_plot(time, data, flags, title, start, plots = False):
    ''' plots time series of data with flagged streaks highlighted

    Always prints the time of the first flagged observation and the number
    of flagged observations; the plot itself is only drawn when `plots` is
    set.
    
    :param array time: time stamps in hours since
    :param array data: data to be plotted
    :param list flags: locations of obs to be flagged
    :param string title: title of plot (parameter)
    :param datetime start: dataset start date
    :param bool plots: do the plot

    :returns: None
    '''


    YLABELS = {"temperatures":"Temperature (C)", "dewpoints":"Dewpoints (C)", "slp":"SLP (hPa)", "windspeeds":"Wind Speed (m/s)"}

    # get period to plot and convert times
    extra = 48
    # clip the lower edge: a negative slice start would wrap round to the end
    # of the arrays
    min_t = max(flags[0] - extra, 0)
    max_t = flags[-1] + extra

    plot_times = utils.times_hours_to_datetime(time[min_t:max_t], start)

    # the first flagged observation sits at flags[0] - min_t within the
    # window; this only equals `extra` when the window was not clipped
    streak_start = flags[0] - min_t

    # single parenthesised argument prints identically under Python 2 and 3
    print("Streak at %s, %i observations" % (dt.datetime.strftime(plot_times[streak_start], "%Y %m %d %H:%M"), len(flags)))


    if plots:
        import matplotlib.pyplot as plt
        plt.clf()
        plt.plot(plot_times, data[min_t:max_t], 'bo', ls = '-')
        
        # flagged observations, re-based onto the plotting window
        flag_time = np.array(flags) - min_t
        plt.plot(plot_times[flag_time], data[flags], 'ro', markersize = 10)
        plt.title(title.capitalize())
        plt.ylabel(YLABELS[title])
        
        plt.show()

    return # rsc_diagnostics_and_plot
예제 #9
0
def plot_outlier(station, variable, outlier_locs, all_data, datastart):
    '''
    Plot the outlier location (either to flag or unflag) with the target and all neighbours

    One figure is shown per outlier location, covering up to 48 array
    positions either side of it (clipped at the start of the record).  The
    target series is drawn in blue, the outlier in red and every row of
    `all_data` in grey.

    :param MetVar station: station object
    :param str variable: variable to process
    :param array outlier_locs: locations which are outliers to plot
    :param array all_data: all data from neighbours for plotting
    :param datetime datastart: start of dataset

    :returns: None
    '''

    import matplotlib.pyplot as plt
    import datetime as dt
    
    YLABELS = {"temperatures":"Temperature (C)", "dewpoints":"Dewpoints (C)", "slp":"SLP (hPa)", "windspeeds":"Wind Speed (m/s)"}
    extra = 48 # hours

    indata = getattr(station, variable).data
    
    for location in outlier_locs:

        # clip the lower edge: a negative slice start would wrap round to the
        # end of the arrays; slicing past the end just truncates
        window_start = max(location - extra, 0)
        window_end = location + extra

        # position of the outlier within the plotting window
        outlier_pos = location - window_start
        
        plot_times = utils.times_hours_to_datetime(station.time.data[window_start: window_end], datastart)
        
        plt.clf()
        plt.plot(plot_times, indata[window_start:window_end], 'bo', ls='-')
        plt.plot(plot_times[outlier_pos], indata[location], 'ro', markersize=10)
        
        # neighbour series for context
        for nn in range(all_data.shape[0]):
            plt.plot(plot_times, all_data[nn, window_start:window_end], c='0.5', ls='-')
            
        plt.ylabel(YLABELS[variable])
        plt.title("{:s} {:s}".format(station.id, dt.datetime.strftime(plot_times[outlier_pos], "%d/%m/%Y")))
        plt.show()
                    
    return # plot_outlier
예제 #10
0
def cu_plots(times, indata, start, end, datastart, title, extra_text=""):
    '''
    Plot each set of values highlighted by Clean Up Check

    Up to 480 array positions either side of the flagged period are shown,
    clipped at the start of the record, with the flagged period over-plotted
    in red.

    :param array times: array of times (hours since)
    :param array indata: data to plot
    :param int start: start of flagging period
    :param int end : end of flagging period
    :param datetime datastart: start of dataset
    :param string title: title of plot
    :param string extra_text: more text for title
    :returns: None
    '''

    YLABELS = {
        "temperatures": "Temperature (C)",
        "dewpoints": "Dewpoints (C)",
        "slp": "SLP (hPa)",
        "windspeeds": "Wind Speed (m/s)"
    }

    extra = 480

    # clip the lower edge: a negative slice start would wrap round to the end
    # of the arrays.  Slicing past the end of the data simply truncates, and
    # does so identically for the times and the data.
    window_start = max(start - extra, 0)
    window_end = end + extra

    plot_times = utils.times_hours_to_datetime(
        times[window_start:window_end], datastart)

    # position of the flagged period within the plotting window
    flag_start = start - window_start
    flag_end = end - window_start

    import matplotlib.pyplot as plt
    plt.clf()
    plt.plot(plot_times, indata[window_start:window_end], 'bo', ls='-')
    plt.plot(plot_times[flag_start:flag_end],
             indata[start:end],
             'ro',
             markersize=10)
    plt.xlim([plot_times[0], plot_times[-1]])
    plt.title("Clean Up - " + title.capitalize() + " - " + extra_text)
    plt.ylabel(YLABELS[title])
    plt.show()

    return  # cu_plots
예제 #11
0
파일: clean_up.py 프로젝트: rjhd2/HadISD_v2
def cu_plots(times, indata, start, end, datastart, title, extra_text=""):
    """
    Plot each set of values highlighted by Clean Up Check

    Up to 480 array positions either side of the flagged period are shown,
    clipped at the start of the record, with the flagged period over-plotted
    in red.

    :param array times: array of times (hours since)
    :param array indata: data to plot
    :param int start: start of flagging period
    :param int end : end of flagging period
    :param datetime datastart: start of dataset
    :param string title: title of plot
    :param string extra_text: more text for title
    :returns: None
    """

    YLABELS = {
        "temperatures": "Temperature (C)",
        "dewpoints": "Dewpoints (C)",
        "slp": "SLP (hPa)",
        "windspeeds": "Wind Speed (m/s)",
    }

    extra = 480

    # clip the lower edge: a negative slice start would wrap round to the end
    # of the arrays.  Slicing past the end of the data simply truncates, and
    # does so identically for the times and the data.
    window_start = max(start - extra, 0)
    window_end = end + extra

    plot_times = utils.times_hours_to_datetime(times[window_start:window_end], datastart)

    # position of the flagged period within the plotting window
    flag_start = start - window_start
    flag_end = end - window_start

    import matplotlib.pyplot as plt

    plt.clf()
    plt.plot(plot_times, indata[window_start:window_end], "bo", ls="-")
    plt.plot(plot_times[flag_start:flag_end], indata[start:end], "ro", markersize=10)
    plt.xlim([plot_times[0], plot_times[-1]])
    plt.title("Clean Up - " + title.capitalize() + " - " + extra_text)
    plt.ylabel(YLABELS[title])
    plt.show()

    return  # cu_plots
예제 #12
0
def sc(station,
       variable_list,
       flag_col,
       start,
       end,
       logfile,
       diagnostics=False,
       plots=False,
       doMonth=False):
    '''
    Spike Check, looks for spikes up to 3 observations long, using thresholds
    calculated from the data itself.

    For each variable, critical values are derived per calendar month and per
    time difference (1 to 8 hours) from the first differences of the filtered
    data.  Observations identified as spikes are flagged in station.qc_flags
    (column flag_col[v]) and in the variable's own flags attribute, and the
    number flagged is passed to utils.print_flagged_obs_number.

    :param MetVar station: the station object
    :param list variable_list: list of observational variables to process
    :param list flag_col: the columns to set on the QC flag array
    :param datetime start: dataset start time
    :param datetime end: dataset end time
    :param file logfile: logfile to store outputs
    :param bool diagnostics: print diagnostic output (also passed as noWrite to the flag counter)
    :param bool plots: do plots
    :param bool doMonth: account for incomplete months

    :returns: None
    '''
    print "refactor"

    for v, variable in enumerate(variable_list):

        # QC flag column for this variable (compressed to unmasked obs further down)
        flags = station.qc_flags[:, flag_col[v]]

        st_var = getattr(station, variable)

        # if incomplete year, mask all obs for the incomplete bit
        all_filtered = utils.apply_filter_flags(st_var,
                                                doMonth=doMonth,
                                                start=start,
                                                end=end)

        reporting_resolution = utils.reporting_accuracy(
            utils.apply_filter_flags(st_var))
        # to match IDL system - should never be called as would mean no data
        if reporting_resolution == -1:
            reporting_resolution = 1

        # reshaped to (year, calendar month, [start, end]) index pairs
        # NOTE(review): the reshape requires a whole number of years - confirm against utils
        month_ranges = utils.month_starts_in_pairs(start, end)
        month_ranges = month_ranges.reshape(-1, 12, 2)

        # locations of the unmasked observations in the filtered data
        good, = np.where(all_filtered.mask == False)

        # time difference from each unmasked observation to the next unmasked one
        full_time_diffs = np.ma.zeros(len(all_filtered), dtype=int)
        full_time_diffs.mask = copy.deepcopy(all_filtered.mask[:])
        full_time_diffs[good[:-1]] = station.time.data[
            good[1:]] - station.time.data[good[:-1]]

        # develop critical values using clean values
        # NOTE 4/7/14 - make sure that Missing and Flagged values treated appropriately
        print "sort the differencing if values were flagged rather than missing"

        # first differences of the filtered values, stored at the earlier observation
        full_filtered_diffs = np.ma.zeros(len(all_filtered))
        full_filtered_diffs.mask = copy.deepcopy(all_filtered.mask[:])
        full_filtered_diffs[good[:-1]] = all_filtered.compressed(
        )[1:] - all_filtered.compressed()[:-1]

        # test all values
        # (first differences of the unfiltered data, used when testing for spikes)
        good_to_uncompress, = np.where(st_var.data.mask == False)
        full_value_diffs = np.ma.zeros(len(st_var.data))
        full_value_diffs.mask = copy.deepcopy(st_var.data.mask[:])
        full_value_diffs[good_to_uncompress[:-1]] = st_var.data.compressed(
        )[1:] - st_var.data.compressed()[:-1]

        # convert to compressed time to match IDL
        value_diffs = full_value_diffs.compressed()
        time_diffs = full_time_diffs.compressed()
        filtered_diffs = full_filtered_diffs.compressed()
        flags = flags[good_to_uncompress]

        # rows indexed by (time difference - 1), columns by calendar month;
        # every entry starts as st_var.mdi, which is treated below as "no threshold"
        critical_values = np.zeros([9, 12])
        critical_values.fill(st_var.mdi)

        # link observation to calendar month
        month_locs = np.zeros(full_time_diffs.shape, dtype=int)

        for month in range(12):
            # gather the differences for this calendar month across all years
            for year in range(month_ranges.shape[0]):

                if year == 0:
                    this_month_time_diff = full_time_diffs[month_ranges[
                        year, month, 0]:month_ranges[year, month, 1]]
                    this_month_filtered_diff = full_filtered_diffs[
                        month_ranges[year, month, 0]:month_ranges[year, month,
                                                                  1]]
                else:
                    this_month_time_diff = np.ma.concatenate([
                        this_month_time_diff,
                        full_time_diffs[month_ranges[year, month,
                                                     0]:month_ranges[year,
                                                                     month, 1]]
                    ])
                    this_month_filtered_diff = np.ma.concatenate([
                        this_month_filtered_diff,
                        full_filtered_diffs[month_ranges[year, month,
                                                         0]:month_ranges[year,
                                                                         month,
                                                                         1]]
                    ])

                month_locs[month_ranges[year, month,
                                        0]:month_ranges[year, month,
                                                        1]] = month

            # one threshold per time difference of 1 to 8
            for delta in range(1, 9):

                locs = np.ma.where(this_month_time_diff == delta)

                # only set a threshold when at least 100 differences are available
                if len(locs[0]) >= 100:

                    iqr = utils.IQR(this_month_filtered_diff[locs])

                    if iqr == 0. and delta == 1:
                        critical_values[delta - 1, month] = 6.
                    elif iqr == 0:
                        critical_values[delta - 1, month] = st_var.mdi
                    else:
                        critical_values[delta - 1, month] = 6. * iqr

                    # January 2015 - changed to dynamically calculating the thresholds if less than IQR method ^RJHD

                    if plots:
                        import calendar
                        title = "{}, {}-hr differences".format(
                            calendar.month_name[month + 1], delta)
                        line_label = st_var.name
                        xlabel = "First Difference Magnitudes"
                    else:
                        title, line_label, xlabel = "", "", ""

                    threshold = utils.get_critical_values(
                        this_month_filtered_diff[locs],
                        binmin=0,
                        binwidth=0.5,
                        plots=plots,
                        diagnostics=diagnostics,
                        title=title,
                        line_label=line_label,
                        xlabel=xlabel,
                        old_threshold=critical_values[delta - 1, month])

                    # keep the dynamically calculated threshold only if it is the smaller one
                    if threshold < critical_values[delta - 1, month]:
                        critical_values[delta - 1, month] = threshold

                    if plots or diagnostics:

                        print critical_values[delta - 1, month], iqr, 6 * iqr

        # restrict the month lookup to the unmasked observations, matching the compressed arrays
        month_locs = month_locs[good_to_uncompress]
        if diagnostics:
            print critical_values[0, :]

        # not less than 5x reporting accuracy
        good_critical_values = np.where(critical_values != st_var.mdi)
        low_critical_values = np.where(
            critical_values[good_critical_values] <= 5. * reporting_resolution)
        temporary = critical_values[good_critical_values]
        temporary[low_critical_values] = 5. * reporting_resolution
        critical_values[good_critical_values] = temporary

        if diagnostics:
            print critical_values[0, :], 5. * reporting_resolution

        # check hourly against 2 hourly, if <2/3 the increase to avoid crazy rejection rate
        for month in range(12):
            if critical_values[0, month] != st_var.mdi and critical_values[
                    1, month] != st_var.mdi:
                if critical_values[0, month] / critical_values[1,
                                                               month] <= 0.66:
                    critical_values[0,
                                    month] = 0.66 * critical_values[1, month]

        if diagnostics:
            print "critical values"
            print critical_values[0, :]

        # get time differences for unfiltered data

        full_time_diffs = np.ma.zeros(len(st_var.data), dtype=int)
        full_time_diffs.mask = copy.deepcopy(st_var.data.mask[:])
        full_time_diffs[good_to_uncompress[:-1]] = station.time.data[
            good_to_uncompress[1:]] - station.time.data[
                good_to_uncompress[:-1]]
        time_diffs = full_time_diffs.compressed()

        # go through each difference, identify which month it is in if passes spike thresholds

        # spikes at the beginning or ends of sections
        # NOTE(review): at t == 0, time_diffs[t - 1] wraps round to the last element,
        # and good_to_uncompress[t + 1] is out of range for the final t - confirm intended
        for t in np.arange(len(time_diffs)):
            if (np.abs(time_diffs[t - 1]) > 240) and (np.abs(time_diffs[t]) <
                                                      3):
                # 10 days before but short gap thereafter

                next_values = st_var.data[good_to_uncompress[t + 1:]]
                good, = np.where(next_values.mask == False)

                # median of (up to) the next 10 observations
                next_median = np.ma.median(next_values[good[:10]])

                next_diff = np.abs(value_diffs[t])  # out of spike
                median_diff = np.abs(next_median -
                                     st_var.data[good_to_uncompress[t]]
                                     )  # are the remaining onees

                if (critical_values[time_diffs[t] - 1, month_locs[t]] !=
                        st_var.mdi):

                    # jump from spike > critical but average after < critical / 2
                    if (np.abs(median_diff) < critical_values[time_diffs[t] - 1, month_locs[t]] / 2.) and\
                        (np.abs(next_diff) > critical_values[time_diffs[t] - 1, month_locs[t]]) :

                        flags[t] = 1
                        if plots or diagnostics:
                            sc_diagnostics_and_plots(station.time.data,
                                                     st_var.data,
                                                     good_to_uncompress[t],
                                                     good_to_uncompress[t + 1],
                                                     start,
                                                     variable,
                                                     plots=plots)

            elif (np.abs(time_diffs[t - 1]) < 3) and (np.abs(time_diffs[t]) >
                                                      240):
                # 10 days after but short gap before

                prev_values = st_var.data[good_to_uncompress[:t - 1]]
                good, = np.where(prev_values.mask == False)

                # median of (up to) the previous 10 observations
                prev_median = np.ma.median(prev_values[good[-10:]])

                prev_diff = np.abs(value_diffs[t - 1])
                median_diff = np.abs(prev_median -
                                     st_var.data[good_to_uncompress[t]])

                if (critical_values[time_diffs[t - 1] - 1, month_locs[t]] !=
                        st_var.mdi):

                    # jump into spike > critical but average before < critical / 2
                    if (np.abs(median_diff) < critical_values[time_diffs[t - 1] - 1, month_locs[t]] / 2.) and\
                        (np.abs(prev_diff) > critical_values[time_diffs[t - 1] - 1, month_locs[t]]) :

                        flags[t] = 1
                        if plots or diagnostics:
                            sc_diagnostics_and_plots(station.time.data,
                                                     st_var.data,
                                                     good_to_uncompress[t],
                                                     good_to_uncompress[t + 1],
                                                     start,
                                                     variable,
                                                     plots=plots)
        ''' this isn't the nicest way, but a direct copy from IDL
            masked arrays might help remove some of the lines

            Also, this is relatively slow'''

        # spikes of 1, 2 or 3 observations within the body of the series
        for t in np.arange(len(time_diffs)):
            for spk_len in [1, 2, 3]:
                if t >= spk_len and t < len(time_diffs) - spk_len:

                    # check if time differences are appropriate, for multi-point spikes
                    if (np.abs(time_diffs[t - spk_len]) <= spk_len * 3) and\
                    (np.abs(time_diffs[t]) <= spk_len * 3) and\
                    (time_diffs[t - spk_len - 1] - 1 < spk_len * 3) and\
                    (time_diffs[t + 1] - 1 < spk_len * 3) and \
                    ((spk_len == 1) or \
                    ((spk_len == 2) and (np.abs(time_diffs[t - spk_len + 1]) <= spk_len * 3)) or \
                    ((spk_len == 3) and (np.abs(time_diffs[t - spk_len + 1]) <= spk_len * 3) and (np.abs(time_diffs[t - spk_len + 2]) <= spk_len * 3))):

                        # check if differences are valid
                        if (value_diffs[t - spk_len] != st_var.mdi) and \
                        (value_diffs[t - spk_len] != st_var.fdi) and \
                        (critical_values[time_diffs[t - spk_len] - 1, month_locs[t]] != st_var.mdi):

                            # if exceed critical values
                            if (np.abs(value_diffs[t - spk_len]) >=
                                    critical_values[time_diffs[t - spk_len] -
                                                    1, month_locs[t]]):

                                # are signs of two differences different
                                if (math.copysign(1, value_diffs[t])
                                        != math.copysign(
                                            1, value_diffs[t - spk_len])):

                                    # are within spike differences small
                                    if (spk_len == 1) or\
                                    ((spk_len == 2) and (np.abs(value_diffs[t - spk_len + 1]) < critical_values[time_diffs[t - spk_len + 1] -1, month_locs[t]] / 2.)) or \
                                    ((spk_len == 3) and (np.abs(value_diffs[t - spk_len + 1]) < critical_values[time_diffs[t - spk_len + 1] -1, month_locs[t]] / 2.) and\
                                      (np.abs(value_diffs[t - spk_len + 2]) < critical_values[time_diffs[t - spk_len + 2] -1, month_locs[t]] / 2.)):

                                        # check if following value is valid
                                        if (value_diffs[t] != st_var.mdi) and (critical_values[time_diffs[t] - 1, month_locs[t]] != st_var.mdi) and\
                                            (value_diffs[t] != st_var.fdi):

                                            # and if at least critical value
                                            if (np.abs(value_diffs[t]) >=
                                                    critical_values[
                                                        time_diffs[t] - 1,
                                                        month_locs[t]]):

                                                # test if surrounding differences below 1/2 critical value
                                                if (np.abs(
                                                        value_diffs[t - spk_len
                                                                    - 1]
                                                ) <= critical_values[
                                                        time_diffs[t -
                                                                   spk_len -
                                                                   1] - 1,
                                                        month_locs[t]] / 2.):
                                                    if (np.abs(
                                                            value_diffs[t + 1]
                                                    ) <= critical_values[
                                                            time_diffs[t + 1] -
                                                            1, month_locs[t]] /
                                                            2.):

                                                        # set the flags
                                                        flags[t - spk_len +
                                                              1:t + 1] = 1

                                                        if plots or diagnostics:

                                                            sc_diagnostics_and_plots(
                                                                station.time.
                                                                data,
                                                                st_var.data,
                                                                good_to_uncompress[
                                                                    t -
                                                                    spk_len +
                                                                    1],
                                                                good_to_uncompress[
                                                                    t + 1],
                                                                start,
                                                                variable,
                                                                plots=plots)

        # write the compressed flags back into the full-length QC flag array
        station.qc_flags[good_to_uncompress, flag_col[v]] = flags

        flag_locs, = np.where(station.qc_flags[:, flag_col[v]] != 0)

        utils.print_flagged_obs_number(logfile,
                                       "Spike",
                                       variable,
                                       len(flag_locs),
                                       noWrite=diagnostics)  # additional flags

        # copy flags into attribute
        st_var.flags[flag_locs] = 1

        # matches 030660 - but with adapted IDL
        # matches 030220 OK, but finds more but all are reasonable 1/9/14

        # NOTE(review): interactive plotting is hard-wired off.  flags has been reduced
        # to the compressed per-variable column by this point, so the
        # flags[:, flag_col[v]] lookup below looks like it would fail if this were
        # switched on - confirm before enabling
        do_interactive = False
        if plots and do_interactive == True:
            import matplotlib.pyplot as plt

            plot_times = utils.times_hours_to_datetime(station.time.data,
                                                       start)

            plt.clf()
            plt.plot(plot_times, all_filtered, 'bo', ls='-')
            flg = np.where(flags[:, flag_col[v]] == 1)
            plt.plot(plot_times[flg], all_filtered[flg], 'ro', markersize=10)
            plt.show()

    station = utils.append_history(station, "Spike Check")

    return  # sc
예제 #13
0
# nyears x 12 months
# (month start locations reshaped so each row holds one year's twelve entries)
month_start_locs = np.array(utils.month_starts(DATASTART, DATAEND)).reshape(-1,12)

# which years
years = DATASTART.year + np.arange(month_start_locs.shape[0])

# one figure per year of the dataset
for year in range(DATASTART.year, DATAEND.year):

    # row of month_start_locs for this year (an array, as it comes from np.where)
    year_loc, = np.where(years == year)

    # plot from the start of this year to the start of the next one;
    # the final year has no following row, so run to the end of the data instead
    if year != DATAEND.year - 1:
        plot_range = (month_start_locs[year_loc,0], month_start_locs[year_loc+1,0])
    else:
        plot_range = (month_start_locs[year_loc,0], -1) # misses last hour

    plot_times = utils.times_hours_to_datetime(station.time.data[plot_range[0]:plot_range[1]], DATASTART)

    # all QC flag columns for the period being plotted
    plot_qc_flags = station.qc_flags[plot_range[0]:plot_range[1],:]

    plt.figure(figsize=(12, 12), dpi=100)
    plt.clf()

    MakePlot = False
    # one panel per variable (the subplot grid has room for five)
    for v,var in enumerate(process_vars):

        ax = plt.subplot(5, 1, v+1)

        plot_var = getattr(station, var)
        plot_data = plot_var.data[plot_range[0]:plot_range[1]]

        # only proceed if there are unmasked values in this period
        if len(plot_data.compressed()) > 0:
예제 #14
0
# which years
years = DATASTART.year + np.arange(month_start_locs.shape[0])

# find which year and test to plot
year_loc, = np.where(years == year)
# the unpacking requires exactly one entry of qc_test to match
test_loc, = np.where(qc_test == test)[0]

# and get the plot range
# (start of this year to the start of the next; the final year has no
# following row, so run to the end of the data instead)
if year != DATAEND.year - 1:
    plot_range = (month_start_locs[year_loc, 0], month_start_locs[year_loc + 1,
                                                                  0])
else:
    plot_range = (month_start_locs[year_loc, 0], -1)  # misses last hour

# convert to useful numbers
plot_times = utils.times_hours_to_datetime(
    station.time.data[plot_range[0]:plot_range[1]], DATASTART)

# get all the QC flags
plot_qc_flags = station.qc_flags[plot_range[0]:plot_range[1], :]

plot_var = getattr(station, variable)
plot_data = plot_var.data[plot_range[0]:plot_range[1]]

# locations flagged by the selected test, and by any test at all
plot_test_loc, = np.where(plot_qc_flags[:, test_loc] != 0)
plot_all_test_loc, = np.where(np.sum(plot_qc_flags[:, :], axis=1) != 0)

plt.clf()

# every observation in the period
plt.scatter(plot_times, plot_data, c='k', marker='o', s=ms, edgecolor='k')
plt.scatter(plot_times[plot_all_test_loc],
            plot_data[plot_all_test_loc],
예제 #15
0
def evc(station, variable_list, flag_col, start, end, logfile, diagnostics = False, plots = False, idl = False):
    # Excess Variance Check.
    #
    # For every variable and calendar month, compares a per-year spread measure
    # of normalised anomalies against the median and (half) IQR of that measure
    # over all years, and sets the QC flag for the whole month of any year that
    # deviates by more than a threshold.  For "slp" and "windspeeds" a month is
    # only flagged if no storm signature is found.
    #
    # :param station: station object; variables are read with getattr, and
    #                 station.qc_flags / station.time.data are used directly
    # :param list variable_list: names of the variables to process
    # :param list flag_col: column of station.qc_flags to set for each variable
    # :param start: passed to utils.month_starts_in_pairs; start.year is used in messages
    # :param end: passed to utils.month_starts_in_pairs
    # :param logfile: object with a write() method, used when not plotting/diagnosing
    # :param bool diagnostics: print extra information to stdout
    # :param bool plots: produce plots via evc_plot_slp_wind / evc_plot_hist
    # :param bool idl: use the IDL-compatible winsorizing/mean calculation
    #
    # :returns: None - station.qc_flags and the variables' .flags are modified in place
    #
    # Relies on names defined outside this function: np, utils,
    # DATA_COUNT_THRESHOLD, MAD_THRESHOLD, evc_plot_slp_wind, evc_plot_hist.

    # calendar is used for month names in diagnostic output and plot titles;
    # plt is imported here but not referenced directly in this function
    if plots or diagnostics:
        import matplotlib.pyplot as plt
        import calendar


    # very similar to climatological check - ensure that not duplicating

    for v, variable in enumerate(variable_list):

        st_var = getattr(station, variable)

        # reference reporting accuracy/frequency from the filtered data,
        # compared later against the per-year values
        reporting_resolution = utils.reporting_accuracy(utils.apply_filter_flags(st_var))
        reporting_freq = utils.reporting_frequency(utils.apply_filter_flags(st_var))

        # reshape to (year, month, 2) where the last axis is used as slice start/end
        month_ranges = utils.month_starts_in_pairs(start, end)
        month_ranges = month_ranges.reshape(-1,12,2)

        # (year, month) array filled by utils.concatenate_months below
        month_data_count = np.zeros(month_ranges.shape[0:2])

        # for each month
        for month in range(12):

            # set up hourly climatologies, initialised to the fill value so
            # that hours without enough data can be masked afterwards
            hourly_clims = np.zeros(24)
            hourly_clims.fill(st_var.data.fill_value)

            # this_month is indexed [row, hour] below; year_ids gives the year
            # index of each row
            this_month, year_ids, month_data_count[:,month] = utils.concatenate_months(month_ranges[:,month,:], st_var.data, hours = True)


            # (previous inline implementation, superseded by utils.concatenate_months)
            # # extract each year and append together
            # year_ids = [] # counter to determine which year each day corresponds to
            # for year in range(month_ranges.shape[0]):

            #     this_year = st_var.data[month_ranges[year,month][0]:month_ranges[year,month][1]]
            #     if year == 0:
            #         # store so can access each hour of day separately
            #         this_month = this_year.reshape(-1,24)

            #         year_ids = [year for x in range(this_month.shape[0])]

            #         month_data_count[year,month] = len(this_year.compressed())

            #     else:
            #         this_year = this_year.reshape(-1,24)

            #         this_month = np.ma.concatenate((this_month, this_year), axis = 0)

            #         year_ids.extend([year for x in range(this_year.shape[0])])

            #         month_data_count[year,month] = len(this_year.compressed())



            # winsorize and get hourly climatology
            for h in range(24):

                this_hour = this_month[:,h]

                # only build a climatology for hours with more than 100 unmasked values
                if len(this_hour.compressed()) > 100:


                    # winsorize & climatologies - done to match IDL
                    if idl:
                        # IDL mode: an extra -999999 value is appended before
                        # winsorizing and the sum is divided by (length - 1)
                        this_hour_winsorized = utils.winsorize(np.append(this_hour.compressed(), -999999), 0.05, idl = idl)
                        hourly_clims[h] = np.ma.sum(this_hour_winsorized)/(len(this_hour_winsorized) - 1)

                    else:
                        this_hour_winsorized = utils.winsorize(this_hour.compressed(), 0.05, idl = idl)
                        hourly_clims[h] = np.ma.mean(this_hour_winsorized)


            # mask hours that kept the fill value, then subtract the climatology
            # of the matching hour from every row
            hourly_clims = np.ma.masked_where(hourly_clims == st_var.data.fill_value, hourly_clims)
            anomalies = this_month - np.tile(hourly_clims, (this_month.shape[0], 1))

            # extract IQR of anomalies (using 1/2 value to match IDL)
            if len(anomalies.compressed()) >= 10:

                iqr = utils.IQR(anomalies.compressed().reshape(-1)) / 2. # to match IDL
                # lower bound on the normalisation factor
                if iqr < 1.5: iqr = 1.5

            else:
                # too few anomalies: the anomalies are divided by st_var.mdi below
                iqr = st_var.mdi

            normed_anomalies = anomalies / iqr


            # per-year results for this month, all initialised to st_var.mdi
            variances = np.ma.zeros(month_ranges.shape[0])
            variances.mask = [False for i in range(month_ranges.shape[0])]
            rep_accuracies = np.zeros(month_ranges.shape[0])
            rep_freqs = np.zeros(month_ranges.shape[0])

            variances.fill(st_var.mdi)
            rep_accuracies.fill(st_var.mdi)
            rep_freqs.fill(st_var.mdi)

            year_ids = np.array(year_ids)

            # extract variance of normalised anomalies for each year
            for y, year in enumerate(range(month_ranges.shape[0])):

                year_locs = np.where(year_ids == y)

                this_year = normed_anomalies[year_locs,:]
                this_year = this_year.reshape(-1)

            # end of similarity with Climatological check

                # need at least 30 unmasked values, otherwise the year is masked
                if len(this_year.compressed()) >= 30:

                    # the "variance" stored here is the value returned by
                    # utils.mean_absolute_deviation(..., median = True)
                    variances[y] = utils.mean_absolute_deviation(this_year, median = True)

                    rep_accuracies[y] = utils.reporting_accuracy(this_year)
                    rep_freqs[y] = utils.reporting_frequency(this_year)

                else:
                    variances.mask[y] = True

            # NOTE(review): literal 100 here, DATA_COUNT_THRESHOLD in the test
            #   further down - confirm whether these are meant to be the same
            good = np.where(month_data_count[:,month] >= 100)

            # get median and IQR of variance for all years for this month
            if len(good[0]) >= 10:

                median_variance = np.median(variances[good])

                iqr_variance = utils.IQR(variances[good]) / 2. # to match IDL

                # lower bound on the denominator used in the threshold tests
                if iqr_variance < 0.01: iqr_variance = 0.01
            else:

                # fewer than 10 usable years: mark as missing so no year is tested
                median_variance = st_var.mdi
                iqr_variance = st_var.mdi


            # if SLP, then get median and MAD of SLP and windspeed for month
            if variable in ["slp", "windspeeds"]:

                winds = getattr(station, "windspeeds")
                slp = getattr(station, "slp")

                # refactor this as similar in style to how target data extracted
                # stack this calendar month from every year into (rows, 24) arrays
                for y, year in enumerate(range(month_ranges.shape[0])):

                    if y == 0:
                        winds_year = winds.data[month_ranges[year,month][0]:month_ranges[year,month][1]]
                        winds_month = winds_year.reshape(-1,24)

                        slp_year = slp.data[month_ranges[year,month][0]:month_ranges[year,month][1]]
                        slp_month = slp_year.reshape(-1,24)

                    else:
                        winds_year = winds.data[month_ranges[year,month][0]:month_ranges[year,month][1]]
                        winds_year = winds_year.reshape(-1,24)
                        winds_month = np.ma.concatenate((winds_month, winds_year), axis = 0)

                        slp_year = slp.data[month_ranges[year,month][0]:month_ranges[year,month][1]]
                        slp_year =  slp_year.reshape(-1,24)
                        slp_month = np.ma.concatenate((slp_month, slp_year), axis = 0)

                median_wind = np.ma.median(winds_month)
                median_slp  = np.ma.median(slp_month)

                wind_MAD = utils.mean_absolute_deviation(winds_month.compressed())
                slp_MAD = utils.mean_absolute_deviation(slp_month.compressed())

                if diagnostics:
                    print "median windspeed {} m/s, MAD = {}".format(median_wind, wind_MAD)
                    print "median slp {} hPa, MAD = {}".format(median_slp, slp_MAD)

            # now test to see if variance exceeds expected range
            for y, year in enumerate(range(month_ranges.shape[0])):


                # skip years/months where any of the statistics is missing or
                # where there are too few observations
                if (variances[y] != st_var.mdi) and (iqr_variance != st_var.mdi) and \
                    (median_variance != st_var.mdi) and (month_data_count[y,month] >= DATA_COUNT_THRESHOLD):

                    # if SLP, then need to test if deep low pressure ("hurricane/storm") present
                    #   as this will increase the variance for this month + year
                    if variable in ["slp", "windspeeds"]:

                        iqr_threshold = 6.

                        # increase threshold if reporting frequency and resolution of this
                        #   year doesn't match average (both must differ)
                        if (rep_accuracies[y] != reporting_resolution) and \
                            (rep_freqs[y] != reporting_freq):
                            iqr_threshold = 8.

                        if diagnostics:
                            print np.abs(variances[y] - median_variance) / iqr_variance, variances[y] , median_variance , iqr_variance , iqr_threshold, month+1, year+start.year

                        if np.abs((variances[y] - median_variance) / iqr_variance) > iqr_threshold:

                            # check for storms
                            # note: rebinds winds_month/slp_month to the 1-D data of
                            # this single year+month; median_* and *_MAD computed
                            # above from all years are unaffected
                            winds_month = winds.data[month_ranges[year,month][0]:month_ranges[year,month][1]]
                            slp_month = slp.data[month_ranges[year,month][0]:month_ranges[year,month][1]]

                            storm = False
                            if (len(winds_month.compressed()) >= 1) and (len(slp_month.compressed()) >= 1):
                                # (earlier approach, kept for reference)
                                # find max wind & min SLP
                                # max_wind_loc = np.where(winds_month == np.max(winds_month))[0][0]
                                # min_slp_loc = np.where(slp_month == np.min(slp_month))[0][0]

                                # if these are above thresholds and within one day of each other,
                                #    then it likely was a storm
                                # print "fix this in case of multiple max/min locations"
                                # if (np.abs(max_wind_loc - min_slp_loc) <= 24) and \
                                #     (((np.max(winds_month) - median_wind) / wind_MAD) > MAD_THRESHOLD) and \
                                #     (((median_slp - np.min(slp_month)) / slp_MAD) > MAD_THRESHOLD):

                                # locations where winds greater than threshold
                                high_winds, = np.where((winds_month - median_wind)/wind_MAD > MAD_THRESHOLD)
                                # and where SLP less than threshold
                                low_slps, = np.where((median_slp - slp_month)/slp_MAD > MAD_THRESHOLD)

                                # if any locations match, then it's a storm
                                match_loc = high_winds[np.in1d(high_winds, low_slps)]

                                if len(match_loc) > 0:
                                    storm = True
                            else:
                                # placeholder message when either series has no valid data
                                print "write spurious"

                            # check the SLP first difference series
                            #   to ensure a drop down and climb out of minimum SLP/or climb up and down from maximum wind speed
                            if variable == "slp":
                                diffs = np.diff(slp_month.compressed())
                            elif variable == "windspeeds":
                                diffs = np.diff(winds_month.compressed())

                            # running lengths of consecutive non-positive / positive
                            # differences, and the longest completed run of each
                            negs, poss = 0,0
                            biggest_neg, biggest_pos = 0,0

                            # NOTE(review): a run is only compared against the maximum
                            #   when the sign changes, so the final run in diffs is
                            #   never counted - confirm this matches the intent
                            for diff in diffs:

                                if diff > 0:
                                    if negs > biggest_neg: biggest_neg = negs
                                    negs = 0
                                    poss += 1
                                else:
                                    if poss > biggest_pos: biggest_pos = poss
                                    poss = 0
                                    negs += 1

                            # flag only if there is no run of 10 or more and no storm match
                            if (biggest_neg < 10) and (biggest_pos < 10) and not storm:

                                # not a hurricane, so mask
                                station.qc_flags[month_ranges[year,month,0]:month_ranges[year,month,1], flag_col[v]] = 1
                                if plots or diagnostics:
                                    print "No Storm or Hurricane in %i %i - flagging\n" % (month+1, y+start.year)
                                else:
                                    logfile.write("No Storm or Hurricane in %i %i - flagging\n" % (month+1, y+start.year))

                            else:
                                # hurricane
                                if plots or diagnostics:
                                    print "Storm or Hurricane in %i %i - not flagging\n" % (month+1, y+start.year)
                                else:
                                    logfile.write("Storm or Hurricane in %i %i - not flagging\n" % (month+1, y+start.year))

                            if plots:
                                # plot showing the pressure, pressure first differences and the wind speeds
                                plot_times = utils.times_hours_to_datetime(station.time.data[month_ranges[year,month][0]:month_ranges[year,month][1]], start)

                                evc_plot_slp_wind(plot_times, slp_month, diffs, median_slp, slp_MAD, winds_month, median_wind, wind_MAD)

                    else:

                        # all other variables: higher thresholds and no storm test
                        iqr_threshold = 8.

                        if (rep_accuracies[y] != reporting_resolution) and \
                            (rep_freqs[y] != reporting_freq):
                            iqr_threshold = 10.


                        if np.abs(variances[y] - median_variance) / iqr_variance > iqr_threshold:

                            if diagnostics:
                                print "flagging {} {}".format(year+start.year,calendar.month_name[month+1])
                            # remove the data
                            station.qc_flags[month_ranges[year,month,0]:month_ranges[year,month,1], flag_col[v]] = 1


            if plots:
                plot_variances = (variances - median_variance) / iqr_variance

                plot_variances = np.ma.masked_where(month_data_count[:,month] < DATA_COUNT_THRESHOLD,plot_variances)

                # NOTE(review): iqr_threshold is whatever the last tested year set;
                #   it is not assigned if no year passed the validity test above -
                #   confirm this cannot be reached with it undefined
                evc_plot_hist(plot_variances, iqr_threshold, "Variance Check - %s - %s" % (variable, calendar.month_name[month+1]))

        # report the number of flagged observations for this variable
        flag_locs = np.where(station.qc_flags[:, flag_col[v]] != 0)
        if plots or diagnostics:
            utils.print_flagged_obs_number(logfile, "Variance", variable, len(flag_locs[0]), noWrite = True)
        else:
            utils.print_flagged_obs_number(logfile, "Variance", variable, len(flag_locs[0]))

        # copy flags into attribute
        st_var.flags[flag_locs] = 1

    # matches 030660 for T, D and SLP 21/8/2014

    # the returned station is only bound locally; the function itself returns None
    station = utils.append_history(station, "Excess Variance Check")

    return # evc
예제 #16
0
파일: spike.py 프로젝트: rjhd2/HadISD_v2
def sc(station, variable_list, flag_col, start, end, logfile, diagnostics = False, plots = False, second = False):
    '''
    Spike Check, looks for spikes up to 3 observations long, using thresholds
    calculated from the data itself.

    :param MetVar station: the station object
    :param list variable_list: list of observational variables to process
    :param list flag_col: the columns to set on the QC flag array
    :param datetime start: dataset start time
    :param datetime end: dataset end time
    :param file logfile: logfile to store outputs
    :param bool plots: do plots
    :param bool second: run for second time

    :returns:    
    '''
    # Additional parameter not listed above:
    #   diagnostics (bool) - print extra information to stdout; together with
    #   plots it also makes the flag count go to stdout only (noWrite = True).
    # The function returns None; station.qc_flags and each variable's .flags
    # are modified in place.
    # Relies on names defined outside this function: np, math, utils,
    # sc_diagnostics_and_plots.

    # developer reminder, printed unconditionally on every call
    print "refactor"

    for v, variable in enumerate(variable_list):

        flags = station.qc_flags[:, flag_col[v]]

        prev_flag_number = 0
        if second:
            # count currently existing flags so only additional ones are reported:
            prev_flag_number = len(flags[flags != 0])

        st_var = getattr(station, variable)

        all_filtered = utils.apply_filter_flags(st_var)

        reporting_resolution = utils.reporting_accuracy(utils.apply_filter_flags(st_var))
        # to match IDL system - should never be called as would mean no data
        if reporting_resolution == -1: reporting_resolution = 1

        # reshape to (year, month, 2) where the last axis is used as slice start/end
        month_ranges = utils.month_starts_in_pairs(start, end)
        month_ranges = month_ranges.reshape(-1,12,2)

        good = np.where(all_filtered.mask == False)

        # time differences between successive unmasked (filtered) observations,
        # stored on the full-length time axis
        # NOTE(review): the right-hand side has one element fewer than there are
        #   `good` locations; this relies on how the NumPy version in use handles
        #   such an assignment - confirm (same pattern is repeated below)
        full_time_diffs = np.ma.zeros(len(all_filtered))
        full_time_diffs.mask = all_filtered.mask
        full_time_diffs[good] = station.time.data[good][1:] - station.time.data[good][:-1]

        # develop critical values using clean values
        # NOTE 4/7/14 - make sure that Missing and Flagged values treated appropriately
        print "sort the differencing if values were flagged rather than missing"

        # value differences between successive unmasked filtered observations
        full_filtered_diffs = np.ma.zeros(len(all_filtered))
        full_filtered_diffs.mask = all_filtered.mask
        full_filtered_diffs[good] = all_filtered.compressed()[1:] - all_filtered.compressed()[:-1]

        # test all values (unfiltered data, masked only where the data are masked)
        good_to_uncompress = np.where(st_var.data.mask == False)
        full_value_diffs = np.ma.zeros(len(st_var.data))
        full_value_diffs.mask = st_var.data.mask
        full_value_diffs[good_to_uncompress] = st_var.data.compressed()[1:] - st_var.data.compressed()[:-1]

        # convert to compressed time to match IDL
        value_diffs = full_value_diffs.compressed()
        time_diffs = full_time_diffs.compressed()
        filtered_diffs = full_filtered_diffs.compressed()
        # from here on `flags` is 1-D and aligned with the unmasked observations
        flags = flags[good_to_uncompress]


        # critical_values[delta - 1, month]: spike threshold for observations
        # `delta` time units apart in that calendar month; st_var.mdi = not set.
        # Only rows 0-7 are ever filled (delta runs over 1..8 below).
        critical_values = np.zeros([9,12])
        critical_values.fill(st_var.mdi)

        # link observation to calendar month
        # NOTE(review): np.zeros gives a float array, and both month_locs and
        #   time_diffs are later used as indices into critical_values - confirm
        #   the NumPy version in use accepts float-valued indices
        month_locs = np.zeros(full_time_diffs.shape)

        for month in range(12):
            # gather this calendar month's differences from every year
            for year in range(month_ranges.shape[0]):

                if year == 0:
                    this_month_time_diff = full_time_diffs[month_ranges[year,month,0]:month_ranges[year,month,1]]
                    this_month_filtered_diff = full_filtered_diffs[month_ranges[year,month,0]:month_ranges[year,month,1]]
                else:
                    this_month_time_diff = np.ma.concatenate([this_month_time_diff, full_time_diffs[month_ranges[year,month,0]:month_ranges[year,month,1]]])
                    this_month_filtered_diff = np.ma.concatenate([this_month_filtered_diff, full_filtered_diffs[month_ranges[year,month,0]:month_ranges[year,month,1]]])


                month_locs[month_ranges[year,month,0]:month_ranges[year,month,1]] = month

            # one threshold per time separation of 1..8
            for delta in range(1,9):

                locs = np.ma.where(this_month_time_diff == delta)

                # need at least 100 differences at this separation
                if len(locs[0]) >= 100:

                    iqr = utils.IQR(this_month_filtered_diff[locs])

                    # default threshold is 6 x IQR; a zero IQR gives a fixed 6
                    # for delta == 1 and "not set" for larger separations
                    if iqr == 0. and delta == 1:
                        critical_values[delta-1,month] = 6.
                    elif iqr == 0:
                        critical_values[delta-1,month] = st_var.mdi
                    else:
                        critical_values[delta-1,month] = 6. * iqr

                    # January 2015 - changed to dynamically calculating the thresholds if less than IQR method ^RJHD

                    if plots:
                        import calendar
                        title = "{}, {}-hr differences".format(calendar.month_name[month+1], delta)
                        line_label = st_var.name
                        xlabel = "First Difference Magnitudes"
                    else:
                        title, line_label, xlabel = "","",""

                    threshold = utils.get_critical_values(this_month_filtered_diff[locs], binmin = 0, binwidth = 0.5, plots = plots, diagnostics = diagnostics, title = title, line_label = line_label, xlabel = xlabel, old_threshold = critical_values[delta-1,month])

                    # keep whichever of the two thresholds is smaller
                    if threshold < critical_values[delta-1,month]: critical_values[delta-1,month] = threshold

                    if plots or diagnostics:

                        print critical_values[delta-1,month] , iqr, 6 * iqr


        # restrict the month lookup to the unmasked observations so it lines up
        # with value_diffs / flags
        month_locs = month_locs[good_to_uncompress]
        if diagnostics:
            print critical_values[0,:]

        # not less than 5x reporting accuracy (applied to set values only)
        good_critical_values = np.where(critical_values != st_var.mdi)
        low_critical_values = np.where(critical_values[good_critical_values] <= 5.*reporting_resolution)
        temporary = critical_values[good_critical_values]
        temporary[low_critical_values] = 5.*reporting_resolution
        critical_values[good_critical_values] = temporary


        if diagnostics:
            print critical_values[0,:], 5.*reporting_resolution

        # check hourly against 2 hourly, if <2/3 the increase to avoid crazy rejection rate
        for month in range(12):
            if critical_values[0,month] != st_var.mdi and critical_values[1,month] != st_var.mdi:
                if critical_values[0,month]/critical_values[1,month] <= 0.66:
                    critical_values[0,month] = 0.66 * critical_values[1,month]

        if diagnostics:
            print critical_values[0,:]


        # get time differences for unfiltered data
        # (replaces the filtered-data time_diffs computed above)

        full_time_diffs = np.ma.zeros(len(st_var.data))
        full_time_diffs.mask = st_var.data.mask
        full_time_diffs[good_to_uncompress] = station.time.data[good_to_uncompress][1:] - station.time.data[good_to_uncompress][:-1]
        time_diffs = full_time_diffs.compressed()

        # go through each difference, identify which month it is in if passes spike thresholds

        # spikes at the beginning or ends of sections
        # NOTE(review): at t == 0, time_diffs[t - 1] refers to the last element
        #   (negative index wrap) - confirm this is acceptable
        for t in np.arange(len(time_diffs)):
            if (np.abs(time_diffs[t - 1]) > 240) and (np.abs(time_diffs[t]) < 3):
                # 10 days before but short gap thereafter

                # median of (up to) the next 10 unmasked values after this one
                next_values = st_var.data[good_to_uncompress[0][t + 1:]]
                good, = np.where(next_values.mask == False)

                next_median = np.ma.median(next_values[good[:10]])

                next_diff = np.abs(value_diffs[t]) # out of spike
                median_diff = np.abs(next_median - st_var.data[good_to_uncompress[0][t]]) # are the remaining ones

                if (critical_values[time_diffs[t] - 1, month_locs[t]] != st_var.mdi):

                    # jump from spike > critical but average after < critical / 2
                    if (np.abs(median_diff) < critical_values[time_diffs[t] - 1, month_locs[t]] / 2.) and\
                        (np.abs(next_diff) > critical_values[time_diffs[t] - 1, month_locs[t]]) :

                        flags[t] = 1
                        if plots or diagnostics:
                            sc_diagnostics_and_plots(station.time.data, st_var.data, good_to_uncompress[0][t], good_to_uncompress[0][t+1], start, variable, plots = plots)


            elif (np.abs(time_diffs[t - 1]) < 3) and (np.abs(time_diffs[t]) > 240):
                # 10 days after but short gap before

                # median of (up to) the last 10 unmasked values before this one
                # NOTE(review): the slice [:t - 1] also excludes the observation
                #   at t - 1 - confirm that is intended
                prev_values = st_var.data[good_to_uncompress[0][:t - 1]]
                good, = np.where(prev_values.mask == False)

                prev_median = np.ma.median(prev_values[good[-10:]])

                prev_diff = np.abs(value_diffs[t - 1])
                median_diff = np.abs(prev_median - st_var.data[good_to_uncompress[0][t]])

                if (critical_values[time_diffs[t - 1] - 1, month_locs[t]] != st_var.mdi):

                    # jump into spike > critical but average before < critical / 2
                    if (np.abs(median_diff) < critical_values[time_diffs[t - 1] - 1, month_locs[t]] / 2.) and\
                        (np.abs(prev_diff) > critical_values[time_diffs[t - 1] - 1, month_locs[t]]) :

                        flags[t] = 1
                        if plots or diagnostics:
                            sc_diagnostics_and_plots(station.time.data, st_var.data, good_to_uncompress[0][t], good_to_uncompress[0][t+1], start, variable, plots = plots)




        ''' this isn't the nicest way, but a direct copy from IDL
            masked arrays might help remove some of the lines

            Also, this is relatively slow'''

        # Main spike search.  For each position t and spike length 1-3:
        # value_diffs[t - spk_len] is the jump into the candidate spike and
        # value_diffs[t] the jump out of it.
        for t in np.arange(len(time_diffs)):
            for spk_len in [1,2,3]:
                # stay far enough from both ends for the t +/- offsets used below
                if t >= spk_len and t < len(time_diffs) - spk_len:

                    # check if time differences are appropriate, for multi-point spikes
                    if (np.abs(time_diffs[t - spk_len]) <= spk_len * 3) and\
                    (np.abs(time_diffs[t]) <= spk_len * 3) and\
                    (time_diffs[t - spk_len - 1] - 1 < spk_len * 3) and\
                    (time_diffs[t + 1] - 1 < spk_len * 3) and \
                    ((spk_len == 1) or \
                    ((spk_len == 2) and (np.abs(time_diffs[t - spk_len + 1]) <= spk_len * 3)) or \
                    ((spk_len == 3) and (np.abs(time_diffs[t - spk_len + 1]) <= spk_len * 3) and (np.abs(time_diffs[t - spk_len + 2]) <= spk_len * 3))):

                        # check if differences are valid
                        if (value_diffs[t - spk_len] != st_var.mdi) and \
                        (value_diffs[t - spk_len] != st_var.fdi) and \
                        (critical_values[time_diffs[t - spk_len] - 1, month_locs[t]] != st_var.mdi):

                            # if exceed critical values
                            if (np.abs(value_diffs[t - spk_len]) >= critical_values[time_diffs[t - spk_len] - 1, month_locs[t]]):

                                # are signs of two differences different
                                if (math.copysign(1, value_diffs[t]) != math.copysign(1, value_diffs[t - spk_len])):

                                    # are within spike differences small
                                    if (spk_len == 1) or\
                                    ((spk_len == 2) and (np.abs(value_diffs[t - spk_len + 1]) < critical_values[time_diffs[t - spk_len + 1] -1, month_locs[t]] / 2.)) or \
                                    ((spk_len == 3) and (np.abs(value_diffs[t - spk_len + 1]) < critical_values[time_diffs[t - spk_len + 1] -1, month_locs[t]] / 2.) and\
                                      (np.abs(value_diffs[t - spk_len + 2]) < critical_values[time_diffs[t - spk_len + 2] -1, month_locs[t]] / 2.)):

                                        # check if following value is valid
                                        if (value_diffs[t] != st_var.mdi) and (critical_values[time_diffs[t] - 1, month_locs[t]] != st_var.mdi) and\
                                            (value_diffs[t] != st_var.fdi):

                                            # and if at least critical value
                                            if (np.abs(value_diffs[t]) >= critical_values[time_diffs[t] - 1, month_locs[t]]):

                                                # test if surrounding differences below 1/2 critical value
                                                if (np.abs(value_diffs[t - spk_len - 1]) <= critical_values[time_diffs[t - spk_len - 1] - 1, month_locs[t]] / 2.):
                                                    if (np.abs(value_diffs[t + 1]) <= critical_values[time_diffs[t + 1] - 1, month_locs[t]] / 2.):

                                                        # set the flags on every point of the spike
                                                        flags[ t - spk_len + 1 : t +1] = 1

                                                        if plots or diagnostics:

                                                            sc_diagnostics_and_plots(station.time.data, st_var.data, good_to_uncompress[0][t-spk_len+1], good_to_uncompress[0][t+1], start, variable, plots = plots)


        # write the (compressed) flags back to the unmasked rows of the QC array
        station.qc_flags[good_to_uncompress, flag_col[v]] = flags

        flag_locs = np.where(station.qc_flags[:, flag_col[v]] != 0)

        if plots or diagnostics:
            utils.print_flagged_obs_number(logfile, "Spike", variable, len(flag_locs[0]) - prev_flag_number, noWrite = True) # additional flags
        else:
            utils.print_flagged_obs_number(logfile, "Spike", variable, len(flag_locs[0]) - prev_flag_number) # additional flags

        # copy flags into attribute
        st_var.flags[flag_locs] = 1

        # matches 030660 - but with adapted IDL
        # matches 030220 OK, but finds more but all are reasonable 1/9/14

        # interactive overview plot; disabled because do_interactive is hard-coded False
        # NOTE(review): `flags` is 1-D at this point, so flags[:, flag_col[v]]
        #   would need revisiting before this branch is enabled
        do_interactive = False
        if plots and do_interactive == True:
            import matplotlib.pyplot as plt

            plot_times = utils.times_hours_to_datetime(station.time.data, start)

            plt.clf()
            plt.plot(plot_times, all_filtered, 'bo', ls='-')
            flg = np.where(flags[:, flag_col[v]] == 1)
            plt.plot(plot_times[flg], all_filtered[flg], 'ro', markersize=10)
            plt.show()

    # the returned station is only bound locally; the function itself returns None
    station = utils.append_history(station, "Spike Check")

    return # sc