Esempio n. 1
0
    if stat[0] == input_id:

        # set up station
        station = utils.Station(stat[0], float(stat[1]), float(stat[2]), float(stat[3]))
        break
else:
    sys.exit(0)

# read attributes and qc_flags
# The file read is "<station.id>_external.nc" inside NETCDF_DATA_LOCS; what
# ncdfp.read populates on `station` is not visible here (presumably the
# variables named in process_vars - confirm against ncdfp).
ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_external.nc"), station, process_vars, [])

# NOTE(review): match_to_compress is not used in the visible part of this
# script; presumably it maps the full DATASTART-DATAEND time axis back to the
# compressed station record - confirm against utils.create_fulltimes.
match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, [])

# nyears x 12 months
# reshape(-1,12) turns the flat list of month starts into one row per year,
# so column 0 of each row is the start of that year.
month_start_locs = np.array(utils.month_starts(DATASTART, DATAEND)).reshape(-1,12)

# which years
# calendar year label for each row of month_start_locs
years = DATASTART.year + np.arange(month_start_locs.shape[0])

# DATAEND.year itself is excluded, so the final iteration is DATAEND.year - 1
for year in range(DATASTART.year, DATAEND.year):

    # row index of this year; np.where returns an index array, so year_loc is
    # a one-element array rather than a plain integer
    year_loc, = np.where(years == year)

    # the range runs from the start of this year to the start of the next one;
    # the final year has no following row, so -1 is used as the end instead
    if year != DATAEND.year - 1:
        plot_range = (month_start_locs[year_loc,0], month_start_locs[year_loc+1,0])
    else:
        plot_range = (month_start_locs[year_loc,0], -1) # misses last hour

    # NOTE(review): the plot_range entries are one-element arrays (see year_loc
    # above); recent NumPy releases may reject these as slice bounds - confirm
    # with the NumPy version in use.
    plot_times = utils.times_hours_to_datetime(station.time.data[plot_range[0]:plot_range[1]], DATASTART)
Esempio n. 2
0
def rsc_annual_string_expectance(all_filtered, value_starts, value_lengths, flags, start, end, st_var, times, diagnostics = False, plots = False):
    '''
    Find years where have more strings than expected, but not long enough to set off test

    For every year with at least 200 unmasked observations, work out the
    proportion of observations that sit inside strings (streaks of the same
    value).  If at least 10 years qualify, any qualifying year whose
    proportion exceeds 5 x the median proportion (median floored at 0.005)
    has all of its strings flagged.

    :param array all_filtered: data filtered by all flags set so far (masked array)
    :param array value_starts: locations of start of strings/streaks of data
    :param array value_lengths: lengths of each streak (counted in unmasked observations)
    :param array flags: array of flags to be set (modified in place)
    :param datetime start: start of data
    :param datetime end: end of data
    :param obj st_var: station variable; its name is passed to the diagnostic/plot routine
    :param array times: time stamps matching all_filtered, only used for diagnostics/plots
    :param bool diagnostics: do diagnostic output
    :param bool plots: do plots

    :returns: flags - the same array that was passed in, with 1 set for flagged observations
    '''

    min_obs_per_year = 200   # fewer unmasked observations than this: year is ignored
    min_usable_years = 10    # need this many usable years to estimate the expectation
    median_floor = 0.005     # stops a near-zero median flagging almost everything
    excess_factor = 5.       # year is bad if proportion > excess_factor * median

    # one row per year, twelve month starts per row - column 0 is the year start
    month_starts = np.array(utils.month_starts(start, end)).reshape(-1, 12)
    n_years = month_starts.shape[0]

    # half-open [first, stop) index range of every year.  The final year stops
    # at the end of the data, which removes the need to special-case it
    # (and the out-of-range month_starts[y+1] lookup that went with it).
    year_firsts = month_starts[:, 0]
    year_stops = np.append(year_firsts[1:], len(all_filtered))

    # full boolean mask even if nothing is masked, and the sorted locations of
    # the unmasked observations - computed once rather than once per string
    data_mask = np.ma.getmaskarray(all_filtered)
    unmasked_locs, = np.where(~data_mask)

    year_proportions = np.zeros(n_years)
    # explicit record of which years have enough data, instead of relying on
    # a missing-data sentinel that could collide with the threshold test
    usable_years = np.zeros(n_years, dtype = bool)

    # churn through each year in turn
    for y in range(n_years):

        first, stop = year_firsts[y], year_stops[y]

        n_obs = np.count_nonzero(~data_mask[first:stop])
        if n_obs < min_obs_per_year:
            continue

        usable_years[y] = True

        # strings (streaks of same value) which start in this year
        in_year = (value_starts >= first) & (value_starts < stop)

        # proportion of this year's data held in strings (0 if there are none)
        year_proportions[y] = np.sum(value_lengths[in_year]) / float(n_obs)

    # if enough years to judge what is normal for this station
    if np.count_nonzero(usable_years) >= min_usable_years:

        median = max(np.median(year_proportions[usable_years]), median_floor)

        # usable years which have proportions > 5 x median
        bad_years, = np.where(usable_years & (year_proportions > excess_factor * median))

        for bad in bad_years:

            first, stop = year_firsts[bad], year_stops[bad]

            # strings starting in this year - same half-open range as used
            # when the proportions were calculated
            locs, = np.where((value_starts >= first) & (value_starts < stop))

            for loc in locs:
                # need to account for missing values here  26/9/2014
                # flag the first value_lengths[loc] unmasked observations at
                # or after the start of the string
                begin = np.searchsorted(unmasked_locs, value_starts[loc])

                flags[unmasked_locs[begin : begin + value_lengths[loc]]] = 1

            if plots or diagnostics:
                plot_year = all_filtered[first:stop]
                plot_time = times[first:stop]
                plot_flags, = np.where(flags[first:stop] == 1)

                rsc_diagnostics_and_plot(plot_time, plot_year, plot_flags, st_var.name, start, plots = plots)


    return flags # rsc_annual_string_expectance
Esempio n. 3
0
                                float(stat[3]))
        break
else:
    sys.exit(0)

# read attributes and qc_flags
# The file read is "<station.id>_external.nc" inside NETCDF_DATA_LOCS, with
# read_qc_flags=True requested; what ncdfp.read populates on `station` is not
# visible here - confirm against ncdfp.
ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_external.nc"),
           station,
           process_vars, [],
           read_qc_flags=True)

# NOTE(review): match_to_compress is not used in the visible part of this
# script; presumably it maps the full DATASTART-DATAEND time axis back to the
# compressed station record - confirm against utils.create_fulltimes.
match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART,
                                           DATAEND, [])

# nyears x 12 months
# reshape(-1, 12) turns the flat list of month starts into one row per year,
# so column 0 of each row is the start of that year.
month_start_locs = np.array(utils.month_starts(DATASTART,
                                               DATAEND)).reshape(-1, 12)

# which years
# calendar year label for each row of month_start_locs
years = DATASTART.year + np.arange(month_start_locs.shape[0])

# find which year and test to plot
# `year`, `test` and `qc_test` are defined outside the visible code.
# year_loc stays a one-element index array, whereas test_loc is unpacked from
# the index array itself and so needs exactly one match (anything else raises
# ValueError on unpacking).
year_loc, = np.where(years == year)
test_loc, = np.where(qc_test == test)[0]

# and get the plot range
# runs from the start of the chosen year to the start of the next one; the
# final year has no following row, so -1 is used as the end instead
if year != DATAEND.year - 1:
    plot_range = (month_start_locs[year_loc, 0], month_start_locs[year_loc + 1,
                                                                  0])
else:
    plot_range = (month_start_locs[year_loc, 0], -1)  # misses last hour
Esempio n. 4
0
def rsc_annual_string_expectance(all_filtered, value_starts, value_lengths, flags, start, end, st_var, times, diagnostics = False, plots = False):
    '''
    Find years where have more strings than expected, but not long enough to set off test

    For every year with at least 200 unmasked observations, work out the
    proportion of observations that sit inside strings (streaks of the same
    value).  If at least 10 years qualify, any qualifying year whose
    proportion exceeds 5 x the median proportion (median floored at 0.005)
    has all of its strings flagged.

    :param array all_filtered: data filtered by all flags set so far (masked array)
    :param array value_starts: locations of start of strings/streaks of data
    :param array value_lengths: lengths of each streak (counted in unmasked observations)
    :param array flags: array of flags to be set (modified in place)
    :param datetime start: start of data
    :param datetime end: end of data
    :param obj st_var: station variable; its name is passed to the diagnostic/plot routine
    :param array times: time stamps matching all_filtered, only used for diagnostics/plots
    :param bool diagnostics: do diagnostic output
    :param bool plots: do plots

    :returns: flags - the same array that was passed in, with 1 set for flagged observations
    '''

    min_obs_per_year = 200   # fewer unmasked observations than this: year is ignored
    min_usable_years = 10    # need this many usable years to estimate the expectation
    median_floor = 0.005     # stops a near-zero median flagging almost everything
    excess_factor = 5.       # year is bad if proportion > excess_factor * median

    # one row per year, twelve month starts per row - column 0 is the year start
    month_starts = np.array(utils.month_starts(start, end)).reshape(-1, 12)
    n_years = month_starts.shape[0]

    # half-open [first, stop) index range of every year.  The final year stops
    # at the end of the data, which removes the need to special-case it
    # (and the out-of-range month_starts[y+1] lookup that went with it).
    year_firsts = month_starts[:, 0]
    year_stops = np.append(year_firsts[1:], len(all_filtered))

    # full boolean mask even if nothing is masked, and the sorted locations of
    # the unmasked observations - computed once rather than once per string
    data_mask = np.ma.getmaskarray(all_filtered)
    unmasked_locs, = np.where(~data_mask)

    year_proportions = np.zeros(n_years)
    # explicit record of which years have enough data, instead of relying on
    # a missing-data sentinel that could collide with the threshold test
    usable_years = np.zeros(n_years, dtype = bool)

    # churn through each year in turn
    for y in range(n_years):

        first, stop = year_firsts[y], year_stops[y]

        n_obs = np.count_nonzero(~data_mask[first:stop])
        if n_obs < min_obs_per_year:
            continue

        usable_years[y] = True

        # strings (streaks of same value) which start in this year
        in_year = (value_starts >= first) & (value_starts < stop)

        # proportion of this year's data held in strings (0 if there are none)
        year_proportions[y] = np.sum(value_lengths[in_year]) / float(n_obs)

    # if enough years to judge what is normal for this station
    if np.count_nonzero(usable_years) >= min_usable_years:

        median = max(np.median(year_proportions[usable_years]), median_floor)

        # usable years which have proportions > 5 x median
        bad_years, = np.where(usable_years & (year_proportions > excess_factor * median))

        for bad in bad_years:

            first, stop = year_firsts[bad], year_stops[bad]

            # strings starting in this year - same half-open range as used
            # when the proportions were calculated
            locs, = np.where((value_starts >= first) & (value_starts < stop))

            for loc in locs:
                # need to account for missing values here  26/9/2014
                # flag the first value_lengths[loc] unmasked observations at
                # or after the start of the string
                begin = np.searchsorted(unmasked_locs, value_starts[loc])

                flags[unmasked_locs[begin : begin + value_lengths[loc]]] = 1

            if plots or diagnostics:
                plot_year = all_filtered[first:stop]
                plot_time = times[first:stop]
                plot_flags, = np.where(flags[first:stop] == 1)

                rsc_diagnostics_and_plot(plot_time, plot_year, plot_flags, st_var.name, start, plots = plots)


    return flags # rsc_annual_string_expectance