def test(p, parameters):
    """
    Runs the quality control check on profile p and returns a numpy
    array of quality control decisions with False where the data value
    has passed the check and True where it failed.

    The whole-track result is computed once per cruise and cached in the
    database column en_track_check; subsequent calls for profiles on the
    same track return the cached decision.
    """

    cruise = p.cruise()
    uid = p.uid()

    # don't bother if cruise == 0 or None, or if timestamp is corrupt
    if (cruise in [0, None]) or (None in [p.year(), p.month(), p.day(), p.time()]):
        return np.zeros(1, dtype=bool)

    # don't bother if this has already been analyzed
    command = 'SELECT en_track_check FROM ' + parameters["table"] + ' WHERE uid = ' + str(uid) + ';'
    en_track_result = main.dbinteract(command)
    if en_track_result[0][0] is not None:
        en_track_result = main.unpack_row(en_track_result[0])[0]
        result = np.zeros(1, dtype=bool)
        result[0] = np.any(en_track_result)
        return result

    # some detector types cannot be assessed by this test; do not raise flag.
    if p.probe_type() is None:
        return np.zeros(1, dtype=bool)

    # fetch all profiles on track, sorted chronologically, earliest first
    # (None sorted as highest)
    command = 'SELECT uid, year, month, day, time, lat, long, probe FROM ' + parameters["table"] + ' WHERE cruise = ' + str(cruise) + ' and year is not null and month is not null and day is not null and time is not null ORDER BY year, month, day, time, uid ASC;'
    track_rows = main.dbinteract(command)

    # start all as passing by default:
    EN_track_results = {}
    for row in track_rows:
        EN_track_results[row[0]] = np.zeros(1, dtype=bool)

    # copy the list of headers;
    # remove entries as they are flagged.
    passed_rows = copy.deepcopy(track_rows)
    rejects = findOutlier(passed_rows, EN_track_results)

    # iterate until no further outliers are found on the surviving track
    while rejects != []:
        passed_index = [x for x in range(len(passed_rows)) if x not in rejects]
        passed_rows = [passed_rows[index] for index in passed_index]
        rejects = findOutlier(passed_rows, EN_track_results)

    # if more than half got rejected, reject everyone
    if len(passed_rows) < len(track_rows) / 2:
        for row in track_rows:
            EN_track_results[row[0]][0] = True

    # write all to db
    # BUGFIX: this UPDATE previously targeted sys.argv[1]; every other
    # query in this check uses parameters["table"], and the cached-result
    # SELECT above reads from parameters["table"], so results must be
    # written back to the same table for the caching to work.
    result = []
    for row in track_rows:
        result.append((main.pack_array(EN_track_results[row[0]]), row[0]))
    query = "UPDATE " + parameters["table"] + " SET en_track_check=? WHERE uid=?"
    main.interact_many(query, result)

    return EN_track_results[uid]
def test(p, parameters):
    """
    Runs the quality control check on profile p and returns a numpy
    array of quality control decisions with False where the data value
    has passed the check and True where it failed.

    Tracks are identified by the combination (cruise, country code,
    originator cruise); the whole-track result is cached in the
    en_track_check column and reused on subsequent calls.
    """

    country = p.primary_header['Country code']
    cruise = p.cruise()
    originator_cruise = p.originator_cruise()
    uid = p.uid()

    # don't bother if this has already been analyzed
    command = 'SELECT en_track_check FROM ' + parameters["table"] + ' WHERE uid = ' + str(uid) + ';'
    en_track_result = main.dbinteract(command)
    if en_track_result[0][0] is not None:
        en_track_result = main.unpack_row(en_track_result[0])[0]
        result = np.zeros(1, dtype=bool)
        result[0] = np.any(en_track_result)
        return result

    # make sure this profile makes sense in the track check
    if not assess_usability(p):
        return np.zeros(1, dtype=bool)

    # fetch all profiles on track, sorted chronologically, earliest first
    # (None sorted as highest), then by uid
    # NOTE(review): country and originator_cruise are interpolated directly
    # into the SQL string — confirm upstream that these header fields are
    # sanitized, or switch to parameterized queries if dbinteract supports it.
    command = 'SELECT uid, year, month, day, time, lat, long, probe, raw FROM ' + parameters["table"] + ' WHERE cruise = ' + str(cruise) + ' and country = "' + str(country) + '" and ocruise = "' + str(originator_cruise) + '" and year is not null and month is not null and day is not null and time is not null ORDER BY year, month, day, time, uid ASC;'
    track_rows = main.dbinteract(command)

    # avoid inappropriate profiles
    track_rows = [tr for tr in track_rows if assess_usability_raw(tr[8][1:-1])]

    # start all as passing by default
    EN_track_results = {}
    for row in track_rows:
        EN_track_results[row[0]] = np.zeros(1, dtype=bool)

    # copy the list of headers;
    # remove entries as they are flagged.
    passed_rows = copy.deepcopy(track_rows)
    rejects = findOutlier(passed_rows, EN_track_results)

    # iterate until no further outliers are found on the surviving track
    while rejects != []:
        passed_index = [x for x in range(len(passed_rows)) if x not in rejects]
        passed_rows = [passed_rows[index] for index in passed_index]
        rejects = findOutlier(passed_rows, EN_track_results)

    # if more than half got rejected, reject everyone
    if len(passed_rows) < len(track_rows) / 2:
        for row in track_rows:
            EN_track_results[row[0]][0] = True

    # write all to db
    # BUGFIX: this UPDATE previously targeted sys.argv[1]; every other
    # query in this check uses parameters["table"], and the cached-result
    # SELECT above reads from parameters["table"], so results must be
    # written back to the same table for the caching to work.
    result = []
    for row in track_rows:
        result.append((main.pack_array(EN_track_results[row[0]]), row[0]))
    query = "UPDATE " + parameters["table"] + " SET en_track_check=? WHERE uid=?"
    main.interact_many(query, result)

    return EN_track_results[uid]
def stdLevelData(p, parameters):
    """
    Combines data that have passed other QC checks to create a set of
    observation minus background data on standard levels.

    Returns None if no usable levels remain, otherwise a tuple
    (levels, origLevels, assocLevs).
    """

    # OR together the flags from the prerequisite checks; a level
    # rejected by any of them is filtered out below.
    preQC = (EN_background_check.test(p, parameters) |
             EN_constant_value_check.test(p, parameters) |
             EN_increasing_depth_check.test(p, parameters) |
             EN_range_check.test(p, parameters) |
             EN_spike_and_step_check.test(p, parameters) |
             EN_stability_check.test(p, parameters))

    # The EN background check ran above, so its stored data for this
    # profile are guaranteed to be present in the db.
    query = ('SELECT origlevels, ptlevels, bglevels FROM enbackground '
             'WHERE uid = ' + str(p.uid()))
    row = main.unpack_row(main.dbinteract(query)[0])
    origlevels, ptlevels, bglevels = row[0], row[1], row[2]

    origLevels = np.array(origlevels)
    diffLevels = np.array(ptlevels) - np.array(bglevels)
    if len(origLevels) == 0:
        return None  # Nothing more to do.

    # Remove any levels that failed previous QC.
    nLevels, origLevels, diffLevels = filterLevels(preQC, origLevels, diffLevels)
    if nLevels == 0:
        return None

    levels, assocLevs = meanDifferencesAtStandardLevels(
        origLevels, diffLevels, p.z(), parameters)

    return levels, origLevels, assocLevs
def test(p, parameters):
    """
    Runs the quality control check on profile p and returns a numpy
    array of quality control decisions with False where the data value
    has passed the check and True where it failed.
    """

    # Look for a previously stored decision for this profile.
    query = ('SELECT en_constant_value_check FROM ' + parameters["table"] +
             ' WHERE uid = ' + str(p.uid()) + ';')
    stored = main.unpack_row(main.dbinteract(query)[0])[0]

    # No cached decision: run the QC now.
    if stored is None:
        return run_qc(p, parameters)

    return stored
def test(p, parameters):
    """
    Runs the quality control check on profile p and returns a numpy
    array of quality control decisions with False where the data value
    has passed the check and True where it failed.
    """

    # Look for a previously stored decision for this profile.
    query = ('SELECT en_increasing_depth_check FROM ' + parameters["table"] +
             ' WHERE uid = ' + str(p.uid()) + ';')
    stored = main.unpack_row(main.dbinteract(query)[0])[0]

    # No cached decision: run the QC now.
    if stored is None:
        return run_qc(p, parameters)

    return stored
def stdLevelData(p, parameters):
    """
    Combines data that have passed other QC checks to create a set of
    observation minus background data on standard levels.

    Returns None if no usable levels remain, otherwise a tuple
    (levels, origLevels, assocLevs).
    """

    # Union of the prerequisite QC decisions; True marks a bad level.
    combined_qc = (EN_background_check.test(p, parameters)
                   | EN_constant_value_check.test(p, parameters)
                   | EN_increasing_depth_check.test(p, parameters)
                   | EN_range_check.test(p, parameters)
                   | EN_spike_and_step_check.test(p, parameters)
                   | EN_stability_check.test(p, parameters))

    # The background check has just run above, so its stored values for
    # this profile are guaranteed to exist in the db.
    query = 'SELECT origlevels, ptlevels, bglevels FROM enbackground WHERE uid = ' + str(p.uid())
    stored = main.dbinteract(query)
    stored = main.unpack_row(stored[0])
    origlevels, ptlevels, bglevels = stored[0], stored[1], stored[2]

    origLevels = np.array(origlevels)
    diffLevels = np.array(ptlevels) - np.array(bglevels)
    nLevels = len(origLevels)
    if nLevels == 0:
        # Nothing more to do.
        return None

    # Drop levels rejected by the earlier checks.
    nLevels, origLevels, diffLevels = filterLevels(combined_qc, origLevels, diffLevels)
    if nLevels == 0:
        return None

    levels, assocLevs = meanDifferencesAtStandardLevels(origLevels, diffLevels,
                                                        p.z(), parameters)

    return levels, origLevels, assocLevs
def test(p, parameters, allow_level_reinstating=True): """ Runs the quality control check on profile p and returns a numpy array of quality control decisions with False where the data value has passed the check and True where it failed. If allow_level_reinstating is set to True then rejected levels can be reprieved by comparing with levels above and below. NB this is done by default in EN processing. """ # Define an array to hold results. qc = np.zeros(p.n_levels(), dtype=bool) # Obtain the obs minus background differences on standard levels. result = stdLevelData(p, parameters) if result is None: return qc # Unpack the results. levels, origLevels, assocLevels = result # Retrieve the background and observation error variances and # the background values. query = 'SELECT bgstdlevels, bgevstdlevels FROM enbackground WHERE uid = ' + str( p.uid()) enbackground_pars = main.dbinteract(query) enbackground_pars = main.unpack_row(enbackground_pars[0]) bgsl = enbackground_pars[0] slev = parameters['enbackground']['depth'] bgev = enbackground_pars[1] obev = parameters['enbackground']['obev'] #find initial pge pgeData = determine_pge(levels, bgev, obev, p) # Find buddy. profiles = get_profile_info(parameters) minDist = 1000000000.0 iMinDist = None for iProfile, profile in enumerate(profiles): pDist = assessBuddyDistance(p, profile) if pDist is not None and pDist < minDist: minDist = pDist iMinDist = iProfile # Check if we have found a buddy and process if so. 
if minDist <= 400000: pBuddy = main.get_profile_from_db(profiles[iMinDist][0]) # buddy vetos Fail = False if pBuddy.var_index() is None: Fail = True if Fail == False: main.catchFlags(pBuddy) if np.sum(pBuddy.t().mask == False) == 0: Fail = True if Fail == False: result = stdLevelData(pBuddy, parameters) query = 'SELECT bgevstdlevels FROM enbackground WHERE uid = ' + str( pBuddy.uid()) buddy_pars = main.dbinteract(query) buddy_pars = main.unpack_row(buddy_pars[0]) if result is not None: levelsBuddy, origLevelsBuddy, assocLevelsBuddy = result bgevBuddy = buddy_pars[0] pgeBuddy = determine_pge(levels, bgevBuddy, obev, pBuddy) pgeData = update_pgeData(pgeData, pgeBuddy, levels, levelsBuddy, minDist, p, pBuddy, obev, bgev, bgevBuddy) # Check if levels should be reinstated. if allow_level_reinstating: if np.abs(p.latitude()) < 20.0: depthTol = 300.0 else: depthTol = 200.0 stdLevelFlags = pgeData >= 0.5 for i, slflag in enumerate(stdLevelFlags): if slflag: # Check for non rejected surrounding levels. okbelow = False if i > 0: if stdLevelFlags[i - 1] == False and levels.mask[ i - 1] == False and bgsl.mask[i - 1] == False: okbelow = True okabove = False nsl = len(stdLevelFlags) if i < nsl - 1: if stdLevelFlags[i + 1] == False and levels.mask[ i + 1] == False and bgsl.mask[i + 1] == False: okabove = True # Work out tolerances. if slev[i] > depthTol + 100: tolFactor = 0.5 elif slev[i] > depthTol: tolFactor = 1.0 - 0.005 * (slev[i] - depthTol) else: tolFactor = 1.0 ttol = 0.5 * tolFactor if okbelow == True and okabove == True: xmax = levels[i - 1] + bgsl[i - 1] + ttol xmin = levels[i + 1] + bgsl[i + 1] - ttol elif okbelow == True: xmax = levels[i - 1] + bgsl[i - 1] + ttol xmin = levels[i - 1] + bgsl[i - 1] - ttol elif okabove == True: xmax = levels[i + 1] + bgsl[i + 1] + ttol xmin = levels[i + 1] + bgsl[i + 1] - ttol else: continue # Reassign PGE if level is within the tolerances. 
if levels[i] + bgsl[i] >= xmin and levels[i] + bgsl[i] <= xmax: pgeData[i] = 0.49 # Assign the QC flags to original levels. for i, pge in enumerate(pgeData): if pgeData.mask[i]: continue if pge < 0.5: continue for j, assocLevel in enumerate(assocLevels): if assocLevel == i: origLevel = origLevels[j] qc[origLevel] = True return qc
def run_qc(p, suspect):
    """
    EN spike-and-step detection for profile p.

    Returns a boolean numpy array with True at levels that failed.
    If suspect is True, only suspect flags are raised and the result is
    cached in the enspikeandstep table (and reused on later calls);
    if False, hard rejections are applied.
    """

    # check for pre-registered suspect tabulation, if that's what we want:
    if suspect:
        query = 'SELECT suspect FROM enspikeandstep WHERE uid = ' + str(p.uid()) + ';'
        susp = main.dbinteract(query)
        if len(susp) > 0:
            return main.unpack_row(susp[0])[0]

    # Define an array to hold results.
    qc = np.zeros(p.n_levels(), dtype=bool)

    # BUGFIX: guard against empty profiles — the bottom-of-profile checks
    # below index [-1], which raises IndexError on zero-length arrays.
    if p.n_levels() == 0:
        return qc

    # Get depth and temperature values from the profile.
    z = p.z()
    t = p.t()

    # Find which levels have data.
    isTemperature = (t.mask == False)
    isDepth = (z.mask == False)
    isData = isTemperature & isDepth

    # Array to hold temperature differences between levels and gradients.
    dt, gt = composeDT(t, z, p.n_levels())

    # Spikes and steps detection.
    # (Depth tolerance tables are provided by determineDepthTolerance;
    # unused local copies were removed.)
    for i in range(1, p.n_levels()):
        if i >= 2:
            if not (isData[i - 2] and isData[i - 1] and isData[i]):
                continue
            if z[i] - z[i - 2] >= 5.0:
                # Interpolation weight of level i-1 between its neighbours.
                wt1 = (z[i - 1] - z[i - 2]) / (z[i] - z[i - 2])
            else:
                wt1 = 0.5
        else:
            if not (isData[i - 1] and isData[i]):
                continue
            wt1 = 0.5

        dTTol = determineDepthTolerance(z[i - 1], np.abs(p.latitude()))
        gTTol = 0.05

        # Check for low temperatures in the Tropics.
        # This might be more appropriate to appear in a separate EN regional
        # range check but is included here for now for consistency with the
        # original code.
        if (np.abs(p.latitude()) < 20.0 and z[i - 1] < 1000.0 and
                t[i - 1] < 1.0):
            dt[i] = np.ma.masked
            if suspect == True:
                qc[i - 1] = True
            continue

        qc, dt = conditionA(dt, dTTol, qc, wt1, i, suspect)
        qc, dt = conditionB(dt, dTTol, gTTol, qc, gt, i, suspect)
        qc = conditionC(dt, dTTol, z, qc, t, i, suspect)
    # End of loop over levels.

    # Step or 0.0 at the bottom of a profile.
    if isData[-1] and dt.mask[-1] == False:
        dTTol = determineDepthTolerance(z[-1], np.abs(p.latitude()))
        if np.abs(dt[-1]) > dTTol:
            if suspect == True:
                qc[-1] = True
    if isTemperature[-1]:
        if t[-1] == 0.0:
            if suspect == True:
                qc[-1] = True

    # If 4 levels or more than half the profile is rejected then reject all.
    if suspect == False:
        nRejects = np.count_nonzero(qc)
        if nRejects >= 4 or nRejects > p.n_levels() / 2:
            qc[:] = True

    # register suspects, if computed, to db
    if suspect:
        query = "REPLACE INTO enspikeandstep VALUES(?,?);"
        main.dbinteract(query, [p.uid(), main.pack_array(qc)])

    return qc
def run_qc(p, suspect, parameters):
    """
    EN spike-and-step detection for profile p.

    Returns a boolean numpy array with True at levels that failed.
    If suspect is True, only suspect flags are raised and the result is
    cached in the enspikeandstep table of parameters["db"] (and reused
    on later calls); if False, hard rejections are applied.
    """

    # check for pre-registered suspect tabulation, if that's what we want:
    if suspect:
        query = 'SELECT suspect FROM enspikeandstep WHERE uid = ' + str(p.uid()) + ';'
        susp = main.dbinteract(query, targetdb=parameters["db"])
        if len(susp) > 0:
            return main.unpack_row(susp[0])[0]

    # Define an array to hold results.
    qc = np.zeros(p.n_levels(), dtype=bool)

    # BUGFIX: guard against empty profiles — the bottom-of-profile checks
    # below index [-1], which raises IndexError on zero-length arrays.
    if p.n_levels() == 0:
        return qc

    # Get depth and temperature values from the profile.
    z = p.z()
    t = p.t()

    # Find which levels have data.
    isTemperature = (t.mask == False)
    isDepth = (z.mask == False)
    isData = isTemperature & isDepth

    # Array to hold temperature differences between levels and gradients.
    dt, gt = composeDT(t, z, p.n_levels())

    # Spikes and steps detection.
    # (Depth tolerance tables are provided by determineDepthTolerance;
    # unused local copies were removed.)
    for i in range(1, p.n_levels()):
        if i >= 2:
            if not (isData[i - 2] and isData[i - 1] and isData[i]):
                continue
            if z[i] - z[i - 2] >= 5.0:
                # Interpolation weight of level i-1 between its neighbours.
                wt1 = (z[i - 1] - z[i - 2]) / (z[i] - z[i - 2])
            else:
                wt1 = 0.5
        else:
            if not (isData[i - 1] and isData[i]):
                continue
            wt1 = 0.5

        dTTol = determineDepthTolerance(z[i - 1], np.abs(p.latitude()))
        gTTol = 0.05

        # Check for low temperatures in the Tropics.
        # This might be more appropriate to appear in a separate EN regional
        # range check but is included here for now for consistency with the
        # original code.
        if (np.abs(p.latitude()) < 20.0 and z[i - 1] < 1000.0 and
                t[i - 1] < 1.0):
            dt[i] = np.ma.masked
            if suspect == True:
                qc[i - 1] = True
            continue

        qc, dt = conditionA(dt, dTTol, qc, wt1, i, suspect)
        qc, dt = conditionB(dt, dTTol, gTTol, qc, gt, i, suspect)
        qc = conditionC(dt, dTTol, z, qc, t, i, suspect)
    # End of loop over levels.

    # Step or 0.0 at the bottom of a profile.
    if isData[-1] and dt.mask[-1] == False:
        dTTol = determineDepthTolerance(z[-1], np.abs(p.latitude()))
        if np.abs(dt[-1]) > dTTol:
            if suspect == True:
                qc[-1] = True
    if isTemperature[-1]:
        if t[-1] == 0.0:
            if suspect == True:
                qc[-1] = True

    # If 4 levels or more than half the profile is rejected then reject all.
    if suspect == False:
        nRejects = np.count_nonzero(qc)
        if nRejects >= 4 or nRejects > p.n_levels() / 2:
            qc[:] = True

    # register suspects, if computed, to db
    if suspect:
        query = "REPLACE INTO enspikeandstep VALUES(?,?);"
        main.dbinteract(query, [p.uid(), main.pack_array(qc)],
                        targetdb=parameters["db"])

    return qc
def test(p, parameters, allow_level_reinstating=True): """ Runs the quality control check on profile p and returns a numpy array of quality control decisions with False where the data value has passed the check and True where it failed. If allow_level_reinstating is set to True then rejected levels can be reprieved by comparing with levels above and below. NB this is done by default in EN processing. """ # Define an array to hold results. qc = np.zeros(p.n_levels(), dtype=bool) # Obtain the obs minus background differences on standard levels. result = stdLevelData(p, parameters) if result is None: return qc # Unpack the results. levels, origLevels, assocLevels = result # Retrieve the background and observation error variances and # the background values. query = 'SELECT bgstdlevels, bgevstdlevels FROM enbackground WHERE uid = ' + str(p.uid()) enbackground_pars = main.dbinteract(query) enbackground_pars = main.unpack_row(enbackground_pars[0]) bgsl = enbackground_pars[0] slev = parameters['enbackground']['depth'] bgev = enbackground_pars[1] obev = parameters['enbackground']['obev'] #find initial pge pgeData = determine_pge(levels, bgev, obev, p) # Find buddy. profiles = get_profile_info(parameters) minDist = 1000000000.0 iMinDist = None for iProfile, profile in enumerate(profiles): pDist = assessBuddyDistance(p, profile) if pDist is not None and pDist < minDist: minDist = pDist iMinDist = iProfile # Check if we have found a buddy and process if so. 
if minDist <= 400000: pBuddy = main.get_profile_from_db(profiles[iMinDist][0]) # buddy vetos Fail = False if pBuddy.var_index() is None: Fail = True if Fail == False: main.catchFlags(pBuddy) if np.sum(pBuddy.t().mask == False) == 0: Fail = True if Fail == False: result = stdLevelData(pBuddy, parameters) query = 'SELECT bgevstdlevels FROM enbackground WHERE uid = ' + str(pBuddy.uid()) buddy_pars = main.dbinteract(query) buddy_pars = main.unpack_row(buddy_pars[0]) if result is not None: levelsBuddy, origLevelsBuddy, assocLevelsBuddy = result bgevBuddy = buddy_pars[0] pgeBuddy = determine_pge(levels, bgevBuddy, obev, pBuddy) pgeData = update_pgeData(pgeData, pgeBuddy, levels, levelsBuddy, minDist, p, pBuddy, obev, bgev, bgevBuddy) # Check if levels should be reinstated. if allow_level_reinstating: if np.abs(p.latitude()) < 20.0: depthTol = 300.0 else: depthTol = 200.0 stdLevelFlags = pgeData >= 0.5 for i, slflag in enumerate(stdLevelFlags): if slflag: # Check for non rejected surrounding levels. okbelow = False if i > 0: if stdLevelFlags[i - 1] == False and levels.mask[i - 1] == False and bgsl.mask[i - 1] == False: okbelow = True okabove = False nsl = len(stdLevelFlags) if i < nsl - 1: if stdLevelFlags[i + 1] == False and levels.mask[i + 1] == False and bgsl.mask[i + 1] == False: okabove = True # Work out tolerances. if slev[i] > depthTol + 100: tolFactor = 0.5 elif slev[i] > depthTol: tolFactor = 1.0 - 0.005 * (slev[i] - depthTol) else: tolFactor = 1.0 ttol = 0.5 * tolFactor if okbelow == True and okabove == True: xmax = levels[i - 1] + bgsl[i - 1] + ttol xmin = levels[i + 1] + bgsl[i + 1] - ttol elif okbelow == True: xmax = levels[i - 1] + bgsl[i - 1] + ttol xmin = levels[i - 1] + bgsl[i - 1] - ttol elif okabove == True: xmax = levels[i + 1] + bgsl[i + 1] + ttol xmin = levels[i + 1] + bgsl[i + 1] - ttol else: continue # Reassign PGE if level is within the tolerances. 
if levels[i] + bgsl[i] >= xmin and levels[i] + bgsl[i] <= xmax: pgeData[i] = 0.49 # Assign the QC flags to original levels. for i, pge in enumerate(pgeData): if pgeData.mask[i]: continue if pge < 0.5: continue for j, assocLevel in enumerate(assocLevels): if assocLevel == i: origLevel = origLevels[j] qc[origLevel] = True return qc