def __getitem__(self, key):
    """Return the array stored under ``key``, masked by its QC flags.

    When ``key`` has no entry in ``self.flags`` the value stored in
    ``self.input`` is returned untouched. Otherwise the flags of ``key``
    are merged with ``combined_flag()`` and every position whose combined
    flag is different from 1 is masked in the returned array.

    A missing ``key`` surfaces as the lookup error raised by
    ``self.input``; no extra check is done here.
    """
    if key not in self.flags:
        return self.input[key]

    flag = combined_flag(self.flags[key])
    # Only the positions flagged as 1 are kept unmasked.
    return ma.masked_array(self.input[key].data, mask=(flag != 1))
def __getitem__(self, key):
    """Return the array stored under ``key``, masked by its QC flags.

    When ``key`` has no entry in ``self.flags`` the value stored in
    ``self.input`` is returned untouched. Otherwise the flags of ``key``
    are merged with ``combined_flag()`` and every position whose combined
    flag is different from 1 is masked in the returned array.

    A missing ``key`` surfaces as the lookup error raised by
    ``self.input``; no extra check is done here.
    """
    if key not in self.flags:
        return self.input[key]

    flag = combined_flag(self.flags[key])
    # Only the positions flagged as 1 are kept unmasked.
    return ma.masked_array(self.input[key].data, mask=(flag != 1))
def test_all_valid_no_9():
    """ If all measurements are valid it can't return flag 9

        This is to test a special condition when all values are valid:
        .mask may return the scalar False instead of an array of the
        same size filled with False. This test inputs all valid values
        and checks that there is no flag 9.
    """
    datafile = download_testdata("dPIRX010.cnv")
    pqc = cotede.qc.fProfileQC(datafile)

    # .any() works for the scalar False as well as for an array mask,
    # while asserting on `mask == False` raises ValueError as soon as
    # the mask is an array with more than one element.
    assert not pqc['TEMP'].mask.any()
    assert not (combined_flag(pqc.flags['TEMP']) == 9).any()
def test_all_valid_no_9():
    """ Flag 9 must show up only where the input itself is masked

        This covers a special condition: when all values are valid,
        .mask can return False instead of an array of the same size
        filled with False.

        Two things are checked for TEMP on a DummyData profile:
          - the quality controlled TEMP is not entirely masked;
          - the positions with combined flag 9 coincide with the mask
            of the input TEMP.
    """
    profile = DummyData()
    pqc = ProfileQC(profile)

    assert not pqc['TEMP'].mask.all()

    is_flag9 = combined_flag(pqc.flags['TEMP']) == 9
    assert np.allclose(is_flag9, profile['TEMP'].mask)
def flags2bin(flags, good_flags=(1, 2), bad_flags=(3, 4)):
    """ Reduce QC flags to a masked boolean array

        Inputs:
            flags: Either one array of flags, or a mapping (anything
                with .keys()) of arrays of flags, one per test. A
                mapping is first merged with combined_flag().
            good_flags: Flag values translated into True.
            bad_flags: Flag values translated into False.

        Output:
            A masked boolean array with the same length as the flags.
            Positions with a flag listed in neither good_flags nor
            bad_flags stay masked. bad_flags are applied last, so a
            value listed in both ends up as False.
    """
    if hasattr(flags, 'keys'):
        # The different flags must have the same amount of data.
        N = len(flags[next(iter(flags.keys()))])
        for f in flags:
            assert len(flags[f]) == N

        # A former second argument, `reference_flags`, is not defined in
        # this function, so the flags are combined with the defaults of
        # combined_flag(), as done by calibrate4flags().
        flags = combined_flag(flags)
    else:
        N = len(flags)

    output = ma.masked_all(N, dtype='bool')
    for f in good_flags:
        output[flags == f] = True
    for f in bad_flags:
        output[flags == f] = False

    return output
def flags2bin(flags, good_flags=(1, 2), bad_flags=(3, 4)):
    """ Reduce QC flags to a masked boolean array

        Inputs:
            flags: Either one array of flags, or a mapping (anything
                with .keys()) of arrays of flags, one per test. A
                mapping is first merged with combined_flag().
            good_flags: Flag values translated into True.
            bad_flags: Flag values translated into False.

        Output:
            A masked boolean array with the same length as the flags.
            Positions with a flag listed in neither good_flags nor
            bad_flags stay masked. bad_flags are applied last, so a
            value listed in both ends up as False.
    """
    if hasattr(flags, 'keys'):
        # The different flags must have the same amount of data.
        N = len(flags[next(iter(flags.keys()))])
        for f in flags:
            assert len(flags[f]) == N

        # A former second argument, `reference_flags`, is not defined in
        # this function, so the flags are combined with the defaults of
        # combined_flag(), as done by calibrate4flags().
        flags = combined_flag(flags)
    else:
        N = len(flags)

    output = ma.masked_all(N, dtype='bool')
    for f in good_flags:
        output[flags == f] = True
    for f in bad_flags:
        output[flags == f] = False

    return output
def human_calibrate_mistakes(datadir, varname, cfg=None, niter=5):
    """ Recalibrate the Anomaly Detection with the help of a human

        The profiles in datadir are loaded with
        ProfilesQCPandasCollection and the Anomaly Detection is
        calibrated with calibrate4flags(). Then, for at most niter
        rounds:
          - select the measurements where the calibration disagrees
            with the flags (false positives or false negatives) and
            that were not yet evaluated by a human;
          - ask HumanQC to evaluate up to 5 random profiles among them;
          - set the flags to 1 where the human said 'good' and to 4
            where the human said 'bad';
          - calibrate again and log the errors.
        The loop stops earlier if there is nothing left to evaluate.

        Inputs:
            datadir: Passed to ProfilesQCPandasCollection.
            varname: Variable to calibrate. Must be in the collection.
            cfg: Passed to ProfilesQCPandasCollection.
            niter: Maximum number of rounds of human evaluation.

        Output:
            The output of the last calibrate4flags(), extended with:
                human_flag: Masked array with the human evaluations.
                ind_humanqc: Binary flags from the last flags2bin().
                error_log: List with one dictionary per calibration
                    (n_err as 'err', 'err_ratio', 'p_optimal' and, but
                    for the first one, 'tot_misfit').
    """
    db = ProfilesQCPandasCollection(datadir, cfg=cfg, saveauxiliary=True)
    assert varname in db.keys()

    data = db.data
    features = db.auxiliary[varname]
    flags = combined_flag(db.flags[varname])
    binflags = flags2bin(np.array(flags))

    result = calibrate4flags(db.flags[varname], features, q=0.90,
                             verbose=False)

    error_log = [{
        'err': result['n_err'],
        'err_ratio': result['err_ratio'],
        'p_optimal': result['p_optimal'],
    }]

    human_flag = ma.masked_all(len(flags), dtype='object')
    for _ in range(niter):
        # Failures from AD to reproduce flags
        mistake = (result['false_positive'] | result['false_negative'])
        # Only the ones that weren't already flagged by a human
        mistake = mistake & ma.getmaskarray(human_flag)
        # TODO: In the future order by how badly AD mistook.
        profileids = np.unique(data['profileid'].iloc[mistake])

        if len(profileids) == 0:
            break

        # 5 random profiles with mistakes
        for pid in np.random.permutation(profileids)[:5]:
            print("Profile: %s" % pid)
            ind_p = data.profileid == pid
            in_profile = np.array(ind_p)
            h = HumanQC().eval(
                data[varname][ind_p],
                data['PRES'][ind_p],
                baseflag=binflags[in_profile],
                fails=mistake[in_profile],
                humanflag=human_flag[in_profile])
            # Update human_flag only at the new values
            human_flag[np.nonzero(ind_p)[0][~h.mask]] = h[~h.mask]

        flags[human_flag == 'good'] = 1
        flags[human_flag == 'bad'] = 4

        # Update binflags
        binflags = flags2bin(flags)

        result = calibrate4flags(flags, features, q=0.90, verbose=False)

        error_log.append({
            'err': result['n_err'],
            'err_ratio': result['err_ratio'],
            'p_optimal': result['p_optimal'],
            'tot_misfit': result['tot_misfit'],
        })
        # print() with a single argument behaves the same on Python 2 & 3
        print(error_log[-2])
        print(error_log[-1])

    result['human_flag'] = human_flag
    result['ind_humanqc'] = binflags
    result['error_log'] = error_log
    return result
def calibrate4flags(flags, features, q=0.90, verbose=False):
    """ Adjust coefficients for Anomaly Detection to best reproduce given flags

        Inputs:
            flags: The reference flags. What the Anomaly Detection will
                try to reproduce. Can be one array of flags or a
                mapping of flags per test, which is then merged with
                combined_flag(). split_data_groups() partitions the
                data based on these flags into the groups used to fit,
                to adjust (test) and to estimate the error.
            features: The features used by the Anomaly Detection. Must
                have .keys() and each feature must have the same length
                as flags.
            q: Passed to fit_tests(). The top q extreme tests results
                to be used on Anom. Detect. For example q=0 will use
                all the data, while q=0.9 (default) will use the
                percentile of 0.9, i.e. the top 10% values.
            verbose: If True, shows an histogram of the estimated
                probabilities with pylab.

        Output:
            Returns a dictionary with
                false_negative: Boolean np.array, prob < p_optimal
                    where the binary flag is True.
                false_positive: Boolean np.array, prob > p_optimal
                    where the binary flag is False.
                prob: Output of estimate_anomaly().
                p_optimal: Threshold from estimate_p_optimal().
                tot_misfit: Total number of false positives and false
                    negatives.
                n_err: Number of mistakes inside the error group.
                err_ratio: n_err divided by the size of the error
                    group.
                params: Output of fit_tests().

        Use the functions:
            split_data_groups()
            fit_tests()
            estimate_anomaly()
            estimate_p_optimal()
    """
    if hasattr(flags, 'keys'):
        flags = combined_flag(flags)
    assert not hasattr(flags, 'keys')
    assert hasattr(features, 'keys')
    first_feature = next(iter(features.keys()))
    assert len(features[first_feature]) == len(flags)

    indices = split_data_groups(flags)
    params = fit_tests(features[indices['fit']], q=q)
    prob = estimate_anomaly(features, params)

    if verbose is True:
        pylab.hist(prob)
        pylab.show()

    binflags = flags2bin(flags)
    # The second output of estimate_p_optimal() is not used here.
    p_optimal, _ = estimate_p_optimal(prob[indices['test']],
                                      binflags[indices['test']])

    # Guarantee that the false_* indices will be np.array
    false_negative = (prob < p_optimal) & binflags
    false_negative[ma.getmaskarray(false_negative)] = False
    false_negative = np.array(false_negative)

    false_positive = (prob > p_optimal) & ~binflags
    false_positive[ma.getmaskarray(false_positive)] = False
    false_positive = np.array(false_positive)

    mistake = false_positive | false_negative

    tot_misfit = np.nonzero(mistake)[0].size
    n_err = float(np.nonzero(mistake[indices['err']])[0].shape[0])
    err_ratio = n_err / indices['err'].astype('i').sum()

    return {
        'false_negative': false_negative,
        'false_positive': false_positive,
        'prob': prob,
        'p_optimal': p_optimal,
        'tot_misfit': tot_misfit,
        'n_err': n_err,
        'err_ratio': err_ratio,
        'params': params,
    }
def evaluate(self, v, cfg): self.flags[v] = {} # Apply common flag for all points. if 'common' in self.flags: N = self.input[v].shape for f in self.flags['common']: self.flags[v][f] = self.flags['common'][f] * \ np.ones(N, dtype='i1') if self.saveauxiliary: if v not in self.auxiliary.keys(): self.auxiliary[v] = {} if 'platform_identification' in cfg: logging.warn("Sorry I'm not ready to evaluate platform_identification()") if 'valid_geolocation' in cfg: logging.warn("Sorry I'm not ready to evaluate valid_geolocation()") if 'valid_speed' in cfg: # Think about. ARGO also has a test valid_speed, but that is # in respect to sucessive profiles. How is the best way to # distinguish them here? try: if self.saveauxiliary: self.flags[v]['valid_speed'], \ self.auxiliary[v]['valid_speed'] = \ possible_speed(self.input, cfg['valid_speed']) except: print("Fail on valid_speed") if 'global_range' in cfg: self.flags[v]['global_range'] = global_range( self.input, v, cfg['global_range']) if 'regional_range' in cfg: logging.warn("Sorry, I'm no ready to evaluate regional_range()") if 'pressure_increasing' in cfg: logging.warn("Sorry, I'm no ready to evaluate pressure_increasing()") if 'profile_envelop' in cfg: self.flags[v]['profile_envelop'] = profile_envelop( self.input, cfg['profile_envelop'], v) if 'gradient' in cfg: y = Gradient(self.input, v, cfg['gradient']) y.test() if self.saveauxiliary: self.auxiliary[v]['gradient'] = y.features['gradient'] self.flags[v]['gradient'] = y.flags['gradient'] if 'gradient_depthconditional' in cfg: cfg_tmp = cfg['gradient_depthconditional'] g = gradient(self.input[v]) flag = np.zeros(g.shape, dtype='i1') # Flag as 9 any masked input value flag[ma.getmaskarray(self.input[v])] = 9 # ---- Shallow zone ----------------- threshold = cfg_tmp['shallow_max'] flag[np.nonzero( \ (self['PRES'] <= cfg_tmp['pressure_threshold']) & \ (g > threshold))] \ = 4 flag[np.nonzero( \ (self['PRES'] <= cfg_tmp['pressure_threshold']) & \ (g <= threshold))] \ = 1 # ---- 
Deep zone -------------------- threshold = cfg_tmp['deep_max'] flag[np.nonzero( \ (self['PRES'] > cfg_tmp['pressure_threshold']) & \ (g > threshold))] \ = 4 flag[np.nonzero( \ (self['PRES'] > cfg_tmp['pressure_threshold']) & \ (g <= threshold))] \ = 1 self.flags[v]['gradient_depthconditional'] = flag if 'spike' in cfg: y = Spike(self.input, v, cfg['spike']) y.test() if self.saveauxiliary: self.auxiliary[v]['spike'] = y.features['spike'] self.flags[v]['spike'] = y.flags['spike'] if 'spike_depthconditional' in cfg: cfg_tmp = cfg['spike_depthconditional'] s = spike(self.input[v]) flag = np.zeros(s.shape, dtype='i1') # Flag as 9 any masked input value flag[ma.getmaskarray(self.input[v])] = 9 # ---- Shallow zone ----------------- threshold = cfg_tmp['shallow_max'] flag[np.nonzero( \ (self['PRES'] <= cfg_tmp['pressure_threshold']) & \ (s > threshold))] \ = 4 flag[np.nonzero( \ (self['PRES'] <= cfg_tmp['pressure_threshold']) & \ (s <= threshold))] \ = 1 # ---- Deep zone -------------------- threshold = cfg_tmp['deep_max'] flag[np.nonzero( \ (self['PRES'] > cfg_tmp['pressure_threshold']) & \ (s > threshold))] \ = 4 flag[np.nonzero( \ (self['PRES'] > cfg_tmp['pressure_threshold']) & \ (s <= threshold))] \ = 1 self.flags[v]['spike_depthconditional'] = flag if 'stuck_value' in cfg: logging.warn("Sorry I'm not ready to evaluate stuck_value()") if 'grey_list' in cfg: logging.warn("Sorry I'm not ready to evaluate grey_list()") if 'gross_sensor_drift' in cfg: logging.warn("Sorry I'm not ready to evaluate gross_sensor_drift()") if 'frozen_profile' in cfg: logging.warn("Sorry I'm not ready to evaluate frozen_profile()") if 'deepest_pressure' in cfg: logging.warn("Sorry I'm not ready to evaluate deepest_pressure()") if 'tukey53H_norm' in cfg: y = Tukey53H(self.input, v, cfg['tukey53H_norm']) y.test() if self.saveauxiliary: self.auxiliary[v]['tukey53H_norm'] = \ y.features['tukey53H_norm'] self.flags[v]['tukey53H_norm'] = y.flags['tukey53H_norm'] #if 'spike_depthsmooth' in cfg: # 
from maud.window_func import _weight_hann as wfunc # cfg_tmp = cfg['spike_depthsmooth'] # cfg_tmp['dzwindow'] = 10 # smooth = ma.masked_all(self.input[v].shape) # z = ped['pressure'] # for i in range(len(self.input[v])): # ind = np.nonzero(ma.absolute(z-z[i]) < \ # cfg_tmp['dzwindow'])[0] # ind = ind[ind != i] # w = wfunc(z[ind]-z[i], cfg_tmp['dzwindow']) # smooth[i] = (T[ind]*w).sum()/w.sum() # ARGO, test #12. (10C, 5PSU) if 'digit_roll_over' in cfg: threshold = cfg['digit_roll_over'] s = step(self.input[v]) if self.saveauxiliary: self.auxiliary[v]['step'] = s flag = np.zeros(s.shape, dtype='i1') # Flag as 9 any masked input value flag[ma.getmaskarray(self.input[v])] = 9 flag[np.nonzero(ma.absolute(s) > threshold)] = 4 flag[np.nonzero(ma.absolute(s) <= threshold)] = 1 self.flags[v]['digit_roll_over'] = flag if 'bin_spike' in cfg: y = Bin_Spike(self.input, v, cfg['bin_spike']) # y.test() if self.saveauxiliary: self.auxiliary[v]['bin_spike'] = y.features['bin_spike'] # self.flags[v]['bin_spike'] = y.flags['bin_spike'] if 'density_inversion' in cfg: try: if self.saveauxiliary: self.flags[v]['density_inversion'], \ self.auxiliary[v]['density_step'] = \ density_inversion( self.input, cfg['density_inversion'], saveaux=True) else: self.flags[v]['density_inversion'] = density_inversion( self.input, cfg['density_inversion']) except: print("Fail on density_inversion") if 'woa_normbias' in cfg: y = WOA_NormBias(self.input, v, cfg['woa_normbias']) # self.attributes) y.test() if self.saveauxiliary: for f in y.features: self.auxiliary[v][f] = y.features[f] self.flags[v]['woa_normbias'] = y.flags['woa_normbias'] #if 'pstep' in cfg: # ind = np.isfinite(self.input[v]) # ind = ma.getmaskarray(self.input[v]) # if self.saveauxiliary: # self.auxiliary[v]['pstep'] = ma.concatenate( # [ma.masked_all(1), # np.diff(self.input['PRES'][ind])]) if 'rate_of_change' in cfg: self.flags[v]['rate_of_change'], RoC = \ rate_of_change(self.input, v, cfg['rate_of_change']) if self.saveauxiliary: 
self.auxiliary[v]['rate_of_change'] = RoC if 'cum_rate_of_change' in cfg: x = cum_rate_of_change(self.input, v, cfg['cum_rate_of_change']['memory']) self.flags[v]['cum_rate_of_change'] = np.zeros(x.shape, dtype='i1') self.flags[v]['cum_rate_of_change'][ np.nonzero(x <= cfg['cum_rate_of_change']['threshold']) ] = 1 self.flags[v]['cum_rate_of_change'][ np.nonzero(x > cfg['cum_rate_of_change']['threshold']) ] = 4 self.flags[v]['cum_rate_of_change'][ ma.getmaskarray(self.input[v])] = 9 # FIXME: the Anomaly Detection and Fuzzy require some features # to be estimated previously. Generalize this. if 'anomaly_detection' in cfg: features = {} for f in cfg['anomaly_detection']['features']: if f == 'spike': features['spike'] = spike(self.input[v]) elif f == 'gradient': features['gradient'] = gradient(self.input[v]) elif f == 'tukey53H_norm': features['tukey53H_norm'] = tukey53H_norm(self.input[v]) elif f == 'rate_of_change': RoC = ma.masked_all_like(self.input[v]) RoC[1:] = ma.absolute(ma.diff(self.input[v])) features['rate_of_change'] = RoC elif (f == 'woa_normbias'): y = WOA_NormBias(self.input, v, {}) features['woa_normbias'] = \ np.abs(y.features['woa_normbias']) else: logging.error("Sorry, I can't evaluate anomaly_detection with: %s" % f) self.flags[v]['anomaly_detection'] = \ anomaly_detection(features, cfg['anomaly_detection']) if 'morello2014' in cfg: self.flags[v]['morello2014'] = morello2014( features=self.auxiliary[v], cfg=cfg['morello2014']) if 'fuzzylogic' in cfg: features = {} for f in cfg['fuzzylogic']['features']: if f == 'spike': features['spike'] = spike(self.input[v]) elif f == 'gradient': features['gradient'] = gradient(self.input[v]) elif f == 'tukey53H_norm': features['tukey53H_norm'] = tukey53H_norm(self.input[v], k=1.5) elif f == 'rate_of_change': RoC = ma.masked_all_like(data[v]) RoC[1:] = ma.absolute(ma.diff(data[v])) features['rate_of_change'] = RoC elif (f == 'woa_normbias'): y = WOA_NormBias(self.input, v, {}) features['woa_normbias'] = \ 
np.abs(y.features['woa_normbias']) else: logging.error("Sorry, I can't evaluate fuzzylogic with: %s" % f) self.flags[v]['fuzzylogic'] = fuzzylogic( features=features, cfg=cfg['fuzzylogic']) self.flags[v]['overall'] = combined_flag(self.flags[v])
def human_calibrate_mistakes(datadir, varname, cfg=None, niter=5):
    """ Recalibrate the Anomaly Detection with the help of a human

        The profiles in datadir are loaded with
        ProfilesQCPandasCollection and the Anomaly Detection is
        calibrated with calibrate4flags(). Then, for at most niter
        rounds:
          - select the measurements where the calibration disagrees
            with the flags (false positives or false negatives) and
            that were not yet evaluated by a human;
          - ask HumanQC to evaluate up to 5 random profiles among them;
          - set the flags to 1 where the human said 'good' and to 4
            where the human said 'bad';
          - calibrate again and log the errors.
        The loop stops earlier if there is nothing left to evaluate.

        Inputs:
            datadir: Passed to ProfilesQCPandasCollection.
            varname: Variable to calibrate. Must be in the collection.
            cfg: Passed to ProfilesQCPandasCollection.
            niter: Maximum number of rounds of human evaluation.

        Output:
            The output of the last calibrate4flags(), extended with:
                human_flag: Masked array with the human evaluations.
                ind_humanqc: Binary flags from the last flags2bin().
                error_log: List with one dictionary per calibration
                    (n_err as 'err', 'err_ratio', 'p_optimal' and, but
                    for the first one, 'tot_misfit').
    """
    db = ProfilesQCPandasCollection(datadir, cfg=cfg, saveauxiliary=True)
    assert varname in db.keys()

    data = db.data
    features = db.auxiliary[varname]
    flags = combined_flag(db.flags[varname])
    binflags = flags2bin(np.array(flags))

    result = calibrate4flags(db.flags[varname], features, q=0.90,
                             verbose=False)

    error_log = [{
        'err': result['n_err'],
        'err_ratio': result['err_ratio'],
        'p_optimal': result['p_optimal'],
    }]

    human_flag = ma.masked_all(len(flags), dtype='object')
    for _ in range(niter):
        # Failures from AD to reproduce flags
        mistake = (result['false_positive'] | result['false_negative'])
        # Only the ones that weren't already flagged by a human
        mistake = mistake & ma.getmaskarray(human_flag)
        # TODO: In the future order by how badly AD mistook.
        profileids = np.unique(data['profileid'].iloc[mistake])

        if len(profileids) == 0:
            break

        # 5 random profiles with mistakes
        for pid in np.random.permutation(profileids)[:5]:
            print("Profile: %s" % pid)
            ind_p = data.profileid == pid
            in_profile = np.array(ind_p)
            h = HumanQC().eval(
                data[varname][ind_p],
                data['PRES'][ind_p],
                baseflag=binflags[in_profile],
                fails=mistake[in_profile],
                humanflag=human_flag[in_profile])
            # Update human_flag only at the new values
            human_flag[np.nonzero(ind_p)[0][~h.mask]] = h[~h.mask]

        flags[human_flag == 'good'] = 1
        flags[human_flag == 'bad'] = 4

        # Update binflags
        binflags = flags2bin(flags)

        result = calibrate4flags(flags, features, q=0.90, verbose=False)

        error_log.append({
            'err': result['n_err'],
            'err_ratio': result['err_ratio'],
            'p_optimal': result['p_optimal'],
            'tot_misfit': result['tot_misfit'],
        })
        # print() with a single argument behaves the same on Python 2 & 3
        print(error_log[-2])
        print(error_log[-1])

    result['human_flag'] = human_flag
    result['ind_humanqc'] = binflags
    result['error_log'] = error_log
    return result
def calibrate4flags(flags, features, q=0.90, verbose=False):
    """ Adjust coefficients for Anomaly Detection to best reproduce given flags

        Inputs:
            flags: The reference flags. What the Anomaly Detection will
                try to reproduce. Can be one array of flags or a
                mapping of flags per test, which is then merged with
                combined_flag(). split_data_groups() partitions the
                data based on these flags into the groups used to fit,
                to adjust (test) and to estimate the error.
            features: The features used by the Anomaly Detection. Must
                have .keys() and each feature must have the same length
                as flags.
            q: Passed to fit_tests(). The top q extreme tests results
                to be used on Anom. Detect. For example q=0 will use
                all the data, while q=0.9 (default) will use the
                percentile of 0.9, i.e. the top 10% values.
            verbose: If True, shows an histogram of the estimated
                probabilities with pylab.

        Output:
            Returns a dictionary with
                false_negative: Boolean np.array, prob < p_optimal
                    where the binary flag is True.
                false_positive: Boolean np.array, prob > p_optimal
                    where the binary flag is False.
                prob: Output of estimate_anomaly().
                p_optimal: Threshold from estimate_p_optimal().
                tot_misfit: Total number of false positives and false
                    negatives.
                n_err: Number of mistakes inside the error group.
                err_ratio: n_err divided by the size of the error
                    group.
                params: Output of fit_tests().

        Use the functions:
            split_data_groups()
            fit_tests()
            estimate_anomaly()
            estimate_p_optimal()
    """
    if hasattr(flags, 'keys'):
        flags = combined_flag(flags)
    assert not hasattr(flags, 'keys')
    assert hasattr(features, 'keys')
    first_feature = next(iter(features.keys()))
    assert len(features[first_feature]) == len(flags)

    indices = split_data_groups(flags)
    params = fit_tests(features[indices['fit']], q=q)
    prob = estimate_anomaly(features, params)

    if verbose is True:
        pylab.hist(prob)
        pylab.show()

    binflags = flags2bin(flags)
    # The second output of estimate_p_optimal() is not used here.
    p_optimal, _ = estimate_p_optimal(prob[indices['test']],
                                      binflags[indices['test']])

    # Guarantee that the false_* indices will be np.array
    false_negative = (prob < p_optimal) & binflags
    false_negative[ma.getmaskarray(false_negative)] = False
    false_negative = np.array(false_negative)

    false_positive = (prob > p_optimal) & ~binflags
    false_positive[ma.getmaskarray(false_positive)] = False
    false_positive = np.array(false_positive)

    mistake = false_positive | false_negative

    tot_misfit = np.nonzero(mistake)[0].size
    n_err = float(np.nonzero(mistake[indices['err']])[0].shape[0])
    err_ratio = n_err / indices['err'].astype('i').sum()

    return {
        'false_negative': false_negative,
        'false_positive': false_positive,
        'prob': prob,
        'p_optimal': p_optimal,
        'tot_misfit': tot_misfit,
        'n_err': n_err,
        'err_ratio': err_ratio,
        'params': params,
    }
def evaluate(self, v, cfg):
    """Run the QC tests requested in ``cfg`` on the variable ``v``.

    Every item of ``cfg`` that defines a ``"procedure"`` listed in
    ``qctests.QCTESTS`` is evaluated with the class returned by
    ``qctests.catalog()``. A few tests (valid_geolocation, valid_speed,
    anomaly_detection, morello2014 and fuzzylogic) are handled
    explicitly, and the ones not implemented only emit a warning.

    The flags are stored in ``self.flags[v]`` and, when
    ``self.saveauxiliary`` is true, the features in ``self.features[v]``.
    At the end all the flags of ``v`` are merged with ``combined_flag()``
    into ``self.flags[v]['overall']``.

    Parameters
    ----------
    v : key of ``self.input``
        Variable to be evaluated.
    cfg : mapping
        Test name -> configuration of that test.
    """

    def store(check):
        """Save the flags, and the features if required, of a finished check."""
        if self.saveauxiliary:
            for f in check.features.keys():
                self.features[v][f] = check.features[f]
        for f in check.flags:
            self.flags[v][f] = check.flags[f]

    self.flags[v] = {}

    # Apply common flag for all points.
    if 'common' in self.flags:
        N = self.input[v].shape
        for f in self.flags['common']:
            self.flags[v][f] = self.flags['common'][f] * \
                np.ones(N, dtype='i1')

    if self.saveauxiliary:
        if v not in self.features.keys():
            self.features[v] = {}

    if 'platform_identification' in cfg:
        module_logger.warning(
            "Sorry I'm not ready to evaluate platform_identification()")

    if 'valid_geolocation' in cfg:
        store(ValidGeolocation(
            self.input, v, cfg['valid_geolocation'], autoflag=True))

    if 'valid_speed' in cfg:
        # Think about. Argo also has a test valid_speed, but that is
        #   in respect to successive profiles. How is the best way to
        #   distinguish them here?
        # NOTE(review): nothing is evaluated when saveauxiliary is off.
        #   Looks unintended -- confirm against possible_speed().
        try:
            if self.saveauxiliary:
                self.flags[v]['valid_speed'], \
                    self.features[v]['valid_speed'] = \
                    possible_speed(self.input, cfg['valid_speed'])
        except Exception:
            # Best effort: a failure here must not stop the other tests.
            module_logger.warning("Fail on valid_speed")

    if 'grey_list' in cfg:
        module_logger.warning(
            "Sorry I'm not ready to evaluate grey_list()")

    if 'gross_sensor_drift' in cfg:
        module_logger.warning(
            "Sorry I'm not ready to evaluate gross_sensor_drift()")

    if 'frozen_profile' in cfg:
        module_logger.warning(
            "Sorry I'm not ready to evaluate frozen_profile()")

    criteria = (c for c in cfg
                if (cfg[c] is not None) and
                ("procedure" in cfg[c]) and
                (cfg[c]["procedure"] in qctests.QCTESTS))
    for criterion in criteria:
        Procedure = qctests.catalog(cfg[criterion]["procedure"])
        if issubclass(Procedure, qctests.QCCheckVar):
            y = Procedure(self.input, varname=v, cfg=cfg[criterion],
                          autoflag=True)
        elif issubclass(Procedure, qctests.QCCheck):
            y = Procedure(self.input, cfg=cfg[criterion], autoflag=True)
        else:
            # Without this branch the results of the previous criterion
            #   would be stored again (or `y` would be undefined).
            module_logger.error(
                "Sorry, I don't know how to run the procedure for: %s"
                % criterion)
            continue
        store(y)

    # FIXME: the Anomaly Detection and Fuzzy require some features
    #   to be estimated previously. Generalize this.
    if 'anomaly_detection' in cfg:
        features = {}
        for f in cfg['anomaly_detection']['features']:
            try:
                # Reuse the feature if a previous test already saved it.
                features[f] = self.features[v][f]
            except (KeyError, AttributeError):
                if f == 'spike':
                    features['spike'] = qctests.spike(self.input[v])
                elif f == 'gradient':
                    features['gradient'] = qctests.gradient(self.input[v])
                elif f == 'constant_cluster_size':
                    features['constant_cluster_size'] = \
                        qctests.constant_cluster_size(self.input[v])
                elif f == 'tukey53H_norm':
                    features['tukey53H_norm'] = \
                        qctests.tukey53H_norm(self.input[v])
                elif f == 'rate_of_change':
                    features['rate_of_change'] = \
                        qctests.rate_of_change(self.input[v])
                elif f == 'woa_normbias':
                    y = qctests.WOA_NormBias(
                        self.input, v, {}, autoflag=False)
                    features['woa_normbias'] = \
                        np.abs(y.features['woa_normbias'])
                elif f == 'cars_normbias':
                    y = qctests.CARS_NormBias(
                        self.input, v, {}, autoflag=False)
                    features['cars_normbias'] = \
                        np.abs(y.features['cars_normbias'])
                else:
                    module_logger.error(
                        "Sorry, I can't evaluate anomaly_detection with: %s" % f)

        prob, self.flags[v]['anomaly_detection'] = \
            qctests.anomaly_detection(features, cfg['anomaly_detection'])
        if self.saveauxiliary:
            self.features[v]['anomaly_detection'] = prob

    if 'morello2014' in cfg:
        store(qctests.Morello2014(
            self.input, v, cfg['morello2014'], autoflag=True))

    if "fuzzylogic" in cfg:
        store(qctests.FuzzyLogic(
            self.input, v, cfg["fuzzylogic"], autoflag=True))

    self.flags[v]['overall'] = combined_flag(self.flags[v])
def evaluate(self, v, cfg):
    """Run the QC tests requested in ``cfg`` on the variable ``v``.

    The tests listed in the local ``catalog`` are evaluated by the
    respective class whenever they are present in ``cfg``. A few tests
    (valid_geolocation, valid_speed, density_inversion,
    anomaly_detection, morello2014 and fuzzylogic) are handled
    explicitly, and the ones not implemented only emit a warning.

    The flags are stored in ``self.flags[v]`` and, when
    ``self.saveauxiliary`` is true, the features in ``self.features[v]``.
    At the end all the flags of ``v`` are merged with ``combined_flag()``
    into ``self.flags[v]['overall']``.

    Parameters
    ----------
    v : key of ``self.input``
        Variable to be evaluated.
    cfg : mapping
        Test name -> configuration of that test.
    """

    def store(check):
        """Save the flags, and the features if required, of a finished check."""
        if self.saveauxiliary:
            for f in check.features.keys():
                self.features[v][f] = check.features[f]
        for f in check.flags:
            self.flags[v][f] = check.flags[f]

    self.flags[v] = {}

    # Apply common flag for all points.
    if 'common' in self.flags:
        N = self.input[v].shape
        for f in self.flags['common']:
            self.flags[v][f] = self.flags['common'][f] * \
                np.ones(N, dtype='i1')

    if self.saveauxiliary:
        if v not in self.features.keys():
            self.features[v] = {}

    if 'platform_identification' in cfg:
        module_logger.warning(
            "Sorry I'm not ready to evaluate platform_identification()")

    if 'valid_geolocation' in cfg:
        store(ValidGeolocation(
            self.input, v, cfg['valid_geolocation'], autoflag=True))

    if 'valid_speed' in cfg:
        # Think about. Argo also has a test valid_speed, but that is
        #   in respect to successive profiles. How is the best way to
        #   distinguish them here?
        # NOTE(review): nothing is evaluated when saveauxiliary is off.
        #   Looks unintended -- confirm against possible_speed().
        try:
            if self.saveauxiliary:
                self.flags[v]['valid_speed'], \
                    self.features[v]['valid_speed'] = \
                    possible_speed(self.input, cfg['valid_speed'])
        except Exception:
            # Best effort: a failure here must not stop the other tests.
            module_logger.warning("Fail on valid_speed")

    if 'pressure_increasing' in cfg:
        module_logger.warning(
            "Sorry, I'm no ready to evaluate pressure_increasing()")

    if 'grey_list' in cfg:
        module_logger.warning(
            "Sorry I'm not ready to evaluate grey_list()")

    if 'gross_sensor_drift' in cfg:
        module_logger.warning(
            "Sorry I'm not ready to evaluate gross_sensor_drift()")

    if 'frozen_profile' in cfg:
        module_logger.warning(
            "Sorry I'm not ready to evaluate frozen_profile()")

    # Tests that share the same interface: Procedure(input, v, cfg)
    catalog = {
        'bin_spike': Bin_Spike,
        'cars_normbias': CARS_NormBias,
        'constant_cluster_size': ConstantClusterSize,
        'cum_rate_of_change': CumRateOfChange,
        'deepest_pressure': DeepestPressure,
        'digit_roll_over': DigitRollOver,
        'global_range': GlobalRange,
        'gradient': Gradient,
        'gradient_depthconditional': GradientDepthConditional,
        'monotonic_z': MonotonicZ,
        'profile_envelop': ProfileEnvelop,
        'rate_of_change': RateOfChange,
        'regional_range': RegionalRange,
        'spike': Spike,
        'spike_depthconditional': SpikeDepthConditional,
        'stuck_value': StuckValue,
        'tukey53H_norm': Tukey53H,
        'woa_normbias': WOA_NormBias,
    }
    for criterion in [c for c in catalog if c in cfg]:
        Procedure = catalog[criterion]
        store(Procedure(self.input, v, cfg[criterion], autoflag=True))

    if 'density_inversion' in cfg:
        try:
            store(DensityInversion(
                self.input, cfg=cfg['density_inversion'], autoflag=True))
        except Exception:
            # Best effort: a failure here must not stop the other tests.
            module_logger.warning("Fail on density_inversion")

    # FIXME: the Anomaly Detection and Fuzzy require some features
    #   to be estimated previously. Generalize this.
    if 'anomaly_detection' in cfg:
        features = {}
        for f in cfg['anomaly_detection']['features']:
            try:
                # Reuse the feature if a previous test already saved it.
                features[f] = self.features[v][f]
            except (KeyError, AttributeError):
                if f == 'spike':
                    features['spike'] = spike(self.input[v])
                elif f == 'gradient':
                    features['gradient'] = gradient(self.input[v])
                elif f == 'constant_cluster_size':
                    features['constant_cluster_size'] = \
                        constant_cluster_size(self.input[v])
                elif f == 'tukey53H_norm':
                    features['tukey53H_norm'] = tukey53H_norm(
                        self.input[v])
                elif f == 'rate_of_change':
                    features['rate_of_change'] = rate_of_change(
                        self.input[v])
                elif f == 'woa_normbias':
                    y = WOA_NormBias(self.input, v, {}, autoflag=False)
                    features['woa_normbias'] = \
                        np.abs(y.features['woa_normbias'])
                elif f == 'cars_normbias':
                    y = CARS_NormBias(self.input, v, {}, autoflag=False)
                    features['cars_normbias'] = \
                        np.abs(y.features['cars_normbias'])
                else:
                    module_logger.error(
                        "Sorry, I can't evaluate anomaly_detection with: %s" % f)

        prob, self.flags[v]['anomaly_detection'] = \
            anomaly_detection(features, cfg['anomaly_detection'])
        if self.saveauxiliary:
            self.features[v]['anomaly_detection'] = prob

    if 'morello2014' in cfg:
        self.flags[v]['morello2014'] = morello2014(
            features=self.features[v],
            cfg=cfg['morello2014'])

    if 'fuzzylogic' in cfg:
        features = {}
        for f in cfg['fuzzylogic']['features']:
            try:
                features[f] = self.features[v][f]
            except (KeyError, AttributeError):
                # Fuzzy logic only uses features saved by previous tests.
                module_logger.error(
                    "Can't evaluate fuzzylogic with: %s" % f)

        self.flags[v]['fuzzylogic'] = fuzzylogic(features=features,
                                                 cfg=cfg['fuzzylogic'])

    self.flags[v]['overall'] = combined_flag(self.flags[v])