def apDiff(det):
    """ Computes the difference of overall Average Precision between two sets of detections.
    
    The detections have to be given as a list of dictionaries with the keys
    'det1', 'det2' and 'num_gt' (the number of ground-truth intervals).
    
    Returns: tuple with 3 elements: the AP of the 1st set of detections, the AP
             of the 2nd set, and the difference between the two.
    """
    
    num_gt = sum(d['num_gt'] for d in det)
    ap1 = eval.average_precision(num_gt, [d['det1'] for d in det])
    ap2 = eval.average_precision(num_gt, [d['det2'] for d in det])
    return ap1, ap2, ap2 - ap1
def apPerType(det):
    """ Computes the Average Precision of two sets of detections separately for each function type.
    
    The detections have to be given as a list of dictionaries with the keys
    'ftype', 'det1', 'det2' and 'num_gt' (the number of ground-truth intervals).
    
    Returns: tuple with 2 lists containing the AP of each function type for the
             1st and the 2nd set of detections, respectively.
    """
    
    types = set(d['ftype'] for d in det)
    aps1, aps2 = [], []
    for ftype in types:
        num_gt = sum(d['num_gt'] for d in det if d['ftype'] == ftype)
        aps1.append(eval.average_precision(num_gt, [d['det1'] for d in det if d['ftype'] == ftype]))
        aps2.append(eval.average_precision(num_gt, [d['det2'] for d in det if d['ftype'] == ftype]))
    return aps1, aps2
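# Hedged usage sketch (not part of the original scripts): the detection
# dictionaries below are fabricated for illustration only. Detections are
# assumed to be (start, end, score) tuples as returned by maxdiv.maxdiv, and
# 'num_gt' is the number of ground-truth intervals of the respective series.
def _ap_comparison_example():
    det = [
        { 'ftype' : 'meanshift', 'num_gt' : 1,
          'det1' : [(10, 40, 0.9)], 'det2' : [(12, 38, 0.8)] },
        { 'ftype' : 'amplitude_change', 'num_gt' : 1,
          'det1' : [(200, 260, 0.7)], 'det2' : [(50, 90, 0.6)] },
    ]
    ap1, ap2, diff = apDiff(det)  # overall AP of both sets and their difference
    aps1, aps2 = apPerType(det)   # AP broken down by function type
    return diff, aps1, aps2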
def find_best_k(func, method, td_lag):
    # Find the embedding dimension which maximizes AP
    k_best, ap_best, auc_best = 0, 0.0, 0.0
    regions_best = []
    for k in range(3, 21):
        detections = maxdiv.maxdiv(func['ts'], method=method, mode='I_OMEGA',
                                   extint_min_len=20, extint_max_len=100,
                                   num_intervals=None, td_dim=k, td_lag=td_lag)
        cur_ap = eval.average_precision([func['gt']], [detections])
        cur_auc = eval.auc(func['gt'], detections, func['ts'].shape[1])
        # Prefer the k with the highest AP and break ties by AUC.
        if (k_best == 0) or (cur_ap > ap_best) or ((cur_ap == ap_best) and (cur_auc > auc_best)):
            k_best, ap_best, auc_best, regions_best = k, cur_ap, cur_auc, detections
    return regions_best, k_best
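# Hedged usage sketch: runs the search for a single dataset entry. Loading via
# datasets.loadDatasets mirrors the other experiment scripts in this repository
# and assumes the datasets module is importable here; the 'meanshift' key and
# the 'gaussian_cov' method name are illustrative assumptions as well.
def _find_best_k_example():
    func = datasets.loadDatasets('synthetic', 'interval')['meanshift'][0]
    regions, k = find_best_k(func, 'gaussian_cov', td_lag=1)
    print('Best embedding dimension: {} ({} intervals detected)'.format(k, len(regions)))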
    print(l)
    funcs = [sample_gp_with_meanshift(n, l) for i in range(m)]
    
    for method in methods:
        auc = []
        regions = []
        for i in range(m):
            gp, ygt = funcs[i]
            regions.append(maxdiv.maxdiv(gp, method = method, num_intervals = 5,
                                         extint_min_len = 20, extint_max_len = 220,
                                         kernelparameters = {'kernel_sigma_sq': args.kernel_sigma_sq},
                                         **parameters))
            auc.append(eval.auc(ygt, regions[-1], n))
        aucs[method].append(np.mean(auc))
        aps[method].append(eval.average_precision([ygt for gp, ygt in funcs], regions))
    
    ratios.append(float(l) / n)

# Plot results
fig_auc = plt.figure()
sp_auc = fig_auc.add_subplot(111, xlabel = 'Length of anomaly / Length of time series', ylabel = 'AUC')
fig_ap = plt.figure()
sp_ap = fig_ap.add_subplot(111, xlabel = 'Length of anomaly / Length of time series', ylabel = 'Average Precision')
for method in methods:
    sp_auc.plot(ratios, aucs[method], marker = 'x', label = method)
    sp_ap.plot(ratios, aps[method], marker = 'x', label = method)
sp_auc.legend(bbox_to_anchor = (0., 1.02, 1., .102), loc = 3, ncol = len(methods), mode = "expand", borderaxespad = 0.)
sp_ap.legend(bbox_to_anchor = (0., 1.02, 1., .102), loc = 3, ncol = len(methods), mode = "expand", borderaxespad = 0.)
fig_auc.savefig('anomaly_ratio_auc.svg')
fig_ap.savefig('anomaly_ratio_ap.svg')
        ygts.append(func['gt'])
        det, k_best = optimizers[args.optimizer](func, args.method, args.td_lag)
        # Divide scores by the maximum score, since their range differs widely
        # depending on the dimensionality. Iterating backwards ensures that
        # det[0], which holds the maximum score, is scaled last.
        if args.method not in ('gaussian_cov_ts', 'gaussian_ts'):
            for r in range(len(det) - 1, -1, -1):
                det[r] = (det[r][0], det[r][1], det[r][2] / det[0][2])
        regions.append(det)
        aucs[ftype].append(eval.auc(func['gt'], det, func['ts'].shape[1]))
        best_k[ftype][k_best] += 1
        print("Best k: {}".format(k_best))
    
    aps[ftype] = eval.average_precision(ygts, regions)
    print("AP: {}".format(aps[ftype]))
    
    if args.plot:
        plt.bar(np.array(list(best_k[ftype].keys())) - 0.5, list(best_k[ftype].values()), 1)
        plt.title(ftype)
        plt.show()
    
    all_ids += func_ids
    all_regions += regions
    all_gt += ygts

print('-- Best k --')
for ftype, counts in best_k.items():
aps['Z-Score'] = []

for detrending in aps.keys():
    pipeline_key = 'none' if detrending == 'FT' else detrending
    for i, (method, mode) in enumerate(METHODS):
        sys.stderr.write('{}, {}, {}\n'.format(detrending, method, mode))
        ygts = []
        regions = []
        for func in data:
            ygts.append(func['gt'])
            ts = preproc.deseasonalize_ft(func['ts']) if detrending == 'FT' else func['ts']
            regions.append(libmaxdiv_wrapper.maxdiv_exec(ts, pipelines[pipeline_key][i], None))
        aps[detrending].append(eval.average_precision(ygts, regions))

# Clean up
for p in pipelines.values():
    for pipeline in p:
        libmaxdiv_wrapper.libmaxdiv.maxdiv_free_pipeline(pipeline)

# Print results
header = 'Deseasonalization'
for method, mode in METHODS:
    header += ';{} ({})'.format(method, mode)
print(header)
for detrending in aps.keys():
    print('{};{}'.format(detrending, ';'.join('{}'.format(ap) for ap in aps[detrending])))
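# With METHODS = [('gaussian_cov', 'I_OMEGA'), ('gaussian_cov', 'TS')], for
# instance, the printed CSV would look like this (method names and AP values
# are fabricated for illustration):
#
#   Deseasonalization;gaussian_cov (I_OMEGA);gaussian_cov (TS)
#   Z-Score;0.61;0.58
#   FT;0.66;0.63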
            ygts.append(func['gt'])
            aucs.append(eval.auc(func['gt'], regions[-1], func['ts'].shape[1]))
            if preproc is None:
                if func['ts'].shape[1] not in times:
                    times[func['ts'].shape[1]] = { m : [] for m in METHODS }
                times[func['ts'].shape[1]][method].append(time_stop - time_start)
        
        auc[id][ftype] = np.mean(aucs)
        auc_sd[id][ftype] = np.std(aucs)
        aps[id][ftype] = eval.average_precision(ygts, regions)
        all_gt[id] += ygts
        all_regions[id] += regions

# Store test results on disk
#with open('benchmark_results.pickle', 'wb') as fout:
#    pickle.dump({ 'auc' : auc, 'auc_sd' : auc_sd, 'aps' : aps, 'times' : times }, fout)

if args.csv:
    
    # Print results as CSV
    print('--- AP ---\n')
    header = 'method'
    for ftype in canonical_order:
        if ftype in extremetypes:
import datasets

# Parse parameters
propmeth = sys.argv[1] if len(sys.argv) > 1 else 'hotellings_t'
dataset = sys.argv[2] if len(sys.argv) > 2 else 'synthetic'
extint_max_len = max(10, int(sys.argv[3])) if len(sys.argv) > 3 else 100
td_dim = max(1, int(sys.argv[4])) if len(sys.argv) > 4 else 1
td_lag = max(1, int(sys.argv[5])) if len(sys.argv) > 5 else 1

# Load test data
data = datasets.loadDatasets(dataset, 'interval')

# Try different thresholds for interval proposing
regions = []
gts = []
for ftype in data:
    for func in data[ftype]:
        gts.append(func['gt'])
        ts = preproc.normalize_time_series(func['ts'])
        if td_dim > 1:
            ts = preproc.td(ts, td_dim, td_lag)
        regions.append(list(pointwiseRegionProposals(ts, method=propmeth, sd_th=-3.0,
                                                     extint_min_len=10,
                                                     extint_max_len=extint_max_len)))

print('Overall AP: {}'.format(eval.average_precision(gts, regions)))
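# Example invocation (the script name is a placeholder and the argument values
# are illustrative):
#
#   python proposal_ap.py hotellings_t synthetic 100 3 1
#
# This scores point-wise interval proposals with the Hotelling's T^2 method on
# the synthetic dataset, using a maximum interval length of 100 and a
# time-delay embedding with dimension 3 and lag 1.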
        cur_regions = []
        for func in data[ftype]:
            gts.append(func['gt'])
            cur_regions.append(maxdiv.maxdiv(func['ts'], method=METHOD, mode=MODE,
                                             preproc='normalize', td_dim=6, td_lag=2,
                                             num_intervals=None, extint_min_len=20,
                                             extint_max_len=100, proposals=propmeth,
                                             proposalparameters=propparams))
        aps.append(eval.average_precision(gts, cur_regions))
        ygts += gts
        regions += cur_regions
    
    ap[id][sd_th] = eval.average_precision(ygts, regions)
    mean_ap[id][sd_th] = np.mean(aps)

# Print results as table
hdiv_len = 5 + sum(len(lbl) + 3 for lbl in labels.values())  # length of the horizontal divider
print('\n-- Overall Average Precision --\n')
print('     |' + '|'.join(' {} '.format(lbl) for lbl in labels.values()))
print('{:-<{}s}'.format('', hdiv_len))
for sd_th in THS:
    if len(truth) == 0:
        continue
    gt.append(truth)
    if len(files) == 1:
        printTruth(text, truth)
    
    # Extract features
    if args.feat == 'word2vec':
        feat = text2mat(text, Word2Vec.load(args.model))
    elif args.feat == 'function_words':
        words = loadFunctionWords(args.wordlist)
        feat = wordFreq(text_sent, words)
    
    # Run detector
    start = time.time()
    intervals = maxdiv.maxdiv(feat, **parameters)
    stop = time.time()
    if args.feat == 'function_words':
        intervals = sentDet2wordDet(text_sent, intervals)
    detections.append(intervals)
    
    # Show results
    if len(files) == 1:
        printDetectedParagraphs(text, intervals)
    print('\nThe search for anomalous paragraphs in a text of {} words took {} seconds.'.format(len(text), stop - start))

if len(files) > 1:
    ap = eval.average_precision(gt, detections, plot=True)
    print('AP: {}'.format(ap))