def main():
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('res', help='Experiment result (res) file.')
    parser.add_argument('--plot', required=True, nargs='*',
                        help='Where to save plot.')
    args = parser.parse_args()

    print('Loading previous results [{}]'.format(args.res))
    with open(args.res, 'rb') as res_file:
        results = pickle.load(res_file)
    avstatsl = results['avstats']

    print('Averaging results')
    # Accumulate the per-run antivirus statistics into the first dict
    # (missing antivirus entries count as zero), then divide by the number
    # of runs to obtain the average.
    for i in range(1, len(avstatsl)):
        for av in set(avstatsl[0]) | set(avstatsl[i]):
            avstatsl[0][av] = avstatsl[0].get(av, 0) + avstatsl[i].get(av, 0)
    for av in avstatsl[0]:
        avstatsl[0][av] /= float(len(avstatsl))
    avstats = avstatsl[0]

    print('Plotting antivirus detection statistics')
    plots.init_eurasip_style(figure_width=222.5, horizontal=False)
    plots.plot_avstats(avstats, args.plot)
    return 0
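
# Hedged sketch: a helper for producing a toy result file to smoke-test the
# plotting above. The pickled structure is an assumption inferred only from
# the key this script reads ('avstats' as a list of per-run dicts mapping an
# antivirus name to a detection statistic); real result files are written by
# the experiment pipeline and contain additional keys (see the comparison
# script further below).
def make_toy_res_file(path):
    toy = {'avstats': [{'AV-1': 0.9, 'AV-2': 0.7},
                       {'AV-1': 0.8, 'AV-2': 0.6}]}
    with open(path, 'wb') as res_file:
        pickle.dump(toy, res_file)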
def main():
    # Column order of the per-period feature statistics.
    stats = ['OLD', 'Add', 'Del', 'Same', 'NEW']
    metric_names = {'Add': 'New features',
                    'Del': 'Obsolete features',
                    'Same': 'Unchanged features'}

    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--first', nargs='+', required=True,
                        help='Feature files of the first method.')
    parser.add_argument('--second', nargs='+', required=True,
                        help='Feature files of the second method.')
    parser.add_argument('--methods', nargs='+', required=True,
                        help='Names of all methods, in the same '
                             'order as result files.')
    parser.add_argument('--metrics', choices=metric_names.keys(), nargs='+',
                        required=True, help='Which metrics to compare on.')
    parser.add_argument('--legend', default=False,
                        help='Where to put legend.')
    parser.add_argument('--plot', required=True, nargs='+',
                        help='Where to save plot.')
    args = parser.parse_args()

    dd = OrderedDict()
    for feats, method in zip((args.first, args.second), args.methods):
        data = []
        # Compare the feature sets of each pair of consecutive retraining
        # periods.
        for f1, f2 in zip(feats[:-1], feats[1:]):
            l1, l2 = get_feats(f1), get_feats(f2)
            tot_old = len(l1)
            tot_new = len(l2)
            s1, s2 = set(l1), set(l2)
            add = len(s2.difference(s1))
            rem = len(s1.difference(s2))
            same = len(s1.intersection(s2))
            data.append([tot_old, add, rem, same, tot_new])
        dd[method] = dict(zip(stats, [numpy.array(d) for d in zip(*data)]))

    for m, d in dd.items():
        print('{:#^79s}'.format(' {} '.format(m)))
        print_table(d, stats)
        print()

    # Plot setup
    plots.init_eurasip_style(figure_width=222.5, figure_height=265.0,
                             horizontal=len(args.metrics) < 2)
    datas = []
    for metric in args.metrics:
        # Normalize each metric by the number of features in the old model.
        datas.append([dd[method][metric].astype(numpy.float32) /
                      dd[method][stats[0]]
                      for method in args.methods])
    ylabels = [metric_names[msn] for msn in args.metrics]
    xticklabels = [str(p) for p in range(2, len(args.first) + 1)]
    plots.sorted_multicomparison(datas=datas, methods=args.methods,
                                 legend=args.legend, ylabels=ylabels,
                                 xlabel='Retraining period',
                                 xticklabels=xticklabels,
                                 plotfs=args.plot, autofmt_xdate=False)
    return 0
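
# Hedged sketch of the get_feats() helper used above, assuming each feature
# file simply lists one feature identifier per line. The actual helper lives
# elsewhere in the code base and may parse a richer format; this version is
# only meant to show the interface the comparison loop relies on (a sequence
# of hashable feature names).
def get_feats(feat_file):
    with open(feat_file) as f:
        return [line.strip() for line in f if line.strip()]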
def main():
    metric_names = {'neg_tr': 'Benign training',
                    'pos_tr': 'Malicious training',
                    'neg_te': 'Benign evaluation',
                    'pos_te': 'Malicious evaluation',
                    'acc': 'Accuracy',
                    'AUC': 'Area under ROC',
                    'TPR': 'True positive rate',
                    'FPR': 'False positive rate'}

    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--res', nargs='+', required=True,
                        help='Result files of all methods.')
    parser.add_argument('--methods', nargs='+', required=True,
                        help='Names of all methods, in the same '
                             'order as result files.')
    parser.add_argument('--metrics', choices=metric_names.keys(), nargs='+',
                        required=True, help='Which metrics to compare on.')
    parser.add_argument('-l', '--log', action='store_true',
                        help='Plot y-axis using log scale.')
    parser.add_argument('--legend', default=False,
                        help='Where to put legend.')
    parser.add_argument('--plot', required=True, nargs='*',
                        help='Where to save plot(s).')
    args = parser.parse_args()

    assert len(args.res) == len(args.methods), \
        'There must be an equal number of result files and method names'

    methods = {}
    key_dates = []
    stats = []
    for res_f, method in zip(args.res, args.methods):
        print('Loading results for method {} [{}]'.format(method, res_f))
        with open(res_f, 'rb') as res_file:
            results = pickle.load(res_file)
        resl = results['res']
        avstatsl = results['avstats']
        key_dates = results['key_dates']
        stats = results['stats']

        print('Averaging results', end='\n\n')
        # Average over runs, then reshape into one row of statistics per
        # evaluation period.
        means = numpy.mean(resl, axis=0)
        means = means.reshape((len(means) // len(stats), len(stats)))
        neg_tr, pos_tr, neg_te, pos_te, acc, AUC, TPR, FPR = zip(*means)

        # Average the per-run antivirus statistics (missing antivirus
        # entries count as zero).
        for i in range(1, len(avstatsl)):
            for av in set(avstatsl[0]) | set(avstatsl[i]):
                avstatsl[0][av] = (avstatsl[0].get(av, 0) +
                                   avstatsl[i].get(av, 0))
        for av in avstatsl[0]:
            avstatsl[0][av] /= float(len(avstatsl))
        avstats = avstatsl[0]

        methods[method] = {'res': dict(zip(stats, zip(*means))),
                           'avstats': avstats}

    print('Dates ranging from {} to {}'.format(key_dates[0], key_dates[-1]))
    print('Total days: {}'.format((key_dates[-1] - key_dates[0]).days + 1))

    # Plot setup
    plots.init_eurasip_style(figure_width=222.5,
                             horizontal=len(args.metrics) < 2)
    ylabels = [metric_names[msn] for msn in args.metrics]
    xticklabels = [d.strftime('%b %d') for d in key_dates]
    years_range = sorted(set(d.strftime('%Y') for d in key_dates))
    if len(years_range) > 2:
        years_range = [years_range[0], years_range[-1]]
    xlabel = 'Date ({})'.format(' - '.join(years_range))
    datas = []
    for metric in args.metrics:
        datas.append([methods[method]['res'][metric]
                      for method in args.methods])
    plots.sorted_multicomparison(datas=datas, methods=args.methods,
                                 legend=args.legend, ylabels=ylabels,
                                 xlabel=xlabel, xticklabels=xticklabels,
                                 plotfs=args.plot, autofmt_xdate=True)
    return 0
def main():
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--train', nargs='+', required=True,
                        help='Training data file(s).')
    parser.add_argument('--test', nargs='+', required=True,
                        help='Test data file(s).')
    parser.add_argument('-l', '--log', action='store_true',
                        help='Y-axis log scale.')
    parser.add_argument('--legend', default=False,
                        help='Where to put legend.')
    parser.add_argument('--data-plot', required=True, nargs='*',
                        help='Where to save data quantity plot.')
    args = parser.parse_args()

    print('\nEvaluating data in time periods')
    res = []
    key_dates = []
    all_years = set()
    for w, (f_tr, f_te) in enumerate(zip(args.train, args.test), start=1):
        # Load test data
        y_te = load_libsvm_labels(f_te)
        pos_te, neg_te = (y_te > 0.5).sum(), (y_te < 0.5).sum()

        # Load test dates
        dates = numpy.array(load_dates(f_te))
        week_s, week_e = dates.min(), dates.max()
        key_dates.append(week_s)
        print('Period {} [{} - {}]'.format(w, week_s, week_e))
        all_years.add(str(week_s.year))

        # Load training data
        y_tr = load_libsvm_labels(f_tr)
        pos_tr, neg_tr = (y_tr > 0.5).sum(), (y_tr < 0.5).sum()

        print('Training: {} malicious, {} benign'.format(pos_tr, neg_tr))
        print('Test: {} malicious, {} benign'.format(pos_te, neg_te),
              end='\n\n')
        res.append((pos_tr, neg_tr, pos_te, neg_te))

    pos_tr, neg_tr, pos_te, neg_te = zip(*res)
    print('Dates ranging from {} to {}'.format(key_dates[0], key_dates[-1]))
    print('Total days: {}'.format((key_dates[-1] - key_dates[0]).days + 1))

    print('Plotting training and test sizes')
    bar_width = 0.35
    spacing = 0.05  # spacing between a pair of training/test bars
    xticks = numpy.arange(len(pos_tr)).astype(numpy.float32)

    # Plot
    plots.init_eurasip_style(figure_width=222.5, figure_height=170.0)
    fig = pylab.figure()
    ax = pylab.gca()
    ax.bar(xticks - bar_width - spacing, neg_tr, width=bar_width,
           color='#00691f', linewidth=0, label='Benign training')
    ax.bar(xticks - bar_width - spacing, pos_tr, bottom=neg_tr,
           width=bar_width, color='#a50007', linewidth=0,
           label='Malicious training')
    ax.bar(xticks + spacing, neg_te, width=bar_width, color='#67bc6b',
           linewidth=0, label='Benign evaluation')
    ax.bar(xticks + spacing, pos_te, bottom=neg_te, width=bar_width,
           color='#ff767d', linewidth=0, label='Malicious evaluation')

    # Set up x axis
    ax.set_xticks(xticks)
    ax.set_xticklabels([d.strftime('%b %d') for d in key_dates])
    ax.set_xlim((-2.0 * spacing - bar_width,
                 len(pos_tr) - 1 + 2.0 * spacing + bar_width))
    years_range = sorted(all_years)
    if len(years_range) > 2:
        years_range = [years_range[0], years_range[-1]]
    ax.set_xlabel('Date ({})'.format(' - '.join(years_range)))
    fig.autofmt_xdate()

    # Set up y axis
    pylab.ticklabel_format(axis='y', style='sci', scilimits=(0, 2),
                           useOffset=False)
    ax.yaxis.grid()          # horizontal grid lines
    ax.set_axisbelow(True)   # grid lines are behind the rest
    if args.log:
        ax.set_yscale('log')
    ax.set_ylabel('Samples')

    # Set up legend
    legend_loc = args.legend if args.legend else 'best'
    if legend_loc != 'none':
        pylab.legend(loc=legend_loc, fancybox=True, framealpha=0.5)

    # Finalize plot setup
    pylab.tight_layout(pad=0.5, h_pad=0.5, w_pad=0.5, rect=(0, 0, 1, 1))
    for plot_file in args.data_plot:
        pylab.savefig(plot_file)
    return 0
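
# Hedged sketch of the load_libsvm_labels() helper used above, assuming the
# data files are in plain LIBSVM format, where the first whitespace-separated
# token of every line is the numeric label. The real helper (and load_dates,
# which is data-set specific and not sketched here) live elsewhere in the
# code base.
def load_libsvm_labels(libsvm_file):
    labels = []
    with open(libsvm_file) as f:
        for line in f:
            line = line.strip()
            if line:
                labels.append(float(line.split(None, 1)[0]))
    return numpy.array(labels)


# Assumed entry point (not shown in the excerpts above): each script
# presumably propagates main()'s return value as the process exit code.
if __name__ == '__main__':
    import sys
    sys.exit(main())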