コード例 #1
0
ファイル: avstats.py プロジェクト: srndic/hidost-reproduction
def main():
    """Average antivirus detection statistics over runs and plot them.

    Parses the command line, loads a pickled experiment result file, averages
    the mappings stored under its ``'avstats'`` key (one mapping per run,
    keyed by antivirus), and passes the averaged mapping together with the
    ``--plot`` values to ``plots.plot_avstats``.

    Returns:
        0 on success.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('res', help='Experiment result (res) file.')
    parser.add_argument('--plot',
                        required=True,
                        nargs='*',
                        help='Where to save plot.')

    args = parser.parse_args()

    print('Loading previous results [{}]'.format(args.res))
    # NOTE: unpickling can execute arbitrary code; only load result files
    # that come from a trusted source.
    with open(args.res, 'rb') as res_file:
        results = pickle.load(res_file)
    avstatsl = results['avstats']

    print('Averaging results')
    # Accumulate every run into the first mapping. An antivirus that is
    # missing from a run contributes 0 to the sum for that run.
    avstats = avstatsl[0]
    for other in avstatsl[1:]:
        for av in set(avstats) | set(other):
            avstats[av] = avstats.get(av, 0) + other.get(av, 0)
    n_runs = float(len(avstatsl))
    for av in list(avstats):
        avstats[av] /= n_runs

    print('Plotting antivirus detection statistics')
    plots.init_eurasip_style(figure_width=222.5, horizontal=False)
    plots.plot_avstats(avstats, args.plot)
    return 0
コード例 #2
0
def main():
    """Compare how the feature sets of two methods change between periods.

    For each method, every pair of consecutive feature files is compared:
    the number of features in the old and new file, and the number of
    features added, removed and shared between them, are recorded. The
    statistics are printed as a table per method, and the selected metrics
    are plotted as a fraction of the old feature count ('OLD').

    Returns:
        0 on success.
    """
    stats = ['OLD', 'Add', 'Del', 'Same', 'NEW']
    metric_names = {
        'Add': 'New features',
        'Del': 'Obsolete features',
        'Same': 'Unchanged features'
    }
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--first',
                        nargs='+',
                        required=True,
                        help='Feature files of the first method.')
    parser.add_argument('--second',
                        nargs='+',
                        required=True,
                        help='Feature files of the second method.')
    parser.add_argument('--methods',
                        nargs='+',
                        required=True,
                        help='Names of all methods, in the same '
                        'order as result files.')
    parser.add_argument('--metrics',
                        choices=list(metric_names),
                        nargs='+',
                        required=True,
                        help='Which metrics to compare on.')
    parser.add_argument('--legend', default=False, help='Where to put legend.')
    parser.add_argument('--plot',
                        required=True,
                        nargs='+',
                        help='Where to save plot.')
    args = parser.parse_args()

    # Only two feature file lists exist (--first and --second), so a third
    # method name could never be matched to data.
    if len(args.methods) > 2:
        parser.error('At most two method names are supported.')

    dd = OrderedDict()
    for feats, method in zip((args.first, args.second), args.methods):
        if len(feats) < 2:
            parser.error('Method {} needs at least two feature files '
                         'to compare.'.format(method))
        data = []
        # Walk over consecutive (old, new) pairs of feature files.
        for f1, f2 in zip(feats[:-1], feats[1:]):
            l1, l2 = get_feats(f1), get_feats(f2)
            tot_old = len(l1)
            tot_new = len(l2)
            s1, s2 = set(l1), set(l2)
            add = len(s2 - s1)
            rem = len(s1 - s2)
            same = len(s1 & s2)
            data.append([tot_old, add, rem, same, tot_new])
        # Transpose rows (one per file pair) into one array per statistic.
        dd[method] = dict(zip(stats, [numpy.array(d) for d in zip(*data)]))

    for m, d in dd.items():
        print('{:#^79s}'.format(' {} '.format(m)))
        print_table(d, stats)
        print()

    # Plot setup
    plots.init_eurasip_style(figure_width=222.5,
                             figure_height=265.0,
                             horizontal=len(args.metrics) < 2)
    datas = []
    for metric in args.metrics:
        # Each metric is normalized by the old feature count (stats[0]).
        datas.append([
            dd[method][metric].astype(numpy.float32) / dd[method][stats[0]]
            for method in args.methods
        ])
    ylabels = [metric_names[msn] for msn in args.metrics]
    # A real list (not a one-shot iterator) so the labels can be reused.
    xticklabels = [str(period) for period in range(2, len(args.first) + 1)]

    plots.sorted_multicomparison(datas=datas,
                                 methods=args.methods,
                                 legend=args.legend,
                                 ylabels=ylabels,
                                 xlabel='Retraining period',
                                 xticklabels=xticklabels,
                                 plotfs=args.plot,
                                 autofmt_xdate=False)
    return 0
コード例 #3
0
def main():
    """Plot feature-set churn of two methods across retraining periods.

    Consecutive feature files of each method are compared pairwise. For
    every pair the old and new feature counts are recorded together with the
    number of added, removed and unchanged features. A table of these
    statistics is printed per method, and each requested metric is plotted
    relative to the old feature count ('OLD').

    Returns:
        0 on success.
    """
    stats = ['OLD', 'Add', 'Del', 'Same', 'NEW']
    metric_names = {'Add': 'New features',
                    'Del': 'Obsolete features',
                    'Same': 'Unchanged features'}
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--first',
                        nargs='+',
                        required=True,
                        help='Feature files of the first method.')
    parser.add_argument('--second',
                        nargs='+',
                        required=True,
                        help='Feature files of the second method.')
    parser.add_argument('--methods',
                        nargs='+',
                        required=True,
                        help='Names of all methods, in the same '
                        'order as result files.')
    parser.add_argument('--metrics',
                        choices=list(metric_names),
                        nargs='+',
                        required=True,
                        help='Which metrics to compare on.')
    parser.add_argument('--legend',
                        default=False,
                        help='Where to put legend.')
    parser.add_argument('--plot',
                        required=True,
                        nargs='+',
                        help='Where to save plot.')
    args = parser.parse_args()

    # Method names are paired with --first and --second, so any name beyond
    # the second would have no data behind it.
    if len(args.methods) > 2:
        parser.error('At most two method names are supported.')

    dd = OrderedDict()
    for feats, method in zip((args.first, args.second), args.methods):
        if len(feats) < 2:
            parser.error('Method {} needs at least two feature files '
                         'to compare.'.format(method))
        data = []
        # Pair every feature file with its successor.
        for f1, f2 in zip(feats[:-1], feats[1:]):
            l1, l2 = get_feats(f1), get_feats(f2)
            s1, s2 = set(l1), set(l2)
            data.append([len(l1),        # OLD
                         len(s2 - s1),   # Add
                         len(s1 - s2),   # Del
                         len(s1 & s2),   # Same
                         len(l2)])       # NEW
        # One numpy array per statistic, with one entry per file pair.
        dd[method] = dict(zip(stats, [numpy.array(d) for d in zip(*data)]))

    for m, d in dd.items():
        print('{:#^79s}'.format(' {} '.format(m)))
        print_table(d, stats)
        print()

    # Plot setup
    plots.init_eurasip_style(figure_width=222.5,
                             figure_height=265.0,
                             horizontal=len(args.metrics) < 2)
    datas = []
    for metric in args.metrics:
        # Express each metric as a fraction of the old feature count.
        datas.append([dd[method][metric].astype(numpy.float32) /
                      dd[method][stats[0]]
                      for method in args.methods])
    ylabels = [metric_names[msn] for msn in args.metrics]
    # Build a concrete list; map() returns a one-shot iterator on Python 3.
    xticklabels = [str(period) for period in range(2, len(args.first) + 1)]

    plots.sorted_multicomparison(datas=datas,
                                 methods=args.methods,
                                 legend=args.legend,
                                 ylabels=ylabels,
                                 xlabel='Retraining period',
                                 xticklabels=xticklabels,
                                 plotfs=args.plot,
                                 autofmt_xdate=False)
    return 0
コード例 #4
0
def main():
    """Compare the results of several methods on the selected metrics.

    For every (result file, method name) pair the pickled results are loaded
    and averaged, and the per-antivirus statistics are averaged too. The
    requested metrics of all methods are then plotted against the key dates
    via ``plots.sorted_multicomparison``.

    Returns:
        0 on success.
    """
    metric_names = {'neg_tr': 'Benign training',
                    'pos_tr': 'Malicious training',
                    'neg_te': 'Benign evaluation',
                    'pos_te': 'Malicious evaluation',
                    'acc': 'Accuracy',
                    'AUC': 'Area under ROC',
                    'TPR': 'True positive rate',
                    'FPR': 'False positive rate'}
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--res',
                        nargs='+',
                        required=True,
                        help='Result files of all methods.')
    parser.add_argument('--methods',
                        nargs='+',
                        required=True,
                        help='Names of all methods, in the same '
                        'order as result files.')
    parser.add_argument('--metrics',
                        choices=list(metric_names),
                        nargs='+',
                        required=True,
                        help='Which metrics to compare on.')
    # NOTE(review): args.log is parsed but not used anywhere in this function.
    parser.add_argument('-l', '--log',
                        action='store_true',
                        help='Plot y-axis using log scale.')
    parser.add_argument('--legend',
                        default=False,
                        help='Where to put legend.')
    parser.add_argument('--plot',
                        required=True,
                        nargs='*',
                        help='Where to save plot(s).')

    args = parser.parse_args()

    # Validate explicitly: an assert would be stripped when running with -O.
    if len(args.res) != len(args.methods):
        parser.error('There must be an equal number of result files '
                     'and method names.')

    methods = {}
    key_dates = []
    stats = []
    for res_f, method in zip(args.res, args.methods):
        print('Loading results for method {} [{}]'.format(method, res_f))
        # NOTE: unpickling can execute arbitrary code; only load result
        # files that come from a trusted source.
        with open(res_f, 'rb') as res_file:
            results = pickle.load(res_file)
        resl = results['res']
        avstatsl = results['avstats']
        # key_dates and stats are overwritten per file; the values from the
        # last result file are the ones used for plotting below.
        key_dates = results['key_dates']
        stats = results['stats']

        print('Averaging results', end='\n\n')
        means = numpy.mean(resl, axis=0)
        # One row per period and one column per statistic. Floor division
        # keeps the row count an integer on Python 3 as well.
        means = means.reshape((len(means) // len(stats), len(stats)))

        # Sum the antivirus statistics of all runs into the first mapping.
        # An antivirus missing from a run contributes 0 for that run.
        avstats = avstatsl[0]
        for other in avstatsl[1:]:
            for av in set(avstats) | set(other):
                avstats[av] = avstats.get(av, 0) + other.get(av, 0)
        n_runs = float(len(avstatsl))
        for av in list(avstats):
            avstats[av] /= n_runs

        # Transpose the means so each statistic name maps to its column.
        methods[method] = {'res': dict(zip(stats, zip(*means))),
                           'avstats': avstats}

    print('Dates ranging from {} to {}'.format(key_dates[0], key_dates[-1]))
    print('Total days: {}'.format((key_dates[-1] - key_dates[0]).days + 1))

    # Plot setup
    plots.init_eurasip_style(figure_width=222.5,
                             horizontal=len(args.metrics) < 2)
    ylabels = [metric_names[msn] for msn in args.metrics]
    xticklabels = [d.strftime('%b %d') for d in key_dates]
    years_range = sorted(set(d.strftime('%Y') for d in key_dates))
    # With more than two distinct years, show only the first and the last.
    if len(years_range) > 2:
        years_range = [years_range[0], years_range[-1]]
    xlabel = 'Date ({})'.format(' - '.join(years_range))
    datas = []
    for metric in args.metrics:
        datas.append([methods[method]['res'][metric]
                      for method in args.methods])

    plots.sorted_multicomparison(datas=datas,
                                 methods=args.methods,
                                 legend=args.legend,
                                 ylabels=ylabels,
                                 xlabel=xlabel,
                                 xticklabels=xticklabels,
                                 plotfs=args.plot,
                                 autofmt_xdate=True)
    return 0
コード例 #5
0
def main():
    """Plot the amount of training and test data in every time period.

    Training and test files are processed in pairs, one pair per period. For
    each pair the samples with a label above 0.5 (reported as malicious) and
    below 0.5 (reported as benign) are counted, and the earliest date of the
    test file becomes the period's tick label. The counts are drawn as
    stacked bars, one training and one test bar per period, and the figure
    is saved to every path given with ``--data-plot``.

    Returns:
        0 on success.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--train',
                        nargs='+',
                        required=True,
                        help='Training data file(s).')
    parser.add_argument('--test',
                        nargs='+',
                        required=True,
                        help='Test data file(s).')
    parser.add_argument('-l', '--log',
                        action='store_true',
                        help='Plot y-axis using log scale.')
    parser.add_argument('--legend',
                        default=False,
                        help='Where to put legend.')
    parser.add_argument('--data-plot',
                        required=True,
                        nargs='*',
                        help='Where to save data quantity plot.')

    args = parser.parse_args()

    # zip() below would silently drop the surplus files of the longer list.
    if len(args.train) != len(args.test):
        parser.error('There must be an equal number of training '
                     'and test files.')

    print('\nEvaluating data in time periods')
    res = []
    key_dates = []
    all_years = set()
    for w, (f_tr, f_te) in enumerate(zip(args.train, args.test), start=1):
        # Load test data
        y_te = load_libsvm_labels(f_te)
        pos_te, neg_te = (y_te > 0.5).sum(), (y_te < 0.5).sum()

        # Load test dates
        dates = numpy.array(load_dates(f_te))
        week_s, week_e = dates.min(), dates.max()
        key_dates.append(week_s)
        print('Period {} [{} - {}]'.format(w, week_s, week_e))
        all_years.add(str(week_s.year))

        # Load training data
        y_tr = load_libsvm_labels(f_tr)
        pos_tr, neg_tr = (y_tr > 0.5).sum(), (y_tr < 0.5).sum()

        print('Training: {} malicious, {} benign'.format(pos_tr, neg_tr))
        print('Test: {} malicious, {} benign'.format(pos_te, neg_te),
              end='\n\n')
        res.append((pos_tr, neg_tr, pos_te, neg_te))

    # Transpose the per-period tuples into one sequence per count.
    pos_tr, neg_tr, pos_te, neg_te = zip(*res)
    print('Dates ranging from {} to {}'.format(key_dates[0], key_dates[-1]))
    print('Total days: {}'.format((key_dates[-1] - key_dates[0]).days + 1))

    print('Plotting training and test sizes')
    bar_width = 0.35
    spacing = 0.05  # spacing between a pair of training/test bars
    xticks = numpy.arange(len(pos_tr)).astype(numpy.float32)

    # Plot: the training bar is placed left of the tick and the test bar
    # right of it; malicious counts are stacked on top of benign counts.
    plots.init_eurasip_style(figure_width=222.5, figure_height=170.0)
    fig = pylab.figure()
    ax = pylab.gca()
    ax.bar(xticks - bar_width - spacing, neg_tr, width=bar_width,
           color='#00691f', linewidth=0, label='Benign training')
    ax.bar(xticks - bar_width - spacing, pos_tr, bottom=neg_tr,
           width=bar_width, color='#a50007', linewidth=0,
           label='Malicious training')
    ax.bar(xticks + spacing, neg_te, width=bar_width, color='#67bc6b',
           linewidth=0, label='Benign evaluation')
    ax.bar(xticks + spacing, pos_te, bottom=neg_te, width=bar_width,
           color='#ff767d', linewidth=0, label='Malicious evaluation')

    # Set up x axis
    ax.set_xticks(xticks)
    ax.set_xticklabels([d.strftime('%b %d') for d in key_dates])
    ax.set_xlim((-2.0 * spacing - bar_width,
                 len(pos_tr) - 1 + 2.0 * spacing + bar_width))
    years_range = sorted(all_years)
    # With more than two distinct years, show only the first and the last.
    if len(years_range) > 2:
        years_range = [years_range[0], years_range[-1]]
    ax.set_xlabel('Date ({})'.format(' - '.join(years_range)))
    fig.autofmt_xdate()

    # Set up y axis
    pylab.ticklabel_format(axis='y', style='sci', scilimits=(0, 2),
                           useOffset=False)
    ax.yaxis.grid()  # horizontal grid lines (at the y-axis ticks)
    ax.set_axisbelow(True)  # grid lines are behind the rest
    if args.log:
        ax.set_yscale('log')
    ax.set_ylabel('Samples')

    # Set up legend
    legend_loc = args.legend or 'best'
    if legend_loc != 'none':
        pylab.legend(loc=legend_loc, fancybox=True, framealpha=0.5)

    # Finalize plot setup
    pylab.tight_layout(pad=0.5, h_pad=0.5, w_pad=0.5, rect=(0, 0, 1, 1))
    for plot_file in args.data_plot:
        pylab.savefig(plot_file)

    return 0