Exemple #1
0
def filter(g,
           traceroutes,
           filters=('sh', 'loop', 'ex', 'vf', 'lp'),
           first_edge=True):
    """Clean *traceroutes* against graph *g* and keep those passing *filters*.

    NOTE: the name shadows the builtin ``filter``; it is kept because callers
    depend on it.

    Pre-processing (always applied):
      * traces with zero or one hop are dropped;
      * ``vft.trace_clean`` is applied (traces it rejects are counted as
        "ignored" -- presumably traces containing nodes missing from *g*);
      * ``vft.trace_in_vertex_id`` is applied (presumably maps node names to
        vertex ids of *g*; the result is indexed into ``g.vs`` below).

    Optional stages, applied in this fixed order when their key is present
    in *filters*:
      * ``'sh'``   -- keep traces with at least 3 nodes;
      * ``'loop'`` -- keep traces without a repeated node;
      * ``'ex'``   -- keep traces for which ``vft.trace_exists`` is truthy;
      * ``'vf'``   -- keep traces for which ``vft.is_valley_free`` is truthy;
      * ``'lp'``   -- keep traces for which ``vft.is_local_preferenced`` is
        truthy (*first_edge* is forwarded to it).

    :param g: graph object exposing ``g.vs[vertex_id]["name"]``.
    :param traceroutes: sequence of traces, each a sequence of nodes.
    :param filters: container of stage keys to enable (only membership is
        tested, so any container works).
    :param first_edge: forwarded to ``vft.is_local_preferenced``.
    :return: list of surviving traces, each a list of vertex ``"name"``
        attributes.
    """
    logger.info('Traceroutes: %d', len(traceroutes))
    # remove empty traces
    traceroutes = [x for x in traceroutes if len(x) > 0]
    logger.info('Non empty traceroutes: %d', len(traceroutes))
    traceroutes = [x for x in traceroutes if len(x) > 1]
    logger.info('Larger than one hop traceroutes: %d', len(traceroutes))
    # remove traces with unknown nodes
    traceroutes, ignored = vft.trace_clean(g, traceroutes)
    logger.info('Ignored: %d', ignored)
    traceroutes = vft.trace_in_vertex_id(g, traceroutes)
    logger.info('Trace count: %d', len(traceroutes))

    # (filter key, debug message, keep-predicate); tuple order is the order
    # in which the stages run.
    stages = (
        ('sh', 'Remove short traces',
         lambda trace: len(trace) >= 3),
        ('loop', 'Remove traces with loops',
         lambda trace: len(set(trace)) == len(trace)),
        ('ex', 'Remove non existent traces',
         lambda trace: vft.trace_exists(g, trace)),
        ('vf', 'Remove non vf traces',
         lambda trace: vft.is_valley_free(g, trace)),
        ('lp', 'Remove non lp traces',
         lambda trace: vft.is_local_preferenced(g, trace,
                                                first_edge=first_edge)),
    )

    good_traceroutes = traceroutes[:]
    for key, message, keep in stages:
        if key not in filters:
            continue
        logger.debug(message)
        good_traceroutes = [x for x in good_traceroutes if keep(x)]
        logger.debug('Remained: %d', len(good_traceroutes))

    # convert back node ids to node names
    good_traceroutes = [[g.vs[vertex_id]["name"] for vertex_id in trace]
                        for trace in good_traceroutes]
    logger.debug('%d', len(good_traceroutes))

    return good_traceroutes
    def test_trace_clean(self):
        """Check that ``vft.trace_clean`` keeps three of four traces.

        The second input trace starts with ``'NFAKE'`` (presumably a node
        absent from ``self.sample_graph``) and is the one expected to be
        reported as ignored.
        """
        expected_kept = [
            ['N0', 'N5', 'N4', 'N6'],
            ['N5', 'N5', 'N4', 'N5'],
            ['N5', 'N2', 'N4'],
        ]
        # Fresh copies go in, so the expectations stay independent of
        # anything the call might do to its argument.
        candidates = [list(trace) for trace in expected_kept]
        candidates.insert(1, ['NFAKE', 'N4', 'N5'])

        outcome = vft.trace_clean(self.sample_graph, candidates)
        cleaned_traces, ignored = outcome

        self.assertEqual(len(cleaned_traces), 3)
        self.assertEqual(ignored, 1)
        for trace in expected_kept:
            self.assertIn(trace, cleaned_traces)
Exemple #3
0
def _new_tally():
    """Return an empty accumulator for one path category (all/short/long)."""
    return {
        'vf': 0,                 # paths valley-free in PRELABELED mode
        'nonvf': 0,
        'vf_closeness': 0,       # paths valley-free in CLOSENESS mode
        'nonvf_closeness': 0,
        'probs': [],             # per-trace PRELABELED valley-free ratio
        'probs_multi': [],       # same ratio, only traces with > 1 path
        'probs_closeness': [],   # per-trace CLOSENESS valley-free ratio
    }


def _tally_paths(g, paths, tally):
    """Count valley-free *paths* in both modes and fold them into *tally*.

    A path counts as valley-free when ``vft.is_valley_free`` returns a truthy
    value for it. Ratios are recorded only when *paths* is non-empty, so an
    empty category never divides by zero.

    :return: ``(prelabeled_count, closeness_count)``.
    """
    total = len(paths)
    vf = sum(1 for p in paths
             if vft.is_valley_free(g, p, vfmode=vft.PRELABELED))
    vf_closeness = sum(1 for p in paths
                       if vft.is_valley_free(g, p, vfmode=vft.CLOSENESS))

    tally['vf'] += vf
    tally['nonvf'] += total - vf
    tally['vf_closeness'] += vf_closeness
    tally['nonvf_closeness'] += total - vf_closeness

    if total > 0:
        tally['probs'].append(vf / float(total))
        tally['probs_closeness'].append(vf_closeness / float(total))
    if total > 1:
        tally['probs_multi'].append(vf / float(total))
    return vf, vf_closeness


def _share(part, total):
    """Return ``part / total`` as a float, or 0.0 when *total* is zero."""
    return part / float(total) if total else 0.0


def _print_tally(title, tally, closeness_count_label='VF CLOSENESS count'):
    """Print one summary section; percentage lines are skipped when empty."""
    print(title)
    print('VF count: %d' % tally['vf'])
    print('%s: %d' % (closeness_count_label, tally['vf_closeness']))
    print('Non vf count: %d' % tally['nonvf'])
    print('Non vf CLOSENESS count: %d' % tally['nonvf_closeness'])
    total = tally['vf'] + tally['nonvf']
    if total > 0:
        print('VF perc: %f' % (tally['vf'] / float(total)))
    total_closeness = tally['vf_closeness'] + tally['nonvf_closeness']
    if total_closeness > 0:
        print('VF CLOSENESS perc: %f'
              % (tally['vf_closeness'] / float(total_closeness)))
    # np.mean of an empty list yields nan (with a RuntimeWarning).
    print('Mean VF prob: %f' % np.mean(tally['probs']))
    print('Mean VF CLOSENESS prob: %f' % np.mean(tally['probs_closeness']))
    print('Mean VF2 prob: %f' % np.mean(tally['probs_multi']))


def purify(g, meta, out, count=1000):
    """Annotate sampled meta records with path counts and print VF statistics.

    Steps:
      1. Keep only records that have no ``helpers.ALL_PATH_COUNT`` key yet,
         whose ``helpers.STRETCH`` is below 4 and whose trace has fewer than
         5 nodes.
      2. Randomly sample at most *count* of them.
      3. For every sampled trace that survives ``vft.trace_clean``, collect
         the paths returned by ``helpers.dfs_mark`` between the trace's first
         and last node (called with ``len(trace)``, presumably a length
         bound), split them into "same length as the trace" and "shorter",
         and count how many are valley-free in ``vft.PRELABELED`` and
         ``vft.CLOSENESS`` mode.
      4. Store the path counts and the CLOSENESS valley-free counts on the
         matching meta record.
      5. Save every record left after step 1 (sampled or not) with
         ``helpers.save_to_json(out, ...)`` and print a summary to stdout.

    Empty path sets and an empty selection no longer raise
    ``ZeroDivisionError``; the affected ratio is skipped (0.0 in the
    "Real ..." log lines).

    :param g: graph; its vertices get a fresh ``'traces'`` dict per trace.
    :param meta: iterable of dict-like meta records.
    :param out: destination handed to ``helpers.save_to_json``.
    :param count: maximum number of records to sample.
    :return: None.
    """
    tallies = {
        'all': _new_tally(),
        'short': _new_tally(),
        'long': _new_tally(),
    }

    # remove traces with already calculated all_path
    logger.warning('[r]ONLY NOT FILLED PATHS[/]')
    meta = [x for x in meta if helpers.ALL_PATH_COUNT not in x]

    # traces with a maximum stretch
    logger.warning('[r]!!!ONLY WITH LOW STRETCH[/]')
    meta = [x for x in meta if x[helpers.STRETCH] < 4]

    # shorter meta records
    logger.warning('[r]!!!ONLY SHORT TRACES[/]')
    meta = [x for x in meta if len(x[helpers.TRACE]) < 5]

    # Built before sampling: unsampled records are saved too, unmodified.
    meta_map = {tuple(x[helpers.TRACE]): x for x in meta}

    logger.info('All trace count: %d', len(meta))
    tr_count = min(len(meta), count)
    meta = random.sample(meta, tr_count)
    logger.info('Chosen trace count: %d', len(meta))

    chosen = len(meta)
    real_vf = sum(1 for x in meta if x[helpers.IS_VF] == 1)
    real_nonvf = sum(1 for x in meta if x[helpers.IS_VF] == 0)
    real_vf_closeness = sum(
        1 for x in meta if x[helpers.IS_VF_CLOSENESS] == 1)
    real_nonvf_closeness = sum(
        1 for x in meta if x[helpers.IS_VF_CLOSENESS] == 0)

    logger.info('Real vf: %f[%d]', _share(real_vf, chosen), real_vf)
    logger.info('Real nonvf: %f[%d]', _share(real_nonvf, chosen), real_nonvf)
    logger.info('Real vf closeness: %f[%d]',
                _share(real_vf_closeness, chosen), real_vf_closeness)
    logger.info('Real nonvf closeness: %f[%d]',
                _share(real_nonvf_closeness, chosen), real_nonvf_closeness)

    logger.info('Remove unknown traces. Trace count before: %d', len(meta))
    traceroutes = [x[helpers.TRACE] for x in meta]
    traceroutes, ignored = vft.trace_clean(g, traceroutes)
    logger.info('Traceroutes after: %d. Ignored: %d',
                len(traceroutes), ignored)

    traceroutes = vft.trace_in_vertex_id(g, traceroutes)

    progress = progressbar1.AnimatedProgressBar(end=len(traceroutes), width=15)
    for trace in traceroutes:
        progress += 1
        progress.show_progress()

        # Reset the per-vertex 'traces' attribute before each search
        # (presumably scratch state used by helpers.dfs_mark -- TODO confirm).
        for x in range(0, g.vcount()):
            g.vs[x]['traces'] = dict()

        s, t = trace[0], trace[-1]
        # dfs_mark gets a deep copy, so g itself is not handed to it.
        all_path = helpers.dfs_mark(copy.deepcopy(g), s, t, len(trace))

        # sanity check: every path must join s to t and be unique
        for x in all_path:
            if x[0] != s or x[-1] != t:
                logger.error('ALERT')
        unique_paths = set(tuple(x) for x in all_path)
        if len(unique_paths) != len(all_path):
            logger.error('LENGTH ALERT')
            logger.error('%s', len(all_path))
            logger.error('%s', len(unique_paths))
            logger.error('%s', sorted(all_path))

        long_path = [x for x in all_path if len(x) == len(trace)]
        short_path = [x for x in all_path if len(x) < len(trace)]

        # meta_map is keyed by node names, the loop works on vertex ids.
        named_trace = tuple(g.vs[x]['name'] for x in trace)
        record = meta_map[named_trace]
        record.update({
            helpers.ALL_PATH_COUNT: len(all_path),
            helpers.SAME_LONG_PATH_COUNT: len(long_path),
            helpers.SHORTER_PATH_COUNT: len(short_path),
        })

        _, vf_closeness_count = _tally_paths(g, all_path, tallies['all'])
        _, short_vf_closeness_count = _tally_paths(
            g, short_path, tallies['short'])
        _, long_vf_closeness_count = _tally_paths(
            g, long_path, tallies['long'])

        # The stored *_VF_COUNT values are the CLOSENESS-mode counts.
        record.update({
            helpers.ALL_PATH_VF_COUNT: vf_closeness_count,
            helpers.SAME_LONG_PATH_VF_COUNT: long_vf_closeness_count,
            helpers.SHORTER_PATH_VF_COUNT: short_vf_closeness_count,
        })

    # save modified meta
    helpers.save_to_json(out, list(meta_map.values()))

    # print results
    _print_tally('ALL', tallies['all'])
    print('==========')
    # The doubled space in this label matches the established output.
    _print_tally('SHORT', tallies['short'],
                 closeness_count_label='VF  CLOSENESS count')
    print('=-----------------')
    _print_tally('LONG', tallies['long'])