Exemple #1
0
def filter(g,
           traceroutes,
           filters=('sh', 'loop', 'ex', 'vf', 'lp'),
           first_edge=True):
    """Clean `traceroutes` and keep only those that pass `filters`.

    Always applied, in this order: drop traces with fewer than two
    elements, run ``vft.trace_clean`` (its second return value is logged as
    the ignored count) and convert the rest with ``vft.trace_in_vertex_id``.

    Optional steps run in a fixed order, each only when its key is in
    `filters`:

    * ``'sh'``   -- keep traces with at least three elements
    * ``'loop'`` -- keep traces without repeated elements
    * ``'ex'``   -- keep traces accepted by ``vft.trace_exists``
    * ``'vf'``   -- keep traces accepted by ``vft.is_valley_free``
    * ``'lp'``   -- keep traces accepted by ``vft.is_local_preferenced``,
      which receives `first_edge`

    :param g: graph whose vertices carry a ``"name"`` attribute
    :param traceroutes: sequence of traces
    :param filters: collection of step keys; only membership is tested
    :param first_edge: forwarded to ``vft.is_local_preferenced``
    :return: list of the surviving traces, each as a list of vertex names
    """
    logger.info('Traceroutes: %d', len(traceroutes))
    # remove empty traces
    traceroutes = [x for x in traceroutes if len(x) > 0]
    logger.info('Non empty traceroutes: %d', len(traceroutes))
    traceroutes = [x for x in traceroutes if len(x) > 1]
    logger.info('Larger than one hop traceroutes: %d', len(traceroutes))
    # remove traces with unknown nodes
    traceroutes, ignored = vft.trace_clean(g, traceroutes)
    logger.info('Ignored: %d', ignored)
    traceroutes = vft.trace_in_vertex_id(g, traceroutes)
    logger.info('Trace count: %d', len(traceroutes))

    # (filter key, debug message, predicate a trace must satisfy to stay)
    steps = (
        ('sh', 'Remove short traces',
         lambda trace: len(trace) >= 3),
        ('loop', 'Remove traces with loops',
         lambda trace: len(set(trace)) == len(trace)),
        ('ex', 'Remove non existent traces',
         lambda trace: vft.trace_exists(g, trace)),
        ('vf', 'Remove non vf traces',
         lambda trace: vft.is_valley_free(g, trace)),
        ('lp', 'Remove non lp traces',
         lambda trace: vft.is_local_preferenced(g, trace,
                                                first_edge=first_edge)),
    )

    good_traceroutes = traceroutes
    for key, message, keep in steps:
        if key not in filters:
            continue
        logger.debug(message)
        good_traceroutes = [x for x in good_traceroutes if keep(x)]
        logger.debug('Remained: %d', len(good_traceroutes))

    # convert back node ids to node names
    good_traceroutes = [[g.vs[vertex_id]["name"] for vertex_id in trace]
                        for trace in good_traceroutes]
    logger.debug('%d', len(good_traceroutes))

    return good_traceroutes
def vf_attributes(g, trace, vfmode, get_lp_soft, get_lp_hard, vf_g=None):
    """Return the valley-free and local-preference labels of `trace`.

    :param g: graph passed through to the ``vft`` checks
    :param trace: the trace to classify
    :param vfmode: labeling mode forwarded to ``vft``
    :param get_lp_soft: compute the soft LP label (``first_edge=True``)
    :param get_lp_hard: compute the hard LP label (``first_edge=False``)
    :param vf_g: optional graph forwarded to ``vft.is_local_preferenced``
    :return: tuple ``(is_vf, is_lp_soft, is_lp_hard)`` of ints; ``is_vf`` is
        ``int()`` of what ``vft.is_valley_free`` returns, and each LP value
        is -1 when it was not requested or when the trace is not valley-free
    """
    is_vf = int(vft.is_valley_free(g, trace, vfmode))
    # -1 means "not computed"
    is_lp_soft = -1
    is_lp_hard = -1

    # LP is only evaluated for valley-free traces.
    if is_vf:
        if get_lp_soft:
            is_lp_soft = int(vft.is_local_preferenced(g, trace,
                                                      vf_g=vf_g,
                                                      first_edge=True,
                                                      vfmode=vfmode))
        if get_lp_hard:
            is_lp_hard = int(vft.is_local_preferenced(g, trace,
                                                      vf_g=vf_g,
                                                      first_edge=False,
                                                      vfmode=vfmode))

    return (is_vf, is_lp_soft, is_lp_hard)
Exemple #3
0
def ba_generator(ba_graph, sh_paths, stretch, vf_g, progressbar=False):
    """Count valley-free and locally preferenced random routes of a stretch.

    For every ``((s, t), shl)`` item of `sh_paths` a route is requested from
    ``helpers.random_route_walk`` with the limit ``shl + stretch``. Routes
    whose length minus ``shl`` differs from `stretch` are skipped. The
    remaining ones are checked with ``vft.is_valley_free`` (closeness mode)
    and, when valley-free, with ``vft.is_local_preferenced``
    (``first_edge=True``, using `vf_g`).

    :return: tuple ``(stretch, trace_count, vf_count, lp_count)``
    """
    # Start with the dummy bar; swap in the animated one on request.
    progress = progressbar1.DummyProgressBar(end=10, width=15)
    if progressbar:
        progress = progressbar1.AnimatedProgressBar(end=len(sh_paths),
                                                    width=15)

    accepted_routes = 0
    valley_free_routes = 0
    preferenced_routes = 0

    for endpoints, shortest_len in sh_paths:
        src, dst = endpoints
        progress += 1
        progress.show_progress()

        logger.debug('SH from %s to %s is %d' % (src, dst, shortest_len))
        route = helpers.random_route_walk(ba_graph, src, dst,
                                          shortest_len + stretch)
        logger.debug('Random route: %s' % route)

        # Only routes that realise exactly the requested stretch count.
        if len(route) - shortest_len != stretch:
            continue
        accepted_routes += 1

        valley_free = vft.is_valley_free(ba_graph,
                                         route,
                                         vfmode=vft.CLOSENESS)
        edge_dirs = vft.trace_to_string(ba_graph,
                                        route,
                                        vfmode=vft.CLOSENESS)
        logger.debug('Trace edge dir: %s' % edge_dirs)
        logger.debug('Is VF: %s' % valley_free)

        # LP is only evaluated for valley-free routes; otherwise it is 0.
        preferenced = 0
        if valley_free:
            preferenced = vft.is_local_preferenced(ba_graph,
                                                   route,
                                                   first_edge=True,
                                                   vfmode=vft.CLOSENESS,
                                                   vf_g=vf_g)
        logger.debug('Is LP: %s' % preferenced)

        valley_free_routes += int(valley_free)
        preferenced_routes += int(preferenced)

    logger.info('Stretch %d' % stretch)
    logger.info('Trace count: %d' % accepted_routes)
    logger.info('VF count: %d' % valley_free_routes)
    logger.info('LP count: %d' % preferenced_routes)

    return (stretch, accepted_routes, valley_free_routes, preferenced_routes)
def purify(g,
           meta_original,
           out,
           count=1000,
           try_per_race=1,
           show_progress=False,
           with_lp=True):
    """Measure how often random walks between trace endpoints are VF / LP.

    Records of `meta_original` that do not yet contain
    ``helpers.RANDOM_WALK_RUN_COUNT`` are candidates; at most `count` of them
    are sampled at random. Every sampled record gets a target stretch drawn
    (by shuffling) from the stretches observed in the sample. For each
    record, `try_per_race` routes are generated between the trace endpoints
    with ``helpers.random_route_walk`` and checked with
    ``vft.is_valley_free`` in closeness mode; when `with_lp` is true,
    valley-free routes are also checked with ``vft.is_local_preferenced``
    (soft: ``first_edge=True``, hard: ``first_edge=False``).

    The counters are merged into each sampled record, the sampled records
    are written with ``helpers.save_to_json(out, ...)`` and summary ratios
    are logged. Degree and prelabeled counters are stored as 0 because only
    the closeness mode is evaluated here.

    :param g: graph the traces refer to
    :param meta_original: iterable of trace records
    :param out: destination passed to ``helpers.save_to_json``
    :param count: maximum number of records to sample
    :param try_per_race: random routes generated per record
    :param show_progress: show an animated progress bar
    :param with_lp: also evaluate and store local-preference counters
    :return: None
    """
    empty = 0
    # only records that have no random-walk results yet
    logger.warning('[r]ONLY NOT FILLED PATHS[/]')
    meta_filled = [
        x for x in meta_original if helpers.RANDOM_WALK_RUN_COUNT not in x
    ]

    logger.info('All trace count: %d', len(meta_filled))
    tr_count = min(len(meta_filled), count)
    meta_random = random.sample(meta_filled, tr_count)
    logger.info('Chosen subset count: %d', len(meta_random))

    # Input that cannot be indexed by helpers.TRACE (e.g. bare traces) is
    # wrapped into records.
    # NOTE(review): wrapped records appear to have no TRACE_LEN / SH_LEN
    # entries, which the code below reads -- confirm this path is still used.
    try:
        meta_random[0][helpers.TRACE]
    except Exception:
        meta_random = [{helpers.TRACE: x} for x in meta_random]

    progress = progressbar1.DummyProgressBar(end=10, width=15)
    if show_progress:
        progress = progressbar1.AnimatedProgressBar(end=len(meta_random),
                                                    width=15)

    # One target stretch per record: the observed non-negative stretches in
    # ascending order, then randomly re-assigned among the records.
    stretches = [
        x[helpers.TRACE_LEN] - x[helpers.SH_LEN] for x in meta_random
    ]
    stretch_list = sorted(d for d in stretches if d >= 0)
    random.shuffle(stretch_list)  # shuffles in place

    # 1 / 0 per generated route of length shl + 1: was it valley-free?
    strx_array = []

    for idx, trace_meta in enumerate(meta_random):
        progress += 1
        progress.show_progress()

        shl = trace_meta[helpers.SH_LEN]
        trace = vft.trace_in_vertex_id(g, [
            trace_meta[helpers.TRACE],
        ])
        if len(trace) != 1:
            logger.error('PROBLEM')
            logger.error('%s', trace_meta)
            continue
        trace = trace[0]

        random_walk_closeness_route_vf = 0
        random_walk_closeness_route_lp_soft = 0
        random_walk_closeness_route_lp_hard = 0
        # Degree / prelabeled modes are not evaluated; kept as 0 so the
        # stored record keeps the same set of keys.
        random_walk_degree_route_vf = 0
        random_walk_degree_route_lp_soft = 0
        random_walk_degree_route_lp_hard = 0
        random_walk_prelabeled_route_vf = 0
        random_walk_prelabeled_route_lp_soft = 0
        random_walk_prelabeled_route_lp_hard = 0

        s, t = trace[0], trace[-1]
        for _ in range(try_per_race):
            random_path = helpers.random_route_walk(
                g, s, t, shl + stretch_list[idx])
            if len(random_path) == 0:
                empty += 1

            is_vf = vft.is_valley_free(g, random_path, vfmode=vft.CLOSENESS)
            if len(random_path) == shl + 1:
                strx_array.append(1 if is_vf else 0)

            if is_vf:
                random_walk_closeness_route_vf += 1
                if with_lp:
                    lp_soft = vft.is_local_preferenced(g,
                                                       random_path,
                                                       first_edge=True,
                                                       vfmode=vft.CLOSENESS)
                    lp_hard = vft.is_local_preferenced(g,
                                                       random_path,
                                                       first_edge=False,
                                                       vfmode=vft.CLOSENESS)
                    if lp_soft:
                        random_walk_closeness_route_lp_soft += 1
                    if lp_hard:
                        random_walk_closeness_route_lp_hard += 1

            # sanity check: a route must not visit a node twice
            if len(random_path) != len(set(random_path)):
                logger.error('LENGTH ERROR')

        extra_meta = {
            helpers.RANDOM_WALK_RUN_COUNT:
            try_per_race,
            helpers.RANDOM_WALK_VF_CLOSENESS_ROUTE:
            random_walk_closeness_route_vf,
            helpers.RANDOM_WALK_VF_DEGREE_ROUTE:
            random_walk_degree_route_vf,
            helpers.RANDOM_WALK_VF_PRELABELED_ROUTE:
            random_walk_prelabeled_route_vf,
        }
        if with_lp:
            extra_meta.update({
                helpers.RANDOM_WALK_LP_SOFT_CLOSENESS_ROUTE:
                random_walk_closeness_route_lp_soft,
                helpers.RANDOM_WALK_LP_HARD_CLOSENESS_ROUTE:
                random_walk_closeness_route_lp_hard,
                helpers.RANDOM_WALK_LP_SOFT_DEGREE_ROUTE:
                random_walk_degree_route_lp_soft,
                helpers.RANDOM_WALK_LP_HARD_DEGREE_ROUTE:
                random_walk_degree_route_lp_hard,
                helpers.RANDOM_WALK_LP_SOFT_PRELABELED_ROUTE:
                random_walk_prelabeled_route_lp_soft,
                helpers.RANDOM_WALK_LP_HARD_PRELABELED_ROUTE:
                random_walk_prelabeled_route_lp_hard
            })

        trace_meta.update(extra_meta)

    # save modified meta
    helpers.save_to_json(out, meta_random)

    # Records skipped above carry no counters, and a zero run count has no
    # ratio, so both are left out of the statistics.
    tested = [
        x for x in meta_random if x.get(helpers.RANDOM_WALK_RUN_COUNT)
    ]
    # float() keeps the ratio fractional under Python 2 integer division.
    random_walk_vf_ratio_per_element = [
        x[helpers.RANDOM_WALK_VF_CLOSENESS_ROUTE] /
        float(x[helpers.RANDOM_WALK_RUN_COUNT]) for x in tested
    ]
    undefined = float('nan')
    random_walk_vf_ratio = (np.mean(random_walk_vf_ratio_per_element)
                            if random_walk_vf_ratio_per_element else undefined)
    stretch_one_vf_ratio = np.mean(strx_array) if strx_array else undefined

    # print results
    logger.info('')
    logger.info('Empty: %d', empty)
    logger.info('Tested trace count: %d', len(tested))
    logger.info('VF ratio in random walks: %f', random_walk_vf_ratio)
    logger.info('VF ratio in random walks for path stretch 1: %f',
                stretch_one_vf_ratio)
Exemple #5
0
def purify(g, traceroutes, flags, show_progress=False):
    """Label every traceroute with valley-free / local-preference metadata.

    `flags` is indexed with the module's ``FLAG_*`` constants.
    ``FLAG_PRELABELED``, ``FLAG_DEGREE`` and ``FLAG_CLOSENESS`` enable the
    three labeling modes; ``FLAG_LP_SOFT`` and ``FLAG_LP_HARD`` enable the
    soft (``first_edge=True``) and hard (``first_edge=False``) local
    preference checks, which run only for traces that are valley-free in
    the given mode.

    :param g: graph; vertices carry a ``"name"`` attribute
    :param traceroutes: traces, converted with ``vft.trace_in_vertex_id``
    :param flags: container of switches indexed by ``FLAG_*``
    :param show_progress: show an animated progress bar
    :return: list with one dict per trace of at least two nodes, keyed by
        ``helpers`` constants; -1 marks a value that was not computed
    """
    results = list()

    # remove traces with unknown nodes
    traceroutes = vft.trace_in_vertex_id(g, traceroutes)

    # (flag, vfmode, message when enabled, message when disabled)
    mode_specs = (
        (FLAG_PRELABELED, vft.PRELABELED,
         'Generate VF_G_PRE', 'Skip prelabeled graph'),
        (FLAG_DEGREE, vft.DEGREE,
         'Generate VF_G_DEGREE', 'Skip degree graph'),
        (FLAG_CLOSENESS, vft.CLOSENESS,
         'Generate VF_G_CLOSENESS', 'Skip closeness graph'),
    )

    # Generate the valley-free graph only for the enabled modes.
    modes = []
    for flag, vfmode, enabled_msg, disabled_msg in mode_specs:
        enabled = bool(flags[flag])
        vf_g = None
        if enabled:
            logger.info(enabled_msg)
            vf_g = vft.convert_to_vf(g, vfmode=vfmode)
        else:
            logger.info(disabled_msg)
        modes.append((enabled, vfmode, vf_g))

    def classify(trace, enabled, vfmode, vf_g):
        """Return (is_vf, lp_soft, lp_hard) for one mode; -1 = not computed."""
        is_vf = lp_soft = lp_hard = -1
        if not enabled:
            return is_vf, lp_soft, lp_hard

        is_vf = int(vft.is_valley_free(g, trace, vfmode))
        if is_vf:
            if flags[FLAG_LP_SOFT]:
                soft = vft.is_local_preferenced(g,
                                                trace,
                                                vf_g=vf_g,
                                                first_edge=True,
                                                vfmode=vfmode)
                lp_soft = 1 if soft else 0
            if flags[FLAG_LP_HARD]:
                hard = vft.is_local_preferenced(g,
                                                trace,
                                                vf_g=vf_g,
                                                first_edge=False,
                                                vfmode=vfmode)
                lp_hard = 1 if hard else 0
        return is_vf, lp_soft, lp_hard

    progress = progressbar1.DummyProgressBar(end=10, width=15)
    if show_progress:
        progress = progressbar1.AnimatedProgressBar(end=len(traceroutes),
                                                    width=15)
    for trace in traceroutes:
        progress += 1
        progress.show_progress()

        named_trace = [g.vs[_id]["name"] for _id in trace]
        logger.debug('Current trace: %s', named_trace)

        # A trace needs two distinct endpoints to be analysed.
        if len(trace) < 2:
            continue

        s, t = trace[0], trace[-1]

        trace_len = len(trace)
        sh_len = g.shortest_paths(s, t, mode=i.OUT)[0][0]
        sh_len += 1  # convert hop count to node count

        (is_vf_prelabeled, is_lp_prelabeled_soft,
         is_lp_prelabeled_hard) = classify(trace, *modes[0])
        (is_vf_degree, is_lp_degree_soft,
         is_lp_degree_hard) = classify(trace, *modes[1])
        (is_vf_closeness, is_lp_closeness_soft,
         is_lp_closeness_hard) = classify(trace, *modes[2])

        # The shortest valley-free route prediction is switched off, so
        # both of its fields are reported as "not computed".
        sh_vf_len = -1
        in_vf_prediction = -1

        percentage_stretch = trace_len / float(sh_len)

        result = {
            helpers.TRACE: named_trace,
            helpers.TRACE_LEN: trace_len,
            helpers.SH_LEN: sh_len,
            helpers.SH_VF_LEN: sh_vf_len,
            helpers.IS_VF_PRELABELED: is_vf_prelabeled,
            helpers.IS_VF_DEGREE: is_vf_degree,
            helpers.IS_VF_CLOSENESS: is_vf_closeness,
            helpers.HOP_STRETCH: trace_len - sh_len,
            helpers.PERC_STRETCH: percentage_stretch,
            helpers.IN_VF_PRED: in_vf_prediction,
            helpers.IS_LP_SOFT_PRELABELED: is_lp_prelabeled_soft,
            helpers.IS_LP_HARD_PRELABELED: is_lp_prelabeled_hard,
            helpers.IS_LP_SOFT_DEGREE: is_lp_degree_soft,
            helpers.IS_LP_HARD_DEGREE: is_lp_degree_hard,
            helpers.IS_LP_SOFT_CLOSENESS: is_lp_closeness_soft,
            helpers.IS_LP_HARD_CLOSENESS: is_lp_closeness_hard,
        }

        results.append(result)

    return results
    def test_soft_lp_check(self):
        """Check hard and soft local preference on an extended sample graph.

        Three groups of routes are verified: routes that are LP under both
        rules, routes that are LP only under the soft rule, and routes that
        are LP under neither.
        """
        graph = self.sample_graph
        graph.add_vertices([
            'N8',
        ])
        self.prelabeled[8] = 0.5
        self.closenesses[8] = 0.5
        graph.add_edges([['N5', 'N7'], ['N3', 'N7'], ['N8', 'N5'],
                         ['N6', 'N8'], ['N7', 'N4']])

        lp_hard_routes = [['N0', 'N5', 'N6', 'N2'], ['N5', 'N6'],
                          ['N4', 'N6', 'N2'], ['N7', 'N4', 'N6', 'N3']]
        # The only difference between soft and hard LP can arise when the
        # route starts with an up (U) edge and the next hop could go either
        # up or along a down/peer edge. Soft LP may then continue upwards,
        # hard LP may only choose a peer/down edge.
        lp_soft_routes = [['N0', 'N5', 'N4', 'N6', 'N2'],
                          ['N1', 'N5', 'N6', 'N8']]
        non_lp = [['N5', 'N4', 'N6'], ['N5', 'N6', 'N8'],
                  ['N5', 'N4', 'N6', 'N2']]

        vf_g = vft.convert_to_vf(graph, vfmode=vft.CLOSENESS)

        def local_pref(route, first_edge):
            return vft.is_local_preferenced(graph,
                                            route,
                                            vf_g=vf_g,
                                            first_edge=first_edge,
                                            vfmode=vft.CLOSENESS)

        # (routes, expected hard LP verdict, expected soft LP verdict)
        expectations = (
            (lp_hard_routes, True, True),
            (lp_soft_routes, False, True),
            (non_lp, False, False),
        )

        for routes, hard_expected, soft_expected in expectations:
            check_hard = self.assertTrue if hard_expected else self.assertFalse
            check_soft = self.assertTrue if soft_expected else self.assertFalse
            for route in routes:
                hard_verdict = local_pref(route, False)
                soft_verdict = local_pref(route, True)

                check_hard(hard_verdict)
                check_soft(soft_verdict)
def purify(g,
           meta_original,
           out,
           count=1000,
           try_per_race=1,
           show_progress=False):
    """Generate random routes per trace and record their VF / LP counts.

    Records of `meta_original` that do not yet contain
    ``helpers.RANDOM_NONVF_WALK_RUN_COUNT`` are candidates; at most `count`
    of them are sampled at random. For each sampled record and each labeling
    mode (closeness, degree, prelabeled), `try_per_race` routes are
    requested from ``helpers.random_nonvf_route`` between the trace
    endpoints. The number of routes reported as valley-free, every route
    length, and the soft / hard local-preference counts of the valley-free
    routes are merged into the record.

    The sampled records are the same objects as in `meta_original`, so the
    whole of `meta_original` (including the new fields) is written with
    ``helpers.save_to_json(out, ...)``. Summary ratios are logged.

    :param g: graph the traces refer to
    :param meta_original: list of trace records (dicts)
    :param out: destination passed to ``helpers.save_to_json``
    :param count: maximum number of records to sample
    :param try_per_race: random routes generated per record and mode
    :param show_progress: show an animated progress bar
    :return: None
    """
    empty = 0
    # only records that have no random non-VF walk results yet
    logger.warning('[r]ONLY NOT FILLED PATHS[/]')
    meta_filled = [
        x for x in meta_original
        if helpers.RANDOM_NONVF_WALK_RUN_COUNT not in x
    ]

    logger.info('All trace count: %d', len(meta_filled))
    tr_count = min(len(meta_filled), count)
    meta_random = random.sample(meta_filled, tr_count)
    logger.info('Chosen subset count: %d', len(meta_random))

    def share(part):
        """Fraction of the sample; 0.0 when nothing was sampled."""
        return part / float(tr_count) if tr_count else 0.0

    # Valley-free share of the real traces, per labeling mode.
    vf_keys = (
        ('degree', helpers.IS_VF_DEGREE),
        ('prelabeled', helpers.IS_VF_PRELABELED),
        ('closeness', helpers.IS_VF_CLOSENESS),
    )
    real_counts = []
    for label, key in vf_keys:
        vf_n = sum(1 for x in meta_random if x[key] == 1)
        nonvf_n = sum(1 for x in meta_random if x[key] == 0)
        # every record must be labelled either 0 or 1
        assert nonvf_n == tr_count - vf_n
        real_counts.append((label, vf_n, nonvf_n))

    for label, vf_n, nonvf_n in real_counts:
        logger.info('Real vf %s: %f[%d]', label, share(vf_n), vf_n)
        logger.info('Real nonvf %s: %f[%d]', label, share(nonvf_n), nonvf_n)

    progress = progressbar1.DummyProgressBar(end=10, width=15)
    if show_progress:
        progress = progressbar1.AnimatedProgressBar(end=len(meta_random),
                                                    width=15)

    walk_modes = (vft.CLOSENESS, vft.DEGREE, vft.PRELABELED)

    for trace_meta in meta_random:
        progress += 1
        progress.show_progress()

        trace = vft.trace_in_vertex_id(g, [
            trace_meta[helpers.TRACE],
        ])
        if len(trace) != 1:
            logger.error('PROBLEM')
            logger.error('%s', trace_meta)
            continue

        trace = trace[0]

        # Per-mode counters, in the same order as walk_modes.
        stats = [{'vf': 0, 'len': [], 'soft': 0, 'hard': 0}
                 for _ in walk_modes]

        s, t = trace[0], trace[-1]
        for _ in range(try_per_race):
            for vfmode, stat in zip(walk_modes, stats):
                isvf, random_path = helpers.random_nonvf_route(
                    g, s, t, len(trace), vfmode=vfmode)
                assert len(random_path) > 0
                if isvf:
                    stat['vf'] += 1
                    # LP is checked on the generated path: the counters
                    # describe the random routes, not the measured trace.
                    lp_soft = vft.is_local_preferenced(g,
                                                       random_path,
                                                       first_edge=True,
                                                       vfmode=vfmode)
                    lp_hard = vft.is_local_preferenced(g,
                                                       random_path,
                                                       first_edge=False,
                                                       vfmode=vfmode)
                    if lp_soft:
                        stat['soft'] += 1
                    if lp_hard:
                        stat['hard'] += 1

                stat['len'].append(len(random_path))

                # sanity check: a route must not visit a node twice
                if len(random_path) != len(set(random_path)):
                    logger.error('LENGTH ERROR')

        closeness, degree, prelabeled = stats
        extra_meta = {
            helpers.RANDOM_NONVF_WALK_RUN_COUNT:
            try_per_race,
            helpers.RANDOM_NONVF_WALK_VF_CLOSENESS_ROUTE:
            closeness['vf'],
            helpers.RANDOM_NONVF_WALK_VF_CLOSENESS_ROUTE_LEN:
            closeness['len'],
            helpers.RANDOM_NONVF_WALK_VF_DEGREE_ROUTE:
            degree['vf'],
            helpers.RANDOM_NONVF_WALK_VF_DEGREE_ROUTE_LEN:
            degree['len'],
            helpers.RANDOM_NONVF_WALK_VF_PRELABELED_ROUTE:
            prelabeled['vf'],
            helpers.RANDOM_NONVF_WALK_VF_PRELABELED_ROUTE_LEN:
            prelabeled['len'],
            helpers.RANDOM_NONVF_WALK_LP_SOFT_DEGREE_ROUTE:
            degree['soft'],
            helpers.RANDOM_NONVF_WALK_LP_SOFT_CLOSENESS_ROUTE:
            closeness['soft'],
            helpers.RANDOM_NONVF_WALK_LP_SOFT_PRELABELED_ROUTE:
            prelabeled['soft'],
            helpers.RANDOM_NONVF_WALK_LP_HARD_DEGREE_ROUTE:
            degree['hard'],
            helpers.RANDOM_NONVF_WALK_LP_HARD_CLOSENESS_ROUTE:
            closeness['hard'],
            helpers.RANDOM_NONVF_WALK_LP_HARD_PRELABELED_ROUTE:
            prelabeled['hard']
        }

        trace_meta.update(extra_meta)

    # save modified meta
    # all meta_* lists only hold references to the records of meta_original
    helpers.save_to_json(out, meta_original)

    # calculate results
    undefined = float('nan')
    real_vf = [x[helpers.IS_VF_CLOSENESS] for x in meta_random]
    real_vf_ratio = np.mean(real_vf) if real_vf else undefined

    # Records skipped above carry no counters, and a zero run count has no
    # ratio, so both are left out of the statistics.
    walked = [
        x for x in meta_random
        if x.get(helpers.RANDOM_NONVF_WALK_RUN_COUNT)
    ]
    # float() keeps the ratio fractional under Python 2 integer division.
    random_nonvf_walk_vf_ratio_per_element = [
        x[helpers.RANDOM_NONVF_WALK_VF_CLOSENESS_ROUTE] /
        float(x[helpers.RANDOM_NONVF_WALK_RUN_COUNT]) for x in walked
    ]
    random_nonvf_walk_vf_ratio = (
        np.mean(random_nonvf_walk_vf_ratio_per_element)
        if random_nonvf_walk_vf_ratio_per_element else undefined)

    # print results
    logger.info('')
    logger.info('Empty: %d', empty)
    logger.info('Tested trace count: %d', len(meta_random))
    logger.info('VF ratio in tested traces: %f', real_vf_ratio)
    logger.info('VF ratio in random walks: %f', random_nonvf_walk_vf_ratio)