def filter(g, traceroutes, filters=('sh', 'loop', 'ex', 'vf', 'lp'),
           first_edge=True):
    """Clean *traceroutes* and keep only those passing the selected checks.

    Traces with fewer than two hops are always dropped, as are the traces
    that ``vft.trace_clean`` rejects.  The remaining traces are converted to
    vertex ids with ``vft.trace_in_vertex_id`` and then run through every
    check whose key is present in *filters*, always in this order:

    * ``'sh'``   -- keep traces with at least three nodes
    * ``'loop'`` -- keep traces that visit no node twice
    * ``'ex'``   -- keep traces accepted by ``vft.trace_exists``
    * ``'vf'``   -- keep traces accepted by ``vft.is_valley_free``
    * ``'lp'``   -- keep traces accepted by ``vft.is_local_preferenced``

    Args:
        g: graph object; must support ``g.vs[vertex_id]["name"]`` and be
            accepted by the ``vft`` helpers.
        traceroutes: list of traces, each a sequence of node names.
        filters: container of check keys to apply (see above).
        first_edge: forwarded to ``vft.is_local_preferenced``.

    Returns:
        The surviving traces as lists of node names.
    """
    logger.info('Traceroutes: %d', len(traceroutes))

    # remove empty traces
    traceroutes = [x for x in traceroutes if len(x) > 0]
    logger.info('Non empty traceroutes: %d', len(traceroutes))

    traceroutes = [x for x in traceroutes if len(x) > 1]
    logger.info('Larger than one hop traceroutes: %d', len(traceroutes))

    # remove traces with unknown nodes
    traceroutes, ignored = vft.trace_clean(g, traceroutes)
    logger.info('Ignored: %d', ignored)

    traceroutes = vft.trace_in_vertex_id(g, traceroutes)
    logger.info('Trace count: %d', len(traceroutes))

    # (filter key, log message, predicate deciding whether a trace is kept).
    # The order matters only for the intermediate "Remained" log lines.
    checks = (
        ('sh', 'Remove short traces',
         lambda trace: len(trace) >= 3),
        ('loop', 'Remove traces with loops',
         lambda trace: len(set(trace)) == len(trace)),
        ('ex', 'Remove non existent traces',
         lambda trace: vft.trace_exists(g, trace)),
        ('vf', 'Remove non vf traces',
         lambda trace: vft.is_valley_free(g, trace)),
        ('lp', 'Remove non lp traces',
         lambda trace: vft.is_local_preferenced(g, trace,
                                                first_edge=first_edge)),
    )

    good_traceroutes = list(traceroutes)
    for key, message, keep in checks:
        if key not in filters:
            continue
        logger.debug(message)
        good_traceroutes = [x for x in good_traceroutes if keep(x)]
        logger.debug('Remained: %d', len(good_traceroutes))

    # convert back node ids to node names
    good_traceroutes = [[g.vs[vertex_id]["name"] for vertex_id in trace]
                        for trace in good_traceroutes]
    logger.debug('%d', len(good_traceroutes))

    return good_traceroutes
def test_trace_clean(self):
    """trace_clean keeps three of the four sample traces and reports one ignored."""
    kept_traces = [
        ['N0', 'N5', 'N4', 'N6'],
        ['N5', 'N5', 'N4', 'N5'],
        ['N5', 'N2', 'N4'],
    ]
    # 'NFAKE' is presumably not a node of the sample graph -- this is the
    # trace expected to be dropped.
    dropped_trace = ['NFAKE', 'N4', 'N5']

    # Same input order as always: the dropped trace sits in second position.
    traceroutes = [kept_traces[0], dropped_trace] + kept_traces[1:]

    result = vft.trace_clean(self.sample_graph, traceroutes)
    cleaned_traces, ignored = result

    self.assertEqual(len(cleaned_traces), 3)
    self.assertEqual(ignored, 1)
    for expected in kept_traces:
        self.assertIn(expected, cleaned_traces)
def _purify_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or NaN if undefined."""
    if denominator == 0:
        return float('nan')
    return numerator / float(denominator)


def _purify_mean(values):
    """Return the mean of *values*, or NaN for an empty sequence."""
    # np.mean([]) yields NaN as well, but only after a RuntimeWarning.
    if len(values) == 0:
        return float('nan')
    return np.mean(values)


def _purify_new_stats():
    """Return an empty accumulator for one path category."""
    return {
        'vf': 0,
        'nonvf': 0,
        'vf_closeness': 0,
        'nonvf_closeness': 0,
        'probs': [],            # per-trace VF share (PRELABELED)
        'probs_closeness': [],  # per-trace VF share (CLOSENESS)
        'probs_multi': [],      # PRELABELED share, traces with >1 path only
    }


def _purify_count_vf(g, paths, vfmode):
    """Return ``(valley_free, not_valley_free)`` counts for *paths*."""
    vf_count = sum(1 for path in paths
                   if vft.is_valley_free(g, path, vfmode=vfmode))
    return vf_count, len(paths) - vf_count


def _purify_accumulate(g, paths, stats):
    """Fold the VF counts of *paths* into *stats*; return the CLOSENESS VF count."""
    vf_count, nonvf = _purify_count_vf(g, paths, vft.PRELABELED)
    vf_closeness_count, nonvf_closeness = _purify_count_vf(
        g, paths, vft.CLOSENESS)

    stats['vf'] += vf_count
    stats['nonvf'] += nonvf
    stats['vf_closeness'] += vf_closeness_count
    stats['nonvf_closeness'] += nonvf_closeness

    path_count = len(paths)
    # A trace without any path in this category contributes no probability
    # sample (dividing by zero here used to abort the whole run).
    if path_count > 0:
        stats['probs'].append(vf_count / float(path_count))
        stats['probs_closeness'].append(
            vf_closeness_count / float(path_count))
    if path_count > 1:
        stats['probs_multi'].append(vf_count / float(path_count))

    return vf_closeness_count


def _purify_print_summary(title, stats):
    """Print the totals, percentages and mean probabilities of one category."""
    print(title)
    print('VF count: %d' % stats['vf'])
    print('VF CLOSENESS count: %d' % stats['vf_closeness'])
    print('Non vf count: %d' % stats['nonvf'])
    print('Non vf CLOSENESS count: %d' % stats['nonvf_closeness'])

    # Percentages are only meaningful when at least one path was counted.
    prelabeled_total = stats['vf'] + stats['nonvf']
    if prelabeled_total > 0:
        print('VF perc: %f' % (stats['vf'] / float(prelabeled_total)))
    closeness_total = stats['vf_closeness'] + stats['nonvf_closeness']
    if closeness_total > 0:
        print('VF CLOSENESS perc: %f'
              % (stats['vf_closeness'] / float(closeness_total)))

    print('Mean VF prob: %f' % _purify_mean(stats['probs']))
    print('Mean VF CLOSENESS prob: %f'
          % _purify_mean(stats['probs_closeness']))
    print('Mean VF2 prob: %f' % _purify_mean(stats['probs_multi']))


def purify(g, meta, out, count=1000):
    """Count valley-free alternative paths for a random sample of traces.

    The metadata records are first narrowed down to those that have no
    ``helpers.ALL_PATH_COUNT`` entry yet, a ``helpers.STRETCH`` below 4 and a
    ``helpers.TRACE`` shorter than 5 nodes.  Up to *count* of them are then
    sampled at random.  For every sampled trace the paths returned by
    ``helpers.dfs_mark`` between the trace's first and last node are split
    into paths as long as the trace ("long") and shorter ones ("short"), and
    the number of valley-free paths is counted for both the ``PRELABELED``
    and the ``CLOSENESS`` mode of ``vft.is_valley_free``.

    The path counts and the CLOSENESS valley-free counts are written back
    into the matching metadata record.  All records that survived the
    initial narrowing (sampled or not) are saved with
    ``helpers.save_to_json`` and a summary is printed to stdout.

    Args:
        g: graph object; ``vcount()``, ``vs`` and the ``vft``/``helpers``
            functions used here must accept it.
        meta: list of dict-like trace metadata records.
        out: destination handed to ``helpers.save_to_json``.
        count: maximum number of records to sample.

    Returns:
        None.
    """
    stats_all = _purify_new_stats()
    stats_short = _purify_new_stats()
    stats_long = _purify_new_stats()

    # remove traces with already calculated all_path
    logger.warning('[r]ONLY NOT FILLED PATHS[/]')
    meta = [x for x in meta if helpers.ALL_PATH_COUNT not in x]

    # traces with a maximum stretch
    logger.warning('[r]!!!ONLY WITH LOW STRETCH[/]')
    meta = [x for x in meta if x[helpers.STRETCH] < 4]

    # shorter meta records
    logger.warning('[r]!!!ONLY SHORT TRACES[/]')
    meta = [x for x in meta if len(x[helpers.TRACE]) < 5]

    # Built before sampling on purpose: every filtered record is saved at the
    # end, not just the sampled ones.
    meta_map = {tuple(x[helpers.TRACE]): x for x in meta}

    logger.info('All trace count: %d', len(meta))
    tr_count = min(len(meta), count)
    meta = random.sample(meta, tr_count)
    logger.info('Chosen trace count: %d', len(meta))

    sampled_count = len(meta)
    real_labels = (
        ('Real vf', helpers.IS_VF, 1),
        ('Real nonvf', helpers.IS_VF, 0),
        ('Real vf closeness', helpers.IS_VF_CLOSENESS, 1),
        ('Real nonvf closeness', helpers.IS_VF_CLOSENESS, 0),
    )
    for label, key, flag in real_labels:
        matching = sum(1 for x in meta if x[key] == flag)
        # _purify_ratio keeps an empty sample from raising ZeroDivisionError.
        logger.info('%s: %f[%d]', label,
                    _purify_ratio(matching, sampled_count), matching)

    logger.info('Remove unknown traces. Trace count before: %d', len(meta))
    traceroutes = [x[helpers.TRACE] for x in meta]
    traceroutes, ignored = vft.trace_clean(g, traceroutes)
    logger.info('Traceroutes after: %d. Ignored: %d',
                len(traceroutes), ignored)
    traceroutes = vft.trace_in_vertex_id(g, traceroutes)

    progress = progressbar1.AnimatedProgressBar(end=len(traceroutes), width=15)
    for trace in traceroutes:
        progress += 1
        progress.show_progress()

        # Reset the per-vertex 'traces' attribute before each search
        # (presumably scratch space used by helpers.dfs_mark -- confirm).
        for vertex_id in range(0, g.vcount()):
            g.vs[vertex_id]['traces'] = dict()

        s, t = trace[0], trace[-1]
        # dfs_mark gets its own copy of the graph, so g itself is untouched.
        all_path = helpers.dfs_mark(copy.deepcopy(g), s, t, len(trace))

        # sanity check: every path must connect s to t and be unique
        for path in all_path:
            if path[0] != s or path[-1] != t:
                logger.error('ALERT')
        unique_count = len(set(tuple(path) for path in all_path))
        if unique_count != len(all_path):
            logger.error('LENGTH ALERT')
            logger.error('%s', len(all_path))
            logger.error('%s', unique_count)
            logger.error('%s', sorted(all_path))

        long_path = [x for x in all_path if len(x) == len(trace)]
        short_path = [x for x in all_path if len(x) < len(trace)]

        vf_closeness_count = _purify_accumulate(g, all_path, stats_all)
        short_vf_closeness_count = _purify_accumulate(
            g, short_path, stats_short)
        long_vf_closeness_count = _purify_accumulate(g, long_path, stats_long)

        named_trace = [g.vs[x]['name'] for x in trace]
        meta_map[tuple(named_trace)].update({
            helpers.ALL_PATH_COUNT: len(all_path),
            helpers.SAME_LONG_PATH_COUNT: len(long_path),
            helpers.SHORTER_PATH_COUNT: len(short_path),
            # The stored VF counts are the CLOSENESS-mode ones.
            helpers.ALL_PATH_VF_COUNT: vf_closeness_count,
            helpers.SAME_LONG_PATH_VF_COUNT: long_vf_closeness_count,
            helpers.SHORTER_PATH_VF_COUNT: short_vf_closeness_count,
        })

    # save modified meta
    meta_mod = list(meta_map.values())
    helpers.save_to_json(out, meta_mod)

    # print results
    _purify_print_summary('ALL', stats_all)
    print('==========')
    _purify_print_summary('SHORT', stats_short)
    print('=-----------------')
    _purify_print_summary('LONG', stats_long)