def test_is_valley_free_false(self):
    """Check that ``vft.is_valley_free`` rejects every trace listed below.

    Each trace is a list of vertex ids evaluated on ``self.sample_graph``.
    The trailing tags are the edge-direction annotations carried over from
    the original test data.
    """
    rejected_traces = (
        [0, 5, 6, 4],  # peerup: DPU
        [0, 5, 6, 5, 1],  # peerpeer: UPPD
        [4, 6, 5, 0],  # downpeer: DPD
        [4, 5, 4],  # downup: DU
    )
    for hops in rejected_traces:
        self.assertFalse(vft.is_valley_free(self.sample_graph, hops))
def filter(g, traceroutes, filters=('sh', 'loop', 'ex', 'vf', 'lp'),
           first_edge=True):
    """Drop traceroutes that fail the selected sanity filters.

    Traces with fewer than two hops and traces rejected by
    ``vft.trace_clean`` are always removed.  The remaining traces are
    converted to vertex ids and passed through the enabled filters, in this
    fixed order:

    * ``'sh'``   -- keep traces with at least three nodes
    * ``'loop'`` -- keep traces without repeated nodes
    * ``'ex'``   -- keep traces for which ``vft.trace_exists`` is true
    * ``'vf'``   -- keep traces for which ``vft.is_valley_free`` is true
    * ``'lp'``   -- keep traces for which ``vft.is_local_preferenced`` is
      true (called with ``first_edge``)

    Args:
        g: graph handed to the ``vft`` helpers; ``g.vs[idx]['name']`` is
            used to translate vertex ids back to node names.
        traceroutes: iterable of traces (sequences of node names).
        filters: container of filter keys to enable.  The default is an
            immutable tuple so it cannot be shared-and-mutated by accident.
        first_edge: forwarded to ``vft.is_local_preferenced``.

    Returns:
        List of the surviving traces, each a list of node names.
    """
    logger.info('Traceroutes: %d', len(traceroutes))

    # remove empty traces
    traceroutes = [x for x in traceroutes if len(x) > 0]
    logger.info('Non empty traceroutes: %d', len(traceroutes))

    traceroutes = [x for x in traceroutes if len(x) > 1]
    logger.info('Larger than one hop traceroutes: %d', len(traceroutes))

    # remove traces with unknown nodes
    traceroutes, ignored = vft.trace_clean(g, traceroutes)
    logger.info('Ignored: %d', ignored)

    traceroutes = vft.trace_in_vertex_id(g, traceroutes)
    logger.info('Trace count: %d', len(traceroutes))

    # (filter key, log message, keep-predicate); order matters for the logs.
    stages = (
        ('sh', 'Remove short traces',
         lambda x: len(x) >= 3),
        ('loop', 'Remove traces with loops',
         lambda x: len(set(x)) == len(x)),
        ('ex', 'Remove non existent traces',
         lambda x: vft.trace_exists(g, x)),
        ('vf', 'Remove non vf traces',
         lambda x: vft.is_valley_free(g, x)),
        ('lp', 'Remove non lp traces',
         lambda x: vft.is_local_preferenced(g, x, first_edge=first_edge)),
    )

    good_traceroutes = list(traceroutes)
    for key, message, keep in stages:
        if key not in filters:
            continue
        logger.debug(message)
        good_traceroutes = [x for x in good_traceroutes if keep(x)]
        logger.debug('Remained: %d', len(good_traceroutes))

    # convert back node ids to node names
    good_traceroutes = [[g.vs[vertex_id]['name'] for vertex_id in trace]
                        for trace in good_traceroutes]
    logger.debug('%d', len(good_traceroutes))

    return good_traceroutes
def ba_generator(ba_graph, sh_paths, stretch, vf_g, progressbar=False):
    """Sample one random route per node pair and count VF / LP routes.

    For every ``((s, t), shl)`` item of ``sh_paths`` a route is requested
    from ``helpers.random_route_walk`` with length ``shl + stretch``.  Routes
    whose length minus ``shl`` differs from ``stretch`` are skipped.  The
    rest are checked with ``vft.is_valley_free`` and, only when valley-free,
    with ``vft.is_local_preferenced`` (both in ``vft.CLOSENESS`` mode).

    Args:
        ba_graph: graph the routes are walked on.
        sh_paths: iterable of ``((source, target), shortest_length)`` items.
        stretch: required difference between route length and ``shl``.
        vf_g: forwarded to ``vft.is_local_preferenced`` as ``vf_g``.
        progressbar: when true an animated progress bar is shown.

    Returns:
        Tuple ``(stretch, trace_count, vf_count, lp_count)``.
    """
    accepted = 0
    valley_free_total = 0
    local_pref_total = 0

    progress = progressbar1.DummyProgressBar(end=10, width=15)
    if progressbar:
        progress = progressbar1.AnimatedProgressBar(end=len(sh_paths),
                                                    width=15)

    for (s, t), shl in sh_paths:
        progress += 1
        progress.show_progress()

        logger.debug('SH from %s to %s is %d' % (s, t, shl))
        random_route = helpers.random_route_walk(ba_graph, s, t,
                                                 shl + stretch)
        logger.debug('Random route: %s' % random_route)

        # Only routes with exactly the requested stretch are counted.
        if len(random_route) - shl != stretch:
            continue
        accepted += 1

        is_vf = vft.is_valley_free(ba_graph, random_route,
                                   vfmode=vft.CLOSENESS)
        direction_string = vft.trace_to_string(ba_graph, random_route,
                                               vfmode=vft.CLOSENESS)
        logger.debug('Trace edge dir: %s' % direction_string)
        logger.debug('Is VF: %s' % is_vf)

        # Local preference is only evaluated for valley-free routes.
        is_lp = 0
        if is_vf:
            is_lp = vft.is_local_preferenced(ba_graph, random_route,
                                             first_edge=True,
                                             vfmode=vft.CLOSENESS,
                                             vf_g=vf_g)
        logger.debug('Is LP: %s' % is_lp)

        valley_free_total += int(is_vf)
        local_pref_total += int(is_lp)

    logger.info('Stretch %d' % stretch)
    logger.info('Trace count: %d' % accepted)
    logger.info('VF count: %d' % valley_free_total)
    logger.info('LP count: %d' % local_pref_total)

    return (stretch, accepted, valley_free_total, local_pref_total)
def test_is_valley_free_true(self):
    """Check that ``vft.is_valley_free`` accepts every trace listed below.

    Each trace is a list of vertex ids evaluated on ``self.sample_graph``.
    The trailing tags are the edge-direction annotations carried over from
    the original test data.
    """
    accepted_traces = (
        [0, 5, 4, 6, 2],  # simple_vf: UUDD - vf
        [0, 5, 6, 2],  # middle_peer_vf: UPD - vf
        [4, 6, 3],  # just_down: DD - vf
        [1, 5, 4],  # just_up: UU - vf
        [5, 6, 3],  # start_peer: PD - vf
        [5, 6],  # just_peer: P - vf
    )
    for hops in accepted_traces:
        self.assertTrue(vft.is_valley_free(self.sample_graph, hops))
def vf_attributes(g, trace, vfmode, get_lp_soft, get_lp_hard, vf_g=None):
    """Return the valley-free and local-preference flags of ``trace``.

    Args:
        g: graph handed to the ``vft`` helpers.
        trace: the trace to classify.
        vfmode: mode passed to ``vft.is_valley_free`` and
            ``vft.is_local_preferenced``.
        get_lp_soft: when true, evaluate local preference with
            ``first_edge=True``.
        get_lp_hard: when true, evaluate local preference with
            ``first_edge=False``.
        vf_g: forwarded to ``vft.is_local_preferenced`` as ``vf_g``.

    Returns:
        Tuple ``(is_vf, is_lp_soft, is_lp_hard)`` of ints.  ``is_vf`` is 0 or
        1.  Each LP flag is 0 or 1 when it was evaluated, and -1 when it was
        not requested or the trace is not valley-free.
    """
    is_vf = int(vft.is_valley_free(g, trace, vfmode))

    # -1 marks "not evaluated"; it is only overwritten for valley-free
    # traces whose corresponding LP check was requested.
    is_lp_soft = -1
    is_lp_hard = -1

    if is_vf:
        if get_lp_soft:
            is_lp_soft = int(vft.is_local_preferenced(g, trace,
                                                      vf_g=vf_g,
                                                      first_edge=True,
                                                      vfmode=vfmode))
        if get_lp_hard:
            is_lp_hard = int(vft.is_local_preferenced(g, trace,
                                                      vf_g=vf_g,
                                                      first_edge=False,
                                                      vfmode=vfmode))

    return (is_vf, is_lp_soft, is_lp_hard)
def _fraction(part, whole):
    """Return ``part / whole`` as a float, or NaN when ``whole`` is zero."""
    if not whole:
        return float('nan')
    return part / float(whole)


def _tally_valley_free(g, paths, vfmode):
    """Return ``(vf, nonvf)`` counts of ``paths`` judged with ``vfmode``."""
    vf = sum(1 for p in paths if vft.is_valley_free(g, p, vfmode=vfmode))
    return vf, len(paths) - vf


def _print_vf_section(title, totals, ratios):
    """Print one ALL/SHORT/LONG block; percentages are skipped if undefined."""
    print(title)
    print('VF count: %d' % totals['vf'])
    print('VF CLOSENESS count: %d' % totals['vf_cl'])
    print('Non vf count: %d' % totals['nonvf'])
    print('Non vf CLOSENESS count: %d' % totals['nonvf_cl'])
    if totals['vf'] + totals['nonvf'] > 0:
        print('VF perc: %f'
              % (totals['vf'] / float(totals['vf'] + totals['nonvf'])))
    if totals['vf_cl'] + totals['nonvf_cl'] > 0:
        print('VF CLOSENESS perc: %f'
              % (totals['vf_cl']
                 / float(totals['vf_cl'] + totals['nonvf_cl'])))
    print('Mean VF prob: %f' % np.mean(ratios['pre']))
    print('Mean VF CLOSENESS prob: %f' % np.mean(ratios['cl']))
    print('Mean VF2 prob: %f' % np.mean(ratios['pre_multi']))


def purify(g, meta, out, count=1000):
    """Enumerate alternative paths for sampled traces and report VF ratios.

    The meta records are first narrowed to those that have no
    ``helpers.ALL_PATH_COUNT`` yet, a ``helpers.STRETCH`` below 4 and a trace
    shorter than 5 nodes.  Up to ``count`` of them are sampled at random.
    For every sampled trace all paths returned by ``helpers.dfs_mark``
    (called on a deep copy of ``g`` with the trace length as limit) are split
    into "short" (fewer nodes than the trace) and "long" (same node count)
    paths.  Valley-free counts are computed in ``vft.PRELABELED`` and
    ``vft.CLOSENESS`` mode for all/short/long paths.

    Side effects:
        * ``g.vs[...]['traces']`` is reset to an empty dict for every vertex
          before each trace is processed.
        * The path counts are written back into the matching meta records
          (the closeness-mode counts are stored under the ``*_VF_COUNT``
          keys) and every record that passed the filters is saved with
          ``helpers.save_to_json(out, ...)``.
        * A textual summary is printed to stdout.

    Args:
        g: graph the traces refer to.
        meta: list of meta records (dicts) describing traces.
        out: destination handed to ``helpers.save_to_json``.
        count: maximum number of traces to sample.

    Returns:
        None.
    """
    scopes = ('all', 'short', 'long')
    totals = {name: {'vf': 0, 'nonvf': 0, 'vf_cl': 0, 'nonvf_cl': 0}
              for name in scopes}
    # 'pre' / 'cl': per-trace VF ratio in prelabeled / closeness mode,
    # 'pre_multi': prelabeled ratio for traces with more than one path.
    ratios = {name: {'pre': [], 'cl': [], 'pre_multi': []}
              for name in scopes}

    # remove traces with already calculated all_path
    logger.warning('[r]ONLY NOT FILLED PATHS[/]')
    meta = [x for x in meta if helpers.ALL_PATH_COUNT not in x]

    # traces with a maximum stretch
    logger.warning('[r]!!!ONLY WITH LOW STRETCH[/]')
    meta = [x for x in meta if x[helpers.STRETCH] < 4]

    # shorter meta records
    logger.warning('[r]!!!ONLY SHORT TRACES[/]')
    meta = [x for x in meta if len(x[helpers.TRACE]) < 5]

    # Built before sampling: every filtered record is saved at the end,
    # not only the sampled ones.
    meta_map = {tuple(x[helpers.TRACE]): x for x in meta}

    logger.info('All trace count: %d' % len(meta))
    meta = random.sample(meta, min(len(meta), count))
    logger.info('Chosen trace count: %d' % len(meta))

    for label, key, flag in (
            ('Real vf', helpers.IS_VF, 1),
            ('Real nonvf', helpers.IS_VF, 0),
            ('Real vf closeness', helpers.IS_VF_CLOSENESS, 1),
            ('Real nonvf closeness', helpers.IS_VF_CLOSENESS, 0)):
        matching = len([x for x in meta if x[key] == flag])
        # _fraction keeps an empty sample from raising ZeroDivisionError.
        logger.info('%s: %f[%d]'
                    % (label, _fraction(matching, len(meta)), matching))

    logger.info('Remove unknown traces. Trace count before: %d' % len(meta))
    traceroutes = [x[helpers.TRACE] for x in meta]
    traceroutes, ignored = vft.trace_clean(g, traceroutes)
    logger.info('Traceroutes after: %d. Ignored: %d'
                % (len(traceroutes), ignored))
    traceroutes = vft.trace_in_vertex_id(g, traceroutes)

    progress = progressbar1.AnimatedProgressBar(end=len(traceroutes),
                                                width=15)
    for trace in traceroutes:
        progress += 1
        progress.show_progress()

        for x in range(0, g.vcount()):
            g.vs[x]['traces'] = dict()

        s, t = trace[0], trace[-1]
        # A missing result is treated as "no path found".
        all_path = helpers.dfs_mark(copy.deepcopy(g), s, t, len(trace)) or []

        # sanity check
        for x in all_path:
            if x[0] != s or x[-1] != t:
                logger.error('ALERT')
        unique_count = len(set(tuple(x) for x in all_path))
        if unique_count != len(all_path):
            logger.error('LENGTH ALERT')
            logger.error('%s' % len(all_path))
            logger.error('%s' % unique_count)
            logger.error('%s' % sorted(all_path))

        buckets = {
            'all': all_path,
            'short': [x for x in all_path if len(x) < len(trace)],
            'long': [x for x in all_path if len(x) == len(trace)],
        }

        record = meta_map[tuple(g.vs[x]['name'] for x in trace)]
        record.update({
            helpers.ALL_PATH_COUNT: len(all_path),
            helpers.SAME_LONG_PATH_COUNT: len(buckets['long']),
            helpers.SHORTER_PATH_COUNT: len(buckets['short']),
        })

        closeness_vf = {}
        for name in scopes:
            paths = buckets[name]
            vf, nonvf = _tally_valley_free(g, paths, vft.PRELABELED)
            vf_cl, nonvf_cl = _tally_valley_free(g, paths, vft.CLOSENESS)
            closeness_vf[name] = vf_cl

            totals[name]['vf'] += vf
            totals[name]['nonvf'] += nonvf
            totals[name]['vf_cl'] += vf_cl
            totals[name]['nonvf_cl'] += nonvf_cl

            # A ratio is undefined without paths; such traces are left out
            # of the means instead of raising ZeroDivisionError.
            if paths:
                ratios[name]['pre'].append(vf / float(len(paths)))
                ratios[name]['cl'].append(vf_cl / float(len(paths)))
            if len(paths) > 1:
                ratios[name]['pre_multi'].append(vf / float(len(paths)))

        record.update({
            helpers.ALL_PATH_VF_COUNT: closeness_vf['all'],
            helpers.SAME_LONG_PATH_VF_COUNT: closeness_vf['long'],
            helpers.SHORTER_PATH_VF_COUNT: closeness_vf['short'],
        })

    # save modified meta
    helpers.save_to_json(out, list(meta_map.values()))

    # print results
    _print_vf_section('ALL', totals['all'], ratios['all'])
    print('==========')
    _print_vf_section('SHORT', totals['short'], ratios['short'])
    print('=-----------------')
    _print_vf_section('LONG', totals['long'], ratios['long'])
def _vf_flags(g, paths, **vf_kwargs):
    """Return a 0/1 list marking which of ``paths`` are valley-free."""
    return [1 if vft.is_valley_free(g, p, **vf_kwargs) else 0 for p in paths]


def _write_scope_summary(f, totals, probs, probs_closeness, probs_multi):
    """Write one SHORT/LONG block; percentages are skipped if undefined."""
    vf, nonvf, vf_cl, nonvf_cl = totals
    f.write('VF count: %d\n' % vf)
    f.write('VF CLOSENESS count: %d\n' % vf_cl)
    f.write('Non vf count: %d\n' % nonvf)
    f.write('Non vf CLOSENESS count: %d\n' % nonvf_cl)
    if vf + nonvf > 0:
        f.write('VF perc: %f\n' % (vf / float(vf + nonvf)))
    if vf_cl + nonvf_cl > 0:
        f.write('VF CLOSENESS perc: %f\n' % (vf_cl / float(vf_cl + nonvf_cl)))
    f.write('Mean VF prob: %f\n' % np.mean(probs))
    f.write('Mean VF CLOSENESS prob: %f\n' % np.mean(probs_closeness))
    f.write('Mean VF2 prob: %f\n' % np.mean(probs_multi))


def purify(labeled_g, out, network_path, extra_hop=0):
    """Measure how many paths between random node pairs are valley-free.

    ``random_pairs(vs, NODE_PAIRS)`` supplies the node pairs.  For each pair
    the paths returned by ``helpers.dfs_mark`` (called on a deep copy of the
    graph with ``shortest length + extra_hop`` as limit) are classified with
    ``vft.is_valley_free`` in its default mode and in
    ``vft.ORDER_CLOSENESS`` mode.  Paths shorter than the limit ("short")
    and paths at the limit ("long") are additionally classified in
    ``vft.ORDER_PRELABELED`` and ``vft.ORDER_CLOSENESS`` mode.

    Side effects:
        * ``labeled_g.vs[...]['traces']`` is reset to an empty dict for every
          vertex before each pair is processed.
        * Progress and "no path" notices are printed to stdout.
        * A plain-text report is written to the file ``out``.  Percentage
          lines whose denominator is zero are omitted.

    Args:
        labeled_g: graph to probe.
        out: path of the report file (opened for writing).
        network_path: written as the first line of the report.
        extra_hop: extra nodes allowed on top of the shortest path length.

    Returns:
        None.
    """
    vs = [x.index for x in labeled_g.vs]

    pairs = random_pairs(vs, NODE_PAIRS)
    print('Random pairs: %d' % len(pairs))

    probed_pairs = 0

    all_vf = all_nonvf = 0
    all_vf_closeness = all_nonvf_closeness = 0
    results = []            # per-pair VF ratio, default mode
    results2 = []           # same, only pairs with more than one path
    results_closeness = []  # per-pair VF ratio, closeness mode

    all_short_vf = all_short_nonvf = 0
    all_short_vf_closeness = all_short_nonvf_closeness = 0
    short_results = []
    short_results2 = []
    short_results3 = []

    all_long_vf = all_long_nonvf = 0
    all_long_vf_closeness = all_long_nonvf_closeness = 0
    long_results = []
    long_results2 = []
    long_results3 = []

    progress = progressbar1.AnimatedProgressBar(end=len(pairs), width=15)
    for s, t in pairs:
        progress += 1
        progress.show_progress()

        for x in range(0, labeled_g.vcount()):
            labeled_g.vs[x]['traces'] = dict()

        sh_len = labeled_g.shortest_paths(s, t, mode=i.ALL)[0][0]
        sh_len += 1  # igraph reports hops; the paths are node lists
        limit = sh_len + extra_hop

        all_path = helpers.dfs_mark(copy.deepcopy(labeled_g), s, t, limit)
        if all_path is None or len(all_path) < 1:
            print('No path between %s %s' % (s, t))
            continue
        probed_pairs += 1

        vf_count = sum(_vf_flags(labeled_g, all_path))
        vf_closeness_count = sum(
            _vf_flags(labeled_g, all_path, vfmode=vft.ORDER_CLOSENESS))
        nonvf = len(all_path) - vf_count
        nonvf_closeness = len(all_path) - vf_closeness_count

        all_vf += vf_count
        all_nonvf += nonvf
        all_vf_closeness += vf_closeness_count
        all_nonvf_closeness += nonvf_closeness

        long_path = [x for x in all_path if len(x) == limit]
        short_path = [x for x in all_path if len(x) < limit]
        # NOTE(review): the long VF counts are taken over paths with
        # len >= limit while the ratios below divide by len(long_path)
        # (len == limit).  They agree as long as dfs_mark never returns a
        # path longer than the limit -- confirm against helpers.dfs_mark.
        limit_or_longer = [x for x in all_path if len(x) >= limit]

        short_vf_count = sum(
            _vf_flags(labeled_g, short_path, vfmode=vft.ORDER_PRELABELED))
        short_nonvf = len(short_path) - short_vf_count
        short_vf_closeness_count = sum(
            _vf_flags(labeled_g, short_path, vfmode=vft.ORDER_CLOSENESS))
        short_nonvf_closeness = len(short_path) - short_vf_closeness_count

        long_vf_count = sum(
            _vf_flags(labeled_g, limit_or_longer,
                      vfmode=vft.ORDER_PRELABELED))
        long_nonvf = len(limit_or_longer) - long_vf_count
        long_vf_closeness_count = sum(
            _vf_flags(labeled_g, limit_or_longer,
                      vfmode=vft.ORDER_CLOSENESS))
        long_nonvf_closeness = len(limit_or_longer) - long_vf_closeness_count

        # all_path is non-empty here (checked above).
        results.append(vf_count / float(len(all_path)))
        results_closeness.append(vf_closeness_count / float(len(all_path)))
        if len(all_path) > 1:
            results2.append(vf_count / float(len(all_path)))

        all_long_vf += long_vf_count
        all_long_nonvf += long_nonvf
        all_long_vf_closeness += long_vf_closeness_count
        all_long_nonvf_closeness += long_nonvf_closeness

        all_short_vf += short_vf_count
        all_short_nonvf += short_nonvf
        all_short_vf_closeness += short_vf_closeness_count
        all_short_nonvf_closeness += short_nonvf_closeness

        if len(long_path) > 0:
            long_results.append(long_vf_count / float(len(long_path)))
            long_results3.append(
                long_vf_closeness_count / float(len(long_path)))
        else:
            long_results.append(0)
            long_results3.append(0)
        if len(long_path) > 1:
            long_results2.append(long_vf_count / float(len(long_path)))

        if len(short_path) > 0:
            short_results.append(short_vf_count / float(len(short_path)))
            short_results3.append(
                short_vf_closeness_count / float(len(short_path)))
        else:
            short_results.append(0)
            short_results3.append(0)
        # Mirrors the long_results2 rule; without it the SHORT
        # 'Mean VF2 prob' line was always NaN.
        if len(short_path) > 1:
            short_results2.append(short_vf_count / float(len(short_path)))

    print('')

    with open(out, 'w') as f:
        f.write('%s\n' % network_path)
        f.write('Probed pairs: %d\n' % probed_pairs)
        f.write('VF count: %d\n' % all_vf)
        f.write('Non vf count: %d\n' % all_nonvf)
        # Guarded like the SHORT block so that a run without any probed
        # pair does not abort with a half-written report.
        if all_vf + all_nonvf > 0:
            f.write('VF perc: %f\n' % (all_vf / float(all_vf + all_nonvf)))
        f.write('Mean VF prob: %f\n' % np.mean(results))
        f.write('Mean VF2 prob: %f\n' % np.mean(results2))
        f.write('\n')
        f.write('VF CLOSENESS count: %d\n' % all_vf_closeness)
        f.write('Non vf CLOSENESS count: %d\n' % all_nonvf_closeness)
        if all_vf_closeness + all_nonvf_closeness > 0:
            f.write('VF CLOSENESS perc: %f\n'
                    % (all_vf_closeness
                       / float(all_vf_closeness + all_nonvf_closeness)))
        f.write('Mean VF CLOSENESS prob: %f\n' % np.mean(results_closeness))
        f.write('\n')
        f.write('==========\n')
        _write_scope_summary(
            f,
            (all_short_vf, all_short_nonvf,
             all_short_vf_closeness, all_short_nonvf_closeness),
            short_results, short_results3, short_results2)
        f.write('=-----------------\n')
        _write_scope_summary(
            f,
            (all_long_vf, all_long_nonvf,
             all_long_vf_closeness, all_long_nonvf_closeness),
            long_results, long_results3, long_results2)
def filter(g, traceroutes):
    """Compare every trace with the shortest and valley-free route sets.

    Each trace is converted to vertex ids; traces for which
    ``vft.trace_exists`` is false are reported with ``BUG?`` on stdout and
    skipped.  For the rest, ``helpers.dfs_mark`` is asked for the routes
    between the trace end points with ``shortest node count + 1`` as limit.
    One result row is produced per trace and the rows are also dumped,
    tab-separated and preceded by a header line, to ``sys.stderr``.

    Side effects:
        ``g.vs[...]['traces']`` is reset to an empty dict for every vertex
        before each trace is processed.

    Args:
        g: graph the traces refer to.
        traceroutes: traces accepted by ``vft.trace_in_vertex_id``.

    Returns:
        List of rows.  Each row is a list holding, in order: the trace (as a
        tuple), its length, the shortest node count, the number of shortest
        routes, whether the trace is one of them, the number of valley-free
        routes, whether the trace is one of them, the number of all routes,
        whether the trace is one of them, the size of the prediction set
        (shortest | valley-free), whether the trace is in it, the
        ``vft.is_valley_free`` verdict and ``vft.trace_to_string``.
    """
    results = list()

    traceroutes = vft.trace_in_vertex_id(g, traceroutes)

    progress = progressbar1.AnimatedProgressBar(end=len(traceroutes),
                                                width=15)
    for trace in traceroutes:
        progress += 1
        progress.show_progress()

        if not vft.trace_exists(g, trace):
            print('BUG?')
            continue

        for x in range(0, g.vcount()):
            g.vs[x]['traces'] = dict()

        trace = tuple(trace)
        s, t = trace[0], trace[-1]

        sh_len = g.shortest_paths(s, t, mode=i.ALL)[0][0]
        sh_len += 1  # igraph's hop count to node count

        all_routes = helpers.dfs_mark(g, s, t, sh_len + 1)

        sh_routes = [x for x in all_routes if len(x) == sh_len]
        all_vf_routes = [x for x in all_routes if vft.is_valley_free(g, x)]
        prediction_set = set(sh_routes) | set(all_vf_routes)

        result = [
            trace,
            len(trace),
            sh_len,
            len(sh_routes),
            trace in sh_routes,
            len(all_vf_routes),
            trace in all_vf_routes,
            len(all_routes),
            trace in all_routes,
            len(prediction_set),
            trace in prediction_set,
            vft.is_valley_free(g, trace),
            vft.trace_to_string(g, trace),
        ]
        results.append(result)

    # The pieces are concatenated into ONE header line.  Separating them
    # with commas would build a tuple and print its repr instead.
    header = ('TRACE\tTRACE_LEN\tSH_LEN'
              '\t#SH_ROUTE\tOK'
              '\t#ALL_VF\tOK'
              '\t#ALL_ROUTE\tOK'
              '\t#PREDICTION_SET\tOK'
              '\tIS_VF'
              '\tTRACE_STR')
    sys.stderr.write(header + '\n')
    for result in results:
        sys.stderr.write('\t'.join(str(r) for r in result) + '\n')

    return results
def _undirected_edges(trace):
    """Return the consecutive node pairs of ``trace`` as sorted tuples."""
    return [tuple(sorted(e)) for e in zip(trace, trace[1:])]


def main():
    """Sandbox experiment: drop edges among top-closeness nodes, measure stretch.

    Steps, as performed below:

    1. Parse the command line, set the ``compnet`` logger level, load the
       network and the traceroutes.
    2. Rank the vertices of a copy of the graph by closeness, keep the top
       ``--closeness-limit`` fraction and delete a random ``--edge-drop``
       fraction of the edges running between those top nodes.
    3. Using the first 10000 traceroutes, count how many distinct edges
       between top nodes they contain (cumulatively, per trace).
    4. For each trace compare one shortest path in the pruned graph with one
       in the original graph; the length difference is the stretch.
    5. Log, per stretch value, the share of pruned-graph shortest paths that
       are valley-free in ``vft.CLOSENESS`` mode, and plot the results.
    """
    parser = argparse.ArgumentParser(
        # One string: a tuple here makes argparse fail when rendering --help.
        description=('SANDBOX mode. '
                     'Write something '
                     'useful here'),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--progressbar', action='store_true')
    parser.add_argument('--verbose', '-v', action='count', default=0)
    parser.add_argument('--edge-drop', dest='edge_drop', type=float,
                        default=0.0)
    parser.add_argument('--closeness-limit', dest='closeness_limit',
                        type=float, default=0.0)
    parser.add_argument('network')
    parser.add_argument('traceroutes')

    arguments = parser.parse_args()

    show_progress = arguments.progressbar

    # Clamp to the last valid position; len(LEVELS) itself is out of range.
    arguments.verbose = min(len(helpers.LEVELS) - 1, arguments.verbose)
    logging.getLogger('compnet').setLevel(helpers.LEVELS[arguments.verbose])

    g = helpers.load_network(arguments.network)
    traceroutes = helpers.load_from_json(arguments.traceroutes)

    logger.info('ecount: %d' % g.ecount())
    logger.info('vcount: %d' % g.vcount())
    logger.info('trace count: %d' % len(traceroutes))

    g_dummy = g.copy()

    progress = progressbar1.DummyProgressBar(end=10, width=15)
    if show_progress:
        # This loop walks the vertices, so the bar is sized by vcount.
        progress = progressbar1.AnimatedProgressBar(end=g_dummy.vcount(),
                                                    width=15)

    closeness_list = []
    for x in g_dummy.vs:
        progress += 1
        progress.show_progress()
        closeness_list.append((x.index, g_dummy.closeness(x)))

    end = int(arguments.closeness_limit * g_dummy.vcount())
    logger.debug('Top node count: %d' % end)

    top_nodes = sorted(closeness_list, key=lambda x: x[1], reverse=True)[:end]
    # Sets: these are only used for membership tests.
    top_nodes_index = set(x[0] for x in top_nodes)
    top_nodes_name = set(g_dummy.vs[x[0]]['name'] for x in top_nodes)

    top_edges = [e for e in g_dummy.es
                 if e.source in top_nodes_index
                 and e.target in top_nodes_index]
    logger.debug('Top edge count: %d' % len(top_edges))

    random.shuffle(top_edges)
    edge_drop = top_edges[:int(len(top_edges) * arguments.edge_drop)]
    logger.debug('Dropped edge count: %d' % len(edge_drop))

    g_dummy.delete_edges(edge_drop)

    traceroutes = traceroutes[:10000]

    all_edges = set()
    for trace in traceroutes:
        all_edges.update(_undirected_edges(trace))
    top_edges = [e for e in all_edges
                 if e[0] in top_nodes_name and e[1] in top_nodes_name]
    logger.info('TOP edge count in real traceroutes: %d' % len(top_edges))

    # Cumulative number of distinct top edges seen after each trace.
    found_top_edges = set()
    increments = []
    for trace in traceroutes:
        found_top_edges.update(
            e for e in _undirected_edges(trace)
            if e[0] in top_nodes_name and e[1] in top_nodes_name)
        increments.append(len(found_top_edges))
    logger.info('Found top edge count: %d' % len(found_top_edges))

    dummy_sh_traceroutes_meta = []
    stretches = []

    progress = progressbar1.DummyProgressBar(end=10, width=15)
    if show_progress:
        progress = progressbar1.AnimatedProgressBar(end=len(traceroutes),
                                                    width=15)

    for trace in traceroutes:
        progress += 1
        progress.show_progress()

        s, t = trace[0], trace[-1]
        sh_dummy = random.choice(g_dummy.get_shortest_paths(s, t))
        sh_original = random.choice(g.get_shortest_paths(s, t))
        stretch = len(sh_dummy) - len(sh_original)
        dummy_sh_traceroutes_meta.append((sh_dummy, stretch))
        stretches.append(stretch)

    dummy_sh_meta = [
        (x[0], x[1], vft.is_valley_free(g_dummy, x[0], vft.CLOSENESS))
        for x in dummy_sh_traceroutes_meta
    ]

    stretches = [x for x in stretches if x >= 0]
    stretch_hist = collections.Counter(stretches)

    import matplotlib.pyplot as plt

    print('')
    print([(x, 100 * y / float(len(traceroutes)), y)
           for x, y in stretch_hist.items()])
    plt.plot(list(stretch_hist.keys()), list(stretch_hist.values()), 'g^')
    plt.ylabel('some numbers')
    # plt.show()

    logger.info('Dummy VF stat')

    # No traceroutes -> nothing to report (max() of an empty list raises).
    max_stretch = -1
    if dummy_sh_meta:
        max_stretch = max(dummy_sh_meta, key=lambda x: x[1])[1]

    for stretch in range(0, max_stretch + 1):
        stretched_traces = [x for x in dummy_sh_meta if x[1] == stretch]
        count = len(stretched_traces)
        if count == 0:
            # A stretch value nobody has: percentages are undefined.
            continue
        vf_count = len([x for x in stretched_traces if x[2]])
        vf_perc = vf_count / float(count)
        nonvf_count = count - vf_count
        nonvf_perc = nonvf_count / float(count)
        logger.info(
            '{stretch} -- {vf_perc}[{vf_count}]\t{nonvf_perc}[{nonvf_count}]'.
            format(stretch=stretch,
                   vf_perc=vf_perc,
                   vf_count=vf_count,
                   nonvf_perc=nonvf_perc,
                   nonvf_count=nonvf_count))

    plt.plot(increments, 'g^')
    plt.ylabel('some numbers')
    plt.show()
def random_nonvf_route(g, s, t, hop_count, path=None, vfmode=None):
    """Search a loop-free route from ``s`` to ``t``, preferring non-VF ones.

    The search is a recursive depth-first walk over the out-neighbours of
    the current node.  Only neighbours from which ``t`` is still reachable
    within the remaining hop budget are considered.  They are tried in
    ascending weight order, where a neighbour whose extension already breaks
    valley-freeness gets weight -1 and every other neighbour gets the
    ``.value`` of ``vft.edge_dir`` for the connecting edge (so, assuming
    those values are not below -1, valley-breaking steps are tried first --
    TODO confirm against ``vft.edge_dir``).

    Args:
        g: graph to walk.
        s: current (initially: source) vertex; a ``str`` is resolved with
            ``g.vs.find`` on the initial call.
        t: target vertex; a ``str`` is resolved like ``s``.
        hop_count: on the initial call the maximum number of nodes of the
            route; on recursive calls the remaining budget.
        path: nodes visited so far; ``None`` marks the initial call.
        vfmode: valley-free mode, ``vft.CLOSENESS`` when ``None``.

    Returns:
        Tuple ``(is_vf, route)``.  The first non valley-free route found is
        returned immediately; otherwise the first valley-free route found;
        otherwise ``(False, [])``.

    Raises:
        IndexError: on the initial call, if ``s`` or ``t`` cannot be
            resolved to a vertex of ``g``.
    """
    is_initial_call = path is None
    if is_initial_call:
        try:
            if isinstance(s, str):
                s = g.vs.find(s).index
            if isinstance(t, str):
                t = g.vs.find(t).index
            # check if s and t are valid indices
            _, _ = g.vs[s], g.vs[t]
        except (ValueError, IndexError):
            raise IndexError('Vertex index out of range or not exists')

        # some special cases
        if hop_count < 1:
            return (False, [])
        if s == t:
            return (True, [s, ])
        # if s != t then length must be larger than 1
        if hop_count == 1:
            return (False, [])

        shortest_route = g.shortest_paths(s, t, mode=i.OUT)[0][0] + 1
        if hop_count < shortest_route:
            return (False, [])

        path = [s, ]
        hop_count -= 1

    if vfmode is None:
        vfmode = vft.CLOSENESS

    if s == t:
        return (vft.is_valley_free(g, path, vfmode=vfmode), path)

    logger.debug('Hop count remained: %d' % hop_count)
    logger.debug('Current node: %s' % s)

    neighbors = [x for x in g.neighbors(s, mode=i.OUT) if x not in path]
    distances = [x[0] for x in g.shortest_paths(neighbors, t, mode=i.OUT)]

    # +1 because igraph reports how many hops t is away from the neighbour,
    # and stepping onto the neighbour costs one more.
    reachable = [(node, dist) for node, dist in zip(neighbors, distances)
                 if dist + 1 <= hop_count]

    def step_weight(node):
        # -1 for a step that already breaks valley-freeness, otherwise the
        # direction value of the edge leading to the neighbour.
        if not vft.is_valley_free(g, path + [node, ], vfmode=vfmode):
            return -1
        return vft.edge_dir(g, [s, node], vfmode=vfmode).value

    # every entry is (neighbour, distance, weight)
    candidates = [(node, dist, step_weight(node)) for node, dist in reachable]
    # sort by weights
    candidates = sorted(candidates, key=itemgetter(2))

    if not candidates:
        return (False, [])

    logger.debug('Valid candidates: %s' % candidates)

    fallback = (False, [])  # by default there was no route to T
    for node, _dist, _weight in candidates:
        logger.debug('Chosen one: %s' % node)
        isvf, route = random_nonvf_route(g, node, t, hop_count - 1,
                                         path + [node, ], vfmode)
        if len(route) == 0:
            continue
        if not isvf:
            # we are done, we found a non-VF route
            return (isvf, route)
        # A valley-free route: remember the first one in case no non-VF
        # route turns up, then keep trying the other candidates.
        if len(fallback[1]) == 0:
            fallback = (isvf, route)

    return fallback
def purify(g, meta_original, out, count=1000, try_per_race=1,
           show_progress=False, with_lp=True):
    """Run random route walks for sampled traces and record their VF share.

    Records that already carry ``helpers.RANDOM_WALK_RUN_COUNT`` are
    ignored.  Up to ``count`` of the remaining records are sampled.  The
    observed non-negative stretches (``TRACE_LEN - SH_LEN``) of the sample
    are shuffled and handed out to the records by position.  For each record
    ``try_per_race`` routes are requested from ``helpers.random_route_walk``
    with length ``SH_LEN + assigned stretch`` and classified in
    ``vft.CLOSENESS`` mode.

    Side effects:
        * Every processed record is updated in place with the run count and
          the VF (and, if ``with_lp``, LP soft/hard) hit counts.  The degree
          and prelabeled counters are stored as 0 because those checks are
          not performed here.
        * The sampled records are saved with
          ``helpers.save_to_json(out, ...)``.
        * Records whose trace cannot be converted to exactly one vertex-id
          trace are printed after a ``PROBLEM`` line and skipped.

    Args:
        g: graph to walk.
        meta_original: list of meta records.
        out: destination handed to ``helpers.save_to_json``.
        count: maximum number of records to sample.
        try_per_race: number of random walks per record.
        show_progress: when true an animated progress bar is shown.
        with_lp: when true local preference is evaluated for VF routes.

    Returns:
        None.
    """
    empty = 0

    # remove traces with already calculated random paths
    logger.warning('[r]ONLY NOT FILLED PATHS[/]')
    meta_filled = [
        x for x in meta_original if helpers.RANDOM_WALK_RUN_COUNT not in x
    ]

    logger.info('All trace count: %d' % len(meta_filled))
    tr_count = min(len(meta_filled), count)
    meta_random = random.sample(meta_filled, tr_count)
    logger.info('Chosen subset count: %d' % len(meta_random))

    # Best effort: if the first element does not look like a meta record,
    # treat every element as a bare trace and wrap it.
    # NOTE(review): wrapped records have no TRACE_LEN / SH_LEN, which the
    # code below reads -- confirm whether this path is still used.
    try:
        meta_random[0][helpers.TRACE]
    except Exception:
        meta_random = [{helpers.TRACE: x} for x in meta_random]

    progress = progressbar1.DummyProgressBar(end=10, width=15)
    if show_progress:
        progress = progressbar1.AnimatedProgressBar(end=len(meta_random),
                                                    width=15)

    # Non-negative stretches of the sample in ascending order, then
    # shuffled in place (random.shuffle returns None).
    # NOTE(review): a record with a negative stretch would make this list
    # shorter than meta_random and break stretch_list[idx] below.
    stretch_list = sorted(
        stretch for stretch in
        (x[helpers.TRACE_LEN] - x[helpers.SH_LEN] for x in meta_random)
        if stretch >= 0)
    random.shuffle(stretch_list)

    strx_array = []
    for idx, trace_meta in enumerate(meta_random):
        progress += 1
        progress.show_progress()

        shl = trace_meta[helpers.SH_LEN]
        trace = vft.trace_in_vertex_id(g, [trace_meta[helpers.TRACE], ])
        if len(trace) != 1:
            print('PROBLEM')
            print(trace_meta)
            continue
        trace = trace[0]

        closeness_route_vf = 0
        closeness_route_lp_soft = 0
        closeness_route_lp_hard = 0

        s, t = trace[0], trace[-1]
        for _ in range(try_per_race):
            random_path = helpers.random_route_walk(
                g, s, t, shl + stretch_list[idx])
            if len(random_path) == 0:
                empty += 1

            # strx_array collects the VF verdicts of stretch-1 walks only.
            is_stretch_one = len(random_path) == shl + 1
            if vft.is_valley_free(g, random_path, vfmode=vft.CLOSENESS):
                closeness_route_vf += 1
                if is_stretch_one:
                    strx_array.append(1)
                if with_lp:
                    lp_soft = vft.is_local_preferenced(g, random_path,
                                                       first_edge=True,
                                                       vfmode=vft.CLOSENESS)
                    lp_hard = vft.is_local_preferenced(g, random_path,
                                                       first_edge=False,
                                                       vfmode=vft.CLOSENESS)
                    if lp_soft:
                        closeness_route_lp_soft += 1
                    if lp_hard:
                        closeness_route_lp_hard += 1
            elif is_stretch_one:
                strx_array.append(0)

            if len(random_path) != len(set(random_path)):
                logger.error('LENGTH ERROR')

        extra_meta = {
            helpers.RANDOM_WALK_RUN_COUNT: try_per_race,
            helpers.RANDOM_WALK_VF_CLOSENESS_ROUTE: closeness_route_vf,
            helpers.RANDOM_WALK_VF_DEGREE_ROUTE: 0,
            helpers.RANDOM_WALK_VF_PRELABELED_ROUTE: 0,
        }
        if with_lp:
            extra_meta.update({
                helpers.RANDOM_WALK_LP_SOFT_CLOSENESS_ROUTE:
                    closeness_route_lp_soft,
                helpers.RANDOM_WALK_LP_HARD_CLOSENESS_ROUTE:
                    closeness_route_lp_hard,
                helpers.RANDOM_WALK_LP_SOFT_DEGREE_ROUTE: 0,
                helpers.RANDOM_WALK_LP_HARD_DEGREE_ROUTE: 0,
                helpers.RANDOM_WALK_LP_SOFT_PRELABELED_ROUTE: 0,
                helpers.RANDOM_WALK_LP_HARD_PRELABELED_ROUTE: 0,
            })
        trace_meta.update(extra_meta)

    # save modified meta
    # all meta_* get only references from meta_original
    helpers.save_to_json(out, meta_random)

    # calculate results
    # float(): plain int / int floors under Python 2.  Records skipped above
    # (or run zero times) carry no usable run count and are left out.
    random_walk_vf_ratio_per_element = [
        x[helpers.RANDOM_WALK_VF_CLOSENESS_ROUTE]
        / float(x[helpers.RANDOM_WALK_RUN_COUNT])
        for x in meta_random
        if x.get(helpers.RANDOM_WALK_RUN_COUNT)
    ]
    random_walk_vf_ratio = np.mean(random_walk_vf_ratio_per_element)

    # print results
    logger.info('')
    logger.info('Empty: %d' % empty)
    logger.info('Tested trace count: %d' % len(meta_random))
    logger.info('VF ratio in random walks: %f' % random_walk_vf_ratio)
    logger.info('VF ratio in random walks for path stretch 1: %f'
                % np.mean(strx_array))
def _evaluate_trace(g, trace, vf_g, vfmode, flags):
    """Classify one trace under a single valley-free labelling mode.

    Args:
        g: Graph the trace was taken on.
        trace: The trace, as handed over by ``purify``.
        vf_g: Result of ``vft.convert_to_vf(g, vfmode=vfmode)``; passed
            through to ``vft.is_local_preferenced``.
        vfmode: The ``vft`` labelling mode to evaluate.
        flags: Indexed with ``FLAG_LP_SOFT`` / ``FLAG_LP_HARD``; only
            consulted when the trace is valley-free.

    Returns:
        ``(is_vf, lp_soft, lp_hard)``. ``is_vf`` is the int value of
        ``vft.is_valley_free``. Each LP entry is 1 or 0 when it was
        evaluated and -1 when it was not (trace not valley-free, or the
        matching ``FLAG_LP_*`` entry is falsy).
    """
    is_vf = int(vft.is_valley_free(g, trace, vfmode))
    lp_soft = -1
    lp_hard = -1
    if not is_vf:
        # Local preference is only checked for valley-free traces.
        return is_vf, lp_soft, lp_hard

    if flags[FLAG_LP_SOFT]:
        # "Soft" variant: first_edge=True.
        lp_soft = 1 if vft.is_local_preferenced(g, trace,
                                                vf_g=vf_g,
                                                first_edge=True,
                                                vfmode=vfmode) else 0
    if flags[FLAG_LP_HARD]:
        # "Hard" variant: first_edge=False.
        lp_hard = 1 if vft.is_local_preferenced(g, trace,
                                                vf_g=vf_g,
                                                first_edge=False,
                                                vfmode=vfmode) else 0
    return is_vf, lp_soft, lp_hard


def purify(g, traceroutes, flags, show_progress=False):
    """Compute per-trace valley-free / local-preference metadata.

    Every trace with at least two nodes yields one result dict holding the
    named trace, its length, the shortest-path length between its
    endpoints, hop and percentage stretch, and the valley-free (VF) and
    local-preference (LP) verdicts for each enabled labelling mode.

    Args:
        g: Graph the traces belong to. It is used through ``g.vs`` and
            ``g.shortest_paths`` (presumably an igraph ``Graph`` -- confirm).
        traceroutes: Traces accepted by ``vft.trace_in_vertex_id``.
        flags: Indexed with ``FLAG_PRELABELED``, ``FLAG_DEGREE`` and
            ``FLAG_CLOSENESS`` (which labelling modes to evaluate) and with
            ``FLAG_LP_SOFT`` / ``FLAG_LP_HARD`` (which LP checks to run).
        show_progress: When true an animated progress bar is used instead
            of the dummy one.

    Returns:
        A list of dicts keyed by ``helpers`` constants. Verdict fields are
        -1 when the corresponding check was not evaluated.
    """
    results = list()

    # Work on vertex ids from here on: the loop below indexes ``g.vs`` with
    # the trace elements.
    traceroutes = vft.trace_in_vertex_id(g, traceroutes)

    # Generate the valley-free graphs, but only for the enabled modes.
    vf_g_pre = vf_g_degree = vf_g_closeness = None
    if flags[FLAG_PRELABELED]:
        logger.info('Generate VF_G_PRE')
        vf_g_pre = vft.convert_to_vf(g, vfmode=vft.PRELABELED)
    else:
        logger.info('Skip prelabeled graph')

    if flags[FLAG_DEGREE]:
        logger.info('Generate VF_G_DEGREE')
        vf_g_degree = vft.convert_to_vf(g, vfmode=vft.DEGREE)
    else:
        logger.info('Skip degree graph')

    if flags[FLAG_CLOSENESS]:
        logger.info('Generate VF_G_CLOSENESS')
        vf_g_closeness = vft.convert_to_vf(g, vfmode=vft.CLOSENESS)
    else:
        logger.info('Skip closeness graph')

    progress = progressbar1.DummyProgressBar(end=10, width=15)
    if show_progress:
        progress = progressbar1.AnimatedProgressBar(end=len(traceroutes),
                                                    width=15)

    not_evaluated = (-1, -1, -1)

    for trace in traceroutes:
        progress += 1
        progress.show_progress()

        # Built once; reused for the debug message and the result record.
        named_trace = [g.vs[_id]["name"] for _id in trace]
        logger.debug('Current trace: %s', named_trace)

        # A trace needs two endpoints. Empty traces are skipped as well,
        # otherwise ``trace[0]`` below would raise IndexError.
        if len(trace) < 2:
            continue

        s, t = trace[0], trace[-1]

        trace_len = len(trace)
        sh_len = g.shortest_paths(s, t, mode=i.OUT)[0][0]
        sh_len += 1  # convert hop count to node count
        # NOTE(review): if ``t`` is not reachable from ``s`` the distance is
        # presumably infinite and propagates into both stretch values --
        # confirm against the graph library.

        prelabeled = degree = closeness = not_evaluated
        if flags[FLAG_PRELABELED]:
            prelabeled = _evaluate_trace(g, trace, vf_g_pre,
                                         vft.PRELABELED, flags)
        if flags[FLAG_DEGREE]:
            degree = _evaluate_trace(g, trace, vf_g_degree,
                                     vft.DEGREE, flags)
        if flags[FLAG_CLOSENESS]:
            closeness = _evaluate_trace(g, trace, vf_g_closeness,
                                        vft.CLOSENESS, flags)

        is_vf_prelabeled, is_lp_prelabeled_soft, is_lp_prelabeled_hard = \
            prelabeled
        is_vf_degree, is_lp_degree_soft, is_lp_degree_hard = degree
        is_vf_closeness, is_lp_closeness_soft, is_lp_closeness_hard = \
            closeness

        # NOTE: the shortest valley-free route prediction (formerly
        # ``vft.get_shortest_vf_route`` on the prelabeled VF graph) is
        # disabled. The original authors remarked that it did not always
        # seem accurate and should be treated with reservations; the value
        # had been recorded partly as a cross-check. Both fields therefore
        # always carry the "not evaluated" marker.
        sh_vf_len = -1
        in_vf_prediction = -1

        percentage_stretch = trace_len / float(sh_len)

        result = {
            helpers.TRACE: named_trace,
            helpers.TRACE_LEN: trace_len,
            helpers.SH_LEN: sh_len,
            helpers.SH_VF_LEN: sh_vf_len,
            helpers.IS_VF_PRELABELED: is_vf_prelabeled,
            helpers.IS_VF_DEGREE: is_vf_degree,
            helpers.IS_VF_CLOSENESS: is_vf_closeness,
            helpers.HOP_STRETCH: trace_len - sh_len,
            helpers.PERC_STRETCH: percentage_stretch,
            helpers.IN_VF_PRED: in_vf_prediction,
            helpers.IS_LP_SOFT_PRELABELED: is_lp_prelabeled_soft,
            helpers.IS_LP_HARD_PRELABELED: is_lp_prelabeled_hard,
            helpers.IS_LP_SOFT_DEGREE: is_lp_degree_soft,
            helpers.IS_LP_HARD_DEGREE: is_lp_degree_hard,
            helpers.IS_LP_SOFT_CLOSENESS: is_lp_closeness_soft,
            helpers.IS_LP_HARD_CLOSENESS: is_lp_closeness_hard,
        }

        results.append(result)

    return results