def accumulate_binned_flow_sizes(self, flow):
    """Append one flow's histogram feature(s) to its endpoints' flow lists.

    The endpoints are ``flow.get_ip(True)`` (treated as the source) and
    ``flow.get_ip(False)`` (treated as the destination). Each endpoint gets
    an empty list in both ``self.out_flows`` and ``self.in_flows`` when it
    is first seen. What is appended then depends on two flags:

    * ``self.monitor_flow`` falsy: the feature of ``flow.get_duration()``
      goes to the source's out-list and the destination's in-list.
    * ``self.monitor_flow`` and ``self.accumulate_total_flow`` both truthy:
      same, but using ``flow.get_num_bytes()``.
    * ``self.monitor_flow`` truthy, ``self.accumulate_total_flow`` falsy:
      the feature of ``flow.get_src_bytes()`` goes to the source's out-list
      and the destination's in-list, and the feature of
      ``flow.get_dst_bytes()`` goes to the destination's out-list and the
      source's in-list.

    Features are whatever ``histogram.get_feature(self.flow_bins, value)``
    returns (presumably a bin identifier -- confirm against ``histogram``).

    Args:
        flow: Object providing ``get_ip``, ``get_num_bytes``,
            ``get_src_bytes``, ``get_dst_bytes`` and ``get_duration``.

    Returns:
        None. ``self.out_flows`` and ``self.in_flows`` are updated in place.
    """
    # NOTE(review): nesting below is inferred from statement order; both
    # list initialisations are assumed to sit inside their `if` guard.
    origin_ip = flow.get_ip(True)
    target_ip = flow.get_ip(False)

    # Only one table is consulted per endpoint, but both entries are
    # (re)created together when the endpoint is missing from that table.
    if origin_ip not in self.out_flows:
        self.out_flows[origin_ip], self.in_flows[origin_ip] = [], []
    if target_ip not in self.in_flows:
        self.in_flows[target_ip], self.out_flows[target_ip] = [], []

    if not self.monitor_flow:
        feature = histogram.get_feature(self.flow_bins, flow.get_duration())
    elif self.accumulate_total_flow:
        feature = histogram.get_feature(self.flow_bins, flow.get_num_bytes())
    else:
        # Per-direction byte counts: each direction is recorded as an
        # outgoing flow for its sender and an incoming flow for its peer.
        forward = histogram.get_feature(self.flow_bins, flow.get_src_bytes())
        backward = histogram.get_feature(self.flow_bins, flow.get_dst_bytes())
        self.out_flows[origin_ip].append(forward)
        self.in_flows[target_ip].append(forward)
        self.out_flows[target_ip].append(backward)
        self.in_flows[origin_ip].append(backward)
        return

    # Single-feature cases: one outgoing entry for the source, one incoming
    # entry for the destination.
    self.out_flows[origin_ip].append(feature)
    self.in_flows[target_ip].append(feature)
def find_median_flows(self):
    """Compute per-IP median flow features for both directions.

    For every IP in ``self.in_flows`` / ``self.out_flows`` whose list is
    non-empty, ``median(list)`` is stored in ``self.in_median_flows`` /
    ``self.out_median_flows``. Any IP that received a median in at least one
    direction but has no entry in the other is given
    ``histogram.get_feature(self.flow_bins, 0)`` there. Finally the sizes of
    both median tables are printed.

    Existing entries in the median tables are overwritten only for IPs that
    have a non-empty list; other existing entries are left untouched.

    Returns:
        None. ``self.in_median_flows`` and ``self.out_median_flows`` are
        updated in place.
    """
    # NOTE(review): an IP is assumed to be collected only when its list is
    # non-empty (nesting inferred from statement order) -- confirm.
    seen_ips = set()

    for address, values in self.in_flows.items():
        if len(values) == 0:
            continue
        self.in_median_flows[address] = median(values)
        seen_ips.add(address)

    for address, values in self.out_flows.items():
        if len(values) == 0:
            continue
        self.out_median_flows[address] = median(values)
        seen_ips.add(address)

    # Fill the missing direction for IPs that only had flows one way, using
    # the feature that the histogram helper returns for the value 0.
    for address in seen_ips:
        for medians in (self.in_median_flows, self.out_median_flows):
            if address not in medians:
                medians[address] = histogram.get_feature(self.flow_bins, 0)

    inflow_count = len(self.in_median_flows)
    print('# inflows = ' + str(inflow_count))
    outflow_count = len(self.out_median_flows)
    print('# outflows = ' + str(outflow_count))