def do_merge(wifi_file, traffic_files):
    """Annotate every traffic log with the result of a location lookup.

    Each traffic file is read line by line and cut into flows: a line whose
    prefix (as reported by ``_traffic_log_prefix``) is ``'TCP'`` starts a new
    flow, and every other line is appended to the flow currently being
    collected. Each completed flow is passed to ``_do_search_db`` and the
    returned ``(mac, ap)`` pair is written together with the flow lines by
    ``_write2file``.

    Args:
        wifi_file: Source handed to ``LocationDB``. When falsy no database is
            built and ``_do_search_db`` receives ``None`` instead.
        traffic_files: Iterable of traffic log paths. A falsy value (``None``
            or an empty sequence) makes the function a no-op.

    Returns:
        None. The output path of each traffic file is chosen by
        ``_gen_output_file_from_traffic_file``.
    """
    if not traffic_files:
        return

    ldb = LocationDB(wifi_file) if wifi_file else None

    for fn in traffic_files:
        print(fn)
        out_file_path = _gen_output_file_from_traffic_file(fn)
        ofile = FileReader.open_file(out_file_path, 'wb')
        try:
            ifile = FileReader.open_file(fn)
            try:
                one_tcp_flow = []
                for line in ifile:
                    # A 'TCP' line closes the flow collected so far (if any)
                    # and becomes the first line of the next one.
                    if _traffic_log_prefix(line) == 'TCP' and one_tcp_flow:
                        mac, ap = _do_search_db(one_tcp_flow, ldb)
                        _write2file(one_tcp_flow, mac, ap, ofile)
                        one_tcp_flow = []
                    one_tcp_flow.append(line)
                # Flush the trailing flow; an empty input file has none, so
                # the lookup and the write are skipped in that case.
                if one_tcp_flow:
                    mac, ap = _do_search_db(one_tcp_flow, ldb)
                    _write2file(one_tcp_flow, mac, ap, ofile)
            finally:
                # Close the handles even when a helper raises mid-file.
                ifile.close()
        finally:
            ofile.close()
        logging.info('%s completed', fn)
def _flow_key(src_addr, src_port, dst_addr, dst_port):
    """Return a direction-independent key for the two endpoints of a flow."""
    return tuple(sorted([(src_addr, src_port), (dst_addr, dst_port)]))


def _load_dpi_meta(dpi_file):
    """Index the TCP records of a DPI log by flow key.

    Returns a dict ``{flow key: [(connection time, L7 protocol), ...]}``.
    The dict is empty when ``dpi_file`` is falsy or holds no usable record.
    """
    dpi_meta = {}
    if not dpi_file:
        return dpi_meta

    idpi_file = FileReader.open_file(dpi_file, 'rb')
    try:
        for lineno, line in enumerate(idpi_file, 1):
            line = line.strip("\r \n")
            # The first line is always skipped (presumably a header row --
            # confirm against the DPI log format), as are blank lines.
            if lineno == 1 or not line:
                continue
            parts = line.split(' ')
            if len(parts) < 7:
                continue
            try:
                fkey = _flow_key(ip2int(parts[0]), int(parts[1]),
                                 ip2int(parts[2]), int(parts[3]))
                flow_conn_time = float(parts[4])
            except (ValueError, IndexError):
                # Malformed record: ignore it.
                continue
            # Only TCP records are of interest (field 5 is the L4 protocol).
            if parts[5] != "TCP":
                continue
            dpi_meta.setdefault(fkey, []).append((flow_conn_time, parts[6]))
    finally:
        idpi_file.close()
    return dpi_meta


def _tag_tcp_log_line(line, dpi_meta):
    """Return the output text for one TCP-log line, or None to drop it.

    'HTTP' lines are passed through. 'TCP' lines get an L7 protocol tag
    inserted after the prefix ("N/A" when no DPI record matches). Lines with
    any other prefix, or without a ':' separator, produce no output.
    """
    proto, sep, content = line.partition(':')
    if not sep:
        # No "<prefix>:" at all (e.g. a blank line): nothing to tag.
        return None
    if proto == 'HTTP':
        return "%s\n" % line.strip('\r\n ')
    if proto != 'TCP':
        return None

    content = content.strip(' ')
    payload = content.strip('\r\n ')
    if not dpi_meta:
        # DPI data lost when capturing: every TCP flow is tagged "N/A".
        return "%s: %s %s\n" % (proto, "N/A", payload)

    unchanged = "%s\n" % line.strip('\r\n ')
    parts = content.split(' ')
    if len(parts) < 98:
        # Too few fields to locate the endpoints and the connection time.
        return unchanged
    try:
        fkey = _flow_key(ip2int(parts[0]), int(parts[1]),
                         ip2int(parts[44]), int(parts[45]))
    except (ValueError, IndexError):
        # Unparsable endpoint fields: keep the line as it was.
        return unchanged

    try:
        flow_conn_time = float(parts[97]) / 1000.0  # in seconds
    except ValueError:
        print('TCP connection time convertion error: "%s"' % line)
        # Without a usable time no DPI record can be matched reliably.
        flow_conn_time = None

    p = "N/A"
    if flow_conn_time is not None:
        for dpi_time, l7_proto in dpi_meta.get(fkey, ()):
            # First DPI record of the same flow within 60 seconds wins.
            if abs(flow_conn_time - dpi_time) < 60:
                p = l7_proto
                break
    return "%s: %s %s\n" % (proto, p, payload)


def mergeDpi2Tcp(dpi_file, tcp_file, output):
    """Tag each TCP flow of a traffic log with its DPI-detected L7 protocol.

    The DPI log is loaded into memory first. Then every 'TCP' line of
    ``tcp_file`` is written to ``output`` with a protocol tag inserted after
    the prefix. The tag is taken from the first DPI record that has the same
    endpoint pair and a connection time less than 60 seconds away, and is
    "N/A" otherwise. 'HTTP' lines are copied unchanged; all other lines are
    dropped.

    Args:
        dpi_file: Path of the DPI log, or a falsy value when none exists (in
            which case every TCP flow is tagged "N/A").
        tcp_file: Path of the traffic log to tag. When falsy the function
            returns immediately without creating ``output``.
        output: Path of the file to write.

    Returns:
        None.
    """
    if not tcp_file:
        return

    # Load the meta into memory
    print("Loading DPI meta data ...")
    dpi_meta = _load_dpi_meta(dpi_file)

    # Mark the TCP files
    print("Adding DPI tag to flows ...")
    ofile = FileReader.open_file(output, 'wb')
    try:
        itcp_file = FileReader.open_file(tcp_file, 'rb')
        try:
            for line in itcp_file:
                tagged = _tag_tcp_log_line(line, dpi_meta)
                if tagged is not None:
                    ofile.write(tagged)
        finally:
            # Close the handles even when a line fails to process.
            itcp_file.close()
    finally:
        ofile.close()
    logging.info('file %s completed', tcp_file)