def main():
    """Pair raw DPI and TCP log files by hour and dispatch the pairs.

    Builds a mapping ``{hour: [dpi_file, tcp_file]}`` where ``hour`` is the
    timestamp parsed from the file name with minutes and seconds zeroed.
    Either slot stays ``None`` when no matching file was listed. The finished
    mapping is passed to ``multipleThread``.
    """
    by_hour = {}  # {hour: [dpi, tcp]}

    # DPI files: only the first file seen for an hour is kept.
    for dpi_path in FileReader.list_files(input_dpi_log, DPI_FN_PATTERN):
        stamp = os.path.basename(dpi_path).split('.', 1)[0]
        hour = datetime.datetime.strptime(stamp, DPI_TIME_PATTERN)
        hour = hour.replace(minute=0, second=0)
        if hour in by_hour:
            print("WARNNING: Hour %s occurs with more one DPI file found" % str(hour))
            continue
        by_hour[hour] = [dpi_path, None]

    # TCP files: fill the second slot; a later file for the same hour
    # replaces an earlier one.
    for tcp_path in FileReader.list_files(input_tcp_log, TCP_FN_PATTERN):
        stamp = os.path.basename(tcp_path).rsplit('-', 1)[0]
        hour = datetime.datetime.strptime(stamp, TCP_TIME_PATTERN)
        hour = hour.replace(minute=0, second=0)
        pair = by_hour.get(hour)
        if pair is None:
            # No DPI file was registered for this hour.
            by_hour[hour] = [None, tcp_path]
            print("WARNNING: Hour %s occurs without DPI file found" % str(hour))
        else:
            pair[1] = tcp_path

    multipleThread(by_hour)
def do_merge(wifi_file, traffic_files):
    """Annotate every traffic file with the MAC/AP found in the location DB.

    Each traffic file is read line by line and cut into flows: a line whose
    prefix (per ``_traffic_log_prefix``) is ``'TCP'`` starts a new flow and
    every other line is appended to the current one. For each flow,
    ``_do_search_db`` is asked for a ``(mac, ap)`` pair and the flow is
    emitted through ``_write2file`` into the output file whose path comes
    from ``_gen_output_file_from_traffic_file``.

    Args:
        wifi_file: Path of the wifi log used to build the ``LocationDB``.
            When it is falsy, the lookup helper receives ``None`` as the DB.
        traffic_files: Iterable of traffic log paths to process.

    Both file handles are released even when a helper raises, so a failure
    on one file does not leak descriptors.
    """
    ldb = None
    # The DB is only built when there is both a wifi log and work to do.
    if wifi_file and traffic_files:
        ldb = LocationDB(wifi_file)

    for fn in traffic_files:
        print(fn)
        out_file_path = _gen_output_file_from_traffic_file(fn)
        ofile = FileReader.open_file(out_file_path, 'wb')
        try:
            ifile = FileReader.open_file(fn)
            try:
                one_tcp_flow = []
                for line in ifile:
                    if _traffic_log_prefix(line) == 'TCP' and one_tcp_flow:
                        # A new TCP record closes the flow collected so far.
                        mac, ap = _do_search_db(one_tcp_flow, ldb)
                        _write2file(one_tcp_flow, mac, ap, ofile)
                        one_tcp_flow = [line]
                    else:
                        # First TCP record of the file, or a continuation line.
                        one_tcp_flow.append(line)

                # Flush the trailing flow.
                # NOTE(review): for an empty input file this passes an empty
                # flow to the helpers, as the original did -- confirm they
                # tolerate it.
                mac, ap = _do_search_db(one_tcp_flow, ldb)
                _write2file(one_tcp_flow, mac, ap, ofile)
            finally:
                ifile.close()
        finally:
            ofile.close()
        logging.info('%s completed' % str(fn))
def main():
    """Group wifilog and traffic files by day and dispatch the groups.

    Builds a mapping ``{day: [wifilog_file, [traffic_files]]}``. Wifilog keys
    are whatever ``_extract_time_from_wifilog_filename`` returns; traffic
    timestamps have hour, minute and second zeroed before being used as keys.
    The finished mapping is passed to ``multipleThread``.
    """
    per_day = {}  # {day: [wifilog, [trafficFiles]]}

    # Wifilog files: the first file seen for a day wins; it starts with an
    # empty traffic list in case no traffic file shows up for that day.
    for wifi_path in FileReader.list_files(wifilog, WIFILOG_FN_PATTERN):
        day = _extract_time_from_wifilog_filename(wifi_path)
        if day in per_day:
            print("WARNNING: day %s occurs more than once" % str(day))
        else:
            per_day[day] = [wifi_path, []]

    # Traffic files: collected under their day, creating a wifilog-less
    # entry (and warning once) when the day has not been seen yet.
    for traffic_path in FileReader.list_files(trafficlog, TRAFFIC_FN_PATTERN):
        day = _extract_time_from_traffic_filename(traffic_path)
        day = day.replace(hour=0, minute=0, second=0)
        if day not in per_day:
            per_day[day] = [None, []]
            print("WARNNING: day %s occurs without wifilog found" % str(day))
        per_day[day][1].append(traffic_path)

    multipleThread(per_day)
def _load_dpi_meta(dpi_file):
    """Read a DPI log into ``{flow_key: [(conn_time, l7_proto), ...]}``.

    ``flow_key`` is the sorted tuple of the two ``(address, port)`` endpoints,
    so both directions of a connection share one key. Only records whose
    layer-4 protocol field is ``"TCP"`` are kept. Returns an empty dict when
    ``dpi_file`` is falsy.
    """
    dpi_meta = {}
    if not dpi_file:
        return dpi_meta

    idpi_file = FileReader.open_file(dpi_file, 'rb')
    try:
        for lineno, line in enumerate(idpi_file, 1):
            line = line.strip("\r \n")
            # The first line is always skipped (presumably a header row),
            # as are blank lines.
            if lineno == 1 or len(line) == 0:
                continue
            parts = line.split(' ')
            if len(parts) < 7:
                continue
            try:
                src = (ip2int(parts[0]), int(parts[1]))
                dst = (ip2int(parts[2]), int(parts[3]))
                flow_conn_time = float(parts[4])
            except (ValueError, IndexError):
                # Malformed record: ignore it.
                continue
            if parts[5] != "TCP":
                continue
            fkey = tuple(sorted([src, dst]))
            dpi_meta.setdefault(fkey, []).append((flow_conn_time, parts[6]))
    finally:
        idpi_file.close()
    return dpi_meta


def _tag_tcp_line(line, content, dpi_meta):
    """Return the output record for one ``TCP:`` line.

    ``content`` is the text after the first colon with surrounding spaces
    removed. Lines with fewer than 98 space-separated fields, or with
    unparsable endpoints, are returned unchanged (stripped, newline added).
    Otherwise the line is re-emitted as ``"TCP: <tag> <content>"`` where
    ``<tag>`` is the L7 protocol of the first DPI flow with the same
    endpoints whose connect time lies within 60 seconds, or ``"N/A"``.
    """
    untouched = "%s\n" % line.strip('\r\n ')
    parts = content.split(' ')
    if len(parts) < 98:
        return untouched
    try:
        src = (ip2int(parts[0]), int(parts[1]))
        dst = (ip2int(parts[44]), int(parts[45]))
    except (ValueError, IndexError):
        return untouched
    fkey = tuple(sorted([src, dst]))

    try:
        flow_conn_time = float(parts[97]) / 1000.0  # in seconds
    except ValueError:
        print('TCP connection time convertion error: "%s"' % line)
        # Without a usable time no DPI flow can be matched; never fall back
        # to a value left over from a previous line.
        flow_conn_time = None

    tag = "N/A"
    if flow_conn_time is not None:
        for dpi_time, l7_proto in dpi_meta.get(fkey, ()):
            if abs(flow_conn_time - dpi_time) < 60:  # in seconds
                tag = l7_proto
                break
    return "TCP: %s %s\n" % (tag, content.strip('\r\n '))


def mergeDpi2Tcp(dpi_file, tcp_file, output):
    """Write a copy of a TCP log with a DPI protocol tag on each TCP flow.

    Args:
        dpi_file: Path of the DPI log, or a falsy value when none exists.
        tcp_file: Path of the TCP log; nothing is done when it is falsy.
        output: Path of the file to write.

    ``HTTP:`` lines are copied through (stripped, newline re-added) and
    ``TCP:`` lines receive a tag after the prefix. When no DPI metadata could
    be loaded every TCP line is tagged ``"N/A"``. Lines with any other
    prefix, or with no ``:`` separator at all, are not written. All files
    are closed even if processing fails part-way.
    """
    if not tcp_file:
        return

    # Load the meta into memory
    print("Loading DPI meta data ...")
    dpi_meta = _load_dpi_meta(dpi_file)

    # Mark the TCP files
    print("Adding DPI tag to flows ...")
    ofile = FileReader.open_file(output, 'wb')
    try:
        itcp_file = FileReader.open_file(tcp_file, 'rb')
        try:
            for line in itcp_file:
                proto, sep, content = line.partition(':')
                if not sep:
                    # No "<proto>:" prefix; treated like an unknown protocol.
                    continue
                content = content.strip(' ')
                if proto == 'TCP':
                    if dpi_meta:
                        ofile.write(_tag_tcp_line(line, content, dpi_meta))
                    else:
                        # DPI data lost when capturing
                        ofile.write("%s: %s %s\n"
                                    % (proto, "N/A", content.strip('\r\n ')))
                elif proto == 'HTTP':
                    # Write directly
                    ofile.write("%s\n" % line.strip('\r\n '))
        finally:
            itcp_file.close()
    finally:
        ofile.close()
    logging.info('file %s completed' % tcp_file)