Beispiel #1
0
 def write_dict_to_csv(self, dict_fp, out_file):
     """Convert a file of JSON-lines records into a CSV file.

     The column list comes from ``PCAPToCSV.get_csv_header(dict_fp)``.
     Each line read through ``gzip_reader(dict_fp)`` is parsed as one JSON
     object and written as one CSV row through ``gzip_writer(out_file)``.

     Args:
         dict_fp: path of the JSON-lines input.
         out_file: path of the CSV output.

     Failures while reading or converting rows are logged through
     ``self.logger`` and not re-raised; the header row and any rows
     already written stay in the output.
     """
     columns = PCAPToCSV.get_csv_header(dict_fp)
     with gzip_writer(out_file) as csv_out:
         csv_writer = csv.DictWriter(csv_out, fieldnames=columns)
         csv_writer.writeheader()
         try:
             with gzip_reader(dict_fp) as records_in:
                 for raw_record in records_in:
                     record = json.loads(raw_record.strip())
                     csv_writer.writerow(record)
         except Exception as e:  # pragma: no cover
             self.logger.error(f'Failed to write to CSV because: {e}')
Beispiel #2
0
 def get_tshark_packet_data(self, pcap_file, dict_fp):
     """Dump the packets tshark decodes from a capture as JSON lines.

     Runs ``tshark -r <pcap_file> -n -V -Tjson``, hands the running
     process to ``self.json_packet_records``, flattens every record it
     yields with ``self.flatten_json`` and writes each one as a single
     JSON document per line through ``gzip_writer(dict_fp)``.

     Args:
         pcap_file: path of the capture passed to ``tshark -r``.
         dict_fp: path of the JSON-lines output.

     Any exception is logged through ``self.logger`` and not re-raised.
     """
     # Pass argv as a list: joining into one string and re-splitting it
     # with shlex breaks capture paths that contain spaces or quotes.
     command = ['tshark', '-r', pcap_file, '-n', '-V', '-Tjson']
     try:
         process = subprocess.Popen(command, stdout=subprocess.PIPE)
         try:
             with gzip_writer(dict_fp) as f_out:
                 for item in self.json_packet_records(process):
                     f_out.write(json.dumps(self.flatten_json(item)) + '\n')
         finally:
             # Always release the pipe and reap the child, even when the
             # record loop stops early or raises.
             if process.stdout is not None:
                 process.stdout.close()
             process.wait()
     except Exception as e:  # pragma: no cover
         self.logger.error(f'{e}')
Beispiel #3
0
    def combine_csvs(out_paths, combined_path):
        """Merge several CSV files into one, tagging rows with their origin.

        The combined header is ``filename`` followed by every column name
        found on the first line of the inputs, in first-seen order, so the
        column layout is the same on every run. Each copied row gets a
        ``filename`` value derived from the input's base name. After an
        input has been copied it is handed to ``PCAPToCSV.cleanup_files``.

        Args:
            out_paths: iterable of input CSV paths (read via ``gzip_reader``).
            combined_path: path of the merged output (via ``gzip_writer``).
        """
        # First determine the field names from the top line of each input
        # file. A dict is used as an ordered set: a plain set would make the
        # output column order vary between interpreter runs.
        fieldnames = {'filename': None}
        for filename in out_paths:
            with gzip_reader(filename) as f_in:
                # The default keeps an empty input file from raising
                # StopIteration here.
                header_row = next(csv.reader(f_in), ())
                fieldnames.update(dict.fromkeys(header_row))

        # Then copy the data
        with gzip_writer(combined_path) as f_out:
            writer = csv.DictWriter(f_out, fieldnames=list(fieldnames))
            writer.writeheader()
            for filename in out_paths:
                # Same label for every row of this input, so compute it once.
                label = filename.split('/')[-1].split('csv.gz')[0]
                with gzip_reader(filename) as f_in:
                    for line in csv.DictReader(f_in):
                        line['filename'] = label
                        writer.writerow(line)
                # Run the cleanup only after the input has been closed.
                PCAPToCSV.cleanup_files([filename])
Beispiel #4
0
    def get_tshark_conv_data(self, pcap_file, dict_fp):
        """Write tshark's TCP/UDP conversation statistics as JSON lines.

        Runs ``tshark -r <pcap_file> -n -q -z conv,tcp -z conv,udp`` and
        parses its text report. The report is read as blocks delimited by
        lines starting with ``===``; the first line inside a block (with
        colons removed) is the block name, and the remaining non-empty,
        non-``Filter:`` lines are its rows. For every block whose name
        contains ``Conversations``, each row that does not start with a
        space is split into 11 whitespace-separated columns and written as
        one JSON object per line through ``gzip_writer(dict_fp)``.

        Args:
            pcap_file: path of the capture passed to ``tshark -r``.
            dict_fp: path of the JSON-lines output.

        A failure to run tshark or decode its output is logged through
        ``self.logger``; the output file is then written with no records.

        Raises:
            ValueError: a conversation row does not have exactly 11 columns.
            IndexError: an endpoint column contains no ``:`` separator.
        """
        # TODO (add a summary of other packets with protocols?)
        output = ''
        try:
            # TODO perhaps more than just tcp/udp in the future
            # argv is passed as a list so a capture path containing spaces
            # or quotes reaches tshark unchanged.
            command = [
                'tshark', '-r', pcap_file,
                '-n', '-q', '-z', 'conv,tcp', '-z', 'conv,udp',
            ]
            # Decode before assigning so a decoding failure leaves ``output``
            # as the empty string rather than as undecoded bytes.
            output = subprocess.check_output(command).decode('utf-8')
        except Exception as e:  # pragma: no cover
            self.logger.error(f'{e}')

        in_block = False
        name = None
        results = {}
        for line in output.split('\n'):
            if line.startswith('==='):
                # A separator toggles block state; the next block starts
                # without a name.
                in_block = not in_block
                name = None
                continue
            if not in_block:
                continue
            if not name:
                name = line.replace(':', '').strip()
                results[name] = []
            elif line != '' and not line.startswith('Filter:'):
                results[name].append(line)

        with gzip_writer(dict_fp) as f_out:
            for result, rows in results.items():
                if 'Conversations' not in result:
                    continue
                transport_proto = result.split()[0]
                # handle conversation parsing
                for line in rows:
                    if line.startswith(' '):
                        # header or padding, discard
                        continue
                    # TODO perhaps additional features can be extracted for flows from tshark
                    (src, _, dst, frames_l, bytes_l, frames_r, bytes_r,
                     frames_total, bytes_total, rel_start,
                     duration) = line.split()
                    # Split on the last colon only, so colons inside the
                    # address part stay with the address.
                    src_parts = src.rsplit(':', 1)
                    dst_parts = dst.rsplit(':', 1)
                    conv = {
                        'Source': src_parts[0],
                        'Source Port': src_parts[1],
                        'Destination': dst_parts[0],
                        'Destination Port': dst_parts[1],
                        'Transport Protocol': transport_proto,
                        'Frames to Source': frames_l,
                        'Bytes to Source': bytes_l,
                        'Frames to Destination': frames_r,
                        'Bytes to Destination': bytes_r,
                        'Total Frames': frames_total,
                        'Total Bytes': bytes_total,
                        'Relative Start': rel_start,
                        'Duration': duration
                    }
                    f_out.write(json.dumps(conv) + '\n')
Beispiel #5
0
    def get_pyshark_packet_data(self, pcap_file, dict_fp):
        """Write one JSON record per packet, using pyshark to parse a capture.

        Every record starts with ``filename`` (the base name of
        ``pcap_file``), then the frame-info fields, a ``layers`` string and
        the fields of each layer listed in ``self.PROTOCOLS``. Dict-valued
        layer fields are flattened one level, except for the DNS layer.
        Keys that do not start with a letter, and the ``tcp.payload`` /
        ``tcp.payload_raw`` keys, are dropped before the record is written
        through ``gzip_writer(dict_fp)``.

        Args:
            pcap_file: path of the capture to read.
            dict_fp: path of the JSON-lines output.

        Layers seen in the capture that are not in ``self.PROTOCOLS``
        (ignoring raw layers) are reported in a single warning at the end.
        """
        all_protocols = set()
        dropped_keys = ('tcp.payload_raw', 'tcp.payload')

        pcap_file_short = ntpath.basename(pcap_file)
        with gzip_writer(dict_fp) as f_out:
            with pyshark.FileCapture(pcap_file,
                                     use_json=True,
                                     include_raw=True,
                                     keep_packets=False,
                                     custom_parameters=[
                                         '-o',
                                         'tcp.desegment_tcp_streams:false',
                                         '-n'
                                     ]) as cap:
                for packet in cap:
                    packet_dict = {}
                    packet_dict['filename'] = pcap_file_short
                    frame_info = packet.frame_info._all_fields
                    for key in frame_info:
                        packet_dict[key] = frame_info[key]
                    # The raw packet is deliberately left out: it can
                    # overflow the field size for csv.
                    layers = str(packet.layers)
                    packet_dict['layers'] = layers
                    # Strip the surrounding list brackets of the string
                    # form, then split it into one entry per layer.
                    str_layers = layers[1:-1].split(', ')
                    for str_layer in str_layers:
                        # ignore raw layers
                        if 'RAW' not in str_layer:
                            all_protocols.add(str_layer)
                        # only include specified protocols due to unknown parsing for some layers
                        if str_layer in self.PROTOCOLS:
                            # '<ETH Layer>' -> 'eth': the attribute name of
                            # the layer on the packet object.
                            layer_info = getattr(
                                packet,
                                str_layer.split()[0][1:].lower())._all_fields
                            # check for nested dicts, one level deep
                            for key in layer_info:
                                # DNS doesn't parse well
                                if isinstance(
                                        layer_info[key],
                                        dict) and str_layer != '<DNS Layer>':
                                    for inner_key in layer_info[key]:
                                        packet_dict[inner_key] = layer_info[
                                            key][inner_key]
                                else:
                                    packet_dict[key] = layer_info[key]
                    # Clean up records by building the filtered dict
                    # directly; no per-packet deep copy is needed just to
                    # delete keys. Slicing (key[:1]) also drops an empty key
                    # instead of raising on it.
                    packet_dict = {
                        key: value
                        for key, value in packet_dict.items()
                        if key[:1].isalpha() and key not in dropped_keys
                    }
                    f_out.write(json.dumps(packet_dict) + '\n')

        # Whatever is left was present in the capture but never exported.
        all_protocols.difference_update(self.PROTOCOLS)
        if all_protocols:
            self.logger.warning(
                f'Found the following other layers in {pcap_file_short} that were not added to the CSV: {all_protocols}'
            )
Beispiel #6
0
 def get_writer(out_file, use_gzip):
     """Return an open handle for writing to ``out_file``.

     Args:
         out_file: path of the file to write.
         use_gzip: when truthy the handle comes from ``gzip_writer``;
             otherwise the file is opened as plain text in ``'w'`` mode.

     Returns:
         The opened writer; the caller is responsible for closing it.
     """
     if not use_gzip:
         return open(out_file, 'w')
     return gzip_writer(out_file)