Example #1
    def get_csv_header(dict_fp):
        # Collect the union of keys across every JSON line in the gzipped file
        header_all = set()
        with gzip_reader(dict_fp) as f_in:
            for line in f_in:
                header_all.update(json.loads(line.strip()).keys())
        # Keep only keys that look like field names (start with a letter or underscore)
        header = []
        for key in header_all:
            if key[0].isalpha() or key[0] == '_':
                header.append(key)
        return header
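
All four examples depend on gzip_reader and gzip_writer helpers that are not shown, as well as the standard csv and json modules. A minimal sketch, assuming the helpers are simply gzip.open in text mode (the project's real helpers may differ):

    import csv
    import gzip
    import json


    def gzip_reader(path):
        # Open a gzip-compressed file for reading as text, iterable line by line
        return gzip.open(path, 'rt')


    def gzip_writer(path):
        # Open a gzip-compressed file for writing as text
        return gzip.open(path, 'wt')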
Example #2
    def combine_csvs(out_paths, combined_path):
        # First determine the field names from the header line of each input file
        fieldnames = {'filename'}
        for filename in out_paths:
            with gzip_reader(filename) as f_in:
                reader = csv.reader(f_in)
                fieldnames.update(next(reader))

        # Then copy the data, tagging each row with the file it came from
        with gzip_writer(combined_path) as f_out:
            # Sort the union of headers so the combined file has a stable column order
            writer = csv.DictWriter(f_out, fieldnames=sorted(fieldnames))
            writer.writeheader()
            for filename in out_paths:
                with gzip_reader(filename) as f_in:
                    reader = csv.DictReader(f_in)
                    for line in reader:
                        # Record the source file's base name (without the csv.gz suffix)
                        line['filename'] = filename.split('/')[-1].split(
                            'csv.gz')[0]
                        writer.writerow(line)
                    # Remove the per-file CSV once its rows have been merged
                    PCAPToCSV.cleanup_files([filename])
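
The PCAPToCSV.cleanup_files call at the end of the inner loop removes each input CSV once its rows have been copied; that helper is not shown here and is presumably a static method of PCAPToCSV. A hypothetical sketch, assuming it simply deletes the given paths and tolerates files that are already gone:

    import os


    def cleanup_files(paths):
        # Hypothetical helper: delete temporary files, ignoring ones that no longer exist
        for path in paths:
            try:
                os.remove(path)
            except OSError:
                pass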
Example #3
    def write_dict_to_csv(self, dict_fp, out_file):
        # Derive the CSV columns from the union of keys in the JSON lines
        header = PCAPToCSV.get_csv_header(dict_fp)
        with gzip_writer(out_file) as f_out:
            # Ignore any keys that get_csv_header filtered out of the column list
            writer = csv.DictWriter(f_out, fieldnames=header,
                                     extrasaction='ignore')
            writer.writeheader()
            try:
                with gzip_reader(dict_fp) as f_in:
                    for line in f_in:
                        writer.writerow(json.loads(line.strip()))
            except Exception as e:  # pragma: no cover
                self.logger.error(f'Failed to write to CSV because: {e}')
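
write_dict_to_csv expects dict_fp to point at a gzip-compressed file containing one JSON object per line, whose keys become the CSV columns. A small, hypothetical setup that produces such an input for testing (the field names and the sample.dict.gz path are made up for illustration):

    import gzip
    import json

    records = [
        {'frame_len': 60, 'ip_src': '10.0.0.1'},
        {'frame_len': 74, 'ip_src': '10.0.0.2', 'tcp_dstport': 443},
    ]

    # One JSON object per line, gzip-compressed, matching what write_dict_to_csv reads
    with gzip.open('sample.dict.gz', 'wt') as f_out:
        for record in records:
            f_out.write(json.dumps(record) + '\n')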
Example #4
    def get_reader(in_file, use_gzip):
        # Return a text-mode file object, transparently decompressing if requested
        if use_gzip:
            return gzip_reader(in_file)
        return open(in_file, 'r')
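
get_reader lets callers treat plain-text and gzip-compressed inputs uniformly: either branch returns a file object that can be used as a context manager and iterated line by line. A brief usage sketch (the file name is a placeholder):

    with get_reader('capture.json.gz', use_gzip=True) as f_in:
        for line in f_in:
            print(line.strip())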