@staticmethod
def get_csv_header(dict_fp):
    # Collect the union of all JSON keys across the file to build the CSV header.
    header_all = set()
    with gzip_reader(dict_fp) as f_in:
        for line in f_in:
            header_all.update(json.loads(line.strip()).keys())
    # Keep only keys that start with a letter or an underscore. Note that set
    # iteration order is arbitrary, so the column order can vary between runs.
    header = []
    for key in header_all:
        if key[0].isalpha() or key[0] == '_':
            header.append(key)
    return header
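# These methods assume module-level `import csv` and `import json`, plus
# gzip_reader/gzip_writer helpers for transparent .gz text I/O. A minimal
# sketch of those helpers, assuming they simply wrap gzip.open in text mode
# (an assumption; the project's actual helpers may differ):
import gzip


def gzip_reader(path):
    # Open a gzip-compressed file for reading text; usable as a context
    # manager and iterable line by line.
    return gzip.open(path, 'rt')


def gzip_writer(path):
    # Open a gzip-compressed file for writing text.
    return gzip.open(path, 'wt')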
@staticmethod
def combine_csvs(out_paths, combined_path):
    # First determine the field names from the top line of each input file
    fieldnames = {'filename'}
    for filename in out_paths:
        with gzip_reader(filename) as f_in:
            reader = csv.reader(f_in)
            fieldnames.update(next(reader))
    # Then copy the data
    with gzip_writer(combined_path) as f_out:
        writer = csv.DictWriter(f_out, fieldnames=fieldnames)
        writer.writeheader()
        for filename in out_paths:
            with gzip_reader(filename) as f_in:
                reader = csv.DictReader(f_in)
                for line in reader:
                    # Tag each row with the basename of the file it came from.
                    line['filename'] = filename.split('/')[-1].split(
                        'csv.gz')[0]
                    writer.writerow(line)
            # Delete each input file once its rows have been merged.
            PCAPToCSV.cleanup_files([filename])
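# Hypothetical usage of combine_csvs. The two inputs are built here so the
# example is self-contained; in the pipeline they would be per-capture CSVs.
# Each merged row gains a 'filename' column derived from the input's basename
# (split on 'csv.gz', so 'capture1.csv.gz' tags rows as 'capture1.' with a
# trailing dot), and the inputs are deleted via cleanup_files as they are
# consumed.
import gzip

out_paths = ['/tmp/capture1.csv.gz', '/tmp/capture2.csv.gz']
for i, path in enumerate(out_paths):
    with gzip.open(path, 'wt') as f:
        f.write('ip.src,ip.dst\n10.0.0.%d,10.0.0.9\n' % (i + 1))
PCAPToCSV.combine_csvs(out_paths, '/tmp/combined.csv.gz')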
def write_dict_to_csv(self, dict_fp, out_file):
    header = PCAPToCSV.get_csv_header(dict_fp)
    with gzip_writer(out_file) as f_out:
        writer = csv.DictWriter(f_out, fieldnames=header)
        writer.writeheader()
        try:
            with gzip_reader(dict_fp) as f_in:
                # Each line is one JSON object; its keys map onto the header.
                for line in f_in:
                    writer.writerow(json.loads(line.strip()))
        except Exception as e:  # pragma: no cover
            self.logger.error(f'Failed to write to CSV because: {e}')
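# Illustrative input for write_dict_to_csv: a gzipped file with one JSON
# object per line (field names are assumptions in the style of tshark output).
# Keys whose first character is neither a letter nor an underscore are dropped
# from the header by get_csv_header; if such a key still appears in a row,
# DictWriter raises ValueError, which the except block above logs.
import gzip
import json

with gzip.open('/tmp/flows.json.gz', 'wt') as f:
    f.write(json.dumps({'ip.src': '10.0.0.1', 'ip.dst': '10.0.0.2'}) + '\n')
    f.write(json.dumps({'ip.src': '10.0.0.2', 'tcp.srcport': '443'}) + '\n')
# instance.write_dict_to_csv('/tmp/flows.json.gz', '/tmp/flows.csv.gz')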
def get_reader(in_file, use_gzip):
    # Return a readable file object, transparently handling gzip compression.
    if use_gzip:
        return gzip_reader(in_file)
    return open(in_file, 'r')
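# Example: get_reader gives callers one code path whether the intermediate
# file is gzipped or plain text (the file name here is an assumption).
with get_reader('/tmp/capture1.json.gz', use_gzip=True) as f_in:
    for line in f_in:
        print(line.strip())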