def speed_test(filepath):
    """Parse every line of one filing and print timing and form-type counts.

    The first line is handed to ``fecfile.parse_header`` to learn the format
    version; every following line goes through ``fecfile.parse_line`` with
    that version. Lines whose parse result is falsy are echoed to stdout.
    Parsed lines that carry a ``form_type`` key are tallied by upper-cased
    form type; parsed lines without that key are silently skipped.

    Args:
        filepath: Path of the filing to read.

    Returns:
        None. The elapsed time and the tallies are printed to stdout.
    """
    print("+++++\nRunning speed test on %s" % filepath)
    formtypecount = Counter()
    start = datetime.now()
    # Decode as ISO-8859-1, the encoding the other filing readers in this
    # file use. It maps every byte to a character, so a byte that is invalid
    # in the platform's default encoding cannot abort the run.
    with open(filepath, encoding="ISO-8859-1") as file:
        version = None
        for line in file:
            if version is None:
                # Element 1 of the header result is what parse_line expects
                # as its version argument.
                version = fecfile.parse_header(line)[1]
                continue
            parsed = fecfile.parse_line(line, version)
            if not parsed:
                print("** not parsed %s" % line)
                continue
            # count the form type, if given
            try:
                formtypecount[parsed['form_type'].upper()] += 1
            except KeyError:
                continue
    end = datetime.now()
    print("+++++\nResults:")
    # Only rows that had a form_type are included in this sum.
    print("\tRan %s rows in %s" % (sum(formtypecount.values()), end - start))
    print("\tTotal rows processed = %s" % formtypecount)
def readfile(filepath, writer):
    """Summarise one ``.fec`` filing as a single row and write it out.

    The filing number is the integer left after removing ``.fec`` from the
    file's base name. The first line is parsed with
    ``fecfile.parse_header`` and its first element becomes the output row;
    the second line is parsed with ``fecfile.parse_line`` and a fixed set of
    its fields is copied into that row, along with the file size in bytes
    and the total line count.

    Args:
        filepath: Path to the filing; the base name must be ``<int>.fec``.
        writer: Object whose ``writerow`` method receives the row dict.

    Raises:
        ValueError: If the base name, or a present ``report_id`` with
            ``FEC-`` removed, is not an integer.
    """
    base = os.path.basename(filepath).replace(".fec", "")
    filing_id = int(base)

    with open(filepath, encoding="ISO-8859-1") as handle:
        header_line = handle.readline()
        form_line = handle.readline()
        # Start from 2 (header + form line) and add every remaining line.
        total_lines = 2 + sum(1 for _ in handle)

    size_in_bytes = os.path.getsize(filepath)

    header_result = fecfile.parse_header(header_line.replace("\n", ""))
    row = header_result[0]
    fec_version = header_result[1]
    row["filing_number"] = filing_id

    # A non-empty report_id means this filing amends an earlier one; store
    # that report's number without its "FEC-" prefix.
    amended_id = row.get('report_id', None)
    if amended_id:
        row["amends"] = int(amended_id.replace("FEC-", ""))

    form_fields = fecfile.parse_line(form_line, fec_version)
    copied_keys = (
        'form_type',
        'filer_committee_id_number',
        'committee_name',
        'date_signed',
        'coverage_through_date',
        'coverage_from_date',
    )
    for key in copied_keys:
        row[key] = form_fields.get(key, '')

    row["file_size"] = size_in_bytes
    row["file_linecount"] = total_lines

    # hack for F7 / F5 / F9: fall back to organization_name when the
    # committee name is empty.
    if not row["committee_name"]:
        row["committee_name"] = form_fields.get('organization_name', '')

    writer.writerow(row)
def readfile(path_to_file, schedule_writer, year):
    """Parse one filing line by line and route schedule rows to writers.

    The first line is parsed with ``fecfile.parse_header`` to obtain the
    version; each later line is parsed with ``fecfile.parse_line``. Every
    parsed row that has a ``form_type`` is counted, stamped with
    ``filing_number`` and ``line_sequence`` (1-based line number in the
    file), and then written according to its upper-cased form type:

    * starts with ``SA``   -> ``schedule_writer['A'][year]['writer']``
    * starts with ``SB``   -> ``schedule_writer['B'][year]['writer']``
    * starts with ``F132`` -> passed through ``remap_132_to_a`` and written
      to ``schedule_writer['F132']['writer']``

    Rows of any other form type are counted but not written. Lines that
    raise ``FecParserMissingMappingError`` or parse to a falsy value are
    reported on stdout and skipped.

    Args:
        path_to_file: Path to the filing; the base name must be ``<int>.fec``.
        schedule_writer: Nested mapping of writers, indexed as shown above.
        year: Key selecting the per-year writer for schedules A and B.

    Returns:
        Counter mapping upper-cased form type to the number of rows seen.
    """
    filing_id = int(os.path.basename(path_to_file).replace(".fec", ""))
    tally = Counter()
    fec_version = None

    with open(path_to_file, encoding="ISO-8859-1") as handle:
        for line_no, raw in enumerate(handle, start=1):
            # Until a version is known, treat the current line as the header.
            if fec_version is None:
                fec_version = fecfile.parse_header(raw)[1]
                continue

            try:
                row = fecfile.parse_line(raw, fec_version)
            except fecfile.cache.FecParserMissingMappingError as e:
                print("error in %s line %s: %s" % (filing_id, line_no, e))
                continue

            if not row:
                print("** not parsed %s" % raw)
                continue

            # Rows without a form type are neither counted nor written.
            try:
                kind = row['form_type'].upper()
            except KeyError:
                continue
            tally.update({kind: 1})

            row['filing_number'] = filing_id
            row['line_sequence'] = line_no

            if kind.startswith("SA"):
                schedule_writer['A'][year]['writer'].writerow(row)
            elif kind.startswith("SB"):
                schedule_writer['B'][year]['writer'].writerow(row)
            elif kind.startswith("F132"):
                schedule_writer['F132']['writer'].writerow(remap_132_to_a(row))

    return tally
def readfile(filepath, writer):
    """Summarise one ``.fec`` filing as a single row and write it out.

    The filing number is the integer left after removing ``.fec`` from the
    file's base name. The first line is parsed with
    ``fecfile.parse_header`` and its first element becomes the output row;
    the second line is parsed with ``fecfile.parse_line`` and selected
    fields from it are copied into that row.

    Args:
        filepath: Path to the filing; the base name must be ``<int>.fec``.
        writer: Object whose ``writerow`` method receives the row dict.

    Raises:
        ValueError: If the base name, or a present ``report_id`` with
            ``FEC-`` removed, is not an integer.
    """
    file_number = int(os.path.basename(filepath).replace(".fec", ""))

    # Only the first two lines are needed; the with-block guarantees the
    # handle is closed as soon as they have been read, even on error.
    with open(filepath, encoding="ISO-8859-1") as file:
        firstline = file.readline()
        secondline = file.readline()

    raw_results = fecfile.parse_header(firstline.replace("\n", ""))
    results = raw_results[0]
    version = raw_results[1]
    results["filing_number"] = file_number

    # A non-empty report_id means this filing amends an earlier one; store
    # that report's number without its "FEC-" prefix.
    original_report = results.get('report_id', None)
    if original_report:
        results["amends"] = int(original_report.replace("FEC-", ""))

    secondlineparsed = fecfile.parse_line(secondline, version)
    # Copy the form-line fields, defaulting each missing one to ''.
    for field in (
        'form_type',
        'filer_committee_id_number',
        'committee_name',
        'date_signed',
        'coverage_from_date',
        'coverage_through_date',
    ):
        results[field] = secondlineparsed.get(field, '')

    writer.writerow(results)