def parse_line(self, line_array, version):
    """
    Convert one row of fields into a dictionary keyed by column name.

    The column layout is chosen by matching ``version`` against every
    pattern in ``self.regex_dict``; that pattern is then used as the key
    into ``self.column_locations_dict``, which maps column names to
    positions in ``line_array``.

    Raises ``Exception`` when no pattern matches ``version``.
    """
    # Every pattern is tried; when several match, the last one in
    # iteration order is the one that gets used.
    matching_patterns = [
        pattern for pattern in self.regex_dict if re.match(pattern, version)
    ]
    if not matching_patterns:
        raise Exception("Can't find data to parse line type=%s version=%s" % (self.form, version))

    column_positions = self.column_locations_dict[matching_patterns[-1]]
    field_count = len(line_array)

    # Sometimes trailing commas are omitted, so columns that fall past the
    # end of the row are filled with an empty string.
    return {
        name: clean_entry(line_array[position]) if position < field_count else ''
        for name, position in column_positions.items()
    }
def parse_headers(self):
    """
    Read the header row and the summary row and populate ``self.headers``.

    Two rows are consumed through ``self.get_next_fields()``. The first is
    handed to ``header.parse`` together with ``self.is_paper``; the second
    is stored as ``self.form_row`` and supplies the ``form`` and ``fec_id``
    header entries (its first and second fields).

    Side effects: sets ``self.form_row``, ``self.headers``, ``self.version``
    and, when the summary row is usable, ``self.is_amendment``.

    Returns:
        True when the headers were parsed. False when the summary row has
        fewer than two fields or its form type is empty after cleaning.

    Raises:
        Exception: for an electronic amendment whose ``report_id`` header
            does not start with ``FEC-<digits>``.
    """
    header_arr = self.get_next_fields()
    summary_line = self.get_next_fields()
    self.form_row = summary_line

    self.headers = header.parse(header_arr, self.is_paper)
    self.headers['amends_filing'] = None
    self.headers['report_num'] = None
    self.version = self.headers['fec_version']

    try:
        self.headers['form'] = clean_entry(summary_line[0])
        self.headers['fec_id'] = clean_entry(summary_line[1])
    except IndexError:
        return False

    form_type = self.headers['form']
    if not form_type:
        # An empty form type cannot be classified; report it the same way
        # as a truncated summary row instead of failing on form_type[-1].
        return False

    # Amendment discovery: a form type ending in 'A' marks an amendment.
    self.is_amendment = form_type[-1].upper() == 'A'
    self.headers['is_amendment'] = self.is_amendment

    # For paper filings 'amends_filing' keeps the None assigned above; for
    # electronic amendments the amended filing number is read from report_id.
    if self.is_amendment and not self.is_paper:
        amendment_match = re.search(r'^FEC\s*-\s*(\d+)', self.headers['report_id'])
        if not amendment_match:
            raise Exception(
                "Can't find original filing in amended report %s" % (self.filing_number))
        self.headers['amends_filing'] = amendment_match.group(1)

    return True
def parse(header_array, is_paper=False):
    """
    Decide which version of the headers to use and label the header fields.

    The version string is read from ``header_array[1]`` for paper filings
    and from ``header_array[2]`` otherwise, then matched against the known
    version patterns in order. The first match selects the list of header
    names.

    Returns a dict mapping each header name to the cleaned field at the
    same position in ``header_array``.

    Raises ``UnknownHeaderError`` when no pattern matches the version.
    """
    if is_paper:
        version = clean_entry(header_array[1])
        candidates = (
            (paper_headers_v1_re, paper_headers_v1),
            (paper_headers_v2_2_re, paper_headers_v2_2),
            (paper_headers_v2_6_re, paper_headers_v2_6),
        )
        not_found_message = "Couldn't find parser for paper version %s"
    else:
        version = clean_entry(header_array[2])
        candidates = (
            (old_eheaders_re, old_eheaders),
            (new_eheaders_re, new_eheaders),
        )
        not_found_message = "Couldn't find parser for electronic version %s"

    for version_pattern, names in candidates:
        if version_pattern.match(version):
            headers_list = names
            break
    else:
        raise UnknownHeaderError(not_found_message % (version))

    headers = {}
    for position, name in enumerate(headers_list):
        # It's acceptable for header rows to leave off delimiters, so
        # missing trailing fields are entered as blanks.
        try:
            value = clean_entry(header_array[position])
        except IndexError:
            value = ""
        headers[name] = value
    return headers
def parse(header_array, is_paper=False):
    """
    Decide which version of the headers to use and label the header fields.

    The version string is read from ``header_array[1]`` for paper filings
    and from ``header_array[2]`` otherwise, then matched against the known
    version patterns in order. The first match selects the list of header
    names.

    Returns a dict mapping each header name to the cleaned field at the
    same position in ``header_array``.

    Raises ``UnknownHeaderError`` when no pattern matches the version.
    """
    if is_paper:
        version = clean_entry(header_array[1])
        candidates = (
            (paper_headers_v1_re, paper_headers_v1),
            (paper_headers_v2_2_re, paper_headers_v2_2),
            (paper_headers_v2_6_re, paper_headers_v2_6),
        )
        not_found_message = "Couldn't find parser for paper version %s"
    else:
        version = clean_entry(header_array[2])
        candidates = (
            (old_eheaders_re, old_eheaders),
            (new_eheaders_re, new_eheaders),
        )
        not_found_message = "Couldn't find parser for electronic version %s"

    for version_pattern, names in candidates:
        if version_pattern.match(version):
            headers_list = names
            break
    else:
        raise UnknownHeaderError(not_found_message % (version))

    headers = {}
    for position, name in enumerate(headers_list):
        # It's acceptable for header rows to leave off delimiters, so
        # missing trailing fields are entered as blanks.
        try:
            value = clean_entry(header_array[position])
        except IndexError:
            value = ""
        headers[name] = value
    return headers
def parse_headers(self):
    """
    Read the header row and the summary row and populate ``self.headers``.

    Two rows are consumed through ``self.get_next_fields()``. The first is
    handed to ``header.parse`` together with ``self.is_paper``; the second
    is stored as ``self.form_row`` and supplies the ``form`` and ``fec_id``
    header entries (its first and second fields).

    Side effects: sets ``self.form_row``, ``self.headers``, ``self.version``
    and, when the summary row is usable, ``self.is_amendment``.

    Returns:
        True when the headers were parsed. False when the summary row has
        fewer than two fields or its form type is empty after cleaning.

    Raises:
        Exception: for an electronic amendment whose ``report_id`` header
            does not start with ``FEC-<digits>``.
    """
    header_arr = self.get_next_fields()
    summary_line = self.get_next_fields()
    self.form_row = summary_line

    self.headers = header.parse(header_arr, self.is_paper)
    self.headers['amends_filing'] = None
    self.headers['report_num'] = None
    self.version = self.headers['fec_version']

    try:
        self.headers['form'] = clean_entry(summary_line[0])
        self.headers['fec_id'] = clean_entry(summary_line[1])
    except IndexError:
        return False

    form_type = self.headers['form']
    if not form_type:
        # An empty form type cannot be classified; report it the same way
        # as a truncated summary row instead of failing on form_type[-1].
        return False

    # Amendment discovery: a form type ending in 'A' marks an amendment.
    self.is_amendment = form_type[-1].upper() == 'A'
    self.headers['is_amendment'] = self.is_amendment

    # For paper filings 'amends_filing' keeps the None assigned above; for
    # electronic amendments the amended filing number is read from report_id.
    if self.is_amendment and not self.is_paper:
        amendment_match = re.search(r'^FEC\s*-\s*(\d+)', self.headers['report_id'])
        if not amendment_match:
            raise Exception("Can't find original filing in amended report %s" % (self.filing_number))
        self.headers['amends_filing'] = amendment_match.group(1)

    return True