def _parse_headers(self):
    """Parse the header row and the form (summary) row into ``self.headers``.

    Fills ``self.headers`` with the form type, FEC id, report number and one
    entry per name in the version-appropriate header list. Also sets
    ``self.version``, ``self.is_amendment`` and, for amendments,
    ``self.headers['filing_amended']``.

    Returns:
        True on success; False when the form row does not contain at least
        two delimited fields.

    Raises:
        IndexError: if the header row has fewer than three fields, so no
            version number can be read.
        ValueError: if the version field cannot be converted to a float.
        Exception: if the form is an amendment but no original filing number
            can be found in ``self.headers['report_id']``.
    """
    header_arr = utf8_clean(self.header_row).split(delimiter)
    summary_line = utf8_clean(self.form_row).split(delimiter)

    # These are always consistent
    try:
        self.headers['form'] = clean_entry(summary_line[0])
        self.headers['fec_id'] = clean_entry(summary_line[1])
        self.headers['report_num'] = None
    except IndexError:
        return False

    # amendment number - not sure what version it starts in.
    if len(summary_line) > 6:
        self.headers['report_num'] = clean_entry(summary_line[6])[:3]

    # Version number is always the third item
    self.version = clean_entry(header_arr[2])
    headers_list = old_headers if float(self.version) <= 5 else new_headers

    for i, header_name in enumerate(headers_list):
        # It's acceptable for header rows to leave off delimiters, so enter
        # missing trailing args as blanks
        try:
            value = clean_entry(header_arr[i])
        except IndexError:
            value = ""
        self.headers[header_name] = value

    # Figure out if this is an amendment, and if so, what's being amended.
    # Slicing (rather than indexing) keeps an empty form string from raising
    # IndexError; such a form is simply treated as "not an amendment".
    form_last_char = self.headers['form'][-1:].upper()
    self.is_amendment = form_last_char == 'A'

    if self.is_amendment:
        # NOTE(review): relies on 'report_id' being one of the names in the
        # header list, which is defined outside this block - confirm.
        amendment_match = re.search(r'^FEC\s*-\s*(\d+)', self.headers['report_id'])
        if not amendment_match:
            raise Exception("Can't find original filing in amended report %s" % (self.filing_number))
        self.headers['filing_amended'] = amendment_match.group(1)

    self.headers['is_amendment'] = self.is_amendment
    return True
def get_body_row(self):
    """Return the next non-blank body row as a list of fields.

    Reads lines from ``self.fh`` until one is found that is not made up
    solely of whitespace, then returns that line cleaned with ``utf8_clean``
    and split on ``delimiter``. Returns None once the file is exhausted.
    """
    while True:
        raw_line = self.fh.readline()
        if not raw_line:
            # readline() yields an empty value only at end of file.
            return None
        if not raw_line.isspace():
            return utf8_clean(raw_line).split(delimiter)
def get_first_row(self):
    """Return the form row, cleaned with ``utf8_clean`` and split on ``delimiter``."""
    cleaned_form_row = utf8_clean(self.form_row)
    return cleaned_form_row.split(delimiter)