Ejemplo n.º 1
0
    def _parse_headers(self):
        """Read the header row and the summary row and populate ``self.headers``.

        Two rows are consumed through ``self._get_next_fields()``. The first
        is passed to ``parse_header`` together with ``self.is_paper``; the
        second is stored as ``self.form_row`` and supplies the ``form`` and
        ``fec_id`` header entries.

        Side effects: sets ``self.form_row``, ``self.headers`` and
        ``self.version`` and, unless the summary row is rejected,
        ``self.is_amendment`` (mirrored in ``self.headers['is_amendment']``).

        Returns:
            True on success. False when the summary row has fewer than two
            fields or its form type is blank.

        Raises:
            Exception: for an electronic amendment whose ``report_id`` header
                does not start with ``FEC-<digits>``.
        """
        header_arr = self._get_next_fields()
        summary_line = self._get_next_fields()
        self.form_row = summary_line

        self.headers = parse_header(header_arr, self.is_paper)
        # Defaults: 'filing_amended' is only overwritten further down, for
        # electronic amendments.
        self.headers['filing_amended'] = None
        self.headers['report_num'] = None
        self.version = self.headers['fec_version']

        try:
            self.headers['form'] = clean_entry(summary_line[0])
            self.headers['fec_id'] = clean_entry(summary_line[1])
        except IndexError:
            return False

        form = self.headers['form']
        if not form:
            # A blank form type has no last character to inspect. Report it
            # the same way as a truncated summary row instead of crashing on
            # form[-1].
            return False

        # Amendment discovery: a form type whose last character is 'A'
        # (case-insensitive) marks an amendment to an earlier filing.
        self.is_amendment = form[-1].upper() == 'A'
        self.headers['is_amendment'] = self.is_amendment

        # The amended filing can only be identified for electronic filings;
        # paper amendments keep 'filing_amended' as None.
        if self.is_amendment and not self.is_paper:
            report_id = self.headers['report_id']
            # Single-argument call form: prints the same text whether print
            # is a statement (Python 2) or a function (Python 3).
            print("Found amendment %s : %s " % (self.filing_number, report_id))
            amendment_match = re.search(r'^FEC\s*-\s*(\d+)', report_id)

            if not amendment_match:
                raise Exception(
                    "Can't find original filing in amended report %s" %
                    (self.filing_number))

            self.headers['filing_amended'] = amendment_match.group(1)

        return True
Ejemplo n.º 2
0
    def _parse_headers(self):
        """Read the header row and the summary row and populate ``self.headers``.

        Two rows are consumed through ``self._get_next_fields()``. The first
        is passed to ``parse_header`` together with ``self.is_paper``; the
        second is stored as ``self.form_row`` and supplies the ``form`` and
        ``fec_id`` header entries.

        Side effects: sets ``self.form_row``, ``self.headers`` and
        ``self.version`` and, unless the summary row is rejected,
        ``self.is_amendment`` (mirrored in ``self.headers['is_amendment']``).

        Returns:
            True on success. False when the summary row has fewer than two
            fields or its form type is blank.

        Raises:
            Exception: for an electronic amendment whose ``report_id`` header
                does not start with ``FEC-<digits>``.
        """
        header_arr = self._get_next_fields()
        summary_line = self._get_next_fields()
        self.form_row = summary_line

        self.headers = parse_header(header_arr, self.is_paper)
        # Defaults: 'filing_amended' is only overwritten further down, for
        # electronic amendments.
        self.headers['filing_amended'] = None
        self.headers['report_num'] = None
        self.version = self.headers['fec_version']

        try:
            self.headers['form'] = clean_entry(summary_line[0])
            self.headers['fec_id'] = clean_entry(summary_line[1])
        except IndexError:
            return False

        form = self.headers['form']
        if not form:
            # A blank form type has no last character to inspect. Report it
            # the same way as a truncated summary row instead of crashing on
            # form[-1].
            return False

        # Amendment discovery: a form type whose last character is 'A'
        # (case-insensitive) marks an amendment to an earlier filing.
        self.is_amendment = form[-1].upper() == 'A'
        self.headers['is_amendment'] = self.is_amendment

        # The amended filing can only be identified for electronic filings;
        # paper amendments keep 'filing_amended' as None.
        if self.is_amendment and not self.is_paper:
            report_id = self.headers['report_id']
            # Single-argument call form: prints the same text whether print
            # is a statement (Python 2) or a function (Python 3).
            print("Found amendment %s : %s " % (self.filing_number, report_id))
            amendment_match = re.search(r'^FEC\s*-\s*(\d+)', report_id)

            if not amendment_match:
                raise Exception(
                    "Can't find original filing in amended report %s" %
                    (self.filing_number))

            self.headers['filing_amended'] = amendment_match.group(1)

        return True
Ejemplo n.º 3
0
def parse_header(header_array, is_paper=False):
    """Map a raw header row onto the field names for its format version.

    The version string is read from ``header_array[1]`` for paper filings and
    from ``header_array[2]`` for electronic ones, then matched against the
    module's version regexes to choose the list of header names.

    Args:
        header_array: the fields of the header row, in order.
        is_paper: truthy to use the paper header layouts, otherwise the
            electronic ones.

    Returns:
        A dict with one entry per name in the chosen header list. Each value
        is the cleaned field at the same position, or "" when the row has no
        field at that position.

    Raises:
        UnknownHeaderError: the version matches none of the known layouts.
        IndexError: the row is too short to contain the version field.
    """
    if is_paper:
        version = clean_entry(header_array[1])

        if paper_headers_v1_re.match(version):
            field_names = paper_headers_v1
        elif paper_headers_v2_2_re.match(version):
            field_names = paper_headers_v2_2
        elif paper_headers_v2_6_re.match(version):
            field_names = paper_headers_v2_6
        else:
            raise UnknownHeaderError(
                "Couldn't find parser for paper version %s" % (version))
    else:
        version = clean_entry(header_array[2])

        if old_eheaders_re.match(version):
            field_names = old_eheaders
        elif new_eheaders_re.match(version):
            field_names = new_eheaders
        else:
            raise UnknownHeaderError(
                "Couldn't find parser for electronic version %s" % (version))

    headers = {}
    for position, field_name in enumerate(field_names):
        try:
            value = clean_entry(header_array[position])
        except IndexError:
            # Header rows may leave off trailing delimiters, so the row can be
            # shorter than the name list; missing trailing fields are blank.
            value = ""
        headers[field_name] = value

    return headers
Ejemplo n.º 4
0
def parse_header(header_array, is_paper=False):
    """Map a raw header row onto the field names for its format version.

    The version string is read from ``header_array[1]`` for paper filings and
    from ``header_array[2]`` for electronic ones, then matched against the
    module's version regexes to choose the list of header names.

    Args:
        header_array: the fields of the header row, in order.
        is_paper: truthy to use the paper header layouts, otherwise the
            electronic ones.

    Returns:
        A dict with one entry per name in the chosen header list. Each value
        is the cleaned field at the same position, or "" when the row has no
        field at that position.

    Raises:
        UnknownHeaderError: the version matches none of the known layouts.
        IndexError: the row is too short to contain the version field.
    """
    if is_paper:
        version = clean_entry(header_array[1])

        if paper_headers_v1_re.match(version):
            field_names = paper_headers_v1
        elif paper_headers_v2_2_re.match(version):
            field_names = paper_headers_v2_2
        elif paper_headers_v2_6_re.match(version):
            field_names = paper_headers_v2_6
        else:
            raise UnknownHeaderError ("Couldn't find parser for paper version %s" % (version))
    else:
        version = clean_entry(header_array[2])

        if old_eheaders_re.match(version):
            field_names = old_eheaders
        elif new_eheaders_re.match(version):
            field_names = new_eheaders
        else:
            raise UnknownHeaderError ("Couldn't find parser for electronic version %s" % (version))

    headers = {}
    for position, field_name in enumerate(field_names):
        try:
            value = clean_entry(header_array[position])
        except IndexError:
            # Header rows may leave off trailing delimiters, so the row can be
            # shorter than the name list; missing trailing fields are blank.
            value = ""
        headers[field_name] = value

    return headers
Ejemplo n.º 5
0
    def parse_line(self, line_array, version):
        """Return a dict mapping each column name to its value in this row.

        Args:
            line_array: the fields of one row, in order.
            version: version string used to choose the column layout; it is
                tested with ``re.match`` against each key of
                ``self.regex_dict``.

        Returns:
            A dict with one entry per column of the layout stored in
            ``self.column_locations_dict`` under the matching regex key. A
            column whose position lies past the end of ``line_array`` maps
            to ''; every other value is passed through ``clean_entry``.

        Raises:
            Exception: no key of ``self.regex_dict`` matches ``version``.
        """
        # The version regexes are meant to be non-overlapping, so iteration
        # order does not matter; should several match, the last one seen is
        # the one used.
        matching_keys = [pattern for pattern in self.regex_dict
                         if re.match(pattern, version)]

        if not matching_keys:
            raise Exception("Can't find data to parse line type=%s version=%s" % (self.form, version))

        layout = self.column_locations_dict[matching_keys[-1]]

        # Trailing delimiters are sometimes omitted, so a column's position
        # may lie beyond the last field actually present in the row.
        return {
            column: (clean_entry(line_array[col_position])
                     if col_position <= len(line_array) - 1 else '')
            for column, col_position in layout.items()
        }