Exemple #1
0
    def parse_line(self, line_array, version):
        """
        Build a dictionary of column name -> cleaned value for one line.

        The column layout is chosen by matching `version` against the
        patterns in `self.regex_dict`; the matching pattern is then used as
        the key into `self.column_locations_dict`, which maps column names
        to positions in `line_array`.

        Raises a plain Exception when no pattern matches `version`.
        """
        matched_pattern = None
        for pattern in self.regex_dict:
            # Deliberately no break: if several patterns match, the last one wins.
            if re.match(pattern, version):
                matched_pattern = pattern

        if matched_pattern is None:
            raise Exception("Can't find data to parse line type=%s version=%s" % (self.form, version))

        field_count = len(line_array)
        parsed = {}
        for name, position in self.column_locations_dict[matched_pattern].items():
            # Trailing commas are sometimes omitted, so a column may lie past
            # the end of the line; such columns are stored as empty strings.
            parsed[name] = clean_entry(line_array[position]) if position < field_count else ''

        return parsed
Exemple #2
0
    def parse_headers(self):
        """
        Read the header row and the summary row and populate self.headers.

        Two rows are consumed through self.get_next_fields(): the first is
        passed to header.parse() together with self.is_paper, the second
        (stored as self.form_row) supplies headers['form'] and
        headers['fec_id'] from its first two fields. self.version is taken
        from headers['fec_version'].

        A form name whose last character is "A" (case-insensitive) marks the
        filing as an amendment. For non-paper amendments the number of the
        amended filing is extracted from headers['report_id'] and stored in
        headers['amends_filing']; in every other case that entry stays None.
        An empty form name is treated as a non-amendment.

        Returns True on success, or False when the summary row has fewer
        than two fields (the amendment attributes are not set in that case).

        Raises Exception when a non-paper amendment's report_id does not
        start with an "FEC-<digits>" reference.
        """
        header_arr = self.get_next_fields()
        summary_line = self.get_next_fields()
        self.form_row = summary_line

        self.headers = header.parse(header_arr, self.is_paper)
        self.headers['amends_filing'] = None
        self.headers['report_num'] = None
        self.version = self.headers['fec_version']

        try:
            self.headers['form'] = clean_entry(summary_line[0])
            self.headers['fec_id'] = clean_entry(summary_line[1])

        except IndexError:
            # The summary row is too short to identify the filing.
            return False

        # Amendment discovery.
        # Identify if this is an amendment to a filing.
        # If so, identify which filing it amends.
        # Slicing with [-1:] yields '' for an empty form name instead of
        # raising IndexError.
        form_last_char = self.headers['form'][-1:].upper()

        self.is_amendment = (form_last_char == 'A')
        self.headers['is_amendment'] = self.is_amendment

        # Paper amendments keep amends_filing as None (set above).
        if self.is_amendment and not self.is_paper:
            # Raw string so that \s and \d are regex escapes, not (invalid)
            # string escapes.
            amendment_match = re.search(r'^FEC\s*-\s*(\d+)',
                                        self.headers['report_id'])

            if not amendment_match:
                raise Exception(
                    "Can't find original filing in amended report %s" %
                    (self.filing_number))

            self.headers['amends_filing'] = amendment_match.group(1)

        return True
Exemple #3
0
def parse(header_array, is_paper=False):
    """Map the fields of a header row onto the header names for its version.

    The version string is read from the row itself (index 2 for electronic
    filings, index 1 for paper filings) and matched against the known version
    patterns to decide which list of header names applies. Each name in that
    list is then paired with the cleaned field at the same position.

    Args:
        header_array: Sequence of raw header fields, in file order.
        is_paper: When true, the paper header layouts are used; otherwise
            the electronic ones.

    Returns:
        A dict keyed by header name. Names whose position lies past the end
        of `header_array` map to "".

    Raises:
        UnknownHeaderError: If the version matches none of the known layouts.
        IndexError: If the row is too short to contain the version field.
    """

    if not is_paper:
        version = clean_entry(header_array[2])

        if old_eheaders_re.match(version):
            headers_list = old_eheaders

        elif new_eheaders_re.match(version):
            headers_list = new_eheaders

        else:
            raise UnknownHeaderError(
                "Couldn't find parser for electronic version %s" % (version))

    else:
        version = clean_entry(header_array[1])

        if paper_headers_v1_re.match(version):
            headers_list = paper_headers_v1

        elif paper_headers_v2_2_re.match(version):
            headers_list = paper_headers_v2_2

        elif paper_headers_v2_6_re.match(version):
            headers_list = paper_headers_v2_6

        else:
            raise UnknownHeaderError(
                "Couldn't find parser for paper version %s" % (version))

    field_count = len(header_array)
    headers = {}

    for i, header_name in enumerate(headers_list):
        # It's acceptable for header rows to leave off trailing delimiters,
        # so fields missing from the end of the row are entered as blanks.
        # An explicit bounds check is used so that an IndexError raised
        # inside clean_entry itself is not silently turned into a blank.
        if i < field_count:
            headers[header_name] = clean_entry(header_array[i])
        else:
            headers[header_name] = ""

    return headers
Exemple #4
0
def parse(header_array, is_paper=False):
    """Map the fields of a header row onto the header names for its version.

    The version string is read from the row itself (index 2 for electronic
    filings, index 1 for paper filings) and matched against the known version
    patterns to decide which list of header names applies. Each name in that
    list is then paired with the cleaned field at the same position.

    Args:
        header_array: Sequence of raw header fields, in file order.
        is_paper: When true, the paper header layouts are used; otherwise
            the electronic ones.

    Returns:
        A dict keyed by header name. Names whose position lies past the end
        of `header_array` map to "".

    Raises:
        UnknownHeaderError: If the version matches none of the known layouts.
        IndexError: If the row is too short to contain the version field.
    """

    if not is_paper:
        version = clean_entry(header_array[2])

        if old_eheaders_re.match(version):
            headers_list = old_eheaders

        elif new_eheaders_re.match(version):
            headers_list = new_eheaders

        else:
            raise UnknownHeaderError("Couldn't find parser for electronic version %s" % (version))

    else:
        version = clean_entry(header_array[1])

        if paper_headers_v1_re.match(version):
            headers_list = paper_headers_v1

        elif paper_headers_v2_2_re.match(version):
            headers_list = paper_headers_v2_2

        elif paper_headers_v2_6_re.match(version):
            headers_list = paper_headers_v2_6

        else:
            raise UnknownHeaderError("Couldn't find parser for paper version %s" % (version))

    field_count = len(header_array)
    headers = {}

    for i, header_name in enumerate(headers_list):
        # It's acceptable for header rows to leave off trailing delimiters,
        # so fields missing from the end of the row are entered as blanks.
        # An explicit bounds check is used so that an IndexError raised
        # inside clean_entry itself is not silently turned into a blank.
        if i < field_count:
            headers[header_name] = clean_entry(header_array[i])
        else:
            headers[header_name] = ""

    return headers
Exemple #5
0
    def parse_headers(self):
        """
        Read the header row and the summary row and populate self.headers.

        Two rows are consumed through self.get_next_fields(): the first is
        passed to header.parse() together with self.is_paper, the second
        (stored as self.form_row) supplies headers['form'] and
        headers['fec_id'] from its first two fields. self.version is taken
        from headers['fec_version'].

        A form name whose last character is "A" (case-insensitive) marks the
        filing as an amendment. For non-paper amendments the number of the
        amended filing is extracted from headers['report_id'] and stored in
        headers['amends_filing']; in every other case that entry stays None.
        An empty form name is treated as a non-amendment.

        Returns True on success, or False when the summary row has fewer
        than two fields (the amendment attributes are not set in that case).

        Raises Exception when a non-paper amendment's report_id does not
        start with an "FEC-<digits>" reference.
        """
        header_arr = self.get_next_fields()
        summary_line = self.get_next_fields()
        self.form_row = summary_line

        self.headers = header.parse(header_arr, self.is_paper)
        self.headers['amends_filing'] = None
        self.headers['report_num'] = None
        self.version = self.headers['fec_version']

        try:
            self.headers['form'] = clean_entry(summary_line[0])
            self.headers['fec_id'] = clean_entry(summary_line[1])

        except IndexError:
            # The summary row is too short to identify the filing.
            return False

        # Amendment discovery.
        # Identify if this is an amendment to a filing.
        # If so, identify which filing it amends.
        # Slicing with [-1:] yields '' for an empty form name instead of
        # raising IndexError.
        form_last_char = self.headers['form'][-1:].upper()

        self.is_amendment = (form_last_char == 'A')
        self.headers['is_amendment'] = self.is_amendment

        # Paper amendments keep amends_filing as None (set above).
        if self.is_amendment and not self.is_paper:
            # Raw string so that \s and \d are regex escapes, not (invalid)
            # string escapes.
            amendment_match = re.search(r'^FEC\s*-\s*(\d+)', self.headers['report_id'])

            if not amendment_match:
                raise Exception("Can't find original filing in amended report %s" % (self.filing_number))

            self.headers['amends_filing'] = amendment_match.group(1)

        return True