Ejemplo n.º 1
0
    def handle(self, *args, **options):
        """Refresh header data and summary totals for every F3 filing.

        For each ``Filing_Header`` whose form is 'F3', the first row of the
        filing is re-parsed and stored on the header. The summary figures
        returned by ``process_f3_header`` are then copied onto the matching
        ``new_filing`` row. Headers without a matching ``new_filing`` are
        reported and skipped.
        """
        # Fields copied from the parsed header; falsy values are stored as None.
        summary_fields = ('coh_end', 'tot_raised', 'tot_spent', 'new_loans')

        for fh in Filing_Header.objects.filter(form='F3'):
            print("Processing filing %s" % (fh.filing_number))
            f1 = filing(fh.filing_number)
            version = f1.get_version()

            header_line = fp.parse_form_line(f1.get_first_row(), version)
            fh.header_data = header_line
            fh.save()

            # Keep the try body down to the one lookup that can miss.
            try:
                this_filing = new_filing.objects.get(
                    filing_number=fh.filing_number)
            except new_filing.DoesNotExist:
                # Use the header's number: `new_filing` is the model class,
                # not a row, so it carries no filing number to report.
                print("new_filing MISSING FOR %s" % (fh.filing_number))
                continue

            parsed_data = process_f3_header(header_line)
            for field in summary_fields:
                setattr(this_filing, field, parsed_data[field] or None)
            this_filing.save()
Ejemplo n.º 2
0
    def handle(self, *args, **options):
        """Re-parse each F3 filing's header and update its summary totals.

        Every ``Filing_Header`` with form 'F3' has its first row parsed and
        saved as ``header_data``. The totals produced by
        ``process_f3_header`` are written to the ``new_filing`` row with the
        same filing number; if no such row exists the filing is reported
        and the loop moves on.
        """
        # Fields copied from the parsed header; falsy values are stored as None.
        summary_fields = ('coh_end', 'tot_raised', 'tot_spent', 'new_loans')

        for fh in Filing_Header.objects.filter(form='F3'):
            print("Processing filing %s" % (fh.filing_number))
            f1 = filing(fh.filing_number)
            version = f1.get_version()

            header_line = fp.parse_form_line(f1.get_first_row(), version)
            fh.header_data = header_line
            fh.save()

            # Only the lookup belongs in the try; the rest cannot "not exist".
            try:
                this_filing = new_filing.objects.get(filing_number=fh.filing_number)
            except new_filing.DoesNotExist:
                # Report the number of the header being processed; the
                # `new_filing` name here is the model class, not an instance.
                print("new_filing MISSING FOR %s" % (fh.filing_number))
                continue

            parsed_data = process_f3_header(header_line)
            for field in summary_fields:
                setattr(this_filing, field, parsed_data[field] or None)
            this_filing.save()
Ejemplo n.º 3
0
 def handle(self, *args, **options):
     """Count body lines per form type for the first 2000 filings and print them.

     Headers are visited in filing-number order. Each filing is downloaded,
     its body rows are classified by the form name of the matching line
     parser (or by the raw first column when no parser is found), and the
     tally is printed. Nothing is written back to the database.
     """
     # just get the ids--otherwise django will load every column into memory
     # filter(form='F13')
     pk_rows = Filing_Header.objects.all().order_by('filing_number').values('pk')[:2000]
     for pk_row in pk_rows:
         header = Filing_Header.objects.get(pk=pk_row['pk'])
         filingnum = header.filing_number
         f1 = filing(filingnum, read_from_cache=True, write_to_cache=True)
         f1.download()

         print("processing filingnum %s, form %s version %s" % (filingnum, header.form, header.version))

         type_counts = {}
         total_lines = 0
         for row in f1.get_body_rows():
             # instead of parsing the line, just assume form type is the first arg.
             line_type = row[0].upper().strip()

             # sometimes there are blank lines within files--see 707076.fec
             if not line_type:
                 continue

             total_lines += 1
             # what type of line parser would be used here?
             lp = fp.get_line_parser(line_type)
             if lp:
                 # count under the parser's form name rather than the raw column
                 line_type = lp.form
             else:
                 print("Missing parser from %s" % (line_type))

             type_counts[line_type] = type_counts.get(line_type, 0) + 1

         print("Found total lines = %s with dict=%s" % (total_lines, type_counts))
         # Saving the tally is disabled here:
         #header.lines_present = type_counts
         #header.save()
         
         
Ejemplo n.º 4
0
    def handle(self, *args, **options):
        """Print a per-form-type tally of body lines for the first 2000 filings.

        Headers are walked in filing-number order. For each one the filing
        is downloaded and every non-blank body row is counted under the form
        name of its line parser, or under its raw first column when no
        parser is found. The results are only printed.
        """
        # just get the ids--otherwise django will load every column into memory
        # filter(form='F13')
        pk_rows = Filing_Header.objects.all().order_by(
            'filing_number').values('pk')[:2000]
        for pk_row in pk_rows:
            header = Filing_Header.objects.get(pk=pk_row['pk'])
            filingnum = header.filing_number
            f1 = filing(filingnum, read_from_cache=True, write_to_cache=True)
            f1.download()

            print("processing filingnum %s, form %s version %s" % (
                filingnum, header.form, header.version))

            type_counts = {}
            total_lines = 0
            for row in f1.get_body_rows():
                # instead of parsing the line, just assume form type is the first arg.
                line_type = row[0].upper().strip()

                # sometimes there are blank lines within files--see 707076.fec
                if not line_type:
                    continue

                total_lines += 1
                # what type of line parser would be used here?
                lp = fp.get_line_parser(line_type)
                if lp:
                    # count under the parser's form name rather than the raw column
                    line_type = lp.form
                else:
                    print("Missing parser from %s" % (line_type))

                type_counts[line_type] = type_counts.get(line_type, 0) + 1

            print("Found total lines = %s with dict=%s" % (total_lines,
                                                           type_counts))
    def handle(self, *args, **options):
        """Re-parse the header row of every ``Filing_Header`` and store it.

        Filings whose form type is rejected by ``fp.is_allowed_form`` are
        skipped (and reported when the externally defined ``verbose`` flag
        is truthy). For the rest, the parsed first row is saved as
        ``header_data``.
        """
        for this_filing in Filing_Header.objects.all():
            filing_number = this_filing.filing_number
            print("Processing %s" % filing_number)
            f1 = filing(filing_number)
            form = f1.get_form_type()
            version = f1.get_version()

            # only parse forms that we're set up to read
            if not fp.is_allowed_form(form):
                if verbose:
                    # `filing_number` is the local holding this filing's
                    # number; no `filingnum` exists in this method.
                    print("Not a parseable form: %s - %s" % (form, filing_number))
                continue

            header = f1.get_first_row()
            this_filing.header_data = fp.parse_form_line(header, version)
            this_filing.save()
Ejemplo n.º 6
0
    def handle(self, *args, **options):
        """Store a freshly parsed header row on every ``Filing_Header``.

        Forms that ``fp.is_allowed_form`` rejects are skipped, with a
        message when the externally defined ``verbose`` flag is truthy.
        Otherwise the filing's first row is parsed and saved as
        ``header_data``.
        """
        for this_filing in Filing_Header.objects.all():
            filing_number = this_filing.filing_number
            print("Processing %s" % filing_number)
            f1 = filing(filing_number)
            form = f1.get_form_type()
            version = f1.get_version()

            # only parse forms that we're set up to read
            if not fp.is_allowed_form(form):
                if verbose:
                    # Report via `filing_number`; the name `filingnum` is
                    # not defined anywhere in this method.
                    print("Not a parseable form: %s - %s" % (form, filing_number))
                continue

            header = f1.get_first_row()
            this_filing.header_data = fp.parse_form_line(header, version)
            this_filing.save()
Ejemplo n.º 7
0
def process_file(filingnum):
    """Download a filing and create its ``Filing_Header`` row if none exists.

    Relies on ``fp`` (the form parser) and ``unprocessable_form_hash``
    (a form -> count mapping), both defined outside this function.

    Args:
        filingnum: the filing number to download and record.

    Returns:
        None if the form type is not accepted by ``fp.is_allowed_form``
        (the form is tallied in ``unprocessable_form_hash``), 0 if a header
        with this filing number already exists, 1 if a header was created.
    """
    f1 = filing(filingnum, read_from_cache=True, write_to_cache=True)
    f1.download()
    form = f1.get_form_type()
    version = f1.get_version()

    # only parse forms that we're set up to read
    if not fp.is_allowed_form(form):
        unprocessable_form_hash[form] = unprocessable_form_hash.get(form, 0) + 1
        return

    header_line = fp.parse_form_line(f1.get_first_row(), version)

    amended_filing = None
    if f1.is_amendment:
        amended_filing = f1.headers['filing_amended']

    # enter it if we don't have it already:
    try:
        Filing_Header.objects.get(filing_number=filingnum)
    except Filing_Header.DoesNotExist:
        pass
    else:
        print("Already entered! %s" % (filingnum))
        return 0

    # Look each coverage date up on its own, so a header that lacks one key
    # still gets the other date parsed.
    coverage = {}
    for key in ('coverage_from_date', 'coverage_through_date'):
        try:
            raw_date = header_line[key]
        except KeyError:
            raw_date = None
        # dateparse('') will give today, oddly -- only parse non-empty values
        coverage[key] = dateparse(raw_date) if raw_date else None

    Filing_Header.objects.create(
        raw_filer_id=f1.headers['fec_id'],
        form=form,
        filing_number=filingnum,
        version=f1.version,
        coverage_from_date=coverage['coverage_from_date'],
        coverage_through_date=coverage['coverage_through_date'],
        is_amendment=f1.is_amendment,
        amends_filing=amended_filing,
        amendment_number=f1.headers['report_number'] or None,
        header_data=header_line)

    # NOTE: body rows are not parsed or stored here; only the header is recorded.
    return 1
Ejemplo n.º 8
0
 def handle(self, *args, **options):
     """Parse every body line of every filing and hand it to its handler.

     Headers are visited in filing-number order. Each filing is downloaded,
     every non-blank body row with a known line parser is parsed and passed
     to the function for its form name, and a per-form line tally is saved
     on the header as ``lines_present``. Rows without a parser are reported
     and tallied under their raw first column.
     """
     # just get the ids--otherwise django will load every column into memory
     # filter(form='F13')
     pk_rows = Filing_Header.objects.all().order_by('filing_number').values('pk')
     for seen, pk_row in enumerate(pk_rows, 1):
         # progress marker every 1000 headers
         if seen % 1000 == 0:
             print("Processined %s lines" % seen)

         filing_header = Filing_Header.objects.get(pk=pk_row['pk'])
         filing_no = filing_header.filing_number
         f1 = filing(filing_no, read_from_cache=True, write_to_cache=True)
         f1.download()
         version = filing_header.version

         type_counts = {}
         for row in f1.get_body_rows():
             # instead of parsing the line, just assume form type is the first arg.
             line_type = row[0].upper().strip()

             # sometimes there are blank lines within files--see 707076.fec
             if not line_type:
                 continue

             # what type of line parser would be used here?
             lp = fp.get_line_parser(line_type)
             if not lp:
                 print("Missing parser from %s" % (line_type))
             else:
                 form = lp.form
                 # tally under the parser's form name, not the raw column
                 line_type = form
                 parsed = fp.parse_form_line(row, version)

                 if form == 'SchA':
                     skeda_from_skedadict(parsed, filing_no, filing_header)
                 elif form == 'SchB':
                     skedb_from_skedbdict(parsed, filing_no, filing_header)
                 elif form == 'SchE':
                     skede_from_skededict(parsed, filing_no, filing_header)
                 elif form == 'F65':
                     # Treat 48-hour contribution notices like sked A.
                     # Requires special handling for amendment, since these
                     # are superseded by regular F3 forms.
                     skeda_from_f65(parsed, filing_no, filing_header)
                 elif form == 'F56':
                     # disclosed donor to non-committee. Sorta rare, but..
                     skeda_from_f56(parsed, filing_no, filing_header)
                 elif form == 'F92':
                     # disclosed electioneering donor
                     skeda_from_f92(parsed, filing_no, filing_header)
                 elif form == 'F132':
                     # inaugural donors
                     skeda_from_f132(parsed, filing_no, filing_header)
                 elif form == 'F133':
                     # inaugural refunds
                     skeda_from_f133(parsed, filing_no, filing_header)
                 elif form == 'F57':
                     # IE's disclosed by non-committees. Note that they use
                     # this for *both* quarterly and 24-hour notices. There's
                     # not much consistency with this--be careful with
                     # superseding stuff.
                     skede_from_f57(parsed, filing_no, filing_header)
                 else:
                     # It's another kind of line. Just dump it in Other lines.
                     otherline_from_line(parsed, filing_no, filing_header, formname=form)

             type_counts[line_type] = type_counts.get(line_type, 0) + 1

         filing_header.lines_present = type_counts
         filing_header.save()
         
         
Ejemplo n.º 9
0
def process_new_filing(thisnewfiling, fp=None, filing_time=None, filing_time_is_exact=False):
    """Fill in the header-derived fields of a filing record and save it.

    Args:
        thisnewfiling: the filing record to update; read for
            ``filing_number`` and ``form_type`` and written in place.
        fp: form parser to reuse; a new ``form_parser()`` is built if falsy.
        filing_time: not used by this function.
        filing_time_is_exact: not used by this function.

    Returns:
        False if the filing reports an error via ``get_error()``; True
        otherwise, including when the form type is not parseable.
    """
    if not fp:
        fp = form_parser()

    f1 = filing(thisnewfiling.filing_number)
    if f1.get_error():
        return False

    form = f1.get_form_type()
    version = f1.get_version()

    # Leave the form type alone if it's already been entered -- that's where
    # it says if it is terminated.
    if not thisnewfiling.form_type:
        thisnewfiling.form_type = form

    # Mark amendments based on the form type's trailing 'A'. Otherwise the
    # F1's will look like they haven't been amended.
    try:
        ends_with_a = thisnewfiling.form_type[-1].upper() == 'A'
    except IndexError:
        ends_with_a = False
    if ends_with_a:
        thisnewfiling.is_amendment = True

    # only parse forms that we're set up to read
    if not fp.is_allowed_form(form):
        if verbose:
            print("Not a parseable form: %s - %s" % (form, thisnewfiling.filing_number))
        if thisnewfiling.is_amendment:
            thisnewfiling.save()
        return True

    header_line = fp.parse_form_line(f1.get_first_row(), version)

    amended_filing = f1.headers['filing_amended'] if f1.is_amendment else None

    from_date = None
    through_date = None

    # dateparse('') will give today, oddly -- so only parse non-empty values
    try:
        raw_from = header_line['coverage_from_date']
        if raw_from:
            from_date = dateparse(raw_from)
            if from_date:
                thisnewfiling.cycle = get_cycle_from_date(from_date)
    except KeyError:
        print("problem with coverage_from_date")

    # A through date, when present, overrides the cycle set from the from date.
    try:
        raw_through = header_line['coverage_through_date']
        if raw_through:
            through_date = dateparse(raw_through)
            if through_date:
                thisnewfiling.cycle = get_cycle_from_date(through_date)
    except KeyError:
        print("coverage_through_date")

    # Fill in the filing -- but don't mark it as being complete.
    report_number = f1.headers['report_number'] or None
    thisnewfiling.fec_id = f1.headers['fec_id']
    thisnewfiling.coverage_from_date = from_date
    thisnewfiling.coverage_to_date = through_date
    thisnewfiling.is_amendment = f1.is_amendment
    thisnewfiling.amends_filing = amended_filing
    thisnewfiling.amendment_number = report_number
    thisnewfiling.header_data = header_line

    print(thisnewfiling.__dict__)

    thisnewfiling.save()
    return True
Ejemplo n.º 10
0
def process_file(filingnum):
    """Download a filing and record its header if it is not stored yet.

    Uses ``fp`` (the form parser) and ``unprocessable_form_hash``
    (a form -> count mapping), both defined outside this function.

    Args:
        filingnum: the filing number to download and record.

    Returns:
        None when ``fp.is_allowed_form`` rejects the form type (the form is
        counted in ``unprocessable_form_hash``), 0 when a ``Filing_Header``
        with this filing number already exists, 1 when one was created.
    """
    f1 = filing(filingnum, read_from_cache=True, write_to_cache=True)
    f1.download()
    form = f1.get_form_type()
    version = f1.get_version()

    # only parse forms that we're set up to read
    if not fp.is_allowed_form(form):
        unprocessable_form_hash[form] = unprocessable_form_hash.get(form, 0) + 1
        return

    header_line = fp.parse_form_line(f1.get_first_row(), version)

    amended_filing = None
    if f1.is_amendment:
        amended_filing = f1.headers['filing_amended']

    # enter it if we don't have it already:
    try:
        Filing_Header.objects.get(filing_number=filingnum)
    except Filing_Header.DoesNotExist:
        pass
    else:
        print("Already entered! %s" % (filingnum))
        return 0

    # Each coverage date gets its own lookup: a missing from-date key must
    # not prevent the through date from being parsed.
    coverage = {}
    for key in ('coverage_from_date', 'coverage_through_date'):
        try:
            raw_date = header_line[key]
        except KeyError:
            raw_date = None
        # dateparse('') will give today, oddly -- only parse non-empty values
        coverage[key] = dateparse(raw_date) if raw_date else None

    Filing_Header.objects.create(
        raw_filer_id=f1.headers['fec_id'],
        form=form,
        filing_number=filingnum,
        version=f1.version,
        coverage_from_date=coverage['coverage_from_date'],
        coverage_through_date=coverage['coverage_through_date'],
        is_amendment=f1.is_amendment,
        amends_filing=amended_filing,
        amendment_number=f1.headers['report_number'] or None,
        header_data=header_line)

    # NOTE: body rows are not parsed or stored here; only the header is recorded.
    return 1
Ejemplo n.º 11
0
import re

from formdata.utils.form_mappers import *

from parsing.form_parser import form_parser, ParserMissingError
from parsing.filing import filing
from parsing.read_FEC_settings import FILECACHE_DIRECTORY
from formdata.models import Filing_Header

# load up a form parser
fp = form_parser()

filing_num = 708753

# Use the variable defined just above; no `filingnum` exists in this script.
f1 = filing(filing_num, read_from_cache=True, write_to_cache=True)

# parse_form_line needs the filing's format version; take it from the filing.
version = f1.get_version()

# NOTE(review): as a regular expression 'SA*' means "S followed by zero or
# more A's", not "anything starting with SA" -- confirm against how
# get_rows() applies the pattern.
a = re.compile(r'SA*', re.I)

rows = f1.get_rows(a)

# parse a row
parsed_row = fp.parse_form_line(rows[0], version)
print(parsed_row)

# parsed_row = {'conduit_zip': '', 'back_reference_sched_name': '', 'donor_candidate_prefix': '', 'contribution_aggregate': '250.00', 'donor_committee_name': '', 'contributor_street_2': '', 'donor_candidate_suffix': '', 'contributor_organization_name': '', 'contributor_suffix': '', 'contributor_state': 'TX', 'donor_committee_fec_id': '', 'entity_type': 'IND', 'donor_candidate_state': '', 'donor_candidate_district': '', 'contributor_prefix': '', 'contributor_last_name': 'Acton', 'donor_candidate_middle_name': '', 'transaction_id': 'SA11AI.30102', 'contribution_date': '20101021', 'contributor_occupation': '', 'filer_committee_id_number': 'C00460808', 'donor_candidate_last_name': '', 'conduit_street2': '', 'conduit_street1': '', 'contributor_city': 'Dallas', 'donor_candidate_first_name': '', 'contribution_purpose_descrip': '', 'election_code': 'G2010', 'donor_candidate_office': '', 'memo_text_description': '', 'donor_candidate_fec_id': '', 'form_type': 'SA11AI', 'contributor_first_name': 'Robert', 'contribution_purpose_code': '', 'election_other_description': '', 'conduit_name': '', 'contribution_amount': '150.00', 'conduit_city': '', 'contributor_employer': '', 'back_reference_tran_id_number': '', 'contributor_street_1': '6407 Meadow Road', 'conduit_state': '', 'reference_code': '', 'memo_code': '', 'contributor_zip': '752305142', 'contributor_middle_name': ''}

# can we save it? 
from formdata.utils.form_mappers import *
from formdata.models import Filing_Header
Ejemplo n.º 12
0
def fix_dissemination_date(this_filing, fp):
    """Re-read a version 8.1 filing and repair the dates on its stored SkedE rows.

    Every body row is re-parsed. For rows whose parser is 'SchE', the
    matching ``SkedE`` record (same filing number and transaction id) gets
    its raw and formatted dissemination/expenditure dates and its effective
    date recomputed. Changes are saved only when the externally defined
    ``alter_db`` flag is truthy.

    Args:
        this_filing: record providing ``filing_number`` and ``lines_present``.
        fp: form parser used to parse each body row.

    Returns:
        None in all cases. Filings whose version is not '8.1' return
        immediately.
    """
    ## we gotta parse the rows again.
    print("handling %s line_count=%s" % (this_filing.filing_number,
                                         this_filing.lines_present))
    f1 = filing(this_filing.filing_number)
    version = f1.get_version()
    # This is being written when the current version is 8.1--the only version to include dissemination date.
    if version != '8.1':
        return None

    linenum = 0

    # run through all the lines:
    while True:
        linenum += 1
        row = f1.get_body_row()
        if not row:
            break

        try:
            linedict = fp.parse_form_line(row, version)
        except ParserMissingError:
            # Without a parsed line there is nothing to inspect; report it
            # and move on rather than subscripting None below.
            print('fix_dissemination_date: Unknown line type in filing %s line %s: type=%s Skipping.' % (
                this_filing.filing_number, linenum, row[0]))
            continue

        # ignore everything but sked E's -- note that sked F57 *does not* have this issue.
        if linedict['form_parser'] != 'SchE':
            continue

        dissemination_date = linedict['dissemination_date']
        expenditure_date = linedict['expenditure_date']
        transaction_id = linedict['transaction_id']

        print("filingnum=%s dissemination_date=%s expenditure_date=%s transaction_id=%s" % (
            this_filing.filing_number, dissemination_date,
            expenditure_date, transaction_id))

        # then fix the original date in the db.
        try:
            original_line = SkedE.objects.get(
                filing_number=this_filing.filing_number,
                transaction_id=transaction_id)
        except SkedE.DoesNotExist:
            print("Couldn't find filing%s transaction %s" % (
                this_filing.filing_number, transaction_id))
            continue

        if dissemination_date:
            original_line.dissemination_date = dissemination_date
            try:
                original_line.dissemination_date_formatted = dateparse(
                    dissemination_date)
                original_line.effective_date = original_line.dissemination_date_formatted
            except ValueError:
                # best effort: an unparseable raw date leaves the formatted
                # and effective dates as they were
                pass
        else:
            original_line.dissemination_date_formatted = None

        if expenditure_date:
            original_line.expenditure_date = expenditure_date
            try:
                original_line.expenditure_date_formatted = dateparse(
                    expenditure_date)
                # NOTE(review): this tests the record's stored dissemination
                # date, which is not cleared above when the filing's value is
                # empty -- confirm that is intended.
                if not original_line.dissemination_date:
                    original_line.effective_date = original_line.expenditure_date_formatted
            except ValueError:
                pass
        else:
            original_line.expenditure_date_formatted = None

        if not expenditure_date and not dissemination_date:
            original_line.effective_date = None

        if alter_db:
            original_line.save()
Ejemplo n.º 13
0
import re

from formdata.utils.form_mappers import *

from parsing.form_parser import form_parser, ParserMissingError
from parsing.filing import filing
from parsing.read_FEC_settings import FILECACHE_DIRECTORY
from formdata.models import Filing_Header

# load up a form parser
fp = form_parser()

filing_num = 708753

# Use the variable defined just above; no `filingnum` exists in this script.
f1 = filing(filing_num, read_from_cache=True, write_to_cache=True)

# parse_form_line needs the filing's format version; take it from the filing.
version = f1.get_version()

# NOTE(review): as a regular expression 'SA*' means "S followed by zero or
# more A's", not "anything starting with SA" -- confirm against how
# get_rows() applies the pattern.
a = re.compile(r'SA*', re.I)

rows = f1.get_rows(a)

# parse a row
parsed_row = fp.parse_form_line(rows[0], version)
print(parsed_row)

# parsed_row = {'conduit_zip': '', 'back_reference_sched_name': '', 'donor_candidate_prefix': '', 'contribution_aggregate': '250.00', 'donor_committee_name': '', 'contributor_street_2': '', 'donor_candidate_suffix': '', 'contributor_organization_name': '', 'contributor_suffix': '', 'contributor_state': 'TX', 'donor_committee_fec_id': '', 'entity_type': 'IND', 'donor_candidate_state': '', 'donor_candidate_district': '', 'contributor_prefix': '', 'contributor_last_name': 'Acton', 'donor_candidate_middle_name': '', 'transaction_id': 'SA11AI.30102', 'contribution_date': '20101021', 'contributor_occupation': '', 'filer_committee_id_number': 'C00460808', 'donor_candidate_last_name': '', 'conduit_street2': '', 'conduit_street1': '', 'contributor_city': 'Dallas', 'donor_candidate_first_name': '', 'contribution_purpose_descrip': '', 'election_code': 'G2010', 'donor_candidate_office': '', 'memo_text_description': '', 'donor_candidate_fec_id': '', 'form_type': 'SA11AI', 'contributor_first_name': 'Robert', 'contribution_purpose_code': '', 'election_other_description': '', 'conduit_name': '', 'contribution_amount': '150.00', 'conduit_city': '', 'contributor_employer': '', 'back_reference_tran_id_number': '', 'contributor_street_1': '6407 Meadow Road', 'conduit_state': '', 'reference_code': '', 'memo_code': '', 'contributor_zip': '752305142', 'contributor_middle_name': ''}

# can we save it?
from formdata.utils.form_mappers import *
from formdata.models import Filing_Header

# Look up the stored header for the same filing number used above.
header = Filing_Header.objects.get(filing_number=filing_num)
Ejemplo n.º 14
0
def fix_dissemination_date(this_filing, fp):
    """Recompute the dates on stored SkedE rows from a version 8.1 filing.

    The filing's body rows are parsed again. Each row whose parser is
    'SchE' is matched to a ``SkedE`` record by filing number and
    transaction id, and that record's raw, formatted and effective dates
    are updated. Records are saved only when the externally defined
    ``alter_db`` flag is truthy.

    Args:
        this_filing: record providing ``filing_number`` and ``lines_present``.
        fp: form parser used to parse each body row.

    Returns:
        None in all cases. Filings whose version is not '8.1' return
        immediately.
    """
    ## we gotta parse the rows again.
    print("handling %s line_count=%s" % (this_filing.filing_number, this_filing.lines_present))
    f1 = filing(this_filing.filing_number)
    version = f1.get_version()
    # This is being written when the current version is 8.1--the only version to include dissemination date.
    if version != '8.1':
        return None

    linenum = 0

    # run through all the lines:
    while True:
        linenum += 1
        row = f1.get_body_row()
        if not row:
            break

        try:
            linedict = fp.parse_form_line(row, version)
        except ParserMissingError:
            # No parsed line means nothing to inspect; report and skip it
            # instead of subscripting None further down.
            print('fix_dissemination_date: Unknown line type in filing %s line %s: type=%s Skipping.' % (this_filing.filing_number, linenum, row[0]))
            continue

        # ignore everything but sked E's -- note that sked F57 *does not* have this issue.
        if linedict['form_parser'] != 'SchE':
            continue

        dissemination_date = linedict['dissemination_date']
        expenditure_date = linedict['expenditure_date']
        transaction_id = linedict['transaction_id']

        print("filingnum=%s dissemination_date=%s expenditure_date=%s transaction_id=%s" % (this_filing.filing_number, dissemination_date, expenditure_date, transaction_id))

        # then fix the original date in the db.
        try:
            original_line = SkedE.objects.get(filing_number=this_filing.filing_number, transaction_id=transaction_id)
        except SkedE.DoesNotExist:
            print("Couldn't find filing%s transaction %s" % (this_filing.filing_number, transaction_id))
            continue

        if dissemination_date:
            original_line.dissemination_date = dissemination_date
            try:
                original_line.dissemination_date_formatted = dateparse(dissemination_date)
                original_line.effective_date = original_line.dissemination_date_formatted
            except ValueError:
                # best effort: an unparseable raw date leaves the formatted
                # and effective dates as they were
                pass
        else:
            original_line.dissemination_date_formatted = None

        if expenditure_date:
            original_line.expenditure_date = expenditure_date
            try:
                original_line.expenditure_date_formatted = dateparse(expenditure_date)
                # NOTE(review): this tests the record's stored dissemination
                # date, which is not cleared above when the filing's value is
                # empty -- confirm that is intended.
                if not original_line.dissemination_date:
                    original_line.effective_date = original_line.expenditure_date_formatted
            except ValueError:
                pass
        else:
            original_line.expenditure_date_formatted = None

        if not expenditure_date and not dissemination_date:
            original_line.effective_date = None

        if alter_db:
            original_line.save()
Ejemplo n.º 15
0
def process_filing_body(filingnum, fp=None, logger=None):
    """Parse every body row of one filing and record the outcome in the database.

    The filing's row is looked up in ``efilings_filing``; each body row is run
    through ``fp.parse_form_line`` and handed to ``process_body_row``; the
    ``CSV_dumper`` is then flushed and the per-line-type tally, the
    "processed" marker and the committee / candidate ``is_dirty`` flags are
    written with direct SQL updates.

    Args:
        filingnum: Filing number. Used to open the filing and as the
            ``filing_number`` key in ``efilings_filing``.
        fp: Form parser to reuse. A new ``form_parser()`` is created when
            this is falsy, which is wasteful when running in bulk.
        logger: Logger to use; ``fec_logger()`` is created when falsy.

    Returns:
        None, both after a full run and when the form type is not one the
        parser allows.

    Raises:
        FilingHeaderDoesNotExist: no ``efilings_filing`` row for ``filingnum``.
        FilingHeaderAlreadyProcessed: the row's ``data_is_processed`` is "1".
    """
    # It's useful to pass the form parser in when running in bulk so we don't
    # have to keep creating new ones.
    if not fp:
        fp = form_parser()

    if not logger:
        logger = fec_logger()
    msg = "process_filing_body: Starting # %s" % (filingnum)
    logger.info(msg)

    connection = get_connection()
    # The connection used to be closed only at the very end, so both raise
    # paths, the early return and any unexpected error leaked it.
    try:
        cursor = connection.cursor()
        # Values are passed as query parameters rather than interpolated into
        # the SQL text, so the driver does the quoting.
        cursor.execute(
            "select fec_id, superseded_by_amendment, data_is_processed "
            "from efilings_filing where filing_number=%s",
            (filingnum,))

        cd = CSV_dumper(connection)

        result = cursor.fetchone()
        if not result:
            msg = "process_filing_body: Couldn't find a new_filing for filing %s" % (filingnum)
            logger.error(msg)
            raise FilingHeaderDoesNotExist(msg)

        # NOTE(review): line_sequence is passed to process_body_row for every
        # row but never incremented here -- confirm that a constant 1 is intended.
        line_sequence = 1
        is_amended = result[1]
        is_already_processed = result[2]
        if is_already_processed == "1":
            msg = 'process_filing_body: This filing has already been entered.'
            print(msg)
            logger.error(msg)
            raise FilingHeaderAlreadyProcessed(msg)

        f1 = filing(filingnum)
        form = f1.get_form_type()
        version = f1.get_version()
        filer_id = f1.get_filer_id()

        # only parse forms that we're set up to read
        if not fp.is_allowed_form(form):
            # `verbose` is a module-level flag defined outside this function.
            if verbose:
                msg = "process_filing_body: Not a parseable form: %s - %s" % (form, filingnum)
                print(msg)
                logger.info(msg)
            return None

        linenum = 0
        while True:
            linenum += 1
            row = f1.get_body_row()
            if not row:
                break

            try:
                linedict = fp.parse_form_line(row, version)
                process_body_row(linedict, filingnum, line_sequence, is_amended, cd, filer_id)
            except ParserMissingError:
                # Unknown line types are reported and skipped, not fatal.
                msg = 'process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping.' % (filingnum, linenum, row[0])
                print(msg)
                logger.warn(msg)
                continue

        # commit all the leftovers
        cd.commit_all()
        cd.close()
        counter = cd.get_counter()
        total_rows = sum(counter[line_type] for line_type in counter)

        msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (filingnum, total_rows, counter)
        print(msg)
        logger.info(msg)

        ######## DIRECT DB UPDATES. PROBABLY A BETTER APPROACH, BUT...

        # NOTE(review): assumes dict_to_hstore returns plain hstore text that
        # has not already been escaped for embedding in SQL -- confirm.
        header_data = dict_to_hstore(counter)
        cursor.execute(
            "update efilings_filing set lines_present=%s::hstore where filing_number=%s",
            (header_data, filingnum))

        # mark file as having been entered.
        cursor.execute(
            "update efilings_filing set data_is_processed='1' where filing_number=%s",
            (filingnum,))

        # The cycle was previously embedded as a quoted literal; keep sending
        # it as text so the comparison is unchanged if CURRENT_CYCLE is an int.
        cycle = "%s" % (CURRENT_CYCLE,)

        # flag this filer as one who has changed.
        cursor.execute(
            "update efilings_committee set is_dirty=True where fec_id=%s and cycle=%s",
            (filer_id, cycle))

        # should also update the candidate is dirty flag too by joining w/ ccl table.
        # these tables aren't indexed, so do as two separate queries.
        cursor.execute(
            "select cand_id from ftpdata_candcomlink where cmte_id = %s and cmte_dsgn in ('A', 'P')",
            (filer_id,))
        result = cursor.fetchone()
        if result:
            cand_id = result[0]
            cursor.execute(
                "update efilings_candidate set is_dirty=True where fec_id = %s and cycle=%s",
                (cand_id, cycle))
    finally:
        connection.close()
Ejemplo n.º 16
0
    def handle(self, *args, **options):
        """Re-read every filing's body rows and enter them by line type.

        For each ``Filing_Header`` (in ``filing_number`` order) the filing is
        opened through the file cache, each body row is routed to the entry
        function matching its line parser's form, and a count of rows per
        line type is saved on ``header.lines_present``.

        Rows without a line parser are reported and still counted under
        their raw first-column value. Blank rows are ignored.
        """
        # just get the ids--otherwise django will load every column into memory
        all_headers = Filing_Header.objects.all().order_by(
            'filing_number').values('pk')
        for filings_done, header_pk in enumerate(all_headers, 1):

            # Progress is counted per filing, not per line.
            if filings_done % 1000 == 0:
                print("Processed %s filings" % filings_done)

            header = Filing_Header.objects.get(pk=header_pk['pk'])
            filingnum = header.filing_number
            f1 = filing(filingnum, read_from_cache=True, write_to_cache=True)
            f1.download()
            version = header.version

            line_dict = {}
            for row in f1.get_body_rows():
                # instead of parsing the line, just assume form type is the first arg.
                # An entirely empty row is treated like a blank first column
                # rather than raising IndexError.
                r_type = row[0].upper().strip() if row else ''

                # sometimes there are blank lines within files--see 707076.fec
                if not r_type:
                    continue

                # what type of line parser would be used here?
                lp = fp.get_line_parser(r_type)
                if lp:
                    # Count the row under the parser's form name rather than
                    # the raw first-column value.
                    line_form = lp.form
                    r_type = line_form

                    linedict = fp.parse_form_line(row, version)

                    if line_form == 'SchA':
                        skeda_from_skedadict(linedict, filingnum, header)

                    elif line_form == 'SchB':
                        skedb_from_skedbdict(linedict, filingnum, header)

                    elif line_form == 'SchE':
                        skede_from_skededict(linedict, filingnum, header)

                    # Treat 48-hour contribution notices like sked A.
                    # Requires special handling for amendment, since these are
                    # superseded by regular F3 forms.
                    elif line_form == 'F65':
                        skeda_from_f65(linedict, filingnum, header)

                    # disclosed donor to non-committee. Sorta rare, but..
                    elif line_form == 'F56':
                        skeda_from_f56(linedict, filingnum, header)

                    # disclosed electioneering donor
                    elif line_form == 'F92':
                        skeda_from_f92(linedict, filingnum, header)

                    # inaugural donors
                    elif line_form == 'F132':
                        skeda_from_f132(linedict, filingnum, header)

                    # inaugural refunds
                    elif line_form == 'F133':
                        skeda_from_f133(linedict, filingnum, header)

                    # IE's disclosed by non-committees. Note that they use this
                    # for *both* quarterly and 24-hour notices. There's not much
                    # consistency with this--be careful with superseding stuff.
                    elif line_form == 'F57':
                        skede_from_f57(linedict, filingnum, header)

                    # It's another kind of line. Just dump it in Other lines.
                    else:
                        otherline_from_line(linedict,
                                            filingnum,
                                            header,
                                            formname=line_form)

                else:
                    print("Missing parser from %s" % (r_type))

                line_dict[r_type] = line_dict.get(r_type, 0) + 1

            header.lines_present = line_dict
            header.save()
# Module-level script fragment: open one shared DB connection and logger,
# then walk the file cache and inspect every filing found there.
# NOTE(review): the loop body appears to stop after the "isn't parseable"
# check; `cursor`, `logger`, `cd` and `filer_id` are assigned but not used in
# the visible part -- presumably the rest of the script consumed them.
connection = get_connection()
cursor = connection.cursor()

logger = fec_logger()

# Process all .fec files in the FILECACHE_DIRECTORY
for d, _, files in os.walk(FILECACHE_DIRECTORY):
    for this_file in files:

        # Ignore it if it isn't a numeric fec file, e.g. \d+\.fec
        # (fec_format_file is presumably a compiled regex defined elsewhere.)
        if not fec_format_file.match(this_file):
            continue

        # The filing number is the file name with ".fec" removed.
        filingnum = this_file.replace(".fec", "")
        # A fresh CSV_dumper is created for every filing, on the shared connection.
        cd = CSV_dumper(connection)
        f1 = filing(filingnum)

        formtype = f1.get_form_type()
        version = f1.version
        filer_id = f1.get_filer_id()
        print "Processing form number %s - type=%s version=%s is_amended: %s" % (
            f1.filing_number, formtype, version, f1.is_amendment)
        print "Headers are: %s" % f1.headers

        # For amendments, also show which filing is being amended.
        if f1.is_amendment:
            print "Original filing is: %s" % (f1.headers['filing_amended'])

        # Skip form types the form parser (`fp`, defined elsewhere) does not allow.
        if not fp.is_allowed_form(formtype):
            print "skipping form %s - %s isn't parseable" % (f1.filing_number,
                                                             formtype)
            continue
def process_filing_body(filingnum, fp=None, logger=None):
    """Enter the body rows of a single filing and flag the affected records.

    Steps, in order: fetch the filing's ``efilings_filing`` row, refuse to
    continue if it is missing or already processed, parse each body row with
    ``fp.parse_form_line`` and pass it to ``process_body_row``, flush the
    ``CSV_dumper``, then write the line tally, the processed marker and the
    committee / candidate ``is_dirty`` flags.

    Args:
        filingnum: Filing number of the filing to process.
        fp: Optional form parser; ``form_parser()`` is built when falsy.
            Passing one in avoids rebuilding it for every filing in bulk runs.
        logger: Optional logger; ``fec_logger()`` is built when falsy.

    Returns:
        None (also returned early when the form type is not allowed by ``fp``).

    Raises:
        FilingHeaderDoesNotExist: ``efilings_filing`` has no row for ``filingnum``.
        FilingHeaderAlreadyProcessed: that row's ``data_is_processed`` is "1".
    """
    # It's useful to pass the form parser in when running in bulk so we don't have to keep creating new ones.
    if not fp:
        fp = form_parser()

    if not logger:
        logger = fec_logger()
    msg = "process_filing_body: Starting # %s" % (filingnum)
    logger.info(msg)

    connection = get_connection()
    # try/finally guarantees the connection is released on the raise paths,
    # the early return and unexpected errors, not only on full success.
    try:
        cursor = connection.cursor()
        # All statements below pass their values as parameters; nothing is
        # spliced into the SQL text.
        cursor.execute(
            "select fec_id, superseded_by_amendment, data_is_processed "
            "from efilings_filing where filing_number=%s",
            (filingnum,),
        )

        cd = CSV_dumper(connection)

        result = cursor.fetchone()
        if not result:
            msg = "process_filing_body: Couldn't find a new_filing for filing %s" % (filingnum)
            logger.error(msg)
            raise FilingHeaderDoesNotExist(msg)

        # NOTE(review): this value is handed to process_body_row unchanged for
        # every row -- confirm it is not meant to advance with linenum.
        line_sequence = 1
        is_amended = result[1]
        is_already_processed = result[2]
        if is_already_processed == "1":
            msg = "process_filing_body: This filing has already been entered."
            print(msg)
            logger.error(msg)
            raise FilingHeaderAlreadyProcessed(msg)

        f1 = filing(filingnum)
        form = f1.get_form_type()
        version = f1.get_version()
        filer_id = f1.get_filer_id()

        # only parse forms that we're set up to read
        if not fp.is_allowed_form(form):
            # `verbose` comes from module scope, not from this function.
            if verbose:
                msg = "process_filing_body: Not a parseable form: %s - %s" % (form, filingnum)
                print(msg)
                logger.info(msg)
            return None

        linenum = 0
        while True:
            linenum += 1
            row = f1.get_body_row()
            if not row:
                break

            try:
                linedict = fp.parse_form_line(row, version)
                process_body_row(linedict, filingnum, line_sequence, is_amended, cd, filer_id)
            except ParserMissingError:
                # A row with no known parser is reported and skipped.
                msg = "process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping." % (
                    filingnum,
                    linenum,
                    row[0],
                )
                print(msg)
                logger.warn(msg)
                continue

        # commit all the leftovers
        cd.commit_all()
        cd.close()
        counter = cd.get_counter()
        total_rows = sum(counter[line_type] for line_type in counter)

        msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (filingnum, total_rows, counter)
        print(msg)
        logger.info(msg)

        ######## DIRECT DB UPDATES. PROBABLY A BETTER APPROACH, BUT...

        # NOTE(review): assumes dict_to_hstore yields raw hstore text (not
        # pre-escaped for SQL embedding) -- confirm.
        header_data = dict_to_hstore(counter)
        cursor.execute(
            "update efilings_filing set lines_present=%s::hstore where filing_number=%s",
            (header_data, filingnum),
        )

        # mark file as having been entered.
        cursor.execute(
            "update efilings_filing set data_is_processed='1' where filing_number=%s",
            (filingnum,),
        )

        # Previously interpolated inside quotes; still sent as text so the
        # comparison behaves the same whatever type CURRENT_CYCLE has.
        cycle = "%s" % (CURRENT_CYCLE,)

        # flag this filer as one who has changed.
        cursor.execute(
            "update efilings_committee set is_dirty=True where fec_id=%s and cycle=%s",
            (filer_id, cycle),
        )

        # should also update the candidate is dirty flag too by joining w/ ccl table.
        # these tables aren't indexed, so do as two separate queries.
        cursor.execute(
            "select cand_id from ftpdata_candcomlink where cmte_id = %s and cmte_dsgn in ('A', 'P')",
            (filer_id,),
        )
        result = cursor.fetchone()
        if result:
            cand_id = result[0]
            cursor.execute(
                "update efilings_candidate set is_dirty=True where fec_id = %s and cycle=%s",
                (cand_id, cycle),
            )
    finally:
        connection.close()
# Module-level script fragment: walk the file cache and print the basic
# header facts of every filing found there.
# NOTE(review): `connection` is used here but not assigned in this fragment;
# presumably it is created earlier in the module -- confirm.
cursor = connection.cursor()

logger = fec_logger()


# Process all .fec files in the FILECACHE_DIRECTORY
for d, _, files in os.walk(FILECACHE_DIRECTORY):
    for this_file in files:

        # Ignore it if it isn't a numeric fec file, e.g. \d+\.fec
        # (fec_format_file is presumably a compiled regex defined elsewhere.)
        if not fec_format_file.match(this_file):
            continue

        # The filing number is the file name with ".fec" removed.
        filingnum = this_file.replace(".fec", "")
        # A fresh CSV_dumper is created for every filing, on the shared connection.
        cd = CSV_dumper(connection)
        f1 = filing(filingnum)

        formtype = f1.get_form_type()
        version = f1.version
        filer_id = f1.get_filer_id()
        print "Processing form number %s - type=%s version=%s is_amended: %s" % (
            f1.filing_number,
            formtype,
            version,
            f1.is_amendment,
        )
        print "Headers are: %s" % f1.headers

        # For amendments, also show which filing is being amended.
        if f1.is_amendment:
            print "Original filing is: %s" % (f1.headers["filing_amended"])
def process_filing_body(filingnum, fp=None, logger=None):
    """Enter the schedule A, B and E rows of a filing that are not stored yet.

    Only rows whose ``form_type`` starts with ``SE``, ``SA`` or ``SB``
    (case-insensitive) are considered. Each is looked up in ``SkedE`` /
    ``SkedA`` / ``SkedB`` by filing number and transaction id, and passed to
    ``process_body_row`` only when no such record exists. Afterwards the
    line tally, the processed marker and the committee ``is_dirty`` flag are
    written to the database.

    Args:
        filingnum: Filing number of the filing to process.
        fp: Optional form parser; ``form_parser()`` is built when falsy.
        logger: Optional logger; ``fec_logger()`` is built when falsy.

    Returns:
        False when the filing could not be opened; None otherwise (including
        when the form type is not allowed by ``fp``).

    Raises:
        FilingHeaderDoesNotExist: ``fec_alerts_new_filing`` has no row for
            ``filingnum``.
        FilingHeaderAlreadyProcessed: that row's ``data_is_processed`` is truthy.
    """
    # It's useful to pass the form parser in when running in bulk so we don't
    # have to keep creating new ones.
    if not fp:
        fp = form_parser()

    if not logger:
        logger = fec_logger()
    msg = "process_filing_body: Starting # %s" % (filingnum)
    logger.info(msg)

    # NOTE(review): this connection is never closed in this function --
    # confirm whether get_connection() hands out a shared connection.
    connection = get_connection()
    cursor = connection.cursor()
    # Values are passed as query parameters instead of being interpolated
    # into the SQL text.
    cursor.execute(
        "select fec_id, is_superceded, data_is_processed "
        "from fec_alerts_new_filing where filing_number=%s",
        (filingnum,))

    cd = CSV_dumper(connection)

    result = cursor.fetchone()
    if not result:
        msg = "process_filing_body: Couldn't find a new_filing for filing %s" % (filingnum)
        logger.error(msg)
        raise FilingHeaderDoesNotExist(msg)

    # The same header_id is handed to process_body_row for every row.
    header_id = 1
    is_amended = result[1]
    is_already_processed = result[2]
    if is_already_processed:
        msg = 'process_filing_body: This filing has already been entered.'
        logger.error(msg)
        raise FilingHeaderAlreadyProcessed(msg)

    try:
        f1 = filing(filingnum)
    except Exception:
        # Best effort: report and give up on this filing. Narrowed from a
        # bare except so KeyboardInterrupt / SystemExit still propagate.
        print("*** couldn't handle filing %s" % (filingnum))
        return False
    form = f1.get_form_type()
    version = f1.get_version()
    filer_id = f1.get_filer_id()

    # only parse forms that we're set up to read
    if not fp.is_allowed_form(form):
        # `verbose` is a module-level flag defined outside this function.
        if verbose:
            msg = "process_filing_body: Not a parseable form: %s - %s" % (form, filingnum)
            logger.error(msg)
        return None

    linenum = 0
    while True:
        linenum += 1
        row = f1.get_body_row()
        if not row:
            break

        try:
            linedict = fp.parse_form_line(row, version)
            form_type = linedict['form_type']
            line_kind = form_type.upper()

            # transaction_id is only read inside the branches, so rows of
            # other types are never required to carry one.
            if line_kind.startswith('SE'):
                print("\n\n\nfiling %s form is %s transaction_id is: %s" % (filingnum, form_type, linedict['transaction_id']))
                # make sure the transaction isn't already there before entering.
                try:
                    SkedE.objects.get(filing_number=filingnum, transaction_id=linedict['transaction_id'])
                except SkedE.DoesNotExist:
                    process_body_row(linedict, filingnum, header_id, is_amended, cd, filer_id)

            elif line_kind.startswith('SA'):
                print("\n\n\nfiling %s form is %s transaction_id is: %s" % (filingnum, form_type, linedict['transaction_id']))
                # make sure the transaction isn't already there before entering.
                try:
                    SkedA.objects.get(filing_number=filingnum, transaction_id=linedict['transaction_id'])
                    print("Already present! %s form is %s transaction_id is: %s" % (filingnum, form_type, linedict['transaction_id']))
                except SkedA.DoesNotExist:
                    process_body_row(linedict, filingnum, header_id, is_amended, cd, filer_id)

            elif line_kind.startswith('SB'):
                print("\n\n\nfiling %s form is %s transaction_id is: %s" % (filingnum, form_type, linedict['transaction_id']))
                # make sure the transaction isn't already there before entering.
                try:
                    SkedB.objects.get(filing_number=filingnum, transaction_id=linedict['transaction_id'])
                    print("Already present! %s form is %s transaction_id is: %s" % (filingnum, form_type, linedict['transaction_id']))
                except SkedB.DoesNotExist:
                    process_body_row(linedict, filingnum, header_id, is_amended, cd, filer_id)

        except ParserMissingError:
            msg = 'process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping.' % (filingnum, linenum, row[0])
            logger.warn(msg)
            continue
        except KeyError:
            # This message used to be built and then discarded (a bare string
            # expression); it is now actually reported.
            logger.warn("missing form type? in filing %s" % (filingnum))

    # commit all the leftovers
    cd.commit_all()
    cd.close()
    counter = cd.get_counter()
    total_rows = sum(counter[line_type] for line_type in counter)

    msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (filingnum, total_rows, counter)
    logger.info(msg)

    # this data has been moved here. At some point we should pick a single location for this data.
    # NOTE(review): assumes dict_to_hstore returns plain hstore text that has
    # not already been escaped for embedding in SQL -- confirm.
    header_data = dict_to_hstore(counter)
    cursor.execute(
        "update fec_alerts_new_filing set lines_present=%s::hstore where filing_number=%s",
        (header_data, filingnum))

    # mark file as having been entered.
    cursor.execute(
        "update fec_alerts_new_filing set data_is_processed = True where filing_number=%s",
        (filingnum,))

    # flag this filer as one who has changed.
    cursor.execute(
        "update summary_data_committee_overlay set is_dirty=True where fec_id=%s",
        (filer_id,))
Ejemplo n.º 21
0
def process_filing_body(filingnum, fp=None, logger=None):
    """Enter every body row of one filing and flag the affected records.

    Looks the filing up in ``fec_alerts_new_filing``, parses each body row
    with ``fp.parse_form_line`` and hands it to ``process_body_row``, flushes
    the ``CSV_dumper``, then writes the line tally, the processed marker and
    the committee ``is_dirty`` flag with direct SQL updates.

    Args:
        filingnum: Filing number of the filing to process.
        fp: Optional form parser; ``form_parser()`` is built when falsy.
        logger: Optional logger; ``fec_logger()`` is built when falsy.

    Returns:
        None when the form type is not allowed by ``fp``; no explicit return
        value otherwise in the visible part of the function.

    Raises:
        FilingHeaderDoesNotExist: ``fec_alerts_new_filing`` has no row for
            ``filingnum``.
        FilingHeaderAlreadyProcessed: that row's ``data_is_processed`` is truthy.
    """

    #It's useful to pass the form parser in when running in bulk so we don't have to keep creating new ones.
    if not fp:
        fp = form_parser()

    if not logger:
        logger = fec_logger()
    msg = "process_filing_body: Starting # %s" % (filingnum)
    #print msg
    logger.info(msg)

    # NOTE(review): the connection is not closed anywhere in the visible part
    # of this function -- confirm who owns it.
    connection = get_connection()
    cursor = connection.cursor()
    # NOTE(review): this and the later statements are built by string
    # interpolation; consider passing the values as query parameters.
    cmd = "select fec_id, is_superceded, data_is_processed from fec_alerts_new_filing where filing_number=%s" % (
        filingnum)
    cursor.execute(cmd)

    cd = CSV_dumper(connection)

    result = cursor.fetchone()
    if not result:
        msg = 'process_filing_body: Couldn\'t find a new_filing for filing %s' % (
            filingnum)
        logger.error(msg)
        raise FilingHeaderDoesNotExist(msg)

    # result is known to be non-empty here; the same header_id is passed to
    # process_body_row for every row.
    header_id = 1
    is_amended = result[1]
    is_already_processed = result[2]
    if is_already_processed:
        msg = 'process_filing_body: This filing has already been entered.'
        logger.error(msg)
        raise FilingHeaderAlreadyProcessed(msg)

    #print "Processing filing %s" % (filingnum)
    f1 = filing(filingnum)
    form = f1.get_form_type()
    version = f1.get_version()
    filer_id = f1.get_filer_id()

    # only parse forms that we're set up to read

    if not fp.is_allowed_form(form):
        # `verbose` is not defined in this function; it must come from module scope.
        if verbose:
            msg = "process_filing_body: Not a parseable form: %s - %s" % (
                form, filingnum)
            # print msg
            logger.error(msg)
        return None

    # Read body rows until get_body_row() returns a falsy value.
    linenum = 0
    while True:
        linenum += 1
        row = f1.get_body_row()
        if not row:
            break

        #print "row is %s" % (row)
        #print "\n\n\nForm is %s" % form
        try:
            linedict = fp.parse_form_line(row, version)
            #print "\n\n\nform is %s" % form
            process_body_row(linedict, filingnum, header_id, is_amended, cd,
                             filer_id)
        except ParserMissingError:
            # Rows with no known parser are logged and skipped.
            msg = 'process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping.' % (
                filingnum, linenum, row[0])
            logger.warn(msg)
            continue

    # commit all the leftovers
    cd.commit_all()
    cd.close()
    # Sum the per-line-type counts reported by the dumper.
    counter = cd.get_counter()
    total_rows = 0
    for i in counter:
        total_rows += counter[i]

    msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (
        filingnum, total_rows, counter)
    # print msg
    logger.info(msg)

    # this data has been moved here. At some point we should pick a single location for this data.
    header_data = dict_to_hstore(counter)
    cmd = "update fec_alerts_new_filing set lines_present='%s'::hstore where filing_number=%s" % (
        header_data, filingnum)
    cursor.execute(cmd)

    # mark file as having been entered.
    cmd = "update fec_alerts_new_filing set data_is_processed = True where filing_number=%s" % (
        filingnum)
    cursor.execute(cmd)

    # flag this filer as one who has changed.
    cmd = "update summary_data_committee_overlay set is_dirty=True where fec_id='%s'" % (
        filer_id)
    cursor.execute(cmd)