def process_filing_body(filingnum, fp=None, logger=None):
    
    
    #It's useful to pass the form parser in when running in bulk so we don't have to keep creating new ones. 
    if not fp:
        fp = form_parser()

    if not logger:
        logger = fec_logger()
    msg = "process_filing_body: Starting # %s" % (filingnum)
    #print msg
    logger.info(msg)
      
    connection = get_connection()
    cursor = connection.cursor()
    cmd = "select fec_id, is_superceded, data_is_processed from fec_alerts_new_filing where filing_number=%s" % (filingnum)
    cursor.execute(cmd)
    
    cd = CSV_dumper(connection)
    
    result = cursor.fetchone()
    if not result:
        msg = 'process_filing_body: Couldn\'t find a new_filing for filing %s' % (filingnum)
        logger.error(msg)
        raise FilingHeaderDoesNotExist(msg)
        
    # will throw a TypeError if it's missing.
    header_id = 1
    is_amended = result[1]
    is_already_processed = result[2]
    if is_already_processed:
        msg = 'process_filing_body: This filing has already been entered.'
        logger.error(msg)
        raise FilingHeaderAlreadyProcessed(msg)
    
    #print "Processing filing %s" % (filingnum)
    try:
        f1 = filing(filingnum)
    except Exception:
        msg = "process_filing_body: couldn't handle filing %s" % (filingnum)
        logger.error(msg)
        return False
    form = f1.get_form_type()
    version = f1.get_version()
    filer_id = f1.get_filer_id()
    
    # only parse forms that we're set up to read
    
    if not fp.is_allowed_form(form):
        if verbose:
            msg = "process_filing_body: Not a parseable form: %s - %s" % (form, filingnum)
            # print msg
            logger.error(msg)
        return None
        
    linenum = 0
    while True:
        linenum += 1
        row = f1.get_body_row()
        if not row:
            break
        
        #print "row is %s" % (row)
        #print "\n\n\nForm is %s" % form
        try:
            linedict = fp.parse_form_line(row, version)
            if linedict['form_type'].upper().startswith('SE'):
                print "\n\n\nfiling %s form is %s transaction_id is: %s" % (filingnum, linedict['form_type'], linedict['transaction_id'])
                # make sure the transaction isn't already there before entering. 
                try:
                    SkedE.objects.get(filing_number=filingnum, transaction_id=linedict['transaction_id'])
                except SkedE.DoesNotExist:
                    process_body_row(linedict, filingnum, header_id, is_amended, cd, filer_id)

            elif linedict['form_type'].upper().startswith('SA'):
                print "\n\n\nfiling %s form is %s transaction_id is: %s" % (filingnum, linedict['form_type'], linedict['transaction_id'])
                # make sure the transaction isn't already there before entering. 
                try:
                    SkedA.objects.get(filing_number=filingnum, transaction_id=linedict['transaction_id'])
                    print "Already present! %s form is %s transaction_id is: %s" % (filingnum, linedict['form_type'], linedict['transaction_id'])
                except SkedA.DoesNotExist:
                    process_body_row(linedict, filingnum, header_id, is_amended, cd, filer_id)


            elif linedict['form_type'].upper().startswith('SB'):
                print "\n\n\nfiling %s form is %s transaction_id is: %s" % (filingnum, linedict['form_type'], linedict['transaction_id'])
                # make sure the transaction isn't already there before entering. 
                try:
                    SkedB.objects.get(filing_number=filingnum, transaction_id=linedict['transaction_id'])
                    print "Already present! %s form is %s transaction_id is: %s" % (filingnum, linedict['form_type'], linedict['transaction_id'])
                except SkedB.DoesNotExist:
                    process_body_row(linedict, filingnum, header_id, is_amended, cd, filer_id)
            
            
        except ParserMissingError:
            msg = 'process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping.' % (filingnum, linenum, row[0])
            logger.warn(msg)
            continue
        except KeyError:
            msg = 'process_filing_body: missing form_type in filing %s line %s. Skipping.' % (filingnum, linenum)
            logger.warn(msg)
            continue
    
    # commit all the leftovers
    cd.commit_all()
    cd.close()
    counter = cd.get_counter()
    total_rows = 0
    for i in counter:
        total_rows += counter[i]
        
    msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (filingnum, total_rows, counter)
    # print msg
    logger.info(msg)
    
    
    # don't commit during testing of fix 
    
    # this data has been moved here. At some point we should pick a single location for this data. 
    header_data = dict_to_hstore(counter)
    cmd = "update fec_alerts_new_filing set lines_present='%s'::hstore where filing_number=%s" % (header_data, filingnum)
    cursor.execute(cmd)
    
    # mark file as having been entered. 
    cmd = "update fec_alerts_new_filing set data_is_processed = True where filing_number=%s" % (filingnum)
    cursor.execute(cmd)
    
    # flag this filer as one who has changed. 
    cmd = "update summary_data_committee_overlay set is_dirty=True where fec_id='%s'" % (filer_id)
    cursor.execute(cmd)
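A hedged refactoring sketch: the example above repeats the same "skip this row if its transaction_id is already stored" guard for Schedule E, A and B lines. Assuming the SkedA/SkedB/SkedE Django models already imported by this module, that check could be factored into one helper (the helper itself is hypothetical, not part of the project):

SKED_MODELS = {'SA': SkedA, 'SB': SkedB, 'SE': SkedE}  # models used inline above

def row_already_entered(linedict, filingnum):
    # Hypothetical helper mirroring the try/except DoesNotExist guards above:
    # returns True if a row with this transaction_id was already written for
    # this filing, so the caller can skip process_body_row().
    prefix = linedict['form_type'].upper()[:2]
    model = SKED_MODELS.get(prefix)
    if model is None:
        return False
    try:
        model.objects.get(filing_number=filingnum,
                          transaction_id=linedict['transaction_id'])
        return True
    except model.DoesNotExist:
        return False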
Example 2
def process_filing_body(filingnum, fp=None, logger=None):

    #It's useful to pass the form parser in when running in bulk so we don't have to keep creating new ones.
    if not fp:
        fp = form_parser()

    if not logger:
        logger = fec_logger()
    msg = "process_filing_body: Starting # %s" % (filingnum)
    #print msg
    logger.info(msg)

    connection = get_connection()
    cursor = connection.cursor()
    cmd = "select fec_id, is_superceded, data_is_processed from fec_alerts_new_filing where filing_number=%s" % (
        filingnum)
    cursor.execute(cmd)

    cd = CSV_dumper(connection)

    result = cursor.fetchone()
    if not result:
        msg = 'process_filing_body: Couldn\'t find a new_filing for filing %s' % (
            filingnum)
        logger.error(msg)
        raise FilingHeaderDoesNotExist(msg)

    # will throw a TypeError if it's missing.
    header_id = 1
    is_amended = result[1]
    is_already_processed = result[2]
    if is_already_processed:
        msg = 'process_filing_body: This filing has already been entered.'
        logger.error(msg)
        raise FilingHeaderAlreadyProcessed(msg)

    #print "Processing filing %s" % (filingnum)
    f1 = filing(filingnum)
    form = f1.get_form_type()
    version = f1.get_version()
    filer_id = f1.get_filer_id()

    # only parse forms that we're set up to read

    if not fp.is_allowed_form(form):
        if verbose:
            msg = "process_filing_body: Not a parseable form: %s - %s" % (
                form, filingnum)
            # print msg
            logger.error(msg)
        return None

    linenum = 0
    while True:
        linenum += 1
        row = f1.get_body_row()
        if not row:
            break

        #print "row is %s" % (row)
        #print "\n\n\nForm is %s" % form
        try:
            linedict = fp.parse_form_line(row, version)
            #print "\n\n\nform is %s" % form
            process_body_row(linedict, filingnum, header_id, is_amended, cd,
                             filer_id)
        except ParserMissingError:
            msg = 'process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping.' % (
                filingnum, linenum, row[0])
            logger.warn(msg)
            continue

    # commit all the leftovers
    cd.commit_all()
    cd.close()
    counter = cd.get_counter()
    total_rows = 0
    for i in counter:
        total_rows += counter[i]

    msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (
        filingnum, total_rows, counter)
    # print msg
    logger.info(msg)

    # this data has been moved here. At some point we should pick a single location for this data.
    header_data = dict_to_hstore(counter)
    cmd = "update fec_alerts_new_filing set lines_present='%s'::hstore where filing_number=%s" % (
        header_data, filingnum)
    cursor.execute(cmd)

    # mark file as having been entered.
    cmd = "update fec_alerts_new_filing set data_is_processed = True where filing_number=%s" % (
        filingnum)
    cursor.execute(cmd)

    # flag this filer as one who has changed.
    cmd = "update summary_data_committee_overlay set is_dirty=True where fec_id='%s'" % (
        filer_id)
    cursor.execute(cmd)
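All of these examples build their SQL by interpolating values with %. If get_connection() returns a DB-API 2.0 connection (psycopg2, for instance), the same updates can be written with bound parameters instead; a minimal sketch under that assumption, reusing the table and column names from the example above:

# Sketch only: assumes a DB-API 2.0 cursor (e.g. psycopg2) where %s is a
# bound-parameter placeholder rather than string formatting.
cursor.execute(
    "update fec_alerts_new_filing set lines_present=%s::hstore where filing_number=%s",
    (header_data, filingnum))
cursor.execute(
    "update fec_alerts_new_filing set data_is_processed = True where filing_number=%s",
    (filingnum,))
cursor.execute(
    "update summary_data_committee_overlay set is_dirty=True where fec_id=%s",
    (filer_id,))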
Example 3
def process_filing_body(filingnum, fp=None, logger=None):

    # It's useful to pass the form parser in when running in bulk so we don't have to keep creating new ones.
    if not fp:
        fp = form_parser()

    if not logger:
        logger = fec_logger()
    msg = "process_filing_body: Starting # %s" % (filingnum)
    # print msg
    logger.info(msg)

    connection = get_connection()
    cursor = connection.cursor()
    cmd = "select fec_id, superseded_by_amendment, data_is_processed from efilings_filing where filing_number=%s" % (
        filingnum
    )
    cursor.execute(cmd)

    cd = CSV_dumper(connection)

    result = cursor.fetchone()
    if not result:
        msg = "process_filing_body: Couldn't find a new_filing for filing %s" % (filingnum)
        logger.error(msg)
        raise FilingHeaderDoesNotExist(msg)

    # will throw a TypeError if it's missing.
    line_sequence = 1
    is_amended = result[1]
    is_already_processed = result[2]
    if is_already_processed == "1":
        msg = "process_filing_body: This filing has already been entered."
        print msg
        logger.error(msg)
        raise FilingHeaderAlreadyProcessed(msg)

    # print "Processing filing %s" % (filingnum)
    f1 = filing(filingnum)
    form = f1.get_form_type()
    version = f1.get_version()
    filer_id = f1.get_filer_id()

    # only parse forms that we're set up to read

    if not fp.is_allowed_form(form):
        if verbose:
            msg = "process_filing_body: Not a parseable form: %s - %s" % (form, filingnum)
            print msg
            logger.info(msg)
        return None

    linenum = 0
    while True:
        linenum += 1
        row = f1.get_body_row()
        if not row:
            break

        # print "row is %s" % (row)
        # print "\n\n\nForm is %s" % form
        try:
            linedict = fp.parse_form_line(row, version)
            # print "\n\n\nform is %s" % form
            process_body_row(linedict, filingnum, line_sequence, is_amended, cd, filer_id)
        except ParserMissingError:
            msg = "process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping." % (
                filingnum,
                linenum,
                row[0],
            )
            print msg
            logger.warn(msg)
            continue

    # commit all the leftovers
    cd.commit_all()
    cd.close()
    counter = cd.get_counter()
    total_rows = 0
    for i in counter:
        total_rows += counter[i]

    msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (filingnum, total_rows, counter)
    print msg
    logger.info(msg)

    ######## DIRECT DB UPDATES. PROBABLY A BETTER APPROACH, BUT...

    header_data = dict_to_hstore(counter)
    cmd = "update efilings_filing set lines_present='%s'::hstore where filing_number=%s" % (header_data, filingnum)
    cursor.execute(cmd)

    # mark file as having been entered.
    cmd = "update efilings_filing set data_is_processed='1' where filing_number=%s" % (filingnum)
    cursor.execute(cmd)

    # flag this filer as one who has changed.
    cmd = "update efilings_committee set is_dirty=True where fec_id='%s' and cycle='%s'" % (filer_id, CURRENT_CYCLE)
    cursor.execute(cmd)

    # should also update the candidate is dirty flag too by joining w/ ccl table.
    # these tables aren't indexed, so do as two separate queries.
    cmd = "select cand_id from ftpdata_candcomlink where cmte_id = '%s' and cmte_dsgn in ('A', 'P')" % (filer_id)
    cursor.execute(cmd)
    result = cursor.fetchone()
    if result:
        cand_id = result[0]
        cmd = "update efilings_candidate set is_dirty=True where fec_id = '%s' and cycle='%s'" % (
            cand_id,
            CURRENT_CYCLE,
        )
        cursor.execute(cmd)

    connection.close()
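The lines_present column is filled by casting the result of dict_to_hstore(counter) with ::hstore. That helper isn't shown on this page; a plausible minimal version, inferred only from how its output is used inside the '%s'::hstore literal (an assumption, not the project's actual implementation):

def dict_to_hstore(counter):
    # Hypothetical sketch: render {'SA': 12, 'SB': 3} as the hstore literal
    # body  "SA"=>"12", "SB"=>"3"  so it can be cast with ::hstore.
    pairs = ['"%s"=>"%s"' % (key, counter[key]) for key in counter]
    return ', '.join(pairs)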
Example 4
def process_filing_body(filingnum, fp=None, logger=None):
    
    
    #It's useful to pass the form parser in when running in bulk so we don't have to keep creating new ones. 
    if not fp:
        fp = form_parser()

    if not logger:
        logger = fec_logger()
    msg = "process_filing_body: Starting # %s" % (filingnum)
    #print msg
    logger.info(msg)
      
    connection = get_connection()
    cursor = connection.cursor()
    cmd = "select fec_id, superseded_by_amendment, data_is_processed from efilings_filing where filing_number=%s" % (filingnum)
    cursor.execute(cmd)
    
    cd = CSV_dumper(connection)
    
    result = cursor.fetchone()
    if not result:
        msg = 'process_filing_body: Couldn\'t find a new_filing for filing %s' % (filingnum)
        logger.error(msg)
        raise FilingHeaderDoesNotExist(msg)
        
    # will throw a TypeError if it's missing.
    line_sequence = 1
    is_amended = result[1]
    is_already_processed = result[2]
    if is_already_processed == "1":
        msg = 'process_filing_body: This filing has already been entered.'
        print msg
        logger.error(msg)
        raise FilingHeaderAlreadyProcessed(msg)
    
    #print "Processing filing %s" % (filingnum)
    f1 = filing(filingnum)
    form = f1.get_form_type()
    version = f1.get_version()
    filer_id = f1.get_filer_id()
    
    # only parse forms that we're set up to read
    
    if not fp.is_allowed_form(form):
        if verbose:
            msg = "process_filing_body: Not a parseable form: %s - %s" % (form, filingnum)
            print msg
            logger.info(msg)
        return None
        
    linenum = 0
    while True:
        linenum += 1
        row = f1.get_body_row()
        if not row:
            break
        
        #print "row is %s" % (row)
        #print "\n\n\nForm is %s" % form
        try:
            linedict = fp.parse_form_line(row, version)
            #print "\n\n\nform is %s" % form
            process_body_row(linedict, filingnum, line_sequence, is_amended, cd, filer_id)
        except ParserMissingError:
            msg = 'process_filing_body: Unknown line type in filing %s line %s: type=%s Skipping.' % (filingnum, linenum, row[0])
            print msg
            logger.warn(msg)
            continue
        
    # commit all the leftovers
    cd.commit_all()
    cd.close()
    counter = cd.get_counter()
    total_rows = 0
    for i in counter:
        total_rows += counter[i]
        
    msg = "process_filing_body: Filing # %s Total rows: %s Tally is: %s" % (filingnum, total_rows, counter)
    print msg
    logger.info(msg)
    
    ######## DIRECT DB UPDATES. PROBABLY A BETTER APPROACH, BUT... 
    
    header_data = dict_to_hstore(counter)
    cmd = "update efilings_filing set lines_present='%s'::hstore where filing_number=%s" % (header_data, filingnum)
    cursor.execute(cmd)
    
    # mark file as having been entered. 
    cmd = "update efilings_filing set data_is_processed='1' where filing_number=%s" % (filingnum)
    cursor.execute(cmd)
    
    # flag this filer as one who has changed. 
    cmd = "update efilings_committee set is_dirty=True where fec_id='%s' and cycle='%s'" % (filer_id, CURRENT_CYCLE)
    cursor.execute(cmd)
    
    # should also update the candidate is dirty flag too by joining w/ ccl table. 
    # these tables aren't indexed, so do as two separate queries. 
    cmd = "select cand_id from ftpdata_candcomlink where cmte_id = '%s' and cmte_dsgn in ('A', 'P')" % (filer_id)
    cursor.execute(cmd)
    result = cursor.fetchone()
    if result:
        cand_id = result[0]
        cmd = "update efilings_candidate set is_dirty=True where fec_id = '%s' and cycle='%s'" % (cand_id, CURRENT_CYCLE)
        cursor.execute(cmd)

    connection.close()
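Each example notes that the form parser (and logger) can be passed in when running in bulk, so they are built only once. A usage sketch of that pattern; the pending_filing_numbers iterable is hypothetical, everything else comes from the examples above:

fp = form_parser()      # build once, reuse for every filing
logger = fec_logger()

for num in pending_filing_numbers:  # hypothetical source of filing numbers
    try:
        process_filing_body(num, fp=fp, logger=logger)
    except FilingHeaderDoesNotExist:
        logger.warn("no header yet for filing %s; try again later" % num)
    except FilingHeaderAlreadyProcessed:
        # already in the database -- safe to skip
        continue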