Example #1
def get_filing_list(start_date, end_date, max_fails=10, waittime=10):
    #gets list of available filings from the FEC.
    #TODO: institute an API key pool or fallback?
    url = "https://api.open.fec.gov/v1/efile/filings/?per_page=100&sort=-receipt_date"
    url += "&api_key={}".format(API_KEY)
    url += "&min_receipt_date={}".format(start_date)
    url += "&max_receipt_date={}".format(end_date)

    filings = []
    page = 1
    fails = 0
    while True:
        #get new filing ids from FEC API
        resp = requests.get(url + "&page={}".format(page))
        page += 1
        try:
            files = resp.json()
        except ValueError:
            #failed to convert response to JSON
            fails += 1
            if fails >= max_fails:
                logging.log(
                    title="FEC download failed",
                    text='Failed to download valid JSON from FEC site {} times'
                    .format(max_fails),
                    tags=["nyt-fec", "result:fail"])
                return None
            time.sleep(waittime)
            continue
        try:
            results = files['results']
        except KeyError:
            fails += 1
            if fails >= max_fails:
                logging.log(
                    title="FEC download failed",
                    text='Failed to download valid JSON from FEC site {} times'
                    .format(max_fails),
                    tags=["nyt-fec", "result:fail"])
                return None
            time.sleep(waittime)
            continue

        if len(results) == 0:
            break
        for f in results:
            if evaluate_filing(f):
                filings.append(f['file_number'])

    return filings
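
For context, a minimal usage sketch of get_filing_list. The ISO date format and the seven-day window are assumptions based on the min_receipt_date/max_receipt_date query parameters built above, and the exit handling is only illustrative.

import datetime
import sys

# Usage sketch (assumed date format YYYY-MM-DD, matching the
# min_receipt_date/max_receipt_date parameters used above).
end_date = datetime.date.today()
start_date = end_date - datetime.timedelta(days=7)
file_numbers = get_filing_list(start_date.isoformat(), end_date.isoformat())
if file_numbers is None:
    sys.exit("FEC API returned no valid JSON after repeated retries")
print("{} filings passed evaluate_filing".format(len(file_numbers)))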
Example #2
def reassign_standardized_donors(filing_id, amended_id):
    #find all skeda's with donors from the amended filing
    #that we're about to deactivate
    matched_transactions = ScheduleA.objects.filter(filing_id=amended_id).exclude(donor=None)
    i = 0
    for transaction in matched_transactions:
        transaction_id = transaction.transaction_id
        contributor_last_name = transaction.contributor_last_name
        new_trans = ScheduleA.objects.filter(transaction_id=transaction_id, filing_id=filing_id)
        if len(new_trans) == 0:
            logging.log(title="donor reassignment issue",
                    text="filing {} was amended by filing {} and no transaction could be found for donor reassigment for transaction id {}".format(amended_id, filing_id, transaction_id),
                    tags=["nyt-fec", "result:warning"])
            continue
        if len(new_trans) > 1:
            logging.log(title="donor reassignment issue",
                    text="filing {} was amended by filing {} and multiple transaction matches were found for {}".format(amended_id, filing_id, transaction_id),
                    tags=["nyt-fec", "result:warning"])
            continue
        new_trans = new_trans[0]
        if new_trans.contributor_last_name != contributor_last_name:
            logging.log(title="donor reassignment issue",
                    text="Want to reassign transaction {} from filing {} to filing {} but last names mismatch: {}/{}".format(transaction_id, amended_id, filing_id, contributor_last_name, new_trans.contributor_last_name),    
                    tags=["nyt-fec", "result:warning"])
            continue

        new_trans.donor = transaction.donor
        new_trans.save()
        transaction.donor = None
        transaction.save()
        i += 1
    print("reassigned {} transactions from amended filing".format(i))
Example #3
def load_filings(filing_dir):

    filing_fieldnames = [f.name for f in Filing._meta.get_fields()]

    filing_csvs = sorted(os.listdir(filing_dir))
    filings_loaded = 0
    for filename in filing_csvs:
        filing_id = filename.split(".")[0]
        if filename[0] == ".":
            continue
        try:
            int(filing_id)
        except ValueError:
            logging.log(title="Bad FEC filename",
                    text='did not recognize filing {}'.format(filename),
                    tags=["nyt-fec", "result:warn"])
            continue

        full_filename = "{}{}".format(filing_dir, filename)
        
        if not evaluate_filing_file(full_filename, filing_id):
            continue
                
        sys.stdout.write("-------------------\n{}: Started filing {}\n".format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), filing_id))
        

        if load_filing(filing_id, full_filename, filing_fieldnames):

            logging.log(title="Filing {} loaded".format(filing_id),
                    text='filing {} successfully loaded'.format(filing_id),
                    tags=["nyt-fec", "result:success"])

            filings_loaded += 1

    logging.log(title="FEC scrape completed".format(filing_id),
                    text='{} filings successfully loaded'.format(filings_loaded),
                    tags=["nyt-fec", "result:success"])
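
Note that full_filename is built by plain string concatenation, so filing_dir must end with a path separator. A usage sketch (the directory path is hypothetical):

# Usage sketch: the trailing slash matters because load_filings
# concatenates filing_dir and filename directly.
FILING_DIR = "/tmp/fec_filings/"  # hypothetical download directory
load_filings(FILING_DIR)

Switching to os.path.join(filing_dir, filename) inside load_filings would remove that constraint.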
Example #4
def load_filing(filing, filename, filing_fieldnames):
    #returns boolean depending on whether filing was loaded

    #TODO add checking to see if import was successful
    filing_matches = Filing.objects.filter(filing_id=filing)
    if len(filing_matches) == 1:
        #this means the filing already exists
        if filing_matches[0].status != "FAILED":
            sys.stdout.write('filing {} already exists\n'.format(filing))
            return False
        else:
            sys.stdout.write("Reloading {}, it failed perviously\n".format(filing))
    
    #filing does not exist or it failed previously
    try:
        filing_dict = process_filing.process_electronic_filing(filename)
    except Exception as e:
        logging.log(title="fec2json failed",
                    text="fec2json failed {} {}".format(filing, e),
                    tags=["nyt-fec", "result:fail"])
        return False

    #do not load filings outside of this cycle (these will likely be amendments of old filings)
    #we check this before we download the filing, but it seems worth re-checking in case someone manually downloaded a file or something
    coverage_end = filing_dict.get('coverage_through_date')
    if not check_coverage_dates(filing_dict, coverage_end):
        create_or_update_filing_status(filing, 'REFUSED')
        return False

    #deal with amended filings
    is_amended = False
    amends_filing = None
    if filing_dict['amendment']:
        is_amended = True

        #oy, one filer really likes semi-colons.
        if filing_dict.get('amends_filing'):
            filing_dict['amends_filing'] = filing_dict['amends_filing'].replace(';','')
        try:
            amends_filing_str = filing_dict['amends_filing']
            amends_filing = int(amends_filing_str)
        except ValueError:
            #should be a warning or possibly critical
            logging.log(title="Filing {} Failed".format(filing),
                    text='Invalid amendment number {} for filing {}, creating filing and marking as FAILED\n'.format(filing_dict['amends_filing'],filing),
                    tags=["nyt-fec", "result:fail"])
            filing_obj = Filing.objects.create(filing_id=filing, status='FAILED')
            filing_obj.save()
            return False
        else:
            try:
                amended_filing = Filing.objects.filter(filing_id=amends_filing)[0]
            except IndexError:
                #if it's an F24 or F5, which don't always have coverage dates,
                #it is probably an amendment of an out-of-cycle filing
                #so do not load it
                if filing_dict['form'] in ['F24', 'F5']:
                    sys.stdout.write('Filing {} is an amended {} with no base. Probably from an old cycle. Not loading\n'.format(filing, filing_dict['form']))
                    create_or_update_filing_status(filing, 'REFUSED')
                    return False
                sys.stdout.write("could not find filing {}, which was amended by {}, so not deactivating any transactions\n".format(amends_filing, filing))
            else:
                #if there are filings that were amended by the amended filing
                #they also have to be deactivated, so look for them.
                other_amended_filings = Filing.objects.filter(amends_filing=amended_filing.filing_id)
                filings_to_deactivate = [f for f in other_amended_filings] + [amended_filing]
                for old_filing in filings_to_deactivate:
                    old_filing.active = False
                    old_filing.status = 'SUPERSEDED'
                    old_filing.save()
                    ScheduleA.objects.filter(filing_id=old_filing.filing_id).update(active=False, status='SUPERSEDED')
                    ScheduleB.objects.filter(filing_id=old_filing.filing_id).update(active=False, status='SUPERSEDED')
                    ScheduleE.objects.filter(filing_id=old_filing.filing_id).update(active=False, status='SUPERSEDED')

    if filing_dict['form'] in ['F3','F3X','F3P','F5']:
        #could be a periodic, so see if there are covered forms that need to be deactivated
        coverage_start_date = filing_dict['coverage_from_date']
        coverage_end_date = filing_dict['coverage_through_date']
        if coverage_start_date and coverage_end_date:
            #we're going to start by looking for whole forms to deactivate
            covered_filings = Filing.objects.filter(date_signed__gte=coverage_start_date,
                                                date_signed__lte=coverage_end_date,
                                                form__in=['F24','F5'],
                                                filer_id=filing_dict['filer_committee_id_number']).exclude(filing_id=filing) #this exclude prevents the current filing from being deactivated if it's already been saved somehow
            covered_filings.update(active=False, status='COVERED')
            covered_transactions = ScheduleE.objects.filter(filing_id__in=[f.filing_id for f in covered_filings])
            covered_transactions.update(active=False, status='COVERED')
            #there might be some additional transactions close to the edge of the filing period
            #that we should deactivate based on inconsistent dates inside filings
            individual_covered_transactions = ScheduleE.objects.filter(filer_committee_id_number=filing_dict['filer_committee_id_number'],
                                                                    active=True).exclude(filing_id=filing)
            by_expend_date = individual_covered_transactions.filter(expenditure_date__gte=coverage_start_date,
                                                                    expenditure_date__lte=coverage_end_date)
            by_expend_date.update(active=False, status='COVERED')
            by_dissemination_date = individual_covered_transactions.filter(dissemination_date__gte=coverage_start_date,
                                                                    dissemination_date__lte=coverage_end_date)
            by_dissemination_date.update(active=False, status='COVERED')


    clean_filing_dict = clean_filing_fields(filing_dict, filing_fieldnames)
    clean_filing_dict['filing_id'] = filing
    clean_filing_dict['filer_id'] = filing_dict['filer_committee_id_number']
    
    if len(filing_matches) == 1:
        filing_matches.update(**clean_filing_dict)
        filing_obj = filing_matches[0]
    else:
        filing_obj = Filing.objects.create(**clean_filing_dict)
    filing_obj.save()

    #create or update committee
    if filing_dict.get('committee_name') is None:
        filing_obj.committee_name = get_filer_name(filing_dict['filer_committee_id_number'])
        filing_obj.save()

    try:
        comm = Committee.objects.create(fec_id=filing_dict['filer_committee_id_number'])
        comm.save()
    except:
        #committee already exists
        pass

    try:
        committee_fieldnames = [f.name for f in Committee._meta.get_fields()]
        committee = {}
        committee['zipcode'] = filing_dict['zip']
        for fn in committee_fieldnames:
            try:
                field = filing_dict[fn]
            except KeyError:
                continue
            committee[fn] = field

        comm = Committee.objects.filter(fec_id=filing_dict['filer_committee_id_number']).update(**committee)
    except:
        sys.stdout.write('failed to update committee\n')

    #add itemizations - eventually we're going to need to bulk insert here
    #skedA's
    try:
        scha_count = 0
        schb_count = 0
        sche_count = 0
        if 'itemizations' in filing_dict:
            if 'SchA' in filing_dict['itemizations']:
                scha_count = load_itemizations(ScheduleA, filing_dict['itemizations']['SchA'])
            if 'SchB' in filing_dict['itemizations']:
                schb_count = load_itemizations(ScheduleB, filing_dict['itemizations']['SchB'])
            if 'SchE' in filing_dict['itemizations']:
                sche_count = load_itemizations(ScheduleE, filing_dict['itemizations']['SchE'])
            if 'F57' in filing_dict['itemizations']:
                sche_count += load_itemizations(ScheduleE, filing_dict['itemizations']['F57'])
        sys.stdout.write("inserted {} schedule A's\n".format(scha_count))
        sys.stdout.write("inserted {} schedule B's\n".format(schb_count))
        sys.stdout.write("inserted {} schedule E's\n".format(sche_count))

    except:
        #something failed in the transaction loading, keep the filing as failed
        #but remove the itemizations
        filing_obj.status='FAILED'
        filing_obj.save()
        create_or_update_filing_status(filing, 'FAILED')
        ScheduleA.objects.filter(filing_id=filing).delete()
        ScheduleB.objects.filter(filing_id=filing).delete()
        ScheduleE.objects.filter(filing_id=filing).delete()
        logging.log(title="Itemization load failed",
                    text='Something failed in itemizations, marking {} as FAILED'.format(filing),
                    tags=["nyt-fec", "result:fail"])
        return False

    if is_amended and amends_filing:
        reassign_standardized_donors(filing, amends_filing)

    #add IE total to f24s
    if filing_obj.form == 'F24':
        ies = ScheduleE.objects.filter(filing_id=filing, active=True)
        filing_obj.computed_ie_total_for_f24 = sum([ie.expenditure_amount for ie in ies])

    sys.stdout.write('Marking {} as ACTIVE\n'.format(filing))
    filing_obj.status='ACTIVE'
    filing_obj.save()
    create_or_update_filing_status(filing, 'SUCCESS')

    return True
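
load_itemizations is not shown above. Judging from the calls in the try block, it takes a model class and the list of itemization dicts that process_filing.process_electronic_filing produced, inserts them, and returns a row count. A hypothetical sketch of that contract; the field filtering and bulk insert are assumptions, not the project's actual implementation.

def load_itemizations(model_class, itemizations):
    # keep only keys that are real model fields, then bulk insert;
    # returns the number of rows created (the value summed into
    # scha_count/schb_count/sche_count above)
    fieldnames = {f.name for f in model_class._meta.get_fields()}
    rows = []
    for item in itemizations:
        clean = {k: v for k, v in item.items() if k in fieldnames}
        rows.append(model_class(**clean))
    model_class.objects.bulk_create(rows)
    return len(rows)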