Example #1
0
def update_tas_lookups(sess, csv_path, update_missing=None, metrics=None):
    """ Load TAS data from the provided CSV and replace/insert any TASLookups

        Args:
            sess: connection to database
            csv_path: path of the car_tas csv to import
            update_missing: if provided, this list of account numbers will only update matching records
                            if the budget_function_code is null/none
            metrics: an object containing information for the metrics file
    """
    # Normalize the default here instead of using a mutable [] default
    # argument; None and [] behave identically at the truthiness check below.
    if update_missing is None:
        update_missing = []
    if not metrics:
        metrics = {'records_updated': 0, 'records_added': 0}
    data = clean_tas(csv_path, metrics)
    add_existing_id(data)

    # drop(columns=...) returns a fresh DataFrame; `del` on a boolean-indexed
    # slice mutates a potential view and raises SettingWithCopyWarning.
    old_data = data[data['existing_id'].notnull()].drop(columns=['existing_id'])
    new_data = data[data['existing_id'].isnull()].drop(columns=['existing_id'])

    if update_missing:
        # find which incoming records can fill in the empty records
        relevant_old_data = old_data[old_data['account_num'].isin(
            update_missing)]
        # Fill them in. If budget_function_code is empty, the following columns have also been empty.
        fill_in_cols = [
            'account_title', 'budget_bureau_code', 'budget_bureau_name',
            'budget_function_code', 'budget_function_title',
            'budget_subfunction_code', 'budget_subfunction_title',
            'reporting_agency_aid', 'reporting_agency_name'
        ]
        for _, row in relevant_old_data.iterrows():
            fill_in_updates = {
                fill_in_col: row[fill_in_col]
                for fill_in_col in fill_in_cols
            }
            # timezone-aware timestamp so updated_at is unambiguous
            fill_in_updates['updated_at'] = datetime.now(timezone.utc)
            sess.query(TASLookup).filter_by(
                account_num=row['account_num']).update(
                    synchronize_session=False, values=fill_in_updates)
        logger.info('%s records filled in', len(relevant_old_data.index))
        metrics['records_updated'] += len(relevant_old_data.index)
    else:
        # instead of using the pandas to_sql dataframe method like some of the other domain load processes, iterate
        # through the dataframe rows so we can load using the orm model (note: toyed with the SQLAlchemy bulk load
        # options but ultimately decided not to go outside the unit of work for the sake of a performance gain)
        for _, row in old_data.iterrows():
            sess.query(TASLookup).filter_by(
                account_num=row['account_num']).update(
                    row, synchronize_session=False)

        for _, row in new_data.iterrows():
            sess.add(TASLookup(**row))
        logger.info('%s records in CSV, %s existing', len(data.index),
                    sum(data['existing_id'].notnull()))
        metrics['records_updated'] += len(old_data.index)
        metrics['records_added'] += len(new_data.index)

    sess.commit()
def updateTASLookups(csvPath):
    """Load TAS data from the provided CSV and replace/insert any
    TASLookups"""
    session = GlobalDB.db().session

    data = cleanTas(csvPath)
    add_start_date(data)
    add_existing_id(data)

    # Any open-ended TAS that does not appear in this CSV is marked as
    # "ended" effective today.
    still_present = [int(i) for i in data['existing_id'] if pd.notnull(i)]
    (session.query(TASLookup)
        .filter(TASLookup.internal_end_date.is_(None))
        .filter(~TASLookup.tas_id.in_(still_present))
        .update({'internal_end_date': date.today()},
                synchronize_session=False))

    new_data = data[data['existing_id'].isnull()]
    del new_data['existing_id']

    # Insert one row at a time through the ORM rather than pandas' to_sql
    # so everything stays inside the session's unit of work (SQLAlchemy's
    # bulk loaders were considered and rejected for that reason).
    for _, record in new_data.iterrows():
        session.add(TASLookup(**record))

    session.commit()
    logger.info('%s records in CSV, %s existing',
                len(data.index), sum(data['existing_id'].notnull()))
Example #3
0
def update_tas_lookups(csv_path):
    """Load TAS data from the provided CSV and replace/insert any
    TASLookups"""
    sess = GlobalDB.db().session

    data = clean_tas(csv_path)
    add_existing_id(data)

    # Split the frame once: rows that matched an existing record vs. new ones.
    has_existing = data['existing_id'].notnull()

    old_data = data[has_existing]
    del old_data['existing_id']

    new_data = data[~has_existing]
    del new_data['existing_id']

    # Row-by-row through the ORM model rather than pandas' to_sql, so the
    # load stays inside the session's unit of work (SQLAlchemy bulk-load
    # options were tried and rejected to keep that guarantee).
    for _, record in old_data.iterrows():
        match = sess.query(TASLookup).filter_by(account_num=record['account_num'])
        match.update(record, synchronize_session=False)

    for _, record in new_data.iterrows():
        sess.add(TASLookup(**record))

    sess.commit()
    logger.info('%s records in CSV, %s existing', len(data.index), sum(data['existing_id'].notnull()))
Example #4
0
def updateTASLookups(csvPath):
    """Load TAS data from the provided CSV and replace/insert any
    TASLookups"""
    sess = GlobalDB.db().session

    data = cleanTas(csvPath)

    # Wipe every existing TAS record first -- submissions should not be
    # accepted once their entries have fallen off the CARS file.
    sess.query(TASLookup).delete(synchronize_session=False)

    # Reload row by row through the ORM model instead of pandas' to_sql so
    # the inserts stay inside the session's unit of work (bulk-load options
    # were evaluated but rejected for the sake of that guarantee).
    for _, record in data.iterrows():
        sess.add(TASLookup(**record))

    sess.commit()
    logger.info('%s records inserted to %s', len(data.index),
                TASLookup.__tablename__)
Example #5
0
    def addTAS(self, ata, aid, bpoa, epoa, availability, main, sub):
        """ Add a TAS to the validation database if it does not exist.

            This method can be slow.

            Args:
                ata: allocation transfer agency
                aid: agency identifier
                bpoa: beginning period of availability
                epoa: ending period of availability
                availability: availability type code
                main: main account code
                sub: sub account code

            Returns:
                True if a new TAS record was inserted, False if a matching
                record already existed.
        """
        # .first() lets the database stop at the first match instead of
        # materializing every matching row (.all()) just to test emptiness.
        existing = self.session.query(TASLookup). \
            filter(TASLookup.allocation_transfer_agency == ata). \
            filter(TASLookup.agency_identifier == aid). \
            filter(TASLookup.beginning_period_of_availability == bpoa). \
            filter(TASLookup.ending_period_of_availability == epoa). \
            filter(TASLookup.availability_type_code == availability). \
            filter(TASLookup.main_account_code == main). \
            filter(TASLookup.sub_account_code == sub).first()
        if existing is None:
            tas = TASLookup()
            tas.allocation_transfer_agency = ata
            tas.agency_identifier = aid
            tas.beginning_period_of_availability = bpoa
            tas.ending_period_of_availability = epoa
            tas.availability_type_code = availability
            tas.main_account_code = main
            tas.sub_account_code = sub
            self.session.add(tas)
            self.session.commit()
            return True
        return False