Beispiel #1
0
def validate_sheet(engine, row, sheet_id, data_row_filter, stats_spending):
    """Flag each spending row of one sheet as valid or invalid.

    Loads the ``spending`` rows matching ``row['resource_id']`` and
    ``sheet_id``, checks that ``DateFormatted`` and ``AmountFormatted`` are
    populated, tallies each outcome in ``stats_spending`` and writes the
    ``valid`` flag plus a signature back to every processed row inside a
    single transaction.  Only the first failure found is reported through
    ``issue``.

    :param engine: database handle; also used to open the write connection.
    :param row: mapping providing ``resource_id`` and ``retrieve_hash``.
    :param sheet_id: sheet whose rows are checked.
    :param data_row_filter: when truthy, only rows whose ``row_id`` equals
        it are processed.
    :param stats_spending: mapping with ``'date'`` and ``'amount'`` entries,
        each exposing ``add_spending(label, row)``.
    :returns: ``(has_valid_rows, error_message)``; ``error_message`` stays
        ``None`` when no issue was reported.
    """
    spending_table = sl.get_table(engine, 'spending')
    candidates = list(
        sl.find(engine, spending_table,
                resource_id=row['resource_id'], sheet_id=sheet_id))

    # One entry per checked column:
    # (formatted column, stats bucket, ok label, invalid label,
    #  issue text, raw column echoed back in the issue payload)
    checks = (
        ('DateFormatted', 'date', 'Date ok', 'Date invalid',
         'Date invalid (blank, inconsistent or unrecognised format)',
         'Date'),
        ('AmountFormatted', 'amount', 'Amount ok', 'Amount invalid',
         'Amount invalid',
         'Amount'),
    )

    connection = engine.connect()
    trans = connection.begin()
    first_issue_reported = False  # only the first failure is reported
    error_message = None
    try:
        valid_count = 0
        for record in candidates:
            if data_row_filter and data_row_filter != record['row_id']:
                continue
            outcome = {'id': record['id'], 'valid': True}
            outcome['signature'] = generate_signature(record)

            for column, bucket, ok_label, bad_label, text, raw in checks:
                if record[column] is not None:
                    stats_spending[bucket].add_spending(ok_label, record)
                    continue
                stats_spending[bucket].add_spending(bad_label, record)
                outcome['valid'] = False
                if first_issue_reported:
                    continue
                issue(engine, row['resource_id'], row['retrieve_hash'],
                      STAGE, text,
                      {'row_id': record.get('row_id'),
                       'row_number': record.get('row_number'),
                       raw: record.get(raw)})
                error_message = bad_label
                first_issue_reported = True

            if outcome['valid']:
                valid_count += 1
            sl.update(connection, spending_table,
                      {'id': outcome['id']}, outcome)
        trans.commit()
        return valid_count > 0, error_message
    finally:
        # Always release the connection, whether or not the commit happened.
        connection.close()
def update_network_entities(engine, file_name):
    """Synchronise the ``network_entity`` table with a CSV reference sheet.

    If ``file_name`` exists, every row in it is upserted into the
    ``network_entity`` table (keyed on ``representativeEtlId`` and
    ``etlFingerPrint``) and every representative mentioned in the sheet is
    updated with ``network_extracted = True``.  Afterwards the sheet is
    rewritten with the union of the pairs read from the file and the pairs
    present in the table.

    :param engine: database handle passed through to the ``sl`` helpers.
    :param file_name: path of the CSV reference sheet; it is created when
        missing and overwritten otherwise.
    """
    log.info("Updating network entities reference sheet: %s", file_name)
    network_entities = set()
    table = sl.get_table(engine, 'network_entity')
    if os.path.exists(file_name):
        # ``with`` releases the handle even when decoding or a database
        # call fails part-way through the sheet.
        with open(file_name, 'rb') as fh:
            for d in csv.DictReader(fh):
                # Cells are read as byte strings and decoded as UTF-8.
                e = dict((k, v.decode('utf-8')) for (k, v) in d.items())
                network_entities.add(
                    (e['representativeEtlId'], e['etlFingerPrint']))
                sl.upsert(engine, table, e,
                          ['representativeEtlId', 'etlFingerPrint'])
        reps = set(ne[0] for ne in network_entities)
        rep_table = sl.get_table(engine, 'representative')
        for rep in reps:
            sl.update(engine, rep_table, {'etlId': rep},
                      {'network_extracted': True})

    for row in sl.all(engine, table):
        network_entities.add(
            (row['representativeEtlId'], row['etlFingerPrint']))

    with open(file_name, 'wb') as fh:
        # The writer is created lazily, so an empty set still produces an
        # empty file without a header line.
        writer = None
        for ic, fp in network_entities:
            row = {
                'representativeEtlId': ic,
                'etlFingerPrint': fp
            }
            if writer is None:
                writer = csv.DictWriter(fh, row.keys())
                # Header row: each column name mapped to itself.
                writer.writerow(dict(zip(row.keys(), row.keys())))
            r = [(k, unicode(v).encode('utf-8')) for k, v in row.items()]
            writer.writerow(dict(r))
Beispiel #3
0
def validate_sheet(engine, row, sheet_id, data_row_filter, stats_spending):
    """Validate the spending rows of a sheet and persist the verdicts.

    For every ``spending`` row belonging to ``row['resource_id']`` and
    ``sheet_id`` the formatted date and amount must be non-``None``.  Each
    check is counted in ``stats_spending``; the row's ``valid`` flag and
    signature are stored within one transaction.  ``issue`` is called at
    most once, for the first failure encountered.

    :param engine: database handle used for lookups and the connection.
    :param row: mapping with ``resource_id`` and ``retrieve_hash``.
    :param sheet_id: identifier of the sheet to validate.
    :param data_row_filter: if truthy, rows with a different ``row_id`` are
        skipped.
    :param stats_spending: mapping holding ``'date'`` and ``'amount'``
        collectors that offer ``add_spending(label, row)``.
    :returns: tuple ``(at_least_one_valid_row, error_message)``.
    """
    spending_table = sl.get_table(engine, 'spending')
    matches = sl.find(engine, spending_table,
                      resource_id=row['resource_id'],
                      sheet_id=sheet_id)
    data = list(matches)
    connection = engine.connect()
    trans = connection.begin()
    reported = False  # becomes True once the first failure is recorded
    error_message = None
    try:
        good_rows = 0
        for entry in data:
            if data_row_filter and data_row_filter != entry['row_id']:
                continue
            entry_id = entry['id']
            signature = generate_signature(entry)
            is_valid = True

            # --- date check ---
            if entry['DateFormatted'] is not None:
                stats_spending['date'].add_spending('Date ok', entry)
            else:
                stats_spending['date'].add_spending('Date invalid', entry)
                is_valid = False
                if not reported:
                    date_details = {
                        'row_id': entry.get('row_id'),
                        'row_number': entry.get('row_number'),
                        'Date': entry.get('Date'),
                    }
                    issue(engine, row['resource_id'], row['retrieve_hash'],
                          STAGE,
                          'Date invalid (blank, inconsistent or unrecognised format)',
                          date_details)
                    error_message = 'Date invalid'
                    reported = True

            # --- amount check ---
            if entry['AmountFormatted'] is not None:
                stats_spending['amount'].add_spending('Amount ok', entry)
            else:
                stats_spending['amount'].add_spending('Amount invalid', entry)
                is_valid = False
                if not reported:
                    amount_details = {
                        'row_id': entry.get('row_id'),
                        'row_number': entry.get('row_number'),
                        'Amount': entry.get('Amount'),
                    }
                    issue(engine, row['resource_id'], row['retrieve_hash'],
                          STAGE, 'Amount invalid', amount_details)
                    error_message = 'Amount invalid'
                    reported = True

            if is_valid:
                good_rows += 1
            verdict = {'id': entry_id,
                       'valid': is_valid,
                       'signature': signature}
            sl.update(connection, spending_table, {'id': entry_id}, verdict)
        trans.commit()
        return (good_rows > 0, error_message)
    finally:
        # The connection is closed on success and on failure alike.
        connection.close()
def validate_sheet(engine, row, sheet_id):
    """Mark each spending row of one sheet as valid or invalid.

    Fetches the ``spending`` rows for ``row['resource_id']`` and
    ``sheet_id``; a row is invalid when ``DateFormatted`` or
    ``AmountFormatted`` is ``None``.  The ``valid`` flag and a signature are
    written back for every row in one transaction, and only the first
    failure is reported through ``issue``.

    :param engine: database handle; also used to open the write connection.
    :param row: mapping providing ``resource_id`` and ``retrieve_hash``.
    :param sheet_id: sheet whose rows are checked.
    :returns: ``True`` if at least one row was valid, else ``False``.
    """
    spending_table = sl.get_table(engine, 'spending')
    candidates = list(
        sl.find(engine, spending_table,
                resource_id=row['resource_id'], sheet_id=sheet_id))

    # (formatted column, raw column echoed in the issue, issue text)
    checks = (
        ('DateFormatted', 'Date',
         'Date invalid (or possible the date format is inconsistent)'),
        ('AmountFormatted', 'Amount',
         'Amount invalid'),
    )

    connection = engine.connect()
    trans = connection.begin()
    first_issue_reported = False  # only the first failure is reported
    try:
        valid_count = 0
        for record in candidates:
            outcome = {'id': record['id'], 'valid': True}
            outcome['signature'] = generate_signature(record)

            for formatted, raw, text in checks:
                if record[formatted] is not None:
                    continue
                outcome['valid'] = False
                if first_issue_reported:
                    continue
                issue(engine, row['resource_id'], row['retrieve_hash'],
                      text,
                      {'row_id': record.get('row_id'),
                       raw: record.get(raw)})
                first_issue_reported = True

            if outcome['valid']:
                valid_count += 1
            sl.update(connection, spending_table,
                      {'id': outcome['id']}, outcome)
        trans.commit()
        return valid_count > 0
    finally:
        # Always release the connection, whether or not the commit happened.
        connection.close()
Beispiel #5
0
def validate_sheet(engine, row, sheet_id):
    """Validate the spending rows of a sheet and persist the verdicts.

    Every ``spending`` row for ``row['resource_id']`` and ``sheet_id`` must
    have non-``None`` ``DateFormatted`` and ``AmountFormatted`` values to
    count as valid.  Each row's ``valid`` flag and signature are stored in
    one transaction; ``issue`` is called at most once, for the first
    failure encountered.

    :param engine: database handle used for lookups and the connection.
    :param row: mapping with ``resource_id`` and ``retrieve_hash``.
    :param sheet_id: identifier of the sheet to validate.
    :returns: ``True`` when at least one row passed both checks.
    """
    spending_table = sl.get_table(engine, 'spending')
    matches = sl.find(engine, spending_table,
                      resource_id=row['resource_id'],
                      sheet_id=sheet_id)
    data = list(matches)
    connection = engine.connect()
    trans = connection.begin()
    reported = False  # becomes True once the first failure is recorded
    try:
        good_rows = 0
        for entry in data:
            entry_id = entry['id']
            signature = generate_signature(entry)
            is_valid = True

            if entry['DateFormatted'] is None:
                is_valid = False
                if not reported:
                    date_details = {
                        'row_id': entry.get('row_id'),
                        'Date': entry.get('Date'),
                    }
                    issue(engine, row['resource_id'], row['retrieve_hash'],
                          'Date invalid (or possible the date format is inconsistent)',
                          date_details)
                    reported = True

            if entry['AmountFormatted'] is None:
                is_valid = False
                if not reported:
                    amount_details = {
                        'row_id': entry.get('row_id'),
                        'Amount': entry.get('Amount'),
                    }
                    issue(engine, row['resource_id'], row['retrieve_hash'],
                          'Amount invalid', amount_details)
                    reported = True

            if is_valid:
                good_rows += 1
            verdict = {'id': entry_id,
                       'valid': is_valid,
                       'signature': signature}
            sl.update(connection, spending_table, {'id': entry_id}, verdict)
        trans.commit()
        return good_rows > 0
    finally:
        # The connection is closed on success and on failure alike.
        connection.close()
Beispiel #6
0
# Module-level logger, registered under the name 'dump'.
log = logging.getLogger('dump')


def submit_all():
    """Submit every row produced by ``generate_all()`` to datawire.

    Each row is sent through a ``DataStringer``.  A non-empty
    ``DateFormatted`` value is parsed as ``%Y-%m-%d`` and passed along as
    ``action_at``.  A failed submission is logged, followed by a five
    second pause, and the loop moves on to the next row; more than ten
    consecutive failures re-raise the last error.  Once the loop finishes
    the spending table is flagged with ``datawire_submitted = True``.

    :raises Exception: whatever ``stringer.submit`` raised on the eleventh
        consecutive failure.
    """
    engine = db_connect()
    spending = sl.get_table(engine, 'spending')
    stringer = DataStringer(service='ukspending', event='transactions')
    log.info("Submitting frames to datawire...")
    failures = 0  # consecutive failures; reset by every successful submit
    for row in generate_all():
        action_at = row.get('DateFormatted')
        if action_at:
            action_at = datetime.datetime.strptime(action_at, '%Y-%m-%d')
        try:
            stringer.submit(row,
                            action_at=action_at,
                            source_url=row.get('SourceURL'))
            failures = 0
        except Exception:
            failures += 1
            log.exception("Submitting a frame failed (%d in a row)",
                          failures)
            if failures > 10:
                # Bare ``raise`` keeps the original traceback intact.
                raise
            time.sleep(5)
    # NOTE(review): the previous call was ``sl.update(engine, spending, row)``,
    # which passed no values, ignored the ``data`` dict built for it and hit
    # a NameError when nothing was generated.  It is read here as "flag the
    # table as submitted" using empty criteria -- confirm that a bulk flag,
    # rather than a per-row one, is what is wanted.
    data = {'datawire_submitted': True}
    sl.update(engine, spending, {}, data)


# Run the submission when this file is executed as a script.
if __name__ == '__main__':
    submit_all()
Beispiel #7
0

def submit_all():
    """Push all rows yielded by ``generate_all()`` to datawire.

    Rows are submitted one at a time via a ``DataStringer``; when a row has
    a ``DateFormatted`` value it is parsed with ``%Y-%m-%d`` and supplied as
    ``action_at``.  Submission errors are logged and followed by a five
    second sleep before continuing with the next row.  After more than ten
    errors in a row the last one is re-raised.  When the loop completes the
    spending table is updated with ``datawire_submitted = True``.

    :raises Exception: the error from ``stringer.submit`` once the
        consecutive-failure limit is exceeded.
    """
    engine = db_connect()
    spending = sl.get_table(engine, 'spending')
    stringer = DataStringer(service='ukspending',
                            event='transactions')
    log.info("Submitting frames to datawire...")
    failures = 0  # number of failures since the last successful submit
    for row in generate_all():
        action_at = row.get('DateFormatted')
        if action_at:
            action_at = datetime.datetime.strptime(action_at, '%Y-%m-%d')
        try:
            stringer.submit(row,
                            action_at=action_at,
                            source_url=row.get('SourceURL'))
            failures = 0
        except Exception:
            failures += 1
            log.exception("Submitting a frame failed (%d in a row)",
                          failures)
            if failures > 10:
                # Re-raise with the original traceback preserved.
                raise
            time.sleep(5)
    # NOTE(review): this used to be ``sl.update(engine, spending, row)``.
    # That call supplied no values, left the ``data`` dict unused and failed
    # with a NameError for an empty run.  The intent is taken to be a bulk
    # "submitted" flag (empty criteria) -- please confirm, a per-row update
    # may have been meant instead.
    data = {'datawire_submitted': True}
    sl.update(engine, spending, {}, data)


# Script entry point: submit everything when run directly.
if __name__ == '__main__':
    submit_all()

def reset():
    """Set ``network_extracted`` back to ``False`` and report success.

    The update is issued with empty criteria (presumably matching every
    row of ``representative`` -- verify against the ``sl`` helper).

    :returns: the ``jsonify`` response for ``{'status': 'OK'}``.
    """
    criteria = {}
    cleared_flag = {'network_extracted': False}
    sl.update(engine, representative, criteria, cleared_flag)
    response = jsonify({'status': 'OK'})
    return response