def cleanup_sheet(engine, row, sheet_id):
    """Re-clean the stored 'spending' rows of one sheet of a resource.

    Loads every spending row matching ``row['resource_id']`` and
    ``sheet_id``, detects the date formats used in them, then deletes the
    stored rows and re-adds each one after passing it through the date,
    number and gov cleanup steps. The delete and re-adds go through a single
    connection and are committed together.

    :param engine: database engine handed to the ``sl`` helpers and used to
        open the connection.
    :param row: mapping describing the resource; ``'resource_id'`` and
        ``'retrieve_hash'`` are read from it.
    :param sheet_id: identifier of the sheet whose rows are cleaned.
    :returns: ``False`` if any detected date format is ``None`` (an issue is
        recorded and nothing is modified), ``True`` once the cleaned rows
        have been committed.
    """
    spending_table = sl.get_table(engine, 'spending')
    data = list(sl.find(engine, spending_table,
                        resource_id=row['resource_id'],
                        sheet_id=sheet_id))

    # Detect and validate the formats *before* opening a connection: a
    # failure in detection can then no longer leak an open connection, and
    # the error path does not start a transaction it never uses.
    date_formats = cleanup_dates.detect_formats(data)
    if None in date_formats.values():
        log.warn("Couldn't detect date formats: %r", date_formats)
        issue(engine, row['resource_id'], row['retrieve_hash'],
              "Couldn't detect date formats", repr(date_formats))
        return False

    connection = engine.connect()
    try:
        trans = connection.begin()
        sl.delete(connection, spending_table,
                  resource_id=row['resource_id'],
                  sheet_id=sheet_id)
        # A distinct loop name keeps the ``row`` parameter intact.
        for data_row in data:
            data_row = cleanup_dates.apply(data_row, date_formats)
            data_row = cleanup_numbers.apply(data_row)
            data_row = cleanup_gov.apply(data_row)
            # Supplier cleanup is currently disabled:
            # data_row = cleanup_supplier.apply(data_row, engine)
            # Drop the old 'id' before re-adding -- presumably so the store
            # assigns a fresh one; confirm against the table schema.
            del data_row['id']
            sl.add_row(connection, spending_table, data_row)
        trans.commit()
        return True
    finally:
        # Always release the connection, whether or not the commit happened.
        connection.close()
def cleanup_sheet(engine, row, sheet_id, data_row_filter, stats_spending):
    """Re-clean the stored 'spending' rows of one sheet of a resource.

    Loads every spending row matching ``row['resource_id']`` and
    ``sheet_id``, detects the date formats used in them, and re-adds each
    row after passing it through the date, number and gov cleanup steps.
    All writes go through a single connection and are committed together.

    :param engine: database engine handed to the ``sl`` helpers and used to
        open the connection.
    :param row: mapping describing the resource; ``'resource_id'`` and
        ``'retrieve_hash'`` are read from it.
    :param sheet_id: identifier of the sheet whose rows are cleaned.
    :param data_row_filter: when truthy, only data rows whose ``'row_id'``
        equals this value are processed and the stored rows are NOT deleted
        first; when falsy, all stored rows of the sheet are deleted and
        every row is re-added.
    :param stats_spending: passed through unchanged to each cleanup step.
    :returns: a 2-tuple. ``(False, None)`` if the sheet has no rows;
        ``(True, message)`` if a detected date format is a string (it is
        reported as the reason detection failed and nothing is modified);
        ``(True, None)`` once the cleaned rows have been committed.
    """
    spending_table = sl.get_table(engine, 'spending')
    data = list(sl.find(engine, spending_table,
                        resource_id=row['resource_id'],
                        sheet_id=sheet_id))
    if not data:
        log.info('Sheet has no rows')
        return False, None

    # Detect and validate the formats *before* opening a connection: a
    # failure in detection can then no longer leak an open connection, and
    # the error path does not start a transaction it never uses.
    date_formats = cleanup_dates.detect_formats(data)
    for date_format in date_formats.values():
        # A string value is used as the failure reason in the issue text.
        if isinstance(date_format, basestring):
            issue(engine, row['resource_id'], row['retrieve_hash'], STAGE,
                  "Couldn't detect date formats because: %s" % date_format,
                  repr(date_formats))
            return True, date_format

    connection = engine.connect()
    try:
        trans = connection.begin()
        if not data_row_filter:
            sl.delete(connection, spending_table,
                      resource_id=row['resource_id'],
                      sheet_id=sheet_id)
        # NOTE(review): in filter mode nothing is deleted, yet the matching
        # row is still added -- confirm sl.add_row does not duplicate it.
        # A distinct loop name keeps the ``row`` parameter intact.
        for data_row in data:
            if data_row_filter and data_row_filter != data_row['row_id']:
                continue
            data_row = cleanup_dates.apply(data_row, date_formats,
                                           stats_spending)
            data_row = cleanup_numbers.apply(data_row, stats_spending)
            data_row = cleanup_gov.apply(data_row, stats_spending)
            # Supplier cleanup is currently disabled:
            # data_row = cleanup_supplier.apply(data_row, engine)
            # Drop the old 'id' before re-adding -- presumably so the store
            # assigns a fresh one; confirm against the table schema.
            del data_row['id']
            sl.add_row(connection, spending_table, data_row)
        trans.commit()
        return True, None
    finally:
        # Always release the connection, whether or not the commit happened.
        connection.close()
def cleanup_sheet(engine, row, sheet_id):
    """Re-clean the stored "spending" rows of one sheet of a resource.

    Fetches the spending rows for ``row["resource_id"]`` / ``sheet_id``,
    detects their date formats, then replaces the stored rows with cleaned
    copies (dates, numbers, gov cleanup) inside one committed transaction.

    :param engine: database engine handed to the ``sl`` helpers and used to
        open the connection.
    :param row: mapping describing the resource; ``"resource_id"`` and
        ``"retrieve_hash"`` are read from it.
    :param sheet_id: identifier of the sheet whose rows are cleaned.
    :returns: ``False`` if any detected date format is ``None`` (an issue is
        recorded and the stored rows are left untouched), otherwise ``True``
        after the commit.
    """
    resource_id = row["resource_id"]
    spending_table = sl.get_table(engine, "spending")
    data = list(sl.find(engine, spending_table, resource_id=resource_id, sheet_id=sheet_id))

    # Format detection happens before any connection exists, so an error
    # raised here cannot leave a connection open, and the failure path
    # below never begins a transaction it would not use.
    date_formats = cleanup_dates.detect_formats(data)
    if None in date_formats.values():
        log.warn("Couldn't detect date formats: %r", date_formats)
        issue(engine, resource_id, row["retrieve_hash"], "Couldn't detect date formats", repr(date_formats))
        return False

    connection = engine.connect()
    try:
        trans = connection.begin()
        sl.delete(connection, spending_table, resource_id=resource_id, sheet_id=sheet_id)
        # The loop variable is deliberately not called ``row`` so that the
        # parameter of the same name is not overwritten.
        for data_row in data:
            data_row = cleanup_dates.apply(data_row, date_formats)
            data_row = cleanup_numbers.apply(data_row)
            data_row = cleanup_gov.apply(data_row)
            # Supplier cleanup is currently disabled:
            # data_row = cleanup_supplier.apply(data_row, engine)
            # The previous "id" is removed before the row is added back --
            # presumably to let the store assign a new one; TODO confirm.
            del data_row["id"]
            sl.add_row(connection, spending_table, data_row)
        trans.commit()
        return True
    finally:
        # Runs on success and on error alike.
        connection.close()
def cleanup_sheet(engine, row, sheet_id, data_row_filter, stats_spending):
    """Re-clean the stored 'spending' rows of one sheet of a resource.

    Fetches the spending rows for ``row['resource_id']`` / ``sheet_id``,
    detects their date formats, then writes cleaned copies (dates, numbers,
    gov cleanup) back inside one committed transaction.

    :param engine: database engine handed to the ``sl`` helpers and used to
        open the connection.
    :param row: mapping describing the resource; ``'resource_id'`` and
        ``'retrieve_hash'`` are read from it.
    :param sheet_id: identifier of the sheet whose rows are cleaned.
    :param data_row_filter: if truthy, restricts processing to data rows
        whose ``'row_id'`` equals it and skips the initial delete; if falsy,
        the sheet's stored rows are deleted and all rows are re-added.
    :param stats_spending: forwarded as-is to every cleanup step.
    :returns: ``(False, None)`` when the sheet has no rows;
        ``(True, message)`` when a detected date format is a string, which
        is reported as the failure reason (nothing is modified);
        ``(True, None)`` after a successful commit.
    """
    resource_id = row['resource_id']
    spending_table = sl.get_table(engine, 'spending')
    data = list(sl.find(engine, spending_table, resource_id=resource_id, sheet_id=sheet_id))
    if not data:
        log.info('Sheet has no rows')
        return False, None

    # Format detection happens before any connection exists, so an error
    # raised here cannot leave a connection open, and the failure path
    # below never begins a transaction it would not use.
    date_formats = cleanup_dates.detect_formats(data)
    for date_format in date_formats.values():
        # A string here is treated as the explanation of a detection failure.
        if isinstance(date_format, basestring):
            issue(engine, resource_id, row['retrieve_hash'], STAGE,
                  "Couldn't detect date formats because: %s" % date_format,
                  repr(date_formats))
            return True, date_format

    connection = engine.connect()
    try:
        trans = connection.begin()
        if not data_row_filter:
            sl.delete(connection, spending_table, resource_id=resource_id, sheet_id=sheet_id)
        # NOTE(review): with a filter set the old rows stay in place while
        # the matching row is added again -- verify this cannot duplicate.
        # The loop variable is deliberately not called ``row`` so that the
        # parameter of the same name is not overwritten.
        for data_row in data:
            if data_row_filter and data_row_filter != data_row['row_id']:
                continue
            data_row = cleanup_dates.apply(data_row, date_formats, stats_spending)
            data_row = cleanup_numbers.apply(data_row, stats_spending)
            data_row = cleanup_gov.apply(data_row, stats_spending)
            # Supplier cleanup is currently disabled:
            # data_row = cleanup_supplier.apply(data_row, engine)
            # The previous 'id' is removed before the row is added back --
            # presumably to let the store assign a new one; TODO confirm.
            del data_row['id']
            sl.add_row(connection, spending_table, data_row)
        trans.commit()
        return True, None
    finally:
        # Runs on success and on error alike.
        connection.close()