import json
import os
from datetime import datetime, timedelta

from sqlalchemy import (BigInteger, Boolean, Column, DateTime, Integer,
                        MetaData, String, Table, Text, and_, func, select)
from sqlalchemy.schema import CreateSchema

# NOTE: module-level objects referenced below but not defined in this excerpt
# (engine, settings, file_transfer, resources, hpo_ids, use_multi_schemas,
# hpo_id, table_name, csv_filename, phase, write_report_info) are assumed to
# come from elsewhere in the package; hedged sketches of a few of the missing
# pieces follow the functions that use them.


def download_latest():
    """
    Download and save the latest submission files, possibly overwriting
    existing, presumably older files
    :return:
    """
    # for each file name, find the time of its most recent submission
    q1 = select([
        table.c.file_name,
        func.max(table.c.sent_time_epoch).label('sent_time_max_epoch')
    ]).group_by('file_name')
    latest_files = engine.execute(q1).fetchall()
    permitted = list(permitted_file_names())
    for f in latest_files:
        selection = [table.c.sender_name, table.c.file_name, table.c.url]
        q2 = select(selection).where(
            and_(table.c.file_name == f['file_name'],
                 table.c.sent_time_epoch == f['sent_time_max_epoch']))
        r = engine.execute(q2).fetchone()
        file_name = r['file_name'].lower()
        sender_name = r['sender_name']
        if file_name not in permitted:
            print('Notify %(sender_name)s that "%(file_name)s" is not a valid'
                  ' file name' % locals())
        # download either way just in case
        dest = os.path.join(settings.csv_dir, file_name)
        content = file_transfer.download(r['url'])
        if 'MD5 token has expired' not in content:
            with open(dest, 'wb') as out:
                out.write(content)
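# --- Sketch: permitted_file_names() is referenced above but not shown. ---
# A minimal placeholder, assuming permitted names are lower-case CSV file
# names built from a fixed list of expected tables (the list itself is
# hypothetical; the real helper lives elsewhere in the package):
def permitted_file_names():
    expected_tables = ['person', 'visit_occurrence']  # hypothetical list
    for expected_table in expected_tables:
        yield '%s.csv' % expected_table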
def update_table():
    """
    Log newly submitted files
    :return:
    """
    metadata.create_all(engine)
    payload = file_transfer.inbox()
    for uid, package in payload['packages'].items():
        sender_name = package['sender_name']
        sent_time = int(package['sent_time'])
        # subtract 5 hours to get eastern time
        sent_datetime = datetime.utcfromtimestamp(sent_time) - timedelta(hours=5)
        for package_file in package['package_files']:
            file_handle = package_file['file_handle']
            # skip files that have already been logged
            query = table.select().where(table.c.file_handle == file_handle)
            results = engine.execute(query).fetchall()
            if len(results) == 0:
                file_name = file_handle.split('/')[-1]
                file_size = int(package_file['file_size'])
                url = package_file['url']
                message = package['mail_body'].strip() if 'mail_body' in package else None
                engine.execute(table.insert(),
                               sender_name=sender_name,
                               sent_time_epoch=sent_time,
                               sent_datetime=sent_datetime,
                               file_handle=file_handle,
                               file_name=file_name,
                               file_size=file_size,
                               url=url,
                               message=message)
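# --- Sketch: the submission log table read and written above. ---
# Column names come from download_latest() and update_table(); the table
# name, types, and lengths are assumptions (the real definition is not part
# of this excerpt):
metadata = MetaData()
table = Table(
    'file_transfer_log', metadata,          # table name is a guess
    Column('sender_name', String(255)),
    Column('sent_time_epoch', BigInteger),  # epoch seconds, int(package['sent_time'])
    Column('sent_datetime', DateTime),      # epoch shifted to eastern time (UTC-5)
    Column('file_handle', String(500)),     # file_name is its last '/' segment
    Column('file_name', String(255)),
    Column('file_size', Integer),
    Column('url', String(500)),
    Column('message', Text))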
def success():
    # hpo_id, table_name, csv_filename and phase are assumed to be set by the
    # surrounding load process
    engine.execute(log_table.insert(),
                   hpo_id=hpo_id,
                   log_id=datetime.utcnow(),
                   table_name=table_name,
                   file_name=csv_filename,
                   phase=phase,
                   success=True)
def create_schema(schema):
    """
    Create schema if it doesn't exist
    :param schema: name of schema
    :return:
    """
    result = engine.execute(SCHEMA_EXISTS_QUERY % schema)
    if result.rowcount == 0:
        engine.execute(CreateSchema(schema))
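# --- Sketch: the SCHEMA_EXISTS_QUERY constant used by create_schema(). ---
# The real query text is not part of this excerpt; against PostgreSQL it
# could be an information_schema lookup. The %s placeholder matches the
# string interpolation above (safe only for trusted schema names):
SCHEMA_EXISTS_QUERY = ("SELECT schema_name FROM information_schema.schemata "
                       "WHERE schema_name = '%s'")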
def fail(message, params=None):
    engine.execute(log_table.insert(),
                   hpo_id=hpo_id,
                   log_id=datetime.utcnow(),
                   table_name=table_name,
                   file_name=csv_filename,
                   phase=phase,
                   success=False,
                   message=message,
                   params=params or None)
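# --- Sketch: the load log table written by success()/fail() and read by ---
# export_log() below. Column names come from the code; the table name and
# column types are assumptions:
LOG_TABLE_NAME = 'log'  # name is a guess; the real constant is defined elsewhere
log_table = Table(
    LOG_TABLE_NAME, metadata,
    Column('log_id', DateTime),        # utcnow() at log time; stringified on export
    Column('hpo_id', String(100)),
    Column('table_name', String(100)),
    Column('file_name', String(255)),
    Column('phase', String(50)),       # e.g. contains 'received', 'parsing' or 'loading'
    Column('success', Boolean),
    Column('message', Text),
    Column('params', Text))            # excluded from export; may hold sensitive data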
def export_log():
    """
    Dumps all logs for all HPOs to `_data/log.json`
    Note: params column is excluded for the unlikely case it may contain sensitive data
    """
    all_log_items = []
    results = {}
    for hpo_id in hpo_ids:
        schema = hpo_id if use_multi_schemas else None
        metadata = MetaData(bind=engine, reflect=True, schema=schema)
        log_table = Table(LOG_TABLE_NAME, metadata, autoload=True)
        hpo_results = {}
        for row in engine.execute(log_table.select()):
            row_dict = dict(zip(row.keys(), row))
            table_name = row_dict['table_name']
            if table_name not in hpo_results:
                hpo_results[table_name] = {
                    'received': False,
                    'parsing': False,
                    'loading': False,
                    'message': None
                }
            table_results = hpo_results[table_name]
            table_results['file_name'] = row_dict['file_name']
            phase = row_dict['phase'].lower()
            if 'received' in phase:
                table_results['received'] = row_dict['success']
            elif 'parsing' in phase:
                table_results['parsing'] = row_dict['success']
            elif 'loading' in phase:
                table_results['loading'] = row_dict['success']
            # stringify log_id for JSON serialization
            table_results['log_id'] = str(row_dict['log_id'])
            # save error details
            message = row_dict['message']
            if message is not None:
                table_results['message'] = message
        results[hpo_id] = hpo_results
    for hpo_id, hpo_results in results.items():
        for table_name, table_results in hpo_results.items():
            all_log_items.append({
                'log_id': table_results['log_id'],
                'hpo_id': hpo_id,
                'table_name': table_name,
                'file_name': table_results['file_name'],
                'received': table_results['received'],
                'parsing': table_results['parsing'],
                'loading': table_results['loading'],
                'message': table_results['message']
            })
    log_path = os.path.join(resources.data_path, 'log.json')
    with open(log_path, 'w') as log_file:
        log_file.write(json.dumps(all_log_items))
    write_report_info()
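# --- Sketch: shape of one item in the exported _data/log.json. ---
# Keys come from export_log() above; the values shown are illustrative only:
#
# [
#   {
#     "log_id": "2017-01-01 12:00:00",   # stringified datetime
#     "hpo_id": "hpo_a",                 # hypothetical HPO identifier
#     "table_name": "person",            # hypothetical table name
#     "file_name": "person.csv",
#     "received": true,
#     "parsing": true,
#     "loading": false,
#     "message": null                    # error details, if any
#   }
# ]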