Example #1
def download_latest():
    """
    Download and save the latest submission files, possibly overwriting existing (presumably older) files
    :return:
    """
    # find the most recent submission time for each file name
    q1 = select([
        table.c.file_name,
        func.max(table.c.sent_time_epoch).label('sent_time_max_epoch')
    ]).group_by(table.c.file_name)
    latest_files = engine.execute(q1).fetchall()
    permitted = list(permitted_file_names())
    for f in latest_files:
        selection = [table.c.sender_name, table.c.file_name, table.c.url]
        q2 = select(selection).where(
            and_(table.c.file_name == f['file_name'],
                 table.c.sent_time_epoch == f['sent_time_max_epoch']))
        r = engine.execute(q2).fetchone()
        file_name = r['file_name'].lower()
        sender_name = r['sender_name']
        if file_name not in permitted:
            print('Notify %s that "%s" is not a valid file name' %
                  (sender_name, file_name))
        # download either way just in case
        dest = os.path.join(settings.csv_dir, file_name)
        content = file_transfer.download(r['url'])
        # content is assumed to be bytes, since it is written in binary mode
        if b'MD5 token has expired' not in content:
            with open(dest, 'wb') as out:
                out.write(content)
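
These snippets reference a module-level engine and a submissions table defined elsewhere. Below is a minimal sketch of that shared setup, with columns inferred from the queries above; the database URL, table name, and column types are assumptions, and settings, file_transfer, and permitted_file_names are project-specific helpers not reproduced here.

import os
from datetime import datetime, timedelta

from sqlalchemy import (BigInteger, Column, DateTime, Integer, MetaData,
                        String, Table, and_, create_engine, func, select)

# placeholder URL and table name; adjust to the real environment
engine = create_engine('sqlite:///submissions.db')
metadata = MetaData()
table = Table('submission', metadata,
              Column('sender_name', String),
              Column('sent_time_epoch', BigInteger),
              Column('sent_datetime', DateTime),
              Column('file_handle', String),
              Column('file_name', String),
              Column('file_size', Integer),
              Column('url', String),
              Column('message', String))
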
Example #2
def update_table():
    """
    Log newly submitted files
    :return:
    """
    metadata.create_all(engine)
    payload = file_transfer.inbox()
    for uid, package in payload['packages'].items():
        sender_name = package['sender_name']
        sent_time = int(package['sent_time'])
        # subtract 5 hours to approximate Eastern Time (ignores DST)
        sent_datetime = datetime.utcfromtimestamp(sent_time) - timedelta(hours=5)
        for package_file in package['package_files']:
            file_handle = package_file['file_handle']
            # skip files that have already been logged
            query = table.select().where(table.c.file_handle == file_handle)
            results = engine.execute(query).fetchall()
            if len(results) == 0:
                file_name = file_handle.split('/')[-1]
                file_size = int(package_file['file_size'])
                url = package_file['url']
                message = (package['mail_body'].strip()
                           if 'mail_body' in package else None)
                engine.execute(table.insert(),
                               sender_name=sender_name,
                               sent_time_epoch=sent_time,
                               sent_datetime=sent_datetime,
                               file_handle=file_handle,
                               file_name=file_name,
                               file_size=file_size,
                               url=url,
                               message=message)
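
The fixed five-hour offset above is only correct while Eastern Time is not observing daylight saving. If DST accuracy matters, a conversion via the standard-library zoneinfo module (Python 3.9+) could look like the following sketch; the example timestamp is illustrative.

from datetime import datetime, timezone
from zoneinfo import ZoneInfo

def to_eastern(epoch_seconds):
    # convert a UNIX timestamp to US Eastern time, honoring DST
    utc_dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)
    return utc_dt.astimezone(ZoneInfo('America/New_York'))

# to_eastern(1625097600) -> 2021-06-30 20:00 in EDT (UTC-4)
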
Example #3
def success():
    # record a successful phase for the current file; engine, log_table,
    # hpo_id, table_name, csv_filename and phase come from the enclosing scope
    engine.execute(log_table.insert(),
                   hpo_id=hpo_id,
                   log_id=datetime.datetime.utcnow(),
                   table_name=table_name,
                   file_name=csv_filename,
                   phase=phase,
                   success=True)
Example #4
def create_schema(schema):
    """
    Create schema if it doesn't exist
    :param schema: name of schema
    :return:
    """
    # the schema name is interpolated directly, so it must come from a trusted source
    result = engine.execute(SCHEMA_EXISTS_QUERY % schema)
    if result.rowcount == 0:
        engine.execute(CreateSchema(schema))
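
SCHEMA_EXISTS_QUERY is defined elsewhere in the module. A plausible sketch, assuming a backend that exposes the standard information_schema (the exact query text is an assumption), together with the import that CreateSchema requires:

from sqlalchemy.schema import CreateSchema

# assumed definition; the schema name is spliced in with %, so it
# must come from a trusted source
SCHEMA_EXISTS_QUERY = ("SELECT schema_name FROM information_schema.schemata "
                       "WHERE schema_name = '%s'")
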
Example #5
def fail(message, params=None):
    # record a failed phase with error details; engine, log_table, hpo_id,
    # table_name, csv_filename and phase come from the enclosing scope
    engine.execute(log_table.insert(),
                   hpo_id=hpo_id,
                   log_id=datetime.datetime.utcnow(),
                   table_name=table_name,
                   file_name=csv_filename,
                   phase=phase,
                   success=False,
                   message=message,
                   params=params or None)
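
Both success() and fail() write to a shared log_table whose definition is not shown. A minimal sketch consistent with the columns used here and in export_log below; LOG_TABLE_NAME and the column types are assumptions.

from sqlalchemy import Boolean, Column, DateTime, MetaData, String, Table

LOG_TABLE_NAME = 'log'  # assumed name
metadata = MetaData()
log_table = Table(LOG_TABLE_NAME, metadata,
                  Column('log_id', DateTime),
                  Column('hpo_id', String),
                  Column('table_name', String),
                  Column('file_name', String),
                  Column('phase', String),
                  Column('success', Boolean),
                  Column('message', String),
                  Column('params', String))
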
Example #6
def export_log():
    """
    Dumps all logs for all HPOs to `_data/log.json`

    Note: the params column is excluded as a precaution, since it may contain sensitive data
    """
    all_log_items = []
    results = {}

    for hpo_id in hpo_ids:
        schema = hpo_id if use_multi_schemas else None
        metadata = MetaData(bind=engine, reflect=True, schema=schema)
        log_table = Table(LOG_TABLE_NAME, metadata, autoload=True)
        hpo_results = {}
        for row in engine.execute(log_table.select()):
            row_dict = dict(zip(row.keys(), row))
            table_name = row_dict['table_name']
            if table_name not in hpo_results:
                hpo_results[table_name] = {
                    'received': False,
                    'parsing': False,
                    'loading': False,
                    'message': None
                }

            table_results = hpo_results[table_name]
            table_results['file_name'] = row_dict['file_name']

            phase = row_dict['phase'].lower()
            if 'received' in phase:
                table_results['received'] = row_dict['success']
            elif 'parsing' in phase:
                table_results['parsing'] = row_dict['success']
            elif 'loading' in phase:
                table_results['loading'] = row_dict['success']

            # cast to str so log_id is JSON-serializable
            table_results['log_id'] = str(row_dict['log_id'])

            # save error details
            message = row_dict['message']
            if message is not None:
                table_results['message'] = message

        results[hpo_id] = hpo_results

    for hpo_id, hpo_results in results.items():
        for table_name, table_results in hpo_results.items():
            all_log_items.append({
                'log_id': table_results['log_id'],
                'hpo_id': hpo_id,
                'table_name': table_name,
                'file_name': table_results['file_name'],
                'received': table_results['received'],
                'parsing': table_results['parsing'],
                'loading': table_results['loading'],
                'message': table_results['message']
            })

    log_path = os.path.join(resources.data_path, 'log.json')
    with open(log_path, 'w') as log_file:
        log_file.write(json.dumps(all_log_items))

    write_report_info()
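
For reference, each item written to _data/log.json has the following shape; the values below are illustrative, not real output.

[
    {
        "log_id": "2021-06-30 20:00:00",
        "hpo_id": "example_hpo",
        "table_name": "person",
        "file_name": "person.csv",
        "received": true,
        "parsing": true,
        "loading": false,
        "message": "loading failed"
    }
]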