예제 #1
0
def primary_tag_entities_query(company_identifier: str, entity: Entity,
                               entity_array: List[Dict[str, any]],
                               tag: str) -> SQL.Query:
    """Build the query that writes a primary tag for a batch of entities.

    The generated SQL stages one row per entity in a temporary table created
    ``like`` the target tag table, runs a ``SQL.MergeQuery`` from the temp
    table into ``{company_identifier}.{entity.target_tag_table_name}``
    (joining on ``channel`` and the entity id column, updating the primary
    tag column and ``upload_group``), and finally drops the temp table.

    Args:
        company_identifier: Schema of the target table; also stored in the
            ``company_identifier`` column of every staged row.
        entity: Provides the id / primary-tag column names and the temp /
            target table names.
        entity_array: One dict per entity. Each dict is read for the keys
            ``'app'``, ``'channel'`` and ``entity.id_column_name``.
        tag: Value stored in the primary tag column of every staged row.

    Returns:
        A ``SQL.Query`` whose substitution parameters are the flattened row
        values followed by the merge query's own parameters.
    """
    # Every row of one call shares the same UTC-timestamped upload group.
    timestamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    upload_group = f'almacen_api {timestamp}'

    staged_rows = []
    for item in entity_array:
        staged_rows.append([
            company_identifier,
            item['app'],
            item['channel'],
            str(item[entity.id_column_name]),
            tag,
            upload_group,
        ])
    values_sql = ',\n'.join(
        [SQL.Query.format_array(row) for row in staged_rows])

    merge = SQL.MergeQuery(
        join_columns=['channel', entity.id_column_name],
        update_columns=[entity.primary_tag_column_name, 'upload_group'],
        source_table=entity.temp_tag_table_name,
        target_table=entity.target_tag_table_name,
        source_schema=None,
        target_schema=company_identifier)

    statement = f'''
create temp table {entity.temp_tag_table_name} (like {company_identifier}.{entity.target_tag_table_name});
insert into {entity.temp_tag_table_name} (company_identifier, app, channel, {entity.id_column_name}, {entity.primary_tag_column_name}, upload_group)
values {values_sql};
{merge.query};
drop table {entity.temp_tag_table_name};
  '''
    # Parameter order must follow placeholder order: row values first,
    # then whatever the merge statement needs.
    row_parameters = tuple(itertools.chain.from_iterable(staged_rows))
    parameters = row_parameters + merge.substitution_parameters
    return SQL.Query(statement, substitution_parameters=parameters)
예제 #2
0
    def run_query(self, query: SQL.Query):
        """Run ``query`` on a fresh SQL layer and return a Flask JSON response.

        If the statement produced no result set (``cursor.description`` is
        ``None``) the response is a regular ``flask.jsonify`` payload with
        ``column_names`` and ``rows`` set to ``None`` and ``row_count`` set to
        ``cursor.rowcount``.

        Otherwise the response body is streamed: rows are fetched from the
        cursor 1000 at a time and serialized with ``self.to_json`` into a
        JSON object of the form
        ``{"column_names": [...], "rows": [...], "row_count": N}``.

        The connection is always released: on any failure before streaming
        starts, and when the stream finishes, fails, or is closed early.
        The transaction is committed only on success.

        Args:
            query: The query to execute.

        Returns:
            A Flask response with mimetype ``application/json``.
        """
        sql_layer = SQL.Layer()
        sql_layer.connect()

        # Once streaming starts, the generator below owns the connection and
        # is responsible for closing it.
        handed_to_stream = False
        try:
            cursor = query.run(sql_layer=sql_layer)

            if cursor.description is None:
                response = flask.jsonify({
                    'column_names': None,
                    'rows': None,
                    'row_count': cursor.rowcount,
                })
                sql_layer.commit()
                return response

            column_names = [c.name for c in cursor.description]
            handed_to_stream = True
        finally:
            if not handed_to_stream:
                sql_layer.disconnect()

        def generate():
            try:
                yield '{{"column_names": {column_names}, "rows": ['.format(
                    column_names=self.to_json(column_names))
                prefix = ''
                while True:
                    rows = cursor.fetchmany(size=1000)
                    if not rows:
                        break
                    yield prefix + ', '.join([self.to_json(r) for r in rows])
                    # Every batch after the first needs a separating comma.
                    prefix = ', '
                row_count = cursor.rowcount
                sql_layer.commit()
            finally:
                # Also runs when the consumer closes the generator early
                # (GeneratorExit), so an aborted download cannot leak the
                # connection.
                sql_layer.disconnect()
            yield '], "row_count": {row_count}}}'.format(
                row_count=self.to_json(row_count))

        return flask.Response(generate(), mimetype='application/json')
예제 #3
0
def run(identifier):
    """Run the request's query via UNLOAD and report where the results are.

    The request body (validated against ``query_validation.post_run_schema``)
    supplies the query text. The results are unloaded to
    ``s3://<bucket>/<bucket_directory>/<identifier>-<sha1 of query>``, the
    resulting object's metadata is rewritten so that it is served as a
    gzip-encoded CSV attachment named ``<identifier>.csv``, and a JSON
    response describing the object's location is returned.

    Args:
        identifier: Used as the prefix of the S3 key and as the download
            file name.

    Returns:
        A Flask JSON response with ``s3_bucket``, ``results_path`` and
        ``unsigned_url``; when ``api.debug`` is set it also contains a
        ``signed_url`` valid for one hour.
    """
    body = api.valid_body_from_request(request=flask.request,
                                       schema=query_validation.post_run_schema)

    s3_options = api.s3_configuration
    s3_bucket = s3_options['bucket_name']
    query_text = body['query']
    # Identical query text for the same identifier maps to the same key.
    query_hash = sha1(query_text.encode('utf-8')).hexdigest()
    path = s3_options['bucket_directory'] + f'/{identifier}-{query_hash}'
    results_url = f's3://{s3_bucket}/{path}'

    unload_query = _generate_unload_query(
        query_text=query_text,
        results_url=results_url,
        access_key_id=s3_options['access_key_id'],
        secret_access_key=s3_options['secret_access_key'])

    layer = SQL.Layer()
    layer.connect()
    try:
        unload_query.run(sql_layer=layer)
    finally:
        # Release the connection even when the unload fails.
        layer.disconnect()

    region = s3_options['bucket_region']
    # NOTE(review): assumes the unload writes exactly one object named
    # "<prefix>000.gz" (single file, gzip) - confirm against the UNLOAD docs.
    s3_key = f'{path}000.gz'
    unsigned_url = f'https://{s3_bucket}.s3.{region}.amazonaws.com/{s3_key}'

    # Copy the object onto itself with MetadataDirective='REPLACE' to set
    # its content type, encoding and disposition headers.
    s3 = boto3.resource('s3',
                        aws_access_key_id=s3_options['access_key_id'],
                        aws_secret_access_key=s3_options['secret_access_key'])
    s3_object = s3.Object(s3_bucket, s3_key)
    s3_object.copy_from(
        CopySource={
            'Bucket': s3_bucket,
            'Key': s3_key
        },
        ContentType='text/csv',
        ContentEncoding='gzip',
        ContentDisposition=f'attachment; filename="{identifier}.csv"',
        MetadataDirective='REPLACE')

    payload = {
        's3_bucket': s3_bucket,
        'results_path': s3_key,
        'unsigned_url': unsigned_url,
    }

    if api.debug:
        s3_client = boto3.client(
            's3',
            aws_access_key_id=s3_options['access_key_id'],
            aws_secret_access_key=s3_options['secret_access_key'])
        payload['signed_url'] = s3_client.generate_presigned_url(
            'get_object',
            Params={
                'Bucket': s3_bucket,
                'Key': s3_key
            },
            ExpiresIn=3600)

    return flask.jsonify(payload)
예제 #4
0
def _generate_unload_query(query_text: str, results_url: str,
                           access_key_id: str,
                           secret_access_key: str) -> SQL.Query:
    """Build an ``unload`` statement that writes a query's results to a URL.

    All four arguments are passed as substitution parameters rather than
    being interpolated into the SQL text. The statement requests
    ``parallel off``, CSV format with a header row, gzip compression, and
    ``allowoverwrite``.

    Args:
        query_text: The query whose results are unloaded.
        results_url: Destination passed to the ``to`` clause.
        access_key_id: Credential for the ``access_key_id`` clause.
        secret_access_key: Credential for the ``secret_access_key`` clause.

    Returns:
        A ``SQL.Query`` whose parameters are, in order: the query text, the
        destination URL, then the credential parameters.
    """
    credentials = SQL.Query(
        query='access_key_id %s secret_access_key %s',
        substitution_parameters=(access_key_id, secret_access_key))

    statement = f'''
unload (%s)
to %s
{credentials.query}
parallel off
format as csv
allowoverwrite
gzip
header;
    '''
    # Parameter order has to match the order of the %s placeholders above.
    parameters = (query_text,
                  results_url) + tuple(credentials.substitution_parameters)
    return SQL.Query(query=statement, substitution_parameters=parameters)
예제 #5
0
def refresh_tags(schema: str):
    """Run ``RefreshTagsQuery`` for ``schema`` and log the outcome.

    The query runs on a fresh SQL layer and is committed on success. Any
    notices collected on the connection are included in the log message,
    one per line. The connection is released even if the query or the
    commit raises.

    Args:
        schema: Schema passed to ``RefreshTagsQuery``.
    """
    query = RefreshTagsQuery(schema=schema)
    sql_layer = SQL.Layer()
    sql_layer.connect()
    try:
        query.run(sql_layer=sql_layer)
        # Grab the notices before the connection is closed.
        notices = sql_layer.connection.notices
        sql_layer.commit()
    finally:
        sql_layer.disconnect()

    if notices:
        newline = '\n'
        log.log(f'Refreshed tags: {newline.join(notices)}')
    else:
        log.log('Refreshed tags.')
예제 #6
0
def delete_tag_entities_query(
        company_identifier: str, entity: Entity,
        entities_array: List[Dict[str, any]]) -> SQL.Query:
    """Build the query that deletes tag rows for a batch of entities.

    Rows are matched in ``{company_identifier}.{entity.target_tag_table_name}``
    by the pair ``(channel, <entity id column>)``.

    Args:
        company_identifier: Schema of the target tag table.
        entity: Provides the id column name and the target table name.
        entities_array: One dict per entity. Each dict is read for the keys
            ``'channel'`` and ``entity.id_column_name``.

    Returns:
        A ``SQL.Query`` whose substitution parameters are the flattened
        ``(channel, id)`` pairs, in input order.
    """
    key_pairs = []
    for item in entities_array:
        # Ids are always sent as strings.
        key_pairs.append([item['channel'], str(item[entity.id_column_name])])

    # One formatted group per (channel, id) pair, as produced by
    # SQL.Query.format_array.
    in_list = ',\n'.join(
        [SQL.Query.format_array(pair) for pair in key_pairs])

    statement = f'''
delete from {company_identifier}.{entity.target_tag_table_name}
where (channel, {entity.id_column_name}) in ({in_list});
  '''
    parameters = tuple(itertools.chain.from_iterable(key_pairs))
    return SQL.Query(statement, substitution_parameters=parameters)
예제 #7
0
def update_cube_entity_tags_query(company_identifier: str,
                                  entity: Entity) -> SQL.Query:
    """Build the query that re-syncs an entity's tag columns into the cube.

    Inside a single transaction the generated SQL first sets every column in
    ``entity.tag_column_names`` to null on
    ``{company_identifier}.performance_cube_filtered``, then copies those
    columns from ``{company_identifier}.{entity.target_tag_table_name}`` for
    rows that match on ``channel`` and the entity id column.

    Args:
        company_identifier: Schema that holds both tables.
        entity: Provides the tag column names, id column name and the tag
            table name.

    Returns:
        A ``SQL.Query`` with no substitution parameters.
    """
    clear_assignments = ', '.join(
        [f'{column} = null' for column in entity.tag_column_names])
    copy_assignments = ', '.join(
        [f'{column} = t.{column}' for column in entity.tag_column_names])

    cube = f'{company_identifier}.performance_cube_filtered'
    tag_table = f'{company_identifier}.{entity.target_tag_table_name}'
    id_column = entity.id_column_name

    statement = f'''
begin transaction;
update {cube}
set {clear_assignments};

update {cube}
set {copy_assignments}
from {tag_table} as t
where {cube}.channel = t.channel
and {cube}.{id_column} = t.{id_column};
end transaction;
  '''
    return SQL.Query(statement)
예제 #8
0
def get_entities_query(company_identifier: str,
                       entity: Entity,
                       app_identifier: Optional[str] = None,
                       tag: Optional[str] = None) -> SQL.Query:
    """Build a ``select *`` over an entity's source table with optional filters.

    The filter values are passed as substitution parameters instead of being
    interpolated into the SQL text, so quotes or other special characters in
    ``app_identifier`` / ``tag`` cannot alter the statement.

    Args:
        company_identifier: Schema of the source table.
        entity: Provides the source table name and, via ``entity.value``,
            the prefix of the ``<value>_tag`` column.
        app_identifier: When not ``None``, restricts rows to this
            ``app_display_name``.
        tag: When not ``None``, restricts rows to this ``<value>_tag``.

    Returns:
        A ``SQL.Query`` selecting the matching rows; its substitution
        parameters hold the filter values in placeholder order.
    """
    query = f'select * from {company_identifier}.{entity.source_table_name}'

    conditions = []
    parameters = []
    if app_identifier is not None:
        conditions.append('app_display_name = %s')
        parameters.append(app_identifier)
    if tag is not None:
        conditions.append(f'{entity.value}_tag = %s')
        parameters.append(tag)

    if conditions:
        # First condition is introduced by "where", the rest by "and",
        # each on its own line.
        query += '\nwhere ' + '\nand '.join(conditions)

    return SQL.Query(query=query, substitution_parameters=tuple(parameters))
예제 #9
0
 def execute_query(self, query: SQL.Query) -> QueryResult:
     """Run ``query`` on a fresh SQL layer and return the full result.

     All rows are fetched into memory. The transaction is committed on
     success, and the connection is released even if running, fetching or
     committing raises.

     Args:
         query: The query to execute.

     Returns:
         An ``AlmacenAPI.QueryResult`` carrying ``row_count`` and, when the
         statement produced a result set, ``column_names`` and ``rows``.
     """
     sql_layer = SQL.Layer()
     sql_layer.connect()
     try:
         cursor = query.run(sql_layer=sql_layer)
         if cursor.description is not None:
             column_names = [c.name for c in cursor.description]
             rows = cursor.fetchall()
             result = AlmacenAPI.QueryResult(row_count=cursor.rowcount,
                                             column_names=column_names,
                                             rows=rows)
         else:
             # No result set: only the row count is meaningful.
             result = AlmacenAPI.QueryResult(row_count=cursor.rowcount)
         sql_layer.commit()
     finally:
         sql_layer.disconnect()
     return result
예제 #10
0
def query():
    """Run the query described by the current request and return the result.

    The request body is validated against ``query_schema``. It must contain
    ``'query'`` and may contain ``'substitution_parameters'``; when the
    latter is absent the query runs with no parameters.

    Returns:
        Whatever ``api.run_query`` returns for the query.

    Raises:
        AlmacenAPI.Error: With code 424 when running the query raises any
            ``Exception``; the original exception is passed as ``error``.
    """
    body = api.valid_body_from_request(request=flask.request,
                                       schema=query_schema)

    if 'substitution_parameters' in body:
        parameters = tuple(body['substitution_parameters'])
    else:
        parameters = ()
    sql_query = SQL.Query(query=body['query'],
                          substitution_parameters=parameters)

    try:
        return api.run_query(sql_query)
    except Exception as error:
        raise AlmacenAPI.Error(code=424,
                               message='Error running query.',
                               error=error)