def primary_tag_entities_query(company_identifier: str, entity: Entity, entity_array: List[Dict[str, any]], tag: str) -> SQL.Query:
    """Build the query that sets ``tag`` as the primary tag of the given entities.

    The generated SQL creates a temporary table shaped like the company's
    target tag table, inserts one row per element of ``entity_array`` into
    it, runs a ``SQL.MergeQuery`` from the temporary table into the target
    table, and finally drops the temporary table.

    Args:
        company_identifier: Used as the target schema name and as the value
            of the ``company_identifier`` column of every inserted row.
        entity: Supplies the table and column names used in the statement.
        entity_array: One mapping per entity; each must contain ``'app'``,
            ``'channel'`` and the key ``entity.id_column_name``.
        tag: Value written to the ``entity.primary_tag_column_name`` column.

    Returns:
        A ``SQL.Query`` whose substitution parameters are the flattened row
        values followed by the merge query's own substitution parameters.
    """
    # Every row of one call shares the same upload group label (UTC time).
    timestamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    upload_group = f'almacen_api {timestamp}'

    rows = []
    for item in entity_array:
        rows.append([
            company_identifier,
            item['app'],
            item['channel'],
            str(item[entity.id_column_name]),
            tag,
            upload_group,
        ])
    format_rows = ',\n'.join(map(SQL.Query.format_array, rows))

    merge_query = SQL.MergeQuery(
        join_columns=['channel', entity.id_column_name],
        update_columns=[entity.primary_tag_column_name, 'upload_group'],
        source_table=entity.temp_tag_table_name,
        target_table=entity.target_tag_table_name,
        source_schema=None,
        target_schema=company_identifier)

    statement = f'''
    create temp table {entity.temp_tag_table_name} (like {company_identifier}.{entity.target_tag_table_name});
    insert into {entity.temp_tag_table_name} (company_identifier, app, channel, {entity.id_column_name}, {entity.primary_tag_column_name}, upload_group) values
    {format_rows};
    {merge_query.query};
    drop table {entity.temp_tag_table_name};
    '''
    # Parameter order must follow placeholder order: row values first, then
    # whatever the merge statement needs.
    row_parameters = tuple(value for row in rows for value in row)
    return SQL.Query(
        statement,
        substitution_parameters=row_parameters + merge_query.substitution_parameters)
def run_query(self, query: SQL.Query):
    """Run ``query`` and return its outcome as a Flask JSON response.

    If the statement produces no result set (``cursor.description`` is
    ``None``), a regular ``flask.jsonify`` response is returned with
    ``column_names`` and ``rows`` set to ``None`` and ``row_count`` set to
    the cursor's row count.

    Otherwise the rows are streamed: the response body is a JSON object
    with ``column_names``, ``rows`` and ``row_count`` that is generated
    incrementally, fetching 1000 rows at a time.

    The SQL layer is always disconnected, including when the query fails,
    when committing fails, or when the streamed response is closed before
    it has been fully consumed. The transaction is only committed on
    success.

    Args:
        query: The query to execute.

    Returns:
        A Flask response with mimetype ``application/json``.
    """
    sql_layer = SQL.Layer()
    sql_layer.connect()

    # Until a result set is confirmed this function owns the connection and
    # must close it on every exit path; afterwards the generator owns it.
    handed_off = False
    try:
        cursor = query.run(sql_layer=sql_layer)
        if cursor.description is None:
            response = flask.jsonify({
                'column_names': None,
                'rows': None,
                'row_count': cursor.rowcount,
            })
            sql_layer.commit()
            return response
        column_names = [c.name for c in cursor.description]
        handed_off = True
    finally:
        if not handed_off:
            sql_layer.disconnect()

    def generate():
        try:
            yield '{{"column_names": {column_names}, "rows": ['.format(
                column_names=self.to_json(column_names))
            prefix = ''
            while True:
                rows = cursor.fetchmany(size=1000)
                if not rows:
                    break
                yield prefix + ', '.join([self.to_json(r) for r in rows])
                # Every batch after the first needs a separator before it.
                prefix = ', '
            row_count = cursor.rowcount
            sql_layer.commit()
        finally:
            # Also runs when the consumer closes the generator early
            # (GeneratorExit) or when fetching/serializing raises.
            sql_layer.disconnect()
        yield '], "row_count": {row_count}}}'.format(
            row_count=self.to_json(row_count))

    return flask.Response(generate(), mimetype='application/json')
def run(identifier):
    """Unload the results of the request's query to S3 and describe where they are.

    The request body is validated against
    ``query_validation.post_run_schema``. Its ``query`` text is wrapped in
    an unload statement whose destination is
    ``<bucket_directory>/<identifier>-<sha1 of the query text>`` inside the
    configured bucket. After the unload, the resulting object is copied
    onto itself to replace its metadata so that it is served as a gzipped
    CSV attachment named ``<identifier>.csv``.

    Args:
        identifier: Used in the S3 key and in the download file name.

    Returns:
        A JSON response with ``s3_bucket``, ``results_path`` and
        ``unsigned_url``; when ``api.debug`` is truthy it also contains a
        ``signed_url`` that expires after 3600 seconds.
    """
    body = api.valid_body_from_request(request=flask.request,
                                       schema=query_validation.post_run_schema)
    s3_options = api.s3_configuration
    s3_bucket = s3_options['bucket_name']

    query_text = body['query']
    # The hash makes the destination depend on the query text as well as on
    # the identifier.
    query_hash = sha1(query_text.encode('utf-8')).hexdigest()
    path = s3_options['bucket_directory'] + f'/{identifier}-{query_hash}'
    results_url = f's3://{s3_bucket}/{path}'

    unload_query = _generate_unload_query(
        query_text=query_text,
        results_url=results_url,
        access_key_id=s3_options['access_key_id'],
        secret_access_key=s3_options['secret_access_key'])

    layer = SQL.Layer()
    layer.connect()
    try:
        unload_query.run(sql_layer=layer)
    finally:
        # Release the connection even when the unload statement fails.
        layer.disconnect()

    region = s3_options['bucket_region']
    # NOTE(review): presumably '000.gz' is the suffix the database appends to
    # the single output file of the unload -- confirm.
    s3_key = f'{path}000.gz'
    unsigned_url = f'https://{s3_bucket}.s3.{region}.amazonaws.com/{s3_key}'

    s3 = boto3.resource('s3',
                        aws_access_key_id=s3_options['access_key_id'],
                        aws_secret_access_key=s3_options['secret_access_key'])
    s3_object = s3.Object(s3_bucket, s3_key)
    # Copy the object onto itself with MetadataDirective='REPLACE' to set
    # the content headers on the unloaded file.
    s3_object.copy_from(
        CopySource={
            'Bucket': s3_bucket,
            'Key': s3_key
        },
        ContentType='text/csv',
        ContentEncoding='gzip',
        ContentDisposition=f'attachment; filename="{identifier}.csv"',
        MetadataDirective='REPLACE')

    payload = {
        's3_bucket': s3_bucket,
        'results_path': s3_key,
        'unsigned_url': unsigned_url,
    }
    if api.debug:
        s3_client = boto3.client(
            's3',
            aws_access_key_id=s3_options['access_key_id'],
            aws_secret_access_key=s3_options['secret_access_key'])
        payload['signed_url'] = s3_client.generate_presigned_url(
            'get_object',
            Params={
                'Bucket': s3_bucket,
                'Key': s3_key
            },
            ExpiresIn=3600)
    return flask.jsonify(payload)
def _generate_unload_query(query_text: str, results_url: str, access_key_id: str, secret_access_key: str) -> SQL.Query:
    """Build the ``unload`` statement that exports a query's results.

    The statement writes the results of ``query_text`` to ``results_url``
    as a single (``parallel off``) gzipped CSV file with a header row,
    overwriting existing output. The query text, the destination and both
    credentials are passed as substitution parameters rather than being
    interpolated into the SQL text.

    Args:
        query_text: SQL whose results are exported.
        results_url: Destination passed to the ``to`` clause.
        access_key_id: Credential for the ``access_key_id`` clause.
        secret_access_key: Credential for the ``secret_access_key`` clause.

    Returns:
        The ``SQL.Query`` for the unload statement.
    """
    credentials_clause = SQL.Query(
        query='access_key_id %s secret_access_key %s',
        substitution_parameters=(access_key_id, secret_access_key))

    statement = f'''
    unload (%s) to %s
    {credentials_clause.query}
    parallel off
    format as csv
    allowoverwrite
    gzip
    header;
    '''
    # Parameter order mirrors placeholder order in the statement above.
    parameters = (query_text, results_url) + tuple(
        credentials_clause.substitution_parameters)
    return SQL.Query(query=statement, substitution_parameters=parameters)
def refresh_tags(schema: str):
    """Run ``RefreshTagsQuery`` for ``schema`` and log the outcome.

    Any notices collected on the connection while the query ran are
    included in the log message, one per line. The SQL layer is
    disconnected even when running or committing the query raises.

    Args:
        schema: Passed through to ``RefreshTagsQuery``.
    """
    query = RefreshTagsQuery(schema=schema)
    sql_layer = SQL.Layer()
    sql_layer.connect()
    try:
        query.run(sql_layer=sql_layer)
        # Read the notices before disconnecting.
        notices = sql_layer.connection.notices
        sql_layer.commit()
    finally:
        sql_layer.disconnect()

    if notices:
        message = 'Refreshed tags: ' + '\n'.join(notices)
    else:
        message = 'Refreshed tags.'
    log.log(message)
def delete_tag_entities_query(company_identifier: str, entity: Entity, entities_array: List[Dict[str, any]]) -> SQL.Query:
    """Build the query that deletes tag rows for the given entities.

    Rows of ``{company_identifier}.{entity.target_tag_table_name}`` are
    matched on the pair (``channel``, ``entity.id_column_name``).

    Args:
        company_identifier: Schema that holds the target tag table.
        entity: Supplies the table name and the id column name.
        entities_array: One mapping per entity; each must contain
            ``'channel'`` and the key ``entity.id_column_name``.

    Returns:
        A ``SQL.Query`` whose substitution parameters are the flattened
        (channel, id) pairs, with ids converted to ``str``.
    """
    rows = []
    for item in entities_array:
        rows.append([item['channel'], str(item[entity.id_column_name])])

    formatted_rows = ',\n'.join(map(SQL.Query.format_array, rows))
    parameters = tuple(value for row in rows for value in row)

    statement = f'''
    delete from {company_identifier}.{entity.target_tag_table_name}
    where (channel, {entity.id_column_name}) in ({formatted_rows});
    '''
    return SQL.Query(statement, substitution_parameters=parameters)
def update_cube_entity_tags_query(company_identifier: str, entity: Entity) -> SQL.Query:
    """Build the query that re-syncs an entity's tag columns in the cube.

    Inside a single transaction the statement first sets every column in
    ``entity.tag_column_names`` to null on
    ``{company_identifier}.performance_cube_filtered``, then copies those
    columns from ``{company_identifier}.{entity.target_tag_table_name}``
    for rows that match on ``channel`` and ``entity.id_column_name``.

    Args:
        company_identifier: Schema holding both tables.
        entity: Supplies the tag columns, tag table name and id column name.

    Returns:
        A ``SQL.Query`` with no substitution parameters.
    """
    clear_assignments = ', '.join(
        [f'{c} = null' for c in entity.tag_column_names])
    copy_assignments = ', '.join(
        [f'{c} = t.{c}' for c in entity.tag_column_names])

    statement = f'''
    begin transaction;
    update {company_identifier}.performance_cube_filtered
    set {clear_assignments};
    update {company_identifier}.performance_cube_filtered
    set {copy_assignments}
    from {company_identifier}.{entity.target_tag_table_name} as t
    where {company_identifier}.performance_cube_filtered.channel = t.channel
    and {company_identifier}.performance_cube_filtered.{entity.id_column_name} = t.{entity.id_column_name};
    end transaction;
    '''
    return SQL.Query(statement)
def get_entities_query(company_identifier: str, entity: Entity, app_identifier: Optional[str] = None, tag: Optional[str] = None) -> SQL.Query:
    """Build the query that selects entities, optionally filtered.

    Selects every column from
    ``{company_identifier}.{entity.source_table_name}``. When
    ``app_identifier`` is given, rows are restricted to that
    ``app_display_name``; when ``tag`` is given, rows are restricted to
    that value of the ``{entity.value}_tag`` column. Both filters may be
    combined.

    The filter values are bound as substitution parameters instead of being
    spliced into the SQL text, so values containing quotes cannot alter the
    statement.

    Args:
        company_identifier: Schema that holds the source table.
        entity: Supplies the source table name and the tag column prefix.
        app_identifier: Optional value to match against ``app_display_name``.
        tag: Optional value to match against the entity's tag column.

    Returns:
        A ``SQL.Query`` with one substitution parameter per active filter.
    """
    query = f'select * from {company_identifier}.{entity.source_table_name}'

    conditions = []
    parameters = []
    if app_identifier is not None:
        conditions.append('app_display_name = %s')
        parameters.append(app_identifier)
    if tag is not None:
        conditions.append(f'{entity.value}_tag = %s')
        parameters.append(tag)

    if conditions:
        # First condition is introduced by "where", the rest by "and".
        query += '\nwhere ' + '\nand '.join(conditions)

    return SQL.Query(query=query, substitution_parameters=tuple(parameters))
def execute_query(self, query: SQL.Query) -> QueryResult:
    """Run ``query`` on a fresh SQL layer and return all of its results.

    When the statement produces a result set, every row is fetched and
    returned together with the column names; otherwise only the row count
    is reported. The transaction is committed on success, and the SQL layer
    is disconnected even when running, fetching or committing raises.

    Args:
        query: The query to execute.

    Returns:
        An ``AlmacenAPI.QueryResult`` carrying ``row_count`` and, for
        statements with a result set, ``column_names`` and ``rows``.
    """
    sql_layer = SQL.Layer()
    sql_layer.connect()
    try:
        cursor = query.run(sql_layer=sql_layer)
        if cursor.description is not None:
            column_names = [c.name for c in cursor.description]
            rows = cursor.fetchall()
            result = AlmacenAPI.QueryResult(row_count=cursor.rowcount,
                                            column_names=column_names,
                                            rows=rows)
        else:
            # No result set: only the row count is available.
            result = AlmacenAPI.QueryResult(row_count=cursor.rowcount)
        sql_layer.commit()
    finally:
        sql_layer.disconnect()
    return result
def query():
    """Run the query described by the request body and return its response.

    The body is validated against ``query_schema``. Its ``query`` text and
    optional ``substitution_parameters`` (defaulting to an empty tuple) are
    wrapped in a ``SQL.Query`` and handed to ``api.run_query``.

    Returns:
        Whatever ``api.run_query`` returns.

    Raises:
        AlmacenAPI.Error: With code 424 when ``api.run_query`` raises any
            ``Exception``; the original error is passed along as ``error``.
    """
    body = api.valid_body_from_request(request=flask.request,
                                       schema=query_schema)

    query_text = body['query']
    if 'substitution_parameters' in body:
        parameters = tuple(body['substitution_parameters'])
    else:
        parameters = ()
    sql_query = SQL.Query(query=query_text, substitution_parameters=parameters)

    try:
        return api.run_query(sql_query)
    except Exception as error:
        raise AlmacenAPI.Error(code=424,
                               message='Error running query.',
                               error=error)