Esempio n. 1
0
def format_labels(node: pipelines.Node):
    """Renders the labels of a node as comma-separated html markup

    Each label becomes a `span` holding the label name, a colon and the label value,
    where the value is wrapped in a non-breaking `tt` element.

    Args:
        node: The node whose `labels` mapping is rendered

    Returns:
        The rendered labels joined by ', ' (an empty string when the node has no labels)
    """
    rendered_labels = []
    for label, value in node.labels.items():
        value_markup = _.tt(style='white-space:nowrap')[value]
        rendered_labels.append(str(_.span[label, ': ', value_markup]))

    # joining an empty list already yields '', so no extra fallback is needed
    return ', '.join(rendered_labels)
Esempio n. 2
0
def oauth2_export_to_google_sheet():
    """Starts the Google OAuth 2.0 flow for exporting a query to a Google sheet

    Reads the query and the formatting options from the submitted form, stores them
    in the session for the callback view and redirects to the authorization url.

    Returns:
        A redirect to the authorization url, a 403 response when the permission check
        for the query fails, or a 500 response when the Google client packages
        can not be imported
    """
    try:
        import google_auth_oauthlib.flow
        import googleapiclient.discovery
    except ImportError:
        install_hint = _.tt(style='color:red')[
            "Please install the packages 'google_auth_oauthlib' and 'google-api-python-client'"]
        return flask.make_response(str(install_hint), 500)

    # NOTE(review): presumably this lets oauthlib accept plain-http redirect uris;
    # confirm that this is intended outside of local development
    os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'

    from .query import Query
    query = Query.from_dict(json.loads(flask.request.form['query']))

    if not current_user_has_permission(query):
        return flask.make_response(acl.inline_permission_denied_message(), 403)

    # Keep a freshly parsed copy of the query definition for the callback view
    flask.session['query_for_google_sheet_callback'] = json.loads(flask.request.form['query'])

    # Authorization
    client_config = config.google_sheet_oauth2_client_config()
    flow = google_auth_oauthlib.flow.Flow.from_client_config(client_config, scopes=SCOPES)

    # Where the API server sends the user after the authorization flow completed. Required.
    flow.redirect_uri = flask.url_for('mara_data_explorer.google_sheet_oauth2callback', _external=True)

    # Build the url for the request to Google's OAuth 2.0 server:
    # - offline access allows refreshing an access token without re-prompting the user
    #   for permission (recommended for web server apps)
    # - incremental authorization is recommended as a best practice
    authorization_url, state = flow.authorization_url(access_type='offline',
                                                      include_granted_scopes='true')

    # The callback needs the state for verifying the auth server response,
    # and the formatting parameters for building the sheet
    flask.session['state'] = state
    flask.session['decimal_mark'] = flask.request.form['decimal-mark']
    flask.session['array_format'] = flask.request.form['array-format']

    return flask.redirect(authorization_url)
Esempio n. 3
0
def google_sheet_oauth2callback():
    """Finishes the Google OAuth 2.0 flow and uploads the query result to a new Google sheet

    Restores the query, the OAuth state and the formatting options from the session,
    fetches the OAuth tokens, creates a spreadsheet and fills it in batches of rows.

    Returns:
        A redirect to the created spreadsheet, or a 500 response when the Google client
        packages can not be imported
    """
    try:
        import google_auth_oauthlib.flow
        import googleapiclient.discovery
    except ImportError:
        install_hint = _.tt(style='color:red')[
            "Please install the packages 'google_auth_oauthlib' and 'google-api-python-client'"]
        return flask.make_response(str(install_hint), 500)

    from .query import Query
    query = Query.from_dict(flask.session['query_for_google_sheet_callback'])

    # Pass the stored state to the flow so that it can be verified
    # against the authorization server response.
    expected_state = flask.session['state']
    flow = google_auth_oauthlib.flow.Flow.from_client_config(
        config.google_sheet_oauth2_client_config(),
        scopes=SCOPES,
        state=expected_state)
    flow.redirect_uri = flask.url_for('mara_data_explorer.google_sheet_oauth2callback', _external=True)

    # Use the authorization server's response to fetch the OAuth 2.0 tokens.
    flow.fetch_token(authorization_response=flask.request.url)

    # <data set id>[-<query id>]-<today>
    spreadsheet_title = query.data_set_id
    if query.query_id:
        spreadsheet_title += '-' + query.query_id
    spreadsheet_title += '-' + datetime.date.today().isoformat()

    decimal_mark = flask.session.pop('decimal_mark')
    array_format = flask.session.pop('array_format')

    service = googleapiclient.discovery.build('sheets', 'v4', credentials=flow.credentials)

    # The decimal mark is determined through the locale of the Google sheet
    sheet_locale = 'de_DE' if decimal_mark == ',' else 'en_US'
    create_request = service.spreadsheets().create(
        body={'properties': {'title': spreadsheet_title, 'locale': sheet_locale}},
        fields='spreadsheetId')
    spreadsheet_id = create_request.execute().get('spreadsheetId')

    def upload_rows(rows, first_row_number):
        """Writes `rows` into the sheet, starting at column A of row `first_row_number`"""
        body = {
            "data": [
                {
                    "values": rows,
                    "range": "A" + str(first_row_number),
                    "majorDimension": "ROWS"
                }
            ],
            # USER_ENTERED: The values will be parsed as if the user typed them into the UI.
            # Numbers will stay as numbers, but strings may be converted to numbers, dates, etc.
            # RAW: The values the user has entered will not be parsed and will be stored as-is.
            "valueInputOption": "USER_ENTERED"
        }
        service.spreadsheets().values().batchUpdate(spreadsheetId=spreadsheet_id, body=body).execute()

    # Upload data from the generator in batches of 10K rows each. Each batch is one request.
    # Api limits: https://developers.google.com/sheets/api/limits
    google_sheet_rows = query.as_rows_for_google_sheet(
        array_format=array_format,
        limit=100000,
        include_personal_data=acl.current_user_has_permission(personal_data_acl_resource))

    batch_length = 10000
    uploaded_row_count = 0
    pending_rows = []
    for row in google_sheet_rows:
        pending_rows.append(row)
        if len(pending_rows) == batch_length:
            upload_rows(pending_rows, uploaded_row_count + 1)
            uploaded_row_count += batch_length
            pending_rows = []

    # Upload the trailing partial batch. When there were no rows at all,
    # a single request without values is still sent for cell A1.
    if pending_rows or uploaded_row_count == 0:
        upload_rows(pending_rows, uploaded_row_count + 1)

    return flask.redirect('https://docs.google.com/spreadsheets/d/' + str(spreadsheet_id))
def dependency_graph(nodes: {str: pipelines.Node},
                     current_node: pipelines.Node = None) -> str:
    """
    Renders pipeline nodes and the dependencies between them as a graphviz chart

    Args:
        nodes: The nodes to draw, looked up by node id
        current_node: When given, this node is drawn highlighted and without a link

    Returns:
        The svg markup of the graph, or a red error message markup when the
        graphviz executable can not be found
    """
    import graphviz

    graph = graphviz.Digraph(graph_attr={'rankdir': 'TD', 'ranksep': '0.25', 'nodesep': '0.1'})

    # Ids of all upstreams that are reachable through at least one intermediate node.
    # A direct edge from such an upstream is redundant.
    @functools.lru_cache(maxsize=None)
    def indirect_upstream_ids(node, is_start_node=True):
        collected_ids = set()
        for parent in node.upstreams:
            if not is_start_node:
                collected_ids.add(parent.id)
            collected_ids.update(indirect_upstream_ids(parent, False))
        return collected_ids

    def add_invisible_placeholder(placeholder_id):
        """Adds a tiny hidden node that stands in for a node outside of `nodes`"""
        graph.node(name=placeholder_id,
                   _attributes={'style': 'invis', 'label': '', 'height': '0.1', 'fixedsize': 'true'})

    def dotted_edge_attributes(tooltip):
        """Attributes of an edge from / to a placeholder, with the outside node id as tooltip"""
        return {'color': '#888888', 'arrowsize': '0.7', 'edgetooltip': tooltip, 'style': 'dotted'}

    for pipeline_node in nodes.values():
        attributes = {
            'fontname': ' ',  # use website default
            'fontsize': '10.5px'  # fontsize unfortunately must be set
        }

        # every node except the current one links to its own page
        if pipeline_node != current_node:
            attributes.update({
                'href': views.node_url(pipeline_node),
                'fontcolor': '#0275d8',
                'tooltip': pipeline_node.description,
                'color': 'transparent'
            })
        else:
            attributes.update({'color': '#888888', 'style': 'dotted'})

        # pipelines and parallel tasks get a dotted grey outline, everything else a plain box
        dotted_outline = {'style': 'dotted', 'color': '#888888'}
        if isinstance(pipeline_node, pipelines.Pipeline):
            attributes.update({'shape': 'rectangle', **dotted_outline})
        elif isinstance(pipeline_node, pipelines.ParallelTask):
            attributes.update({'shape': 'ellipse', **dotted_outline})
        else:
            attributes['shape'] = 'rectangle'

        graph.node(name=pipeline_node.id,
                   label=pipeline_node.id.replace('_', '\n'),
                   _attributes=attributes)

        for upstream in pipeline_node.upstreams:
            if upstream.id in nodes:
                # redundant edges are drawn dashed and in a lighter color
                is_redundant = upstream.id in indirect_upstream_ids(pipeline_node)
                graph.edge(upstream.id,
                           pipeline_node.id,
                           _attributes={
                               'color': '#cccccc' if is_redundant else '#888888',
                               'arrowsize': '0.7',
                               'style': 'dashed' if is_redundant else 'solid'
                           })
            elif not current_node or pipeline_node in current_node.upstreams:
                # upstream outside of `nodes`: dotted edge coming from a hidden placeholder
                placeholder_id = f'{upstream.id}_{pipeline_node.id}'
                add_invisible_placeholder(placeholder_id)
                graph.edge(placeholder_id,
                           pipeline_node.id,
                           _attributes=dotted_edge_attributes(upstream.id))

        for downstream in pipeline_node.downstreams:
            if downstream.id in nodes:
                continue
            if current_node and pipeline_node not in current_node.downstreams:
                continue

            # downstream outside of `nodes`: dotted edge going to a hidden placeholder
            placeholder_id = f'{downstream.id}_{pipeline_node.id}'
            add_invisible_placeholder(placeholder_id)
            graph.edge(pipeline_node.id,
                       placeholder_id,
                       _attributes=dotted_edge_attributes(downstream.id))

    try:
        svg = graph.pipe('svg')
        return svg.decode('utf-8')
    except graphviz.backend.ExecutableNotFound as e:
        return str(_.tt(style='color:red')[str(e)])
Esempio n. 5
0
def draw_schema(db_alias: str, schemas: str):
    """Shows a chart of the tables and FK relationships in a given database and schema list

    Query parameters read from the request:
        hide-columns: When set to a non-empty value, only table names are drawn
        engine: The graphviz layout engine to use (defaults to 'neato')

    Args:
        db_alias: The alias of the database, must be contained in `config.databases()`
        schemas: The names of the schemas to draw, separated by '/'

    Returns:
        A response with the svg as an attachment, or a red error message markup
        when the graphviz executable can not be found

    Aborts with 404 for an unknown database and with 400 for an unknown layout engine.
    """
    if db_alias not in config.databases():
        flask.abort(404, f'unknown database {db_alias}')

    schema_names = schemas.split('/')
    hide_columns = flask.request.args.get('hide-columns')
    engine = flask.request.args.get('engine', 'neato')

    tables, fk_constraints = extract_schema(db_alias, schema_names)

    import graphviz.backend

    # The engine comes straight from the query string; graphviz rejects
    # unknown engines with a ValueError, which is a client error, not a server error
    try:
        graph = graphviz.Digraph(engine=engine,
                                 graph_attr={
                                     'splines': 'True',
                                     'overlap': 'ortho'
                                 })
    except ValueError:
        flask.abort(400, f'unknown graphviz engine {engine}')

    # background colors for schemas, assigned in order of first appearance and reused cyclically
    colors = [
        '#ffffcc', '#bbffcc', '#cceeff', '#eedd99', '#ddee99',
        '#99ddff', '#dddddd'
    ]
    schema_colors = {}
    fk_pattern = re.compile(config.schema_ui_foreign_key_column_regex())

    for schema_name, table_name in sorted(tables):
        if schema_name not in schema_colors:
            schema_colors[schema_name] = colors[len(schema_colors) % len(colors)]

        # graphviz html-like label: one table with a header row and optionally one row per column
        label_parts = [
            '< <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="1" BGCOLOR="'
            + schema_colors[schema_name] + '"><TR>'
        ]

        if hide_columns:
            label_parts.append('<TD ALIGN="LEFT"> ' + table_name.replace('_', '<BR/>') + ' </TD></TR>')
        else:
            label_parts.append('<TD ALIGN="LEFT"><U><B> ' + table_name + ' </B></U></TD></TR>')
            table = tables[(schema_name, table_name)]
            for column in table['columns']:
                # highlight columns that match the foreign key pattern but lack a constraint
                if fk_pattern.match(column) and column not in table['constrained-columns']:
                    cell = '<B><I><FONT COLOR="#dd55dd"> ' + column + ' </FONT></I></B>'
                else:
                    cell = column
                label_parts.append('<TR><TD ALIGN="LEFT" > ' + cell + ' </TD></TR>')

        label_parts.append('</TABLE> >')

        graph.node(name=schema_name + '.' + table_name,
                   label=''.join(label_parts),
                   _attributes={
                       'fontname': 'Helvetica, Arial, sans-serif',
                       'fontsize': '10',
                       'fontcolor': '#555555',
                       'shape': 'none'
                   })

    for (schema_name, table_name), (referenced_schema_name,
                                    referenced_table_name) in fk_constraints:
        graph.edge(schema_name + '.' + table_name,
                   referenced_schema_name + '.' + referenced_table_name,
                   _attributes={'color': '#888888'})

    try:
        svg = graph.pipe('svg').decode('utf-8')
    except graphviz.backend.ExecutableNotFound as e:
        return str(_.tt(style='color:red')[str(e)])

    response = flask.Response(svg)
    response.headers[
        'Content-Disposition'] = f'attachment; filename="{datetime.date.today().isoformat()}-{db_alias}.svg"'
    return response