Exemple #1
0
def query_cell_to_es(query_cell, session=None):
    """Build the search-index document (a plain dict) for a query cell.

    Args:
        query_cell: Object exposing ``id``, ``meta`` (dict-like), ``context``
            (the query text), ``doc`` and ``created_at``.
        session: Optional session forwarded to the query engine lookup.

    Returns:
        dict with the keys ``id``, ``query_type``, ``title``, ``data_doc_id``,
        ``environment_id``, ``author_uid``, ``engine_id``, ``statement_type``,
        ``created_at``, ``full_table_name`` and ``query_text``.
    """
    cell_id = query_cell.id
    meta = query_cell.meta

    engine_id = meta.get("engine")
    engine = get_query_engine_by_id(engine_id, session=session)

    sql = query_cell.context
    # process_query returns a pair; only its first element (tables grouped
    # per statement) is needed here.
    tables_per_statement, _ = process_query(
        sql, language=(engine and engine.language)
    )
    # Flatten the per-statement groups into a single list of table names.
    flat_table_names = [
        table_name
        for statement_tables in tables_per_statement
        for table_name in statement_tables
    ]

    doc = query_cell.doc

    # The doc-derived fields fall back to the (falsy) doc value itself when the
    # cell has no doc attached.
    return {
        "id": cell_id,
        "query_type": "query_cell",
        "title": meta.get("title", "Untitled"),
        "data_doc_id": doc and doc.id,
        "environment_id": doc and doc.environment_id,
        "author_uid": doc and doc.owner_uid,
        "engine_id": engine_id,
        "statement_type": get_table_statement_type(sql),
        "created_at": DATETIME_TO_UTC(query_cell.created_at),
        "full_table_name": flat_table_names,
        "query_text": sql,
    }
Exemple #2
0
def query_execution_to_es(query_execution, data_cell=None, session=None):
    """Build the search-index document (a plain dict) for a query execution.

    data_cell is added as a parameter so that bulk insert of query executions
    won't require re-retrieval of data_cell.

    Args:
        query_execution: Object exposing ``id``, ``engine_id``, ``query``,
            ``uid``, ``created_at`` and ``completed_at``.
        data_cell: Optional cell whose ``meta`` supplies the title.
        session: Optional session forwarded to the query engine lookup.

    Returns:
        dict with the keys ``id``, ``query_type``, ``title``,
        ``environment_id`` (a list of ids), ``author_uid``, ``engine_id``,
        ``statement_type``, ``created_at``, ``duration``, ``full_table_name``
        and ``query_text``. ``duration`` is None while the execution has no
        ``completed_at``; ``title`` is None when no data_cell is given.
    """
    query_execution_id = query_execution.id

    engine_id = query_execution.engine_id
    engine = get_query_engine_by_id(engine_id, session=session)

    table_names, _ = process_query(query_execution.query,
                                   language=(engine and engine.language))
    table_names = list(chain.from_iterable(table_names))

    # Convert once and reuse for both the duration and the created_at field.
    created_at = DATETIME_TO_UTC(query_execution.created_at)
    duration = (DATETIME_TO_UTC(query_execution.completed_at) - created_at
                if query_execution.completed_at is not None else None)

    # The engine lookup may come back empty (the language lookup above already
    # allows for that), so fall back to no environments instead of raising
    # AttributeError on a missing engine.
    environment_ids = ([env.id for env in engine.environments]
                       if engine else [])

    title = data_cell.meta.get("title", "Untitled") if data_cell else None

    expand_query_execution = {
        "id": query_execution_id,
        "query_type": "query_execution",
        "title": title,
        "environment_id": environment_ids,
        "author_uid": query_execution.uid,
        "engine_id": engine_id,
        "statement_type": get_table_statement_type(query_execution.query),
        "created_at": created_at,
        "duration": duration,
        "full_table_name": table_names,
        "query_text": query_execution.query,
    }
    return expand_query_execution
Exemple #3
0
 def test_special_cases(self):
     # Three statements: a SET, a CTE feeding an INSERT, and an EXPLAIN.
     sql = """
         set hive.memory = 110G;
         with test as (
             select * from wasd
         )
         insert into b select * from test;
         explain select * from test;
     """
     # Only the INSERT is expected to yield a statement type.
     expected_types = [None, "INSERT", None]
     self.assertSequenceEqual(get_table_statement_type(sql), expected_types)
Exemple #4
0
def query_execution_to_es(query_execution,
                          data_cell=None,
                          fields=None,
                          session=None):
    """Build the search-index document for a query execution.

    data_cell is added as a parameter so that bulk insert of query executions
    won't require re-retrieval of data_cell.

    Args:
        query_execution: Object exposing ``id``, ``engine_id``, ``query``,
            ``uid``, ``created_at`` and ``completed_at``.
        data_cell: Optional cell supplying the title and the owning doc.
        fields: Forwarded to ``_get_dict_by_field`` to select which fields to
            produce.
        session: Optional session forwarded to the lookups.

    Returns:
        Whatever ``_get_dict_by_field`` builds from the field table below.
    """
    engine_id = query_execution.engine_id
    engine = admin_logic.get_query_engine_by_id(engine_id, session=session)
    datadoc = data_cell.doc if data_cell else None

    def get_duration():
        # None while the execution has not completed.
        return (DATETIME_TO_UTC(query_execution.completed_at) -
                DATETIME_TO_UTC(query_execution.created_at)
                if query_execution.completed_at is not None else None)

    # Values are either ready-made or zero-argument callables.
    # NOTE(review): callables are presumably invoked by _get_dict_by_field
    # only for the requested fields -- confirm against that helper.
    field_to_getter = {
        "id": query_execution.id,
        "query_type": "query_execution",
        "title":
        data_cell.meta.get("title", "Untitled") if data_cell else None,
        # The engine lookup may come back empty (the language lookup below
        # already allows for that), so fall back to no environments instead
        # of raising AttributeError on a missing engine.
        "environment_id":
        [env.id for env in engine.environments] if engine else [],
        "author_uid": query_execution.uid,
        "engine_id": engine_id,
        "statement_type":
        lambda: get_table_statement_type(query_execution.query),
        "created_at":
        lambda: DATETIME_TO_UTC(query_execution.created_at),
        "duration": get_duration,
        "full_table_name":
        lambda: _get_table_names_from_query(
            query_execution.query, language=(engine and engine.language)),
        "query_text": query_execution.query,
        # An execution with no owning doc is treated as public.
        "public": datadoc is None or datadoc.public,
        "readable_user_ids":
        lambda: _get_datadoc_editors(datadoc, session=session),
    }

    return _get_dict_by_field(field_to_getter, fields=fields)
Exemple #5
0
    def test_simple_statements(self):
        # One statement of each basic kind, in a fixed order.
        sql = """
            select * from test;
            create database test;
            use test;
            create table test.test (
                id int(11) not null,
                PRIMARY KEY (id)
            );
            alter table test add column username varchart(255);
            update table test.test set username = '******';
            drop table test;
            drop database test;
        """

        # Database-level statements (create/use/drop database) map to None.
        expected_types = [
            "SELECT",
            None,
            None,
            "CREATE",
            "ALTER",
            "UPDATE",
            "DROP",
            None,
        ]
        actual_types = get_table_statement_type(sql)
        self.assertSequenceEqual(actual_types, expected_types)
Exemple #6
0
def query_cell_to_es(query_cell, fields=None, session=None):
    """Build the search-index document for a query cell.

    Args:
        query_cell: Object exposing ``id``, ``meta`` (dict-like), ``context``
            (the query text), ``doc`` and ``created_at``.
        fields: Forwarded to ``_get_dict_by_field`` to select which fields to
            produce.
        session: Optional session forwarded to the lookups.

    Returns:
        Whatever ``_get_dict_by_field`` builds from the field table below.
    """
    meta = query_cell.meta
    sql = query_cell.context
    doc = query_cell.doc

    engine_id = meta.get("engine")
    engine = admin_logic.get_query_engine_by_id(engine_id, session=session)

    # Deferred getters: handed over as callables rather than computed here.
    # NOTE(review): presumably _get_dict_by_field only calls them for the
    # requested fields -- confirm against that helper.
    def statement_type():
        return get_table_statement_type(sql)

    def created_at():
        return DATETIME_TO_UTC(query_cell.created_at)

    def full_table_name():
        return _get_table_names_from_query(
            sql, language=(engine and engine.language))

    def readable_user_ids():
        return _get_datadoc_editors(doc, session=session)

    getters = {
        "id": query_cell.id,
        "query_type": "query_cell",
        "title": meta.get("title", "Untitled"),
        "data_doc_id": doc and doc.id,
        "environment_id": doc and doc.environment_id,
        "author_uid": doc and doc.owner_uid,
        "engine_id": engine_id,
        "statement_type": statement_type,
        "created_at": created_at,
        "full_table_name": full_table_name,
        "query_text": sql,
        # A cell with no owning doc is never public.
        "public": doc is not None and doc.public,
        "readable_user_ids": readable_user_ids,
    }

    return _get_dict_by_field(getters, fields=fields)
def log_query_per_table_task(self, query_execution_id):
    """Sync, trace lineage for, and log the tables used by a query execution.

    Steps, all inside one DB session:
      1. Load the execution (asserted to be in DONE status).
      2. Stop if its engine has no metastore.
      3. Sync the tables found in the query to the metastore.
      4. If any statement is a CREATE or INSERT, create lineage.
      5. If the first associated cell belongs to a public doc, log the
         per-statement table usage.

    Args:
        query_execution_id: Id of the query execution to process.
    """
    with DBSession() as session:
        execution = qe_logic.get_query_execution_by_id(
            query_execution_id, session=session
        )
        assert execution.status == QueryExecutionStatus.DONE

        engine = execution.engine
        metastore_id = engine.metastore_id
        if metastore_id is None:
            # Nothing to do: this query engine has no metastore configured.
            return

        sql = execution.query
        statement_types = get_table_statement_type(sql)
        tables_by_statement, _ = process_query(sql, engine.language)

        sync_table_to_metastore(
            tables_by_statement,
            statement_types,
            metastore_id,
            session=session,
        )

        # First cell associated with the execution, or None if there is none.
        first_cell = next(iter(execution.cells), None)

        if "CREATE" in statement_types or "INSERT" in statement_types:
            create_lineage_from_query(
                execution, metastore_id, first_cell, session=session
            )

        # Table usage is only logged for executions tied to a public doc.
        if first_cell is not None and first_cell.doc.public:
            log_table_per_statement(
                tables_by_statement,
                statement_types,
                query_execution_id,
                metastore_id,
                first_cell.id,
                session=session,
            )