import json
import sys
import timeit

import dataset
import sqlalchemy as sa

# parse_xml, tpchqueries, and utils are assumed to be sibling modules of
# this file; EXAMPLE (a fallback list of sample queries), BATCH_SIZE (the
# flush threshold for batched writes), and get_op_tree are assumed to be
# defined elsewhere in this module.
import parse_xml
import tpchqueries
import utils


def explain_sdss_old(config, database, quiet=False, segments=None, dry=False, offset=0):
    """Explain SDSS queries against SQL Server and store the results in the
    database.

    `segments` is a (remainder, modulus) pair that shards the workload across
    processes; `offset` skips the first rows of the log table.
    """
    connection_string = "mssql+pymssql://%s:%s@%s:%s/%s?charset=UTF-8" % (
        config["user"],
        config["password"],
        config["server"],
        config["port"],
        config["db"],
    )

    db = sa.create_engine(connection_string, echo=(not quiet))

    if not offset:
        offset = 0

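    # default: a single segment that matches every row (id % 1 == 0)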
    if not segments:
        segments = [0, 1]

    # batch of queries
    batch = []

    # batch of expr ops
    ops_batch = []

    datasetdb = None
    table = None
    ops_table = None

    query = "SELECT * from distinctlogs WHERE id %% {} = {} OFFSET {}".format(segments[1], segments[0], offset)

    if database:
        datasetdb = dataset.connect(database)
        queries = datasetdb.query(query)
        datasetdb.query("truncate table expr_ops")
    else:
        queries = EXAMPLE

    errors = []
    if datasetdb:
        table = datasetdb["logs"]
        ops_table = datasetdb["expr_ops"]
    else:
        dry = True

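    # one connection per query so the plan-cache flush and the showplan
    # session settings apply cleanly to each explain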
    for i, query in enumerate(queries):
        with db.connect() as connection:
            # clean cache to refresh constant values
            connection.execute("DBCC FREEPROCCACHE WITH NO_INFOMSGS")

            connection.execute("set showplan_xml on")
            connection.execute("set noexec on")

            print "Process query index:", i,
            query = dict(query)
            print "query id:", query["id"]

            entry = None
            if not dry:
                entry = table.find_one(id=query["id"])

            if entry and "xml" in table.columns and entry["xml"] and len(entry["xml"]):
                xml_string = entry["xml"]
            else:
                print "get explain from sql server"
                try:
                    qu = query["query"].replace("[", '"').replace("]", '"')
                    qu = qu.replace("SET PARSEONLY ON ", "")
                    res = connection.execute(qu).fetchall()[0]
                except Exception as e:
                    errors.append(str(e))
                    print str(e)
                    print "==> execute error"
                    if "closed automatically" in str(e):
                        raise
                    continue

                xml_string = "".join([x for x in res])

                query["xml"] = xml_string

            tree = parse_xml.clean(xml_string)

            if not quiet:
                print "==> query:", query["query"]
                print

            # indent tree and export as xml file
            if not quiet:
                parse_xml.indent(tree.getroot())
                tree.write(sys.stdout)
            # tree.write('clean_{}.xml'.format(i))

            # get the simplified query plan as dictionary
            query_plans = parse_xml.get_query_plans(tree, cost=True, show_filters=True)
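            # a statement should yield exactly one query plan; anything else
            # is recorded as an error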
            if len(query_plans) == 0:
                errors.append("No query plan found")
                print "==> no query_plan"
                continue
            if len(query_plans) > 1:
                errors.append("Found multiple query plans")
                print "==> multiple query_plan"
                continue

            query_plan = query_plans[0]

            # ignore inserts
            if query_plan["operator"] == "Insert":
                assert len(query_plan["children"]) == 1
                query_plan = query_plan["children"][0]

            if not quiet:
                print utils.json_pretty(query_plan)
            query["plan"] = json.dumps(query_plan, cls=utils.SetEncoder, sort_keys=True)

            query["estimated_cost"] = query_plan["total"]
            query["has_plan"] = True

            # costless, constant-free plan used as the canonical form for
            # uniqueness checks and clustering
            simple_query_plan = parse_xml.get_query_plans(tree, cost=False, show_filters=True, consts=False)[0]

            # ignore inserts
            if simple_query_plan["operator"] == "Insert":
                assert len(simple_query_plan["children"]) == 1
                simple_query_plan = simple_query_plan["children"][0]

            query["simple_plan"] = json.dumps(simple_query_plan, cls=utils.SetEncoder, sort_keys=True)

            if not quiet:
                print utils.json_pretty(simple_query_plan)

            optree = [""]
            get_op_tree(simple_query_plan, optree)
            query["optree"] = optree[0]

            batch.append(query)

            # flush a full batch inside a single transaction; a separate loop
            # variable keeps the current `query` from being clobbered
            if len(batch) > BATCH_SIZE and not dry:
                datasetdb.begin()
                for q in batch:
                    table.update(q, ["id"])
                datasetdb.commit()
                batch = []

            # get expressions
            for op in parse_xml.get_expression_operators(tree):
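                # tag each operator with the id of the query it came from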
                op["query"] = query["id"]
                ops_batch.append(op)

            if len(ops_batch) > BATCH_SIZE and not dry:
                ops_table.insert_many(ops_batch)
                ops_batch = []

            connection.execute("set showplan_xml off")
            connection.execute("set noexec off")

    # flush whatever is left in the batches
    if not dry:
        datasetdb.begin()
        for q in batch:
            table.update(q, ["id"])
        datasetdb.commit()
        ops_table.insert_many(ops_batch)

    print "Errors", errors
def explain_tpch(config, database, quiet=False, dry=False):
    """Explain queries and store the results in database
    """
    connection_string = "mssql+pymssql://%s:%s@%s:%s/%s?charset=UTF-8" % (
        config["user"],
        config["password"],
        config["server"],
        config["port"],
        config["db"],
    )

    db = sa.create_engine(connection_string, echo=(not quiet))

    datasetdb = None
    table = None
    ops_table = None

    if database:
        datasetdb = dataset.connect(database)
        table = datasetdb["tpchqueries"]
        ops_table = datasetdb["tpch_expr_ops"]
        datasetdb.query("truncate table tpch_expr_ops")
    else:
        dry = True

    errors = []

    for i, query in enumerate(tpchqueries.get_queries()):
        with db.connect() as connection:
            # clean cache to refresh constant values
            connection.execute("DBCC FREEPROCCACHE WITH NO_INFOMSGS")
            connection.execute("set showplan_xml on")

            print "Explain query", i

            start_time = timeit.default_timer()
            try:
                res = connection.execute(query["query"]).fetchall()[0]
            except Exception as e:
                errors.append(str(e))
                print str(e)
                print "==> execute error"
                if "closed automatically" in str(e):
                    raise
                continue

            query["elapsed"] = timeit.default_timer() - start_time

            xml_string = "".join([x for x in res])
            tree = parse_xml.clean(xml_string)

            if not quiet:
                print "==> query:", query["query"]
                print

            # indent tree and export as xml file
            if not quiet:
                parse_xml.indent(tree.getroot())
                tree.write(sys.stdout)
            # tree.write('clean_{}.xml'.format(i))

            # get the simplified query plan as dictionary
            query_plans = parse_xml.get_query_plans(tree, cost=True, show_filters=True)
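            # a statement should yield exactly one query plan; anything else
            # is recorded as an error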
            if len(query_plans) == 0:
                errors.append("No query plan found")
                print "==> no query_plan"
                continue
            if len(query_plans) > 1:
                errors.append("Found multiple query plans")
                print "==> multiple query_plan"
                continue

            query_plan = query_plans[0]

            if not quiet:
                print utils.json_pretty(query_plan)
            query["plan"] = json.dumps(query_plan, cls=utils.SetEncoder, sort_keys=True)

            query["estimated_cost"] = query_plan["total"]

            # costless, constant-free plan used as the canonical form for
            # uniqueness checks and clustering
            simple_query_plan = parse_xml.get_query_plans(tree, cost=False, show_filters=True, consts=False)[0]

            if not quiet:
                print utils.json_pretty(simple_query_plan)

            query["simple_plan"] = json.dumps(simple_query_plan, cls=utils.SetEncoder, sort_keys=True)

            optree = [""]
            get_op_tree(simple_query_plan, optree)
            query["optree"] = optree[0]

            if not dry:
                table.upsert(query, ["id"])

            # get expressions; skipped on dry runs, where ops_table is None
            if not dry:
                for op in parse_xml.get_expression_operators(tree):
                    op["query"] = query["id"]
                    op["tpch_query"] = query["tp_query"]
                    ops_table.insert(op)

            connection.execute("set showplan_xml off")
            connection.execute("set noexec off")

    print "Errors", errors
def explain_sdss(database, quiet=False, segments=None, dry=False, offset=0):
    # batch of queries
    batch = []

    # batch of expr ops
    ops_batch = []

    datasetdb = None
    table = None
    ops_table = None

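    # plans are read from the cached xml in the `everything` table rather
    # than re-requested from SQL Server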
    query = "SELECT * from everything"

    if database:
        datasetdb = dataset.connect(database)
    else:
        queries = EXAMPLE

    if datasetdb:
        table = datasetdb["everything"]
        ops_table = datasetdb["expr_ops"]
        queries = datasetdb.query(query)
    else:
        dry = True

    errors = []

    for i, query in enumerate(queries):
        # print "Process query index:", i
        query = dict(query)
        # print "query id:", query['id']

        entry = None
        if not dry:
            entry = table.find_one(id=query["id"])

        if entry and "xml" in table.columns and entry["xml"] and len(entry["xml"]):
            xml_string = entry["xml"]
            xml_string = xml_string.strip()[1:-1]
            xml_string = xml_string.replace('""', '"')

        tree = parse_xml.clean(xml_string)

        if not quiet:
            print "==> query:", query["query"]
            print

        # indent tree and export as xml file
        if not quiet:
            parse_xml.indent(tree.getroot())
            tree.write(sys.stdout)
        # tree.write('clean_{}.xml'.format(i))

        # get the simplified query plan as dictionary
        query_plans = parse_xml.get_query_plans(tree, cost=True, show_filters=True)
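        # a statement should yield exactly one query plan; anything else is
        # recorded as an error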
        if len(query_plans) == 0:
            errors.append("No query plan found")
            print "==> no query_plan"
            continue
        if len(query_plans) > 1:
            errors.append("Found multiple query plans")
            print "==> multiple query_plan"
            continue

        query_plan = query_plans[0]

        # ignore inserts
        if query_plan["operator"] == "Insert":
            assert len(query_plan["children"]) == 1
            query_plan = query_plan["children"][0]

        # if not quiet:
        #     print utils.json_pretty(query_plan)
        query["plan"] = json.dumps(query_plan, cls=utils.SetEncoder, sort_keys=True)

        query["estimated_cost"] = query_plan["total"]
        query["has_plan"] = True

        # costless, constant-free plan used as the canonical form for
        # uniqueness checks and clustering
        simple_query_plan = parse_xml.get_query_plans(tree, cost=False, show_filters=True, consts=False)[0]

        # ignore inserts
        if simple_query_plan["operator"] == "Insert":
            assert len(simple_query_plan["children"]) == 1
            simple_query_plan = simple_query_plan["children"][0]

        query["simple_plan"] = json.dumps(simple_query_plan, cls=utils.SetEncoder, sort_keys=True)

        if not quiet:
            print utils.json_pretty(simple_query_plan)

        optree = [""]
        get_op_tree(simple_query_plan, optree)
        query["optree"] = optree[0]

        batch.append(query)

        # flush a full batch inside a single transaction; a separate loop
        # variable keeps the current `query` from being clobbered
        if len(batch) > BATCH_SIZE and not dry:
            datasetdb.begin()
            for q in batch:
                table.update(q, ["id"])
            datasetdb.commit()
            batch = []

        # get expressions
        for op in parse_xml.get_expression_operators(tree):
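            # tag each operator with the id of the query it came from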
            op["query"] = query["id"]
            ops_batch.append(op)

        if len(ops_batch) > BATCH_SIZE and not dry:
            ops_table.insert_many(ops_batch)
            ops_batch = []

    # flush whatever is left in the batches
    if not dry:
        datasetdb.begin()
        for q in batch:
            table.update(q, ["id"])
        datasetdb.commit()
        ops_table.insert_many(ops_batch)

    print "Errors", errors