class TestPydrill(unittest.TestCase):
    """Tests for ``PyDrill`` transport settings and the ``is_active`` check.

    HTTP responses are registered through the ``responses`` module. The
    checks use ``unittest`` assertion methods rather than bare ``assert``
    statements so they still run under ``python -O``.
    """

    # URL that the mocked HEAD responses are registered for.
    BASE_URL = 'http://localhost:8047/'

    def setUp(self):
        self.drill = PyDrill(host='localhost', port=8047)

    def _mock_head(self, status):
        """Register a mocked ``HEAD`` response on ``BASE_URL`` with ``status``."""
        responses.add(
            method=responses.HEAD,
            url=self.BASE_URL,
            status=status,
            content_type='application/json',
        )

    def test_transport_host(self):
        self.assertEqual(self.drill.transport.host, 'localhost')

    def test_transport_port(self):
        self.assertEqual(self.drill.transport.port, 8047)

    @responses.activate
    def test_is_active(self):
        self._mock_head(200)
        self.assertIs(self.drill.is_active(), True)

    @responses.activate
    def test_is_not_active_404(self):
        self._mock_head(404)
        self.assertIs(self.drill.is_active(), False)

    @responses.activate
    def test_is_not_active_500(self):
        self._mock_head(500)
        self.assertIs(self.drill.is_active(), False)

    @responses.activate
    def test_is_not_active_timeout(self):
        self._mock_head(500)
        # The request must fail with TransportError; no exception at all is
        # a test failure, any other exception propagates as an error.
        with self.assertRaises(TransportError) as caught:
            self.drill.perform_request('HEAD', '/', params={'request_timeout': 0})

        error = caught.exception
        # The convenience attributes must mirror the positional args.
        self.assertEqual(error.status_code, error.args[0])
        self.assertEqual(error.error, error.args[1])
        self.assertEqual(error.info, error.args[2])
        self.assertTrue(str(error))
예제 #2
0
파일: __main__.py 프로젝트: xav-b/datacli
def cli(dbname, host, port, timeout, interactive, version):
    """Cli entry point."""
    # NOTE(review): the docstring is kept verbatim in case a CLI framework
    # surfaces it as help text -- confirm before expanding it.

    # Version request: print it and terminate the process with status 0.
    show_version = version or dbname == 'version'
    if show_version:
        print('datacli version: {}'.format(__version__))
        sys.exit(0)

    configure_logger()

    # Without a reachable Drill server there is nothing to do: report 1.
    drill = PyDrill(host=host, port=port)
    if not drill.is_active():
        log.error('unable to reach Drill server')
        return 1

    shell = DataCli(drill, dbname, DataPrompt(), timeout=timeout)
    log.info('connected to Drillbit')

    # Keep running the REPL until it asks to stop, or the user presses
    # Control-C (KeyboardInterrupt) or Control-D (EOFError).
    finished = False
    while not finished:
        try:
            finished = bool(shell.repl(interactive))
        except (KeyboardInterrupt, EOFError):
            finished = True

    log.info('shutting down...')
    return 0
예제 #3
0
파일: drill.py 프로젝트: moritz9/redash
    def run_query(self, query, user):
        """Run ``query`` against Drill and return ``(json_data, error)``.

        The query text is stripped of comments and split on ``;``; every
        non-empty statement is executed in order and the result of the last
        one is returned.

        :param query: query text, possibly holding several ``;``-separated
            statements.
        :param user: not used by this method.
        :return: ``(json_string, None)`` on success, where the JSON document
            has ``columns`` and ``rows`` keys; ``(None, message)`` on failure.
        """
        drillbit_host, drillbit_port = self.get_drillbit(
            self.configuration.get('host', None),
            self.configuration.get('port', None),
            self.configuration.get('zookeeper_path', None))

        user_auth = self.configuration.get('user_auth', None)
        if user_auth:
            session = requests.Session()
            # Split on the first colon only, so a password may itself contain
            # ':'; a value without any colon yields an empty password
            # instead of raising ValueError.
            username, _, password = user_auth.partition(':')
            if not self.auth_drill(session, drillbit_host, drillbit_port,
                                   username, password):
                return None, 'Invalid credentials for Drill'
            connection = PyDrill(host=drillbit_host, port=drillbit_port,
                                 connection_class=DrillRequestsHttpConnection,
                                 drill_session=session)
        else:
            connection = PyDrill(host=drillbit_host, port=drillbit_port)

        if not connection.is_active():
            return None, 'Please run Drill first'

        annotation = self.get_annotation(query)

        try:
            result = None
            for statement in self.strip_comments(query).split(';'):
                statement = statement.strip()
                if not statement:
                    continue
                statement = annotation + self.magic_helpers(statement)
                result = connection.query(statement, timeout=600)
                logger.info(result.rows)

            if result is None:
                # Nothing was executed (empty or comment-only query): report
                # that explicitly instead of failing on ``None.columns``.
                return None, drillbit_host + '\n' + 'Query is empty'

            columns = [{'name': col,
                        'friendly_name': col,
                        'type': TYPE_STRING} for col in result.columns]
            data = {'columns': columns, 'rows': result.rows}
            json_data = json.dumps(data, cls=JSONEncoder)
            error = None
        except TransportError as te:
            json_data = None
            # str() keeps the concatenation safe when ``error`` is not a string.
            error = drillbit_host + '\n' + str(te.error)
        except Exception as ex:
            json_data = None
            error = drillbit_host + '\n' + str(ex)

        return json_data, error
#!/usr/local/bin/python
# Script that prints an HTTP content-type header, runs one aggregate query
# against a local Drill server and prints the result serialised with bson's
# ``dumps``. The ``print(...)`` calls are valid on both Python 2 and 3.
from pydrill.client import PyDrill
# ImproperlyConfigured is raised below; without this import the script would
# fail with NameError exactly when Drill is not running.
from pydrill.exceptions import ImproperlyConfigured
import json
from bson.json_util import dumps
from bson import json_util
# Header line followed by the blank lines that end the header block.
print("Content-type: text/html\n\n")
from pymongo import MongoClient
drill = PyDrill(host='localhost', port=8047)

if not drill.is_active():
    raise ImproperlyConfigured('Please run Drill first')

# NOTE(review): ``city`` is never used -- the query below hard-codes
# 'Pittsburgh'. Confirm which city is intended.
city="Las Vegas"

# One row with a count per star rating (columns `1`..`8`) for restaurants.
yelp_reviews = drill.query('''
  select sum(case when t.stars=1.0 then 1 else 0 end) as `1`,
  sum(case when t.stars=2.0 then 1 else 0 end) as `2`,
  sum(case when t.stars=2.5 then 1 else 0 end) as `3`,
   sum(case when t.stars=3.0 then 1 else 0 end) as `4`,
   sum(case when t.stars=3.5 then 1 else 0 end) as `5`,
   sum(case when t.stars=4.0 then 1 else 0 end) as `6`,
   sum(case when t.stars=4.5 then 1 else 0 end) as `7`,
   sum(case when t.stars=5.0 then 1 else 0 end) as `8`
   from `mongo.274_BI`.`yelp_dataset`t where t.city='Pittsburgh' and true=repeated_contains(categories,'Restaurants')

''')

print(dumps(yelp_reviews))
예제 #5
0
class DrillWrapper(object):

    def __init__(self, datasource, config, rdfmts, star):
        self.datasource = datasource
        self.rdfmts = rdfmts
        self.url = datasource.url
        self.params = datasource.params
        self.config = config
        self.drill = None
        self.df = None
        self.result = None
        self.star = star
        self.query = None
        self.prefixes = {}
        if ':' in self.url:
            self.host, self.port = self.url.split(':')
        else:
            self.host = self.url
            self.port = '8047'

        if len(self.datasource.mappings) == 0:
            self.mappings = self.config.load_mappings(self.datasource.mappingfiles, self.rdfmts)
        else:
            # self.mappings = self.config.mappings
            self.mappings = self.datasource.mappings

    def executeQuery(self, query, queue=Queue(), limit=-1, offset=0):
        """
        Entry point for query execution on csv files
        :param querystr: string query
        :return:
        """
        from time import time
        # start = time()
        # print("Start:", start)
        if len(self.mappings) == 0:
            print("Empty Mapping")
            queue.put('EOF')
            return []
        # querytxt = query
        self.query = qp.parse(query)
        self.prefixes = getPrefs(self.query.prefs)

        query_filters = [f for f in self.query.body.triples[0].triples if isinstance(f, Filter)]

        if limit > -1 or offset > -1:
            self.query.limit = limit
            self.query.offset = offset

        sqlquery, projvartocols, coltotemplates, filenametablename = self.translate(query_filters)
        # print(sqlquery)
        # totalres = 0
        if sqlquery is None or len(sqlquery) == 0:
            queue.put("EOF")
            return []
        try:
            start = time()
            try:
                self.drill = PyDrill(host=self.host, port=self.port)
            except Exception as ex:
                print("Exception while connecting to Drill", ex)
                queue.put("EOF")
                return
            if not self.drill.is_active():
                print('Exception: Please run Drill first')
                queue.put("EOF")
                return
            # print("Drill Initialization cost:", time() - start)
            logger.info("Drill Initialization cost:" + str(time() - start))
            start = time()
            if isinstance(sqlquery, list):
                sqlquery = [sql for sql in sqlquery if sql is not None and len(sql) > 0]
                if len(sqlquery) > 3:
                    sqlquery = " UNION ".join(sqlquery)
            if isinstance(sqlquery, list):
                sqlquery = [sql for sql in sqlquery if sql is not None and len(sql) > 0]
                # logger.info(" UNION ".join(sqlquery))
                processqueues = []
                processes = []
                res_dict = []
                for sql in sqlquery:
                    # processquery = Queue()
                    # self.run_union(sql, queue, projvartocols, coltotemplates, limit, processquery, res_dict)
                    # print(sql)
                    processquery = Queue()
                    processqueues.append(processquery)
                    p = Process(target=self.run_union, args=(sql, queue, projvartocols, coltotemplates, limit, processquery, res_dict,))
                    p.start()
                    processes.append(p)

                while len(processqueues) > 0:
                    toremove = []
                    try:
                        for q in processqueues:
                            if q.get(False) == 'EOF':
                                toremove.append(q)
                        for p in processes:
                            if p.is_alive():
                                p.terminate()
                    except:
                        pass
                    for q in toremove:
                        processqueues.remove(q)
                logger.info("Done running:")
                sw = " UNION ".join(sqlquery)
                logger.info(sw)
            else:
                card = 0
                # if limit == -1:
                limit = 1000
                if offset == -1:
                    offset = 0
                logger.info(sqlquery)
                # print(sqlquery)
                while True:
                    query_copy = sqlquery + " LIMIT " + str(limit) + " OFFSET " + str(offset)
                    cardinality = self.process_result(query_copy, queue, projvartocols, coltotemplates)
                    card += cardinality
                    if cardinality < limit:
                        break

                    offset = offset + limit
            # print("Exec in Drill took:", time() - start)
            logger.info("Exec in Drill took:" + str(time() - start))
        except Exception as e:
            print("Exception ", e)
            pass
        # print('End:', time(), "Total results:", totalres)
        # print("Drill finished after: ", (time()-start))
        queue.put("EOF")

    def run_union(self, sql, queue, projvartocols, coltotemplates, limit, processqueue, res_dict):

        card = 0
        # if limit == -1:
        limit = 1000
        offset = 0
        while True:
            query_copy = sql + " LIMIT " + str(limit) + " OFFSET " + str(offset)
            cardinality = self.process_result(query_copy, queue, projvartocols, coltotemplates, res_dict)
            card += cardinality
            if cardinality < limit:
                break

            offset = offset + limit

        processqueue.put("EOF")

    def process_result(self, sql, queue, projvartocols, coltotemplates, res_dict=None):
        c = 0
        try:
            if not self.drill.is_active():
                try:
                    self.drill = PyDrill(host=self.host, port=self.port)
                except Exception as ex:
                    print("Exception while connecting to Drill for query processing", ex)
                    return 0
            try:
                results = self.drill.query(sql, timeout=1000)
            except Exception as ex:
                print("Exception while running query to Drill for query processing", ex)
                return 0

            for row in results:
                c += 1
                # if res_dict is not None:
                #     rowtxt = ",".join(list(row.values()))
                #     if rowtxt in res_dict:
                #         continue
                #     else:
                #         res_dict.append(rowtxt)

                res = {}
                skip = False
                for r in row:
                    if row[r] == 'null':
                        skip = True
                        break
                    if '_' in r and r[:r.find("_")] in projvartocols:
                        s = r[:r.find("_")]
                        if s in res:
                            val = res[s]
                            if 'http://' in row[r]:
                                res[s] = row[r]
                            else:
                                res[s] = val.replace('{' + r[r.find("_") + 1:] + '}', row[r].replace(" ", '_'))
                        else:
                            if 'http://' in r:
                                res[s] = r
                            else:
                                res[s] = coltotemplates[s].replace('{' + r[r.find("_") + 1:] + '}',
                                                                   row[r].replace(" ", '_'))
                    elif r in projvartocols and r in coltotemplates:
                        if 'http://' in row[r]:
                            res[r] = row[r]
                        else:
                            res[r] = coltotemplates[r].replace('{' + projvartocols[r] + '}', row[r].replace(" ", '_'))
                    else:
                        res[r] = row[r]

                if not skip:
                    queue.put(res)
                    # if 'keggCompoundId' in res:
                    #     print(res['keggCompoundId'])
            return c
        except Exception as e:
            print("Exception while processing drill results", e, sql)
            logger.error(sql)
            logger.error("Exception while processing results:" + str(e))
            import traceback
            traceback.print_stack()
            return c

    def get_so_variables(self, triples, proj):
        """Return the names of all non-constant subjects and objects.

        Names are listed triple by triple, subject before object; duplicates
        are kept. ``proj`` is accepted but not used: non-projected variables
        are not excluded.
        """
        return [
            term.name
            for triple in triples
            for term in (triple.subject, triple.theobject)
            if not term.constant
        ]

    def getsqlfil(self, l, r, op, var_pred_map, subjmap, predicate_object_map, coltotemplates, tablealias):
        if r is not None and '?' in r.name:
            var = r.name
            val = l.name
        else:
            var = l.name
            val = r.name
        # print(val)
        if '(' in var and ')' in var:
            var = var[var.find('(') + 1:var.find(')')]

        if len(var_pred_map) == 0 or var == self.star['triples'][0].subject.name:
            subjcol = subjmap.value
            splits = subjcol.split('{')
            coltotemplates[var[1:]] = subjcol
            column = []
            for sp in splits[1:]:
                column.append(sp[:sp.find('}')])

            if len(column) > 1:
                objfilters = []
                for col in column:
                    vcolumn = "`" + col + '`'

                    if '<' in val and '>' in val:
                        val = val.replace('<', '').replace('>', '')
                    if '"' not in val and "'" not in val:
                        val = "'" + val + "'"

                    if op == 'REGEX':
                        val = "LOWER('%" + val[1:-1] + "%')"
                        objectfilter = 'LOWER(' + tablealias + '.' + vcolumn + ") LIKE " + val
                    else:
                        objectfilter = tablealias + '.' + vcolumn + op + val
                    objfilters.append(objectfilter)

                return " AND ".join(objfilters)
            elif len(column) == 1:
                column = "`" + column[0] + '`'

                if '<' in val and '>' in val:
                    val = val.replace('<', '').replace('>', '').replace(splits[0], '')

                if '"' not in val and "'" not in val:
                    val = "'" + val + "'"

                if op == 'REGEX':
                    val = "LOWER('%" + val[1:-1] + "%')"
                    objectfilter = 'LOWER(' + tablealias + '.' + column + ") LIKE " + val
                else:
                    objectfilter = tablealias + '.' + column + op + val

                return objectfilter
        if var not in var_pred_map:
            return None

        p = var_pred_map[var]
        pmap, omap = predicate_object_map[p]
        if omap.objectt.resource_type == TripleMapType.TEMPLATE:
            coltotemplates[var[1:]] = omap.objectt.value
            splits = omap.objectt.value.split('{')
            column = []
            for sp in splits[1:]:
                column.append(sp[:sp.find('}')])
            val = val.replace(splits[0], "")

            if len(column) == 1:
                column = column[0]

        elif omap.objectt.resource_type == TripleMapType.REFERENCE:
            column = omap.objectt.value
        else:
            column = []
        if isinstance(column, list):
            if len(column) > 0:
                column = column[0]
        column = "`" + column + '`'

        if '<' in val and '>' in val:
            val = val.replace('<', '').replace('>', '')
        if '"' not in val and "'" not in val:
            val = "'" + val + "'"

        if op == 'REGEX':
            val = "LOWER('%" + val[1:-1] + "%')"
            objectfilter = 'LOWER(' + tablealias + '.' + column + ") LIKE " + val
        else:
            objectfilter = tablealias + '.' + column + op + val

        return objectfilter

    def get_Expression_value(self, exp, var_pred_map, subjmap, predicate_object_map, coltotemplates, tablealias):
        """Translate a filter expression into a SQL condition string.

        Dispatches on ``exp.op``:

        * unary functors: translated through ``getsqlfil`` when the operand
          is an ``Expression`` wrapping an ``Argument``;
        * binary functors: ``REGEX`` (with a ``desc`` that is not ``False``)
          goes through ``getsqlfil``; any other one is rendered as
          ``op(left,right)``;
        * a missing right operand is rendered as ``op`` followed by the left
          operand;
        * otherwise two ``Argument`` operands are compared via ``getsqlfil``
          and two ``Expression`` operands are translated recursively and
          joined with ``OR`` (for ``||`` / ``|``) or ``AND``.

        :return: the condition string, or ``None`` when no translation is
            available.
        """
        left = exp.left
        right = exp.right
        op = exp.op
        # Arguments forwarded unchanged to getsqlfil / recursive calls.
        context = (var_pred_map, subjmap, predicate_object_map, coltotemplates, tablealias)

        if op in unaryFunctor:
            if isinstance(left, Expression) and isinstance(left.left, Argument):
                return self.getsqlfil(left.left, right, op, *context)
            return None

        if op in binaryFunctor:
            if op == 'REGEX' and right.desc is not False:
                if not isinstance(left, Expression):
                    return self.getsqlfil(left, right, op, *context)
                if 'xsd:string' in left.op:
                    # Unwrap the xsd:string cast and filter on its operand.
                    return self.getsqlfil(left.left, right, op, *context)
                return None
            return op + "(" + str(left) + "," + str(right) + ")"

        if right is None:
            return op + str(left)

        if isinstance(left, Argument) and isinstance(right, Argument):
            return self.getsqlfil(left, right, op, *context)

        if isinstance(left, Expression) and isinstance(right, Expression):
            leftexp = self.get_Expression_value(left, *context)
            rightexp = self.get_Expression_value(right, *context)
            # If either side cannot be translated, the whole expression is
            # reported as untranslatable.
            if leftexp is None or rightexp is None:
                return None
            if op == '||' or op == '|':
                return '(' + leftexp + ' OR ' + rightexp + ')'
            return '(' + leftexp + ' AND ' + rightexp + ')'

        # Fallback: render the expression textually, with balanced parentheses.
        return "(" + str(exp.left) + " " + exp.op + " " + str(exp.right) + ")"

    def get_obj_filter(self, f, var_pred_map, subjmap, predicate_object_map, coltotemplates, tablealias):
        return self.get_Expression_value(f.expr, var_pred_map, subjmap, predicate_object_map, coltotemplates,
                                         tablealias)

    def makeJoin(self, mapping_preds, query_filters):

        coltotemplates = {}
        projections = {}
        projvartocol = {}
        objectfilters = []
        constfilters = []
        fromclauses = []
        database_name = ""
        i = 0
        tm_tablealias = {}
        subjects = {}

        projvartocols = {}
        query = ""

        for tm, predicate_object_map in mapping_preds.items():
            sparqlprojected = set(self.get_so_variables(self.star['triples'], [c.name for c in self.query.args]))
            tablealias = 'Ontario_' + str(i)
            if isinstance(predicate_object_map, SubjectMap):
                subjvar = self.star['triples'][0].subject.name
                var = subjvar
                var_pred_map = {subjvar: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'}
                if predicate_object_map.subject.resource_type == TripleMapType.TEMPLATE:
                    coltotemplates[subjvar[1:]] = predicate_object_map.subject.value
                    splits = predicate_object_map.subject.value.split('{')
                    column = []
                    for sp in splits[1:]:
                        column.append(sp[:sp.find('}')])
                    if len(column) == 1:
                        column = column[0]
                elif predicate_object_map.subject.resource_type == TripleMapType.REFERENCE:
                    column = predicate_object_map.subject.value
                else:
                    column = []
                if isinstance(column, list):
                    j = 0
                    for col in column:
                        if '[*]' in col:
                            col = col.replace('[*]', '')
                            vcolumn = "`" + col + '`'
                            projections[var[1:] + '_Ontario_' + str(j)] = "FLATTEN(" + tablealias + "." + vcolumn + ") AS " + var[1:] + '_Ontario_' + str(j)
                        else:
                            vcolumn = "`" + col + '`'
                            projections[var[1:] + '_Ontario_' + str(j)] = tablealias + "." + vcolumn + " AS " + var[1:] + '_Ontario_' + str(j)
                        projvartocol.setdefault(var[1:], []).append(col)
                        objectfilters.append(tablealias + '.' + vcolumn + " is not null ")
                        objectfilters.append(tablealias + '.' + vcolumn + " <> '' ")
                        j += 1
                else:
                    col = column
                    if '[*]' in col:
                        col = col.replace('[*]', '')
                        column = "`" + col + '`'
                        projections[var[1:]] = "FLATTEN(" + tablealias + "." + column + ") AS `" + var[1:] + '`'
                    else:
                        column = "`" + column + '`'
                        projections[var[1:]] = tablealias + "." + column + " AS `" + var[1:] + '`'
                    projvartocol[var[1:]] = col
                    objectfilters.append(tablealias + '.' + column + " is not null ")
                    objectfilters.append(tablealias + '.' + column + " <> '' ")
            else:
                var_pred_map = {var: pred for pred, var in self.star['predicates'].items() if
                                pred in predicate_object_map}
                column = []
                for var in sparqlprojected:
                    if var not in var_pred_map:
                        continue
                    p = var_pred_map[var]
                    pmap, omap = predicate_object_map[p]
                    if omap.objectt.resource_type == TripleMapType.TEMPLATE:
                        coltotemplates[var[1:]] = omap.objectt.value
                        splits = omap.objectt.value.split('{')
                        column = []
                        for sp in splits[1:]:
                            column.append(sp[:sp.find('}')])
                        if len(column) == 1:
                            column = column[0]
                    elif omap.objectt.resource_type == TripleMapType.REFERENCE:
                        column = omap.objectt.value
                    else:
                        column = []
                    if isinstance(column, list):
                        j = 0
                        for col in column:
                            if '[*]' in col:
                                col = col.replace('[*]', '')
                                vcolumn = "`" + col + '`'
                                projections[var[1:] + '_Ontario_' + str(j)] = "FLATTEN(" + tablealias + "." + vcolumn + ") AS " + var[1:] + '_Ontario_' + str(j)
                            else:
                                vcolumn = "`" + col + '`'
                                projections[var[1:] + '_Ontario_' + str(j)] = tablealias + "." + vcolumn + " AS `" + var[1:] + '_Ontario_' + str(j) + '`'

                            projvartocol.setdefault(var[1:], []).append(col)
                            objectfilters.append(tablealias + '.' + vcolumn + " is not null ")
                            objectfilters.append(tablealias + '.' + vcolumn + " <> '' ")
                            j += 1
                    else:
                        col = column
                        if '[*]' in col:
                            col = col.replace('[*]', '')
                            column = "`" + col + '`'
                            projections[var[1:]] = "FLATTEN(" + tablealias + "." + column + ") AS `" + var[1:] + '`'
                        else:
                            column = "`" + column + '`'
                            projections[var[1:]] = tablealias + "." + column + " AS `" + var[1:] + '`'
                        projvartocol[var[1:]] = col
                        objectfilters.append(tablealias + '.' + column + " is not null ")
                        objectfilters.append(tablealias + '.' + column + " <> '' ")
            for f in query_filters:
                #if len(set(f.getVars()).intersection(list(var_pred_map.keys()))) == len(set(f.getVars())):
                fil = self.get_obj_filter(f, var_pred_map, self.mappings[tm].subject_map.subject, predicate_object_map, coltotemplates, tablealias)
                if fil is not None and len(fil) > 0:
                    constfilters.append(fil)
            tm_tablealias[tablealias] = tm

            triplemap = self.mappings[tm]
            subjects[tm] = triplemap.subject_map.subject

            logicalsource = triplemap.logical_source
            data_source = logicalsource.data_source
            # tablename = data_source.name
            # database_name = logicalsource.iterator  #TODO: this is not correct, only works for LSLOD-Custom experiment
            database_name = data_source.name
            if '/' in database_name:
                database_name = database_name.split('/')[-1]
            tablename = data_source.name
            # TODO: change the paths, this works only for LSLOD-experiment
            if self.datasource.dstype == DataSourceType.LOCAL_TSV:
                # fileext = 'dfs.`/data/tsv/' + database_name + '/' + tablename + '.tsv`'
                fileext = 'dfs.`/data/tsv/' + tablename + '`'
            elif self.datasource.dstype == DataSourceType.LOCAL_CSV:
                # fileext = 'dfs.`/data/csv/' + database_name + '/' + tablename + '.csv`'
                fileext = 'dfs.`/data/csv/' + tablename + '`'
            elif self.datasource.dstype == DataSourceType.LOCAL_JSON:
                # fileext = 'dfs.`/data/json/' + database_name + '/' + tablename + '.json`'
                fileext = 'dfs.`/data/json/' + tablename + '`'
            elif self.datasource.dstype == DataSourceType.HADOOP_TSV:
                # fileext = 'hdfs.`/user/kemele/data/tsv/' + database_name + '/' + tablename + '.tsv`'
                fileext = 'hdfs.`/user/kemele/data/tsv/' + tablename + '`'
            elif self.datasource.dstype == DataSourceType.HADOOP_CSV:
                # fileext = 'hdfs.`/user/kemele/data/csv/' + database_name + '/' + tablename + '.csv`'
                fileext = 'hdfs.`/user/kemele/data/csv/' + tablename + '`'
            elif self.datasource.dstype == DataSourceType.HADOOP_JSON:
                # fileext = 'hdfs.`/user/kemele/data/json/' + database_name + '/' + tablename + '.json`'
                fileext = 'hdfs.`/user/kemele/data/json/' + tablename + '`'
            else:
                fileext = ''

            fromclauses.append(fileext + ' ' + tablealias)
            i += 1

            for var, p in var_pred_map.items():

                if '?' not in var:
                    pmap, omap = predicate_object_map[p]

                    if omap.objectt.resource_type == TripleMapType.TEMPLATE:
                        # omap.objectt.value
                        splits = omap.objectt.value.split('{')
                        column = []
                        for sp in splits[1:]:
                            column.append(sp[:sp.find('}')])

                        var = var.replace(splits[0], '').replace('}', '')
                        if '<' in var and '>' in var:
                            var = var[1:-1]
                        var = "'" + var + "'"
                    elif omap.objectt.resource_type == TripleMapType.REFERENCE:
                        column = omap.objectt.value
                        if "'" not in var and '"' not in var:
                            var = "'" + var + "'"
                        if '"' in var:
                            var = "'" + var[1:-1] + "'"
                    else:
                        column = []
                    if isinstance(column, list):
                        j = 0
                        for col in column:
                            vcolumn = "`" + col + '`'
                            constfilters.append(tablealias + "." + vcolumn + " = " + var)
                            j += 1
                    else:
                        column = "`" + column + '`'
                        constfilters.append(tablealias + "." + column + " = " + var)

        subj = self.star['triples'][0].subject.name if not self.star['triples'][0].subject.constant else None
        invalidsubj = False
        if subj is not None:
            filtersadded = []
            for tm, subject in subjects.items():
                subjcol = subject.value
                tablealias = [v for v in tm_tablealias if tm_tablealias[v] == tm][0]
                splits = subjcol.split('{')
                coltotemplates[subj[1:]] = subjcol
                column = []
                for sp in splits[1:]:
                    column.append(sp[:sp.find('}')])

                if len(column) > 1:
                    j = 0
                    for col in column:
                        vcolumn = "`" + col + '`'
                        projections[subj[1:] + '_Ontario_' + str(j)] = tablealias + "." + vcolumn + " AS `" + subj[1:] + '_Ontario_' + str(j) + '`'
                        projvartocol.setdefault(subj[1:], []).append(col)
                        objectfilters.append(tablealias + '.' + vcolumn + " is not null ")
                        objectfilters.append(tablealias + '.' + vcolumn + " <> '' ")
                        j += 1
                elif len(column) == 1:
                    col = column[0]
                    column = "`" + col + '`'
                    projections[subj[1:]] = tablealias + "." + column + " AS `" + subj[1:] + '`'
                    projvartocol[subj[1:]] = col

                    objectfilters.append(tablealias + '.' + column + " is not null ")
                    objectfilters.append(tablealias + '.' + column + " <> '' ")
        else:
            subj = self.star['triples'][0].subject.name
            for tm, subject in subjects.items():
                subjcol = subject.value
                tablealias = [v for v in tm_tablealias if tm_tablealias[v] == tm][0]
                splits = subjcol.split('{')
                column = []
                for sp in splits[1:]:
                    column.append(sp[:sp.find('}')])

                if len(splits[0]) > 0 and splits[0] not in subj:
                    invalidsubj = True
                    break
                var = subj.replace(splits[0], '').replace('}', '')

                if '<' in var and '>' in var:
                    var = var[1:-1]
                var = "'" + var + "'"
                # if isinstance(column, list):
                j = 0
                for col in column:
                    vcolumn = "`" + col + '`'
                    constfilters.append(tablealias + "." + vcolumn + " = " + var)
                    j += 1
        if invalidsubj:
            mapping_preds = []

        if len(subjects) > 1:
            aliases = list(tm_tablealias.keys())
            raliases = aliases.copy()
            raliases.reverse()
            compared = []
            for a1 in aliases:
                for a2 in aliases:
                    if a1 + a2 in compared:
                        continue
                    if a1 == a2:
                        continue
                    compared.append(a1 + a2)
                    compared.append(a2 + a1)
                    subj1 = subjects[tm_tablealias[a1]].value
                    subj2 = subjects[tm_tablealias[a2]].value
                    column1 = None
                    column2 = None
                    splits = subj1.split('{')
                    for sp in splits:
                        if '}' in sp:
                            column1 = sp[:sp.find('}')]
                            break
                    splits = subj2.split('{')
                    for sp in splits:
                        if '}' in sp:
                            column2 = sp[:sp.find('}')]
                            break
                    column1 = '`' + column1 + '`'
                    column2 = '`' + column2 + '`'
                    if column1 == column2:
                        objectfilters.append(a1 + '.' + column1 + "=" + a2 + "." + column2)
        objectfilters.extend(constfilters)
        if len(mapping_preds) > 0:
            fromcaluse = "\n FROM " + ", ".join(list(set(fromclauses)))
            distinct = ""
            if self.query.distinct:
                distinct = "DISTINCT "
            projections = " SELECT  " + distinct + ", ".join(list(set(projections.values())))
            if len(objectfilters) > 0:
                whereclause = "\n WHERE " + "\n\t AND ".join(list(set(objectfilters)))
            else:
                whereclause = ""

            sqlquery = projections + " " + fromcaluse + " " + whereclause
            return sqlquery, projvartocol, coltotemplates, database_name

        return query, projvartocols, coltotemplates, database_name

    def makeunion(self, tounions, query_filters, subjectunions=False):
        """Build the list of non-empty queries produced for every entry of ``tounions``.

        The keys of ``tounions`` are visited in descending sorted order. For
        each key, ``self.makeJoin`` is called either once with the whole value
        (default) or, when ``subjectunions`` is true, once per individual
        ``{triple_map: sub_maps}`` pair of that value.

        :param tounions: dict mapping a key (named ``rdfmt`` here) to a dict
            of mapping predicates keyed by triple map
        :param query_filters: passed through unchanged to ``self.makeJoin``
        :param subjectunions: when true, join every triple map separately
        :return: ``(unions, projvartocols, coltotemplates, database_name)``.
            ``unions`` is a list (the queries are NOT concatenated with
            ``UNION`` here); the other three values are whatever the most
            recent ``makeJoin`` call returned, or ``{}``, ``{}``, ``""`` when
            ``makeJoin`` was never called.
        """
        unions = []
        projvartocols, coltotemplates, database_name = {}, {}, ""

        for rdfmt in sorted(tounions, reverse=True):
            mappingpreds = tounions[rdfmt]
            if subjectunions:
                # One join per triple map (evaluated lazily, in dict order).
                join_inputs = ({tm: submaps} for tm, submaps in mappingpreds.items())
            else:
                join_inputs = (mappingpreds,)

            for join_input in join_inputs:
                result = self.makeJoin(join_input, query_filters)
                un, projvartocols, coltotemplates, database_name = result
                if un is None or len(un) == 0:
                    continue
                unions.append(un)

        return unions, projvartocols, coltotemplates, database_name

    def translate(self, query_filters):
        """Pick the triple maps relevant to ``self.star`` and build the query for them.

        Only the constant predicates of the star are considered: entries
        containing ``'?'`` and the ``rdf:type`` predicate are ignored.

        * With no constant predicate left, every triple map whose subject map
          declares one of the star's ``rdfmts`` is selected (its subject map
          when ``rdf:type`` is among the star predicates, otherwise its
          predicate-object map).
        * Otherwise triple maps are selected by the overlap between their
          predicate-object map keys and the constant predicates; maps that
          cover all of them are additionally recorded as complete matches.

        :param query_filters: passed through to ``makeJoin`` / ``makeunion``
        :return: the 4-tuple returned by ``self.makeunion`` or
            ``self.makeJoin``, or ``(None, None, None, None)`` when no triple
            map qualifies.
        """
        rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
        rdfmts = self.star['rdfmts']
        starpreds = list(self.star['predicates'].keys())
        star_preds = [p for p in starpreds if '?' not in p and p != rdf_type]

        touninon = {}
        completematch = {}

        if not star_preds:
            type_requested = rdf_type in starpreds
            subjectonly = False
            for tm, triplemap in self.mappings.items():
                for rdfmt in triplemap.subject_map.rdf_types:
                    if rdfmt not in rdfmts:
                        continue
                    if type_requested:
                        touninon.setdefault(rdfmt, {})[tm] = triplemap.subject_map
                        subjectonly = True
                    else:
                        touninon.setdefault(rdfmt, {})[tm] = triplemap.predicate_object_map

            if subjectonly or len(touninon) > 1:
                return self.makeunion(touninon, query_filters, subjectonly)
            if not touninon:
                return None, None, None, None
            join_input = next(iter(touninon.values()))
        else:
            wanted = set(star_preds)
            # Keep only the triple maps that mention at least one constant predicate.
            mapping_preds = {
                tm: triplemap
                for tm, triplemap in self.mappings.items()
                if any(p in triplemap.predicate_object_map for p in star_preds)
            }
            for tm, triplemap in mapping_preds.items():
                pom = triplemap.predicate_object_map
                for rdfmt in triplemap.subject_map.rdf_types:
                    if rdfmt not in rdfmts:
                        continue
                    shared = wanted.intersection(list(pom.keys()))
                    if len(shared) == len(wanted):
                        # A later complete match for the same rdfmt replaces the earlier one.
                        completematch[rdfmt] = {tm: pom}
                    if len(shared) > 0:
                        touninon.setdefault(rdfmt, {})[tm] = pom

            if len(completematch) > 1:
                return self.makeunion(completematch, query_filters)
            if not completematch and len(touninon) > 1:
                return self.makeunion(touninon, query_filters)
            if not touninon:
                return None, None, None, None
            # NOTE(review): with exactly one complete match the join still uses
            # the FIRST entry of `touninon`, not the `completematch` entry --
            # kept as-is; confirm whether that is intended.
            join_input = next(iter(touninon.values()))

        query, projvartocols, coltotemplates, database_name = self.makeJoin(join_input, query_filters)
        return query, projvartocols, coltotemplates, database_name
예제 #6
0
파일: sandbox.py 프로젝트: xav-b/datacli
from pydrill.client import PyDrill

# Connect to a Drill instance on localhost:8047.
# NOTE(review): PYDRILL_HOST / PYDRILL_PORT can reportedly be used instead of
# explicit arguments -- presumably environment variables; confirm in PyDrill.
conn = PyDrill(host='localhost', port=8047)

# Stop right away if the Drill server does not report itself as active.
assert conn.is_active()

# Sample query: fetch five rows from cp.`employee.json`.
employees = conn.query('''
  SELECT * FROM cp.`employee.json` LIMIT 5
''')
class TestPydrill(unittest.TestCase):
    """Tests for the PyDrill client using mocked ``HEAD /`` HTTP responses.

    ``unittest`` assertion methods are used instead of bare ``assert``
    statements so the checks survive ``python -O`` and produce descriptive
    failure messages.
    """

    def setUp(self):
        self.drill = PyDrill(host='localhost', port=8047)

    def _mock_head(self, status):
        """Register a mocked ``HEAD http://localhost:8047/`` reply with ``status``."""
        responses.add(
            method=responses.HEAD,
            url='http://localhost:8047/',
            status=status,
            content_type='application/json',
        )

    def test_transport_host(self):
        self.assertEqual(self.drill.transport.host, 'localhost')

    def test_transport_port(self):
        self.assertEqual(self.drill.transport.port, 8047)

    @responses.activate
    def test_is_active(self):
        self._mock_head(200)
        self.assertIs(self.drill.is_active(), True)

    @responses.activate
    def test_is_not_active_404(self):
        self._mock_head(404)
        self.assertIs(self.drill.is_active(), False)

    @responses.activate
    def test_is_not_active_500(self):
        self._mock_head(500)
        self.assertIs(self.drill.is_active(), False)

    @responses.activate
    def test_is_not_active_timeout(self):
        self._mock_head(500)
        # The request must fail with TransportError; no exception is a failure.
        with self.assertRaises(TransportError) as caught:
            self.drill.perform_request('HEAD',
                                       '/',
                                       params={'request_timeout': 0})
        error = caught.exception
        # The convenience attributes must mirror the positional args.
        self.assertEqual(error.status_code, error.args[0])
        self.assertEqual(error.error, error.args[1])
        self.assertEqual(error.info, error.args[2])
        self.assertTrue(str(error))
예제 #8
0
class DataService:
    __drill = None
    __dictionary = queryDictionary.QueryDictionary

    def __init__(self):
        """Create the service and immediately set up its Drill connection."""
        self.init_drill_connection()

    def init_drill_connection(self):
        """Create the PyDrill client for ``localhost`` and print whether Drill is active.

        The client is stored on the instance; when Drill is not active a hint
        on how to start an embedded server is printed instead.
        """
        self.__drill = PyDrill(host='localhost')
        drill_is_up = self.__drill.is_active()

        if not drill_is_up:
            print('''
            Drill is not active. Start your server in a terminal using command:
            cd /Users/Aymeric/apache-drill-1.5.0
            bin/drill-embedded
            ''')
            return

        print("Drill is active: %s" % drill_is_up)

    def get_users(self):
        """Run the dictionary's ``get_users`` query and return the result as a DataFrame."""
        sql = self.__dictionary.get_users()
        return self.get_frame(self.__drill.query(sql))

    def get_elite_users(self):
        """Run the dictionary's ``get_elite`` query and return the result as a DataFrame."""
        sql = self.__dictionary.get_elite()
        return self.get_frame(self.__drill.query(sql))

    def get_elite_users_count(self):
        """Run the dictionary's ``get_elite_count`` query and return the result as a DataFrame."""
        sql = self.__dictionary.get_elite_count()
        return self.get_frame(self.__drill.query(sql))

    def get_elite_users_tip(self):
        """Run the dictionary's ``get_elite_tip`` query and return the result as a DataFrame."""
        sql = self.__dictionary.get_elite_tip()
        return self.get_frame(self.__drill.query(sql))

    def get_elite_users_review(self):
        """Run the dictionary's ``get_elite_review`` query and return the result as a DataFrame.

        ``30`` is passed as the second positional argument of ``query``
        (presumably a timeout in seconds -- confirm against PyDrill).
        """
        sql = self.__dictionary.get_elite_review()
        return self.get_frame(self.__drill.query(sql, 30))

    def get_restaurant_review(self):
        """Run the dictionary's ``get_review`` query and return the result as a DataFrame.

        ``30`` is passed as the second positional argument of ``query``
        (presumably a timeout in seconds -- confirm against PyDrill).
        """
        sql = self.__dictionary.get_review()
        return self.get_frame(self.__drill.query(sql, 30))

    def get_featureset1_but_votes(self):
        """Run the dictionary's ``get_featureset1_but_votes`` query and return a DataFrame.

        ``300`` is passed as the second positional argument of ``query``
        (presumably a timeout in seconds -- confirm against PyDrill).
        """
        sql = self.__dictionary.get_featureset1_but_votes()
        return self.get_frame(self.__drill.query(sql, 300))

    def get_user_review(self, review_id):
        """Run the dictionary's ``get_user_review`` query for ``review_id`` and return a DataFrame.

        ``30`` is passed as the second positional argument of ``query``
        (presumably a timeout in seconds -- confirm against PyDrill).
        """
        sql = self.__dictionary.get_user_review(review_id)
        return self.get_frame(self.__drill.query(sql, 30))

    def review_dates(self):
        """Run the dictionary's ``review_dates`` query and return the result as a DataFrame.

        The wall-clock start and finish times of the query are printed.
        ``600`` is passed as the second positional argument of ``query``
        (presumably a timeout in seconds -- confirm against PyDrill).

        The former ``t0 = time.clock()`` line was dropped: its value was never
        used and ``time.clock`` no longer exists on Python 3.8+, where the
        call raised ``AttributeError`` before the query could run.
        """
        query = self.__dictionary.review_dates()
        print('started on: ' + str(datetime.datetime.now()))
        results = self.__drill.query(query, 600)
        print('finished on: ' + str(datetime.datetime.now()))
        return self.get_frame(results)

    @staticmethod
    def get_frame(results, number_of_records=None):
        """Build a DataFrame from ``results.rows`` with ``results.columns`` as columns.

        ``number_of_records`` is accepted but currently unused.
        """
        rows, columns = results.rows, results.columns
        return DataFrame(data=rows, columns=columns)

    @staticmethod
    def print_frame(frame, records_to_display=None):
        """Print ``frame`` as text after a blank separator.

        Without ``records_to_display`` the whole frame is printed with
        right-justified headers; otherwise only ``frame[:records_to_display]``
        is printed, with left-justified headers.
        """
        print("\n")

        if records_to_display is not None:
            rendered = frame[:records_to_display].to_string(justify='left')
        else:
            rendered = frame.to_string(justify='right')
        print(rendered)

    def main(self):
        """Fetch the elite users and print at most the first 10 records."""
        self.print_frame(self.get_elite_users(), 10)