コード例 #1
0
ファイル: presto.py プロジェクト: Captricity/redash
    def run_query(self, query, user):
        connection = presto.connect(
                host=self.configuration.get('host', ''),
                port=self.configuration.get('port', 8080),
                username=self.configuration.get('username', 'redash'),
                catalog=self.configuration.get('catalog', 'hive'),
                schema=self.configuration.get('schema', 'default'))

        cursor = connection.cursor()


        try:
            cursor.execute(query)
            column_tuples = [(i[0], PRESTO_TYPES_MAPPING.get(i[1], None)) for i in cursor.description]
            columns = self.fetch_columns(column_tuples)
            rows = [dict(zip(([c['name'] for c in columns]), r)) for i, r in enumerate(cursor.fetchall())]
            data = {'columns': columns, 'rows': rows}
            json_data = json.dumps(data, cls=JSONEncoder)
            error = None
        except DatabaseError as db:
            json_data = None
            default_message = 'Unspecified DatabaseError: {0}'.format(db.message)
            message = db.message.get('failureInfo', {'message', None}).get('message')
            error = default_message if message is None else message
        except (KeyboardInterrupt, InterruptException) as e:
            cursor.cancel()
            error = "Query cancelled by user."
            json_data = None
        except Exception as ex:
            json_data = None
            error = ex.message
            if not isinstance(error, basestring):
                error = unicode(error)

        return json_data, error
コード例 #2
0
ファイル: presto_hook.py プロジェクト: AI-Cdrone/airflow
 def __init__(self, host=None, db=None, port=None,
              presto_conn_id='presto_default'):
     self.user = '******'
     if not presto_conn_id:
         self.host = host
         self.db = db
         self.port = port
     else:
         session = settings.Session()
         db = session.query(
             Connection).filter(
                 Connection.conn_id == presto_conn_id)
         if db.count() == 0:
             raise Exception("The presto_conn_id you provided isn't defined")
         else:
             db = db.all()[0]
         self.host = db.host
         self.db = db.schema
         self.catalog = 'hive'
         self.port = db.port
         self.cursor = presto.connect(host=db.host, port=db.port,
                                      username=self.user,
                                      catalog=self.catalog,
                                      schema=db.schema).cursor()
         session.close()    # currently only a pass in pyhive
コード例 #3
0
ファイル: test_presto.py プロジェクト: dropbox/PyHive
 def test_invalid_kwargs(self):
     """some kwargs are reserved"""
     self.assertRaisesRegexp(
         ValueError, 'Cannot override', lambda: presto.connect(
             host=_HOST, username='******', requests_kwargs={'url': 'test'}
         ).cursor()
     )
コード例 #4
0
def makeGraph(table,dt):
  """
  Build the entire failure graph with all DNS errors found in the table at the given date.
  """
    
  sys.stderr.write("Build the graph. ")
  # Get DNS failed requests
  req0 = "select dst, dns_question from %s where dns_rcode!='NOERROR' and ( dns_question like '%% A' or dns_question like '%% AAAA' ) and dt='%s' " % (table,dt);

  # Ignored certain request (antivirus,...)
  for fqdn in ignoredDomains:
    req0 += " and  not regexp_like(dns_question, '(?i).*%s .*') " % fqdn 

  #print req0
  cursor = presto.connect('localhost').cursor()
  cursor.execute(req0)

  #Make the corresponding failure graph
  G = nx.Graph()
  count = 0
  for data in cursor:
    addr = data[0]
    if data[1] is None:
      continue
    ques = data[1].split()[0]

    G.add_node(addr,bipartite=0)
    G.add_node(ques,bipartite=1)
    G.add_edge(addr,ques)
  
  return G
コード例 #5
0
ファイル: presto.py プロジェクト: examplegithubaccount/redash
    def run_query(self, query):
        connection = presto.connect(
                host=self.configuration.get('host', ''),
                port=self.configuration.get('port', 8080),
                username=self.configuration.get('username', 'redash'),
                catalog=self.configuration.get('catalog', 'hive'),
                schema=self.configuration.get('schema', 'default'))

        cursor = connection.cursor()

        try:
            cursor.execute(query)
            columns_data = [(row[0], row[1]) for row in cursor.description]

            columns = [{'name': col[0],
                'friendly_name': col[0],
                'type': PRESTO_TYPES_MAPPING.get(col[1], None)} for col in columns_data]

            rows = [dict(zip(([c[0] for c in columns_data]), r)) for i, r in enumerate(cursor.fetchall())]
            data = {'columns': columns, 'rows': rows}
            json_data = json.dumps(data, cls=JSONEncoder)
            error = None
        except Exception, ex:
            json_data = None
            error = ex.message
コード例 #6
0
ファイル: matatabi.py プロジェクト: necoma/n6-matatabi
 def RunQuery(self, query, parameters):
     cursor = presto.connect('localhost').cursor()
     try:
         cursor.execute(query, parameters)
     except presto.DatabaseError as e:
         raise
     res = cursor.fetchall()
     return res
コード例 #7
0
ファイル: test_presto.py プロジェクト: dropbox/PyHive
 def test_requests_session(self):
     with requests.Session() as session:
         connection = presto.connect(
             host=_HOST, port=_PORT, source=self.id(), requests_session=session
         )
         cursor = connection.cursor()
         cursor.execute('SELECT * FROM one_row')
         self.assertEqual(cursor.fetchall(), [(1,)])
コード例 #8
0
ファイル: test_presto.py プロジェクト: dropbox/PyHive
 def test_requests_kwargs(self):
     connection = presto.connect(
         host=_HOST, port=_PORT, source=self.id(),
         requests_kwargs={'proxies': {'http': 'localhost:99999'}},
     )
     cursor = connection.cursor()
     self.assertRaises(requests.exceptions.ProxyError,
                       lambda: cursor.execute('SELECT * FROM one_row'))
コード例 #9
0
ファイル: test_presto.py プロジェクト: dropbox/PyHive
 def test_invalid_password_and_kwargs(self):
     """password and requests_kwargs authentication are incompatible"""
     self.assertRaisesRegexp(
         ValueError, 'Cannot use both', lambda: presto.connect(
             host=_HOST, username='******', password='******', protocol='https',
             requests_kwargs={'auth': requests.auth.HTTPBasicAuth('user', 'secret')}
         ).cursor()
     )
コード例 #10
0
def findNtpAmplifiers(table,today=datetime.date.today(),verbose=False):
  """
  Find NTP amplifiers in the given traffic (table) and store the results in the 'ntpamplifiers' Hive table.
  """
  
  date  = "%d%02d%02d" % (today.year, today.month, today.day)
  table = scrub(table)


  ## set some variables regarding the input data
  if table.startswith("netflow"):
    dataType = "netflow"
    req0 = "select sa, sum(ibyt), sum(ipkt) from %s where sp=123 and dt='%s' and pr='UDP' and ibyt/ipkt=468 group by sa" % (table,date)
  elif table.startswith("sflow"):
    dataType = "sflow"
    req0 = "select srcip, sum(ipsize), count(*) from %s where udpsrcport=123 and ipprotocol=17 and ipsize=468 and dt='%s' group by srcip" % (table,date)
  else:
    sys.stderr.write("Data type unknown!")
    sys.exit(-1) 


  cursor = presto.connect('localhost').cursor()
  if verbose: sys.stdout.write("Looking for %s NTP amplifiers... (%s)\n" % (date,table))
    
  # get today's data
  cursor.execute(req0)
  res = cursor.fetchall()

  if len(res)==0:
	return

  data = pd.DataFrame(res,columns=["srcip", "nbbyt", "nbpkt"])

  # add the confidence score:
  data["confidence"] = "LOW"
  data.loc[data.nbpkt>=100,"confidence"] = "MED"
  data.loc[data.nbpkt>=1000,"confidence"] = "HIGH"

  outputFile = open("%s/ntpamplifiers_%s_%s.txt" % (outputDirectory,table,date),"w")
  data.to_csv(outputFile,sep="\t",header=False,cols=["srcip","nbbyt","nbpkt","confidence"],index=False)
  outputFile.close()

  # Store results in Hive 
  try:
      transport = TSocket.TSocket('localhost', 10000)
      transport = TTransport.TBufferedTransport(transport)
      protocol = TBinaryProtocol.TBinaryProtocol(transport)
 
      client = ThriftHive.Client(protocol)
      transport.open()

      client.execute("create table if not exists ntpamplifiers (srcip string, byte bigint,  pkt bigint, confidence string) partitioned by(dt string, dataSrc string) row format delimited fields terminated by '\t'");
      client.execute("load data local inpath '{dir}/ntpamplifiers_{table}_{date}.txt' overwrite into table ntpamplifiers partition (dt='{date}', dataSrc='{table}')".format(table=table,date=date,dir=outputDirectory))
      transport.close()

  except Thrift.TException, tx:
      sys.stderr.write('%s\n' % (tx.message))
コード例 #11
0
 def get_conn(self):
     """Returns a connection object"""
     db = self.get_connection(self.presto_conn_id)
     return presto.connect(
         host=db.host,
         port=db.port,
         username=db.login,
         catalog=db.extra_dejson.get('catalog', 'hive'),
         schema=db.schema)
コード例 #12
0
def find_amplifiers():
    
    # Find NTP amplifiers and keep ip in Hive Table
    req = "create table ntp_amplifiers as "
    req += "select sa from netflow where sp=123 and pr='UDP' and ibyt/ipkt=468 group by sa"

    cursor = presto.connect('localhost').cursor()
    cursor.execute(req)
    res = cursor.fetchall()
コード例 #13
0
ファイル: presto_hook.py プロジェクト: anjiang00/airflow
 def get_conn(self):
     """Returns a connection object"""
     db = self.get_connection(self.conn_id_name)
     return presto.connect(
         host=db.host,
         port=db.port,
         username=db.login,
         catalog=db.extra_dejson.get("catalog", "hive"),
         schema=db.schema,
     )
コード例 #14
0
def find_victims():

    # Request the data
    req = "select da, sum(ibyt) as vol from netflow where sa in (select sa from ntp_amplifiers ) group by da order by vol desc"
    cursor = presto.connect('localhost').cursor()
    cursor.execute(req)
    res = cursor.fetchall()

    for i in range(3):
        print res[i]
        plot_traffic_volume(res[i][0])
コード例 #15
0
 def get_conn(self):
     """Returns a connection object"""
     db = self.get_connection(self.presto_conn_id)
     reqkwargs = None
     if db.password is not None:
         reqkwargs = {'auth': HTTPBasicAuth(db.login, db.password)}
     return presto.connect(
         host=db.host,
         port=db.port,
         username=db.login,
         source=db.extra_dejson.get('source', 'airflow'),
         protocol=db.extra_dejson.get('protocol', 'http'),
         catalog=db.extra_dejson.get('catalog', 'hive'),
         requests_kwargs=reqkwargs,
         schema=db.schema)
コード例 #16
0
ファイル: test_presto.py プロジェクト: kaushikd49/PyHive
    def test_set_session_in_consructor(self):
        conn = presto.connect(
            host=_HOST, source=self.id(), session_props={'query_max_run_time': '1234m'}
        )
        with contextlib.closing(conn):
            with contextlib.closing(conn.cursor()) as cursor:
                cursor.execute('SHOW SESSION')
                rows = [r for r in cursor.fetchall() if r[0] == 'query_max_run_time']
                assert len(rows) == 1
                session_prop = rows[0]
                assert session_prop[1] == '1234m'

                cursor.execute('RESET SESSION query_max_run_time')
                cursor.fetchall()

                cursor.execute('SHOW SESSION')
                rows = [r for r in cursor.fetchall() if r[0] == 'query_max_run_time']
                assert len(rows) == 1
                session_prop = rows[0]
                assert session_prop[1] != '1234m'
コード例 #17
0
    def test_set_session_in_constructor(self):
        conn = presto.connect(
            host=_HOST, source=self.id(), session_props={'query_max_run_time': '1234m'}
        )
        with contextlib.closing(conn):
            with contextlib.closing(conn.cursor()) as cursor:
                cursor.execute('SHOW SESSION')
                rows = [r for r in cursor.fetchall() if r[0] == 'query_max_run_time']
                assert len(rows) == 1
                session_prop = rows[0]
                assert session_prop[1] == '1234m'

                cursor.execute('RESET SESSION query_max_run_time')
                cursor.fetchall()

                cursor.execute('SHOW SESSION')
                rows = [r for r in cursor.fetchall() if r[0] == 'query_max_run_time']
                assert len(rows) == 1
                session_prop = rows[0]
                assert session_prop[1] != '1234m'
コード例 #18
0
    def __init__(self,
                 connection_string,
                 username=None,
                 password=None,
                 proxy_user=None,
                 impersonate=False,
                 *args,
                 **kwargs):
        presto_conf = get_presto_connection_conf(connection_string)

        host = presto_conf.host
        port = 8080 if not presto_conf.port else presto_conf.port

        # default to querybook credentials if user/pwd is not supplied
        # we pass auth credentials through requests_kwargs instead of
        # using requests library's builtin auth to bypass the https requirement
        # and set the proper Authorization header
        req_kwargs = {}

        if username and password:
            auth = (username, password)
            if proxy_user and impersonate:
                auth = HTTPBasicAndProxyAuth(
                    auth,  # Basic Auth
                    (proxy_user,
                     "no pass"),  # proxy user auth, password not required
                )
            req_kwargs["auth"] = auth

        connection = presto.connect(
            host,
            port=port,
            username=proxy_user or username,
            catalog=presto_conf.catalog,
            schema=presto_conf.schema,
            source="querybook",
            protocol=presto_conf.protocol,
            requests_kwargs=req_kwargs,
        )
        self._connection = connection
        super(PrestoClient, self).__init__()
コード例 #19
0
ファイル: presto.py プロジェクト: jasonthomas/redash
    def run_query(self, query, user):
        connection = presto.connect(
            host=self.configuration.get("host", ""),
            port=self.configuration.get("port", 8080),
            protocol=self.configuration.get("protocol", "http"),
            username=self.configuration.get("username", "redash"),
            password=(self.configuration.get("password") or None),
            catalog=self.configuration.get("catalog", "hive"),
            schema=self.configuration.get("schema", "default"),
        )

        cursor = connection.cursor()

        try:
            cursor.execute(query)
            column_tuples = [
                (i[0], PRESTO_TYPES_MAPPING.get(i[1], None)) for i in cursor.description
            ]
            columns = self.fetch_columns(column_tuples)
            rows = [
                dict(zip(([column["name"] for column in columns]), r))
                for i, r in enumerate(cursor.fetchall())
            ]
            data = {"columns": columns, "rows": rows}
            json_data = json_dumps(data)
            error = None
        except DatabaseError as db:
            json_data = None
            default_message = "Unspecified DatabaseError: {0}".format(str(db))
            if isinstance(db.args[0], dict):
                message = db.args[0].get("failureInfo", {"message", None}).get(
                    "message"
                )
            else:
                message = None
            error = default_message if message is None else message
        except (KeyboardInterrupt, InterruptException, JobTimeoutException):
            cursor.cancel()
            raise

        return json_data, error
コード例 #20
0
    def connect(self, params={}):
        host = params.get('host')
        port = params.get('port', 8080)

        self.host = host
        self.port = port

        kwargs = {
            'host': host,
            'port': port,
            'username': params.get('username'),
            'catalog': params.get('catalog', 'hive'),
            'schema': params.get('schema', 'default'),
            'poll_interval': params.get('poll_interval', 1),
            'source': params.get('source', 'pyhive'),
        }

        self.cursor = presto.connect(**kwargs).cursor()
        self.logger.info("Connecting to %s:%s " % (host, port))

        self.timeout = 5
コード例 #21
0
    def connect(self, params={}):
        host = params.get("host")
        port = params.get("port", 8080)

        self.host = host
        self.port = port

        kwargs = {
            "host": host,
            "port": port,
            "username": params.get("username"),
            "catalog": params.get("catalog", "hive"),
            "schema": params.get("schema", "default"),
            "poll_interval": params.get("poll_interval", 1),
            "source": params.get("source", "pyhive"),
        }

        self.cursor = presto.connect(**kwargs).cursor()
        self.logger.info("Connecting to %s:%s " % (host, port))

        self.timeout = 5
コード例 #22
0
def handler(context, v3io_username, v3io_access_key, presto_api, presto_table):
    """
    Query data from KV table via Presto.
    """
    context.logger.info("Connecting to Presto")
    req_kw = {
        "auth": (v3io_username, v3io_access_key),
        "verify": False,
    }
    conn = presto.connect(
        presto_api,
        port=443,
        username=v3io_username,
        protocol="https",
        requests_kwargs=req_kw,
    )
    context.logger.info("Querying Presto")
    df = pd.read_sql_query(f"select * from {presto_table}", conn)

    context.logger.info("Logging Dataset")
    context.log_dataset("data", df=df, format="csv", index=False)
コード例 #23
0
    def run_query(self, query, user):
        connection = presto.connect(
            host=self.configuration.get('host', ''),
            port=self.configuration.get('port', 8080),
            username=self.configuration.get('username', 'redash'),
            catalog=self.configuration.get('catalog', 'hive'),
            schema=self.configuration.get('schema', 'default'))

        cursor = connection.cursor()

        try:
            cursor.execute(query)
            column_tuples = [(i[0], PRESTO_TYPES_MAPPING.get(i[1], None))
                             for i in cursor.description]
            columns = self.fetch_columns(column_tuples)
            rows = [
                dict(zip(([c['name'] for c in columns]), r))
                for i, r in enumerate(cursor.fetchall())
            ]
            data = {'columns': columns, 'rows': rows}
            json_data = json.dumps(data, cls=JSONEncoder)
            error = None
        except DatabaseError as db:
            json_data = None
            default_message = 'Unspecified DatabaseError: {0}'.format(
                db.message)
            message = db.message.get('failureInfo',
                                     {'message', None}).get('message')
            error = default_message if message is None else message
        except (KeyboardInterrupt, InterruptException) as e:
            cursor.cancel()
            error = "Query cancelled by user."
            json_data = None
        except Exception as ex:
            json_data = None
            error = ex.message
            if not isinstance(error, basestring):
                error = unicode(error)

        return json_data, error
コード例 #24
0
def plot_traffic_volume(ip):

    # Request the data
    req = "select ts, ibyt from netflow where sa = '{0}' or da = '{0}'".format(ip)
    cursor = presto.connect('localhost').cursor()
    cursor.execute(req)
    res = cursor.fetchall()

    # Pre-process for ploting
    data = pd.DataFrame(res,columns=["ts", "bytes"])
    data.index = pd.to_datetime(data.pop("ts"), unit="s")
    data = data.resample('H',how="sum")
    ix = pd.DatetimeIndex(start=data.index[0]-datetime.timedelta(0.5), end=data.index[-1]+datetime.timedelta(0.5), freq='H')
    data = data.reindex(ix)
    data.bytes = data.bytes.fillna(0)

    # Plot the data
    plt.figure()
    data.plot(lw=2)
    plt.grid(True)
    plt.savefig("traffic_volume_%s.png" % ip)
    plt.close()
コード例 #25
0
def create_kpi_df(**kwargs):
    today = kwargs['execution_date'] + timedelta(days=1)
    today = today.strftime("%Y-%m-%d")
    cursor = presto.connect('presto.smartnews.internal',
                            port=8081,
                            username=OWNER).cursor()
    cursor.execute(sql_kpi.format(execution_date=today))
    ret = cursor.fetchall()
    columns = [c[0] for c in cursor.description]
    kpi_df = pd.DataFrame(ret, columns=columns)
    #Creating KPI DF

    kpi_df['28DAU'] = kpi_df[['28DAU']].applymap(lambda x: '{:,}'.format(x))
    kpi_df['MHU'] = kpi_df[['MHU']].applymap(lambda x: '{:,}'.format(x))
    kpi_df['MHDAU'] = kpi_df[['MHDAU']].applymap(lambda x: '{:,}'.format(x))
    kpi_df['DAU'] = kpi_df[['DAU']].applymap(lambda x: '{:,}'.format(x))
    kpi_df['Installs'] = kpi_df[['Installs'
                                 ]].applymap(lambda x: '{:,}'.format(x))

    #Rendering HTML
    global s
    s = kpi_df.to_html(index=False, justify='left')
コード例 #26
0
ファイル: presto.py プロジェクト: vorakumar/redash
    def run_query(self, query):
        connection = presto.connect(
                host=self.configuration.get('host', ''),
                port=self.configuration.get('port', 8080),
                username=self.configuration.get('username', 'redash'),
                catalog=self.configuration.get('catalog', 'hive'),
                schema=self.configuration.get('schema', 'default'))

        cursor = connection.cursor()


        try:
            cursor.execute(query)
            column_tuples = [(i[0], PRESTO_TYPES_MAPPING.get(i[1], None)) for i in cursor.description]
            columns = self.fetch_columns(column_tuples)
            rows = [dict(zip(([c['name'] for c in columns]), r)) for i, r in enumerate(cursor.fetchall())]
            data = {'columns': columns, 'rows': rows}
            json_data = json.dumps(data, cls=JSONEncoder)
            error = None
        except Exception, ex:
            json_data = None
            error = ex.message
コード例 #27
0
ファイル: api.py プロジェクト: Tzeross/Navidog
    def __init__(self,
                 host,
                 port=8080,
                 username=None,
                 catalog='hive',
                 schema='default',
                 poll_interval=1,
                 source='pyhive',
                 session_props=None,
                 protocol='http',
                 password=None,
                 requests_session=None,
                 requests_kwargs=None,
                 **kwargs):
        conn = presto.connect(host, port, username, catalog, schema,
                              poll_interval, source, session_props, protocol,
                              password, requests_session, requests_kwargs,
                              **kwargs)

        cursor = conn.cursor()

        super(Presto, self).__init__(conn, cursor)
コード例 #28
0
ファイル: POP_DB.py プロジェクト: Sean-Chuang/item2item-exp
 def train(self):
     b_time = time.time()
     cursor = presto.connect('presto.smartnews.internal', 8081).cursor()
     query = f"""
         select 
             content_id,
             count(*) as count
         from hive_ad.z_seanchuang.i2i_offline_train_raw 
         where dt = '{config['dt']}'
           and content_id != ''
           and behavior_types = '{config['behavior']}'
         group by 1
         order by 2 desc
         limit {config['topN']}
     """
     print(query)
     cursor.execute(query)
     column_names = [desc[0] for desc in cursor.description]
     df = pd.DataFrame(cursor.fetchall(), columns=column_names)
     self.pop_items = df['content_id'].tolist()
     print(df)
     print("Train finished ... : ", time.time() - b_time)
コード例 #29
0
ファイル: prestohose.py プロジェクト: lowlowjack/personal
def worker():
    while True:
        item = q.get()
        if item is None:
            break
        cursor = presto.connect(host='HOST',
                                port=PORT,
                                protocol='https',
                                name='NAME',
                                password=password,
                                session_props={
                                    'query_max_run_time': timeout
                                }).cursor()

        try:
            cursor.execute(query.format(dbname, item, item + hosesize))
            i = pd.DataFrame(cursor.fetchall(),
                             columns=[i[0] for i in cursor.description])

            #Thread locking mechanism
            try:
                lock.acquire()
                output = pd.concat([output, i])
            except Exception as err:
                raise err
            finally:
                lock.release()

        except DatabaseError as err:
            if err.args[0].get('message').find('Query exceeded maximum') >= 0:
                q.put(item)
                lock.acquire()
                logger.update({})
                lock.release()
            else:
                raise
        else:
            raise
        q.task_done()
コード例 #30
0
def data_from_hive(sql, source):

    # 配置presto属性
    try:
        conn = presto.connect(host="180.150.189.61",
                              port=26623,
                              catalog="hive",
                              schema=source)
        start = time.time()
        print("数据库连接完成!")
        try:
            cursor = conn.cursor()
            print("数据提取中......")
            cursor.execute(sql)  # query
            print("数据提取完成,耗时{}\n".format(time.time() - start))
            return pd.DataFrame(
                cursor.fetchall(),
                columns=list(zip(*cursor.description))[0])  # return 数据 & 列名称
        finally:
            cursor.close()
    finally:
        conn.close()
コード例 #31
0
ファイル: sflow.py プロジェクト: necoma/n6-matatabi
    def RunQuery(params, **kwargs):
        parameters = []
        time_max = (datetime.utcnow() + timedelta(days=2)).strftime("%Y%m%d")
        time_min = (datetime.utcnow() - timedelta(days=20)).strftime("%Y%m%d")
        ip = None
        if 'time.max' in params.keys():
            time_max = params['time.max'][0].strftime("%Y%m%d")
        if 'time.min' in params.keys():
            time_min = params['time.min'][0].strftime("%Y%m%d")
        if 'ip' in params.keys():
            ip = params['ip']

        query = "select srcip,dstip,ipprotocol,tcpsrcport,udpsrcport,tcpdstport,udpdstport,unixsecondsutc from " + params['source'][0] 

        if time_max != '' or time_min != '' or ip is not None:
            query += " where "

        if time_max != '':
            query += " dt<=%s"
            parameters.append(time_max)
        if time_min != '':
            query += " and dt>=%s"
            parameters.append(time_min)
        if ip is not None:
            query += " and (srcip = " + " or srcip = ".join(["%s" for i in ip]) + " or dstip = " + " or dstip = ".join(["%s" for i in ip]) + ")"
            print query
            for j in [1,2]:
                for i in ip:
                    parameters.append(i)

        query += " order by dt"
        cursor = presto.connect('localhost').cursor()
        try:
            cursor.execute(query, parameters)
        except presto.DatabaseError as e:
            raise
        res = cursor.fetchall()
        return res
コード例 #32
0
def printData(fieldValues):

    if fieldValues != None:

        print "beginDSInfo"
        print """fileName;#;true
    csv_first_row_has_column_names;true;true;
    csv_separator;|;true
    csv_number_grouping;,;true
    csv_number_decimal;.;true
    csv_date_format;d.M.yyyy;true"""
        print ''.join(['host;', fieldValues[0], ';true'])
        print ''.join(['port;', fieldValues[1], ';true'])
        print ''.join(['catalog;', fieldValues[2], ';true'])
        print ''.join(['schema;', fieldValues[3], ';true'])
        print ''.join(['query;', fieldValues[4], ';true'])
        print "endDSInfo"
        print "beginData"
        try:
            cursor = presto.connect(host=fieldValues[0],
                                    port=fieldValues[1],
                                    catalog=fieldValues[2],
                                    schema=fieldValues[3]).cursor()
            cursor.execute(fieldValues[4])

            columns = [i[0] for i in cursor.description]
            print ', '.join(columns)

            for values in cursor.fetchall():
                print ', '.join(
                    str(value).replace(',', '.') for value in values)

        except Exception, e:
            easygui.msgbox('failed because of %s' % e.message)
            print "Error"
            print "Failed"

        print("endData")
コード例 #33
0
def execute_presto(sql, engine):
    if engine is None:
        if 'PRESTO_ENGINE' in os.environ:
            engine = os.environ['PRESTO_ENGINE']
        else:
            engine = "sn"

    log.info('executing...')
    addr = ENGINES[engine]
    p = presto.connect(addr['host'],
                       port=addr['port'],
                       catalog='hive_ad',
                       username='******').cursor()
    try:
        p.execute(sql)
        d = p.fetchall()
        df = DataFrame(d)
        log.info('done')
        if len(d) != 0:
            df.columns = map(lambda x: x['name'], p._columns)
        return df
    except presto.DatabaseError as e:
        log.exception(e)
コード例 #34
0
ファイル: presto2csv.py プロジェクト: HuJiawei1990/hive2es
def presto_connector(host='localhost',
                     port=8089,
                     catalog='elasticsearch',
                     schema='default',
                     *args,
                     **kwargs):
    """
    presto connector API
    :param host:
    :param port:
    :param catalog:
    :param schema:
    :param args:
    :param kwargs:
    :return:
    """
    logger.info("Connecting to PRESTO server [{}:{}/{}/{}]".format(
        host, port, catalog, schema))
    return presto.connect(host=host,
                          port=port,
                          catalog=catalog,
                          schema=schema,
                          *args,
                          **kwargs).cursor()
コード例 #35
0
def execute_sql(sql):

    host = '172.17.2.55'
    p_port = u'9090'
    p_username = '******'

    conn = presto.connect(host, port=p_port, username=p_username)

    cursor = conn.cursor()

    cursor.execute(sql)

    columns = [col[0] for col in cursor.description]

    ## result = [dict(zip(columns,row)) for row in cursor.fetchall()]

    row = cursor.fetchall()

    row = list(row)
    row = [list(i) for i in row]

    result = DataFrame(row, columns=columns)

    return result
コード例 #36
0
 def train(self):
     b_time = time.time()
     cursor = presto.connect('presto.smartnews.internal', 8081).cursor()
     #query = f"select * from hive_ad.z_seanchuang.i2i_offline_item_topk_items where dt='{self.dt}'"
     query = f"""
         with test_item as (
             select 
                 distinct content_id as item
             from hive_ad.z_seanchuang.i2i_offline_test_raw
             where dt = '{self.dt}'
         )
         select 
             all.* 
         from hive_ad.z_seanchuang.i2i_offline_item_topk_items all
         inner join test_item test
             on all.item = test.item
               and all.dt='{self.dt}'
     """
     cursor.execute(query)
     column_names = [desc[0] for desc in cursor.description]
     df = pd.DataFrame(cursor.fetchall(), columns=column_names)
     self.items_similar = df.set_index('item')['topk_json'].to_dict(
         defaultdict(list))
     print("Train finished ... : ", time.time() - b_time)
コード例 #37
0
ファイル: test_presto.py プロジェクト: ptallada/PyHive
 def test_bad_protocol(self):
     self.assertRaisesRegexp(ValueError, 'Protocol must be',
                             lambda: presto.connect('localhost', protocol='nonsense').cursor())
コード例 #38
0
ファイル: test_presto.py プロジェクト: ptallada/PyHive
 def test_invalid_protocol_config(self):
     """protocol should be https when passing password"""
     self.assertRaisesRegexp(
         ValueError, 'Protocol.*https.*password', lambda: presto.connect(
             host=_HOST, username='******', password='******', protocol='http').cursor()
     )
コード例 #39
0
ファイル: run_Presto.py プロジェクト: kevin868/SQLonS3
    t0 = time.time()
    result = cursor.fetchall()
    delta_t = time.time() - t0
    if (len(result) == 0):
        return ("Null", t0, delta_t)
    else:
        return (str(result[0]), t0, delta_t)

#Write to file
#with open("test.txt", "a") as myfile:
#        myfile.write(getTimeString(t0) + "," + str(query) + "," + str(result[0]) + "\n")

##Result is a 2-tuple, with time taken and TECHNOLOGY+NUM WORKER NODES (Presto6)
def appendResultToFile(filename, startTime, queryID, year, timeTaken, result):
    outString = "{},{},{},{:.2f},{},{}\n".format(getTimeString(time.time()), queryID, year, timeTaken, result, "Presto6")
    with open(filename, "a") as myfile:
        myfile.write(outString)
    print(outString)


cursor = presto.connect('localhost').cursor()
for year in [2007,2009,2012]:
    for queryID in range(1,5):
        #Repeat 2 times
        for i in range(1,4):
            result, t0, time_taken = runTimeQuery(year, mapQuery[queryID])
            appendResultToFile("PrestoQueries.txt", t0, queryID, year, time_taken, result)



コード例 #40
0
ファイル: test_presto.py プロジェクト: kirklg/PyHive
 def connect(self):
     return presto.connect(host=_HOST, source=self.id())
コード例 #41
0
 def test_bad_protocol(self):
     self.assertRaisesRegexp(ValueError, 'Protocol must be',
                             lambda: presto.connect('localhost', protocol='nonsense').cursor())
コード例 #42
0
from string import Template
#import credentials
from pyhive import presto

# conn_string = credentials.getRedshiftCredentials()
print 'Connecting to database\n	->%s' % ('"presto.ophan.co.uk", port=8889, catalog="hive", schema="temp_ah"')
conn = presto.connect("presto.ophan.co.uk", port=8889, catalog="hive", schema="temp_ah")
print 'Connected'

def presto_query(conn, query):
    cur = conn.cursor()
    cur.execute(query)
    cols = [column[0] for column in cur.description]
    fetch = cur.fetchall()
    results = []
    for row in fetch:
        results.append(dict(zip(cols, row)))
    return results

# define functions to execute sql queries 
# Series and episodes published last 30 days
def series_published_sql():
	return presto_query(conn, """
	SELECT publication_date,
	       section,
	       COUNT(series_url) AS series_published_to,
	       SUM(episodes_published_per_series) AS episodes_published
	FROM (SELECT publication_date,
	             series_url,
	             section,
	             COUNT(episode_url) AS episodes_published_per_series
コード例 #43
0
ファイル: test_presto.py プロジェクト: kaushikd49/PyHive
 def connect(self):
     return presto.connect(host=_HOST, source=self.id())
コード例 #44
0
from decimal import Decimal
from collections import OrderedDict
from enum import Enum, unique

import boto3
from boto3.dynamodb.conditions import Key

import json
from json import encoder
encoder.FLOAT_REPR = lambda o: format(o, '.4f')

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
log = logging.getLogger(__name__)

cursor = presto.connect('presto.smartnews.internal', 8081).cursor()
ddb_client = boto3.client('dynamodb')
dynamodb = boto3.resource('dynamodb', region_name='ap-northeast-1')


# enum
@unique
class Action(Enum):
    # View = "View"
    # AddToCart = "AddToCart"
    # Purchase = "Purchase"
    View = "ViewContent"
    AddToCart = "AddToCart"
    Purchase = "revenue"

コード例 #45
0
ファイル: prestohose.py プロジェクト: lowlowjack/personal
query = '''with a as (select
            data,
            row_number() over (range unbounded preceding) as rn
        from
            {} )
        select
            data
            rn
        from   
            a where rn>={} and rn<{}
        '''
cursor = presto.connect(host='HOST',
                        port=PORT,
                        protocol='https',
                        name='NAME',
                        password=password,
                        session_props={
                            'query_max_run_time': timeout
                        }).cursor()

#Basic query check.
try:
    cursor.execute(query.format(dbname, 0, 100000))
    cursor.fetchall()
except DatabaseError as err:
    if err.args[0].get('message').find('Query exceeded maximum') >= 0:
        pass
    else:
        raise
else:
    raise
コード例 #46
0
def presto_query(query):
    import pandas as pd
    from pyhive import presto
    conn = presto.connect(host='presto-server', port=8080)
    return pd.read_sql_query(query, conn, parse_dates=['timestamp'])
コード例 #47
0
import pandas as pd
import sqlalchemy
from pyhive import presto

query = '''
SELECT wbanno,
       min(temperature) t_min,
       avg(temperature) t_avg,
       max(temperature) t_max
FROM uscrn GROUP BY 1
'''
conn = presto.connect(host='presto-server', port=8080)
features = pd.read_sql_query(query, conn)
print(features)

uri = "postgresql://*****:*****@datamart:5432/datamart"
engine = sqlalchemy.create_engine(uri)
features.to_sql('temperature_features',
                index=False,
                if_exists='replace',
                con=engine)
print('temperature_features created')
コード例 #48
0
ファイル: write-mongo.py プロジェクト: JohnatanSantana/Go
banco = client['john']
collection = banco['nba']
# collection = banco.album

# ELASTIC
es = Elasticsearch(hosts= server)

# REDIS

r = redis.Redis(
    host=server,
    port=6379)


#CONNECT PRESTO
connect = presto.connect(host=server,port=8080)

def read_presto():
    df = pd.read_sql_query('select * from elastic.default.john',connect)
    print(df.head())

def write_mongo(data_dict):
    try:
        data_id = collection.insert_many(data_dict)
        print(data_id)
    except NameError:
        print(NameError)


def read_csv(path):
    data = pd.read_csv(path)
コード例 #49
0
def get_presto_connection(attemps=3):
    return presto.connect(host=HOST, port=DB_PORT)
コード例 #50
0
 def query_age_price(self):
     conn = presto.connect(**PRESTO_SERVER)
     cursor = conn.cursor()
     cursor.execute(AGE_PRICE_QUERY)
     tuples = cursor.fetchall()
     return tuples
コード例 #51
0
def get_connection():
    return presto.connect(**presto_config)
コード例 #52
0
ファイル: connect_hive.py プロジェクト: atbraoy/data_mining
#!/usr/bin/env python

import logging
logging.basicConfig(level=logging.DEBUG)

# from pyhive import hive
# conn = hive.Connection(host="localhost", port=9083, username="******")

from pyhive import presto

cursor = presto.connect(host="localhost", port=10000, username="******").cursor()
sql = 'select * from default limit 10'
cursor.execute(sql)

print(cursor.fetchone())
print(cursor.fetchall())

コード例 #53
0
 def test_invalid_protocol_config(self):
     """protocol should be https when passing password"""
     self.assertRaisesRegexp(
         ValueError, 'Protocol.*https.*password', lambda: presto.connect(
             host=_HOST, username='******', password='******', protocol='http').cursor()
     )