def pyhive_test():
    """Smoke-test a Hive query issued through a Presto coordinator.

    Connects to a hard-coded Presto host, runs ``select * from day_result``
    and prints all fetched rows.  The connection details are test fixtures;
    parameterize before real use.
    """
    # Access Hive via Presto; Presto also supports access through a REST API.
    from pyhive import presto

    conn = presto.Connection(host='192.168.11.127', port=30890)
    try:
        cursor = conn.cursor()
        try:
            # sql_str = 'create table user_product(id INTEGER);'
            cursor.execute('select * from day_result')
            # cursor.execute(sql_str)
            result = cursor.fetchall()
            print(result)
        finally:
            # FIX: the original never closed the cursor or the connection,
            # leaking the HTTP session held by pyhive on every call.
            cursor.close()
    finally:
        conn.close()
def create_talbe():
    """Create the ``dayresult`` table if it does not already exist.

    Runs a CREATE TABLE against the globally configured Presto coordinator
    (module-level ``HOST``/``PORT``) and prints both the ``execute()`` return
    value and the fetched result set.

    NOTE(review): the function name keeps the original ``talbe`` typo so
    existing callers keep working; the column name ``lable`` (sic) is likewise
    kept to match the existing schema.
    """
    # Known-working variants kept for reference:
    # CREATE TABLE if not exists day_result(id INT, datestr STRING, region_id INT, profile_id STRING, user_capture_days INT, age INT, gender STRING, lable ARRAY<STRING>, vip INT);
    # CREATE TABLE if not exists day_result(datestr STRING, region_id INT, profile_id STRING, user_capture_days INT, age INT, gender STRING, lable ARRAY<STRING>, vip INT);
    sql_str = 'CREATE TABLE if not exists dayresult(id INT PRIMARY KEY AUTO_INCREMENT, datestr STRING, ' \
              'region_id INT, profile_id STRING, user_capture_days INT, age INT, gender STRING, lable ARRAY<STRING>, vip INT);'
    # sql_str = 'create table user_product(id INTEGER PRIMARY KEY AUTO_INCREMENT, userid INTEGER not null, productid INTEGER not null, type varchar(64) not null);'
    conn = presto.Connection(host=HOST, port=PORT)
    try:
        cursor = conn.cursor()
        try:
            ret = cursor.execute(sql_str)
            print(ret)
            ret = cursor.fetchall()
            print(ret)
        finally:
            # FIX: the original only closed cursor/connection on the happy
            # path; an exception in execute()/fetchall() leaked both.
            cursor.close()
    finally:
        conn.close()
# Module-level setup for the hotel availability DAG: a Kafka producer for
# publishing results and a Presto connection for reading from Hive.
from pyhive import presto
from kafka import KafkaProducer
import logging

logging.basicConfig(level=logging.DEBUG)

# default_args = {'email': ['*****@*****.**'],
#                 'email_on_failure': True,
#                 'email_on_retry': True}
# from airflow.models import Variable
# producer = KafkaProducer(bootstrap_servers=json.loads(Variable.get('KafkaServer')), acks='all', request_timeout_ms=10000,
#                          api_version=(0, 10, 1))

# NOTE(review): broker address and Presto coordinator are hard-coded here;
# the commented block above suggests they were meant to come from Airflow
# Variables — confirm before deploying outside the dev environment.
producer = KafkaProducer(bootstrap_servers=['dev-kafka1.oyorooms.ms:9092'], acks='all', request_timeout_ms=10000,
                         api_version=(0, 10, 1))
conn = presto.Connection(host="presto.oyorooms.io", port=8889)

# NOTE(review): `DAG` and `datetime` are not imported in this view — presumably
# `from airflow import DAG` / `from datetime import datetime` appear elsewhere
# in the file; verify.
dag = DAG('hotel_generator_availability_data', description='hotel_generator_availability_data from Hive to ranking service',
          schedule_interval='0 12 * * *', start_date=datetime(2017, 3, 20), catchup=False)

import os

# Path to the HQL query file, resolved relative to the current working
# directory (NOTE(review): CWD-relative paths are fragile under Airflow —
# confirm workers always start from the repo root).
path = os.getcwd()
file = os.path.join(path, 'Airflow/dags/queries/generatordata.hql')


#################generator################
def get_hive_data_generator(**kwargs):
    # NOTE(review): function body is truncated in this view; remainder not
    # visible from here.
    with open(file) as f:
def get_presto_connection(host, port):
    """Open and return a new pyhive Presto connection for ``host``:``port``."""
    connection = presto.Connection(host=host, port=port)
    return connection
def create_conn_pyhive(self):
    """Build a Presto connection from this object's host/port/username/schema."""
    return presto.Connection(
        host=self.host,
        port=self.port,
        username=self.username,
        schema=self.schema,
    )
def connect(self, host=HOST, port=PORT):
    """Connect to Presto at ``host``:``port`` and cache the connection and a
    cursor on ``self`` (as ``self.conn`` / ``self.cursor``)."""
    connection = presto.Connection(host=host, port=port)
    self.conn = connection
    self.cursor = connection.cursor()