def open(self): """Establishes a connection to the druid server.""" self._conn = connect(host=self._host, port=self._port, path=self._path, scheme=self._scheme) self._curs = self._conn.cursor() if not self._sql: # build the sql from our parameters self._sql = "select * from %s where " % (self._table) if self._begin_time: self._sql += "__time >= time_parse('%s') and " % \ (self.format_unix_epoch(self._begin_time)) if self._end_time: self._sql += "__time <= time_parse('%s') and " % \ (self.format_unix_epoch(self._end_time)) for item in self._exclude_list: self._sql += "%s <> '%s' and " % \ (self._exclude_key, item) if self._where: self._sql += " (" + self._where + ") and " if self._sql[-4:] == "and ": # strip off the trailing and self._sql = self._sql[:-4] if self._limit: self._sql = self._sql + "limit %d " % (self._limit) self._curs.execute(self._sql)
def run_query(self, query, user): connection = connect( host=self.configuration["host"], port=self.configuration["port"], path="/druid/v2/sql/", scheme=(self.configuration.get("scheme") or "http"), user=(self.configuration.get("user") or None), password=(self.configuration.get("password") or None), ) cursor = connection.cursor() try: cursor.execute(query) columns = self.fetch_columns([(i[0], TYPES_MAP.get(i[1], None)) for i in cursor.description]) rows = [ dict(zip((column["name"] for column in columns), row)) for row in cursor ] data = {"columns": columns, "rows": rows} error = None json_data = json_dumps(data) print(json_data) finally: connection.close() return json_data, error
def run_query(self, query, user): connection = connect(host=self.configuration['host'], port=self.configuration['port'], path='/druid/v2/sql/', scheme=(self.configuration.get('scheme') or 'http'), user=(self.configuration.get('user') or None), password=(self.configuration.get('password') or None)) cursor = connection.cursor() try: cursor.execute(query) columns = self.fetch_columns([(i[0], TYPES_MAP.get(i[1], None)) for i in cursor.description]) rows = [ dict(zip((column['name'] for column in columns), row)) for row in cursor ] data = {'columns': columns, 'rows': rows} error = None json_data = json_dumps(data) print(json_data) finally: connection.close() return json_data, error
def run_sql_query(self, query, context, user): #context = {"useApproximateCountDistinct": False} connection = connect( host=self.configuration["host"], port=self.configuration["port"], path="/druid/v2/sql/", scheme=(self.configuration.get("scheme") or "http"), user=(self.configuration.get("user") or None), password=(self.configuration.get("password") or None), context=context, ) cursor = connection.cursor() try: cursor.execute(query) if cursor.description is not None: columns = self.fetch_columns( [(i[0], TYPES_MAP.get(i[1], None)) for i in cursor.description] ) rows = [ dict(zip((column["name"] for column in columns), row)) for row in cursor ] data = {"columns": columns, "rows": rows} error = None #json_data = json_dumps(data) #print(json_data) else: data = {"columns": [], "rows": []} error = None #如果结果就是没数据,那么不返会错误 finally: connection.close() return data, error
def _connect(self): from pydruid.db import connect logger.info('Connecting to Druid database ...') self.__druid = connect(self.host, self.port, path='/druid/v2/sql/', scheme='http') if self.username or self.password: logger.warning( 'Duct username and password not passed to pydruid connection. ' 'pydruid connection currently does not allow these fields to be passed.' )
def _connect(self): from pydruid.db import connect logger.info('Connecting to Druid database ...') self.__druid = connect(self.host, self.port, path='/druid/v2/sql/', scheme='http') if self.username or self.password: logger.warning( 'Duct username and passowrd not passed to pydruid connection. ' 'pydruid connection currently does not allow these fields to be passed.' )
def get_conn(self): """ Establish a connection to druid broker. """ conn = self.get_connection(self.druid_broker_conn_id) druid_broker_conn = connect( host=conn.host, port=conn.port, path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'), scheme=conn.extra_dejson.get('schema', 'http')) self.log.info('Get the connection to druid broker on %s', conn.host) return druid_broker_conn
def get_conn(self) -> connect: """Establish a connection to druid broker.""" conn = self.get_connection(getattr(self, self.conn_name_attr)) druid_broker_conn = connect( host=conn.host, port=conn.port, path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'), scheme=conn.extra_dejson.get('schema', 'http'), user=conn.login, password=conn.password, ) self.log.info('Get the connection to druid broker on %s using user %s', conn.host, conn.login) return druid_broker_conn
def get_conn(self): """ Establish a connection to druid broker. """ conn = self.get_connection(self.druid_broker_conn_id) druid_broker_conn = connect( host=conn.host, port=conn.port, path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'), scheme=conn.extra_dejson.get('schema', 'http') ) self.log.info('Get the connection to druid broker on %s', conn.host) return druid_broker_conn
def query_sql(): conn = connect(host='druid-api.taoche.com', port=80, path='/druid/v2/sql/', scheme='http') curs = conn.cursor() curs.execute(""" SELECT distinct car_id,series_id,spec_id, car_age,mileage,sale_price,data_source, sale_status,guide_price, sale_date, publish_date FROM alg_car_price_detail WHERE __time >= CURRENT_TIMESTAMP - INTERVAL '2' DAY and data_source='guazi' and series_id=1825 and mileage <10 and car_age < 8 and TIME_PARSE(publish_date) >= timestamp '2019-01-01 00:00:00' limit 20 """) for row in curs: print(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8])
def get_conn(self): """ Establish a connection to druid broker. """ conn = self.get_connection(self.druid_broker_conn_id) # pylint: disable=no-member druid_broker_conn = connect( host=conn.host, port=conn.port, path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'), scheme=conn.extra_dejson.get('schema', 'http'), user=conn.login, password=conn.password) self.log.info('Get the connection to druid broker on %s using user %s', conn.host, conn.login) return druid_broker_conn
def main(argv): DRUID_HOST = os.environ.get("DRUID_HOST", "localhost") try: DRUID_PORT = int(os.environ.get("DRUID_PORT", "8888")) except ValueError: DRUID_PORT = 8888 DRUID_URL = f"http://{DRUID_HOST}:{DRUID_PORT}" bucket = os.environ.get("S3_BUCKET") if not bucket: print("You must define S3_BUCKET in your environoment.") exit(1) account = None source_uuid = None try: opts, _ = getopt.getopt(argv,"ha:s:",["account=","source="]) except getopt.GetoptError: print(f"{SCRIPT_NAME} -a < account_id > -s < source_uuid >") sys.exit(2) for opt, arg in opts: if opt == "-h": print (f"{SCRIPT_NAME} -a <account_id> -s <source_uuid>") sys.exit() elif opt in ("-a", "--account"): account = arg elif opt in ("-s", "--source"): source_uuid = arg if not account or not source_uuid: print("You must provide both an account and source_uuid. See -h for more help.") exit(1) datasource_name = f"aws_data_{account}_{source_uuid.replace('-', '_')}" conn = connect(host=DRUID_HOST, port=DRUID_PORT, path='/druid/v2/sql/', scheme='http') curs = conn.cursor() curs.execute(""" SELECT "__time", "bill/BillType", "bill/BillingEntity", "bill/BillingPeriodStartDate", "count", "identity/LineItemId", "identity/TimeInterval", "lineItem/AvailabilityZone", "lineItem/CurrencyCode", "lineItem/LineItemDescription", "lineItem/LineItemType", "lineItem/Operation", "lineItem/ProductCode", "lineItem/ResourceId", "lineItem/UsageEndDate", "lineItem/UsageStartDate", "lineItem/UsageType", "pricing/term", "pricing/unit", "product/ProductName", "product/clockSpeed", "product/currentGeneration", "product/enhancedNetworkingSupported", "product/instanceFamily", "product/instanceType", "product/licenseModel", "product/location", "product/locationType", "product/memory", "product/networkPerformance", "product/operatingSystem", "product/operation", "product/physicalProcessor", "product/preInstalledSw", "product/processorArchitecture", "product/processorFeatures", "product/productFamily", "product/region", "product/servicecode", "product/sku", "product/storage", "product/tenancy", "product/usagetype", "resourceTags/user:version", "sum_bill/PayerAccountId", "sum_lineItem/UsageAccountId", "sum_lineItem/UsageAmount", "sum_product/ecu", "sum_product/vcpu" FROM "aws_data_10001_50e9fa68_6dba_43c6_9b91_26eb1ab2e860" WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL '1' DAY LIMIT 10 """) for row in curs: print(row)
def run_query(self, query, user): connection = connect(host=self.configuration['host'], port=self.configuration['port'], path='/druid/v2/sql/', scheme=self.configuration['scheme']) cursor = connection.cursor() try: cursor.execute(query) columns = self.fetch_columns([(i[0], TYPES_MAP.get(i[1], None)) for i in cursor.description]) rows = [dict(zip((c['name'] for c in columns), row)) for row in cursor] data = {'columns': columns, 'rows': rows} error = None json_data = json_dumps(data) print(json_data) finally: connection.close() return json_data, error
f.close() initial_size = get_size() initial_time = get_time() import_task = subprocess.Popen([ args.import_script, "--file", args.import_config, "--url", "http://localhost:8081" ], stdout=subprocess.DEVNULL) import_task.wait() final_time = get_time() final_size = get_size() initial_time_udf = get_time() conn = connect(host="localhost", port=8082, path="/druid/v2/sql", scheme="http") curs = conn.cursor() select_format = "(dim{0} - (SELECT AVG(dim{0}) FROM master)) / (SELECT STDDEV_POP(dim{0}) FROM master)" select_expression = select_format.format(0) for i in range(1, columns): select_expression = select_expression + ", " + select_format.format( i) initial_time_udf = get_time() curs.execute("SELECT " + select_expression + " FROM master") final_time_udf = get_time() print("Terminating druid") druid.terminate() druid.wait()
distance_exp = distance_exp + " + " + distance_format.format(i) initial_size = get_size() initial_time = get_time() import_task = subprocess.Popen([ args.import_script, "--file", args.import_config, "--url", "http://localhost:8081" ], stdout=subprocess.DEVNULL) import_task.wait() final_time = get_time() final_size = get_size() initial_time_udf = get_time() conn = connect(host='localhost', port=8082, path='/druid/v2/sql/', scheme='http') curs = conn.cursor() curs.execute("SELECT SQRT(" + distance_exp + ") FROM master") final_time_udf = get_time() print("Terminating druid") druid.terminate() druid.wait() print("*" * 100) print("(lines, columns) =", (lines, columns)) print("Insert time:", final_time - initial_time) print("Total size (bytes):", final_size - initial_size) print("Total size (MB):", (final_size - initial_size) / 1024.0 / 1024.0)
def _druid_get_connection(): # when are cursor closed? return connect(host='192.168.8.1', port=8082, path='/druid/v2/sql/', scheme='http')
# pip install pydruid # # or, if you intend to use asynchronous client # pip install pydruid[async] # # or, if you intend to export query results into pandas # pip install pydruid[pandas] # # or, if you intend to do both # pip install pydruid[async, pandas] # # or, if you want to use the SQLAlchemy engine # pip install pydruid[sqlalchemy] # # or, if you want to use the CLI # pip install pydruid[cli] from pydruid.client import * from pylab import plt from pydruid.db import connect conn = connect(host='192.168.11.127', port=32666, path='/druid/v2/sql/', scheme='http') curs = conn.cursor() curs.execute(""" SELECT place, CAST(REGEXP_EXTRACT(place, '(.*),', 1) AS FLOAT) AS lat, CAST(REGEXP_EXTRACT(place, ',(.*)', 1) AS FLOAT) AS lon FROM places LIMIT 10 """) for row in curs: print(row)
if mileage < 1.5: lower_mile = 0 else: lower_mile = mileage - mile_diff print("lower_mile and upper_mile are:", lower_mile, upper_mile) return lower_age, upper_age, lower_mile, upper_mile if __name__ == '__main__': # shop_id, province_id, spec_name, mileage, reg_date, price, spec_id, car_age data_f = pd.read_excel(r'/Users/pujie/Desktop/selling_190527.xlsx', header=None) the_result = [] kkk = 0 conn = connect(host='druid-api.taoche.com', port=80, path='/druid/v2/sql/', scheme='http') for i in range(len(data_f)): listt = [] shop_id = data_f.iat[i, 0] province_id = str(data_f.iat[i, 1]) spec_name = data_f.iat[i, 2] mileage = float(data_f.iat[i, 3]) price = float(data_f.iat[i, 5]) reg_date = data_f.iat[i, 4] spec_id = str(int(data_f.iat[i, 6])) car_age = data_f.iat[i, 7] sell_days = data_f.iat[i, 8] print("This car's info is :", car_age, mileage) low_age, up_age, low_mile, up_mile = 0, 0, 0, 0