Ejemplo n.º 1
0
    def open(self):
        """Establishes a connection to the druid server."""
        self._conn = connect(host=self._host,
                             port=self._port,
                             path=self._path,
                             scheme=self._scheme)
        self._curs = self._conn.cursor()

        if not self._sql:
            # build the sql from our parameters
            self._sql = "select * from %s where " % (self._table)

            if self._begin_time:
                self._sql += "__time >= time_parse('%s') and " % \
                    (self.format_unix_epoch(self._begin_time))

            if self._end_time:
                self._sql += "__time <= time_parse('%s') and " % \
                    (self.format_unix_epoch(self._end_time))

            for item in self._exclude_list:
                self._sql += "%s <> '%s' and " % \
                    (self._exclude_key, item)

            if self._where:
                self._sql += " (" + self._where + ") and "

            if self._sql[-4:] == "and ":  # strip off the trailing and
                self._sql = self._sql[:-4]

            if self._limit:
                self._sql = self._sql + "limit %d " % (self._limit)

        self._curs.execute(self._sql)
Ejemplo n.º 2
0
    def run_query(self, query, user):
        connection = connect(
            host=self.configuration["host"],
            port=self.configuration["port"],
            path="/druid/v2/sql/",
            scheme=(self.configuration.get("scheme") or "http"),
            user=(self.configuration.get("user") or None),
            password=(self.configuration.get("password") or None),
        )

        cursor = connection.cursor()

        try:
            cursor.execute(query)
            columns = self.fetch_columns([(i[0], TYPES_MAP.get(i[1], None))
                                          for i in cursor.description])
            rows = [
                dict(zip((column["name"] for column in columns), row))
                for row in cursor
            ]

            data = {"columns": columns, "rows": rows}
            error = None
            json_data = json_dumps(data)
            print(json_data)
        finally:
            connection.close()

        return json_data, error
Ejemplo n.º 3
0
    def run_query(self, query, user):
        connection = connect(host=self.configuration['host'],
                             port=self.configuration['port'],
                             path='/druid/v2/sql/',
                             scheme=(self.configuration.get('scheme')
                                     or 'http'),
                             user=(self.configuration.get('user') or None),
                             password=(self.configuration.get('password')
                                       or None))

        cursor = connection.cursor()

        try:
            cursor.execute(query)
            columns = self.fetch_columns([(i[0], TYPES_MAP.get(i[1], None))
                                          for i in cursor.description])
            rows = [
                dict(zip((column['name'] for column in columns), row))
                for row in cursor
            ]

            data = {'columns': columns, 'rows': rows}
            error = None
            json_data = json_dumps(data)
            print(json_data)
        finally:
            connection.close()

        return json_data, error
Ejemplo n.º 4
0
Archivo: druid.py Proyecto: CFHH/redash
    def run_sql_query(self, query, context, user):
        #context = {"useApproximateCountDistinct": False}
        connection = connect(
            host=self.configuration["host"],
            port=self.configuration["port"],
            path="/druid/v2/sql/",
            scheme=(self.configuration.get("scheme") or "http"),
            user=(self.configuration.get("user") or None),
            password=(self.configuration.get("password") or None),
            context=context,
        )

        cursor = connection.cursor()

        try:
            cursor.execute(query)
            if cursor.description is not None:
                columns = self.fetch_columns(
                    [(i[0], TYPES_MAP.get(i[1], None)) for i in cursor.description]
                )
                rows = [
                    dict(zip((column["name"] for column in columns), row)) for row in cursor
                ]
                data = {"columns": columns, "rows": rows}
                error = None
                #json_data = json_dumps(data)
                #print(json_data)
            else:
                data = {"columns": [], "rows": []}
                error = None #如果结果就是没数据,那么不返会错误
        finally:
            connection.close()

        return data, error
Ejemplo n.º 5
0
 def _connect(self):
     from pydruid.db import connect
     logger.info('Connecting to Druid database ...')
     self.__druid = connect(self.host, self.port, path='/druid/v2/sql/', scheme='http')
     if self.username or self.password:
         logger.warning(
             'Duct username and password not passed to pydruid connection. '
             'pydruid connection currently does not allow these fields to be passed.'
         )
Ejemplo n.º 6
0
 def _connect(self):
     from pydruid.db import connect
     logger.info('Connecting to Druid database ...')
     self.__druid = connect(self.host, self.port, path='/druid/v2/sql/', scheme='http')
     if self.username or self.password:
         logger.warning(
             'Duct username and passowrd not passed to pydruid connection. '
             'pydruid connection currently does not allow these fields to be passed.'
         )
Ejemplo n.º 7
0
 def get_conn(self):
     """
     Establish a connection to druid broker.
     """
     conn = self.get_connection(self.druid_broker_conn_id)
     druid_broker_conn = connect(
         host=conn.host,
         port=conn.port,
         path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'),
         scheme=conn.extra_dejson.get('schema', 'http'))
     self.log.info('Get the connection to druid broker on %s', conn.host)
     return druid_broker_conn
Ejemplo n.º 8
0
 def get_conn(self) -> connect:
     """Establish a connection to druid broker."""
     conn = self.get_connection(getattr(self, self.conn_name_attr))
     druid_broker_conn = connect(
         host=conn.host,
         port=conn.port,
         path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'),
         scheme=conn.extra_dejson.get('schema', 'http'),
         user=conn.login,
         password=conn.password,
     )
     self.log.info('Get the connection to druid broker on %s using user %s', conn.host, conn.login)
     return druid_broker_conn
Ejemplo n.º 9
0
 def get_conn(self):
     """
     Establish a connection to druid broker.
     """
     conn = self.get_connection(self.druid_broker_conn_id)
     druid_broker_conn = connect(
         host=conn.host,
         port=conn.port,
         path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'),
         scheme=conn.extra_dejson.get('schema', 'http')
     )
     self.log.info('Get the connection to druid broker on %s', conn.host)
     return druid_broker_conn
Ejemplo n.º 10
0
def query_sql():
    conn = connect(host='druid-api.taoche.com',
                   port=80,
                   path='/druid/v2/sql/',
                   scheme='http')
    curs = conn.cursor()
    curs.execute("""
    SELECT distinct car_id,series_id,spec_id, car_age,mileage,sale_price,data_source, sale_status,guide_price,
    sale_date, publish_date FROM alg_car_price_detail
    WHERE __time >= CURRENT_TIMESTAMP - INTERVAL '2' DAY  and data_source='guazi' and series_id=1825
    and mileage <10 and car_age < 8 and TIME_PARSE(publish_date) >= timestamp '2019-01-01 00:00:00' limit 20
    """)
    for row in curs:
        print(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7],
              row[8])
Ejemplo n.º 11
0
 def get_conn(self):
     """
     Establish a connection to druid broker.
     """
     conn = self.get_connection(self.druid_broker_conn_id)  # pylint: disable=no-member
     druid_broker_conn = connect(
         host=conn.host,
         port=conn.port,
         path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'),
         scheme=conn.extra_dejson.get('schema', 'http'),
         user=conn.login,
         password=conn.password)
     self.log.info('Get the connection to druid broker on %s using user %s',
                   conn.host, conn.login)
     return druid_broker_conn
Ejemplo n.º 12
0
def main(argv):
    DRUID_HOST = os.environ.get("DRUID_HOST", "localhost")
    try:
        DRUID_PORT = int(os.environ.get("DRUID_PORT", "8888"))
    except ValueError:
        DRUID_PORT = 8888
    DRUID_URL = f"http://{DRUID_HOST}:{DRUID_PORT}"

    bucket = os.environ.get("S3_BUCKET")
    if not bucket:
        print("You must define S3_BUCKET in your environoment.")
        exit(1)

    account = None
    source_uuid = None
    try:
        opts, _ = getopt.getopt(argv,"ha:s:",["account=","source="])
    except getopt.GetoptError:
        print(f"{SCRIPT_NAME} -a < account_id > -s < source_uuid >")
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print (f"{SCRIPT_NAME} -a <account_id> -s <source_uuid>")
            sys.exit()
        elif opt in ("-a", "--account"):
            account = arg
        elif opt in ("-s", "--source"):
            source_uuid = arg

    if not account or not source_uuid:
        print("You must provide both an account and source_uuid. See -h for more help.")
        exit(1)

    datasource_name = f"aws_data_{account}_{source_uuid.replace('-', '_')}"

    conn = connect(host=DRUID_HOST, port=DRUID_PORT,
                path='/druid/v2/sql/', scheme='http')
    curs = conn.cursor()
    curs.execute("""
        SELECT "__time", "bill/BillType", "bill/BillingEntity", "bill/BillingPeriodStartDate", "count", "identity/LineItemId", "identity/TimeInterval", "lineItem/AvailabilityZone", "lineItem/CurrencyCode", "lineItem/LineItemDescription", "lineItem/LineItemType", "lineItem/Operation", "lineItem/ProductCode", "lineItem/ResourceId", "lineItem/UsageEndDate", "lineItem/UsageStartDate", "lineItem/UsageType", "pricing/term", "pricing/unit", "product/ProductName", "product/clockSpeed", "product/currentGeneration", "product/enhancedNetworkingSupported", "product/instanceFamily", "product/instanceType", "product/licenseModel", "product/location", "product/locationType", "product/memory", "product/networkPerformance", "product/operatingSystem", "product/operation", "product/physicalProcessor", "product/preInstalledSw", "product/processorArchitecture", "product/processorFeatures", "product/productFamily", "product/region", "product/servicecode", "product/sku", "product/storage", "product/tenancy", "product/usagetype", "resourceTags/user:version", "sum_bill/PayerAccountId", "sum_lineItem/UsageAccountId", "sum_lineItem/UsageAmount", "sum_product/ecu", "sum_product/vcpu"
        FROM "aws_data_10001_50e9fa68_6dba_43c6_9b91_26eb1ab2e860"
        WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL '1' DAY
        LIMIT 10
    """)
    for row in curs:
        print(row)
Ejemplo n.º 13
0
    def run_query(self, query, user):
        connection = connect(host=self.configuration['host'],
                             port=self.configuration['port'],
                             path='/druid/v2/sql/',
                             scheme=self.configuration['scheme'])

        cursor = connection.cursor()

        try:
            cursor.execute(query)
            columns = self.fetch_columns([(i[0], TYPES_MAP.get(i[1], None)) for i in cursor.description])
            rows = [dict(zip((c['name'] for c in columns), row)) for row in cursor]

            data = {'columns': columns, 'rows': rows}
            error = None
            json_data = json_dumps(data)
            print(json_data)
        finally:
            connection.close()

        return json_data, error
Ejemplo n.º 14
0
        f.close()

        initial_size = get_size()
        initial_time = get_time()
        import_task = subprocess.Popen([
            args.import_script, "--file", args.import_config, "--url",
            "http://localhost:8081"
        ],
                                       stdout=subprocess.DEVNULL)
        import_task.wait()
        final_time = get_time()
        final_size = get_size()

        initial_time_udf = get_time()
        conn = connect(host="localhost",
                       port=8082,
                       path="/druid/v2/sql",
                       scheme="http")
        curs = conn.cursor()
        select_format = "(dim{0} - (SELECT AVG(dim{0}) FROM master)) / (SELECT STDDEV_POP(dim{0}) FROM master)"
        select_expression = select_format.format(0)
        for i in range(1, columns):
            select_expression = select_expression + ", " + select_format.format(
                i)
        initial_time_udf = get_time()
        curs.execute("SELECT " + select_expression + " FROM master")
        final_time_udf = get_time()

        print("Terminating druid")
        druid.terminate()
        druid.wait()
Ejemplo n.º 15
0
            distance_exp = distance_exp + " + " + distance_format.format(i)

        initial_size = get_size()
        initial_time = get_time()
        import_task = subprocess.Popen([
            args.import_script, "--file", args.import_config, "--url",
            "http://localhost:8081"
        ],
                                       stdout=subprocess.DEVNULL)
        import_task.wait()
        final_time = get_time()
        final_size = get_size()

        initial_time_udf = get_time()
        conn = connect(host='localhost',
                       port=8082,
                       path='/druid/v2/sql/',
                       scheme='http')
        curs = conn.cursor()
        curs.execute("SELECT SQRT(" + distance_exp + ") FROM master")
        final_time_udf = get_time()

        print("Terminating druid")
        druid.terminate()
        druid.wait()

        print("*" * 100)
        print("(lines, columns) =", (lines, columns))
        print("Insert time:", final_time - initial_time)
        print("Total size (bytes):", final_size - initial_size)
        print("Total size (MB):",
              (final_size - initial_size) / 1024.0 / 1024.0)
Ejemplo n.º 16
0
def _druid_get_connection():
    # when are cursor closed?
    return connect(host='192.168.8.1',
                   port=8082,
                   path='/druid/v2/sql/',
                   scheme='http')
Ejemplo n.º 17
0
# pip install pydruid
# # or, if you intend to use asynchronous client
# pip install pydruid[async]
# # or, if you intend to export query results into pandas
# pip install pydruid[pandas]
# # or, if you intend to do both
# pip install pydruid[async, pandas]
# # or, if you want to use the SQLAlchemy engine
# pip install pydruid[sqlalchemy]
# # or, if you want to use the CLI
# pip install pydruid[cli]

from pydruid.client import *
from pylab import plt
from pydruid.db import connect

conn = connect(host='192.168.11.127', port=32666, path='/druid/v2/sql/', scheme='http')
curs = conn.cursor()
curs.execute("""
    SELECT place,
           CAST(REGEXP_EXTRACT(place, '(.*),', 1) AS FLOAT) AS lat,
           CAST(REGEXP_EXTRACT(place, ',(.*)', 1) AS FLOAT) AS lon
      FROM places
     LIMIT 10
""")

for row in curs:
    print(row)
Ejemplo n.º 18
0
    if mileage < 1.5:
        lower_mile = 0
    else:
        lower_mile = mileage - mile_diff
    print("lower_mile and upper_mile are:", lower_mile, upper_mile)
    return lower_age, upper_age, lower_mile, upper_mile


if __name__ == '__main__':
    # shop_id, province_id, spec_name, mileage, reg_date, price, spec_id, car_age
    data_f = pd.read_excel(r'/Users/pujie/Desktop/selling_190527.xlsx',
                           header=None)
    the_result = []
    kkk = 0
    conn = connect(host='druid-api.taoche.com',
                   port=80,
                   path='/druid/v2/sql/',
                   scheme='http')
    for i in range(len(data_f)):
        listt = []
        shop_id = data_f.iat[i, 0]
        province_id = str(data_f.iat[i, 1])
        spec_name = data_f.iat[i, 2]
        mileage = float(data_f.iat[i, 3])
        price = float(data_f.iat[i, 5])
        reg_date = data_f.iat[i, 4]
        spec_id = str(int(data_f.iat[i, 6]))
        car_age = data_f.iat[i, 7]
        sell_days = data_f.iat[i, 8]
        print("This car's info is :", car_age, mileage)

        low_age, up_age, low_mile, up_mile = 0, 0, 0, 0