コード例 #1
0
ファイル: ClickHouseDB.py プロジェクト: sn0wfree/QuantStudio
 def _connect(self):
     self._Connection = None
     if (self.Connector
             == "clickhouse-driver") or ((self.Connector == "default") and
                                         (self.DBType == "ClickHouse")):
         try:
             import clickhouse_driver
             if self.DSN:
                 self._Connection = clickhouse_driver.connect(
                     dsn=self.DSN, password=self.Pwd)
             else:
                 self._Connection = clickhouse_driver.connect(
                     user=self.User,
                     password=self.Pwd,
                     host=self.IPAddr,
                     port=self.Port,
                     database=self.DBName)
         except Exception as e:
             Msg = (
                 "'%s' 尝试使用 clickhouse-driver 连接(%s@%s:%d)数据库 '%s' 失败: %s" %
                 (self.Name, self.User, self.IPAddr, self.Port, self.DBName,
                  str(e)))
             self._QS_Logger.error(Msg)
             if self.Connector != "default": raise e
         else:
             self._Connector = "clickhouse-driver"
     self._PID = os.getpid()
     return 0
コード例 #2
0
ファイル: clickhouse.py プロジェクト: LeoQuote/archery
 def get_connection(self, db_name=None):
     if self.conn:
         return self.conn
     if db_name:
         self.conn = connect(host=self.host, port=self.port, user=self.user, password=self.password,
                             database=db_name, connect_timeout=10)
     else:
         self.conn = connect(host=self.host, port=self.port, user=self.user, password=self.password,
                             connect_timeout=10)
     return self.conn
コード例 #3
0
 def get_connection(self, new_connect=False):
     if not new_connect:
         if self.connection and not self.connection.is_closed:
             return self.connection
         else:
             self.connection = connect(host=self.CONFIG.CK.host,
                                       password=self.CONFIG.CK.password,
                                       database=self.CONFIG.CK.database,
                                       user=self.CONFIG.CK.user)
             return self.connection
     else:
         return connect(host=self.CONFIG.CK.host,
                        password=self.CONFIG.CK.password,
                        database=self.CONFIG.CK.database,
                        user=self.CONFIG.CK.user)
コード例 #4
0
ファイル: clickhouse.py プロジェクト: snower/syncany
 def create(self):
     try:
         import clickhouse_driver
         from clickhouse_driver.util.escape import escape_param
     except ImportError:
         raise ImportError("clickhouse_driver>=0.1.5 is required")
     return clickhouse_driver.connect(**self.config)
コード例 #5
0
ファイル: instagram.py プロジェクト: Zhekazuev/CEM
def database():
    print("Phone number format: 375291234567")
    # check number format
    number = input("Input MSISDN here:")

    print("Data start format: 21-08-2020")
    # check data start
    data_start = input("Input Data start here:")

    print("Data finish format: 21-08-2020")
    data_finish = input("Input Data finish here:")

    con = connect(host=config.host,
                  port=config.port,
                  user=config.login,
                  password=config.password)

    query = f"""SELECT RADIUS_ID, START_TIME, END_TIME, RULEBASE, BYTES_UPLINK, BYTES_DOWNLINK, 
                SN_CHARGING_ACTION, P2P_PROTOCOL, SERVER_IP_ADDRESS, P2P_TLS_SNI, P2P_TLS_CNAME
                FROM DPI.FLOW_SHARDED
                PREWHERE RADIUS_ID = {number}
                WHERE END_TIME >= toDateTime('{data_start} 00:00:00') and 
                END_TIME < toDateTime('{data_finish} 23:59:59')"""
    df = pd.read_sql_query(query, con)

    return df
コード例 #6
0
    def get_form(cls, connector: 'ClickhouseConnector', current_config):
        """
        Method to retrieve the form with a current config
        For example, once the connector is set,
        - we are able to give suggestions for the `database` field
        - if `database` is set, we are able to give suggestions for the `table` field
        """
        constraints = {}

        with suppress(Exception):
            connection = clickhouse_driver.connect(
                connector.get_connection_url())
            # Always add the suggestions for the available databases

            with connection.cursor() as cursor:
                cursor.execute('SHOW DATABASES')
                res = cursor.fetchall()
                available_dbs = [
                    db_name for (db_name, ) in res if db_name != 'system'
                ]
                constraints['database'] = strlist_to_enum(
                    'database', available_dbs)

                if 'database' in current_config:
                    cursor.execute(
                        f"""SELECT name FROM system.tables WHERE database = '{current_config["database"]}'"""
                    )
                    res = cursor.fetchall()
                    available_tables = [table[0] for table in res]
                    constraints['table'] = strlist_to_enum(
                        'table', available_tables, None)

        return create_model('FormSchema', **constraints, __base__=cls).schema()
コード例 #7
0
    def test_connect_default_params(self):
        connection = connect(host=self.host)
        cursor = connection.cursor()

        rv = cursor.execute('SELECT 1')
        self.assertIsNone(rv)
        self.assertEqual(cursor.fetchall(), [(1, )])
        connection.close()
コード例 #8
0
    def create_connection(self, **kwargs):
        kwargs.setdefault('user', self.user)
        kwargs.setdefault('password', self.password)
        kwargs.setdefault('host', self.host)
        kwargs.setdefault('port', self.port)
        kwargs.setdefault('database', self.database)

        return connect(**kwargs)
コード例 #9
0
ファイル: clickhouse_ds.py プロジェクト: mindsdb/datasources
    def query(self, q):
        with clickhouse_driver.connect(host=self.host,
                                       port=self.port,
                                       database=self.database,
                                       user=self.user,
                                       password=self.password) as con:
            df = pd.read_sql(q, con=con)

        return df, self._make_colmap(df)
コード例 #10
0
def get_group(slice,operationid,targetid,doc):
    percent = []
    columns = []
    user_id = []
    for k in doc:
        doc[k] = doc[k]/100
        columns.append(k)
        percent.append(doc[k])
    num = len(columns)
    conn = connect(host='chi-ftabc-clickhouse-0-0.default.svc.cluster.local', port='9000', database='ftabcch', user='******', password='******')
    cursor = conn.cursor()
    cursor.execute("select distinct id from ads_user_tag_distribute where tag_id={}".format(targetid))
    tag_id_num = len(cursor.fetchall())
    import random
    list_part = [i for i in range(slice)]
    random.shuffle(list_part)
    import math
    n = math.ceil(num/slice)
    many_list_part = list_part*n
    list_all = []
    for i in range(1,n+1):
        name = locals()
        name['list'+str(i)] = many_list_part[(i-1)*4:(i-1)*4+4]
        random.shuffle(name['list'+str(i)])
        for x in (name['list'+str(i)]):
            list_all.append(x)
    group_number = []
    name = locals()
    for i in range(num):
        if(percent[i]==0):
            name = locals()
            name['part'+str(i)] = """insert into add_plan_user_group (operation_id, group_name, group_user)
            select {} as operation_id, '{}' as group_name, [0] as group_user""".format(operationid,columns[i])
            
            randomInsert(n,list_part,list_all,num,name['part'+str(i)])
            group_number.append(0)
            
        else:
            sql = """insert into ads_plan_user_group (operation_id, group_name, group_user) select {} as operation_id, '{}' as group_name, groupArraySample({})(sample) as group_user
                                from (select bitmapToArray(bitmapAndnot((select bitmapBuild(groupArray(id))
                                         from (select distinct id
                                               from ads_user_tag_distribute
                                               where tag_id = {})), (select groupBitmapOrState(gu) as bgu
                                                                     from (select bitmapBuild(group_user) gu
                                                                           from ads_plan_user_group_distribute
                                                                           where operation_id = {})
                                                                     group by {}))) as uid)
                                                                     array join uid as sample;
            """.format(operationid,columns[i],math.ceil(tag_id_num*percent[i]),targetid,operationid,operationid)
            print(sql)
            name['part'+str(i)] = sql
            randomInsert(n,list_part,list_all,num,name['part'+str(i)])
            group_number.append(0)
            
    return group_number,columns
コード例 #11
0
ファイル: querysearch.py プロジェクト: trolley813/text-search
def idf(word_id):
    con = clickhouse_driver.connect("clickhouse://127.0.0.1")
    cur = con.cursor()
    cur.execute("SELECT COUNT(*) FROM documents")
    n_total = cur.fetchone()[0] if cur.rowcount else 0
    cur.execute("SELECT * FROM idf_doc_count WHERE word_id = %(id)s",
                {"id": word_id})
    n_word = cur.fetchone()[1] if cur.rowcount else 0
    con.close()
    return log2((n_total - n_word + 0.5) /
                (n_word + 0.5)) - log2(0.5 / (n_total + 0.5))
コード例 #12
0
 def test_from_dsn(self):
     connection = connect(
         'clickhouse://{user}:{password}@{host}:{port}/{database}'.format(
             user=self.user, password=self.password,
             host=self.host, port=self.port, database=self.database
         )
     )
     cursor = connection.cursor()
     rv = cursor.execute('SELECT 1')
     self.assertIsNone(rv)
     self.assertEqual(cursor.fetchall(), [(1, )])
     connection.close()
コード例 #13
0
 def insert_ck(self):
     if self.command_list_len >= self.many:
         url = 'clickhouse://{}'.format(self.ckhost)
         try:
             conn = connect(url)
             cursor = conn.cursor()
             cursor.executemany(
                 'insert into redis_audit.redis_audit_info(source_host,source_port,destination_host,destination_port,command,cluster_name,event_date) values',
                 self.command_list)
         except:
             print(traceback.format_exc())
         self.command_list_len = 0
         self.command_list = []
コード例 #14
0
def randomInsert(n,list1,list2,number,sql):
    name = locals()
    if(number<=4):
        random = list1.pop(0)
        conn = connect(host='chi-ftabc-clickhouse-{}-0.default.svc.cluster.local'.format(random), port='9000', database='ftabcch', user='******', password='******')
        cursor = conn.cursor()
            
        cursor.execute(sql)
        cursor.fetchall()
        time.sleep(1)
        conn.close()
        cursor.close()
    else:
        random = list2.pop(0)
        conn = connect(host='chi-ftabc-clickhouse-{}-0.default.svc.cluster.local'.format(random), port='9000', database='ftabcch', user='******', password='******')
        cursor = conn.cursor()
            
        cursor.execute(sql)
        cursor.fetchall()
        time.sleep(1)
        conn.close()
        cursor.close()
コード例 #15
0
ファイル: db.py プロジェクト: gitchenping/wukongbak
def connect_clickhouse(host=None,
                       port=None,
                       user=None,
                       password=None,
                       database=None,
                       collection=None):
    conn = connect(host=host,
                   port=port,
                   user=user,
                   password=password,
                   database=database)
    # conn = connect(host=host,user=user, password=password, database=database)
    return conn.cursor()
コード例 #16
0
def main():
    env.read_envfile()
    config = load_conf()
    ch_conn = connect(secure=True, **config['clickhouse'])

    cursor = ch_conn.cursor()
    q = f"""
        SELECT now();
    """
    q = query_preprocess(q)
    print(q)

    result = fire_query(cursor, q)
    print(result)
コード例 #17
0
    def _retrieve_data(self, data_source):
        connection = clickhouse_driver.connect(
            self.get_connection_url(database=data_source.database))
        query_params = data_source.parameters or {}
        query = (data_source.query if data_source.query else
                 f'select * from {data_source.table} limit 50;')
        df = pandas_read_sql(query,
                             con=connection,
                             params=query_params,
                             adapt_params=True)

        connection.close()

        return df
コード例 #18
0
ファイル: querysearch.py プロジェクト: trolley813/text-search
def bi_idf(word_id_1, word_id_2):
    con = clickhouse_driver.connect("clickhouse://127.0.0.1")
    cur = con.cursor()
    cur.execute("SELECT COUNT(*) FROM documents")
    n_total = cur.fetchone()[0] if cur.rowcount else 0
    cur.execute(
        "SELECT * FROM idf_doc_count_bigrams WHERE word_id_1 = %(id1)s AND word_id_2 = %(id2)s",
        {
            "id1": word_id_1,
            "id2": word_id_2
        })
    n_word = cur.fetchone()[2] if cur.rowcount else 0
    con.close()
    return log2((n_total - n_word + 0.5) /
                (n_word + 0.5)) - log2(0.5 / (n_total + 0.5))
コード例 #19
0
 def insert(host_port):
     connection = clickhouse_driver.connect(
         host='127.0.0.1',
         port=host_port,
         database='clickhouse_db',
         user='******',
         password='******',
     )
     cur = connection.cursor()
     cur.execute(
         """INSERT into clickhouse_db.city values (3986,'Palmdale','USA','California',116670), (3999,
         'Simi Valley','USA','California',111351), (3958,'Orange','USA','California',128821) """
     )
     cur.close()
     connection.close()
コード例 #20
0
ファイル: main.py プロジェクト: folknik/fill_operations
def get_aircraft_geos(params, id):
    ch_conn = connect('clickhouse://{}:{}@{}:9000/{}'.format(
        params["clickhouse_user"], params["clickhouse_pw"],
        params["clickhouse_host"], params["clickhouse_db"]))
    ch_cursor = ch_conn.cursor()

    ch_cursor.execute(
        """ SELECT time_stamp, longitude_aircraft, latitude_aircraft,
                                altitude_aircraft, serial_number_equip 
                          FROM eco_monitoring.adsb_raw_data 
                          WHERE id_track = '{}' """.format(id))
    arr = ch_cursor.fetchall()
    if len(arr) > 0:
        arr = np.array(arr)
        return arr[arr[:, 0].argsort()][::-1]
    return arr
コード例 #21
0
 def _cnx(self, database=None):
     if database:
         self.config['database'] = database
     try:
         if self.rds_category in [1, 2]:
             cnx = pymysql.connect(**self.config)
             with cnx.cursor() as cursor:
                 cursor.execute(
                     'set session group_concat_max_len=1073741824;')
             return cnx
         if self.rds_category in [3]:
             # clickhouse
             cnx = clickhouse_driver.connect(**self.config)
             return cnx
     except Exception as err:
         logger.error(err)
         return None
コード例 #22
0
 def clickhouseSelect(self, sql):
     try:
         qoeConnection = connect(self.qoeDbStr, port=8123)
         dbCursor = qoeConnection.cursor()
         dbCursor.execute(sql)
         result = dbCursor.fetchone()
         qoeConnection.close()
         fetch = str(result)
         bad_chars = ['(', ',', ')']
         for i in bad_chars:
             fetch = fetch.replace(i, '')
         return str(fetch)
     except Exception as e:
         logging.error(f'{self.cn} Error {e}', exc_info=1)
         return 0
     except NetworkError as ne:
         logging.error(f'{self.cn} Error {ne}', exc_info=1)
         return 1
コード例 #23
0
ファイル: querysearch.py プロジェクト: trolley813/text-search
def get_word_ids(query):
    con = clickhouse_driver.connect("clickhouse://127.0.0.1")
    ids = []
    ps = PorterStemmer()
    for word_start, word_end in TreebankWordTokenizer().span_tokenize(query):
        word = query[word_start:word_end]
        stem = ps.stem(word)
        cur = con.cursor()
        cur.execute("SELECT id FROM words WHERE word = %(word)s",
                    {"word": stem})
        row = cur.fetchone()
        if row is None:
            print(
                f"Warning: Word {word} in form of {stem} not found in a database, skipping"
            )
        else:
            id = row[0]
            ids.append(id)
    return ids
コード例 #24
0
ファイル: sqlQuery.py プロジェクト: simonhou/YaSQL
    def _remote_cnx(self):
        """连接到目标数据库"""
        config = self.kwargs.get('config')
        config['database'] = self.kwargs['schema']

        if self.kwargs['rds_category'] in [1, 2]:
            config.update({
                'max_allowed_packet': 1024 * 1024 * 1024,
                'db': self.kwargs['schema'],
                'read_timeout': 600,  # 设置最大查询时间600s
                'cursorclass': pymysql.cursors.DictCursor
            })
            # 先注释掉,兼容低版本,后续做下版本采集进行判断
            # if self.kwargs['rds_category'] in [1]:
            #     config['init_command'] = 'set session MAX_EXECUTION_TIME=600000'
            cnx = pymysql.connect(**config)
            return cnx
        if self.kwargs['rds_category'] in [3]:
            config.pop('charset')
            cnx = clickhouse_driver.connect(**config)
            return cnx
コード例 #25
0
def save_to_clickhouse_1():
    conn = connect("clickhouse://192.168.0.9")

    @task()
    def log(ds):
        print("Should be saved in clickhouse")
        print(ds)

    @task()
    def save():
        cursor = conn.cursor()
        cursor.execute("""
                INSERT INTO test.testtable2
                (id, `hostname`, `ip`, `random`)
                VALUES(0, 'somename1','192.168.0.1', '3892432');
            """)

        cursor.close()

    aaa = save()
    log(aaa)
コード例 #26
0
ファイル: main.py プロジェクト: cellarstone/iotoi-sample
def get_from_clickhouse_1():
    conn = connect("clickhouse://192.168.0.9")

    @task()
    def log(ds):
        print("Should be saved in clickhouse")
        print(ds)

    @task()
    def save():
        cursor = conn.cursor()
        result = cursor.execute("""
                SELECT * FROM test.testtable2
            """)
        cursor.fetchall()
        print(result)
        cursor.close()
        return result

    aaa = save()
    log(aaa)
コード例 #27
0
ファイル: packet_op.py プロジェクト: wwwbjqcom/mysql_audit
    def ck_insert(self, jsons):
        '''
        必须先在clickhouse创建表

        CREATE table mysql_audit.mysql_audit_info(
        source_host String,
        source_port UInt64,
        destination_host String,
        destination_port UInt64,
        user_name String,
        sql String,
        reponse_value String,
        execute_time Float64,
        response_status String,
        event_date DateTime)
        ENGINE=MergeTree()
        PARTITION BY toYYYYMMDD(event_date)
        ORDER BY (source_host, source_port, event_date)
        TTL event_date + INTERVAL 5 DAY
        SETTINGS index_granularity=8192,enable_mixed_granularity_parts=1;
        :param jsons:
        :return:
        '''
        self.op_list.append(jsons)
        self.op_num += 1
        if self.op_num >= self.many:
            try:
                ck_url = 'clickhouse://{}'.format(self.ckhost)
                conn = connect(ck_url)
                cursor = conn.cursor()
                cursor.executemany('insert into mysql_audit.mysql_audit_info(source_host,source_port,destination_host,destination_port,user_name,'
                                   'sql,db,reponse_value,execute_time,response_status,event_date) values',self.op_list)
            except:
                print(traceback.format_exc())
            self.op_num = 0
            self.op_list = []
コード例 #28
0
ファイル: db.py プロジェクト: gitchenping/wukongbak
    def connect_clickhouse(self):

        if self.dbinfo is not None:
            host = self.dbinfo['host']
            port = self.dbinfo['port']
            user = self.dbinfo['user']
            password = self.dbinfo['password']
            database = self.dbinfo['database']
        else:
            env = readconfini('./conf/')
            host = env.get('db_ck', 'db_host')
            port = env.get('db_ck', 'db_port')
            user = env.get('db_ck', 'db_username')
            password = env.get('db_ck', 'db_password')
            database = env.get('db_ck', 'db_name')
            pass

        conn = connect(host=host,
                       port=port,
                       user=user,
                       password=password,
                       database=database)
        # conn = connect(host=host,user=user, password=password, database=database)
        return conn.cursor()
コード例 #29
0
ファイル: tasks.py プロジェクト: simonhou/YaSQL
def sqlquery_sync_schemas_tables():
    """
        同步远程的schema信息到表yasql_sqlquery_schemas/yasql_sqlquery_tables,查询授权使用
        """
    ignored_schemas = ('PERFORMANCE_SCHEMA', 'INFORMATION_SCHEMA', 'PERCONA',
                       'MYSQL', 'SYS', 'DM_META', 'DM_HEARTBEAT',
                       'DBMS_MONITOR', 'METRICS_SCHEMA', 'TIDB_BINLOG',
                       'TIDB_LOADER', '_TEMPORARY_AND_EXTERNAL_TABLES',
                       'DEFAULT', 'MYSQL_MONITOR', 'SYSTEM')

    mysql_query = f"SELECT " \
                  f"TABLE_SCHEMA," \
                  f"GROUP_CONCAT(TABLE_NAME) AS TABLE_NAME " \
                  f"FROM INFORMATION_SCHEMA.TABLES " \
                  f"WHERE  TABLE_SCHEMA NOT IN {ignored_schemas} " \
                  f"GROUP BY TABLE_SCHEMA"

    clickhouse_query = f"SELECT " \
                       f"`database` AS TABLE_SCHEMA, " \
                       f"`tables`  AS TABLE_NAME " \
                       f"FROM " \
                       f"(" \
                       f"SELECT " \
                       f"`database`, " \
                       f"groupArray(`name`) AS `name_array`, " \
                       f"arrayStringConcat(name_array, ',') AS `tables` " \
                       f"FROM system.tables " \
                       f"WHERE `database` NOT IN {ignored_schemas} GROUP BY `database`" \
                       f")"

    for row in DbConfig.objects.filter(use_type=1):
        try:
            result = []
            if row.rds_category in [1, 2]:
                # mysql/tidb
                config = {
                    'host': row.host,
                    'port': row.port,
                    'read_timeout':
                    3,  # socket.timeout: timed out,比如阿里的rds就很操蛋,没开白名单会hang住
                }
                config.update(QUERY_USER)
                cnx = pymysql.connect(**config)
                cursor = cnx.cursor()
                cursor.execute('set session group_concat_max_len = 1073741824')
                cursor.execute(mysql_query)
                result = cursor.fetchall()
                cursor.close()
                cnx.close()
            if row.rds_category in [3]:
                # clickhouse
                config = {
                    'host': row.host,
                    'port': row.port,
                    'connect_timeout': 3,
                    'database': 'system',
                }
                config.update(QUERY_USER)
                cnx = clickhouse_driver.connect(**config)
                cursor = cnx.cursor()
                cursor.execute(clickhouse_query)
                result = cursor.fetchall()
                cursor.close()
                cnx.close()

            for i in result:
                obj, _ = DbQuerySchemas.objects.update_or_create(
                    cid_id=row.id, schema=i[0], defaults={'schema': i[0]})
                for t in i[1].split(','):
                    DbQueryTables.objects.update_or_create(
                        schema_id=obj.pk, table=t, defaults={'table': t})
        except Exception as err:
            logger.error(f"异常主机:{row.host}")
            logger.error(err)
            continue
    logger.info(f'dbms_sync_dbschems任务结束...')
コード例 #30
0
ファイル: base.py プロジェクト: trolley813/text-search
import os
from nltk.tokenize.punkt import PunktSentenceTokenizer
from nltk import TreebankWordTokenizer
from nltk.stem import PorterStemmer
import clickhouse_driver

if __name__ == "__main__":
    data_path = "../data/Gutenberg/txt"
    files = [f for f in os.listdir(data_path) if f.endswith(".txt")]
    file_count = len(files)
    con = clickhouse_driver.connect("clickhouse://127.0.0.1")
    cur = con.cursor()
    cur.execute("DROP TABLE IF EXISTS inv_index")
    cur.execute("DROP TABLE IF EXISTS documents")
    cur.execute("DROP TABLE IF EXISTS words")
    cur.execute("CREATE TABLE words(id INTEGER, word TEXT) ENGINE = MergeTree() ORDER BY id")
    cur.execute("CREATE TABLE documents(id INTEGER, name TEXT) ENGINE = MergeTree() ORDER BY id")
    cur.execute("""CREATE TABLE inv_index(
                word_id INTEGER, document_id INTEGER,
                start_pos INTEGER, end_pos INTEGER)
                ENGINE = MergeTree()
                ORDER BY document_id
                PARTITION BY document_id
                """)
    con.commit()
    words_cache = {}
    for (idx, filename) in enumerate(files, 1):
        ps = PorterStemmer()
        print(f"""Processing file {idx} of {file_count} - "{filename}"...""")
        cur.executemany("INSERT INTO documents(id, name) VALUES", [(idx, filename)])
        con.commit()