Esempio n. 1
0
    def setup(self):
        # create a throwaway pool which
        # has the effect of initializing
        # class-level event listeners on Pool,
        # if not present already.
        p1 = QueuePool(creator=self.Connection, pool_size=3, max_overflow=-1)
        p1.connect()

        global pool
        pool = QueuePool(creator=self.Connection, pool_size=3, max_overflow=-1)
Esempio n. 2
0
    def setup(self):
        # create a throwaway pool which
        # has the effect of initializing
        # class-level event listeners on Pool,
        # if not present already.
        p1 = QueuePool(creator=self.Connection, pool_size=3, max_overflow=-1)
        p1.connect()

        global pool
        pool = QueuePool(creator=self.Connection, pool_size=3, max_overflow=-1)
Esempio n. 3
0
    def setup_test(self):
        # create a throwaway pool which
        # has the effect of initializing
        # class-level event listeners on Pool,
        # if not present already.
        p1 = QueuePool(creator=self.Connection, pool_size=3, max_overflow=-1)
        p1.connect()

        global pool
        pool = QueuePool(creator=self.Connection, pool_size=3, max_overflow=-1)

        # make this a real world case where we have a "connect" handler
        @event.listens_for(pool, "connect")
        def do_connect(dbapi_conn, conn_record):
            pass
Esempio n. 4
0
class VitessDBWrapper(DBWrapper):
    def __init__(self, config):
        config['engine_url'] = ''
        DBWrapper.__init__(self, config)

        self.vtgate = config['vtgate']
        self.keyspace = config['keyspace']
        self.tablet_type = config['tablet_type']
        self.writable = config['writable']

        self.connect_timeout = config.get('connect_timeout', 5)
        self.timeout = config.get('timeout', 600)
        self.pool_recycle = config.get('pool_recycle', 60)

        self.sharded = config['sharded']
        self.batched_parameter = config.get('batched_parameter', None)
        if self.sharded and self.batched_parameter == None:
            raise Exception('Cannot shard without a batched parameter')

        self._cnx_pool = QueuePool(self.__connect,
                                   pool_size=self.query_pool_size,
                                   recycle=self.pool_recycle,
                                   timeout=self.connect_timeout)

    def execute(self, query, params={}):
        start = time.time()
        conn = self._cnx_pool.connect()
        if self.sharded:
            cursor = conn.cursor(self.keyspace,
                                 self.tablet_type,
                                 keyspace_ids=params[self.batched_parameter],
                                 writable=self.writable)
        else:
            cursor = conn.cursor(self.keyspace,
                                 self.tablet_type,
                                 keyranges=UNSHARDED,
                                 writable=self.writable)
        cursor.begin()
        cursor.execute(query, params)
        cursor.commit()
        qtime = (time.time() - start) * 1000

        start = time.time()
        keys = [f[0] for f in cursor.description]
        resultData = cursor.fetchall()
        cursor.close()
        conn.close()
        rtime = (time.time() - start) * 1000
        result_dicts = [dict(zip(keys, values)) for values in resultData]

        return result_dicts, qtime, rtime

    def __connect(self):
        return vtgatev2.connect({'vt': [self.vtgate]}, self.timeout)
Esempio n. 5
0
class ImpalaHelpers(object):
    def __init__(self, impala_hosts, impala_port=21050, pool_size=20):

        if os.environ.get("DAE_IMPALA_HOST", None) is not None:
            impala_host = os.environ.get("DAE_IMPALA_HOST", None)
            logger.info(f"impala host overwritten: {impala_host}")
            if impala_host is not None:
                impala_hosts = [impala_host]
        if impala_hosts is None:
            impala_hosts = []

        host_generator = itertools.cycle(impala_hosts)

        def create_connection():
            impala_host = next(host_generator)
            logger.debug(f"creating connection to impala host {impala_host}")
            connection = dbapi.connect(host=impala_host, port=impala_port)
            connection.host = impala_host
            return connection

        self._connection_pool = QueuePool(
            create_connection,
            pool_size=20,  # pool_size,
            reset_on_return=False,
            # use_threadlocal=True,
        )
        logger.debug(
            f"created impala pool with {self._connection_pool.status()} "
            f"connections")
        # connections = []
        # for i in range(20):
        #     conn = self.connection()
        #     conn.id = i
        #     connections.append(conn)
        # for conn in connections:
        #     conn.close()

    def connection(self):
        logger.debug(f"going to get impala connection from the pool; "
                     f"{self._connection_pool.status()}; {id(self)}")
        conn = self._connection_pool.connect()
        logger.debug(
            f"[DONE] going to get impala connection to host {conn.host} "
            f"from the pool; {self._connection_pool.status()}; {id(self)}")
        return conn

    def _import_single_file(self, cursor, db, table, import_file):

        cursor.execute(f"""
            DROP TABLE IF EXISTS {db}.{table}
            """)

        dirname = os.path.dirname(import_file)
        statement = f"""
            CREATE EXTERNAL TABLE {db}.{table} LIKE PARQUET '{import_file}'
            STORED AS PARQUET LOCATION '{dirname}'
        """
        logger.debug(f"{statement}")
        cursor.execute(statement)
        cursor.execute(f"REFRESH {db}.{table}")

    def _add_partition_properties(self, cursor, db, table,
                                  partition_description):

        chromosomes = ", ".join(partition_description.chromosomes)
        cursor.execute(f"ALTER TABLE {db}.{table} "
                       "SET TBLPROPERTIES("
                       f"'gpf_partitioning_region_bin_chromosomes' = "
                       f"'{chromosomes}'"
                       ")")
        cursor.execute(f"ALTER TABLE {db}.{table} "
                       "SET TBLPROPERTIES("
                       f"'gpf_partitioning_region_bin_region_length' = "
                       f"'{partition_description.region_length}'"
                       ")")
        cursor.execute(f"ALTER TABLE {db}.{table} "
                       "SET TBLPROPERTIES("
                       f"'gpf_partitioning_family_bin_family_bin_size' = "
                       f"'{partition_description.family_bin_size}'"
                       ")")
        coding_effect_types = ",".join(
            partition_description.coding_effect_types)
        coding_effect_types = coding_effect_types.replace("'", "\\'")
        cursor.execute(f"ALTER TABLE {db}.{table} "
                       "SET TBLPROPERTIES("
                       f"'gpf_partitioning_coding_bin_coding_effect_types' = "
                       f"'{coding_effect_types}'"
                       ")")
        cursor.execute(f"ALTER TABLE {db}.{table} "
                       "SET TBLPROPERTIES("
                       f"'gpf_partitioning_frequency_bin_rare_boundary' = "
                       f"'{partition_description.rare_boundary}'"
                       ")")

    def _create_dataset_table(self, cursor, db, table, sample_file, pd):

        cursor.execute(f"DROP TABLE IF EXISTS {db}.{table}")

        hdfs_dir = pd.variants_filename_basedir(sample_file)
        if not pd.has_partitions():
            statement = f"""
                CREATE EXTERNAL TABLE {db}.{table} LIKE PARQUET '{sample_file}'
                STORED AS PARQUET LOCATION '{hdfs_dir}'
            """
        else:
            partitions = pd.build_impala_partitions()
            statement = f"""
                CREATE EXTERNAL TABLE {db}.{table} LIKE PARQUET '{sample_file}'
                PARTITIONED BY ({partitions})
                STORED AS PARQUET LOCATION '{hdfs_dir}'
            """
        cursor.execute(statement)

        if pd.has_partitions():
            cursor.execute(f"ALTER TABLE {db}.{table} RECOVER PARTITIONS")
        cursor.execute(f"REFRESH {db}.{table}")

    # def import_dataset_into_db(
    #         self,
    #         db,
    #         pedigree_table,
    #         variants_table,
    #         pedigree_hdfs_file,
    #         variants_hdfs_file,
    #         partition_description):

    #     with closing(self.connection()) as conn:
    #         with conn.cursor() as cursor:
    #             cursor.execute(
    #                 f"CREATE DATABASE IF NOT EXISTS {db}")

    #             self._import_single_file(
    #                 cursor, db, pedigree_table, pedigree_hdfs_file)

    #             self._create_dataset_table(
    #                 cursor,
    #                 db,
    #                 variants_table,
    #                 variants_hdfs_file,
    #                 partition_description
    #             )
    #             if partition_description.has_partitions():
    #                 self._add_partition_properties(
    #                     cursor, db, variants_table, partition_description)

    def import_pedigree_into_db(self, db, pedigree_table, pedigree_hdfs_file):
        with closing(self.connection()) as conn:
            with conn.cursor() as cursor:
                cursor.execute(f"CREATE DATABASE IF NOT EXISTS {db}")

                self._import_single_file(cursor, db, pedigree_table,
                                         pedigree_hdfs_file)

    def _build_variants_schema(self, variants_schema):
        TYPE_CONVERTION = {
            "int32": "INT",
            "int16": "SMALLINT",
            "int8": "TINYINT",
            "float": "FLOAT",
            "string": "STRING",
            "binary": "STRING",
        }
        result = []
        for field_name, field_type in variants_schema.items():
            impala_type = TYPE_CONVERTION.get(field_type)
            assert impala_type is not None, (field_name, field_type)
            result.append(f"`{field_name}` {impala_type}")
        statement = ", ".join(result)
        statement = f"( {statement} )"
        return statement

    def _build_import_variants_statement(self,
                                         db,
                                         variants_table,
                                         variants_hdfs_dir,
                                         partition_description,
                                         variants_sample=None,
                                         variants_schema=None):

        assert variants_sample is not None or variants_schema is not None

        statement = ["CREATE EXTERNAL TABLE", f"{db}.{variants_table}"]
        if variants_schema is not None:
            statement.append(self._build_variants_schema(variants_schema))
        else:
            assert variants_sample is not None
            statement.extend(["LIKE PARQUET", f"'{variants_sample}'"])

        if partition_description.has_partitions():
            partitions = partition_description.build_impala_partitions()
            statement.extend(["PARTITIONED BY", f"({partitions})"])

        statement.extend(
            ["STORED AS PARQUET LOCATION", f"'{variants_hdfs_dir}'"])
        return " ".join(statement)

    def import_variants_into_db(self,
                                db,
                                variants_table,
                                variants_hdfs_dir,
                                partition_description,
                                variants_sample=None,
                                variants_schema=None):

        assert variants_schema is not None or variants_sample is not None

        with closing(self.connection()) as conn:
            with conn.cursor() as cursor:
                cursor.execute(f"CREATE DATABASE IF NOT EXISTS {db}")

                cursor.execute(f"DROP TABLE IF EXISTS {db}.{variants_table}")

                statement = self._build_import_variants_statement(
                    db,
                    variants_table,
                    variants_hdfs_dir,
                    partition_description,
                    variants_sample=variants_sample,
                    variants_schema=variants_schema)
                # statement = " ".join(statement)
                logger.info(f"going to execute: {statement}")
                cursor.execute(statement)

                if partition_description.has_partitions():
                    cursor.execute(f"ALTER TABLE {db}.{variants_table} "
                                   f"RECOVER PARTITIONS")
                cursor.execute(f"REFRESH {db}.{variants_table}")

                if partition_description.has_partitions():
                    self._add_partition_properties(cursor, db, variants_table,
                                                   partition_description)

    def get_table_create_statement(self, db, table):
        with closing(self.connection()) as conn:
            with conn.cursor() as cursor:
                statement = f"SHOW CREATE TABLE {db}.{table}"
                cursor.execute(statement)

                create_statement = None
                for row in cursor:
                    create_statement = row[0]
                    break
                return create_statement

    def recreate_table(self, db, table, new_table, new_hdfs_dir):

        create_statement = self.get_table_create_statement(db, table)
        assert create_statement is not None

        with closing(self.connection()) as conn:

            table_name = re.compile(
                r"CREATE EXTERNAL TABLE (?P<table_name>[a-zA-Z0-9._]+)\s")
            create_statement = table_name.sub(
                f"CREATE EXTERNAL TABLE {db}.{new_table} ", create_statement)

            location = re.compile(r"LOCATION '(?P<location>.+)'\s")
            create_statement = location.sub(f"LOCATION '{new_hdfs_dir}' ",
                                            create_statement)

            position = re.compile(r"\s(position)\s")

            create_statement = position.sub(" `position` ", create_statement)

            role = re.compile(r"\s(role)\s")

            create_statement = role.sub(" `role` ", create_statement)

            create_statement = create_statement.replace("3'UTR", "3\\'UTR")
            create_statement = create_statement.replace("5'UTR", "5\\'UTR")

            with conn.cursor() as cursor:
                cursor.execute(f"DROP TABLE IF EXISTS {db}.{new_table}")

                logger.info(f"going to execute {create_statement}")
                cursor.execute(create_statement)

                if "PARTITIONED" in create_statement:
                    cursor.execute(f"ALTER TABLE {db}.{new_table} "
                                   f"RECOVER PARTITIONS")
                cursor.execute(f"REFRESH {db}.{new_table}")

    def rename_table(self, db, table, new_table):
        statement = [f"ALTER TABLE {db}.{table} RENAME TO {db}.{new_table}"]
        statement = " ".join(statement)
        logger.info(f"going to execute {statement}")

        with closing(self.connection()) as conn:
            with conn.cursor() as cursor:
                cursor.execute(statement)

    def check_database(self, dbname):
        with closing(self.connection()) as conn:
            with conn.cursor() as cursor:
                q = "SHOW DATABASES"
                cursor.execute(q)
                for row in cursor:
                    if row[0] == dbname:
                        return True
        return False

    def check_table(self, dbname, tablename):
        with closing(self.connection()) as conn:
            with conn.cursor() as cursor:
                q = f"SHOW TABLES IN {dbname}"
                cursor.execute(q)
                for row in cursor:
                    if row[0] == tablename.lower():
                        return True
        return False

    def drop_table(self, dbname, tablename):
        with closing(self.connection()) as conn:
            with conn.cursor() as cursor:
                q = f"DROP TABLE IF EXISTS {dbname}.{tablename}"
                cursor.execute(q)

    def create_database(self, dbname):
        with closing(self.connection()) as conn:
            with conn.cursor() as cursor:
                q = f"CREATE DATABASE IF NOT EXISTS {dbname}"
                cursor.execute(q)

    def drop_database(self, dbname):
        with closing(self.connection()) as conn:
            with conn.cursor() as cursor:
                cursor.execute(f"DROP DATABASE IF EXISTS {dbname} CASCADE")
Esempio n. 6
0
import config

def ConnCreator():
    '''建立一个数据库连接'''
    db_args = {}
    db_args['charset'] = config.DB_CHARSET
    db_args['host'] = config.DB_HOST
    db_args['user'] = config.DB_USER
    db_args['passwd'] = config.DB_PASSWD
    db_args['port'] = config.DB_PORT
    db_args['db'] = config.DB_SELECT
    
    return mysql.connector.connect(**db_args)

DBPool = QueuePool(ConnCreator,
                   pool_size=500,
                   max_overflow=-1,
                   recycle=86400,
                   use_threadlocal=False,
                   echo=True
                   )

conn = DBPool.connect()
cursor = conn.cursor()
cursor.execute('select * from tb_cfg_building')
print reduce(lambda x, y:x + (dict(zip(cursor.column_names, y)), ),
             cursor.fetchall(),
             ())