Example #1
def load_file_to_database(
        self, tsv_filepath, tablename, sep, quotechar='"', quoting=csv.QUOTE_MINIMAL,
        skiprows=1):
    logger.debug("Loading data into MySQL table: '{}'...".format(tablename))

    # Database options
    db_params = parse_connection_string(self.connection_string)

    # Assemble the `mysql` client connection arguments: prefer a local
    # socket, then password-protected TCP, then passwordless TCP
    if db_params['db_socket']:
        header = "--socket={db_socket}".format(**db_params)
    elif db_params['db_password']:
        header = "-h {db_url} -P {db_port} -p{db_password}".format(**db_params)
    else:
        header = "-h {db_url} -P {db_port}".format(**db_params)
    # Escape the quote character so it survives the shell double-quoting below
    if quotechar == '"':
        quotechar = '\\"'
    # Map csv quoting constants onto the LOAD DATA quoting clause
    # (csv.QUOTE_MINIMAL == 0, csv.QUOTE_ALL == 1, csv.QUOTE_NONNUMERIC == 2)
    if quoting in (None, csv.QUOTE_MINIMAL, csv.QUOTE_NONNUMERIC):
        quoting = "optionally enclosed by '{}'".format(quotechar)
    elif quoting == csv.QUOTE_ALL:
        quoting = "enclosed by '{}'".format(quotechar)
    else:
        quoting = ""
    system_command = """\
mysql --local-infile {header} -u {db_username} {db_schema} -e \
"load data local infile '{tsv_filepath}' into table `{tablename}` \
fields terminated by {sep} {quoting} ignore {skiprows} lines; \
 show warnings;" \
""".format(header=header, tsv_filepath=tsv_filepath, tablename=tablename, skiprows=skiprows,
           sep=repr(sep), quoting=quoting, **db_params)
    run_command(system_command)
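
Every example on this page leans on a few helpers that are not shown here. The sketch below is a guess at their shape, inferred purely from how the snippets call them; the SQLAlchemy-URL assumption, the exact signatures, and the db_* key names are inferences, not the original author's code.

import subprocess

from sqlalchemy.engine.url import make_url  # assumed dependency


def parse_connection_string(connection_string):
    """Hypothetical sketch: split a SQLAlchemy-style URL into the db_*
    keys that the snippets above read."""
    url = make_url(connection_string)
    return {
        'db_type': url.drivername,
        'db_username': url.username or '',
        'db_password': url.password or '',
        'db_url': url.host or '',
        'db_port': url.port or '',
        'db_schema': url.database or '',
        # A unix socket, if any, would travel in the URL's query string
        'db_socket': url.query.get('unix_socket', ''),
    }


def run_command(system_command):
    """Hypothetical sketch: run a shell command, raising on failure."""
    subprocess.run(system_command, shell=True, check=True)
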
Example #2
def __init__(
        self, connection_string, shared_folder, storage_host, datadir=None,
        echo=False, db_engine=None, use_compression=False):
    self.connection_string = connection_string
    self.shared_folder = op.abspath(shared_folder)
    os.makedirs(self.shared_folder, exist_ok=True)
    self.storage_host = storage_host
    self.datadir = datadir
    self.db_engine = (
        db_engine if db_engine is not None else MySQLDaemon._default_storage_engine)
    self.use_compression = use_compression

    logger.debug("Connection string: {}".format(repr(self.connection_string)))
    self.engine = sa.create_engine(self.connection_string, echo=echo)
    try:
        self.db_schema = self._get_db_schema()
    except sa.exc.OperationalError:
        # The schema does not exist yet: reconnect without a default
        # schema, create it, and look it up again
        db_params = parse_connection_string(connection_string)
        _schema = db_params['db_schema']
        db_params['db_schema'] = ''
        logger.debug("db_params: {}".format(db_params))
        _connection_string = make_connection_string(**db_params)
        logger.debug("_connection_string: {}".format(_connection_string))
        _engine = sa.create_engine(_connection_string, echo=echo)
        _engine.execute('CREATE DATABASE {}'.format(_schema))
        self.db_schema = self._get_db_schema()
Example #3
def drop_database_schema(self, db_schema):
    """Drop ELASPIC database schema."""
    # Create an engine without a default schema, so that the schema itself
    # can be dropped
    db_params = parse_connection_string(conf.CONFIGS["connection_string"])
    db_params["db_schema"] = ""
    engine = sa.create_engine(make_connection_string(**db_params))
    sql_command = "DROP SCHEMA IF EXISTS {};".format(db_schema)
    logger.debug("sql_command: '{}'".format(sql_command))
    engine.execute(sql_command)
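
make_connection_string is evidently the inverse of parse_connection_string. Again a hypothetical sketch under the same assumptions as above; the **_ignored catch-all matches the way Example #5 below calls it with the whole CONFIGS dict:

def make_connection_string(
        db_type, db_username='', db_password='', db_url='', db_port='',
        db_schema='', db_socket='', **_ignored):
    """Hypothetical sketch: rebuild a SQLAlchemy-style URL from db_* keys."""
    auth = db_username
    if db_password:
        auth += ':{}'.format(db_password)
    connection_string = '{}://{}@{}:{}/{}'.format(
        db_type, auth, db_url, db_port, db_schema)
    if db_socket:
        connection_string += '?unix_socket={}'.format(db_socket)
    return connection_string
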
Example #4
def get_indexes(self):
    db_params = parse_connection_string(self.connection_string)
    sql_query = """\
SELECT DISTINCT INDEX_NAME FROM information_schema.statistics
WHERE table_schema = '{db_schema}'
AND table_name = '{tablename}';
""".format(db_schema=db_params['db_schema'], tablename=self.name)
    existing_indexes = set(pd.read_sql_query(sql_query, self.engine)['INDEX_NAME'])
    return existing_indexes
Example #5
def read_database_configs(config):
    """[DATABASE]."""
    # Prefer a full connection string; otherwise assemble one from the
    # individual db_* options
    if config.get('connection_string'):
        CONFIGS['connection_string'] = config.get('connection_string')
        CONFIGS.update(parse_connection_string(CONFIGS['connection_string']))
    elif config.get('db_type'):
        CONFIGS['db_type'] = config.get('db_type')
        CONFIGS['db_schema'] = config.get('db_schema')
        CONFIGS['db_database'] = config.get('db_database', fallback='')
        CONFIGS['db_username'] = config.get('db_username')
        CONFIGS['db_password'] = config.get('db_password')
        CONFIGS['db_url'] = config.get('db_url')
        CONFIGS['db_port'] = config.get('db_port')
        CONFIGS['db_socket'] = _get_db_socket(
            config, CONFIGS['db_type'], CONFIGS['db_url'])
        CONFIGS['connection_string'] = make_connection_string(**CONFIGS)
    # `getboolean` so the flag is parsed as a bool; a plain `get` would
    # return the string 'False', which is truthy
    CONFIGS['db_is_immutable'] = config.getboolean('db_is_immutable', fallback=False)
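
Purely for illustration, a made-up [DATABASE] section fed through this reader. The key names come from the snippet above, but every value is invented, and CONFIGS plus _get_db_socket are assumed to be importable from the same module:

import configparser

config = configparser.ConfigParser()
config.read_string("""\
[DATABASE]
db_type = mysql
db_schema = elaspic
db_username = elaspic_user
db_password = secret
db_url = 127.0.0.1
db_port = 3306
db_is_immutable = true
""")

# The `fallback=` keyword in the snippet suggests `config` is a
# configparser section proxy rather than the parser itself
read_database_configs(config['DATABASE'])
print(CONFIGS['connection_string'])
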
Example #6
def compress(self):
    db_params = parse_connection_string(self.connection_string)
    db_file = op.abspath(op.join(self.datadir, db_params['db_schema'], self.name + '.MYD'))
    index_file = op.abspath(op.join(self.datadir, db_params['db_schema'], self.name + '.MYI'))
    file_size_before = op.getsize(db_file) / (1024 ** 2)
    # Flush table
    self.engine.execute('flush tables;')
    # Compress table (myisampack rewrites the MyISAM table into a
    # compressed, read-only form)
    system_command = "myisampack --no-defaults '{}'".format(index_file)
    logger.debug("system_command: '{}'".format(system_command))
    p1 = subprocess.run(
        shlex.split(system_command), stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True)
    if p1.stdout.strip():
        logger.debug(p1.stdout.strip())
    if p1.stderr.strip():
        logger.error(p1.stderr.strip())
    if p1.returncode:
        raise Exception("Failed to compress table (returncode = {})".format(p1.returncode))
    # Recreate indexes, which myisampack leaves in need of a rebuild
    system_command = "myisamchk -rq '{}'".format(index_file)
    logger.debug("system_command: '{}'".format(system_command))
    p2 = subprocess.run(
        shlex.split(system_command), stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True)
    if p2.stdout.strip():
        logger.debug(p2.stdout.strip())
    if p2.stderr.strip():
        logger.error(p2.stderr.strip())
    if p2.returncode:
        raise Exception("Failed to recreate indexes (returncode = {})".format(p2.returncode))
    file_size_after = op.getsize(db_file) / (1024 ** 2)
    logger.info("File size before: {:,.2f} MB".format(file_size_before))
    logger.info("File size after: {:,.2f} MB".format(file_size_after))
    # Report the space actually saved, not the size that remains
    logger.info(
        "File size savings: {:,.2f} MB ({:.2f} %)".format(
            file_size_before - file_size_after,
            (1 - file_size_after / file_size_before) * 100))