def load_file_to_database( self, tsv_filepath, tablename, sep, quotechar='"', quoting=csv.QUOTE_MINIMAL, skiprows=1): logger.debug("Loading data into MySQL table: '{}'...".format(tablename)) # Database options db_params = parse_connection_string(self.connection_string) # Run if db_params['db_socket']: header = "--socket={db_socket}".format(**db_params) elif db_params['db_password']: header = "-h {db_url} -P {db_port} -p{db_password}".format(**db_params) else: header = "-h {db_url} -P {db_port}".format(**db_params) if quotechar == '"': quotechar = '\\"' if (quoting is None or (quoting == csv.QUOTE_MINIMAL or quoting == 0) or (quoting == csv.QUOTE_NONNUMERIC or quoting == 2)): quoting = """optionally enclosed by '{}'""".format(quotechar) elif (quoting == csv.QUOTE_ALL or quoting == 3): quoting = """enclosed by '{}'""".format(quotechar) else: quoting = "" system_command = """\ mysql --local-infile {header} -u {db_username} {db_schema} -e \ "load data local infile '{tsv_filepath}' into table `{tablename}` \ fields terminated by {sep} {quoting} ignore {skiprows} lines; \ show warnings;" \ """.format(header=header, tsv_filepath=tsv_filepath, tablename=tablename, skiprows=skiprows, sep=repr(sep), quoting=quoting, **db_params) run_command(system_command)
def __init__(self, connection_string, shared_folder, storage_host,
             datadir=None, echo=False, db_engine=None, use_compression=False):
    """Set up the database handle, creating the schema if it is missing.

    Stores the connection settings, makes sure the shared folder exists,
    and connects via SQLAlchemy. If the schema named in the connection
    string does not exist yet (``OperationalError`` on first lookup), it
    is created through a schema-less connection and the lookup is retried.
    """
    self.connection_string = connection_string
    self.shared_folder = op.abspath(shared_folder)
    os.makedirs(self.shared_folder, exist_ok=True)
    self.storage_host = storage_host
    self.datadir = datadir
    if db_engine is None:
        db_engine = MySQLDaemon._default_storage_engine
    self.db_engine = db_engine
    self.use_compression = use_compression
    self.engine = sa.create_engine(self.connection_string, echo=echo)
    try:
        self.db_schema = self._get_db_schema()
    except sa.exc.OperationalError:
        # The schema probably does not exist yet; reconnect without a
        # default schema and create it, then retry the lookup.
        params = parse_connection_string(connection_string)
        missing_schema = params['db_schema']
        params['db_schema'] = ''
        logger.debug("db_params: {}".format(params))
        schemaless_connection_string = make_connection_string(**params)
        logger.debug("_connection_string: {}".format(schemaless_connection_string))
        schemaless_engine = sa.create_engine(schemaless_connection_string, echo=echo)
        schemaless_engine.execute('CREATE DATABASE {}'.format(missing_schema))
        self.db_schema = self._get_db_schema()
def drop_database_schema(self, db_schema):
    """Drop ELASPIC database schema.

    Parameters
    ----------
    db_schema : str
        Name of the schema to drop (no error if it does not exist).
    """
    # Create engine without a default schema, so the connection still
    # works even if the schema being dropped is the one named in the
    # configured connection string.
    engine = sa.create_engine(
        make_connection_string(
            **{**parse_connection_string(conf.CONFIGS["connection_string"]),
               "db_schema": ""})
    )
    # Backtick-quote the identifier — consistent with the backtick-quoted
    # table names used elsewhere in this project — so schema names
    # containing special characters or reserved words don't break the
    # statement. (Identifiers cannot be bound as query parameters.)
    sql_command = "DROP SCHEMA IF EXISTS `{}`;".format(db_schema)
    logger.debug("sql_command: '{}'".format(sql_command))
    engine.execute(sql_command)
def get_indexes(self):
    """Return the set of index names defined on this table.

    Queries ``information_schema.statistics`` for the schema named in the
    connection string and this table's name.
    """
    params = parse_connection_string(self.connection_string)
    sql_query = """\
SELECT DISTINCT INDEX_NAME
FROM information_schema.statistics
WHERE table_schema = '{db_schema}'
AND table_name = '{tablename}';
""".format(db_schema=params['db_schema'], tablename=self.name)
    index_df = pd.read_sql_query(sql_query, self.engine)
    return set(index_df['INDEX_NAME'])
def read_database_configs(config):
    """[DATABASE].

    Populate the module-level ``CONFIGS`` dict from the ``[DATABASE]``
    section of a configparser config. A full ``connection_string`` takes
    precedence; otherwise the string is assembled from the individual
    ``db_*`` options.

    Parameters
    ----------
    config : configparser.SectionProxy
        The ``[DATABASE]`` section of the parsed configuration file.
    """
    if config.get('connection_string'):
        CONFIGS['connection_string'] = config.get('connection_string')
        # Derive the individual db_* options from the connection string
        # so both representations stay in sync.
        CONFIGS.update(parse_connection_string(CONFIGS['connection_string']))
    elif config.get('db_type'):
        CONFIGS['db_type'] = config.get('db_type')
        CONFIGS['db_schema'] = config.get('db_schema')
        CONFIGS['db_database'] = config.get('db_database', fallback='')
        CONFIGS['db_username'] = config.get('db_username')
        CONFIGS['db_password'] = config.get('db_password')
        CONFIGS['db_url'] = config.get('db_url')
        CONFIGS['db_port'] = config.get('db_port')
        CONFIGS['db_socket'] = _get_db_socket(
            config, CONFIGS['db_type'], CONFIGS['db_url'])
        CONFIGS['connection_string'] = make_connection_string(**CONFIGS)
    # BUG FIX: `config.get(...)` returns the raw *string* from the config
    # file, so a value of "False" was truthy. `getboolean` parses
    # true/false/yes/no/1/0 into a real bool.
    CONFIGS['db_is_immutable'] = config.getboolean('db_is_immutable', fallback=False)
def _run_myisam_tool(self, system_command, error_message):
    """Run a MyISAM maintenance command, logging its output.

    Parameters
    ----------
    system_command : str
        The shell-style command line to run (split with ``shlex``).
    error_message : str
        Message prefix for the exception raised on a nonzero exit code.

    Raises
    ------
    Exception
        If the command exits with a nonzero return code.
    """
    logger.debug("system_command: '{}'".format(system_command))
    proc = subprocess.run(
        shlex.split(system_command),
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True)
    if proc.stdout.strip():
        logger.debug(proc.stdout.strip())
    if proc.stderr.strip():
        logger.error(proc.stderr.strip())
    if proc.returncode:
        raise Exception(
            "{} (returncode = {})".format(error_message, proc.returncode))

def compress(self):
    """Compress this MyISAM table with ``myisampack`` and rebuild its indexes.

    Flushes the table, packs the ``.MYI``/``.MYD`` files in place, rebuilds
    the indexes with ``myisamchk -rq``, and logs the size savings.

    Raises
    ------
    Exception
        If either external command exits with a nonzero return code.
    """
    db_params = parse_connection_string(self.connection_string)
    db_file = op.abspath(
        op.join(self.datadir, db_params['db_schema'], self.name + '.MYD'))
    index_file = op.abspath(
        op.join(self.datadir, db_params['db_schema'], self.name + '.MYI'))
    file_size_before = op.getsize(db_file) / (1024 ** 2)
    # Flush table so all pending writes hit the data files before packing
    self.engine.execute('flush tables;')
    # Compress table
    self._run_myisam_tool(
        "myisampack --no-defaults '{}'".format(index_file),
        "Failed to compress table")
    # Recreate indexes (myisampack leaves them unusable until rebuilt)
    self._run_myisam_tool(
        "myisamchk -rq '{}'".format(index_file),
        "Failed to recreate indexes")
    file_size_after = op.getsize(db_file) / (1024 ** 2)
    logger.info("File size before: {:,.2f} MB".format(file_size_before))
    logger.info("File size after: {:,.2f} MB".format(file_size_after))
    # BUG FIX: previously logged `file_size_after` as the savings and
    # `after / before * 100` as the percent saved; report the actual
    # difference and the fraction of the original size reclaimed.
    savings = file_size_before - file_size_after
    logger.info(
        "File size savings: {:,.2f} MB ({:.2f} %)"
        .format(savings, savings / file_size_before * 100))