Beispiel #1
0
    def __init__(self, connInfo, read_only):
        """
        Initialize shared metadata, including list of supported commands.

        @param connInfo     Connection information.
        @param read_only    Disable modification if True.
        """
        self._initLogging()
        self._funcMap = {
            'CREATE': self._parseCreate,
            'DELETE': self._parseDelete,
            'DROP': self._parseDrop,
            'DUMP': self._parseDump,
            'EXIT': self._justExit,
            'HELP': self._printHelp,
            'QUIT': self._justExit,
            'RELEASE': self._parseRelease,
            'RESTORE': self._restore,
            'SHOW': self._parseShow,
            'UPDATE': self._parseUpdate
        }
        config = cssConfig.configFromUrl(connInfo)
        self._css = css.CssAccess.createFromConfig(config, "", read_only)
        self._supportedCommands = """
  Supported commands:
    CREATE DATABASE <dbName> <configFile>;
    CREATE DATABASE <dbName> LIKE <dbName2>;
    CREATE TABLE <dbName>.<tableName> <configFile>;
    CREATE TABLE <dbName>.<tableName> LIKE <dbName2>.<tableName2>;
    CREATE NODE <nodeName> <key=value ...>;  # keys: type, host, port, state
    UPDATE NODE <nodeName> state=value;  # value: ACTIVE, INACTIVE
    DELETE NODE <nodeName>;
    DROP DATABASE <dbName>;
    DROP TABLE <dbName>.<tableName>;
    DROP EVERYTHING;
    DUMP </key> [<outFile>];      # dumps given key and its sub-keys
    DUMP EVERYTHING [<outFile>];  # same as "DUMP / [<outFile>]"
    DUMP;                         # same as "DUMP /"
    RESTORE <inFile>;
    SHOW DATABASES;
    SHOW NODES;
    CREATE CHUNK <dbName>.<tableName> <chunk> <nodeName>;
    DELETE CHUNK <dbName>.<tableName> <chunk>;
    SHOW CHUNKS <dbName>.<tableName>;
    QUIT;
    EXIT;
    ...more coming soon
"""
        # only prompt if input comes from terminal
        self._prompt = "qserv > " if sys.stdin.isatty() else ""
Beispiel #2
0
    def __init__(self, connInfo, read_only):
        """
        Initialize shared metadata, including list of supported commands.

        @param connInfo     Connection information.
        @param read_only    Disable modification if True.
        """
        self._initLogging()
        self._funcMap = {
            'CREATE': self._parseCreate,
            'DELETE': self._parseDelete,
            'DROP': self._parseDrop,
            'DUMP': self._parseDump,
            'EXIT': self._justExit,
            'HELP': self._printHelp,
            'QUIT': self._justExit,
            'RELEASE': self._parseRelease,
            'RESTORE': self._restore,
            'SHOW': self._parseShow,
            'UPDATE': self._parseUpdate
        }
        config = cssConfig.configFromUrl(connInfo)
        self._css = css.CssAccess.createFromConfig(config, "", read_only)
        self._supportedCommands = """
  Supported commands:
    CREATE DATABASE <dbName> <configFile>;
    CREATE DATABASE <dbName> LIKE <dbName2>;
    CREATE TABLE <dbName>.<tableName> <configFile>;
    CREATE TABLE <dbName>.<tableName> LIKE <dbName2>.<tableName2>;
    CREATE NODE <nodeName> <key=value ...>;  # keys: type, host, port, state
    UPDATE NODE <nodeName> state=value;  # value: ACTIVE, INACTIVE
    DELETE NODE <nodeName>;
    DROP DATABASE <dbName>;
    DROP TABLE <dbName>.<tableName>;
    DROP EVERYTHING;
    DUMP </key> [<outFile>];      # dumps given key and its sub-keys
    DUMP EVERYTHING [<outFile>];  # same as "DUMP / [<outFile>]"
    DUMP;                         # same as "DUMP /"
    RESTORE <inFile>;
    SHOW DATABASES;
    SHOW NODES;
    CREATE CHUNK <dbName>.<tableName> <chunk> <nodeName>;
    DELETE CHUNK <dbName>.<tableName> <chunk>;
    SHOW CHUNKS <dbName>.<tableName>;
    QUIT;
    EXIT;
    ...more coming soon
"""
        # only prompt if input comes from terminal
        self._prompt = "qserv > " if sys.stdin.isatty() else ""
Beispiel #3
0
    def __init__(self, config, wmgrSecretFile=None, czar=None):
        """
        Create the CSS accessor and remember the wmgr-related settings.

        @param config: Either string specifying URL for CSS connection or
                       dictionary specifying configuration for CssAccess
        @param wmgrSecretFile: path to a file with wmgr secret
        @param czar: optional czar wmgr config, tuple with host name and
                     port number, use when czar node is not registered in CSS
        """

        _LOG.debug('connecting to css: %s', config)

        # `unicode` only exists on Python 2. Referencing it unguarded raises
        # NameError on Python 3 before isinstance() is even evaluated, so
        # fall back to plain `str` there (which already covers all text).
        try:
            stringTypes = (str, unicode)  # noqa: F821 (Python 2 builtin)
        except NameError:
            stringTypes = (str,)

        if isinstance(config, stringTypes):
            # URL form: convert it with cssConfig.configFromUrl first
            config = cssConfig.configFromUrl(config)
        self.css = css.CssAccess.createFromConfig(config, "")
        self.wmgrSecretFile = wmgrSecretFile
        self.czar = czar
Beispiel #4
0
    def __init__(self, config, wmgrSecretFile=None, czar=None):
        """
        Create the CSS accessor and remember the wmgr-related settings.

        @param config: Either string specifying URL for CSS connection or
                       dictionary specifying configuration for CssAccess
        @param wmgrSecretFile: path to a file with wmgr secret
        @param czar: optional czar wmgr config, tuple with host name and
                     port number, use when czar node is not registered in CSS
        """

        _LOG.debug('connecting to css: %s', config)

        # `unicode` only exists on Python 2. Referencing it unguarded raises
        # NameError on Python 3 before isinstance() is even evaluated, so
        # fall back to plain `str` there (which already covers all text).
        try:
            stringTypes = (str, unicode)  # noqa: F821 (Python 2 builtin)
        except NameError:
            stringTypes = (str,)

        if isinstance(config, stringTypes):
            # URL form: convert it with cssConfig.configFromUrl first
            config = cssConfig.configFromUrl(config)
        self.css = css.CssAccess.createFromConfig(config, "")
        self.wmgrSecretFile = wmgrSecretFile
        self.czar = czar
Beispiel #5
0
    def __init__(self):
        """
        Parse the command line, set up logging and build the data loader.

        Parsed options are kept in `self.args`; the configured DataLoader
        instance is kept in `self.loader`.
        """

        # ---- command-line definition ----
        argParser = argparse.ArgumentParser(
            description='Single-node data loading script for Qserv.',
        )

        argParser.add_argument(
            '-v', '--verbose',
            dest='verbose',
            action='append_const',
            const=None,
            default=[],
            help='More verbose output, can use several times.',
        )
        argParser.add_argument(
            '--verbose-all',
            dest='verboseAll',
            action='store_true',
            default=False,
            help='Apply verbosity to all loggers, by default only loader level is set.',
        )
        argParser = lsst.qserv.admin.logger.add_logfile_opt(argParser)

        partOpts = argParser.add_argument_group(
            'Partitioning options',
            'Options defining how partitioning is performed',
        )
        partOpts.add_argument(
            '-f', '--config',
            dest='configFiles',
            action='append',
            default=[],
            required=True,
            metavar='PATH',
            help='Partitioner configuration file, required, more than one acceptable.',
        )
        partOpts.add_argument(
            '-d', '--chunks-dir',
            dest='chunksDir',
            default="./loader_chunks",
            metavar='PATH',
            help='Directory where to store chunk data, must '
                 'have enough space to keep all data. If option --skip-partition is specified '
                 '(without --one-table) then directory must exist and have existing data in it. '
                 'Otherwise directory must be empty or do not exist. def: %(default)s.',
        )
        partOpts.add_argument(
            '-t', '--tmp-dir',
            dest='tmpDir',
            default=None,
            metavar='PATH',
            help='Directory for non-chunk temporary files, e.g. uncompressed '
                 'data files. By default temporary directory with random name inside chunks-dir '
                 'is created to hold temporary data.',
        )
        partOpts.add_argument(
            '-k', '--keep-chunks',
            dest='keepChunks',
            action='store_true',
            default=False,
            help='If specified then chunks will not be deleted after loading.',
        )
        partOpts.add_argument(
            '-s', '--skip-partition',
            dest='skipPart',
            action='store_true',
            default=False,
            help='If specified then skip partitioning, chunks must exist already '
                 'if option --one-table is not specified (from previous run with -k option).',
        )
        partOpts.add_argument(
            '-1', '--one-table',
            dest='oneTable',
            action='store_true',
            default=False,
            help='If specified then load whole dataset into one table. This is useful for '
                 'testing quries against mysql directly. If --skip-partition is specified '
                 'then original non-partitioned data will be loaded, otherwise data will be '
                 'partitioned but still loaded into a single table.',
        )

        cssOpts = argParser.add_argument_group(
            'CSS options',
            'Options controlling CSS metadata',
        )
        # The default is registered on the parser before the options are
        # added because both -c and -C write to the same `cssConn` dest.
        # NOTE(review): the user@host part of this URL ("[email protected]")
        # looks mangled by e-mail obfuscation -- confirm the intended value.
        argParser.set_defaults(cssConn='mysql://[email protected]:13306/qservCssData')
        cssOpts.add_argument(
            '-c', '--css-conn',
            dest='cssConn',
            help='Connection string for CSS, def: %(default)s.',
        )
        cssOpts.add_argument(
            '-r', '--css-remove',
            dest='cssClear',
            action='store_true',
            default=False,
            help='Remove CSS table info if it already exists.',
        )
        # -C stores None into cssConn, which skips CSS instantiation below
        cssOpts.add_argument(
            '-C', '--no-css',
            dest='cssConn',
            action='store_const',
            const=None,
            help='Disable CSS updates.',
        )

        dbOpts = argParser.add_argument_group(
            'Database options',
            'Options for database connection',
        )
        dbOpts.add_argument(
            '-H', '--host',
            dest='czarHost',
            default='localhost',
            metavar='HOST',
            help='Host name for czar wmgr service, def: %(default)s.',
        )
        dbOpts.add_argument(
            '-P', '--port',
            dest='czarPort',
            type=int,
            default=5012,
            metavar='PORT_NUMBER',
            help='Port number to use for czar wmgr connection, def: %(default)s.',
        )
        dbOpts.add_argument(
            '-W', '--worker',
            dest='workerNodes',
            action='append',
            default=[],
            metavar='STRING',
            help='Node name for worker server, may be specified '
                 'more than once. If missing then czar server is used to store worker '
                 'data. If more than one node is given then chunks are distributed '
                 'randomly across all hosts. If CSS is used then nodes must already be '
                 'defined in CSS (using qserv-admin command "CREATE NODE ..."). If CSS '
                 'is disabled (with --no-css) then node name will be treated as a host '
                 'name.',
        )
        dbOpts.add_argument(
            '-x', '--secret',
            dest='secret',
            default=None,
            help='Path name for the file with wmgr secret.',
        )

        ctrlOpts = argParser.add_argument_group(
            'Control options',
            'Options for controlling other operations',
        )
        ctrlOpts.add_argument(
            '-E', '--empty-chunks',
            dest='emptyChunks',
            default=None,
            metavar='PATH',
            help='Path name for "empty chunks" file, if not specified then this file is '
                 'not produced.',
        )
        ctrlOpts.add_argument(
            '-i', '--index-db',
            dest='indexDb',
            default='qservMeta',
            metavar='DB_NAME',
            help='Name of the database which keeps czar-side object index, def: '
                 '%(default)s. Index is generated only for director table which is specified '
                 'with dirTable option in configuration file. Set to empty string to avoid '
                 'building index. If name is not empty then database must already exist.',
        )
        ctrlOpts.add_argument(
            '-e', '--delete-tables',
            dest='deleteTables',
            action='store_true',
            default=False,
            help='If specified then existing tables in database will be deleted if '
                 'they exist, this includes both data and metadata.',
        )

        # positional arguments
        argParser.add_argument(
            'database',
            help='Database name, Expected to exist and have correct permissions.',
        )
        argParser.add_argument(
            'table',
            help='Table name, must not exist.',
        )
        argParser.add_argument(
            'schema',
            help='Table schema file (should contain CREATE [TABLE|VIEW] ... statement).',
        )
        argParser.add_argument(
            'data',
            nargs='*',
            help='Input data files (CSV or anything that partitioner accepts). '
                 'Input can be empty, e.g. in case of defining SQL view instead of '
                 'regular table.',
        )

        # ---- parse ----
        self.args = argParser.parse_args()
        opts = self.args

        # ---- Python logging ----
        haveLogConf = lsst.qserv.admin.logger.setup_logging(opts.log_conf)
        rootLogger = logging.getLogger()
        loggerName = None
        if not haveLogConf:
            # global logging configuration file isn't available, fall back
            # to a custom setup driven by -v / --verbose-all
            # (could be removed to simplify code?)
            loggerName = "Loader"
            pyLevels = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
            streamHandler = logging.StreamHandler()
            streamHandler.setFormatter(logging.Formatter("[%(levelname)s] %(name)s: %(message)s"))
            if not opts.verboseAll:
                # suppress INFO/DEBUG regular messages from other loggers
                streamHandler.addFilter(_LogFilter(loggerName))
            # more than two -v flags still means DEBUG
            rootLogger.setLevel(pyLevels.get(len(opts.verbose), logging.DEBUG))
            # replace whatever handlers were installed with ours
            rootLogger.handlers = []
            rootLogger.addHandler(streamHandler)

        # ---- log4cxx logging, mirrors the level of the Python root logger ----
        cxxLevels = {
            logging.ERROR: lsst.log.ERROR,
            logging.WARNING: lsst.log.WARN,
            logging.INFO: lsst.log.INFO,
            logging.DEBUG: lsst.log.DEBUG,
        }
        lsst.log.setLevel('', cxxLevels.get(rootLogger.level, lsst.log.DEBUG))

        # ---- czar wmgr client ----
        czarWmgr = WmgrClient(opts.czarHost, opts.czarPort, secretFile=opts.secret)

        # ---- CSS interface, stays None when CSS is disabled ----
        css_inst = None
        if opts.cssConn:
            rootLogger.debug('connecting to css: %s', opts.cssConn)
            css_inst = css.CssAccess.createFromConfig(cssConfig.configFromUrl(opts.cssConn), "")

        # ---- one wmgr connection per worker node ----
        workerWmgrMap = {node: self._wmgrConnect(node, css_inst) for node in opts.workerNodes}

        # ---- loader ----
        loaderKw = dict(
            workerWmgrMap=workerWmgrMap,
            chunksDir=opts.chunksDir,
            keepChunks=opts.keepChunks,
            tmpDir=opts.tmpDir,
            skipPart=opts.skipPart,
            oneTable=opts.oneTable,
            css=css_inst,
            cssClear=opts.cssClear,
            indexDb=opts.indexDb,
            emptyChunks=opts.emptyChunks,
            deleteTables=opts.deleteTables,
            loggerName=loggerName,
        )
        self.loader = DataLoader(opts.configFiles, czarWmgr, **loaderKw)
Beispiel #6
0
    def __init__(self):
        """
        Define and parse command-line options, configure logging and
        instantiate the loader.

        The parsed options end up in `self.args`, the DataLoader instance
        in `self.loader`.
        """

        # command-line interface
        cli = argparse.ArgumentParser(description='Single-node data loading script for Qserv.')

        cli.add_argument('-v', '--verbose', dest='verbose', default=[], action='append_const',
                         const=None, help='More verbose output, can use several times.')
        cli.add_argument('--verbose-all', dest='verboseAll', default=False, action='store_true',
                         help='Apply verbosity to all loggers, by default only loader level is set.')
        cli = lsst.qserv.admin.logger.add_logfile_opt(cli)

        grpPart = cli.add_argument_group('Partitioning options',
                                         'Options defining how partitioning is performed')
        grpPart.add_argument('-f', '--config', dest='configFiles', default=[], action='append',
                             required=True, metavar='PATH',
                             help='Partitioner configuration file, required, more than one acceptable.')
        grpPart.add_argument('-d', '--chunks-dir', dest='chunksDir', metavar='PATH',
                             default="./loader_chunks",
                             help='Directory where to store chunk data, must '
                             'have enough space to keep all data. If option --skip-partition is specified '
                             '(without --one-table) then directory must exist and have existing data in it. '
                             'Otherwise directory must be empty or do not exist. def: %(default)s.')
        grpPart.add_argument('-t', '--tmp-dir', dest='tmpDir', metavar='PATH', default=None,
                             help='Directory for non-chunk temporary files, e.g. uncompressed '
                             'data files. By default temporary directory with random name inside chunks-dir '
                             'is created to hold temporary data.')
        grpPart.add_argument('-k', '--keep-chunks', dest='keepChunks', action='store_true', default=False,
                             help='If specified then chunks will not be deleted after loading.')
        grpPart.add_argument('-s', '--skip-partition', dest='skipPart', action='store_true', default=False,
                             help='If specified then skip partitioning, chunks must exist already '
                             'if option --one-table is not specified (from previous run with -k option).')
        grpPart.add_argument('-1', '--one-table', dest='oneTable', action='store_true', default=False,
                             help='If specified then load whole dataset into one table. This is useful for '
                             'testing quries against mysql directly. If --skip-partition is specified '
                             'then original non-partitioned data will be loaded, otherwise data will be '
                             'partitioned but still loaded into a single table.')

        grpCss = cli.add_argument_group('CSS options', 'Options controlling CSS metadata')
        # Default is set on the parser ahead of the options below, as both
        # -c and -C share the `cssConn` destination.
        # NOTE(review): "[email protected]" in this URL looks like an e-mail
        # obfuscation artifact -- confirm the intended user@host value.
        cli.set_defaults(cssConn='mysql://[email protected]:13306/qservCssData')
        grpCss.add_argument('-c', '--css-conn', dest='cssConn',
                            help='Connection string for CSS, def: %(default)s.')
        grpCss.add_argument('-r', '--css-remove', dest='cssClear', default=False, action='store_true',
                            help='Remove CSS table info if it already exists.')
        # -C resets cssConn to None so that no CSS interface is created below
        grpCss.add_argument('-C', '--no-css', dest='cssConn', action='store_const', const=None,
                            help='Disable CSS updates.')

        grpDb = cli.add_argument_group('Database options', 'Options for database connection')
        grpDb.add_argument('-H', '--host', dest='czarHost', default='localhost', metavar='HOST',
                           help='Host name for czar wmgr service, def: %(default)s.')
        grpDb.add_argument('-P', '--port', dest='czarPort', default=5012, metavar='PORT_NUMBER', type=int,
                           help='Port number to use for czar wmgr connection, def: %(default)s.')
        grpDb.add_argument('-W', '--worker', dest='workerNodes', default=[], action='append',
                           metavar='STRING',
                           help='Node name for worker server, may be specified '
                           'more than once. If missing then czar server is used to store worker '
                           'data. If more than one node is given then chunks are distributed '
                           'randomly across all hosts. If CSS is used then nodes must already be '
                           'defined in CSS (using qserv-admin command "CREATE NODE ..."). If CSS '
                           'is disabled (with --no-css) then node name will be treated as a host '
                           'name.')
        grpDb.add_argument('-x', '--secret', dest='secret', default=None,
                           help='Path name for the file with wmgr secret.')

        grpCtl = cli.add_argument_group('Control options', 'Options for controlling other operations')
        grpCtl.add_argument('-E', '--empty-chunks', dest='emptyChunks', default=None, metavar='PATH',
                            help='Path name for "empty chunks" file, if not specified then this file is '
                            'not produced.')
        grpCtl.add_argument('-i', '--index-db', dest='indexDb', default='qservMeta', metavar='DB_NAME',
                            help='Name of the database which keeps czar-side object index, def: '
                            '%(default)s. Index is generated only for director table which is specified '
                            'with dirTable option in configuration file. Set to empty string to avoid '
                            'building index. If name is not empty then database must already exist.')
        grpCtl.add_argument('-e', '--delete-tables', dest='deleteTables', default=False,
                            action='store_true',
                            help='If specified then existing tables in database will be deleted if '
                            'they exist, this includes both data and metadata.')

        # positionals
        cli.add_argument('database',
                         help='Database name, Expected to exist and have correct permissions.')
        cli.add_argument('table', help='Table name, must not exist.')
        cli.add_argument('schema',
                         help='Table schema file (should contain CREATE [TABLE|VIEW] ... statement).')
        cli.add_argument('data', nargs='*',
                         help='Input data files (CSV or anything that partitioner accepts). '
                         'Input can be empty, e.g. in case of defining SQL view instead of '
                         'regular table.')

        # parse all arguments
        self.args = cli.parse_args()
        args = self.args

        # Python-side logging
        globalConfOk = lsst.qserv.admin.logger.setup_logging(args.log_conf)
        logger = logging.getLogger()
        loggerName = None if globalConfOk else "Loader"
        if loggerName is not None:
            # global configuration file for logging isn't available, so use
            # a custom procedure (could be removed to simplify code?)
            byCount = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
            fmt = logging.Formatter("[%(levelname)s] %(name)s: %(message)s")
            console = logging.StreamHandler()
            console.setFormatter(fmt)
            if not args.verboseAll:
                # suppress INFO/DEBUG regular messages from other loggers
                console.addFilter(_LogFilter(loggerName))
            # any -v count beyond 2 is treated as DEBUG
            logger.setLevel(byCount.get(len(args.verbose), logging.DEBUG))
            # drop previously installed handlers, keep only the console one
            logger.handlers = []
            logger.addHandler(console)

        # log4cxx level follows the level of the Python root logger
        toLog4cxx = {logging.ERROR: lsst.log.ERROR,
                     logging.WARNING: lsst.log.WARN,
                     logging.INFO: lsst.log.INFO,
                     logging.DEBUG: lsst.log.DEBUG}
        lsst.log.setLevel('', toLog4cxx.get(logger.level, lsst.log.DEBUG))

        # wmgr client for czar
        czarWmgr = WmgrClient(args.czarHost, args.czarPort, secretFile=args.secret)

        # CSS interface, left as None when CSS is disabled
        css_inst = None
        if args.cssConn:
            logger.debug('connecting to css: %s', args.cssConn)
            cssCfg = cssConfig.configFromUrl(args.cssConn)
            css_inst = css.CssAccess.createFromConfig(cssCfg, "")

        # wmgr connection for every worker node given on the command line
        workerWmgrMap = dict((name, self._wmgrConnect(name, css_inst)) for name in args.workerNodes)

        # finally the loader itself
        self.loader = DataLoader(
            args.configFiles, czarWmgr,
            workerWmgrMap=workerWmgrMap, chunksDir=args.chunksDir, keepChunks=args.keepChunks,
            tmpDir=args.tmpDir, skipPart=args.skipPart, oneTable=args.oneTable,
            css=css_inst, cssClear=args.cssClear, indexDb=args.indexDb,
            emptyChunks=args.emptyChunks, deleteTables=args.deleteTables,
            loggerName=loggerName)