Beispiel #1
0
def build_arg_parser_and_parse_args():
    """
    Get an argparse.Namespace from sys.argv,
    lazily import the appropriate plugin module based on the given app name,
    and recreate the namespace with arguments specific to that plugin module.

    Returns the namespace, with `job_type_func` set to the plugin's `main`.
    """
    # Options understood by the runner itself, independent of any plugin.
    runtime_options = at.group(
        "Runtime options",
        at.app_name,
        at.add_argument(
            '--bypass_scheduler', action='store_true',
            help=("Run a task directly. Do not schedule it."
                  "  Do not obtain a lock on this job."
                  "  Requires passing --job_id")),
        at.add_argument(
            '--timeout', type=int, default=2,
            help='time to wait for task to appear in queue before dying'),
        at.add_argument(
            '--max_retry', type=int, default=5,
            help='Maximum number of times to retry a failed task.'),
        at.add_argument(
            '--job_id',
            help=('run a specific job_id. If a job is already queued,'
                  ' it will run twice')),
    )
    runner_description = (
        "This script intelligently executes your application's jobs."
        " Specifically, an instance of this script fetches exactly 1 job"
        " from your application's queue, decides how to perform those jobs,"
        " and then dies.  Because jobs are managed in a DAG, Stolos may choose"
        " to delay execution of a job until dependencies have been met."
        " It may also queue child or parent jobs depending on their status.")
    build_runner_parser = at.build_arg_parser(
        [runtime_options], description=runner_description)

    # First pass: only known arguments are parsed, because the plugin's own
    # options are not defined until the plugin module has been imported.
    base_parser, partial_ns = initialize(
        [build_runner_parser(), dt, cb, qb], parse_known_args=True)

    # The plugin module is chosen from the job type of the requested app.
    plugin_module_name = (
        'stolos.plugins.%s_plugin' % dt.get_job_type(partial_ns.app_name))
    plugin = importlib.import_module(plugin_module_name)

    # Second pass: parse again with the plugin's options included.
    full_parser = at.build_arg_parser(
        parents=[base_parser, plugin.build_arg_parser()], add_help=True)
    ns = full_parser.parse_args()
    ns.job_type_func = plugin.main
    return ns
Beispiel #2
0
def build_arg_parser_and_parse_args():
    """
    Build the runner's namespace from sys.argv in two passes.

    The first pass parses the runner options to learn the app name; the
    plugin module for that app's job type is then imported and the command
    line is parsed again with the plugin's options added.

    Returns the namespace, with `job_type_func` set to the plugin's `main`.
    """
    runtime_opts = [
        at.app_name,
        at.add_argument(
            '--bypass_scheduler',
            action='store_true',
            help=(
                "Run a task directly. Do not schedule it."
                "  Do not obtain a lock on this job."
                "  Requires passing --job_id")),
        at.add_argument(
            '--timeout',
            type=int,
            default=2,
            help='time to wait for task to appear in queue before dying'),
        at.add_argument(
            '--max_retry',
            type=int,
            default=5,
            help='Maximum number of times to retry a failed task.'),
        at.add_argument(
            '--job_id',
            help=(
                'run a specific job_id. If a job is already queued,'
                ' it will run twice')),
    ]
    summary = (
        "This script intelligently executes your application's jobs."
        " Specifically, an instance of this script fetches exactly 1 job"
        " from your application's queue, decides how to perform those jobs,"
        " and then dies.  Because jobs are managed in a DAG, Stolos may choose"
        " to delay execution of a job until dependencies have been met."
        " It may also queue child or parent jobs depending on their status.")
    parser = at.build_arg_parser(
        [at.group("Runtime options", *runtime_opts)], description=summary)

    # Unknown (plugin-specific) arguments are tolerated on this first pass.
    parser, first_pass_ns = initialize(
        [parser(), dt, cb, qb], parse_known_args=True)

    # get plugin parser
    job_type = dt.get_job_type(first_pass_ns.app_name)
    plugin = importlib.import_module('stolos.plugins.%s_plugin' % job_type)
    combined = at.build_arg_parser(
        parents=[parser, plugin.build_arg_parser()], add_help=True)

    ns = combined.parse_args()
    ns.job_type_func = plugin.main
    return ns
Beispiel #3
0
def initialize(objects, args=None, parse_known_args=False,
               **argument_parser_kwargs):
    """
    Initialize Stolos such that we ensure all required configuration settings
    are unified in one central place before we do anything with Stolos.
    Raises error if any parsers define conflicting argument options.

    This function is called by user-facing or application-level code.
    All internal stolos libraries should call to stolos.get_NS() when they need
    to access configuration.  Internal libraries should not call this function.

    Returns (argparse.ArgumentParser(...), argparse.Namespace(...))

    `objects` - is a list of build_arg_parser functions or objects
        (ie Stolos modules) containing a callable build_arg_parser attribute.
    `args` - (optional).  Define command-line arguments to use.
        Default to sys.argv (which is what argparse does).
        Explicitly pass args=[] to not read command-line arguments, and instead
        expect that all arguments are passed in as environment variables.
        To guarantee NO arguments are read from sys.argv, set args=[]
        Example:  args=['--option1', 'val', ...]
        `args` is used for every parsing pass, including the final one when
        `parse_known_args` is True.
    `parse_known_args` - if True, parse only known commandline arguments and
        do not add_help (ie don't recognize '-h').  Assume you will
        post-process the argument parser and add a --help option later.
        If False, add_help (ie recognize '-h') and fail if anything on
        command-line is not recognized by the argument parser
    `argument_parser_kwargs` - (optional) passed to the ArgumentParser(...)

    Side effects: stores the namespace on `stolos.NS` and deletes
    `stolos.Uninitialized`; if that attribute is already gone, a warning
    about re-initialization is logged instead.
    """
    # partially initialize a parser to get selected configuration backend
    parser = at.build_arg_parser(
        description="Initialize Stolos, whether running it or calling its api",
        parents=list(_get_parent_parsers(objects)),
        **argument_parser_kwargs)
    # args=None makes argparse fall back to sys.argv, so no branch is needed.
    ns, _ = parser.parse_known_args(args)

    # get a new parser updated with options for each chosen backend
    parser = initialize_backend(
        ns.configuration_backend, parser, add_help=False)
    parser = initialize_backend(
        ns.queue_backend, parser, add_help=not bool(parse_known_args))

    if parse_known_args:
        # Pass `args` here too: previously this branch ignored it and always
        # read sys.argv, even when the caller passed args=[].
        ns, _ = parser.parse_known_args(args)
    else:
        ns = parser.parse_args(args)
    stolos.NS = ns
    try:
        del stolos.Uninitialized
    except AttributeError:
        # NOTE(review): if `log` is a stdlib logger, `warn` is a deprecated
        # alias of `warning` - confirm before switching.
        log.warn(
            "Stolos was re-initialized.  You may have imported the api and"
            " then done something weird like re-import it or manually"
            " call Stolos's initializer."
        )
    return parser, ns
Beispiel #4
0
def initialize_backend(backend, parser, add_help):
    """
    Return a new parser that extends `parser` with the chosen backend's
    options.

    `backend` - either an object exposing `__module__` (ie. a configuration
        backend class), in which case the module it lives in supplies
        `build_arg_parser`, or an object that supplies `build_arg_parser`
        itself (assumed to be a module).
    `parser` - the parser built so far; used as a parent of the new one.
    `add_help` - forwarded to `at.build_arg_parser`.
    """
    provider = backend
    if hasattr(provider, '__module__'):
        # The arg parser is defined next to the class, in its module.
        provider = importlib.import_module(provider.__module__)
    backend_parser = provider.build_arg_parser()
    return at.build_arg_parser(
        parents=[parser, backend_parser], add_help=add_help)
Beispiel #5
0
def initialize_backend(backend, parser, add_help):
    """
    Combine `parser` with the options defined by the chosen backend.

    When `backend` has a `__module__` attribute (ie. configuration backends
    are classes), the arg parser is taken from the module that defines it.
    Otherwise `backend` itself is assumed to be a module containing
    `build_arg_parser`.

    Returns the result of `at.build_arg_parser` called with both parsers as
    parents and the given `add_help`.
    """
    if hasattr(backend, '__module__'):
        source = importlib.import_module(backend.__module__)
    else:
        source = backend
    return at.build_arg_parser(
        parents=[parser, source.build_arg_parser()], add_help=add_help)
Beispiel #6
0
        return mapping_kls(value)
    else:
        return value


def get_tasks_config():
    """
    Instantiate the configuration backend selected in the Stolos namespace.

    Reads `configuration_backend` from `stolos.get_NS()` and calls it with no
    arguments; the result is the object used to read Stolos application
    config.
    """
    return stolos.get_NS().configuration_backend()


# Declares the 'configuration' backend selector (default 'json') inside the
# "Application Dependency Configuration" option group.
build_arg_parser = at.build_arg_parser([
    at.group(
        "Application Dependency Configuration",
        at.backend(
            backend_type='configuration',
            default='json',
            known_backends={
                "json": "stolos.configuration_backend.json_config.JSONMapping",
                "redis":
                "stolos.configuration_backend.redis_config.RedisMapping",
            },
            help=(
                "Where do you store the application dependency data?"
                ' This option defines which configuration backend Stolos uses'
                ' to access the directed graph defining how your applications'
                ' depend on each other.'
                ' You can supply your own configuration backend or choose from the'
                ' following supported options: {known_backends}'),
        ),
    ),
])
Beispiel #7
0
def create(path, value):
    """Create `path` holding `value` (converted with `util.tobytes`).

    `makepath=True` is passed to the client's `create` call.
    Raises `exceptions.NodeExistsError` when the client raises
    `NodeExistsError`.
    """
    try:
        client = raw_client()
        return client.create(path, util.tobytes(value), makepath=True)
    except NodeExistsError as err:
        raise exceptions.NodeExistsError("%s: %s" % (path, err))


def increment(path, value=1):
    """Add `value` (default 1) to the counter stored at `path`.

    Returns the counter's `value` attribute after the addition.
    """
    counter = raw_client().Counter(path)
    counter += value
    return counter.value


# Declares the --qb_zookeeper_hosts and --qb_zookeeper_timeout options.
build_arg_parser = at.build_arg_parser([
    at.add_argument(
        '--qb_zookeeper_hosts', help="The address to your Zookeeper cluster"),
    at.add_argument(
        '--qb_zookeeper_timeout', default=5, type=float,
        help="Max num secs to wait on response if timeout not specified"),
], description=(
    "Options that specify which queue to use to store state about your jobs")
)
Beispiel #8
0
# Declares the job_id and dependency-group options; the first two are
# required, the last two have defaults.
build_arg_parser = at.build_arg_parser([at.group(
    # "DAG: Details relating to how your app dependencies are defined",
    "Application Dependency Configuration",
    at.add_argument(
        '--job_id_default_template', required=True, help=(
            "Defines the default way to identify `job_id`s for all"
            " applications.  See conf/stolos-env.sh for an example")),
    at.add_argument(
        '--job_id_validations', required=True,
        # The option value is an import path; the parsed value is that
        # module's JOB_ID_VALIDATIONS attribute.
        type=lambda pth: importlib.import_module(pth).JOB_ID_VALIDATIONS,
        help=(
            'A python import path to a python module where Stolos can expect'
            ' to find the a dict named `JOB_ID_VALIDATIONS`.  This dict'
            ' contains validation functions for job_id components.'
            ' You can also configure Stolos logging here.'
            ' See conf/stolos-env.sh for an example')),
    at.add_argument(
        '--job_id_delimiter', default='_', help=(
            'The identifying components of a job_id (as defined in'
            ' the job_id_template) are separated by a character sequence.'
            ' The default for this is an underscore: "_"')),
    at.add_argument(
        "--dependency_group_default_name", default='default', help=(
            'A very low-level option that specifies how unnamed dependency'
            " groups are identified. Don't bother changing this")),
)])
Beispiel #9
0
import simplejson

from . import (
    TasksConfigBaseMapping, TasksConfigBaseSequence, log,
    _ensure_type)

from stolos import argparse_shared as at
from stolos import get_NS


# Declares the required --tasks_json option for the JSON backend.
build_arg_parser = at.build_arg_parser([
    at.group(
        "Configuration Backend Options: JSON",
        at.add_argument(
            '--tasks_json',
            required=True,
            help=(
                "Filepath to a json file defining Stolos application config"),
        ),
    ),
])


class _JSONMappingBase(object):
    def __getitem__(self, key):
        return _ensure_type(
            self.cache[key], JSONMapping, JSONSequence)

    def __len__(self):
        return len(self.cache)


class JSONMapping(_JSONMappingBase, TasksConfigBaseMapping):
    """
    A read-only dictionary loaded with data from a file identified by
    the --tasks_json option
Beispiel #10
0

# define configuration dependencies for dag_tools to be usable
build_arg_parser = at.build_arg_parser([
    at.group(
        # "DAG: Details relating to how your app dependencies are defined",
        "Application Dependency Configuration",
        at.add_argument(
            '--job_id_default_template',
            required=True,
            help=("Defines the default way to identify `job_id`s for all"
                  " applications.  See conf/stolos-env.sh for an example")),
        at.add_argument(
            '--job_id_validations',
            required=True,
            # The option value is an import path; the parsed value is that
            # module's JOB_ID_VALIDATIONS attribute.
            type=lambda pth: importlib.import_module(pth).JOB_ID_VALIDATIONS,
            help=(
                'A python import path to a python module where Stolos can expect'
                ' to find the a dict named `JOB_ID_VALIDATIONS`.  This dict'
                ' contains validation functions for job_id components.'
                ' You can also configure Stolos logging here.'
                ' See conf/stolos-env.sh for an example')),
        at.add_argument(
            '--job_id_delimiter',
            default='_',
            help=(
                'The identifying components of a job_id (as defined in'
                ' the job_id_template) are separated by a character sequence.'
                ' The default for this is an underscore: "_"')),
        at.add_argument(
            "--dependency_group_default_name",
            default='default',
            help=(
                'A very low-level option that specifies how unnamed dependency'
                " groups are identified. Don't bother changing this")),
    )
])


# Expose various functions to the rest of Stolos internals
Beispiel #11
0
def delete(path, _recursive=False):
    """Delete `path` from the queue backend.

    `_recursive` - exists only for use by tests.

    This base version is a placeholder and always raises
    NotImplementedError.
    """
    raise NotImplementedError


def set(path, value):
    """Store `value` at `path`.

    Implementations raise stolos.exceptions.NoNodeError when the path does
    not already exist.  This base version is a placeholder and always raises
    NotImplementedError.
    """
    raise NotImplementedError


def create(path, value):
    """Store `value` at a new `path`.

    Implementations raise stolos.exceptions.NodeExistsError when the path
    already exists.  This base version is a placeholder and always raises
    NotImplementedError.
    """
    raise NotImplementedError


def increment(path, value=1):
    """Add `value` (default 1) to the counter at `path`.

    Implementations return the incremented count as an int.  This base
    version is a placeholder and always raises NotImplementedError.
    """
    raise NotImplementedError


# No options are declared here: the argument list handed to
# _at.build_arg_parser is empty.
build_arg_parser = _at.build_arg_parser([])
Beispiel #12
0
def get_tasks_config():
    """
    Return a new instance of the configuration backend chosen at
    initialization.

    The backend is read from the `configuration_backend` attribute of
    `stolos.get_NS()` and called with no arguments.
    """
    backend_factory = stolos.get_NS().configuration_backend
    return backend_factory()


# Declares the 'configuration' backend selector (default 'json') inside the
# "Application Dependency Configuration" option group.
build_arg_parser = at.build_arg_parser([at.group(
    "Application Dependency Configuration",
    at.backend(
        backend_type='configuration',
        default='json',
        known_backends={
            "json": "stolos.configuration_backend.json_config.JSONMapping",
            "redis": "stolos.configuration_backend.redis_config.RedisMapping"},
        help=(
            "Where do you store the application dependency data?"
            ' This option defines which configuration backend Stolos uses'
            ' to access the directed graph defining how your applications'
            ' depend on each other.'
            ' You can supply your own configuration backend or choose from the'
            ' following supported options: {known_backends}')),
)])
Beispiel #13
0
def initialize(objects,
               args=None,
               parse_known_args=False,
               **argument_parser_kwargs):
    """
    Initialize Stolos such that we ensure all required configuration settings
    are unified in one central place before we do anything with Stolos.
    Raises error if any parsers define conflicting argument options.

    This function is called by user-facing or application-level code.
    All internal stolos libraries should call to stolos.get_NS() when they need
    to access configuration.  Internal libraries should not call this function.

    Returns (argparse.ArgumentParser(...), argparse.Namespace(...))

    `objects` - is a list of build_arg_parser functions or objects
        (ie Stolos modules) containing a callable build_arg_parser attribute.
    `args` - (optional).  Define command-line arguments to use.
        Default to sys.argv (which is what argparse does).
        Explicitly pass args=[] to not read command-line arguments, and instead
        expect that all arguments are passed in as environment variables.
        To guarantee NO arguments are read from sys.argv, set args=[]
        Example:  args=['--option1', 'val', ...]
        `args` is used for every parsing pass, including the final one when
        `parse_known_args` is True.
    `parse_known_args` - if True, parse only known commandline arguments and
        do not add_help (ie don't recognize '-h').  Assume you will
        post-process the argument parser and add a --help option later.
        If False, add_help (ie recognize '-h') and fail if anything on
        command-line is not recognized by the argument parser
    `argument_parser_kwargs` - (optional) passed to the ArgumentParser(...)

    Side effects: stores the namespace on `stolos.NS` and deletes
    `stolos.Uninitialized`; if that attribute is already gone, a warning
    about re-initialization is logged instead.
    """
    # partially initialize a parser to get selected configuration backend
    parser = at.build_arg_parser(
        description="Initialize Stolos, whether running it or calling its api",
        parents=list(_get_parent_parsers(objects)),
        **argument_parser_kwargs)
    # args=None makes argparse fall back to sys.argv, so no branch is needed.
    ns, _ = parser.parse_known_args(args)

    # get a new parser updated with options for each chosen backend
    parser = initialize_backend(ns.configuration_backend,
                                parser,
                                add_help=False)
    parser = initialize_backend(ns.queue_backend,
                                parser,
                                add_help=not bool(parse_known_args))

    if parse_known_args:
        # Pass `args` here too: previously this branch ignored it and always
        # read sys.argv, even when the caller passed args=[].
        ns, _ = parser.parse_known_args(args)
    else:
        ns = parser.parse_args(args)
    stolos.NS = ns
    try:
        del stolos.Uninitialized
    except AttributeError:
        # NOTE(review): if `log` is a stdlib logger, `warn` is a deprecated
        # alias of `warning` - confirm before switching.
        log.warn(
            "Stolos was re-initialized.  You may have imported the api and"
            " then done something weird like re-import it or manually"
            " call Stolos's initializer.")
    return parser, ns
Beispiel #14
0
import simplejson

from . import (TasksConfigBaseMapping, TasksConfigBaseSequence, log,
               _ensure_type)

from stolos import argparse_shared as at
from stolos import get_NS

# Declares the required --tasks_json option for the JSON backend.
build_arg_parser = at.build_arg_parser([at.group(
    "Configuration Backend Options: JSON",
    at.add_argument(
        '--tasks_json', required=True, help=(
            "Filepath to a json file defining Stolos application config")),
)])


class _JSONMappingBase(object):
    def __getitem__(self, key):
        return _ensure_type(self.cache[key], JSONMapping, JSONSequence)

    def __len__(self):
        return len(self.cache)


class JSONMapping(_JSONMappingBase, TasksConfigBaseMapping):
    """
    A read-only dictionary loaded with data from a file identified by
Beispiel #15
0
from stolos import argparse_shared as at

# Declares the --redis_* options for the Redis configuration backend.
build_arg_parser = at.build_arg_parser([at.group(
    ("Configuration Backend Options: Redis"
     "  By default, assume the Redis DB is available locally"),
    at.add_argument(
        '--redis_key_prefix', default='stolos/', help=(
            "All redis keys stolos creates are prefixed by this value")),
    at.add_argument(
        '--redis_db', default=0, type=int,
        help="Number of the DB that redis connects to"),
    at.add_argument(
        '--redis_host', default='localhost',
        help="Host address to redis server"),
    at.add_argument(
        '--redis_port', default=6379, type=int,
        help="Port to connect to redis server at"),
    at.add_argument(
        # The raw option value is split on '=' into a list of strings.
        '--redis_connection_opts', type=lambda x: x.split('='), help=(
            "Additional arguments to pass to redis.StrictRedis")),
)])


class _RedisConfig(object):
Beispiel #16
0
    if value == '':
        value = '--STOLOSEMPTYSTRING--'
    rv = raw_client().set(path, value, nx=True)
    if not rv:
        raise stolos.exceptions.NodeExistsError(
            "Could not create path: %s" % path)


def increment(path, value=1):
    """Add `value` (default 1) to the counter stored at `path`.

    Returns whatever the client's `incrby` call returns; the incremented
    count as an int.
    """
    return raw_client().incrby(path, value)


# Declares the --qb_redis_* options for the Redis queue backend.
build_arg_parser = at.build_arg_parser(
    [
        at.add_argument(
            '--qb_redis_host',
            help="Host address to redis server"),
        at.add_argument(
            '--qb_redis_port',
            type=int,
            default=6379,
            help="Port to connect to Redis server"),
        at.add_argument('--qb_redis_db', default=0, type=int),
        at.add_argument('--qb_redis_lock_timeout', default=60, type=int),
        at.add_argument('--qb_redis_max_network_delay', default=30, type=int),
        at.add_argument(
            '--qb_redis_socket_timeout',
            default='15',
            type=float,
            help=(
                "number of seconds that the redis client will spend waiting for a"
                " response from Redis.")),
    ],
    description=(
        "These options specify which queue to use to store state about your jobs"
    ))
Beispiel #17
0
from . import TasksConfigBaseMapping, _ensure_type, log
from .json_config import JSONMapping, JSONSequence

from stolos import argparse_shared as at


# Declares the --redis_* options for the Redis configuration backend.
build_arg_parser = at.build_arg_parser([
    at.group(
        ("Configuration Backend Options: Redis"
         "  By default, assume the Redis DB is available locally"),
        at.add_argument(
            '--redis_key_prefix',
            default='stolos/',
            help=("All redis keys stolos creates are prefixed by this value")),
        at.add_argument('--redis_db',
                        default=0,
                        type=int,
                        help="Number of the DB that redis connects to"),
        at.add_argument('--redis_host',
                        default='localhost',
                        help="Host address to redis server"),
        at.add_argument('--redis_port',
                        default=6379,
                        type=int,
                        help="Port to connect to redis server at"),
        at.add_argument(
            '--redis_connection_opts',
            # The raw option value is split on '=' into a list of strings.
            type=lambda x: x.split('='),
            help=("Additional arguments to pass to redis.StrictRedis")),
    )
])


class _RedisConfig(object):
    def __getitem__(self, key):
        if key not in self.cache:
            key = "%s%s" % (self.redis_key_prefix, key)
Beispiel #18
0
"""
Test that tasks get executed properly using zookeeper
This is used to test Stolos (pyspark plugin)
"""
from stolos import argparse_shared as at
from stolos.examples import log


def main(sc, ns, **job_id_identifiers):
    """Example task body used to test Stolos.

    `sc` - unused here (presumably the Spark context supplied by the
        pyspark plugin - confirm against the caller).
    `ns` - namespace providing `disable_log`, `read_fp` and `fail`.
    `job_id_identifiers` - accepted but unused.

    Logs `ns.read_fp`, a marker line and the namespace itself.
    Raises Exception when `ns.fail` is set.
    """
    if ns.disable_log:
        import logging
        # Call logging.disable() rather than assigning to it: the assignment
        # silenced nothing and replaced the function with True for every
        # later caller.  CRITICAL suppresses all standard log levels.
        logging.disable(logging.CRITICAL)
    log.info(ns.read_fp)
    log.info('test_module!!!')
    log.info('default ns: %s' % ns)
    if ns.fail:
        raise Exception("You asked me to fail, so here I am!")


# Declares the --fail and --disable_log flags for the test task;
# conflict_handler='resolve' is passed through to at.build_arg_parser.
build_arg_parser = at.build_arg_parser(
    [at.group(
        "Test spark task",
        at.add_argument('--fail', action='store_true'),
        at.add_argument('--disable_log', action='store_true'),
    )],
    conflict_handler='resolve',
)
Beispiel #19
0
    if value == '':
        value = '--STOLOSEMPTYSTRING--'
    rv = raw_client().set(path, value, nx=True)
    if not rv:
        raise stolos.exceptions.NodeExistsError(
            "Could not create path: %s" % path)


def increment(path, value=1):
    """Increase the counter at `path` by `value` (default 1).

    Returns the result of the client's `incrby` call; the incremented count
    as an int.
    """
    client = raw_client()
    new_count = client.incrby(path, value)
    return new_count


# Declares the --qb_redis_* options for the Redis queue backend.
build_arg_parser = at.build_arg_parser(
    [
        at.add_argument('--qb_redis_host',
                        help="Host address to redis server"),
        at.add_argument('--qb_redis_port',
                        type=int,
                        default=6379,
                        help="Port to connect to Redis server"),
        at.add_argument('--qb_redis_db', default=0, type=int),
        at.add_argument('--qb_redis_lock_timeout', default=60, type=int),
        at.add_argument('--qb_redis_max_network_delay', default=30, type=int),
        at.add_argument(
            '--qb_redis_socket_timeout',
            default='15',
            type=float,
            help=(
                "number of seconds that the redis client will spend waiting for a"
                " response from Redis."),
        ),
    ],
    description=(
        "These options specify which queue to use to store state about your jobs"
    ),
)
Beispiel #20
0
"""
Test that tasks get executed properly using zookeeper
This is used to test Stolos (pyspark plugin)
"""
from stolos import argparse_shared as at
from stolos.examples import log


def main(sc, ns, **job_id_identifiers):
    """Example task body used to test Stolos.

    `sc` - unused here (presumably the Spark context supplied by the
        pyspark plugin - confirm against the caller).
    `ns` - namespace providing `disable_log`, `read_fp` and `fail`.
    `job_id_identifiers` - accepted but unused.

    Logs `ns.read_fp`, a marker line and the namespace itself.
    Raises Exception when `ns.fail` is set.
    """
    if ns.disable_log:
        import logging
        # Call logging.disable() rather than assigning to it: the assignment
        # silenced nothing and replaced the function with True for every
        # later caller.  CRITICAL suppresses all standard log levels.
        logging.disable(logging.CRITICAL)
    log.info(ns.read_fp)
    log.info('test_module!!!')
    log.info('default ns: %s' % ns)
    if ns.fail:
        raise Exception("You asked me to fail, so here I am!")


# Declares the --fail and --disable_log flags for the test task;
# conflict_handler='resolve' is passed through to at.build_arg_parser.
build_arg_parser = at.build_arg_parser(
    [
        at.group(
            "Test spark task",
            at.add_argument('--fail', action='store_true'),
            at.add_argument('--disable_log', action='store_true'),
        )
    ],
    conflict_handler='resolve')
Beispiel #21
0
ensure_parents_completed, _set_state_unsafe

from .read_job_state import (check_state)

check_state

from .locking import (obtain_execute_lock, is_execute_locked)

obtain_execute_lock, is_execute_locked

from .qbcli_baseapi import Lock as BaseLock

BaseLock

# Declares the 'queue' backend selector (default 'redis') inside the
# "Stolos Queue Backend" option group.
build_arg_parser = at.build_arg_parser([at.group(
    "Stolos Queue Backend (manages job state)",
    at.backend(
        backend_type='queue',
        default='redis',
        known_backends={
            "zookeeper": "stolos.queue_backend.qbcli_zookeeper",
            "redis": "stolos.queue_backend.qbcli_redis"},
        help=(
            'Select a database that stores job state.'
            ' This option defines which queue backend Stolos uses.'
            ' You can supply your own queue backend or choose from the'
            ' following supported options: {known_backends}')),
)])
Beispiel #22
0
    ensure_parents_completed,
    _set_state_unsafe  # TODO: get rid of _set_state_unsafe
)
maybe_add_subtask, readd_subtask, set_state, inc_retry_count,
ensure_parents_completed, _set_state_unsafe

from .read_job_state import (check_state)
check_state

from .locking import (obtain_execute_lock, is_execute_locked)
obtain_execute_lock, is_execute_locked

from .qbcli_baseapi import Lock as BaseLock
BaseLock


# Declares the 'queue' backend selector (default 'redis') inside the
# "Stolos Queue Backend" option group.
build_arg_parser = at.build_arg_parser([
    at.group(
        "Stolos Queue Backend (manages job state)",
        at.backend(
            backend_type='queue',
            default='redis',
            known_backends={
                "zookeeper": "stolos.queue_backend.qbcli_zookeeper",
                "redis": "stolos.queue_backend.qbcli_redis",
            },
            help=('Select a database that stores job state.'
                  ' This option defines which queue backend Stolos uses.'
                  ' You can supply your own queue backend or choose from the'
                  ' following supported options: {known_backends}')),
    )
])
Beispiel #23
0
    except NoNodeError as err:
        raise exceptions.NoNodeError(
            "Must first create node before setting a new value. %s" % err)


def create(path, value):
    """Create `path` with `value` converted by `util.tobytes`.

    The client's `create` is called with `makepath=True`.
    A `NodeExistsError` from the client is re-raised as
    `exceptions.NodeExistsError`, with the path added to the message.
    """
    try:
        zk = raw_client()
        return zk.create(path, util.tobytes(value), makepath=True)
    except NodeExistsError as err:
        message = "%s: %s" % (path, err)
        raise exceptions.NodeExistsError(message)


def increment(path, value=1):
    """Increase the counter at `path` by `value` (default 1).

    Returns the counter's `value` attribute after the addition.
    """
    tally = raw_client().Counter(path)
    tally += value
    return tally.value


# Declares the --qb_zookeeper_hosts and --qb_zookeeper_timeout options.
build_arg_parser = at.build_arg_parser(
    [
        at.add_argument('--qb_zookeeper_hosts',
                        help="The address to your Zookeeper cluster"),
        at.add_argument(
            '--qb_zookeeper_timeout',
            default=5,
            type=float,
            help="Max num secs to wait on response if timeout not specified"),
    ],
    description=(
        "Options that specify which queue to use to store state about your jobs"
    ))