Exemple #1
0
def build_arg_parser_and_parse_args():
    """
    Get an argparse.Namespace from sys.argv.

    A parser for the runtime options is built first and handed to
    `initialize` (with parse_known_args=True) together with `dt`, `cb`
    and `qb`.  The plugin module matching the job type of the parsed
    app_name is then imported lazily, and the namespace is recreated by a
    parser whose parents are the initialized parser and the plugin's parser.

    Returns that namespace, with `job_type_func` set to the plugin's `main`.
    """
    runtime_options = at.group(
        "Runtime options",
        at.app_name,
        at.add_argument(
            '--bypass_scheduler', action='store_true',
            help=("Run a task directly. Do not schedule it."
                  "  Do not obtain a lock on this job."
                  "  Requires passing --job_id")),
        at.add_argument(
            '--timeout', type=int, default=2,
            help='time to wait for task to appear in queue before dying'),
        at.add_argument(
            '--max_retry', type=int, default=5,
            help='Maximum number of times to retry a failed task.'),
        at.add_argument(
            '--job_id',
            help=('run a specific job_id. If a job is already queued,'
                  ' it will run twice')),
    )
    description = (
        "This script intelligently executes your application's jobs."
        " Specifically, an instance of this script fetches exactly 1 job"
        " from your application's queue, decides how to perform those jobs,"
        " and then dies.  Because jobs are managed in a DAG, Stolos may choose"
        " to delay execution of a job until dependencies have been met."
        " It may also queue child or parent jobs depending on their status.")
    parser = at.build_arg_parser([runtime_options], description=description)

    # first pass: only the options known so far are parsed
    parser, ns = initialize([parser(), dt, cb, qb], parse_known_args=True)

    # get plugin parser
    job_type = dt.get_job_type(ns.app_name)
    plugin = importlib.import_module('stolos.plugins.%s_plugin' % job_type)

    # second pass: the plugin's own options are now included
    combined_parser = at.build_arg_parser(
        parents=[parser, plugin.build_arg_parser()], add_help=True)
    ns = combined_parser.parse_args()
    ns.job_type_func = plugin.main
    return ns
Exemple #2
0
def build_arg_parser_and_parse_args():
    """
    Build the runtime argument parser and return the parsed namespace.

    Steps, in order:
      1. build a parser for the "Runtime options" group
      2. pass it, with `dt`, `cb` and `qb`, to `initialize` using
         parse_known_args=True
      3. lazily import `stolos.plugins.<job type>_plugin`, where the job
         type comes from `dt.get_job_type(ns.app_name)`
      4. recreate the namespace with a parser that also has the plugin's
         parser as a parent

    The returned namespace carries the plugin's `main` as `job_type_func`.
    """
    option_group = at.group(
        "Runtime options",
        at.app_name,
        at.add_argument(
            '--bypass_scheduler',
            action='store_true',
            help=(
                "Run a task directly. Do not schedule it."
                "  Do not obtain a lock on this job."
                "  Requires passing --job_id"),
        ),
        at.add_argument(
            '--timeout',
            type=int,
            default=2,
            help='time to wait for task to appear in queue before dying',
        ),
        at.add_argument(
            '--max_retry',
            type=int,
            default=5,
            help='Maximum number of times to retry a failed task.',
        ),
        at.add_argument(
            '--job_id',
            help=(
                'run a specific job_id. If a job is already queued,'
                ' it will run twice'),
        ),
    )
    parser = at.build_arg_parser(
        [option_group],
        description=(
            "This script intelligently executes your application's jobs."
            " Specifically, an instance of this script fetches exactly 1 job"
            " from your application's queue, decides how to perform those"
            " jobs,"
            " and then dies.  Because jobs are managed in a DAG, Stolos may"
            " choose"
            " to delay execution of a job until dependencies have been met."
            " It may also queue child or parent jobs depending on their"
            " status."),
    )
    initialized = initialize([parser(), dt, cb, qb], parse_known_args=True)
    parser, ns = initialized

    # get plugin parser
    module_name = 'stolos.plugins.%s_plugin' % dt.get_job_type(ns.app_name)
    plugin = importlib.import_module(module_name)
    parents = [parser, plugin.build_arg_parser()]
    ns = at.build_arg_parser(parents=parents, add_help=True).parse_args()
    ns.job_type_func = plugin.main
    return ns
Exemple #3
0
import simplejson

from . import (
    TasksConfigBaseMapping, TasksConfigBaseSequence, log,
    _ensure_type)

from stolos import argparse_shared as at
from stolos import get_NS


# Command-line options contributed by the JSON configuration backend.
build_arg_parser = at.build_arg_parser(
    [
        at.group(
            "Configuration Backend Options: JSON",
            at.add_argument(
                '--tasks_json',
                required=True,
                help=(
                    "Filepath to a json file defining Stolos application"
                    " config"),
            ),
        ),
    ]
)


class _JSONMappingBase(object):
    """
    Item access and length for containers that keep their data in
    `self.cache` (the attribute is expected to be set by a subclass).
    """

    def __len__(self):
        return len(self.cache)

    def __getitem__(self, key):
        # The cached value is handed to _ensure_type together with the
        # mapping and sequence classes; its result is what callers receive.
        cached_value = self.cache[key]
        return _ensure_type(cached_value, JSONMapping, JSONSequence)


class JSONMapping(_JSONMappingBase, TasksConfigBaseMapping):
    """
    A read-only dictionary loaded with data from a file identified by
    the --tasks_json option
Exemple #4
0
This __init__ file is the official api for how other Stolos internals should
use this sub-package.  External projects should refer directly to Stolos.api.
"""
import importlib
from stolos import argparse_shared as at

import logging
log = logging.getLogger('stolos.dag_tools')


# define configuration dependencies for dag_tools to be usable
build_arg_parser = at.build_arg_parser([at.group(
    # "DAG: Details relating to how your app dependencies are defined",
    "Application Dependency Configuration",
    at.add_argument(
        '--job_id_default_template', required=True, help=(
            "Defines the default way to identify `job_id`s for all"
            " applications.  See conf/stolos-env.sh for an example")),
    at.add_argument(
        '--job_id_validations', required=True,
        type=lambda pth: importlib.import_module(pth).JOB_ID_VALIDATIONS,
        help=(
            'A python import path to a python module where Stolos can expect'
            ' to find the a dict named `JOB_ID_VALIDATIONS`.  This dict'
            ' contains validation functions for job_id components.'
            ' You can also configure Stolos logging here.'
            ' See conf/stolos-env.sh for an example')),
    at.add_argument(
        '--job_id_delimiter', default='_', help=(
            'The identifying components of a job_id (as defined in'
            ' the job_id_template) are separated by a character sequence.'
            ' The default for this is an underscore: "_"')),
Exemple #5
0
"""
Test that tasks get executed properly using zookeeper
This is used to test Stolos (pyspark plugin)
"""
from stolos import argparse_shared as at
from stolos.examples import log


def main(sc, ns, **job_id_identifiers):
    """
    Example task body used to test Stolos (pyspark plugin).

    Logs `ns.read_fp`, a marker line and the namespace itself, then fails
    on request.

    :param sc: not used by this function
        (presumably the SparkContext supplied by the plugin - TODO confirm)
    :param ns: namespace providing `disable_log`, `read_fp` and `fail`
    :param job_id_identifiers: accepted but not used by this function
    :raises Exception: when `ns.fail` is truthy
    """
    if ns.disable_log:
        import logging
        # logging.disable is a function and must be called.  Assigning True
        # to it silenced nothing and replaced the function with a bool for
        # the rest of the process.
        logging.disable(logging.CRITICAL)
    log.info(ns.read_fp)
    log.info('test_module!!!')
    log.info('default ns: %s' % ns)
    if ns.fail:
        raise Exception("You asked me to fail, so here I am!")


# Options understood by this example task.  conflict_handler='resolve' is
# passed along to at.build_arg_parser unchanged.
build_arg_parser = at.build_arg_parser(
    [
        at.group(
            "Test spark task",
            at.add_argument('--fail', action='store_true'),
            at.add_argument('--disable_log', action='store_true'),
        ),
    ],
    conflict_handler='resolve',
)
Exemple #6
0
    if value == '':
        value = '--STOLOSEMPTYSTRING--'
    rv = raw_client().set(path, value, nx=True)
    if not rv:
        raise stolos.exceptions.NodeExistsError(
            "Could not create path: %s" % path)


def increment(path, value=1):
    """Increment the counter at given path by `value` (1 by default)

    Return the incremented count as an int, as reported by the client's
    `incrby` call
    """
    return raw_client().incrby(path, value)


# Command-line options for the Redis queue backend.
# NOTE(review): --qb_redis_socket_timeout declares its default as the string
# '15' alongside type=float; plain argparse converts string defaults with
# `type` - confirm at.add_argument keeps that behaviour.
build_arg_parser = at.build_arg_parser(
    [
        at.add_argument(
            '--qb_redis_host',
            help="Host address to redis server"),
        at.add_argument(
            '--qb_redis_port',
            type=int,
            default=6379,
            help="Port to connect to Redis server"),
        at.add_argument(
            '--qb_redis_db', default=0, type=int),
        at.add_argument(
            '--qb_redis_lock_timeout', default=60, type=int),
        at.add_argument(
            '--qb_redis_max_network_delay', default=30, type=int),
        at.add_argument(
            '--qb_redis_socket_timeout',
            default='15',
            type=float,
            help=(
                "number of seconds that the redis client will spend waiting"
                " for a"
                " response from Redis.")),
    ],
    description=(
        "These options specify which queue to use to store state about your"
        " jobs"),
)
Exemple #7
0
"""
import redis

from stolos import get_NS
from stolos.exceptions import _log_raise_if
from . import TasksConfigBaseMapping, _ensure_type, log
from .json_config import JSONMapping, JSONSequence

from stolos import argparse_shared as at

build_arg_parser = at.build_arg_parser([
    at.group(
        ("Configuration Backend Options: Redis"
         "  By default, assume the Redis DB is available locally"),
        at.add_argument(
            '--redis_key_prefix',
            default='stolos/',
            help=("All redis keys stolos creates are prefixed by this value")),
        at.add_argument('--redis_db',
                        default=0,
                        type=int,
                        help="Number of the DB that redis connects to"),
        at.add_argument('--redis_host',
                        default='localhost',
                        help="Host address to redis server"),
        at.add_argument('--redis_port',
                        default=6379,
                        type=int,
                        help="Port to connect to redis server at"),
        at.add_argument(
            '--redis_connection_opts',
            type=lambda x: x.split('='),
Exemple #8
0
"""
Test that tasks get executed properly using zookeeper
This is used to test Stolos (pyspark plugin)
"""
from stolos import argparse_shared as at
from stolos.examples import log


def main(sc, ns, **job_id_identifiers):
    """
    Entry point of the example task that exercises the pyspark plugin.

    Writes `ns.read_fp`, a fixed marker and the namespace to the module
    logger.  Raises when asked to via `ns.fail`.

    :param sc: unused here
        (presumably a SparkContext passed in by the plugin - TODO confirm)
    :param ns: namespace with the attributes `disable_log`, `read_fp`
        and `fail`
    :param job_id_identifiers: extra keyword arguments; unused here
    :raises Exception: if `ns.fail` is truthy
    """
    if ns.disable_log:
        import logging
        # Call logging.disable rather than rebinding it: the previous
        # `logging.disable = True` left logging enabled and overwrote the
        # stdlib function with a bool.
        logging.disable(logging.CRITICAL)
    log.info(ns.read_fp)
    log.info('test_module!!!')
    log.info('default ns: %s' % ns)
    if ns.fail:
        raise Exception("You asked me to fail, so here I am!")


# Argument parser definition for this test task: two boolean flags in one
# group, with conflict_handler='resolve' given to at.build_arg_parser.
build_arg_parser = at.build_arg_parser(
    [
        at.group(
            "Test spark task",
            at.add_argument('--fail', action='store_true'),
            at.add_argument('--disable_log', action='store_true'),
        )
    ],
    conflict_handler='resolve')
Exemple #9
0
def create(path, value):
    """Create `path` with `value` (converted by util.tobytes), creating
    missing parent paths (makepath=True)

    Return whatever the client's `create` call returns
    Raise exceptions.NodeExistsError, with the path prepended to the
    original message, if the client reports NodeExistsError
    """
    try:
        client = raw_client()
        return client.create(path, util.tobytes(value), makepath=True)
    except NodeExistsError as err:
        message = "%s: %s" % (path, err)
        raise exceptions.NodeExistsError(message)


def increment(path, value=1):
    """Increment the counter at given path by `value` (1 by default)

    Return the incremented count as an int (the counter's `value`
    attribute after the in-place addition)
    """
    counter = raw_client().Counter(path)
    counter += value
    return counter.value


# Command-line options for the Zookeeper queue backend.
build_arg_parser = at.build_arg_parser([
    at.add_argument(
        '--qb_zookeeper_hosts', help="The address to your Zookeeper cluster"),
    at.add_argument(
        '--qb_zookeeper_timeout', default=5, type=float,
        help="Max num secs to wait on response if timeout not specified"),
], description=(
    "Options that specify which queue to use to store state about your jobs"),
)
Exemple #10
0
    if value == '':
        value = '--STOLOSEMPTYSTRING--'
    rv = raw_client().set(path, value, nx=True)
    if not rv:
        raise stolos.exceptions.NodeExistsError(
            "Could not create path: %s" % path)


def increment(path, value=1):
    """Add `value` (default 1) to the counter stored at `path`

    Return the incremented count as an int; this is the result of the
    client's `incrby` call
    """
    client = raw_client()
    incremented = client.incrby(path, value)
    return incremented


# Options that configure the connection to the Redis queue backend.
# NOTE(review): the socket timeout default is written as the string '15'
# with type=float.  argparse itself applies `type` to string defaults;
# verify the at.add_argument wrapper does the same.
build_arg_parser = at.build_arg_parser(
    [
        at.add_argument('--qb_redis_host',
                        help="Host address to redis server"),
        at.add_argument('--qb_redis_port',
                        type=int,
                        default=6379,
                        help="Port to connect to Redis server"),
        at.add_argument('--qb_redis_db', default=0, type=int),
        at.add_argument('--qb_redis_lock_timeout', default=60, type=int),
        at.add_argument('--qb_redis_max_network_delay',
                        default=30,
                        type=int),
        at.add_argument('--qb_redis_socket_timeout',
                        default='15',
                        type=float,
                        help=("number of seconds that the redis client will"
                              " spend waiting for a"
                              " response from Redis.")),
    ],
    description=("These options specify which queue to use to store state"
                 " about your jobs"))
Exemple #11
0
"""
import redis

from stolos import get_NS
from stolos.exceptions import _log_raise_if
from . import TasksConfigBaseMapping, _ensure_type, log
from .json_config import JSONMapping, JSONSequence

from stolos import argparse_shared as at


# Command-line options contributed by the Redis configuration backend.
# NOTE(review): --redis_connection_opts is split on every '=' character, so
# a value that itself contains '=' yields more than two parts - confirm the
# consumer of this option expects that.
build_arg_parser = at.build_arg_parser(
    [
        at.group(
            ("Configuration Backend Options: Redis"
             "  By default, assume the Redis DB is available locally"),
            at.add_argument(
                '--redis_key_prefix',
                default='stolos/',
                help=(
                    "All redis keys stolos creates are prefixed by this"
                    " value"),
            ),
            at.add_argument(
                '--redis_db',
                default=0,
                type=int,
                help="Number of the DB that redis connects to",
            ),
            at.add_argument(
                '--redis_host',
                default='localhost',
                help="Host address to redis server",
            ),
            at.add_argument(
                '--redis_port',
                default=6379,
                type=int,
                help="Port to connect to redis server at",
            ),
            at.add_argument(
                '--redis_connection_opts',
                type=lambda opt: opt.split('='),
                help="Additional arguments to pass to redis.StrictRedis",
            ),
        ),
    ]
)
Exemple #12
0
import simplejson

from . import (TasksConfigBaseMapping, TasksConfigBaseSequence, log,
               _ensure_type)

from stolos import argparse_shared as at
from stolos import get_NS

# Argument parser definition for the JSON configuration backend: a single
# required option naming the JSON file.
build_arg_parser = at.build_arg_parser([at.group(
    "Configuration Backend Options: JSON",
    at.add_argument(
        '--tasks_json', required=True, help=(
            "Filepath to a json file defining Stolos application config")),
)])


class _JSONMappingBase(object):
    """
    Common `__getitem__` / `__len__` for objects whose data lives in
    `self.cache`; this class does not set that attribute itself.
    """

    def __len__(self):
        return len(self.cache)

    def __getitem__(self, key):
        # Look the raw entry up first, then let _ensure_type decide what to
        # return for it (it receives the mapping and sequence classes).
        raw_entry = self.cache[key]
        return _ensure_type(raw_entry, JSONMapping, JSONSequence)


class JSONMapping(_JSONMappingBase, TasksConfigBaseMapping):
    """
    A read-only dictionary loaded with data from a file identified by
Exemple #13
0
use this sub-package.  External projects should refer directly to Stolos.api.
"""
import importlib
from stolos import argparse_shared as at

import logging
log = logging.getLogger('stolos.dag_tools')

# define configuration dependencies for dag_tools to be usable
build_arg_parser = at.build_arg_parser([
    at.group(
        # "DAG: Details relating to how your app dependencies are defined",
        "Application Dependency Configuration",
        at.add_argument(
            '--job_id_default_template',
            required=True,
            help=("Defines the default way to identify `job_id`s for all"
                  " applications.  See conf/stolos-env.sh for an example")),
        at.add_argument(
            '--job_id_validations',
            required=True,
            type=lambda pth: importlib.import_module(pth).JOB_ID_VALIDATIONS,
            help=
            ('A python import path to a python module where Stolos can expect'
             ' to find the a dict named `JOB_ID_VALIDATIONS`.  This dict'
             ' contains validation functions for job_id components.'
             ' You can also configure Stolos logging here.'
             ' See conf/stolos-env.sh for an example')),
        at.add_argument(
            '--job_id_delimiter',
            default='_',
Exemple #14
0
    except NoNodeError as err:
        raise exceptions.NoNodeError(
            "Must first create node before setting a new value. %s" % err)


def create(path, value):
    """Create a node at `path` whose content is util.tobytes(value)

    The client is asked to create missing parents (makepath=True) and its
    return value is passed back to the caller
    A NodeExistsError from the client is re-raised as
    exceptions.NodeExistsError with the message "<path>: <original error>"
    """
    try:
        zk = raw_client()
        result = zk.create(path, util.tobytes(value), makepath=True)
        return result
    except NodeExistsError as err:
        raise exceptions.NodeExistsError(
            "%s: %s" % (path, err))


def increment(path, value=1):
    """Add `value` (default 1) to the counter at given path

    Return the incremented count as an int, read from the counter's
    `value` attribute once the addition has been applied
    """
    client = raw_client()
    path_counter = client.Counter(path)
    path_counter += value
    return path_counter.value


# Options that configure the connection to the Zookeeper queue backend.
build_arg_parser = at.build_arg_parser(
    [
        at.add_argument(
            '--qb_zookeeper_hosts',
            help="The address to your Zookeeper cluster",
        ),
        at.add_argument(
            '--qb_zookeeper_timeout',
            default=5,
            type=float,
            help="Max num secs to wait on response if timeout not specified",
        ),
    ],
    description=(
        "Options that specify which queue to use to store state about your"
        " jobs"),
)