def build_arg_parser_and_parse_args():
    """
    Get an argparse.Namespace from sys.argv,
    lazily import the appropriate plugin module based on the given app name,
    and recreate the namespace with arguments specific to that plugin module
    """
    parser = at.build_arg_parser([at.group(
        "Runtime options",
        at.app_name,
        at.add_argument(
            '--bypass_scheduler', action='store_true',
            help=(
                "Run a task directly. Do not schedule it."
                " Do not obtain a lock on this job."
                " Requires passing --job_id")),
        at.add_argument(
            '--timeout', type=int, default=2,
            help='time to wait for task to appear in queue before dying'),
        at.add_argument(
            '--max_retry', type=int, default=5,
            help='Maximum number of times to retry a failed task.'),
        at.add_argument(
            '--job_id', help=(
                'run a specific job_id. If a job is already queued,'
                ' it will run twice')),
    )],
        description=(
            "This script intelligently executes your application's jobs."
            " Specifically, an instance of this script fetches exactly 1 job"
            " from your application's queue, decides how to perform it, and"
            " then dies. Because jobs are managed in a DAG, Stolos may choose"
            " to delay execution of a job until dependencies have been met."
            " It may also queue child or parent jobs depending on their"
            " status."),
    )
    parser, ns = initialize(
        [parser(), dt, cb, qb], parse_known_args=True)
    # get plugin parser
    plugin = importlib.import_module(
        'stolos.plugins.%s_plugin' % dt.get_job_type(ns.app_name))
    ns = at.build_arg_parser(
        parents=[parser, plugin.build_arg_parser()], add_help=True
    ).parse_args()
    ns.job_type_func = plugin.main
    return ns
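# Illustrative sketch (not part of Stolos): the runner above looks up
# stolos.plugins.<job_type>_plugin and uses two attributes from it:
# `build_arg_parser` (called as a factory to produce a parser) and `main`
# (stored on the namespace as ns.job_type_func). A hypothetical minimal
# plugin module could therefore look like this; the option name and the
# main() signature are assumptions, not Stolos API:

from stolos import argparse_shared as at


def main(ns, **job_id_identifiers):
    # perform the single fetched job here
    print('running job with greeting: %s' % ns.greeting)


build_arg_parser = at.build_arg_parser([at.group(
    "Hypothetical plugin options",
    at.add_argument('--greeting', default='hello'),
)])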
import simplejson

from . import (
    TasksConfigBaseMapping, TasksConfigBaseSequence, log, _ensure_type)
from stolos import argparse_shared as at
from stolos import get_NS


build_arg_parser = at.build_arg_parser([at.group(
    "Configuration Backend Options: JSON",
    at.add_argument(
        '--tasks_json', required=True, help=(
            "Filepath to a json file defining Stolos application config")),
)])


class _JSONMappingBase(object):
    def __getitem__(self, key):
        return _ensure_type(
            self.cache[key], JSONMapping, JSONSequence)

    def __len__(self):
        return len(self.cache)


class JSONMapping(_JSONMappingBase, TasksConfigBaseMapping):
    """
    A read-only dictionary loaded with data from a file identified by the
    --tasks_json option
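# Illustrative sketch (assumption): the schema of the --tasks_json file is
# not shown here, but given that the runner calls dt.get_job_type(app_name),
# each top-level key is presumably an app_name whose value includes a
# job_type. A hypothetical minimal file might look like:
#
#     {"my_app": {"job_type": "pyspark"}}
#
# JSONMapping wraps the parsed dict read-only, and _ensure_type converts
# nested dicts and lists into JSONMapping and JSONSequence instances.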
"""
This __init__ file is the official api for how other Stolos internals should
use this sub-package. External projects should refer directly to Stolos.api.
"""
import importlib
import logging

from stolos import argparse_shared as at

log = logging.getLogger('stolos.dag_tools')


# define configuration dependencies for dag_tools to be usable
build_arg_parser = at.build_arg_parser([at.group(
    # "DAG: Details relating to how your app dependencies are defined",
    "Application Dependency Configuration",
    at.add_argument(
        '--job_id_default_template', required=True, help=(
            "Defines the default way to identify `job_id`s for all"
            " applications. See conf/stolos-env.sh for an example")),
    at.add_argument(
        '--job_id_validations', required=True,
        type=lambda pth: importlib.import_module(pth).JOB_ID_VALIDATIONS,
        help=(
            'A python import path to a python module where Stolos can expect'
            ' to find a dict named `JOB_ID_VALIDATIONS`. This dict'
            ' contains validation functions for job_id components.'
            ' You can also configure Stolos logging here.'
            ' See conf/stolos-env.sh for an example')),
    at.add_argument(
        '--job_id_delimiter', default='_', help=(
            'The identifying components of a job_id (as defined in'
            ' the job_id_template) are separated by a character sequence.'
            ' The default for this is an underscore: "_"')),
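# Illustrative sketch (not part of Stolos): --job_id_validations expects an
# import path to a module defining JOB_ID_VALIDATIONS, a dict of validation
# functions keyed by job_id component. The component names below are
# hypothetical; see conf/stolos-env.sh for the real example:

import datetime

JOB_ID_VALIDATIONS = {
    # each function receives one raw job_id component and returns the
    # validated (possibly converted) value, raising on bad input
    'date': lambda d: datetime.datetime.strptime(str(d), '%Y%m%d').date(),
    'client_id': lambda cid: int(cid),
}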
""" Test that tasks get executed properly using zookeeper This is used to test Stolos (pyspark plugin) """ from stolos import argparse_shared as at from stolos.examples import log def main(sc, ns, **job_id_identifiers): if ns.disable_log: import logging logging.disable = True log.info(ns.read_fp) log.info('test_module!!!') log.info('default ns: %s' % ns) if ns.fail: raise Exception("You asked me to fail, so here I am!") build_arg_parser = at.build_arg_parser([ at.group( "Test spark task", at.add_argument('--fail', action='store_true'), at.add_argument('--disable_log', action='store_true'), ) ], conflict_handler='resolve')
    if value == '':
        value = '--STOLOSEMPTYSTRING--'
    rv = raw_client().set(path, value, nx=True)
    if not rv:
        raise stolos.exceptions.NodeExistsError(
            "Could not create path: %s" % path)


def increment(path, value=1):
    """Increment the counter at given path
    Return the incremented count as an int
    """
    rc = raw_client()
    return rc.incrby(path, value)


build_arg_parser = at.build_arg_parser([
    at.add_argument('--qb_redis_host', help="Host address to redis server"),
    at.add_argument(
        '--qb_redis_port', type=int, default=6379,
        help="Port to connect to Redis server"),
    at.add_argument('--qb_redis_db', default=0, type=int),
    at.add_argument('--qb_redis_lock_timeout', default=60, type=int),
    at.add_argument('--qb_redis_max_network_delay', default=30, type=int),
    at.add_argument(
        '--qb_redis_socket_timeout', default='15', type=float, help=(
            "number of seconds that the redis client will spend waiting for"
            " a response from Redis.")),
], description=(
    "These options specify which queue to use to store state about your"
    " jobs"))
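# Illustrative usage (assumption): increment() wraps redis INCRBY, which
# creates the key if missing and returns the updated integer. The path below
# is hypothetical:
#
#     increment('stolos/retry_count/my_app')     # e.g. returns 1
#     increment('stolos/retry_count/my_app', 5)  # then returns 6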
""" import redis from stolos import get_NS from stolos.exceptions import _log_raise_if from . import TasksConfigBaseMapping, _ensure_type, log from .json_config import JSONMapping, JSONSequence from stolos import argparse_shared as at build_arg_parser = at.build_arg_parser([ at.group( ("Configuration Backend Options: Redis" " By default, assume the Redis DB is available locally"), at.add_argument( '--redis_key_prefix', default='stolos/', help=("All redis keys stolos creates are prefixed by this value")), at.add_argument('--redis_db', default=0, type=int, help="Number of the DB that redis connects to"), at.add_argument('--redis_host', default='localhost', help="Host address to redis server"), at.add_argument('--redis_port', default=6379, type=int, help="Port to connect to redis server at"), at.add_argument( '--redis_connection_opts', type=lambda x: x.split('='),
""" Test that tasks get executed properly using zookeeper This is used to test Stolos (pyspark plugin) """ from stolos import argparse_shared as at from stolos.examples import log def main(sc, ns, **job_id_identifiers): if ns.disable_log: import logging logging.disable = True log.info(ns.read_fp) log.info('test_module!!!') log.info('default ns: %s' % ns) if ns.fail: raise Exception("You asked me to fail, so here I am!") build_arg_parser = at.build_arg_parser([at.group( "Test spark task", at.add_argument('--fail', action='store_true'), at.add_argument('--disable_log', action='store_true'), )], conflict_handler='resolve' )
    except NoNodeError as err:
        raise exceptions.NoNodeError(
            "Must first create node before setting a new value. %s" % err)


def create(path, value):
    try:
        return raw_client().create(path, util.tobytes(value), makepath=True)
    except NodeExistsError as err:
        raise exceptions.NodeExistsError("%s: %s" % (path, err))


def increment(path, value=1):
    """Increment the counter at given path
    Return the incremented count as an int
    """
    c = raw_client().Counter(path)
    c += value
    return c.value


build_arg_parser = at.build_arg_parser([
    at.add_argument(
        '--qb_zookeeper_hosts',
        help="The address to your Zookeeper cluster"),
    at.add_argument(
        '--qb_zookeeper_timeout', default=5, type=float,
        help="Max num secs to wait on response if timeout not specified"),
], description=(
    "Options that specify which queue to use to store state about your"
    " jobs"))
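# Illustrative usage (assumption): create() wraps kazoo's create(...,
# makepath=True) and increment() uses kazoo's Counter recipe. With a
# reachable cluster (paths hypothetical):
#
#     create('/stolos/my_app/job1', 'queued')  # 2nd call raises NodeExistsError
#     increment('/stolos/counters/my_app')     # returns the new count as an int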
""" import redis from stolos import get_NS from stolos.exceptions import _log_raise_if from . import TasksConfigBaseMapping, _ensure_type, log from .json_config import JSONMapping, JSONSequence from stolos import argparse_shared as at build_arg_parser = at.build_arg_parser([at.group( ("Configuration Backend Options: Redis" " By default, assume the Redis DB is available locally"), at.add_argument( '--redis_key_prefix', default='stolos/', help=( "All redis keys stolos creates are prefixed by this value")), at.add_argument( '--redis_db', default=0, type=int, help="Number of the DB that redis connects to"), at.add_argument( '--redis_host', default='localhost', help="Host address to redis server"), at.add_argument( '--redis_port', default=6379, type=int, help="Port to connect to redis server at"), at.add_argument( '--redis_connection_opts', type=lambda x: x.split('='), help=( "Additional arguments to pass to redis.StrictRedis")), )])