def build_arg_parser_and_parse_args():
    """
    Get an argparse.Namespace from sys.argv in two passes.

    The first pass builds the "Runtime options" parser and hands it, along
    with `dt`, `cb` and `qb`, to `initialize(..., parse_known_args=True)`.
    The app name found in that pass selects the plugin module, which is
    imported lazily.  The second pass parses the arguments again with a
    parser whose parents are the first parser and the plugin's own parser.

    The plugin's `main` is attached to the returned namespace as
    `job_type_func`.
    """
    runtime_options = at.group(
        "Runtime options",
        at.app_name,
        at.add_argument(
            '--bypass_scheduler',
            action='store_true',
            help=(
                "Run a task directly. Do not schedule it."
                " Do not obtain a lock on this job."
                " Requires passing --job_id")),
        at.add_argument(
            '--timeout',
            type=int,
            default=2,
            help='time to wait for task to appear in queue before dying'),
        at.add_argument(
            '--max_retry',
            type=int,
            default=5,
            help='Maximum number of times to retry a failed task.'),
        at.add_argument(
            '--job_id',
            help=(
                'run a specific job_id. If a job is already queued,'
                ' it will run twice')),
    )
    description = (
        "This script intelligently executes your application's jobs."
        " Specifically, an instance of this script fetches exactly 1 job"
        " from your application's queue, decides how to perform those jobs,"
        " and then dies. Because jobs are managed in a DAG, Stolos may choose"
        " to delay execution of a job until dependencies have been met."
        " It may also queue child or parent jobs depending on their status."
    )
    runner_parser = at.build_arg_parser(
        [runtime_options], description=description)

    # first pass: only the arguments known so far are parsed
    base_parser, known_ns = initialize(
        [runner_parser(), dt, cb, qb], parse_known_args=True)

    # get plugin parser
    job_type = dt.get_job_type(known_ns.app_name)
    plugin = importlib.import_module('stolos.plugins.%s_plugin' % job_type)

    # second pass: re-parse with the plugin's arguments included
    combined_parser = at.build_arg_parser(
        parents=[base_parser, plugin.build_arg_parser()], add_help=True)
    ns = combined_parser.parse_args()
    ns.job_type_func = plugin.main
    return ns
def build_arg_parser_and_parse_args():
    """
    Get an argparse.Namespace from sys.argv in two passes.

    The first pass builds the "Runtime options" parser and hands it, along
    with `dt`, `cb` and `qb`, to `initialize(..., parse_known_args=True)`.
    The app name found in that pass selects the plugin module, which is
    imported lazily.  The second pass parses the arguments again with a
    parser whose parents are the first parser and the plugin's own parser.

    The plugin's `main` is attached to the returned namespace as
    `job_type_func`.
    """
    runtime_options = at.group(
        "Runtime options",
        at.app_name,
        at.add_argument(
            '--bypass_scheduler',
            action='store_true',
            help=(
                "Run a task directly. Do not schedule it."
                " Do not obtain a lock on this job."
                " Requires passing --job_id")),
        at.add_argument(
            '--timeout',
            type=int,
            default=2,
            help='time to wait for task to appear in queue before dying'),
        at.add_argument(
            '--max_retry',
            type=int,
            default=5,
            help='Maximum number of times to retry a failed task.'),
        at.add_argument(
            '--job_id',
            help=(
                'run a specific job_id. If a job is already queued,'
                ' it will run twice')),
    )
    description = (
        "This script intelligently executes your application's jobs."
        " Specifically, an instance of this script fetches exactly 1 job"
        " from your application's queue, decides how to perform those jobs,"
        " and then dies. Because jobs are managed in a DAG, Stolos may choose"
        " to delay execution of a job until dependencies have been met."
        " It may also queue child or parent jobs depending on their status."
    )
    runner_parser = at.build_arg_parser(
        [runtime_options], description=description)

    # first pass: only the arguments known so far are parsed
    base_parser, known_ns = initialize(
        [runner_parser(), dt, cb, qb], parse_known_args=True)

    # get plugin parser
    job_type = dt.get_job_type(known_ns.app_name)
    plugin = importlib.import_module('stolos.plugins.%s_plugin' % job_type)

    # second pass: re-parse with the plugin's arguments included
    combined_parser = at.build_arg_parser(
        parents=[base_parser, plugin.build_arg_parser()], add_help=True)
    ns = combined_parser.parse_args()
    ns.job_type_func = plugin.main
    return ns
ensure_parents_completed, _set_state_unsafe # TODO: get rid of _set_state_unsafe ) maybe_add_subtask, readd_subtask, set_state, inc_retry_count, ensure_parents_completed, _set_state_unsafe from .read_job_state import (check_state) check_state from .locking import (obtain_execute_lock, is_execute_locked) obtain_execute_lock, is_execute_locked from .qbcli_baseapi import Lock as BaseLock BaseLock build_arg_parser = at.build_arg_parser([at.group( "Stolos Queue Backend (manages job state)", at.backend( backend_type='queue', default='redis', known_backends={ "zookeeper": "stolos.queue_backend.qbcli_zookeeper", "redis": "stolos.queue_backend.qbcli_redis"}, help=( 'Select a database that stores job state.' ' This option defines which queue backend Stolos uses.' ' You can supply your own queue backend or choose from the' ' following supported options: {known_backends}')), )])
import simplejson

from . import (
    TasksConfigBaseMapping, TasksConfigBaseSequence, log, _ensure_type)
from stolos import argparse_shared as at
from stolos import get_NS


# Command-line option group for the JSON configuration backend.
# --tasks_json is required.
build_arg_parser = at.build_arg_parser([at.group(
    "Configuration Backend Options: JSON",
    at.add_argument(
        '--tasks_json', required=True, help=(
            "Filepath to a json file defining Stolos application config")),
)])


class _JSONMappingBase(object):
    """
    Item access and length for objects that expose their data as
    `self.cache` (this class does not set `cache` itself).
    """

    def __getitem__(self, key):
        # hand the cached value to _ensure_type together with the JSON
        # mapping and sequence classes
        return _ensure_type(
            self.cache[key], JSONMapping, JSONSequence)

    def __len__(self):
        # length of the underlying cache
        return len(self.cache)


class JSONMapping(_JSONMappingBase, TasksConfigBaseMapping):
    """
    A read-only dictionary loaded with data from a file identified by
    the --tasks_json option
# Options that must be configured before dag_tools can be used.
build_arg_parser = at.build_arg_parser([
    at.group(
        # "DAG: Details relating to how your app dependencies are defined",
        "Application Dependency Configuration",
        # required: default job_id template
        at.add_argument(
            '--job_id_default_template',
            required=True,
            help=(
                "Defines the default way to identify `job_id`s"
                " for all applications."
                " See conf/stolos-env.sh for an example")),
        # required: the value is an import path; the module is imported and
        # its JOB_ID_VALIDATIONS attribute becomes the parsed value
        at.add_argument(
            '--job_id_validations',
            required=True,
            type=lambda import_path: (
                importlib.import_module(import_path).JOB_ID_VALIDATIONS),
            help=(
                'A python import path to a python module'
                ' where Stolos can expect to find the a dict named'
                ' `JOB_ID_VALIDATIONS`.'
                ' This dict contains validation functions'
                ' for job_id components.'
                ' You can also configure Stolos logging here.'
                ' See conf/stolos-env.sh for an example')),
        # optional: defaults to an underscore
        at.add_argument(
            '--job_id_delimiter',
            default='_',
            help=(
                'The identifying components of a job_id'
                ' (as defined in the job_id_template)'
                ' are separated by a character sequence.'
                ' The default for this is an underscore: "_"')),
        # optional: defaults to 'default'
        at.add_argument(
            "--dependency_group_default_name",
            default='default',
            help=(
                'A very low-level option that specifies'
                ' how unnamed dependency groups are identified.'
                " Don't bother changing this")),
    )
])

# Expose various functions to the rest of Stolos internals
"""
Test that tasks get executed properly using zookeeper
This is used to test Stolos (pyspark plugin)
"""
from stolos import argparse_shared as at
from stolos.examples import log


def main(sc, ns, **job_id_identifiers):
    """
    Example job body: log a few values from `ns` and optionally fail.

    `sc` and `job_id_identifiers` are accepted but not used here.
    `ns` must provide the attributes `disable_log`, `read_fp` and `fail`.

    Raises:
        Exception: if `ns.fail` is truthy.
    """
    if ns.disable_log:
        import logging
        # Call logging.disable() instead of assigning to it.  The previous
        # `logging.disable = True` silenced nothing and replaced the stdlib
        # function with a bool for every other caller in the process.
        # NOTE: this suppresses all logging calls of severity CRITICAL and
        # below, process-wide, including the log.info calls that follow.
        logging.disable(logging.CRITICAL)
    log.info(ns.read_fp)
    log.info('test_module!!!')
    log.info('default ns: %s' % ns)
    if ns.fail:
        raise Exception("You asked me to fail, so here I am!")


# Command-line options for this example task: --fail and --disable_log are
# both boolean flags.
build_arg_parser = at.build_arg_parser([
    at.group(
        "Test spark task",
        at.add_argument('--fail', action='store_true'),
        at.add_argument('--disable_log', action='store_true'),
    )
], conflict_handler='resolve')
from stolos import argparse_shared as at


# Command-line option group for the Redis configuration backend.
# No option is required; every option except --redis_connection_opts has a
# default.
build_arg_parser = at.build_arg_parser([
    at.group(
        ("Configuration Backend Options: Redis"
         " By default, assume the Redis DB is available locally"),
        at.add_argument(
            '--redis_key_prefix', default='stolos/',
            help=("All redis keys stolos creates are prefixed by this value")),
        at.add_argument('--redis_db', default=0, type=int,
                        help="Number of the DB that redis connects to"),
        at.add_argument('--redis_host', default='localhost',
                        help="Host address to redis server"),
        at.add_argument('--redis_port', default=6379, type=int,
                        help="Port to connect to redis server at"),
        # the raw value is split on '=' into a list of strings
        at.add_argument(
            '--redis_connection_opts', type=lambda x: x.split('='),
            help=("Additional arguments to pass to redis.StrictRedis")),
    )
])


class _RedisConfig(object):
ensure_parents_completed, _set_state_unsafe
from .read_job_state import (check_state)
# bare name expressions: the name is evaluated and the result discarded
# (presumably to mark re-exported imports as used -- confirm)
check_state
from .locking import (obtain_execute_lock, is_execute_locked)
obtain_execute_lock, is_execute_locked
from .qbcli_baseapi import Lock as BaseLock
BaseLock

# Option group that selects the queue backend.  'redis' is the default and
# each known backend name maps to a dotted import path.
build_arg_parser = at.build_arg_parser([at.group(
    "Stolos Queue Backend (manages job state)",
    at.backend(
        backend_type='queue',
        default='redis',
        known_backends={
            "zookeeper": "stolos.queue_backend.qbcli_zookeeper",
            "redis": "stolos.queue_backend.qbcli_redis",
        },
        help=(
            'Select a database that stores job state.'
            ' This option defines which queue backend Stolos uses.'
            ' You can supply your own queue backend'
            ' or choose from the following supported options:'
            ' {known_backends}'),
    ),
)])
return mapping_kls(value) else: return value def get_tasks_config(): """ Returns object to read Stolos application config from your chosen configuration backend. """ ns = stolos.get_NS() return ns.configuration_backend() build_arg_parser = at.build_arg_parser([at.group( "Application Dependency Configuration", at.backend( backend_type='configuration', default='json', known_backends={ "json": "stolos.configuration_backend.json_config.JSONMapping", "redis": "stolos.configuration_backend.redis_config.RedisMapping"}, help=( "Where do you store the application dependency data?" ' This option defines which configuration backend Stolos uses' ' to access the directed graph defining how your applications' ' depend on each other.' ' You can supply your own configuration backend or choose from the' ' following supported options: {known_backends}')), )])
"""
Test that tasks get executed properly using zookeeper
This is used to test Stolos (pyspark plugin)
"""
from stolos import argparse_shared as at
from stolos.examples import log


def main(sc, ns, **job_id_identifiers):
    """
    Example job body: log a few values from `ns` and optionally fail.

    `sc` and `job_id_identifiers` are accepted but not used here.
    `ns` must provide the attributes `disable_log`, `read_fp` and `fail`.

    Raises:
        Exception: if `ns.fail` is truthy.
    """
    if ns.disable_log:
        import logging
        # Call logging.disable() instead of assigning to it.  The previous
        # `logging.disable = True` silenced nothing and replaced the stdlib
        # function with a bool for every other caller in the process.
        # NOTE: this suppresses all logging calls of severity CRITICAL and
        # below, process-wide, including the log.info calls that follow.
        logging.disable(logging.CRITICAL)
    log.info(ns.read_fp)
    log.info('test_module!!!')
    log.info('default ns: %s' % ns)
    if ns.fail:
        raise Exception("You asked me to fail, so here I am!")


# Command-line options for this example task: --fail and --disable_log are
# both boolean flags.
build_arg_parser = at.build_arg_parser([at.group(
    "Test spark task",
    at.add_argument('--fail', action='store_true'),
    at.add_argument('--disable_log', action='store_true'),
)], conflict_handler='resolve'
)
from . import TasksConfigBaseMapping, _ensure_type, log
from .json_config import JSONMapping, JSONSequence
from stolos import argparse_shared as at


# Command-line option group for the Redis configuration backend.
# No option is required; every option except --redis_connection_opts has a
# default.
build_arg_parser = at.build_arg_parser([at.group(
    ("Configuration Backend Options: Redis"
     " By default, assume the Redis DB is available locally"),
    at.add_argument(
        '--redis_key_prefix', default='stolos/', help=(
            "All redis keys stolos creates are prefixed by this value")),
    at.add_argument(
        '--redis_db', default=0, type=int,
        help="Number of the DB that redis connects to"),
    at.add_argument(
        '--redis_host', default='localhost',
        help="Host address to redis server"),
    at.add_argument(
        '--redis_port', default=6379, type=int,
        help="Port to connect to redis server at"),
    # the raw value is split on '=' into a list of strings
    at.add_argument(
        '--redis_connection_opts', type=lambda x: x.split('='), help=(
            "Additional arguments to pass to redis.StrictRedis")),
)])


class _RedisConfig(object):
    def __getitem__(self, key):
        if key not in self.cache:
            # not cached under the bare key: continue with the prefixed key
            key = "%s%s" % (self.redis_key_prefix, key)
def get_tasks_config():
    """
    Return the object used to read Stolos application config.

    The object is produced by calling `configuration_backend` on the
    namespace returned by `stolos.get_NS()`.
    """
    return stolos.get_NS().configuration_backend()


# Option group that selects the configuration backend.  'json' is the
# default and each known backend name maps to a dotted import path.
build_arg_parser = at.build_arg_parser([at.group(
    "Application Dependency Configuration",
    at.backend(
        backend_type='configuration',
        default='json',
        known_backends={
            "json": "stolos.configuration_backend.json_config.JSONMapping",
            "redis": "stolos.configuration_backend.redis_config.RedisMapping",
        },
        help=(
            "Where do you store the application dependency data?"
            ' This option defines which configuration backend'
            ' Stolos uses to access the directed graph defining'
            ' how your applications depend on each other.'
            ' You can supply your own configuration backend'
            ' or choose from the following supported options:'
            ' {known_backends}'),
    ),
)])
import simplejson

from . import (TasksConfigBaseMapping, TasksConfigBaseSequence, log,
               _ensure_type)
from stolos import argparse_shared as at
from stolos import get_NS


# Command-line option group for the JSON configuration backend.
# --tasks_json is required.
build_arg_parser = at.build_arg_parser([
    at.group(
        "Configuration Backend Options: JSON",
        at.add_argument(
            '--tasks_json', required=True, help=(
                "Filepath to a json file defining Stolos application config")),
    )
])


class _JSONMappingBase(object):
    """
    Item access and length for objects that expose their data as
    `self.cache` (this class does not set `cache` itself).
    """

    def __getitem__(self, key):
        # hand the cached value to _ensure_type together with the JSON
        # mapping and sequence classes
        return _ensure_type(self.cache[key], JSONMapping, JSONSequence)

    def __len__(self):
        # length of the underlying cache
        return len(self.cache)


class JSONMapping(_JSONMappingBase, TasksConfigBaseMapping):
    """
    A read-only dictionary loaded with data from a file identified by
at.group( # "DAG: Details relating to how your app dependencies are defined", "Application Dependency Configuration", at.add_argument( '--job_id_default_template', required=True, help=("Defines the default way to identify `job_id`s for all" " applications. See conf/stolos-env.sh for an example")), at.add_argument( '--job_id_validations', required=True, type=lambda pth: importlib.import_module(pth).JOB_ID_VALIDATIONS, help= ('A python import path to a python module where Stolos can expect' ' to find the a dict named `JOB_ID_VALIDATIONS`. This dict' ' contains validation functions for job_id components.' ' You can also configure Stolos logging here.' ' See conf/stolos-env.sh for an example')), at.add_argument( '--job_id_delimiter', default='_', help=( 'The identifying components of a job_id (as defined in' ' the job_id_template) are separated by a character sequence.' ' The default for this is an underscore: "_"')), at.add_argument( "--dependency_group_default_name", default='default', help=( 'A very low-level option that specifies how unnamed dependency' " groups are identified. Don't bother changing this")), )