def get_job_status_trigger_config(args) -> JobStatusTriggerConfig:
    parser = ArgParser(auto_env_var_prefix="", prog=APP_NAME)

    def benchmark_status_from_input(input: str) -> BenchmarkJobStatus:
        return BenchmarkJobStatus(input.strip(" \t,"))

    # required
    parser.add_argument("--job-name", type=str, env_var="JOB_NAME", required=True)
    parser.add_argument(
        "--trigger-statuses",
        type=benchmark_status_from_input,
        nargs="+",
        env_var="TRIGGER_STATUSES",
        required=True,
    )
    parser.add_argument("--command", type=str, env_var="COMMAND", required=True)

    # optional
    parser.add_argument("--job-namespace", type=str, default="default", env_var="JOB_NAMESPACE", required=False)
    parser.add_argument(
        "--job-not-found-grace-period-seconds",
        type=int,
        default=30,
        env_var="JOB_NOT_FOUND_GRACE_PERIOD_SECONDS",
        required=False,
    )

    parsed_args, _ = parser.parse_known_args(args)
    return JobStatusTriggerConfig(
        job_namespace=parsed_args.job_namespace,
        job_name=parsed_args.job_name,
        trigger_statuses=parsed_args.trigger_statuses,
        job_not_found_grace_period_seconds=parsed_args.job_not_found_grace_period_seconds,
        command=parsed_args.command,
    )
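# Hedged usage sketch for the function above. JobStatusTriggerConfig,
# BenchmarkJobStatus, and APP_NAME come from the surrounding module; the
# status values below are illustrative, not necessarily valid enum members.
config = get_job_status_trigger_config([
    "--job-name", "nightly-benchmark",
    "--trigger-statuses", "SUCCEEDED,", "FAILED",  # the custom type strips stray " \t," characters
    "--command", "notify.sh",
])
print(config.job_name, config.trigger_statuses)
# Since every argument declares an env_var, the same options can come from the
# environment, e.g.: JOB_NAME=nightly-benchmark TRIGGER_STATUSES=SUCCEEDED COMMAND=notify.sh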
def go_2(p, current_prefix, current_ns):
    if isinstance(p, BaseParser):
        new_p = ArgParser(default_config_files=config_files)
        for a in p.argparser._actions:
            new_a = copy.copy(a)
            ss = copy.deepcopy(new_a.option_strings)
            for ix, s in enumerate(new_a.option_strings):
                if s.startswith("--"):
                    ss[ix] = "-" + current_prefix + "-" + s[2:]
                else:
                    raise NotImplementedError
            new_a.option_strings = ss
            new_p._add_action(new_a)
        _used_args, _rest = new_p.parse_known_args(args, namespace=current_ns)
        # add a "_flags" field to each object so we know what flags caused a certain option to be set:
        # (however, note that post-parsing we may munge around ...)
        flags_dict = defaultdict(set)
        for action in new_p._actions:
            for opt in action.option_strings:
                flags_dict[action.dest].add(opt)
        current_ns.flags_ = Namespace(**flags_dict)
        # TODO: could continue parsing from `_rest` instead of original `args`
    elif isinstance(p, CompoundParser):
        current_ns.flags_ = set()
        # could also check for the CompoundParser case and not set flags there,
        # since there will never be any
        for q in p.parsers:
            ns = Namespace()
            if q.namespace in current_ns.__dict__:
                raise ValueError("Namespace field '%s' already in use" % q.namespace)
                # TODO could also allow, say, a None
            else:
                # gross but how to write n-ary identity fn that behaves sensibly on single arg??
                current_ns.__dict__[q.namespace] = ns
            # FIXME this casting doesn't work for configurations with positional arguments,
            # which aren't unpacked correctly -- better to use a namedtuple
            # (making all arguments keyword-only also works, but then you have to supply
            # often meaningless defaults in the __init__)
            go_2(q.parser,
                 current_prefix=current_prefix + (('-' + q.prefix) if q.prefix is not None else ''),
                 current_ns=ns)
            # If a cast function is provided, apply it to the namespace, possibly doing dynamic type checking
            # and also allowing the checker to provide hinting for the types of the fields
            flags = ns.flags_
            del ns.flags_
            fixed = (q.cast(current_ns.__dict__[q.namespace])  # (q.cast(**vars(current_ns.__dict__[q.namespace]))
                     if q.cast else current_ns.__dict__[q.namespace])
            if isinstance(fixed, tuple):
                fixed = fixed.replace(flags_=flags)
            elif isinstance(fixed, Namespace):
                setattr(fixed, "flags_", flags)
            else:
                raise ValueError("currently only Namespace and NamedTuple objects are supported return types from "
                                 "parsing; got %s (a %s)" % (fixed, type(fixed)))
            current_ns.__dict__[q.namespace] = fixed
            # TODO current_ns or current_namespace or ns or namespace?
    else:
        raise TypeError("parser %s wasn't a %s (%s or %s) but a %s" %
                        (p, Parser, BaseParser, CompoundParser, p.__class__))
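# Self-contained sketch of the option-string prefixing trick above, using plain
# argparse (the "-lsq6" prefix and "--resolution" flag are hypothetical):
import argparse
import copy

base = argparse.ArgumentParser(add_help=False)
base.add_argument("--resolution", type=float, default=1.0)

prefixed = argparse.ArgumentParser()
for action in base._actions:
    clone = copy.copy(action)
    # "--resolution" becomes "-lsq6-resolution", mirroring "-" + prefix + "-" + s[2:]
    clone.option_strings = ["-lsq6-" + s[2:] if s.startswith("--") else s
                            for s in action.option_strings]
    prefixed._add_action(clone)

ns, _rest = prefixed.parse_known_args(["-lsq6-resolution", "0.5"])
print(ns.resolution)  # 0.5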
def main():
    # command line option handling
    # use an environment variable to look for a default config file
    # Alternately, we could use a default location for the file
    # (say `files = ['/etc/pydpiper.cfg', '~/pydpiper.cfg', './pydpiper.cfg']`)
    # TODO this logic is duplicated in application.py
    #if "PYDPIPER_CONFIG_FILE" in os.environ:
    default_config_file = os.getenv("PYDPIPER_CONFIG_FILE")
    if default_config_file is not None:
        try:
            with open(default_config_file):
                pass
        except OSError:
            warnings.warn(f"PYDPIPER_CONFIG_FILE is set to '{default_config_file}', which can't be opened.")
    if default_config_file is not None:
        files = [default_config_file]
    else:
        files = []

    from pydpiper.core.arguments import _mk_execution_parser
    parser = ArgParser(default_config_files=files)
    _mk_execution_parser(parser)

    # using parse_known_args instead of parse_args is a hack since we
    # currently send ALL arguments from the main program to the executor.
    # Alternately, we could keep a copy of the executor parser around
    # when constructing the executor shell command
    options, _ = parser.parse_known_args()
    ensure_exec_specified(options.num_exec)

    def local_launch(options):
        # didn't parse application options so don't have a --pipeline-name
        pe = pipelineExecutor(options=options, uri_file=options.urifile, pipeline_name="anon-executor")
        # FIXME - I doubt missing the other options even works, otherwise we could change the executor interface!!
        # executors don't use any shared-memory constructs, so OK to copy
        ps = [Process(target=launchExecutor, args=(pe,)) for _ in range(options.num_exec)]
        for p in ps:
            p.start()
        for p in ps:
            p.join()

    if options.local:
        local_launch(options)
    elif options.submit_server:
        roq = q.runOnQueueingSystem(options, sysArgs=sys.argv)
        for i in range(options.num_exec):
            roq.createAndSubmitExecutorJobFile(i, after=None, time=q.timestr_to_secs(options.time))
    elif options.queue_type is not None:
        for i in range(options.num_exec):
            pe = pipelineExecutor(options=options, uri_file=options.urifile, pipeline_name="anon-executor")
            pe.submitToQueue(1)  # TODO is there a reason why we have logic for submitting `i` executors again here?
    else:
        local_launch(options)
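# Sketch of the env-var -> default_config_files pattern above, using
# configargparse directly (the "--num-exec" flag and file paths are
# hypothetical examples):
import os
import configargparse

os.environ["PYDPIPER_CONFIG_FILE"] = "/tmp/pydpiper.cfg"  # pretend the user set this
with open("/tmp/pydpiper.cfg", "w") as f:
    f.write("num-exec = 4\n")

cfg = os.getenv("PYDPIPER_CONFIG_FILE")
parser = configargparse.ArgParser(default_config_files=[cfg] if cfg else [])
parser.add_argument("--num-exec", type=int, default=1)
opts, _ = parser.parse_known_args([])
print(opts.num_exec)  # 4 -- read from the config file rather than the command line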
def get_watcher_service_config(args) -> WatcherServiceConfig:
    parser = ArgParser(auto_env_var_prefix="", prog=SERVICE_NAME)
    parser.add_argument(
        "--kubernetes-namespace-of-running-jobs", default="default", env_var="KUBERNETES_NAMESPACE_OF_RUNNING_JOBS"
    )
    parser.add_argument("--kubeconfig", env_var="KUBECONFIG")
    parser.add_argument("--service-logging-level", env_var="SERVICE_LOGGING_LEVEL", default="INFO")
    parser.add_argument("--grafana-endpoint", env_var="GRAFANA_ENDPOINT")
    parser.add_argument("--grafana-results-url", env_var="GRAFANA_RESULTS_URL")
    parser.add_argument("--grafana-op-metrics-dashboard-uid", env_var="GRAFANA_OP_METRICS_DASHBOARD_UID")

    parsed_args, _ = parser.parse_known_args(args)
    return WatcherServiceConfig(
        kubernetes_namespace_of_running_jobs=parsed_args.kubernetes_namespace_of_running_jobs,
        kubeconfig=parsed_args.kubeconfig,
        logging_level=parsed_args.service_logging_level,
        grafana_endpoint=parsed_args.grafana_endpoint,
        grafana_results_url=parsed_args.grafana_results_url,
        grafana_op_metrics_dashboard_uid=parsed_args.grafana_op_metrics_dashboard_uid,
    )
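# Hedged usage sketch (WatcherServiceConfig and SERVICE_NAME are assumed to be
# defined in the surrounding module). Every flag can also be supplied via its
# declared env_var, e.g. GRAFANA_ENDPOINT=... SERVICE_LOGGING_LEVEL=DEBUG
cfg = get_watcher_service_config(["--kubernetes-namespace-of-running-jobs", "benchmarks"])
print(cfg.kubernetes_namespace_of_running_jobs, cfg.logging_level)  # benchmarks INFO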
def go_2(p, current_prefix, current_ns):
    if isinstance(p, BaseParser):
        new_p = ArgParser(default_config_files=config_files)
        for a in p.argparser._actions:
            new_a = copy.copy(a)
            ss = copy.deepcopy(new_a.option_strings)
            for ix, s in enumerate(new_a.option_strings):
                if s.startswith("--"):
                    ss[ix] = "-" + current_prefix + "-" + s[2:]
                else:
                    raise NotImplementedError
            new_a.option_strings = ss
            new_p._add_action(new_a)
        _used_args, _rest = new_p.parse_known_args(args, namespace=current_ns)
        # TODO: could continue parsing from `_rest` instead of original `args`
    elif isinstance(p, CompoundParser):
        for q in p.parsers:
            ns = Namespace()
            if q.namespace in current_ns.__dict__:
                raise ValueError("Namespace field '%s' already in use" % q.namespace)
                # TODO could also allow, say, a None
            else:
                # gross but how to write n-ary identity fn that behaves sensibly on single arg??
                current_ns.__dict__[q.namespace] = ns
            # FIXME this casting doesn't work for configurations with positional arguments,
            # which aren't unpacked correctly -- better to use a namedtuple
            # (making all arguments keyword-only also works, but then you have to supply
            # often meaningless defaults in the __init__)
            go_2(q.parser,
                 current_prefix=current_prefix + (('-' + q.prefix) if q.prefix is not None else ''),
                 current_ns=ns)
            # If a cast function is provided, apply it to the namespace, possibly doing dynamic type checking
            # and also allowing the checker to provide hinting for the types of the fields
            current_ns.__dict__[q.namespace] = (q.cast(current_ns.__dict__[q.namespace])  # (q.cast(**vars(current_ns.__dict__[q.namespace]))
                                                if q.cast else current_ns.__dict__[q.namespace])
            # TODO current_ns or current_namespace or ns or namespace?
    else:
        raise TypeError("parser %s wasn't a %s (%s or %s) but a %s" %
                        (p, Parser, BaseParser, CompoundParser, p.__class__))
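# A minimal self-contained sketch of the `cast` idea above: after parsing, a
# sub-namespace is converted to a typed object (ExecutionConf and its fields
# are hypothetical):
from argparse import Namespace
from typing import NamedTuple

class ExecutionConf(NamedTuple):
    num_exec: int
    time: str

def cast_execution(ns: Namespace) -> ExecutionConf:
    return ExecutionConf(num_exec=ns.num_exec, time=ns.time)

conf = cast_execution(Namespace(num_exec=4, time="48:00:00"))
print(conf.num_exec)  # field access is now typed, and construction validates field names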
def parse_args():
    parser = ArgParser(default_config_files=[os.getcwd() + '/src/initial_configurations/default'])

    # Core setting
    core_parse = parser.add_argument_group('Core setting')
    core_parse.add_argument('-s', '--start_date', dest='start_date', default='now', type=str,
                            help='Training start date')
    core_parse.add_argument('-p', '--train_period', dest='train_period', default=-1, type=int,
                            help='Time period of training files to use')
    core_parse.add_argument('-n', '--new_run', dest='new_run', default=0, type=int,
                            help='If 1, the model checkpoint is erased to run a new model')
    core_parse.add_argument('-l', '--local_run', dest='local_run', default=0, type=int,
                            help='If 1, the parameter JSON file is kept locally instead of in redis')
    core_parse.add_argument('-nni', '--tuning', dest='tuning', default=0, type=int,
                            help='Whether or not to perform NNI hyperparameter tuning')

    # Model
    model_parse = parser.add_argument_group('Model')
    model_parse.add_argument('-m', '--model', dest='model', default='DNN', type=str,
                             help='Select the model to train e.g. DNN')
    model_parse.add_argument('--loss', dest='loss', default=30, type=int,
                             help="Setting of loss function: '10','11','12','20','21','22','30','31','32'")
    model_parse.add_argument('--hidden_units', dest='hidden_units', default=[128, 64], type=int, nargs='+',
                             help='List containing the number of hidden units to use for each hidden layer')
    model_parse.add_argument('--dropout_rate', dest='dropout_rate', default=0.5, type=float,
                             help='Dropout rate to use for each hidden layer')
    model_parse.add_argument('--one_hot_units', dest='one_hot_units', default=[2, 35, 359, 3, 2], type=int, nargs='+',
                             help='List containing the number of embedding units to use for features (in order): '
                                  '[weekday, region, city, adexchange, slotformat]; this replaces the one hot encoding')
    model_parse.add_argument('--multi_hot_units', dest='multi_hot_units', default=[45], type=int, nargs='+',
                             help='List containing the number of embedding units to use for features: [usertag]')
    model_parse.add_argument('--learning_rate', dest='learning_rate', default=0.002, type=float,
                             help='Learning rate for gradient updates')
    model_parse.add_argument('--decay_step', dest='decay_step', default=100, type=int,
                             help='Decay step')
    model_parse.add_argument('--decay_rate', dest='decay_rate', default=0.98, type=float,
                             help='Decay rate for exponential decay of learning rate')
    model_parse.add_argument('--class_ratio', dest='class_ratio', default=0.5, type=float,
                             help='Ratio of the 2 classes for imbalanced data')
    model_parse.add_argument('--alpha', dest='alpha', default=1., type=float,
                             help='Alpha for Focal loss regularization in DNN')
    model_parse.add_argument('--beta', dest='beta', default=1., type=float,
                             help='Beta for regularization')
    model_parse.add_argument('--gamma', dest='gamma', default=1., type=float,
                             help='Gamma for Focal loss regularization in DNN')

    # Training
    train_parse = parser.add_argument_group('Training hyperparameters')
    train_parse.add_argument('--save_summary_steps', dest='save_summary_steps', default=100, type=int,
                             help='Save summary steps')
    train_parse.add_argument('--log_step_count_steps', dest='log_step_count_steps', default=100, type=int,
                             help='Logging step count steps')
    train_parse.add_argument('--checkpoints_steps', dest='save_checkpoints_steps', default=500, type=int,
                             help='Checkpoint steps')
    train_parse.add_argument('--has_gpu', dest='has_gpu', default=0, type=int,
                             help='1 if GPU is present, else 0')
    train_parse.add_argument('--oversample', dest='oversample', default=0, type=int,
                             help='1 to oversample the training dataset, else 0')
    train_parse.add_argument('--is_test', dest='is_test', default=0, type=int,
                             help='1 if the trained model will be evaluated, else 0')
    train_parse.add_argument('--num_epochs', dest='num_epochs', default=1.0, type=float,
                             help='Number of total epochs')
    train_parse.add_argument('--start_delay_secs', dest='start_delay_secs', default=10, type=int,
                             help='Start evaluating after this many seconds')
    train_parse.add_argument('--throttle_secs', dest='throttle_secs', default=10, type=int,
                             help='Evaluate only every this many seconds')
    train_parse.add_argument('--batch_size', dest='batch_size', default=128, type=int,
                             help='Number of examples per batch')

    # Directory paths
    dir_parse = parser.add_argument_group('Directory paths')
    dir_parse.add_argument('--train_data_path', dest='train_data_path', default='./data/', type=str,
                           help='Directory where the training files are located')
    dir_parse.add_argument('--save_dir', dest='save_dir', default='./Outputs/', type=str,
                           help='Directory to save model directories')
    dir_parse.add_argument('--load_dir', dest='load_dir', default='latest', type=str,
                           help='Directory to load the old model; default "latest" loads the latest model')
    dir_parse.add_argument('--store_dir', dest='store_dir', default='latest', type=str,
                           help='Directory to store the current model; default "latest" saves under a timestamp')
    dir_parse.add_argument('--builder_save_dir', dest='builder_save_dir', default='builder_save', type=str,
                           help='Directory to store the current model for the tfjs predictor')

    _args, _ = parser.parse_known_args()
    _params = vars(_args)
    _params['train_data_path'] = os.getcwd() + _params['train_data_path']

    # Identify whether it's using NNI tuning mode
    if _params['tuning'] == 1:
        import nni
        tuner_params = nni.get_next_parameter()
        try:
            _params.update(tuner_params)
        except Exception as err:
            tf.logging.error('Error args updated: %s', err)
            tf.logging.error('Failed with params: %s', str(_params))

    _params['num_features'] = len(INT_NUM_FEAT) + sum(_params['one_hot_units']) + sum(_params['multi_hot_units'])
    _params['model_name'] = _params['model']

    # Adjust filename to restore/save by config settings
    if _params['store_dir'] == 'latest':
        _params['store_dir'] = _params['model_name'] + '_' + parse_date('now').strftime(FILE_DATE_FORMAT)
    if _params['load_dir'] == 'latest':
        _params['load_dir'] = find_latest_model_dir(_params['save_dir'], _params['store_dir'], _params['model_name'])
    if _params['new_run'] == 1:
        _params['load_dir'] = _params['store_dir']
    return _params
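# Hedged usage sketch: parse_args() reads sys.argv plus the default config file
# (nni, tf, INT_NUM_FEAT, parse_date and find_latest_model_dir come from the
# surrounding module), e.g.
#   python train.py --model DNN --hidden_units 256 128 --learning_rate 0.001
params = parse_args()
print(params['model_name'], params['num_features'], params['store_dir'])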
if default_config_file is not None:
    files = [default_config_file]
else:
    files = []

parser = ArgParser(default_config_files=files)
rf.addGenRegArgumentGroup(parser)  # just to get --pipeline-name
addExecutorArgumentGroup(parser)

# using parse_known_args instead of parse_args is a hack since we
# currently send ALL arguments from the main program to the executor
# on PBS queues (FIXME not yet true on SGE queues, but this is
# not the best solution anyway).
# Alternately, we could keep a copy of the executor parser around
# when constructing the executor shell command
options = parser.parse_known_args()[0]

# Check to make sure some executors have been specified.
noExecSpecified(options.num_exec)

def local_launch(options):
    pe = pipelineExecutor(options)
    # executors don't use any shared-memory constructs, so OK to copy
    ps = [Process(target=launchExecutor, args=(pe,)) for _ in range(options.num_exec)]
    for p in ps:
        p.start()
    for p in ps:
        p.join()
p = ArgParser(
    auto_env_var_prefix='KLAUD_',
    default_config_files=['./settings.ini'],
)
p.add('-c', '--config', is_config_file=True, help='config file path')
p.add('-p', '--port', type=int, default=8000, help='port for serving')
p.add('-H', '--host', type=str, default='0.0.0.0', help='host for serving')
p.add('--hot-reload', action='store_true', help='enable hot reload')
p.add('--db-host', type=str, default='localhost', help='mongo server host')
p.add('--db-port', type=int, default=27017, help='mongo server port')
p.add('--db-user', type=str, default='user', help='mongo server user')
p.add('--db-password', type=str, default='hackme', help='mongo server password')
p.add('--db-name', type=str, default='klaud', help='mongo database name')
p.add('-S', '--secret', type=str, default=secrets.token_hex(64), help='secret token')
p.add('--access-token-life', type=int, default=15, help='access token life duration (in minutes)')
p.add('--master-name', type=str, default='master', help='master username')
p.add('--master-password', type=str, default='master', help='master password')

settings = p.parse_known_args()[0]
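# With auto_env_var_prefix='KLAUD_', every option can also come from the
# environment or from ./settings.ini (a sketch; the values are examples):
#   KLAUD_DB_HOST=mongo.internal KLAUD_PORT=9000 python app.py
# or, in settings.ini:
#   db-host = mongo.internal
#   port = 9000
print(settings.db_host, settings.port)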
def get_fetcher_service_config(args) -> FetcherServiceConfig:
    parser = ArgParser(auto_env_var_prefix="", prog=SERVICE_NAME)
    parser.add_argument("--zookeeper-ensemble-hosts", env_var="ZOOKEEPER_ENSEMBLE_HOSTS", default="localhost:2181")
    parser.add_argument("--s3-download-bucket", env_var="S3_DOWNLOAD_BUCKET", required=True)
    parser.add_argument("--kubeconfig", env_var="KUBECONFIG")
    parser.add_argument("--fetcher-job-image", env_var="FETCHER_JOB_IMAGE", required=True)
    parser.add_argument("--fetcher-job-ttl", env_var="FETCHER_JOB_TTL", type=int, required=False)
    parser.add_argument("--fetcher-job-node-selector", env_var="FETCHER_JOB_NODE_SELECTOR", type=json.loads, default={})
    parser.add_argument(
        "--fetcher-job-pull-policy",
        env_var="FETCHER_JOB_PULL_POLICY",
        required=False,
        # Default is complicated - Always if no tag is given, IfNotPresent otherwise
        choices=["Always", "Never", "IfNotPresent"],
    )
    parser.add_argument(
        "--fetcher-job-restart-policy",
        env_var="FETCHER_JOB_RESTART_POLICY",
        required=False,
        choices=["Never", "OnFailure"],
        default="OnFailure",
    )
    parser.add_argument("--fetcher-job-namespace", env_var="FETCHER_JOB_NAMESPACE", required=False, default="default")
    parser.add_argument(
        "--fetcher-job-min-volume-size",
        env_var="FETCHER_JOB_MIN_VOLUME_SIZE",
        required=False,
        type=int,
        default=MIN_VOLUME_SIZE_MB,
    )
    parser.add_argument("--fetcher-job-volume-storage-class", env_var="FETCHER_JOB_VOLUME_STORAGE_CLASS", required=False)

    parsed_args, _ = parser.parse_known_args(args, env_vars=os.environ)
    return FetcherServiceConfig(
        zookeeper_ensemble_hosts=parsed_args.zookeeper_ensemble_hosts,
        s3_download_bucket=parsed_args.s3_download_bucket,
        kubeconfig=parsed_args.kubeconfig,
        fetcher_job=FetcherJobConfig(
            namespace=parsed_args.fetcher_job_namespace,
            image=parsed_args.fetcher_job_image,
            node_selector=parsed_args.fetcher_job_node_selector,
            pull_policy=parsed_args.fetcher_job_pull_policy,
            ttl=parsed_args.fetcher_job_ttl,
            restart_policy=parsed_args.fetcher_job_restart_policy,
            volume=FetcherVolumeConfig(
                storage_class=parsed_args.fetcher_job_volume_storage_class,
                min_size=parsed_args.fetcher_job_min_volume_size,
            ),
        ),
    )
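# Hedged usage sketch (FetcherServiceConfig, FetcherJobConfig, FetcherVolumeConfig,
# SERVICE_NAME and MIN_VOLUME_SIZE_MB are assumed from the surrounding module).
# Note the node selector is passed as a JSON string because of type=json.loads:
cfg = get_fetcher_service_config([
    "--s3-download-bucket", "benchmark-datasets",
    "--fetcher-job-image", "fetcher:latest",
    "--fetcher-job-node-selector", '{"disktype": "ssd"}',
])
print(cfg.fetcher_job.node_selector)  # {'disktype': 'ssd'}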
def ParseArgs():
    parser = ArgParser(default_config_files=[os.getcwd() + '/src/initial_configurations/default'])

    # Core settings
    core_parse = parser.add_argument_group('Core setting')
    core_parse.add_argument('-s', '--start_date', dest='start_date', default=START_DATE, type=str,
                            help='Training start date')
    core_parse.add_argument('-p', '--train_period', dest='train_period', default=TRAIN_PERIOD, type=int,
                            help='Time period of training files to use')
    core_parse.add_argument('-n', '--new_run', dest='new_run', default=1, type=int,
                            help='If 1, the model checkpoint is erased to run a new model')
    core_parse.add_argument('-l', '--local_run', dest='local_run', default=1, type=int,
                            help='If 1, the parameter JSON file is kept locally instead of in redis')
    core_parse.add_argument('-nni', '--tuning', dest='tuning', default=0, type=int,
                            help='Whether or not to perform hyperparameter tuning')

    # Data
    data_parse = parser.add_argument_group('Data setting')
    data_parse.add_argument('--train_data_path', dest='train_data_path', default=DATA_PATH, type=str,
                            help='Directory where the training files are located')
    data_parse.add_argument('--random_seed', dest='random_seed', default=8888, type=int,
                            help='Random seed used for shuffling the list of training files')
    data_parse.add_argument('--num_cores', dest='num_cores', default=24, type=int,
                            help='Number of CPU cores')
    data_parse.add_argument('--train_ratio', dest='train_ratio', default=0.7, type=float,
                            help='Fraction of data to be used for training')
    data_parse.add_argument('--valid_ratio', dest='valid_ratio', default=0.15, type=float,
                            help='Fraction of data to be used for validation (only matters when there is a '
                                 'third dataset to be created for testing)')
    data_parse.add_argument('--batch_size', dest='batch_size', default=32, type=int,
                            help='Number of examples per batch')
    data_parse.add_argument('--prefetch_size', dest='prefetch_size', default=1, type=int,
                            help='Number of batches to be prepared in queue')

    # Model
    model_parse = parser.add_argument_group('Model setting')
    model_parse.add_argument('--model', dest='model', default='DNN', type=str,
                             help='Select the model to train e.g. DNN; note that this version only has DNN')
    model_parse.add_argument('--loss', dest='loss', default=40, type=int,
                             help="Setting of loss function: '10','11','12','20','21','22','30','31','32','40'")
    model_parse.add_argument('--hidden_units', dest='hidden_units', default=[128, 64], type=int, nargs='+',
                             help='List containing the number of hidden units to use for each hidden layer')
    model_parse.add_argument('--learning_rate', dest='learning_rate', default=0.001, type=float,
                             help='Learning rate for gradient updates')
    model_parse.add_argument('--decay_step', dest='decay_step', default=100, type=int, help='Decay step')
    model_parse.add_argument('--decay_rate', dest='decay_rate', default=0.98, type=float,
                             help='Decay rate for exponential decay of learning rate')
    model_parse.add_argument('--Lambda', dest='Lambda', default=0.25, type=float,
                             help='Lambda for L2/L1 regularization; alpha for focal loss')
    model_parse.add_argument('--gamma', dest='gamma', default=2., type=float,
                             help='Parameter for focal loss')
    model_parse.add_argument('--beta', dest='beta', default=1., type=float,
                             help='Regularization parameter')
    model_parse.add_argument('--drop_rate', dest='drop_rate', default=0.5, type=float,
                             help='Dropout rate')
    model_parse.add_argument('--embedding_units', dest='embedding_units', default=[1, 35, 359, 3, 2],
                             type=int, nargs='+',
                             help='List containing the number of embedding units to use for features (in order): '
                                  '[weekday, region, city, adexchange, slotformat]; this replaces the one hot encoding')
    model_parse.add_argument('--embedding_units_ohe', dest='embedding_units_ohe', default=[45], type=int, nargs='+',
                             help='List containing the number of embedding units to use for OHE features (in order): usertag')

    # Training
    train_parse = parser.add_argument_group('Training hyperparameters')
    train_parse.add_argument('--has_gpu', dest='has_gpu', default=0, type=int,
                             help='1 if GPU is present, else 0')
    train_parse.add_argument('--is_test', dest='is_test', default=0, type=int,
                             help='1 if the trained model will be evaluated, else 0')
    train_parse.add_argument('--num_epochs_min', dest='num_epochs_min', default=100, type=float,
                             help='Minimum number of training epochs')
    train_parse.add_argument('--num_epochs', dest='num_epochs', default=101, type=float,
                             help='Number of total training epochs')
    train_parse.add_argument('--validation_length', dest='validation_length', default=100, type=int,
                             help='How many batches to use in one validation')
    train_parse.add_argument('--test_length', dest='test_length', default=100, type=int,
                             help='How many batches to use in one test')
    train_parse.add_argument('--earlystop_check_frequency', dest='earlystop_check_frequency', default=10, type=int,
                             help='earlystop_check_frequency')
    train_parse.add_argument('--earlystop_duration', dest='earlystop_duration', default=10, type=int,
                             help='earlystop_duration')
    train_parse.add_argument('--valid_loss_delta', dest='valid_loss_delta', default=0.0001, type=float,
                             help='valid_loss_delta')
    train_parse.add_argument('--num_threshold_buffer', dest='num_threshold_buffer', default=3, type=int,
                             help='num_threshold_buffer')
    train_parse.add_argument('--percentile_threshold', dest='percentile_threshold', default=8, type=int,
                             help='percentile_threshold')

    # Directory paths
    dir_parse = parser.add_argument_group('Directory paths')
    dir_parse.add_argument('--save_dir', dest='save_dir', default='./Outputs/', type=str,
                           help='Directory to save model directories')
    dir_parse.add_argument('--load_dir', dest='load_dir', default='latest', type=str,
                           help='Directory to load the old model; default "latest" loads the latest model')
    dir_parse.add_argument('--store_dir', dest='store_dir', default='latest', type=str,
                           help='Directory to store the current model; default "latest" saves under a timestamp')
    dir_parse.add_argument('--result_dir', dest='result_dir', default='result.csv', type=str,
                           help='File to store (history) performance results')
    dir_parse.add_argument('--builder_save_dir', dest='builder_save_dir', default='builder_save', type=str,
                           help='Directory to store the current model for the tfjs predictor')

    _args, _ = parser.parse_known_args()
    return vars(_args)
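# Hedged usage sketch: ParseArgs() returns a plain dict, with command-line
# flags overriding values from the default config file, e.g.
#   python train.py --model DNN --loss 40 --hidden_units 256 128
params = ParseArgs()
print(params['model'], params['loss'], params['hidden_units'])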