Example #1
def get_job_status_trigger_config(args) -> JobStatusTriggerConfig:
    parser = ArgParser(auto_env_var_prefix="", prog=APP_NAME)

    def benchmark_status_from_input(raw: str) -> BenchmarkJobStatus:
        # trim stray whitespace/commas so space- or comma-separated lists both parse
        return BenchmarkJobStatus(raw.strip(" \t,"))

    # required
    parser.add_argument("--job-name", type=str, env_var="JOB_NAME", required=True)
    parser.add_argument(
        "--trigger-statuses", type=benchmark_status_from_input, nargs="+", env_var="TRIGGER_STATUSES", required=True
    )
    parser.add_argument("--command", type=str, env_var="COMMAND", required=True)

    # optional
    parser.add_argument("--job-namespace", type=str, default="default", env_var="JOB_NAMESPACE", required=False)
    parser.add_argument(
        "--job-not-found-grace-period-seconds",
        type=int,
        default=30,
        env_var="JOB_NOT_FOUND_GRACE_PERIOD_SECONDS",
        required=False,
    )

    parsed_args, _ = parser.parse_known_args(args)

    return JobStatusTriggerConfig(
        job_namespace=parsed_args.job_namespace,
        job_name=parsed_args.job_name,
        trigger_statuses=parsed_args.trigger_statuses,
        job_not_found_grace_period_seconds=parsed_args.job_not_found_grace_period_seconds,
        command=parsed_args.command,
    )
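These config helpers lean on configargparse's layered lookup: command-line flags override environment variables, which override hard-coded defaults. A minimal, self-contained sketch of that precedence, assuming the configargparse package (the flag and variable names here are illustrative):

from configargparse import ArgParser

parser = ArgParser(prog="demo")
parser.add_argument("--job-name", env_var="JOB_NAME", required=True)
parser.add_argument("--job-namespace", env_var="JOB_NAMESPACE", default="default")

# job_name comes from argv; job_namespace falls back to the environment.
opts, _ = parser.parse_known_args(["--job-name", "cli-run"],
                                  env_vars={"JOB_NAMESPACE": "jobs"})
print(opts.job_name, opts.job_namespace)  # cli-run jobs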
Example #2
def go_2(p, current_prefix, current_ns):
    if isinstance(p, BaseParser):
        new_p = ArgParser(default_config_files=config_files)
        for a in p.argparser._actions:
            new_a = copy.copy(a)
            ss = copy.deepcopy(new_a.option_strings)
            for ix, s in enumerate(new_a.option_strings):
                if s.startswith("--"):
                    ss[ix] = "-" + current_prefix + "-" + s[2:]
                else:
                    raise NotImplementedError
            new_a.option_strings = ss
            new_p._add_action(new_a)
        _used_args, _rest = new_p.parse_known_args(args, namespace=current_ns)
        # add a "flags_" field to each object so we know what flags caused a certain option to be set
        # (note, however, that we may munge things around post-parsing ...)
        flags_dict = defaultdict(set)
        for action in new_p._actions:
            for opt in action.option_strings:
                flags_dict[action.dest].add(opt)
        current_ns.flags_ = Namespace(**flags_dict)
        # TODO: could continue parsing from `_rest` instead of original `args`
    elif isinstance(p, CompoundParser):
        current_ns.flags_ = set()  # could also skip setting flags for the CompoundParser case,
                                   # since there will never be any
        for q in p.parsers:
            ns = Namespace()
            if q.namespace in current_ns.__dict__:
                raise ValueError("Namespace field '%s' already in use" % q.namespace)
                # TODO could also allow, say, a None
            else:
                # gross, but how do you write an n-ary identity fn that behaves sensibly on a single arg?
                current_ns.__dict__[q.namespace] = ns
                # FIXME this casting doesn't work for configurations with positional arguments,
                # which aren't unpacked correctly -- better to use a namedtuple
                # (making all arguments keyword-only also works, but then you have to supply
                # often meaningless defaults in the __init__)
            go_2(q.parser,
                 current_prefix=current_prefix + (('-' + q.prefix) if q.prefix is not None else ''),
                 current_ns=ns)
            # If a cast function is provided, apply it to the namespace, possibly doing dynamic type checking
            # and also allowing the checker to provide hinting for the types of the fields
            flags = ns.flags_
            del ns.flags_
            fixed = (q.cast(current_ns.__dict__[q.namespace])
                     if q.cast else current_ns.__dict__[q.namespace])
            if isinstance(fixed, tuple):
                fixed = fixed._replace(flags_=flags)  # NamedTuple update; plain `replace` doesn't exist on tuples
            elif isinstance(fixed, Namespace):
                setattr(fixed, "flags_", flags)
            else:
                raise ValueError("currently only Namespace and NamedTuple objects are supported return types from "
                                 "parsing; got %s (a %s)" % (fixed, type(fixed)))
            current_ns.__dict__[q.namespace] = fixed
            # TODO current_ns or current_namespace or ns or namespace?
    else:
        raise TypeError("parser %s wasn't a %s (%s or %s) but a %s" %
                        (p, Parser, BaseParser, CompoundParser, p.__class__))
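The BaseParser branch above clones each argparse action and rewrites its long options with the accumulated prefix before re-registering it. A standalone sketch of that renaming trick using plain argparse (the "stage1" prefix is illustrative):

import argparse
import copy

base = argparse.ArgumentParser(add_help=False)
base.add_argument("--threads", type=int, default=1)
base.add_argument("--verbose", action="store_true")

prefixed = argparse.ArgumentParser()
for a in base._actions:
    new_a = copy.copy(a)
    # "--threads" becomes "--stage1-threads", etc.
    new_a.option_strings = ["--stage1-" + s[2:] for s in a.option_strings]
    prefixed._add_action(new_a)

opts = prefixed.parse_args(["--stage1-threads", "4"])
print(opts.threads, opts.verbose)  # 4 False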
Example #3
def main():
    # command line option handling
    # use an environment variable to look for a default config file
    # Alternately, we could use a default location for the file
    # (say `files = ['/etc/pydpiper.cfg', '~/pydpiper.cfg', './pydpiper.cfg']`)
    # TODO this logic is duplicated in application.py
    #if "PYDPIPER_CONFIG_FILE" in os.environ:
    default_config_file = os.getenv("PYDPIPER_CONFIG_FILE")
    if default_config_file is not None:
        try:
            with open(default_config_file):
                pass
        except OSError:
            warnings.warn(f"PYDPIPER_CONFIG_FILE is set to '{default_config_file}', which can't be opened.")
    if default_config_file is not None:
        files = [default_config_file]
    else:
        files = []

    from pydpiper.core.arguments import _mk_execution_parser
    parser = ArgParser(default_config_files=files)
    _mk_execution_parser(parser)

    # using parse_known_args instead of parse_args is a hack since we
    # currently send ALL arguments from the main program to the executor.
    # Alternately, we could keep a copy of the executor parser around
    # when constructing the executor shell command
    options, _ = parser.parse_known_args()

    ensure_exec_specified(options.num_exec)

    def local_launch(options):
        pe = pipelineExecutor(options=options, uri_file=options.urifile, pipeline_name="anon-executor")  # didn't parse application options so don't have a --pipeline-name
        # FIXME - I doubt missing the other options even works, otherwise we could change the executor interface!!
        # executors don't use any shared-memory constructs, so OK to copy
        ps = [Process(target=launchExecutor, args=(pe,))
              for _ in range(options.num_exec)]
        for p in ps:
            p.start()
        for p in ps:
            p.join()

    if options.local:
        local_launch(options)
    elif options.submit_server:
        roq = q.runOnQueueingSystem(options, sysArgs=sys.argv)
        for i in range(options.num_exec):
            roq.createAndSubmitExecutorJobFile(i, after=None,
                                               time=q.timestr_to_secs(options.time))
    elif options.queue_type is not None:
        for i in range(options.num_exec):
            pe = pipelineExecutor(options=options, uri_file=options.urifile, pipeline_name="anon-executor")
            pe.submitToQueue(1)  # TODO is there a reason why we have logic for submitting `i` executors again here?
    else:
        local_launch(options)
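local_launch is the standard multiprocessing fan-out/join: start every executor process before joining any, so they run concurrently rather than serially. A self-contained sketch (the worker function and count are illustrative):

from multiprocessing import Process

def launch_worker(worker_id: int) -> None:
    print(f"worker {worker_id} running")

if __name__ == "__main__":
    ps = [Process(target=launch_worker, args=(i,)) for i in range(3)]
    for p in ps:
        p.start()  # start all workers first ...
    for p in ps:
        p.join()   # ... then wait for all of them to finish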
Example #4
def get_watcher_service_config(args) -> WatcherServiceConfig:
    parser = ArgParser(auto_env_var_prefix="", prog=SERVICE_NAME)

    parser.add_argument(
        "--kubernetes-namespace-of-running-jobs", default="default", env_var="KUBERNETES_NAMESPACE_OF_RUNNING_JOBS"
    )
    parser.add_argument("--kubeconfig", env_var="KUBECONFIG")
    parser.add_argument("--service-logging-level", env_var="SERVICE_LOGGING_LEVEL", default="INFO")
    parser.add_argument("--grafana-endpoint", env_var="GRAFANA_ENDPOINT")
    parser.add_argument("--grafana-results-url", env_var="GRAFANA_RESULTS_URL")
    parser.add_argument("--grafana-op-metrics-dashboard-uid", env_var="GRAFANA_OP_METRICS_DASHBOARD_UID")

    parsed_args, _ = parser.parse_known_args(args)
    return WatcherServiceConfig(
        kubernetes_namespace_of_running_jobs=parsed_args.kubernetes_namespace_of_running_jobs,
        kubeconfig=parsed_args.kubeconfig,
        logging_level=parsed_args.service_logging_level,
        grafana_endpoint=parsed_args.grafana_endpoint,
        grafana_results_url=parsed_args.grafana_results_url,
        grafana_op_metrics_dashboard_uid=parsed_args.grafana_op_metrics_dashboard_uid,
    )
Example #5
def go_2(p, current_prefix, current_ns):
    if isinstance(p, BaseParser):
        new_p = ArgParser(default_config_files=config_files)
        for a in p.argparser._actions:
            new_a = copy.copy(a)
            ss = copy.deepcopy(new_a.option_strings)
            for ix, s in enumerate(new_a.option_strings):
                if s.startswith("--"):
                    ss[ix] = "-" + current_prefix + "-" + s[2:]
                else:
                    raise NotImplementedError
            new_a.option_strings = ss
            new_p._add_action(new_a)
        _used_args, _rest = new_p.parse_known_args(args, namespace=current_ns)
        # TODO: could continue parsing from `_rest` instead of original `args`
    elif isinstance(p, CompoundParser):
        for q in p.parsers:
            ns = Namespace()
            if q.namespace in current_ns.__dict__:
                raise ValueError("Namespace field '%s' already in use" % q.namespace)
                # TODO could also allow, say, a None
            else:
                # gross, but how do you write an n-ary identity fn that behaves sensibly on a single arg?
                current_ns.__dict__[q.namespace] = ns
                # FIXME this casting doesn't work for configurations with positional arguments,
                # which aren't unpacked correctly -- better to use a namedtuple
                # (making all arguments keyword-only also works, but then you have to supply
                # often meaningless defaults in the __init__)
            go_2(q.parser,
                 current_prefix=current_prefix + (('-' + q.prefix) if q.prefix is not None else ''),
                 current_ns=ns)
            # If a cast function is provided, apply it to the namespace, possibly doing dynamic type checking
            # and also allowing the checker to provide hinting for the types of the fields
            current_ns.__dict__[q.namespace] = (q.cast(current_ns.__dict__[q.namespace])
                                                if q.cast else current_ns.__dict__[q.namespace])
            # TODO current_ns or current_namespace or ns or namespace?
    else:
        raise TypeError("parser %s wasn't a %s (%s or %s) but a %s" %
                        (p, Parser, BaseParser, CompoundParser, p.__class__))
Example #6
def parse_args():
    parser = ArgParser(default_config_files=[os.getcwd() + '/src/initial_configurations/default'])
    # Core setting
    core_parse = parser.add_argument_group('Core setting')
    core_parse.add_argument('-s',   '--start_date',   dest='start_date',   default='now', type=str, help='Training start date')
    core_parse.add_argument('-p',   '--train_period', dest='train_period', default=-1,    type=int, help='Time period of training files to use')
    core_parse.add_argument('-n',   '--new_run',      dest='new_run',      default=0,     type=int, help='Whether to erase the model checkpoint and start a new run')
    core_parse.add_argument('-l',   '--local_run',    dest='local_run',    default=0,     type=int, help='Whether the parameter JSON file is kept locally instead of in redis')
    core_parse.add_argument('-nni', '--tuning',       dest='tuning',       default=0,     type=int, help='Whether or not to perform NNI hyperparameter tuning')


    # Model
    model_parse = parser.add_argument_group('Model')
    model_parse.add_argument('-m',   '--model',   dest='model',           default='DNN',               type=str,   help='Select the model to train e.g. DNN')
    model_parse.add_argument('--loss',            dest='loss',            default=30,                  type=int,   help="Setting of loss function '10','11','12','20','21','22','30','31','32'" )
    model_parse.add_argument('--hidden_units',    dest='hidden_units',    default=[128, 64],           type=int,   nargs='+', help='List containing the number of hidden units to use for each hidden layer')
    model_parse.add_argument('--dropout_rate',    dest='dropout_rate',    default=0.5,                 type=float, help='Dropout rate to use for each hidden layer')
    model_parse.add_argument('--one_hot_units',   dest='one_hot_units',   default=[2, 35, 359, 3, 2], type=int,   nargs='+', help='List containing the number of embedding units to use for features (in order): [weekday, region, city, adexchange, slotformat]; this replaces the one hot encoding')
    model_parse.add_argument('--multi_hot_units', dest='multi_hot_units', default=[45],             type=int,   nargs='+', help='List containing the number of embedding units to use for features: [usertag]')
    model_parse.add_argument('--learning_rate',   dest='learning_rate',   default=0.002,            type=float, help='Learning rate of updating gradient')
    model_parse.add_argument('--decay_step',      dest='decay_step',      default=100,              type=int,   help='Decay step')
    model_parse.add_argument('--decay_rate',      dest='decay_rate',      default=0.98,             type=float, help='Decay rate for exponential decay of learning rate')
    model_parse.add_argument('--class_ratio',     dest='class_ratio',     default=0.5,              type=float, help='Ratio of 2 classes for imbalanced data')
    model_parse.add_argument('--alpha',           dest='alpha',           default=1.,               type=float, help='Alpha for Focal loss regularization in DNN')
    model_parse.add_argument('--beta',            dest='beta',            default=1.,               type=float, help='Beta for regularization')
    model_parse.add_argument('--gamma',           dest='gamma',           default=1.,               type=float, help='Gamma for Focal loss regularization in DNN')

    # Training
    train_parse = parser.add_argument_group('Training hyperparameters')
    train_parse.add_argument('--save_summary_steps',   dest='save_summary_steps',    default=100,   type=int, help='save summary steps')
    train_parse.add_argument('--log_step_count_steps', dest='log_step_count_steps',  default=100,   type=int, help='logging step count steps')
    train_parse.add_argument('--checkpoints_steps',    dest='save_checkpoints_steps',default=500,   type=int, help='checkpoints steps')
    train_parse.add_argument('--has_gpu',              dest='has_gpu',               default=0,     type=int, help='1 if GPU is present, else 0')
    train_parse.add_argument('--oversample',           dest='oversample',            default=0,     type=int, help='1 if will oversample training dataset, else 0')
    train_parse.add_argument('--is_test',              dest='is_test',               default=0,     type=int, help='1 if the trained model will be evaluated, else 0')
    train_parse.add_argument('--num_epochs',           dest='num_epochs',            default=1.0,   type=float, help='Number of total epochs')
    train_parse.add_argument('--start_delay_secs',     dest='start_delay_secs',      default=10,    type=int, help='Seconds to wait before starting evaluation')
    train_parse.add_argument('--throttle_secs',        dest='throttle_secs',         default=10,    type=int, help='Evaluate at most once every throttle_secs seconds')
    train_parse.add_argument('--batch_size',           dest='batch_size',            default=128,      type=int, help='Number of examples per batch')
    
    # Directory paths
    dir_parse = parser.add_argument_group('Directory paths')
    dir_parse.add_argument('--train_data_path',  dest='train_data_path',  default='./data/',        type=str, help='Directory where the training files are located')
    dir_parse.add_argument('--save_dir',         dest='save_dir',         default='./Outputs/',    type=str, help='Directory to save model directories')
    dir_parse.add_argument('--load_dir',         dest='load_dir',         default='latest',        type=str, help='Directory to load an old model from; default "latest" resolves to the most recent model')
    dir_parse.add_argument('--store_dir',        dest='store_dir',        default='latest',        type=str, help='Directory to store the current model; default "latest" saves under a timestamped name')
    dir_parse.add_argument('--builder_save_dir', dest='builder_save_dir', default='builder_save',  type=str, help='Directory to store current model for tfjs predictor')

    _args, _ = parser.parse_known_args()
    _params = vars(_args)
    _params['train_data_path'] = os.path.join(os.getcwd(), _params['train_data_path'])

    # Identify whether it's using NNI tuning mode
    if _params['tuning'] == 1:
        import nni
        tuner_params = nni.get_next_parameter()
        try:
            _params.update(tuner_params)
        except Exception as err:
            tf.logging.error('Error updating args: %s', err)
            tf.logging.error('Failed with params: %s', str(_params))
            
    _params['num_features'] = len(INT_NUM_FEAT) + sum(_params['one_hot_units']) + sum(_params['multi_hot_units'])
    _params['model_name'] = _params['model']

    # Adjust filename to restore/save by config settings
    if _params['store_dir'] == 'latest':
        _params['store_dir'] = _params['model_name'] + '_' + parse_date('now').strftime(FILE_DATE_FORMAT)
    if _params['load_dir'] == 'latest':
        _params['load_dir'] = find_latest_model_dir(_params['save_dir'], _params['store_dir'], _params['model_name'])
    if _params['new_run'] == 1:
        _params['load_dir'] = _params['store_dir']
    return _params
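The tuning branch overlays NNI-suggested values onto the parsed defaults with dict.update, so tuner values win and any extra keys are simply added. A small sketch of that overlay (the keys are illustrative; nni.get_next_parameter() would supply the second dict in practice):

defaults = {'learning_rate': 0.002, 'batch_size': 128, 'dropout_rate': 0.5}
suggested = {'learning_rate': 0.0005, 'dropout_rate': 0.3}

params = dict(defaults)
params.update(suggested)  # tuner values override defaults
print(params)
# {'learning_rate': 0.0005, 'batch_size': 128, 'dropout_rate': 0.3}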
Example #8
    if default_config_file is not None:
        files = [default_config_file]
    else:
        files = []
    parser = ArgParser(default_config_files=files)

    rf.addGenRegArgumentGroup(parser)  # just to get --pipeline-name
    addExecutorArgumentGroup(parser)

    # using parse_known_args instead of parse_args is a hack since we
    # currently send ALL arguments from the main program to the executor
    # on PBS queues (FIXME not yet true on SGE queues, but this is
    # not the best solution anyway).
    # Alternately, we could keep a copy of the executor parser around
    # when constructing the executor shell command
    options = parser.parse_known_args()[0]

    # Check that some executors have been specified.
    noExecSpecified(options.num_exec)

    def local_launch(options):
        pe = pipelineExecutor(options)
        # executors don't use any shared-memory constructs, so OK to copy
        ps = [
            Process(target=launchExecutor, args=(pe, ))
            for _ in range(options.num_exec)
        ]
        for p in ps:
            p.start()
        for p in ps:
            p.join()
Example #9
File: settings.py  Project: GUDN/klaud
p = ArgParser(
    auto_env_var_prefix='KLAUD_',
    default_config_files=['./settings.ini'],
)
p.add('-c', '--config', is_config_file=True, help='config file path')
p.add('-p', '--port', type=int, default=8000, help='port for serving')
p.add('-H', '--host', type=str, default='0.0.0.0', help='host for serving')
p.add('--hot-reload', action='store_true', help='enable hot reload')

p.add('--db-host', type=str, default='localhost', help='mongo server host')
p.add('--db-port', type=int, default=27017, help='mongo server port')
p.add('--db-user', type=str, default='user', help='mongo server user')
p.add('--db-password',
      type=str,
      default='hackme',
      help='mongo server password')
p.add('--db-name', type=str, default='klaud', help='mongo database name')

p.add('-S',
      '--secret',
      type=str,
      default=secrets.token_hex(64),
      help='secret token')
p.add('--access-token-life',
      type=int,
      default=15,
      help='access token life duration (in minutes)')
p.add('--master-name', type=str, default='master', help='master username')
p.add('--master-password', type=str, default='master', help='master password')

settings = p.parse_known_args()[0]
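One way these settings might be consumed, sketched as a standard mongodb:// connection string (the helper is illustrative, not part of the project):

from argparse import Namespace

def mongo_uri(s) -> str:
    # argparse turns --db-host into the attribute db_host, and so on
    return f"mongodb://{s.db_user}:{s.db_password}@{s.db_host}:{s.db_port}/{s.db_name}"

# with the defaults above:
print(mongo_uri(Namespace(db_user='user', db_password='hackme',
                          db_host='localhost', db_port=27017, db_name='klaud')))
# mongodb://user:hackme@localhost:27017/klaud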
Example #10
def get_fetcher_service_config(args) -> FetcherServiceConfig:
    parser = ArgParser(auto_env_var_prefix="", prog=SERVICE_NAME)

    parser.add_argument("--zookeeper-ensemble-hosts",
                        env_var="ZOOKEEPER_ENSEMBLE_HOSTS",
                        default="localhost:2181")

    parser.add_argument("--s3-download-bucket",
                        env_var="S3_DOWNLOAD_BUCKET",
                        required=True)

    parser.add_argument("--kubeconfig", env_var="KUBECONFIG")

    parser.add_argument("--fetcher-job-image",
                        env_var="FETCHER_JOB_IMAGE",
                        required=True)

    parser.add_argument("--fetcher-job-ttl",
                        env_var="FETCHER_JOB_TTL",
                        type=int,
                        required=False)

    parser.add_argument("--fetcher-job-node-selector",
                        env_var="FETCHER_JOB_NODE_SELECTOR",
                        type=json.loads,
                        default={})

    parser.add_argument(
        "--fetcher-job-pull-policy",
        env_var="FETCHER_JOB_PULL_POLICY",
        required=False,
        # Default is complicated: "Always" if the image has no tag, "IfNotPresent" otherwise
        choices=["Always", "Never", "IfNotPresent"],
    )

    parser.add_argument(
        "--fetcher-job-restart-policy",
        env_var="FETCHER_JOB_RESTART_POLICY",
        required=False,
        choices=["Never", "OnFailure"],
        default="OnFailure",
    )

    parser.add_argument("--fetcher-job-namespace",
                        env_var="FETCHER_JOB_NAMESPACE",
                        required=False,
                        default="default")
    parser.add_argument(
        "--fetcher-job-min-volume-size",
        env_var="FETCHER_JOB_MIN_VOLUME_SIZE",
        required=False,
        type=int,
        default=MIN_VOLUME_SIZE_MB,
    )
    parser.add_argument("--fetcher-job-volume-storage-class",
                        env_var="FETCHER_JOB_VOLUME_STORAGE_CLASS",
                        required=False)

    parsed_args, _ = parser.parse_known_args(args, env_vars=os.environ)
    return FetcherServiceConfig(
        zookeeper_ensemble_hosts=parsed_args.zookeeper_ensemble_hosts,
        s3_download_bucket=parsed_args.s3_download_bucket,
        kubeconfig=parsed_args.kubeconfig,
        fetcher_job=FetcherJobConfig(
            namespace=parsed_args.fetcher_job_namespace,
            image=parsed_args.fetcher_job_image,
            node_selector=parsed_args.fetcher_job_node_selector,
            pull_policy=parsed_args.fetcher_job_pull_policy,
            ttl=parsed_args.fetcher_job_ttl,
            restart_policy=parsed_args.fetcher_job_restart_policy,
            volume=FetcherVolumeConfig(
                storage_class=parsed_args.fetcher_job_volume_storage_class,
                min_size=parsed_args.fetcher_job_min_volume_size,
            ),
        ),
    )
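The return value reshapes the flat flag namespace into nested config objects. A minimal sketch of what such containers could look like as frozen dataclasses (field names follow the constructor calls above; the project's real definitions may differ):

from dataclasses import dataclass, field
from typing import Optional

@dataclass(frozen=True)
class FetcherVolumeConfig:
    storage_class: Optional[str] = None
    min_size: int = 0

@dataclass(frozen=True)
class FetcherJobConfig:
    namespace: str = "default"
    image: str = ""
    node_selector: dict = field(default_factory=dict)
    pull_policy: Optional[str] = None
    ttl: Optional[int] = None
    restart_policy: str = "OnFailure"
    volume: FetcherVolumeConfig = field(default_factory=FetcherVolumeConfig)

@dataclass(frozen=True)
class FetcherServiceConfig:
    zookeeper_ensemble_hosts: str
    s3_download_bucket: str
    kubeconfig: Optional[str]
    fetcher_job: FetcherJobConfig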
Example #13
def ParseArgs():
    parser = ArgParser(default_config_files=[
                       os.getcwd() + '/src/initial_configurations/default'])

    # Core settings
    core_parse = parser.add_argument_group('Core setting')
    core_parse.add_argument('-s', '--start_date',         dest='start_date',
                            default=START_DATE,   type=str, help='Training start date')
    core_parse.add_argument('-p','--train_period',       dest='train_period',
                            default=TRAIN_PERIOD, type=int, help='Time period of training files to use')
    core_parse.add_argument('-n','--new_run',              dest='new_run',         default=1,
                             type=int, help='Whether to erase the model checkpoint and start a new run')
    core_parse.add_argument('-l','--local_run',            dest='local_run',       default=1,
                             type=int, help='Whether the parameter JSON file is kept locally instead of in redis')
    core_parse.add_argument('-nni','--tuning',               dest='tuning',          default=0,
                             type=int, help='Whether or not to perform hyperparameter tuning')

    # Data
    data_parse = parser.add_argument_group('Data setting')
    data_parse.add_argument('--train_data_path',    dest='train_data_path',
                            default=DATA_PATH,  type=str, help='Directory where the training files are located')
    
    data_parse.add_argument('--random_seed',        dest='random_seed',        default=8888,
                            type=int, help='Random seed used for shuffling the list of training files')
    data_parse.add_argument('--num_cores',          dest='num_cores',
                            default=24,         type=int, help='Number of CPU cores')
    data_parse.add_argument('--train_ratio',        dest='train_ratio',        default=0.7,
                            type=float, help='Fraction of data to be used for training')
    data_parse.add_argument('--valid_ratio',        dest='valid_ratio',        default=0.15,       type=float,
                            help='Fraction of data to be used for validation (only matters when there is a third dataset to be created for testing)')
    data_parse.add_argument('--batch_size',         dest='batch_size',
                            default=32,         type=int, help='Number of examples per batch')
    data_parse.add_argument('--prefetch_size',      dest='prefetch_size',      default=1,
                            type=int, help='Number of batches to be prepared in queue')

    # Model
    model_parse = parser.add_argument_group('Model setting')
    model_parse.add_argument('--model',                dest='model',               default='DNN',
                             type=str,   help='Select the model to train e.g. DNN; note that this version only has DNN')
    model_parse.add_argument('--loss',            dest='loss',              default=40,
                             type=int,   help="Setting of loss function '10','11','12','20','21','22','30','31','32','40'")
    model_parse.add_argument('--hidden_units',    dest='hidden_units',      default=[
                             128, 64],      type=int,   nargs='+', help='List containing the number of hidden units to use for each hidden layer')
    model_parse.add_argument('--learning_rate',   dest='learning_rate',
                             default=0.001,         type=float, help='Learning rate of updating gradient')
    model_parse.add_argument('--decay_step',      dest='decay_step',
                             default=100,           type=int,   help='Decay step')
    model_parse.add_argument('--decay_rate',      dest='decay_rate',        default=0.98,
                             type=float, help='Decay rate for exponential decay of learning rate')
    model_parse.add_argument('--Lambda',          dest='Lambda',            default=0.25,
                             type=float, help='Lambda for L2,L1 regularization; alpha for focal loss')
    model_parse.add_argument('--gamma',           dest='gamma',
                             default=2.,            type=float, help='parameter for focal loss')
    model_parse.add_argument('--beta',            dest='beta',
                             default=1.,            type=float, help='Regularization parameter')
    model_parse.add_argument('--drop_rate',       dest='drop_rate',
                             default=0.5,            type=float, help='dropout rate')
    model_parse.add_argument('--embedding_units', dest='embedding_units',   default=[1, 35, 359, 3, 2], type=int, nargs='+',
                             help='List containing the number of embedding units to use for features (in order): [weekday, region, city, adexchange, slotformat]; this replaces the one hot encoding')
    model_parse.add_argument('--embedding_units_ohe', dest='embedding_units_ohe',   default=[
                             45], type=int, nargs='+', help='List containing the number of embedding units to use for OHE features (in order): usertag')

    # Training
    train_parse = parser.add_argument_group('Training hyperparameters')
    train_parse.add_argument('--has_gpu',              dest='has_gpu',
                             default=0,     type=int,   help='1 if GPU is present, else 0')
    train_parse.add_argument('--is_test',              dest='is_test',             default=0,
                             type=int,   help='1 if the trained model will be evaluated, else 0')
    train_parse.add_argument('--num_epochs_min',       dest='num_epochs_min',
                             default=100,   type=float,   help='Minimum number of training epochs')
    train_parse.add_argument('--num_epochs',           dest='num_epochs',
                             default=101,   type=float,   help='Number of total training epochs')
    train_parse.add_argument('--validation_length',    dest='validation_length',   default=100,
                             type=int,   help='Number of batches to use in one validation pass')
    train_parse.add_argument('--test_length',          dest='test_length',
                             default=100,   type=int,   help='Number of batches to use in one test pass')
    train_parse.add_argument('--earlystop_check_frequency', dest='earlystop_check_frequency',
                             default=10,     type=int,   help='earlystop_check_frequency')
    train_parse.add_argument('--earlystop_duration',       dest='earlystop_duration',
                             default=10,     type=int,   help='earlystop_duration')
    train_parse.add_argument('--valid_loss_delta',         dest='valid_loss_delta',
                             default=0.0001, type=float, help='valid_loss_delta')
    train_parse.add_argument('--num_threshold_buffer',     dest='num_threshold_buffer',
                             default=3,      type=int,   help='num_threshold_buffer')
    train_parse.add_argument('--percentile_threshold',     dest='percentile_threshold',
                             default=8,      type=int,   help='percentile_threshold')

    # Directory paths
    dir_parse = parser.add_argument_group('Directory paths')
    dir_parse.add_argument('--save_dir',         dest='save_dir',
                           default='./Outputs/',        type=str, help='Directory to save model directories')
    dir_parse.add_argument('--load_dir',         dest='load_dir',         default='latest',
                           type=str, help='Directory to load an old model from; default "latest" resolves to the most recent model')
    dir_parse.add_argument('--store_dir',        dest='store_dir',        default='latest',
                           type=str, help='Directory to store the current model; default "latest" saves under a timestamped name')
    dir_parse.add_argument('--result_dir',       dest='result_dir',       default='result.csv',
                           type=str, help='Directory to store (history) performance result')
    dir_parse.add_argument('--builder_save_dir', dest='builder_save_dir', default='builder_save',
                           type=str, help='Directory to store current model for tfjs predictor')

    _args, _ = parser.parse_known_args()
    return vars(_args)