Example #1
def main(opt, device_id):
    opt = training_opt_postprocessing(opt, device_id)
    init_logger(opt.log_file)
    # Load checkpoint if we resume from a previous training.
    if opt.train_from:
        logger.info('Loading checkpoint from %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from,
                                map_location=lambda storage, loc: storage)

        # Load default opt values, then overwrite them with the opts from
        # the checkpoint. This is useful for re-training a model after
        # adding a new option (not set in the checkpoint).
        dummy_parser = configargparse.ArgumentParser()
        opts.model_opts(dummy_parser)
        default_opt = dummy_parser.parse_known_args([])[0]

        model_opt = default_opt
        model_opt.__dict__.update(checkpoint['opt'].__dict__)
    else:
        checkpoint = None
        model_opt = opt

    # Peek the first dataset to determine the data_type.
    # (All datasets have the same data_type).
    first_dataset = next(lazily_load_dataset("train", opt))
    data_type = first_dataset.data_type

    # Load fields generated from preprocess phase.
    fields = _load_fields(first_dataset, data_type, opt, checkpoint)

    # Report src/tgt features.
    src_features, tgt_features = _collect_report_features(fields)
    for j, feat in enumerate(src_features):
        logger.info(' * src feature %d size = %d'
                    % (j, len(fields[feat].vocab)))
    for j, feat in enumerate(tgt_features):
        logger.info(' * tgt feature %d size = %d'
                    % (j, len(fields[feat].vocab)))

    # Build model.
    model = build_model(model_opt, opt, fields, checkpoint)
    if opt.fp16:
        model.half()
    n_params, enc, dec = _tally_parameters(model)
    logger.info('encoder: %d' % enc)
    logger.info('decoder: %d' % dec)
    logger.info('* number of parameters: %d' % n_params)
    _check_save_model_path(opt)

    # Build optimizer.
    optim = build_optim(model, opt, checkpoint)

    # Build model saver
    model_saver = build_model_saver(model_opt, opt, model, fields, optim)

    trainer = build_trainer(opt, device_id, model, fields,
                            optim, data_type, model_saver=model_saver)

    def train_iter_fct(): return build_dataset_iter(
        lazily_load_dataset("train", opt), fields, opt)

    def valid_iter_fct(): return build_dataset_iter(
        lazily_load_dataset("valid", opt), fields, opt, is_train=False)

    # Do training.
    if len(opt.gpu_ranks):
        logger.info('Starting training on GPU: %s' % opt.gpu_ranks)
    else:
        logger.info('Starting training on CPU, could be very slow')
    trainer.train(train_iter_fct, valid_iter_fct, opt.train_steps,
                  opt.valid_steps)

    if opt.tensorboard:
        trainer.report_manager.tensorboard_writer.close()
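
A note on the default-then-overwrite trick above: parsing an empty argv yields a namespace of pure defaults, which the checkpoint's saved opts then overwrite, so any option added to the code after the checkpoint was written keeps its default. A minimal self-contained sketch of the same idea (the --rnn_size option is hypothetical):

import configargparse

def load_model_opt(checkpoint):
    dummy_parser = configargparse.ArgumentParser()
    dummy_parser.add_argument('--rnn_size', type=int, default=500)  # hypothetical option
    # Parse an empty argv to obtain the defaults of every registered option.
    default_opt = dummy_parser.parse_known_args([])[0]
    # Overwrite the defaults with whatever the checkpoint recorded; options
    # the checkpoint never saw keep their default values.
    default_opt.__dict__.update(checkpoint['opt'].__dict__)
    return default_opt
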
Example #2
def main(opt, device_id):
    opt = training_opt_postprocessing(opt, device_id)
    init_logger(opt.log_file)
    # print out the arguments
    for k, v in opt.__dict__.items():
        logger.info("{}: {}".format(k, v))
    # Load checkpoint if we resume from a previous training.
    if opt.train_from:
        logger.info('Loading checkpoint from %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from,
                                map_location=lambda storage, loc: storage)

        # Load default opt values, then overwrite them with the opts from
        # the checkpoint. This is useful for re-training a model after
        # adding a new option (not set in the checkpoint).
        dummy_parser = configargparse.ArgumentParser()
        opts.model_opts(dummy_parser)
        default_opt = dummy_parser.parse_known_args([])[0]

        model_opt = default_opt
        model_opt.__dict__.update(checkpoint['opt'].__dict__)
        logger.info('Loading vocab from checkpoint at %s.' % opt.train_from)
        vocab = checkpoint['vocab']
    else:
        checkpoint = None
        model_opt = opt
        vocab = torch.load(opt.data + '.vocab.pt')

    # Load a shard dataset to determine the data_type.
    # (All datasets have the same data_type).
    # this should be refactored out of existence reasonably soon
    first_dataset = torch.load(glob.glob(opt.data + '.train*.pt')[0])
    data_type = first_dataset.data_type

    # check for code where vocab is saved instead of fields
    # (in the future this will be done in a smarter way)
    if old_style_vocab(vocab):
        fields = load_fields_from_vocab(vocab, data_type)
    else:
        fields = vocab

    # Report src and tgt vocab sizes, including for features
    for side in ['src', 'tgt']:
        for name, f in fields[side]:
            if f.use_vocab:
                logger.info(' * %s vocab size = %d' % (name, len(f.vocab)))

    # Build model.
    model = build_model(model_opt, opt, fields, checkpoint)
    n_params, enc, dec = _tally_parameters(model)
    logger.info('encoder: %d' % enc)
    logger.info('decoder: %d' % dec)
    logger.info('* number of parameters: %d' % n_params)
    _check_save_model_path(opt)

    # Build optimizer.
    optim = build_optim(model, opt, checkpoint)

    # Build model saver
    model_saver = build_model_saver(model_opt, opt, model, fields, optim)

    trainer = build_trainer(opt,
                            device_id,
                            model,
                            fields,
                            optim,
                            data_type,
                            model_saver=model_saver)

    # this line is kind of a temporary kludge because different objects expect
    # fields to have a different structure
    dataset_fields = dict(chain.from_iterable(fields.values()))

    train_iter = build_dataset_iter("train", dataset_fields, opt)
    valid_iter = build_dataset_iter("valid",
                                    dataset_fields,
                                    opt,
                                    is_train=False)

    if len(opt.gpu_ranks):
        logger.info('Starting training on GPU: %s' % opt.gpu_ranks)
    else:
        logger.info('Starting training on CPU, could be very slow')
    trainer.train(train_iter, valid_iter, opt.train_steps, opt.valid_steps)

    if opt.tensorboard:
        trainer.report_manager.tensorboard_writer.close()
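
The fields-flattening kludge above relies only on the standard library: fields maps each side to a list of (name, field) pairs, and dict(chain.from_iterable(...)) merges those lists into one flat dict. A self-contained illustration with placeholder strings standing in for the real Field objects:

from itertools import chain

fields = {
    'src': [('src', 'SRC_FIELD'), ('src_feat_0', 'SRC_FEAT_FIELD')],
    'tgt': [('tgt', 'TGT_FIELD')],
}
# chain.from_iterable yields every (name, field) pair across both lists.
dataset_fields = dict(chain.from_iterable(fields.values()))
# {'src': 'SRC_FIELD', 'src_feat_0': 'SRC_FEAT_FIELD', 'tgt': 'TGT_FIELD'}
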
Example #3
            'opt': self.model_opt,
            'optim': self.optim,
        }

        logger.info("Saving checkpoint %s_step_%d.pt" % (self.base_path, step))
        checkpoint_path = '%s_step_%d.pt' % (self.base_path, step)
        torch.save(checkpoint, checkpoint_path)
        return checkpoint, checkpoint_path

    def _rm_checkpoint(self, name):
        """
        Remove a checkpoint

        Args:
            name(str): name that identifies the checkpoint
                (it may be a filepath)
        """
        os.remove(name)


if __name__ == "__main__":
    parser = configargparse.ArgumentParser(
        description='train.py',
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)

    opts.model_opts(parser)
    opts.train_opts(parser)

    opt = parser.parse_args()
    main(opt)
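
configargparse.ArgumentParser mirrors the argparse API, so a script like this can fall back to plain argparse when configargparse is not installed (Example #7 below uses the same pattern). A minimal sketch:

try:
    import configargparse as cargparse
except ImportError:
    import argparse as cargparse  # loses config-file support, keeps the API

parser = cargparse.ArgumentParser(
    description='train.py',
    formatter_class=cargparse.ArgumentDefaultsHelpFormatter)
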
Example #4
                                             api_url + '/v1/application',
                                             json=dict(id=api_key,
                                                       jwt_secret=jwt_secret))
                if resp.status != 200:
                    raise ValueError(f"Wrong response status: {resp.status}")
            except Exception as exc:
                message = f"SDK server unavailable: {exc}"
                obj.logger.exception(message)
                raise RuntimeError(message) from exc
        return obj


if __name__ == '__main__':
    import configargparse

    parser = configargparse.ArgumentParser("VideoRoom backend")
    parser.add_argument('-c',
                        '--config',
                        is_config_file=True,
                        help='config file path')
    parser.add_argument('-x',
                        '--prefix',
                        default='/backend',
                        help='URI prefix',
                        env_var='PREFIX')
    parser.add_argument('-u',
                        '--postgres-dsn',
                        required=True,
                        help='URL of Postgres',
                        env_var='POSTGRES_DSN')
    parser.add_argument('-s',
Example #5
def config_parser():

    import configargparse
    parser = configargparse.ArgumentParser()
    parser.add_argument('--config',
                        is_config_file=True,
                        help='config file path')
    parser.add_argument("--expname", type=str, help='experiment name')
    parser.add_argument("--basedir",
                        type=str,
                        help='where to store ckpts and logs')
    parser.add_argument("--datadir", type=str, help='input data directory')

    # training options
    parser.add_argument("--netdepth",
                        type=int,
                        default=8,
                        help='layers in network')
    parser.add_argument("--netwidth",
                        type=int,
                        default=256,
                        help='channels per layer')
    parser.add_argument("--netdepth_fine",
                        type=int,
                        default=8,
                        help='layers in fine network')
    parser.add_argument("--netwidth_fine",
                        type=int,
                        default=256,
                        help='channels per layer in fine network')
    parser.add_argument(
        "--N_rand",
        type=int,
        default=32 * 32 * 4,
        help='batch size (number of random rays per gradient step)')
    parser.add_argument("--lrate",
                        type=float,
                        default=5e-4,
                        help='learning rate')
    parser.add_argument("--lrate_decay",
                        type=int,
                        default=250,
                        help='exponential learning rate decay (in 1000s)')
    parser.add_argument(
        "--chunk",
        type=int,
        default=1024 * 32,
        help=
        'number of rays processed in parallel, decrease if running out of memory'
    )
    parser.add_argument(
        "--netchunk",
        type=int,
        default=1024 * 64,
        help=
        'number of pts sent through network in parallel, decrease if running out of memory'
    )
    parser.add_argument("--no_batching",
                        action='store_true',
                        help='only take random rays from 1 image at a time')
    parser.add_argument("--no_reload",
                        action='store_true',
                        help='do not reload weights from saved ckpt')
    parser.add_argument(
        "--ft_path",
        type=str,
        default=None,
        help='specific weights npy file to reload for coarse network')
    parser.add_argument("--random_seed",
                        type=int,
                        default=None,
                        help='fix random seed for repeatability')

    # pre-crop options
    parser.add_argument("--precrop_iters",
                        type=int,
                        default=0,
                        help='number of steps to train on central crops')
    parser.add_argument("--precrop_frac",
                        type=float,
                        default=.5,
                        help='fraction of img taken for central crops')

    # rendering options
    parser.add_argument("--N_samples",
                        type=int,
                        default=64,
                        help='number of coarse samples per ray')
    parser.add_argument("--N_importance",
                        type=int,
                        default=0,
                        help='number of additional fine samples per ray')
    parser.add_argument("--perturb",
                        type=float,
                        default=1.,
                        help='set to 0. for no jitter, 1. for jitter')
    parser.add_argument("--use_viewdirs",
                        action='store_true',
                        help='use full 5D input instead of 3D')
    parser.add_argument(
        "--i_embed",
        type=int,
        default=0,
        help='set 0 for default positional encoding, -1 for none')
    parser.add_argument(
        "--multires",
        type=int,
        default=10,
        help='log2 of max freq for positional encoding (3D location)')
    parser.add_argument(
        "--multires_views",
        type=int,
        default=4,
        help='log2 of max freq for positional encoding (2D direction)')
    parser.add_argument(
        "--raw_noise_std",
        type=float,
        default=0.,
        help=
        'std dev of noise added to regularize sigma_a output, 1e0 recommended')

    parser.add_argument(
        "--render_only",
        action='store_true',
        help='do not optimize, reload weights and render out render_poses path'
    )
    parser.add_argument(
        "--render_test",
        action='store_true',
        help='render the test set instead of render_poses path')
    parser.add_argument(
        "--render_factor",
        type=int,
        default=0,
        help=
        'downsampling factor to speed up rendering, set 4 or 8 for fast preview'
    )

    # dataset options
    parser.add_argument("--dataset_type",
                        type=str,
                        default='llff',
                        help='options: llff / blender / deepvoxels')
    parser.add_argument(
        "--testskip",
        type=int,
        default=8,
        help=
        'will load 1/N images from test/val sets, useful for large datasets like deepvoxels'
    )

    # deepvoxels flags
    parser.add_argument("--shape",
                        type=str,
                        default='greek',
                        help='options : armchair / cube / greek / vase')

    # blender flags
    parser.add_argument(
        "--white_bkgd",
        action='store_true',
        help=
        'set to render synthetic data on a white bkgd (always use for dvoxels)'
    )
    parser.add_argument(
        "--half_res",
        action='store_true',
        help='load blender synthetic data at 400x400 instead of 800x800')

    # llff flags
    parser.add_argument("--factor",
                        type=int,
                        default=8,
                        help='downsample factor for LLFF images')
    parser.add_argument(
        "--no_ndc",
        action='store_true',
        help=
        'do not use normalized device coordinates (set for non-forward facing scenes)'
    )
    parser.add_argument(
        "--lindisp",
        action='store_true',
        help='sampling linearly in disparity rather than depth')
    parser.add_argument("--spherify",
                        action='store_true',
                        help='set for spherical 360 scenes')
    parser.add_argument(
        "--llffhold",
        type=int,
        default=8,
        help='will take every 1/N images as LLFF test set, paper uses 8')

    # logging/saving options
    parser.add_argument("--i_print",
                        type=int,
                        default=100,
                        help='frequency of console printout and metric logging')
    parser.add_argument("--i_img",
                        type=int,
                        default=500,
                        help='frequency of tensorboard image logging')
    parser.add_argument("--i_weights",
                        type=int,
                        default=10000,
                        help='frequency of weight ckpt saving')
    parser.add_argument("--i_testset",
                        type=int,
                        default=50000,
                        help='frequency of testset saving')
    parser.add_argument("--i_video",
                        type=int,
                        default=50000,
                        help='frequency of render_poses video saving')

    return parser
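
This parser keeps configargparse's default config-file syntax, so the file passed via --config is a plain list of key = value lines, one per option; store_true flags take true/false. A hypothetical configs/fern.txt (all names and values assumed):

# configs/fern.txt (hypothetical values)
expname = fern_test
basedir = ./logs
datadir = ./data/nerf_llff_data/fern
dataset_type = llff
factor = 8
N_rand = 1024
use_viewdirs = true

Command-line arguments still override the file, e.g. (script name assumed):

python run_nerf.py --config configs/fern.txt --N_rand 512
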
Example #6
        v = dr['value']
        t = trackletsById[dr['id']]
        fullResult['divisionResults'].append({'id': t['maxUid'], 'value': v})

    t1 = time.time()
    _getLogger().info("Extracting result took {} secs".format(t1 - t0))

    _getLogger().info("Saving stitched result to {}".format(
        args.results_filename))
    hytra.core.jsongraph.writeToFormattedJSON(args.results_filename,
                                              fullResult)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=
        'Take a json file containing a result to a set of HDF5 events files',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-c',
                        '--config',
                        is_config_file=True,
                        help='config file path')

    parser.add_argument('--graph-json-file',
                        required=True,
                        type=str,
                        dest='model_filename',
                        help='Filename of the json model description')
    parser.add_argument('--weights-json-file',
                        required=True,
                        type=str,
                        dest='weights_filename',
Example #7
        print(' '.join(['"%s"' % x if ' ' in x else x for x in bsub_command]),
              file=sys.stderr)
        return None


if __name__ == '__main__':
    INFO = """Runs VEP with LOFTEE.
Minimal usage is: python run_lof_annotation.py -i input.vcf[.gz] -o output_directory
This will count the number of lines in the file, and prompt for the number of lines to split the file into and which queue to submit to.
If you'd like to omit the interactive step, add -s NUMBER_OF_LINES -q QUEUE. -s 20000 -q hour is a sensible default."""

    try:
        import configargparse
        parser = configargparse.ArgumentParser(
            description=INFO,
            default_config_files=['~/.run_lof_config'],
            args_for_setting_config_path=["-c", "--run-lof-config"],
            formatter_class=configargparse.DefaultsRawFormatter)
    except ImportError:
        parser = argparse.ArgumentParser(
            description=INFO,
            formatter_class=argparse.RawDescriptionHelpFormatter)
        parser.add_argument(
            "-c",
            "--run-lof-config",
            dest="config",
            help="To enable this option, please install configargparse")

    parser.add_argument('--vcf',
                        '--input',
                        '-i',
Example #8
def get_parser():
    parser = configargparse.ArgumentParser(
        description="ASR Decoding",
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter,
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    parser.add_argument("--config", is_config_file=True, help="config file path")

    parser.add_argument(
        "--log_level",
        type=lambda x: x.upper(),
        default="INFO",
        choices=("INFO", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
        help="The verbose level of logging",
    )

    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument(
        "--ngpu", type=int, default=0, help="The number of gpus. 0 indicates CPU mode",
    )
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    parser.add_argument(
        "--dtype",
        default="float32",
        choices=["float16", "float32", "float64"],
        help="Data type",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=1,
        help="The number of workers used for DataLoader",
    )

    group = parser.add_argument_group("Input data related")
    group.add_argument(
        "--data_path_and_name_and_type",
        type=str2triple_str,
        required=True,
        action="append",
    )
    group.add_argument("--key_file", type=str_or_none)
    group.add_argument("--allow_variable_data_keys", type=str2bool, default=False)

    group = parser.add_argument_group("The model configuration related")
    group.add_argument("--asr_train_config", type=str, required=True)
    group.add_argument("--asr_model_file", type=str, required=True)
    group.add_argument("--lm_train_config", type=str)
    group.add_argument("--lm_file", type=str)
    group.add_argument("--word_lm_train_config", type=str)
    group.add_argument("--word_lm_file", type=str)

    group = parser.add_argument_group("Beam-search related")
    group.add_argument(
        "--batch_size", type=int, default=1, help="The batch size for inference",
    )
    group.add_argument("--nbest", type=int, default=1, help="Output N-best hypotheses")
    group.add_argument("--beam_size", type=int, default=20, help="Beam size")
    group.add_argument("--penalty", type=float, default=0.0, help="Insertion penalty")
    group.add_argument(
        "--maxlenratio",
        type=float,
        default=0.0,
        help="Input length ratio to obtain max output length. "
        "If maxlenratio=0.0 (default), it uses a end-detect "
        "function "
        "to automatically find maximum hypothesis lengths",
    )
    group.add_argument(
        "--minlenratio",
        type=float,
        default=0.0,
        help="Input length ratio to obtain min output length",
    )
    group.add_argument(
        "--ctc_weight", type=float, default=0.5, help="CTC weight in joint decoding",
    )
    group.add_argument("--lm_weight", type=float, default=1.0, help="RNNLM weight")
    group.add_argument(
        "--blank_symbol",
        type=str,
        default="<blank>",
        help="The token symbol represents CTC-blank",
    )

    group = parser.add_argument_group("Text converter related")
    group.add_argument(
        "--token_type",
        type=str_or_none,
        default=None,
        choices=["char", "bpe", None],
        help="The token type for ASR model. "
        "If not given, refers from the training args",
    )
    group.add_argument(
        "--bpemodel",
        type=str_or_none,
        default=None,
        help="The model path of sentencepiece. "
        "If not given, refers from the training args",
    )

    return parser
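
Since get_parser sets config_file_parser_class=configargparse.YAMLConfigFileParser, the file passed via --config must be YAML rather than key = value lines, which is also why the options use '_' separators (see the note above). A hypothetical decode.yaml and invocation (script name and values assumed):

# decode.yaml (hypothetical values)
batch_size: 1
beam_size: 20
ctc_weight: 0.3
lm_weight: 0.6

python decode.py --config decode.yaml --output_dir exp/decode ...
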
Example #9
def parse_connection(config_files, args):
    """Parse out connection node arguments for an autopush node"""
    parser = configargparse.ArgumentParser(
        description='Runs a Connection Node.',
        default_config_files=config_files,
    )
    parser.add_argument('--config-connection',
                        help="Connection node configuration file path",
                        dest='config_file',
                        is_config_file=True)
    parser.add_argument('-p',
                        '--port',
                        help='Websocket Port',
                        type=int,
                        default=8080,
                        env_var="PORT")
    parser.add_argument('--router_hostname',
                        help="HTTP Router Hostname to use for internal "
                        "router connects",
                        type=str,
                        default=None,
                        env_var="ROUTER_HOSTNAME")
    parser.add_argument('-r',
                        '--router_port',
                        help="HTTP Router Port for internal router connects",
                        type=int,
                        default=8081,
                        env_var="ROUTER_PORT")
    parser.add_argument('--router_ssl_key',
                        help="Routing listener SSL key path",
                        type=str,
                        default="",
                        env_var="ROUTER_SSL_KEY")
    parser.add_argument('--router_ssl_cert',
                        help="Routing listener SSL cert path",
                        type=str,
                        default="",
                        env_var="ROUTER_SSL_CERT")
    parser.add_argument('--auto_ping_interval',
                        help="Interval between Websocket pings",
                        default=0,
                        type=float,
                        env_var="AUTO_PING_INTERVAL")
    parser.add_argument('--auto_ping_timeout',
                        help="Timeout in seconds for Websocket ping replys",
                        default=4,
                        type=float,
                        env_var="AUTO_PING_TIMEOUT")
    parser.add_argument('--max_connections',
                        help="The maximum number of concurrent connections.",
                        default=0,
                        type=int,
                        env_var="MAX_CONNECTIONS")
    parser.add_argument('--close_handshake_timeout',
                        help="The WebSocket closing handshake timeout. Set to "
                        "0 to disable.",
                        default=0,
                        type=int,
                        env_var="CLOSE_HANDSHAKE_TIMEOUT")
    parser.add_argument('--hello_timeout',
                        help="The client handshake timeout. Set to 0 to"
                        "disable.",
                        default=0,
                        type=int,
                        env_var="HELLO_TIMEOUT")

    add_shared_args(parser)
    return parser.parse_args(args)
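
Most options here carry both a default and an env_var; configargparse resolves them with a fixed precedence: command-line arguments beat environment variables, which beat config-file values, which beat the argparse defaults. A sketch, assuming this module's add_shared_args is importable:

import os

os.environ['PORT'] = '9000'
args = parse_connection([], ['--router_port', '9090'])
assert args.port == 9000         # PORT env var beats the default of 8080
assert args.router_port == 9090  # command line beats the default of 8081
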
Example #10
def build_parser():
    parser = configargparse.ArgumentParser(
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)
    parser.add('--config', is_config_file=True, help='config file path')
    parser.add('--config2', is_config_file=True, default=False, nargs='?',
               help='another config file path to overwrite --config')
    # general
    parser.add_argument('--corpus', type=str,
                        help='corpus name')
    parser.add_argument('--n_gpus', type=int, default=1,
                        help='number of GPUs (0 indicates CPU)')
    parser.add_argument('--cudnn_benchmark', type=strtobool, default=True,
                        help='use CuDNN benchmark mode')
    parser.add_argument("--train_dtype", default="float32",
                        choices=["float16", "float32", "float64", "O0", "O1", "O2", "O3"],
                        help="Data type for training")
    parser.add_argument('--model_save_dir', type=str, default=False,
                        help='directory to save a model')
    parser.add_argument('--resume', type=str, default=False, nargs='?',
                        help='model path to resume training')
    parser.add_argument('--job_name', type=str, default=False,
                        help='job name')
    parser.add_argument('--stdout', type=strtobool, default=False,
                        help='print to standard output during training')
    parser.add_argument('--remove_old_checkpoints', type=strtobool, default=True,
                        help='remove old checkpoints to save disk space (turned off when training Transformer)')
    # dataset
    parser.add_argument('--train_set', type=str,
                        help='tsv file path for the training set')
    parser.add_argument('--train_set_sub1', type=str, default=False,
                        help='tsv file path for the training set for the 1st auxiliary task')
    parser.add_argument('--train_set_sub2', type=str, default=False,
                        help='tsv file path for the training set for the 2nd auxiliary task')
    parser.add_argument('--train_word_alignment', type=str,
                        help='word alignment directory path for the training set')
    parser.add_argument('--train_ctc_alignment', type=str,
                        help='CTC alignment directory path for the training set')
    parser.add_argument('--dev_set', type=str,
                        help='tsv file path for the development set')
    parser.add_argument('--dev_set_sub1', type=str, default=False,
                        help='tsv file path for the development set for the 1st auxiliary task')
    parser.add_argument('--dev_set_sub2', type=str, default=False,
                        help='tsv file path for the development set for the 2nd auxiliary task')
    parser.add_argument('--dev_word_alignment', type=str,
                        help='word alignment directory path for the development set')
    parser.add_argument('--dev_ctc_alignment', type=str,
                        help='CTC alignment directory path for the development set')
    parser.add_argument('--eval_sets', type=str, default=[], nargs='+',
                        help='tsv file paths for the evaluation sets')
    parser.add_argument('--nlsyms', type=str, default=False, nargs='?',
                        help='non-linguistic symbols file path')
    parser.add_argument('--dict', type=str,
                        help='dictionary file path')
    parser.add_argument('--dict_sub1', type=str, default=False,
                        help='dictionary file path for the 1st auxiliary task')
    parser.add_argument('--dict_sub2', type=str, default=False,
                        help='dictionary file path for the 2nd auxiliary task')
    parser.add_argument('--unit', type=str, default='wp',
                        choices=['word', 'wp', 'char', 'phone', 'word_char', 'char_space'],
                        help='output unit for the main task')
    parser.add_argument('--unit_sub1', type=str, default=False,
                        choices=['wp', 'char', 'phone'],
                        help='output unit for the 1st auxiliary task')
    parser.add_argument('--unit_sub2', type=str, default=False,
                        choices=['wp', 'char', 'phone'],
                        help='output unit for the 2nd auxiliary task')
    parser.add_argument('--wp_model', type=str, default=False, nargs='?',
                        help='wordpiece model path for the main task')
    parser.add_argument('--wp_model_sub1', type=str, default=False, nargs='?',
                        help='wordpiece model path for the 1st auxiliary task')
    parser.add_argument('--wp_model_sub2', type=str, default=False, nargs='?',
                        help='wordpiece model path for the 2nd auxiliary task')
    # features
    parser.add_argument('--input_type', type=str, default='speech',
                        choices=['speech', 'text'],
                        help='type of input features')
    parser.add_argument('--n_splices', type=int, default=1,
                        help='number of input frames to splice (both for left and right frames)')
    parser.add_argument('--n_stacks', type=int, default=1,
                        help='number of input frames to stack (frame stacking)')
    parser.add_argument('--n_skips', type=int, default=1,
                        help='number of input frames to skip')
    parser.add_argument('--max_n_frames', type=int, default=2000,
                        help='maximum number of input frames')
    parser.add_argument('--min_n_frames', type=int, default=40,
                        help='minimum number of input frames')
    parser.add_argument('--dynamic_batching', type=strtobool, default=True,
                        help='')
    parser.add_argument('--input_noise_std', type=float, default=0,
                        help='standard deviation of Gaussian noise to input features')
    parser.add_argument('--weight_noise_std', type=float, default=0,
                        help='standard deviation of Gaussian noise to weight parameters')
    parser.add_argument('--sequence_summary_network', type=strtobool, default=False,
                        help='use sequence summary network')
    # topology (encoder)
    parser.add_argument('--enc_type', type=str, default='blstm',
                        choices=ENCODER_TYPES,
                        help='type of the encoder')
    parser.add_argument('--enc_n_layers', type=int, default=5,
                        help='number of encoder RNN layers')
    parser.add_argument('--enc_n_layers_sub1', type=int, default=0,
                        help='number of encoder RNN layers in the 1st auxiliary task')
    parser.add_argument('--enc_n_layers_sub2', type=int, default=0,
                        help='number of encoder RNN layers in the 2nd auxiliary task')
    parser.add_argument('--subsample', type=str, default="1_1_1_1_1",
                        help='delimited list input')
    parser.add_argument('--subsample_type', type=str, default='drop',
                        choices=['drop', 'concat', 'max_pool', '1dconv', 'add'],
                        help='type of subsampling in the encoder')
    # topology (decoder)
    parser.add_argument('--dec_type', type=str, default='lstm',
                        choices=DECODER_TYPES,
                        help='type of the decoder')
    parser.add_argument('--dec_type_sub1', type=str, default='lstm',
                        choices=DECODER_TYPES,
                        help='type of the decoder in the 1st auxiliary task')
    parser.add_argument('--dec_type_sub2', type=str, default='lstm',
                        choices=DECODER_TYPES,
                        help='type of the decoder in the 2nd auxiliary task')
    parser.add_argument('--dec_n_layers', type=int, default=1,
                        help='number of decoder RNN layers')
    parser.add_argument('--dec_n_layers_sub1', type=int, default=0,
                        help='number of decoder RNN layers in the 1st auxiliary task')
    parser.add_argument('--dec_n_layers_sub2', type=int, default=0,
                        help='number of decoder RNN layers in the 2nd auxiliary task')
    parser.add_argument('--tie_embedding', type=strtobool, default=False, nargs='?',
                        help='tie weights of an embedding matrix and a linear layer before the softmax layer')
    parser.add_argument('--ctc_fc_list', type=str, default="", nargs='?',
                        help='')
    parser.add_argument('--ctc_fc_list_sub1', type=str, default="", nargs='?',
                        help='')
    parser.add_argument('--ctc_fc_list_sub2', type=str, default="", nargs='?',
                        help='')
    # optimization
    parser.add_argument('--batch_size', type=int, default=50,
                        help='mini-batch size')
    parser.add_argument('--optimizer', type=str, default='adam',
                        choices=['adam', 'adadelta', 'adagrad', 'sgd', 'momentum', 'nesterov', 'noam'],
                        help='type of optimizer')
    parser.add_argument('--n_epochs', type=int, default=25,
                        help='number of epochs to train the model')
    parser.add_argument('--convert_to_sgd_epoch', type=int, default=100,
                        help='epoch to convert to SGD fine-tuning')
    parser.add_argument('--print_step', type=int, default=200,
                        help='print log per this value')
    parser.add_argument('--metric', type=str, default='edit_distance',
                        choices=['edit_distance', 'loss', 'accuracy', 'ppl', 'bleu', 'mse'],
                        help='metric for evaluation during training')
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='initial learning rate')
    parser.add_argument('--lr_factor', type=float, default=10.0,
                        help='factor of learning rate for Transformer')
    parser.add_argument('--eps', type=float, default=1e-6,
                        help='epsilon parameter for Adadelta optimizer')
    parser.add_argument('--lr_decay_type', type=str, default='always',
                        choices=['always', 'metric', 'warmup'],
                        help='type of learning rate decay')
    parser.add_argument('--lr_decay_start_epoch', type=int, default=10,
                        help='epoch to start to decay learning rate')
    parser.add_argument('--lr_decay_rate', type=float, default=0.9,
                        help='decay rate of learning rate')
    parser.add_argument('--lr_decay_patient_n_epochs', type=int, default=0,
                        help='number of epochs to tolerate learning rate decay when validation performance is not improved')
    parser.add_argument('--early_stop_patient_n_epochs', type=int, default=5,
                        help='number of epochs to tolerate stopping training when validation performance is not improved')
    parser.add_argument('--sort_stop_epoch', type=int, default=10000,
                        help='epoch to stop sorting utterances by length')
    parser.add_argument('--sort_short2long', type=strtobool, default=True,
                        help='sort utterances in ascending order')
    parser.add_argument('--sort_by', type=str, default='input',
                        choices=['input', 'output', 'shuffle', 'utt_id'],
                        help='metric to sort utterances')
    parser.add_argument('--shuffle_bucket', type=strtobool, default=False,
                        help='group utterances of similar length and shuffle them')
    parser.add_argument('--eval_start_epoch', type=int, default=1,
                        help='first epoch to start evaluation')
    parser.add_argument('--warmup_start_lr', type=float, default=0,
                        help='initial learning rate for learning rate warm up')
    parser.add_argument('--warmup_n_steps', type=int, default=0,
                        help='number of steps to warm up learning rate')
    parser.add_argument('--accum_grad_n_steps', type=int, default=1,
                        help='total number of steps to accumulate gradients')
    # initialization
    parser.add_argument('--param_init', type=float, default=0.1,
                        help='')
    parser.add_argument('--asr_init', type=str, default=False, nargs='?',
                        help='pre-trained seq2seq model path')
    parser.add_argument('--asr_init_enc_only', type=strtobool, default=False,
                        help='Initialize the encoder only')
    parser.add_argument('--freeze_encoder', type=strtobool, default=False,
                        help='freeze the encoder parameter')
    # regularization
    parser.add_argument('--clip_grad_norm', type=float, default=5.0,
                        help='')
    parser.add_argument('--dropout_in', type=float, default=0.0,
                        help='dropout probability for the input')
    parser.add_argument('--dropout_enc', type=float, default=0.0,
                        help='dropout probability for the encoder')
    parser.add_argument('--dropout_dec', type=float, default=0.0,
                        help='dropout probability for the decoder')
    parser.add_argument('--dropout_emb', type=float, default=0.0,
                        help='dropout probability for the embedding')
    parser.add_argument('--dropout_att', type=float, default=0.0,
                        help='dropout probability for the attention weights')
    parser.add_argument('--weight_decay', type=float, default=0,
                        help='weight decay parameter')
    parser.add_argument('--lsm_prob', type=float, default=0.0,
                        help='probability of label smoothing')
    parser.add_argument('--ctc_lsm_prob', type=float, default=0.0,
                        help='probability of label smoothing for CTC')
    # SpecAugment
    parser.add_argument('--freq_width', type=int, default=27,
                        help='width of frequency mask for SpecAugment')
    parser.add_argument('--n_freq_masks', type=int, default=0,
                        help='number of frequency masks for SpecAugment')
    parser.add_argument('--time_width', type=int, default=100,
                        help='width of time mask for SpecAugment')
    parser.add_argument('--n_time_masks', type=int, default=0,
                        help='number of time masks for SpecAugment')
    parser.add_argument('--time_width_upper', type=float, default=1.0,
                        help='')
    parser.add_argument('--adaptive_number_ratio', type=float, default=0.0,
                        help='adaptive multiplicity ratio for time masking')
    parser.add_argument('--adaptive_size_ratio', type=float, default=0.0,
                        help='adaptive size ratio for time masking')
    parser.add_argument('--max_n_time_masks', type=int, default=20,
                        help='maximum number of time masks')
    # MTL
    parser.add_argument('--ctc_weight', type=float, default=0.0,
                        help='CTC loss weight for the main task')
    parser.add_argument('--ctc_weight_sub1', type=float, default=0.0,
                        help='CTC loss weight for the 1st auxiliary task')
    parser.add_argument('--ctc_weight_sub2', type=float, default=0.0,
                        help='CTC loss weight for the 2nd auxiliary task')
    parser.add_argument('--sub1_weight', type=float, default=0.0,
                        help='total loss weight for the 1st auxiliary task')
    parser.add_argument('--sub2_weight', type=float, default=0.0,
                        help='total loss weight for the 2nd auxiliary task')
    parser.add_argument('--mtl_per_batch', type=strtobool, default=False, nargs='?',
                        help='change mini-batch per task')
    parser.add_argument('--task_specific_layer', type=strtobool, default=False, nargs='?',
                        help='insert a task-specific encoder layer per task')
    # forward-backward
    parser.add_argument('--bwd_weight', type=float, default=0.0,
                        help='cross entropy loss weight for the backward decoder in the main task')
    # cold fusion, LM initialization
    parser.add_argument('--external_lm', type=str, default=False, nargs='?',
                        help='LM path')
    parser.add_argument('--lm_fusion', type=str, default='',
                        choices=['', 'cold', 'cold_prob', 'deep', 'cold_attention'],
                        help='type of LM fusion')
    parser.add_argument('--lm_init', type=strtobool, default=False,
                        help='initialize the decoder with the external LM')
    # contextualization
    parser.add_argument('--discourse_aware', type=strtobool, default=False, nargs='?',
                        help='carry over the last decoder state to the initial state in the next utterance')
    # MBR
    parser.add_argument('--mbr_training', type=strtobool, default=False,
                        help='Minimum Bayes Risk (MBR) training')
    parser.add_argument('--mbr_ce_weight', type=float, default=0.0,
                        help='MBR loss weight for the main task')
    parser.add_argument('--mbr_nbest', type=int, default=4,
                        help='N-best for MBR training')
    parser.add_argument('--mbr_softmax_smoothing', type=float, default=0.8,
                        help='softmax smoothing (beta) for MBR training')
    # TransformerXL
    parser.add_argument('--bptt', type=int, default=0,
                        help='number of tokens to truncate in TransformerXL decoder during training')
    parser.add_argument('--mem_len', type=int, default=0,
                        help='number of tokens for memory in TransformerXL decoder during training')
    # distillation related
    parser.add_argument('--teacher', default=False, nargs='?',
                        help='Teacher ASR model for knowledge distillation')
    parser.add_argument('--teacher_lm', default=False, nargs='?',
                        help='Teacher LM for knowledge distillation')
    parser.add_argument('--distillation_weight', type=float, default=0.1,
                        help='soft label weight for knowledge distillation')
    # special label
    parser.add_argument('--replace_sos', type=strtobool, default=False,
                        help='')
    # decoding parameters
    parser.add_argument('--recog_stdout', type=strtobool, default=False,
                        help='print to standard output during evaluation')
    parser.add_argument('--recog_n_gpus', type=int, default=0,
                        help='number of GPUs (0 indicates CPU)')
    parser.add_argument('--recog_sets', type=str, default=[], nargs='+',
                        help='tsv file paths for the evaluation sets')
    parser.add_argument('--recog_word_alignments', type=str, default=[], nargs='+',
                        help='word alignment directory paths for the evaluation sets')
    parser.add_argument('--recog_first_n_utt', type=int, default=-1,
                        help='recognize the first N utterances for quick evaluation')
    parser.add_argument('--recog_model', type=str, default=False, nargs='+',
                        help='model path')
    parser.add_argument('--recog_model_bwd', type=str, default=False, nargs='?',
                        help='model path in the reverse direction')
    parser.add_argument('--recog_dir', type=str, default=False,
                        help='directory to save decoding results')
    parser.add_argument('--recog_unit', type=str, default=False, nargs='?',
                        choices=['word', 'wp', 'char', 'phone', 'word_char', 'char_space'],
                        help='')
    parser.add_argument('--recog_metric', type=str, default='edit_distance',
                        choices=['edit_distance', 'loss', 'accuracy', 'ppl', 'bleu'],
                        help='metric for evaluation')
    parser.add_argument('--recog_oracle', type=strtobool, default=False,
                        help='recognize by teacher-forcing')
    parser.add_argument('--recog_batch_size', type=int, default=1,
                        help='size of mini-batch in evaluation')
    parser.add_argument('--recog_beam_width', type=int, default=1,
                        help='size of beam')
    parser.add_argument('--recog_max_len_ratio', type=float, default=1.0,
                        help='')
    parser.add_argument('--recog_min_len_ratio', type=float, default=0.0,
                        help='')
    parser.add_argument('--recog_length_penalty', type=float, default=0.0,
                        help='length penalty')
    parser.add_argument('--recog_length_norm', type=strtobool, default=False, nargs='?',
                        help='normalize score by hypothesis length')
    parser.add_argument('--recog_coverage_penalty', type=float, default=0.0,
                        help='coverage penalty')
    parser.add_argument('--recog_coverage_threshold', type=float, default=0.0,
                        help='coverage threshold')
    parser.add_argument('--recog_gnmt_decoding', type=strtobool, default=False, nargs='?',
                        help='adopt Google NMT beam search decoding')
    parser.add_argument('--recog_eos_threshold', type=float, default=1.5,
                        help='threshold for emitting an EOS token')
    parser.add_argument('--recog_lm_weight', type=float, default=0.0,
                        help='weight of first-path LM score')
    parser.add_argument('--recog_lm_second_weight', type=float, default=0.0,
                        help='weight of second-path LM score')
    parser.add_argument('--recog_lm_bwd_weight', type=float, default=0.0,
                        help='weight of second-path backward LM score. '
                             'First-pass backward LM in case of synchronous bidirectional decoding.')
    parser.add_argument('--recog_cache_embedding', type=strtobool, default=True,
                        help='cache token embedding')
    parser.add_argument('--recog_ctc_weight', type=float, default=0.0,
                        help='weight of CTC score')
    parser.add_argument('--recog_lm', type=str, default=False, nargs='?',
                        help='path to first path LM for shallow fusion')
    parser.add_argument('--recog_lm_second', type=str, default=False, nargs='?',
                        help='path to second path LM for rescoring')
    parser.add_argument('--recog_lm_bwd', type=str, default=False, nargs='?',
                        help='path to second path LM in the reverse direction for rescoring')
    parser.add_argument('--recog_resolving_unk', type=strtobool, default=False,
                        help='resolving UNK for the word-based model')
    parser.add_argument('--recog_fwd_bwd_attention', type=strtobool, default=False,
                        help='forward-backward attention decoding')
    parser.add_argument('--recog_bwd_attention', type=strtobool, default=False,
                        help='backward attention decoding')
    parser.add_argument('--recog_reverse_lm_rescoring', type=strtobool, default=False,
                        help='rescore with another LM in the reverse direction')
    parser.add_argument('--recog_asr_state_carry_over', type=strtobool, default=False,
                        help='carry over ASR decoder state')
    parser.add_argument('--recog_lm_state_carry_over', type=strtobool, default=False,
                        help='carry over LM state')
    parser.add_argument('--recog_softmax_smoothing', type=float, default=1.0,
                        help='softmax smoothing (beta) for diverse hypothesis generation')
    parser.add_argument('--recog_wordlm', type=strtobool, default=False,
                        help='')
    parser.add_argument('--recog_n_average', type=int, default=1,
                        help='number of models for the model averaging of Transformer')
    parser.add_argument('--recog_streaming', type=strtobool, default=False,
                        help='streaming decoding (both encoding and decoding are streaming)')
    parser.add_argument('--recog_streaming_encoding', type=strtobool, default=False,
                        help='streaming encoding (decoding is offline)')
    parser.add_argument('--recog_block_sync', type=strtobool, default=False,
                        help='block-synchronous streaming beam search decoding')
    parser.add_argument('--recog_block_sync_size', type=int, default=40,
                        help='block size in block-synchronous streaming beam search decoding')
    parser.add_argument('--recog_ctc_spike_forced_decoding', type=strtobool, default=False,
                        help='force MoChA to generate tokens corresponding to CTC spikes')
    parser.add_argument('--recog_ctc_vad', type=strtobool, default=True,
                        help='')
    parser.add_argument('--recog_ctc_vad_blank_threshold', type=int, default=40,
                        help='')
    parser.add_argument('--recog_ctc_vad_spike_threshold', type=float, default=0.1,
                        help='')
    parser.add_argument('--recog_ctc_vad_n_accum_frames', type=int, default=4000,
                        help='')
    parser.add_argument('--recog_mma_delay_threshold', type=int, default=-1,
                        help='delay threshold for MMA decoder')
    parser.add_argument('--recog_mem_len', type=int, default=0,
                        help='number of tokens for memory in TransformerXL decoder during evaluation')
    return parser
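
This parser leans on strtobool as an argparse type for boolean options. Presumably that is the stdlib helper (deprecated since Python 3.10 and removed in 3.12 along with distutils), which maps truthy/falsy strings to 1/0 and raises ValueError otherwise, so argparse reports bad values cleanly:

from distutils.util import strtobool  # deprecated in 3.10, removed in 3.12

strtobool('true')   # -> 1  (also 'y', 'yes', 't', 'on', '1')
strtobool('false')  # -> 0  (also 'n', 'no', 'f', 'off', '0')
# strtobool('maybe') raises ValueError, which argparse turns into a usage error
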
Example #11
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR, with_readunits=True)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument("--cuffdiff",
                        action='store_true',
                        dest="run_cuffdiff",
                        help="Also run cuffdiff")
    choices = ["none", "forward", "reverse"]
    default = "none"
    parser.add_argument(
        '--stranded',
        choices=choices,
        default=default,
        help=
        "Stranded library prep (default is {}; Following RSEM definition but see also"
        " http://chipster.csc.fi/manual/library-type-summary.html)".format(
            default))
    parser.add_argument(
        '--rsem-estimate-rspd',
        action='store_true',
        help="Estimate read start position distribution in RSEM")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as keys (mostly just
    # one) and lists of readunit keys as values. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as values
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal(
                "Config file overrides fastq and sample input arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    # FIXME add checks on reffa index (currently not exposed via args)

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples
    cfg_dict['rsem_extra_args'] = ''
    if args.rsem_estimate_rspd:
        cfg_dict['rsem_extra_args'] += ' --estimate-rspd'
    cfg_dict['stranded'] = args.stranded
    cfg_dict['run_cuffdiff'] = args.run_cuffdiff
    cfg_dict['paired_end'] = any(ru.get('fq2') for ru in readunits.values())
    if cfg_dict['paired_end']:
        assert all(ru.get('fq2') for ru in readunits.values()), (
            "Can't handle mix of paired-end and single-end")

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
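
The verbosity arithmetic above steps the logging level 10 points per flag away from WARNING, matching the comment table:

import logging

# logging.WARN == 30; each -q adds 10, each -v subtracts 10
assert logging.WARN + 10 * 0 - 10 * 2 == logging.DEBUG  # script -vv
assert logging.WARN + 10 * 1 - 10 * 0 == logging.ERROR  # script -q
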
Example #12
def get_trainer_parser() -> configargparse.ArgumentParser:

    parser = configargparse.ArgumentParser(description='Trainer config parser.')
    init_base_arguments(parser)

    parser.add_argument('--trainer_config_file', required=False, is_config_file=True, help='Trainer config file path.')

    parser.add_argument('--dump_dir', type=Path, default='../results', help='Dump path.')
    parser.add_argument('--experiment_name', type=str, required=True, help='Experiment name.')

    parser.add_argument('--last', type=cast2(str), default=None, help='Restored checkpoint.')

    parser.add_argument('--seed', type=cast2(int), default=None, help='Seed for random state.')

    parser.add_argument('--n_epochs', type=int, default=10, help='Number of epochs.')

    parser.add_argument('--train_batch_size', type=int, default=128, help='Number of items in batch.')
    parser.add_argument('--test_batch_size', type=int, default=16, help='Number of items in batch.')
    parser.add_argument('--batch_split', type=int, default=1,
                        help='Batch will be split into this number of chunks during training.')

    parser.add_argument('--lr', type=float, default=1e-5, help='Learning rate for optimizer.')
    parser.add_argument('--weight_decay', type=float, default=0.01, help='Weight decay for optimizer.')

    parser.add_argument('--clear_processed', action='store_true', help='Clear previous processed dataset.')

    parser.add_argument('--w_start', type=float, default=1, help='Weight of start position classification.')
    parser.add_argument('--w_end', type=float, default=1, help='Weight of end position classification.')
    parser.add_argument('--w_start_reg', type=float, default=0, help='Weight of start position regression loss.')
    parser.add_argument('--w_end_reg', type=float, default=0, help='Weight of end position regression loss.')
    parser.add_argument('--w_cls', type=float, default=1, help='Weight of doc label classification.')

    parser.add_argument('--loss', type=str, default='ce', choices=['ce', 'focal', 'smooth'],
                        help='Type of doc label classification loss')

    parser.add_argument('--smooth_alpha', type=float, default=0.01, help='Smooth CE loss parameter.')

    parser.add_argument('--focal_alpha', type=float, default=1, help='Focal loss parameter.')
    parser.add_argument('--focal_gamma', type=float, default=2, help='Focal loss parameter.')

    parser.add_argument('--max_grad_norm', type=float, default=1, help='Max norm of the gradients')
    parser.add_argument('--sync_bn', action='store_true',
                        help='Synchronize batch norm parameters during distributed training.')

    parser.add_argument('--warmup_coef', type=float, default=0.05, help='Warmup coefficient.')

    parser.add_argument('--apex_level', type=cast2(str), choices=[None, 'O0', 'O1', 'O2', 'O3'],
                        default=None, help='Apex optimization level.')
    parser.add_argument('--apex_verbosity', type=int, default=1, help='Apex output verbosity.')
    parser.add_argument('--apex_loss_scale', type=cast2(float), default=None, help='Apex loss scale coef.')

    parser.add_argument('--drop_optimizer', action='store_true',
                        help='Do not restore the optimizer and scheduler from the checkpoint.')

    parser.add_argument('--debug', action='store_true', help='Debug mode.')
    parser.add_argument('--dummy_dataset', action='store_true', help='Use a generated dataset instead of real data.')

    parser.add_argument('--local_rank', type=int, default=-1,
                        help='Local rank of the process during distributed training. '
                             'To run distributed training on a single node, set this parameter to 0.')
    parser.add_argument('--dist_backend', type=str, default='nccl', choices=['nccl'],
                        help='Distributed training backend.')
    parser.add_argument('--dist_init_method', type=str, default='tcp://127.0.0.1:9080',
                        help='Distributed training init method. Set to the master process host name.')
    parser.add_argument('--dist_world_size', type=int, default=1, help='Number of machines used during training. '
                                                                       'Can be changed during training.')

    parser.add_argument('--best_metric', choices=['map'], type=str, default='map', help='Best metric name.')
    parser.add_argument('--best_order', choices=['>', '<'], type=str, default='>', help='Best metric order.')

    parser.add_argument('--finetune', action='store_true', help='Turn on finetune mode.')
    parser.add_argument('--finetune_transformer', action='store_true', help='Finetune transformer module.')
    parser.add_argument('--finetune_position', action='store_true', help='Finetune classification head.')
    parser.add_argument('--finetune_position_reg', action='store_true', help='Finetune regression head.')
    parser.add_argument('--finetune_class', action='store_true', help='Finetune doc label classification head.')

    parser.add_argument('--bpe_dropout', type=cast2(float), default=None, help='Use BPE dropout.')

    parser.add_argument('--optimizer', type=str, default='adam', choices=['adam', 'adamod'], help='Optimizer name.')

    parser.add_argument('--train_label_weights', action='store_true', help='Use label weights in CE loss.')
    parser.add_argument('--train_sampler_weights', action='store_true', help='Use oversampling.')

    parser.add_argument('--log_file', type=str, default=None, help='This parameter is ignored. After the dump it '
                                                                   'will contain the path to the log file.')

    return parser
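
# Note (assumption, not from the source): cast2 above is a
# project-specific helper that is not shown in this snippet. A plausible
# definition, given the None defaults and choices lists that include
# None, is a caster that maps the literal string 'None' to None and
# otherwise applies the wrapped type:
def cast2(to_type):
    def cast(value):
        return None if value == 'None' else to_type(value)
    return cast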
Example #13
0
import pandas as pd
import os
import configargparse as argparse
import copy
from prediction_utils.pytorch_utils.metrics import (
    StandardEvaluator,
    FairOVAEvaluator,
    CalibrationEvaluator,
)

from .train_model import parser as parent_parser
from .train_model import read_file, filter_cohort

parser = argparse.ArgumentParser(
    parents=[parent_parser],
    conflict_handler="resolve",
    config_file_parser_class=argparse.YAMLConfigFileParser,
)

parser.add_argument(
    "--output_df_filename",
    type=str,
    default="output_df.parquet",
)

if __name__ == "__main__":
    args = parser.parse_args()
    config_dict = copy.deepcopy(args.__dict__)

    cohort = read_file(args.cohort_path)
    cohort = filter_cohort(cohort)
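
# Pattern sketch (not from the source): this example builds on a parent
# parser and redefines options via conflict_handler="resolve". A minimal
# self-contained version of that pattern:
import configargparse as argparse_demo

parent = argparse_demo.ArgumentParser(add_help=False)
parent.add_argument("--cohort_path", type=str, default="cohort.parquet")

child = argparse_demo.ArgumentParser(parents=[parent],
                                     conflict_handler="resolve")
# "resolve" lets the child redefine an option inherited from the parent
child.add_argument("--cohort_path", type=str, default="other.parquet")

print(child.parse_args([]).cohort_path)  # 'other.parquet'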
Example #14
0
def generate_parser():
    parser = configargparse.ArgumentParser()
    group_logging = parser.add_argument_group("logging")
    group_reporting = parser.add_argument_group("reporting")
    group_limits = parser.add_argument_group("limits")
    group_networking = parser.add_argument_group("networking")
    group_fs = parser.add_argument_group("filesystem")
    group_clock = parser.add_argument_group("clock")
    parser.add("-c", "--config", is_config_file=True, help="config file path")
    group_fs.add_argument(
        "--virtual_filename",
        type=str,
        default=None,
        help="Emulated filename (if different from real filename).",
    )
    group_fs.add_argument(
        "--virtual_path",
        type=str,
        default=None,
        help="Emulated file path (optional). "
        "(default: '/home/admin/zelos_dir/').",
    )
    group_logging.add_argument(
        "-v",
        "--verbosity",
        action="count",
        default=0,
        help="Increase output verbosity. Enables instruction-level tracing.",
    )
    group_logging.add_argument(
        "--log",
        type=str,
        default="info",
        help="Decide what level of logging should be used. LOG is "
        "'info', 'verbose', 'debug', 'spam', 'notice', 'warning', 'success', "
        "'error', or 'fatal'. Note that this does not affect "
        "verbosity. (default: 'info')",
    )
    group_networking.add_argument(
        "--dns",
        action="count",
        default=0,
        help="Simulate DNS response for all domains (resolve to 127.0.0.1)",
    )
    group_logging.add_argument(
        "--fasttrace",
        action="count",
        default=0,
        help="Enable instruction-level tracing only the first time a memory "
        "address is reached.",
    )
    group_limits.add_argument(
        "-t",
        "--timeout",
        type=int,
        default=0,
        help="If specified, execution will end after TIMEOUT seconds have "
        "passed.",
    )
    group_limits.add_argument(
        "-m",
        "--memlimit",
        type=int,
        default=0,
        help="Limits memory allocation to MEMLIMIT total mb.",
    )
    group_logging.add_argument(
        "--traceon",
        type=str,
        default="",
        help="[Experimental] Enable verbose tracing after specified address "
        "or API name.",
    )
    group_logging.add_argument(
        "--traceoff",
        type=str,
        default="",
        help="[Experimental] Disable verbose tracing after "
        "specified address or API name.",
    )
    group_logging.add_argument(
        "--tracethread",
        type=str,
        default="",
        help="[Experimental] Enable verbose tracing on a single thread.",
    )
    group_logging.add_argument(
        "--writetrace",
        type=str,
        default="",
        help="Print a message every time a value at the given memory "
        "location is written.",
    )
    group_clock.add_argument(
        "--date",
        type=str,
        default="2019-02-02",
        help="Emulated system date. Format: YYYY-MM-DD. "
        "(default: '2019-02-02')",
    )
    parser.add_argument(
        "--startat",
        type=str,
        default=None,
        help="[Experimental] Start execution at the given hex address.",
    )
    parser.add_argument(
        "--disableNX",
        action="store_true",
        help="Disable the no-execute bit. All memory becomes executable.",
    )
    group_reporting.add_argument(
        "--strace",
        type=str,
        default=None,
        help="Writes the system call trace to the specified output file.",
    )
    group_logging.add_argument(
        "--log_exports",
        action="store_true",
        help="Enable logging of calls to exported functions. (default: off)",
    )
    group_logging.add_argument(
        "--no_log_syscalls",
        dest="log_syscalls",
        action="store_false",
        help="Disable logging of syscalls.",
    )
    group_fs.add_argument(
        "--mount",
        action="append",
        default=[],
        help="[Experimental] Mount the specified file or path into the "
        "emulated root filesystem. Format: '--mount ARCH,DEST,"
        "SRC'. ARCH is 'x86', 'x86-64', 'arm', or 'mips'. "
        "DEST is the emulated path to mount. SRC is the absolute host path to "
        "the file or directory to mount. Can be specified multiple times to "
        "mount multiple files.",
    )
    group_fs.add_argument(
        "--env_vars",
        action="append",
        default=[],
        help="Emulated environment variables. ENV_VARS is a comma separated "
        "key value pair. Can be specified multiple times to set multiple "
        "environment variables. Format: '--env_vars FOO:bar --env_vars "
        "ZERO:point'.",
    )

    path = os.environ.get("ZELOS_PLUGIN_DIR", None)
    paths = path.split(",") if path is not None else []
    _ = PluginCommands(paths, parser)

    parser.add_argument("filename", type=str, help="Executable to emulate")
    parser.add_argument(
        "cmdline_args", type=str, nargs="*", help="Arguments to the executable"
    )
    return parser
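
# Behavior sketch (not from the source): because -c/--config is declared
# with is_config_file=True, explicit command-line flags still override
# values read from the file. File contents below are illustrative.
import tempfile
import configargparse

demo = configargparse.ArgumentParser()
demo.add("-c", "--config", is_config_file=True)
demo.add_argument("--log", type=str, default="info")

with tempfile.NamedTemporaryFile("w", suffix=".cfg", delete=False) as f:
    f.write("log = debug\n")

print(demo.parse_args(["-c", f.name]).log)                   # 'debug'
print(demo.parse_args(["-c", f.name, "--log", "spam"]).log)  # 'spam'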
Example #15
0
def config_parser():

    import configargparse
    parser = configargparse.ArgumentParser()
    parser.add_argument('--config',
                        is_config_file=True,
                        help='config file path')
    parser.add_argument("--expname", type=str, help='experiment name')
    parser.add_argument("--basedir",
                        type=str,
                        default='./logs/',
                        help='where to store ckpts and logs')
    parser.add_argument("--datadir",
                        type=str,
                        default='./data/llff/fern',
                        help='input data directory')
    parser.add_argument("--render_lockcam_slowmo",
                        action='store_true',
                        help='render fixed view + slowmo')
    parser.add_argument("--render_slowmo_bt",
                        action='store_true',
                        help='render space-time interpolation')

    parser.add_argument("--final_height",
                        type=int,
                        default=288,
                        help='training image height, default is 512x288')
    # training options
    parser.add_argument("--netdepth",
                        type=int,
                        default=8,
                        help='layers in network')
    parser.add_argument("--netwidth",
                        type=int,
                        default=256,
                        help='channels per layer')
    parser.add_argument("--netdepth_fine",
                        type=int,
                        default=8,
                        help='layers in fine network')
    parser.add_argument("--netwidth_fine",
                        type=int,
                        default=256,
                        help='channels per layer in fine network')
    parser.add_argument(
        "--N_rand",
        type=int,
        default=32 * 32 * 4,
        help='batch size (number of random rays per gradient step)')
    parser.add_argument("--lrate",
                        type=float,
                        default=5e-4,
                        help='learning rate')
    parser.add_argument("--lrate_decay",
                        type=int,
                        default=300,
                        help='exponential learning rate decay (in 1000 steps)')
    parser.add_argument(
        "--chunk",
        type=int,
        default=1024 * 128,
        help=
        'number of rays processed in parallel, decrease if running out of memory'
    )
    parser.add_argument(
        "--netchunk",
        type=int,
        default=1024 * 128,
        help=
        'number of pts sent through network in parallel, decrease if running out of memory'
    )
    parser.add_argument("--no_batching",
                        action='store_true',
                        help='only take random rays from 1 image at a time')
    parser.add_argument("--no_reload",
                        action='store_true',
                        help='do not reload weights from saved ckpt')
    parser.add_argument(
        "--ft_path",
        type=str,
        default=None,
        help='specific weights npy file to reload for coarse network')

    # rendering options
    parser.add_argument("--N_samples",
                        type=int,
                        default=64,
                        help='number of coarse samples per ray')
    parser.add_argument("--N_importance",
                        type=int,
                        default=0,
                        help='number of additional fine samples per ray')
    parser.add_argument("--perturb",
                        type=float,
                        default=1.,
                        help='set to 0. for no jitter, 1. for jitter')
    parser.add_argument("--use_viewdirs",
                        action='store_true',
                        help='use full 5D input instead of 3D')
    parser.add_argument(
        "--i_embed",
        type=int,
        default=0,
        help='set 0 for default positional encoding, -1 for none')
    parser.add_argument(
        "--multires",
        type=int,
        default=10,
        help='log2 of max freq for positional encoding (3D location)')
    parser.add_argument(
        "--multires_views",
        type=int,
        default=4,
        help='log2 of max freq for positional encoding (2D direction)')
    parser.add_argument(
        "--raw_noise_std",
        type=float,
        default=0.,
        help=
        'std dev of noise added to regularize sigma_a output, 1e0 recommended')

    parser.add_argument("--render_bt",
                        action='store_true',
                        help='render bullet time')

    parser.add_argument(
        "--render_test",
        action='store_true',
        help='do not optimize, reload weights and render out render_poses path'
    )
    parser.add_argument(
        "--render_factor",
        type=int,
        default=0,
        help=
        'downsampling factor to speed up rendering, set 4 or 8 for fast preview'
    )

    # dataset options
    parser.add_argument("--dataset_type",
                        type=str,
                        default='llff',
                        help='options: llff / blender / deepvoxels')
    parser.add_argument(
        "--testskip",
        type=int,
        default=8,
        help=
        'will load 1/N images from test/val sets, useful for large datasets like deepvoxels'
    )
    ## blender flags
    parser.add_argument(
        "--white_bkgd",
        action='store_true',
        help=
        'set to render synthetic data on a white bkgd (always use for dvoxels)'
    )

    ## llff flags
    parser.add_argument("--factor",
                        type=int,
                        default=8,
                        help='downsample factor for LLFF images')
    parser.add_argument(
        "--no_ndc",
        action='store_true',
        help=
        'do not use normalized device coordinates (set for non-forward facing scenes)'
    )
    parser.add_argument(
        "--lindisp",
        action='store_true',
        help='sampling linearly in disparity rather than depth')
    parser.add_argument("--spherify",
                        action='store_true',
                        help='set for spherical 360 scenes')
    parser.add_argument(
        "--llffhold",
        type=int,
        default=8,
        help='will take every 1/N images as LLFF test set, paper uses 8')

    parser.add_argument("--target_idx",
                        type=int,
                        default=10,
                        help='target_idx')
    parser.add_argument("--num_extra_sample",
                        type=int,
                        default=512,
                        help='num_extra_sample')
    parser.add_argument("--decay_depth_w",
                        action='store_true',
                        help='decay depth weights')
    parser.add_argument(
        "--use_motion_mask",
        action='store_true',
        help=
        'use motion segmentation mask for hard-mining data-driven initialization'
    )
    parser.add_argument("--decay_optical_flow_w",
                        action='store_true',
                        help='decay optical flow weights')

    parser.add_argument("--w_depth",
                        type=float,
                        default=0.04,
                        help='weights of depth loss')
    parser.add_argument("--w_optical_flow",
                        type=float,
                        default=0.02,
                        help='weights of optical flow loss')
    parser.add_argument("--w_sm",
                        type=float,
                        default=0.1,
                        help='weights of scene flow smoothness')
    parser.add_argument("--w_sf_reg",
                        type=float,
                        default=0.1,
                        help='weights of scene flow regularization')
    parser.add_argument("--w_cycle",
                        type=float,
                        default=0.1,
                        help='weights of cycle consistency')
    parser.add_argument("--w_prob_reg",
                        type=float,
                        default=0.1,
                        help='weights of disocclusion regularization')

    parser.add_argument("--w_entropy",
                        type=float,
                        default=1e-3,
                        help='w_entropy regularization weight')

    parser.add_argument("--decay_iteration",
                        type=int,
                        default=50,
                        help='data driven priors decay iteration * 1000')

    parser.add_argument("--chain_sf",
                        action='store_true',
                        help='5 frame consistency if true, \
                             otherwise 3 frame consistency')

    parser.add_argument("--start_frame", type=int, default=0)
    parser.add_argument("--end_frame", type=int, default=50)

    # logging/saving options
    parser.add_argument("--i_print",
                        type=int,
                        default=1000,
                        help='frequency of console printout and metric logging')
    parser.add_argument("--i_img",
                        type=int,
                        default=1000,
                        help='frequency of tensorboard image logging')
    parser.add_argument("--i_weights",
                        type=int,
                        default=10000,
                        help='frequency of weight ckpt saving')

    return parser
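
# Usage sketch (not from the source; flag values are illustrative):
# config_parser() returns the parser rather than parsed options, so
# callers typically finish with parse_args().
parser = config_parser()
args = parser.parse_args(['--expname', 'fern_test', '--no_ndc'])
print(args.basedir, args.N_samples, args.no_ndc)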
Example #16
0
def get_parser():
    """Get parser."""
    parser = configargparse.ArgumentParser(
        description='Train a new language model on one CPU or one GPU',
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)
    # general configuration
    parser.add('--config', is_config_file=True, help='config file path')
    parser.add(
        '--config2',
        is_config_file=True,
        help=
        'second config file path that overwrites the settings in `--config`.')
    parser.add(
        '--config3',
        is_config_file=True,
        help=
        'third config file path that overwrites the settings in `--config` and `--config2`.'
    )

    parser.add_argument(
        '--ngpu',
        default=None,
        type=int,
        help='Number of GPUs. If not given, use all visible devices')
    parser.add_argument('--backend',
                        default='chainer',
                        type=str,
                        choices=['chainer', 'pytorch'],
                        help='Backend library')
    parser.add_argument('--outdir',
                        type=str,
                        required=True,
                        help='Output directory')
    parser.add_argument('--debugmode', default=1, type=int, help='Debugmode')
    parser.add_argument('--dict', type=str, required=True, help='Dictionary')
    parser.add_argument('--seed', default=1, type=int, help='Random seed')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        nargs='?',
                        help='Resume the training from snapshot')
    parser.add_argument('--verbose',
                        '-V',
                        default=0,
                        type=int,
                        help='Verbose option')
    parser.add_argument('--tensorboard-dir',
                        default=None,
                        type=str,
                        nargs='?',
                        help="Tensorboard log dir path")
    parser.add_argument('--report-interval-iters',
                        default=100,
                        type=int,
                        help="Report interval iterations")
    # task related
    parser.add_argument('--train-label',
                        type=str,
                        required=True,
                        help='Filename of train label data')
    parser.add_argument('--valid-label',
                        type=str,
                        required=True,
                        help='Filename of validation label data')
    parser.add_argument('--test-label',
                        type=str,
                        help='Filename of test label data')
    parser.add_argument('--dump-hdf5-path',
                        type=str,
                        default=None,
                        help='Path to dump a preprocessed dataset as hdf5')
    # training configuration
    parser.add_argument('--opt',
                        default='sgd',
                        type=str,
                        choices=['sgd', 'adam'],
                        help='Optimizer')
    parser.add_argument(
        '--sortagrad',
        default=0,
        type=int,
        nargs='?',
        help=
        "How many epochs to use sortagrad for. 0 = deactivated, -1 = all epochs"
    )
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=300,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument(
        '--early-stop-criterion',
        default='validation/main/loss',
        type=str,
        nargs='?',
        help="Value to monitor to trigger an early stopping of the training")
    parser.add_argument(
        '--patience',
        default=3,
        type=int,
        nargs='?',
        help=
        "Number of epochs to wait without improvement before stopping the training"
    )
    parser.add_argument('--gradclip',
                        '-c',
                        type=float,
                        default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument(
        '--maxlen',
        type=int,
        default=40,
        help='Batch size is reduced if the input sequence > ML')
    parser.add_argument(
        '--model-module',
        type=str,
        default='default',
        help=
        'model defined module (default: espnet.nets.xxx_backend.lm.default:DefaultRNNLM)'
    )
    return parser
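
# Layering sketch (not from the source; file contents are illustrative):
# per the help text above, --config2 overwrites settings from --config.
# Both files must be YAML because of YAMLConfigFileParser (needs PyYAML).
import tempfile
import configargparse

demo = configargparse.ArgumentParser(
    config_file_parser_class=configargparse.YAMLConfigFileParser)
demo.add('--config', is_config_file=True)
demo.add('--config2', is_config_file=True)
demo.add_argument('--batchsize', type=int, default=300)

with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as c1:
    c1.write('batchsize: 64\n')
with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as c2:
    c2.write('batchsize: 32\n')

args = demo.parse_args(['--config', c1.name, '--config2', c2.name])
print(args.batchsize)  # 32: the second config file wins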
Example #17
0
def get_parser():
    """Get parser of training arguments."""
    parser = configargparse.ArgumentParser(
        description=
        'Train a new text-to-speech (TTS) model on one CPU, one or multiple GPUs',
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)

    # general configuration
    parser.add('--config', is_config_file=True, help='config file path')
    parser.add(
        '--config2',
        is_config_file=True,
        help=
        'second config file path that overwrites the settings in `--config`.')
    parser.add(
        '--config3',
        is_config_file=True,
        help=
        'third config file path that overwrites the settings in `--config` and `--config2`.'
    )

    parser.add_argument(
        '--ngpu',
        default=None,
        type=int,
        help='Number of GPUs. If not given, use all visible devices')
    parser.add_argument('--backend',
                        default='pytorch',
                        type=str,
                        choices=['chainer', 'pytorch'],
                        help='Backend library')
    parser.add_argument('--outdir',
                        type=str,
                        required=True,
                        help='Output directory')
    parser.add_argument('--debugmode', default=1, type=int, help='Debugmode')
    parser.add_argument('--seed', default=1, type=int, help='Random seed')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        type=str,
                        nargs='?',
                        help='Resume the training from snapshot')
    parser.add_argument('--minibatches',
                        '-N',
                        type=int,
                        default='-1',
                        help='Process only N minibatches (for debug)')
    parser.add_argument('--verbose',
                        '-V',
                        default=0,
                        type=int,
                        help='Verbose option')
    parser.add_argument('--tensorboard-dir',
                        default=None,
                        type=str,
                        nargs='?',
                        help="Tensorboard log directory path")
    parser.add_argument('--save-interval-epochs',
                        default=1,
                        type=int,
                        help="Save interval epochs")
    parser.add_argument('--report-interval-iters',
                        default=100,
                        type=int,
                        help="Report interval iterations")
    # task related
    parser.add_argument('--train-json',
                        type=str,
                        required=True,
                        help='Filename of training json')
    parser.add_argument('--valid-json',
                        type=str,
                        required=True,
                        help='Filename of validation json')
    # network architecture
    parser.add_argument(
        '--model-module',
        type=str,
        default="espnet.nets.pytorch_backend.e2e_tts_tacotron2:Tacotron2",
        help='model defined module')
    # minibatch related
    parser.add_argument(
        '--sortagrad',
        default=0,
        type=int,
        nargs='?',
        help=
        "How many epochs to use sortagrad for. 0 = deactivated, -1 = all epochs"
    )
    parser.add_argument(
        '--batch-sort-key',
        default='shuffle',
        type=str,
        choices=['shuffle', 'output', 'input'],
        nargs='?',
        help='Batch sorting key. "shuffle" only work with --batch-count "seq".'
    )
    parser.add_argument(
        '--batch-count',
        default='auto',
        choices=BATCH_COUNT_CHOICES,
        help=
        'How to count batch_size. The default (auto) will find how to count by args.'
    )
    parser.add_argument('--batch-size',
                        '--batch-seqs',
                        '-b',
                        default=0,
                        type=int,
                        help='Maximum seqs in a minibatch (0 to disable)')
    parser.add_argument('--batch-bins',
                        default=0,
                        type=int,
                        help='Maximum bins in a minibatch (0 to disable)')
    parser.add_argument(
        '--batch-frames-in',
        default=0,
        type=int,
        help='Maximum input frames in a minibatch (0 to disable)')
    parser.add_argument(
        '--batch-frames-out',
        default=0,
        type=int,
        help='Maximum output frames in a minibatch (0 to disable)')
    parser.add_argument(
        '--batch-frames-inout',
        default=0,
        type=int,
        help='Maximum input+output frames in a minibatch (0 to disable)')
    parser.add_argument(
        '--maxlen-in',
        '--batch-seq-maxlen-in',
        default=100,
        type=int,
        metavar='ML',
        help=
        'When --batch-count=seq, batch size is reduced if the input sequence length > ML.'
    )
    parser.add_argument(
        '--maxlen-out',
        '--batch-seq-maxlen-out',
        default=200,
        type=int,
        metavar='ML',
        help=
        'When --batch-count=seq, batch size is reduced if the output sequence length > ML'
    )
    parser.add_argument('--num-iter-processes',
                        default=0,
                        type=int,
                        help='Number of processes of iterator')
    parser.add_argument('--preprocess-conf',
                        type=str,
                        default=None,
                        help='The configuration file for the pre-processing')
    parser.add_argument('--use-speaker-embedding',
                        default=False,
                        type=strtobool,
                        help='Whether to use speaker embedding')
    parser.add_argument('--train-spkid-extractor',
                        default=False,
                        type=strtobool,
                        help='Whether to train speaker id network')
    parser.add_argument(
        "--train-spk-embed-dim",
        default=None,
        type=int,
        help=
        "Number of speaker embedding dimensions in training speaker id network module"
    )
    parser.add_argument('--use-second-target',
                        default=False,
                        type=strtobool,
                        help='Whether to use second target')
    # optimization related
    parser.add_argument('--opt',
                        default='adam',
                        type=str,
                        choices=['adam', 'noam'],
                        help='Optimizer')
    parser.add_argument('--accum-grad',
                        default=1,
                        type=int,
                        help='Number of gradient accumulation steps')
    parser.add_argument('--lr',
                        default=1e-3,
                        type=float,
                        help='Learning rate for optimizer')
    parser.add_argument('--eps',
                        default=1e-6,
                        type=float,
                        help='Epsilon for optimizer')
    parser.add_argument('--weight-decay',
                        default=1e-6,
                        type=float,
                        help='Weight decay coefficient for optimizer')
    parser.add_argument('--epochs',
                        '-e',
                        default=30,
                        type=int,
                        help='Number of maximum epochs')
    parser.add_argument(
        '--early-stop-criterion',
        default='validation/main/loss',
        type=str,
        nargs='?',
        help="Value to monitor to trigger an early stopping of the training")
    parser.add_argument(
        '--patience',
        default=3,
        type=int,
        nargs='?',
        help=
        "Number of epochs to wait without improvement before stopping the training"
    )
    parser.add_argument('--grad-clip',
                        default=1,
                        type=float,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--num-save-attention',
                        default=5,
                        type=int,
                        help='Number of samples of attention to be saved')
    parser.add_argument('--keep-all-data-on-mem',
                        default=False,
                        type=strtobool,
                        help='Whether to keep all data on memory')

    return parser
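
# Note (assumption, not from the source): strtobool above is imported
# elsewhere and is not shown in this snippet. A compatible definition
# for use as an argparse `type=` callable is:
def strtobool(value):
    value = value.lower()
    if value in ('yes', 'true', 't', '1'):
        return True
    if value in ('no', 'false', 'f', '0'):
        return False
    raise ValueError('invalid truth value %r' % value)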
Example #18
0
import torch
import numpy as np
import configargparse
import resnext
import time
import sys
from torch import optim as optim
import torch.nn as nn
from apex import amp

if __name__ == "__main__":
    parser = configargparse.ArgumentParser(default_config_files=[""],
                                           auto_env_var_prefix="veesion_")

    "----------------------------- Modality -----------------------------"

    parser.add_argument("-d", "--device", type=int, default=0)
    parser.add_argument("-w", "--window", type=int, default=10)
    parser.add_argument("-bs", "--batch_size", type=int, default=20)
    parser.add_argument("-ss", "--sample_size", type=int, default=224)
    parser.add_argument("-sd", "--sample_duration", type=int, default=35)
    parser.add_argument("-ag", "--all_gpus", action="store_true")
    parser.add_argument("-t", "--transfer", action="store_true")
    parser.add_argument("-bt", "--bench_train", action="store_true")
    parser.add_argument(
        "-hm",
        "--half_mode",
        type=int,
        default=0,
        help=
        "Should we use FP32 (0), or dumb half mode (1) or apex half mode (2)",
Example #19
0
def get_parser():
    """Get parser of decoding arguments."""
    parser = configargparse.ArgumentParser(
        description='Synthesize speech from text using a TTS model on one CPU',
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)
    # general configuration
    parser.add('--config', is_config_file=True, help='config file path')
    parser.add(
        '--config2',
        is_config_file=True,
        help=
        'second config file path that overwrites the settings in `--config`.')
    parser.add(
        '--config3',
        is_config_file=True,
        help=
        'third config file path that overwrites the settings in `--config` and `--config2`.'
    )

    parser.add_argument('--ngpu', default=0, type=int, help='Number of GPUs')
    parser.add_argument('--backend',
                        default='pytorch',
                        type=str,
                        choices=['chainer', 'pytorch'],
                        help='Backend library')
    parser.add_argument('--debugmode', default=1, type=int, help='Debugmode')
    parser.add_argument('--seed', default=1, type=int, help='Random seed')
    parser.add_argument('--out',
                        type=str,
                        required=True,
                        help='Output filename')
    parser.add_argument('--verbose',
                        '-V',
                        default=0,
                        type=int,
                        help='Verbose option')
    parser.add_argument('--preprocess-conf',
                        type=str,
                        default=None,
                        help='The configuration file for the pre-processing')
    # task related
    parser.add_argument('--json',
                        type=str,
                        required=True,
                        help='Filename of train label data (json)')
    parser.add_argument('--model',
                        type=str,
                        required=True,
                        help='Model file parameters to read')
    parser.add_argument('--model-conf',
                        type=str,
                        default=None,
                        help='Model config file')
    # decoding related
    parser.add_argument('--maxlenratio',
                        type=float,
                        default=5,
                        help='Maximum length ratio in decoding')
    parser.add_argument('--minlenratio',
                        type=float,
                        default=0,
                        help='Minimum length ratio in decoding')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.5,
                        help='Threshold value in decoding')
    # JJ added - start
    parser.add_argument(
        '--feat-scp',
        type=str,
        default=None,
        help='feats.scp file path to extract speaker embedding from')
    parser.add_argument(
        '--out-file',
        type=str,
        default=None,
        help='output file path to store extracted speaker embedding in')

    # JJ added - end
    return parser
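
# Usage sketch (not from the source; paths are illustrative). The
# snippet assumes configargparse is imported at module level.
parser = get_parser()
args = parser.parse_args(['--out', 'out.wav',
                          '--json', 'dump/test.json',
                          '--model', 'exp/model.best'])
print(args.maxlenratio, args.threshold)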
Example #20
0
def load_opts():
    parser = configargparse.ArgumentParser(description="main")

    parser.add('-c', '--config', is_config_file=True, help='config file path')

    # --- general
    parser.add_argument('--gpu_id',
                        type=int,
                        default=0,
                        help='-1: all, 0-7: GPU index')

    parser.add_argument('--output_dir',
                        type=str,
                        default="./save",
                        help='Path for saving results')

    parser.add_argument('--n_workers',
                        type=int,
                        default=0,
                        help='Num data workers')

    parser.add_argument('--batch_size', type=int, default=1, help='Batch size')

    parser.add_argument('--resume',
                        type=str,
                        default=None,
                        help='Checkpoints to load model weights from')

    parser.add_argument('--input_video',
                        type=str,
                        default="./save",
                        help='Input video path')

    # --- video
    parser.add_argument('--resize',
                        default=540,
                        type=int,
                        help='Scale input video to that resolution')
    parser.add_argument('--fps', type=int, default=25, help='Video input fps')

    # --- audio
    parser.add_argument('--sample_rate', type=int, default=16000, help='')

    # -- avobjects
    parser.add_argument('--n_negative_samples',
                        type=int,
                        default=30,
                        help='Shift range used for synchronization, '
                        'e.g. set to 30 for shifts from -15 to +15 frames')
    parser.add_argument('--n_peaks',
                        default=4,
                        type=int,
                        help='Number of peaks to use for separation')

    parser.add_argument('--nms_thresh',
                        type=int,
                        default=100,
                        help='Area for thresholding nms in pixels')

    # -- viz
    parser.add_argument('--const_box_size',
                        type=int,
                        default=80,
                        help='Size of bounding box in visualization')

    args = parser.parse_args()

    return args
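
# Usage sketch (not from the source): load_opts() parses sys.argv
# directly and returns the namespace, so callers patch argv in tests.
# Argument values are illustrative; the configargparse import is assumed.
import sys

sys.argv = ['demo.py', '--batch_size', '2', '--fps', '30']
opts = load_opts()
print(opts.batch_size, opts.fps)  # 2 30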
Example #21
0
import dataio
import utils
import training
import loss_functions
import modules

from torch.utils.data import DataLoader
import configargparse
from functools import partial
import torch
import re

torch.backends.cudnn.benchmark = True
torch.set_num_threads(8)

p = configargparse.ArgumentParser()
p.add('-c',
      '--config',
      required=False,
      is_config_file=True,
      help='Path to config file.')

# Experiment & I/O general properties
p.add_argument('--dataset',
               type=str,
               default='blender',
               choices=['blender', 'deepvoxels', 'llff'],
               help='which dataset to use')
p.add_argument(
    '--experiment_name',
    type=str,
Example #22
0
def get_parser(parser=None, required=True):
    if parser is None:
        parser = configargparse.ArgumentParser(
            description=
            "Train an automatic speech recognition (ASR) model on one CPU, one or multiple GPUs",
            config_file_parser_class=configargparse.YAMLConfigFileParser,
            formatter_class=configargparse.ArgumentDefaultsHelpFormatter)
    # general configuration
    parser.add('--config', is_config_file=True, help='config file path')
    parser.add(
        '--config2',
        is_config_file=True,
        help=
        'second config file path that overwrites the settings in `--config`.')
    parser.add(
        '--config3',
        is_config_file=True,
        help=
        'third config file path that overwrites the settings in `--config` and `--config2`.'
    )

    parser.add_argument(
        '--ngpu',
        default=None,
        type=int,
        help='Number of GPUs. If not given, use all visible devices')
    parser.add_argument(
        '--train-dtype',
        default="float32",
        choices=["float16", "float32", "float64", "O0", "O1", "O2", "O3"],
        help='Data type for training (only pytorch backend). '
        'O0,O1,.. flags require apex. See https://nvidia.github.io/apex/amp.html#opt-levels'
    )
    parser.add_argument('--backend',
                        default='chainer',
                        type=str,
                        choices=['chainer', 'pytorch'],
                        help='Backend library')
    parser.add_argument('--outdir',
                        type=str,
                        required=required,
                        help='Output directory')
    parser.add_argument('--debugmode', default=1, type=int, help='Debugmode')
    parser.add_argument('--dict', required=required, help='Dictionary')
    parser.add_argument('--seed', default=1, type=int, help='Random seed')
    parser.add_argument('--debugdir',
                        type=str,
                        help='Output directory for debugging')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        nargs='?',
                        help='Resume the training from snapshot')
    parser.add_argument('--minibatches',
                        '-N',
                        type=int,
                        default='-1',
                        help='Process only N minibatches (for debug)')
    parser.add_argument('--verbose',
                        '-V',
                        default=0,
                        type=int,
                        help='Verbose option')
    parser.add_argument('--tensorboard-dir',
                        default=None,
                        type=str,
                        nargs='?',
                        help="Tensorboard log dir path")
    parser.add_argument('--report-interval-iters',
                        default=100,
                        type=int,
                        help="Report interval iterations")
    # task related
    parser.add_argument('--train-json',
                        type=str,
                        default=None,
                        help='Filename of train label data (json)')
    parser.add_argument('--valid-json',
                        type=str,
                        default=None,
                        help='Filename of validation label data (json)')
    # network architecture
    parser.add_argument(
        '--model-module',
        type=str,
        default=None,
        help=
        'model defined module (default: espnet.nets.xxx_backend.e2e_asr:E2E)')
    # encoder
    parser.add_argument('--num-encs',
                        default=1,
                        type=int,
                        help='Number of encoders in the model.')
    # loss related
    parser.add_argument('--ctc_type',
                        default='warpctc',
                        type=str,
                        choices=['builtin', 'warpctc'],
                        help='Type of CTC implementation to calculate loss.')
    parser.add_argument(
        '--mtlalpha',
        default=0.5,
        type=float,
        help=
        'Multitask learning coefficient, alpha: alpha*ctc_loss + (1-alpha)*att_loss '
    )
    parser.add_argument(
        '--lsm-type',
        const='',
        default='',
        type=str,
        nargs='?',
        choices=['', 'unigram'],
        help='Apply label smoothing with a specified distribution type')
    parser.add_argument('--lsm-weight',
                        default=0.0,
                        type=float,
                        help='Label smoothing weight')
    # recognition options to compute CER/WER
    parser.add_argument('--report-cer',
                        default=False,
                        action='store_true',
                        help='Compute CER on development set')
    parser.add_argument('--report-wer',
                        default=False,
                        action='store_true',
                        help='Compute WER on development set')
    parser.add_argument('--nbest',
                        type=int,
                        default=1,
                        help='Output N-best hypotheses')
    parser.add_argument('--beam-size', type=int, default=4, help='Beam size')
    parser.add_argument('--penalty',
                        default=0.0,
                        type=float,
                        help='Insertion penalty')
    parser.add_argument('--maxlenratio',
                        default=0.0,
                        type=float,
                        help="""Input length ratio to obtain max output length.
                        If maxlenratio=0.0 (default), it uses an end-detect function
                        to automatically find maximum hypothesis lengths""")
    parser.add_argument('--minlenratio',
                        default=0.0,
                        type=float,
                        help='Input length ratio to obtain min output length')
    parser.add_argument('--ctc-weight',
                        default=0.3,
                        type=float,
                        help='CTC weight in joint decoding')
    parser.add_argument('--rnnlm',
                        type=str,
                        default=None,
                        help='RNNLM model file to read')
    parser.add_argument('--rnnlm-conf',
                        type=str,
                        default=None,
                        help='RNNLM model config file to read')
    parser.add_argument('--lm-weight',
                        default=0.1,
                        type=float,
                        help='RNNLM weight.')
    parser.add_argument('--sym-space',
                        default='<space>',
                        type=str,
                        help='Space symbol')
    parser.add_argument('--sym-blank',
                        default='<blank>',
                        type=str,
                        help='Blank symbol')
    # minibatch related
    parser.add_argument(
        '--sortagrad',
        default=0,
        type=int,
        nargs='?',
        help=
        "How many epochs to use sortagrad for. 0 = deactivated, -1 = all epochs"
    )
    parser.add_argument(
        '--batch-count',
        default='auto',
        choices=BATCH_COUNT_CHOICES,
        help=
        'How to count batch_size. The default (auto) will find how to count by args.'
    )
    parser.add_argument('--batch-size',
                        '--batch-seqs',
                        '-b',
                        default=0,
                        type=int,
                        help='Maximum seqs in a minibatch (0 to disable)')
    parser.add_argument('--batch-bins',
                        default=0,
                        type=int,
                        help='Maximum bins in a minibatch (0 to disable)')
    parser.add_argument(
        '--batch-frames-in',
        default=0,
        type=int,
        help='Maximum input frames in a minibatch (0 to disable)')
    parser.add_argument(
        '--batch-frames-out',
        default=0,
        type=int,
        help='Maximum output frames in a minibatch (0 to disable)')
    parser.add_argument(
        '--batch-frames-inout',
        default=0,
        type=int,
        help='Maximum input+output frames in a minibatch (0 to disable)')
    parser.add_argument(
        '--maxlen-in',
        '--batch-seq-maxlen-in',
        default=800,
        type=int,
        metavar='ML',
        help=
        'When --batch-count=seq, batch size is reduced if the input sequence length > ML.'
    )
    parser.add_argument(
        '--maxlen-out',
        '--batch-seq-maxlen-out',
        default=150,
        type=int,
        metavar='ML',
        help=
        'When --batch-count=seq, batch size is reduced if the output sequence length > ML'
    )
    parser.add_argument('--n-iter-processes',
                        default=0,
                        type=int,
                        help='Number of processes of iterator')
    parser.add_argument('--preprocess-conf',
                        type=str,
                        default=None,
                        nargs='?',
                        help='The configuration file for the pre-processing')
    # optimization related
    parser.add_argument('--opt',
                        default='adadelta',
                        type=str,
                        choices=['adadelta', 'adam', 'noam'],
                        help='Optimizer')
    parser.add_argument('--accum-grad',
                        default=1,
                        type=int,
                        help='Number of gradient accumulation steps')
    parser.add_argument('--eps',
                        default=1e-8,
                        type=float,
                        help='Epsilon constant for optimizer')
    parser.add_argument('--eps-decay',
                        default=0.01,
                        type=float,
                        help='Decaying ratio of epsilon')
    parser.add_argument('--weight-decay',
                        default=0.0,
                        type=float,
                        help='Weight decay ratio')
    parser.add_argument('--criterion',
                        default='acc',
                        type=str,
                        choices=['loss', 'acc'],
                        help='Criterion to perform epsilon decay')
    parser.add_argument('--threshold',
                        default=1e-4,
                        type=float,
                        help='Threshold to stop iteration')
    parser.add_argument('--epochs',
                        '-e',
                        default=30,
                        type=int,
                        help='Maximum number of epochs')
    parser.add_argument(
        '--early-stop-criterion',
        default='validation/main/acc',
        type=str,
        nargs='?',
        help="Value to monitor to trigger an early stopping of the training")
    parser.add_argument(
        '--patience',
        default=3,
        type=int,
        nargs='?',
        help=
        "Number of epochs to wait without improvement before stopping the training"
    )
    parser.add_argument('--grad-clip',
                        default=5,
                        type=float,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--num-save-attention',
                        default=3,
                        type=int,
                        help='Number of samples of attention to be saved')
    parser.add_argument(
        '--grad-noise',
        type=strtobool,
        default=False,
        help=
        'The flag to switch to use noise injection to gradients during training'
    )
    # asr_mix related
    parser.add_argument(
        '--num-spkrs',
        default=1,
        type=int,
        choices=[1, 2],
        help=
        'Maximum number of speakers in the speech for multi-speaker speech recognition task.'
    )
    # speech translation related
    parser.add_argument(
        '--context-residual',
        default=False,
        type=strtobool,
        nargs='?',
        help=
        'The flag to switch to use context vector residual in the decoder network'
    )
    parser.add_argument(
        '--replace-sos',
        default=False,
        nargs='?',
        help='Replace <sos> in the decoder with a target language ID \
                              (the first token in the target sequence)')
    # finetuning related
    parser.add_argument('--enc-init',
                        default=None,
                        type=str,
                        help='Pre-trained ASR model to initialize encoder.')
    parser.add_argument(
        '--enc-init-mods',
        default='enc.enc.',
        type=lambda s: [str(mod) for mod in s.split(',') if s != ''],
        help='List of encoder modules to initialize, separated by a comma.')
    parser.add_argument(
        '--dec-init',
        default=None,
        type=str,
        help='Pre-trained ASR, MT or LM model to initialize decoder.')
    parser.add_argument(
        '--dec-init-mods',
        default='att., dec.',
        type=lambda s: [str(mod) for mod in s.split(',') if s != ''],
        help='List of decoder modules to initialize, separated by a comma.')
    # front end related
    parser.add_argument('--use-frontend',
                        type=strtobool,
                        default=False,
                        help='The flag to switch to use frontend system.')

    # WPE related
    parser.add_argument('--use-wpe',
                        type=strtobool,
                        default=False,
                        help='Apply Weighted Prediction Error')
    parser.add_argument('--wtype',
                        default='blstmp',
                        type=str,
                        choices=[
                            'lstm', 'blstm', 'lstmp', 'blstmp', 'vgglstmp',
                            'vggblstmp', 'vgglstm', 'vggblstm', 'gru', 'bgru',
                            'grup', 'bgrup', 'vgggrup', 'vggbgrup', 'vgggru',
                            'vggbgru'
                        ],
                        help='Type of encoder network architecture '
                        'of the mask estimator for WPE.')
    parser.add_argument('--wlayers', type=int, default=2, help='')
    parser.add_argument('--wunits', type=int, default=300, help='')
    parser.add_argument('--wprojs', type=int, default=300, help='')
    parser.add_argument('--wdropout-rate', type=float, default=0.0, help='')
    parser.add_argument('--wpe-taps', type=int, default=5, help='')
    parser.add_argument('--wpe-delay', type=int, default=3, help='')
    parser.add_argument('--use-dnn-mask-for-wpe',
                        type=strtobool,
                        default=False,
                        help='Use DNN to estimate the power spectrogram. '
                        'This option is experimental.')
    # Beamformer related
    parser.add_argument('--use-beamformer',
                        type=strtobool,
                        default=True,
                        help='')
    parser.add_argument('--btype',
                        default='blstmp',
                        type=str,
                        choices=[
                            'lstm', 'blstm', 'lstmp', 'blstmp', 'vgglstmp',
                            'vggblstmp', 'vgglstm', 'vggblstm', 'gru', 'bgru',
                            'grup', 'bgrup', 'vgggrup', 'vggbgrup', 'vgggru',
                            'vggbgru'
                        ],
                        help='Type of encoder network architecture '
                        'of the mask estimator for Beamformer.')
    parser.add_argument('--blayers', type=int, default=2, help='')
    parser.add_argument('--bunits', type=int, default=300, help='')
    parser.add_argument('--bprojs', type=int, default=300, help='')
    parser.add_argument('--badim', type=int, default=320, help='')
    parser.add_argument('--bnmask',
                        type=int,
                        default=2,
                        help='Number of beamforming masks, '
                        'default is 2 for [speech, noise].')
    parser.add_argument('--ref-channel',
                        type=int,
                        default=-1,
                        help='The reference channel used for beamformer. '
                        'By default, the channel is estimated by DNN.')
    parser.add_argument('--bdropout-rate', type=float, default=0.0,
                        help='Dropout rate of the mask estimator '
                        'for the beamformer.')
    # Feature transform: Normalization
    parser.add_argument('--stats-file',
                        type=str,
                        default=None,
                        help='The stats file for the feature normalization')
    parser.add_argument('--apply-uttmvn',
                        type=strtobool,
                        default=True,
                        help='Apply utterance-level mean-variance '
                        'normalization.')
    parser.add_argument('--uttmvn-norm-means',
                        type=strtobool,
                        default=True,
                        help='Normalize means with utterance-level MVN.')
    parser.add_argument('--uttmvn-norm-vars',
                        type=strtobool,
                        default=False,
                        help='Normalize variances with utterance-level MVN.')
    # Feature transform: Fbank
    parser.add_argument('--fbank-fs',
                        type=int,
                        default=16000,
                        help='The sample frequency used for '
                        'the mel-fbank creation.')
    parser.add_argument('--n-mels',
                        type=int,
                        default=80,
                        help='The number of mel-frequency bins.')
    parser.add_argument('--fbank-fmin', type=float, default=0.,
                        help='Minimum frequency for the mel-fbank creation.')
    parser.add_argument('--fbank-fmax', type=float, default=None,
                        help='Maximum frequency for the mel-fbank creation.')
    return parser
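A quick way to smoke-test a parser like the one above is to parse a handful of flags directly. A minimal sketch, assuming the enclosing factory is named get_parser (its def line is not shown here) and that strtobool accepts the usual true/false spellings:

if __name__ == "__main__":
    # Hypothetical smoke test for the frontend options above.
    parser = get_parser()  # assumed name of the enclosing factory
    args, _ = parser.parse_known_args(
        ["--use-frontend", "true", "--use-wpe", "true", "--wlayers", "3"])
    print(args.use_wpe, args.wlayers)  # expect a truthy flag value and 3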
Example #23
0
def main():
    # set arguments
    # arguments are passed to classes
    parser = configargparse.ArgumentParser(
        description="Evaluate completeness and contamination of a MAG.")
    parser.add_argument("fasta",
                        type=str,
                        help="Run script on this bin (fasta file)")
    parser.add_argument("--db",
                        type=str,
                        required=True,
                        help="Path to EukCC DB")
    parser.add_argument(
        "--outdir",
        "-o",
        type=str,
        default="./",
        help=
        "Location for the output. Names will be prefixed using the bin filenames",
    )
    parser.add_argument(
        "--config",
        "-c",
        type=str,
        required=False,
        is_config_file=True,
        help="Config file to define parameters, YAML",
    )
    parser.add_argument(
        "--ncores",
        "-n",
        metavar="int",
        type=int,
        default=1,
        help="set number of cores for GeneMark-ES, pplacer and Hmmer",
    )
    parser.add_argument(
        "--ncorespplacer",
        metavar="int",
        type=int,
        default=0,
        help="Pplacer requires a lot of memory. If you want \
                              you can set less cores for pplacer,\
                              which improves memory consumption significantly",
    )
    parser.add_argument(
        "--hmm",
        dest="hmm",
        type=str,
        default=None,
        help="run hmmer on all these HMMs instead",
    )
    parser.add_argument(
        "--training",
        dest="training",
        action="store_true",
        default=False,
        help=
        "Run EukCC in training mode (needed to create a new release of the DB)",
    )
    parser.add_argument("--proteins",
                        default=False,
                        action="store_true",
                        dest="proteins",
                        help="Input fasta is proteins")
    parser.add_argument(
        "--bed",
        "-b",
        metavar="file.bed",
        type=str,
        default=None,
        help=
        "You can pass a bedfile of the protein locations to avoid fragmented proteins being counted twice",
    )
    parser.add_argument(
        "--force",
        "-f",
        dest="force",
        action="store_true",
        default=False,
        help="Force rerun of computation even if \
                                              output is newer than input. Don't resume previous run.",
    )
    parser.add_argument(
        "--keeptemp",
        dest="clean",
        action="store_false",
        default=True,
        help=
        "Keep all temporary files; by default EukCC will remove some temp files",
    )
    parser.add_argument(
        "--fplace",
        "-p",
        dest="fplace",
        action="store_true",
        default=False,
        help="Force rerun of placement and subsequent steps",
    )
    parser.add_argument(
        "--noglob",
        "-g",
        dest="noglob",
        action="store_true",
        default=False,
        help="Do not expand paths using glob",
    )
    parser.add_argument(
        "--quiet",
        "-q",
        dest="quiet",
        action="store_true",
        default=False,
        help="Silcence most output",
    )
    parser.add_argument(
        "--debug",
        "-d",
        action="store_true",
        default=False,
        help="Debug and thus ignore safety",
    )
    parser.add_argument(
        "--HPA",
        default=False,
        action="store_true",
        help="Set placement method to HPA",
    )
    parser.add_argument(
        "--nPlacements",
        type=int,
        default=2,
        metavar="n",
        help="Set number of proteins to support location \
                                in tree (default: 2)",
    )
    parser.add_argument(
        "--minGenomes",
        type=int,
        default=3,
        metavar="n",
        help="Minimal number of genomes to support a set (default: 3)",
    )
    parser.add_argument(
        "--fullineage",
        default=False,
        action="store_true",
        help="Output full lineage for MAGs",
    )
    parser.add_argument(
        "--minPlacementLikelyhood",
        default=0.4,
        type=float,
        metavar="float",
        help="minimal pplacer likelyhood (default: 0.4)",
    )
    parser.add_argument(
        "--mindist",
        type=int,
        default=2000,
        metavar="n",
        help="Distance to collapse hits (default: 2000)",
    )
    parser.add_argument(
        "--touch",
        default=False,
        action="store_true",
        help="Do not run, but touch all output files",
    )
    parser.add_argument(
        "--gmes",
        default=False,
        action="store_true",
        help="only run GeneMark-ES",
    )
    parser.add_argument(
        "--pygmes",
        default=False,
        action="store_true",
        help=
        "Use pygmes, which improves EukCC's ability to run on highly fragmented bins but takes longer",
    )
    parser.add_argument("--diamond",
                        default=None,
                        type=str,
                        help="required to use pygmes option")
    parser.add_argument("--plot",
                        default=False,
                        action="store_true",
                        help="produce plots")
    parser.add_argument("-v",
                        "--version",
                        action="version",
                        version=f"EukCC version {version.__version__}")
    options = parser.parse_args()

    # define logging
    logLevel = logging.INFO
    if options.quiet:
        logLevel = logging.WARNING
    elif options.debug:
        logLevel = logging.DEBUG
    logging.basicConfig(
        format="%(asctime)s %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S: ",
        level=logLevel,
    )

    # for pygmes we need a diamond DB
    if options.pygmes and options.diamond is None:
        logging.error(
            "For pygmes you need to provide a diamond database with taxonomic information"
        )
        exit(1)

    logging.debug("Launching EukCC in debug mode")
    logging.info("Starting EukCC")

    # Now we start the run with EukCC
    # All magic numbers should be defined in info.py if they are not
    # part of the configuration options
    m = workflow.eukcc(options)

    # skip gene prediction if the input is already protein sequences
    if options.bed is None and options.proteins is False and options.pygmes is False:
        # run gmes
        proteinfaa, bedfile = m.gmes(options.fasta)
    elif options.bed is None and options.proteins is False and options.pygmes is True:
        proteinfaa, bedfile = m.pygmes(options.fasta, options.diamond)
    else:
        proteinfaa = options.fasta
        if options.bed is None:
            # create bed file
            bedpath = os.path.join(options.outdir, "workfiles",
                                   "proteins_tmp.bed")
            file.isdir(os.path.join(options.outdir, "workfiles"))

            bedfile = faabed(proteinfaa, bedpath)
        else:
            bedfile = options.bed

    # terminate if only gmes step was to be run
    if m.cfg["gmes"]:
        logging.info("Finished running GeneMark-ES")
        logging.info("Terminating as requested")
        exit(0)

    # run hmm file if we are asked to
    # this is needed for training
    if m.cfg["training"] or m.cfg["hmm"]:
        logging.info("Running on custom hmm for training mode")
        m.runPlacedHMM(m.cfg["hmm"], proteinfaa, bedfile)
        logging.info("Stopping now as we are only doing training")
        exit(0)

    # place using pplacer and hmmer
    m.place(proteinfaa, bedfile)

    # concat hmms for hmmer
    hmmfile = m.concatHMM()
    # run Hmmer for sets of placement
    hits = m.runPlacedHMM(hmmfile, proteinfaa, bedfile)
    # infer lineage
    _ = m.inferLineage(m.placements[m.cfg["placementMethod"]])

    # estimate completeness and contamination
    outputfile = os.path.join(m.cfg["outdir"], "eukcc.tsv")
    m.estimate(hits, outputfile, m.placements[m.cfg["placementMethod"]])

    if m.cfg["plot"]:
        _ = m.plot()
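The whole pipeline above is driven purely by the parsed options, so it can also be exercised programmatically. A hedged sketch (the bin and DB paths are placeholders, and calling main() this way assumes the module does no other work at import time):

import sys
# Hypothetical invocation; replace the paths with a real bin and EukCC DB.
sys.argv = ["eukcc", "bin.fa", "--db", "/path/to/eukcc_db", "--outdir", "out/"]
main()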
Example #24
0
    def error_listener(self):
        """ error listener """
        (rank, original_trace) = self.error_queue.get()
        self.error_queue.put((rank, original_trace))
        os.kill(os.getpid(), signal.SIGUSR1)

    def signal_handler(self):
        """ signal handler """
        for pid in self.children_pids:
            os.kill(pid, signal.SIGINT)  # kill children processes
        (_, original_trace) = self.error_queue.get()
        msg = """\n\n-- Tracebacks above this line can probably
             be ignored --\n\n"""
        msg += original_trace
        raise Exception(msg)


if __name__ == "__main__":
    parser = configargparse.ArgumentParser(
        description='train.py',
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)

    opts.config_opts(parser)
    # opts.add_md_help_argument(parser)
    opts.model_opts(parser)
    opts.train_opts(parser)

    opt = parser.parse_args()
    main(opt)
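Because the parser is built with YAMLConfigFileParser, the same training options can come from a YAML file. A minimal standalone illustration (not the project's own opts; the option names here are invented):

import configargparse

p = configargparse.ArgumentParser(
    config_file_parser_class=configargparse.YAMLConfigFileParser)
p.add("--config", is_config_file=True)
p.add("--train_steps", type=int, default=100000)
# Given a YAML file containing the line "train_steps: 5000":
#   args = p.parse_args(["--config", "train.yaml"])  # args.train_steps == 5000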
Example #25
0
def _load_config():
    parser = configargparse.ArgumentParser(
        description=
        "Small script to create celestial instances and group them in a task")
    parser.add_argument(
        '--config',
        type=str,
        help='Path to config file (default: gen_instances_settings.yaml)',
        default="gen_instances_settings.yaml",
        is_config_file_arg=True)
    parser.add_argument('--min-n',
                        type=int,
                        default=6,
                        help="Minimum amount of vertices (default: 6)")
    parser.add_argument('--max-n',
                        type=int,
                        default=25,
                        help="Maximum amount of vertices (default: 25)")
    parser.add_argument(
        '--edge-min',
        type=float,
        default=0.5,
        help="Minimum chance an edge will be added (default: 0.1)")
    parser.add_argument(
        '--edge-max',
        type=float,
        default=1,
        help="Maximum chance an edge will be added (default: 0.8)")
    parser.add_argument(
        '--edge-step',
        type=float,
        default=0.1,
        help="Chance increase for edge added per step (default: 0.1)")
    parser.add_argument(
        '--repetitions',
        type=int,
        default=3,
        help=
        "The number of instances created per vertex count per chance step"
    )
    parser.add_argument(
        '--vertex-shape',
        type=float,
        default=[1, 1],
        nargs='*',
        help=
        "Shape of how the vertices are placed (elliptically). Can also contain a list of sizes which will be repeated. (default: [1,1] = cicle)"
    )
    parser.add_argument('--url-path',
                        type=str,
                        default="angular.db",
                        help="Path to sqlite database")
    parser.add_argument(
        '--name',
        type=str,
        default="CelestialGraphInstances",
        help="Name of the task (default: CelestialGraphInstances)")
    parser.add_argument(
        '--seed',
        type=int,
        default=None,
        help=
        "Seed for current instance creation (default: None; will set a random seed)"
    )
    try:
        # Parse normally if the default config file exists.
        with open("gen_instances_settings.yaml"):
            pass
        return parser.parse_args()
    except FileNotFoundError:
        # The default config file is missing: drop the config-file action
        # so that parsing does not fail when it tries to read the file.
        for action in [a for a in parser._actions
                       if getattr(a, "is_config_file_arg", False)]:
            parser._remove_action(action)
    return parser.parse_args()
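The fallback at the end works because configargparse marks config-file actions with an is_config_file_arg attribute. A small standalone illustration:

import configargparse

p = configargparse.ArgumentParser()
p.add_argument("--config", is_config_file=True)
p.add_argument("--min-n", type=int, default=6)
cfg_actions = [a for a in p._actions
               if getattr(a, "is_config_file_arg", False)]
print(len(cfg_actions))  # 1: only the --config action carries the marker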
Example #26
0
def get_arg_parser():
    """Create an argument parser with options used by this script."""
    # Determine arguments and get input file
    parser = configargparse.ArgumentParser(
        default_config_files=['/etc/ctitoolkit.conf', '~/.ctitoolkit'],
        description=("Utility to extract observables from local STIX files " +
                     "or a TAXII server."),
    )
    # Global options
    global_group = parser.add_argument_group('global arguments')
    global_group.add_argument(
        "-c",
        "--config",
        is_config_file=True,
        help="configuration file to use",
    )
    global_group.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="verbose output",
    )
    global_group.add_argument(
        "-d",
        "--debug",
        action="store_true",
        help="enable debug output",
    )
    # Source options
    source_group = parser.add_argument_group('input (source) options')
    source_ex_group = source_group.add_mutually_exclusive_group(
        required=True, )
    source_ex_group.add_argument(
        "--file",
        nargs="+",
        help="obtain STIX packages from supplied files or directories",
    )
    source_ex_group.add_argument(
        "--taxii",
        action="store_true",
        help="poll TAXII server to obtain STIX packages",
    )
    # Output (transform) options
    output_group = parser.add_argument_group('output (transform) options')
    output_ex_group = output_group.add_mutually_exclusive_group(
        required=True, )
    output_ex_group.add_argument(
        "-s",
        "--stats",
        action="store_true",
        help="display summary statistics for each STIX package",
    )
    output_ex_group.add_argument(
        "-t",
        "--text",
        action="store_true",
        help="output observables in delimited text",
    )
    output_ex_group.add_argument(
        "-b",
        "--bro",
        action="store_true",
        help="output observables in Bro intel framework format",
    )
    output_ex_group.add_argument(
        "-m",
        "--misp",
        action="store_true",
        help="feed output to a MISP server",
    )
    output_ex_group.add_argument(
        "-x",
        "--xml_output",
        help=("output XML STIX packages to the given directory " +
              "(use with --taxii)"),
    )
    # File source options
    file_group = parser.add_argument_group(
        title='file input arguments (use with --file)', )
    file_group.add_argument(
        "-r",
        "--recurse",
        action="store_true",
        help="recurse subdirectories when processing files.",
    )
    # TAXII source options
    taxii_group = parser.add_argument_group(
        title='taxii input arguments (use with --taxii)', )
    taxii_group.add_argument(
        "--hostname",
        help="hostname of TAXII server",
    )
    taxii_group.add_argument(
        "--port",
        help="port of TAXII server",
    )
    taxii_group.add_argument(
        "--ca_file",
        help="File containing CA certs of TAXII server",
    )
    taxii_group.add_argument(
        "--username",
        help="username for TAXII authentication",
    )
    taxii_group.add_argument(
        "--password",
        help="password for TAXII authentication",
    )
    taxii_group.add_argument(
        "--ssl",
        action="store_true",
        help="use SSL to connect to TAXII server",
    )
    taxii_group.add_argument(
        "--key",
        help="file containing PEM key for TAXII SSL authentication",
    )
    taxii_group.add_argument(
        "--cert",
        help="file containing PEM certificate for TAXII SSL authentication",
    )
    taxii_group.add_argument(
        "--path",
        help="path on TAXII server for polling",
    )
    taxii_group.add_argument(
        "--collection",
        help="TAXII collection to poll",
    )
    taxii_group.add_argument(
        "--begin-timestamp",
        help=("the begin timestamp (format: " +
              "YYYY-MM-DDTHH:MM:SS.ssssss+/-hh:mm) for the poll request"),
    )
    taxii_group.add_argument(
        "--end-timestamp",
        help=("the end timestamp (format: " +
              "YYYY-MM-DDTHH:MM:SS.ssssss+/-hh:mm) for the poll request"),
    )
    taxii_group.add_argument(
        "--subscription-id",
        help="a subscription ID for the poll request",
    )
    other_group = parser.add_argument_group(title='other output options', )
    other_group.add_argument(
        "-f",
        "--field-separator",
        help="field delimiter character/string to use in text output",
    )
    other_group.add_argument(
        "--header",
        action="store_true",
        help="include header row for text output",
    )
    other_group.add_argument(
        "--title",
        help="title for package (if not included in STIX file)",
    )
    other_group.add_argument(
        "--source",
        help="source of indicators - e.g. Hailataxii, CERT-AU",
    )
    other_group.add_argument(
        "--bro-no-notice",
        action="store_true",
        help="suppress Bro intel notice framework messages (use with --bro)",
    )
    other_group.add_argument(
        "--base-url",
        help="base URL for indicator source - use with --bro or --misp",
    )
    misp_group = parser.add_argument_group(
        title='misp output arguments (use with --misp)', )
    misp_group.add_argument(
        "--misp-url",
        help="URL of MISP server",
    )
    misp_group.add_argument(
        "--misp-key",
        help="token for accessing MISP instance",
    )
    misp_group.add_argument(
        "--misp-distribution",
        default=0,
        type=int,
        help=("MISP distribution group - default: 0 " +
              "(your organisation only)"),
    )
    misp_group.add_argument(
        "--misp-threat",
        default=4,
        type=int,
        help="MISP threat level - default: 4 (undefined)",
    )
    misp_group.add_argument(
        "--misp-analysis",
        default=0,
        type=int,
        help="MISP analysis phase - default: 0 (initial)",
    )
    misp_group.add_argument(
        "--misp-info",
        #default='Automated STIX ingest',
        help="MISP event description",
    )
    misp_group.add_argument(
        "--misp-published",
        action="store_true",
        help="set MISP published state to True",
    )
    return parser
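Since the source and output options each sit in a required mutually exclusive group, every invocation must pick exactly one of each. A minimal sketch using the factory above (missing default config files in /etc and ~ are silently skipped by configargparse):

if __name__ == "__main__":
    parser = get_arg_parser()
    # One source option (--file/--taxii) and one output option are mandatory.
    args = parser.parse_args(["--file", "report.xml", "--stats"])
    print(args.file, args.stats)  # ['report.xml'] True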
Example #27
0
def get_parser():
    """Get parser of training arguments."""
    parser = configargparse.ArgumentParser(
        description="Train a new text-to-speech (TTS) model on one CPU, "
        "one or multiple GPUs",
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter,
    )

    # general configuration
    parser.add("--config", is_config_file=True, help="config file path")
    parser.add(
        "--config2",
        is_config_file=True,
        help=
        "second config file path that overwrites the settings in `--config`.",
    )
    parser.add(
        "--config3",
        is_config_file=True,
        help="third config file path that overwrites "
        "the settings in `--config` and `--config2`.",
    )

    parser.add_argument(
        "--ngpu",
        default=None,
        type=int,
        help="Number of GPUs. If not given, use all visible devices",
    )
    parser.add_argument(
        "--backend",
        default="pytorch",
        type=str,
        choices=["chainer", "pytorch"],
        help="Backend library",
    )
    parser.add_argument("--outdir",
                        type=str,
                        required=True,
                        help="Output directory")
    parser.add_argument("--debugmode", default=1, type=int, help="Debugmode")
    parser.add_argument("--seed", default=1, type=int, help="Random seed")
    parser.add_argument(
        "--resume",
        "-r",
        default="",
        type=str,
        nargs="?",
        help="Resume the training from snapshot",
    )
    parser.add_argument(
        "--minibatches",
        "-N",
        type=int,
        default="-1",
        help="Process only N minibatches (for debug)",
    )
    parser.add_argument("--verbose",
                        "-V",
                        default=0,
                        type=int,
                        help="Verbose option")
    parser.add_argument(
        "--tensorboard-dir",
        default=None,
        type=str,
        nargs="?",
        help="Tensorboard log directory path",
    )
    parser.add_argument("--eval-interval-epochs",
                        default=1,
                        type=int,
                        help="Evaluation interval epochs")
    parser.add_argument("--save-interval-epochs",
                        default=1,
                        type=int,
                        help="Save interval epochs")
    parser.add_argument(
        "--report-interval-iters",
        default=100,
        type=int,
        help="Report interval iterations",
    )
    # task related
    parser.add_argument("--train-json",
                        type=str,
                        required=True,
                        help="Filename of training json")
    parser.add_argument("--valid-json",
                        type=str,
                        required=True,
                        help="Filename of validation json")
    # network architecture
    parser.add_argument(
        "--model-module",
        type=str,
        default="espnet.nets.pytorch_backend.e2e_tts_tacotron2:Tacotron2",
        help="model defined module",
    )
    # minibatch related
    parser.add_argument(
        "--sortagrad",
        default=0,
        type=int,
        nargs="?",
        help=
        "How many epochs to use sortagrad for. 0 = deactivated, -1 = all epochs",
    )
    parser.add_argument(
        "--batch-sort-key",
        default="shuffle",
        type=str,
        choices=["shuffle", "output", "input"],
        nargs="?",
        help='Batch sorting key. "shuffle" only works with --batch-count "seq".',
    )
    parser.add_argument(
        "--batch-count",
        default="auto",
        choices=BATCH_COUNT_CHOICES,
        help="How to count batch_size. "
        "The default (auto) will find how to count by args.",
    )
    parser.add_argument(
        "--batch-size",
        "--batch-seqs",
        "-b",
        default=0,
        type=int,
        help="Maximum seqs in a minibatch (0 to disable)",
    )
    parser.add_argument(
        "--batch-bins",
        default=0,
        type=int,
        help="Maximum bins in a minibatch (0 to disable)",
    )
    parser.add_argument(
        "--batch-frames-in",
        default=0,
        type=int,
        help="Maximum input frames in a minibatch (0 to disable)",
    )
    parser.add_argument(
        "--batch-frames-out",
        default=0,
        type=int,
        help="Maximum output frames in a minibatch (0 to disable)",
    )
    parser.add_argument(
        "--batch-frames-inout",
        default=0,
        type=int,
        help="Maximum input+output frames in a minibatch (0 to disable)",
    )
    parser.add_argument(
        "--maxlen-in",
        "--batch-seq-maxlen-in",
        default=100,
        type=int,
        metavar="ML",
        help="When --batch-count=seq, "
        "batch size is reduced if the input sequence length > ML.",
    )
    parser.add_argument(
        "--maxlen-out",
        "--batch-seq-maxlen-out",
        default=200,
        type=int,
        metavar="ML",
        help="When --batch-count=seq, "
        "batch size is reduced if the output sequence length > ML",
    )
    parser.add_argument(
        "--num-iter-processes",
        default=0,
        type=int,
        help="Number of processes of iterator",
    )
    parser.add_argument(
        "--preprocess-conf",
        type=str,
        default=None,
        help="The configuration file for the pre-processing",
    )
    parser.add_argument(
        "--use-speaker-embedding",
        default=False,
        type=strtobool,
        help="Whether to use speaker embedding",
    )
    parser.add_argument(
        "--use-second-target",
        default=False,
        type=strtobool,
        help="Whether to use second target",
    )
    # optimization related
    parser.add_argument("--opt",
                        default="adam",
                        type=str,
                        choices=["adam", "noam"],
                        help="Optimizer")
    parser.add_argument("--accum-grad",
                        default=1,
                        type=int,
                        help="Number of gradient accumuration")
    parser.add_argument("--lr",
                        default=1e-3,
                        type=float,
                        help="Learning rate for optimizer")
    parser.add_argument("--eps",
                        default=1e-6,
                        type=float,
                        help="Epsilon for optimizer")
    parser.add_argument(
        "--weight-decay",
        default=1e-6,
        type=float,
        help="Weight decay coefficient for optimizer",
    )
    parser.add_argument("--epochs",
                        "-e",
                        default=30,
                        type=int,
                        help="Number of maximum epochs")
    parser.add_argument(
        "--early-stop-criterion",
        default="validation/main/loss",
        type=str,
        nargs="?",
        help="Value to monitor to trigger an early stopping of the training",
    )
    parser.add_argument(
        "--patience",
        default=3,
        type=int,
        nargs="?",
        help="Number of epochs to wait "
        "without improvement before stopping the training",
    )
    parser.add_argument("--grad-clip",
                        default=1,
                        type=float,
                        help="Gradient norm threshold to clip")
    parser.add_argument(
        "--num-save-attention",
        default=5,
        type=int,
        help="Number of samples of attention to be saved",
    )
    parser.add_argument(
        "--keep-all-data-on-mem",
        default=False,
        type=strtobool,
        help="Whether to keep all data on memory",
    )
    # finetuning related
    parser.add_argument(
        "--enc-init",
        default=None,
        type=str,
        help="Pre-trained TTS model path to initialize encoder.",
    )
    parser.add_argument(
        "--enc-init-mods",
        default="enc.",
        type=lambda s: [mod.strip() for mod in s.split(",") if mod.strip()],
        help="List of encoder modules to initialize, separated by a comma.",
    )
    parser.add_argument(
        "--dec-init",
        default=None,
        type=str,
        help="Pre-trained TTS model path to initialize decoder.",
    )
    parser.add_argument(
        "--dec-init-mods",
        default="dec.",
        type=lambda s: [mod.strip() for mod in s.split(",") if mod.strip()],
        help="List of decoder modules to initialize, separated by a comma.",
    )
    parser.add_argument(
        "--freeze-mods",
        default=None,
        type=lambda s: [mod.strip() for mod in s.split(",") if mod.strip()],
        help="List of modules to freeze (not to train), separated by a comma.",
    )

    return parser
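Only --outdir, --train-json and --valid-json are required above, so a dry parse needs just those. A minimal sketch (file paths are placeholders, and it assumes the strtobool and BATCH_COUNT_CHOICES names used by get_parser are importable):

if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args([
        "--outdir", "exp/tts1",             # placeholder output directory
        "--train-json", "data/train.json",  # placeholder data jsons
        "--valid-json", "data/valid.json",
    ])
    print(args.model_module)  # the default Tacotron2 module string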
Example #28
0
def construct_model_config_parser(add_trainer_args=True):
    """Construct the configuration parser for the Gesticulator model and (optionally) for the Trainer.
    
    The path to the config file must be provided with the -config option, e.g.
        'python train.py -config config/model_config.yaml'
    
    The parameter names with two dashes (e.g. --data_dir) can be used in the .yaml file,
    while the parameter names with a single dash (e.g. -data) are for the command line.

    Command line values override values found in the config file.
    """
    if add_trainer_args:
        # if we call Trainer.add_argparse_args() after creating the cfgparse.ArgumentParser,
        # then it will be upcasted to the base class argparse.ArgumentParser, and we would
        # lose some of the functionality (e.g. yaml config file reading)

        # therefore we pass the Trainer arguments to the model parser as a parent_parser
        # NOTE: for details, please visit the official Pytorch Lightning documentation for Trainer
        trainer_parser = argparse.ArgumentParser(add_help=False)
        trainer_parser = Trainer.add_argparse_args(trainer_parser)

    parser = cfgparse.ArgumentParser(
        config_file_parser_class=cfgparse.YAMLConfigFileParser,
        add_help=True,
        parents=[trainer_parser] if add_trainer_args else [])

    parser.add('--config',
               '-c',
               default='config/default_model_config.yaml',
               help='Path to the .yaml config file',
               is_config_file=True)

    # Directories
    parser.add('--data_dir',
               '-data',
               default='../dataset/processed',
               help='Path to a folder with the dataset')

    parser.add(
        '--result_dir',
        '-res_d',
        default='../results',
        help='Path to the <results> directory, where all results are saved')

    parser.add('--run_name',
               '-name',
               default='last_run',
               help='Name of the subdirectory within <results> '
               'where the results of this run will be saved')

    parser.add(
        '--generated_gestures_dir',
        default=None,
        help=
        "Path to the directory where final test gestures and the predicted validation or training gestures"
        " will be saved (default: <results>/<run_name>/generated_gestures")

    parser.add('--saved_models_dir',
               '-model_d',
               default=None,
               help='Path to the directory where models will be saved '
               '(default: <results>/<run_name>/models/)')

    # Data processing parameters
    parser.add('--sequence_length',
               '-seq_l',
               default=40,
               type=int,
               help='Length of each training sequence')

    parser.add(
        '--past_context',
        '-p_cont',
        default=10,
        type=int,
        help='Length of past speech context to be used for generating gestures'
    )

    parser.add(
        '--future_context',
        '-f_cont',
        default=20,
        type=int,
        help=
        'Length of future speech context to be used for generating gestures')

    parser.add(
        '--text_context',
        '-txt_l',
        default=10,
        type=int,
        help=
        'Length of (future) text context to be used for generating gestures')

    parser.add('--speech_enc_frame_dim',
               '-speech_t_e',
               default=124,
               type=int,
               help='Dimensionality of the speech frame encoding')

    parser.add('--full_speech_enc_dim',
               '-speech_f_e',
               default=612,
               type=int,
               help='Dimensionality of the full speech encoding')

    # Network architecture
    parser.add('--n_layers',
               '-lay',
               default=1,
               type=int,
               choices=[1, 2, 3],
               help='Number of hidden layers (excluding RNN)')

    parser.add('--activation',
               '-act',
               default="TanH",
               choices=["TanH", "LeakyReLU"],
               help='The activation function')

    parser.add('--first_l_sz',
               '-first_l',
               default=256,
               type=int,
               help='Dimensionality of the first layer')

    parser.add('--second_l_sz',
               '-second_l',
               default=512,
               type=int,
               help='Dimensionality of the second layer')

    parser.add('--third_l_sz',
               '-third_l',
               default=384,
               type=int,
               help='Dimensionality of the third layer')

    parser.add('--n_prev_poses',
               '-pose_numb',
               default=3,
               type=int,
               help='Number of previous poses to consider for auto-regression')

    parser.add(
        '--text_embedding',
        '-text_emb',
        default="BERT",
        choices=["BERT", "FastText"],
        help="Which text embedding to use ('BERT' or 'FastText')")

    # Training params
    parser.add('--batch_size',
               '-btch',
               default=64,
               type=int,
               help='Batch size')
    parser.add('--learning_rate',
               '-lr',
               default=0.0001,
               type=float,
               help='Learning rate')
    parser.add('--vel_coef',
               '-vel_c',
               default=0.6,
               type=float,
               help='Coefficient for the velocity loss')
    parser.add('--dropout',
               '-drop',
               default=0.2,
               type=float,
               help='Dropout probability')
    parser.add(
        '--dropout_multiplier',
        '-d_mult',
        default=4.0,
        type=float,
        help=
        'The dropout is multiplied by this factor in the conditioning layer')

    # Prediction saving parameters
    parser.add(
        '--save_val_predictions_every_n_epoch',
        '-val_save_rate',
        default=0,
        type=int,
        help=
        'If n > 0, generate and save the predicted gestures on the first validation sequence '
        'every n training epochs (default: 0 i.e. saving is disabled)')

    parser.add(
        '--save_train_predictions_every_n_epoch',
        '-train_save_rate',
        default=0,
        type=int,
        help=
        'If n > 0, generate and save the predicted gestures on the first training sequence '
        'every n training epochs (default: 0 i.e. saving is disabled)')

    parser.add('--saved_prediction_duration_sec',
               '-gesture_len',
               default=9,
               type=int,
               help='The length of the saved gesture predictions in seconds')

    parser.add(
        '--prediction_save_formats',
        '-save_formats',
        action='append',
        default=[],
        choices=["bvh_file", "raw_gesture", "video", "3d_coordinates"],
        help='The format(s) in which the predictions will be saved. '
        'To enable multiple formats, provide the formats separately, e.g. '
        '--prediction_save_formats bvh_file --prediction_save_formats video')
    # Flags
    parser.add('--generate_semantic_test_predictions',
               '-save_semantic',
               action='store_true',
               help='If set, save the learned model\'s predictions on the '
               'predefined semantic test segments')

    parser.add('--generate_random_test_predictions',
               '-save_random',
               action='store_true',
               help='If set, save the learned model\'s predictions on the '
               'predefined random test segments')

    parser.add('--use_pca',
               '-pca',
               action='store_true',
               help='If set, use PCA on the gestures')

    parser.add('--use_recurrent_speech_enc',
               '-use_rnn',
               action='store_true',
               help='If set, use only the rnn for encoding speech frames')

    parser.add(
        '--no_overwrite_warning',
        '-no_warn',
        action='store_true',
        help=
        'If this flag is set, and the given <run_name> directory already exists, '
        'it will be cleared without displaying any warnings')

    return parser
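Command-line values override whatever the YAML file sets, so a typical run passes a config plus a few overrides. A hedged sketch (paths are placeholders; the parse is left commented out because the default --config file must exist for a real parse):

parser = construct_model_config_parser(add_trainer_args=False)
# Hypothetical parse; the -c and -data values are placeholders.
# args = parser.parse_args(["-c", "config/model_config.yaml",
#                           "-data", "../dataset/processed"])
# print(args.learning_rate)  # 0.0001 unless the YAML overrides it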
Example #29
0
def main(opt, device_id):  # device_id is 0 when training on a single GPU
    opt = training_opt_postprocessing(opt, device_id)
    init_logger(opt.log_file)
    logger.info("Input args: %r", opt)
    # Load checkpoint if we resume from a previous training.
    if opt.train_from:
        logger.info('Loading checkpoint from %s' % opt.train_from)
        checkpoint = torch.load(
            opt.train_from,
            map_location=lambda storage, loc: storage)  # load all tensors onto the CPU

        # Load default opts values then overwrite them with opts from
        # the checkpoint. It's useful in order to re-train a model
        # after adding a new option (not set in checkpoint)
        dummy_parser = configargparse.ArgumentParser()
        opts.model_opts(dummy_parser)
        default_opt = dummy_parser.parse_known_args([])[0]

        model_opt = default_opt
        model_opt.__dict__.update(checkpoint['opt'].__dict__)
    else:
        checkpoint = None
        model_opt = opt

    # Load fields generated from preprocess phase.
    fields = load_fields(opt, checkpoint)

    # Build model.
    model = build_model(model_opt, opt, fields, checkpoint)

    n_params, enc, dec = _tally_parameters(model)
    logger.info('encoder: %d' % enc)
    logger.info('decoder: %d' % dec)
    logger.info('* number of parameters: %d' % n_params)
    _check_save_model_path(opt)

    # Build optimizer.
    optim = build_optim(model, opt, checkpoint)

    # Build model saver
    model_saver = build_model_saver(model_opt, opt, model, fields, optim)

    trainer = build_trainer(opt,
                            device_id,
                            model,
                            fields,
                            optim,
                            model_saver=model_saver)

    def train_iter_fct():
        return build_dataset_iter(load_dataset("train", opt), fields, opt)

    def valid_iter_fct():
        return build_dataset_iter(load_dataset("valid", opt),
                                  fields,
                                  opt,
                                  is_train=False)

    # Do training.
    if len(opt.gpu_ranks):
        logger.info('Starting training on GPU: %s' % opt.gpu_ranks)
    else:
        logger.info('Starting training on CPU, could be very slow')
    trainer.train(train_iter_fct, valid_iter_fct, opt.train_steps,
                  opt.valid_steps)

    if opt.tensorboard:
        trainer.report_manager.tensorboard_writer.close()
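The parse_known_args([]) trick above materializes a Namespace holding every option's default, which the checkpoint's saved options then overwrite. A standalone illustration:

import configargparse

dummy = configargparse.ArgumentParser()
dummy.add_argument("--layers", type=int, default=6)
dummy.add_argument("--dropout", type=float, default=0.1)
defaults = dummy.parse_known_args([])[0]   # Namespace(layers=6, dropout=0.1)
# Options restored from a checkpoint win over the defaults:
defaults.__dict__.update({"layers": 12})
print(defaults.layers, defaults.dropout)   # 12 0.1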
Example #30
0
def generate_parser():
    parser = configargparse.ArgumentParser()
    group_logging = parser.add_argument_group("logging")
    group_feeds = parser.add_argument_group("feeds")
    group_limits = parser.add_argument_group("limits")
    group_networking = parser.add_argument_group("networking")
    group_fs = parser.add_argument_group("filesystem")
    group_clock = parser.add_argument_group("clock")
    parser.add("-c", "--config", is_config_file=True, help="config file path")
    group_fs.add_argument(
        "--virtual_filename",
        type=str,
        default=None,
        help="Emulated filename (if different from real filename).",
    )
    group_fs.add_argument(
        "--virtual_path",
        type=str,
        default=None,
        help="Emulated file path (optional). "
        "(default: '/home/admin/zelos_dir/').",
    )
    group_logging.add_argument(
        "--log",
        type=str,
        default="info",
        help="Decide what level of logging should be used. LOG is "
        "'info', 'verbose', 'debug', 'spam', 'notice', 'warning', 'success', "
        "'error', or 'fatal'. (default: 'info')",
    )
    group_networking.add_argument(
        "--dns",
        action="count",
        default=0,
        help="Simulate DNS response for all domains (resolve to 127.0.0.1)",
    )
    group_limits.add_argument(
        "-t",
        "--timeout",
        type=int,
        default=0,
        help=(
            "If specified, execution will end after TIMEOUT seconds"
            "have passed."
        ),
    )
    group_limits.add_argument(
        "-m",
        "--memlimit",
        type=int,
        default=0,
        help="Limits memory allocation to MEMLIMIT total mb.",
    )
    group_feeds.add_argument(
        "--inst_feed",
        action="append",
        nargs="?",
        default=[],
        const="",
        metavar="ZML_STRING",
        help=(
            "Provided without input, sets the feed level to INST. "
            "This results in enabling the inst, api, and syscall feeds."
            "Alternatively, A ZML string can be used to specify conditions"
            "to set the feed level to INST. Multiple triggers can be "
            "specified by using this flag multiple times."
        ),
    )
    group_feeds.add_argument(
        "--inst",
        action="store_true",
        help=("Shortcut for setting the starting feed level to INST"),
    )

    group_feeds.add_argument(
        "--func_feed",
        action="append",
        nargs="?",
        default=[],
        const="",
        metavar="ZML_STRING",
        help=(
            "Provided without input, sets the feed level to FUNC. "
            "This results in enabling the func and syscall feeds."
            "Alternatively, A ZML string can be used to specify conditions"
            "to set the feed level to FUNC. Multiple triggers can be "
            "specified by using this flag multiple times."
        ),
    )

    group_feeds.add_argument(
        "--func",
        action="store_true",
        help=("Shortcut for setting the starting feed level to FUNC"),
    )

    group_feeds.add_argument(
        "--syscall_feed",
        action="append",
        nargs="?",
        default=[],
        const="",
        metavar="ZML_STRING",
        help=(
            "Provided without input, sets the feed level to SYSCALL. "
            "This results in enabling only the syscall feed."
            "Alternatively, A ZML string can be used to specify conditions"
            "to set the feed level to SYSCALL. Multiple triggers can be "
            "specified by using this flag multiple times. This is the "
            "default feed level."
        ),
    )

    group_feeds.add_argument(
        "--syscall",
        action="store_true",
        help=(
            "Shortcut for setting the starting feed level to SYSCALL. "
            "This is a no-op since the default feel level is SYSCALL."
        ),
    )

    group_feeds.add_argument(
        "--no_feeds",
        action="append",
        nargs="?",
        default=[],
        const="",
        metavar="ZML_STRING",
        help=(
            "Provided without input, sets the feed level to NONE, disabling "
            "all feeds. Alternatively, A ZML string can be used to specify "
            "conditions to set the feed level to NONE. Multiple triggers  "
            "can be specified by using this flag multiple times."
        ),
    )

    group_logging.add_argument(
        "--writetrace",
        type=str,
        default="",
        help="Print a message every time a value at the given memory "
        "location is written.",
    )
    group_clock.add_argument(
        "--date",
        type=str,
        default="2019-02-02",
        help="Emulated system date. Format: YYYY-MM-DD. "
        "(default: '2019-02-02')",
    )
    parser.add_argument(
        "--startat",
        type=str,
        default=None,
        help="[Experimental] Start execution at the given hex address.",
    )
    parser.add_argument(
        "--disableNX",
        action="store_true",
        help="Disable the no-execute bit. All memory becomes executable.",
    )
    group_logging.add_argument(
        "--log_exports",
        action="store_true",
        help="Enable logging of calls to exported functions. (default: off)",
    )
    group_fs.add_argument(
        "--mount",
        action="append",
        default=[],
        help="[Experimental] Mount the specified file or path into the "
        "emulated root filesystem. Format: '--mount ARCH,DEST,"
        "SRC'. ARCH is 'x86', 'x86-64', 'arm', or 'mips'. "
        "DEST is the emulated path to mount. SRC is the absolute host path to "
        "the file or directory to mount. Can be specified multiple times to "
        "mount multiple files.",
    )
    group_fs.add_argument(
        "-ev",
        "--env_vars",
        metavar="KEY=VALUE",
        default={},
        help="Emulated environment variables. ENV_VARS is a key value pair "
        "of the form KEY=VALUE. Can be specified multiple times to set "
        "multiple environment variables. Format: '--env_vars FOO=bar "
        "--env_vars ZERO=point'.",
        action=_ParseEnvVars,
    )

    path = os.environ.get("ZELOS_PLUGIN_DIR", None)
    paths = path.split(",") if path is not None else []
    _ = PluginCommands(paths, parser)

    parser.add_argument("filename", type=str, help="Executable to emulate")
    parser.add_argument(
        "cmdline_args", type=str, nargs="*", help="Arguments to the executable"
    )
    return parser
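A minimal sketch of driving the parser above; the positional filename is required, and building the parser assumes the zelos plugin machinery reached through PluginCommands is importable:

if __name__ == "__main__":
    parser = generate_parser()
    args = parser.parse_args(["--inst", "--timeout", "30", "sample.bin"])
    print(args.filename, args.timeout, args.inst)  # sample.bin 30 True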