Example 1
0
def log_basic_info(args) -> None:
    """
    Emit startup diagnostics: Sockeye and MXNet version numbers, the
    invoking command line, and the parsed arguments.

    :param args: Arguments as returned by argparse.
    """
    # Version banners first, so any later log line can be tied to a build.
    log_sockeye_version(logger)
    log_mxnet_version(logger)
    command_line = " ".join(sys.argv)
    logger.info("Command: %s", command_line)
    logger.info("Arguments: %s", args)
Example 2
0
def main():
    """
    Command-line entry point: evaluate translations by computing metrics
    (BLEU and/or chrF) against a reference set, either corpus-level or
    per sentence.
    """
    params = argparse.ArgumentParser(
        description='Evaluate translations by calculating metrics with '
        'respect to a reference set.')
    arguments.add_evaluate_args(params)
    arguments.add_logging_args(params)
    args = params.parse_args()

    if args.quiet:
        logger.setLevel(logging.ERROR)

    utils.check_condition(args.offset >= 0, "Offset should be non-negative.")
    log_sockeye_version(logger)

    logger.info("Command: %s", " ".join(sys.argv))
    logger.info("Arguments: %s", args)

    references = [' '.join(e) for e in data_io.read_content(args.references)]
    hypotheses = [h.strip() for h in args.hypotheses]
    logger.info("%d hypotheses | %d references", len(hypotheses),
                len(references))

    if not args.not_strict:
        utils.check_condition(
            len(hypotheses) == len(references),
            "Number of hypotheses (%d) and references (%d) does not match." %
            (len(hypotheses), len(references)))

    if not args.sentence:
        # Corpus-level: one score per metric over the whole data set.
        scores = []
        for metric in args.metrics:
            if metric == C.BLEU:
                bleu_score = raw_corpus_bleu(hypotheses, references,
                                             args.offset)
                scores.append("%.6f" % bleu_score)
            elif metric == C.CHRF:
                chrf_score = chrf.corpus_chrf(hypotheses,
                                              references,
                                              trim_whitespaces=True)
                scores.append("%.6f" % chrf_score)
        print("\t".join(scores), file=sys.stdout)
    else:
        # Sentence-level: one line of scores per hypothesis/reference pair.
        for h, r in zip(hypotheses, references):
            scores = []
            for metric in args.metrics:
                if metric == C.BLEU:
                    # Wrap the single pair in singleton lists: the corpus-level
                    # scorers iterate sequences of sentences, and a bare string
                    # would be iterated character by character.
                    bleu = raw_corpus_bleu([h], [r], args.offset)
                    scores.append("%.6f" % bleu)
                elif metric == C.CHRF:
                    chrf_score = chrf.corpus_chrf([h], [r],
                                                  trim_whitespaces=True)
                    scores.append("%.6f" % chrf_score)
            print("\t".join(scores), file=sys.stdout)
Example 3
0
def main():
    """
    Commandline interface to extract parameters.

    Reads a parameter file (or the best checkpoint of a model directory),
    extracts the requested parameters, and writes them as a compressed
    numpy archive.
    """
    log_sockeye_version(logger)
    params = argparse.ArgumentParser(description="Extract specific parameters.")
    arguments.add_extract_args(params)
    args = params.parse_args()

    if os.path.isdir(args.input):
        # A model directory was given: use its best-checkpoint parameter file.
        param_path = os.path.join(args.input, C.PARAMS_BEST_NAME)
    else:
        param_path = args.input
    ext_params = extract(param_path, args.names, args.list_all)

    if len(ext_params) > 0:
        # Use identity comparison with None (PEP 8), not equality.
        utils.check_condition(args.output is not None, "An output filename must be specified. (Use --output)")
        logger.info("Writing extracted parameters to '%s'", args.output)
        np.savez_compressed(args.output, **ext_params)
Example 4
0
def main():
    """
    Commandline interface for building top-k lexicons for use during decoding.

    Loads the source/target vocabularies of a trained model, builds a
    top-k lexicon from the given input, and saves it to the output path.
    """
    arg_parser = argparse.ArgumentParser(description="Build a top-k lexicon for use during decoding.")
    arguments.add_lexicon_args(arg_parser)
    arguments.add_logging_args(arg_parser)
    args = arg_parser.parse_args()

    # Console logging only; silenced entirely when --quiet is given.
    logger = setup_main_logger(__name__, console=not args.quiet, file_logging=False)
    log_sockeye_version(logger)

    logger.info("Reading source and target vocab from \"%s\"", args.model)
    src_vocab_path = os.path.join(args.model, C.VOCAB_SRC_NAME)
    trg_vocab_path = os.path.join(args.model, C.VOCAB_TRG_NAME)
    vocab_source = vocab.vocab_from_json_or_pickle(src_vocab_path)
    vocab_target = vocab.vocab_from_json_or_pickle(trg_vocab_path)

    logger.info("Creating top-k lexicon from \"%s\"", args.input)
    lexicon = TopKLexicon(vocab_source, vocab_target)
    lexicon.create(args.input, args.k)
    lexicon.save(args.output)
Example 5
0
def main():
    """
    Commandline interface to average parameters.

    Either averages the explicitly listed parameter files, or — when a
    single model directory is given — selects checkpoints by the chosen
    strategy and averages those.
    """
    log_sockeye_version(logger)
    parser = argparse.ArgumentParser(
        description="Averages parameters from multiple models.")
    arguments.add_average_args(parser)
    args = parser.parse_args()

    if len(args.inputs) > 1:
        # Explicit list of parameter files to average.
        checkpoint_paths = args.inputs
    else:
        # Single model directory: pick checkpoints via the selection strategy.
        checkpoint_paths = find_checkpoints(model_path=args.inputs[0],
                                            size=args.n,
                                            strategy=args.strategy,
                                            metric=args.metric)
    avg_params = average(checkpoint_paths)

    mx.nd.save(args.output, avg_params)
    logger.info("Averaged parameters written to '%s'", args.output)
Example 6
0
def main():
    """
    Commandline interface to initialize Sockeye embedding weights with pretrained word representations.

    For each (embedding, input vocab, output vocab, parameter name)
    quadruple, remaps the pretrained rows onto the output vocabulary and
    saves all initialized parameters into a single file.
    """
    log_sockeye_version(logger)
    arg_parser = argparse.ArgumentParser(
        description='Quick usage: python3 -m contrib.utils.init_embedding '
        '-e embed-in-src.npy embed-in-tgt.npy '
        '-i vocab-in-src.json vocab-in-tgt.json '
        '-o vocab-out-src.json vocab-out-tgt.json '
        '-n source_embed_weight target_embed_weight '
        '-f params.init')
    arguments.add_init_embedding_args(arg_parser)
    args = arg_parser.parse_args()

    # All four argument lists must be parallel (same length).
    arg_lengths = {len(args.embeddings), len(args.vocabularies_in),
                   len(args.vocabularies_out), len(args.names)}
    if len(arg_lengths) != 1:
        logger.error(
            "Exactly the same number of 'input embedding weights', 'input vocabularies', "
            "'output vocabularies' and 'Sockeye parameter names' should be provided."
        )
        sys.exit(1)

    initialized = {}  # type: Dict[str, mx.nd.NDArray]
    quadruples = zip(args.embeddings, args.vocabularies_in,
                     args.vocabularies_out, args.names)
    for embed_file, vocab_in_file, vocab_out_file, name in quadruples:
        logger.info('Loading input embedding weight: %s', embed_file)
        embed = np.load(embed_file)
        logger.info('Loading input/output vocabularies: %s %s', vocab_in_file,
                    vocab_out_file)
        # NOTE(review): only the input vocab honors --encoding — confirm the
        # output vocab is always written in the default encoding.
        vocab_in = vocab.vocab_from_json(vocab_in_file, encoding=args.encoding)
        vocab_out = vocab.vocab_from_json(vocab_out_file)
        logger.info('Initializing parameter: %s', name)
        # Out-of-vocabulary rows are drawn from a normal matching the
        # pretrained embedding's spread.
        initializer = mx.init.Normal(sigma=np.std(embed))
        initialized[name] = init_embedding(embed, vocab_in, vocab_out, initializer)

    logger.info('Saving initialized parameters to %s', args.file)
    utils.save_params(initialized, args.file)