def log_basic_info(args) -> None:
    """
    Log basic information like version number, arguments, etc.

    :param args: Arguments as returned by argparse.
    """
    log_sockeye_version(logger)
    log_mxnet_version(logger)
    logger.info("Command: %s", " ".join(sys.argv))
    logger.info("Arguments: %s", args)
def main():
    params = argparse.ArgumentParser(description='Evaluate translations by calculating metrics with '
                                                 'respect to a reference set.')
    arguments.add_evaluate_args(params)
    arguments.add_logging_args(params)
    args = params.parse_args()

    if args.quiet:
        logger.setLevel(logging.ERROR)

    utils.check_condition(args.offset >= 0, "Offset should be non-negative.")
    log_sockeye_version(logger)
    logger.info("Command: %s", " ".join(sys.argv))
    logger.info("Arguments: %s", args)

    references = [' '.join(e) for e in data_io.read_content(args.references)]
    hypotheses = [h.strip() for h in args.hypotheses]
    logger.info("%d hypotheses | %d references", len(hypotheses), len(references))

    if not args.not_strict:
        utils.check_condition(len(hypotheses) == len(references),
                              "Number of hypotheses (%d) and references (%d) does not match."
                              % (len(hypotheses), len(references)))

    if not args.sentence:
        # Corpus-level scoring: one score per metric over the whole test set.
        scores = []
        for metric in args.metrics:
            if metric == C.BLEU:
                bleu_score = raw_corpus_bleu(hypotheses, references, args.offset)
                scores.append("%.6f" % bleu_score)
            elif metric == C.CHRF:
                chrf_score = chrf.corpus_chrf(hypotheses, references, trim_whitespaces=True)
                scores.append("%.6f" % chrf_score)
        print("\t".join(scores), file=sys.stdout)
    else:
        # Sentence-level scoring: one line of scores per hypothesis/reference pair.
        for h, r in zip(hypotheses, references):
            scores = []
            for metric in args.metrics:
                if metric == C.BLEU:
                    # Wrap the single sentences in lists: the metric helpers expect
                    # corpora (iterables of sentences), not bare strings.
                    bleu = raw_corpus_bleu([h], [r], args.offset)
                    scores.append("%.6f" % bleu)
                elif metric == C.CHRF:
                    chrf_score = chrf.corpus_chrf([h], [r], trim_whitespaces=True)
                    scores.append("%.6f" % chrf_score)
            print("\t".join(scores), file=sys.stdout)
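# A minimal sketch of scoring a toy corpus with the same helpers main() uses.
# The sentences are made-up data, and the 0.01 smoothing offset is an assumption
# (it mirrors a typical --offset value, not necessarily the CLI default).
def example_corpus_scores() -> None:
    """Print corpus BLEU and chrF for a two-sentence toy corpus (illustration only)."""
    hypotheses = ["the cat sat on the mat", "a dog barks"]
    references = ["the cat sat on a mat", "the dog barks"]
    bleu_score = raw_corpus_bleu(hypotheses, references, 0.01)
    chrf_score = chrf.corpus_chrf(hypotheses, references, trim_whitespaces=True)
    print("%.6f\t%.6f" % (bleu_score, chrf_score))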
def main():
    """
    Commandline interface to extract parameters.
    """
    log_sockeye_version(logger)
    params = argparse.ArgumentParser(description="Extract specific parameters.")
    arguments.add_extract_args(params)
    args = params.parse_args()

    if os.path.isdir(args.input):
        param_path = os.path.join(args.input, C.PARAMS_BEST_NAME)
    else:
        param_path = args.input
    ext_params = extract(param_path, args.names, args.list_all)

    if len(ext_params) > 0:
        utils.check_condition(args.output is not None,
                              "An output filename must be specified. (Use --output)")
        logger.info("Writing extracted parameters to '%s'", args.output)
        np.savez_compressed(args.output, **ext_params)
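# A usage sketch for the extraction CLI above. The model directory, parameter
# name and output file are hypothetical; the authoritative flag names come from
# arguments.add_extract_args.
#
#   python -m sockeye.extract_parameters model_dir --names source_embed_weight --output extracted.npz
#
# The .npz archive written by np.savez_compressed can then be inspected with plain NumPy:
def example_inspect_npz(npz_path: str) -> None:
    """List the names and shapes of the arrays stored by main() (illustration only)."""
    with np.load(npz_path) as data:
        for name in data.files:
            print(name, data[name].shape)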
def main():
    """
    Commandline interface for building top-k lexicons for use during decoding.
    """
    params = argparse.ArgumentParser(description="Build a top-k lexicon for use during decoding.")
    arguments.add_lexicon_args(params)
    arguments.add_logging_args(params)
    args = params.parse_args()

    logger = setup_main_logger(__name__, console=not args.quiet, file_logging=False)
    log_sockeye_version(logger)

    logger.info("Reading source and target vocab from \"%s\"", args.model)
    vocab_source = vocab.vocab_from_json_or_pickle(os.path.join(args.model, C.VOCAB_SRC_NAME))
    vocab_target = vocab.vocab_from_json_or_pickle(os.path.join(args.model, C.VOCAB_TRG_NAME))

    logger.info("Creating top-k lexicon from \"%s\"", args.input)
    lexicon = TopKLexicon(vocab_source, vocab_target)
    lexicon.create(args.input, args.k)
    lexicon.save(args.output)
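# A minimal sketch of consuming a saved lexicon at decoding time. The paths are
# hypothetical, and it assumes TopKLexicon exposes load() and get_trg_ids() as
# counterparts to the create()/save() calls used above; check the TopKLexicon
# class for the authoritative interface.
def example_lexicon_lookup(model_dir: str, lexicon_dir: str, src_ids) -> None:
    """Print the top-k target ids licensed by the given source ids (illustration only)."""
    vocab_source = vocab.vocab_from_json_or_pickle(os.path.join(model_dir, C.VOCAB_SRC_NAME))
    vocab_target = vocab.vocab_from_json_or_pickle(os.path.join(model_dir, C.VOCAB_TRG_NAME))
    lexicon = TopKLexicon(vocab_source, vocab_target)
    lexicon.load(lexicon_dir)
    print(lexicon.get_trg_ids(src_ids))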
def main():
    """
    Commandline interface to average parameters.
    """
    log_sockeye_version(logger)
    params = argparse.ArgumentParser(description="Averages parameters from multiple models.")
    arguments.add_average_args(params)
    args = params.parse_args()

    if len(args.inputs) > 1:
        # Explicit list of parameter files to average.
        avg_params = average(args.inputs)
    else:
        # Single model directory: select checkpoints according to the given strategy.
        param_paths = find_checkpoints(model_path=args.inputs[0],
                                       size=args.n,
                                       strategy=args.strategy,
                                       metric=args.metric)
        avg_params = average(param_paths)

    mx.nd.save(args.output, avg_params)
    logger.info("Averaged parameters written to '%s'", args.output)
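# A short sketch of calling the averaging helpers directly rather than via the
# CLI. The model directory and output path are hypothetical, and it assumes
# "best" and C.PERPLEXITY are among the accepted strategy/metric values.
def example_average_best_checkpoints(model_dir: str, output_path: str) -> None:
    """Average the 4 best checkpoints by validation metric and save them (illustration only)."""
    param_paths = find_checkpoints(model_path=model_dir, size=4, strategy="best", metric=C.PERPLEXITY)
    mx.nd.save(output_path, average(param_paths))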
def main():
    """
    Commandline interface to initialize Sockeye embedding weights with pretrained word representations.
    """
    log_sockeye_version(logger)
    params = argparse.ArgumentParser(description='Quick usage: python3 -m contrib.utils.init_embedding '
                                                 '-e embed-in-src.npy embed-in-tgt.npy '
                                                 '-i vocab-in-src.json vocab-in-tgt.json '
                                                 '-o vocab-out-src.json vocab-out-tgt.json '
                                                 '-n source_embed_weight target_embed_weight '
                                                 '-f params.init')
    arguments.add_init_embedding_args(params)
    args = params.parse_args()

    if len(args.embeddings) != len(args.vocabularies_in) or \
            len(args.embeddings) != len(args.vocabularies_out) or \
            len(args.embeddings) != len(args.names):
        logger.error("Exactly the same number of 'input embedding weights', 'input vocabularies', "
                     "'output vocabularies' and 'Sockeye parameter names' should be provided.")
        sys.exit(1)

    # Use a distinct name for the result dict to avoid shadowing the argument parser above.
    weights = {}  # type: Dict[str, mx.nd.NDArray]
    for embed_file, vocab_in_file, vocab_out_file, name in zip(args.embeddings, args.vocabularies_in,
                                                               args.vocabularies_out, args.names):
        logger.info('Loading input embedding weight: %s', embed_file)
        embed = np.load(embed_file)
        logger.info('Loading input/output vocabularies: %s %s', vocab_in_file, vocab_out_file)
        vocab_in = vocab.vocab_from_json(vocab_in_file, encoding=args.encoding)
        vocab_out = vocab.vocab_from_json(vocab_out_file)
        logger.info('Initializing parameter: %s', name)
        initializer = mx.init.Normal(sigma=np.std(embed))
        weights[name] = init_embedding(embed, vocab_in, vocab_out, initializer)

    logger.info('Saving initialized parameters to %s', args.file)
    utils.save_params(weights, args.file)
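# The quick-usage string in main()'s argparse description shows the intended
# invocation. A sketch of preparing one input for it, with made-up shapes; the
# file name matches the '-e' example above but is otherwise arbitrary:
def example_build_embedding_input(path: str = 'embed-in-src.npy') -> None:
    """Write a toy pretrained embedding matrix that main() could consume (illustration only)."""
    toy_embed = np.random.rand(100, 32).astype('float32')  # 100 vocab entries, 32 dimensions
    np.save(path, toy_embed)
# The resulting params.init file is meant to seed training, e.g. via sockeye.train's
# --params option (an assumption; confirm against your Sockeye version's --help).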