Beispiel #1
0
def main_status(argv=None):
    """Print a status report for one engine, or for every listed engine.

    When ``args.engine`` is set, only that engine is reported (after it is
    passed through ``ensure_engine_exists``); otherwise the result of
    ``Engine.list()`` is used. For every engine four lines are printed
    (REST API, cluster, binary log, database), each showing either the
    address read from the node state or the word ``stopped``.

    :param argv: command line arguments forwarded to ``parse_args_status``
    """
    args = parse_args_status(argv)

    if args.engine is not None:
        requested = Engine(args.engine)
        ensure_engine_exists(requested)
        engines = [requested]
    else:
        engines = Engine.list()

    if len(engines) == 0:
        print('No engine found.')

    for engine in engines:
        node = EngineNode(engine)
        is_running = node.running
        state = node.state

        if is_running:
            # The state attributes are only read for a running node
            rest_api_s = 'running - %s/translate' % state.api_port
            cluster_s = 'running - port %d' % state.cluster_port
            binlog_s = 'running - %s:%d' % (state.binlog_host, state.binlog_port)
            database_s = 'running - %s:%d' % (state.database_host, state.database_port)
        else:
            rest_api_s = cluster_s = binlog_s = database_s = 'stopped'

        print('[Engine: "%s"]' % engine.name)
        print('    REST API:   %s' % rest_api_s)
        print('    Cluster:    %s' % cluster_s)
        print('    Binary log: %s' % binlog_s)
        print('    Database:   %s' % database_s)
Beispiel #2
0
def main(argv=None):
    """Prepare the engine folders and run a ``CreateActivity``.

    If the engine already exists its node is stopped first; unless
    ``--resume`` or ``--force-delete`` was given, ``confirm_or_die`` is
    called beforehand. When not resuming, the engine folder, the training
    temporary folder and the training log file are wiped and recreated.

    :param argv: command line arguments forwarded to ``parse_args``
    """
    args, extra_argv = parse_args(argv)
    engine = Engine(args.engine)

    wdir = engine.get_tempdir('training')
    log_file = engine.get_logfile('training', ensure=True, append=True)

    if engine.exists():
        if not (args.resume or args.force_delete):
            confirm_or_die(args.engine)

        EngineNode(engine).stop()

    if not args.resume:
        # Start from a clean engine folder
        if os.path.isdir(engine.path):
            shutil.rmtree(engine.path)
        os.makedirs(engine.path)

        # Start from a clean working folder
        shutil.rmtree(wdir, ignore_errors=True)
        os.makedirs(wdir)

        # Drop the log of any previous run
        if os.path.isfile(log_file):
            os.remove(log_file)

    activity = CreateActivity(
        engine, args, extra_argv,
        wdir=wdir, log_file=log_file, delete_on_exit=not args.debug)
    activity.run()
Beispiel #3
0
    def __init__(self, name, source_lang, target_lang):
        """Initialise the base engine, then attach the phrase-based models.

        :param name: engine name, forwarded to ``Engine.__init__``
        :param source_lang: source language, forwarded to ``Engine.__init__``
        :param target_lang: target language, forwarded to ``Engine.__init__``
        """
        Engine.__init__(self, name, source_lang, target_lang)

        # Phrase-based specific models, stored under <models_path>/decoder
        decoder_path = os.path.join(self.models_path, 'decoder')
        self.moses = Moses(decoder_path, self.source_lang, self.target_lang)
        self.vocabulary_path = self.moses.vb.model
Beispiel #4
0
def main(argv=None):
    """Run an ``EvaluateActivity`` against an engine node.

    The engine and its node are passed through the ``ensure_*`` checks,
    the 'evaluate' temporary folder is recreated empty, and the activity
    is executed. With ``--debug`` the activity is told not to delete its
    files on exit.

    :param argv: command line arguments forwarded to ``parse_args``
    """
    args = parse_args(argv)

    engine = Engine(args.engine)
    ensure_engine_exists(engine)

    node = EngineNode(engine)
    ensure_node_running(node)
    ensure_node_has_api(node)

    # Always start from an empty working folder
    work_dir = engine.get_tempdir('evaluate')
    shutil.rmtree(work_dir, ignore_errors=True)
    os.makedirs(work_dir)

    EvaluateActivity(node, args, wdir=work_dir, delete_on_exit=not args.debug).run()
Beispiel #5
0
def main(argv=None):
    """Translate a single text, or stdin, through an engine node.

    When a TEXT argument is given it is stripped, translated and printed.
    Otherwise stdin is translated to stdout by an XLIFF, batch or
    interactive translator, chosen from the ``--xliff`` / ``--batch``
    flags. A ``KeyboardInterrupt`` during that run ends it quietly.

    :param argv: command line arguments forwarded to ``parse_args``
    """
    args = parse_args(argv)

    node = EngineNode(Engine(args.engine))
    ensure_node_running(node)
    ensure_node_has_api(node)

    mmt = ModernMTTranslate(
        node, args.source_lang, args.target_lang,
        context_string=args.context,
        context_file=args.context_file,
        context_vector=args.context_vector,
        split_lines=args.split_lines)

    # One-shot mode: translate the positional text and stop
    if args.text is not None:
        print(mmt.translate_text(args.text.strip()))
        return

    translator_class = XLIFFTranslator if args.is_xliff else (
        BatchTranslator if args.batch else InteractiveTranslator)
    translator = translator_class(mmt)

    try:
        translator.run(sys.stdin, sys.stdout, threads=args.threads)
    except KeyboardInterrupt:
        pass  # exit
Beispiel #6
0
def _load_node(engine_name):
    """Build the ``EngineNode`` for ``engine_name`` and return it.

    The engine is passed through ``ensure_engine_exists`` and the node
    through ``ensure_node_running`` and ``ensure_node_has_api`` before it
    is returned.

    :param engine_name: name of the engine whose node is wanted
    :return: the ``EngineNode`` wrapping that engine
    """
    engine = Engine(engine_name)
    ensure_engine_exists(engine)

    engine_node = EngineNode(engine)
    ensure_node_running(engine_node)
    ensure_node_has_api(engine_node)
    return engine_node
Beispiel #7
0
    def __init__(self, name, source_lang, target_lang, bpe_symbols, max_vocab_size=None, vocab_pruning_threshold=None):
        """Initialise the base engine, then attach the neural models.

        :param name: engine name, forwarded to ``Engine.__init__``
        :param source_lang: source language; also used in the model file name
        :param target_lang: target language; also used in the model file name
        :param bpe_symbols: forwarded to ``NMTPreprocessor``
        :param max_vocab_size: forwarded to ``NMTPreprocessor``
        :param vocab_pruning_threshold: forwarded to ``NMTPreprocessor``
        """
        Engine.__init__(self, name, source_lang, target_lang)

        self._bleu_script = os.path.join(PYOPT_DIR, 'mmt-bleu.perl')

        # Neural specific models all live under <models_path>/decoder
        decoder_path = os.path.join(self.models_path, 'decoder')
        memory_path = os.path.join(decoder_path, 'memory')
        decoder_model = os.path.join(decoder_path, 'model.%s__%s' % (source_lang, target_lang))

        self.memory = TranslationMemory(memory_path, self.source_lang, self.target_lang)
        self.nmt_preprocessor = NMTPreprocessor(
            self.source_lang, self.target_lang,
            bpe_symbols=bpe_symbols,
            max_vocab_size=max_vocab_size,
            vocab_pruning_threshold=vocab_pruning_threshold)
        self.decoder = NMTDecoder(decoder_model, self.source_lang, self.target_lang)
Beispiel #8
0
def parse_args(argv=None):
    """Parse and complete the arguments of ``mmt evaluate``.

    After parsing, a missing language pair is taken from the engine when
    it has a single pair, and a missing test set path is taken from
    ``engine.get_test_path``.

    :param argv: argument list handed to ``ArgumentParser.parse_args``
    :return: the parsed namespace, with ``src_lang``, ``tgt_lang`` and
        ``test_set`` filled in
    :raises CLIArgsException: if a language is missing for an engine with
        more than one language pair, or if no parallel corpora are found
        in the test set path
    """
    parser = argparse.ArgumentParser(description='Evaluate a ModernMT engine', prog='mmt evaluate')
    parser.add_argument(
        '-s', '--source', dest='src_lang', metavar='SOURCE_LANGUAGE', default=None,
        help='the source language (ISO 639-1). Can be omitted if engine is monolingual.')
    parser.add_argument(
        '-t', '--target', dest='tgt_lang', metavar='TARGET_LANGUAGE', default=None,
        help='the target language (ISO 639-1). Can be omitted if engine is monolingual.')
    parser.add_argument(
        '--path', dest='test_set', metavar='CORPORA', default=None,
        help='the path to the test corpora (default is the automatically extracted sample)')

    parser.add_argument(
        '-e', '--engine', dest='engine', default='default',
        help="the engine name, 'default' will be used if absent")
    parser.add_argument(
        '--gt-key', dest='google_key', metavar='GT_API_KEY', default=None,
        help='A custom Google Translate API Key to use during evaluation')
    parser.add_argument(
        '--human-eval', dest='human_eval_path', metavar='OUTPUT', default=None,
        help='the output folder for the tab-spaced files needed to setup a Human Evaluation benchmark')
    parser.add_argument(
        '-d', '--debug', action='store_true', dest='debug',
        help='if debug is set, prevents temporary files to be removed after execution')

    # Context arguments
    parser.add_argument(
        '--context', metavar='CONTEXT', dest='context',
        help='A string to be used as translation context')
    parser.add_argument(
        '--context-file', metavar='CONTEXT_FILE', dest='context_file',
        help='A local file to be used as translation context')
    parser.add_argument(
        '--context-vector', metavar='CONTEXT_VECTOR', dest='context_vector',
        help='The context vector with format: <document 1>:<score 1>[,<document N>:<score N>]')

    args = parser.parse_args(argv)

    engine = Engine(args.engine)

    language_missing = args.src_lang is None or args.tgt_lang is None
    if language_missing:
        if len(engine.languages) > 1:
            raise CLIArgsException(
                parser,
                'Missing language. Options "-s" and "-t" are mandatory for multilingual engines.')
        # Fall back to the first language pair of the engine
        args.src_lang, args.tgt_lang = engine.languages[0]

    if args.test_set is None:
        args.test_set = engine.get_test_path(args.src_lang, args.tgt_lang)

    corpora = ParallelFileFormat.list(args.src_lang, args.tgt_lang, args.test_set)
    if len(corpora) == 0:
        raise CLIArgsException(parser, 'No parallel corpora found in path: ' + args.test_set)

    return args
Beispiel #9
0
    def connect(engine_name, silent=False):
        """Load an existing engine and wrap it in a ``ClusterNode``.

        :param engine_name: name handed to ``Engine.load``
        :param silent: when true, an ``IllegalArgumentException`` raised by
            ``Engine.load`` is swallowed and ``None`` is returned instead
        :return: a ``ClusterNode`` for the loaded engine, or ``None`` when no
            engine was obtained
        :raises IllegalArgumentException: re-raised from ``Engine.load``
            unless ``silent`` is true
        """
        try:
            # Load the already created engine
            engine = Engine.load(engine_name)
        except IllegalArgumentException:
            if not silent:
                raise
            return None

        if engine is None:
            return None

        # create a clusterNode and load its node.status file
        return ClusterNode(engine)
Beispiel #10
0
def main_stop(argv=None):
    """Stop the node of an engine, reporting progress on stdout.

    Prints ``Halting engine "<name>"...`` followed by ``OK`` on success.
    On any ``Exception`` it prints ``FAIL`` and re-raises.

    :param argv: command line arguments forwarded to ``parse_args_stop``
    """
    options = parse_args_stop(argv)

    engine = Engine(options.engine)
    ensure_engine_exists(engine)
    engine_node = EngineNode(engine)
    ensure_node_running(engine_node)

    try:
        # No newline: the outcome is appended to the same line
        print('Halting engine "%s"...' % engine.name, end='', flush=True)
        engine_node.stop(force=options.forced)
        print('OK', flush=True)
    except Exception:
        print('FAIL', flush=True)
        raise
Beispiel #11
0
def main_start(argv=None):
    """Start the node of an engine and wait until it reports RUNNING.

    Progress is printed in two steps (join, then model loading), each
    ending in ``OK``. On any ``Exception`` it prints ``FAIL`` and
    re-raises. Whenever the function leaves the start sequence without
    completing it, ``node.stop()`` is called.

    :param argv: command line arguments forwarded to ``parse_args_start``
    """
    args = parse_args_start(argv)

    engine = Engine(args.engine)
    ensure_engine_exists(engine)
    node = EngineNode(engine)
    ensure_node_not_running(node)

    started = False

    try:
        # start the ClusterNode
        print('Starting engine "%s"...' % engine.name, end='', flush=True)
        start_options = {
            'api_port': args.api_port,
            'cluster_port': args.cluster_port,
            'binlog_port': args.binlog_port,
            'db_port': args.db_port,
            'leader': args.leader,
            'verbosity': args.verbosity,
            'remote_debug': args.remote_debug,
            'log_file': args.log_file,
        }
        node.start(**start_options)
        node.wait('JOINED')
        print('OK', flush=True)

        print('Loading models...', end='', flush=True)
        node.wait('RUNNING')
        print('OK', flush=True)

        # the node has started
        print('\nEngine "%s" started successfully\n' % engine.name)

        if node.api is not None:
            print(
                'You can try the API with:\n'
                '\tcurl "%s/translate?q=world&source=en&target=it&context=computer"'
                ' | python -mjson.tool\n' % node.api.base_path)
        started = True
    except Exception:
        print('FAIL', flush=True)
        raise
    finally:
        # Covers every exit path that did not reach the end of the try body
        if not started:
            node.stop()
Beispiel #12
0
def main(argv=None):
    """Translate a single text, or stdin, with the selected backend.

    The backend is an ``EchoTranslate`` when ``args.echo`` is set, a
    ``ModernMTEnterpriseTranslate`` when an API key is given, and
    otherwise a ``ModernMTTranslate`` bound to the local engine node.
    When a TEXT argument is given it is stripped, translated and printed;
    otherwise stdin is translated to stdout by an XLIFF, batch or
    interactive translator. A ``KeyboardInterrupt`` during that run ends
    it quietly.

    :param argv: command line arguments forwarded to ``parse_args``
    """
    args = parse_args(argv)

    if args.echo:
        engine = EchoTranslate(args.source_lang, args.target_lang)
    elif args.api_key is not None:
        engine = ModernMTEnterpriseTranslate(
            args.source_lang, args.target_lang, args.api_key,
            context_vector=args.context_vector)
    else:
        # local ModernMT engine
        node = EngineNode(Engine(args.engine))
        ensure_node_running(node)
        ensure_node_has_api(node)

        engine = ModernMTTranslate(
            node, args.source_lang, args.target_lang,
            context_string=args.context,
            context_file=args.context_file,
            context_vector=args.context_vector,
            split_lines=args.split_lines)

    # One-shot mode: translate the positional text and stop
    if args.text is not None:
        print(engine.translate_text(args.text.strip()))
        return

    translator_class = XLIFFTranslator if args.is_xliff else (
        BatchTranslator if args.batch else InteractiveTranslator)
    translator = translator_class(engine)

    try:
        translator.run(sys.stdin, sys.stdout,
                       threads=args.threads, suppress_errors=args.quiet)
    except KeyboardInterrupt:
        pass  # exit
Beispiel #13
0
 def __init__(self, engine_name):
     """Bind this object to an engine, a REST API handle and the mmt script.

     :param engine_name: name handed to the ``Engine`` constructor
     """
     self.engine = Engine(engine_name)
     # The REST API handle always uses the fixed port 8045
     self.api = EngineNode.RestApi(port=8045)
     # Path of the 'mmt' entry under the MMT home directory
     script_path = os.path.join(mmt.MMT_HOME_DIR, 'mmt')
     self._mmt_script = script_path
Beispiel #14
0
 def __init__(self, engine_name):
     """Bind this object to an engine; no process is attached yet.

     :param engine_name: name handed to the ``Engine`` constructor
     """
     engine = Engine(engine_name)
     self.engine = engine
     # Starts out as None; nothing in this method assigns a process
     self._process = None
Beispiel #15
0
def parse_args(argv=None):
    """Parse and complete the arguments of ``mmt translate``.

    When ``-s`` or ``-t`` is omitted, the language pair is taken from the
    engine, provided the engine has a single language pair.

    :param argv: argument list handed to ``ArgumentParser.parse_args``
    :return: the parsed namespace, with ``source_lang`` and ``target_lang``
        filled in from the engine when they were not given
    :raises CLIArgsException: if a language is missing and the engine has
        more than one language pair
    """
    parser = argparse.ArgumentParser(
        description='Translate text with ModernMT', prog='mmt translate')
    parser.add_argument('text',
                        metavar='TEXT',
                        help='text to be translated (optional)',
                        default=None,
                        nargs='?')
    parser.add_argument(
        '-s',
        '--source',
        dest='source_lang',
        metavar='SOURCE_LANGUAGE',
        default=None,
        help='the source language (ISO 639-1). Can be omitted if engine is monolingual.')
    parser.add_argument(
        '-t',
        '--target',
        dest='target_lang',
        metavar='TARGET_LANGUAGE',
        default=None,
        help='the target language (ISO 639-1). Can be omitted if engine is monolingual.')

    # Context arguments
    parser.add_argument('--context',
                        metavar='CONTEXT',
                        dest='context',
                        help='A string to be used as translation context')
    parser.add_argument('--context-file',
                        metavar='CONTEXT_FILE',
                        dest='context_file',
                        help='A local file to be used as translation context')
    parser.add_argument(
        '--context-vector',
        metavar='CONTEXT_VECTOR',
        dest='context_vector',
        help='The context vector with format: <document 1>:<score 1>[,<document N>:<score N>]')

    # Mixed arguments
    parser.add_argument(
        '-e',
        '--engine',
        dest='engine',
        help='the engine name, \'default\' will be used if absent',
        default='default')
    parser.add_argument(
        '--batch',
        action='store_true',
        dest='batch',
        default=False,
        # The two literals are concatenated: the first one must end with a
        # space, otherwise the help text reads "MMT.This"
        help='if set, the script will read the whole stdin before send translations to MMT. '
             'This can be used to execute translation in parallel for a faster translation. ')
    parser.add_argument('--threads',
                        dest='threads',
                        default=None,
                        type=int,
                        help='number of concurrent translation requests.')
    parser.add_argument('--xliff',
                        dest='is_xliff',
                        action='store_true',
                        default=False,
                        help='if set, the input is a XLIFF file.')
    parser.add_argument(
        '--split-lines',
        dest='split_lines',
        action='store_true',
        default=False,
        help='if set, ModernMT will split input text by carriage-return char')

    args = parser.parse_args(argv)

    engine = Engine(args.engine)

    if args.source_lang is None or args.target_lang is None:
        if len(engine.languages) > 1:
            raise CLIArgsException(
                parser,
                'Missing language. Options "-s" and "-t" are mandatory for multilingual engines.')
        # Fall back to the first language pair of the engine
        args.source_lang, args.target_lang = engine.languages[0]

    # Return the namespace completed above. Parsing argv a second time
    # would produce a fresh namespace and drop the inferred languages.
    return args