Beispiel #1
0
    def download_from_mirrors(self,
                              name,
                              mirrors,
                              destination_folder,
                              expected_file_size=None):
        """Download an archive from ``mirrors`` and install it in a folder.

        The archive is fetched into a temporary work directory and unpacked
        there; the unpacked folder then replaces ``destination_folder``, whose
        ownership is aligned with that of ``mmt.MMT_VENDOR_DIR``. The work
        directory is always removed, whether the operation succeeds or fails.

        Args:
            name: label appended to ``'Downloading '`` in the progress bar.
            mirrors: forwarded unchanged to ``self._download_from_mirrors``.
            destination_folder: path replaced by the unpacked content; any
                existing content is removed first.
            expected_file_size: expected download size, in the same unit as
                the length reported to the download callback (presumably
                bytes - confirm against ``_download_from_mirrors``). When it
                is ``None`` or not positive the size is treated as unknown:
                an ``UndefinedProgressbar`` is shown and no callback is
                installed.

        Raises:
            Exception: any error raised while downloading, unpacking or
                moving is re-raised after the progress bar has been aborted.
        """
        # A zero or negative size cannot be used as a divisor for the progress
        # ratio, so it is handled exactly like an unknown size.
        expected_size = None if expected_file_size is None else float(expected_file_size)
        size_known = expected_size is not None and expected_size > 0

        work_directory = tempfile.mkdtemp()

        label = 'Downloading ' + name
        progressbar = Progressbar(label) if size_known else UndefinedProgressbar(label)
        progressbar.start()

        try:

            def _callback(length):
                # The download accounts for the first 80% of the bar; the
                # remaining steps advance it explicitly below.
                progress = min(1., length / expected_size)
                progressbar.set_progress(progress * 0.8)

            tar_file = self._download_from_mirrors(
                mirrors,
                work_directory,
                callback=_callback if size_known else None)

            folder = self._untar(tar_file, work_directory)
            progressbar.set_progress(0.9)

            # Replace any previous installation with the freshly unpacked one.
            shutil.rmtree(destination_folder, ignore_errors=True)
            shutil.move(folder, destination_folder)

            chown(destination_folder, *get_owner(mmt.MMT_VENDOR_DIR))

            progressbar.complete()
        except Exception as e:
            progressbar.abort(str(e))
            raise
        finally:
            shutil.rmtree(work_directory, ignore_errors=True)
Beispiel #2
0
def copy_opennlp_resources():
    """Copy the OpenNLP ``.bin`` files from the vendor dir to the resources dir.

    Every file ending in ``.bin`` found in ``<MMT_VENDOR_DIR>/opennlp`` is
    copied to ``<MMT_RES_DIR>/opennlp`` (created when missing) while a progress
    bar tracks the number of files copied. Afterwards the ownership of
    ``mmt.MMT_RES_DIR`` is aligned with that of ``mmt.MMT_BUILD_DIR``.

    Raises:
        AssertionError: if the OpenNLP vendor folder does not exist.
        Exception: any error raised while copying is re-raised after the
            progress bar has been aborted.
    """
    opennlp_home = os.path.join(mmt.MMT_VENDOR_DIR, 'opennlp')
    assert os.path.exists(opennlp_home)

    opennlp_res = os.path.join(mmt.MMT_RES_DIR, 'opennlp')
    if not os.path.exists(opennlp_res):
        os.makedirs(opennlp_res)

    files = [f for f in os.listdir(opennlp_home) if f.endswith('.bin')]
    total = float(len(files))

    progressbar = Progressbar('Downloading OpenNLP')
    progressbar.start()

    try:
        for copied, filename in enumerate(files, 1):
            shutil.copyfile(os.path.join(opennlp_home, filename),
                            os.path.join(opennlp_res, filename))
            progressbar.set_progress(copied / total)
    except BaseException as e:
        # Report the failure instead of marking the bar as completed; catching
        # BaseException guarantees the bar is always terminated, even on an
        # interrupt.
        progressbar.abort(repr(e))
        raise
    else:
        progressbar.complete()

    chown(mmt.MMT_RES_DIR, *get_owner(mmt.MMT_BUILD_DIR))
Beispiel #3
0
def main_import(argv=None):
    """Command-line entry point: import a TMX or parallel corpus into a memory.

    Either a TMX file (``-x``) or a pair of parallel files (``-p``) must be
    given; when both are present the TMX file is used. For parallel files the
    source and target languages are taken from the file extensions. If no
    memory id is passed with ``--id``, a new memory named after the corpus is
    created, and deleted again (best effort) should the import fail.

    Args:
        argv: list of command-line arguments; ``None`` lets ``argparse`` read
            them from ``sys.argv``.

    Raises:
        CLIArgsException: if neither ``-x`` nor ``-p`` is provided.
        BaseException: any error (including an interrupt) raised during the
            import is re-raised after cleanup and after the progress bar has
            been aborted.
    """
    parser = argparse.ArgumentParser(
        description=
        'Import content, TMX or Parallel files, into a new or existing memory')
    parser.add_argument('-x',
                        '--tmx-file',
                        dest='tmx',
                        metavar='TMX_FILE',
                        help='TMX file to import',
                        default=None)
    parser.add_argument(
        '-p',
        '--parallel-files',
        dest='parallel_file',
        default=None,
        nargs=2,
        help=
        'source and target file (file extension must be source and target languages)'
    )
    parser.add_argument(
        '-e',
        '--engine',
        dest='engine',
        help='the engine name, \'default\' will be used if absent',
        default='default')
    parser.add_argument(
        '--id',
        type=int,
        default=None,
        dest='memory',
        help=
        'the optional destination memory id (by default, a new Memory is created)'
    )

    args = parser.parse_args(argv)

    if args.tmx is None and args.parallel_file is None:
        raise CLIArgsException(
            parser, 'missing one of the following options: "-x" or "-p"')

    node = _load_node(args.engine)

    # The corpus name is the input file name without directory and extension.
    corpus_path = args.tmx or args.parallel_file[0]
    corpus_name = os.path.splitext(os.path.basename(corpus_path))[0]

    new_memory = None
    if args.memory is None:
        new_memory = node.api.create_memory(corpus_name)
        args.memory = new_memory['id']

    progressbar = Progressbar(label='Importing %s' % corpus_name)
    progressbar.start()

    try:
        if args.tmx is not None:
            job = node.api.import_into_memory(args.memory, tmx=args.tmx)
        else:
            src_file, tgt_file = args.parallel_file
            # Languages are the file extensions without the leading dot.
            src_lang = os.path.splitext(src_file)[1][1:]
            tgt_lang = os.path.splitext(tgt_file)[1][1:]

            job = node.api.import_into_memory(args.memory,
                                              source_file=src_file,
                                              target_file=tgt_file,
                                              source_lang=src_lang,
                                              target_lang=tgt_lang)

        progressbar.set_progress(job['progress'])

        # Poll until the job is done. An ordering comparison is used instead
        # of float inequality so that a progress value overshooting 1.0 cannot
        # keep the loop running forever.
        while job['progress'] < 1.0:
            time.sleep(1)
            job = node.api.get_import_job(job['id'])
            progressbar.set_progress(job['progress'])

        progressbar.complete()
        print('IMPORT SUCCESS')
    except BaseException as e:
        if new_memory is not None:
            # Best-effort removal of the memory created for this import; a
            # failure here must not hide the original error.
            try:
                node.api.delete_memory(new_memory['id'])
            except Exception:
                pass

        progressbar.abort(repr(e))
        print('IMPORT FAILED')

        raise