def download_from_mirrors(self, name, mirrors, destination_folder, expected_file_size=None):
    """Download a package from ``mirrors`` and install it as ``destination_folder``.

    The archive is fetched and unpacked inside a fresh temporary directory,
    then the unpacked folder replaces ``destination_folder``. The temporary
    directory is always removed, whether the operation succeeds or fails.

    Args:
        name: Label appended to ``'Downloading '`` for the progress bar.
        mirrors: Passed through to ``self._download_from_mirrors``.
        destination_folder: Final location; any existing content is removed
            (best effort) before the unpacked folder is moved there.
        expected_file_size: When given, a ``Progressbar`` is used and the
            download callback reports progress relative to this size;
            when ``None`` an ``UndefinedProgressbar`` is used and no
            callback is passed.

    Raises:
        Exception: any error from the download/unpack/install steps is
            re-raised after the progress bar has been aborted.
    """
    work_directory = tempfile.mkdtemp()

    label = 'Downloading ' + name
    size_known = expected_file_size is not None
    if size_known:
        progressbar = Progressbar(label)
    else:
        progressbar = UndefinedProgressbar(label)
    progressbar.start()

    try:
        def _on_download(length):
            # The download phase is mapped onto the first 80% of the bar;
            # the ratio is capped at 1 in case more than expected arrives.
            # presumably `length` is the amount received so far - confirm
            # against _download_from_mirrors.
            fraction = min(1., length / float(expected_file_size))
            progressbar.set_progress(fraction * 0.8)

        on_progress = _on_download if size_known else None
        archive = self._download_from_mirrors(
            mirrors, work_directory, callback=on_progress)

        unpacked = self._untar(archive, work_directory)
        progressbar.set_progress(0.9)

        # Swap the previous installation (if any) for the unpacked folder.
        shutil.rmtree(destination_folder, ignore_errors=True)
        shutil.move(unpacked, destination_folder)
        chown(destination_folder, *get_owner(mmt.MMT_VENDOR_DIR))

        progressbar.complete()
    except Exception as e:
        progressbar.abort(str(e))
        raise
    finally:
        shutil.rmtree(work_directory, ignore_errors=True)
def copy_opennlp_resources():
    """Copy the OpenNLP ``.bin`` files from the vendor dir to the resources dir.

    Every file ending in ``.bin`` directly under ``<MMT_VENDOR_DIR>/opennlp``
    is copied to ``<MMT_RES_DIR>/opennlp`` (created if missing), updating a
    progress bar after each file. On success ``MMT_RES_DIR`` is then passed
    to ``chown`` with the owner returned by ``get_owner(mmt.MMT_BUILD_DIR)``.

    Raises:
        AssertionError: if the vendor ``opennlp`` directory does not exist.
        BaseException: any error raised while copying is re-raised after the
            progress bar has been aborted.
    """
    opennlp_home = os.path.join(mmt.MMT_VENDOR_DIR, 'opennlp')
    assert os.path.exists(opennlp_home)

    opennlp_res = os.path.join(mmt.MMT_RES_DIR, 'opennlp')
    if not os.path.exists(opennlp_res):
        os.makedirs(opennlp_res)

    files = [f for f in os.listdir(opennlp_home) if f.endswith('.bin')]
    # Loop-invariant; float keeps the division fractional on Python 2 too.
    total = float(len(files))

    progressbar = Progressbar('Downloading OpenNLP')
    progressbar.start()

    try:
        for count, filename in enumerate(files, 1):
            shutil.copyfile(os.path.join(opennlp_home, filename),
                            os.path.join(opennlp_res, filename))
            progressbar.set_progress(count / total)
    except BaseException as e:
        # Report the failure instead of marking the bar as completed.
        # BaseException is caught so the bar is also terminated on
        # interrupts, as the previous `finally` clause did.
        progressbar.abort(str(e))
        raise
    else:
        progressbar.complete()

    chown(mmt.MMT_RES_DIR, *get_owner(mmt.MMT_BUILD_DIR))
def main_import(argv=None):
    """Command-line entry point: import a TMX or a parallel corpus into a memory.

    Parses ``argv``, loads the node for the requested engine, optionally
    creates a new memory (when ``--id`` is not given), starts the import job
    and polls it once per second until it reports full progress.

    If anything fails after a new memory was created by this call, that
    memory is deleted (best effort) before the error is re-raised. A memory
    passed through ``--id`` is never deleted.

    Args:
        argv: Argument list handed to ``ArgumentParser.parse_args``; ``None``
            lets argparse fall back to its default.

    Raises:
        CLIArgsException: if neither ``-x`` nor ``-p`` is provided.
        BaseException: any error raised during the import is re-raised after
            cleanup and after the progress bar has been aborted.
    """
    parser = argparse.ArgumentParser(
        description='Import content, TMX or Parallel files, into a new or existing memory')
    parser.add_argument('-x', '--tmx-file', dest='tmx', metavar='TMX_FILE',
                        help='TMX file to import', default=None)
    parser.add_argument('-p', '--parallel-files', dest='parallel_file', default=None, nargs=2,
                        help='source and target file (file extension must be source and target languages)')
    parser.add_argument('-e', '--engine', dest='engine',
                        help='the engine name, \'default\' will be used if absent',
                        default='default')
    parser.add_argument('--id', type=int, default=None, dest='memory',
                        help='the optional destination memory id (by default, a new Memory is created)')

    args = parser.parse_args(argv)

    if args.tmx is None and args.parallel_file is None:
        raise CLIArgsException(
            parser, 'missing one of the following options: "-x" or "-p"')

    node = _load_node(args.engine)

    # The corpus name is the input file name without directory and extension;
    # the TMX file takes precedence when both inputs are supplied.
    corpus_name = os.path.splitext(
        os.path.basename(args.tmx or args.parallel_file[0]))[0]

    new_memory = None
    if args.memory is None:
        new_memory = node.api.create_memory(corpus_name)
        args.memory = new_memory['id']

    progressbar = Progressbar(label='Importing %s' % corpus_name)
    progressbar.start()

    try:
        if args.tmx is not None:
            job = node.api.import_into_memory(args.memory, tmx=args.tmx)
        else:
            src_file, tgt_file = args.parallel_file
            # Languages are taken from the file extensions (leading dot stripped).
            src_lang = os.path.splitext(src_file)[1][1:]
            tgt_lang = os.path.splitext(tgt_file)[1][1:]

            job = node.api.import_into_memory(args.memory,
                                              source_file=src_file, target_file=tgt_file,
                                              source_lang=src_lang, target_lang=tgt_lang)

        progressbar.set_progress(job['progress'])

        # Compare with `<` rather than `!=`: an exact float equality test
        # would spin forever if the reported progress ever overshoots 1.0.
        while job['progress'] < 1.0:
            time.sleep(1)
            job = node.api.get_import_job(job['id'])
            progressbar.set_progress(job['progress'])

        progressbar.complete()
        print('IMPORT SUCCESS')
    except BaseException as e:
        # BaseException on purpose: an interrupted import (e.g. Ctrl-C) must
        # also roll back the memory created above.
        if new_memory is not None:
            try:
                node.api.delete_memory(new_memory['id'])
            except BaseException:
                # Best-effort cleanup: a failure here must not mask the
                # original error, which is re-raised below.
                pass

        progressbar.abort(repr(e))
        print('IMPORT FAILED')
        raise