# Fill in the remaining metadata fields for the current model and package it.

# Embeddings file name recorded in the metadata:
#   for 1.0 use "wiki." + embedding_code + ".vec"
#   for 1.1 use "wiki.dummy.vec"
metadata.embeddings_file_name = "wiki.dummy.vec"

# Token delimiter. Must be either a space (default) or "" (for languages
# like Japanese, Chinese, etc.).
# Compare against whole language codes: a substring test against "zh ja"
# would also accept "", " ", "h", "a", "h j", ...
if language_code in ("zh", "ja"):
    metadata.token_delimiter = ""
else:
    metadata.token_delimiter = " "

# OPTIONAL: model build date, stored as a string.
metadata.model_build_date = str(datetime.now())

# OPTIONAL: model build source: what corpus the model was built from.
# Ex: UD-Romanian-RRT v2.2
metadata.model_build_source = local_model

# OPTIONAL: other notes, string value.
metadata.notes = "Source: ud-treebanks-v2.2"

metadata.info()

try:
    input_folder = os.path.join(input_models_root_folder, local_model)
    model_store.package_model(
        input_folder,
        output_models_root_folder,
        metadata,
        should_contain_tokenizer=True,
        should_contain_compound_word_expander=False,
        should_contain_lemmatizer=True,
        should_contain_tagger=True,
        should_contain_parser=True,
    )
except Exception as exc:
    # Best effort: report the failure and carry on. Catching Exception
    # (not a bare except) lets Ctrl-C / SystemExit still stop the script,
    # and printing the cause keeps the failure diagnosable.
    print("Error processing model")
    print("\t{}: {!r}".format(local_model, exc))
#break # test just one package
# Validate a model folder, load its metadata.json and package the model
# into the folder's parent directory.

# Echo the requested configuration.
print("\n\tModel folder: " + model_folder_path)
print("\tUse tokenizer: {}".format(_tokenizer))
print("\tUse compound word expander: {}".format(_compound_word_expander))
print("\tUse lemmatizer: {}".format(_lemmatizer))
print("\tUse tagger: {}".format(_tagger))
print("\tUse parser: {}\n".format(_parser))

# check if path exists
if not os.path.exists(model_folder_path):
    raise Exception("Model folder not found!")

# check if metadata exists
metadata_path = os.path.join(model_folder_path, "metadata.json")
if not os.path.exists(metadata_path):
    raise Exception("metadata.json not found in model folder!")

# check if metadata is valid
metadata = ModelMetadata()
metadata.read(metadata_path)

# The package goes next to the model folder. Normalize first: with a
# trailing separator ("models/en-1.0/") os.path.dirname would return the
# model folder itself rather than its parent.
output_folder_path = os.path.dirname(os.path.normpath(model_folder_path))

model_store_object = ModelStore(disk_path=output_folder_path)
model_store_object.package_model(
    model_folder_path,
    output_folder_path,
    metadata,
    should_contain_tokenizer=_tokenizer,
    should_contain_compound_word_expander=_compound_word_expander,
    should_contain_lemmatizer=_lemmatizer,
    should_contain_tagger=_tagger,
    should_contain_parser=_parser,
)