def __init__(
    self,
    dataset: str,
    l1: str,
    l2: str,
    use_tokenizer: bool = True,
    max_tokens_per_doc=-1,
    max_chars_per_doc=-1,
    in_memory: bool = True,
    **corpusargs,
):
    """
    Instantiates a Parallel Corpus from OPUS (http://opus.nlpl.eu/)
    :param dataset: Name of the dataset (one of "tatoeba", "subtitles")
    :param l1: Language code of first language in pair ("en", "de", etc.)
    :param l2: Language code of second language in pair ("en", "de", etc.)
    :param use_tokenizer: Whether or not to use in-built tokenizer
    :param max_tokens_per_doc: If set, shortens sentences to this maximum number of tokens
    :param max_chars_per_doc: If set, shortens sentences to this maximum number of characters
    :param in_memory: If True, keeps dataset fully in memory
    :raises ValueError: If `dataset` is not one of the supported OPUS datasets
    """
    # OPUS publishes language pairs with codes in alphabetical order
    if l1 > l2:
        l1, l2 = l2, l1

    # check if dataset is supported; raise instead of only logging, otherwise
    # execution would continue and crash with an UnboundLocalError on `link`
    supported_datasets = ["tatoeba", "subtitles"]
    if dataset not in supported_datasets:
        log.error(f"Dataset must be one of: {supported_datasets}")
        raise ValueError(f"Dataset must be one of: {supported_datasets}")

    # set file names
    if dataset == "tatoeba":
        link = f"https://object.pouta.csc.fi/OPUS-Tatoeba/v20190709/moses/{l1}-{l2}.txt.zip"
        l1_file = flair.cache_root / "datasets" / dataset / f"{l1}-{l2}" / f"Tatoeba.{l1}-{l2}.{l1}"
        l2_file = flair.cache_root / "datasets" / dataset / f"{l1}-{l2}" / f"Tatoeba.{l1}-{l2}.{l2}"

    # set file names
    if dataset == "subtitles":
        link = f"https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/moses/{l1}-{l2}.txt.zip"
        l1_file = flair.cache_root / "datasets" / dataset / f"{l1}-{l2}" / f"OpenSubtitles.{l1}-{l2}.{l1}"
        l2_file = flair.cache_root / "datasets" / dataset / f"{l1}-{l2}" / f"OpenSubtitles.{l1}-{l2}.{l2}"

    # download and unzip in file structure if necessary
    if not l1_file.exists():
        path = cached_path(link, Path("datasets") / dataset / f"{l1}-{l2}")
        unzip_file(path, flair.cache_root / Path("datasets") / dataset / f"{l1}-{l2}")

    # instantiate corpus
    super(OpusParallelCorpus, self).__init__(
        l1_file,
        l2_file,
        name=f"{dataset}-{l1_file}-{l2_file}",
        use_tokenizer=use_tokenizer,
        max_tokens_per_doc=max_tokens_per_doc,
        max_chars_per_doc=max_chars_per_doc,
        in_memory=in_memory,
        **corpusargs,
    )
def __init__(
    self,
    **corpusargs,
):
    """
    Instantiates the SentEval Customer Review (CR) sentiment corpus, downloading
    the raw SentEval data and converting it to FastText-style label format
    (one `__label__POSITIVE/__label__NEGATIVE` line per sentence) if necessary.
    :param corpusargs: Keyword arguments passed through to the superclass constructor
    """
    # this dataset name
    dataset_name = self.__class__.__name__.lower()

    # default dataset folder is the cache root
    data_folder = Path(flair.cache_root) / "datasets" / dataset_name

    # download data if necessary
    if not (data_folder / "train.txt").is_file():

        # download senteval datasets if necessary and unzip
        senteval_path = "https://dl.fbaipublicfiles.com/senteval/senteval_data/datasmall_NB_ACL12.zip"
        cached_path(senteval_path, Path("datasets") / "senteval")
        senteval_folder = Path(flair.cache_root) / "datasets" / "senteval"
        unzip_file(senteval_folder / "datasmall_NB_ACL12.zip", senteval_folder)

        # create dataset directory if necessary
        data_folder.mkdir(parents=True, exist_ok=True)

        # create train.txt file by iterating over pos and neg file;
        # "w" mode is safe here (guard above guarantees the file is absent) and
        # avoids appending to a stale partial file; fix the encoding explicitly
        # instead of relying on the platform default
        with open(data_folder / "train.txt", "w", encoding="utf-8") as train_file:

            with open(senteval_folder / "data" / "customerr" / "custrev.pos", encoding="latin1") as file:
                for line in file:
                    train_file.write(f"__label__POSITIVE {line}")

            with open(senteval_folder / "data" / "customerr" / "custrev.neg", encoding="latin1") as file:
                for line in file:
                    train_file.write(f"__label__NEGATIVE {line}")

    super(SENTEVAL_CR, self).__init__(
        data_folder,
        label_type='sentiment',
        tokenizer=segtok_tokenizer,
        **corpusargs,
    )
def _fetch_model(model_name) -> str: # core Flair models on Huggingface ModelHub huggingface_model_map = { "ner": "flair/ner-english", "ner-fast": "flair/ner-english-fast", "ner-ontonotes": "flair/ner-english-ontonotes", "ner-ontonotes-fast": "flair/ner-english-ontonotes-fast", # Large NER models, "ner-large": "flair/ner-english-large", "ner-ontonotes-large": "flair/ner-english-ontonotes-large", "de-ner-large": "flair/ner-german-large", "nl-ner-large": "flair/ner-dutch-large", "es-ner-large": "flair/ner-spanish-large", # Multilingual NER models "ner-multi": "flair/ner-multi", "multi-ner": "flair/ner-multi", "ner-multi-fast": "flair/ner-multi-fast", # English POS models "upos": "flair/upos-english", "upos-fast": "flair/upos-english-fast", "pos": "flair/pos-english", "pos-fast": "flair/pos-english-fast", # Multilingual POS models "pos-multi": "flair/upos-multi", "multi-pos": "flair/upos-multi", "pos-multi-fast": "flair/upos-multi-fast", "multi-pos-fast": "flair/upos-multi-fast", # English SRL models "frame": "flair/frame-english", "frame-fast": "flair/frame-english-fast", # English chunking models "chunk": "flair/chunk-english", "chunk-fast": "flair/chunk-english-fast", # Language-specific NER models "da-ner": "flair/ner-danish", "de-ner": "flair/ner-german", "de-ler": "flair/ner-german-legal", "de-ner-legal": "flair/ner-german-legal", "fr-ner": "flair/ner-french", "nl-ner": "flair/ner-dutch", } hu_path: str = "https://nlp.informatik.hu-berlin.de/resources/models" hu_model_map = { # English NER models "ner": "/".join([hu_path, "ner", "en-ner-conll03-v0.4.pt"]), "ner-pooled": "/".join([hu_path, "ner-pooled", "en-ner-conll03-pooled-v0.5.pt"]), "ner-fast": "/".join([hu_path, "ner-fast", "en-ner-fast-conll03-v0.4.pt"]), "ner-ontonotes": "/".join([hu_path, "ner-ontonotes", "en-ner-ontonotes-v0.4.pt"]), "ner-ontonotes-fast": "/".join([ hu_path, "ner-ontonotes-fast", "en-ner-ontonotes-fast-v0.4.pt" ]), # Multilingual NER models "ner-multi": "/".join([hu_path, "multi-ner", 
"quadner-large.pt"]), "multi-ner": "/".join([hu_path, "multi-ner", "quadner-large.pt"]), "ner-multi-fast": "/".join([hu_path, "multi-ner-fast", "ner-multi-fast.pt"]), # English POS models "upos": "/".join([hu_path, "upos", "en-pos-ontonotes-v0.4.pt"]), "upos-fast": "/".join([hu_path, "upos-fast", "en-upos-ontonotes-fast-v0.4.pt"]), "pos": "/".join([hu_path, "pos", "en-pos-ontonotes-v0.5.pt"]), "pos-fast": "/".join([hu_path, "pos-fast", "en-pos-ontonotes-fast-v0.5.pt"]), # Multilingual POS models "pos-multi": "/".join([hu_path, "multi-pos", "pos-multi-v0.1.pt"]), "multi-pos": "/".join([hu_path, "multi-pos", "pos-multi-v0.1.pt"]), "pos-multi-fast": "/".join([hu_path, "multi-pos-fast", "pos-multi-fast.pt"]), "multi-pos-fast": "/".join([hu_path, "multi-pos-fast", "pos-multi-fast.pt"]), # English SRL models "frame": "/".join([hu_path, "frame", "en-frame-ontonotes-v0.4.pt"]), "frame-fast": "/".join( [hu_path, "frame-fast", "en-frame-ontonotes-fast-v0.4.pt"]), # English chunking models "chunk": "/".join([hu_path, "chunk", "en-chunk-conll2000-v0.4.pt"]), "chunk-fast": "/".join( [hu_path, "chunk-fast", "en-chunk-conll2000-fast-v0.4.pt"]), # Danish models "da-pos": "/".join([hu_path, "da-pos", "da-pos-v0.1.pt"]), "da-ner": "/".join([hu_path, "NER-danish", "da-ner-v0.1.pt"]), # German models "de-pos": "/".join([hu_path, "de-pos", "de-pos-ud-hdt-v0.5.pt"]), "de-pos-tweets": "/".join([hu_path, "de-pos-tweets", "de-pos-twitter-v0.1.pt"]), "de-ner": "/".join([hu_path, "de-ner", "de-ner-conll03-v0.4.pt"]), "de-ner-germeval": "/".join([hu_path, "de-ner-germeval", "de-ner-germeval-0.4.1.pt"]), "de-ler": "/".join([hu_path, "de-ner-legal", "de-ner-legal.pt"]), "de-ner-legal": "/".join([hu_path, "de-ner-legal", "de-ner-legal.pt"]), # French models "fr-ner": "/".join([hu_path, "fr-ner", "fr-ner-wikiner-0.4.pt"]), # Dutch models "nl-ner": "/".join([hu_path, "nl-ner", "nl-ner-bert-conll02-v0.8.pt"]), "nl-ner-rnn": "/".join([hu_path, "nl-ner-rnn", "nl-ner-conll02-v0.5.pt"]), # Malayalam 
models "ml-pos": "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-xpos-model.pt", "ml-upos": "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-upos-model.pt", # Portuguese models "pt-pos-clinical": "/".join([ hu_path, "pt-pos-clinical", "pucpr-flair-clinical-pos-tagging-best-model.pt", ]), # Keyphase models "keyphrase": "/".join([hu_path, "keyphrase", "keyphrase-en-scibert.pt"]), "negation-speculation": "/".join([ hu_path, "negation-speculation", "negation-speculation-model.pt" ]), # Biomedical models "hunflair-paper-cellline": "/".join([ hu_path, "hunflair_smallish_models", "cellline", "hunflair-celline-v1.0.pt", ]), "hunflair-paper-chemical": "/".join([ hu_path, "hunflair_smallish_models", "chemical", "hunflair-chemical-v1.0.pt", ]), "hunflair-paper-disease": "/".join([ hu_path, "hunflair_smallish_models", "disease", "hunflair-disease-v1.0.pt", ]), "hunflair-paper-gene": "/".join([ hu_path, "hunflair_smallish_models", "gene", "hunflair-gene-v1.0.pt" ]), "hunflair-paper-species": "/".join([ hu_path, "hunflair_smallish_models", "species", "hunflair-species-v1.0.pt", ]), "hunflair-cellline": "/".join([ hu_path, "hunflair_smallish_models", "cellline", "hunflair-celline-v1.0.pt", ]), "hunflair-chemical": "/".join([ hu_path, "hunflair_allcorpus_models", "huner-chemical", "hunflair-chemical-full-v1.0.pt", ]), "hunflair-disease": "/".join([ hu_path, "hunflair_allcorpus_models", "huner-disease", "hunflair-disease-full-v1.0.pt", ]), "hunflair-gene": "/".join([ hu_path, "hunflair_allcorpus_models", "huner-gene", "hunflair-gene-full-v1.0.pt", ]), "hunflair-species": "/".join([ hu_path, "hunflair_allcorpus_models", "huner-species", "hunflair-species-full-v1.1.pt", ]), } cache_dir = Path("models") get_from_model_hub = False # check if model name is a valid local file if Path(model_name).exists(): model_path = model_name # check if model key is remapped to HF key - if so, print out 
information elif model_name in huggingface_model_map: # get mapped name hf_model_name = huggingface_model_map[model_name] # use mapped name instead model_name = hf_model_name get_from_model_hub = True # if not, check if model key is remapped to direct download location. If so, download model elif model_name in hu_model_map: model_path = cached_path(hu_model_map[model_name], cache_dir=cache_dir) # special handling for the taggers by the @redewiegergabe project (TODO: move to model hub) elif model_name == "de-historic-indirect": model_file = flair.cache_root / cache_dir / "indirect" / "final-model.pt" if not model_file.exists(): cached_path( "http://www.redewiedergabe.de/models/indirect.zip", cache_dir=cache_dir, ) unzip_file( flair.cache_root / cache_dir / "indirect.zip", flair.cache_root / cache_dir, ) model_path = str(flair.cache_root / cache_dir / "indirect" / "final-model.pt") elif model_name == "de-historic-direct": model_file = flair.cache_root / cache_dir / "direct" / "final-model.pt" if not model_file.exists(): cached_path( "http://www.redewiedergabe.de/models/direct.zip", cache_dir=cache_dir, ) unzip_file( flair.cache_root / cache_dir / "direct.zip", flair.cache_root / cache_dir, ) model_path = str(flair.cache_root / cache_dir / "direct" / "final-model.pt") elif model_name == "de-historic-reported": model_file = flair.cache_root / cache_dir / "reported" / "final-model.pt" if not model_file.exists(): cached_path( "http://www.redewiedergabe.de/models/reported.zip", cache_dir=cache_dir, ) unzip_file( flair.cache_root / cache_dir / "reported.zip", flair.cache_root / cache_dir, ) model_path = str(flair.cache_root / cache_dir / "reported" / "final-model.pt") elif model_name == "de-historic-free-indirect": model_file = flair.cache_root / cache_dir / "freeIndirect" / "final-model.pt" if not model_file.exists(): cached_path( "http://www.redewiedergabe.de/models/freeIndirect.zip", cache_dir=cache_dir, ) unzip_file( flair.cache_root / cache_dir / "freeIndirect.zip", 
flair.cache_root / cache_dir, ) model_path = str(flair.cache_root / cache_dir / "freeIndirect" / "final-model.pt") # for all other cases (not local file or special download location), use HF model hub else: get_from_model_hub = True # if not a local file, get from model hub if get_from_model_hub: hf_model_name = "pytorch_model.bin" revision = "main" if "@" in model_name: model_name_split = model_name.split("@") revision = model_name_split[-1] model_name = model_name_split[0] # use model name as subfolder if "/" in model_name: model_folder = model_name.split("/", maxsplit=1)[1] else: model_folder = model_name # Lazy import from huggingface_hub import cached_download, hf_hub_url url = hf_hub_url(model_name, revision=revision, filename=hf_model_name) try: model_path = cached_download( url=url, library_name="flair", library_version=flair.__version__, cache_dir=flair.cache_root / "models" / model_folder, ) except HTTPError: # output information log.error("-" * 80) log.error( f"ACHTUNG: The key '{model_name}' was neither found on the ModelHub nor is this a valid path to a file on your system!" ) # log.error(f" - Error message: {e}") log.error( " -> Please check https://huggingface.co/models?filter=flair for all available models." ) log.error( " -> Alternatively, point to a model file on your local drive." ) log.error("-" * 80) Path(flair.cache_root / "models" / model_folder).rmdir() # remove folder again if not valid return model_path
def _fetch_model(model_name) -> str: hu_path: str = "https://nlp.informatik.hu-berlin.de/resources/models" model_map = { # English NER models "ner": "/".join([hu_path, "ner", "en-ner-conll03-v0.4.pt"]), "ner-pooled": "/".join([hu_path, "ner-pooled", "en-ner-conll03-pooled-v0.5.pt"]), "ner-fast": "/".join([hu_path, "ner-fast", "en-ner-fast-conll03-v0.4.pt"]), "ner-ontonotes": "/".join([hu_path, "ner-ontonotes", "en-ner-ontonotes-v0.4.pt"]), "ner-ontonotes-fast": "/".join([hu_path, "ner-ontonotes-fast", "en-ner-ontonotes-fast-v0.4.pt"]), # Multilingual NER models "ner-multi": "/".join([hu_path, "multi-ner", "quadner-large.pt"]), "multi-ner": "/".join([hu_path, "multi-ner", "quadner-large.pt"]), "ner-multi-fast": "/".join([hu_path, "multi-ner-fast", "ner-multi-fast.pt"]), # English POS models "upos": "/".join([hu_path, "upos", "en-pos-ontonotes-v0.4.pt"]), "upos-fast": "/".join([hu_path, "upos-fast", "en-upos-ontonotes-fast-v0.4.pt"]), "pos": "/".join([hu_path, "pos", "en-pos-ontonotes-v0.5.pt"]), "pos-fast": "/".join([hu_path, "pos-fast", "en-pos-ontonotes-fast-v0.5.pt"]), # Multilingual POS models "pos-multi": "/".join([hu_path, "multi-pos", "pos-multi-v0.1.pt"]), "multi-pos": "/".join([hu_path, "multi-pos", "pos-multi-v0.1.pt"]), "pos-multi-fast": "/".join([hu_path, "multi-pos-fast", "pos-multi-fast.pt"]), "multi-pos-fast": "/".join([hu_path, "multi-pos-fast", "pos-multi-fast.pt"]), # English SRL models "frame": "/".join([hu_path, "frame", "en-frame-ontonotes-v0.4.pt"]), "frame-fast": "/".join([hu_path, "frame-fast", "en-frame-ontonotes-fast-v0.4.pt"]), # English chunking models "chunk": "/".join([hu_path, "chunk", "en-chunk-conll2000-v0.4.pt"]), "chunk-fast": "/".join([hu_path, "chunk-fast", "en-chunk-conll2000-fast-v0.4.pt"]), # Danish models "da-pos": "/".join([hu_path, "da-pos", "da-pos-v0.1.pt"]), "da-ner": "/".join([hu_path, "NER-danish", "da-ner-v0.1.pt"]), # German models "de-pos": "/".join([hu_path, "de-pos", "de-pos-ud-hdt-v0.5.pt"]), "de-pos-tweets": 
"/".join([hu_path, "de-pos-tweets", "de-pos-twitter-v0.1.pt"]), "de-ner": "/".join([hu_path, "de-ner", "de-ner-conll03-v0.4.pt"]), "de-ner-germeval": "/".join([hu_path, "de-ner-germeval", "de-ner-germeval-0.4.1.pt"]), # French models "fr-ner": "/".join([hu_path, "fr-ner", "fr-ner-wikiner-0.4.pt"]), # Dutch models "nl-ner": "/".join([hu_path, "nl-ner", "nl-ner-bert-conll02-v0.5b.pt"]), "nl-ner-rnn": "/".join([hu_path, "nl-ner-rnn", "nl-ner-conll02-v0.5.pt"]), # Malayalam models "ml-pos": "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-xpos-model.pt", "ml-upos": "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-upos-model.pt", # Portuguese models "pt-pos-clinical": "/".join([hu_path, "pt-pos-clinical", "pucpr-flair-clinical-pos-tagging-best-model.pt"]), # Keyphase models "keyphrase": "/".join([hu_path, "keyphrase", "keyphrase-en-scibert.pt"]), "negation-speculation": "/".join( [hu_path, "negation-speculation", "negation-speculation-model.pt"]), # Biomedical models "hunflair-paper-cellline": "/".join( [hu_path, "hunflair_smallish_models", "cellline", "hunflair-celline-v1.0.pt"] ), "hunflair-paper-chemical": "/".join( [hu_path, "hunflair_smallish_models", "chemical", "hunflair-chemical-v1.0.pt"] ), "hunflair-paper-disease": "/".join( [hu_path, "hunflair_smallish_models", "disease", "hunflair-disease-v1.0.pt"] ), "hunflair-paper-gene": "/".join( [hu_path, "hunflair_smallish_models", "gene", "hunflair-gene-v1.0.pt"] ), "hunflair-paper-species": "/".join( [hu_path, "hunflair_smallish_models", "species", "hunflair-species-v1.0.pt"] ), "hunflair-cellline": "/".join( [hu_path, "hunflair_smallish_models", "cellline", "hunflair-celline-v1.0.pt"] ), "hunflair-chemical": "/".join( [hu_path, "hunflair_allcorpus_models", "huner-chemical", "hunflair-chemical-full-v1.0.pt"] ), "hunflair-disease": "/".join( [hu_path, "hunflair_allcorpus_models", "huner-disease", 
"hunflair-disease-full-v1.0.pt"] ), "hunflair-gene": "/".join( [hu_path, "hunflair_allcorpus_models", "huner-gene", "hunflair-gene-full-v1.0.pt"] ), "hunflair-species": "/".join( [hu_path, "hunflair_allcorpus_models", "huner-species", "hunflair-species-full-v1.1.pt"] )} cache_dir = Path("models") if model_name in model_map: model_name = cached_path(model_map[model_name], cache_dir=cache_dir) # the historical German taggers by the @redewiegergabe project if model_name == "de-historic-indirect": model_file = Path(flair.cache_root) / cache_dir / 'indirect' / 'final-model.pt' if not model_file.exists(): cached_path('http://www.redewiedergabe.de/models/indirect.zip', cache_dir=cache_dir) unzip_file(Path(flair.cache_root) / cache_dir / 'indirect.zip', Path(flair.cache_root) / cache_dir) model_name = str(Path(flair.cache_root) / cache_dir / 'indirect' / 'final-model.pt') if model_name == "de-historic-direct": model_file = Path(flair.cache_root) / cache_dir / 'direct' / 'final-model.pt' if not model_file.exists(): cached_path('http://www.redewiedergabe.de/models/direct.zip', cache_dir=cache_dir) unzip_file(Path(flair.cache_root) / cache_dir / 'direct.zip', Path(flair.cache_root) / cache_dir) model_name = str(Path(flair.cache_root) / cache_dir / 'direct' / 'final-model.pt') if model_name == "de-historic-reported": model_file = Path(flair.cache_root) / cache_dir / 'reported' / 'final-model.pt' if not model_file.exists(): cached_path('http://www.redewiedergabe.de/models/reported.zip', cache_dir=cache_dir) unzip_file(Path(flair.cache_root) / cache_dir / 'reported.zip', Path(flair.cache_root) / cache_dir) model_name = str(Path(flair.cache_root) / cache_dir / 'reported' / 'final-model.pt') if model_name == "de-historic-free-indirect": model_file = Path(flair.cache_root) / cache_dir / 'freeIndirect' / 'final-model.pt' if not model_file.exists(): cached_path('http://www.redewiedergabe.de/models/freeIndirect.zip', cache_dir=cache_dir) unzip_file(Path(flair.cache_root) / cache_dir / 
'freeIndirect.zip', Path(flair.cache_root) / cache_dir) model_name = str(Path(flair.cache_root) / cache_dir / 'freeIndirect' / 'final-model.pt') return model_name
def _fetch_model(model_name) -> str: model_map = {} aws_resource_path_v04 = "https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/models-v0.4" hu_path: str = "https://nlp.informatik.hu-berlin.de/resources/models" model_map["ner"] = "/".join([ aws_resource_path_v04, "NER-conll03-english", "en-ner-conll03-v0.4.pt" ]) model_map["ner-fast"] = "/".join([ aws_resource_path_v04, "NER-conll03--h256-l1-b32-p3-0.5-%2Bglove%2Bnews-forward-fast%2Bnews-backward-fast-normal-locked0.5-word0.05--release_4", "en-ner-fast-conll03-v0.4.pt", ]) model_map["ner-ontonotes"] = "/".join([ aws_resource_path_v04, "release-ner-ontonotes-0", "en-ner-ontonotes-v0.4.pt", ]) model_map["ner-ontonotes-fast"] = "/".join([ aws_resource_path_v04, "release-ner-ontonotes-fast-0", "en-ner-ontonotes-fast-v0.4.pt", ]) for key in ["ner-multi", "multi-ner"]: model_map[key] = "/".join([ aws_resource_path_v04, "release-quadner-512-l2-multi-embed", "quadner-large.pt", ]) for key in ["ner-multi-fast", "multi-ner-fast"]: model_map[key] = "/".join( [aws_resource_path_v04, "NER-multi-fast", "ner-multi-fast.pt"]) for key in ["ner-multi-fast-learn", "multi-ner-fast-learn"]: model_map[key] = "/".join([ aws_resource_path_v04, "NER-multi-fast-evolve", "ner-multi-fast-learn.pt", ]) model_map["upos"] = "/".join([ aws_resource_path_v04, "POS-ontonotes--h256-l1-b32-p3-0.5-%2Bglove%2Bnews-forward%2Bnews-backward-normal-locked0.5-word0.05--v0.4_0", "en-pos-ontonotes-v0.4.pt", ]) model_map["pos"] = "/".join([ hu_path, "release-pos-0", "en-pos-ontonotes-v0.5.pt", ]) model_map["upos-fast"] = "/".join([ aws_resource_path_v04, "release-pos-fast-0", "en-pos-ontonotes-fast-v0.4.pt", ]) model_map["pos-fast"] = "/".join([ hu_path, "release-pos-fast-0", "en-pos-ontonotes-fast-v0.5.pt", ]) for key in ["pos-multi", "multi-pos"]: model_map[key] = "/".join([ aws_resource_path_v04, "release-dodekapos-512-l2-multi", "pos-multi-v0.1.pt", ]) for key in ["pos-multi-fast", "multi-pos-fast"]: model_map[key] = "/".join([ aws_resource_path_v04, 
"UPOS-multi-fast", "pos-multi-fast.pt" ]) model_map["frame"] = "/".join([ aws_resource_path_v04, "release-frame-1", "en-frame-ontonotes-v0.4.pt" ]) model_map["frame-fast"] = "/".join([ aws_resource_path_v04, "release-frame-fast-0", "en-frame-ontonotes-fast-v0.4.pt", ]) model_map["chunk"] = "/".join([ aws_resource_path_v04, "NP-conll2000--h256-l1-b32-p3-0.5-%2Bnews-forward%2Bnews-backward-normal-locked0.5-word0.05--v0.4_0", "en-chunk-conll2000-v0.4.pt", ]) model_map["chunk-fast"] = "/".join([ aws_resource_path_v04, "release-chunk-fast-0", "en-chunk-conll2000-fast-v0.4.pt", ]) model_map["da-pos"] = "/".join( [aws_resource_path_v04, "POS-danish", "da-pos-v0.1.pt"]) model_map["da-ner"] = "/".join( [aws_resource_path_v04, "NER-danish", "da-ner-v0.1.pt"]) model_map["de-pos"] = "/".join( [hu_path, "release-de-pos-0", "de-pos-ud-hdt-v0.5.pt"]) model_map["de-pos-tweets"] = "/".join([ aws_resource_path_v04, "POS-fine-grained-german-tweets", "de-pos-twitter-v0.1.pt", ]) model_map["de-ner"] = "/".join([ aws_resource_path_v04, "release-de-ner-0", "de-ner-conll03-v0.4.pt" ]) model_map["de-ner-germeval"] = "/".join([ aws_resource_path_v04, "NER-germeval", "de-ner-germeval-0.4.1.pt" ]) model_map["fr-ner"] = "/".join([ aws_resource_path_v04, "release-fr-ner-0", "fr-ner-wikiner-0.4.pt" ]) model_map["nl-ner"] = "/".join([ aws_resource_path_v04, "NER-conll2002-dutch", "nl-ner-conll02-v0.1.pt" ]) model_map[ "ml-pos"] = "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-xpos-model.pt" model_map[ "ml-upos"] = "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-upos-model.pt" cache_dir = Path("models") if model_name in model_map: model_name = cached_path(model_map[model_name], cache_dir=cache_dir) # the historical German taggers by the @redewiegergabe project if model_name == "de-historic-indirect": model_file = Path( flair.cache_root) / cache_dir / 'indirect' / 'final-model.pt' if not 
model_file.exists(): cached_path('http://www.redewiedergabe.de/models/indirect.zip', cache_dir=cache_dir) unzip_file( Path(flair.cache_root) / cache_dir / 'indirect.zip', Path(flair.cache_root) / cache_dir) model_name = str( Path(flair.cache_root) / cache_dir / 'indirect' / 'final-model.pt') if model_name == "de-historic-direct": model_file = Path( flair.cache_root) / cache_dir / 'direct' / 'final-model.pt' if not model_file.exists(): cached_path('http://www.redewiedergabe.de/models/direct.zip', cache_dir=cache_dir) unzip_file( Path(flair.cache_root) / cache_dir / 'direct.zip', Path(flair.cache_root) / cache_dir) model_name = str( Path(flair.cache_root) / cache_dir / 'direct' / 'final-model.pt') if model_name == "de-historic-reported": model_file = Path( flair.cache_root) / cache_dir / 'reported' / 'final-model.pt' if not model_file.exists(): cached_path('http://www.redewiedergabe.de/models/reported.zip', cache_dir=cache_dir) unzip_file( Path(flair.cache_root) / cache_dir / 'reported.zip', Path(flair.cache_root) / cache_dir) model_name = str( Path(flair.cache_root) / cache_dir / 'reported' / 'final-model.pt') if model_name == "de-historic-free-indirect": model_file = Path(flair.cache_root ) / cache_dir / 'freeIndirect' / 'final-model.pt' if not model_file.exists(): cached_path( 'http://www.redewiedergabe.de/models/freeIndirect.zip', cache_dir=cache_dir) unzip_file( Path(flair.cache_root) / cache_dir / 'freeIndirect.zip', Path(flair.cache_root) / cache_dir) model_name = str( Path(flair.cache_root) / cache_dir / 'freeIndirect' / 'final-model.pt') return model_name