def __init__(self, annotator_class='lemmatizer', language='en', component_type='lemmatizer', get_default=False,
             model=None, nlp_ref='', nlu_ref='', is_licensed=False, loaded_from_pretrained_pipe=False):
    """Construct a lemmatizer component.

    Wraps an already-built ``model`` when one is supplied; otherwise loads a
    Spark NLP lemmatizer (default model, or the pretrained model named by
    ``nlp_ref`` for ``language`` when ``get_default`` is False).
    """
    if model is not None:  # `is not None` rather than `!= None` (PEP 8)
        self.model = model
    else:
        if 'lemma' in annotator_class:
            from nlu import SparkNLPLemmatizer
            if get_default:
                self.model = SparkNLPLemmatizer.get_default_model()
            else:
                self.model = SparkNLPLemmatizer.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe)
def __init__(self, annotator_class='default_chunker', language='en', component_type='chunker', get_default=True,
             nlp_ref='', nlu_ref='', model=None, lang='en', loaded_from_pretrained_pipe=False, is_licensed=False):
    """Construct a chunker component (default chunker, n-gram generator or contextual parser).

    There are no pretrained chunker/ngram/contextual-parser models, so every
    load path resolves to the single default model regardless of ``get_default``
    (the original code called get_default_model() in both branches).
    """
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'default_chunker':
            from nlu import DefaultChunker
            self.model = DefaultChunker.get_default_model()
        elif annotator_class == 'ngram':
            from nlu import NGram
            self.model = NGram.get_default_model()
        elif annotator_class == 'contextual_parser':
            from nlu.components.chunkers.contextual_parser.contextual_parser import ContextualParser
            self.model = ContextualParser.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, lang,
                               loaded_from_pretrained_pipe, is_licensed)
def __init__(self, annotator_class='relation_extractor', lang='en', component_type='relation_extractor',
             get_default=True, model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=False,
             loaded_from_pretrained_pipe=False):
    """Construct a clinical relation-extraction component.

    The DL variant is selected when the Spark NLP reference contains 'redl';
    that check runs after the 're_' check so it takes precedence.
    """
    if 're_' in nlp_ref:
        annotator_class = 'relation_extractor'
    if 'redl' in nlp_ref:  # checked second so DL references win
        annotator_class = 'relation_extractor_dl'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'relation_extractor':
            from nlu.components.relation_extractors.relation_extractor.relation_extractor import RelationExtraction
            if trainable:
                self.model = RelationExtraction.get_default_trainable_model()
            else:
                self.model = RelationExtraction.get_pretrained_model(nlp_ref, lang, 'clinical/models')
        elif annotator_class == 'relation_extractor_dl':
            from nlu.components.relation_extractors.relation_extractor_dl.relation_extractor_dl import RelationExtractionDL
            # NOTE(review): no trainable path is wired up for the DL variant yet.
            self.model = RelationExtractionDL.get_pretrained_model(nlp_ref, lang, 'clinical/models')
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, lang,
                               loaded_from_pretrained_pipe, is_licensed)
def __init__(self, annotator_class='default_tokenizer', language='en', component_type='tokenizer', get_default=True,
             nlp_ref='', nlu_ref='', lang='en', model=None, is_licensed=False, loaded_from_pretrained_pipe=False):
    """Construct a tokenizer component.

    Explicit 'segment_words' requests — and right-to-left languages that ship a
    pretrained segmenter — are routed to a word segmenter instead of the
    default tokenizer.
    """
    if 'segment_words' in nlu_ref:
        annotator_class = 'word_segmenter'
    elif 'token' in annotator_class and language in nlu.AllComponentsInfo().all_right_to_left_langs_with_pretrained_tokenizer:
        annotator_class = 'word_segmenter'
    if model is not None:
        self.model = model
    elif annotator_class == 'default_tokenizer':
        from nlu import DefaultTokenizer
        # There are no pretrained tokenizers, only the single default one.
        self.model = DefaultTokenizer.get_default_model()
    elif annotator_class == 'word_segmenter':
        from nlu import WordSegmenter
        if get_default and language == '':
            self.model = WordSegmenter.get_default_model()
        elif get_default and language != '':
            self.model = WordSegmenter.get_default_model_for_lang(language)
        else:
            self.model = WordSegmenter.get_pretrained_model(nlp_ref, language)
    # BUG FIX: loaded_from_pretrained_pipe was previously hard-coded to True here,
    # mislabeling tokenizers that were NOT loaded from a pretrained pipeline.
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref=nlu_ref, nlp_ref=nlp_ref,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe, lang=lang)
def __init__(self, annotator_class='chunk_embedder', language='en', component_type='embeddings_chunk',
             get_default=True, nlp_ref='', model=None, nlu_ref='', lang='en', loaded_from_pretrained_pipe=False):
    """Construct a chunk-embeddings component.

    There is no pretrained chunk embedder, so both the ``get_default`` and the
    pretrained path resolve to the single default model (the original code
    called get_default_model() in both branches).
    """
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'chunk_embedder':
            from nlu import ChunkEmbedder
            self.model = ChunkEmbedder.get_default_model()
    # NOTE(review): lang and loaded_from_pretrained_pipe are accepted but not
    # forwarded to SparkNLUComponent here — confirm whether that is intentional.
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref)
def __init__(self, annotator_class='date_matcher', language='en', component_type='matcher', get_default=False,
             nlp_ref='', model=None, nlu_ref='', dataset='', is_licensed=False, loaded_from_pretrained_pipe=False):
    """Construct a matcher component (date / regex / text matcher or contextual parser).

    The concrete annotator class is inferred from the NLP/NLU references; the
    order of the checks below is significant, so do not reorder them.
    """
    if 'date' in nlp_ref or 'date' in nlu_ref:
        annotator_class = 'date_matcher'
    elif 'regex' in nlp_ref or 'regex' in nlu_ref:
        annotator_class = 'regex_matcher'
    elif 'context' in nlu_ref:
        annotator_class = 'context_parser'
    elif 'text' in nlp_ref or 'text' in nlu_ref:
        annotator_class = 'text_matcher'
    elif '_matcher' not in annotator_class:
        annotator_class = annotator_class + '_matcher'
    if model is not None:
        self.model = model
    else:
        if 'context' in annotator_class:
            from nlu.components.matchers.context_parser.context_parser import ContextParser
            is_licensed = True  # the contextual parser is a licensed (healthcare) annotator
            # Only a default model exists for the contextual parser
            # (the original called get_default_model() in both branches).
            self.model = ContextParser.get_default_model()
        elif 'text' in annotator_class:
            from nlu import TextMatcher
            if get_default or nlp_ref == 'text_matcher':
                self.model = TextMatcher.get_default_model()
            else:
                self.model = TextMatcher.get_pretrained_model(nlp_ref, language)
        elif 'date' in annotator_class:
            from nlu.components.matchers.date_matcher.date_matcher import DateMatcher as DateM
            # Only a default model exists for the date matcher.
            self.model = DateM.get_default_model()
        elif 'regex' in annotator_class:
            from nlu import RegexMatcher
            if get_default:
                self.model = RegexMatcher.get_default_model()
            else:
                # NOTE(review): this passes nlu_ref (not nlp_ref) — confirm intended.
                self.model = RegexMatcher.get_pretrained_model(nlu_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, language,
                               loaded_from_pretrained_pipe, is_licensed)
def __init__(self, annotator_class='deidentifier', lang='en', component_type='deidentifier', get_default=False,
             model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=True,
             loaded_from_pretrained_pipe=False):
    """Construct a licensed de-identification component.

    NOTE: whatever ``annotator_class`` the caller passes is overwritten —
    only one deidentifier annotator exists, so the inner guard on it was
    always true and has been removed.
    """
    annotator_class = 'deidentifier'
    if model is not None:
        self.model = model
    else:
        from nlu.components.deidentifiers.deidentifier.deidentifier import Deidentifier
        if get_default:
            self.model = Deidentifier.get_default_model()
        else:
            self.model = Deidentifier.get_pretrained_model(nlp_ref, lang)
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, lang,
                               loaded_from_pretrained_pipe, is_licensed)
def __init__(self, annotator_class='document_assembler', component_type='util', model=None,
             loaded_from_pretrained_pipe=False, nlu_ref='', nlp_ref='', lang='en', is_licensed=False):
    """Construct a utility component (document assembler, sentence detectors,
    NER/chunk converters, feature assembler, chunk merger, doc<->chunk utils).

    Every utility annotator here only has a default model, so no pretrained
    lookups happen in this constructor.
    """
    if annotator_class == 'ner_converter':
        annotator_class = 'ner_to_chunk_converter'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'document_assembler':
            from nlu import SparkNlpDocumentAssembler
            self.model = SparkNlpDocumentAssembler.get_default_model()
        elif annotator_class == 'deep_sentence_detector':
            from nlu import SentenceDetectorDeep
            self.model = SentenceDetectorDeep.get_default_model()
        elif annotator_class == 'sentence_detector':
            from nlu import SparkNLPSentenceDetector
            self.model = SparkNLPSentenceDetector.get_default_model()
        elif annotator_class == 'ner_to_chunk_converter':
            from nlu import NerToChunkConverter
            self.model = NerToChunkConverter.get_default_model()
        elif annotator_class == 'sentence_embeddings':
            from nlu import SparkNLPSentenceEmbeddings
            self.model = SparkNLPSentenceEmbeddings.get_default_model()
        elif annotator_class == 'feature_assembler':
            from nlu.components.utils.feature_assembler.feature_assembler import SparkNLPFeatureAssembler
            self.model = SparkNLPFeatureAssembler.get_default_model()
        elif annotator_class == 'ner_to_chunk_converter_licensed':
            from nlu.components.utils.ner_to_chunk_converter_licensed.ner_to_chunk_converter_licensed import NerToChunkConverterLicensed
            self.model = NerToChunkConverterLicensed.get_default_model()
        elif annotator_class == 'chunk_merger':
            from nlu.components.utils.chunk_merger.chunk_merger import ChunkMerger
            self.model = ChunkMerger.get_default_model()
        elif annotator_class == 'doc2chunk':
            from nlu.components.utils.doc2chunk.doc_2_chunk import Doc_2_Chunk
            self.model = Doc_2_Chunk.get_default_model()
        elif annotator_class == 'chunk_2_doc':
            from nlu.components.utils.chunk_2_doc.doc_2_chunk import Chunk_2_Doc
            self.model = Chunk_2_Doc.get_default_model()
    # NOTE(review): nlp_ref and is_licensed are not forwarded, and lang is passed
    # positionally right after nlu_ref (where sibling constructors pass nlp_ref) —
    # verify against SparkNLUComponent's signature.
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, lang,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe)
def __init__(self, annotator_class='context_spell', language='en', component_type='spell_checker', get_default=True,
             model=None, nlp_ref='', dataset='', nlu_ref='', is_licensed=False, loaded_from_pretrained_pipe=True):
    """Construct a spell-checker component (context / norvig / symmetric).

    The concrete checker is inferred from the references first; the order of
    those checks is significant. Licensed (clinical) context models are loaded
    from the 'clinical/models' bucket.
    """
    if 'context' in nlu_ref:
        annotator_class = 'context_spell'
    elif 'norvig' in nlu_ref:
        annotator_class = 'norvig_spell'
    elif 'spellcheck_dl' in nlp_ref:
        annotator_class = 'context_spell'
    elif 'spell.med' in nlu_ref:
        annotator_class = 'context_spell'
    elif 'spell.clinical' in nlu_ref:
        annotator_class = 'context_spell'
    elif '.med' in nlu_ref:
        annotator_class = 'context_spell'
    if model is not None:
        self.model = model
    else:
        if 'context' in annotator_class:
            from nlu import ContextSpellChecker
            if is_licensed:
                self.model = ContextSpellChecker.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            elif get_default:
                self.model = ContextSpellChecker.get_default_model()
            else:
                self.model = ContextSpellChecker.get_pretrained_model(nlp_ref, language)
        elif 'norvig' in annotator_class:
            from nlu import NorvigSpellChecker
            if get_default:
                self.model = NorvigSpellChecker.get_default_model()
            else:
                self.model = NorvigSpellChecker.get_pretrained_model(nlp_ref, language)
        elif 'symmetric' in annotator_class:
            from nlu import SymmetricSpellChecker
            if get_default:
                self.model = SymmetricSpellChecker.get_default_model()
            else:
                self.model = SymmetricSpellChecker.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe)
def __init__(self, annotator_class='sentiment_dl', component_type='classifier', model=None):
    """Wrap an already-constructed classifier model and align its input/output
    columns with the NLU defaults stored on ``self.info``."""
    self.model = model
    SparkNLUComponent.__init__(self, annotator_class, component_type)
    # Make sure input/output cols match up with NLU defaults: a single-element
    # column list is unwrapped to a plain string before being set on the model.
    in_cols = self.info.spark_input_column_names
    out_cols = self.info.spark_output_column_names
    model.setInputCols(in_cols[0] if len(in_cols) == 1 else in_cols)
    model.setOutputCol(out_cols[0] if len(out_cols) == 1 else out_cols)
def __init__(self, annotator_class='stemmer', component_type='stemmer', model=None, nlu_ref='', nlp_ref='',
             loaded_from_pretrained_pipe=False):
    """Construct a stemmer component; only a single default stemmer model exists."""
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'stemmer':
            from nlu import SparkNLPStemmer
            self.model = SparkNLPStemmer.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe)
def __init__(self, annotator_class='normalizer', language='en', component_type='normalizer', get_default=True,
             nlp_ref='', nlu_ref='', model=None, is_licensed=False, loaded_from_pretrained_pipe=False):
    """Construct a normalizer component (token, document or drug normalizer)."""
    if model is not None:
        self.model = model
    else:
        # Infer the concrete normalizer from the NLU reference; order matters
        # because plain 'norm' also matches the more specific references.
        if 'norm_document' in nlu_ref:
            annotator_class = 'document_normalizer'
        elif 'drug' in nlu_ref:
            annotator_class = 'drug_normalizer'
        elif 'norm' in nlu_ref:
            annotator_class = 'normalizer'
        if annotator_class == 'normalizer':
            from nlu import SparkNLPNormalizer
            if get_default:
                self.model = SparkNLPNormalizer.get_default_model()
            else:
                # There is no pretrained API for Normalizer in Spark NLP yet.
                self.model = SparkNLPNormalizer.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'document_normalizer':
            from nlu import SparkNLPDocumentNormalizer
            if get_default:
                self.model = SparkNLPDocumentNormalizer.get_default_model()
            else:
                self.model = SparkNLPDocumentNormalizer.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'drug_normalizer':
            from nlu.components.normalizers.drug_normalizer.drug_normalizer import DrugNorm
            is_licensed = True  # the drug normalizer is a licensed annotator
            # NOTE(review): when get_default is False, self.model is never
            # assigned on this path — confirm whether that is intentional.
            if get_default:
                self.model = DrugNorm.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe)
def __init__(self, annotator_class='t5', language='en', component_type='seq2seq', get_default=True, model=None,
             nlp_ref='', nlu_ref='', dataset='', configs='', is_licensed=False, loaded_from_pretrained_pipe=False):
    """Construct a sequence-to-sequence component (T5, or Marian translation)."""
    if 't5' in nlu_ref or 't5' in nlp_ref:
        annotator_class = 't5'
    elif 'marian' in nlu_ref or 'marian' in nlp_ref:
        annotator_class = 'marian'
    elif 'translate_to' in nlu_ref or 'translate_to' in nlp_ref or 'translate_to' in annotator_class:
        annotator_class = 'marian'
    if model is not None:
        self.model = model
    else:
        if 't5' in annotator_class:
            from nlu import T5
            if is_licensed:
                self.model = T5.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            elif get_default:
                self.model = T5.get_default_model()
            elif configs != '':
                # A specific T5 task configuration was requested.
                self.model = T5.get_preconfigured_model(nlp_ref, language, configs)
            else:
                self.model = T5.get_pretrained_model(nlp_ref, language)
        elif 'marian' in annotator_class:
            from nlu import Marian
            if get_default:
                self.model = Marian.get_default_model()
            else:
                self.model = Marian.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe)
def __init__(self, annotator_class='sentence_entity_resolver', language='en', component_type='resolution',
             get_default=True, model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=True,
             loaded_from_pretrained_pipe=False):
    """Construct an entity-resolution component (sentence- or chunk-level).

    'resolve_chunk' references select the chunk resolver; plain 'resolve'
    references select the sentence resolver.
    """
    if 'resolve' in nlu_ref and 'resolve_chunk' not in nlu_ref:
        annotator_class = 'sentence_entity_resolver'
    if 'resolve_chunk' in nlu_ref:
        annotator_class = 'chunk_entity_resolver'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'sentence_entity_resolver':
            from nlu.components.resolutions.sentence_entity_resolver.sentence_resolver import SentenceResolver
            if trainable:
                self.model = SentenceResolver.get_default_trainable_model()
            elif get_default:
                self.model = SentenceResolver.get_default_model()
            else:
                self.model = SentenceResolver.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'chunk_entity_resolver':
            from nlu.components.resolutions.chunk_entity_resolver.chunk_resolver import ChunkResolver
            if trainable:
                self.model = ChunkResolver.get_default_trainable_model()
            elif get_default:
                self.model = ChunkResolver.get_default_model()
            else:
                self.model = ChunkResolver.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, language,
                               loaded_from_pretrained_pipe, is_licensed)
def __init__(self, annotator_class='sentence_detector', language='en', component_type='sentence_detector',
             get_default=True, model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=False, lang='en',
             loaded_from_pretrained_pipe=False):
    """Construct a sentence-detector component.

    Deep-learning detection is the default; the pragmatic (rule-based) detector
    is used when the NLU reference mentions 'pragmatic' or a non-default
    annotator_class was passed.
    """
    if annotator_class == 'sentence_detector' and 'pragmatic' not in nlu_ref:
        annotator_class = 'deep_sentence_detector'  # default
    else:
        annotator_class = 'pragmatic_sentence_detector'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'deep_sentence_detector' or 'ner_dl' in nlp_ref:
            from nlu import SentenceDetectorDeep
            if trainable:
                self.model = SentenceDetectorDeep.get_trainable_model()
            elif get_default:
                self.model = SentenceDetectorDeep.get_default_model()
            else:
                self.model = SentenceDetectorDeep.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'pragmatic_sentence_detector':
            from nlu import PragmaticSentenceDetector
            if get_default:
                self.model = PragmaticSentenceDetector.get_default_model()
    # NOTE(review): lang is passed positionally right after nlu_ref (where sibling
    # constructors pass nlp_ref) and nlp_ref/is_licensed are dropped — verify
    # against SparkNLUComponent's signature.
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, lang,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe)
def __init__(self, annotator_class='assertion_dl', lang='en', component_type='assertion', get_default=True,
             model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=False,
             loaded_from_pretrained_pipe=False):
    """Construct an assertion-status component (DL or logistic-regression based)."""
    if 'jsl' in nlu_ref:
        annotator_class = 'assertion_dl'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'assertion_dl':
            from nlu.components.assertions.assertion_dl.assertion_dl import AssertionDL
            if trainable:
                self.model = AssertionDL.get_default_trainable_model()
            elif get_default:
                self.model = AssertionDL.get_default_model()
            else:
                self.model = AssertionDL.get_pretrained_model(nlp_ref, lang)
        elif annotator_class == 'assertion_log_reg':
            from nlu.components.assertions.assertion_log_reg.assertion_log_reg import AssertionLogReg
            if trainable:
                self.model = AssertionLogReg.get_default_trainable_model()
            elif get_default:
                self.model = AssertionLogReg.get_default_model()
            else:
                self.model = AssertionLogReg.get_pretrained_model(nlp_ref, lang)
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, lang,
                               loaded_from_pretrained_pipe, is_licensed)
def __init__(self, annotator_class='unlabeled_dependency_parser', language='en', component_type='dependency_untyped',
             get_default=True, nlp_ref='', nlu_ref='', model=None, loaded_from_pretrained_pipe=False,
             is_licensed=False):
    """Construct an unlabeled (untyped) dependency-parser component."""
    if model is not None:
        self.model = model
    # NOTE: the 'dep.untyped' test is redundant ('dep' already matches it) but kept for clarity.
    elif 'dep' in annotator_class or 'dep.untyped' in annotator_class or annotator_class == 'unlabeled_dependency_parser':
        from nlu.components.dependency_untypeds.unlabeled_dependency_parser.unlabeled_dependency_parser import UnlabeledDependencyParser
        if get_default:
            self.model = UnlabeledDependencyParser.get_default_model()
        else:
            self.model = UnlabeledDependencyParser.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, language,
                               loaded_from_pretrained_pipe, is_licensed)
def __init__(self, annotator_class='stopwordcleaner', lang='en', component_type='stopwordscleaner',
             get_default=False, model=None, nlp_ref='', nlu_ref='', loaded_from_pretrained_pipe=False,
             is_licensed=False):
    """Construct a stop-words cleaner component."""
    if model is not None:
        self.model = model
    else:
        if 'stop' in annotator_class:
            from nlu import NLUStopWordcleaner
            if get_default:
                self.model = NLUStopWordcleaner.get_default_model()
            else:
                self.model = NLUStopWordcleaner.get_pretrained_model(nlp_ref, lang)
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, lang,
                               loaded_from_pretrained_pipe, is_licensed)
def __init__(self, annotator_class='glove', lang='en', component_type='embedding', get_default=True, model=None,
             nlp_ref='', nlu_ref='', is_licensed=False, resolution_ref='', loaded_from_pretrained_pipe=False,
             do_ref_checks=True):
    """Construct a word/sentence embeddings component.

    The concrete embedding architecture is first inferred from the NLU/NLP
    references (sentence-level variants are checked before token-level ones),
    then either the supplied ``model`` is wrapped or the matching model is
    loaded. The order of the reference checks is significant — do not reorder.
    """
    if do_ref_checks:
        if 'use' in nlu_ref and 'bert' not in nlu_ref or 'tfhub_use' in nlp_ref and 'bert' not in nlp_ref:
            annotator_class = 'use'  # first check for sentence, then token embeddings
        elif 'longformer' in nlu_ref:
            annotator_class = 'longformer'
        elif 'doc2vec' in nlu_ref:
            annotator_class = 'doc2vec'
        elif 'sent' in nlu_ref and 'xlm_roberta' in nlu_ref:
            annotator_class = 'sentence_xlm'
        elif 'xlm' in nlu_ref or 'xlm' in nlp_ref:
            annotator_class = 'xlm'
        elif 'roberta' in nlu_ref or 'roberta' in nlp_ref:
            annotator_class = 'roberta'
        elif 'distil' in nlu_ref or 'distil' in nlp_ref:
            annotator_class = 'distil_bert'
        elif 'bert' in nlp_ref and 'albert' not in nlp_ref and 'sent' in nlp_ref:
            annotator_class = 'sentence_bert'
        elif 'bert' in nlu_ref and 'albert' not in nlu_ref and 'sent' in nlu_ref:
            annotator_class = 'sentence_bert'
        elif 'bert' in nlu_ref and 'use' in nlu_ref:
            annotator_class = 'sentence_bert'
        elif 'elmo' in nlp_ref:
            annotator_class = 'elmo'
        elif 'elmo' in nlu_ref:
            annotator_class = 'elmo'
        elif 'electra' in nlp_ref and 'sent' in nlp_ref:
            annotator_class = 'sentence_bert'
        elif 'electra' in nlu_ref and 'sent' in nlu_ref:
            annotator_class = 'sentence_bert'
        elif 'bert' in nlu_ref and 'albert' not in nlu_ref:
            annotator_class = 'bert'
        elif 'bert' in nlp_ref and 'albert' not in nlp_ref:
            annotator_class = 'bert'
        elif 'electra' in nlu_ref or 'electra' in nlp_ref:
            annotator_class = 'bert'
        elif 'labse' in nlu_ref or 'labse' in nlp_ref:
            annotator_class = 'sentence_bert'
        elif 'tfhub' in nlu_ref or 'tfhub' in nlp_ref:
            annotator_class = 'use'
        elif 'glove' in nlu_ref or 'glove' in nlp_ref:
            annotator_class = 'glove'
        elif 'cc_300d' in nlu_ref or 'cc_300d' in nlp_ref:
            annotator_class = 'glove'
        elif 'albert' in nlu_ref or 'albert' in nlp_ref:
            annotator_class = 'albert'
        elif 'xlnet' in nlu_ref or 'xlnet' in nlp_ref:
            annotator_class = 'xlnet'
        # Default component models for nlu actions that don't specify a particular model
        elif 'embed_sentence' in nlu_ref:
            annotator_class = 'glove'
        elif 'embed' in nlu_ref:
            annotator_class = 'glove'
    if model is not None:
        self.model = model
    else:
        # Check if this lang has embeddings; if NOT, fall back to multi-lang 'xx'.
        multi_lang_embeds = ['th']
        if lang in multi_lang_embeds:
            lang = 'xx'
        if 'sentence_xlm' == annotator_class:
            from nlu import Sentence_XLM
            if get_default:
                self.model = Sentence_XLM.get_default_model()
            else:
                self.model = Sentence_XLM.get_pretrained_model(nlp_ref, lang)
        elif 'doc2vec' == annotator_class:
            from nlu import Doc2Vec
            if get_default:
                self.model = Doc2Vec.get_default_model()
            else:
                self.model = Doc2Vec.get_pretrained_model(nlp_ref, lang)
        elif 'longformer' == annotator_class:
            from nlu import Longformer
            if get_default:
                self.model = Longformer.get_default_model()
            else:
                self.model = Longformer.get_pretrained_model(nlp_ref, lang)
        elif 'xlm' == annotator_class:
            from nlu import XLM
            if get_default:
                self.model = XLM.get_default_model()
            else:
                self.model = XLM.get_pretrained_model(nlp_ref, lang)
        elif 'roberta' == annotator_class:
            from nlu import Roberta
            if get_default:
                self.model = Roberta.get_default_model()
            else:
                self.model = Roberta.get_pretrained_model(nlp_ref, lang)
        elif 'distil_bert' == annotator_class:
            from nlu import DistilBert
            if get_default:
                self.model = DistilBert.get_default_model()
            else:
                self.model = DistilBert.get_pretrained_model(nlp_ref, lang)
        elif 'albert' == annotator_class:
            from nlu import SparkNLPAlbert
            if get_default:
                self.model = SparkNLPAlbert.get_default_model()
            else:
                self.model = SparkNLPAlbert.get_pretrained_model(nlp_ref, lang)
        elif 'bert' in annotator_class and 'sent' in annotator_class:
            from nlu import BertSentence
            if get_default:
                self.model = BertSentence.get_default_model()
            elif is_licensed:
                self.model = BertSentence.get_pretrained_model(nlp_ref, lang, 'clinical/models')
            else:
                self.model = BertSentence.get_pretrained_model(nlp_ref, lang)
        elif 'electra' in annotator_class and 'sent' in annotator_class:
            from nlu import BertSentence
            if get_default:
                self.model = BertSentence.get_default_model()
            elif is_licensed:
                self.model = BertSentence.get_pretrained_model(nlp_ref, lang, 'clinical/models')
            else:
                self.model = BertSentence.get_pretrained_model(nlp_ref, lang)
        elif 'bert' in annotator_class:
            from nlu import SparkNLPBert
            if get_default:
                self.model = SparkNLPBert.get_default_model()
            elif is_licensed:
                self.model = SparkNLPBert.get_pretrained_model(nlp_ref, lang, 'clinical/models')
            else:
                self.model = SparkNLPBert.get_pretrained_model(nlp_ref, lang)
        elif 'elmo' in annotator_class:
            from nlu import SparkNLPElmo
            if get_default:
                self.model = SparkNLPElmo.get_default_model()
            else:
                self.model = SparkNLPElmo.get_pretrained_model(nlp_ref, lang)
        elif 'xlnet' in annotator_class:
            from nlu import SparkNLPXlnet
            if get_default:
                self.model = SparkNLPXlnet.get_default_model()
            else:
                self.model = SparkNLPXlnet.get_pretrained_model(nlp_ref, lang)
        elif 'use' in annotator_class:
            from nlu import SparkNLPUse
            if get_default:
                self.model = SparkNLPUse.get_default_model()
            else:
                self.model = SparkNLPUse.get_pretrained_model(nlp_ref, lang)
        elif 'glove' in annotator_class:
            from nlu import Glove
            # Simplified from the original: the redundant
            # `annotator_class == 'glove' and get_default == True` branch and the
            # duplicated pretrained calls collapsed into one equivalent chain.
            if get_default:
                self.model = Glove.get_default_model()
            elif is_licensed:
                self.model = Glove.get_pretrained_model(nlp_ref, lang, 'clinical/models')
            else:
                if nlp_ref == 'glove_840B_300' or nlp_ref == 'glove_6B_300':
                    # Any reference to these GloVe embeddings is actually a
                    # reference to the multilingual ones.
                    lang = 'xx'
                self.model = Glove.get_pretrained_model(nlp_ref, lang)
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, lang,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe)
def __init__(self, annotator_class='sentiment_dl', language='en', component_type='classifier', get_default=True,
             model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=False, do_ref_checks=True,
             loaded_from_pretrained_pipe=False):
    """Construct a classifier component (sentiment, NER, POS, sequence/token
    transformer classifiers, language detection, YAKE keyword extraction, ...).

    The concrete annotator is inferred from the NLU/NLP references; the order
    of the checks is significant — more specific references are tested first.
    When a pre-built ``model`` is supplied, NER models are additionally
    configured to emit confidence scores.
    """
    if do_ref_checks:
        if 'e2e' in nlu_ref or 'toxic' in nlu_ref:
            annotator_class = 'multi_classifier'
        elif 'e2e' in nlp_ref or 'toxic' in nlp_ref:
            annotator_class = 'multi_classifier'
        elif 'distilbert_sequence' in nlp_ref or 'distilbert_sequence' in nlu_ref:
            annotator_class = 'seq_distilbert'
        elif 'bert_sequence' in nlp_ref or 'bert_sequence' in nlu_ref:
            annotator_class = 'seq_bert'
        elif 'token_bert' in nlp_ref or 'token_bert' in nlu_ref:
            annotator_class = 'token_bert'
        elif 'token_distilbert' in nlp_ref or 'token_distilbert' in nlu_ref:
            annotator_class = 'token_distilbert'
        elif 'token_distilroberta' in nlp_ref or 'token_distilroberta' in nlu_ref:
            annotator_class = 'token_roberta'
        elif 'token_xlm_roberta' in nlp_ref or 'token_xlm_roberta' in nlu_ref:
            annotator_class = 'token_xlm_roberta'
        elif 'token_roberta' in nlp_ref or 'token_roberta' in nlu_ref:
            annotator_class = 'token_roberta'
        elif 'token_albert' in nlp_ref or 'token_albert' in nlu_ref:
            annotator_class = 'token_albert'
        elif 'token_xlnet' in nlp_ref or 'token_xlnet' in nlu_ref:
            annotator_class = 'token_xlnet'
        elif 'token_longformer' in nlp_ref or 'token_longformer' in nlu_ref:
            annotator_class = 'token_longformer'
        elif 'multiclassifierdl' in nlp_ref:
            annotator_class = 'multi_classifier'
        elif 'classifierdl' in nlp_ref:
            annotator_class = 'classifier_dl'
        elif 'yake' in nlu_ref:
            annotator_class = 'yake'
        elif 'yake' in nlp_ref:
            annotator_class = 'yake'
        elif 'sentimentdl' in nlp_ref:
            annotator_class = 'sentiment_dl'
        # BUG FIX: the second operand used to test nlp_ref twice; it now tests nlu_ref.
        elif 'vivekn' in nlp_ref or 'vivekn' in nlu_ref:
            annotator_class = 'vivekn_sentiment'
        elif 'wiki_' in nlu_ref or 'wiki_' in nlp_ref:
            annotator_class = 'language_detector'
        elif 'pos' in nlu_ref and 'ner' not in nlu_ref:
            annotator_class = 'pos'
        elif 'pos' in nlp_ref and 'ner' not in nlp_ref:
            annotator_class = 'pos'
        elif 'icd' in nlu_ref and 'med_ner' not in nlu_ref:
            annotator_class = 'classifier_dl'
        elif 'med_ner' in nlu_ref:
            annotator_class = 'ner_healthcare'
        elif 'generic_classifier' in nlu_ref:
            annotator_class = 'generic_classifier'
        elif 'ner' in nlu_ref and 'generic' not in nlu_ref:
            annotator_class = 'ner'
        elif 'ner' in nlp_ref and 'generic' not in nlp_ref:
            annotator_class = 'ner'
    if model is not None:
        self.model = model
        # Pre-built NER models should also emit confidence scores.
        from sparknlp.annotator import NerDLModel, NerCrfModel
        if isinstance(self.model, (NerDLModel, NerCrfModel)):
            self.model.setIncludeConfidence(True)
        elif is_licensed:
            from sparknlp_jsl.annotator import MedicalNerModel
            if isinstance(self.model, MedicalNerModel):
                self.model.setIncludeConfidence(True)
    else:
        if 'seq_distilbert' == annotator_class:
            from nlu import SeqDilstilBertClassifier
            if get_default:
                self.model = SeqDilstilBertClassifier.get_default_model()
            elif is_licensed:
                self.model = SeqDilstilBertClassifier.get_pretrained_model(nlp_ref, language, 'clinical/models')
            else:
                self.model = SeqDilstilBertClassifier.get_pretrained_model(nlp_ref, language)
        elif 'seq_bert' == annotator_class:
            from nlu import SeqBertClassifier
            if get_default:
                self.model = SeqBertClassifier.get_default_model()
            elif is_licensed:
                self.model = SeqBertClassifier.get_pretrained_model(nlp_ref, language, 'clinical/models')
            else:
                self.model = SeqBertClassifier.get_pretrained_model(nlp_ref, language)
        elif 'sentiment' in annotator_class and 'vivekn' not in annotator_class:
            from nlu import SentimentDl
            if trainable:
                self.model = SentimentDl.get_default_trainable_model()
            elif is_licensed:
                self.model = SentimentDl.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            elif get_default:
                self.model = SentimentDl.get_default_model()
            else:
                self.model = SentimentDl.get_pretrained_model(nlp_ref, language)
        elif 'token_distilbert' == annotator_class:
            from nlu import TokenDistilBert
            if get_default:
                self.model = TokenDistilBert.get_default_model()
            elif is_licensed:
                self.model = TokenDistilBert.get_pretrained_model(nlp_ref, language, 'clinical/models')
            else:
                self.model = TokenDistilBert.get_pretrained_model(nlp_ref, language)
        elif 'token_bert' == annotator_class:
            from nlu import TokenBert
            if get_default:
                self.model = TokenBert.get_default_model()
            elif is_licensed:
                self.model = TokenBert.get_pretrained_model(nlp_ref, language, 'clinical/models')
            else:
                self.model = TokenBert.get_pretrained_model(nlp_ref, language)
        elif 'token_xlm_roberta' == annotator_class:
            from nlu import TokenXlmRoBerta
            if get_default:
                self.model = TokenXlmRoBerta.get_default_model()
            elif is_licensed:
                self.model = TokenXlmRoBerta.get_pretrained_model(nlp_ref, language, 'clinical/models')
            else:
                self.model = TokenXlmRoBerta.get_pretrained_model(nlp_ref, language)
        elif 'token_roberta' == annotator_class:
            from nlu import TokenRoBerta
            if get_default:
                self.model = TokenRoBerta.get_default_model()
            elif is_licensed:
                self.model = TokenRoBerta.get_pretrained_model(nlp_ref, language, 'clinical/models')
            else:
                self.model = TokenRoBerta.get_pretrained_model(nlp_ref, language)
        elif 'token_albert' == annotator_class:
            from nlu import TokenAlbert
            if get_default:
                self.model = TokenAlbert.get_default_model()
            elif is_licensed:
                self.model = TokenAlbert.get_pretrained_model(nlp_ref, language, 'clinical/models')
            else:
                self.model = TokenAlbert.get_pretrained_model(nlp_ref, language)
        elif 'token_longformer' == annotator_class:
            from nlu import TokenLongFormer
            if get_default:
                self.model = TokenLongFormer.get_default_model()
            elif is_licensed:
                self.model = TokenLongFormer.get_pretrained_model(nlp_ref, language, 'clinical/models')
            else:
                self.model = TokenLongFormer.get_pretrained_model(nlp_ref, language)
        elif 'token_xlnet' == annotator_class:
            from nlu import TokenXlnet
            if get_default:
                self.model = TokenXlnet.get_default_model()
            elif is_licensed:
                self.model = TokenXlnet.get_pretrained_model(nlp_ref, language, 'clinical/models')
            else:
                self.model = TokenXlnet.get_pretrained_model(nlp_ref, language)
        elif 'generic_classifier' in annotator_class:
            from nlu.components.classifiers.generic_classifier.generic_classifier import GenericClassifier
            if trainable:
                self.model = GenericClassifier.get_default_trainable_model()
            else:
                self.model = GenericClassifier.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
        elif 'vivekn' in annotator_class:
            from nlu import ViveknSentiment
            if get_default:
                self.model = ViveknSentiment.get_default_model()
            else:
                self.model = ViveknSentiment.get_pretrained_model(nlp_ref, language)
        elif 'ner' in annotator_class and 'ner_healthcare' not in annotator_class:
            from nlu import NERDL
            if trainable:
                self.model = NERDL.get_default_trainable_model()
            elif is_licensed:
                self.model = NERDL.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            elif get_default:
                self.model = NERDL.get_default_model()
            else:
                self.model = NERDL.get_pretrained_model(nlp_ref, language)
            if hasattr(self, 'model'):
                self.model.setIncludeConfidence(True)
        elif 'ner.crf' in annotator_class:
            from nlu import NERDLCRF
            if get_default:
                self.model = NERDLCRF.get_default_model()
            else:
                self.model = NERDLCRF.get_pretrained_model(nlp_ref, language)
            if hasattr(self, 'model'):
                self.model.setIncludeConfidence(True)
        elif ('classifier_dl' in annotator_class or annotator_class == 'toxic') and 'multi' not in annotator_class:
            from nlu import ClassifierDl
            if trainable:
                self.model = ClassifierDl.get_trainable_model()
            elif is_licensed:
                self.model = ClassifierDl.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            elif get_default:
                self.model = ClassifierDl.get_default_model()
            else:
                self.model = ClassifierDl.get_pretrained_model(nlp_ref, language)
            if hasattr(self.model, 'setIncludeConfidence'):
                self.model.setIncludeConfidence(True)
        elif 'language_detector' in annotator_class:
            from nlu import LanguageDetector
            if get_default:
                self.model = LanguageDetector.get_default_model()
            else:
                self.model = LanguageDetector.get_pretrained_model(nlp_ref, language)
        elif 'pos' in annotator_class:
            from nlu import PartOfSpeechJsl
            if trainable:
                self.model = PartOfSpeechJsl.get_default_trainable_model()
            elif get_default:
                self.model = PartOfSpeechJsl.get_default_model()
            elif is_licensed:
                self.model = PartOfSpeechJsl.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            else:
                self.model = PartOfSpeechJsl.get_pretrained_model(nlp_ref, language)
        elif 'yake' in annotator_class:
            from nlu import Yake
            self.model = Yake.get_default_model()
        elif 'multi_classifier' in annotator_class:
            from nlu import MultiClassifier
            if trainable:
                self.model = MultiClassifier.get_default_trainable_model()
            elif get_default:
                self.model = MultiClassifier.get_default_model()
            else:
                self.model = MultiClassifier.get_pretrained_model(nlp_ref, language)
        elif 'ner_healthcare' in annotator_class:
            from nlu.components.classifiers.ner_healthcare.ner_dl_healthcare import NERDLHealthcare
            if trainable:
                self.model = NERDLHealthcare.get_default_trainable_model()
            else:
                self.model = NERDLHealthcare.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, language,
                               loaded_from_pretrained_pipe, is_licensed)