def __init__(self, annotator_class='default_tokenizer', language='en', component_type='tokenizer',
             get_default=True, nlp_ref='', nlu_ref='', model=None):
    """Resolve and load a tokenizer model (default tokenizer or word segmenter).

    A word segmenter is selected for explicit ``segment_words`` requests and for
    right-to-left languages that ship a pretrained segmenter; otherwise the
    default rule-based tokenizer is used.
    """
    if 'segment_words' in nlu_ref:
        annotator_class = 'word_segmenter'
    elif 'token' in annotator_class and language in nlu.AllComponentsInfo().all_right_to_left_langs_with_pretrained_tokenizer:
        annotator_class = 'word_segmenter'
    elif 'token' in annotator_class and 'regex' not in annotator_class:
        annotator_class = 'default_tokenizer'
    if model is not None:  # caller supplied a ready model; skip resolution
        self.model = model
    elif annotator_class == 'default_tokenizer':
        from nlu import DefaultTokenizer
        if get_default:
            self.model = DefaultTokenizer.get_default_model()
        else:
            # There are no pretrained tokenizers, only the default one.
            self.model = DefaultTokenizer.get_default_model()
    elif annotator_class == 'word_segmenter':
        from nlu import WordSegmenter
        if get_default and language == '':
            self.model = WordSegmenter.get_default_model()
        elif get_default and language != '':
            self.model = WordSegmenter.get_default_model_for_lang(language)
        else:
            self.model = WordSegmenter.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='normalizer', language='en', component_type='normalizer',
             get_default=True, nlp_ref='', nlu_ref='', model=None):
    """Resolve and load a token-level or document-level normalizer model."""
    if model is not None:
        self.model = model
    else:
        # The nlu reference decides between token-level and document-level normalization.
        if 'norm_document' in nlu_ref:
            annotator_class = 'document_normalizer'
        elif 'norm' in nlu_ref:
            annotator_class = 'normalizer'
        if annotator_class == 'normalizer':
            from nlu import SparkNLPNormalizer
            if get_default:
                self.model = SparkNLPNormalizer.get_default_model()
            else:
                # There is no pretrained API for Normalizer in Spark NLP yet.
                self.model = SparkNLPNormalizer.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'document_normalizer':
            from nlu import SparkNLPDocumentNormalizer
            if get_default:
                self.model = SparkNLPDocumentNormalizer.get_default_model()
            else:
                self.model = SparkNLPDocumentNormalizer.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='sentence_detector', language='en', component_type='sentence_detector',
             get_default=True, model=None, nlp_ref='', nlu_ref='', trainable=False):
    """Resolve and load a sentence detector (deep-learning based or pragmatic rule based)."""
    if annotator_class == 'sentence_detector' and 'pragmatic' not in nlu_ref:
        annotator_class = 'deep_sentence_detector'  # default
    else:
        # NOTE(review): any other incoming annotator_class is forced to pragmatic
        # here — confirm this clobbering is intended.
        annotator_class = 'pragmatic_sentence_detector'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'deep_sentence_detector' or 'ner_dl' in nlp_ref:
            from nlu import SentenceDetectorDeep
            if trainable:
                self.model = SentenceDetectorDeep.get_trainable_model()
            elif get_default:
                self.model = SentenceDetectorDeep.get_default_model()
            else:
                self.model = SentenceDetectorDeep.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'pragmatic_sentence_detector':
            from nlu import PragmaticSentenceDetector
            if get_default:
                self.model = PragmaticSentenceDetector.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, component_name='stemmer', component_type='stemmer', model=None):
    """Load a stemmer model, or wrap a caller-supplied one.

    Bugfix: the base-class initializer is now always invoked; previously it was
    skipped whenever a prebuilt ``model`` was passed in (the annotator_class
    variant of this initializer always calls it).
    """
    SparkNLUComponent.__init__(self, component_name, component_type)
    if model is not None:
        self.model = model
    elif component_name == 'stemmer':
        from nlu import SparkNLPStemmer
        self.model = SparkNLPStemmer.get_default_model()
def __init__(self, component_name='chunk_embedder', language='en', component_type='embeddings_chunk',
             get_default=True, sparknlp_reference='', model=None):
    """Load a chunk embedder model, or wrap a caller-supplied one."""
    SparkNLUComponent.__init__(self, component_name, component_type)
    if model is not None:
        self.model = model
    else:
        if component_name == 'chunk_embedder':
            from nlu import ChunkEmbedder
            if get_default:
                self.model = ChunkEmbedder.get_default_model()
            else:
                # There are no pretrained chunk embedders, only the default one.
                self.model = ChunkEmbedder.get_default_model()
def __init__(self, annotator_class='lemmatizer', language='en', component_type='lemmatizer',
             get_default=False, model=None, nlp_ref='', nlu_ref=''):
    """Load a lemmatizer model — default or pretrained by Spark NLP reference."""
    SparkNLUComponent.__init__(self, annotator_class, component_type)
    if model is not None:
        self.model = model
    else:
        if 'lemma' in annotator_class:
            from nlu import SparkNLPLemmatizer
            if get_default:
                self.model = SparkNLPLemmatizer.get_default_model()
            else:
                self.model = SparkNLPLemmatizer.get_pretrained_model(nlp_ref, language)
def __init__(self, annotator_class='default_tokenizer', language='en', component_type='tokenizer',
             get_default=True, nlp_ref='', nlu_ref='', model=None):
    """Load the default tokenizer model, or wrap a caller-supplied one."""
    if 'token' in annotator_class and 'regex' not in annotator_class:
        annotator_class = 'default_tokenizer'
    if model is not None:
        self.model = model
    else:
        from nlu import DefaultTokenizer
        if get_default:
            self.model = DefaultTokenizer.get_default_model()
        else:
            # There are no pretrained tokenizers, only the default one.
            self.model = DefaultTokenizer.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='stemmer', component_type='stemmer', model=None, nlu_ref='', nlp_ref=''):
    """Load a stemmer model, or wrap a caller-supplied one."""
    SparkNLUComponent.__init__(self, annotator_class, component_type)
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'stemmer':
            from nlu import SparkNLPStemmer
            self.model = SparkNLPStemmer.get_default_model()
def __init__(self, component_name='default_tokenizer', language='en', component_type='tokenizer',
             get_default=True, sparknlp_reference=''):
    """Load the default tokenizer model."""
    # Any token-like request maps to the single default tokenizer implementation.
    if 'token' in component_name:
        component_name = 'default_tokenizer'
    SparkNLUComponent.__init__(self, component_name, component_type)
    if 'token' in component_name or component_name == 'default_tokenizer':
        from nlu import DefaultTokenizer
        # Only a default tokenizer exists, so get_default has no effect on the result.
        self.model = DefaultTokenizer.get_default_model()
def __init__(self, component_name='normalizer', language='en', component_type='normalizer',
             get_default=True, sparknlp_reference=''):
    """Load a normalizer model — default or pretrained by Spark NLP reference."""
    SparkNLUComponent.__init__(self, component_name, component_type)
    if component_name == 'normalizer':
        from nlu import SparkNLPNormalizer
        if not get_default:
            # There is no pretrained API for Normalizer in Spark NLP yet.
            self.model = SparkNLPNormalizer.get_pretrained_model(sparknlp_reference, language)
        else:
            self.model = SparkNLPNormalizer.get_default_model()
def __init__(self, component_name='labeled_dependency_parser', language='en', component_type='dependency_typed',
             get_default=True, sparknlp_reference=''):
    """Load a labeled (typed) dependency parser model."""
    SparkNLUComponent.__init__(self, component_name, component_type)
    if 'dep' in component_name:
        from nlu.components.dependency_typeds.labeled_dependency_parser.labeled_dependency_parser import \
            LabeledDependencyParser
        if not get_default:
            self.model = LabeledDependencyParser.get_pretrained_model(sparknlp_reference, language)
        else:
            self.model = LabeledDependencyParser.get_default_model()
def __init__(self, component_name='unlabeled_dependency_parser', language='en', component_type='dependency_untyped',
             get_default=True, sparknlp_reference=''):
    """Load an unlabeled (untyped) dependency parser model."""
    SparkNLUComponent.__init__(self, component_name, component_type)
    # Match explicit names as well as any 'dep' style shorthand reference.
    is_dep = ('dep' in component_name
              or 'dep.untyped' in component_name
              or component_name == 'unlabeled_dependency_parser')
    if is_dep:
        from nlu.components.dependency_untypeds.unlabeled_dependency_parser.unlabeled_dependency_parser import UnlabeledDependencyParser
        if not get_default:
            self.model = UnlabeledDependencyParser.get_pretrained_model(sparknlp_reference, language)
        else:
            self.model = UnlabeledDependencyParser.get_default_model()
def __init__(self, annotator_class='labeled_dependency_parser', language='en', component_type='dependency_typed',
             get_default=True, nlp_ref='', nlu_ref=''):
    """Load a labeled (typed) dependency parser model."""
    SparkNLUComponent.__init__(self, annotator_class, component_type)
    if 'dep' in annotator_class:
        from nlu.components.dependency_typeds.labeled_dependency_parser.labeled_dependency_parser import \
            LabeledDependencyParser
        if not get_default:
            self.model = LabeledDependencyParser.get_pretrained_model(nlp_ref, language)
        else:
            self.model = LabeledDependencyParser.get_default_model()
def __init__(self, annotator_class='chunk_embedder', language='en', component_type='embeddings_chunk',
             get_default=True, nlp_ref='', model=None, nlu_ref=''):
    """Load a chunk embedder model, or wrap a caller-supplied one."""
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'chunk_embedder':
            from nlu import ChunkEmbedder
            if get_default:
                self.model = ChunkEmbedder.get_default_model()
            else:
                # There are no pretrained chunk embedders, only the default one.
                self.model = ChunkEmbedder.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='t5', language='en', component_type='seq2seq',
             get_default=True, model=None, nlp_ref='', nlu_ref='', dataset='', configs=''):
    """Resolve and load a sequence-to-sequence model (T5 or Marian translation)."""
    # The nlu/nlp references take priority over the incoming annotator_class.
    if 't5' in nlu_ref or 't5' in nlp_ref:
        annotator_class = 't5'
    elif 'marian' in nlu_ref or 'marian' in nlp_ref:
        annotator_class = 'marian'
    elif 'translate_to' in nlu_ref or 'translate_to' in nlp_ref or 'translate_to' in annotator_class:
        annotator_class = 'marian'
    if model is not None:
        self.model = model
    else:
        if 't5' in annotator_class:
            from nlu import T5
            if get_default:
                self.model = T5.get_default_model()
            elif configs != '':
                # A task-specific configuration was requested for the T5 model.
                self.model = T5.get_preconfigured_model(nlp_ref, language, configs)
            else:
                self.model = T5.get_pretrained_model(nlp_ref, language)
        elif 'marian' in annotator_class:
            from nlu import Marian
            if get_default:
                self.model = Marian.get_default_model()
            else:
                self.model = Marian.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, component_name='date_matcher', language='en', component_type='matcher',
             get_default=True, model=None, sparknlp_reference='', dataset=''):
    """Load a text/date/regex matcher model."""
    if '_matcher' not in component_name:
        component_name += '_matcher'
    SparkNLUComponent.__init__(self, component_name, component_type)
    if model is not None:
        self.model = model
    else:
        if 'text' in component_name:
            from nlu import TextMatcher
            if get_default:
                self.model = TextMatcher.get_default_model()
            else:
                self.model = TextMatcher.get_pretrained_model(sparknlp_reference, language)
        elif 'date' in component_name:
            from nlu import DateMatcher
            # Only a default date matcher is loaded here (no pretrained branch).
            if get_default:
                self.model = DateMatcher.get_default_model()
        elif 'regex' in component_name:
            from nlu import RegexMatcher
            if get_default:
                self.model = RegexMatcher.get_default_model()
            else:
                self.model = RegexMatcher.get_pretrained_model(sparknlp_reference, language)
def __init__(self, component_name='stopwordcleaner', language='en', component_type='stopwordscleaner',
             get_default=False, model=None, sparknlp_reference=''):
    """Load a stop-word cleaner model — default or pretrained by Spark NLP reference."""
    SparkNLUComponent.__init__(self, component_name, component_type)
    if model is not None:
        self.model = model
    else:
        if 'stop' in component_name:
            from nlu import NLUStopWordcleaner
            if get_default:
                self.model = NLUStopWordcleaner.get_default_model()
            else:
                self.model = NLUStopWordcleaner.get_pretrained_model(sparknlp_reference, language)
def __init__(self, annotator_class='stopwordcleaner', language='en', component_type='stopwordscleaner',
             get_default=False, model=None, nlp_ref='', nlu_ref=''):
    """Load a stop-word cleaner model — default or pretrained by Spark NLP reference."""
    if model is not None:
        self.model = model
    else:
        if 'stop' in annotator_class:
            from nlu import NLUStopWordcleaner
            if get_default:
                self.model = NLUStopWordcleaner.get_default_model()
            else:
                self.model = NLUStopWordcleaner.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, component_name='document_assembler', component_type='util', model=None):
    """Load a utility component (document assembler, sentence detector, or NER chunk converter)."""
    if component_name == 'ner_converter':
        component_name = 'ner_to_chunk_converter'
    SparkNLUComponent.__init__(self, component_name, component_type)
    if model is not None:
        self.model = model
    else:
        if component_name == 'document_assembler':
            from nlu import SparkNlpDocumentAssembler
            self.model = SparkNlpDocumentAssembler.get_default_model()
        elif component_name == 'sentence_detector':
            from nlu import SparkNLPSentenceDetector
            self.model = SparkNLPSentenceDetector.get_default_model()
        elif component_name == 'ner_to_chunk_converter':
            from nlu import NerToChunkConverter
            self.model = NerToChunkConverter.get_default_model()
def __init__(self, annotator_class='unlabeled_dependency_parser', language='en', component_type='dependency_untyped',
             get_default=True, nlp_ref='', nlu_ref='', model=None):
    """Load an unlabeled (untyped) dependency parser model, or wrap a caller-supplied one."""
    if model is not None:
        self.model = model
    elif 'dep' in annotator_class or 'dep.untyped' in annotator_class or annotator_class == 'unlabeled_dependency_parser':
        from nlu.components.dependency_untypeds.unlabeled_dependency_parser.unlabeled_dependency_parser import UnlabeledDependencyParser
        if get_default:
            self.model = UnlabeledDependencyParser.get_default_model()
        else:
            self.model = UnlabeledDependencyParser.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, component_name='lemma', language='en', component_type='lemmatizer',
             get_default=False, model=None, sparknlp_reference=''):
    """Load a lemmatizer model — default or pretrained by Spark NLP reference."""
    component_name = 'lemmatizer'  # incoming value is always normalized to 'lemmatizer'
    SparkNLUComponent.__init__(self, component_name, component_type)
    if model is not None:
        self.model = model
    else:
        if 'lemma' in component_name:
            from nlu import SparkNLPLemmatizer
            if get_default:
                self.model = SparkNLPLemmatizer.get_default_model()
            else:
                self.model = SparkNLPLemmatizer.get_pretrained_model(sparknlp_reference, language)
def __init__(self, component_name='pragmatic_sentence_detector', language='en', component_type='sentence_detector',
             get_default=False, model=None, sparknlp_reference=''):
    """Load a sentence detector (deep-learning based or pragmatic rule based)."""
    if component_name == 'sentence_detector':
        component_name = 'deep_sentence_detector'  # default
    SparkNLUComponent.__init__(self, component_name, component_type)
    if model is not None:
        self.model = model
    else:
        if component_name == 'deep_sentence_detector' or 'ner_dl' in sparknlp_reference:
            # Import scoped locally to work around an import issue when done outside this scope.
            from nlu import SentenDetectorDeep
            if get_default:
                self.model = SentenDetectorDeep.get_default_model()
            # NOTE(review): no pretrained branch — self.model stays unset when
            # get_default is False; confirm whether that is intended.
        elif component_name == 'pragmatic_sentence_detector':
            from nlu import PragmaticSentenceDetector
            if get_default:
                self.model = PragmaticSentenceDetector.get_default_model()
def __init__(self, annotator_class='sentence_detector', language='en', component_type='sentence_detector',
             get_default=True, model=None, nlp_ref='', nlu_ref=''):
    """Load a sentence detector (deep-learning based or pragmatic rule based)."""
    if annotator_class == 'sentence_detector':
        annotator_class = 'deep_sentence_detector'  # default
    SparkNLUComponent.__init__(self, annotator_class, component_type)
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'deep_sentence_detector' or 'ner_dl' in nlp_ref:
            # Import scoped locally to work around an import issue when done outside this scope.
            from nlu import SentenDetectorDeep
            if get_default:
                self.model = SentenDetectorDeep.get_default_model()
            # NOTE(review): no pretrained branch — self.model stays unset when
            # get_default is False; confirm whether that is intended.
        elif annotator_class == 'pragmatic_sentence_detector':
            from nlu import PragmaticSentenceDetector
            if get_default:
                self.model = PragmaticSentenceDetector.get_default_model()
def __init__(self, annotator_class='context_spell', language='en', component_type='spell_checker',
             get_default=True, model=None, nlp_ref='', dataset='', nlu_ref=''):
    """Load a spell checker model (context, Norvig, or symmetric-delete based)."""
    # Normalize shorthand names; an explicit dataset overrides the class choice.
    if annotator_class == 'context' or annotator_class == 'norvig' or annotator_class == 'symmetric':
        annotator_class = annotator_class + '_spell'
    if dataset != '':
        annotator_class = dataset + '_spell'
    if model is not None:
        self.model = model
    else:
        if 'context' in annotator_class:
            from nlu import ContextSpellChecker
            if get_default:
                self.model = ContextSpellChecker.get_default_model()
            else:
                self.model = ContextSpellChecker.get_pretrained_model(nlp_ref, language)
        elif 'norvig' in annotator_class:
            from nlu import NorvigSpellChecker
            if get_default:
                self.model = NorvigSpellChecker.get_default_model()
            else:
                self.model = NorvigSpellChecker.get_pretrained_model(nlp_ref, language)
        elif 'symmetric' in annotator_class:
            from nlu import SymmetricSpellChecker
            if get_default:
                self.model = SymmetricSpellChecker.get_default_model()
            else:
                self.model = SymmetricSpellChecker.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='document_assembler', component_type='util', model=None):
    """Load a utility component (document assembler, sentence detector, NER chunk converter, sentence embeddings)."""
    if annotator_class == 'ner_converter':
        annotator_class = 'ner_to_chunk_converter'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'document_assembler':
            from nlu import SparkNlpDocumentAssembler
            self.model = SparkNlpDocumentAssembler.get_default_model()
        elif annotator_class == 'sentence_detector':
            from nlu import SparkNLPSentenceDetector
            self.model = SparkNLPSentenceDetector.get_default_model()
        elif annotator_class == 'sentence_detector_deep':
            # Alias branch: currently resolves to the same default sentence detector.
            from nlu import SparkNLPSentenceDetector
            self.model = SparkNLPSentenceDetector.get_default_model()
        elif annotator_class == 'ner_to_chunk_converter':
            from nlu import NerToChunkConverter
            self.model = NerToChunkConverter.get_default_model()
        elif annotator_class == 'sentence_embeddings':
            from nlu import SparkNLPSentenceEmbeddings
            self.model = SparkNLPSentenceEmbeddings.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='date_matcher', language='en', component_type='matcher',
             get_default=False, nlp_ref='', model=None, nlu_ref='', dataset=''):
    """Load a text/date/regex matcher model, resolved from the nlu/nlp references."""
    if 'date' in nlp_ref or 'date' in nlu_ref:
        annotator_class = 'date_matcher'
    elif 'regex' in nlp_ref or 'regex' in nlu_ref:
        annotator_class = 'regex_matcher'
    elif 'text' in nlp_ref or 'text' in nlu_ref:
        annotator_class = 'text_matcher'
    elif '_matcher' not in annotator_class:
        annotator_class = annotator_class + '_matcher'
    if model is not None:
        self.model = model
    else:
        if 'text' in annotator_class:
            from nlu import TextMatcher
            if get_default:
                self.model = TextMatcher.get_default_model()
            else:
                # NOTE(review): passes nlu_ref (not nlp_ref) to the pretrained
                # loader, unlike other components — confirm intended.
                self.model = TextMatcher.get_pretrained_model(nlu_ref, language)
        elif 'date' in annotator_class:
            from nlu import DateMatcher
            # Only a default date matcher is loaded here (no pretrained branch).
            if get_default:
                self.model = DateMatcher.get_default_model()
        elif 'regex' in annotator_class:
            from nlu import RegexMatcher
            if get_default:
                self.model = RegexMatcher.get_default_model()
            else:
                self.model = RegexMatcher.get_pretrained_model(nlu_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='default_chunker', language='en', component_type='chunker',
             get_default=True, nlp_ref='', nlu_ref='', model=None):
    """Load a chunker model (default chunker or n-gram generator)."""
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'default_chunker':
            from nlu import DefaultChunker
            # There are no pretrained chunkers, only the default one,
            # so get_default has no effect on the result.
            self.model = DefaultChunker.get_default_model()
        elif annotator_class == 'ngram':
            from nlu import NGram
            # Likewise, only a default n-gram model exists.
            self.model = NGram.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, component_name='context_spell', language='en', component_type='spell_checker',
             get_default=True, model=None, sparknlp_reference='', dataset=''):
    """Load a spell checker model (context, Norvig, or symmetric-delete based)."""
    # Normalize shorthand names; an explicit dataset overrides the name choice.
    if component_name == 'context' or component_name == 'norvig' or component_name == 'symmetric':
        component_name = component_name + '_spell'
    if dataset != '':
        component_name = dataset + '_spell'
    SparkNLUComponent.__init__(self, component_name, component_type)
    if model is not None:
        self.model = model
    else:
        if 'context' in component_name:
            from nlu import ContextSpellChecker
            if get_default:
                self.model = ContextSpellChecker.get_default_model()
            else:
                self.model = ContextSpellChecker.get_pretrained_model(sparknlp_reference, language)
        elif 'norvig' in component_name:
            from nlu import NorvigSpellChecker
            if get_default:
                self.model = NorvigSpellChecker.get_default_model()
            else:
                self.model = NorvigSpellChecker.get_pretrained_model(sparknlp_reference, language)
        elif 'symmetric' in component_name:
            from nlu import SymmetricSpellChecker
            if get_default:
                self.model = SymmetricSpellChecker.get_default_model()
            else:
                self.model = SymmetricSpellChecker.get_pretrained_model(sparknlp_reference, language)
def __init__(self, annotator_class='sentiment_dl', language='en', component_type='classifier',
             get_default=True, model=None, nlp_ref='', nlu_ref='', trainable=False):
    """Resolve and load a classifier model (sentiment, NER, POS, language detection,
    keyword extraction, multi-label / single-label classification).

    The nlu/nlp references take priority over the incoming annotator_class when
    they unambiguously identify a model family.
    """
    if 'e2e' in nlu_ref or 'toxic' in nlu_ref:
        annotator_class = 'multi_classifier'
    elif 'e2e' in nlp_ref or 'toxic' in nlp_ref:
        annotator_class = 'multi_classifier'
    elif 'multiclassifierdl' in nlp_ref:
        annotator_class = 'multi_classifier'
    elif 'classifierdl' in nlp_ref:
        annotator_class = 'classifier_dl'
    elif 'yake' in nlu_ref:
        annotator_class = 'yake'
    elif 'yake' in nlp_ref:
        annotator_class = 'yake'
    elif 'sentimentdl' in nlp_ref:
        annotator_class = 'sentiment_dl'
    elif 'vivekn' in nlu_ref or 'vivekn' in nlp_ref:
        # Bugfix: the first operand previously tested nlp_ref twice, so vivekn
        # models requested via nlu_ref were never detected.
        annotator_class = 'vivekn_sentiment'
    elif 'wiki_' in nlu_ref or 'wiki_' in nlp_ref:
        annotator_class = 'language_detector'
    elif 'pos' in nlu_ref:
        annotator_class = 'pos'
    elif 'pos' in nlp_ref:
        annotator_class = 'pos'
    elif 'ner' in nlu_ref:
        annotator_class = 'ner'
    elif 'ner' in nlp_ref:
        annotator_class = 'ner'
    if model is not None:
        self.model = model
    else:
        if 'sentiment' in annotator_class and 'vivekn' not in annotator_class:
            from nlu import SentimentDl
            if trainable:
                self.model = SentimentDl.get_default_trainable_model()
            elif get_default:
                self.model = SentimentDl.get_default_model()
            else:
                self.model = SentimentDl.get_pretrained_model(nlp_ref, language)
        elif 'vivekn' in annotator_class:
            from nlu import ViveknSentiment
            if get_default:
                self.model = ViveknSentiment.get_default_model()
            else:
                self.model = ViveknSentiment.get_pretrained_model(nlp_ref, language)
        elif 'ner.crf' in annotator_class:
            # Bugfix: this test must come before the generic 'ner' substring test,
            # which would otherwise shadow it and make the CRF branch unreachable.
            from nlu import NERDLCRF
            if get_default:
                self.model = NERDLCRF.get_default_model()
            else:
                self.model = NERDLCRF.get_pretrained_model(nlp_ref, language)
        elif 'ner' in annotator_class or 'ner.dl' in annotator_class:
            from nlu import NERDL
            if trainable:
                self.model = NERDL.get_default_trainable_model()
            elif get_default:
                self.model = NERDL.get_default_model()
            else:
                self.model = NERDL.get_pretrained_model(nlp_ref, language)
        elif ('classifier_dl' in annotator_class or annotator_class == 'toxic') and 'multi' not in annotator_class:
            from nlu import ClassifierDl
            if trainable:
                self.model = ClassifierDl.get_trainable_model()
            elif get_default:
                self.model = ClassifierDl.get_default_model()
            else:
                self.model = ClassifierDl.get_pretrained_model(nlp_ref, language)
        elif 'language_detector' in annotator_class:
            from nlu import LanguageDetector
            if get_default:
                self.model = LanguageDetector.get_default_model()
            else:
                self.model = LanguageDetector.get_pretrained_model(nlp_ref, language)
        elif 'pos' in annotator_class:
            from nlu import PartOfSpeechJsl
            if trainable:
                self.model = PartOfSpeechJsl.get_default_trainable_model()
            elif get_default:
                self.model = PartOfSpeechJsl.get_default_model()
            else:
                self.model = PartOfSpeechJsl.get_pretrained_model(nlp_ref, language)
        elif 'yake' in annotator_class:
            from nlu import Yake
            self.model = Yake.get_default_model()
        elif 'multi_classifier' in annotator_class:
            from nlu import MultiClassifier
            if trainable:
                self.model = MultiClassifier.get_default_trainable_model()
            elif get_default:
                self.model = MultiClassifier.get_default_model()
            else:
                self.model = MultiClassifier.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='glove', language='en', component_type='embedding',
             get_default=True, model=None, nlp_ref='', nlu_ref=''):
    """Resolve and load an embedding model (GloVe, BERT, ALBERT, ELMO, XLNET, USE, ...).

    Resolution order matters: sentence-level variants are checked before the
    corresponding token-level ones.
    """
    if 'use' in nlu_ref or 'tfhub_use' in nlp_ref:
        annotator_class = 'use'
    elif 'bert' in nlp_ref and 'albert' not in nlp_ref and 'sent' in nlp_ref:
        annotator_class = 'sentence_bert'
    elif 'bert' in nlu_ref and 'albert' not in nlu_ref and 'sent' in nlu_ref:
        annotator_class = 'sentence_bert'
    elif 'elmo' in nlp_ref:
        annotator_class = 'elmo'
    elif 'elmo' in nlu_ref:
        annotator_class = 'elmo'
    elif 'electra' in nlp_ref and 'sent' in nlp_ref:
        annotator_class = 'sentence_bert'
    elif 'electra' in nlu_ref and 'sent' in nlu_ref:
        annotator_class = 'sentence_bert'
    elif 'bert' in nlu_ref and 'albert' not in nlu_ref:
        annotator_class = 'bert'
    elif 'electra' in nlu_ref or 'electra' in nlp_ref:
        annotator_class = 'bert'
    elif 'labse' in nlu_ref or 'labse' in nlp_ref:
        annotator_class = 'sentence_bert'
    elif 'tfhub' in nlu_ref or 'tfhub' in nlp_ref:
        annotator_class = 'use'
    elif 'glove' in nlu_ref or 'glove' in nlp_ref:
        annotator_class = 'glove'
    elif 'albert' in nlu_ref or 'albert' in nlp_ref:
        annotator_class = 'albert'
    elif 'xlnet' in nlu_ref or 'xlnet' in nlp_ref:
        annotator_class = 'xlnet'
    # Default embeddings for nlu actions that do not name a particular model.
    elif 'embed_sentence' in nlu_ref:
        annotator_class = 'glove'
    elif 'embed' in nlu_ref:
        annotator_class = 'glove'
    if model is not None:
        self.model = model
    else:
        if 'albert' in annotator_class:
            from nlu import SparkNLPAlbert
            if get_default:
                self.model = SparkNLPAlbert.get_default_model()
            else:
                self.model = SparkNLPAlbert.get_pretrained_model(nlp_ref, language)
        elif ('bert' in annotator_class or 'electra' in annotator_class) and 'sent' in annotator_class:
            # ELECTRA sentence embeddings are served by the BertSentence loader too.
            from nlu import BertSentence
            if get_default:
                self.model = BertSentence.get_default_model()
            else:
                self.model = BertSentence.get_pretrained_model(nlp_ref, language)
        elif 'bert' in annotator_class:
            from nlu import SparkNLPBert
            if get_default:
                self.model = SparkNLPBert.get_default_model()
            else:
                self.model = SparkNLPBert.get_pretrained_model(nlp_ref, language)
        elif 'elmo' in annotator_class:
            from nlu import SparkNLPElmo
            if get_default:
                self.model = SparkNLPElmo.get_default_model()
            else:
                self.model = SparkNLPElmo.get_pretrained_model(nlp_ref, language)
        elif 'xlnet' in annotator_class:
            from nlu import SparkNLPXlnet
            if get_default:
                self.model = SparkNLPXlnet.get_default_model()
            else:
                self.model = SparkNLPXlnet.get_pretrained_model(nlp_ref, language)
        elif 'use' in annotator_class:
            from nlu import SparkNLPUse
            if get_default:
                self.model = SparkNLPUse.get_default_model()
            else:
                self.model = SparkNLPUse.get_pretrained_model(nlp_ref, language)
        elif 'glove' in annotator_class:
            from nlu import Glove
            # Simplified from nested duplicate branches: every get_default path
            # loaded the default model, every other path loaded the pretrained one.
            if get_default:
                self.model = Glove.get_default_model()
            else:
                if nlp_ref == 'glove_840B_300' or nlp_ref == 'glove_6B_300':
                    # Any reference to these particular GloVe embeddings is actually
                    # a reference to the multilingual ones.
                    language = 'xx'
                self.model = Glove.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)