Beispiel #1
0
    def __init__(self,
                 annotator_class='default_tokenizer',
                 language='en',
                 component_type='tokenizer',
                 get_default=True,
                 nlp_ref='',
                 nlu_ref='',
                 model=None):
        """Build a tokenizer component, resolving the concrete annotator first.

        'segment_words' requests and right-to-left languages with a pretrained
        tokenizer resolve to the word segmenter; every other token request
        resolves to the default tokenizer.
        """
        if 'segment_words' in nlu_ref:
            annotator_class = 'word_segmenter'
        elif 'token' in annotator_class and language in nlu.AllComponentsInfo(
        ).all_right_to_left_langs_with_pretrained_tokenizer:
            annotator_class = 'word_segmenter'
        elif 'token' in annotator_class and 'regex' not in annotator_class:
            annotator_class = 'default_tokenizer'

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif annotator_class == 'default_tokenizer':
            from nlu import DefaultTokenizer
            # There are no pretrained tokenizers, only the default one, so
            # both the get_default and the pretrained path used the same call.
            self.model = DefaultTokenizer.get_default_model()
        elif annotator_class == 'word_segmenter':
            from nlu import WordSegmenter
            if get_default and language == '':
                self.model = WordSegmenter.get_default_model()
            elif get_default and language != '':
                self.model = WordSegmenter.get_default_model_for_lang(language)
            else:
                self.model = WordSegmenter.get_pretrained_model(
                    nlp_ref, language)

        SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #2
0
    def __init__(self,
                 annotator_class='normalizer',
                 language='en',
                 component_type='normalizer',
                 get_default=True,
                 nlp_ref='',
                 nlu_ref='',
                 model=None):
        """Build a normalizer component (token-level or document-level).

        The nlu_ref decides between the document normalizer and the plain
        normalizer when no prebuilt model is supplied.
        """
        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        else:
            if 'norm_document' in nlu_ref:
                annotator_class = 'document_normalizer'
            elif 'norm' in nlu_ref:
                annotator_class = 'normalizer'

            if annotator_class == 'normalizer':
                from nlu import SparkNLPNormalizer
                if get_default:
                    self.model = SparkNLPNormalizer.get_default_model()
                else:
                    # There is no pretrained API for Normalizer in Spark NLP yet.
                    self.model = SparkNLPNormalizer.get_pretrained_model(
                        nlp_ref, language)
            elif annotator_class == 'document_normalizer':
                from nlu import SparkNLPDocumentNormalizer
                if get_default:
                    self.model = SparkNLPDocumentNormalizer.get_default_model()
                else:
                    # There is no pretrained API for Normalizer in Spark NLP yet.
                    self.model = SparkNLPDocumentNormalizer.get_pretrained_model(
                        nlp_ref, language)

        SparkNLUComponent.__init__(self, annotator_class, component_type)
 def __init__(self,
              annotator_class='sentence_detector',
              language='en',
              component_type='sentence_detector',
              get_default=True,
              model=None,
              nlp_ref='',
              nlu_ref='',
              trainable=False):
     """Build a sentence-detector component.

     Defaults to the deep (DL) sentence detector unless the nlu_ref
     explicitly asks for the pragmatic one.
     """
     if annotator_class == 'sentence_detector' and 'pragmatic' not in nlu_ref:
         annotator_class = 'deep_sentence_detector'  #default
     else:
         annotator_class = 'pragmatic_sentence_detector'
     if model is not None:  # PEP 8: compare with None via `is not`
         self.model = model
     else:
         if annotator_class == 'deep_sentence_detector' or 'ner_dl' in nlp_ref:
             from nlu import SentenceDetectorDeep
             if trainable:
                 self.model = SentenceDetectorDeep.get_trainable_model()
             elif get_default:
                 self.model = SentenceDetectorDeep.get_default_model()
             else:
                 self.model = SentenceDetectorDeep.get_pretrained_model(
                     nlp_ref, language)
         elif annotator_class == 'pragmatic_sentence_detector':
             from nlu import PragmaticSentenceDetector
             if get_default:
                 self.model = PragmaticSentenceDetector.get_default_model()
     SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #4
0
    def __init__(self, component_name='stemmer', component_type='stemmer', model=None):
        """Build a stemmer component.

        Fix: the base-class initializer is now invoked on every path;
        previously it was skipped whenever a prebuilt `model` was passed in,
        unlike every sibling component.
        """
        SparkNLUComponent.__init__(self, component_name, component_type)
        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif component_name == 'stemmer':
            from nlu import SparkNLPStemmer
            self.model = SparkNLPStemmer.get_default_model()
Beispiel #5
0
 def __init__(self, component_name='chunk_embedder', language='en', component_type='embeddings_chunk', get_default=True, sparknlp_reference='', model=None):
     """Build a chunk-embedder component; only a default model exists."""
     SparkNLUComponent.__init__(self, component_name, component_type)
     if model is not None:  # PEP 8: compare with None via `is not`
         self.model = model
     elif component_name == 'chunk_embedder':
         from nlu import ChunkEmbedder
         # There are no pretrained chunk embedders, only the default one, so
         # both the get_default and the pretrained path used the same call.
         self.model = ChunkEmbedder.get_default_model()
Beispiel #6
0
    def __init__(self, annotator_class='lemmatizer', language='en', component_type='lemmatizer', get_default=False, model=None, nlp_ref='', nlu_ref=''):
        """Build a lemmatizer component (default model or pretrained by nlp_ref)."""
        SparkNLUComponent.__init__(self, annotator_class, component_type)

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif 'lemma' in annotator_class:
            from nlu import SparkNLPLemmatizer
            if get_default:
                self.model = SparkNLPLemmatizer.get_default_model()
            else:
                self.model = SparkNLPLemmatizer.get_pretrained_model(nlp_ref, language)
Beispiel #7
0
    def __init__(self, annotator_class='default_tokenizer', language='en', component_type='tokenizer', get_default=True, nlp_ref='', nlu_ref='', model=None):
        """Build a tokenizer component backed by the default tokenizer model."""
        if 'token' in annotator_class and 'regex' not in annotator_class:
            annotator_class = 'default_tokenizer'

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        else:
            from nlu import DefaultTokenizer
            # There are no pretrained tokenizers, only the default one, so
            # both the get_default and the pretrained path used the same call.
            self.model = DefaultTokenizer.get_default_model()
        SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #8
0
 def __init__(self,
              annotator_class='stemmer',
              component_type='stemmer',
              model=None,
              nlu_ref='',
              nlp_ref=''):
     """Build a stemmer component; only a default model exists."""
     SparkNLUComponent.__init__(self, annotator_class, component_type)
     if model is not None:  # PEP 8: compare with None via `is not`
         self.model = model
     elif annotator_class == 'stemmer':
         from nlu import SparkNLPStemmer
         self.model = SparkNLPStemmer.get_default_model()
Beispiel #9
0
 def __init__(self,
              component_name='default_tokenizer',
              language='en',
              component_type='tokenizer',
              get_default=True,
              sparknlp_reference=''):
     """Construct a tokenizer component backed by the default tokenizer."""
     if 'token' in component_name:
         component_name = 'default_tokenizer'
     SparkNLUComponent.__init__(self, component_name, component_type)
     if 'token' in component_name or component_name == 'default_tokenizer':
         from nlu import DefaultTokenizer
         # There are no pretrained tokenizers; every path yields the default
         # model regardless of get_default.
         self.model = DefaultTokenizer.get_default_model()
Beispiel #10
0
 def __init__(self,
              component_name='normalizer',
              language='en',
              component_type='normalizer',
              get_default=True,
              sparknlp_reference=''):
     """Construct a normalizer component (default or pretrained by reference)."""
     SparkNLUComponent.__init__(self, component_name, component_type)
     if component_name != 'normalizer':
         return
     from nlu import SparkNLPNormalizer
     if get_default:
         self.model = SparkNLPNormalizer.get_default_model()
     else:
         # There is no pretrained API for Normalizer in Spark NLP yet.
         self.model = SparkNLPNormalizer.get_pretrained_model(
             sparknlp_reference, language)
 def __init__(self,
              component_name='labeled_dependency_parser',
              language='en',
              component_type='dependency_typed',
              get_default=True,
              sparknlp_reference=''):
     """Construct a typed (labeled) dependency-parser component."""
     SparkNLUComponent.__init__(self, component_name, component_type)
     if 'dep' not in component_name:
         return
     from nlu.components.dependency_typeds.labeled_dependency_parser.labeled_dependency_parser import \
         LabeledDependencyParser
     if get_default:
         self.model = LabeledDependencyParser.get_default_model()
     else:
         self.model = LabeledDependencyParser.get_pretrained_model(
             sparknlp_reference, language)
Beispiel #12
0
 def __init__(self,
              component_name='unlabeled_dependency_parser',
              language='en',
              component_type='dependency_untyped',
              get_default=True,
              sparknlp_reference=''):
     """Construct an untyped (unlabeled) dependency-parser component."""
     SparkNLUComponent.__init__(self, component_name, component_type)
     is_dep = ('dep' in component_name
               or 'dep.untyped' in component_name
               or component_name == 'unlabeled_dependency_parser')
     if is_dep:
         from nlu.components.dependency_untypeds.unlabeled_dependency_parser.unlabeled_dependency_parser import UnlabeledDependencyParser
         if get_default:
             self.model = UnlabeledDependencyParser.get_default_model()
         else:
             self.model = UnlabeledDependencyParser.get_pretrained_model(
                 sparknlp_reference, language)
Beispiel #13
0
 def __init__(self,
              annotator_class='labeled_dependency_parser',
              language='en',
              component_type='dependency_typed',
              get_default=True,
              nlp_ref='',
              nlu_ref=''):
     """Construct a typed (labeled) dependency-parser component."""
     SparkNLUComponent.__init__(self, annotator_class, component_type)
     if 'dep' not in annotator_class:
         return
     from nlu.components.dependency_typeds.labeled_dependency_parser.labeled_dependency_parser import \
         LabeledDependencyParser
     if get_default:
         self.model = LabeledDependencyParser.get_default_model()
     else:
         self.model = LabeledDependencyParser.get_pretrained_model(
             nlp_ref, language)
Beispiel #14
0
 def __init__(self,
              annotator_class='chunk_embedder',
              language='en',
              component_type='embeddings_chunk',
              get_default=True,
              nlp_ref='',
              model=None,
              nlu_ref=''):
     """Build a chunk-embedder component; only a default model exists."""
     if model is not None:  # PEP 8: compare with None via `is not`
         self.model = model
     elif annotator_class == 'chunk_embedder':
         from nlu import ChunkEmbedder
         # There are no pretrained chunk embedders, only the default one, so
         # both the get_default and the pretrained path used the same call.
         self.model = ChunkEmbedder.get_default_model()
     SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #15
0
    def __init__(self, annotator_class='t5', language='en', component_type='seq2seq', get_default=True, model=None, nlp_ref='', nlu_ref='', dataset='', configs=''):
        """Build a seq2seq component (T5 or Marian translation models)."""
        # Resolve the concrete annotator from the nlu/nlp references.
        if 't5' in nlu_ref or 't5' in nlp_ref:
            annotator_class = 't5'
        elif 'marian' in nlu_ref or 'marian' in nlp_ref:
            annotator_class = 'marian'
        elif 'translate_to' in nlu_ref or 'translate_to' in nlp_ref or 'translate_to' in annotator_class:
            annotator_class = 'marian'

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif 't5' in annotator_class:
            from nlu import T5
            if get_default:
                self.model = T5.get_default_model()
            elif configs != '':
                # A task configuration was requested (e.g. summarization).
                self.model = T5.get_preconfigured_model(nlp_ref, language, configs)
            else:
                self.model = T5.get_pretrained_model(nlp_ref, language)
        elif 'marian' in annotator_class:
            from nlu import Marian
            if get_default:
                self.model = Marian.get_default_model()
            else:
                self.model = Marian.get_pretrained_model(nlp_ref, language)
        SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #16
0
    def __init__(self, component_name='date_matcher', language='en', component_type='matcher', get_default=True, model=None, sparknlp_reference='', dataset=''):
        """Build a matcher component (text, date or regex matcher)."""
        if '_matcher' not in component_name:
            component_name += '_matcher'
        SparkNLUComponent.__init__(self, component_name, component_type)

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        else:
            if 'text' in component_name:
                from nlu import TextMatcher
                if get_default:
                    self.model = TextMatcher.get_default_model()
                else:
                    self.model = TextMatcher.get_pretrained_model(sparknlp_reference, language)
            elif 'date' in component_name:
                from nlu import DateMatcher
                # Only a default date matcher exists; no pretrained variant.
                if get_default:
                    self.model = DateMatcher.get_default_model()
            elif 'regex' in component_name:
                from nlu import RegexMatcher
                if get_default:
                    self.model = RegexMatcher.get_default_model()
                else:
                    self.model = RegexMatcher.get_pretrained_model(sparknlp_reference, language)
Beispiel #17
0
    def __init__(self,
                 component_name='stopwordcleaner',
                 language='en',
                 component_type='stopwordscleaner',
                 get_default=False,
                 model=None,
                 sparknlp_reference=''):
        """Build a stop-word-cleaner component (default or pretrained)."""
        SparkNLUComponent.__init__(self, component_name, component_type)

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif 'stop' in component_name:
            from nlu import NLUStopWordcleaner
            if get_default:
                self.model = NLUStopWordcleaner.get_default_model()
            else:
                self.model = NLUStopWordcleaner.get_pretrained_model(
                    sparknlp_reference, language)
    def __init__(self,
                 annotator_class='stopwordcleaner',
                 language='en',
                 component_type='stopwordscleaner',
                 get_default=False,
                 model=None,
                 nlp_ref='',
                 nlu_ref=''):
        """Build a stop-word-cleaner component (default or pretrained by nlp_ref)."""
        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif 'stop' in annotator_class:
            from nlu import NLUStopWordcleaner
            if get_default:
                self.model = NLUStopWordcleaner.get_default_model()
            else:
                self.model = NLUStopWordcleaner.get_pretrained_model(
                    nlp_ref, language)
        SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #19
0
 def __init__(self,
              component_name='document_assembler',
              component_type='util',
              model=None):
     """Build a utility component (document assembler, sentence detector
     or NER-to-chunk converter); each has only a default model."""
     if component_name == 'ner_converter':
         component_name = 'ner_to_chunk_converter'
     SparkNLUComponent.__init__(self, component_name, component_type)
     if model is not None:  # PEP 8: compare with None via `is not`
         self.model = model
     else:
         if component_name == 'document_assembler':
             from nlu import SparkNlpDocumentAssembler
             self.model = SparkNlpDocumentAssembler.get_default_model()
         elif component_name == 'sentence_detector':
             from nlu import SparkNLPSentenceDetector
             self.model = SparkNLPSentenceDetector.get_default_model()
         elif component_name == 'ner_to_chunk_converter':
             from nlu import NerToChunkConverter
             self.model = NerToChunkConverter.get_default_model()
Beispiel #20
0
    def __init__(self,
                 annotator_class='unlabeled_dependency_parser',
                 language='en',
                 component_type='dependency_untyped',
                 get_default=True,
                 nlp_ref='',
                 nlu_ref='',
                 model=None):
        """Build an untyped (unlabeled) dependency-parser component."""
        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif 'dep' in annotator_class or 'dep.untyped' in annotator_class or annotator_class == 'unlabeled_dependency_parser':
            from nlu.components.dependency_untypeds.unlabeled_dependency_parser.unlabeled_dependency_parser import UnlabeledDependencyParser
            if get_default:
                self.model = UnlabeledDependencyParser.get_default_model()
            else:
                self.model = UnlabeledDependencyParser.get_pretrained_model(
                    nlp_ref, language)

        SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #21
0
    def __init__(self,
                 component_name='lemma',
                 language='en',
                 component_type='lemmatizer',
                 get_default=False,
                 model=None,
                 sparknlp_reference=''):
        """Build a lemmatizer component (default or pretrained by reference)."""
        # The component name is normalized unconditionally.
        component_name = 'lemmatizer'
        SparkNLUComponent.__init__(self, component_name, component_type)

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif 'lemma' in component_name:
            from nlu import SparkNLPLemmatizer
            if get_default:
                self.model = SparkNLPLemmatizer.get_default_model()
            else:
                self.model = SparkNLPLemmatizer.get_pretrained_model(
                    sparknlp_reference, language)
Beispiel #22
0
 def __init__(self,
              component_name='pragmatic_sentence_detector',
              language='en',
              component_type='sentence_detector',
              get_default=False,
              model=None,
              sparknlp_reference=''):
     """Build a sentence-detector component (deep by default)."""
     if component_name == 'sentence_detector':
         component_name = 'deep_sentence_detector'  #default
     SparkNLUComponent.__init__(self, component_name, component_type)
     if model is not None:  # PEP 8: compare with None via `is not`
         self.model = model
     else:
         if component_name == 'deep_sentence_detector' or 'ner_dl' in sparknlp_reference:
             # Import kept local: the module-scope import does not work here.
             from nlu import SentenDetectorDeep
             if get_default:
                 self.model = SentenDetectorDeep.get_default_model()
         elif component_name == 'pragmatic_sentence_detector':
             from nlu import PragmaticSentenceDetector
             if get_default:
                 self.model = PragmaticSentenceDetector.get_default_model()
Beispiel #23
0
 def __init__(self,
              annotator_class='sentence_detector',
              language='en',
              component_type='sentence_detector',
              get_default=True,
              model=None,
              nlp_ref='',
              nlu_ref=''):
     """Build a sentence-detector component (deep by default)."""
     if annotator_class == 'sentence_detector':
         annotator_class = 'deep_sentence_detector'  #default
     SparkNLUComponent.__init__(self, annotator_class, component_type)
     if model is not None:  # PEP 8: compare with None via `is not`
         self.model = model
     else:
         if annotator_class == 'deep_sentence_detector' or 'ner_dl' in nlp_ref:
             # Import kept local: the module-scope import does not work here.
             from nlu import SentenDetectorDeep
             if get_default:
                 self.model = SentenDetectorDeep.get_default_model()
         elif annotator_class == 'pragmatic_sentence_detector':
             from nlu import PragmaticSentenceDetector
             if get_default:
                 self.model = PragmaticSentenceDetector.get_default_model()
Beispiel #24
0
    def __init__(self, annotator_class='context_spell', language='en', component_type='spell_checker', get_default=True, model=None, nlp_ref='', dataset='', nlu_ref=''):
        """Build a spell-checker component (context, norvig or symmetric)."""
        # Normalize shorthand class names; an explicit dataset wins.
        if annotator_class == 'context' or annotator_class == 'norvig' or annotator_class == 'symmetric':
            annotator_class = annotator_class + '_spell'
        if dataset != '':
            annotator_class = dataset + '_spell'

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif 'context' in annotator_class:
            from nlu import ContextSpellChecker
            if get_default:
                self.model = ContextSpellChecker.get_default_model()
            else:
                self.model = ContextSpellChecker.get_pretrained_model(nlp_ref, language)
        elif 'norvig' in annotator_class:
            from nlu import NorvigSpellChecker
            if get_default:
                self.model = NorvigSpellChecker.get_default_model()
            else:
                self.model = NorvigSpellChecker.get_pretrained_model(nlp_ref, language)
        elif 'symmetric' in annotator_class:
            from nlu import SymmetricSpellChecker
            if get_default:
                self.model = SymmetricSpellChecker.get_default_model()
            else:
                self.model = SymmetricSpellChecker.get_pretrained_model(nlp_ref, language)

        SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #25
0
 def __init__(self, annotator_class='document_assembler', component_type='util', model=None):
     """Build a utility component; each supported class has only a default model."""
     if annotator_class == 'ner_converter':
         annotator_class = 'ner_to_chunk_converter'
     if model is not None:  # PEP 8: compare with None via `is not`
         self.model = model
     else:
         if annotator_class == 'document_assembler':
             from nlu import SparkNlpDocumentAssembler
             self.model = SparkNlpDocumentAssembler.get_default_model()
         elif annotator_class == 'sentence_detector':
             from nlu import SparkNLPSentenceDetector
             self.model = SparkNLPSentenceDetector.get_default_model()
         elif annotator_class == 'sentence_detector_deep':
             from nlu import SparkNLPSentenceDetector
             self.model = SparkNLPSentenceDetector.get_default_model()
         elif annotator_class == 'ner_to_chunk_converter':
             from nlu import NerToChunkConverter
             self.model = NerToChunkConverter.get_default_model()
         elif annotator_class == 'sentence_embeddings':
             from nlu import SparkNLPSentenceEmbeddings
             self.model = SparkNLPSentenceEmbeddings.get_default_model()
     SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #26
0
    def __init__(self,
                 annotator_class='date_matcher',
                 language='en',
                 component_type='matcher',
                 get_default=False,
                 nlp_ref='',
                 model=None,
                 nlu_ref='',
                 dataset=''):
        """Build a matcher component, resolving the concrete annotator first.

        Fix: pretrained models are now fetched by `nlp_ref` (the Spark NLP
        model reference), matching every sibling component; previously the
        user-facing `nlu_ref` query string was passed instead.
        """
        if 'date' in nlp_ref or 'date' in nlu_ref:
            annotator_class = 'date_matcher'
        elif 'regex' in nlp_ref or 'regex' in nlu_ref:
            annotator_class = 'regex_matcher'
        elif 'text' in nlp_ref or 'text' in nlu_ref:
            annotator_class = 'text_matcher'
        elif '_matcher' not in annotator_class:
            annotator_class = annotator_class + '_matcher'

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        else:
            if 'text' in annotator_class:
                from nlu import TextMatcher
                if get_default:
                    self.model = TextMatcher.get_default_model()
                else:
                    self.model = TextMatcher.get_pretrained_model(
                        nlp_ref, language)
            elif 'date' in annotator_class:
                from nlu import DateMatcher
                # Only a default date matcher exists; no pretrained variant.
                if get_default:
                    self.model = DateMatcher.get_default_model()
            elif 'regex' in annotator_class:
                from nlu import RegexMatcher
                if get_default:
                    self.model = RegexMatcher.get_default_model()
                else:
                    self.model = RegexMatcher.get_pretrained_model(
                        nlp_ref, language)

        SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #27
0
 def __init__(self,
              annotator_class='default_chunker',
              language='en',
              component_type='chunker',
              get_default=True,
              nlp_ref='',
              nlu_ref='',
              model=None):
     """Build a chunker component (default chunker or n-gram generator)."""
     if model is not None:  # PEP 8: compare with None via `is not`
         self.model = model
     else:
         if annotator_class == 'default_chunker':
             from nlu import DefaultChunker
             # There are no pretrained chunkers, only the default one, so
             # both the get_default and the pretrained path used the same call.
             self.model = DefaultChunker.get_default_model()
         elif annotator_class == 'ngram':  # mutually exclusive with the above
             from nlu import NGram
             # Likewise, only a default NGram model exists.
             self.model = NGram.get_default_model()
     SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #28
0
    def __init__(self,
                 component_name='context_spell',
                 language='en',
                 component_type='spell_checker',
                 get_default=True,
                 model=None,
                 sparknlp_reference='',
                 dataset=''):
        """Build a spell-checker component (context, norvig or symmetric)."""
        # Normalize shorthand class names; an explicit dataset wins.
        if component_name == 'context' or component_name == 'norvig' or component_name == 'symmetric':
            component_name = component_name + '_spell'
        if dataset != '': component_name = dataset + '_spell'
        SparkNLUComponent.__init__(self, component_name, component_type)

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        elif 'context' in component_name:
            from nlu import ContextSpellChecker
            if get_default:
                self.model = ContextSpellChecker.get_default_model()
            else:
                self.model = ContextSpellChecker.get_pretrained_model(
                    sparknlp_reference, language)
        elif 'norvig' in component_name:
            from nlu import NorvigSpellChecker
            if get_default:
                self.model = NorvigSpellChecker.get_default_model()
            else:
                self.model = NorvigSpellChecker.get_pretrained_model(
                    sparknlp_reference, language)
        elif 'symmetric' in component_name:
            from nlu import SymmetricSpellChecker
            if get_default:
                self.model = SymmetricSpellChecker.get_default_model()
            else:
                self.model = SymmetricSpellChecker.get_pretrained_model(
                    sparknlp_reference, language)
Beispiel #29
0
    def __init__(self,
                 annotator_class='sentiment_dl',
                 language='en',
                 component_type='classifier',
                 get_default=True,
                 model=None,
                 nlp_ref='',
                 nlu_ref='',
                 trainable=False):
        """Build a classifier component, resolving the concrete annotator
        from the nlu/nlp references before constructing the model.

        Fix: the vivekn check previously tested `nlp_ref` twice, so a vivekn
        request expressed only in the nlu_ref was never detected.
        """
        if 'e2e' in nlu_ref or 'toxic' in nlu_ref:
            annotator_class = 'multi_classifier'
        elif 'e2e' in nlp_ref or 'toxic' in nlp_ref:
            annotator_class = 'multi_classifier'
        elif 'multiclassifierdl' in nlp_ref:
            annotator_class = 'multi_classifier'
        elif 'classifierdl' in nlp_ref:
            annotator_class = 'classifier_dl'
        elif 'yake' in nlu_ref or 'yake' in nlp_ref:
            annotator_class = 'yake'
        elif 'sentimentdl' in nlp_ref:
            annotator_class = 'sentiment_dl'
        elif 'vivekn' in nlp_ref or 'vivekn' in nlu_ref:
            annotator_class = 'vivekn_sentiment'
        elif 'wiki_' in nlu_ref or 'wiki_' in nlp_ref:
            annotator_class = 'language_detector'
        elif 'pos' in nlu_ref or 'pos' in nlp_ref:
            annotator_class = 'pos'
        elif 'ner' in nlu_ref or 'ner' in nlp_ref:
            annotator_class = 'ner'

        if model is not None:  # PEP 8: compare with None via `is not`
            self.model = model
        else:
            if 'sentiment' in annotator_class and 'vivekn' not in annotator_class:
                from nlu import SentimentDl
                if trainable:
                    self.model = SentimentDl.get_default_trainable_model()
                elif get_default:
                    self.model = SentimentDl.get_default_model()
                else:
                    self.model = SentimentDl.get_pretrained_model(
                        nlp_ref, language)
            elif 'vivekn' in annotator_class:
                from nlu import ViveknSentiment
                if get_default:
                    self.model = ViveknSentiment.get_default_model()
                else:
                    self.model = ViveknSentiment.get_pretrained_model(
                        nlp_ref, language)
            elif 'ner' in annotator_class or 'ner.dl' in annotator_class:
                from nlu import NERDL
                if trainable:
                    self.model = NERDL.get_default_trainable_model()
                elif get_default:
                    self.model = NERDL.get_default_model()
                else:
                    self.model = NERDL.get_pretrained_model(nlp_ref, language)
            elif 'ner.crf' in annotator_class:
                from nlu import NERDLCRF
                if get_default:
                    self.model = NERDLCRF.get_default_model()
                else:
                    self.model = NERDLCRF.get_pretrained_model(
                        nlp_ref, language)
            elif ('classifier_dl' in annotator_class or annotator_class
                  == 'toxic') and 'multi' not in annotator_class:
                from nlu import ClassifierDl
                if trainable:
                    self.model = ClassifierDl.get_trainable_model()
                elif get_default:
                    self.model = ClassifierDl.get_default_model()
                else:
                    self.model = ClassifierDl.get_pretrained_model(
                        nlp_ref, language)
            elif 'language_detector' in annotator_class:
                from nlu import LanguageDetector
                if get_default:
                    self.model = LanguageDetector.get_default_model()
                else:
                    self.model = LanguageDetector.get_pretrained_model(
                        nlp_ref, language)
            elif 'pos' in annotator_class:
                from nlu import PartOfSpeechJsl
                if trainable:
                    self.model = PartOfSpeechJsl.get_default_trainable_model()
                elif get_default:
                    self.model = PartOfSpeechJsl.get_default_model()
                else:
                    self.model = PartOfSpeechJsl.get_pretrained_model(
                        nlp_ref, language)
            elif 'yake' in annotator_class:
                from nlu import Yake
                # Yake has only a default (unsupervised) model.
                self.model = Yake.get_default_model()
            elif 'multi_classifier' in annotator_class:
                from nlu import MultiClassifier
                if trainable:
                    self.model = MultiClassifier.get_default_trainable_model()
                elif get_default:
                    self.model = MultiClassifier.get_default_model()
                else:
                    self.model = MultiClassifier.get_pretrained_model(
                        nlp_ref, language)
        SparkNLUComponent.__init__(self, annotator_class, component_type)
Beispiel #30
0
    def __init__(self,
                 annotator_class='glove',
                 language='en',
                 component_type='embedding',
                 get_default=True,
                 model=None,
                 nlp_ref='',
                 nlu_ref=''):
        """Resolve and load a Spark NLP embedding annotator for this component.

        The embedding family (USE, sentence/token BERT, ELMO, ELECTRA via
        BERT, ALBERT, XLNET, or GloVe) is inferred from substrings of
        ``nlu_ref`` and ``nlp_ref``; ``annotator_class`` is overwritten with
        the inferred family. Sentence-level variants are matched before
        token-level ones, so ordering of the branches below is significant.

        :param annotator_class: fallback family used when no reference matches
        :param language: language code for pretrained model lookups
        :param component_type: NLU component category (always 'embedding' here)
        :param get_default: if True, load the family's default model instead of
            resolving ``nlp_ref`` as a pretrained model name
        :param model: pre-built model object; when given, all resolution is
            skipped and it is used as-is
        :param nlp_ref: Spark NLP reference name of the requested model
        :param nlu_ref: NLU reference string the user requested
        """
        # --- Step 1: classify the request into an embedding family. ---
        if 'use' in nlu_ref or 'tfhub_use' in nlp_ref:
            annotator_class = 'use'
            # first check for sentence then token embeddings.
        elif 'bert' in nlp_ref and 'albert' not in nlp_ref and 'sent' in nlp_ref:
            annotator_class = 'sentence_bert'
        elif 'bert' in nlu_ref and 'albert' not in nlu_ref and 'sent' in nlu_ref:
            annotator_class = 'sentence_bert'

        elif 'elmo' in nlp_ref:
            annotator_class = 'elmo'
        elif 'elmo' in nlu_ref:
            annotator_class = 'elmo'

        # Sentence ELECTRA models are served by the sentence-BERT loader.
        elif 'electra' in nlp_ref and 'sent' in nlp_ref:
            annotator_class = 'sentence_bert'
        elif 'electra' in nlu_ref and 'sent' in nlu_ref:
            annotator_class = 'sentence_bert'

        elif 'bert' in nlu_ref and 'albert' not in nlu_ref:
            annotator_class = 'bert'

        # Token-level ELECTRA models are served by the BERT loader.
        elif 'electra' in nlu_ref or 'electra' in nlp_ref:
            annotator_class = 'bert'
        elif 'labse' in nlu_ref or 'labse' in nlp_ref:
            annotator_class = 'sentence_bert'

        elif 'tfhub' in nlu_ref or 'tfhub' in nlp_ref:
            annotator_class = 'use'
        elif 'glove' in nlu_ref or 'glove' in nlp_ref:
            annotator_class = 'glove'
        elif 'albert' in nlu_ref or 'albert' in nlp_ref:
            annotator_class = 'albert'
        elif 'xlnet' in nlu_ref or 'xlnet' in nlp_ref:
            annotator_class = 'xlnet'

            # Default component models for nlu actions that dont specify a particular model
        elif 'embed_sentence' in nlu_ref:
            annotator_class = 'glove'
        elif 'embed' in nlu_ref:
            annotator_class = 'glove'

        # --- Step 2: load the model for the resolved family. ---
        if model is not None:
            self.model = model
        else:
            if 'albert' in annotator_class:
                from nlu import SparkNLPAlbert
                if get_default:
                    self.model = SparkNLPAlbert.get_default_model()
                else:
                    self.model = SparkNLPAlbert.get_pretrained_model(
                        nlp_ref, language)
            elif 'bert' in annotator_class and 'sent' in annotator_class:
                from nlu import BertSentence
                if get_default:
                    self.model = BertSentence.get_default_model()
                else:
                    self.model = BertSentence.get_pretrained_model(
                        nlp_ref, language)
            elif 'electra' in annotator_class and 'sent' in annotator_class:
                from nlu import BertSentence
                if get_default:
                    self.model = BertSentence.get_default_model()
                else:
                    self.model = BertSentence.get_pretrained_model(
                        nlp_ref, language)
            elif 'bert' in annotator_class:
                from nlu import SparkNLPBert
                if get_default:
                    self.model = SparkNLPBert.get_default_model()
                else:
                    self.model = SparkNLPBert.get_pretrained_model(
                        nlp_ref, language)
            elif 'elmo' in annotator_class:
                from nlu import SparkNLPElmo
                if get_default:
                    self.model = SparkNLPElmo.get_default_model()
                else:
                    self.model = SparkNLPElmo.get_pretrained_model(
                        nlp_ref, language)
            elif 'xlnet' in annotator_class:
                from nlu import SparkNLPXlnet
                if get_default:
                    self.model = SparkNLPXlnet.get_default_model()
                else:
                    self.model = SparkNLPXlnet.get_pretrained_model(
                        nlp_ref, language)
            elif 'use' in annotator_class:
                from nlu import SparkNLPUse
                if get_default:
                    self.model = SparkNLPUse.get_default_model()
                else:
                    self.model = SparkNLPUse.get_pretrained_model(
                        nlp_ref, language)
            elif 'glove' in annotator_class:
                from nlu import Glove
                # The original nesting here was redundant: every get_default
                # path loaded the default model and every non-default path
                # loaded the pretrained one, so it collapses to this.
                if get_default:
                    self.model = Glove.get_default_model()
                else:
                    if nlp_ref in ('glove_840B_300', 'glove_6B_300'):
                        # Any reference to these particular GloVe embeddings is
                        # actually a reference to the multilingual ones.
                        language = 'xx'
                    self.model = Glove.get_pretrained_model(nlp_ref, language)
        SparkNLUComponent.__init__(self, annotator_class, component_type)