Example #1
0
 def _validate(self, finetune_enabled=False):
     """
     Validates the ``n_components`` setting of this object.

     The value has to be an ``int`` (checked first) and has to be no smaller
     than two. ``finetune_enabled`` is not read by this method.

     NOTE(review): ``check_true`` lives outside this block; it is assumed to
     raise the supplied exception when its condition does not hold.
     """
     n_components_is_int = isinstance(self.n_components, int)
     check_true(
         n_components_is_int,
         TypeError("The number of components must be an integer."))

     # The comparison is only evaluated after the type check above.
     has_enough_components = self.n_components >= 2
     check_true(
         has_enough_components,
         ValueError("The number of components must be at least two."))
Example #2
0
 def _validate(self, finetune_enabled=False):
     """
     Validates that no pretrained model was requested.

     ``self.pretrained`` must either be ``None`` or be the very
     ``Constants.default_model`` object (identity comparison); anything else is
     reported with a ``ValueError``. ``finetune_enabled`` is not read here.

     NOTE(review): ``check_true`` lives outside this block; it is assumed to
     raise the supplied exception when its condition does not hold.
     """
     pretrained = self.pretrained
     nothing_to_load = (pretrained is None
                        or pretrained is Constants.default_model)
     check_true(
         nothing_to_load,
         ValueError(
             "Random embeddings cannot be pretrained. Save the TextWiser object instead."
         ))
Example #3
0
 def _validate(self, finetune_enabled=False):
     """
     Validates the ``n_components`` setting and the availability of ``umap``.

     The ``umap`` import is performed only so that a missing package surfaces
     here; the imported module itself is not used. Afterwards ``n_components``
     has to be an ``int`` (checked first) that is no smaller than two.
     ``finetune_enabled`` is not read by this method.

     NOTE(review): ``check_true`` lives outside this block; it is assumed to
     raise the supplied exception when its condition does not hold.
     """
     import umap  # this will fail if umap is not available

     n_components_is_int = isinstance(self.n_components, int)
     check_true(
         n_components_is_int,
         TypeError("The number of components must be an integer."))

     # The comparison is only evaluated after the type check above.
     has_enough_components = self.n_components >= 2
     check_true(
         has_enough_components,
         ValueError("The number of components must be at least two."))
Example #4
0
 def _validate(self, finetune_enabled=False):
     """
     Validates the ``pretrained`` setting of this object.

     ``self.pretrained`` is accepted when it is falsy, when it is the
     ``Constants.default_model`` object, when it is a string naming an existing
     filesystem path, or when it exposes a ``read`` attribute (file-like).
     ``finetune_enabled`` is not read by this method.

     NOTE(review): ``check_true`` lives outside this block; it is assumed to
     raise the supplied exception when its condition does not hold.
     """
     pretrained = self.pretrained

     if not pretrained or pretrained is Constants.default_model:
         # Nothing to load, or the built-in default was requested.
         is_acceptable = True
     elif isinstance(pretrained, str) and os.path.exists(pretrained):
         # A path that is present on disk.
         is_acceptable = True
     else:
         is_acceptable = hasattr(pretrained, 'read')  # file-like

     check_true(
         is_acceptable,
         ValueError(
             "The pretrained model should be a path to a pickle file or a file-like object."
         ))
Example #5
0
 def _validate(self, finetune_enabled=False):
     """
     Validates the ``schema`` and ``pretrained`` settings of this object.

     ``self.schema`` has to be a ``dict``, ``str``, ``tuple`` or ``list``
     (``TypeError`` otherwise), and ``self.pretrained`` has to be ``None`` or
     the ``Constants.default_model`` object (``ValueError`` otherwise).
     ``finetune_enabled`` is not read by this method.

     NOTE(review): ``check_true`` lives outside this block; it is assumed to
     raise the supplied exception when its condition does not hold.
     """
     accepted_schema_types = (dict, str, tuple, list)
     schema_type_ok = isinstance(self.schema, accepted_schema_types)
     check_true(
         schema_type_ok,
         TypeError(
             "The schema should either be a dictionary, a valid embedding, an embedding-parameters tuple, or the path to a JSON file."
         ))

     pretrained = self.pretrained
     nothing_to_load = (pretrained is None
                        or pretrained is Constants.default_model)
     check_true(
         nothing_to_load,
         ValueError(
             "Compound embeddings cannot be pretrained. Save the TextWiser object instead."
         ))
Example #6
0
    def _validate_init_args(embedding, transformations, is_finetuneable,
                            dtype):
        """
        Validates arguments for the constructor.

        The checks run in this order:

        1. ``embedding._validate`` is called with ``finetune_enabled`` set to
           ``is_finetuneable``.
        2. ``_validate`` is called on every entry of ``transformations`` (if
           any were given).
        3. A ``RuntimeWarning`` is issued for a word embedding that has neither
           an inline pool option nor a ``Pool`` transformation.
        4. A word embedding that has both an inline pool option and a ``Pool``
           transformation is rejected with a ``ValueError``.
        5. ``dtype`` has to be a ``torch.dtype`` instance or a class deriving
           from ``np.generic`` (``TypeError`` otherwise); when
           ``is_finetuneable`` is set it has to be a ``torch.dtype``.
        6. When ``is_finetuneable`` is set,
           ``TextWiser._check_finetuneable(embedding, transformations)`` has to
           hold (``ValueError`` otherwise).

        NOTE(review): ``check_true`` / ``check_false`` live outside this block;
        they are assumed to raise the supplied exception when their condition
        is false / true respectively.
        """

        # Embedding
        embedding._validate(finetune_enabled=is_finetuneable)

        # Transformation
        if transformations:
            for transformation in transformations:
                transformation._validate()

        is_word_embedding = isinstance(embedding, Embedding.Word)
        # The attribute is only touched for word embeddings, as before.
        has_inline_pool = (is_word_embedding
                           and embedding.inline_pool_option is not None)
        has_pool_transformation = bool(transformations) and any(
            isinstance(transformation, Transformation.Pool)
            for transformation in transformations)

        # words should be pooled
        if (is_word_embedding and not has_inline_pool
                and not has_pool_transformation):
            warnings.warn(
                "Word embeddings are specified but no pool options are specified. Are you sure you don't want to pool them?",
                RuntimeWarning)

        # words shouldn't be double-pooled
        check_false(
            has_inline_pool and has_pool_transformation,
            ValueError(
                "You cannot specify both `inline_pool_option` and `Pool` transformation for the same"
                " embedding at the same time. Please pick one!"))

        # dtype
        is_torch_dtype = isinstance(dtype, torch.dtype)
        # issubclass() itself raises TypeError for non-class arguments (e.g. a
        # string or an np.dtype instance); guard it so that the intended
        # message below is the one that gets reported.
        is_numpy_type = (isinstance(dtype, type)
                         and issubclass(dtype, np.generic))
        check_true(
            is_torch_dtype or is_numpy_type,
            TypeError("The dtype must be either a numpy or torch type."))
        check_true(
            not is_finetuneable or is_torch_dtype,
            TypeError(
                "The dtype must be torch for model to be fine-tuneable."))
        check_true(
            not is_finetuneable
            or TextWiser._check_finetuneable(embedding, transformations),
            ValueError(
                "Model must have fine-tuneable weights if `is_finetuneable` is specified."
            ))
Example #7
0
 def _validate(self, finetune_enabled=False):
     """
     Validates the ``deterministic``, ``tokenizer`` and ``pretrained`` settings.

     * ``self.deterministic`` has to be a ``bool`` (``TypeError`` otherwise).
     * A truthy ``self.tokenizer`` is called on a sample document; it has to
       return a ``list`` whose entries are all ``str`` (``TypeError``
       otherwise). An empty list is accepted.
     * ``self.pretrained`` is accepted when it is falsy, when it is the
       ``Constants.default_model`` object, when it is a string naming an
       existing filesystem path, or when it exposes a ``read`` attribute
       (``ValueError`` otherwise).

     ``finetune_enabled`` is not read by this method.

     NOTE(review): ``check_true`` lives outside this block; it is assumed to
     raise the supplied exception when its condition does not hold.
     """
     check_true(
         isinstance(self.deterministic, bool),
         TypeError("The deterministic parameter should be a boolean."))
     if self.tokenizer:
         doc = "string"
         res = self.tokenizer(doc)
         check_true(
             isinstance(res, list),
             TypeError("The tokenizer should return a list of tokens."))
         # Inspect every token instead of indexing res[0]: indexing raised an
         # IndexError for an empty result and ignored all later tokens.
         check_true(all(isinstance(token, str) for token in res),
                    TypeError("The tokens should be of string type."))
     check_true(
         not self.pretrained
         or self.pretrained is Constants.default_model
         or (isinstance(self.pretrained, str)
             and os.path.exists(self.pretrained))
         or hasattr(self.pretrained, 'read'),  # file-like
         ValueError(
             "The pretrained model should be a path to a pickle file or a file-like object."
         ))
Example #8
0
 def _validate(self, finetune_enabled=False):
     """
     Validates that ``self.pool_option`` is a ``PoolOptions`` instance.

     A ``TypeError`` is supplied for any other value. ``finetune_enabled`` is
     not read by this method.

     NOTE(review): ``check_true`` lives outside this block; it is assumed to
     raise the supplied exception when its condition does not hold.
     """
     pool_option_ok = isinstance(self.pool_option, PoolOptions)
     check_true(
         pool_option_ok,
         TypeError("The pool type must be models.options.PoolOptions"))
Example #9
0
        def _validate(self, finetune_enabled=False):
            """
            Validates the configuration of this word embedding.

            The checks, in order:

            * ``word_option`` has to be a ``WordOptions`` member.
            * Without a ``pretrained`` value, only ``WordOptions.word2vec`` is
              allowed.
            * With ``finetune_enabled`` set, ``self._is_finetuneable()`` has to
              hold.
            * ``WordOptions.char`` requires ``finetune_enabled``.
            * ``sparse`` is only allowed together with ``WordOptions.word2vec``.
            * ``layers`` has to be an ``int`` or an iterable made up of ``int``
              values; anything else (including non-iterables such as ``None``)
              is reported with the ``ValueError`` below.
            * ``inline_pool_option`` has to be falsy or a ``PoolOptions``
              member.
            * A truthy ``tokenizer`` requires ``WordOptions.word2vec``; it is
              called on a sample document and has to return a ``list`` whose
              entries are all ``str`` (an empty list is accepted).

            :param finetune_enabled: whether the surrounding model is meant to
                be fine-tuned; tightens or relaxes the checks as listed above.

            NOTE(review): ``check_true`` / ``check_false`` live outside this
            block; they are assumed to raise the supplied exception when their
            condition is false / true respectively.
            """
            from collections.abc import Iterable

            check_true(
                isinstance(self.word_option, WordOptions),
                ValueError(
                    "The embedding must be one of the supported word embeddings."
                ))
            check_true(
                self.pretrained or self.word_option is WordOptions.word2vec,
                ValueError(
                    "Only word2vec embeddings can be trained from scratch."))
            check_true(
                not finetune_enabled or self._is_finetuneable(),
                ValueError(
                    "The weights can only be fine-tuned if they are not ELMo embeddings."
                ))
            check_false(
                not finetune_enabled and self.word_option == WordOptions.char,
                ValueError(
                    "Character embeddings are only available if the model is fine-tuneable."
                ))
            check_true(
                not self.sparse or self.word_option == WordOptions.word2vec,
                ValueError(
                    "Sparse embeddings only supported with word2vec embeddings"
                ))
            layers = self.layers
            # Guard the iteration: without the Iterable test a value such as
            # None or a float raised a bare "not iterable" TypeError instead of
            # the ValueError intended here.
            check_true(
                isinstance(layers, int)
                or (isinstance(layers, Iterable)
                    and all(isinstance(layer, int) for layer in layers)),
                ValueError(
                    "Layers can only be an integer or a list of integers"))
            check_true(
                not self.inline_pool_option
                or isinstance(self.inline_pool_option, PoolOptions),
                ValueError(
                    "Inline pooling should either be None or a pool option."))

            if self.tokenizer:
                check_true(
                    self.word_option == WordOptions.word2vec,
                    ValueError(
                        "The tokenizer can only be used if word2vec embeddings are used."
                    ))
                doc = "string"
                res = self.tokenizer(doc)
                check_true(
                    isinstance(res, list),
                    TypeError("The tokenizer should return a list of tokens."))
                # Inspect every token instead of indexing res[0]: indexing
                # raised an IndexError for an empty result and ignored all
                # later tokens.
                check_true(all(isinstance(token, str) for token in res),
                           TypeError("The tokens should be of string type."))
Example #10
0
 def _validate(self, finetune_enabled=False):
     """
     Validates the availability of TensorFlow and the ``pretrained`` setting.

     Both imports are performed only so that a missing package surfaces here;
     the imported modules themselves are not used. Afterwards
     ``self.pretrained`` has to be truthy, otherwise a ``ValueError`` is
     supplied. ``finetune_enabled`` is not read by this method.

     NOTE(review): ``check_true`` lives outside this block; it is assumed to
     raise the supplied exception when its condition does not hold.
     """
     import tensorflow  # this will fail if tensorflow is not available
     import tensorflow_hub  # this will fail if tensorflow_hub is not available

     pretrained = self.pretrained
     check_true(pretrained, ValueError("USE needs to be pretrained."))