def __init__(
    self,
    dm: DiscreteValue(min=0, max=2),
    dbow_words: DiscreteValue(min=-100, max=100),
    dm_concat: DiscreteValue(min=-100, max=100),
    dm_tag_count: DiscreteValue(min=0, max=2),
    alpha: ContinuousValue(min=0.001, max=0.075),
    epochs: DiscreteValue(min=2, max=10),
    window: DiscreteValue(min=2, max=10),
    inner_tokenizer: algorithm(Sentence, Seq[Word]),
    inner_stemmer: algorithm(Word, Stem),
    inner_stopwords: algorithm(Seq[Word], Seq[Word]),
    lowercase: BooleanValue(),
    stopwords_remove: BooleanValue(),
):
    """Configure the doc-embedding wrapper.

    The text-preprocessing collaborators (tokenizer, stemmer, stop-word
    filter) and the two boolean preprocessing switches are kept on the
    instance; the numeric hyperparameters are forwarded verbatim to the
    superclass constructor.
    """
    # Preprocessing pipeline pieces sampled by the grammar.
    self.inner_stopwords = inner_stopwords
    self.inner_stemmer = inner_stemmer
    self.inner_tokenizer = inner_tokenizer

    # Text-normalization switches.
    self.stopwords_remove = stopwords_remove
    self.lowercase = lowercase

    # Everything the underlying model understands goes straight through.
    model_params = dict(
        dm=dm,
        dbow_words=dbow_words,
        dm_concat=dm_concat,
        dm_tag_count=dm_tag_count,
        alpha=alpha,
        epochs=epochs,
        window=window,
    )
    super().__init__(**model_params)
def __init__(
    self,
    Trained: BooleanValue(),
    N: DiscreteValue(min=500, max=2000),
    C: BooleanValue(),
):
    """Configure the TnT tagger wrapper.

    Records the sampled hyperparameters on the instance and then runs
    both base-class initializers explicitly (mixin first, then the
    underlying tagger with the same hyperparameters).
    """
    # Keep the sampled configuration available for introspection.
    self.C = C
    self.N = N
    self.Trained = Trained

    # Explicit base-class initialization: the mixin takes no arguments,
    # the wrapped tagger receives the hyperparameters unchanged.
    NltkTrainedTagger.__init__(self)
    _TnT.__init__(self, Trained=Trained, N=N, C=C)
def __init__(
    self,
    extract_word: BooleanValue() = True,
    window_size: DiscreteValue(0, 5) = 0,
):
    """Configure word/context extraction.

    extract_word toggles emitting the word itself as a feature;
    window_size controls how many neighboring tokens are considered
    (0 disables the context window).
    """
    self.window_size = window_size
    self.extract_word = extract_word
def __init__(
    self,
    preserve_case: BooleanValue(),
    reduce_len: BooleanValue(),
    strip_handles: BooleanValue(),
):
    """Configure the tweet tokenizer wrapper.

    Stores the three sampled switches and initializes both bases
    explicitly, forwarding the switches to the wrapped tokenizer.
    """
    # Remember the sampled configuration.
    self.strip_handles = strip_handles
    self.reduce_len = reduce_len
    self.preserve_case = preserve_case

    # Mixin first, then the wrapped tokenizer with identical settings.
    NltkTokenizer.__init__(self)
    _TweetTokenizer.__init__(
        self,
        preserve_case=preserve_case,
        reduce_len=reduce_len,
        strip_handles=strip_handles,
    )
def __init__(
    self,
    tokenizer: algorithm(Sentence, Seq[Word]),
    feature_extractor: algorithm(Word, FeatureSet),
    include_text: BooleanValue(),
):
    """Configure the tokenize-then-featurize pipeline.

    Keeps the sampled tokenizer, the per-word feature extractor, and
    the flag controlling whether raw text is included alongside the
    extracted features.
    """
    self.include_text = include_text
    self.feature_extractor = feature_extractor
    self.tokenizer = tokenizer
def _get_arg_values(arg, value, cls):
    """Map a default value to a grammar annotation based on its type.

    Returns a BooleanValue, DiscreteValue, ContinuousValue, or
    CategoricalValue matching the type of ``value``, or None when no
    annotation can be derived.

    NOTE: the bool branch must stay before the int branch — in Python
    ``bool`` is a subclass of ``int``.
    """
    if isinstance(value, bool):
        annotation = BooleanValue()
    elif isinstance(value, int):
        bounds = _get_integer_values(arg, value, cls)
        annotation = DiscreteValue(*bounds)
    elif isinstance(value, float):
        bounds = _get_float_values(arg, value, cls)
        annotation = ContinuousValue(*bounds)
    elif isinstance(value, str):
        # Only build a categorical annotation when candidate values exist.
        candidates = _find_parameter_values(arg, cls)
        annotation = CategoricalValue(*candidates) if candidates else None
    else:
        annotation = None
    return annotation
def __init__(
    self,
    # min_length is sampled automatically between 0 and 5 across
    # different pipelines, letting the search try several minimum word
    # lengths and keep whichever works best.
    min_length: DiscreteValue(min=0, max=5),
    # lower is sampled as True in some pipelines and False in others;
    # it controls whether the text is lower-cased.
    lower: BooleanValue(),
):
    """Configure the text filter: minimum token length and lower-casing."""
    self.lower = lower
    self.min_length = min_length
def __init__(
    self,
    featurewise_center: BooleanValue(),
    samplewise_center: BooleanValue(),
    featurewise_std_normalization: BooleanValue(),
    samplewise_std_normalization: BooleanValue(),
    rotation_range: DiscreteValue(0, 15),
    width_shift_range: ContinuousValue(0, 0.25),
    height_shift_range: ContinuousValue(0, 0.25),
    shear_range: ContinuousValue(0, 15),
    zoom_range: ContinuousValue(0, 0.25),
    horizontal_flip: BooleanValue(),
    vertical_flip: BooleanValue(),
):
    """Configure the image-augmentation generator.

    This wrapper keeps no state of its own: every sampled
    hyperparameter is forwarded unchanged to the superclass.
    """
    augmentation_params = dict(
        featurewise_center=featurewise_center,
        samplewise_center=samplewise_center,
        featurewise_std_normalization=featurewise_std_normalization,
        samplewise_std_normalization=samplewise_std_normalization,
        rotation_range=rotation_range,
        width_shift_range=width_shift_range,
        height_shift_range=height_shift_range,
        shear_range=shear_range,
        zoom_range=zoom_range,
        horizontal_flip=horizontal_flip,
        vertical_flip=vertical_flip,
    )
    super().__init__(**augmentation_params)
def _get_arg_values(arg, value, cls):
    """Derive a grammar annotation for parameter ``arg`` from its default value.

    Dispatches on the type of ``value`` (bool before int, since ``bool``
    is a subclass of ``int``) and returns the helper's result, or None
    when the type is unsupported or the helper fails.

    Fix: the original used a bare ``except:``, which also swallows
    ``SystemExit`` and ``KeyboardInterrupt``; it now catches only
    ``Exception`` so process-control signals still propagate.
    """
    print(f"Computing valid values for: {arg}={value}")
    try:
        if isinstance(value, bool):
            annotation = BooleanValue()
        elif isinstance(value, int):
            annotation = _get_integer_values(arg, value, cls)
        elif isinstance(value, float):
            annotation = _get_float_values(arg, value, cls)
        elif isinstance(value, str):
            annotation = _find_parameter_values(arg, cls)
        else:
            annotation = None
    except Exception:
        # Best-effort: any helper failure simply yields "no annotation".
        annotation = None
    print(f"Found annotation {arg}:{annotation}")
    return annotation
def __init__(
    self,
    language: CategoricalValue("en", "es"),
    extract_pos: BooleanValue(),
    extract_lemma: BooleanValue(),
    extract_pos_tag: BooleanValue(),
    extract_dep: BooleanValue(),
    extract_entity: BooleanValue(),
    extract_details: BooleanValue(),
    extract_sentiment: BooleanValue(),
):
    """Configure the NLP annotation extractor.

    Stores the target language and one switch per annotation kind.
    The underlying NLP pipeline object is created lazily, so it starts
    out as None here.
    """
    # Target language for the pipeline.
    self.language = language

    # Per-annotation extraction switches.
    self.extract_sentiment = extract_sentiment
    self.extract_details = extract_details
    self.extract_entity = extract_entity
    self.extract_dep = extract_dep
    self.extract_pos_tag = extract_pos_tag
    self.extract_lemma = extract_lemma
    self.extract_pos = extract_pos

    # Lazily-initialized pipeline handle.
    self._nlp = None
def __init__(self, strip_prefix_flag: BooleanValue()):
    """Configure the Lancaster stemmer wrapper.

    Records the prefix-stripping switch, then initializes the mixin and
    the wrapped stemmer with that same switch.
    """
    self.strip_prefix_flag = strip_prefix_flag
    # Mixin first, then the wrapped stemmer.
    NltkStemmer.__init__(self)
    _LancasterStemmer.__init__(self, strip_prefix_flag=strip_prefix_flag)
def __init__(self, strict: BooleanValue()):
    """Configure the S-expression tokenizer wrapper.

    Records the strictness switch, then initializes the mixin and the
    wrapped tokenizer with that same switch.
    """
    self.strict = strict
    # Mixin first, then the wrapped tokenizer.
    NltkTokenizer.__init__(self)
    _SExprTokenizer.__init__(self, strict=strict)
def __init__(self, case_insensitive: BooleanValue()):
    """Configure the Cistem stemmer wrapper.

    Records the case-sensitivity switch, then initializes the mixin and
    the wrapped stemmer with that same switch.
    """
    self.case_insensitive = case_insensitive
    # Mixin first, then the wrapped stemmer.
    NltkStemmer.__init__(self)
    _Cistem.__init__(self, case_insensitive=case_insensitive)
def __init__(self, ngram: DiscreteValue(1, 3), use_idf: BooleanValue()):
    """Configure the TF-IDF vectorizer wrapper.

    The sampled upper n-gram bound is expanded into the (1, ngram)
    range the superclass expects; use_idf passes straight through.
    The bound itself is kept on the instance for later inspection.
    """
    super().__init__(ngram_range=(1, ngram), use_idf=use_idf)
    # Remember the sampled upper bound (the superclass only keeps the range).
    self.ngram = ngram