from ludwig.decoders.registry import get_decoder_classes from ludwig.encoders.registry import get_encoder_classes from ludwig.schema import utils as schema_utils from ludwig.utils.registry import Registry input_type_registry = Registry() output_type_registry = Registry() def register_input_feature(name: str): def wrap(cls): input_type_registry[name] = cls return cls return wrap def register_output_feature(name: str): def wrap(cls): output_type_registry[name] = cls return cls return wrap def update_encoders(feature_props, feature_type): """This function updates the list of encoders acquired from the registry with any custom encoders that are not registered before schema validation. Args: feature_props: Input feature properties
from ludwig.utils.misc_utils import get_from_registry from ludwig.utils.registry import Registry, register from ludwig.utils.tf_utils import sequence_length_3D, sequence_length_2D logger = logging.getLogger(__name__) rnn_layers_registry = { 'rnn': SimpleRNNCell, 'gru': GRUCell, 'lstm': LSTMCell } PAD_TOKEN = 0 DECODER_REGISTRY = Registry() class SequenceDecoder(Decoder, ABC): @classmethod def register(cls, name): DECODER_REGISTRY[name] = cls @register(name='generator') class SequenceGeneratorDecoder(SequenceDecoder): def __init__( self, num_classes, cell_type='rnn',
from typing import List, Union

from ludwig.utils.registry import Registry

# Outer key: feature; value: mapping of metric name -> registered class.
metric_feature_registry = Registry()
# Metric name -> registered class, independent of feature.
metric_registry = Registry()


def register_metric(name: str, features: Union[str, List[str]]):
    """Build a class decorator that records the decorated class under ``name``.

    Args:
        name: Key under which the class is stored.
        features: One feature key, or a list of feature keys, whose
            per-feature mapping in ``metric_feature_registry`` receives
            the class.

    Returns:
        A decorator that registers the class and returns it unchanged.
    """
    feature_names = [features] if isinstance(features, str) else features

    def decorator(metric_cls):
        for feature_name in feature_names:
            # Read the current per-feature mapping (or start an empty one),
            # add the class, then write the mapping back to the registry.
            per_feature = metric_feature_registry.get(feature_name, {})
            per_feature[name] = metric_cls
            metric_feature_registry[feature_name] = per_feature
        # NOTE(review): indentation was lost in the source; this global
        # registration is assumed to sit outside the loop -- confirm.
        metric_registry[name] = metric_cls
        return metric_cls

    return decorator


def get_metric_classes(feature: str):
    """Return the name -> class mapping stored for ``feature``."""
    return metric_feature_registry[feature]


def get_metric_cls(feature: str, name: str):
    """Return the class stored under ``name`` for ``feature``."""
    classes_for_feature = metric_feature_registry[feature]
    return classes_for_feature[name]
from typing import List, Optional, Union from marshmallow_dataclass import dataclass from ludwig.constants import COMBINED, LOSS, MODEL_ECD, MODEL_GBM, TRAINING, TYPE from ludwig.schema import utils as schema_utils from ludwig.schema.metadata.trainer_metadata import TRAINER_METADATA from ludwig.schema.optimizers import ( BaseOptimizerConfig, GradientClippingConfig, GradientClippingDataclassField, OptimizerDataclassField, ) from ludwig.utils.registry import Registry trainer_schema_registry = Registry() def register_trainer_schema(name: str): def wrap(trainer_config: BaseTrainerConfig): trainer_schema_registry[name] = trainer_config return trainer_config return wrap @dataclass class BaseTrainerConfig(schema_utils.BaseMarshmallowConfig, ABC): """Common trainer parameter values.""" type: str
from typing import List, Union from ludwig.utils.registry import DEFAULT_KEYS, Registry trainers_registry = Registry() ray_trainers_registry = Registry() def register_trainer(name: str, model_types: Union[str, List[str]], default=False): """Register a trainer class that supports training the given model types. Using default=True will make the trainer the default trainer for the model type. Args: name: The name of the trainer, as it can be used in the config. model_types: The model types that the trainer supports. default: Whether the trainer should be the default trainer for the model type. """ if isinstance(model_types, str): model_types = [model_types] def wrap(cls): for model_type in model_types: _model_type_registry = trainers_registry.get(model_type, {}) _model_type_registry[name] = cls if default: if DEFAULT_KEYS[0] in _model_type_registry: raise ValueError( f"Default trainer already registered for model type {model_type}"
# limitations under the License.
# ==============================================================================
import logging
import sys
from abc import ABC

import tensorflow as tf

from ludwig.encoders import sequence_encoders
from ludwig.encoders.base import Encoder
from ludwig.utils.registry import Registry, register
from ludwig.modules.reduction_modules import SequenceReducer

logger = logging.getLogger(__name__)

# Text encoder registry, constructed from the sequence encoders' registry.
ENCODER_REGISTRY = Registry(sequence_encoders.ENCODER_REGISTRY)


class TextEncoder(Encoder, ABC):
    """Abstract base for text encoders; subclasses are stored in ``ENCODER_REGISTRY``."""

    @classmethod
    def register(cls, name):
        """Store the receiving class in ``ENCODER_REGISTRY`` under ``name``."""
        ENCODER_REGISTRY[name] = cls


# NOTE(review): ``register`` comes from ludwig.utils.registry; presumably it
# ends up calling ``cls.register(name)`` -- confirm against that module.
@register(name='bert')
class BERTEncoder(TextEncoder):
    # Class-level mapping of fixed preprocessing parameter names to values.
    fixed_preprocessing_parameters = {
        'word_tokenizer': 'hf_tokenizer',
        'pretrained_model_name_or_path': 'feature.pretrained_model_name_or_path',
    }
# limitations under the License. # ============================================================================== import logging from abc import ABC, abstractmethod from dataclasses import dataclass from typing import List, Type, Union import numpy as np import torch import torch.nn as nn from ludwig.constants import BINARY, CATEGORY from ludwig.utils.registry import DEFAULT_KEYS, Registry calibration_registry = Registry() def register_calibration(name: str, features: Union[str, List[str]], default=False): """Registers a calibration implementation for a list of features.""" if isinstance(features, str): features = [features] def wrap(cls): for feature in features: feature_registry = calibration_registry.get(feature, {}) feature_registry[name] = cls if default: for key in DEFAULT_KEYS:
#! /usr/bin/env python
# Copyright (c) 2022 Predibase, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from ludwig.utils.registry import Registry

# Dataset name -> registered class.
dataset_registry = Registry()


def register_dataset(name: str):
    """Build a class decorator that stores the class in ``dataset_registry``.

    Args:
        name: Key under which the decorated class is stored.

    Returns:
        A decorator that registers the class and returns it unchanged.
    """

    def decorator(dataset_cls):
        dataset_registry[name] = dataset_cls
        return dataset_cls

    return decorator
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import logging from abc import ABC from ludwig.encoders.base import Encoder from ludwig.utils.registry import Registry, register, DEFAULT_KEYS from ludwig.encoders.generic_encoders import PassthroughEncoder from ludwig.modules.embedding_modules import Embed logger = logging.getLogger(__name__) ENCODER_REGISTRY = Registry( {key: PassthroughEncoder for key in DEFAULT_KEYS + ['passthrough']}) class CategoricalEncoder(Encoder, ABC): @classmethod def register(cls, name): ENCODER_REGISTRY[name] = cls @register(name='dense') class CategoricalEmbedEncoder(CategoricalEncoder): def __init__(self, vocab, embedding_size=50, embeddings_trainable=True,
CATEGORY, LOGITS, NUMBER, SEQUENCE, SET, TEXT, TIMESERIES, VECTOR, ) from ludwig.utils import strings_utils from ludwig.utils.registry import Registry # used for Laplace smoothing for candidate samplers EPSILON = 1.0e-10 loss_registry = Registry() def register_loss(name: str, features: Union[str, List[str]]): if isinstance(features, str): features = [features] def wrap(cls): for feature in features: feature_registry = loss_registry.get(feature, {}) feature_registry[name] = cls loss_registry[feature] = feature_registry return cls return wrap
from typing import Dict, List, Type, Union from ludwig.encoders.base import Encoder from ludwig.utils.registry import DEFAULT_KEYS, Registry encoder_registry = Registry() sequence_encoder_registry = Registry() def register_sequence_encoder(name: str): def wrap(cls): sequence_encoder_registry[name] = cls return cls return wrap def register_encoder(name: str, features: Union[str, List[str]], default=False): if isinstance(features, str): features = [features] def wrap(cls): for feature in features: feature_registry = encoder_registry.get(feature, {}) feature_registry[name] = cls if default: for key in DEFAULT_KEYS: feature_registry[key] = cls
# limitations under the License.
import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from sklearn.model_selection import train_test_split

from ludwig.backend.base import Backend
from ludwig.constants import BINARY, CATEGORY, COLUMN, DATE, SPLIT, TYPE
from ludwig.utils.data_utils import split_dataset_ttv
from ludwig.utils.registry import Registry
from ludwig.utils.types import DataFrame

split_registry = Registry()
# Seed used by ``Splitter.split`` when the caller does not pass one.
default_random_seed = 42

TMP_SPLIT_COL = "__SPLIT__"
DEFAULT_PROBABILITIES = (0.7, 0.1, 0.2)


class Splitter(ABC):
    """Abstract interface for objects that split a DataFrame into three parts."""

    @abstractmethod
    def split(
        self,
        df: DataFrame,
        backend: Backend,
        random_seed: float = default_random_seed,
    ) -> Tuple[DataFrame, DataFrame, DataFrame]:
        """Split ``df`` into three DataFrames.

        Args:
            df: The DataFrame to split.
            backend: Backend object made available to the implementation.
            random_seed: Seed value; defaults to ``default_random_seed``.

        Returns:
            A 3-tuple of DataFrames (presumably train / validation / test --
            confirm against concrete implementations).
        """
from marshmallow_dataclass import dataclass from ludwig.schema.metadata.trainer_metadata import TRAINER_METADATA from ludwig.schema.utils import ( BaseMarshmallowConfig, Boolean, create_cond, FloatRange, FloatRangeTupleDataclassField, NonNegativeFloat, StringOptions, unload_jsonschema_from_marshmallow_class, ) from ludwig.utils.registry import Registry optimizer_registry = Registry() def register_optimizer(name: str): def wrap(optimizer_config: BaseOptimizerConfig): optimizer_registry[name] = (optimizer_config.optimizer_class, optimizer_config) return optimizer_config return wrap @dataclass class BaseOptimizerConfig(BaseMarshmallowConfig, ABC): """Base class for optimizers. Not meant to be used directly.
logger = logging.getLogger(__name__)

# Sequence encoder name -> encoder class.
sequence_encoder_registry = {
    "stacked_cnn": StackedCNN,
    "parallel_cnn": ParallelCNN,
    "stacked_parallel_cnn": StackedParallelCNN,
    "rnn": StackedRNN,
    "cnnrnn": StackedCNNRNN,
    # todo: add transformer
    # 'transformer': StackedTransformer,
}

# Combiner name -> registered class.
combiner_registry = Registry()


def register_combiner(name: str):
    """Build a class decorator that stores the class in ``combiner_registry``.

    Args:
        name: Key under which the decorated class is stored.

    Returns:
        A decorator that registers the class and returns it unchanged.
    """

    def decorator(combiner_cls):
        combiner_registry[name] = combiner_cls
        return combiner_cls

    return decorator


# super class to house common properties
class Combiner(LudwigModule, ABC):
    """Base combiner that keeps a reference to the input features mapping."""

    def __init__(self, input_features: Dict[str, "InputFeature"]):
        """Initialise the parent module and store ``input_features``.

        Args:
            input_features: Mapping from string keys to input feature objects.
        """
        super().__init__()
        self.input_features = input_features
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import logging from abc import ABC import tensorflow as tf from ludwig.encoders.base import Encoder from ludwig.utils.registry import Registry, register_default from ludwig.encoders.generic_encoders import DenseEncoder logger = logging.getLogger(__name__) ENCODER_REGISTRY = Registry({'dense': DenseEncoder}) class BinaryEncoder(Encoder, ABC): @classmethod def register(cls, name): ENCODER_REGISTRY[name] = cls @register_default(name='passthrough') class BinaryPassthroughEncoder(BinaryEncoder): def __init__(self, **kwargs): super().__init__() logger.debug(' {}'.format(self.name)) def call(self, inputs, training=None, mask=None):
from typing import Dict, List, Type, Union

from ludwig.encoders.base import Encoder
from ludwig.utils.registry import DEFAULT_KEYS, Registry

# Outer key: feature; value: mapping of encoder name -> registered class.
encoder_registry = Registry()


def register_encoder(name: str, features: Union[str, List[str]], default=False):
    """Build a class decorator that records an encoder class per feature.

    Args:
        name: Key under which the class is stored in each per-feature mapping.
        features: One feature key, or a list of feature keys.
        default: When true, the class is additionally stored under every key
            listed in ``DEFAULT_KEYS`` for each feature.

    Returns:
        A decorator that registers the class and returns it unchanged.
    """
    feature_names = [features] if isinstance(features, str) else features

    def decorator(encoder_cls):
        for feature_name in feature_names:
            # Read the current per-feature mapping (or start an empty one),
            # add the entries, then write the mapping back to the registry.
            per_feature = encoder_registry.get(feature_name, {})
            per_feature[name] = encoder_cls
            if default:
                # Alias every default key to this class.
                per_feature.update(dict.fromkeys(DEFAULT_KEYS, encoder_cls))
            encoder_registry[feature_name] = per_feature
        return encoder_cls

    return decorator


def get_encoder_cls(feature: str, name: str) -> Type[Encoder]:
    """Return the encoder class stored under ``name`` for ``feature``."""
    classes_for_feature = encoder_registry[feature]
    return classes_for_feature[name]


def get_encoder_classes(feature: str) -> Dict[str, Type[Encoder]]:
    """Return the name -> class mapping stored for ``feature``."""
    return encoder_registry[feature]
IMAGE, MISSING_VALUE_STRATEGY_OPTIONS, NUMBER, SEQUENCE, SET, TEXT, TIMESERIES, VECTOR, ) from ludwig.schema import utils as schema_utils from ludwig.schema.metadata.preprocessing_metadata import PREPROCESSING_METADATA from ludwig.utils import strings_utils from ludwig.utils.registry import Registry from ludwig.utils.tokenizers import tokenizer_registry preprocessing_registry = Registry() def register_preprocessor(name: str): def wrap(preprocessing_config: BasePreprocessingConfig): preprocessing_registry[name] = preprocessing_config return preprocessing_config return wrap @dataclass class BasePreprocessingConfig(schema_utils.BaseMarshmallowConfig, ABC): """Base class for input feature preprocessing. Not meant to be used directly. The dataclass format prevents arbitrary properties from being set. Consequently, in child classes, all properties