Beispiel #1
0
from ludwig.decoders.registry import get_decoder_classes
from ludwig.encoders.registry import get_encoder_classes
from ludwig.schema import utils as schema_utils
from ludwig.utils.registry import Registry

# Registries mapping a registered name to a feature class. They are filled by
# the `register_input_feature` / `register_output_feature` decorators.
input_type_registry = Registry()
output_type_registry = Registry()


def register_input_feature(name: str):
    """Build a class decorator that records the class in `input_type_registry`.

    Args:
        name: Key under which the decorated class is stored.

    Returns:
        A decorator that stores the class under `name` and hands the very same
        class back.
    """

    def _register(feature_cls):
        # Registration is the only side effect; the class itself is untouched.
        input_type_registry[name] = feature_cls
        return feature_cls

    return _register


def register_output_feature(name: str):
    """Build a class decorator that records the class in `output_type_registry`.

    Args:
        name: Key under which the decorated class is stored.

    Returns:
        A decorator that stores the class under `name` and returns it as is.
    """

    def _register(feature_cls):
        output_type_registry[name] = feature_cls
        return feature_cls

    return _register


def update_encoders(feature_props, feature_type):
    """This function updates the list of encoders acquired from the registry with any custom encoders that are not
    registered before schema validation.

    Args:
        feature_props: Input feature properties
Beispiel #2
0
from ludwig.utils.misc_utils import get_from_registry
from ludwig.utils.registry import Registry, register
from ludwig.utils.tf_utils import sequence_length_3D, sequence_length_2D

# Module-level logger named after this module's `__name__`.
logger = logging.getLogger(__name__)

# Lookup table from a short string key to a recurrent cell class.
# NOTE(review): presumably indexed by the decoders' `cell_type` argument — confirm.
rnn_layers_registry = {
    'rnn': SimpleRNNCell,
    'gru': GRUCell,
    'lstm': LSTMCell
}

# NOTE(review): presumably the integer id used for padding positions — confirm against usage.
PAD_TOKEN = 0


# Filled by `SequenceDecoder.register`: registered name -> decoder class.
DECODER_REGISTRY = Registry()


class SequenceDecoder(Decoder, ABC):
    """Abstract base class for sequence decoders.

    Subclasses add themselves to the module-level `DECODER_REGISTRY` through
    the `register` classmethod.
    """

    @classmethod
    def register(cls, name):
        """Store the calling class in `DECODER_REGISTRY` under `name`.

        NOTE(review): presumably invoked by the `register` decorator imported
        from `ludwig.utils.registry` — confirm there.

        Args:
            name: Key under which the class is registered.
        """
        DECODER_REGISTRY[name] = cls


@register(name='generator')
class SequenceGeneratorDecoder(SequenceDecoder):

    def __init__(
            self,
            num_classes,
            cell_type='rnn',
Beispiel #3
0
from typing import List, Union

from ludwig.utils.registry import Registry

# metric_feature_registry: feature name -> {metric name -> metric class}.
# metric_registry: metric name -> metric class, independent of feature.
# Both are filled by the `register_metric` decorator.
metric_feature_registry = Registry()
metric_registry = Registry()


def register_metric(name: str, features: Union[str, List[str]]):
    """Build a class decorator that registers a metric for one or more features.

    The decorated class is stored under `name` in the per-feature mapping held
    by `metric_feature_registry` for every listed feature, and once more in the
    flat `metric_registry`.

    Args:
        name: Key under which the metric class is registered.
        features: A single feature name or a list of feature names.

    Returns:
        A decorator that performs the registration and returns the class
        unchanged.
    """
    # Normalise the single-feature shorthand to a one-element list.
    feature_names = [features] if isinstance(features, str) else features

    def _register(metric_cls):
        for feature_name in feature_names:
            per_feature = metric_feature_registry.get(feature_name, {})
            per_feature[name] = metric_cls
            # Write back so a freshly created mapping is actually stored.
            metric_feature_registry[feature_name] = per_feature
        metric_registry[name] = metric_cls
        return metric_cls

    return _register


def get_metric_classes(feature: str):
    """Return everything registered for `feature` in `metric_feature_registry`.

    Args:
        feature: Feature name used as the registry key.

    Returns:
        The per-feature mapping of metric name to metric class.  A lookup
        failure for an unknown feature propagates from the registry.
    """
    classes_for_feature = metric_feature_registry[feature]
    return classes_for_feature


def get_metric_cls(feature: str, name: str):
    """Return the metric class registered as `name` for `feature`.

    Args:
        feature: Feature name used as the outer registry key.
        name: Metric name used as the inner key.

    Returns:
        The registered metric class.  A lookup failure at either level
        propagates to the caller.
    """
    classes_for_feature = metric_feature_registry[feature]
    return classes_for_feature[name]
Beispiel #4
0
from typing import List, Optional, Union

from marshmallow_dataclass import dataclass

from ludwig.constants import COMBINED, LOSS, MODEL_ECD, MODEL_GBM, TRAINING, TYPE
from ludwig.schema import utils as schema_utils
from ludwig.schema.metadata.trainer_metadata import TRAINER_METADATA
from ludwig.schema.optimizers import (
    BaseOptimizerConfig,
    GradientClippingConfig,
    GradientClippingDataclassField,
    OptimizerDataclassField,
)
from ludwig.utils.registry import Registry

# Filled by the `register_trainer_schema` decorator: registered name -> trainer config class.
trainer_schema_registry = Registry()


def register_trainer_schema(name: str):
    """Build a decorator that records a trainer config in `trainer_schema_registry`.

    Args:
        name: Key under which the decorated config is stored.

    Returns:
        A decorator that stores its argument under `name` and returns it
        unchanged.
    """

    def _register(config_cls: BaseTrainerConfig):
        trainer_schema_registry[name] = config_cls
        return config_cls

    return _register


@dataclass
class BaseTrainerConfig(schema_utils.BaseMarshmallowConfig, ABC):
    """Common trainer parameter values."""

    type: str
Beispiel #5
0
from typing import List, Union

from ludwig.utils.registry import DEFAULT_KEYS, Registry

# trainers_registry: model type -> {trainer name -> trainer class}, filled by
# the `register_trainer` decorator.
# NOTE(review): ray_trainers_registry presumably has the same layout for Ray
# trainers — its use is not visible here, confirm.
trainers_registry = Registry()
ray_trainers_registry = Registry()


def register_trainer(name: str,
                     model_types: Union[str, List[str]],
                     default=False):
    """Register a trainer class that supports training the given model types.

    Using default=True will make the trainer the default trainer for the model type.

    Args:
        name: The name of the trainer, as it can be used in the config.
        model_types: The model types that the trainer supports.
        default: Whether the trainer should be the default trainer for the model type.
    """
    if isinstance(model_types, str):
        model_types = [model_types]

    def wrap(cls):
        for model_type in model_types:
            _model_type_registry = trainers_registry.get(model_type, {})
            _model_type_registry[name] = cls
            if default:
                if DEFAULT_KEYS[0] in _model_type_registry:
                    raise ValueError(
                        f"Default trainer already registered for model type {model_type}"
Beispiel #6
0
# limitations under the License.
# ==============================================================================
import logging
import sys
from abc import ABC

import tensorflow as tf

from ludwig.encoders import sequence_encoders
from ludwig.encoders.base import Encoder
from ludwig.utils.registry import Registry, register
from ludwig.modules.reduction_modules import SequenceReducer

# Module-level logger named after this module's `__name__`.
logger = logging.getLogger(__name__)

# Text encoder registry, constructed from the sequence encoders' registry.
# NOTE(review): presumably this keeps the sequence encoders available for text
# features — confirm against `Registry`'s constructor semantics.
ENCODER_REGISTRY = Registry(sequence_encoders.ENCODER_REGISTRY)


class TextEncoder(Encoder, ABC):
    """Abstract base class for text encoders.

    Subclasses add themselves to the module-level `ENCODER_REGISTRY` through
    the `register` classmethod.
    """

    @classmethod
    def register(cls, name):
        """Store the calling class in `ENCODER_REGISTRY` under `name`.

        NOTE(review): presumably invoked by the `register` decorator imported
        from `ludwig.utils.registry` — confirm there.

        Args:
            name: Key under which the class is registered.
        """
        ENCODER_REGISTRY[name] = cls


@register(name='bert')
class BERTEncoder(TextEncoder):
    fixed_preprocessing_parameters = {
        'word_tokenizer': 'hf_tokenizer',
        'pretrained_model_name_or_path':
        'feature.pretrained_model_name_or_path',
    }
Beispiel #7
0
# limitations under the License.
# ==============================================================================

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Type, Union

import numpy as np
import torch
import torch.nn as nn

from ludwig.constants import BINARY, CATEGORY
from ludwig.utils.registry import DEFAULT_KEYS, Registry

# Filled by the `register_calibration` decorator:
# feature name -> {calibration name -> calibration class}.
calibration_registry = Registry()


def register_calibration(name: str,
                         features: Union[str, List[str]],
                         default=False):
    """Registers a calibration implementation for a list of features."""
    if isinstance(features, str):
        features = [features]

    def wrap(cls):
        for feature in features:
            feature_registry = calibration_registry.get(feature, {})
            feature_registry[name] = cls
            if default:
                for key in DEFAULT_KEYS:
Beispiel #8
0
#! /usr/bin/env python
# Copyright (c) 2022 Predibase, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from ludwig.utils.registry import Registry

# Filled by the `register_dataset` decorator: registered name -> dataset class.
dataset_registry = Registry()


def register_dataset(name: str):
    """Build a class decorator that records the class in `dataset_registry`.

    Args:
        name: Key under which the decorated class is stored.

    Returns:
        A decorator that stores the class under `name` and returns it
        unchanged.
    """

    def _register(dataset_cls):
        dataset_registry[name] = dataset_cls
        return dataset_cls

    return _register
Beispiel #9
0
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import logging
from abc import ABC

from ludwig.encoders.base import Encoder
from ludwig.utils.registry import Registry, register, DEFAULT_KEYS
from ludwig.encoders.generic_encoders import PassthroughEncoder
from ludwig.modules.embedding_modules import Embed

# Module-level logger named after this module's `__name__`.
logger = logging.getLogger(__name__)

# Every default key, plus the explicit 'passthrough' name, selects
# PassthroughEncoder until other encoders are registered.
ENCODER_REGISTRY = Registry(
    dict.fromkeys(DEFAULT_KEYS + ['passthrough'], PassthroughEncoder))


class CategoricalEncoder(Encoder, ABC):
    """Abstract base class for categorical encoders.

    Subclasses add themselves to the module-level `ENCODER_REGISTRY` through
    the `register` classmethod.
    """

    @classmethod
    def register(cls, name):
        """Store the calling class in `ENCODER_REGISTRY` under `name`.

        NOTE(review): presumably invoked by the `register` decorator imported
        from `ludwig.utils.registry` — confirm there.

        Args:
            name: Key under which the class is registered.
        """
        ENCODER_REGISTRY[name] = cls


@register(name='dense')
class CategoricalEmbedEncoder(CategoricalEncoder):
    def __init__(self,
                 vocab,
                 embedding_size=50,
                 embeddings_trainable=True,
Beispiel #10
0
    CATEGORY,
    LOGITS,
    NUMBER,
    SEQUENCE,
    SET,
    TEXT,
    TIMESERIES,
    VECTOR,
)
from ludwig.utils import strings_utils
from ludwig.utils.registry import Registry

# Used for Laplace smoothing for candidate samplers.
EPSILON = 1.0e-10

# Filled by the `register_loss` decorator: feature name -> {loss name -> loss class}.
loss_registry = Registry()


def register_loss(name: str, features: Union[str, List[str]]):
    """Build a class decorator that registers a loss for one or more features.

    For every listed feature the decorated class is stored under `name` in
    that feature's mapping inside `loss_registry`.

    Args:
        name: Key under which the loss class is registered.
        features: A single feature name or a list of feature names.

    Returns:
        A decorator that performs the registration and returns the class
        unchanged.
    """
    targets = features
    if isinstance(targets, str):
        # Single-feature shorthand.
        targets = [targets]

    def _decorate(loss_cls):
        for target in targets:
            losses = loss_registry.get(target, {})
            losses[name] = loss_cls
            # Write back so a freshly created mapping is actually stored.
            loss_registry[target] = losses
        return loss_cls

    return _decorate
Beispiel #11
0
from typing import Dict, List, Type, Union

from ludwig.encoders.base import Encoder
from ludwig.utils.registry import DEFAULT_KEYS, Registry

# Filled by `register_encoder`: feature name -> {encoder name -> encoder class}.
encoder_registry = Registry()

# Filled by `register_sequence_encoder`: registered name -> encoder class.
sequence_encoder_registry = Registry()


def register_sequence_encoder(name: str):
    """Build a class decorator that records the class in `sequence_encoder_registry`.

    Args:
        name: Key under which the decorated class is stored.

    Returns:
        A decorator that stores the class under `name` and returns it
        unchanged.
    """

    def _register(encoder_cls):
        sequence_encoder_registry[name] = encoder_cls
        return encoder_cls

    return _register


def register_encoder(name: str,
                     features: Union[str, List[str]],
                     default=False):
    if isinstance(features, str):
        features = [features]

    def wrap(cls):
        for feature in features:
            feature_registry = encoder_registry.get(feature, {})
            feature_registry[name] = cls
            if default:
                for key in DEFAULT_KEYS:
                    feature_registry[key] = cls
Beispiel #12
0
# limitations under the License.

import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from sklearn.model_selection import train_test_split

from ludwig.backend.base import Backend
from ludwig.constants import BINARY, CATEGORY, COLUMN, DATE, SPLIT, TYPE
from ludwig.utils.data_utils import split_dataset_ttv
from ludwig.utils.registry import Registry
from ludwig.utils.types import DataFrame

# NOTE(review): presumably maps a split type name to its Splitter class — the
# registration code is not visible here, confirm.
split_registry = Registry()
# Default value of the `random_seed` parameter of `Splitter.split`.
default_random_seed = 42

# NOTE(review): presumably the name of a temporary split column — confirm against usage.
TMP_SPLIT_COL = "__SPLIT__"
# NOTE(review): presumably split fractions (they sum to 1.0); the order is not
# established here — confirm.
DEFAULT_PROBABILITIES = (0.7, 0.1, 0.2)


class Splitter(ABC):
    @abstractmethod
    def split(
        self,
        df: DataFrame,
        backend: Backend,
        random_seed: float = default_random_seed
    ) -> Tuple[DataFrame, DataFrame, DataFrame]:
        """Split `df` into three dataframes.

        Abstract: concrete splitters must override this method.

        Args:
            df: The dataframe to split.
            backend: Backend made available to the implementation.
            random_seed: Seed for any randomness; defaults to
                `default_random_seed`.

        Returns:
            A tuple of three dataframes.  NOTE(review): presumably
            (training, validation, test) — confirm against implementations.
        """
        pass
Beispiel #13
0
from marshmallow_dataclass import dataclass

from ludwig.schema.metadata.trainer_metadata import TRAINER_METADATA
from ludwig.schema.utils import (
    BaseMarshmallowConfig,
    Boolean,
    create_cond,
    FloatRange,
    FloatRangeTupleDataclassField,
    NonNegativeFloat,
    StringOptions,
    unload_jsonschema_from_marshmallow_class,
)
from ludwig.utils.registry import Registry

# Filled by the `register_optimizer` decorator:
# registered name -> (config's `optimizer_class`, config class).
optimizer_registry = Registry()


def register_optimizer(name: str):
    """Build a decorator that records an optimizer config in `optimizer_registry`.

    The stored entry is a 2-tuple: the config's `optimizer_class` attribute
    followed by the config itself.

    Args:
        name: Key under which the entry is stored.

    Returns:
        A decorator that stores the entry and returns its argument unchanged.
    """

    def _register(config_cls: BaseOptimizerConfig):
        entry = (config_cls.optimizer_class, config_cls)
        optimizer_registry[name] = entry
        return config_cls

    return _register


@dataclass
class BaseOptimizerConfig(BaseMarshmallowConfig, ABC):
    """Base class for optimizers. Not meant to be used directly.
Beispiel #14
0
# Module-level logger named after this module's `__name__`.
logger = logging.getLogger(__name__)


# Lookup table from a short string key to a sequence encoder class.
sequence_encoder_registry = {
    "stacked_cnn": StackedCNN,
    "parallel_cnn": ParallelCNN,
    "stacked_parallel_cnn": StackedParallelCNN,
    "rnn": StackedRNN,
    "cnnrnn": StackedCNNRNN,
    # TODO: add transformer
    # 'transformer': StackedTransformer,
}


# Filled by the `register_combiner` decorator: registered name -> combiner class.
combiner_registry = Registry()


def register_combiner(name: str):
    """Build a class decorator that records the class in `combiner_registry`.

    Args:
        name: Key under which the decorated class is stored.

    Returns:
        A decorator that stores the class under `name` and returns it
        unchanged.
    """

    def _register(combiner_cls):
        combiner_registry[name] = combiner_cls
        return combiner_cls

    return _register


# super class to house common properties
class Combiner(LudwigModule, ABC):
    def __init__(self, input_features: Dict[str, "InputFeature"]):
        super().__init__()
        self.input_features = input_features
Beispiel #15
0
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import logging
from abc import ABC

import tensorflow as tf

from ludwig.encoders.base import Encoder
from ludwig.utils.registry import Registry, register_default
from ludwig.encoders.generic_encoders import DenseEncoder

# Module-level logger named after this module's `__name__`.
logger = logging.getLogger(__name__)

# Binary encoder registry, pre-seeded with 'dense' -> DenseEncoder; further
# entries are added through `BinaryEncoder.register`.
ENCODER_REGISTRY = Registry({'dense': DenseEncoder})


class BinaryEncoder(Encoder, ABC):
    """Abstract base class for binary encoders.

    Subclasses add themselves to the module-level `ENCODER_REGISTRY` through
    the `register` classmethod.
    """

    @classmethod
    def register(cls, name):
        """Store the calling class in `ENCODER_REGISTRY` under `name`.

        NOTE(review): presumably invoked by the registration decorators
        imported from `ludwig.utils.registry` — confirm there.

        Args:
            name: Key under which the class is registered.
        """
        ENCODER_REGISTRY[name] = cls


@register_default(name='passthrough')
class BinaryPassthroughEncoder(BinaryEncoder):
    def __init__(self, **kwargs):
        super().__init__()
        logger.debug(' {}'.format(self.name))

    def call(self, inputs, training=None, mask=None):
Beispiel #16
0
from typing import Dict, List, Type, Union

from ludwig.encoders.base import Encoder
from ludwig.utils.registry import DEFAULT_KEYS, Registry

# Filled by `register_encoder`: feature name -> {encoder name -> encoder class}.
encoder_registry = Registry()


def register_encoder(name: str, features: Union[str, List[str]], default=False):
    """Build a class decorator that registers an encoder for one or more features.

    For every listed feature the decorated class is stored under `name` in
    that feature's mapping inside `encoder_registry`.  With a truthy
    `default`, the class is additionally stored under every key in
    `DEFAULT_KEYS`.

    Args:
        name: Key under which the encoder class is registered.
        features: A single feature name or a list of feature names.
        default: Whether to also bind the class to each of `DEFAULT_KEYS`.

    Returns:
        A decorator that performs the registration and returns the class
        unchanged.
    """
    # Normalise the single-feature shorthand to a one-element list.
    feature_names = [features] if isinstance(features, str) else features

    def _register(encoder_cls):
        for feature_name in feature_names:
            per_feature = encoder_registry.get(feature_name, {})
            # The explicit name first, then the default aliases when requested.
            keys = [name, *DEFAULT_KEYS] if default else [name]
            for key in keys:
                per_feature[key] = encoder_cls
            # Write back so a freshly created mapping is actually stored.
            encoder_registry[feature_name] = per_feature
        return encoder_cls

    return _register


def get_encoder_cls(feature: str, name: str) -> Type[Encoder]:
    """Return the encoder class registered as `name` for `feature`.

    Args:
        feature: Feature name used as the outer key of `encoder_registry`.
        name: Encoder name used as the inner key.

    Returns:
        The registered encoder class.  A lookup failure at either level
        propagates to the caller.
    """
    encoders_for_feature = encoder_registry[feature]
    return encoders_for_feature[name]


def get_encoder_classes(feature: str) -> Dict[str, Type[Encoder]]:
    """Return everything registered for `feature` in `encoder_registry`.

    Args:
        feature: Feature name used as the registry key.

    Returns:
        The per-feature mapping of encoder name to encoder class.  A lookup
        failure for an unknown feature propagates from the registry.
    """
    encoders_for_feature = encoder_registry[feature]
    return encoders_for_feature
Beispiel #17
0
    IMAGE,
    MISSING_VALUE_STRATEGY_OPTIONS,
    NUMBER,
    SEQUENCE,
    SET,
    TEXT,
    TIMESERIES,
    VECTOR,
)
from ludwig.schema import utils as schema_utils
from ludwig.schema.metadata.preprocessing_metadata import PREPROCESSING_METADATA
from ludwig.utils import strings_utils
from ludwig.utils.registry import Registry
from ludwig.utils.tokenizers import tokenizer_registry

# Filled by the `register_preprocessor` decorator: registered name -> preprocessing config class.
preprocessing_registry = Registry()


def register_preprocessor(name: str):
    """Build a decorator that records a preprocessing config in `preprocessing_registry`.

    Args:
        name: Key under which the decorated config is stored.

    Returns:
        A decorator that stores its argument under `name` and returns it
        unchanged.
    """

    def _register(config_cls: BasePreprocessingConfig):
        preprocessing_registry[name] = config_cls
        return config_cls

    return _register


@dataclass
class BasePreprocessingConfig(schema_utils.BaseMarshmallowConfig, ABC):
    """Base class for input feature preprocessing. Not meant to be used directly.

    The dataclass format prevents arbitrary properties from being set. Consequently, in child classes, all properties