예제 #1
0
class DummyDescriptor(object):
    """Holder for an assortment of ``properties`` declarations.

    NOTE(review): the declarations below pass a mix of property classes and
    property instances as arguments. This looks like a deliberate fixture for
    exercising both forms -- confirm against the code that uses this class
    before normalizing any of them.
    """

    # Mapping declared with a Float *instance* as the first argument and the
    # String *class* as the second.
    dummy_map = properties.Mapping(properties.Float(), properties.String)
    # List declared with the Float class followed by the String class as
    # positional arguments.
    dummy_list = properties.List(properties.Float, properties.String)
    # Set whose argument is obtained by calling type() on a Float instance.
    dummy_set = properties.Set(type(properties.Float()))
    # LinkOrElse declared with no arguments.
    link_or_else = properties.LinkOrElse()
    # Mapping whose first argument is an Optional-wrapped String and whose
    # second argument is Integer.
    map_collection_key = properties.Mapping(
        properties.Optional(properties.String), properties.Integer)
    # SpecifiedMixedList given a one-element list holding an Integer property
    # constructed with default=100.
    specified_mixed_list = properties.SpecifiedMixedList(
        [properties.Integer(default=100)])
class IngredientsToFormulationPredictor(
        Resource['IngredientsToFormulationPredictor'], Predictor, AIResourceMetadata):
    """[ALPHA] Predictor interface that builds a formulation out of ingredient quantities.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    output: FormulationDescriptor
        descriptor for the formulation produced as output
    id_to_quantity: Mapping[str, RealDescriptor]
        Map keyed by ingredient identifier; each value is the descriptor that
        represents that ingredient's quantity,
        e.g., ``{'water': RealDescriptor('water quantity', 0, 1, "")}``
    labels: Mapping[str, Set[str]]
        Map keyed by label; each value is the set of ingredients that are assigned
        that label when present in a mixture, e.g., ``{'solvent': {'water'}}``
    archived: bool
        stored on the instance as ``archived``; defaults to ``False``

    """

    _resource_type = ResourceTypeEnum.MODULE

    output = _properties.Object(FormulationDescriptor, 'config.output')
    id_to_quantity = _properties.Mapping(
        _properties.String,
        _properties.Object(RealDescriptor),
        'config.id_to_quantity')
    labels = _properties.Mapping(
        _properties.String,
        _properties.Set(_properties.String),
        'config.labels')

    typ = _properties.String(
        'config.type', default='IngredientsToSimpleMixture', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 output: FormulationDescriptor,
                 id_to_quantity: Mapping[str, RealDescriptor],
                 labels: Mapping[str, Set[str]],
                 archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.output: FormulationDescriptor = output
        self.id_to_quantity: Mapping[str, RealDescriptor] = id_to_quantity
        self.labels: Mapping[str, Set[str]] = labels
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key and return ``data``."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        return f'<IngredientsToFormulationPredictor {self.name!r}>'
예제 #3
0
class PredictedVsActualRealPoint(Serializable["PredictedVsActualRealPoint"]):
    """A single real-valued data point of predicted vs. actual data."""

    uuid = properties.UUID("uuid")
    """:UUID: Unique Citrine id assigned to the candidate"""

    identifiers = properties.Set(properties.String, "identifiers")
    """:Set[str]: Globally unique identifiers assigned to the candidate"""

    trial = properties.Integer("trial")
    """:int: Index (1-based) of the trial that contained this candidate"""

    fold = properties.Integer("fold")
    """:int: Index (1-based) of the fold that contained this candidate"""

    predicted = properties.Object(RealMetricValue, "predicted")
    """:RealMetricValue: The predicted value"""

    actual = properties.Object(RealMetricValue, "actual")
    """:RealMetricValue: The actual value"""

    def __init__(self):
        # Takes no arguments and sets nothing.
        pass  # pragma: no cover
예제 #4
0
class LabelFractionsPredictor(
        Resource['LabelFractionsPredictor'], Predictor, AIResourceMetadata):
    """Predictor interface for computing the relative proportions of labeled ingredients.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    input_descriptor: FormulationDescriptor
        descriptor holding the formulation data
    labels: Set[str]
        labels whose quantity fractions are computed
    archived: bool
        stored on the instance as ``archived``; defaults to ``False``

    """

    _resource_type = ResourceTypeEnum.MODULE

    input_descriptor = _properties.Object(FormulationDescriptor, 'config.input')
    labels = _properties.Set(_properties.String, 'config.labels')

    typ = _properties.String(
        'config.type', default='LabelFractions', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 input_descriptor: FormulationDescriptor,
                 labels: Set[str],
                 archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.input_descriptor: FormulationDescriptor = input_descriptor
        self.labels: Set[str] = labels
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key and return ``data``."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        return f'<LabelFractionsPredictor {self.name!r}>'
class IngredientFractionsPredictor(
        Resource["IngredientFractionsPredictor"], Predictor, AIResourceMetadata):
    """Predictor interface for computing ingredient fractions.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        the description of the predictor
    input_descriptor: FormulationDescriptor
        descriptor for the input formulation
    ingredients: Set[str]
        the ingredients to featurize.
        Every possible ingredient should be in this set;
        encountering an unknown ingredient results in an error.
    archived: bool
        stored on the instance as ``archived``; defaults to ``False``

    """

    _resource_type = ResourceTypeEnum.MODULE

    input_descriptor = _properties.Object(FormulationDescriptor, 'config.input')
    ingredients = _properties.Set(_properties.String, 'config.ingredients')

    module_type = _properties.String('module_type', default='PREDICTOR')
    typ = _properties.String(
        'config.type', default='IngredientFractions', deserializable=False)

    def __init__(self,
                 name: str,
                 description: str,
                 input_descriptor: FormulationDescriptor,
                 ingredients: Set[str],
                 archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.input_descriptor: FormulationDescriptor = input_descriptor
        self.ingredients: Set[str] = ingredients
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key and return ``data``."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        return f'<IngredientFractionsPredictor {self.name!r}>'
예제 #6
0
class CategoricalDescriptor(Serializable['CategoricalDescriptor'], Descriptor):
    """[ALPHA] A descriptor to hold categorical variables.

    An exhaustive list of categorical values may be supplied.

    Parameters
    ----------
    key: str
        the key corresponding to a descriptor
    categories: Iterable[str]
        possible categories for this descriptor. Any iterable of strings is
        accepted, including a one-shot iterator or generator; the values are
        stored as a set.

    Raises
    ------
    TypeError
        if any element of ``categories`` is not a string

    """

    key = properties.String('descriptor_key')
    typ = properties.String('type',
                            default='Categorical',
                            deserializable=False)
    categories = properties.Set(properties.String, 'descriptor_values')

    def __eq__(self, other):
        """Return True when ``other`` has equal ``key``, ``categories`` and ``typ``.

        An ``other`` that lacks any of these attributes compares unequal.
        """
        try:
            return all(
                getattr(self, attr) == getattr(other, attr)
                for attr in ("key", "categories", "typ")
            )
        except AttributeError:
            return False

    def __init__(self, key: str, categories: Iterable[str]):
        self.key: str = key
        # Materialize the iterable exactly once: validating and then calling
        # set() on the same one-shot iterator would exhaust it during
        # validation and silently store an empty set.
        category_values = tuple(categories)
        if not all(isinstance(category, str) for category in category_values):
            raise TypeError("All categories must be strings")
        self.categories: Set[str] = set(category_values)

    def __str__(self):
        return "<CategoricalDescriptor {!r}>".format(self.key)

    def __repr__(self):
        return "CategoricalDescriptor(key={}, categories={})".format(
            self.key, self.categories)
예제 #7
0
class PredictedVsActualCategoricalPoint(Serializable["PredictedVsActualCategoricalPoint"]):
    """A single categorical data point of predicted vs. actual data."""

    uuid = properties.UUID("uuid")
    """:UUID: Unique Citrine id assigned to the candidate"""

    identifiers = properties.Set(properties.String, "identifiers")
    """:Set[str]: Globally unique identifiers assigned to the candidate"""

    trial = properties.Integer("trial")
    """:int: Index (1-based) of the trial that contained this candidate"""

    fold = properties.Integer("fold")
    """:int: Index (1-based) of the fold that contained this candidate"""

    predicted = properties.Mapping(
        properties.String, properties.Float, "predicted")
    """:Dict[str, float]: Predicted class probabilities, given as a map from class name
    to the relative frequency of that class"""

    actual = properties.Mapping(
        properties.String, properties.Float, "actual")
    """:Dict[str, float]: Actual class probabilities, given as a map from class name
    to the relative frequency of that class"""

    def __init__(self):
        # Takes no arguments and sets nothing.
        pass  # pragma: no cover
class CrossValidationEvaluator(Serializable["CrossValidationEvaluator"], PredictorEvaluator):
    """Evaluate a predictor by means of cross validation.

    Cross-validation is performed on the requested predictor responses, and the requested
    metrics are computed on each response. For a discussion of how many folds and trials
    to use, please see the :ref:`documentation<Cross-validation evaluator>`.

    Besides a name, the set of responses to validate, the number of trials and folds, and the
    metrics to compute, this evaluator defines a set of descriptor keys to ignore when
    grouping.  Candidates whose values differ only for ignored keys (and are identical for all
    other predictor inputs) are placed in the same fold.  For example, if you are baking cakes
    with different ingredients and different oven temperatures and want to group the data by
    ingredients, you can set `ignore_when_grouping={"oven temperature"}`. Two recipes that
    differ only in their oven temperature will then always end up in the same fold.

    Parameters
    ----------
    name: str
        Name of the evaluator
    description: str
        Description of the evaluator
    responses: Set[str]
        Set of descriptor keys to evaluate
    n_folds: int
        Number of cross-validation folds
    n_trials: int
        Number of cross-validation trials, each contains ``n_folds`` folds
    metrics: Optional[Set[PredictorEvaluationMetric]]
        Optional set of metrics to compute for each response.
        Default is all metrics.
    ignore_when_grouping: Optional[Set[str]]
        Set of descriptor keys to group together.
        Candidates with different values for the given keys and identical values
        for all other descriptors will be in the same group.

    """

    def _attrs(self) -> List[str]:
        return [
            "typ",
            "name",
            "description",
            "responses",
            "n_folds",
            "n_trials",
            "metrics",
            "ignore_when_grouping",
        ]

    name = properties.String("name")
    description = properties.String("description")
    _responses = properties.Set(properties.String, "responses")
    n_folds = properties.Integer("n_folds")
    n_trials = properties.Integer("n_trials")
    _metrics = properties.Optional(
        properties.Set(properties.Object(PredictorEvaluationMetric)), "metrics")
    ignore_when_grouping = properties.Optional(
        properties.Set(properties.String), "ignore_when_grouping")
    typ = properties.String(
        "type", default="CrossValidationEvaluator", deserializable=False)

    def __init__(self, *,
                 name: str,
                 description: str = "",
                 responses: Set[str],
                 n_folds: int = 5,
                 n_trials: int = 3,
                 metrics: Optional[Set[PredictorEvaluationMetric]] = None,
                 ignore_when_grouping: Optional[Set[str]] = None):
        self.name: str = name
        self.description: str = description
        self._responses: Set[str] = responses
        self._metrics: Optional[Set[PredictorEvaluationMetric]] = metrics
        self.n_folds: int = n_folds
        self.n_trials: int = n_trials
        self.ignore_when_grouping: Optional[Set[str]] = ignore_when_grouping

    @property
    def responses(self) -> Set[str]:
        """Predictor responses that the evaluator cross-validates."""
        return self._responses

    @property
    def metrics(self) -> Set[PredictorEvaluationMetric]:
        """Metrics computed during cross-validation.

        This is ``None`` when the evaluator was constructed without ``metrics``.
        """
        return self._metrics
예제 #9
0
class FormulationDesignSpace(Resource['FormulationDesignSpace'], DesignSpace,
                             AIResourceMetadata):
    """Design space composed of mixtures of ingredients.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    formulation_descriptor: FormulationDescriptor
        descriptor used to store formulations sampled from the design space
    ingredients: Set[str]
        set of ingredient names that can be used in a formulation
    constraints: Set[IngredientConstraint]
        set of constraints that restricts formulations sampled from the space.
        This must include an
        :class:`~io.citrine.informatics.constraints.ingredient_count_constraint.IngredientCountConstraint`
        with maximum count of 32 or fewer.
    labels: Optional[Mapping[str, Set[str]]]
        map from a label to each ingredient that should be given that label
        when it's included in a formulation, e.g., ``{'solvent': {'water', 'alcohol'}}``
    resolution: float, optional
        Minimum increment used to specify ingredient quantities.
        Default is 0.0001.
    session: Session, optional
        session stored on the instance as ``session``.
        When omitted (or ``None``), a new ``Session()`` is created for this instance.

    """

    _resource_type = ResourceTypeEnum.MODULE

    formulation_descriptor = properties.Object(
        FormulationDescriptor, 'config.formulation_descriptor')
    ingredients = properties.Set(properties.String, 'config.ingredients')
    labels = properties.Optional(
        properties.Mapping(properties.String,
                           properties.Set(properties.String)), 'config.labels')
    constraints = properties.Set(properties.Object(Constraint),
                                 'config.constraints')
    resolution = properties.Float('config.resolution')

    typ = properties.String('config.type',
                            default='FormulationDesignSpace',
                            deserializable=False)
    module_type = properties.String('module_type',
                                    default='DESIGN_SPACE',
                                    deserializable=False)

    def __init__(self,
                 *,
                 name: str,
                 description: str,
                 formulation_descriptor: FormulationDescriptor,
                 ingredients: Set[str],
                 constraints: Set[Constraint],
                 labels: Optional[Mapping[str, Set[str]]] = None,
                 resolution: float = 0.0001,
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.formulation_descriptor: FormulationDescriptor = formulation_descriptor
        self.ingredients: Set[str] = ingredients
        self.constraints: Set[Constraint] = constraints
        self.labels: Optional[Mapping[str, Set[str]]] = labels
        self.resolution: float = resolution
        # A ``Session()`` written as the default value in the signature is
        # evaluated once, when the function is defined, and that single object
        # is then shared by every instance created without an explicit
        # session. Build the default per instance instead.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key and return ``data``."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<FormulationDesignSpace {!r}>'.format(self.name)
class IngredientsToSimpleMixturePredictor(
        Resource['IngredientsToSimpleMixturePredictor'], Predictor,
        AIResourceMetadata):
    """[DEPRECATED] Constructs a simple mixture from ingredient quantities.

    This predictor has been renamed. Please use
    :class:`~citrine.informatics.predictors.ingredients_to_formulation_predictor.IngredientsToFormulationPredictor`
    instead. Constructing an instance emits a :class:`DeprecationWarning`.

    .. seealso::

        :class:`~citrine.informatics.predictors.ingredients_to_formulation_predictor.IngredientsToFormulationPredictor`

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    output: FormulationDescriptor
        descriptor that represents the output formulation
    id_to_quantity: Mapping[str, RealDescriptor]
        Map from ingredient identifier to the descriptor that represents its quantity,
        e.g., ``{'water': RealDescriptor('water quantity', 0, 1, "")}``
    labels: Mapping[str, Set[str]]
        Map from each label to all ingredients assigned that label, when present in a mixture
        e.g., ``{'solvent': {'water'}}``
    archived: bool
        stored on the instance as ``archived``; defaults to ``False``

    """

    _resource_type = ResourceTypeEnum.MODULE

    output = _properties.Object(FormulationDescriptor, 'config.output')
    id_to_quantity = _properties.Mapping(_properties.String,
                                         _properties.Object(RealDescriptor),
                                         'config.id_to_quantity')
    labels = _properties.Mapping(_properties.String,
                                 _properties.Set(_properties.String),
                                 'config.labels')

    typ = _properties.String('config.type',
                             default='IngredientsToSimpleMixture',
                             deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 output: FormulationDescriptor,
                 id_to_quantity: Mapping[str, RealDescriptor],
                 labels: Mapping[str, Set[str]],
                 archived: bool = False):
        # stacklevel=2 attributes the warning to the code that instantiates
        # this class rather than to this line. Without it the warning is
        # attributed to this module, where the default warning filters hide
        # DeprecationWarning.
        warn(
            "{this_class} has been renamed. Please use {replacement} instead".
            format(this_class=self.__class__.__name__,
                   replacement="Ingredients To Formulation Predictor"),
            DeprecationWarning,
            stacklevel=2)
        self.name: str = name
        self.description: str = description
        self.output: FormulationDescriptor = output
        self.id_to_quantity: Mapping[str, RealDescriptor] = id_to_quantity
        self.labels: Mapping[str, Set[str]] = labels
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key and return ``data``."""
        data['display_name'] = data['config']['name']  # pragma: no cover
        return data  # pragma: no cover

    def __str__(self):
        return '<IngredientsToSimpleMixturePredictor {!r}>'.format(
            self.name)  # pragma: no cover