コード例 #1
0
class DummyDescriptor(object):
    """Holder class declaring several container-style properties.

    The element types are supplied in mixed forms: some as property classes and
    some as property instances.
    """

    dummy_map = properties.Mapping(
        properties.Float(),
        properties.String,
    )
    dummy_list = properties.List(
        properties.Float,
        properties.String,
    )
    dummy_set = properties.Set(
        type(properties.Float()),
    )
    link_or_else = properties.LinkOrElse()
    map_collection_key = properties.Mapping(
        properties.Optional(properties.String),
        properties.Integer,
    )
    specified_mixed_list = properties.SpecifiedMixedList(
        [
            properties.Integer(default=100),
        ],
    )
class IngredientsToFormulationPredictor(
        Resource['IngredientsToFormulationPredictor'], Predictor, AIResourceMetadata):
    """[ALPHA] Predictor that assembles a formulation out of ingredient quantities.

    Parameters
    ----------
    name: str
        Name of the configuration.
    description: str
        Description of the predictor.
    output: FormulationDescriptor
        Descriptor representing the formulation that is produced.
    id_to_quantity: Mapping[str, RealDescriptor]
        Maps each ingredient identifier to the descriptor that holds its quantity,
        e.g., ``{'water': RealDescriptor('water quantity', 0, 1, "")}``
    labels: Mapping[str, Set[str]]
        Maps each label to every ingredient assigned that label, when present in a
        mixture, e.g., ``{'solvent': {'water'}}``
    archived: bool
        Stored on the instance as ``archived``; defaults to ``False``.

    """

    _resource_type = ResourceTypeEnum.MODULE

    output = _properties.Object(FormulationDescriptor, 'config.output')
    id_to_quantity = _properties.Mapping(
        _properties.String,
        _properties.Object(RealDescriptor),
        'config.id_to_quantity',
    )
    labels = _properties.Mapping(
        _properties.String,
        _properties.Set(_properties.String),
        'config.labels',
    )

    typ = _properties.String(
        'config.type',
        default='IngredientsToSimpleMixture',
        deserializable=False,
    )
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 output: FormulationDescriptor,
                 id_to_quantity: Mapping[str, RealDescriptor],
                 labels: Mapping[str, Set[str]],
                 archived: bool = False):
        # Plain attribute assignment, in the same order as the signature.
        self.name = name
        self.description = description
        self.output = output
        self.id_to_quantity = id_to_quantity
        self.labels = labels
        self.archived = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        template = '<IngredientsToFormulationPredictor {!r}>'
        return template.format(self.name)
コード例 #3
0
class CSVDataSource(Serializable['CSVDataSource'], DataSource):
    """Data source backed by a CSV file stored on the data platform.

    Parameters
    ----------
    file_link: FileLink
        Link to the CSV file the data is read from.
    column_definitions: Mapping[str, Descriptor]
        Maps each column header to the descriptor used to interpret that column's cells.
    identifiers: Optional[List[str]]
        One or more column headers whose values uniquely identify a row. A header may
        also appear in ``column_definitions`` when the column serves as both data and
        identifier, but that is not required. Identifiers must be unique within a
        dataset: no two rows can contain the same value.

    """

    typ = properties.String(
        'type', default='csv_data_source', deserializable=False)
    file_link = properties.Object(FileLink, "file_link")
    column_definitions = properties.Mapping(
        properties.String,
        properties.Object(Descriptor),
        "column_definitions",
    )
    identifiers = properties.Optional(
        properties.List(properties.String), "identifiers")

    def __init__(self,
                 file_link: FileLink,
                 column_definitions: Mapping[str, Descriptor],
                 identifiers: Optional[List[str]] = None):
        self.file_link = file_link
        self.column_definitions = column_definitions
        self.identifiers = identifiers

    def _attrs(self) -> List[str]:
        """Return the names of the attributes listed for this data source."""
        attr_names = ["file_link", "column_definitions", "identifiers", "typ"]
        return attr_names
コード例 #4
0
ファイル: reports.py プロジェクト: jspeerless/citrine-python
class FeatureImportanceReport(Serializable["FeatureImportanceReport"]):
    """[ALPHA] Importance of each feature for one particular model response.

    Instances are constructed from saved models; users should not instantiate
    FeatureImportanceReport themselves.

    Parameters
    ----------
    output_key: str
        Key for the output.
    importances: dict[str, float]
        Feature importances.

    """

    # Exposed as ``output_key`` but serialized under the 'response_key' path.
    output_key = properties.String('response_key')
    importances = properties.Mapping(
        keys_type=properties.String,
        values_type=properties.Float,
        serialization_path='importances',
    )

    def __init__(self, output_key: str, importances: Dict[str, float]):
        self.output_key = output_key
        self.importances = importances

    def __str__(self):
        template = "<FeatureImportanceReport {!r}>"
        return template.format(self.output_key)
コード例 #5
0
ファイル: job.py プロジェクト: jspeerless/citrine-python
class JobStatusResponse(Resource['JobStatusResponse']):
    """[ALPHA] A response to a job status check.

    A JobStatusResponse summarizes the status of the job as a whole.

    Parameters
    ----------
    job_type: str
        The type of job this status report is for.
    status: str
        The actual status of the job.
        One of "Running", "Success", or "Failure".
    tasks: List[TaskNode]
        All of the constituent tasks required to complete this job.
    output: Optional[Map[String,String]]
        Job output properties and results.

    """

    job_type = properties.String("job_type")
    status = properties.String("status")
    tasks = properties.List(
        Object(TaskNode),
        "tasks",
    )
    output = properties.Optional(
        properties.Mapping(String, String),
        'output',
    )

    def __init__(self,
                 job_type: str,
                 status: str,
                 tasks: List[TaskNode],
                 output: Optional[Dict[str, str]]):
        self.job_type = job_type
        self.status = status
        self.tasks = tasks
        self.output = output
コード例 #6
0
class ResponseMetrics(Serializable["ResponseMetrics"]):
    """Metrics that a Predictor Evaluator computed for one response.

    A metric's results can be looked up either with the metric's ``__repr__`` or
    with the metric object itself.

    """

    metrics = properties.Mapping(
        properties.String,
        properties.Object(MetricValue),
        "metrics",
    )
    """:Dict[str, MetricValue]: Metrics computed for a single response, keyed by the
    metric's ``__repr__``."""

    def __init__(self):
        pass  # pragma: no cover

    def __iter__(self):
        return iter(self.metrics)

    def __getitem__(self, item):
        # Resolve the lookup key first: strings are used as-is, metric objects
        # are translated to their repr. Anything else is rejected.
        if isinstance(item, str):
            key = item
        elif isinstance(item, PredictorEvaluationMetric):
            key = repr(item)
        else:
            raise TypeError("Cannot index ResponseMetrics with a {}".format(
                type(item)))
        return self.metrics[key]
コード例 #7
0
def test_mapping_property(key_type, value_type, key_value, value_value,
                          key_serialized, value_serialized):
    """Check a single-entry dict in both directions through a Mapping property."""
    mapping_prop = properties.Mapping(key_type, value_type)
    expected_value = {key_value: value_value}
    expected_serialized = {key_serialized: value_serialized}
    assert mapping_prop.deserialize(expected_serialized) == expected_value
    assert mapping_prop.serialize(expected_value) == expected_serialized
コード例 #8
0
class ExpressionPredictor(Resource['ExpressionPredictor'], Predictor,
                          AIResourceMetadata):
    """A predictor that evaluates an expression over a set of bounded inputs.

    The expression syntax and the list of allowed symbols are discussed in the
    :ref:`documentation<Expression Predictor>`.

    .. seealso::
       If you are using the deprecated predictor please see
       :class:`~citrine.informatics.predictors.DeprecatedExpressionPredictor` for an example that
       shows how to migrate to the new format.

    Parameters
    ----------
    name: str
        Name of the configuration.
    description: str
        The description of the predictor.
    expression: str
        Expression that computes an output from aliased inputs.
    output: RealDescriptor
        Descriptor that represents the output of the expression.
    aliases: Mapping[str, RealDescriptor]
        A mapping from each unknown argument to its descriptor.
        All unknown arguments must have an associated descriptor.
    archived: bool
        Stored on the instance as ``archived``; defaults to ``False``.

    """

    expression = _properties.String('config.expression')
    output = _properties.Object(RealDescriptor, 'config.output')
    aliases = _properties.Mapping(
        _properties.String,
        _properties.Object(RealDescriptor),
        'config.aliases',
    )

    typ = _properties.String(
        'config.type',
        default='AnalyticExpression',
        deserializable=False,
    )
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 expression: str,
                 output: RealDescriptor,
                 aliases: Mapping[str, RealDescriptor],
                 archived: bool = False):
        # Plain attribute assignment, in the same order as the signature.
        self.name = name
        self.description = description
        self.expression = expression
        self.output = output
        self.aliases = aliases
        self.archived = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        template = '<ExpressionPredictor {!r}>'
        return template.format(self.name)
コード例 #9
0
class DesignMaterial(Serializable["DesignMaterial"]):
    """A designed material, described as a set of DesignVariables."""

    # Exposed as ``values`` but serialized under the 'vars' path.
    values = properties.Mapping(
        properties.String,
        properties.Object(DesignVariable),
        'vars',
    )
    """:Dict[str, DesignVariable]: mapping from descriptor keys to the value for this material"""

    def __init__(self):
        pass  # pragma: no cover
コード例 #10
0
def test_mapping_property_list_of_pairs_multiple():
    """Check a two-entry mapping that is serialized as a list of pairs."""
    pair_prop = properties.Mapping(
        properties.String,
        properties.Integer,
        ser_as_list_of_pairs=True,
    )
    as_dict = {'foo': 1, 'bar': 2}
    as_pairs = [('foo', 1), ('bar', 2)]
    assert pair_prop.deserialize(as_pairs) == as_dict
    # The order of the serialized pairs is not asserted, only their contents.
    checker = unittest.TestCase()
    checker.assertCountEqual(pair_prop.serialize(as_dict), as_pairs)
コード例 #11
0
class EnumeratedDesignSpace(Resource['EnumeratedDesignSpace'], DesignSpace):
    """Design space composed of an explicit enumeration of candidate materials to score.

    Note that every candidate must have exactly the descriptors in the list populated
    (no more, no less) to be included.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    descriptors: list[Descriptor]
        the list of descriptors included in the candidates of the design space
    data: list[dict]
        list of dicts of the shape `{<descriptor_key>: <descriptor_value>}`
        where each dict corresponds to a candidate in the design space
    session: Session
        session stored on the instance; when omitted (``None``) a new ``Session()``
        is created for this instance

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(),
                                      'config.description')
    descriptors = properties.List(properties.Object(Descriptor),
                                  'config.descriptors')
    data = properties.List(
        properties.Mapping(properties.String, properties.Raw), 'config.data')

    typ = properties.String('config.type',
                            default='EnumeratedDesignSpace',
                            deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID(
        'schema_id', default=UUID('f3907a58-aa46-462c-8837-a5aa9605e79e'))

    def __init__(self,
                 name: str,
                 description: str,
                 descriptors: List[Descriptor],
                 data: List[Mapping[str, Any]],
                 session: Session = None):
        self.name: str = name
        self.description: str = description
        self.descriptors: List[Descriptor] = descriptors
        self.data: List[Mapping[str, Any]] = data
        # A ``Session()`` written as the signature default is evaluated a single time,
        # when the function is defined, and that one object is then shared by every
        # instance. ``None`` is used as the sentinel so the fallback is built per call.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<EnumeratedDesignSpace {!r}>'.format(self.name)
コード例 #12
0
def test_mapping_property_list_of_pairs(key_type, value_type, key_value,
                                        value_value, key_serialized,
                                        value_serialized):
    """Check a single-entry mapping that is serialized as a list of pairs."""
    pair_prop = properties.Mapping(
        key_type, value_type, ser_as_list_of_pairs=True)
    as_dict = {key_value: value_value}
    as_pairs = [(key_serialized, value_serialized)]
    assert pair_prop.deserialize(as_pairs) == as_dict
    # Compare contents without asserting the order of the serialized pairs.
    checker = unittest.TestCase()
    checker.assertCountEqual(pair_prop.serialize(as_dict), as_pairs)
コード例 #13
0
class Mixture(Serializable["Mixture"], DesignVariable):
    """Most likely quantity values for every component of a mixture.

    All components with non-zero quantities are listed; nothing is truncated
    (although values may be rounded).
    """

    # Exposed as ``quantities`` but serialized under the 'q' path.
    quantities = properties.Mapping(
        properties.String,
        properties.Float,
        'q',
    )
    """:Dict[str, float]: mapping from ingredient identifiers to their quantities"""

    def __init__(self):
        pass  # pragma: no cover
コード例 #14
0
# NOTE(review): the Serializable type parameter names "CategoriesAndProbabilities",
# not this class — confirm whether that is intentional.
class TopCategories(Serializable["CategoriesAndProbabilities"], DesignVariable):
    """Names and probabilities of the most probable categories.

    The list is truncated: only the most probable categories appear, and categories
    that are left out may still have non-zero probabilities.
    """

    # Exposed as ``probabilities`` but serialized under the 'cp' path.
    probabilities = properties.Mapping(
        properties.String,
        properties.Float,
        'cp',
    )
    """:Dict[str, float]: mapping from category names to their probabilities"""

    def __init__(self):
        pass  # pragma: no cover
コード例 #15
0
class PredictedVsActualCategoricalPoint(
        Serializable["PredictedVsActualCategoricalPoint"]):
    """Predicted and actual values for one categorical data point."""

    uuid = properties.UUID("uuid")
    """:UUID: Unique Citrine id given to the candidate"""

    identifiers = properties.Set(properties.String, "identifiers")
    """:Set[str]: Set of globally unique identifiers given to the candidate"""

    trial = properties.Integer("trial")
    """:int: 1-based index of the trial this candidate belonged to"""

    fold = properties.Integer("fold")
    """:int: 1-based index of the fold this candidate belonged to"""

    predicted = properties.Mapping(
        properties.String,
        properties.Float,
        "predicted",
    )
    """:Dict[str, float]: Predicted class probabilities defined as a map from each class name
    to its relative frequency"""

    actual = properties.Mapping(
        properties.String,
        properties.Float,
        "actual",
    )
    """:Dict[str, float]: Actual class probabilities defined as a map from each class name
    to its relative frequency"""

    def __init__(self):
        pass  # pragma: no cover
コード例 #16
0
class GridProcessor(Resource['GridProcessor'], Processor, AIResourceMetadata):
    """Generates samples from the Cartesian product of finite dimensions, then scans over them.

    For each continuous dimension, a uniform grid is created between the lower and upper
    bounds of the descriptor. The number of points along each continuous dimension is
    specified by ``grid_sizes``. No such discretization is necessary for enumerated
    dimensions, because they are finite.

    Be careful when using a grid processor, as the number of points grows exponentially
    with the number of dimensions. For high-dimensional design spaces, a continuous
    processor is often preferable.

    Parameters
    ----------
    name: str
        Name of the processor.
    description: str
        Description of the processor.
    grid_sizes: dict[str, int]
        The number of points to select along each continuous dimension, by dimension name.
    session: Optional[Session]
        Stored on the instance as ``session``; defaults to ``None``.

    """

    _resource_type = ResourceTypeEnum.MODULE

    # Exposed as ``grid_sizes`` but serialized under 'config.grid_dimensions'.
    grid_sizes = properties.Mapping(
        properties.String,
        properties.Integer,
        'config.grid_dimensions',
    )

    typ = properties.String(
        'config.type', default='Grid', deserializable=False)
    module_type = properties.String('module_type', default='PROCESSOR')

    def __init__(self,
                 name: str,
                 description: str,
                 grid_sizes: Mapping[str, int],
                 session: Optional[Session] = None):
        self.name = name
        self.description = description
        self.grid_sizes = grid_sizes
        self.session = session

    def _attrs(self) -> List[str]:
        """Return the names of the attributes listed for this processor."""
        attr_names = ["name", "description", "grid_sizes", "typ"]
        return attr_names

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        template = '<GridProcessor {!r}>'
        return template.format(self.name)
コード例 #17
0
class GridProcessor(Serializable['GridProcessor'], Processor):
    """Generates a finite set of materials from the design space, then scans over that set.

    The materials are drawn from the domain defined by the design space. To create a
    finite set of materials from continuous dimensions, a uniform grid is created between
    the bounds of the descriptor. The number of points is specified by `grid_sizes`.

    Parameters
    ----------
    name: str
        Name of the processor.
    description: str
        Description of the processor.
    grid_sizes: dict[str, int]
        The number of points to select along each dimension of the grid, by dimension name.
    session: Optional[Session]
        Stored on the instance as ``session``; defaults to ``None``.
    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(
        properties.String(), 'config.description')
    typ = properties.String(
        'config.type', default='Grid', deserializable=False)
    # Exposed as ``grid_sizes`` but serialized under 'config.grid_dimensions'.
    grid_sizes = properties.Mapping(
        properties.String,
        properties.Integer,
        'config.grid_dimensions',
    )
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False,
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id',
        default=UUID('272791a5-5468-4344-ac9f-2811d9266a4d'),
    )

    def __init__(self,
                 name: str,
                 description: str,
                 grid_sizes: Mapping[str, int],
                 session: Optional[Session] = None):
        self.name = name
        self.description = description
        self.grid_sizes = grid_sizes
        self.session = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        template = '<GridProcessor {!r}>'
        return template.format(self.name)
コード例 #18
0
class EnumeratedDesignSpace(Resource['EnumeratedDesignSpace'], DesignSpace, AIResourceMetadata):
    """An explicit enumeration of candidate materials to score.

    Enumerated design spaces are intended to capture small spaces with fewer than
    1000 values.  For larger spaces, use the DataSourceDesignSpace.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    descriptors: list[Descriptor]
        the list of descriptors included in the candidates of the design space
    data: list[dict]
        list of dicts of the shape `{<descriptor_key>: <descriptor_value>}`
        where each dict corresponds to a candidate in the design space
    session: Session
        session stored on the instance; when omitted (``None``) a new ``Session()``
        is created for this instance

    """

    _resource_type = ResourceTypeEnum.MODULE

    descriptors = properties.List(properties.Object(Descriptor), 'config.descriptors')
    data = properties.List(properties.Mapping(properties.String, properties.Raw), 'config.data')

    typ = properties.String('config.type', default='EnumeratedDesignSpace', deserializable=False)
    module_type = properties.String('module_type', default='DESIGN_SPACE')

    def __init__(self,
                 name: str,
                 description: str,
                 descriptors: List[Descriptor],
                 data: List[Mapping[str, Any]],
                 session: Session = None):
        self.name: str = name
        self.description: str = description
        self.descriptors: List[Descriptor] = descriptors
        self.data: List[Mapping[str, Any]] = data
        # A ``Session()`` written as the signature default is evaluated a single time,
        # when the function is defined, and that one object is then shared by every
        # instance. ``None`` is used as the sentinel so the fallback is built per call.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<EnumeratedDesignSpace {!r}>'.format(self.name)
コード例 #19
0
class FeatureImportanceReport(Serializable["FeatureImportanceReport"]):
    """[ALPHA] Importance of each feature for one particular model response.

    Instances are constructed from saved models; users should not instantiate
    FeatureImportanceReport themselves.
    """

    output_key = properties.String('response_key')
    """:str: output descriptor key for which these feature importances are applicable"""

    importances = properties.Mapping(
        keys_type=properties.String,
        values_type=properties.Float,
        serialization_path='importances',
    )
    """:dict[str, float]: map from feature name to its importance"""

    def __init__(self, output_key: str, importances: Dict[str, float]):
        self.output_key = output_key
        self.importances = importances

    def __str__(self):
        template = "<FeatureImportanceReport {!r}>"
        return template.format(self.output_key)
コード例 #20
0
class CrossValidationResult(Serializable["CrossValidationResult"],
                            PredictorEvaluationResult):
    """Outcome of a cross-validation evaluation performed on a predictor.

    Results for a cross-validated response can be accessed via ``cvResult['response_name']``,
    where ``cvResult`` is a
    :class:`citrine.informatics.predictor_evaluation_result.CrossValidationResult`
    and ``'response_name'`` is a response analyzed by a
    :class:`citrine.informatics.predictor_evaluator.PredictorEvaluator`.

    """

    _evaluator = properties.Object(PredictorEvaluator, "evaluator")
    _response_results = properties.Mapping(
        properties.String,
        properties.Object(ResponseMetrics),
        "response_results",
    )
    typ = properties.String(
        'type',
        default='CrossValidationResult',
        deserializable=False,
    )

    def __getitem__(self, item):
        # Indexing goes straight to the per-response results mapping.
        results = self._response_results
        return results[item]

    def __iter__(self):
        # Iterates over the response names, as exposed by ``responses``.
        response_names = self.responses
        return iter(response_names)

    @property
    def evaluator(self) -> PredictorEvaluator:
        """:PredictorEvaluator: Evaluator that produced this result."""
        return self._evaluator

    @property
    def responses(self) -> Set[str]:
        """Responses for which results are present."""
        return set(self._response_results.keys())

    @property
    def metrics(self) -> Set[PredictorEvaluationMetric]:
        """:Set[PredictorEvaluationMetric]: Metrics for which results are present."""
        evaluator = self._evaluator
        return evaluator.metrics
コード例 #21
0
class DeprecatedExpressionPredictor(Resource['DeprecatedExpressionPredictor'],
                                    Predictor, AIResourceMetadata):
    """[DEPRECATED] A predictor that computes an output from an analytic expression.

    This predictor is deprecated. Please use the
    :class:`~citrine.informatics.predictors.ExpressionPredictor` instead.
    To migrate to the new predictor:

    1. add an alias for all unknown expression arguments and
    2. replace descriptor keys in ``aliases`` with the associated descriptor

    These changes allow the expression to respect descriptor bounds when computing the output and
    avoid potential descriptor mismatches if a descriptor with an identical key and different
    bounds is present in the graph.

    The following example shows how to migrate a deprecated expression predictor to the new format.
    In the deprecated format, an expression that computes shear modulus from Young's modulus and
    Poisson's ratio is given by:

    .. code-block:: python

       from citrine.informatics.predictors import DeprecatedExpressionPredictor

       shear_modulus = RealDescriptor(
           'Property~Shear modulus',
           lower_bound=0,
           upper_bound=100,
           units='GPa'
       )

       shear_modulus_predictor = DeprecatedExpressionPredictor(
           name = 'Shear modulus predictor',
           description = "Computes shear modulus from Young's modulus and Poisson's ratio.",
           expression = 'Y / (2 * (1 + v))',
           output = shear_modulus,
           aliases = {
               'Y': "Young's modulus",
               'v': "Poisson's ratio"
           }
       )

    To create a predictor using the new format, we need to create descriptors for the
    expression inputs: Young's modulus and Poisson's ratio. We also need to replace references
    to the descriptor keys in ``aliases`` with the new descriptors:

    .. code-block:: python

       from citrine.informatics.predictors import ExpressionPredictor

       # create a descriptor for each input in addition to the output
       youngs_modulus = RealDescriptor("Property~Young's modulus", lower_bound=0,
                                       upper_bound=100, units='GPa')
       poissons_ratio = RealDescriptor("Property~Poisson's ratio", lower_bound=-1,
                                       upper_bound=0.5, units='')
       shear_modulus = RealDescriptor('Property~Shear modulus', lower_bound=0,
                                      upper_bound=100, units='GPa')

       shear_modulus_predictor = ExpressionPredictor(
           name = 'Shear modulus predictor',
           description = "Computes shear modulus from Young's modulus and Poisson's ratio.",
           expression = 'Y / (2 * (1 + v))',
           output = shear_modulus,
           # note, arguments map to descriptors not descriptor keys
           aliases = {
               'Y': youngs_modulus,
               'v': poissons_ratio
           }
       )

    .. seealso:: :class:`~citrine.informatics.predictors.ExpressionPredictor`

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        the description of the predictor
    expression: str
        expression that computes an output from a set of inputs
    output: RealDescriptor
        descriptor that represents the output of the expression
    aliases: Optional[Mapping[str, str]]
        a mapping from each argument as it appears in the ``expression`` to its descriptor
        key. If an unknown argument is not aliased, the argument and descriptor key are assumed
        to be identical.
    archived: bool
        stored on the instance as ``archived``; defaults to ``False``

    """

    _resource_type = ResourceTypeEnum.MODULE

    expression = _properties.String('config.expression')
    output = _properties.Object(RealDescriptor, 'config.output')
    aliases = _properties.Optional(
        _properties.Mapping(_properties.String, _properties.String),
        'config.aliases')

    typ = _properties.String('config.type',
                             default='Expression',
                             deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 expression: str,
                 output: RealDescriptor,
                 aliases: Optional[Mapping[str, str]] = None,
                 archived: bool = False):
        # stacklevel=2 attributes the warning to the code that constructs the predictor
        # rather than to this line. The warning category is deliberately left at the
        # default so existing warning filters keep matching.
        warn(
            "{this_class} is deprecated. Please use {replacement} instead".format(
                this_class=self.__class__.__name__,
                replacement=ExpressionPredictor.__name__),
            stacklevel=2)
        self.name: str = name
        self.description: str = description
        self.expression: str = expression
        self.output: RealDescriptor = output
        self.aliases: Optional[Mapping[str, str]] = aliases
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<DeprecatedExpressionPredictor {!r}>'.format(self.name)
コード例 #22
0
class MeanPropertyPredictor(Resource['MeanPropertyPredictor'], Predictor, AIResourceMetadata):
    """Predictor interface that computes mean component properties of a formulation.

    .. seealso::
       Users of the deprecated generalized mean property predictor should consult
       :class:`~citrine.informatics.predictors.generalized_mean_property_predictor.GeneralizedMeanPropertyPredictor`
       for details on migrating to the new format.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    input_descriptor: FormulationDescriptor
        descriptor that represents the input formulation
    properties: List[RealDescriptor]
        descriptors of the properties to featurize
    p: int
        power of the `generalized mean <https://en.wikipedia.org/wiki/Generalized_mean>`_;
        only integer powers are supported
    impute_properties: bool
        whether missing ingredient properties are imputed.
        When ``False``, every ingredient must define a value for every featurized property;
        otherwise the row is not featurized.
        When ``True`` and no ``default_properties`` are given, the average over the entire
        dataset is used.
        When ``True`` and a default is given in ``default_properties``, that default replaces
        the missing value.
    label: Optional[str]
        optional label
    training_data: Optional[List[DataSource]]
        sources of training data, each either a CSV or a GEM Table. Candidates from multiple
        data sources are combined into a flattened list and de-duplicated by uid and
        identifiers. De-duplication happens when a uid or identifier is shared between two or
        more rows; the resulting row contains the union of data across all rows sharing the
        same uid or at least 1 identifier. Training data is unnecessary if the predictor is
        part of a graph that includes all training data required by this predictor.
    default_properties: Optional[Mapping[str, float]]
        default values for imputed properties, given as a map from descriptor key to default
        value. If omitted while ``impute_properties == True``, the average over the entire
        dataset fills in missing values; any specified default takes precedence over that
        average. ``impute_properties`` must be ``True`` if ``default_properties`` are provided.

    """

    _resource_type = ResourceTypeEnum.MODULE

    # Field declarations, kept in their original order.
    input_descriptor = _properties.Object(FormulationDescriptor, 'config.input')
    properties = _properties.List(_properties.Object(RealDescriptor), 'config.properties')
    p = _properties.Integer('config.p')
    training_data = _properties.List(_properties.Object(DataSource), 'config.training_data')
    impute_properties = _properties.Boolean('config.impute_properties')
    default_properties = _properties.Optional(
        _properties.Mapping(_properties.String, _properties.Float), 'config.default_properties')
    label = _properties.Optional(_properties.String, 'config.label')

    typ = _properties.String('config.type', default='MeanProperty', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 input_descriptor: FormulationDescriptor,
                 properties: List[RealDescriptor],
                 p: int,
                 impute_properties: bool,
                 default_properties: Optional[Mapping[str, float]] = None,
                 label: Optional[str] = None,
                 training_data: Optional[List[DataSource]] = None,
                 archived: bool = False):
        """Store every argument on the instance; see the class docstring for their meaning."""
        self.name = name
        self.description = description
        self.input_descriptor = input_descriptor
        self.properties = properties
        self.p = p
        # The raw argument is passed through the inherited wrapping helper before storage.
        wrapped_sources = self._wrap_training_data(training_data)
        self.training_data = wrapped_sources
        self.impute_properties = impute_properties
        self.default_properties = default_properties
        self.label = label
        self.archived = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key and return ``data``."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        """Return a short tag that embeds the ``repr`` of the predictor's name."""
        return f'<MeanPropertyPredictor {self.name!r}>'
コード例 #23
0
class FormulationDesignSpace(Resource['FormulationDesignSpace'], DesignSpace,
                             AIResourceMetadata):
    """Design space composed of mixtures of ingredients.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    formulation_descriptor: FormulationDescriptor
        descriptor used to store formulations sampled from the design space
    ingredients: Set[str]
        set of ingredient names that can be used in a formulation
    constraints: Set[Constraint]
        set of constraints that restricts formulations sampled from the space.
        This must include an
        :class:`~io.citrine.informatics.constraints.ingredient_count_constraint.IngredientCountConstraint`
        with maximum count of 32 or fewer.
    labels: Optional[Mapping[str, Set[str]]]
        map from a label to each ingredient that should be given that label
        when it's included in a formulation, e.g., ``{'solvent': {'water', 'alcohol'}}``
    resolution: float, optional
        Minimum increment used to specify ingredient quantities.
        Default is 0.0001.
    session: Session, optional
        session stored on the instance. When omitted (or ``None``), a new
        ``Session()`` is created for this instance.

    """

    _resource_type = ResourceTypeEnum.MODULE

    formulation_descriptor = properties.Object(
        FormulationDescriptor, 'config.formulation_descriptor')
    ingredients = properties.Set(properties.String, 'config.ingredients')
    labels = properties.Optional(
        properties.Mapping(properties.String,
                           properties.Set(properties.String)), 'config.labels')
    constraints = properties.Set(properties.Object(Constraint),
                                 'config.constraints')
    resolution = properties.Float('config.resolution')

    typ = properties.String('config.type',
                            default='FormulationDesignSpace',
                            deserializable=False)
    module_type = properties.String('module_type',
                                    default='DESIGN_SPACE',
                                    deserializable=False)

    def __init__(self,
                 *,
                 name: str,
                 description: str,
                 formulation_descriptor: FormulationDescriptor,
                 ingredients: Set[str],
                 constraints: Set[Constraint],
                 labels: Optional[Mapping[str, Set[str]]] = None,
                 resolution: float = 0.0001,
                 session: Optional[Session] = None):
        """Store the design space configuration; all arguments are keyword-only."""
        self.name: str = name
        self.description: str = description
        self.formulation_descriptor: FormulationDescriptor = formulation_descriptor
        self.ingredients: Set[str] = ingredients
        self.constraints: Set[Constraint] = constraints
        self.labels: Optional[Mapping[str, Set[str]]] = labels
        self.resolution: float = resolution
        # A ``Session()`` default in the signature would be built once, when the
        # class is defined, and then shared by every instance. Build it per
        # instance instead so design spaces never share session state by accident.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key and return ``data``."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        """Return a short tag that embeds the ``repr`` of the design space's name."""
        return '<FormulationDesignSpace {!r}>'.format(self.name)
class IngredientsToSimpleMixturePredictor(
        Resource['IngredientsToSimpleMixturePredictor'], Predictor,
        AIResourceMetadata):
    """[DEPRECATED] Constructs a simple mixture from ingredient quantities.

    This predictor has been renamed. Please use
    :class:`~citrine.informatics.predictors.ingredients_to_formulation_predictor.IngredientsToFormulationPredictor`
    instead.

    .. seealso::

        :class:`~citrine.informatics.predictors.ingredients_to_formulation_predictor.IngredientsToFormulationPredictor`

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    output: FormulationDescriptor
        descriptor that represents the output formulation
    id_to_quantity: Mapping[str, RealDescriptor]
        Map from ingredient identifier to the descriptor that represents its quantity,
        e.g., ``{'water': RealDescriptor('water quantity', 0, 1, "")}``
    labels: Mapping[str, Set[str]]
        Map from each label to all ingredients assigned that label, when present in a mixture,
        e.g., ``{'solvent': {'water'}}``

    """

    _resource_type = ResourceTypeEnum.MODULE

    output = _properties.Object(FormulationDescriptor, 'config.output')
    id_to_quantity = _properties.Mapping(_properties.String,
                                         _properties.Object(RealDescriptor),
                                         'config.id_to_quantity')
    labels = _properties.Mapping(_properties.String,
                                 _properties.Set(_properties.String),
                                 'config.labels')

    typ = _properties.String('config.type',
                             default='IngredientsToSimpleMixture',
                             deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 output: FormulationDescriptor,
                 id_to_quantity: Mapping[str, RealDescriptor],
                 labels: Mapping[str, Set[str]],
                 archived: bool = False):
        """Store the configuration and emit a ``DeprecationWarning`` about the rename."""
        # stacklevel=2 attributes the warning to the calling code. Without it the
        # warning is attributed to this module, and Python's default filters only
        # display DeprecationWarning when it is attributed to ``__main__``.
        # The replacement is spelled as the actual class name so it can be
        # searched for and imported as written.
        warn(
            "{this_class} has been renamed. Please use {replacement} instead".
            format(this_class=self.__class__.__name__,
                   replacement="IngredientsToFormulationPredictor"),
            DeprecationWarning,
            stacklevel=2)
        self.name: str = name
        self.description: str = description
        self.output: FormulationDescriptor = output
        self.id_to_quantity: Mapping[str, RealDescriptor] = id_to_quantity
        self.labels: Mapping[str, Set[str]] = labels
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key and return ``data``."""
        data['display_name'] = data['config']['name']  # pragma: no cover
        return data  # pragma: no cover

    def __str__(self):
        """Return a short tag that embeds the ``repr`` of the predictor's name."""
        return '<IngredientsToSimpleMixturePredictor {!r}>'.format(
            self.name)  # pragma: no cover