예제 #1
0
class CSVDataSource(Serializable['CSVDataSource'], DataSource):
    """Data source backed by a CSV file that is stored on the data platform.

    Parameters
    ----------
    file_link: FileLink
        link to the CSV file that the data are read from
    column_definitions: Mapping[str, Descriptor]
        Maps each column header to the descriptor used to interpret that column's cells
    identifiers: Optional[List[str]]
        One or more column headers whose values uniquely identify a row. A header may also
        appear in ``column_definitions`` when the column serves as both data and identifier,
        but that is not required. Identifiers must be unique within a dataset: no two rows
        may contain the same value.

    """

    typ = properties.String(
        "type",
        default="csv_data_source",
        deserializable=False,
    )
    file_link = properties.Object(FileLink, "file_link")
    column_definitions = properties.Mapping(
        properties.String,
        properties.Object(Descriptor),
        "column_definitions",
    )
    identifiers = properties.Optional(
        properties.List(properties.String),
        "identifiers",
    )

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this data source."""
        return [
            "file_link",
            "column_definitions",
            "identifiers",
            "typ",
        ]

    def __init__(
            self,
            file_link: FileLink,
            column_definitions: Mapping[str, Descriptor],
            identifiers: Optional[List[str]] = None):
        # Each assignment goes through the matching class-level property.
        self.file_link = file_link
        self.column_definitions = column_definitions
        self.identifiers = identifiers
예제 #2
0
class SimpleMixturePredictor(
        Resource['SimpleMixturePredictor'], Predictor, AIResourceMetadata):
    """Predictor interface that flattens a formulation into a simple mixture.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    input_descriptor: FormulationDescriptor
        input descriptor for the hierarchical (un-mixed) formulation
    output_descriptor: FormulationDescriptor
        output descriptor for the flat (mixed) formulation
    training_data: Optional[List[DataSource]]
        Sources of training data. Each can be either a CSV or a GEM Table. Candidates from
        multiple data sources will be combined into a flattened list and de-duplicated by uid
        and identifiers. De-duplication is performed if a uid or identifier is shared between
        two or more rows. The content of a de-duplicated row will contain the union of data
        across all rows that share the same uid or at least 1 identifier. Training data is
        unnecessary if the predictor is part of a graph that includes all training data
        required by this predictor.
    archived: bool
        value stored on the ``archived`` attribute; defaults to False

    """

    _resource_type = ResourceTypeEnum.MODULE

    input_descriptor = _properties.Object(
        FormulationDescriptor, "config.input")
    output_descriptor = _properties.Object(
        FormulationDescriptor, "config.output")
    training_data = _properties.List(
        _properties.Object(DataSource), "config.training_data")

    typ = _properties.String(
        "config.type", default="SimpleMixture", deserializable=False)
    module_type = _properties.String("module_type", default="PREDICTOR")

    def __init__(
            self,
            name: str,
            description: str,
            input_descriptor: FormulationDescriptor,
            output_descriptor: FormulationDescriptor,
            training_data: Optional[List[DataSource]] = None,
            archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.input_descriptor: FormulationDescriptor = input_descriptor
        self.output_descriptor: FormulationDescriptor = output_descriptor
        # The raw argument is passed through the ``_wrap_training_data`` helper first.
        wrapped_sources = self._wrap_training_data(training_data)
        self.training_data: List[DataSource] = wrapped_sources
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key of ``data``."""
        config = data["config"]
        data["display_name"] = config["name"]
        return data

    def __str__(self):
        return f"<SimpleMixturePredictor {self.name!r}>"
예제 #3
0
class ExpressionPredictor(
        Resource['ExpressionPredictor'], Predictor, AIResourceMetadata):
    """Predictor that computes an output from an expression and a set of bounded inputs.

    For a discussion of expression syntax and a list of allowed symbols,
    please see the :ref:`documentation<Expression Predictor>`.

    .. seealso::
       If you are using the deprecated predictor please see
       :class:`~citrine.informatics.predictors.DeprecatedExpressionPredictor` for an example that
       shows how to migrate to the new format.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        the description of the predictor
    expression: str
        expression that computes an output from aliased inputs
    output: RealDescriptor
        descriptor that represents the output of the expression
    aliases: Mapping[str, RealDescriptor]
        a mapping from each unknown argument to its descriptor.
        All unknown arguments must have an associated descriptor.
    archived: bool
        value stored on the ``archived`` attribute; defaults to False

    """

    expression = _properties.String("config.expression")
    output = _properties.Object(RealDescriptor, "config.output")
    aliases = _properties.Mapping(
        _properties.String,
        _properties.Object(RealDescriptor),
        "config.aliases",
    )

    typ = _properties.String(
        "config.type", default="AnalyticExpression", deserializable=False)
    module_type = _properties.String("module_type", default="PREDICTOR")

    def __init__(
            self,
            name: str,
            description: str,
            expression: str,
            output: RealDescriptor,
            aliases: Mapping[str, RealDescriptor],
            archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.expression: str = expression
        self.output: RealDescriptor = output
        self.aliases: Mapping[str, RealDescriptor] = aliases
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key of ``data``."""
        config = data["config"]
        data["display_name"] = config["name"]
        return data

    def __str__(self):
        return f"<ExpressionPredictor {self.name!r}>"
예제 #4
0
class AttributeByTemplateAndObjectTemplate(
        Serializable['AttributeByTemplateAndObjectTemplate'], Variable):
    """[ALPHA] Attribute marked by an attribute template and an object template.

    For example, one property may be measured by two different measurement techniques.  In this
    case, that property would have the same attribute template.  Filtering by measurement
    templates, which identify the measurement techniques, disambiguates the technique used to
    measure that otherwise ambiguous property.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    object_template: LinkByUID
        template that identifies the associated object
    attribute_constraints: Optional[list[list[LinkByUID, Bounds]]]
        constraints on object attributes in the target object that must be satisfied. Constraints
        are expressed as Bounds.  Attributes are expressed with links. The attribute that the
        variable is being set to may be the target of a constraint as well. Defaults to None.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    object_template = properties.Object(LinkByUID, 'object_template')
    # Each constraint is a two-item list: a link to the attribute, then its bounds.
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ), 'attribute_constraints')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_by_object", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this variable."""
        return ["name", "headers", "attribute_template", "object_template",
                "attribute_constraints", "type_selector", "typ"]

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 attribute_template: LinkByUID,
                 object_template: LinkByUID,
                 attribute_constraints: Optional[
                     List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.object_template = object_template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
예제 #5
0
class AttributeByTemplate(Serializable['AttributeByTemplate'], Variable):
    """[ALPHA] Variable for an attribute that is identified by its attribute template.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    attribute_constraints: list[list[LinkByUID, Bounds]]
        constraints on object attributes in the target object that must be satisfied. Constraints
        are expressed as Bounds.  Attributes are expressed with links. The attribute that the
        variable is being set to may be the target of a constraint as well.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String("name")
    headers = properties.List(properties.String, "headers")
    template = properties.Object(LinkByUID, "template")
    # Each constraint is a two-item list: a link to the attribute, then its bounds.
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ),
        "attribute_constraints",
    )
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String(
        "type", default="attribute_by_template", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this variable."""
        return [
            "name",
            "headers",
            "template",
            "attribute_constraints",
            "type_selector",
            "typ",
        ]

    def __init__(
            self,
            *,
            name: str,
            headers: List[str],
            template: LinkByUID,
            attribute_constraints: Optional[
                List[List[Union[LinkByUID, BaseBounds]]]] = None,
            type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.template = template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
class IngredientsToFormulationPredictor(
        Resource['IngredientsToFormulationPredictor'], Predictor, AIResourceMetadata):
    """[ALPHA] Predictor interface that builds a formulation from ingredient quantities.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    output: FormulationDescriptor
        descriptor that represents the output formulation
    id_to_quantity: Mapping[str, RealDescriptor]
        Map from ingredient identifier to the descriptor that represents its quantity,
        e.g., ``{'water': RealDescriptor('water quantity', 0, 1, "")}``
    labels: Mapping[str, Set[str]]
        Map from each label to all ingredients assigned that label, when present in a mixture,
        e.g., ``{'solvent': {'water'}}``
    archived: bool
        value stored on the ``archived`` attribute; defaults to False

    """

    _resource_type = ResourceTypeEnum.MODULE

    output = _properties.Object(FormulationDescriptor, "config.output")
    id_to_quantity = _properties.Mapping(
        _properties.String,
        _properties.Object(RealDescriptor),
        "config.id_to_quantity",
    )
    labels = _properties.Mapping(
        _properties.String,
        _properties.Set(_properties.String),
        "config.labels",
    )

    typ = _properties.String(
        "config.type", default="IngredientsToSimpleMixture", deserializable=False)
    module_type = _properties.String("module_type", default="PREDICTOR")

    def __init__(
            self,
            name: str,
            description: str,
            output: FormulationDescriptor,
            id_to_quantity: Mapping[str, RealDescriptor],
            labels: Mapping[str, Set[str]],
            archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.output: FormulationDescriptor = output
        self.id_to_quantity: Mapping[str, RealDescriptor] = id_to_quantity
        self.labels: Mapping[str, Set[str]] = labels
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key of ``data``."""
        config = data["config"]
        data["display_name"] = config["name"]
        return data

    def __str__(self):
        return f"<IngredientsToFormulationPredictor {self.name!r}>"
예제 #7
0
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are constructed from saved models and should not be user-instantiated.
    """

    name = properties.String("name")
    """:str: the name of the model"""
    type_ = properties.String("type")
    """:str: the type of the model (e.g., "ML Model", "Featurizer", etc.)"""
    inputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        "inputs",
    )
    """:List[Union[Descriptor, str]]: list of inputs, each a descriptor or a string"""
    outputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        "outputs",
    )
    """:List[Union[Descriptor, str]]: list of outputs, each a descriptor or a string"""
    model_settings = properties.Raw("model_settings")
    """:dict: model settings, as a dictionary (keys depend on the model type)"""
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport),
        "feature_importances",
    )
    """:List[FeatureImportanceReport]: feature importance reports for each output"""
    predictor_name = properties.String("predictor_configuration_name", default="")
    """:str: the name of the predictor that created this model"""
    predictor_uid = properties.Optional(
        properties.UUID(), "predictor_configuration_uid")
    """:Optional[UUID]: the unique Citrine id of the predictor that created this model"""
    training_data_count = properties.Optional(
        properties.Integer, "training_data_count")
    """:Optional[int]: Number of rows in the training data for the model, if applicable."""

    def __init__(
            self,
            name: str,
            type_: str,
            inputs: List[Descriptor],
            outputs: List[Descriptor],
            model_settings: Dict[str, Any],
            feature_importances: List[FeatureImportanceReport],
            predictor_name: str,
            predictor_uid: Optional[UUID] = None):
        # ``training_data_count`` is intentionally not a constructor argument here;
        # it is only declared as a class-level property.
        self.name = name
        self.type_ = type_
        self.inputs = inputs
        self.outputs = outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid

    def __str__(self):
        return f"<ModelSummary {self.name!r}>"
예제 #8
0
class LIScore(Serializable['LIScore'], Score):
    """Evaluates the likelihood of scoring better than some baselines for given objectives.

    Parameters
    ----------
    name: Optional[str]
        deprecated; passing a value emits a ``DeprecationWarning``.
        When omitted the name is "Likelihood of Improvement".
    description: Optional[str]
        deprecated; passing a value emits a ``DeprecationWarning``.
        When omitted the description is the empty string.
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
        If multiple objectives are specified they are evaluated independently, and ranked by the
        highest likelihood of exceeding a baseline. This should therefore *not* be used to
        simultaneously optimize multiple objectives.
    baselines: list[float]
        best-so-far values for the various objectives (there must be one for each objective)
    constraints: Optional[list[Constraint]]
        constraints limiting the allowed values that material instances can have;
        ``None`` is stored as an empty list
    session: Optional[Session]
        session object stored on the ``session`` attribute

    """

    baselines = properties.List(properties.Float, 'baselines')
    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MLI')

    def __init__(self,
                 *,
                 name: Optional[str] = None,
                 description: Optional[str] = None,
                 objectives: List[Objective],
                 baselines: List[float],
                 constraints: Optional[List[Constraint]] = None,
                 session: Optional[Session] = None):
        self.objectives: List[Objective] = objectives
        self.baselines: List[float] = baselines
        self.constraints: List[Constraint] = constraints or []
        self.session: Optional[Session] = session

        if name is not None:
            msg = "Naming of Scores is deprecated.  Please do not define the name."
            # stacklevel=2 attributes the warning to the code that constructs the score,
            # so the deprecation is reported at the call site rather than in this module.
            warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
            self._name = name
        else:
            self._name = "Likelihood of Improvement"

        if description is not None:
            msg = "Describing Scores is deprecated.  Please do not define the description."
            warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
            self._description: str = description
        else:
            self._description = ""

    def __str__(self):
        return '<LIScore>'
예제 #9
0
class EIScore(Serializable['EIScore'], Score):
    """
    Evaluates the expected magnitude of improvement beyond baselines for a given objective.

    Parameters
    ----------
    name: Optional[str]
        deprecated; passing a value emits a ``DeprecationWarning``.
        When omitted the name is "Expected Improvement".
    description: Optional[str]
        deprecated; passing a value emits a ``DeprecationWarning``.
        When omitted the description is the empty string.
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
        EIScore does not support more than 1 objective at this time.
    baselines: list[float]
        best-so-far values for the various objectives (there must be one for each objective)
    constraints: Optional[list[Constraint]]
        constraints limiting the allowed values that material instances can have;
        ``None`` is stored as an empty list
    session: Optional[Session]
        session object stored on the ``session`` attribute

    """

    baselines = properties.List(properties.Float, 'baselines')
    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MEI')

    def __init__(self,
                 *,
                 name: Optional[str] = None,
                 description: Optional[str] = None,
                 objectives: List[Objective],
                 baselines: List[float],
                 constraints: Optional[List[Constraint]] = None,
                 session: Optional[Session] = None):
        self.objectives: List[Objective] = objectives
        self.baselines: List[float] = baselines
        self.constraints: List[Constraint] = constraints or []
        self.session: Optional[Session] = session

        if name is not None:
            msg = "Naming of Scores is deprecated.  Please do not define the name."
            # stacklevel=2 attributes the warning to the code that constructs the score,
            # so the deprecation is reported at the call site rather than in this module.
            warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
            self._name = name
        else:
            self._name = "Expected Improvement"

        if description is not None:
            msg = "Describing Scores is deprecated.  Please do not define the description."
            warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
            self._description: str = description
        else:
            self._description = ""

    def __str__(self):
        return '<EIScore>'
예제 #10
0
class EnumeratedDimension(Serializable['EnumeratedDimension'], Dimension):
    """Dimension whose allowed values are given as a finite, explicit list.

    Parameters
    ----------
    descriptor: Descriptor
        a descriptor of the single dimension
    template_id: Optional[UUID]
        UUID that corresponds to the template in DC; stored as given, including ``None``
    values: list[str]
        list of values that can be parsed by the descriptor

    """

    descriptor = properties.Object(Descriptor, "descriptor")
    # The Python attribute is ``values`` but the serialized key is 'list'.
    values = properties.List(properties.String(), "list")
    typ = properties.String(
        "type", default="EnumeratedDimension", deserializable=False)
    # NOTE(review): ``uuid4()`` below runs once, when the class body is executed, so the
    # property default is a single UUID shared by the whole process. ``__init__`` always
    # assigns ``template_id`` explicitly.
    template_id = properties.Optional(
        properties.UUID, "template_id", default=uuid4())

    def __init__(
            self,
            descriptor: Descriptor,
            values: List[str],
            template_id: Optional[UUID] = None):
        self.descriptor: Descriptor = descriptor
        self.values: List[str] = values
        self.template_id: Optional[UUID] = template_id
def test_object_property_serde(sub_prop, sub_value, sub_serialized):
    """Check that an Object property serializes and deserializes a wrapper class instance."""
    property_name = 'some_property_name'
    wrapper_cls = make_class_with_property(sub_prop, property_name)
    object_prop = properties.Object(wrapper_cls)
    expected_instance = wrapper_cls(sub_value)
    expected_payload = {property_name: sub_serialized}
    # Round trip: payload -> instance, then instance -> payload.
    assert object_prop.deserialize(expected_payload) == expected_instance
    assert object_prop.serialize(expected_instance) == expected_payload
예제 #12
0
class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
    """[ALPHA] Data source backed by a GEM Table that is hosted on the data platform.

    Parameters
    ----------
    table_id: UUID
        Unique identifier for the GEM Table
    table_version: Union[str,int]
        Version number for the GEM Table, which starts at 1 rather than 0.
        Strings are cast to ints.
    formulation_descriptor: Optional[FormulationDescriptor]
        Optional descriptor used to store formulations emitted by the data source.

    """

    typ = properties.String(
        "type", default="hosted_table_data_source", deserializable=False)
    table_id = properties.UUID("table_id")
    table_version = properties.Integer("table_version")
    formulation_descriptor = properties.Optional(
        properties.Object(FormulationDescriptor),
        "formulation_descriptor",
    )

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this data source."""
        # NOTE(review): ``formulation_descriptor`` is not listed here; confirm whether
        # that omission is intentional.
        return [
            "table_id",
            "table_version",
            "typ",
        ]

    def __init__(
            self,
            table_id: UUID,
            table_version: Union[int, str],
            formulation_descriptor: Optional[FormulationDescriptor] = None):
        self.table_id: UUID = table_id
        self.table_version: Union[int, str] = table_version
        self.formulation_descriptor: Optional[FormulationDescriptor] = \
            formulation_descriptor
예제 #13
0
class ContinuousDimension(Serializable['ContinuousDimension'], Dimension):
    """Continuous dimension with inclusive lower and upper bounds.

    The dimension is defined by a template ID, a material descriptor, a lower bound,
    and an upper bound.

    Parameters
    ----------
    descriptor: RealDescriptor
        a descriptor of the single dimension
    lower_bound: Optional[float]
        inclusive lower bound; when ``None`` the descriptor's ``lower_bound`` is used
    upper_bound: Optional[float]
        inclusive upper bound; when ``None`` the descriptor's ``upper_bound`` is used
    template_id: Optional[UUID]
        UUID that corresponds to the template in DC; a new ``uuid4()`` is generated
        when none is given
    """

    descriptor = properties.Object(RealDescriptor, 'descriptor')
    lower_bound = properties.Float('lower_bound')
    upper_bound = properties.Float('upper_bound')
    typ = properties.String('type', default='ContinuousDimension', deserializable=False)
    template_id = properties.UUID('template_id', default=uuid4())

    def __init__(self,
                 descriptor: RealDescriptor,
                 lower_bound: Optional[float] = None,
                 upper_bound: Optional[float] = None,
                 template_id: Optional[UUID] = None):
        self.descriptor: RealDescriptor = descriptor
        # Test against None explicitly: 0.0 is a legitimate bound, and a truthiness
        # check would silently replace it with the descriptor's bound.
        self.lower_bound: float = (
            descriptor.lower_bound if lower_bound is None else lower_bound
        )
        self.upper_bound: float = (
            descriptor.upper_bound if upper_bound is None else upper_bound
        )
        self.template_id: UUID = template_id or uuid4()
예제 #14
0
class ResponseMetrics(Serializable["ResponseMetrics"]):
    """Collection of metrics that a Predictor Evaluator computed for one response.

    A metric's result can be looked up either with the metric's ``__repr__`` string
    or with the metric object itself.

    """

    metrics = properties.Mapping(
        properties.String,
        properties.Object(MetricValue),
        "metrics",
    )
    """:Dict[str, MetricValue]: Metrics computed for a single response, keyed by the
    metric's ``__repr__``."""

    def __init__(self):
        pass  # pragma: no cover

    def __iter__(self):
        """Iterate over the keys of ``metrics``."""
        return iter(self.metrics)

    def __getitem__(self, item):
        """Return the metric value for a string key or a ``PredictorEvaluationMetric``.

        Raises
        ------
        TypeError
            if ``item`` is neither a ``str`` nor a ``PredictorEvaluationMetric``

        """
        if isinstance(item, str):
            key = item
        elif isinstance(item, PredictorEvaluationMetric):
            # Metric objects are looked up through their repr.
            key = repr(item)
        else:
            message = "Cannot index ResponseMetrics with a {}".format(type(item))
            raise TypeError(message)
        return self.metrics[key]
예제 #15
0
class ConcatColumn(Serializable['ConcatColumn'], Column):
    """[ALPHA] Column that joins the multiple values of a list- or set-valued variable.

    The input subcolumn need not exist elsewhere in the table config, and its parameters have
    no bearing on how the table is constructed. Only the type of column is relevant. That a
    complete Column object is required is simply a limitation of the current API.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    subcolumn: Column
        a column of the type of the individual values to be concatenated

    """

    data_source = properties.String("data_source")
    subcolumn = properties.Object(Column, "subcolumn")
    typ = properties.String(
        "type", default="concat_column", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this column."""
        # NOTE(review): ``subcolumn`` is not listed here; confirm whether that
        # omission is intentional.
        return [
            "data_source",
            "typ",
        ]

    def __init__(self, *, data_source: str, subcolumn: Column):
        self.data_source = data_source
        self.subcolumn = subcolumn
예제 #16
0
class IngredientLabelByProcessAndName(
        Serializable['IngredientLabelByProcessAndName'], Variable):
    """[ALPHA] Boolean variable that reports whether a given label is applied.

    Matches by process template, ingredient name, and the label string to check.

    For example, a column might indicate whether or not the ingredient "ethanol" is labeled as a
    "solvent" in the "second mixing" process.  Many such columns would then support the
    downstream analysis "get the volumetric average density of the solvents".

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    label: str
        label to test
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String("name")
    headers = properties.List(properties.String, "headers")
    process_template = properties.Object(LinkByUID, "process_template")
    ingredient_name = properties.String("ingredient_name")
    label = properties.String("label")
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String(
        "type", default="ing_label_by_process_and_name", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this variable."""
        return [
            "name",
            "headers",
            "process_template",
            "ingredient_name",
            "label",
            "type_selector",
            "typ",
        ]

    def __init__(
            self,
            *,
            name: str,
            headers: List[str],
            process_template: LinkByUID,
            ingredient_name: str,
            label: str,
            type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        self.label = label
        self.type_selector = type_selector
예제 #17
0
class EVScore(Serializable['EVScore'], Score):
    """
    Evaluates the expected value for given objectives.

    Parameters
    ----------
    name: Optional[str]
        deprecated; passing a value emits a ``DeprecationWarning``.
        When omitted the name is "Expected Value".
    description: Optional[str]
        deprecated; passing a value emits a ``DeprecationWarning``.
        When omitted the description is the empty string.
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
        If multiple objectives are specified, their scores are summed together. This allows
        for simultaneous optimization of multiple objectives, although the weighting of the
        various objectives cannot be directly specified.
    constraints: Optional[list[Constraint]]
        constraints limiting the allowed values that material instances can have;
        ``None`` is stored as an empty list
    session: Optional[Session]
        session object stored on the ``session`` attribute

    """

    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MEV')

    def __init__(self,
                 *,
                 name: Optional[str] = None,
                 description: Optional[str] = None,
                 objectives: List[Objective],
                 constraints: Optional[List[Constraint]] = None,
                 session: Optional[Session] = None):
        self.objectives: List[Objective] = objectives
        self.constraints: List[Constraint] = constraints or []
        self.session: Optional[Session] = session

        if name is not None:
            msg = "Naming of Scores is deprecated.  Please do not define the name."
            # stacklevel=2 attributes the warning to the code that constructs the score,
            # so the deprecation is reported at the call site rather than in this module.
            warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
            self._name = name
        else:
            self._name = "Expected Value"

        if description is not None:
            msg = "Describing Scores is deprecated.  Please do not define the description."
            warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
            self._description: str = description
        else:
            self._description = ""

    def __str__(self):
        return '<EVScore>'
class DesignMaterial(Serializable["DesignMaterial"]):
    """A designed material, described as a set of DesignVariables."""

    # The Python attribute is ``values`` but the serialized key is 'vars'.
    values = properties.Mapping(
        properties.String,
        properties.Object(DesignVariable),
        "vars",
    )
    """:Dict[str, DesignVariable]: mapping from descriptor keys to the value for this material"""

    def __init__(self):
        pass  # pragma: no cover
예제 #19
0
class PredictedVsActualRealPoint(Serializable["PredictedVsActualRealPoint"]):
    """Predicted and actual values for one real-valued data point."""

    uuid = properties.UUID("uuid")
    """:UUID: Unique Citrine id given to the candidate"""

    identifiers = properties.Set(properties.String, "identifiers")
    """:Set[str]: Set of globally unique identifiers given to the candidate"""

    trial = properties.Integer("trial")
    """:int: 1-based index of the trial this candidate belonged to"""

    fold = properties.Integer("fold")
    """:int: 1-based index of the fold this candidate belonged to"""

    predicted = properties.Object(RealMetricValue, "predicted")
    """:RealMetricValue: Predicted value"""

    actual = properties.Object(RealMetricValue, "actual")
    """:RealMetricValue: Actual value"""

    def __init__(self):
        pass  # pragma: no cover
예제 #20
0
class EnumeratedDesignSpace(Resource['EnumeratedDesignSpace'], DesignSpace):
    """Design space composed of an explicit enumeration of candidate materials to score.

    Note that every candidate must have exactly the descriptors in the list populated
    (no more, no less) to be included.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    descriptors: list[Descriptor]
        the list of descriptors included in the candidates of the design space
    data: list[dict]
        list of dicts of the shape ``{<descriptor_key>: <descriptor_value>}`` where each dict
        corresponds to a candidate in the design space
    session: Session, optional
        session stored on the design space; when omitted (or ``None``) a new ``Session()``
        is created for this instance

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(),
                                      'config.description')
    descriptors = properties.List(properties.Object(Descriptor),
                                  'config.descriptors')
    data = properties.List(
        properties.Mapping(properties.String, properties.Raw), 'config.data')

    typ = properties.String('config.type',
                            default='EnumeratedDesignSpace',
                            deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID(
        'schema_id', default=UUID('f3907a58-aa46-462c-8837-a5aa9605e79e'))

    def __init__(self,
                 name: str,
                 description: str,
                 descriptors: List[Descriptor],
                 data: List[Mapping[str, Any]],
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.descriptors: List[Descriptor] = descriptors
        self.data: List[Mapping[str, Any]] = data
        # A ``Session()`` default written in the signature is evaluated only once, when the
        # class body runs, and is then shared by every instance created without an explicit
        # session.  Building the fallback here gives each instance its own session and
        # avoids constructing one as a side effect of importing the module.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key and return ``data``."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<EnumeratedDesignSpace {!r}>'.format(self.name)
 def dump(self) -> dict:
     """Serialize this design space, swapping on-platform subspaces for their uids.

     The substitution is applied to a deep copy, so ``self.subspaces`` is left as it was.
     Only subspaces that are ``DesignSpace`` instances with a non-``None`` ``uid`` are
     replaced; every other entry is serialized unchanged.
     """
     snapshot = deepcopy(self)
     for position, candidate in enumerate(snapshot.subspaces):
         if not isinstance(candidate, DesignSpace):
             continue
         if candidate.uid is None:
             continue
         snapshot.subspaces[position] = candidate.uid
     serializer = properties.Object(ProductDesignSpace)
     return self._post_dump(serializer.serialize(snapshot))
예제 #22
0
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are constructed from saved models and should not be user-instantiated.

    Parameters
    ----------
    name: str
        the name of the model
    type_: str
        the type of the model (e.g., "ML Model", "Featurizer", etc.)
    inputs: List[Descriptor]
        list of input descriptors
    outputs: List[Descriptor]
        list of output descriptors
    model_settings: dict
        settings of the model, as a dictionary (details depend on model type)
    feature_importances: List[FeatureImportanceReport]
        list of feature importance reports, one for each output
    predictor_name: str
        the name of the predictor that created this model
    predictor_uid: Optional[uuid]
        the uid of the predictor that created this model

    """

    name = properties.String('name')
    type_ = properties.String('type')
    # NOTE(review): inputs/outputs are declared as lists of strings here, while __init__
    # annotates them as List[Descriptor] - confirm which of the two is intended.
    inputs = properties.List(properties.String(), 'inputs')
    outputs = properties.List(properties.String(), 'outputs')
    model_settings = properties.Raw('model_settings')
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport),
        'feature_importances',
    )
    predictor_name = properties.String('predictor_configuration_name', default='')
    predictor_uid = properties.Optional(properties.UUID(), 'predictor_configuration_uid')

    def __init__(
            self,
            name: str,
            type_: str,
            inputs: List[Descriptor],
            outputs: List[Descriptor],
            model_settings: Dict[str, Any],
            feature_importances: List[FeatureImportanceReport],
            predictor_name: str,
            predictor_uid: Optional[UUID] = None):
        # Identity of the model
        self.name = name
        self.type_ = type_
        # What the model consumes and produces
        self.inputs = inputs
        self.outputs = outputs
        # Configuration and diagnostics
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        # The predictor this model came from
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid

    def __str__(self):
        return f'<ModelSummary {self.name!r}>'
예제 #23
0
class ProductDesignSpace(Resource['ProductDesignSpace'], DesignSpace):
    """[ALPHA] An outer product of univariate dimensions, either continuous or enumerated.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    dimensions: list[Dimension]
        univariate dimensions that are factors of the design space; can be enumerated or continuous
    session: Session, optional
        session stored on the design space; when omitted (or ``None``) a new ``Session()``
        is created for this instance

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    dimensions = properties.List(properties.Object(Dimension), 'config.dimensions')
    typ = properties.String('config.type', default='Univariate', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False
    )
    archived = properties.Boolean('archived', default=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID('schema_id', default=UUID('6c16d694-d015-42a7-b462-8ef299473c9a'))

    def __init__(self,
                 name: str,
                 description: str,
                 dimensions: List[Dimension],
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.dimensions: List[Dimension] = dimensions
        # A ``Session()`` default written in the signature is evaluated only once, when the
        # class body runs, and is then shared by every instance created without an explicit
        # session.  Building the fallback here gives each instance its own session and
        # avoids constructing one as a side effect of importing the module.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key and return ``data``."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<ProductDesignSpace {!r}>'.format(self.name)
예제 #24
0
class EIScore(Serializable['EIScore'], Score):
    """[ALPHA] Evaluates the expected magnitude of improvement beyond baselines for given objectives.

    Parameters
    ----------
    name: str
        the name of the score
    description: str
        the description of the score
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
    baselines: list[float]
        best-so-far values for the various objectives (there must be one for each objective)
    constraints: list[Constraint]
        constraints limiting the allowed values that material instances can have;
        a falsy value (``None`` or an empty list) is stored as a new empty list
    session: Optional[Session]
        session stored on the score as given

    """

    name = properties.String('name')
    description = properties.String('description')
    baselines = properties.List(properties.Float, 'baselines')
    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MEI')

    def __init__(
            self,
            name: str,
            description: str,
            objectives: List[Objective],
            baselines: List[float],
            constraints: Optional[List[Constraint]] = None,
            session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.objectives: List[Objective] = objectives
        self.baselines: List[float] = baselines
        # Normalize a missing or empty constraint list to a fresh empty list.
        if not constraints:
            constraints = []
        self.constraints: List[Constraint] = constraints
        self.session: Optional[Session] = session

    def __str__(self):
        return f'<EIScore {self.name!r}>'
예제 #25
0
class IngredientIdentifierByProcessTemplateAndName(
        Serializable['IngredientIdentifierByProcessTemplateAndName'], Variable):
    """[ALPHA] Ingredient identifier associated with a process template and a name.

    All constructor arguments are keyword-only.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    scope: str
        scope of the identifier; this argument is required (the constructor has no default)
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_template = properties.Object(LinkByUID, 'process_template')
    ingredient_name = properties.String('ingredient_name')
    scope = properties.String('scope')
    type_selector = properties.Enumeration(DataObjectTypeSelector,
                                           "type_selector")
    typ = properties.String('type',
                            default="ing_id_by_process_and_name",
                            deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable."""
        return [
            "name", "headers", "process_template", "ingredient_name", "scope",
            "type_selector", "typ"
        ]

    def __init__(
            self,
            *,
            name: str,
            headers: List[str],
            process_template: LinkByUID,
            ingredient_name: str,
            scope: str,
            type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        self.scope = scope
        self.type_selector = type_selector
예제 #26
0
class CrossValidationResult(Serializable["CrossValidationResult"],
                            PredictorEvaluationResult):
    """Result of performing a cross-validation evaluation on a predictor.

    Results for a cross-validated response can be accessed via ``cvResult['response_name']``,
    where ``cvResult`` is a
    :class:`citrine.informatics.predictor_evaluation_result.CrossValidationResult`
    and ``'response_name'`` is a response analyzed by a
    :class:`citrine.informatics.predictor_evaluator.PredictorEvaluator`.

    Iterating over the result yields the response names.

    """

    _evaluator = properties.Object(PredictorEvaluator, "evaluator")
    _response_results = properties.Mapping(
        properties.String,
        properties.Object(ResponseMetrics),
        "response_results",
    )
    typ = properties.String(
        'type',
        default='CrossValidationResult',
        deserializable=False,
    )

    def __getitem__(self, item):
        # Plain lookup in the per-response results mapping.
        return self._response_results[item]

    def __iter__(self):
        # Iterates over a freshly built set of response names.
        return iter(self.responses)

    @property
    def evaluator(self) -> PredictorEvaluator:
        """:PredictorEvaluator: Evaluator that produced this result."""
        return self._evaluator

    @property
    def responses(self) -> Set[str]:
        """Responses for which results are present."""
        response_names = self._response_results.keys()
        return set(response_names)

    @property
    def metrics(self) -> Set[PredictorEvaluationMetric]:
        """:Set[PredictorEvaluationMetric]: Metrics for which results are present."""
        return self._evaluator.metrics
class PredictorEvaluationWorkflow(Resource['PredictorEvaluationWorkflow'],
                                  Workflow, AIResourceMetadata):
    """A workflow that evaluates a predictor.

    All constructor arguments are keyword-only. ``session`` and ``project_id`` start out
    as ``None`` on a newly constructed workflow.

    Parameters
    ----------
    name: str
        name of the predictor evaluation workflow
    description: str
        the description of the predictor evaluation workflow
    evaluators: List[PredictorEvaluator]
        the list of evaluators to apply to the predictor

    """

    name = properties.String('name')
    description = properties.String('description')
    evaluators = properties.List(
        properties.Object(PredictorEvaluator),
        "evaluators",
    )

    status_description = properties.String(
        'status_description',
        serializable=False,
    )
    """:str: more detailed description of the workflow's status"""

    typ = properties.String(
        'type',
        default='PredictorEvaluationWorkflow',
        deserializable=False,
    )

    def __init__(
            self,
            *,
            name: str,
            description: str = "",
            evaluators: List[PredictorEvaluator]):
        self.name: str = name
        self.description: str = description
        self.evaluators: List[PredictorEvaluator] = evaluators
        self.session: Optional[Session] = None
        self.project_id: Optional[UUID] = None

    def __str__(self):
        return f'<PredictorEvaluationWorkflow {self.name!r}>'

    @property
    def executions(self) -> PredictorEvaluationExecutionCollection:
        """Return a resource representing all visible executions of this workflow.

        Raises
        ------
        AttributeError
            if ``project_id`` is missing or ``None``

        """
        project_id = getattr(self, 'project_id', None)
        if project_id is None:
            raise AttributeError('Cannot initialize execution without project reference!')
        return PredictorEvaluationExecutionCollection(
            project_id=project_id,
            session=self.session,
            workflow_id=self.uid,
        )
예제 #28
0
class EVScore(Serializable['EVScore'], Score):
    """[ALPHA] Evaluates the expected value for given objectives.

    Parameters
    ----------
    name: str
        the name of the score
    description: str
        the description of the score
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
    constraints: list[Constraint]
        constraints limiting the allowed values that material instances can have;
        a falsy value (``None`` or an empty list) is stored as a new empty list
    session: Optional[Session]
        session stored on the score as given

    """

    name = properties.String('name')
    description = properties.String('description')
    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MEV')

    def __init__(
            self,
            name: str,
            description: str,
            objectives: List[Objective],
            constraints: Optional[List[Constraint]] = None,
            session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.objectives: List[Objective] = objectives
        # Normalize a missing or empty constraint list to a fresh empty list.
        if not constraints:
            constraints = []
        self.constraints: List[Constraint] = constraints
        self.session: Optional[Session] = session

    def __str__(self):
        return f'<EVScore {self.name!r}>'
예제 #29
0
class IngredientQuantityByProcessAndName(
        Serializable['IngredientQuantityByProcessAndName'], Variable):
    """[ALPHA] Get the quantity of an ingredient associated with a process template and a name.

    All constructor arguments are keyword-only.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    quantity_dimension: IngredientQuantityDimension
        dimension of the ingredient quantity: absolute quantity, number, mass, or volume fraction.
        valid options are defined by
        :class:`~citrine.gemtables.variables.IngredientQuantityDimension`
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_template = properties.Object(LinkByUID, 'process_template')
    ingredient_name = properties.String('ingredient_name')
    quantity_dimension = properties.Enumeration(
        IngredientQuantityDimension,
        'quantity_dimension',
    )
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String(
        'type',
        default="ing_quantity_by_process_and_name",
        deserializable=False,
    )

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable."""
        return [
            "name",
            "headers",
            "process_template",
            "ingredient_name",
            "quantity_dimension",
            "type_selector",
            "typ",
        ]

    def __init__(
            self,
            *,
            name: str,
            headers: List[str],
            process_template: LinkByUID,
            ingredient_name: str,
            quantity_dimension: IngredientQuantityDimension,
            type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        self.quantity_dimension = quantity_dimension
        self.type_selector = type_selector
예제 #30
0
class LabelFractionConstraint(Serializable['LabelFractionConstraint'],
                              Constraint):
    """Represents a constraint on the total amount of ingredients with a given label.

    All constructor arguments are keyword-only.

    Parameters
    ----------
    formulation_descriptor: FormulationDescriptor
        descriptor to constrain
    label: str
        ingredient label to constrain
    min: float
        minimum value
    max: float
        maximum value
    is_required: bool, optional
        whether this ingredient is required.
        If ``True``, the label must be present and its value must be within the
        specified range. If ``False``, the label must be within the specified range only if
        it's present in the formulation, i.e., the value can be 0 or on the range ``[min, max]``.
    session: Optional[Session]
        session stored on the constraint as given

    """

    formulation_descriptor = properties.Object(
        FormulationDescriptor,
        'formulation_descriptor',
    )
    label = properties.String('label')
    min = properties.Optional(properties.Float, 'min')
    max = properties.Optional(properties.Float, 'max')
    is_required = properties.Boolean('is_required')
    typ = properties.String('type', default='LabelFractionConstraint')

    def __init__(
            self,
            *,
            formulation_descriptor: FormulationDescriptor,
            label: str,
            min: float,
            max: float,
            is_required: bool = True,
            session: Optional[Session] = None):
        self.formulation_descriptor: FormulationDescriptor = formulation_descriptor
        self.label: str = label
        self.min: float = min
        self.max: float = max
        self.is_required: bool = is_required
        self.session: Optional[Session] = session

    def __str__(self):
        return (
            f'<LabelFractionConstraint '
            f'{self.formulation_descriptor.key!r}::{self.label!r}>'
        )