class CSVDataSource(Serializable['CSVDataSource'], DataSource):
    """Data source backed by a CSV file that lives on the data platform.

    Parameters
    ----------
    file_link: FileLink
        link to the CSV file from which the data are read
    column_definitions: Mapping[str, Descriptor]
        mapping from each column header to the descriptor used to interpret
        the contents of the cells in that column
    identifiers: Optional[List[str]]
        one or more column headers whose values uniquely identify a row.
        A header may appear both here and in ``column_definitions`` when the
        column serves as data and as an identifier, but that overlap is not
        required. Identifiers must be unique within a dataset: no two rows
        can contain the same value.

    """

    typ = properties.String(
        'type', default='csv_data_source', deserializable=False)
    file_link = properties.Object(FileLink, "file_link")
    column_definitions = properties.Mapping(
        properties.String,
        properties.Object(Descriptor),
        "column_definitions",
    )
    identifiers = properties.Optional(
        properties.List(properties.String), "identifiers")

    def _attrs(self) -> List[str]:
        """Return the names of the attributes listed for this data source."""
        attribute_names = ["file_link", "column_definitions", "identifiers", "typ"]
        return attribute_names

    def __init__(self,
                 file_link: FileLink,
                 column_definitions: Mapping[str, Descriptor],
                 identifiers: Optional[List[str]] = None):
        self.file_link = file_link
        self.column_definitions = column_definitions
        self.identifiers = identifiers
class SimpleMixturePredictor(
        Resource['SimpleMixturePredictor'], Predictor, AIResourceMetadata):
    """Predictor interface that flattens a formulation into a simple mixture.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    input_descriptor: FormulationDescriptor
        descriptor of the hierarchical (un-mixed) input formulation
    output_descriptor: FormulationDescriptor
        descriptor of the flat (mixed) output formulation
    training_data: Optional[List[DataSource]]
        Sources of training data; each one is either a CSV or a GEM Table.
        Candidates from all data sources are combined into one flattened list
        and de-duplicated by uid and identifiers. Rows are de-duplicated when
        two or more of them share a uid or an identifier, and the resulting
        row holds the union of the data from every row that shares the same
        uid or at least 1 identifier.
        Training data is unnecessary when the predictor belongs to a graph
        that already includes all training data this predictor requires.

    """

    _resource_type = ResourceTypeEnum.MODULE

    input_descriptor = _properties.Object(FormulationDescriptor, 'config.input')
    output_descriptor = _properties.Object(FormulationDescriptor, 'config.output')
    training_data = _properties.List(
        _properties.Object(DataSource), 'config.training_data')
    typ = _properties.String(
        'config.type', default='SimpleMixture', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 input_descriptor: FormulationDescriptor,
                 output_descriptor: FormulationDescriptor,
                 training_data: Optional[List[DataSource]] = None,
                 archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.input_descriptor: FormulationDescriptor = input_descriptor
        self.output_descriptor: FormulationDescriptor = output_descriptor
        # The raw argument is passed through the _wrap_training_data helper
        # (defined outside this block) before it is stored.
        wrapped_sources = self._wrap_training_data(training_data)
        self.training_data: List[DataSource] = wrapped_sources
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key."""
        config_name = data['config']['name']
        data['display_name'] = config_name
        return data

    def __str__(self):
        return f'<SimpleMixturePredictor {self.name!r}>'
class ExpressionPredictor(
        Resource['ExpressionPredictor'], Predictor, AIResourceMetadata):
    """Predictor that evaluates an expression over a set of bounded inputs.

    For a discussion of expression syntax and a list of allowed symbols, please see the
    :ref:`documentation<Expression Predictor>`.

    .. seealso::
       If you are using the deprecated predictor please see
       :class:`~citrine.informatics.predictors.DeprecatedExpressionPredictor` for an example that
       shows how to migrate to the new format.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        the description of the predictor
    expression: str
        expression used to compute the output from the aliased inputs
    output: RealDescriptor
        descriptor representing the output of the expression
    aliases: Mapping[str, RealDescriptor]
        mapping from every unknown argument of the expression to its
        descriptor; each unknown argument must have an associated descriptor

    """

    expression = _properties.String('config.expression')
    output = _properties.Object(RealDescriptor, 'config.output')
    aliases = _properties.Mapping(
        _properties.String,
        _properties.Object(RealDescriptor),
        'config.aliases',
    )
    typ = _properties.String(
        'config.type', default='AnalyticExpression', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 expression: str,
                 output: RealDescriptor,
                 aliases: Mapping[str, RealDescriptor],
                 archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.expression: str = expression
        self.output: RealDescriptor = output
        self.aliases: Mapping[str, RealDescriptor] = aliases
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key."""
        config_name = data['config']['name']
        data['display_name'] = config_name
        return data

    def __str__(self):
        return f'<ExpressionPredictor {self.name!r}>'
class AttributeByTemplateAndObjectTemplate(
        Serializable['AttributeByTemplateAndObjectTemplate'], Variable):
    """[ALPHA] Attribute identified by an attribute template and an object template.

    One property may, for example, be measured by two different measurement
    techniques, in which case both measurements share the same attribute
    template. Filtering by the measurement templates, which identify the
    techniques, disambiguates which technique measured that otherwise
    ambiguous property.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template identifying the attribute to assign to the variable
    object_template: LinkByUID
        template identifying the associated object
    attribute_constraints: list[(LinkByUID, Bounds)]
        constraints on object attributes in the target object that must be
        satisfied. Constraints are expressed as Bounds and attributes are
        expressed with links. The attribute the variable is being set to may
        itself be the target of a constraint.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    object_template = properties.Object(LinkByUID, 'object_template')
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ),
        'attribute_constraints',
    )
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String(
        'type', default="attribute_by_object", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes listed for this variable."""
        attribute_names = [
            "name",
            "headers",
            "attribute_template",
            "object_template",
            "attribute_constraints",
            "type_selector",
            "typ",
        ]
        return attribute_names

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 attribute_template: LinkByUID,
                 object_template: LinkByUID,
                 attribute_constraints: List[List[Union[LinkByUID, BaseBounds]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.object_template = object_template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
class AttributeByTemplate(Serializable['AttributeByTemplate'], Variable):
    """[ALPHA] Attribute identified by an attribute template.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    template: LinkByUID
        attribute template identifying the attribute to assign to the variable
    attribute_constraints: list[list[LinkByUID, Bounds]]
        constraints on object attributes in the target object that must be
        satisfied. Constraints are expressed as Bounds and attributes are
        expressed with links. The attribute the variable is being set to may
        itself be the target of a constraint.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    template = properties.Object(LinkByUID, 'template')
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ),
        'attribute_constraints',
    )
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String(
        'type', default="attribute_by_template", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes listed for this variable."""
        attribute_names = [
            "name",
            "headers",
            "template",
            "attribute_constraints",
            "type_selector",
            "typ",
        ]
        return attribute_names

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 template: LinkByUID,
                 attribute_constraints: Optional[
                     List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.template = template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
class IngredientsToFormulationPredictor(
        Resource['IngredientsToFormulationPredictor'], Predictor, AIResourceMetadata):
    """[ALPHA] Predictor interface that builds a formulation from ingredient quantities.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    output: FormulationDescriptor
        descriptor representing the output formulation
    id_to_quantity: Mapping[str, RealDescriptor]
        mapping from ingredient identifier to the descriptor that represents
        its quantity, e.g., ``{'water': RealDescriptor('water quantity', 0, 1, "")}``
    labels: Mapping[str, Set[str]]
        mapping from each label to all ingredients assigned that label, when
        present in a mixture, e.g., ``{'solvent': {'water'}}``

    """

    _resource_type = ResourceTypeEnum.MODULE

    output = _properties.Object(FormulationDescriptor, 'config.output')
    id_to_quantity = _properties.Mapping(
        _properties.String,
        _properties.Object(RealDescriptor),
        'config.id_to_quantity',
    )
    labels = _properties.Mapping(
        _properties.String,
        _properties.Set(_properties.String),
        'config.labels',
    )
    typ = _properties.String(
        'config.type', default='IngredientsToSimpleMixture', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 output: FormulationDescriptor,
                 id_to_quantity: Mapping[str, RealDescriptor],
                 labels: Mapping[str, Set[str]],
                 archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.output: FormulationDescriptor = output
        self.id_to_quantity: Mapping[str, RealDescriptor] = id_to_quantity
        self.labels: Mapping[str, Set[str]] = labels
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key."""
        config_name = data['config']['name']
        data['display_name'] = config_name
        return data

    def __str__(self):
        return f'<IngredientsToFormulationPredictor {self.name!r}>'
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are constructed from saved models and should not be user-instantiated.

    Parameters
    ----------
    name: str
        the name of the model
    type_: str
        the type of the model (e.g., "ML Model", "Featurizer", etc.)
    inputs: List[Descriptor]
        list of input descriptors
    outputs: List[Descriptor]
        list of output descriptors
    model_settings: Dict[str, Any]
        model settings, as a dictionary (keys depend on the model type)
    feature_importances: List[FeatureImportanceReport]
        feature importance reports for each output
    predictor_name: str
        the name of the predictor that created this model
    predictor_uid: Optional[UUID]
        the unique Citrine id of the predictor that created this model
    training_data_count: Optional[int]
        number of rows in the training data for the model, if applicable

    """

    name = properties.String('name')
    """:str: the name of the model"""
    type_ = properties.String('type')
    """:str: the type of the model (e.g., "ML Model", "Featurizer", etc.)"""
    inputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        'inputs'
    )
    """:List[Union[Descriptor, str]]: list of input descriptors (or strings)"""
    outputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        'outputs'
    )
    """:List[Union[Descriptor, str]]: list of output descriptors (or strings)"""
    model_settings = properties.Raw('model_settings')
    """:dict: model settings, as a dictionary (keys depend on the model type)"""
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport), 'feature_importances')
    """:List[FeatureImportanceReport]: feature importance reports for each output"""
    predictor_name = properties.String('predictor_configuration_name', default='')
    """:str: the name of the predictor that created this model"""
    predictor_uid = properties.Optional(properties.UUID(), 'predictor_configuration_uid')
    """:Optional[UUID]: the unique Citrine id of the predictor that created this model"""
    training_data_count = properties.Optional(properties.Integer, "training_data_count")
    """:Optional[int]: Number of rows in the training data for the model, if applicable."""

    def __init__(self,
                 name: str,
                 type_: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 model_settings: Dict[str, Any],
                 feature_importances: List[FeatureImportanceReport],
                 predictor_name: str,
                 predictor_uid: Optional[UUID] = None,
                 training_data_count: Optional[int] = None):
        self.name = name
        self.type_ = type_
        self.inputs = inputs
        self.outputs = outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid
        # Every declared field is now settable through the constructor; the
        # new trailing parameter defaults to None so existing callers are
        # unaffected.
        self.training_data_count = training_data_count

    def __str__(self):
        return '<ModelSummary {!r}>'.format(self.name)
class LIScore(Serializable['LIScore'], Score):
    """Score by the likelihood of doing better than some baselines for given objectives.

    Parameters
    ----------
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
        When several objectives are given they are evaluated independently and
        ranked by the highest likelihood of exceeding a baseline. This should
        therefore *not* be used to simultaneously optimize multiple objectives.
    baselines: list[float]
        best-so-far values for the various objectives (there must be one for each objective)
    constraints: list[Constraint]
        constraints limiting the allowed values that material instances can have

    """

    baselines = properties.List(properties.Float, 'baselines')
    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MLI')

    def __init__(self, *,
                 name: Optional[str] = None,
                 description: Optional[str] = None,
                 objectives: List[Objective],
                 baselines: List[float],
                 constraints: Optional[List[Constraint]] = None,
                 session: Optional[Session] = None):
        self.objectives: List[Objective] = objectives
        self.baselines: List[float] = baselines
        self.constraints: List[Constraint] = constraints if constraints else []
        self.session: Optional[Session] = session

        # Passing a name is deprecated: warn, but still honor the value.
        if name is None:
            self._name = "Likelihood of Improvement"
        else:
            warnings.warn(
                "Naming of Scores is deprecated.  Please do not define the name.",
                category=DeprecationWarning,
            )
            self._name = name

        # Passing a description is deprecated in the same way.
        if description is None:
            self._description = ""
        else:
            warnings.warn(
                "Describing Scores is deprecated.  Please do not define the description.",
                category=DeprecationWarning,
            )
            self._description: str = description

    def __str__(self):
        return '<LIScore>'
class EIScore(Serializable['EIScore'], Score):
    """Score by the expected magnitude of improvement beyond baselines for a given objective.

    Parameters
    ----------
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
        EIScore does not support more than 1 objective at this time.
    baselines: list[float]
        best-so-far values for the various objectives (there must be one for each objective)
    constraints: list[Constraint]
        constraints limiting the allowed values that material instances can have

    """

    baselines = properties.List(properties.Float, 'baselines')
    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MEI')

    def __init__(self, *,
                 name: Optional[str] = None,
                 description: Optional[str] = None,
                 objectives: List[Objective],
                 baselines: List[float],
                 constraints: Optional[List[Constraint]] = None,
                 session: Optional[Session] = None):
        self.objectives: List[Objective] = objectives
        self.baselines: List[float] = baselines
        self.constraints: List[Constraint] = constraints if constraints else []
        self.session: Optional[Session] = session

        # Passing a name is deprecated: warn, but still honor the value.
        if name is None:
            self._name = "Expected Improvement"
        else:
            warnings.warn(
                "Naming of Scores is deprecated.  Please do not define the name.",
                category=DeprecationWarning,
            )
            self._name = name

        # Passing a description is deprecated in the same way.
        if description is None:
            self._description = ""
        else:
            warnings.warn(
                "Describing Scores is deprecated.  Please do not define the description.",
                category=DeprecationWarning,
            )
            self._description: str = description

    def __str__(self):
        return '<EIScore>'
class EnumeratedDimension(Serializable['EnumeratedDimension'], Dimension):
    """Dimension made up of a finite, enumerated set of values.

    Parameters
    ----------
    descriptor: Descriptor
        a descriptor of the single dimension
    template_id: UUID
        UUID that corresponds to the template in DC
    values: list[str]
        list of values that can be parsed by the descriptor

    """

    descriptor = properties.Object(Descriptor, 'descriptor')
    values = properties.List(properties.String(), 'list')
    typ = properties.String(
        'type', default='EnumeratedDimension', deserializable=False)
    # uuid4() is evaluated once, when this class body executes, so the
    # property-level default is a single UUID rather than a fresh one per use.
    template_id = properties.Optional(
        properties.UUID, 'template_id', default=uuid4())

    def __init__(self,
                 descriptor: Descriptor,
                 values: List[str],
                 template_id: Optional[UUID] = None):
        self.descriptor: Descriptor = descriptor
        self.values: List[str] = values
        self.template_id: Optional[UUID] = template_id
def test_object_property_serde(sub_prop, sub_value, sub_serialized):
    """Check that an Object property deserializes and serializes a generated class."""
    property_name = 'some_property_name'
    klass = make_class_with_property(sub_prop, property_name)
    object_prop = properties.Object(klass)
    expected_instance = klass(sub_value)
    expected_payload = {property_name: sub_serialized}

    # Deserialization first, then serialization.
    assert object_prop.deserialize(expected_payload) == expected_instance
    assert object_prop.serialize(expected_instance) == expected_payload
class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
    """[ALPHA] Data source backed by a GEM Table hosted on the data platform.

    Parameters
    ----------
    table_id: UUID
        Unique identifier for the GEM Table
    table_version: Union[str,int]
        Version number for the GEM Table, which starts at 1 rather than 0.
        Strings are cast to ints.
    formulation_descriptor: Optional[FormulationDescriptor]
        Optional descriptor used to store formulations emitted by the data source.

    """

    typ = properties.String(
        'type', default='hosted_table_data_source', deserializable=False)
    table_id = properties.UUID("table_id")
    table_version = properties.Integer("table_version")
    formulation_descriptor = properties.Optional(
        properties.Object(FormulationDescriptor),
        "formulation_descriptor",
    )

    def _attrs(self) -> List[str]:
        """Return the names of the attributes listed for this data source."""
        # NOTE(review): "formulation_descriptor" is not listed here -- confirm
        # against the consumers of _attrs whether that omission is intended.
        attribute_names = ["table_id", "table_version", "typ"]
        return attribute_names

    def __init__(self,
                 table_id: UUID,
                 table_version: Union[int, str],
                 formulation_descriptor: Optional[FormulationDescriptor] = None):
        self.table_id: UUID = table_id
        self.table_version: Union[int, str] = table_version
        self.formulation_descriptor: Optional[FormulationDescriptor] = \
            formulation_descriptor
class ContinuousDimension(Serializable['ContinuousDimension'], Dimension):
    """Continuous dimension defined by a template ID, material descriptor, and two bounds.

    Parameters
    ----------
    descriptor: RealDescriptor
        a descriptor of the single dimension
    lower_bound: float
        inclusive lower bound; when omitted (``None``) the descriptor's
        ``lower_bound`` is used
    upper_bound: float
        inclusive upper bound; when omitted (``None``) the descriptor's
        ``upper_bound`` is used
    template_id: UUID
        UUID that corresponds to the template in DC; when omitted a new
        random UUID is generated

    """

    descriptor = properties.Object(RealDescriptor, 'descriptor')
    lower_bound = properties.Float('lower_bound')
    upper_bound = properties.Float('upper_bound')
    typ = properties.String(
        'type', default='ContinuousDimension', deserializable=False)
    # uuid4() is evaluated once, when this class body executes, so the
    # property-level default is a single UUID rather than a fresh one per use.
    template_id = properties.UUID('template_id', default=uuid4())

    def __init__(self,
                 descriptor: RealDescriptor,
                 lower_bound: Optional[float] = None,
                 upper_bound: Optional[float] = None,
                 template_id: Optional[UUID] = None):
        self.descriptor: RealDescriptor = descriptor
        # Compare against None explicitly: a bound of 0.0 is falsy, so
        # ``bound or descriptor.bound`` would silently discard it.
        if lower_bound is None:
            lower_bound = descriptor.lower_bound
        if upper_bound is None:
            upper_bound = descriptor.upper_bound
        self.lower_bound: float = lower_bound
        self.upper_bound: float = upper_bound
        self.template_id: UUID = template_id or uuid4()
class ResponseMetrics(Serializable["ResponseMetrics"]):
    """Metrics that a Predictor Evaluator computed for one response.

    A metric's results can be looked up either by the metric's ``__repr__``
    or by the metric object itself.

    """

    metrics = properties.Mapping(
        properties.String, properties.Object(MetricValue), "metrics")
    """:Dict[str, MetricValue]: Metrics computed for a single response, keyed by the
    metric's ``__repr__``."""

    def __init__(self):
        pass  # pragma: no cover

    def __iter__(self):
        return iter(self.metrics)

    def __getitem__(self, item):
        # Resolve the lookup key first; string keys are used as given and
        # metric objects are looked up by their repr.
        if isinstance(item, str):
            key = item
        elif isinstance(item, PredictorEvaluationMetric):
            key = repr(item)
        else:
            raise TypeError("Cannot index ResponseMetrics with a {}".format(type(item)))
        return self.metrics[key]
class ConcatColumn(Serializable['ConcatColumn'], Column):
    """[ALPHA] Column concatenating the values produced by a list- or set-valued variable.

    The input subcolumn does not have to exist anywhere else in the table
    config, and its parameters do not affect how the table is constructed;
    only the type of the column matters. Requiring a complete Column object
    is simply a limitation of the current API.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    subcolumn: Column
        a column of the type of the individual values to be concatenated

    """

    data_source = properties.String('data_source')
    subcolumn = properties.Object(Column, 'subcolumn')
    typ = properties.String(
        'type', default="concat_column", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes listed for this column."""
        # NOTE(review): "subcolumn" is not listed here -- confirm against the
        # consumers of _attrs whether that omission is intended.
        attribute_names = ["data_source", "typ"]
        return attribute_names

    def __init__(self, *, data_source: str, subcolumn: Column):
        self.data_source = data_source
        self.subcolumn = subcolumn
class IngredientLabelByProcessAndName(
        Serializable['IngredientLabelByProcessAndName'], Variable):
    """[ALPHA] Boolean variable that indicates whether a given label is applied.

    Matching is done on the process template, the ingredient name, and the
    label string to check.

    A column might, for example, indicate whether or not the ingredient
    "ethanol" is labeled as a "solvent" in the "second mixing" process. Many
    such columns would then support the downstream analysis "get the
    volumetric average density of the solvents".

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    label: str
        label to test
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_template = properties.Object(LinkByUID, 'process_template')
    ingredient_name = properties.String('ingredient_name')
    label = properties.String('label')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String(
        'type', default="ing_label_by_process_and_name", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes listed for this variable."""
        attribute_names = [
            "name",
            "headers",
            "process_template",
            "ingredient_name",
            "label",
            "type_selector",
            "typ",
        ]
        return attribute_names

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 process_template: LinkByUID,
                 ingredient_name: str,
                 label: str,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        self.label = label
        self.type_selector = type_selector
class EVScore(Serializable['EVScore'], Score):
    """Score by the expected value for given objectives.

    Parameters
    ----------
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
        When several objectives are given, their scores are summed together.
        This allows multiple objectives to be optimized simultaneously,
        although the weighting of the various objectives cannot be directly
        specified.
    constraints: list[Constraint]
        constraints limiting the allowed values that material instances can have

    """

    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MEV')

    def __init__(self, *,
                 name: Optional[str] = None,
                 description: Optional[str] = None,
                 objectives: List[Objective],
                 constraints: Optional[List[Constraint]] = None,
                 session: Optional[Session] = None):
        self.objectives: List[Objective] = objectives
        self.constraints: List[Constraint] = constraints if constraints else []
        self.session: Optional[Session] = session

        # Passing a name is deprecated: warn, but still honor the value.
        if name is None:
            self._name = "Expected Value"
        else:
            warnings.warn(
                "Naming of Scores is deprecated.  Please do not define the name.",
                category=DeprecationWarning,
            )
            self._name = name

        # Passing a description is deprecated in the same way.
        if description is None:
            self._description = ""
        else:
            warnings.warn(
                "Describing Scores is deprecated.  Please do not define the description.",
                category=DeprecationWarning,
            )
            self._description: str = description

    def __str__(self):
        return '<EVScore>'
class DesignMaterial(Serializable["DesignMaterial"]):
    """The designed material, described as a set of DesignVariables."""

    # Stored under the serialized key 'vars', exposed as ``values``.
    values = properties.Mapping(
        properties.String,
        properties.Object(DesignVariable),
        'vars',
    )
    """:Dict[str, DesignVariable]: mapping from descriptor keys to the value for this material"""

    def __init__(self):
        pass  # pragma: no cover
class PredictedVsActualRealPoint(Serializable["PredictedVsActualRealPoint"]):
    """Predicted and actual values for one real-valued data point."""

    uuid = properties.UUID("uuid")
    """:UUID: Unique Citrine id given to the candidate"""

    identifiers = properties.Set(properties.String, "identifiers")
    """:Set[str]: Set of globally unique identifiers given to the candidate"""

    trial = properties.Integer("trial")
    """:int: 1-based index of the trial this candidate belonged to"""

    fold = properties.Integer("fold")
    """:int: 1-based index of the fold this candidate belonged to"""

    predicted = properties.Object(RealMetricValue, "predicted")
    """:RealMetricValue: Predicted value"""

    actual = properties.Object(RealMetricValue, "actual")
    """:RealMetricValue: Actual value"""

    def __init__(self):
        pass  # pragma: no cover
class EnumeratedDesignSpace(Resource['EnumeratedDesignSpace'], DesignSpace):
    """Design space composed of an explicit enumeration of candidate materials to score.

    Note that every candidate must have exactly the descriptors in the list populated
    (no more, no less) to be included.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    descriptors: list[Descriptor]
        the list of descriptors included in the candidates of the design space
    data: list[dict]
        list of dicts of the shape `{<descriptor_key>: <descriptor_value>}`
        where each dict corresponds to a candidate in the design space
    session: Session
        session to attach to the design space; when omitted (``None``) a new
        ``Session()`` is created for this instance

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    descriptors = properties.List(properties.Object(Descriptor), 'config.descriptors')
    data = properties.List(
        properties.Mapping(properties.String, properties.Raw), 'config.data')
    typ = properties.String(
        'config.type', default='EnumeratedDesignSpace', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID(
        'schema_id', default=UUID('f3907a58-aa46-462c-8837-a5aa9605e79e'))

    def __init__(self,
                 name: str,
                 description: str,
                 descriptors: List[Descriptor],
                 data: List[Mapping[str, Any]],
                 session: Session = None):
        self.name: str = name
        self.description: str = description
        self.descriptors: List[Descriptor] = descriptors
        self.data: List[Mapping[str, Any]] = data
        # A ``Session()`` default in the signature would be evaluated once,
        # when the class is defined, and then shared by every instance.
        # Build the fallback per instance instead.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return f'<EnumeratedDesignSpace {self.name!r}>'
def dump(self) -> dict:
    """Override dump to replace on-platform subspaces with their uids.

    The substitution is made on a deep copy, so this object's own subspaces
    are left untouched. The copy is serialized as a ProductDesignSpace and
    the result is passed through ``_post_dump``.
    """
    clone = deepcopy(self)
    for position, candidate in enumerate(clone.subspaces):
        # Only DesignSpace entries that already carry a uid are swapped out.
        if not isinstance(candidate, DesignSpace) or candidate.uid is None:
            continue
        clone.subspaces[position] = candidate.uid
    payload = properties.Object(ProductDesignSpace).serialize(clone)
    return self._post_dump(payload)
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of the information about one model in a predictor.

    ModelSummary objects are built from saved models and should not be
    instantiated by users.

    Parameters
    ----------
    name: str
        the name of the model
    type_: str
        the type of the model (e.g., "ML Model", "Featurizer", etc.)
    inputs: List[Descriptor]
        list of input descriptors
    outputs: List[Descriptor]
        list of output descriptors
    model_settings: dict
        settings of the model, as a dictionary (details depend on model type)
    feature_importances: List[FeatureImportanceReport]
        list of feature importance reports, one for each output
    predictor_name: str
        the name of the predictor that created this model
    predictor_uid: Optional[uuid]
        the uid of the predictor that created this model

    """

    name = properties.String('name')
    type_ = properties.String('type')
    # NOTE(review): inputs/outputs are declared as lists of strings here while
    # the constructor annotates them as List[Descriptor] -- confirm which is
    # intended.
    inputs = properties.List(properties.String(), 'inputs')
    outputs = properties.List(properties.String(), 'outputs')
    model_settings = properties.Raw('model_settings')
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport),
        'feature_importances',
    )
    predictor_name = properties.String(
        'predictor_configuration_name', default='')
    predictor_uid = properties.Optional(
        properties.UUID(), 'predictor_configuration_uid')

    def __init__(self,
                 name: str,
                 type_: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 model_settings: Dict[str, Any],
                 feature_importances: List[FeatureImportanceReport],
                 predictor_name: str,
                 predictor_uid: Optional[UUID] = None):
        self.name = name
        self.type_ = type_
        self.inputs = inputs
        self.outputs = outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid

    def __str__(self):
        model_name = self.name
        return '<ModelSummary {!r}>'.format(model_name)
class ProductDesignSpace(Resource['ProductDesignSpace'], DesignSpace):
    """[ALPHA] An outer product of univariate dimensions, either continuous or enumerated.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    dimensions: list[Dimension]
        univariate dimensions that are factors of the design space; can be
        enumerated or continuous
    session: Session
        session to attach to the design space; when omitted (``None``) a new
        ``Session()`` is created for this instance

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    dimensions = properties.List(properties.Object(Dimension), 'config.dimensions')
    typ = properties.String('config.type', default='Univariate', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False
    )
    archived = properties.Boolean('archived', default=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID(
        'schema_id', default=UUID('6c16d694-d015-42a7-b462-8ef299473c9a'))

    def __init__(self,
                 name: str,
                 description: str,
                 dimensions: List[Dimension],
                 session: Session = None):
        self.name: str = name
        self.description: str = description
        self.dimensions: List[Dimension] = dimensions
        # A ``Session()`` default in the signature would be evaluated once,
        # when the class is defined, and then shared by every instance.
        # Build the fallback per instance instead.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return f'<ProductDesignSpace {self.name!r}>'
class EIScore(Serializable['EIScore'], Score):
    """[ALPHA] Score based on the expected magnitude of improvement over baselines.

    Parameters
    ----------
    name: str
        name of the score
    description: str
        description of the score
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
    baselines: list[float]
        best-so-far values for the objectives; there must be one per objective
    constraints: list[Constraint]
        constraints limiting the allowed values that material instances can have

    """

    name = properties.String('name')
    description = properties.String('description')
    baselines = properties.List(properties.Float, 'baselines')
    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MEI')

    def __init__(
            self,
            name: str,
            description: str,
            objectives: List[Objective],
            baselines: List[float],
            constraints: Optional[List[Constraint]] = None,
            session: Optional[Session] = None,
    ):
        self.name = name
        self.description = description
        self.objectives = objectives
        self.baselines = baselines
        # A missing (or empty) constraint list is stored as a fresh empty list.
        self.constraints = constraints if constraints else []
        self.session = session

    def __str__(self):
        """Return a short representation that shows the score name."""
        return '<EIScore {!r}>'.format(self.name)
class IngredientIdentifierByProcessTemplateAndName(
        Serializable['IngredientIdentifierByProcessTemplateAndName'], Variable):
    """[ALPHA] Ingredient identifier associated with a process template and a name.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    scope: str
        scope of the identifier (default: the Citrine scope)
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_template = properties.Object(LinkByUID, 'process_template')
    ingredient_name = properties.String('ingredient_name')
    scope = properties.String('scope')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    # The serialized type tag is only written, never read back (deserializable=False).
    typ = properties.String('type', default="ing_id_by_process_and_name", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable."""
        return [
            "name",
            "headers",
            "process_template",
            "ingredient_name",
            "scope",
            "type_selector",
            "typ",
        ]

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 process_template: LinkByUID,
                 ingredient_name: str,
                 scope: str,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        self.scope = scope
        self.type_selector = type_selector
class CrossValidationResult(Serializable["CrossValidationResult"], PredictorEvaluationResult):
    """Outcome of a cross-validation evaluation of a predictor.

    The result for one cross-validated response is retrieved with
    ``cvResult['response_name']``, where ``cvResult`` is a
    :class:`citrine.informatics.predictor_evaluation_result.CrossValidationResult`
    and ``'response_name'`` is a response analyzed by a
    :class:`citrine.informatics.predictor_evaluator.PredictorEvaluator`.

    """

    _evaluator = properties.Object(PredictorEvaluator, "evaluator")
    _response_results = properties.Mapping(
        properties.String,
        properties.Object(ResponseMetrics),
        "response_results",
    )
    typ = properties.String('type', default='CrossValidationResult', deserializable=False)

    def __getitem__(self, item):
        """Look up the stored results for the response called ``item``."""
        results_by_response = self._response_results
        return results_by_response[item]

    def __iter__(self):
        """Iterate over the names of the responses that have results."""
        response_names = self.responses
        return iter(response_names)

    @property
    def evaluator(self) -> PredictorEvaluator:
        """:PredictorEvaluator: Evaluator that produced this result."""
        return self._evaluator

    @property
    def responses(self) -> Set[str]:
        """Responses for which results are present."""
        # Iterating a mapping yields its keys, so this collects the response names.
        return set(self._response_results)

    @property
    def metrics(self) -> Set[PredictorEvaluationMetric]:
        """:Set[PredictorEvaluationMetric]: Metrics for which results are present."""
        # The metrics are read from the evaluator that produced the result.
        producing_evaluator = self._evaluator
        return producing_evaluator.metrics
class PredictorEvaluationWorkflow(Resource['PredictorEvaluationWorkflow'],
                                  Workflow, AIResourceMetadata):
    """A workflow that evaluates a predictor.

    Parameters
    ----------
    name: str
        name of the predictor evaluation workflow
    description: str
        description of the predictor evaluation workflow
    evaluators: List[PredictorEvaluator]
        evaluators to apply to the predictor

    """

    name = properties.String('name')
    description = properties.String('description')
    evaluators = properties.List(properties.Object(PredictorEvaluator), "evaluators")

    status_description = properties.String('status_description', serializable=False)
    """:str: more detailed description of the workflow's status"""
    typ = properties.String('type', default='PredictorEvaluationWorkflow', deserializable=False)

    def __init__(
            self, *,
            name: str,
            description: str = "",
            evaluators: List[PredictorEvaluator],
    ):
        self.name = name
        self.description = description
        self.evaluators = evaluators
        # Neither a session nor a project is attached at construction time.
        self.session: Optional[Session] = None
        self.project_id: Optional[UUID] = None

    def __str__(self):
        """Return a short representation that shows the workflow name."""
        return '<PredictorEvaluationWorkflow {!r}>'.format(self.name)

    @property
    def executions(self) -> PredictorEvaluationExecutionCollection:
        """Return a resource representing all visible executions of this workflow.

        Raises
        ------
        AttributeError
            if ``project_id`` is missing or ``None``

        """
        project_id = getattr(self, 'project_id', None)
        if project_id is None:
            raise AttributeError('Cannot initialize execution without project reference!')
        return PredictorEvaluationExecutionCollection(
            project_id=project_id,
            session=self.session,
            workflow_id=self.uid,
        )
class EVScore(Serializable['EVScore'], Score):
    """[ALPHA] Score based on the expected value for the given objectives.

    Parameters
    ----------
    name: str
        name of the score
    description: str
        description of the score
    objectives: list[Objective]
        objectives (e.g., maximize, minimize, tune, etc.)
    constraints: list[Constraint]
        constraints limiting the allowed values that material instances can have

    """

    name = properties.String('name')
    description = properties.String('description')
    objectives = properties.List(properties.Object(Objective), 'objectives')
    constraints = properties.List(properties.Object(Constraint), 'constraints')
    typ = properties.String('type', default='MEV')

    def __init__(
            self,
            name: str,
            description: str,
            objectives: List[Objective],
            constraints: Optional[List[Constraint]] = None,
            session: Optional[Session] = None,
    ):
        self.name = name
        self.description = description
        self.objectives = objectives
        # A missing (or empty) constraint list is stored as a fresh empty list.
        self.constraints = constraints if constraints else []
        self.session = session

    def __str__(self):
        """Return a short representation that shows the score name."""
        return '<EVScore {!r}>'.format(self.name)
class IngredientQuantityByProcessAndName(
        Serializable['IngredientQuantityByProcessAndName'], Variable):
    """[ALPHA] Quantity of an ingredient, located by process template and ingredient name.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    quantity_dimension: IngredientQuantityDimension
        dimension of the ingredient quantity: absolute quantity, number, mass, or volume fraction.
        valid options are defined by :class:`~citrine.gemtables.variables.IngredientQuantityDimension`
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_template = properties.Object(LinkByUID, 'process_template')
    ingredient_name = properties.String('ingredient_name')
    quantity_dimension = properties.Enumeration(
        IngredientQuantityDimension,
        'quantity_dimension',
    )
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    # The serialized type tag is only written, never read back (deserializable=False).
    typ = properties.String(
        'type',
        default="ing_quantity_by_process_and_name",
        deserializable=False,
    )

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable."""
        return [
            "name",
            "headers",
            "process_template",
            "ingredient_name",
            "quantity_dimension",
            "type_selector",
            "typ",
        ]

    def __init__(
            self, *,
            name: str,
            headers: List[str],
            process_template: LinkByUID,
            ingredient_name: str,
            quantity_dimension: IngredientQuantityDimension,
            type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN,
    ):
        # Column naming.
        self.name = name
        self.headers = headers
        # Which ingredient to look at.
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        # How the quantity is expressed and which object types to match.
        self.quantity_dimension = quantity_dimension
        self.type_selector = type_selector
class LabelFractionConstraint(Serializable['LabelFractionConstraint'], Constraint):
    """Constraint on the total amount of ingredients that carry a given label.

    Parameters
    ----------
    formulation_descriptor: FormulationDescriptor
        descriptor to constrain
    label: str
        ingredient label to constrain
    min: float
        minimum value
    max: float
        maximum value
    is_required: bool, optional
        whether this ingredient is required.
        If ``True``, the label must be present and its value must be within the
        specified range. If ``False``, the label must be within the specified range
        only if it's present in the formulation, i.e., the value can be 0 or on the
        range ``[min, max]``.

    """

    formulation_descriptor = properties.Object(FormulationDescriptor, 'formulation_descriptor')
    label = properties.String('label')
    # ``min`` and ``max`` shadow the builtins inside this class body, but they are
    # part of the public keyword interface and of the serialized keys.
    min = properties.Optional(properties.Float, 'min')
    max = properties.Optional(properties.Float, 'max')
    is_required = properties.Boolean('is_required')
    typ = properties.String('type', default='LabelFractionConstraint')

    def __init__(
            self, *,
            formulation_descriptor: FormulationDescriptor,
            label: str,
            min: float,
            max: float,
            is_required: bool = True,
            session: Optional[Session] = None,
    ):
        self.formulation_descriptor = formulation_descriptor
        self.label = label
        self.min = min
        self.max = max
        self.is_required = is_required
        self.session = session

    def __str__(self):
        """Return a short representation showing the descriptor key and the label."""
        descriptor_key = self.formulation_descriptor.key
        return '<LabelFractionConstraint {!r}::{!r}>'.format(descriptor_key, self.label)