Example #1
0
class IngredientLabelByProcessAndName(
        Serializable['IngredientLabelByProcessAndName'], Variable):
    """[ALPHA] A boolean variable reporting whether an ingredient carries a given label.

    The ingredient is matched by process template and ingredient name; the variable then
    indicates whether the requested label string is applied to it.

    For example, a column might indicate whether or not the ingredient "ethanol" is labeled
    as a "solvent" in the "second mixing" process.  Many such columns would then support the
    downstream analysis "get the volumetric average density of the solvents".

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    label: str
        label to test
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_template = properties.Object(LinkByUID, 'process_template')
    ingredient_name = properties.String('ingredient_name')
    label = properties.String('label')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String(
        'type', default="ing_label_by_process_and_name", deserializable=False)

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 process_template: LinkByUID,
                 ingredient_name: str,
                 label: str,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        # All arguments are keyword-only; each one is stored on the matching property.
        self.name = name
        self.headers = headers
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        self.label = label
        self.type_selector = type_selector

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this variable."""
        return [
            "name",
            "headers",
            "process_template",
            "ingredient_name",
            "label",
            "type_selector",
            "typ",
        ]
class FlatCompositionColumn(Serializable["FlatCompositionColumn"], Column):
    """[ALPHA] Column that flattens the composition into a string of names and quantities.

    The numeric formatting tries to be human readable. For example, if all of the
    quantities are round numbers like ``{"spam": 4.0, "eggs": 1.0}`` then the result omits
    the decimal points, like ``"(spam)4(eggs)1"`` (if sort_order is by quantity).

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    sort_order: CompositionSortOrder
        order with which to sort the components when generating the flat string

    """

    data_source = properties.String('data_source')
    sort_order = properties.Enumeration(CompositionSortOrder, 'sort_order')
    typ = properties.String(
        'type', default="flat_composition_column", deserializable=False)

    def __init__(self, *, data_source: str, sort_order: CompositionSortOrder):
        # Keyword-only construction; values are stored on the matching properties.
        self.data_source = data_source
        self.sort_order = sort_order

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this column."""
        return [
            "data_source",
            "sort_order",
            "typ",
        ]
Example #3
0
class CategoricalConstraint(Serializable['CategoricalConstraint'], Constraint):
    """[ALPHA] Constrain a categorical material attribute to a set of acceptable values.

    Parameters
    ----------
    descriptor_key: str
        the key corresponding to the associated Categorical descriptor
    acceptable_categories: list[str]
        the names of the acceptable categories to constrain to
    session: Optional[Session]
        session stored on the instance as given; ``None`` when omitted

    """

    descriptor_key = properties.String('descriptor_key')
    # Note the serialized field name differs from the attribute name.
    acceptable_categories = properties.List(properties.String(), 'acceptable_classes')
    typ = properties.String('type', default='Categorical')

    def __init__(self,
                 descriptor_key: str,
                 acceptable_categories: List[str],
                 session: Optional[Session] = None):
        self.descriptor_key = descriptor_key
        self.acceptable_categories = acceptable_categories
        self.session = session

    def __str__(self):
        """Return a short tag showing the constrained descriptor key."""
        return f'<CategoricalConstraint {self.descriptor_key!r}>'
class ComponentQuantityColumn(Serializable["ComponentQuantityColumn"], Column):
    """[ALPHA] Column that extracts the quantity of a given component.

    If the component is not present in the composition, then the value in the column
    will be 0.0.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    component_name: str
        name of the component from which to extract the quantity
    normalize: bool
        whether to normalize the quantity by the sum of all component amounts.
        Default is false

    """

    data_source = properties.String('data_source')
    component_name = properties.String("component_name")
    normalize = properties.Boolean("normalize")
    typ = properties.String(
        'type', default="component_quantity_column", deserializable=False)

    def __init__(self, *, data_source: str, component_name: str, normalize: bool = False):
        # Keyword-only construction; values are stored on the matching properties.
        self.data_source = data_source
        self.component_name = component_name
        self.normalize = normalize

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this column."""
        return [
            "data_source",
            "component_name",
            "normalize",
            "typ",
        ]
class NthBiggestComponentNameColumn(
        Serializable["NthBiggestComponentNameColumn"], Column):
    """[ALPHA] Name of the Nth biggest component.

    If there are fewer than N components in the composition, then this column will be
    empty.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    n: int
        index of the component name to extract, starting with 1 for the biggest

    """

    data_source = properties.String('data_source')
    n = properties.Integer("n")
    typ = properties.String(
        'type', default="biggest_component_name_column", deserializable=False)

    def __init__(self, *, data_source: str, n: int):
        # Keyword-only construction; values are stored on the matching properties.
        self.data_source = data_source
        self.n = n

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this column."""
        return [
            "data_source",
            "n",
            "typ",
        ]
class Score(PolymorphicSerializable['Score']):
    """A Score is used to rank materials according to objectives and constraints.

    Abstract type that returns the proper type given a serialized dict.

    """

    _name = properties.String('name')
    _description = properties.String('description')

    @classmethod
    def get_type(cls, data):
        """Return the subtype selected by ``data['type']``.

        Raises
        ------
        KeyError
            if ``data`` has no ``'type'`` entry or the value is not one of
            ``'MLI'``, ``'MEI'`` or ``'MEV'``

        """
        return {'MLI': LIScore, 'MEI': EIScore, 'MEV': EVScore}[data['type']]

    @property
    def name(self):
        """Getter for the score's name (deprecated; emits a DeprecationWarning)."""
        msg = "Getting the Score's name is deprecated."
        # stacklevel=2 attributes the warning to the code reading the property rather
        # than to this module, so Python's default filters can surface it to the caller.
        warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
        return self._name

    @property
    def description(self):
        """Getter for the score's description (deprecated; emits a DeprecationWarning)."""
        msg = "Getting the Score's description is deprecated."
        # See ``name``: point the warning at the caller, not at this library line.
        warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
        return self._description
Example #7
0
class FormulationDescriptor(Serializable['FormulationDescriptor'], Descriptor):
    """[ALPHA] A descriptor to hold formulations.

    Parameters
    ----------
    key: str
        the key corresponding to a descriptor

    """

    key = properties.String('descriptor_key')
    typ = properties.String('type', default='Formulation', deserializable=False)

    def __init__(self, key: str):
        self.key = key

    def __eq__(self, other):
        """Compare by ``key`` and ``typ``; objects lacking either attribute are unequal."""
        try:
            matches = [
                self.__getattribute__(attr) == other.__getattribute__(attr)
                for attr in ("key", "typ")
            ]
        except AttributeError:
            return False
        return all(matches)

    def __str__(self):
        """Return a short tag showing the descriptor key."""
        return f"<FormulationDescriptor {self.key!r}>"

    def __repr__(self):
        """Return a constructor-like description of this descriptor."""
        return f"FormulationDescriptor(key={self.key})"
class InorganicDescriptor(Serializable['InorganicDescriptor'], Descriptor):
    """Captures domain-specific context about the chemical formula for an inorganic compound.

    Parameters
    ----------
    key: str
        the key corresponding to a descriptor
    threshold: float
        the threshold for valid chemical formulae. Users can think of this as a level of
        tolerance for typos and/or loss in interpreting a string input as a parseable
        chemical formula. Defaults to 1.0.
    """

    key = properties.String('descriptor_key')
    threshold = properties.Float('threshold')
    type = properties.String('type', default='Inorganic', deserializable=False)

    def __init__(self, key: str, threshold: float = 1.0):
        self.key = key
        self.threshold = threshold

    def __eq__(self, other):
        """Compare by ``key`` and ``type``; any failure while comparing means unequal."""
        # NOTE(review): ``threshold`` is not part of the comparison, so descriptors that
        # differ only in threshold compare equal - confirm this is intended.
        try:
            matches = [
                self.__getattribute__(attr) == other.__getattribute__(attr)
                for attr in ("key", "type")
            ]
            return all(matches)
        except Exception:
            return False
Example #9
0
class RootIdentifier(Serializable['RootIdentifier'], Variable):
    """[ALPHA] Get the identifier for the root of the material history, by scope.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    scope: str
        scope of the identifier; it must be passed explicitly because this constructor
        defines no default for it

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    scope = properties.String('scope')
    typ = properties.String('type', default="root_id", deserializable=False)

    def __init__(self, *, name: str, headers: List[str], scope: str):
        # Keyword-only construction; values are stored on the matching properties.
        self.name = name
        self.headers = headers
        self.scope = scope

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this variable."""
        return [
            "name",
            "headers",
            "scope",
            "typ",
        ]
Example #10
0
class MolecularStructureDescriptor(
        Serializable['MolecularStructureDescriptor'], Descriptor):
    """[ALPHA] Material descriptor for an organic molecule.

    Accepts SMILES, IUPAC, and InChI String values.

    Parameters
    ----------
    key: str
        The column header key corresponding to this descriptor

    """

    key = properties.String('descriptor_key')
    typ = properties.String('type', default='Organic', deserializable=False)

    def __init__(self, key: str):
        self.key = key

    def __eq__(self, other):
        """Compare by ``key`` and ``typ``; objects lacking either attribute are unequal."""
        try:
            matches = [
                self.__getattribute__(attr) == other.__getattribute__(attr)
                for attr in ("key", "typ")
            ]
        except AttributeError:
            return False
        return all(matches)

    def __str__(self):
        """Return a short tag showing the descriptor key."""
        return f"<MolecularStructureDescriptor {self.key!r}>"

    def __repr__(self):
        """Return a constructor-like description of this descriptor."""
        return f"MolecularStructureDescriptor(key={self.key})"
class CategoricalDescriptor(Serializable['CategoricalDescriptor'], Descriptor):
    """A descriptor to hold categorical variables.

    An exhaustive list of categorical values may be supplied.

    Parameters
    ----------
    key: str
        the key corresponding to a descriptor
    categories: list[str]
        possible categories for this descriptor
    """

    key = properties.String('descriptor_key')
    type = properties.String('type',
                             default='Categorical',
                             deserializable=False)
    categories = properties.List(properties.String, 'descriptor_values')

    def __eq__(self, other):
        """Return True when key, type and the set of categories all match.

        Categories are compared as sets, so ordering and duplicates are ignored. Any
        error raised while comparing (for example ``other`` lacking an attribute) is
        treated as "not equal".
        """
        try:
            attrs = ["key", "type"]
            same_fields = all([
                self.__getattribute__(key) == other.__getattribute__(key)
                for key in attrs
            ])
            # Compare the two category sets directly. The previous comparison against
            # the union of both lists only checked that other's categories were a
            # subset of self's, which made equality asymmetric (a == b but b != a).
            return same_fields and set(self.categories) == set(other.categories)
        except Exception:
            # Deliberate best-effort: an incomparable object is simply unequal.
            return False

    def __init__(self, key: str, categories: List[str]):
        self.key: str = key
        self.categories: List[str] = categories
class ConcatColumn(Serializable['ConcatColumn'], Column):
    """[ALPHA] Column that concatenates multiple values from a list- or set-valued variable.

    The input subcolumn need not exist elsewhere in the table config, and its parameters
    have no bearing on how the table is constructed. Only the type of column is relevant.
    That a complete Column object is required is simply a limitation of the current API.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    subcolumn: Column
        a column of the type of the individual values to be concatenated

    """

    data_source = properties.String('data_source')
    subcolumn = properties.Object(Column, 'subcolumn')
    typ = properties.String('type', default="concat_column", deserializable=False)

    def __init__(self, *, data_source: str, subcolumn: Column):
        # Keyword-only construction; values are stored on the matching properties.
        self.data_source = data_source
        self.subcolumn = subcolumn

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this column."""
        # NOTE(review): "subcolumn" is not listed here, unlike the other properties.
        # Confirm whether that omission is deliberate.
        return [
            "data_source",
            "typ",
        ]
class MeanColumn(Serializable['MeanColumn'], Column):
    """[ALPHA] Column containing the mean of a real-valued variable.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    target_units: Optional[str]
        units to convert the real variable into; ``None`` when omitted

    """

    data_source = properties.String('data_source')
    target_units = properties.Optional(properties.String, "target_units")
    typ = properties.String('type', default="mean_column", deserializable=False)

    def __init__(self, *, data_source: str, target_units: Optional[str] = None):
        # Keyword-only construction; values are stored on the matching properties.
        self.data_source = data_source
        self.target_units = target_units

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this column."""
        return [
            "data_source",
            "target_units",
            "typ",
        ]
class NthBiggestComponentQuantityColumn(
        Serializable["NthBiggestComponentQuantityColumn"], Column):
    """[ALPHA] Quantity of the Nth biggest component.

    If there are fewer than N components in the composition, then this column will be
    empty.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    n: int
        index of the component quantity to extract, starting with 1 for the biggest
    normalize: bool
        whether to normalize the quantity by the sum of all component amounts.
        Default is false

    """

    data_source = properties.String('data_source')
    n = properties.Integer("n")
    normalize = properties.Boolean("normalize")
    typ = properties.String(
        'type', default="biggest_component_quantity_column", deserializable=False)

    def __init__(self, *, data_source: str, n: int, normalize: bool = False):
        # Keyword-only construction; values are stored on the matching properties.
        self.data_source = data_source
        self.n = n
        self.normalize = normalize

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this column."""
        return [
            "data_source",
            "n",
            "normalize",
            "typ",
        ]
class MolecularStructureColumn(Serializable['MolecularStructureColumn'], Column):
    """[ALPHA] Column containing a representation of a molecular structure.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    format: ChemicalDisplayFormat
        the format in which to display the molecular structure

    """

    data_source = properties.String('data_source')
    format = properties.Enumeration(ChemicalDisplayFormat, 'format')
    typ = properties.String(
        'type', default="molecular_structure_column", deserializable=False)

    def __init__(self, *, data_source: str, format: ChemicalDisplayFormat):
        # Keyword-only construction; values are stored on the matching properties.
        self.data_source = data_source
        self.format = format

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this column."""
        return [
            "data_source",
            "format",
            "typ",
        ]
Example #16
0
class ScalarMinObjective(Serializable['ScalarMinObjective'], Objective):
    """Simple single-response minimization objective with optional bounds.

    The bounds, when given, apply to the objective space.

    Parameters
    ----------
    descriptor_key: str
        the key from which to pull the values
    lower_bound: float
        the lower bound on the space, e.g. 0 for a non-negative property
    upper_bound: float
        the upper bound on the space, e.g. 0 for a non-positive property
    session: Optional[Session]
        session stored on the instance as given; ``None`` when omitted
    """

    descriptor_key = properties.String('descriptor_key')
    lower_bound = properties.Optional(properties.Float, 'lower_bound')
    upper_bound = properties.Optional(properties.Float, 'upper_bound')
    typ = properties.String('type', default='ScalarMin')

    def __init__(self,
                 descriptor_key: str,
                 lower_bound: Optional[float] = None,
                 upper_bound: Optional[float] = None,
                 session: Optional[Session] = None):
        self.descriptor_key = descriptor_key
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound
        self.session = session

    def __str__(self):
        """Return a short tag showing the descriptor key being minimized."""
        return f'<ScalarMinObjective {self.descriptor_key!r}>'
Example #17
0
class JobStatusResponse(Resource['JobStatusResponse']):
    """[ALPHA] A response to a job status check.

    The JobStatusResponse summarizes the status for the entire job.

    Parameters
    ----------
    job_type: str
        the type of job for this status report
    status: str
        the actual status of the job.
        One of "Running", "Success", or "Failure".
    tasks: List[TaskNode]
        all of the constituent tasks required to complete this job
    output: Optional[Map[String,String]]
        job output properties and results

    """

    job_type = properties.String("job_type")
    status = properties.String("status")
    tasks = properties.List(Object(TaskNode), "tasks")
    output = properties.Optional(properties.Mapping(String, String), 'output')

    def __init__(self,
                 job_type: str,
                 status: str,
                 tasks: List[TaskNode],
                 output: Optional[Dict[str, str]]):
        # ``output`` may be None but has no default, so it must always be passed.
        self.job_type = job_type
        self.status = status
        self.tasks = tasks
        self.output = output
Example #18
0
class RootInfo(Serializable['RootInfo'], Variable):
    """[ALPHA] Metadata from the root of the material history.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    field: str
        name of the field to assign the variable to

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    field = properties.String('field')
    typ = properties.String('type', default="root_info", deserializable=False)

    def __init__(self, *, name: str, headers: List[str], field: str):
        # Keyword-only construction; values are stored on the matching properties.
        self.name = name
        self.headers = headers
        self.field = field

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this variable."""
        return [
            "name",
            "headers",
            "field",
            "typ",
        ]
Example #19
0
class EnumeratedDimension(Serializable['EnumeratedDimension'], Dimension):
    """A finite, enumerated dimension.

    Parameters
    ----------
    descriptor: Descriptor
        a descriptor of the single dimension
    values: list[str]
        list of values that can be parsed by the descriptor
    template_id: UUID
        UUID that corresponds to the template in DC; stored as ``None`` when omitted

    """

    descriptor = properties.Object(Descriptor, 'descriptor')
    # The serialized field for ``values`` is named 'list'.
    values = properties.List(properties.String(), 'list')
    typ = properties.String('type', default='EnumeratedDimension', deserializable=False)
    # NOTE(review): ``uuid4()`` runs once, when this class body is executed, so every use
    # of this property default yields the same UUID for the life of the process. How the
    # default is consumed is not visible here - confirm a shared value is acceptable.
    template_id = properties.Optional(properties.UUID, 'template_id', default=uuid4())

    def __init__(self,
                 descriptor: Descriptor,
                 values: List[str],
                 template_id: Optional[UUID] = None):
        self.descriptor = descriptor
        self.values = values
        self.template_id = template_id
class CategoricalConstraint(Serializable['CategoricalConstraint'], Constraint):
    """
    [DEPRECATED] A constraint on a categorical material attribute in a set of acceptable values.

    Constructing an instance emits a ``DeprecationWarning`` that names
    ``AcceptableCategoriesConstraint`` as the replacement.

    Parameters
    ----------
    descriptor_key: str
        the key corresponding to the associated Categorical descriptor
    acceptable_categories: list[str]
        the names of the acceptable categories to constrain to
    session: Optional[Session]
        session stored on the instance as given; ``None`` when omitted

    """

    descriptor_key = properties.String('descriptor_key')
    acceptable_categories = properties.List(properties.String(), 'acceptable_classes')
    typ = properties.String('type', default='AcceptableCategoriesConstraint')

    def __init__(self,
                 descriptor_key: str,
                 acceptable_categories: List[str],
                 session: Optional[Session] = None):
        msg = "{this_class} is deprecated. Please use {replacement} instead.".format(
            this_class="CategoricalConstraint",
            replacement=AcceptableCategoriesConstraint.__name__)
        # stacklevel=2 attributes the warning to the code constructing the constraint
        # rather than to this module, so Python's default filters can surface it.
        warn(msg, category=DeprecationWarning, stacklevel=2)
        self.descriptor_key = descriptor_key
        self.acceptable_categories = acceptable_categories
        self.session = session

    def __str__(self):
        """Return a short tag showing the constrained descriptor key."""
        return '<CategoricalConstraint {!r}>'.format(self.descriptor_key)
class DesignSpace(Module):
    """A Citrine Design Space describes the set of materials that can be made.

    Abstract type that returns the proper type given a serialized dict.

    """

    _project_id: Optional[UUID] = None
    _session: Optional[Session] = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    """:Optional[UUID]: Citrine Platform unique identifier"""
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')

    @classmethod
    def get_type(cls, data) -> Type[Serializable]:
        """Return the subtype selected by ``data['config']['type']``.

        Raises
        ------
        KeyError
            if the nested type entry is missing or is not one of the known type names

        """
        # The concrete design spaces are imported here, at call time, not at module level.
        from .data_source_design_space import DataSourceDesignSpace
        from .enumerated_design_space import EnumeratedDesignSpace
        from .formulation_design_space import FormulationDesignSpace
        from .product_design_space import ProductDesignSpace

        type_name = data['config']['type']
        # 'Univariate' and 'ProductDesignSpace' both resolve to ProductDesignSpace.
        subtypes = {
            'Univariate': ProductDesignSpace,
            'ProductDesignSpace': ProductDesignSpace,
            'EnumeratedDesignSpace': EnumeratedDesignSpace,
            'FormulationDesignSpace': FormulationDesignSpace,
            'DataSourceDesignSpace': DataSourceDesignSpace,
        }
        return subtypes[type_name]
Example #22
0
class ExpressionPredictor(Resource['ExpressionPredictor'], Predictor,
                          AIResourceMetadata):
    """A predictor that computes an output from an expression and set of bounded inputs.

    For a discussion of expression syntax and a list of allowed symbols,
    please see the :ref:`documentation<Expression Predictor>`.

    .. seealso::
       If you are using the deprecated predictor please see
       :class:`~citrine.informatics.predictors.DeprecatedExpressionPredictor` for an
       example that shows how to migrate to the new format.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        the description of the predictor
    expression: str
        expression that computes an output from aliased inputs
    output: RealDescriptor
        descriptor that represents the output of the expression
    aliases: Mapping[str, RealDescriptor]
        a mapping from each unknown argument to its descriptor.
        All unknown arguments must have an associated descriptor.
    archived: bool
        stored on the instance as given; defaults to False

    """

    expression = _properties.String('config.expression')
    output = _properties.Object(RealDescriptor, 'config.output')
    aliases = _properties.Mapping(
        _properties.String, _properties.Object(RealDescriptor), 'config.aliases')

    typ = _properties.String(
        'config.type', default='AnalyticExpression', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 expression: str,
                 output: RealDescriptor,
                 aliases: Mapping[str, RealDescriptor],
                 archived: bool = False):
        self.name = name
        self.description = description
        self.expression = expression
        self.output = output
        self.aliases = aliases
        self.archived = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` entry of ``data``."""
        config_name = data['config']['name']
        data['display_name'] = config_name
        return data

    def __str__(self):
        """Return a short tag showing the predictor name."""
        return f'<ExpressionPredictor {self.name!r}>'
Example #23
0
class SimpleMixturePredictor(Resource['SimpleMixturePredictor'], Predictor,
                             AIResourceMetadata):
    """A predictor interface that flattens a formulation into a simple mixture.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        description of the predictor
    input_descriptor: FormulationDescriptor
        input descriptor for the hierarchical (un-mixed) formulation
    output_descriptor: FormulationDescriptor
        output descriptor for the flat (mixed) formulation
    training_data: Optional[List[DataSource]]
        Sources of training data. Each can be either a CSV or a GEM Table. Candidates
        from multiple data sources will be combined into a flattened list and
        de-duplicated by uid and identifiers. De-duplication is performed if a uid or
        identifier is shared between two or more rows. The content of a de-duplicated row
        will contain the union of data across all rows that share the same uid or at
        least 1 identifier. Training data is unnecessary if the predictor is part of a
        graph that includes all training data required by this predictor.
    archived: bool
        stored on the instance as given; defaults to False

    """

    _resource_type = ResourceTypeEnum.MODULE

    input_descriptor = _properties.Object(FormulationDescriptor, 'config.input')
    output_descriptor = _properties.Object(FormulationDescriptor, 'config.output')
    training_data = _properties.List(
        _properties.Object(DataSource), 'config.training_data')

    typ = _properties.String(
        'config.type', default='SimpleMixture', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 input_descriptor: FormulationDescriptor,
                 output_descriptor: FormulationDescriptor,
                 training_data: Optional[List[DataSource]] = None,
                 archived: bool = False):
        self.name = name
        self.description = description
        self.input_descriptor = input_descriptor
        self.output_descriptor = output_descriptor
        # The raw argument (possibly None) is passed through _wrap_training_data,
        # which is defined outside this class body.
        self.training_data = self._wrap_training_data(training_data)
        self.archived = archived

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` entry of ``data``."""
        config_name = data['config']['name']
        data['display_name'] = config_name
        return data

    def __str__(self):
        """Return a short tag showing the predictor name."""
        return f'<SimpleMixturePredictor {self.name!r}>'
Example #24
0
class AttributeByTemplateAndObjectTemplate(
        Serializable['AttributeByTemplateAndObjectTemplate'], Variable):
    """[ALPHA] Attribute marked by an attribute template and an object template.

    For example, one property may be measured by two different measurement techniques.
    In this case, that property would have the same attribute template.  Filtering by
    measurement templates, which identify the measurement techniques, disambiguates the
    technique used to measure that otherwise ambiguous property.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    object_template: LinkByUID
        template that identifies the associated object
    attribute_constraints: list[(LinkByUID, Bounds)]
        constraints on object attributes in the target object that must be satisfied.
        Constraints are expressed as Bounds.  Attributes are expressed with links. The
        attribute that the variable is being set to may be the target of a constraint as
        well. ``None`` when omitted.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    object_template = properties.Object(LinkByUID, 'object_template')
    # Each constraint is a two-element list: a link followed by bounds.
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)])),
        'attribute_constraints')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_by_object", deserializable=False)

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 attribute_template: LinkByUID,
                 object_template: LinkByUID,
                 attribute_constraints: List[List[Union[LinkByUID, BaseBounds]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        # Keyword-only construction; values are stored on the matching properties.
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.object_template = object_template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector

    def _attrs(self) -> List[str]:
        """List the attribute names that characterize this variable."""
        return [
            "name",
            "headers",
            "attribute_template",
            "object_template",
            "attribute_constraints",
            "type_selector",
            "typ",
        ]
Example #25
0
class DesignWorkflow(Resource['DesignWorkflow'], Workflow):
    """Object that generates scored materials that may approach higher values of the score.

    Parameters
    ----------
    name: str
        the name of the workflow
    design_space_id: UUID
        the UUID corresponding to the design space to use
    processor_id: UUID
        the UUID corresponding to the processor to use
    predictor_id: UUID
        the UUID corresponding to the predictor to use
    project_id: Optional[UUID]
        the UUID corresponding to the project to use; it must be set before
        ``executions`` can be accessed
    session: Optional[Session]
        the session handed to the execution collection; when omitted, a new
        ``Session()`` is created for this instance

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('display_name')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)

    # TODO: Figure out how to make these fields richer/use actual objects
    design_space_id = properties.UUID('modules.design_space_id')
    processor_id = properties.UUID('modules.processor_id')
    predictor_id = properties.UUID('modules.predictor_id')

    # The project_id is used to keep a reference to the project under which the workflow was
    # created. It is currently unclear if this is the best way to do this. Another option might
    # be to have all objects have a context object, but that also seems to have downsides.
    def __init__(self,
                 name: str,
                 design_space_id: UUID,
                 processor_id: UUID,
                 predictor_id: UUID,
                 project_id: Optional[UUID] = None,
                 session: Optional[Session] = None):
        """Store the workflow configuration along with its project and session references."""
        self.name = name
        self.design_space_id = design_space_id
        self.processor_id = processor_id
        self.predictor_id = predictor_id
        self.project_id = project_id
        # A ``Session()`` default in the signature would be evaluated a single time, when the
        # class body is executed, and that one object would then be shared by every workflow
        # constructed without an explicit session. Build it per instance instead.
        self.session = session if session is not None else Session()

    def __str__(self):
        """Return a short tag containing the repr of the workflow name."""
        return '<DesignWorkflow {!r}>'.format(self.name)

    @property
    def executions(self) -> WorkflowExecutionCollection:
        """Return a resource representing all visible executions of this workflow.

        Raises
        ------
        AttributeError
            if ``project_id`` is missing or ``None``

        """
        # getattr with a fallback also covers instances on which project_id was never set.
        if getattr(self, 'project_id', None) is None:
            raise AttributeError(
                'Cannot initialize execution without project reference!')
        return WorkflowExecutionCollection(self.project_id, self.uid,
                                           self.session)
Example #26
0
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    Instances are meant to be built from saved models rather than created by users.

    Parameters
    ----------
    name: str
        the name of the model
    type_: str
        the type of the model (e.g., "ML Model", "Featurizer", etc.)
    inputs: List[Descriptor]
        list of input descriptors
    outputs: List[Descriptor]
        list of output descriptors
    model_settings: dict
        settings of the model, as a dictionary (details depend on model type)
    feature_importances: List[FeatureImportanceReport]
        list of feature importance reports, one for each output
    predictor_name: str
        the name of the predictor that created this model
    predictor_uid: Optional[uuid]
        the uid of the predictor that created this model

    """

    name = properties.String('name')
    type_ = properties.String('type')
    inputs = properties.List(properties.String(), 'inputs')
    outputs = properties.List(properties.String(), 'outputs')
    model_settings = properties.Raw('model_settings')
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport),
        'feature_importances',
    )
    predictor_name = properties.String(
        'predictor_configuration_name',
        default='',
    )
    predictor_uid = properties.Optional(
        properties.UUID(),
        'predictor_configuration_uid',
    )

    def __init__(
        self,
        name: str,
        type_: str,
        inputs: List[Descriptor],
        outputs: List[Descriptor],
        model_settings: Dict[str, Any],
        feature_importances: List[FeatureImportanceReport],
        predictor_name: str,
        predictor_uid: Optional[UUID] = None,
    ):
        """Store every argument on the attribute of the same name."""
        # What the model is.
        self.name = name
        self.type_ = type_

        # What the model consumes, produces and how it is configured.
        self.inputs = inputs
        self.outputs = outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances

        # Which predictor the model belongs to.
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid

    def __str__(self):
        """Return a short tag containing the repr of the model name."""
        template = '<ModelSummary {!r}>'
        return template.format(self.name)
class CoverageProbability(Serializable["CoverageProbability"],
                          PredictorEvaluationMetric):
    """Percentage of observations that fall within a given confidence interval.

    The coverage level can be specified to 3 decimal places, e.g., 0.123, but not 0.1234.

    Parameters
    ----------
    coverage_level: Union[str, float]
        Confidence-interval coverage level.
        The coverage level must be between 0 and 1.0 (non-inclusive) and will be rounded
        to 3 decimal places.  Default: 0.683 corresponds to one std. deviation

    Raises
    ------
    TypeError
        if ``coverage_level`` is neither a string nor a float
    ValueError
        if ``coverage_level`` is a string that cannot be parsed as a float, or if the
        value is NaN or does not lie strictly between 0 and 1

    """

    _level_str = properties.String("coverage_level")
    typ = properties.String("type",
                            default="CoverageProbability",
                            deserializable=False)

    def __init__(self, coverage_level: Union[str, float] = "0.683"):
        if isinstance(coverage_level, str):
            try:
                raw_float = float(coverage_level)
            except ValueError as err:
                # Re-raise with a domain-specific message, keeping the parse error as the cause.
                raise ValueError(
                    "Invalid coverage level string '{requested_level}'. "
                    "Coverage level must represent a floating point number between "
                    "0 and 1 (non-inclusive).".format(
                        requested_level=coverage_level)) from err
        elif isinstance(coverage_level, float):
            raw_float = coverage_level
        else:
            raise TypeError("Coverage level must be a string or float")

        # Written as a chained comparison on purpose: every comparison against NaN is False,
        # so a check of the form ``x >= 1.0 or x <= 0.0`` would let NaN through and it would
        # end up formatted as the level string "  nan".
        if not 0.0 < raw_float < 1.0:
            raise ValueError(
                "Coverage level must be between 0 and 1 (non-inclusive).")
        # NOTE(review): rounding can still yield 0.0 or 1.0 for inputs within 0.0005 of a
        # bound (e.g. 0.9999 -> "1.000"); confirm whether such levels should be rejected.
        _level_float = round(raw_float, 3)
        if not isclose(_level_float, raw_float):
            warn("Coverage level can only be specified to 3 decimal places. "
                 "Requested level '{requested_level}' will be rounded "
                 "to {rounded_level}.".format(requested_level=coverage_level,
                                              rounded_level=_level_float))

        # The level is stored as a fixed three-decimal string, e.g. "0.683".
        self._level_str = "{:5.3f}".format(_level_float)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes declared on this metric."""
        return ["typ", "_level_str"]

    def __repr__(self):
        """Return an identifier-like form, e.g. ``coverage_probability_0.683``."""
        return "coverage_probability_{}".format(self._level_str)

    def __str__(self):
        """Return a human-readable form, e.g. ``Coverage Probability (0.683)``."""
        return "Coverage Probability ({})".format(self._level_str)
Example #28
0
class AttributeByTemplate(Serializable['AttributeByTemplate'], Variable):
    """[ALPHA] Attribute marked by an attribute template.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    attribute_constraints: list[list[LinkByUID, Bounds]]
        optional constraints on object attributes in the target object that must be
        satisfied. Each constraint pairs a link to an attribute with the Bounds it must meet.
        The attribute that the variable is being set to may itself be constrained.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    template = properties.Object(LinkByUID, 'template')
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ),
        'attribute_constraints',
    )
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_by_template", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes declared on this variable, in declaration order."""
        attribute_names = [
            "name",
            "headers",
            "template",
            "attribute_constraints",
            "type_selector",
            "typ",
        ]
        return attribute_names

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 template: LinkByUID,
                 attribute_constraints: Optional[
                     List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        """Store every keyword argument on the attribute of the same name."""
        self.name = name
        self.headers = headers
        self.template = template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
Example #29
0
class IngredientIdentifierByProcessTemplateAndName(
        Serializable['IngredientIdentifierByProcessTemplateAndName'], Variable):
    """[ALPHA] Ingredient identifier associated with a process template and a name.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    scope: str
        scope of the identifier; the constructor defines no default, so a value
        must always be supplied
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_template = properties.Object(LinkByUID, 'process_template')
    ingredient_name = properties.String('ingredient_name')
    scope = properties.String('scope')
    type_selector = properties.Enumeration(DataObjectTypeSelector,
                                           "type_selector")
    typ = properties.String('type',
                            default="ing_id_by_process_and_name",
                            deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes declared on this variable, in declaration order."""
        return [
            "name", "headers", "process_template", "ingredient_name", "scope",
            "type_selector", "typ"
        ]

    def __init__(
        self,
        *,
        name: str,
        headers: List[str],
        process_template: LinkByUID,
        ingredient_name: str,
        scope: str,
        type_selector: DataObjectTypeSelector = DataObjectTypeSelector.
        PREFER_RUN):
        """Store every keyword argument on the attribute of the same name."""
        self.name = name
        self.headers = headers
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        self.scope = scope
        self.type_selector = type_selector
Example #30
0
class AIResourceMetadata():
    """Abstract class for representing common metadata for Resources.

    The fields are declared as ``properties`` descriptors and are grouped below by
    concern: creation, update, archival, experimental flag and status.  The bare string
    that follows each declaration documents that field's type and meaning.
    """

    # --- Creation: who created the resource and when. ---
    created_by = properties.Optional(properties.UUID,
                                     'created_by',
                                     serializable=False)
    """:Optional[UUID]: id of the user who created the resource"""
    create_time = properties.Optional(properties.Datetime,
                                      'create_time',
                                      serializable=False)
    """:Optional[datetime]: date and time at which the resource was created"""

    # --- Update: who last modified the resource and when. ---
    updated_by = properties.Optional(properties.UUID,
                                     'updated_by',
                                     serializable=False)
    """:Optional[UUID]: id of the user who most recently updated the resource,
    if it has been updated"""
    update_time = properties.Optional(properties.Datetime,
                                      'update_time',
                                      serializable=False)
    """:Optional[datetime]: date and time at which the resource was most recently updated,
    if it has been updated"""

    # --- Archival. `archived` is the one field in this listing that is not declared with
    # serializable=False; it defaults to False. ---
    archived = properties.Boolean('archived', default=False)
    """:bool: whether the resource is archived (hidden but not deleted)"""
    archived_by = properties.Optional(properties.UUID,
                                      'archived_by',
                                      serializable=False)
    """:Optional[UUID]: id of the user who archived the resource, if it has been archived"""
    archive_time = properties.Optional(properties.Datetime,
                                       'archive_time',
                                       serializable=False)
    """:Optional[datetime]: date and time at which the resource was archived,
    if it has been archived"""

    # --- Experimental flag. Note the default of True when no value is present. ---
    experimental = properties.Boolean("experimental",
                                      serializable=False,
                                      default=True)
    """:bool: whether the resource is experimental (newer, less well-tested functionality)"""
    experimental_reasons = properties.Optional(properties.List(
        properties.String()),
                                               'experimental_reasons',
                                               serializable=False)
    """:Optional[List[str]]: human-readable reasons why the resource is experimental"""

    # --- Status: a short status string plus optional explanatory messages. ---
    status = properties.Optional(properties.String(),
                                 'status',
                                 serializable=False)
    """:Optional[str]: short description of the resource's status"""
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)
    """:Optional[List[str]]: human-readable explanations of the status"""