class CrossValidationAnalysisConfiguration(
        Serializable['CrossValidationAnalysisConfiguration']):
    """[DEPRECATED] Configuration settings for running cross-validation in a performance workflow.

    Instantiating this class emits a warning that names
    ``CrossValidationEvaluator`` as the replacement.

    Parameters
    ----------
    name : str
        Name of the analysis configuration
    description: str
        Description of the analysis configuration
    n_folds: int
        Number of folds
    n_trials: int
        Number of cross-validation trials to run, each with ``n_folds`` folds
    max_rows: int
        Maximum number of training candidates to use during cross-validation
    seed: int, optional
        Seed used to generate random test/train splits.
        If not provided, a random seed is used.
    group_by_keys: List[str], optional
        Set of keys used to group candidates.
        If present, candidates are grouped by the hash of
        ``(key, value)`` pairs computed on the given keys.
        If not provided, candidates are not grouped.
    responses: List[str], optional
        Set of descriptor keys to cross-validate.
        All requested responses must be present as an output of the predictor being analyzed.
        If not provided cross-validation metrics will be computed for all predictor responses.
        These cross-validated responses are removed from the data during the analysis,
        so which responses are requested can affect the performance metrics if the predictor
        contains latent variables. For example, if only the final output (leaf) responses are
        requested, latent variables are not removed during cross-validation. In this case the
        actual (and not predicted) values for latent variables are fed into the models used
        to compute leaf responses. Often this will manifest as a lower model error for the
        final response, compared to the model error computed when latent variables are requested
        and hence removed from the data. Note, if no responses are specified all leaf and
        latent variables are removed from the data during cross-validation.

    """

    name = properties.String('name')
    description = properties.String('description')
    n_folds = properties.Integer('n_folds')
    n_trials = properties.Integer('n_trials')
    seed = properties.Optional(properties.Integer, 'seed')
    group_by_keys = properties.Optional(properties.List(properties.String),
                                        'group_by_keys')
    responses = properties.Optional(properties.List(properties.String),
                                    'responses')
    max_rows = properties.Integer('max_rows')
    typ = properties.String('type',
                            default='CrossValidationAnalysis',
                            deserializable=False)

    def __init__(
        self,
        name: str,
        description: str,
        n_folds: int,
        n_trials: int,
        max_rows: int,
        seed: Optional[int] = None,
        group_by_keys: Optional[List[str]] = None,
        responses: Optional[List[str]] = None,
    ):
        # Use ``__name__`` (the class's own name). The previous ``.name`` lookup
        # resolved to the class-level ``name`` property declared above, so the
        # warning did not actually mention this class.
        warn("{this_class} is deprecated. Please use {replacement} instead".
             format(this_class=self.__class__.__name__,
                    replacement=CrossValidationEvaluator.__name__))
        self.name = name
        self.description = description
        self.n_folds = n_folds
        self.n_trials = n_trials
        self.seed = seed
        self.group_by_keys = group_by_keys
        self.max_rows = max_rows
        self.responses = responses
Ejemplo n.º 2
0
class ChemicalFormulaFeaturizer(Resource['ChemicalFormulaFeaturizer'],
                                Predictor, AIResourceMetadata):
    """
    A featurizer for chemical formulae. Inspired by Magpie.

    Given chemical formula data, the ChemicalFormulaFeaturizer computes a configurable set of
    features. Each feature is a function of element-level properties, in the spirit of
    `Magpie <https://bitbucket.org/wolverton/magpie/src/master/>`_. Which features are computed
    is controlled by the ``features`` and ``excludes`` arguments; both accept individual feature
    names as well as predefined aliases. Many of the features are stoichiometrically weighted
    generalized means of element-level properties, and the ``powers`` argument selects which
    generalized means are computed.

    The default is the "standard" alias, corresponding to features that are intuitive and
    often correlate with properties of interest. Other aliases are "physical," "electronic,"
    and "periodicTable."

    Each entry below gives a feature name followed by the aliases that include it.

    The following features are weighted means of simple elemental properties.

    - "Pauling electronegativity": standard, electronic
    - "Number of d valence electrons": standard, electronic
    - "Number of unfilled f valence electrons": standard, electronic
    - "Number of f valence electrons": standard, electronic
    - "Number of unfilled p valence electrons": standard, electronic
    - "Number of p valence electrons": standard, electronic
    - "Number of unfilled s valence electrons": standard, electronic
    - "Number of s valence electrons": standard, electronic
    - "Total number of unfilled valence electrons": standard, electronic
    - "Total number of valence electrons": standard, electronic
    - "Elemental work function": standard, electronic
    - "Elemental polarizability": standard, electronic
    - "Radius of d orbitals": standard, electronic
    - "Radius of s orbitals": standard, electronic
    - "Radius of p orbitals": standard, electronic
    - "Elemental magnetic moment": standard, electronic
    - "Elemental atomic volume": standard, electronic, physical
    - "Elemental electron density": standard, electronic
    - "Mendeleev number": standard, periodicTable
    - "Row in periodic table": standard, periodicTable
    - "Elemental bulk modulus": standard, physical
    - "Elemental density": standard, physical
    - "Elemental melting temperature": standard, physical
    - "Elemental crystal structure (space group)": standard, electronic, physical
    - "AtomicVolume": electronic, physical
    - "Number": periodicTable
    - "CovalentRadius": electronic, physical
    - "DipolePolarizability": electronic
    - "ElectronAffinity": electronic
    - "FirstIonizationEnergy": electronic
    - "GSbandgap": electronic
    - "GSenergy_pa": electronic
    - "GSestBCClatcnt": electronic, physical
    - "GSvolume_pa": electronic, physical
    - "MiracleRadius": electronic, physical
    - "NdUnfilled": electronic
    - "ZungerPP-r_pi": electronic
    - "AtomicWeight": physical, periodicTable
    - "Column in periodic table": periodicTable
    - "IsAlkali": periodicTable
    - "IsDBlock": periodicTable
    - "IsFBlock": periodicTable
    - "IsMetal": periodicTable
    - "IsNonmetal": periodicTable
    - "BoilingT": physical
    - "FusionEnthalpy": physical
    - "HeatCapacityMass": physical
    - "HeatCapacityMolar": physical
    - "HeatFusion": physical
    - "ShearModulus": physical
    - "ValenceZeff": electronic, physical

    The following features are weighted means of more complex elemental properties.

    - "Packing density": standard, physical
    - "Liquid range": standard, physical
    - "Non-dimensional liquid range": standard, physical
    - "Liquid ratio": standard, physical
    - "Elastic Poisson Ratio": standard, physical
    - "DFT energy density": standard, electronic, physical
    - "Interatomic distance": standard, physical
    - "Ionization Affinity Ratio": standard, electronic
    - "Ratio of Electron Affinity to Electronegativity": standard, electronic
    - "Trouton's Ratio": standard, physical
    - "Miracle Ratio": standard, electronic
    - "DFT volume ratio": standard, physical
    - "Mulliken electronegativity": standard, electronic
    - "Modulii sum": standard, physical
    - "Zunger Pseudopotential radius ratio": standard, electronic
    - "BCC Efficiency": standard, physical
    - "Non-dimensional heat of fusion": standard, physical
    - "Non-dimensional band gap": standard, electronic
    - "Conduction ionization energy": standard, electronic
    - "Valence electron density": standard, electronic
    - "Non-dimensional work function": standard, electronic
    - "Shear Modulus Melting Temp Product": standard, physical

    The following features are not weighted means. Their values do not depend on ``powers``.

    - "Maximum electronegativity difference": standard, electronic
    - "Maximum radius difference": standard, electronic, physical
    - "Maximum radius ratio": standard, electronic, physical
    - "Min atomic radius plus max electronegativity difference": standard, electronic, physical
    - "Number of elements"
    - "Minimum atomic fraction"
    - "Maximum atomic fraction"
    - "Minimum weight fraction": standard, periodicTable
    - "Maximum weight fraction": standard, periodicTable
    - "Formula weight": standard, physical

    Parameters
    ----------
    name: str
        name of the featurizer
    description: str
        description of the featurizer
    input_descriptor: ChemicalFormulaDescriptor
        the descriptor to featurize
    features: Optional[List[str]]
        The list of features to compute, either by name or by group alias.
        When omitted, ``["standard"]`` is used.
    excludes: Optional[List[str]]
        The list of features to exclude, either by name or by group alias.
        When omitted, nothing is excluded.
        The final set of features generated by the predictor is set(features) - set(excludes).
    powers: Optional[List[int]]
        The list of powers to use when computing generalized weighted means of element properties.
        p=1 corresponds to the ordinary mean, p=2 is the root mean square, etc.
        When omitted, ``[1]`` is used.

    """

    _resource_type = ResourceTypeEnum.MODULE

    input_descriptor = _properties.Object(Descriptor, 'config.input')
    features = _properties.List(_properties.String, 'config.features')
    excludes = _properties.List(_properties.String, 'config.excludes')
    powers = _properties.List(_properties.Integer, 'config.powers')

    typ = _properties.String('config.type',
                             default='ChemicalFormulaFeaturizer',
                             deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 input_descriptor: ChemicalFormulaDescriptor,
                 features: Optional[List[str]] = None,
                 excludes: Optional[List[str]] = None,
                 powers: Optional[List[int]] = None):
        # Resolve omitted arguments to fresh default lists; an explicitly passed
        # empty list is kept as given.
        if features is None:
            features = ["standard"]
        if excludes is None:
            excludes = []
        if powers is None:
            powers = [1]

        self.name = name
        self.description = description
        self.input_descriptor = input_descriptor
        self.features = features
        self.excludes = excludes
        self.powers = powers

    def _post_dump(self, data: dict) -> dict:
        """Copy the name found under ``config`` to the top-level ``display_name`` key."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        """Return a short tag containing the repr of this featurizer's name."""
        template = '<ChemicalFormulaFeaturizer {!r}>'
        return template.format(self.name)
Ejemplo n.º 3
0
def test_list_property_serde(sub_prop, sub_value, sub_serialized):
    """Check that a ``properties.List`` of ``sub_prop`` round-trips a five-element list.

    ``sub_value`` and ``sub_serialized`` are the in-memory and serialized forms
    of one element; each is repeated five times to build the list fixtures.
    """
    n_items = 5
    list_prop = properties.List(sub_prop)
    values = [sub_value] * n_items
    payload = [sub_serialized] * n_items
    assert list_prop.deserialize(payload) == values
    assert list_prop.serialize(values) == payload
Ejemplo n.º 4
0
class IngredientLabelsSetInOutput(Serializable['IngredientLabelsSetInOutput'],
                                  Variable):
    """[ALPHA] The set of labels on an ingredient in the trunk of a material history tree.

    The search for an ingredient begins at the terminal of the material history tree and
    continues until any of the given process templates is reached. Those templates stop the
    search from going further, but they are inclusive: a match is extracted if an ingredient
    with the specified ingredient name is found at or before a cutoff.

    This variable definition lets a set of labels be extracted when an ingredient is used
    in multiple processes. As an example, consider a paint formed by mixing red and yellow
    pigments, where the red pigment is formed by mixing yellow and magenta. This variable could be
    used to represent the labels applied to yellow in both mixing processes (red and the final
    paint) in a single column, provided the process templates that mixed red and the final paint
    are included as cutoffs.

    In general, this variable should be preferred over an
    :class:`~citrine.gemtables.variables.IngredientLabelSetByProcessTemplateAndName` when
    mixtures are hierarchical (i.e., blends of blends).
    It allows an ingredient with a single name to be used in
    multiple processes without defining additional variables that manifest as additional columns
    in your GEM table, and must be used in place of the former if the same process template is
    used to represent mixing at multiple levels in the material history hierarchy. Going back
    to the previous example, this variable must be used in place of an
    :class:`~citrine.gemtables.variables.IngredientLabelSetByProcessTemplateAndName` if the same
    process template was used to represent the process that mixed red and the final paint.
    Using :class:`~citrine.gemtables.variables.IngredientLabelSetByProcessTemplateAndName`
    would result in an ambiguous match because yellow would be found twice in the
    material history, once when mixing red and again when mixing the final paint.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_templates: list[LinkByUID]
        process templates that act as cutoffs for the ingredient search. The cutoffs are
        inclusive: the ingredient may be matched at these processes, but the search does not
        continue past them.
    ingredient_name: str
        name of ingredient

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_templates = properties.List(properties.Object(LinkByUID),
                                        'process_templates')
    ingredient_name = properties.String('ingredient_name')
    typ = properties.String('type',
                            default="ing_label_set_in_output",
                            deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this variable."""
        return [
            "name",
            "headers",
            "process_templates",
            "ingredient_name",
            "typ",
        ]

    def __init__(self,
                 *,
                 name: str,
                 headers: List[str],
                 process_templates: List[LinkByUID],
                 ingredient_name: str):
        self.name = name
        self.headers = headers
        self.process_templates = process_templates
        self.ingredient_name = ingredient_name
Ejemplo n.º 5
0
class IngredientQuantityInOutput(Serializable['IngredientQuantityInOutput'],
                                 Variable):
    """[ALPHA] Ingredient quantity in the trunk of a material history tree.

    The search for an ingredient starts at the terminal of the material history tree and proceeds
    until any of the given process templates are reached. Those templates block the search from
    continuing but are inclusive: a match is extracted if an ingredient with the specified
    ingredient name is found at or before a cutoff.

    This variable definition allows a quantity to be extracted when an ingredient is used in
    multiple processes. As an example, consider a paint formed by mixing red and yellow pigments,
    where the red pigment is formed by mixing yellow and magenta. This variable could be used to
    represent the quantity of yellow in both mixing processes (red and the final paint) in a
    single column provided the process templates that mixed red and the final paint
    are included as cutoffs.

    In general, this variable should be preferred over an
    :class:`~citrine.gemtables.variables.IngredientQuantityByProcessTemplateAndName`
    when mixtures are hierarchical (i.e., blends of blends). It allows an ingredient with a
    single name to be used in multiple processes without defining additional variables
    that manifest as additional columns in your table, and must be used in place of the
    former if the same process template is used to represent mixing at multiple levels
    in the material history hierarchy.
    Going back to the previous example, this variable must be used in place of an
    :class:`~citrine.gemtables.variables.IngredientQuantityByProcessTemplateAndName` if the same
    process template was used to represent the process that mixed red and the final paint.
    Using :class:`~citrine.gemtables.variables.IngredientQuantityByProcessTemplateAndName`
    would result in an ambiguous match because yellow would be found twice in the material history,
    once when mixing red and again when mixing the final paint.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    ingredient_name: str
        Name of the ingredient to search for
    quantity_dimension: IngredientQuantityDimension
        Dimension of the ingredient quantity: absolute quantity, number, mass, or volume fraction.
        Valid options are defined by
        :class:`~citrine.gemtables.variables.IngredientQuantityDimension`
    process_templates: list[LinkByUID]
        Process templates halt the search for a matching ingredient name.
        These process templates are inclusive.
        The ingredient may be present in these processes but not before.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN
    unit: str
        an optional unit: only ingredient quantities that are convertible to this unit will be
        matched. Note that this parameter is mandatory when quantity_dimension is
        IngredientQuantityDimension.ABSOLUTE, and must be omitted (or empty) otherwise.

    Raises
    ------
    ValueError
        If ``quantity_dimension`` is ABSOLUTE and ``unit`` is None, or if
        ``quantity_dimension`` is not ABSOLUTE and a non-empty ``unit`` is given.

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    ingredient_name = properties.String('ingredient_name')
    quantity_dimension = properties.Enumeration(IngredientQuantityDimension,
                                                'quantity_dimension')
    process_templates = properties.List(properties.Object(LinkByUID),
                                        'process_templates')
    type_selector = properties.Enumeration(DataObjectTypeSelector,
                                           "type_selector")
    unit = properties.Optional(properties.String, "unit")
    typ = properties.String('type',
                            default="ing_quantity_in_output",
                            deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this variable.

        ``quantity_dimension`` is listed so that every declared field is
        represented here; it was previously missing.
        """
        return [
            "name", "headers", "ingredient_name", "quantity_dimension",
            "process_templates", "type_selector", "unit", "typ"
        ]

    def __init__(
            self,
            *,
            name: str,
            headers: List[str],
            ingredient_name: str,
            quantity_dimension: IngredientQuantityDimension,
            process_templates: List[LinkByUID],
            type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN,
            unit: Optional[str] = None):
        self.name = name
        self.headers = headers
        self.ingredient_name = ingredient_name
        self.process_templates = process_templates
        self.type_selector = type_selector

        # Cast to make sure the string is valid
        if not isinstance(quantity_dimension, IngredientQuantityDimension):
            quantity_dimension = IngredientQuantityDimension.get_enum(
                quantity_dimension)
        self.quantity_dimension = quantity_dimension

        # An absolute quantity needs a unit; any other dimension must not carry
        # one (an empty string is tolerated as "no unit").
        is_absolute = quantity_dimension == IngredientQuantityDimension.ABSOLUTE
        if is_absolute and unit is None:
            raise ValueError(
                "Absolute Quantity variables require that 'unit' is set")
        if not is_absolute and unit is not None and unit != "":
            raise ValueError("Fractional variables cannot take a 'unit'")
        self.unit = unit
Ejemplo n.º 6
0
class IngredientQuantityByProcessAndName(
        Serializable['IngredientQuantityByProcessAndName'], Variable):
    """[ALPHA] The quantity of an ingredient associated with a process template and a name.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    quantity_dimension: IngredientQuantityDimension
        Dimension of the ingredient quantity: absolute quantity, number, mass, or volume fraction.
        Valid options are defined by
        :class:`~citrine.gemtables.variables.IngredientQuantityDimension`
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN
    unit: str
        An optional unit: only ingredient quantities that are convertible to this unit will be
        matched. Note that this parameter is mandatory when quantity_dimension is
        IngredientQuantityDimension.ABSOLUTE, and must be omitted (or empty) otherwise.

    Raises
    ------
    ValueError
        If ``quantity_dimension`` is ABSOLUTE and ``unit`` is None, or if
        ``quantity_dimension`` is not ABSOLUTE and a non-empty ``unit`` is given.

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_template = properties.Object(LinkByUID, 'process_template')
    ingredient_name = properties.String('ingredient_name')
    quantity_dimension = properties.Enumeration(IngredientQuantityDimension,
                                                'quantity_dimension')
    type_selector = properties.Enumeration(DataObjectTypeSelector,
                                           "type_selector")
    typ = properties.String('type',
                            default="ing_quantity_by_process_and_name",
                            deserializable=False)
    unit = properties.Optional(properties.String, "unit")

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this variable.

        ``unit`` is listed so that every declared field is represented here;
        it was previously missing.
        """
        return [
            "name", "headers", "process_template", "ingredient_name",
            "quantity_dimension", "type_selector", "unit", "typ"
        ]

    def __init__(
            self,
            *,
            name: str,
            headers: List[str],
            process_template: LinkByUID,
            ingredient_name: str,
            quantity_dimension: IngredientQuantityDimension,
            type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN,
            unit: Optional[str] = None):
        self.name = name
        self.headers = headers
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        self.type_selector = type_selector

        # Cast to make sure the string is valid
        if not isinstance(quantity_dimension, IngredientQuantityDimension):
            quantity_dimension = IngredientQuantityDimension.get_enum(
                quantity_dimension)
        self.quantity_dimension = quantity_dimension

        # An absolute quantity needs a unit; any other dimension must not carry
        # one (an empty string is tolerated as "no unit").
        is_absolute = quantity_dimension == IngredientQuantityDimension.ABSOLUTE
        if is_absolute and unit is None:
            raise ValueError(
                "Absolute Quantity variables require that 'unit' is set")
        if not is_absolute and unit is not None and unit != "":
            raise ValueError("Fractional variables cannot take a 'unit'")
        self.unit = unit
Ejemplo n.º 7
0
class AttributeInOutput(Serializable['AttributeInOutput'], Variable):
    """[ALPHA] Attribute marked by an attribute template in the trunk of the history tree.

    The search for an attribute marking the given attribute template begins at the terminal
    of the material history tree and continues until any of the given process templates is
    reached. Those templates keep the search from continuing into their ingredients, but they do
    not halt the search entirely. This makes it possible to distinguish attributes that are
    present both in the output and in the inputs of a process.

    For example, a material "paint" might be produced by mixing and then resting "pigments" and
    a "base".  The color of the pigments and base could be measured and recorded as attributes
    in addition to the color of the resulting paint. To define a variable as the color of the
    resulting paint, AttributeInOutput can be used with the mixing process included in the list
    of process templates. Then, when the platform looks for the color of a paint, it will find it
    but *won't* traverse through the mixing process and also find the colors of the pigments and
    base, which would result in an ambiguous variable match.

    Unlike "AttributeByTemplateAfterProcess", AttributeInOutput will also match on the color
    attribute of the pigments in the rows that correspond to those pigments. This way, all the
    colors can be assigned to the same variable and rendered into the same columns in the GEM
    table.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    process_templates: list[LinkByUID]
        process templates that should not be traversed through when searching for a matching
        attribute.  The attribute may be present in these processes but not their ingredients.
    attribute_constraints: Optional[list[list[LinkByUID, Bounds]]]
        constraints on object attributes in the target object that must be satisfied. Constraints
        are expressed as Bounds.  Attributes are expressed with links. The attribute that the
        variable is being set to may be the target of a constraint as well.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    process_templates = properties.List(properties.Object(LinkByUID),
                                        'process_templates')
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID),
                 properties.Object(BaseBounds)])), 'attribute_constraints')
    type_selector = properties.Enumeration(DataObjectTypeSelector,
                                           "type_selector")
    typ = properties.String('type',
                            default="attribute_in_trunk",
                            deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this variable."""
        return [
            "name",
            "headers",
            "attribute_template",
            "process_templates",
            "attribute_constraints",
            "type_selector",
            "typ",
        ]

    def __init__(
        self,
        *,
        name: str,
        headers: List[str],
        attribute_template: LinkByUID,
        process_templates: List[LinkByUID],
        attribute_constraints: Optional[List[List[Union[LinkByUID, BaseBounds]]]] = None,
        type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN
    ):
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.process_templates = process_templates
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
Ejemplo n.º 8
0
class AttributeByTemplateAndObjectTemplate(
        Serializable['AttributeByTemplateAndObjectTemplate'], Variable):
    """[ALPHA] Attribute marked by an attribute template and an object template.

    For example, one property may be measured by two different measurement techniques.  In this
    case, that property would have the same attribute template.  Filtering by measurement
    templates, which identify the measurement techniques, disambiguates the technique used to
    measure that otherwise ambiguous property.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    object_template: LinkByUID
        template that identifies the associated object
    attribute_constraints: Optional[list[list[LinkByUID, Bounds]]]
        constraints on object attributes in the target object that must be satisfied. Constraints
        are expressed as Bounds.  Attributes are expressed with links. The attribute that the
        variable is being set to may be the target of a constraint as well.
        Defaults to None.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    object_template = properties.Object(LinkByUID, 'object_template')
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID),
                 properties.Object(BaseBounds)])), 'attribute_constraints')
    type_selector = properties.Enumeration(DataObjectTypeSelector,
                                           "type_selector")
    typ = properties.String('type',
                            default="attribute_by_object",
                            deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names that characterize this variable."""
        return [
            "name", "headers", "attribute_template", "object_template",
            "attribute_constraints", "type_selector", "typ"
        ]

    def __init__(
        self,
        *,
        name: str,
        headers: List[str],
        attribute_template: LinkByUID,
        object_template: LinkByUID,
        # Explicitly Optional: the default is None and the backing property is
        # declared with properties.Optional.
        attribute_constraints: Optional[List[List[Union[LinkByUID, BaseBounds]]]] = None,
        type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN
    ):
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.object_template = object_template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
Ejemplo n.º 9
0
class AttributeByTemplateAfterProcessTemplate(
        Serializable['AttributeByTemplateAfterProcessTemplate'], Variable):
    """[ALPHA] Attribute of an object marked by an attribute template and a parent process template.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    process_template: LinkByUID
        process template that identifies the originating process
    attribute_constraints: list[list[LinkByUID, Bounds]]
        constraints on object attributes in the target object that must be satisfied.
        Each constraint pairs a link to an attribute with the Bounds it must satisfy.
        The attribute that the variable is being set to may itself be constrained.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    process_template = properties.Object(LinkByUID, 'process_template')
    # Optional list of two-element [attribute link, bounds] entries.
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList([
                properties.Object(LinkByUID),
                properties.Object(BaseBounds),
            ])
        ),
        'attribute_constraints',
    )
    type_selector = properties.Enumeration(
        DataObjectTypeSelector, "type_selector")
    typ = properties.String(
        'type', default="attribute_after_process", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of this variable's attributes, in declaration order."""
        attribute_names = (
            "name",
            "headers",
            "attribute_template",
            "process_template",
            "attribute_constraints",
            "type_selector",
            "typ",
        )
        return list(attribute_names)

    def __init__(
            self,
            *,
            name: str,
            headers: List[str],
            attribute_template: LinkByUID,
            process_template: LinkByUID,
            attribute_constraints: Optional[
                List[List[Union[LinkByUID, BaseBounds]]]] = None,
            type_selector: DataObjectTypeSelector = (
                DataObjectTypeSelector.PREFER_RUN)):
        """Build the variable; every argument is keyword-only and stored as given."""
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.process_template = process_template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
Ejemplo n.º 10
0
class DesignExecution(Resource['DesignExecution'], Pageable,
                      AsynchronousObject):
    """The execution of a DesignWorkflow.

    The status of an execution is one of INPROGRESS, SUCCEEDED, or FAILED;
    further detail is available in the ``status_description`` field.

    Parameters
    ----------
    project_id: str
        Unique identifier of the project that contains the workflow execution

    """

    _paginator: Paginator = Paginator()
    _collection_key = 'response'

    uid: UUID = properties.UUID('id', serializable=False)
    """:UUID: Unique identifier of the workflow execution"""
    workflow_id = properties.UUID('workflow_id', serializable=False)
    """:UUID: Unique identifier of the workflow that was executed"""
    version_number = properties.Integer("version_number", serializable=False)
    """:int: Integer identifier that increases each time the workflow is executed. The first
    execution has version_number = 1."""

    status = properties.Optional(
        properties.String(), 'status', serializable=False)
    """:Optional[str]: short description of the execution's status"""
    status_description = properties.Optional(
        properties.String(), 'status_description', serializable=False)
    """:Optional[str]: more detailed description of the execution's status"""
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False)
    """:Optional[List[str]]: human-readable explanations of the status"""
    experimental = properties.Boolean(
        "experimental", serializable=False, default=True)
    """:bool: whether the execution is experimental (newer, less well-tested functionality)"""
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False)
    """:Optional[List[str]]: human-readable reasons why the execution is experimental"""
    created_by = properties.Optional(
        properties.UUID, 'created_by', serializable=False)
    """:Optional[UUID]: id of the user who created the resource"""
    updated_by = properties.Optional(
        properties.UUID, 'updated_by', serializable=False)
    """:Optional[UUID]: id of the user who most recently updated the resource,
    if it has been updated"""
    create_time = properties.Optional(
        properties.Datetime, 'create_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was created"""
    update_time = properties.Optional(
        properties.Datetime, 'update_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was most recently updated,
    if it has been updated"""

    score = properties.Object(Score, 'score')
    """:Score: score by which this execution was evaluated"""
    descriptors = properties.List(properties.Object(Descriptor), 'descriptors')
    """:List[Descriptor]: all of the descriptors in the candidates generated by this execution"""

    def __init__(self):
        """This shouldn't be called, but it defines members that are set elsewhere."""
        self.project_id: Optional[UUID] = None  # pragma: no cover
        self.session: Optional[Session] = None  # pragma: no cover

    def __str__(self):
        return f'<DesignExecution {str(self.uid)!r}>'

    def _path(self):
        # URL path of this execution, nested under its project and workflow.
        return (f'/projects/{self.project_id}'
                f'/design-workflows/{self.workflow_id}'
                f'/executions/{self.uid}')

    def in_progress(self) -> bool:
        """Whether design execution is in progress. Does not query state."""
        return self.status == "INPROGRESS"

    def succeeded(self) -> bool:
        """Whether design execution has completed successfully. Does not query state."""
        return self.status == "SUCCEEDED"

    def failed(self) -> bool:
        """Whether design execution has completed unsuccessfully. Does not query state."""
        return self.status == "FAILED"

    @classmethod
    def _build_candidates(
            cls,
            subset_collection: Iterable[dict]) -> Iterable[DesignCandidate]:
        # Lazily turn each raw candidate dict into a DesignCandidate.
        yield from (DesignCandidate.build(raw) for raw in subset_collection)

    def candidates(
        self,
        page: Optional[int] = None,
        per_page: int = 100,
    ) -> Iterable[DesignCandidate]:
        """Fetch the Design Candidates for the particular execution, paginated."""
        candidates_path = self._path() + '/candidates'
        # Bind the candidates path so the paginator only has to supply paging arguments.
        fetch_candidates_page = partial(self._fetch_page, path=candidates_path)
        return self._paginator.paginate(
            page_fetcher=fetch_candidates_page,
            collection_builder=self._build_candidates,
            page=page,
            per_page=per_page,
        )
Ejemplo n.º 11
0
class TableConfig(Resource["TableConfig"]):
    """
    [ALPHA] The Table Configuration used to build GEM Tables.

    Parameters
    ----------
    name: str
        Name of the Table Configuration
    description: str
        Description of the Table Configuration
    datasets: list[UUID]
        Datasets that are in scope for the table, as a list of dataset uuids
    variables: list[Variable]
        Variable definitions, which define data from the material histories to use in the columns
    rows: list[Row]
        List of row definitions that define the rows of the table
    columns: list[Column]
        Column definitions, which describe how the variables are shaped into the table
    version_uid: Optional[UUID]
        Identifier of this version of the configuration, if it has one
    version_number: Optional[int]
        Number of this version of the configuration, if it has one
    definition_uid: Optional[UUID]
        [DEPRECATED] Alias of ``config_uid``; supply at most one of the two
    config_uid: Optional[UUID]
        Identifier of the configuration, if it has one

    Raises
    ------
    ValueError
        If two variables share a name or a list of headers, or if a column's
        ``data_source`` is not the name of one of the variables.

    """

    # FIXME (DML): rename this (this is dependent on the server side)
    _response_key = "ara_definition"

    @staticmethod
    def _get_dups(lst: List) -> List:
        """Return every element of ``lst`` that occurs more than once.

        Each occurrence of a duplicated element is included in the result, in its
        original position order.
        """
        # list.count is quadratic, but it only needs equality, so it also works for
        # unhashable elements such as the header lists checked in __init__.
        return [x for x in lst if lst.count(x) > 1]

    config_uid = properties.Optional(properties.UUID(), 'definition_id')
    version_uid = properties.Optional(properties.UUID(), 'id')
    version_number = properties.Optional(properties.Integer, 'version_number')
    name = properties.String("name")
    description = properties.String("description")
    datasets = properties.List(properties.UUID, "datasets")
    variables = properties.List(properties.Object(Variable), "variables")
    rows = properties.List(properties.Object(Row), "rows")
    columns = properties.List(properties.Object(Column), "columns")

    # Provide some backwards compatible support for definition_uid, redirecting to config_uid
    @property
    def definition_uid(self):
        """[[DEPRECATED]] This is a deprecated alias to config_uid. Please use that instead."""
        from warnings import warn
        # stacklevel=2 attributes the warning to the code that accessed the property,
        # rather than to this line inside the library.
        warn(
            "definition_uid is deprecated and will soon be removed. "
            "Please use config_uid instead", DeprecationWarning, stacklevel=2)
        return self.config_uid

    @definition_uid.setter
    def definition_uid(self, value):  # pragma: no cover
        """[[DEPRECATED]] This is a deprecated alias to config_uid. Please use that instead."""
        from warnings import warn
        # stacklevel=2 attributes the warning to the code that assigned the property.
        warn(
            "definition_uid is deprecated and will soon be removed. "
            "Please use config_uid instead", DeprecationWarning, stacklevel=2)
        self.config_uid = value

    def __init__(self,
                 *,
                 name: str,
                 description: str,
                 datasets: List[UUID],
                 variables: List[Variable],
                 rows: List[Row],
                 columns: List[Column],
                 version_uid: Optional[UUID] = None,
                 version_number: Optional[int] = None,
                 definition_uid: Optional[UUID] = None,
                 config_uid: Optional[UUID] = None):
        self.name = name
        self.description = description
        self.datasets = datasets
        self.rows = rows
        self.variables = variables
        self.columns = columns
        self.version_uid = version_uid
        self.version_number = version_number

        # config_uid wins; definition_uid is only honoured when config_uid is absent.
        if config_uid is not None:
            assert definition_uid is None, "Please supply config_uid " \
                                           "instead of definition_uid, and not both"
            self.config_uid = config_uid
        else:
            self.config_uid = definition_uid

        # Note that these validations only apply at construction time. The current intended usage
        # is for this object to be created holistically; if changed, then these will need
        # to move into setters.
        names = [x.name for x in variables]
        dup_names = self._get_dups(names)
        if dup_names:
            raise ValueError("Multiple variables defined these names,"
                             " which must be unique: {}".format(dup_names))
        headers = [x.headers for x in variables]
        dup_headers = self._get_dups(headers)
        if dup_headers:
            raise ValueError("Multiple variables defined these headers,"
                             " which must be unique: {}".format(dup_headers))

        # Every column has to draw its data from one of the declared variables.
        missing_variables = [
            x.data_source for x in columns if x.data_source not in names
        ]
        if missing_variables:
            raise ValueError(
                "The data_source of the columns must match one of the variable names,"
                " but {} were missing".format(missing_variables))

    def add_columns(self,
                    *,
                    variable: Variable,
                    columns: List[Column],
                    name: Optional[str] = None,
                    description: Optional[str] = None) -> 'TableConfig':
        """[ALPHA] Add a variable and one or more columns to this TableConfig (out-of-place).

        This method checks that the variable name is not already in use and that the columns
        only reference that variable.  It is *not* able to check if the columns and the variable
        are compatible (yet, at least).

        Parameters
        ----------
        variable: Variable
            Variable to add and use in the added columns
        columns: list[Column]
            Columns to add, which must only reference the added variable
        name: Optional[str]
            Optional renaming of the table
        description: Optional[str]
            Optional re-description of the table

        Returns
        -------
        TableConfig
            A new configuration with the same ``config_uid``; this one is not modified

        Raises
        ------
        ValueError
            If the variable name is already in use, or if any column has a
            ``data_source`` other than the variable's name

        """
        if any(existing.name == variable.name for existing in self.variables):
            raise ValueError("The variable name {} is already used".format(
                variable.name))

        mismatched_data_source = [
            x for x in columns if x.data_source != variable.name
        ]
        if mismatched_data_source:
            raise ValueError(
                "Column.data_source must be {} but found {}".format(
                    variable.name, mismatched_data_source))

        # A falsy name/description (None or empty) keeps the current value.
        return TableConfig(name=name or self.name,
                           description=description or self.description,
                           datasets=copy(self.datasets),
                           rows=copy(self.rows),
                           variables=copy(self.variables) + [variable],
                           columns=copy(self.columns) + columns,
                           config_uid=copy(self.config_uid))

    def add_all_ingredients(self,
                            *,
                            process_template: LinkByUID,
                            project,
                            quantity_dimension: IngredientQuantityDimension,
                            scope: str = CITRINE_SCOPE):
        """[ALPHA] Add variables and columns for all of the possible ingredients in a process.

        For each allowed ingredient name in the process template there is a column for the id of
        the ingredient and a column for the quantity of the ingredient. If the quantities are
        given in absolute amounts then there is also a column for units.

        Parameters
        ------------
        process_template: LinkByUID
            scope and id of a registered process template
        project: Project
            a project that has access to the process template
        quantity_dimension: IngredientQuantityDimension
            the dimension in which to report ingredient quantities
        scope: Optional[str]
            the scope for which to get ingredient ids (default is Citrine scope, 'id')

        Returns
        -------
        TableConfig
            A new configuration with the same ``config_uid``; this one is not modified

        Raises
        ------
        RuntimeError
            If the process template has no allowed ingredient names

        """
        dimension_display = {
            IngredientQuantityDimension.ABSOLUTE: "absolute quantity",
            IngredientQuantityDimension.MASS: "mass fraction",
            IngredientQuantityDimension.VOLUME: "volume fraction",
            IngredientQuantityDimension.NUMBER: "number fraction"
        }
        process: ProcessTemplate = project.process_templates.get(
            uid=process_template.id, scope=process_template.scope)
        if not process.allowed_names:
            raise RuntimeError(
                "Cannot add ingredients for process template '{}' because it has no defined "
                "ingredients (allowed_names is not defined).".format(
                    process.name))

        # Neither of these changes inside the loop, so compute them once.
        existing_names = {var.name for var in self.variables}
        quantity_display = dimension_display[quantity_dimension]

        new_variables = []
        new_columns = []
        for name in process.allowed_names:
            # NOTE(review): built-in hash() of a str is salted per interpreter process
            # unless PYTHONHASHSEED is fixed, so these generated names are not stable
            # across sessions - confirm whether that is acceptable for callers.
            identifier_variable = IngredientIdentifierByProcessTemplateAndName(
                name='_'.join([
                    process.name, name,
                    str(hash(process_template.id + name + scope))
                ]),
                headers=[process.name, name, scope],
                process_template=process_template,
                ingredient_name=name,
                scope=scope)
            quantity_variable = IngredientQuantityByProcessAndName(
                name='_'.join([
                    process.name, name,
                    str(hash(process_template.id + name + quantity_display))
                ]),
                headers=[process.name, name, quantity_display],
                process_template=process_template,
                ingredient_name=name,
                quantity_dimension=quantity_dimension)

            # The identifier is added only if no variable with that name exists yet;
            # the quantity variable is always added.
            if identifier_variable.name not in existing_names:
                new_variables.append(identifier_variable)
                new_columns.append(
                    IdentityColumn(data_source=identifier_variable.name))
            new_variables.append(quantity_variable)
            new_columns.append(MeanColumn(data_source=quantity_variable.name))
            if quantity_dimension == IngredientQuantityDimension.ABSOLUTE:
                new_columns.append(
                    OriginalUnitsColumn(data_source=quantity_variable.name))

        return TableConfig(name=self.name,
                           description=self.description,
                           datasets=copy(self.datasets),
                           rows=copy(self.rows),
                           variables=copy(self.variables) + new_variables,
                           columns=copy(self.columns) + new_columns,
                           config_uid=copy(self.config_uid))
class AutoMLPredictor(Resource['AutoMLPredictor'], Predictor,
                      AIResourceMetadata):
    """[ALPHA] A predictor interface that builds a single ML model.

    The model predicts one output from the given set of inputs.
    Exactly one output and one machine learning model are currently supported.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        the description of the predictor
    inputs: list[Descriptor]
        Descriptors that represent inputs to the model
    output: Descriptor
        A single Descriptor that represents the output of the model
    training_data: Optional[List[DataSource]]
        Sources of training data. Each can be either a CSV or a GEM Table. Candidates from
        multiple data sources will be combined into a flattened list and de-duplicated by uid and
        identifiers. De-duplication is performed if a uid or identifier is shared between two or
        more rows. The content of a de-duplicated row will contain the union of data across all
        rows that share the same uid or at least 1 identifier. Training data is unnecessary if the
        predictor is part of a graph that includes all training data required by this predictor.
    archived: bool
        value stored on the ``archived`` attribute (default False)

    """

    _resource_type = ResourceTypeEnum.MODULE

    inputs = _properties.List(_properties.Object(Descriptor), 'config.inputs')
    output = _properties.Object(Descriptor, 'output')
    training_data = _properties.List(
        _properties.Object(DataSource),
        'config.training_data',
    )

    typ = _properties.String(
        'config.type', default='AutoML', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 output: Descriptor,
                 inputs: List[Descriptor],
                 training_data: Optional[List[DataSource]] = None,
                 archived: bool = False):
        self.name: str = name
        self.description: str = description
        self.inputs: List[Descriptor] = inputs
        self.output: Descriptor = output
        # Training data passes through _wrap_training_data before being stored.
        wrapped_training_data = self._wrap_training_data(training_data)
        self.training_data: List[DataSource] = wrapped_training_data
        self.archived: bool = archived

    def _post_dump(self, data: dict) -> dict:
        # Copy the name to the top level and list the single output under both keys.
        config = data['config']
        data['display_name'] = config['name']
        config['outputs'] = [data['output']]
        config['responses'] = [data['output']]
        return data

    @classmethod
    def _pre_build(cls, data: dict) -> dict:
        # Take the output from the first entry of 'outputs', or of 'responses' when
        # 'outputs' is absent; leave data untouched when neither key is present.
        config = data['config']
        for key in ('outputs', 'responses'):
            if key in config:
                data['output'] = config[key][0]
                break
        return data

    def __str__(self):
        return f'<AutoMLPredictor {self.name!r}>'