class DesignSpace(Module):
    """Base type for Citrine design spaces, which describe the materials that can be made.

    This class is abstract: :meth:`get_type` maps a serialized dict onto the
    concrete design space class named by that dict.

    """

    _project_id: Optional[UUID] = None
    _session: Optional[Session] = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    """:Optional[UUID]: Citrine Platform unique identifier"""
    name = properties.String('config.name')
    description = properties.Optional(
        properties.String(), 'config.description')

    @classmethod
    def get_type(cls, data) -> Type[Serializable]:
        """Return the concrete subtype selected by ``data['config']['type']``.

        A ``KeyError`` propagates if the nested keys are missing or the type
        name is not one of the entries in the lookup table below.
        """
        # The subclasses are imported inside the method rather than at module level.
        from .data_source_design_space import DataSourceDesignSpace
        from .enumerated_design_space import EnumeratedDesignSpace
        from .formulation_design_space import FormulationDesignSpace
        from .product_design_space import ProductDesignSpace

        # 'Univariate' and 'ProductDesignSpace' both resolve to ProductDesignSpace.
        subtype_by_name = {
            'Univariate': ProductDesignSpace,
            'ProductDesignSpace': ProductDesignSpace,
            'EnumeratedDesignSpace': EnumeratedDesignSpace,
            'FormulationDesignSpace': FormulationDesignSpace,
            'DataSourceDesignSpace': DataSourceDesignSpace
        }
        type_name = data['config']['type']
        return subtype_by_name[type_name]
Ejemplo n.º 2
0
class ScalarMinObjective(Serializable['ScalarMinObjective'], Objective):
    """Minimization objective over a single response, optionally bounded.

    Parameters
    ----------
    descriptor_key: str
        the key from which to pull the values
    lower_bound: float
        the lower bound on the space, e.g. 0 for a non-negative property
    upper_bound: float
        the upper bound on the space, e.g. 0 for a non-positive property
    session: Optional[Session]
        session stored on the instance; defaults to None
    """

    descriptor_key = properties.String('descriptor_key')
    lower_bound = properties.Optional(
        properties.Float, 'lower_bound')
    upper_bound = properties.Optional(
        properties.Float, 'upper_bound')
    typ = properties.String('type', default='ScalarMin')

    def __init__(
            self,
            descriptor_key: str,
            lower_bound: Optional[float] = None,
            upper_bound: Optional[float] = None,
            session: Optional[Session] = None):
        self.descriptor_key = descriptor_key
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound
        self.session: Optional[Session] = session

    def __str__(self):
        """Return a short tag containing the repr of the descriptor key."""
        template = '<ScalarMinObjective {!r}>'
        return template.format(self.descriptor_key)
Ejemplo n.º 3
0
class EnumeratedDesignSpace(Resource['EnumeratedDesignSpace'], DesignSpace):
    """Design space composed of an explicit enumeration of candidate materials to score.

    Note that every candidate must have exactly the descriptors in the list populated
    (no more, no less) to be included.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    descriptors: list[Descriptor]
        the list of descriptors included in the candidates of the design space
    data: list[dict]
        list of dicts of the shape `{<descriptor_key>: <descriptor_value>}` where each dict
        corresponds to a candidate in the design space
    session: Optional[Session]
        session stored on the instance; defaults to None
    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(),
                                      'config.description')
    descriptors = properties.List(properties.Object(Descriptor),
                                  'config.descriptors')
    data = properties.List(
        properties.Mapping(properties.String, properties.Raw), 'config.data')

    typ = properties.String('config.type',
                            default='EnumeratedDesignSpace',
                            deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID(
        'schema_id', default=UUID('f3907a58-aa46-462c-8837-a5aa9605e79e'))

    def __init__(self,
                 name: str,
                 description: str,
                 descriptors: List[Descriptor],
                 data: List[Mapping[str, Any]],
                 session: Optional[Session] = None):
        # The default is None rather than ``Session()``: a default argument is evaluated
        # once, when the function is defined, which would build a Session at import time
        # and share that single object between every instance created without one.
        self.name: str = name
        self.description: str = description
        self.descriptors: List[Descriptor] = descriptors
        self.data: List[Mapping[str, Any]] = data
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key of the dump."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<EnumeratedDesignSpace {!r}>'.format(self.name)
Ejemplo n.º 4
0
class DesignWorkflow(Resource['DesignWorkflow'], Workflow):
    """Object that generates scored materials that may approach higher values of the score.

    Parameters
    ----------
    name: str
        the name of the workflow
    design_space_id: UUID
        the UUID corresponding to the design space to use
    processor_id: UUID
        the UUID corresponding to the processor to use
    predictor_id: UUID
        the UUID corresponding to the predictor to use
    project_id: UUID
        the UUID corresponding to the project to use
    session: Optional[Session]
        session stored on the instance and handed to the execution collection;
        defaults to None
    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('display_name')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)

    # TODO: Figure out how to make these fields richer/use actual objects
    design_space_id = properties.UUID('modules.design_space_id')
    processor_id = properties.UUID('modules.processor_id')
    predictor_id = properties.UUID('modules.predictor_id')

    # The project_id is used to keep a reference to the project under which the workflow was
    # created. It is currently unclear if this is the best way to do this. Another option might
    # be to have all objects have a context object, but that also seems to have downsides.
    def __init__(self,
                 name: str,
                 design_space_id: UUID,
                 processor_id: UUID,
                 predictor_id: UUID,
                 project_id: Optional[UUID] = None,
                 session: Optional[Session] = None):
        # The default is None rather than ``Session()``: a default argument is evaluated
        # once, when the function is defined, which would build a Session at import time
        # and share that single object between every workflow created without one.
        self.name = name
        self.design_space_id = design_space_id
        self.processor_id = processor_id
        self.predictor_id = predictor_id
        self.project_id = project_id
        self.session: Optional[Session] = session

    def __str__(self):
        return '<DesignWorkflow {!r}>'.format(self.name)

    @property
    def executions(self) -> WorkflowExecutionCollection:
        """Return a resource representing all visible executions of this workflow.

        Raises
        ------
        AttributeError
            if the workflow has no ``project_id`` (attribute missing or None)
        """
        if getattr(self, 'project_id', None) is None:
            raise AttributeError(
                'Cannot initialize execution without project reference!')
        return WorkflowExecutionCollection(self.project_id, self.uid,
                                           self.session)
0
class MonteCarloProcessor(Serializable['MonteCarloProcessor'], Processor):
    """[ALPHA] Using a Monte Carlo optimizer to search for the best candidate.

    The moves that the MonteCarlo optimizer makes are inferred from the descriptors in the
    design space.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    session: Optional[Session]
        session stored on the instance; defaults to None

    """

    # The Serializable type parameter names this class; it previously named
    # 'GridProcessor', apparently copied from that class.

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(),
                                      'config.description')
    typ = properties.String('config.type',
                            default='ContinuousSearch',
                            deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)
    experimental = properties.Boolean("experimental",
                                      serializable=False,
                                      default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id', default=UUID('d8ddfe73-10f7-4456-9de9-9a1638bae403'))

    def _attrs(self) -> List[str]:
        """Return the attribute names ``name``, ``description`` and ``typ``."""
        return ["name", "description", "typ"]

    def __init__(self,
                 name: str,
                 description: str,
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key of the dump."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<MonteCarloProcessor {!r}>'.format(self.name)
Ejemplo n.º 6
0
class ProductDesignSpace(Resource['ProductDesignSpace'], DesignSpace):
    """[ALPHA] An outer product of univariate dimensions, either continuous or enumerated.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    dimensions: list[Dimension]
        univariate dimensions that are factors of the design space; can be enumerated or continuous
    session: Optional[Session]
        session stored on the instance; defaults to None

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    dimensions = properties.List(properties.Object(Dimension), 'config.dimensions')
    typ = properties.String('config.type', default='Univariate', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False
    )
    archived = properties.Boolean('archived', default=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID('schema_id', default=UUID('6c16d694-d015-42a7-b462-8ef299473c9a'))

    def __init__(self,
                 name: str,
                 description: str,
                 dimensions: List[Dimension],
                 session: Optional[Session] = None):
        # The default is None rather than ``Session()``: a default argument is evaluated
        # once, when the function is defined, which would build a Session at import time
        # and share that single object between every instance created without one.
        self.name: str = name
        self.description: str = description
        self.dimensions: List[Dimension] = dimensions
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key of the dump."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<ProductDesignSpace {!r}>'.format(self.name)
Ejemplo n.º 7
0
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    Instances are built from saved models; users are not expected to construct
    ModelSummary objects themselves.
    """

    name = properties.String('name')
    """:str: the name of the model"""
    type_ = properties.String('type')
    """:str: the type of the model (e.g., "ML Model", "Featurizer", etc.)"""
    inputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        'inputs')
    """:List[Descriptor]: list of input descriptors"""
    outputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        'outputs')
    """:List[Descriptor]: list of output descriptors"""
    model_settings = properties.Raw('model_settings')
    """:dict: model settings, as a dictionary (keys depend on the model type)"""
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport),
        'feature_importances')
    """:List[FeatureImportanceReport]: feature importance reports for each output"""
    predictor_name = properties.String(
        'predictor_configuration_name', default='')
    """:str: the name of the predictor that created this model"""
    predictor_uid = properties.Optional(
        properties.UUID(), 'predictor_configuration_uid')
    """:Optional[UUID]: the unique Citrine id of the predictor that created this model"""
    # training_data_count is declared here but is not assigned by __init__.
    training_data_count = properties.Optional(
        properties.Integer, "training_data_count")
    """:int: Number of rows in the training data for the model, if applicable."""

    def __init__(
            self,
            name: str,
            type_: str,
            inputs: List[Descriptor],
            outputs: List[Descriptor],
            model_settings: Dict[str, Any],
            feature_importances: List[FeatureImportanceReport],
            predictor_name: str,
            predictor_uid: Optional[UUID] = None):
        # Identity of the model.
        self.name = name
        self.type_ = type_
        # What the model consumes and produces.
        self.inputs = inputs
        self.outputs = outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        # The predictor this model belongs to.
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid

    def __str__(self):
        """Return a short tag containing the repr of the model name."""
        template = '<ModelSummary {!r}>'
        return template.format(self.name)
Ejemplo n.º 8
0
class GridProcessor(Serializable['GridProcessor'], Processor):
    """Processor that scans over a finite set of materials drawn from the design space.

    The finite set is generated from the domain defined by the design space. For
    continuous dimensions, a uniform grid is created between the bounds of the
    descriptor; the number of points is specified by `grid_sizes`.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    grid_sizes: dict[str, int]
        the number of points to select along each dimension of the grid, by dimension name
    session: Optional[Session]
        session stored on the instance; defaults to None
    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(
        properties.String(), 'config.description')
    typ = properties.String(
        'config.type', default='Grid', deserializable=False)
    # Exposed as ``grid_sizes`` but stored under 'config.grid_dimensions'.
    grid_sizes = properties.Mapping(
        properties.String, properties.Integer, 'config.grid_dimensions')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id', default=UUID('272791a5-5468-4344-ac9f-2811d9266a4d'))

    def __init__(
            self,
            name: str,
            description: str,
            grid_sizes: Mapping[str, int],
            session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.grid_sizes: Mapping[str, int] = grid_sizes
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key of the dump."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        """Return a short tag containing the repr of the processor name."""
        template = '<GridProcessor {!r}>'
        return template.format(self.name)
Ejemplo n.º 9
0
class AuditInfo(Serializable, DictSerializable):
    """
    Container for audit metadata. Users are not expected to create AuditInfo objects.

    Parameters
    ----------
    created_by: Optional[UUID]
        ID of the user who created the object
    created_at: Optional[Datetime]
        Time, in ms since epoch, at which the object was created
    updated_by: Optional[UUID]
        ID of the user who most recently updated the object
    updated_at: Optional[Datetime]
        Time, in ms since epoch, at which the object was most recently updated

    """

    created_by = properties.Optional(properties.UUID, 'created_by')
    created_at = properties.Optional(properties.Datetime, 'created_at')
    updated_by = properties.Optional(properties.UUID, 'updated_by')
    updated_at = properties.Optional(properties.Datetime, 'updated_at')

    def __init__(self,
                 created_by: Optional[UUID],
                 created_at: Optional[datetime],
                 updated_by: Optional[UUID] = None,
                 updated_at: Optional[datetime] = None):
        self.created_by = created_by
        self.created_at = created_at
        self.updated_by = updated_by
        self.updated_at = updated_at

    def __repr__(self):
        """Return all four audit fields, one per line, each formatted with repr."""
        template = 'Created by: {!r}\nCreated at: {!r}\nUpdated by: {!r}\nUpdated at: {!r}'
        return template.format(
            self.created_by, self.created_at, self.updated_by, self.updated_at)

    def __str__(self):
        """Return the creation line, plus an update line when any update field is set."""
        created_line = 'Created by user {} at time {}'.format(
            self.created_by, self.created_at)
        # No update information at all: the creation line is the whole summary.
        if self.updated_by is None and self.updated_at is None:
            return created_line
        updated_line = '\nUpdated by user {} at time {}'.format(
            self.updated_by, self.updated_at)
        return created_line + updated_line

    def __eq__(self, other):
        """Compare by the output of ``__repr__`` on both objects."""
        mine = self.__repr__()
        theirs = other.__repr__()
        return mine == theirs

    def as_dict(self):
        """Return the object as a dictionary."""
        dumped = self.dump()
        return dumped
Ejemplo n.º 10
0
class EnumeratedProcessor(Serializable['EnumeratedProcessor'], Processor):
    """Processor that enumerates materials from the design space domain.

    Up to `max_size` materials are enumerated, and each one is processed
    independently.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    max_size: int
        maximum number of samples that can be enumerated over
    session: Optional[Session]
        session stored on the instance; defaults to None
    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(
        properties.String(), 'config.description')
    max_size = properties.Integer('config.max_size')
    typ = properties.String(
        'config.type', default='Enumerated', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id', default=UUID('307b88a2-fd50-4d27-ae91-b8d6282f68f7'))

    def __init__(
            self,
            name: str,
            description: str,
            max_size: Optional[int] = None,
            session: Optional[Session] = None):
        # Largest 32-bit signed integer, 2147483647.
        int32_max = 2**31 - 1
        self.name: str = name
        self.description: str = description
        # Any falsy max_size (None, but also 0) falls back to int32_max.
        self.max_size: int = max_size or int32_max
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` key of the dump."""
        config = data['config']
        data['display_name'] = config['name']
        return data

    def __str__(self):
        """Return a short tag containing the repr of the processor name."""
        template = '<EnumeratedProcessor {!r}>'
        return template.format(self.name)
Ejemplo n.º 11
0
class LabelFractionConstraint(
        Serializable['LabelFractionConstraint'], Constraint):
    """Constraint on the total amount of ingredients carrying a given label.

    All constructor arguments are keyword-only.

    Parameters
    ----------
    formulation_descriptor: FormulationDescriptor
        descriptor to constrain
    label: str
        ingredient label to constrain
    min: float
        minimum value
    max: float
        maximum value
    is_required: bool, optional
        whether this ingredient is required.
        If ``True``, the label must be present and its value must be within the
        specified range. if ``False``, the label must be within the specified range only if
        it's present in the formulation, i.e., the value can be 0 or on the range ``[min, max]``.
    session: Optional[Session]
        session stored on the instance; defaults to None

    """

    formulation_descriptor = properties.Object(
        FormulationDescriptor, 'formulation_descriptor')
    label = properties.String('label')
    min = properties.Optional(properties.Float, 'min')
    max = properties.Optional(properties.Float, 'max')
    is_required = properties.Boolean('is_required')
    typ = properties.String('type', default='LabelFractionConstraint')

    def __init__(
            self,
            *,
            formulation_descriptor: FormulationDescriptor,
            label: str,
            min: float,
            max: float,
            is_required: bool = True,
            session: Optional[Session] = None):
        self.formulation_descriptor: FormulationDescriptor = formulation_descriptor
        self.label: str = label
        self.min: float = min
        self.max: float = max
        self.is_required: bool = is_required
        self.session: Optional[Session] = session

    def __str__(self):
        """Return a tag of the form ``<LabelFractionConstraint key::label>`` using reprs."""
        template = '<LabelFractionConstraint {!r}::{!r}>'
        descriptor_key = self.formulation_descriptor.key
        return template.format(descriptor_key, self.label)
0
class AIResourceMetadata:
    """Abstract holder of the metadata fields that Resources have in common.

    The class only declares properties. Every field except ``archived`` is
    declared with ``serializable=False``.
    """

    # --- creation ---------------------------------------------------------
    created_by = properties.Optional(
        properties.UUID, 'created_by', serializable=False)
    """:Optional[UUID]: id of the user who created the resource"""
    create_time = properties.Optional(
        properties.Datetime, 'create_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was created"""

    # --- most recent update -----------------------------------------------
    updated_by = properties.Optional(
        properties.UUID, 'updated_by', serializable=False)
    """:Optional[UUID]: id of the user who most recently updated the resource,
    if it has been updated"""
    update_time = properties.Optional(
        properties.Datetime, 'update_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was most recently updated,
    if it has been updated"""

    # --- archival ---------------------------------------------------------
    archived = properties.Boolean('archived', default=False)
    """:bool: whether the resource is archived (hidden but not deleted)"""
    archived_by = properties.Optional(
        properties.UUID, 'archived_by', serializable=False)
    """:Optional[UUID]: id of the user who archived the resource, if it has been archived"""
    archive_time = properties.Optional(
        properties.Datetime, 'archive_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was archived,
    if it has been archived"""

    # --- experimental flag ------------------------------------------------
    experimental = properties.Boolean(
        "experimental", serializable=False, default=True)
    """:bool: whether the resource is experimental (newer, less well-tested functionality)"""
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False)
    """:Optional[List[str]]: human-readable reasons why the resource is experimental"""

    # --- status -----------------------------------------------------------
    status = properties.Optional(
        properties.String(), 'status', serializable=False)
    """:Optional[str]: short description of the resource's status"""
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False)
    """:Optional[List[str]]: human-readable explanations of the status"""
Ejemplo n.º 13
0
class JobStatusResponse(Resource['JobStatusResponse']):
    """[ALPHA] The response to a job status check.

    A JobStatusResponse summarizes the status of the job as a whole.

    Parameters
    ----------
    job_type: str
        the type of job for this status report
    status: str
        the actual status of the job.
        One of "Running", "Success", or "Failure".
    tasks: List[TaskNode]
        all of the constituent task required to complete this job
    output: Optional[Map[String,String]]
        job output properties and results

    """

    job_type = properties.String("job_type")
    status = properties.String("status")
    tasks = properties.List(
        Object(TaskNode), "tasks")
    output = properties.Optional(
        properties.Mapping(String, String), 'output')

    def __init__(self,
                 job_type: str,
                 status: str,
                 tasks: List[TaskNode],
                 output: Optional[Dict[str, str]]):
        # ``output`` has no default and must always be passed, even when it is None.
        self.job_type = job_type
        self.status = status
        self.tasks = tasks
        self.output = output
Ejemplo n.º 14
0
class CSVDataSource(Serializable['CSVDataSource'], DataSource):
    """Data source backed by a CSV file stored on the data platform.

    Parameters
    ----------
    file_link: FileLink
        link to the CSV file to read the data from
    column_definitions: Mapping[str, Descriptor]
        Map the column headers to the descriptors that will be used to interpret the cell contents
    identifiers: Optional[List[str]]
        List of one or more column headers whose values uniquely identify a row. These may overlap
        with ``column_definitions`` if a column should be used as data and as an identifier,
        but this is not necessary. Identifiers must be unique within a dataset. No two rows can
        contain the same value.

    """

    typ = properties.String(
        'type', default='csv_data_source', deserializable=False)
    file_link = properties.Object(FileLink, "file_link")
    column_definitions = properties.Mapping(
        properties.String,
        properties.Object(Descriptor),
        "column_definitions")
    identifiers = properties.Optional(
        properties.List(properties.String), "identifiers")

    def _attrs(self) -> List[str]:
        """Return the attribute names for the three data fields followed by ``typ``."""
        attr_names = ["file_link", "column_definitions", "identifiers", "typ"]
        return attr_names

    def __init__(
            self,
            file_link: FileLink,
            column_definitions: Mapping[str, Descriptor],
            identifiers: Optional[List[str]] = None):
        self.file_link = file_link
        self.column_definitions = column_definitions
        self.identifiers = identifiers
Ejemplo n.º 15
0
class MeanColumn(Serializable['MeanColumn'], Column):
    """[ALPHA] Column holding the mean of a real-valued variable.

    Both constructor arguments are keyword-only.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    target_units: Optional[str]
        units to convert the real variable into

    """

    data_source = properties.String('data_source')
    target_units = properties.Optional(
        properties.String, "target_units")
    typ = properties.String(
        'type', default="mean_column", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names ``data_source``, ``target_units`` and ``typ``."""
        attr_names = ["data_source", "target_units", "typ"]
        return attr_names

    def __init__(self, *, data_source: str, target_units: Optional[str] = None):
        self.data_source = data_source
        self.target_units = target_units
Ejemplo n.º 16
0
class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
    """[ALPHA] Data source backed by a GEM Table hosted on the data platform.

    Parameters
    ----------
    table_id: UUID
        Unique identifier for the GEM Table
    table_version: Union[str,int]
        Version number for the GEM Table, which starts at 1 rather than 0.
        Strings are cast to ints.
    formulation_descriptor: Optional[FormulationDescriptor]
        Optional descriptor used to store formulations emitted by the data source.

    """

    typ = properties.String(
        'type', default='hosted_table_data_source', deserializable=False)
    table_id = properties.UUID("table_id")
    table_version = properties.Integer("table_version")
    formulation_descriptor = properties.Optional(
        properties.Object(FormulationDescriptor),
        "formulation_descriptor")

    def _attrs(self) -> List[str]:
        """Return the attribute names ``table_id``, ``table_version`` and ``typ``."""
        # NOTE(review): formulation_descriptor is not listed here - confirm that is intended.
        attr_names = ["table_id", "table_version", "typ"]
        return attr_names

    def __init__(self,
                 table_id: UUID,
                 table_version: Union[int, str],
                 formulation_descriptor: Optional[FormulationDescriptor] = None):
        self.table_id: UUID = table_id
        self.table_version: Union[int, str] = table_version
        self.formulation_descriptor: Optional[FormulationDescriptor] = \
            formulation_descriptor
Ejemplo n.º 17
0
class NthBiggestComponentQuantityColumn(
        Serializable["NthBiggestComponentQuantityColumn"], Column):
    """[ALPHA] Quantity of the Nth biggest component.

    The column is empty when the composition has fewer than N components.
    All constructor arguments are keyword-only.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    n: int
        index of the component quantity to extract, starting with 1 for the biggest
    normalize: Optional[bool]
        whether to normalize the quantity by the sum of all component amounts. Default is false

    """

    data_source = properties.String('data_source')
    n = properties.Integer("n")
    normalize = properties.Optional(
        properties.Boolean, "normalize")
    typ = properties.String(
        'type',
        default="biggest_component_quantity_column",
        deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names ``data_source``, ``n``, ``normalize`` and ``typ``."""
        attr_names = ["data_source", "n", "normalize", "typ"]
        return attr_names

    def __init__(
            self,
            *,
            data_source: str,
            n: int,
            normalize: Optional[bool] = False):
        self.data_source = data_source
        self.n = n
        self.normalize = normalize
Ejemplo n.º 18
0
class EnumeratedDimension(Serializable['EnumeratedDimension'], Dimension):
    """A dimension whose allowed values form a finite, explicit list.

    Parameters
    ----------
    descriptor: Descriptor
        descriptor of the single dimension
    values: list[str]
        values that can be parsed by the descriptor
    template_id: UUID
        UUID that corresponds to the template in DC

    """

    descriptor = properties.Object(Descriptor, 'descriptor')
    values = properties.List(properties.String(), 'list')
    typ = properties.String(
        'type',
        default='EnumeratedDimension',
        deserializable=False,
    )
    # NOTE(review): uuid4() runs once, when this class body is executed, so the default
    # passed here is one fixed UUID for the life of the process -- confirm this is intended.
    template_id = properties.Optional(
        properties.UUID,
        'template_id',
        default=uuid4(),
    )

    def __init__(self, descriptor: Descriptor, values: List[str],
                 template_id: Optional[UUID] = None):
        # template_id is stored as given; None is kept as None.
        self.descriptor: Descriptor = descriptor
        self.values: List[str] = values
        self.template_id: Optional[UUID] = template_id
Ejemplo n.º 19
0
class ComponentQuantityColumn(Serializable["ComponentQuantityColumn"], Column):
    """[ALPHA] Column holding the quantity of one named component.

    When the component is absent from the composition, the value in the column is 0.0.

    Parameters
    ----------
    data_source: str
        name of the variable used to populate the column
    component_name: str
        name of the component whose quantity is extracted
    normalize: Optional[bool]
        whether the quantity is normalized by the sum of all component amounts.
        Defaults to False

    """

    data_source = properties.String("data_source")
    component_name = properties.String("component_name")
    normalize = properties.Optional(properties.Boolean, "normalize")
    typ = properties.String(
        "type",
        default="component_quantity_column",
        deserializable=False,
    )

    def _attrs(self) -> List[str]:
        # Attribute names reported for this column.
        attribute_names = ["data_source", "component_name", "normalize", "typ"]
        return attribute_names

    def __init__(self, *, data_source: str, component_name: str,
                 normalize: Optional[bool] = False):
        # Every argument is keyword-only.
        self.data_source = data_source
        self.component_name = component_name
        self.normalize = normalize
Ejemplo n.º 20
0
class AttributeByTemplateAndObjectTemplate(
        Serializable['AttributeByTemplateAndObjectTemplate'], Variable):
    """[ALPHA] Attribute marked by an attribute template and an object template.

    For example, one property may be measured by two different measurement techniques.  In this
    case, that property would have the same attribute template.  Filtering by measurement
    templates, which identify the measurement techniques, disambiguates the technique used to
    measure that otherwise ambiguous property.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    object_template: LinkByUID
        template that identifies the associated object
    attribute_constraints: Optional[list[list[LinkByUID, Bounds]]]
        constraints on object attributes in the target object that must be satisfied. Constraints
        are expressed as Bounds.  Attributes are expressed with links. The attribute that the
        variable is being set to may be the target of a constraint as well.
        Defaults to None.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    object_template = properties.Object(LinkByUID, 'object_template')
    # Each constraint is a two-element list: a link to the attribute, then its bounds.
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ), 'attribute_constraints')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_by_object", deserializable=False)

    def _attrs(self) -> List[str]:
        # Attribute names reported for this variable.
        return ["name", "headers", "attribute_template", "object_template",
                "attribute_constraints", "type_selector", "typ"]

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 attribute_template: LinkByUID,
                 object_template: LinkByUID,
                 attribute_constraints: Optional[List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        # Every argument is keyword-only; attribute_constraints may legitimately be None,
        # hence the explicit Optional annotation.
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.object_template = object_template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
Ejemplo n.º 21
0
class GemTable(Resource['Table']):
    """A 2-dimensional projection of data.

    GEM Tables are the basic unit used to flatten and manipulate data objects.
    Data objects can represent complex materials data, but that format
    is NOT conducive to analysis and machine learning. GEM Tables
    'flatten' data objects into useful projections.

    Attributes
    ----------
    uid: Optional[UUID]
        Unique uuid4 identifier of this GEM Table.
    version: Optional[int]
        Version number of the GEM Table
    download_url: Optional[str]
        Url pointing to the location of the GEM Table's contents.
        This is an expiring download link and is not unique.

    """

    _response_key = 'table'

    uid = properties.Optional(properties.UUID(), 'id')
    version = properties.Optional(properties.Integer, 'version')
    download_url = properties.Optional(properties.String, 'signed_download_url')

    def __init__(self):
        # A fresh instance carries no identifier, version or download link.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        return f'<GEM Table {self.uid!r}, version {self.version}>'

    def resource_type(self) -> str:
        """Get the access control resource type of this resource."""
        return 'TABLE'

    @deprecation.deprecated(deprecated_in="0.16.0", details="Use TableCollection.read() instead")
    def read(self, local_path):
        """[DEPRECATED] Use TableCollection.read() instead."""  # noqa: D402
        # Fetch the content behind the (rewritten) download link and store it at local_path.
        # NOTE(review): the HTTP status of the response is not checked before writing.
        source_url = rewrite_s3_links_locally(self.download_url)
        payload = requests.get(source_url).content
        write_file_locally(payload, local_path)
Ejemplo n.º 22
0
class DummyDescriptor(object):
    """Plain holder of several container-style ``properties`` declarations.

    NOTE(review): presumably a fixture for exercising property construction with
    unusual argument forms (classes vs. instances) -- confirm against its users.
    """

    # Mapping built from a Float instance (first argument) and the String class (second).
    dummy_map = properties.Mapping(properties.Float(), properties.String)
    # List built from the Float class, with the String class as second positional argument.
    dummy_list = properties.List(properties.Float, properties.String)
    # Set parameterised with the type of a freshly created Float instance.
    dummy_set = properties.Set(type(properties.Float()))
    # LinkOrElse constructed without arguments.
    link_or_else = properties.LinkOrElse()
    # Mapping whose first argument is an Optional wrapping the String class.
    map_collection_key = properties.Mapping(
        properties.Optional(properties.String), properties.Integer)
    # Mixed list with a single element spec: an Integer whose default is 100.
    specified_mixed_list = properties.SpecifiedMixedList(
        [properties.Integer(default=100)])
Ejemplo n.º 23
0
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are built from saved models; users should not instantiate them.

    Parameters
    ----------
    name: str
        name of the model
    type_: str
        type of the model (e.g., "ML Model", "Featurizer", etc.)
    inputs: List[Descriptor]
        input descriptors
    outputs: List[Descriptor]
        output descriptors
    model_settings: dict
        model settings as a dictionary (details depend on model type)
    feature_importances: List[FeatureImportanceReport]
        feature importance reports, one for each output
    predictor_name: str
        name of the predictor that created this model
    predictor_uid: Optional[uuid]
        uid of the predictor that created this model

    """

    name = properties.String('name')
    type_ = properties.String('type')
    # NOTE(review): inputs/outputs are declared as lists of String properties, while the
    # constructor annotates them as List[Descriptor] -- confirm which one is intended.
    inputs = properties.List(properties.String(), 'inputs')
    outputs = properties.List(properties.String(), 'outputs')
    model_settings = properties.Raw('model_settings')
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport),
        'feature_importances',
    )
    predictor_name = properties.String('predictor_configuration_name', default='')
    predictor_uid = properties.Optional(properties.UUID(), 'predictor_configuration_uid')

    def __init__(self,
                 name: str,
                 type_: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 model_settings: Dict[str, Any],
                 feature_importances: List[FeatureImportanceReport],
                 predictor_name: str,
                 predictor_uid: Optional[UUID] = None):
        # Identity of the model.
        self.name = name
        self.type_ = type_
        # What the model consumes and produces.
        self.inputs = inputs
        self.outputs = outputs
        # Configuration and diagnostics.
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        # The predictor this model belongs to.
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid

    def __str__(self):
        return f'<ModelSummary {self.name!r}>'
Ejemplo n.º 24
0
class RealDescriptor(Serializable['RealDescriptor'], Descriptor):
    """A descriptor to hold real-valued numbers.

    Parameters
    ----------
    key: str
        the key corresponding to a descriptor
    lower_bound: float
        inclusive lower bound for valid real values
    upper_bound: float
        inclusive upper bound for valid real values
    units: Optional[str]
        units of the values. Leaving it as None is deprecated: a DeprecationWarning is
        emitted and the empty string (dimensionless) is used instead

    """

    lower_bound = properties.Float('lower_bound')
    upper_bound = properties.Float('upper_bound')
    units = properties.Optional(properties.String, 'units', default='')
    typ = properties.String('type', default='Real', deserializable=False)

    def __eq__(self, other):
        """Return True when `other` exposes equal key, bounds, units and type."""
        try:
            attrs = ["key", "lower_bound", "upper_bound", "units", "typ"]
            return all([
                self.__getattribute__(key) == other.__getattribute__(key)
                for key in attrs
            ])
        except AttributeError:
            # `other` lacks one of the compared attributes, so it cannot be equal.
            return False

    def __init__(self,
                 key: str,
                 lower_bound: float,
                 upper_bound: float,
                 units: Optional[str] = None):
        self.key: str = key
        self.lower_bound: float = lower_bound
        self.upper_bound: float = upper_bound

        if units is None:
            # Adjacent string literals rather than a backslash continuation inside one
            # literal, which would embed the source indentation in the message.
            msg = ("Default of dimensionless is deprecated; "
                   "please specify an empty string explicitly.")
            # stacklevel=2 attributes the warning to the code that built the descriptor.
            warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
            self.units = ""
        else:
            self.units = units

    def __str__(self):
        return "<RealDescriptor {!r}>".format(self.key)

    def __repr__(self):
        return "RealDescriptor({}, {}, {}, {})".format(self.key,
                                                       self.lower_bound,
                                                       self.upper_bound,
                                                       self.units)
Ejemplo n.º 25
0
class AttributeByTemplate(Serializable['AttributeByTemplate'], Variable):
    """[ALPHA] Attribute marked by an attribute template.

    Parameters
    ----------
    name: str
        short human-readable name used when referencing the variable
    headers: list[str]
        sequence of column headers
    template: LinkByUID
        attribute template identifying the attribute to assign to the variable
    attribute_constraints: list[list[LinkByUID, Bounds]]
        constraints on object attributes in the target object that must be satisfied.
        Constraints are expressed as Bounds and attributes as links. The attribute that
        the variable is being set to may itself be the target of a constraint.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching, defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    template = properties.Object(LinkByUID, 'template')
    # Each constraint is a two-element list: a link to the attribute, then its bounds.
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ),
        'attribute_constraints',
    )
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_by_template", deserializable=False)

    def _attrs(self) -> List[str]:
        # Attribute names reported for this variable.
        attribute_names = ["name", "headers", "template", "attribute_constraints",
                           "type_selector", "typ"]
        return attribute_names

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 template: LinkByUID,
                 attribute_constraints: Optional[List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        # Every argument is keyword-only.
        self.name = name
        self.headers = headers
        self.template = template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
Ejemplo n.º 26
0
class Table(Resource['Table']):
    """A 2-dimensional projection of data.

    Tables are the basic unit used to flatten and manipulate data objects.
    Data objects can represent complex materials data, but that format
    is NOT conducive to analysis and machine learning. Tables
    'flatten' data objects into useful projections.

    Attributes
    ----------
    uid: Optional[UUID]
        Unique uuid4 identifier of this Table.
    version: Optional[int]
        Version number of the Table
    download_url: Optional[str]
        Url pointing to the location of the Table's contents

    """

    _response_key = 'table'

    uid = properties.Optional(properties.UUID(), 'id')
    version = properties.Optional(properties.Integer, 'version')
    download_url = properties.Optional(properties.String, 'signed_download_url')

    def __init__(self):
        # A fresh instance carries no identifier, version or download link.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        # TODO: Change this to name once that's added to the table model
        return f'<Table {self.uid!r}>'

    def read(self, local_path):
        """Read the Table file from S3."""
        # Fetch the content behind the (rewritten) download link and store it at local_path.
        source_url = rewrite_s3_links_locally(self.download_url)
        payload = requests.get(source_url).content
        write_file_locally(payload, local_path)
Ejemplo n.º 27
0
class ScalarRangeConstraint(Serializable['ScalarRangeConstraint'], Constraint):
    """[ALPHA] Represents an inequality constraint on a scalar-valued material attribute.

    Note that the constructor takes ``max`` before ``min`` positionally.

    Parameters
    ----------
    descriptor_key: str
        the key corresponding to a descriptor
    max: float
        the maximum value in the range
    min: float
        the minimum value in the range
    min_inclusive: bool
        if True, will include the min value in the range
    max_inclusive: bool
        if True, will include the max value in the range
    session: Optional[Session]
        session kept on the instance as ``session``

    """

    descriptor_key = properties.String('descriptor_key')
    min = properties.Optional(properties.Float, 'min')
    max = properties.Optional(properties.Float, 'max')
    min_inclusive = properties.Boolean('min_inclusive')
    max_inclusive = properties.Boolean('max_inclusive')
    typ = properties.String('type', default='ScalarRange')

    def __init__(self, descriptor_key: str,
                 max: Optional[float] = None, min: Optional[float] = None,
                 min_inclusive: Optional[bool] = True, max_inclusive: Optional[bool] = True,
                 session: Optional[Session] = None):
        self.descriptor_key = descriptor_key
        # Range limits; either one may be None.
        self.max = max
        self.min = min
        # Whether each limit itself belongs to the range.
        self.min_inclusive = min_inclusive
        self.max_inclusive = max_inclusive
        self.session: Optional[Session] = session

    def __str__(self):
        return f'<ScalarRangeConstraint {self.descriptor_key!r}>'
Ejemplo n.º 28
0
class GemTable(Resource['Table']):
    """A 2-dimensional projection of data.

    GEM Tables are the basic unit used to flatten and manipulate data objects.
    Data objects can represent complex materials data, but that format
    is NOT conducive to analysis and machine learning. GEM Tables
    'flatten' data objects into useful projections.
    """

    _response_key = 'table'
    _resource_type = ResourceTypeEnum.TABLE

    uid = properties.Optional(properties.UUID(), 'id')
    """:Optional[UUID]: unique Citrine id of this GEM Table"""
    version = properties.Optional(properties.Integer, 'version')
    """:Optional[int]: Version number of the GEM Table.
    The first table built from a given config is version 1."""
    download_url = properties.Optional(properties.String, 'signed_download_url')
    """:Optional[str]: Url pointing to the location of the GEM Table's contents.
    This is an expiring download link and is not unique."""

    def __init__(self):
        # A fresh instance carries no identifier, version or download link.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        return f'<GEM Table {self.uid!r}, version {self.version}>'

    @deprecation.deprecated(deprecated_in="0.16.0",
                            details="Use TableCollection.read() instead")
    def read(self, local_path):
        """[DEPRECATED] Use TableCollection.read() instead."""  # noqa: D402
        # Fetch the content behind the (rewritten) download link and store it at local_path.
        source_url = rewrite_s3_links_locally(self.download_url)
        payload = requests.get(source_url).content
        write_file_locally(payload, local_path)
Ejemplo n.º 29
0
class Processor(Module):
    """A Citrine Processor describes how a design space is searched.

    Abstract type that returns the proper type given a serialized dict.

    """

    _project_id: Optional[UUID] = None
    _session: Optional[Session] = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    """:Optional[UUID]: Citrine Platform unique identifier"""
    name = properties.String('config.name')
    description = properties.Optional(properties.String(),
                                      'config.description')

    @classmethod
    def get_type(cls, data) -> Type['Processor']:
        """Return the processor subtype named by ``data['config']['type']``.

        Parameters
        ----------
        data: dict
            serialized processor; its ``config.type`` entry selects the subtype

        Raises
        ------
        KeyError
            if the type name is not 'Grid', 'Enumerated' or 'ContinuousSearch'

        """
        return {
            'Grid': GridProcessor,
            'Enumerated': EnumeratedProcessor,
            'ContinuousSearch': MonteCarloProcessor
        }[data['config']['type']]

    def _attrs(self) -> List[str]:
        # These are Python attribute names, not serialized keys: the identifier lives in
        # `uid` ('id' is only its serialization path). Listing 'id' here would make __eq__
        # raise AttributeError internally and therefore always return False.
        return ["name", "description", "uid"]  # pragma: no cover

    def __eq__(self, other):
        """Return True when `other` has equal values for every name in ``_attrs()``."""
        try:
            return all([
                self.__getattribute__(key) == other.__getattribute__(key)
                for key in self._attrs()
            ])
        except AttributeError:
            # `other` lacks one of the compared attributes, so it cannot be equal.
            return False
Ejemplo n.º 30
0
class RealDescriptor(Serializable['RealDescriptor'], Descriptor):
    """[ALPHA] A descriptor to hold real-valued numbers.

    Parameters
    ----------
    key: str
        the key corresponding to a descriptor
    lower_bound: float
        inclusive lower bound for valid real values
    upper_bound: float
        inclusive upper bound for valid real values
    units: str
        units of the values; defaults to the empty string

    """

    key = properties.String('descriptor_key')
    lower_bound = properties.Float('lower_bound')
    upper_bound = properties.Float('upper_bound')
    units = properties.Optional(properties.String, 'units', default='')
    typ = properties.String('type', default='Real', deserializable=False)

    def __eq__(self, other):
        compared = ("key", "lower_bound", "upper_bound", "units", "typ")
        try:
            # Collect every comparison first so a missing attribute on either side is
            # detected regardless of earlier mismatches.
            matches = []
            for name in compared:
                matches.append(self.__getattribute__(name) == other.__getattribute__(name))
            return all(matches)
        except AttributeError:
            return False

    def __init__(self, key: str, lower_bound: float, upper_bound: float, units: str = ''):
        self.key: str = key
        self.lower_bound: float = lower_bound
        self.upper_bound: float = upper_bound
        self.units: Optional[str] = units

    def __str__(self):
        return f"<RealDescriptor {self.key!r}>"

    def __repr__(self):
        return (f"RealDescriptor({self.key}, {self.lower_bound}, "
                f"{self.upper_bound}, {self.units})")