Esempio n. 1
0
class DesignWorkflow(Resource['DesignWorkflow'], Workflow):
    """Object that generates scored materials that may approach higher values of the score.

    Parameters
    ----------
    name: str
        the name of the workflow
    design_space_id: UUID
        the UUID corresponding to the design space to use
    processor_id: UUID
        the UUID corresponding to the processor to use
    predictor_id: UUID
        the UUID corresponding to the predictor to use
    project_id: Optional[UUID]
        the UUID corresponding to the project to use; must be set before
        ``executions`` is accessed
    session: Optional[Session]
        the session stored on the workflow and handed to its executions collection;
        a fresh default ``Session`` is created when omitted

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('display_name')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)

    # TODO: Figure out how to make these fields richer/use actual objects
    design_space_id = properties.UUID('modules.design_space_id')
    processor_id = properties.UUID('modules.processor_id')
    predictor_id = properties.UUID('modules.predictor_id')

    # The project_id is used to keep a reference to the project under which the workflow was
    # created. It is currently unclear if this is the best way to do this. Another option might
    # be to have all objects have a context object, but that also seems to have downsides.
    def __init__(self,
                 name: str,
                 design_space_id: UUID,
                 processor_id: UUID,
                 predictor_id: UUID,
                 project_id: Optional[UUID] = None,
                 session: Optional[Session] = None):
        self.name = name
        self.design_space_id = design_space_id
        self.processor_id = processor_id
        self.predictor_id = predictor_id
        self.project_id = project_id
        # A ``Session()`` default in the signature is evaluated once, when the class is
        # defined, and would then be shared by every workflow. Build the fallback per instance.
        self.session = Session() if session is None else session

    def __str__(self):
        return '<DesignWorkflow {!r}>'.format(self.name)

    @property
    def executions(self) -> WorkflowExecutionCollection:
        """Return a resource representing all visible executions of this workflow.

        Raises
        ------
        AttributeError
            if no ``project_id`` has been set on this workflow

        """
        if getattr(self, 'project_id', None) is None:
            raise AttributeError(
                'Cannot initialize execution without project reference!')
        return WorkflowExecutionCollection(self.project_id, self.uid,
                                           self.session)
Esempio n. 2
0
class WorkflowExecution(Resource['WorkflowExecution']):
    """[ALPHA] One execution of a Citrine workflow.

    Parameters
    ----------
    uid: str
        Unique identifier of the workflow execution
    project_id: str
        Unique identifier of the project that contains the workflow execution
    workflow_id: str
        Unique identifier of the workflow that was executed
    session: Session
        Session through which status and results are requested
    version_number: int
        Integer identifier that increases each time the workflow is executed.  The first execution
        has version_number = 1.

    """

    _response_key = 'WorkflowExecutions'

    uid = properties.UUID('id')
    project_id = properties.UUID('project_id', deserializable=False)
    workflow_id = properties.UUID('workflow_id', deserializable=False)
    version_number = properties.Integer("version_number")

    def __init__(self,
                 uid: Optional[str] = None,
                 project_id: Optional[str] = None,
                 workflow_id: Optional[str] = None,
                 session: Optional[Session] = None,
                 version_number: Optional[int] = None):
        self.uid = uid
        self.project_id = project_id
        self.workflow_id = workflow_id
        self.session = session
        self.version_number = version_number

    def __str__(self):
        uid_text = str(self.uid)
        return '<WorkflowExecution {!r}>'.format(uid_text)

    def _path(self):
        """Build the URL path of this execution from its project, workflow and own ids."""
        template = '/projects/{project_id}/workflows/{workflow_id}/executions/{execution_id}'
        return template.format(project_id=self.project_id,
                               workflow_id=self.workflow_id,
                               execution_id=self.uid)

    def status(self):
        """Get the current status of this execution."""
        status_payload = self.session.get_resource(self._path() + "/status")
        return WorkflowExecutionStatus.build(status_payload)

    def results(self):
        """Get the results of this execution."""
        results_path = self._path() + "/results"
        return self.session.get_resource(results_path)
Esempio n. 3
0
class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
    """[ALPHA] A data source backed by a GEM Table hosted on the data platform.

    Parameters
    ----------
    table_id: UUID
        Unique identifier for the GEM Table
    table_version: Union[str,int]
        Version number for the GEM Table, which starts at 1 rather than 0.
        Strings are cast to ints.
    formulation_descriptor: Optional[FormulationDescriptor]
        Optional descriptor used to store formulations emitted by the data source.

    """

    typ = properties.String('type', default='hosted_table_data_source', deserializable=False)
    table_id = properties.UUID("table_id")
    table_version = properties.Integer("table_version")
    formulation_descriptor = properties.Optional(properties.Object(FormulationDescriptor),
                                                 "formulation_descriptor")

    def _attrs(self) -> List[str]:
        """List the attribute names reported for this data source."""
        attribute_names = ["table_id", "table_version", "typ"]
        return attribute_names

    def __init__(self,
                 table_id: UUID,
                 table_version: Union[int, str],
                 formulation_descriptor: Optional[FormulationDescriptor] = None):
        self.table_id = table_id
        self.table_version = table_version
        self.formulation_descriptor = formulation_descriptor
Esempio n. 4
0
class EnumeratedDimension(Serializable['EnumeratedDimension'], Dimension):
    """Finite enumerated dimension.

    The dimension is defined by a template ID, a material descriptor, and a list of values
    for that descriptor.

    Parameters
    ----------
    descriptor: Descriptor
        a descriptor of the single dimension
    values: list[str]
        list of values that can be parsed by the descriptor
    template_id: UUID
        UUID that corresponds to the template in DC; a random UUID is generated when omitted

    """

    descriptor = properties.Object(Descriptor, 'descriptor')
    values = properties.List(properties.String(), 'list')
    typ = properties.String('type', default='EnumeratedDimension', deserializable=False)
    # NOTE(review): ``uuid4()`` here runs once, when the class body executes, so this default
    # is a single fixed UUID for the life of the process -- confirm that is intended.
    template_id = properties.UUID('template_id', default=uuid4())

    def __init__(self,
                 descriptor: Descriptor,
                 values: List[str],
                 template_id: Optional[UUID] = None):
        self.descriptor = descriptor
        self.values = values
        # Fall back to a freshly generated id when the caller supplies none.
        self.template_id = template_id if template_id else uuid4()
Esempio n. 5
0
class ContinuousDimension(Serializable['ContinuousDimension'], Dimension):
    """Continuous dimension defined by a template ID, material descriptor, and bounds.

    Parameters
    ----------
    descriptor: RealDescriptor
        a descriptor of the single dimension
    lower_bound: Optional[float]
        inclusive lower bound; defaults to the descriptor's lower bound when omitted
    upper_bound: Optional[float]
        inclusive upper bound; defaults to the descriptor's upper bound when omitted
    template_id: Optional[UUID]
        UUID that corresponds to the template in DC; a random UUID is generated when omitted

    """

    descriptor = properties.Object(RealDescriptor, 'descriptor')
    lower_bound = properties.Float('lower_bound')
    upper_bound = properties.Float('upper_bound')
    typ = properties.String('type', default='ContinuousDimension', deserializable=False)
    template_id = properties.UUID('template_id', default=uuid4())

    def __init__(self,
                 descriptor: RealDescriptor,
                 lower_bound: Optional[float] = None,
                 upper_bound: Optional[float] = None,
                 template_id: Optional[UUID] = None):
        self.descriptor: RealDescriptor = descriptor
        # Compare against None rather than relying on truthiness: an explicit bound of 0.0
        # is falsy and would otherwise be silently replaced by the descriptor's bound.
        self.lower_bound: float = (
            descriptor.lower_bound if lower_bound is None else lower_bound
        )
        self.upper_bound: float = (
            descriptor.upper_bound if upper_bound is None else upper_bound
        )
        self.template_id: UUID = template_id or uuid4()
Esempio n. 6
0
class ModuleRef(Serializable['ModuleRef']):
    """[ALPHA] A reference to a Module by UID.

    Parameters
    ----------
    module_uid: str
        unique identifier of the referenced module

    """

    module_uid = properties.UUID('module_uid')

    def __init__(self, module_uid: str):
        # The reference carries nothing beyond the identifier itself.
        self.module_uid = module_uid
Esempio n. 7
0
class EnumeratedDesignSpace(Resource['EnumeratedDesignSpace'], DesignSpace):
    """Design space composed of an explicit enumeration of candidate materials to score.

    Note that every candidate must have exactly the descriptors in the list populated
    (no more, no less) to be included.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    descriptors: list[Descriptor]
        the list of descriptors included in the candidates of the design space
    data: list[dict]
        list of dicts of the shape `{<descriptor_key>: <descriptor_value>}` where each dict
        corresponds to a candidate in the design space
    session: Optional[Session]
        the session stored on the design space; a fresh default ``Session`` is created
        when omitted

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(),
                                      'config.description')
    descriptors = properties.List(properties.Object(Descriptor),
                                  'config.descriptors')
    data = properties.List(
        properties.Mapping(properties.String, properties.Raw), 'config.data')

    typ = properties.String('config.type',
                            default='EnumeratedDesignSpace',
                            deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID(
        'schema_id', default=UUID('f3907a58-aa46-462c-8837-a5aa9605e79e'))

    def __init__(self,
                 name: str,
                 description: str,
                 descriptors: List[Descriptor],
                 data: List[Mapping[str, Any]],
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.descriptors: List[Descriptor] = descriptors
        self.data: List[Mapping[str, Any]] = data
        # A ``Session()`` default in the signature is evaluated once, when the class is
        # defined, and would then be shared by every instance. Build the fallback per instance.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key of the dump."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<EnumeratedDesignSpace {!r}>'.format(self.name)
Esempio n. 8
0
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are constructed from saved models and should not be user-instantiated.

    Parameters
    ----------
    name: str
        the name of the model
    type_: str
        the type of the model (e.g., "ML Model", "Featurizer", etc.)
    inputs: List[Descriptor]
        list of input descriptors (the serialized field is declared as a list of strings)
    outputs: List[Descriptor]
        list of output descriptors (the serialized field is declared as a list of strings)
    model_settings: dict
        settings of the model, as a dictionary (details depend on model type)
    feature_importances: List[FeatureImportanceReport]
        list of feature importance reports, one for each output
    predictor_name: str
        the name of the predictor that created this model
    predictor_uid: Optional[uuid]
        the uid of the predictor that created this model

    """

    name = properties.String('name')
    type_ = properties.String('type')
    inputs = properties.List(properties.String(), 'inputs')
    outputs = properties.List(properties.String(), 'outputs')
    model_settings = properties.Raw('model_settings')
    feature_importances = properties.List(properties.Object(FeatureImportanceReport),
                                          'feature_importances')
    predictor_name = properties.String('predictor_configuration_name', default='')
    predictor_uid = properties.Optional(properties.UUID(), 'predictor_configuration_uid')

    def __init__(self,
                 name: str,
                 type_: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 model_settings: Dict[str, Any],
                 feature_importances: List[FeatureImportanceReport],
                 predictor_name: str,
                 predictor_uid: Optional[UUID] = None):
        # Identity of the model itself.
        self.name = name
        self.type_ = type_
        # What the model consumes and produces.
        self.inputs = inputs
        self.outputs = outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        # The predictor this model came from.
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid

    def __str__(self):
        label = self.name
        return '<ModelSummary {!r}>'.format(label)
Esempio n. 9
0
# The generic parameter names the class itself; it previously read 'GridProcessor',
# a copy-paste leftover.
class MonteCarloProcessor(Serializable['MonteCarloProcessor'], Processor):
    """[ALPHA] Using a Monte Carlo optimizer to search for the best candidate.

    The moves that the MonteCarlo optimizer makes are inferred from the descriptors in the
    design space.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    session: Optional[Session]
        session stored on the processor, if any

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(),
                                      'config.description')
    typ = properties.String('config.type',
                            default='ContinuousSearch',
                            deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)
    experimental = properties.Boolean("experimental",
                                      serializable=False,
                                      default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id', default=UUID('d8ddfe73-10f7-4456-9de9-9a1638bae403'))

    def _attrs(self) -> List[str]:
        """List the attribute names reported for this processor."""
        return ["name", "description", "typ"]

    def __init__(self,
                 name: str,
                 description: str,
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key of the dump."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<MonteCarloProcessor {!r}>'.format(self.name)
Esempio n. 10
0
# The generic parameter names the class itself; it previously read 'AraJobStatus',
# which does not match this class.
class JobSubmissionResponse(Resource['JobSubmissionResponse']):
    """[ALPHA] a response to a submit-job request for the job submission framework.

    This is returned as a successful response from the remote service.

    Parameters
    ----------
    job_id: UUID
        job id of the job submission request

    """

    job_id = properties.UUID("job_id")
    """:UUID: job id of the job submission request"""

    def __init__(self, job_id: UUID):
        self.job_id = job_id
Esempio n. 11
0
class ProductDesignSpace(Resource['ProductDesignSpace'], DesignSpace):
    """[ALPHA] An outer product of univariate dimensions, either continuous or enumerated.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    dimensions: list[Dimension]
        univariate dimensions that are factors of the design space; can be enumerated or continuous
    session: Optional[Session]
        the session stored on the design space; a fresh default ``Session`` is created
        when omitted

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    dimensions = properties.List(properties.Object(Dimension), 'config.dimensions')
    typ = properties.String('config.type', default='Univariate', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False
    )
    archived = properties.Boolean('archived', default=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID('schema_id', default=UUID('6c16d694-d015-42a7-b462-8ef299473c9a'))

    def __init__(self,
                 name: str,
                 description: str,
                 dimensions: List[Dimension],
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.dimensions: List[Dimension] = dimensions
        # A ``Session()`` default in the signature is evaluated once, when the class is
        # defined, and would then be shared by every instance. Build the fallback per instance.
        self.session: Session = Session() if session is None else session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key of the dump."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<ProductDesignSpace {!r}>'.format(self.name)
Esempio n. 12
0
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are constructed from saved models and should not be user-instantiated.
    """

    name = properties.String('name')
    """:str: the name of the model"""

    type_ = properties.String('type')
    """:str: the type of the model (e.g., "ML Model", "Featurizer", etc.)"""

    inputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        'inputs')
    """:List[Descriptor]: list of input descriptors"""

    outputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        'outputs')
    """:List[Descriptor]: list of output descriptors"""

    model_settings = properties.Raw('model_settings')
    """:dict: model settings, as a dictionary (keys depend on the model type)"""

    feature_importances = properties.List(properties.Object(FeatureImportanceReport),
                                          'feature_importances')
    """:List[FeatureImportanceReport]: feature importance reports for each output"""

    predictor_name = properties.String('predictor_configuration_name', default='')
    """:str: the name of the predictor that created this model"""

    predictor_uid = properties.Optional(properties.UUID(), 'predictor_configuration_uid')
    """:Optional[UUID]: the unique Citrine id of the predictor that created this model"""

    training_data_count = properties.Optional(properties.Integer, "training_data_count")
    """:int: Number of rows in the training data for the model, if applicable."""

    def __init__(self,
                 name: str,
                 type_: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 model_settings: Dict[str, Any],
                 feature_importances: List[FeatureImportanceReport],
                 predictor_name: str,
                 predictor_uid: Optional[UUID] = None):
        # Identity of the model itself.
        self.name = name
        self.type_ = type_
        # What the model consumes and produces.
        self.inputs = inputs
        self.outputs = outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        # The predictor this model came from. ``training_data_count`` is not assigned here.
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid

    def __str__(self):
        label = self.name
        return '<ModelSummary {!r}>'.format(label)
class WorkflowExecution(Resource['WorkflowExecution']):
    """A single execution of a Citrine workflow.

    Parameters
    ----------
    uid: str
        Unique identifier of the workflow execution
    project_id: str
        Unique identifier of the project used in the execution's URL path
    workflow_id: str
        Unique identifier of the workflow used in the execution's URL path
    session: Session
        Session through which status and results are requested

    """

    _response_key = 'WorkflowExecutions'

    uid = properties.UUID('id')
    project_id = properties.UUID('project_id', deserializable=False)
    workflow_id = properties.UUID('workflow_id', deserializable=False)

    def __init__(self,
                 uid: Optional[str] = None,
                 project_id: Optional[str] = None,
                 workflow_id: Optional[str] = None,
                 session: Optional[Session] = None):
        self.uid = uid
        self.project_id = project_id
        self.workflow_id = workflow_id
        self.session = session

    def __str__(self):
        uid_text = str(self.uid)
        return '<WorkflowExecution {!r}>'.format(uid_text)

    def _path(self):
        """Build the URL path of this execution from its project, workflow and own ids."""
        template = '/projects/{project_id}/workflows/{workflow_id}/executions/{execution_id}'
        return template.format(project_id=self.project_id,
                               workflow_id=self.workflow_id,
                               execution_id=self.uid)

    def status(self):
        """Get the current status of this execution."""
        status_payload = self.session.get_resource(self._path() + "/status")
        return WorkflowExecutionStatus.build(status_payload)

    def results(self):
        """Get the results of this execution."""
        results_path = self._path() + "/results"
        return self.session.get_resource(results_path)
Esempio n. 14
0
class GridProcessor(Serializable['GridProcessor'], Processor):
    """Scan over a finite grid of materials drawn from the design space.

    Generates a finite set of materials from the domain defined by the design space, then scans
    over the set of materials. To create a finite set of materials from continuous dimensions, a
    uniform grid is created between the bounds of the descriptor. The number of points is
    specified by `grid_sizes`.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    grid_sizes: dict[str, int]
        the number of points to select along each dimension of the grid, by dimension name
    session: Optional[Session]
        session stored on the processor, if any

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    typ = properties.String('config.type', default='Grid', deserializable=False)
    grid_sizes = properties.Mapping(properties.String,
                                    properties.Integer,
                                    'config.grid_dimensions')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID('schema_id',
                                default=UUID('272791a5-5468-4344-ac9f-2811d9266a4d'))

    def __init__(self,
                 name: str,
                 description: str,
                 grid_sizes: Mapping[str, int],
                 session: Optional[Session] = None):
        self.name = name
        self.description = description
        self.grid_sizes = grid_sizes
        self.session = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key of the dump."""
        config_section = data['config']
        data['display_name'] = config_section['name']
        return data

    def __str__(self):
        label = self.name
        return '<GridProcessor {!r}>'.format(label)
Esempio n. 15
0
class EnumeratedProcessor(Serializable['EnumeratedProcessor'], Processor):
    """Process a design space by enumerating up to `max_size` materials.

    Materials are enumerated from the domain and each is processed independently.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    max_size: int
        maximum number of samples that can be enumerated over; when omitted (or otherwise
        falsy) the largest signed 32-bit integer is used
    session: Optional[Session]
        session stored on the processor, if any

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    max_size = properties.Integer('config.max_size')
    typ = properties.String('config.type', default='Enumerated', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID('schema_id',
                                default=UUID('307b88a2-fd50-4d27-ae91-b8d6282f68f7'))

    def __init__(self,
                 name: str,
                 description: str,
                 max_size: Optional[int] = None,
                 session: Optional[Session] = None):
        self.name = name
        self.description = description
        # 2**31 - 1 = 2147483647 (max 32-bit integer) stands in for a missing size.
        self.max_size = max_size if max_size else 2**31 - 1
        self.session = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level ``display_name`` key of the dump."""
        config_section = data['config']
        data['display_name'] = config_section['name']
        return data

    def __str__(self):
        label = self.name
        return '<EnumeratedProcessor {!r}>'.format(label)
Esempio n. 16
0
class PredictedVsActualRealPoint(Serializable["PredictedVsActualRealPoint"]):
    """Predicted vs. actual data for a single real-valued data point.

    The constructor takes no arguments and assigns nothing.
    """

    uuid = properties.UUID("uuid")
    """:UUID: Unique Citrine id given to the candidate"""

    identifiers = properties.Set(properties.String, "identifiers")
    """:Set[str]: Set of globally unique identifiers given to the candidate"""

    trial = properties.Integer("trial")
    """:int: 1-based index of the trial this candidate belonged to"""

    fold = properties.Integer("fold")
    """:int: 1-based index of the fold this candidate belonged to"""

    predicted = properties.Object(RealMetricValue, "predicted")
    """:RealMetricValue: Predicted value"""

    actual = properties.Object(RealMetricValue, "actual")
    """:RealMetricValue: Actual value"""

    def __init__(self):
        pass  # pragma: no cover
Esempio n. 17
0
class GemTable(Resource['Table']):
    """A 2-dimensional projection of data.

    GEM Tables are the basic unit used to flatten and manipulate data objects.
    While data objects can represent complex materials data, the format
    is NOT conducive to analysis and machine learning. GEM Tables, however,
    can be used to 'flatten' data objects into useful projections.

    Attributes
    ----------
    uid: Optional[UUID]
        Unique uuid4 identifier of this GEM Table.
    version: Optional[int]
        Version number of the GEM Table
    download_url: Optional[str]
        Url pointing to the location of the GEM Table's contents.
        This is an expiring download link and is not unique.

    """

    _response_key = 'table'

    uid = properties.Optional(properties.UUID(), 'id')
    version = properties.Optional(properties.Integer, 'version')
    download_url = properties.Optional(properties.String, 'signed_download_url')

    def __init__(self):
        # Every field starts out empty.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        template = '<GEM Table {!r}, version {}>'
        return template.format(self.uid, self.version)

    def resource_type(self) -> str:
        """Get the access control resource type of this resource."""
        return 'TABLE'

    @deprecation.deprecated(deprecated_in="0.16.0", details="Use TableCollection.read() instead")
    def read(self, local_path):
        """[DEPRECATED] Use TableCollection.read() instead."""  # noqa: D402
        # Fetch whatever the (rewritten) download URL serves and store it at local_path.
        source_url = rewrite_s3_links_locally(self.download_url)
        payload = requests.get(source_url).content
        write_file_locally(payload, local_path)
class DesignCandidate(Serializable["DesignCandidate"]):
    """A candidate material produced by a design execution.

    This class represents the candidate computed by a design execution.
    The constructor takes no arguments and assigns nothing.
    """

    material_id = properties.UUID('material_id')
    """:UUID: unique Citrine id of the material"""

    identifiers = properties.List(properties.String(), 'identifiers')
    """:List[str]: globally unique identifiers assigned to the material"""

    primary_score = properties.Float('primary_score')
    """:float: numerical score describing how well the candidate satisfies the objectives
    and constraints (higher is better)"""

    material = properties.Object(DesignMaterial, 'material')
    """:DesignMaterial: the material returned by the design workflow"""

    def __init__(self):
        pass  # pragma: no cover
Esempio n. 19
0
class Table(Resource['Table']):
    """A 2-dimensional projection of data.

    Tables are the basic unit used to flatten and manipulate data objects.
    While data objects can represent complex materials data, the format
    is NOT conducive to analysis and machine learning. Tables, however,
    can be used to 'flatten' data objects into useful projections.

    Attributes
    ----------
    uid: Optional[UUID]
        Unique uuid4 identifier of this Table.
    version: Optional[int]
        Version number of the Table
    download_url: Optional[str]
        Url pointing to the location of the Table's contents

    """

    _response_key = 'table'

    uid = properties.Optional(properties.UUID(), 'id')
    version = properties.Optional(properties.Integer, 'version')
    download_url = properties.Optional(properties.String, 'signed_download_url')

    def __init__(self):
        # Every field starts out empty.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        # TODO: Change this to name once that's added to the table model
        identifier = self.uid
        return '<Table {!r}>'.format(identifier)

    def read(self, local_path):
        """Download the Table contents from its download URL and write them to local_path."""
        source_url = rewrite_s3_links_locally(self.download_url)
        payload = requests.get(source_url).content
        write_file_locally(payload, local_path)
Esempio n. 20
0
class PredictedVsActualCategoricalPoint(Serializable["PredictedVsActualCategoricalPoint"]):
    """Predicted vs. actual data for a single categorical data point.

    The constructor takes no arguments and assigns nothing.
    """

    uuid = properties.UUID("uuid")
    """:UUID: Unique Citrine id given to the candidate"""

    identifiers = properties.Set(properties.String, "identifiers")
    """:Set[str]: Set of globally unique identifiers given to the candidate"""

    trial = properties.Integer("trial")
    """:int: 1-based index of the trial this candidate belonged to"""

    fold = properties.Integer("fold")
    """:int: 1-based index of the fold this candidate belonged to"""

    predicted = properties.Mapping(properties.String, properties.Float, "predicted")
    """:Dict[str, float]: Predicted class probabilities defined as a map from each class name
    to its relative frequency"""

    actual = properties.Mapping(properties.String, properties.Float, "actual")
    """:Dict[str, float]: Actual class probabilities defined as a map from each class name
    to its relative frequency"""

    def __init__(self):
        pass  # pragma: no cover
class GemTable(Resource['Table']):
    """A 2-dimensional projection of data.

    GEM Tables are the basic unit used to flatten and manipulate data objects.
    While data objects can represent complex materials data, the format
    is NOT conducive to analysis and machine learning. GEM Tables, however,
    can be used to 'flatten' data objects into useful projections.
    """

    _response_key = 'table'
    _resource_type = ResourceTypeEnum.TABLE

    uid = properties.Optional(properties.UUID(), 'id')
    """:Optional[UUID]: unique Citrine id of this GEM Table"""
    version = properties.Optional(properties.Integer, 'version')
    """:Optional[int]: Version number of the GEM Table.
    The first table built from a given config is version 1."""
    download_url = properties.Optional(properties.String,
                                       'signed_download_url')
    """:Optional[str]: Url pointing to the location of the GEM Table's contents.
    This is an expiring download link and is not unique."""

    def __init__(self):
        # Every field starts unset; nothing is accepted from the caller.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        return '<GEM Table {!r}, version {}>'.format(self.uid, self.version)

    @deprecation.deprecated(deprecated_in="0.16.0",
                            details="Use TableCollection.read() instead")
    def read(self, local_path):
        """[DEPRECATED] Use TableCollection.read() instead."""  # noqa: D402
        # Fetch the contents behind download_url and hand them to write_file_locally.
        data_location = rewrite_s3_links_locally(self.download_url)
        response = requests.get(data_location)
        # Raise requests.HTTPError on a 4xx/5xx response instead of saving the error body
        # as if it were table data.
        response.raise_for_status()
        write_file_locally(response.content, local_path)
Esempio n. 22
0
class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
    """A data source backed by a GEM Table hosted on the data platform.

    Parameters
    ----------
    table_id: UUID
        Unique identifier for the GEM Table
    table_version: Union[str,int]
        Version number for the GEM Table. The first GEM table built from a configuration
        has version = 1. Strings are cast to ints.
    formulation_descriptor: Optional[FormulationDescriptor]
        Optional descriptor used to store formulations emitted by the data source.
        If the data source emits a formulation but this argument is not provided, then a
        default formulation descriptor will be generated. The formulations descriptor, and
        other descriptors, can be retrieved using
        :func:`~citrine.resources.descriptors.DescriptorMethods.descriptors_from_data_source`.

    """

    typ = properties.String(
        'type',
        default='hosted_table_data_source',
        deserializable=False,
    )
    table_id = properties.UUID("table_id")
    table_version = properties.Integer("table_version")
    formulation_descriptor = properties.Optional(properties.Object(FormulationDescriptor),
                                                 "formulation_descriptor")

    def __init__(self,
                 table_id: UUID,
                 table_version: Union[int, str],
                 formulation_descriptor: Optional[FormulationDescriptor] = None):
        # Store each argument on the matching serializable property.
        self.table_id = table_id
        self.table_version = table_version
        self.formulation_descriptor = formulation_descriptor

    def _attrs(self) -> List[str]:
        """Return the attribute names table_id, table_version and typ."""
        attr_names = ["table_id", "table_version", "typ"]
        return attr_names
Esempio n. 23
0
class Dataset(Resource['Dataset']):
    """
    A collection of data objects.

    Datasets are the basic unit of access control. A user with read access to a dataset can view
    every object in that dataset. A user with write access to a dataset can create, update,
    and delete objects in the dataset.

    Parameters
    ----------
    name: str
        Name of the dataset. Can be used for searching.
    summary: str
        A summary of this dataset.
    description: str
        Long-form description of the dataset.
    unique_name: Optional[str]
        An optional, globally unique name that can be used to retrieve the dataset.

    Attributes
    ----------
    uid: UUID
        Unique uuid4 identifier of this dataset.
    deleted: bool
        Flag indicating whether or not this dataset has been deleted.
    created_by: UUID
        ID of the user who created the dataset.
    updated_by: UUID
        ID of the user who last updated the dataset.
    deleted_by: UUID
        ID of the user who deleted the dataset, if it is deleted.
    create_time: int
        Time the dataset was created, in seconds since epoch.
    update_time: int
        Time the dataset was most recently updated, in seconds since epoch.
    delete_time: int
        Time the dataset was deleted, in seconds since epoch, if it is deleted.
    public: bool
        Flag indicating whether the dataset is publicly readable.

    """

    _response_key = 'dataset'
    _resource_type = ResourceTypeEnum.DATASET

    uid = properties.Optional(properties.UUID(), 'id')
    name = properties.String('name')
    unique_name = properties.Optional(properties.String(), 'unique_name')
    summary = properties.String('summary')
    description = properties.String('description')
    deleted = properties.Optional(properties.Boolean(), 'deleted')
    created_by = properties.Optional(properties.UUID(), 'created_by')
    updated_by = properties.Optional(properties.UUID(), 'updated_by')
    deleted_by = properties.Optional(properties.UUID(), 'deleted_by')
    create_time = properties.Optional(properties.Datetime(), 'create_time')
    update_time = properties.Optional(properties.Datetime(), 'update_time')
    delete_time = properties.Optional(properties.Datetime(), 'delete_time')
    public = properties.Optional(properties.Boolean(), 'public')

    def __init__(self,
                 name: str,
                 summary: str,
                 description: str,
                 unique_name: Optional[str] = None):
        self.name: str = name
        self.summary: str = summary
        self.description: str = description
        self.unique_name = unique_name

        # The attributes below should not be set by the user. Instead they will be updated as the
        # dataset interacts with the backend data service
        self.uid = None
        self.deleted = None
        self.created_by = None
        self.updated_by = None
        self.deleted_by = None
        self.create_time = None
        self.update_time = None
        self.delete_time = None
        self.public = None

    def __str__(self):
        return '<Dataset {!r}>'.format(self.name)

    # NOTE(review): the members below read self.project_id and self.session, neither of which is
    # assigned in __init__; presumably both are attached by whatever builds the dataset - confirm.

    @property
    def property_templates(self) -> PropertyTemplateCollection:
        """Return a resource representing all property templates in this dataset."""
        return PropertyTemplateCollection(self.project_id, self.uid,
                                          self.session)

    @property
    def condition_templates(self) -> ConditionTemplateCollection:
        """Return a resource representing all condition templates in this dataset."""
        return ConditionTemplateCollection(self.project_id, self.uid,
                                           self.session)

    @property
    def parameter_templates(self) -> ParameterTemplateCollection:
        """Return a resource representing all parameter templates in this dataset."""
        return ParameterTemplateCollection(self.project_id, self.uid,
                                           self.session)

    @property
    def material_templates(self) -> MaterialTemplateCollection:
        """Return a resource representing all material templates in this dataset."""
        return MaterialTemplateCollection(self.project_id, self.uid,
                                          self.session)

    @property
    def measurement_templates(self) -> MeasurementTemplateCollection:
        """Return a resource representing all measurement templates in this dataset."""
        return MeasurementTemplateCollection(self.project_id, self.uid,
                                             self.session)

    @property
    def process_templates(self) -> ProcessTemplateCollection:
        """Return a resource representing all process templates in this dataset."""
        return ProcessTemplateCollection(self.project_id, self.uid,
                                         self.session)

    @property
    def process_runs(self) -> ProcessRunCollection:
        """Return a resource representing all process runs in this dataset."""
        return ProcessRunCollection(self.project_id, self.uid, self.session)

    @property
    def measurement_runs(self) -> MeasurementRunCollection:
        """Return a resource representing all measurement runs in this dataset."""
        return MeasurementRunCollection(self.project_id, self.uid,
                                        self.session)

    @property
    def material_runs(self) -> MaterialRunCollection:
        """Return a resource representing all material runs in this dataset."""
        return MaterialRunCollection(self.project_id, self.uid, self.session)

    @property
    def ingredient_runs(self) -> IngredientRunCollection:
        """Return a resource representing all ingredient runs in this dataset."""
        return IngredientRunCollection(self.project_id, self.uid, self.session)

    @property
    def process_specs(self) -> ProcessSpecCollection:
        """Return a resource representing all process specs in this dataset."""
        return ProcessSpecCollection(self.project_id, self.uid, self.session)

    @property
    def measurement_specs(self) -> MeasurementSpecCollection:
        """Return a resource representing all measurement specs in this dataset."""
        return MeasurementSpecCollection(self.project_id, self.uid,
                                         self.session)

    @property
    def material_specs(self) -> MaterialSpecCollection:
        """Return a resource representing all material specs in this dataset."""
        return MaterialSpecCollection(self.project_id, self.uid, self.session)

    @property
    def ingredient_specs(self) -> IngredientSpecCollection:
        """Return a resource representing all ingredient specs in this dataset."""
        return IngredientSpecCollection(self.project_id, self.uid,
                                        self.session)

    @property
    def files(self) -> FileCollection:
        """Return a resource representing all files in the dataset."""
        return FileCollection(self.project_id, self.uid, self.session)

    def _collection_for(self, data_concepts_resource):
        """
        Return the collection of this dataset that handles the given resource.

        Parameters
        ----------
        data_concepts_resource
            The object whose class decides which collection is returned.

        Returns
        -------
        The collection property of this dataset that matches the resource's class.

        Raises
        ------
        TypeError
            If the resource is not an instance of any supported data concepts class.

        """
        # Checked in the listed order. The collection is looked up by property name so that
        # only the matching collection is ever constructed.
        dispatch = (
            (MeasurementTemplate, 'measurement_templates'),
            (MeasurementSpec, 'measurement_specs'),
            (MeasurementRun, 'measurement_runs'),
            (MaterialTemplate, 'material_templates'),
            (MaterialSpec, 'material_specs'),
            (MaterialRun, 'material_runs'),
            (ProcessTemplate, 'process_templates'),
            (ProcessSpec, 'process_specs'),
            (ProcessRun, 'process_runs'),
            (IngredientSpec, 'ingredient_specs'),
            (IngredientRun, 'ingredient_runs'),
            (PropertyTemplate, 'property_templates'),
            (ParameterTemplate, 'parameter_templates'),
            (ConditionTemplate, 'condition_templates'),
        )
        for resource_class, collection_name in dispatch:
            if isinstance(data_concepts_resource, resource_class):
                return getattr(self, collection_name)
        # Falling through used to return None, which surfaced in callers as an opaque
        # "'NoneType' object has no attribute ..." error.
        raise TypeError(
            "No dataset collection handles objects of type {!r}".format(
                type(data_concepts_resource).__name__))

    def register(self,
                 data_concepts_resource: ResourceType,
                 dry_run=False) -> ResourceType:
        """Register a data concepts resource to the appropriate collection."""
        return self._collection_for(data_concepts_resource)\
            .register(data_concepts_resource, dry_run=dry_run)

    def register_all(self,
                     data_concepts_resources: List[ResourceType],
                     dry_run=False) -> List[ResourceType]:
        """
        Register multiple data concepts resources to each of their appropriate collections.

        Does so in an order that is guaranteed to store all linked items before the item that
        references them.

        The uids of the input data concepts resources are updated with their on-platform uids.
        This supports storing an object that has a reference to an object that doesn't have a uid.

        Parameters
        ----------
        data_concepts_resources: List[ResourceType]
            The resources to register. Can be different types.

        dry_run: bool
            Whether to actually register the item or run a dry run of the register operation.
            Dry run is intended to be used for validation. Default: false

        Returns
        -------
        List[ResourceType]
            The registered versions

        """
        # Group the inputs by their `typ`, then order the groups by writable_sort_order.
        by_type = defaultdict(list)
        for obj in data_concepts_resources:
            by_type[obj.typ].append(obj)
        typ_groups = sorted(by_type.values(),
                            key=lambda group: writable_sort_order(group[0]))

        batch_size = 50
        resources = []
        for typ_group in typ_groups:
            # Stepping by batch_size never yields an empty batch.
            for start in range(0, len(typ_group), batch_size):
                batch = typ_group[start:start + batch_size]
                registered = self._collection_for(batch[0])\
                    .register_all(batch, dry_run=dry_run)
                # Copy the uids of the registered objects back onto the caller's objects.
                for prewrite, postwrite in zip(batch, registered):
                    if isinstance(postwrite, BaseEntity):
                        prewrite.uids = postwrite.uids
                resources.extend(registered)
        return resources

    def update(self, model: ResourceType) -> ResourceType:
        """Update a data concepts resource using the appropriate collection."""
        return self._collection_for(model).update(model)

    def delete(self,
               data_concepts_resource: ResourceType,
               dry_run=False) -> ResourceType:
        """
        Delete a data concepts resource from the appropriate collection.

        The first (scope, id) pair found in the resource's ``uids`` identifies the object.

        Raises
        ------
        ValueError
            If the resource has no entries in ``uids``.

        """
        uid = next(iter(data_concepts_resource.uids.items()), None)
        if uid is None:
            raise ValueError(
                "Only objects that contain identifiers can be deleted.")
        return self._collection_for(data_concepts_resource) \
            .delete(uid[1], scope=uid[0], dry_run=dry_run)

    def delete_contents(self,
                        *,
                        timeout: float = 2 * 60,
                        polling_delay: float = 1.0) -> List[Tuple[LinkByUID, ApiError]]:
        """
        Delete all the GEMD objects from within a single Dataset.

        Parameters
        ----------
        timeout: float
            Amount of time to wait on the job (in seconds) before giving up.
            Note that this number has no effect on the underlying job itself,
            which can also time out server-side.

        polling_delay: float
            How long to delay between each polling retry attempt.

        Returns
        -------
        List[Tuple[LinkByUID, ApiError]]
            A list of (LinkByUID, api_error) for each failure to delete an object.
            Note that this method doesn't raise an exception if an object fails to be
            deleted.

        """
        path = 'projects/{project_id}/datasets/{dataset_uid}/contents'.format(
            dataset_uid=self.uid, project_id=self.project_id)

        # The delete request returns a job id; the result is then polled for.
        response = self.session.delete_resource(path)
        job_id = response["job_id"]

        return _poll_for_async_batch_delete_result(self.project_id,
                                                   self.session, job_id,
                                                   timeout, polling_delay)

    def gemd_batch_delete(
            self,
            id_list: List[Union[LinkByUID, UUID, str, BaseEntity]],
            *,
            timeout: float = 2 * 60,
            polling_delay: float = 1.0) -> List[Tuple[LinkByUID, ApiError]]:
        """
        Remove a set of GEMD objects.

        You may provide GEMD objects that reference each other, and the objects
        will be removed in the appropriate order.

        A failure will be returned if the object cannot be deleted due to an external
        reference.

        All data objects must be associated with this dataset resource. You must also
        have write access on this dataset.

        If you wish to delete more than 50 objects, queuing of deletes requires that
        the types of objects be known, and thus you _must_ provide ids in the form
        of BaseEntities.

        Also note that Attribute Templates cannot be deleted at present.

        Parameters
        ----------
        id_list: List[Union[LinkByUID, UUID, str, BaseEntity]]
            A list of the IDs of data objects to be removed. They can be passed
            as a LinkByUID tuple, a UUID, a string, or the object itself. A UUID
            or string is assumed to be a Citrine ID, whereas a LinkByUID or
            BaseEntity can also be used to provide an external ID.

        timeout: float
            Passed through to the batch delete helper as its ``timeout``.

        polling_delay: float
            Passed through to the batch delete helper as its ``polling_delay``.

        Returns
        -------
        List[Tuple[LinkByUID, ApiError]]
            A list of (LinkByUID, api_error) for each failure to delete an object.
            Note that this method doesn't raise an exception if an object fails to be
            deleted.

        """
        return _async_gemd_batch_delete(id_list,
                                        self.project_id,
                                        self.session,
                                        self.uid,
                                        timeout=timeout,
                                        polling_delay=polling_delay)
Esempio n. 24
0
class Project(Resource['Project']):
    """
    A Citrine Project.

    A project is a collection of datasets, some of which belong directly to the project
    and some of which have been shared with the project.

    Parameters
    ----------
    name: str
        Name of the project.
    description: str
        Long-form description of the project.
    session: Session, optional
        The Citrine session used to connect to the database.

    Attributes
    ----------
    uid: UUID
        Unique uuid4 identifier of this project.
    status: str
        Status of the project.
    created_at: int
        Time the project was created, in seconds since epoch.

    """

    _response_key = 'project'

    name = properties.String('name')
    description = properties.Optional(properties.String(), 'description')
    uid = properties.Optional(properties.UUID(), 'id')
    status = properties.Optional(properties.String(), 'status')
    created_at = properties.Optional(properties.Datetime(), 'created_at')

    # NOTE(review): the Session() default below is evaluated once, when the class body is
    # executed, so every Project created without an explicit session shares that one instance.
    def __init__(self,
                 name: str,
                 description: Optional[str] = None,
                 session: Optional[Session] = Session()):
        self.name: str = name
        self.description: Optional[str] = description
        self.session: Session = session

    def __str__(self):
        return '<Project {!r}>'.format(self.name)

    def _path(self):
        """Return the API path of this project, built from its uid."""
        return '/projects/{project_id}'.format(project_id=self.uid)

    @property
    def design_spaces(self) -> DesignSpaceCollection:
        """Return a resource representing all visible design spaces."""
        return DesignSpaceCollection(self.uid, self.session)

    @property
    def processors(self) -> ProcessorCollection:
        """Return a resource representing all visible processors."""
        return ProcessorCollection(self.uid, self.session)

    @property
    def predictors(self) -> PredictorCollection:
        """Return a resource representing all visible predictors."""
        return PredictorCollection(self.uid, self.session)

    @property
    def workflows(self) -> WorkflowCollection:
        """Return a resource representing all visible workflows."""
        return WorkflowCollection(self.uid, self.session)

    @property
    def datasets(self) -> DatasetCollection:
        """Return a resource representing all visible datasets."""
        return DatasetCollection(self.uid, self.session)

    @property
    def tables(self) -> TableCollection:
        """Return a resource representing all visible Tables."""
        return TableCollection(self.uid, self.session)

    # The data-object collections below are created with the project uid and no dataset id
    # (None is passed in the dataset position).

    @property
    def property_templates(self) -> PropertyTemplateCollection:
        """Return a resource representing all property templates in this project."""
        return PropertyTemplateCollection(self.uid, None, self.session)

    @property
    def condition_templates(self) -> ConditionTemplateCollection:
        """Return a resource representing all condition templates in this project."""
        return ConditionTemplateCollection(self.uid, None, self.session)

    @property
    def parameter_templates(self) -> ParameterTemplateCollection:
        """Return a resource representing all parameter templates in this project."""
        return ParameterTemplateCollection(self.uid, None, self.session)

    @property
    def material_templates(self) -> MaterialTemplateCollection:
        """Return a resource representing all material templates in this project."""
        return MaterialTemplateCollection(self.uid, None, self.session)

    @property
    def measurement_templates(self) -> MeasurementTemplateCollection:
        """Return a resource representing all measurement templates in this project."""
        return MeasurementTemplateCollection(self.uid, None, self.session)

    @property
    def process_templates(self) -> ProcessTemplateCollection:
        """Return a resource representing all process templates in this project."""
        return ProcessTemplateCollection(self.uid, None, self.session)

    @property
    def process_runs(self) -> ProcessRunCollection:
        """Return a resource representing all process runs in this project."""
        return ProcessRunCollection(self.uid, None, self.session)

    @property
    def measurement_runs(self) -> MeasurementRunCollection:
        """Return a resource representing all measurement runs in this project."""
        return MeasurementRunCollection(self.uid, None, self.session)

    @property
    def material_runs(self) -> MaterialRunCollection:
        """Return a resource representing all material runs in this project."""
        return MaterialRunCollection(self.uid, None, self.session)

    @property
    def ingredient_runs(self) -> IngredientRunCollection:
        """Return a resource representing all ingredient runs in this project."""
        return IngredientRunCollection(self.uid, None, self.session)

    @property
    def process_specs(self) -> ProcessSpecCollection:
        """Return a resource representing all process specs in this project."""
        return ProcessSpecCollection(self.uid, None, self.session)

    @property
    def measurement_specs(self) -> MeasurementSpecCollection:
        """Return a resource representing all measurement specs in this project."""
        return MeasurementSpecCollection(self.uid, None, self.session)

    @property
    def material_specs(self) -> MaterialSpecCollection:
        """Return a resource representing all material specs in this project."""
        return MaterialSpecCollection(self.uid, None, self.session)

    @property
    def ingredient_specs(self) -> IngredientSpecCollection:
        """Return a resource representing all ingredient specs in this project."""
        return IngredientSpecCollection(self.uid, None, self.session)

    def share(self,
              project_id: str,
              resource_type: str,
              resource_id: str) -> Dict[str, str]:
        """
        Share a resource with another project.

        Parameters
        ----------
        project_id: str
            Sent as the "project_id" of the share request.
        resource_type: str
            Sent as the "type" of the shared resource.
        resource_id: str
            Sent as the "id" of the shared resource.

        Returns
        -------
        Dict[str, str]
            The value returned by the session for the share request.

        """
        return self.session.post_resource(self._path() + "/share", {
            "project_id": project_id,
            "resource": {"type": resource_type, "id": resource_id}
        })

    def make_public(self,
                    resource: Resource) -> bool:
        """
        Grant public access to a resource owned by this project.

        Parameters
        ----------
        resource: Resource
            An instance of a resource owned by this project (e.g. a dataset).

        Returns
        -------
        bool
            True if the action was performed successfully

        """
        self.session.checked_post(self._path() + "/make-public", {
            "resource": resource.as_entity_dict()
        })
        return True

    def make_private(self,
                     resource: Resource) -> bool:
        """
        Remove public access for a resource owned by this project.

        Parameters
        ----------
        resource: Resource
            An instance of a resource owned by this project (e.g. a dataset).

        Returns
        -------
        bool
            True if the action was performed successfully

        """
        self.session.checked_post(self._path() + "/make-private", {
            "resource": resource.as_entity_dict()
        })
        return True

    def list_members(self) -> List[ProjectMember]:
        """
        List all of the members in the current project.

        Returns
        -------
        List[ProjectMember]
            The members of the current project

        """
        members = self.session.get_resource(self._path() + "/users")["users"]
        return [ProjectMember(user=User.build(m), project=self, role=m["role"]) for m in members]

    def update_user_role(self,
                         user_uid: Union[str, UUID],
                         role: ROLES,
                         actions: Optional[ACTIONS] = None) -> bool:
        """
        Update a User's role and action permissions in the Project.

        Valid roles are MEMBER or LEAD.

        WRITE is the only action available for specification.

        Parameters
        ----------
        user_uid: Union[str, UUID]
            Identifier of the user, placed in the request path.
        role: ROLES
            The role sent for the user.
        actions: Optional[ACTIONS]
            The actions sent for the user. When omitted, an empty list is sent.

        Returns
        -------
        bool
            Returns True if user role successfully updated

        """
        # A fresh list per call replaces the former shared mutable default (actions=[]).
        if actions is None:
            actions = []
        self.session.checked_post(self._path() + "/users/{}".format(user_uid),
                                  {'role': role, 'actions': actions})
        return True

    def add_user(self, user_uid: Union[str, UUID]) -> bool:
        """
        Add a User to a Project.

        Adds User with MEMBER role to the Project. Use the update_user_role method to change
        a User's role.

        Returns
        -------
        bool
            Returns True if user successfully added

        """
        self.session.checked_post(self._path() + "/users/{}".format(user_uid),
                                  {'role': MEMBER, 'actions': []})
        return True

    def remove_user(self, user_uid: Union[str, UUID]) -> bool:
        """
        Remove a User from a Project.

        Returns
        -------
        bool
            Returns True if user successfully removed

        """
        self.session.checked_delete(
            self._path() + "/users/{}".format(user_uid)
        )
        return True
Esempio n. 25
0
class SimpleMLPredictor(Serializable['SimplePredictor'], Predictor):
    """A predictor that builds a graphical model linking inputs to outputs via latent variables.

    Supported complex inputs (such as chemical formulas) are auto-featurized and machine learning
    models are built for each latent variable and output.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        the description of the predictor
    inputs: list[Descriptor]
        Descriptors that represent inputs to relations
    outputs: list[Descriptor]
        Descriptors that represent outputs of relations
    latent_variables: list[Descriptor]
        Descriptors that are predicted from inputs and used when predicting the outputs
    training_data: str
        UUID of the table that contains the training data
    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    inputs = properties.List(properties.Object(Descriptor), 'config.inputs')
    outputs = properties.List(properties.Object(Descriptor), 'config.outputs')
    latent_variables = properties.List(
        properties.Object(Descriptor),
        'config.latent_variables',
    )
    training_data = properties.String('config.training_data')
    typ = properties.String('config.type', default='Simple', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False,
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PREDICTOR')
    schema_id = properties.UUID(
        'schema_id',
        default=UUID('08d20e5f-e329-4de0-a90a-4b5e36b91703'),
    )

    def __init__(self,
                 name: str,
                 description: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 latent_variables: List[Descriptor],
                 training_data: str,
                 session: Optional[Session] = None,
                 report: Optional[Report] = None):
        # Configuration fields, stored on the serializable properties declared above.
        self.name = name
        self.description = description
        self.inputs = inputs
        self.outputs = outputs
        self.latent_variables = latent_variables
        self.training_data = training_data
        # Plain instance attributes; no property is declared for these two.
        self.session = session
        self.report = report

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name to the top-level 'display_name' key and return the dict."""
        config_name = data['config']['name']
        data['display_name'] = config_name
        return data

    def __str__(self):
        return f'<SimplePredictor {self.name!r}>'

    def post_build(self, project_id: UUID, data: dict):
        """Fetch the report identified by ``data['id']`` and store it on ``self.report``."""
        report_resource = ReportResource(project_id, self.session)
        self.report = report_resource.get(data['id'])
class WorkflowExecution(Resource['WorkflowExecution'], Pageable):
    """[DEPRECATED] A Citrine Workflow Execution.

    Constructing an instance emits a ``DeprecationWarning`` that points to
    ``DesignExecution`` (Design Workflows) and ``PredictorEvaluationExecution``
    (Predictor Evaluation Workflows) as replacements.

    Parameters
    ----------
    uid: str
        Unique identifier of the workflow execution
    project_id: str
        Unique identifier of the project that contains the workflow execution
    workflow_id: str
        Unique identifier of the workflow that was executed
    session: Session, optional
        Session used for the requests issued by ``status``, ``results`` and ``candidates``
    version_number: int
        Integer identifier that increases each time the workflow is executed.  The first execution
        has version_number = 1.

    """

    _response_key = 'WorkflowExecutions'
    _paginator: Paginator = Paginator()
    _collection_key = 'response'

    uid = properties.UUID('id')
    project_id = properties.UUID('project_id', deserializable=False)
    workflow_id = properties.UUID('workflow_id', deserializable=False)
    version_number = properties.Integer("version_number")

    def __init__(
        self,
        uid: Optional[str] = None,
        project_id: Optional[str] = None,
        workflow_id: Optional[str] = None,
        session: Optional[Session] = None,
        version_number: Optional[int] = None,
    ):
        msg = "{this_class} is deprecated. Please use {dw_replacement} instead for " \
            "Design Workflows and {pew_replacement} for Predictor Evaluation Workflows".format(
                this_class=self.__class__.__name__,
                dw_replacement=DesignExecution.__name__,
                pew_replacement=PredictorEvaluationExecution.__name__)
        # stacklevel=2 attributes the warning to the code that constructs the execution
        # rather than to this line, so Python's default warning filters can surface it.
        warn(msg, category=DeprecationWarning, stacklevel=2)
        self.uid: str = uid
        self.project_id: str = project_id
        self.workflow_id: str = workflow_id
        self.session: Session = session
        self.version_number = version_number

    def __str__(self):
        return '<WorkflowExecution {!r}>'.format(str(self.uid))

    def _path(self):
        """Return the URL path of this execution under its project and workflow."""
        return '/projects/{project_id}/workflows/{workflow_id}/executions/{execution_id}'.format(
            project_id=self.project_id,
            workflow_id=self.workflow_id,
            execution_id=self.uid)

    def status(self):
        """Get the current status of this execution."""
        response = self.session.get_resource(self._path() + "/status")
        return WorkflowExecutionStatus.build(response)

    def results(self):
        """Get the results of this execution."""
        return self.session.get_resource(self._path() + "/results")

    @classmethod
    def _build_candidates(
            cls,
            subset_collection: Iterable[dict]) -> Iterable[DesignCandidate]:
        """Lazily build a ``DesignCandidate`` from each dictionary in ``subset_collection``."""
        for candidate in subset_collection:
            yield DesignCandidate.build(candidate)

    def candidates(
        self,
        page: Optional[int] = None,
        per_page: int = 100,
    ) -> Iterable[DesignCandidate]:
        """Fetch the Design Candidates for the particular execution, paginated.

        Gets candidates from the new candidates API for a workflow executed by the old api.
        New candidates are paginated and have structured types.

        Parameters
        ----------
        page: int, optional
            Page argument handed to the paginator.
        per_page: int
            Page-size argument handed to the paginator (default 100).

        Returns
        -------
        Iterable[DesignCandidate]
            Whatever the paginator yields after building each entry with
            ``DesignCandidate.build``.

        """
        # Note: this uses the 'design-workflows' route, not the 'workflows' route of _path().
        path = '/projects/{p_id}/design-workflows/{w_id}/executions/{e_id}/candidates'.format(
            p_id=self.project_id, w_id=self.workflow_id, e_id=self.uid)

        fetcher = partial(self._fetch_page, path=path)

        return self._paginator.paginate(
            page_fetcher=fetcher,
            collection_builder=self._build_candidates,
            page=page,
            per_page=per_page)
Esempio n. 27
0
class Project(Resource['Project']):
    """
    A Citrine Project.

    A project is a collection of datasets, some of which belong directly to the project
    and some of which have been shared with the project.

    Parameters
    ----------
    name: str
        Name of the project.
    description: str
        Long-form description of the project.
    session: Session, optional
        The Citrine session used to connect to the database.

    Attributes
    ----------
    uid: UUID
        Unique uuid4 identifier of this project.
    status: str
        Status of the project.
    created_at: int
        Time the project was created, in seconds since epoch.

    """

    _response_key = 'project'
    _resource_type = ResourceTypeEnum.PROJECT

    name = properties.String('name')
    description = properties.Optional(properties.String(), 'description')
    uid = properties.Optional(properties.UUID(), 'id')
    status = properties.Optional(properties.String(), 'status')
    created_at = properties.Optional(properties.Datetime(), 'created_at')

    # NOTE(review): the ``Session()`` default below is evaluated once, when the class body is
    # executed, so every Project created without an explicit session shares that one
    # instance. Left unchanged for backward compatibility - confirm this is intended.
    def __init__(self,
                 name: str,
                 description: Optional[str] = None,
                 session: Optional[Session] = Session()):
        self.name: str = name
        self.description: Optional[str] = description
        self.session: Session = session

    def __str__(self):
        return '<Project {!r}>'.format(self.name)

    def _path(self):
        """Return the URL path of this project."""
        return '/projects/{project_id}'.format(**{"project_id": self.uid})

    @property
    def modules(self) -> ModuleCollection:
        """Return a resource representing all visible modules."""
        return ModuleCollection(self.uid, self.session)

    @property
    def design_spaces(self) -> DesignSpaceCollection:
        """Return a resource representing all visible design spaces."""
        return DesignSpaceCollection(self.uid, self.session)

    @property
    def processors(self) -> ProcessorCollection:
        """Return a resource representing all visible processors."""
        return ProcessorCollection(self.uid, self.session)

    @property
    def predictors(self) -> PredictorCollection:
        """Return a resource representing all visible predictors."""
        return PredictorCollection(self.uid, self.session)

    @property
    def descriptors(self) -> DescriptorMethods:
        """Return a resource containing a set of methods returning descriptors."""
        return DescriptorMethods(self.uid, self.session)

    @property
    @deprecated(
        deprecated_in="0.101.0",
        details="Use design_workflows or predictor_evaluation_workflows instead"
    )
    def workflows(self) -> WorkflowCollection:
        """Return a resource representing all visible workflows."""
        return WorkflowCollection(self.uid, self.session)

    @property
    def predictor_evaluation_workflows(
            self) -> PredictorEvaluationWorkflowCollection:
        """Return a collection representing all visible predictor evaluation workflows."""
        return PredictorEvaluationWorkflowCollection(self.uid, self.session)

    @property
    def predictor_evaluation_executions(
            self) -> PredictorEvaluationExecutionCollection:
        """Return a collection representing all visible predictor evaluation executions."""
        return PredictorEvaluationExecutionCollection(project_id=self.uid,
                                                      session=self.session)

    @property
    def design_workflows(self) -> DesignWorkflowCollection:
        """Return a collection representing all visible design workflows."""
        return DesignWorkflowCollection(self.uid, self.session)

    @property
    def datasets(self) -> DatasetCollection:
        """Return a resource representing all visible datasets."""
        return DatasetCollection(self.uid, self.session)

    @property
    def tables(self) -> GemTableCollection:
        """Return a resource representing all visible Tables."""
        return GemTableCollection(self.uid, self.session)

    # The collections below are created with ``None`` as their second argument, i.e. they
    # are scoped by this project's uid only.

    @property
    def property_templates(self) -> PropertyTemplateCollection:
        """Return a resource representing all property templates in this project."""
        return PropertyTemplateCollection(self.uid, None, self.session)

    @property
    def condition_templates(self) -> ConditionTemplateCollection:
        """Return a resource representing all condition templates in this project."""
        return ConditionTemplateCollection(self.uid, None, self.session)

    @property
    def parameter_templates(self) -> ParameterTemplateCollection:
        """Return a resource representing all parameter templates in this project."""
        return ParameterTemplateCollection(self.uid, None, self.session)

    @property
    def material_templates(self) -> MaterialTemplateCollection:
        """Return a resource representing all material templates in this project."""
        return MaterialTemplateCollection(self.uid, None, self.session)

    @property
    def measurement_templates(self) -> MeasurementTemplateCollection:
        """Return a resource representing all measurement templates in this project."""
        return MeasurementTemplateCollection(self.uid, None, self.session)

    @property
    def process_templates(self) -> ProcessTemplateCollection:
        """Return a resource representing all process templates in this project."""
        return ProcessTemplateCollection(self.uid, None, self.session)

    @property
    def process_runs(self) -> ProcessRunCollection:
        """Return a resource representing all process runs in this project."""
        return ProcessRunCollection(self.uid, None, self.session)

    @property
    def measurement_runs(self) -> MeasurementRunCollection:
        """Return a resource representing all measurement runs in this project."""
        return MeasurementRunCollection(self.uid, None, self.session)

    @property
    def material_runs(self) -> MaterialRunCollection:
        """Return a resource representing all material runs in this project."""
        return MaterialRunCollection(self.uid, None, self.session)

    @property
    def ingredient_runs(self) -> IngredientRunCollection:
        """Return a resource representing all ingredient runs in this project."""
        return IngredientRunCollection(self.uid, None, self.session)

    @property
    def process_specs(self) -> ProcessSpecCollection:
        """Return a resource representing all process specs in this project."""
        return ProcessSpecCollection(self.uid, None, self.session)

    @property
    def measurement_specs(self) -> MeasurementSpecCollection:
        """Return a resource representing all measurement specs in this project."""
        return MeasurementSpecCollection(self.uid, None, self.session)

    @property
    def material_specs(self) -> MaterialSpecCollection:
        """Return a resource representing all material specs in this project."""
        return MaterialSpecCollection(self.uid, None, self.session)

    @property
    def ingredient_specs(self) -> IngredientSpecCollection:
        """Return a resource representing all ingredient specs in this project."""
        return IngredientSpecCollection(self.uid, None, self.session)

    @property
    def table_configs(self) -> TableConfigCollection:
        """Return a resource representing all Table Configs in the project."""
        return TableConfigCollection(self.uid, self.session)

    @property
    @deprecated(deprecated_in="0.52.2", details="Use table_configs instead")
    def ara_definitions(self) -> TableConfigCollection:  # pragma: no cover
        """[DEPRECATED] Use table_configs instead."""
        from warnings import warn
        warn(
            "ara_definitions is deprecated and will soon be removed. "
            "Please call table_configs instead.", DeprecationWarning)
        return self.table_configs

    def share(self, project_id: str, resource_type: str,
              resource_id: str) -> Dict[str, str]:
        """
        Share a resource with another project.

        Parameters
        ----------
        project_id: str
            Sent as ``project_id`` in the request body; the project to share with.
        resource_type: str
            Sent as the ``type`` of the shared resource.
        resource_id: str
            Sent as the ``id`` of the shared resource.

        Returns
        -------
        Dict[str, str]
            The value returned by the session for the ``/share`` request.

        """
        return self.session.post_resource(
            self._path() + "/share", {
                "project_id": project_id,
                "resource": {
                    "type": resource_type,
                    "id": resource_id
                }
            })

    def transfer_resource(self, resource: Resource,
                          receiving_project_uid: Union[str, UUID]) -> bool:
        """
        Transfer ownership of a resource.

        The new owner of the the supplied resource becomes the project
        with ``uid == receiving_project_uid``.

        Parameters
        ----------
        resource: Resource
            The resource owned by this project, which will get transferred to
            the project with ``uid == receiving_project_uid``.
        receiving_project_uid: Union[string, UUID]
            The uid of the project to which the resource will be transferred.

        Returns
        -------
        bool
            Returns ``True`` upon successful resource transfer.

        Raises
        ------
        RuntimeError
            If ``resource.as_entity_dict()`` raises ``AttributeError``.

        """
        # Only the entity lookup is guarded, so an AttributeError raised while posting is
        # not misreported as an untransferable resource.
        try:
            entity = resource.as_entity_dict()
        except AttributeError as err:  # If _resource_type is not implemented
            raise RuntimeError(
                f"Resource of type  {resource.__class__.__name__} "
                f"cannot be made transferred") from err

        self.session.checked_post(
            self._path() + "/transfer-resource", {
                "to_project_id": str(receiving_project_uid),
                "resource": entity
            })
        return True

    def make_public(self, resource: Resource) -> bool:
        """
        Grant public access to a resource owned by this project.

        Parameters
        ----------
        resource: Resource
            An instance of a resource owned by this project (e.g., a dataset).

        Returns
        -------
        bool
            ``True`` if the action was performed successfully

        Raises
        ------
        RuntimeError
            If ``resource.as_entity_dict()`` raises ``AttributeError``.

        """
        # Only the entity lookup is guarded; errors from the request itself propagate as-is.
        try:
            entity = resource.as_entity_dict()
        except AttributeError as err:  # If _resource_type is not implemented
            raise RuntimeError(
                f"Resource of type  {resource.__class__.__name__} "
                f"cannot be made public") from err

        self.session.checked_post(self._path() + "/make-public",
                                  {"resource": entity})
        return True

    def make_private(self, resource: Resource) -> bool:
        """
        Remove public access for a resource owned by this project.

        Parameters
        ----------
        resource: Resource
            An instance of a resource owned by this project (e.g., a dataset).

        Returns
        -------
        bool
            ``True`` if the action was performed successfully

        Raises
        ------
        RuntimeError
            If ``resource.as_entity_dict()`` raises ``AttributeError``.

        """
        # Only the entity lookup is guarded; errors from the request itself propagate as-is.
        try:
            entity = resource.as_entity_dict()
        except AttributeError as err:  # If _resource_type is not implemented
            raise RuntimeError(
                f"Resource of type  {resource.__class__.__name__} "
                f"cannot be made private") from err

        self.session.checked_post(self._path() + "/make-private",
                                  {"resource": entity})
        return True

    def creator(self) -> str:
        """
        Return the creator of this project.

        Returns
        -------
        str
            The email of the creator of this resource.

        """
        email = self.session.get_resource(self._path() + "/creator")["email"]
        return email

    def owned_dataset_ids(self) -> List[str]:
        """
        List all the ids of the datasets owned by the current project.

        Returns
        -------
        List[str]
            The ids of the datasets owned by current project

        """
        dataset_ids = self.session.get_resource(self._path() +
                                                "/dataset_ids")["dataset_ids"]
        return dataset_ids

    def owned_table_ids(self) -> List[str]:
        """
        List all the ids of the tables owned by the current project.

        Returns
        -------
        List[str]
            The ids of the tables owned by current project

        """
        table_ids = self.session.get_resource(self._path() +
                                              "/table_ids")["table_ids"]
        return table_ids

    def owned_table_config_ids(self) -> List[str]:
        """
        List all the ids of the table configs owned by the current project.

        Returns
        -------
        List[str]
            The ids of the table configs owned by current project

        """
        # The endpoint and the response key still use the "table_definition" name.
        result = self.session.get_resource(self._path() +
                                           "/table_definition_ids")
        return result["table_definition_ids"]

    def list_members(self) -> List[ProjectMember]:
        """
        List all of the members in the current project.

        Returns
        -------
        List[ProjectMember]
            The members of the current project

        """
        members = self.session.get_resource(self._path() + "/users")["users"]
        return [
            ProjectMember(user=User.build(m), project=self, role=m["role"])
            for m in members
        ]

    def update_user_role(self,
                         user_uid: Union[str, UUID],
                         role: ROLES,
                         actions: ACTIONS = None) -> bool:
        """
        Update a User's role and action permissions in the Project.

        Valid roles are ``MEMBER`` or ``LEAD``.

        ``WRITE`` is the only action available for specification.

        Parameters
        ----------
        user_uid: Union[str, UUID]
            The uid of the user whose role is updated.
        role: ROLES
            The role to assign.
        actions: ACTIONS, optional
            The actions to grant. When omitted (or ``None``) an empty list is sent.

        Returns
        -------
        bool
            Returns ``True`` if user role successfully updated

        """
        # A fresh list is created per call instead of sharing one mutable default object
        # between every invocation.
        if actions is None:
            actions = []
        self.session.checked_post(self._path() + "/users/{}".format(user_uid),
                                  {
                                      'role': role,
                                      'actions': actions
                                  })
        return True

    def add_user(self, user_uid: Union[str, UUID]) -> bool:
        """
        Add a User to a Project.

        Adds User with ``MEMBER`` role to the Project.
        Use the ``update_user_role`` method to change a User's role.

        Parameters
        ----------
        user_uid: Union[str, UUID]
            The uid of the user to add.

        Returns
        -------
        bool
            Returns ``True`` if user successfully added

        """
        self.session.checked_post(self._path() + "/users/{}".format(user_uid),
                                  {
                                      'role': MEMBER,
                                      'actions': []
                                  })
        return True

    def remove_user(self, user_uid: Union[str, UUID]) -> bool:
        """
        Remove a User from a Project.

        Parameters
        ----------
        user_uid: Union[str, UUID]
            The uid of the user to remove.

        Returns
        -------
        bool
            Returns ``True`` if user successfully removed

        """
        self.session.checked_delete(self._path() +
                                    "/users/{}".format(user_uid))
        return True

    def gemd_batch_delete(
            self,
            id_list: List[Union[LinkByUID, UUID, str, BaseEntity]],
            *,
            timeout: float = 2 * 60,
            polling_delay: float = 1.0) -> List[Tuple[LinkByUID, ApiError]]:
        """
        Remove a set of GEMD objects.

        You may provide GEMD objects that reference each other, and the objects
        will be removed in the appropriate order.

        A failure will be returned if the object cannot be deleted due to an external
        reference.

        You must have Write access on the associated datasets for each object.

        Parameters
        ----------
        id_list: List[Union[LinkByUID, UUID, str, BaseEntity]]
            A list of the IDs of data objects to be removed. They can be passed
            as a LinkByUID tuple, a UUID, a string, or the object itself. A UUID
            or string is assumed to be a Citrine ID, whereas a LinkByUID or
            BaseEntity can also be used to provide an external ID.
        timeout: float
            Forwarded unchanged to ``_async_gemd_batch_delete`` (default ``2 * 60``).
        polling_delay: float
            Forwarded unchanged to ``_async_gemd_batch_delete`` (default ``1.0``).

        Returns
        -------
        List[Tuple[LinkByUID, ApiError]]
            A list of (LinkByUID, api_error) for each failure to delete an object.
            Note that this method doesn't raise an exception if an object fails to be
            deleted.

        """
        return _async_gemd_batch_delete(id_list,
                                        self.uid,
                                        self.session,
                                        None,
                                        timeout=timeout,
                                        polling_delay=polling_delay)
Esempio n. 28
0
class DesignWorkflow(Resource['DesignWorkflow'], Workflow):
    """[ALPHA] Workflow that produces scored candidate materials for a design space.

    Parameters
    ----------
    name: str
        the name of the workflow
    design_space_id: UUID
        the UUID corresponding to the design space to use
    processor_id: Optional[UUID]
        the UUID corresponding to the processor to use
        if none is provided, one matching your design space will be automatically generated
    predictor_id: UUID
        the UUID corresponding to the predictor to use
    project_id: UUID
        the UUID corresponding to the project to use; required before ``executions``
        can be accessed
    session: Session
        the session handed to the execution collection

    """

    # Identity and status fields.
    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('display_name')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False,
    )
    experimental = properties.Boolean(
        "experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False,
    )
    archived = properties.Boolean('archived', default=False)
    created_by = properties.Optional(
        properties.UUID, 'created_by', serializable=False)
    create_time = properties.Optional(
        properties.Datetime, 'create_time', serializable=False)

    # Workflow configuration, stored under the 'config' prefix.
    design_space_id = properties.UUID('config.design_space_id')
    processor_id = properties.Optional(properties.UUID, 'config.processor_id')
    predictor_id = properties.UUID('config.predictor_id')

    # Constant module kind and schema UUID.
    module_type = properties.String('module_type', default='DESIGN_WORKFLOW')
    schema_id = properties.UUID(
        'schema_id',
        default=UUID('8af8b007-3e81-4185-82b2-6f62f4a2e6f1'),
    )

    # NOTE(review): the ``Session()`` default is evaluated once when the class body runs, so
    # workflows created without an explicit session share it - confirm this is intended.
    def __init__(
        self,
        name: str,
        design_space_id: UUID,
        processor_id: Optional[UUID],
        predictor_id: UUID,
        project_id: Optional[UUID] = None,
        session: Session = Session(),
    ):
        self.name = name
        self.design_space_id = design_space_id
        self.processor_id = processor_id
        self.predictor_id = predictor_id
        self.project_id = project_id
        self.session = session

    def __str__(self):
        return f'<DesignWorkflow {self.name!r}>'

    @property
    def executions(self) -> WorkflowExecutionCollection:
        """Return a resource representing all visible executions of this workflow.

        Raises
        ------
        AttributeError
            If ``project_id`` is missing or ``None``.

        """
        project_id = getattr(self, 'project_id', None)
        if project_id is None:
            raise AttributeError(
                'Cannot initialize execution without project reference!')
        return WorkflowExecutionCollection(project_id, self.uid, self.session)
Esempio n. 29
0
class PerformanceWorkflow(Resource['PerformanceWorkflow'], Workflow):
    """[ALPHA] Workflow that runs a performance analysis on a given module.

    Parameters
    ----------
    name: str
        the name of the workflow
    analysis: CrossValidationAnalysisConfiguration
        the configuration object
    project_id: UUID
        the UUID of the owning project; required before ``executions`` can be accessed
    session: Session
        the session handed to the execution collection

    """

    # Identity and status fields.
    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('display_name')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False,
    )
    experimental = properties.Boolean(
        "experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False,
    )
    archived = properties.Boolean('archived', default=False)
    created_by = properties.Optional(
        properties.UUID, 'created_by', serializable=False)
    create_time = properties.Optional(
        properties.Datetime, 'create_time', serializable=False)

    # Workflow configuration, stored under the 'config' prefix.
    analysis = properties.Object(
        CrossValidationAnalysisConfiguration, 'config.analysis')

    # Constant module kind, schema UUID and type tag.
    module_type = properties.String(
        'module_type', default='PERFORMANCE_WORKFLOW')
    schema_id = properties.UUID(
        'schema_id',
        default=UUID('1d213f0a-d07c-4f70-a4d0-bda3aa951ee0'),
    )
    typ = properties.String(
        'config.type', default='PerformanceWorkflow', deserializable=False)

    # NOTE(review): the ``Session()`` default is evaluated once when the class body runs, so
    # workflows created without an explicit session share it - confirm this is intended.
    def __init__(
        self,
        name: str,
        analysis: CrossValidationAnalysisConfiguration,
        project_id: Optional[UUID] = None,
        session: Session = Session(),
    ):
        self.name = name
        self.analysis = analysis
        self.project_id = project_id
        self.session = session

    def __str__(self):
        return f'<PerformanceWorkflow {self.name!r}>'

    @property
    def executions(self) -> WorkflowExecutionCollection:
        """Return a resource representing all visible executions of this workflow.

        Raises
        ------
        AttributeError
            If ``project_id`` is missing or ``None``.

        """
        project_id = getattr(self, 'project_id', None)
        if project_id is None:
            raise AttributeError(
                'Cannot initialize execution without project reference!')
        return WorkflowExecutionCollection(project_id, self.uid, self.session)
class TableConfig(Resource["TableConfig"]):
    """
    [ALPHA] The Table Configuration used to build GEM Tables.

    All constructor arguments are keyword-only.

    Parameters
    ----------
    name: str
        Name of the Table Configuration
    description: str
        Description of the Table Configuration
    datasets: list[UUID]
        Datasets that are in scope for the table, as a list of dataset uuids
    variables: list[Variable]
        Variable definitions, which define data from the material histories to use in the columns
    rows: list[Row]
        List of row definitions that define the rows of the table
    columns: list[Column]
        Column definitions, which describe how the variables are shaped into the table
    version_uid: Optional[UUID]
        Optional id of this particular version of the configuration
    version_number: Optional[int]
        Optional version number of the configuration
    config_uid: Optional[UUID]
        Optional id of the configuration itself
    definition_uid: Optional[UUID]
        Deprecated alias of ``config_uid``; supply one or the other, never both

    """

    # FIXME (DML): rename this (this is dependent on the server side)
    # NOTE(review): presumably the key under which the server nests this resource in its
    # responses -- confirm against the Resource base class.
    _response_key = "ara_definition"
    _resource_type = ResourceTypeEnum.TABLE_DEFINITION

    @staticmethod
    def _get_dups(lst: List) -> List:
        # Hmmn, this looks like a potentially costly operation?!
        return [x for x in lst if lst.count(x) > 1]

    # Identifiers. Note the attribute names differ from the keys given to the property
    # declarations: config_uid is declared with 'definition_id' and version_uid with 'id'.
    config_uid = properties.Optional(properties.UUID(), 'definition_id')
    version_uid = properties.Optional(properties.UUID(), 'id')
    version_number = properties.Optional(properties.Integer, 'version_number')
    # Descriptive fields.
    name = properties.String("name")
    description = properties.String("description")
    # Table content: the datasets in scope, plus the variable, row and column definitions.
    datasets = properties.List(properties.UUID, "datasets")
    variables = properties.List(properties.Object(Variable), "variables")
    rows = properties.List(properties.Object(Row), "rows")
    columns = properties.List(properties.Object(Column), "columns")

    # Provide some backwards compatible support for definition_uid, redirecting to config_uid
    @property
    def definition_uid(self):
        """[[DEPRECATED]] This is a deprecated alias to config_uid. Please use that instead.

        Reading it emits a DeprecationWarning and returns ``config_uid``.
        """
        from warnings import warn
        # stacklevel=2 attributes the warning to the code that read the attribute rather
        # than to this line, so it points at the call site that needs updating.
        warn(
            "definition_uid is deprecated and will soon be removed. "
            "Please use config_uid instead",
            DeprecationWarning,
            stacklevel=2)
        return self.config_uid

    @definition_uid.setter
    def definition_uid(self, value):  # pragma: no cover
        """[[DEPRECATED]] This is a deprecated alias to config_uid. Please use that instead.

        Assigning to it emits a DeprecationWarning and stores ``value`` in ``config_uid``.
        """
        from warnings import warn
        # stacklevel=2: report the assignment site, not this setter.
        warn(
            "definition_uid is deprecated and will soon be removed. "
            "Please use config_uid instead",
            DeprecationWarning,
            stacklevel=2)
        self.config_uid = value

    def __init__(self,
                 *,
                 name: str,
                 description: str,
                 datasets: List[UUID],
                 variables: List[Variable],
                 rows: List[Row],
                 columns: List[Column],
                 version_uid: Optional[UUID] = None,
                 version_number: Optional[int] = None,
                 definition_uid: Optional[UUID] = None,
                 config_uid: Optional[UUID] = None):
        """Build a table configuration and validate its variables and columns.

        Parameters
        ----------
        name: str
            Name of the Table Configuration
        description: str
            Description of the Table Configuration
        datasets: list[UUID]
            Datasets that are in scope for the table
        variables: list[Variable]
            Variable definitions; their names and their headers must each be unique
        rows: list[Row]
            Row definitions
        columns: list[Column]
            Column definitions; each ``data_source`` must equal the name of a variable
        version_uid: Optional[UUID]
            Optional id of this version
        version_number: Optional[int]
            Optional version number
        definition_uid: Optional[UUID]
            Deprecated alias of ``config_uid``; only used when ``config_uid`` is None
        config_uid: Optional[UUID]
            Optional id of the configuration

        Raises
        ------
        AssertionError
            if both ``config_uid`` and ``definition_uid`` are supplied
        ValueError
            if variable names or headers are duplicated, or if a column refers to a
            variable name that is not defined

        """
        self.name = name
        self.description = description
        self.datasets = datasets
        self.rows = rows
        self.variables = variables
        self.columns = columns
        self.version_uid = version_uid
        self.version_number = version_number

        if config_uid is not None:
            # Kept as an assert so the exception type seen by existing callers is unchanged.
            assert definition_uid is None, "Please supply config_uid " \
                                           "instead of definition_uid, and not both"
            self.config_uid = config_uid
        else:
            self.config_uid = definition_uid

        # Note that these validations only apply at construction time. The current intended usage
        # is for this object to be created holistically; if changed, then these will need
        # to move into setters.
        names = [x.name for x in variables]
        dup_names = self._get_dups(names)
        if dup_names:
            raise ValueError("Multiple variables defined these names,"
                             " which must be unique: {}".format(dup_names))
        headers = [x.headers for x in variables]
        dup_headers = self._get_dups(headers)
        if dup_headers:
            raise ValueError("Multiple variables defined these headers,"
                             " which must be unique: {}".format(dup_headers))

        # Every column has to draw its data from one of the variables defined above.
        missing_variables = [
            x.data_source for x in columns if x.data_source not in names
        ]
        if missing_variables:
            raise ValueError(
                "The data_source of the columns must match one of the variable names,"
                " but {} were missing".format(missing_variables))

    def add_columns(self,
                    *,
                    variable: Variable,
                    columns: List[Column],
                    name: Optional[str] = None,
                    description: Optional[str] = None) -> 'TableConfig':
        """[ALPHA] Add a variable and one or more columns to this TableConfig (out-of-place).

        The variable name must not already be in use, and every added column must draw
        its data from the added variable.  Whether the columns and the variable are
        actually compatible is *not* checked (yet, at least).

        Parameters
        ----------
        variable: Variable
            Variable to add and use in the added columns
        columns: list[Column]
            Columns to add, which must only reference the added variable
        name: Optional[str]
            Optional renaming of the table
        description: Optional[str]
            Optional re-description of the table

        Returns
        -------
        TableConfig
            a new configuration sharing this one's config_uid; this object is left
            untouched

        Raises
        ------
        ValueError
            if the variable name is already used or a column references another variable

        """
        taken_names = [existing.name for existing in self.variables]
        if variable.name in taken_names:
            raise ValueError("The variable name {} is already used".format(
                variable.name))

        mismatched_data_source = [
            column for column in columns
            if column.data_source != variable.name
        ]
        if mismatched_data_source:
            raise ValueError(
                "Column.data_source must be {} but found {}".format(
                    variable.name, mismatched_data_source))

        # A falsy name/description (None or empty) keeps the current value.
        new_name = name or self.name
        new_description = description or self.description
        return TableConfig(name=new_name,
                           description=new_description,
                           datasets=copy(self.datasets),
                           rows=copy(self.rows),
                           variables=copy(self.variables) + [variable],
                           columns=copy(self.columns) + columns,
                           config_uid=copy(self.config_uid))

    def add_all_ingredients(self,
                            *,
                            process_template: LinkByUID,
                            project,
                            quantity_dimension: IngredientQuantityDimension,
                            scope: str = CITRINE_SCOPE,
                            unit: Optional[str] = None) -> 'TableConfig':
        """[ALPHA] Add variables and columns for all of the possible ingredients in a process.

        For each allowed ingredient name in the process template there is a column for the id of
        the ingredient and a column for the quantity of the ingredient. If the quantities are
        given in absolute amounts then there is also a column for units.

        Parameters
        ----------
        process_template: LinkByUID
            scope and id of a registered process template
        project: Project
            a project that has access to the process template
        quantity_dimension: IngredientQuantityDimension
            the dimension in which to report ingredient quantities
        scope: str
            the scope for which to get ingredient ids (defaults to CITRINE_SCOPE)
        unit: Optional[str]
            the units for the quantity, if selecting Absolute Quantity

        Returns
        -------
        TableConfig
            a new configuration with the added variables and columns and the same
            config_uid; this object is left untouched

        Raises
        ------
        RuntimeError
            if the process template has no allowed ingredient names

        """
        from hashlib import sha256

        def _stable_suffix(text: str) -> str:
            # The built-in hash() of a str is salted per interpreter process, so names built
            # from it differ between sessions and the "already present" check below could
            # never match a configuration created earlier. A digest is reproducible.
            return sha256(text.encode('utf-8')).hexdigest()[:16]

        dimension_display = {
            IngredientQuantityDimension.ABSOLUTE: "absolute quantity",
            IngredientQuantityDimension.MASS: "mass fraction",
            IngredientQuantityDimension.VOLUME: "volume fraction",
            IngredientQuantityDimension.NUMBER: "number fraction"
        }
        process: ProcessTemplate = project.process_templates.get(
            uid=process_template.id, scope=process_template.scope)
        if not process.allowed_names:
            raise RuntimeError(
                "Cannot add ingredients for process template '{}' because it has no defined "
                "ingredients (allowed_names is not defined).".format(
                    process.name))

        # Loop invariants: the display label of the dimension and the names already in use.
        quantity_label = dimension_display[quantity_dimension]
        existing_names = {var.name for var in self.variables}

        new_variables = []
        new_columns = []
        for name in process.allowed_names:
            identifier_variable = IngredientIdentifierByProcessTemplateAndName(
                name='_'.join([
                    process.name, name,
                    _stable_suffix(process_template.id + name + scope)
                ]),
                headers=[process.name, name, scope],
                process_template=process_template,
                ingredient_name=name,
                scope=scope)
            quantity_variable = IngredientQuantityByProcessAndName(
                name='_'.join([
                    process.name, name,
                    _stable_suffix(process_template.id + name + quantity_label)
                ]),
                headers=[process.name, name, quantity_label],
                process_template=process_template,
                ingredient_name=name,
                quantity_dimension=quantity_dimension,
                unit=unit)

            # The identifier does not depend on the quantity dimension, so it may already
            # exist from an earlier call made with a different dimension; add it only once.
            if identifier_variable.name not in existing_names:
                new_variables.append(identifier_variable)
                new_columns.append(
                    IdentityColumn(data_source=identifier_variable.name))
            new_variables.append(quantity_variable)
            new_columns.append(MeanColumn(data_source=quantity_variable.name))
            if quantity_dimension == IngredientQuantityDimension.ABSOLUTE:
                # Absolute quantities additionally get a column for their units.
                new_columns.append(
                    OriginalUnitsColumn(data_source=quantity_variable.name))

        return TableConfig(name=self.name,
                           description=self.description,
                           datasets=copy(self.datasets),
                           rows=copy(self.rows),
                           variables=copy(self.variables) + new_variables,
                           columns=copy(self.columns) + new_columns,
                           config_uid=copy(self.config_uid))