class DesignWorkflow(Resource['DesignWorkflow'], Workflow):
    """Object that generates scored materials that may approach higher values of the score.

    Parameters
    ----------
    name: str
        the name of the workflow
    design_space_id: UUID
        the UUID corresponding to the design space to use
    processor_id: UUID
        the UUID corresponding to the processor to use
    predictor_id: UUID
        the UUID corresponding to the predictor to use
    project_id: UUID
        the UUID corresponding to the project to use

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('display_name')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info', serializable=False)
    # TODO: Figure out how to make these fields richer/use actual objects
    design_space_id = properties.UUID('modules.design_space_id')
    processor_id = properties.UUID('modules.processor_id')
    predictor_id = properties.UUID('modules.predictor_id')
    # The project_id is used to keep a reference to the project under which the workflow was
    # created. It is currently unclear if this is the best way to do this. Another option might
    # be to have all objects have a context object, but that also seems to have downsides.

    def __init__(self,
                 name: str,
                 design_space_id: UUID,
                 processor_id: UUID,
                 predictor_id: UUID,
                 project_id: Optional[UUID] = None,
                 session: Optional[Session] = None):
        self.name = name
        self.design_space_id = design_space_id
        self.processor_id = processor_id
        self.predictor_id = predictor_id
        self.project_id = project_id
        # A `Session()` default argument is evaluated once at definition time and then
        # shared by every instance; create a fresh session per call instead.
        self.session = session if session is not None else Session()

    def __str__(self):
        return '<DesignWorkflow {!r}>'.format(self.name)

    @property
    def executions(self) -> WorkflowExecutionCollection:
        """Return a resource representing all visible executions of this workflow."""
        # Executions are scoped to a project; without one the collection cannot be built.
        if getattr(self, 'project_id', None) is None:
            raise AttributeError(
                'Cannot initialize execution without project reference!')
        return WorkflowExecutionCollection(self.project_id, self.uid, self.session)
class WorkflowExecution(Resource['WorkflowExecution']):
    """[ALPHA] A Citrine Workflow Execution.

    Parameters
    ----------
    uid: str
        Unique identifier of the workflow execution
    project_id: str
        Unique identifier of the project that contains the workflow execution
    workflow_id: str
        Unique identifier of the workflow that was executed
    version_number: int
        Integer identifier that increases each time the workflow is executed. The first
        execution has version_number = 1.

    """

    _response_key = 'WorkflowExecutions'

    uid = properties.UUID('id')
    project_id = properties.UUID('project_id', deserializable=False)
    workflow_id = properties.UUID('workflow_id', deserializable=False)
    version_number = properties.Integer("version_number")

    def __init__(self,
                 uid: Optional[str] = None,
                 project_id: Optional[str] = None,
                 workflow_id: Optional[str] = None,
                 session: Optional[Session] = None,
                 version_number: Optional[int] = None):
        # Plain field capture; no validation happens at construction time.
        self.uid: str = uid
        self.project_id: str = project_id
        self.workflow_id: str = workflow_id
        self.session: Session = session
        self.version_number = version_number

    def __str__(self):
        return '<WorkflowExecution {!r}>'.format(str(self.uid))

    def _path(self):
        # Base REST path for all endpoints acting on this execution.
        template = '/projects/{project_id}/workflows/{workflow_id}/executions/{execution_id}'
        return template.format(project_id=self.project_id,
                               workflow_id=self.workflow_id,
                               execution_id=self.uid)

    def status(self):
        """Get the current status of this execution."""
        raw = self.session.get_resource(self._path() + "/status")
        return WorkflowExecutionStatus.build(raw)

    def results(self):
        """Get the results of this execution."""
        return self.session.get_resource(self._path() + "/results")
class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
    """[ALPHA] A data source based on a GEM Table hosted on the data platform.

    Parameters
    ----------
    table_id: UUID
        Unique identifier for the GEM Table
    table_version: Union[str,int]
        Version number for the GEM Table, which starts at 1 rather than 0.
        Strings are cast to ints.
    formulation_descriptor: Optional[FormulationDescriptor]
        Optional descriptor used to store formulations emitted by the data source.

    """

    typ = properties.String('type', default='hosted_table_data_source', deserializable=False)
    table_id = properties.UUID("table_id")
    table_version = properties.Integer("table_version")
    formulation_descriptor = properties.Optional(
        properties.Object(FormulationDescriptor), "formulation_descriptor")

    def _attrs(self) -> List[str]:
        # Fields that identify this data source for comparison/serialization.
        return ["table_id", "table_version", "typ"]

    def __init__(self,
                 table_id: UUID,
                 table_version: Union[int, str],
                 formulation_descriptor: Optional[FormulationDescriptor] = None):
        self.table_id: UUID = table_id
        self.table_version: Union[int, str] = table_version
        self.formulation_descriptor: Optional[FormulationDescriptor] = formulation_descriptor
class EnumeratedDimension(Serializable['EnumeratedDimension'], Dimension):
    """Finite enumerated dimension defined by a template ID, a descriptor, and a value list.

    Parameters
    ----------
    descriptor: Descriptor
        a descriptor of the single dimension
    template_id: UUID
        UUID that corresponds to the template in DC
    values: list[str]
        list of values that can be parsed by the descriptor

    """

    descriptor = properties.Object(Descriptor, 'descriptor')
    # Serialized under the key 'list' rather than 'values'.
    values = properties.List(properties.String(), 'list')
    typ = properties.String('type', default='EnumeratedDimension', deserializable=False)
    # NOTE(review): uuid4() here runs once at class-definition time, so every object
    # that falls back to this default shares the same UUID — confirm this is intended.
    template_id = properties.UUID('template_id', default=uuid4())

    def __init__(self,
                 descriptor: Descriptor,
                 values: List[str],
                 template_id: Optional[UUID] = None):
        self.descriptor: Descriptor = descriptor
        self.values: List[str] = values
        # A fresh UUID is generated per instance when none is supplied.
        self.template_id: UUID = template_id or uuid4()
class ContinuousDimension(Serializable['ContinuousDimension'], Dimension):
    """Continuous dimension defined by a template ID, a descriptor, and inclusive bounds.

    Parameters
    ----------
    descriptor: RealDescriptor
        a descriptor of the single dimension
    lower_bound: float
        inclusive lower bound; defaults to the descriptor's lower bound
    upper_bound: float
        inclusive upper bound; defaults to the descriptor's upper bound
    template_id: UUID
        UUID that corresponds to the template in DC

    """

    descriptor = properties.Object(RealDescriptor, 'descriptor')
    lower_bound = properties.Float('lower_bound')
    upper_bound = properties.Float('upper_bound')
    typ = properties.String('type', default='ContinuousDimension', deserializable=False)
    # NOTE(review): uuid4() runs once at class-definition time, so every object that
    # falls back to this default shares the same UUID — confirm this is intended.
    template_id = properties.UUID('template_id', default=uuid4())

    def __init__(self,
                 descriptor: RealDescriptor,
                 lower_bound: Optional[float] = None,
                 upper_bound: Optional[float] = None,
                 template_id: Optional[UUID] = None):
        self.descriptor: RealDescriptor = descriptor
        # Use explicit `is None` checks: a bound of 0.0 is falsy, and the previous
        # `lower_bound or descriptor.lower_bound` silently discarded an explicit 0.0.
        self.lower_bound: float = (
            lower_bound if lower_bound is not None else descriptor.lower_bound)
        self.upper_bound: float = (
            upper_bound if upper_bound is not None else descriptor.upper_bound)
        self.template_id: UUID = template_id or uuid4()
class ModuleRef(Serializable['ModuleRef']):
    """[ALPHA] A reference to a Module by UID."""

    # Unique identifier of the referenced module.
    module_uid = properties.UUID('module_uid')

    def __init__(self, module_uid: str):
        self.module_uid = module_uid
class EnumeratedDesignSpace(Resource['EnumeratedDesignSpace'], DesignSpace):
    """Design space composed of an explicit enumeration of candidate materials to score.

    Note that every candidate must have exactly the descriptors in the list populated
    (no more, no less) to be included.

    Parameters
    ----------
    name:str
        the name of the design space
    description:str
        the description of the design space
    descriptors: list[Descriptor]
        the list of descriptors included in the candidates of the design space
    data: list[dict]
        list of dicts of the shape `{<descriptor_key>: <descriptor_value>}`
        where each dict corresponds to a candidate in the design space

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    descriptors = properties.List(properties.Object(Descriptor), 'config.descriptors')
    data = properties.List(
        properties.Mapping(properties.String, properties.Raw), 'config.data')
    typ = properties.String('config.type', default='EnumeratedDesignSpace',
                            deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info', serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID(
        'schema_id', default=UUID('f3907a58-aa46-462c-8837-a5aa9605e79e'))

    def __init__(self,
                 name: str,
                 description: str,
                 descriptors: List[Descriptor],
                 data: List[Mapping[str, Any]],
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.descriptors: List[Descriptor] = descriptors
        self.data: List[Mapping[str, Any]] = data
        # A `Session()` default argument is evaluated once at definition time and then
        # shared by every instance; create a fresh session per call instead.
        self.session: Session = session if session is not None else Session()

    def _post_dump(self, data: dict) -> dict:
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        # Fixed: `return` was previously split from its string literal, so the
        # method fell through and returned None.
        return '<EnumeratedDesignSpace {!r}>'.format(self.name)
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are constructed from saved models and should not be
    user-instantiated.

    Parameters
    ----------
    name: str
        the name of the model
    type_: str
        the type of the model (e.g., "ML Model", "Featurizer", etc.)
    inputs: List[Descriptor]
        list of input descriptors
    outputs: List[Descriptor]
        list of output descriptors
    model_settings: dict
        settings of the model, as a dictionary (details depend on model type)
    feature_importances: List[FeatureImportanceReport]
        list of feature importance reports, one for each output
    predictor_name: str
        the name of the predictor that created this model
    predictor_uid: Optional[uuid]
        the uid of the predictor that created this model

    """

    name = properties.String('name')
    type_ = properties.String('type')
    inputs = properties.List(properties.String(), 'inputs')
    outputs = properties.List(properties.String(), 'outputs')
    model_settings = properties.Raw('model_settings')
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport), 'feature_importances')
    predictor_name = properties.String('predictor_configuration_name', default='')
    predictor_uid = properties.Optional(properties.UUID(), 'predictor_configuration_uid')

    def __init__(self,
                 name: str,
                 type_: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 model_settings: Dict[str, Any],
                 feature_importances: List[FeatureImportanceReport],
                 predictor_name: str,
                 predictor_uid: Optional[UUID] = None):
        # Identity of the model and its parent predictor.
        self.name = name
        self.type_ = type_
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid
        # Inputs/outputs and per-model diagnostics.
        self.inputs = inputs
        self.outputs = outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances

    def __str__(self):
        return '<ModelSummary {!r}>'.format(self.name)
class MonteCarloProcessor(Serializable['MonteCarloProcessor'], Processor):
    """[ALPHA] Using a Monte Carlo optimizer to search for the best candidate.

    The moves that the MonteCarlo optimizer makes are inferred from the descriptors
    in the design space.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor

    """

    # NOTE: the Serializable type parameter previously read 'GridProcessor'
    # (copy-paste error); it should name this class, as elsewhere in the file.

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    # 'ContinuousSearch' is the backend's wire name for this processor type.
    typ = properties.String('config.type', default='ContinuousSearch',
                            deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info', serializable=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()), 'experimental_reasons', serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id', default=UUID('d8ddfe73-10f7-4456-9de9-9a1638bae403'))

    def _attrs(self) -> List[str]:
        return ["name", "description", "typ"]

    def __init__(self,
                 name: str,
                 description: str,
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<MonteCarloProcessor {!r}>'.format(self.name)
class JobSubmissionResponse(Resource['JobSubmissionResponse']):
    """[ALPHA] a response to a submit-job request for the job submission framework.

    This is returned as a successful response from the remote service.
    """

    # NOTE: the Resource type parameter previously read 'AraJobStatus'; by the
    # convention used throughout this file it should name this class itself.

    job_id = properties.UUID("job_id")
    """:UUID: job id of the job submission request"""

    def __init__(self, job_id: UUID):
        self.job_id = job_id
class ProductDesignSpace(Resource['ProductDesignSpace'], DesignSpace):
    """[ALPHA] An outer product of univariate dimensions, either continuous or enumerated.

    Parameters
    ----------
    name:str
        the name of the design space
    description:str
        the description of the design space
    dimensions: list[Dimension]
        univariate dimensions that are factors of the design space; can be enumerated
        or continuous

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    dimensions = properties.List(properties.Object(Dimension), 'config.dimensions')
    typ = properties.String('config.type', default='Univariate', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()), 'status_info', serializable=False
    )
    archived = properties.Boolean('archived', default=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()), 'experimental_reasons', serializable=False
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID('schema_id',
                                default=UUID('6c16d694-d015-42a7-b462-8ef299473c9a'))

    def __init__(self,
                 name: str,
                 description: str,
                 dimensions: List[Dimension],
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.dimensions: List[Dimension] = dimensions
        # A `Session()` default argument is evaluated once at definition time and then
        # shared by every instance; create a fresh session per call instead.
        self.session: Session = session if session is not None else Session()

    def _post_dump(self, data: dict) -> dict:
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<ProductDesignSpace {!r}>'.format(self.name)
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are constructed from saved models and should not be
    user-instantiated.
    """

    name = properties.String('name')
    """:str: the name of the model"""
    type_ = properties.String('type')
    """:str: the type of the model (e.g., "ML Model", "Featurizer", etc.)"""
    inputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]), 'inputs')
    """:List[Descriptor]: list of input descriptors"""
    outputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]), 'outputs')
    """:List[Descriptor]: list of output descriptors"""
    model_settings = properties.Raw('model_settings')
    """:dict: model settings, as a dictionary (keys depend on the model type)"""
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport), 'feature_importances')
    """:List[FeatureImportanceReport]: feature importance reports for each output"""
    predictor_name = properties.String('predictor_configuration_name', default='')
    """:str: the name of the predictor that created this model"""
    predictor_uid = properties.Optional(properties.UUID(), 'predictor_configuration_uid')
    """:Optional[UUID]: the unique Citrine id of the predictor that created this model"""
    training_data_count = properties.Optional(properties.Integer, "training_data_count")
    """:int: Number of rows in the training data for the model, if applicable."""

    def __init__(self,
                 name: str,
                 type_: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 model_settings: Dict[str, Any],
                 feature_importances: List[FeatureImportanceReport],
                 predictor_name: str,
                 predictor_uid: Optional[UUID] = None):
        # NOTE(review): `training_data_count` is deserialized only and is not
        # settable through this constructor — presumably intentional, since these
        # objects are built from saved models; confirm if that changes.
        self.name = name
        self.type_ = type_
        self.inputs = inputs
        self.outputs = outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        self.predictor_name = predictor_name
        self.predictor_uid = predictor_uid

    def __str__(self):
        return '<ModelSummary {!r}>'.format(self.name)
class WorkflowExecution(Resource['WorkflowExecution']):
    """A Citrine Workflow Execution."""

    _response_key = 'WorkflowExecutions'

    uid = properties.UUID('id')
    project_id = properties.UUID('project_id', deserializable=False)
    workflow_id = properties.UUID('workflow_id', deserializable=False)

    def __init__(self,
                 uid: Optional[str] = None,
                 project_id: Optional[str] = None,
                 workflow_id: Optional[str] = None,
                 session: Optional[Session] = None):
        # Plain field capture; no validation happens at construction time.
        self.uid: str = uid
        self.project_id: str = project_id
        self.workflow_id: str = workflow_id
        self.session: Session = session

    def __str__(self):
        return '<WorkflowExecution {!r}>'.format(str(self.uid))

    def _path(self):
        # Base REST path for all endpoints acting on this execution.
        template = '/projects/{project_id}/workflows/{workflow_id}/executions/{execution_id}'
        return template.format(project_id=self.project_id,
                               workflow_id=self.workflow_id,
                               execution_id=self.uid)

    def status(self):
        """Get the current status of this execution."""
        raw = self.session.get_resource(self._path() + "/status")
        return WorkflowExecutionStatus.build(raw)

    def results(self):
        """Get the results of this execution."""
        return self.session.get_resource(self._path() + "/results")
class GridProcessor(Serializable['GridProcessor'], Processor):
    """Generates a finite set of materials from the domain defined by the design space,
    then scans over the set of materials.

    To create a finite set of materials from continuous dimensions, a uniform grid is
    created between the bounds of the descriptor. The number of points is specified
    by `grid_sizes`.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    grid_sizes: dict[str, int]
        the number of points to select along each dimension of the grid,
        by dimension name

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    typ = properties.String('config.type', default='Grid', deserializable=False)
    # Serialized under 'grid_dimensions' on the wire.
    grid_sizes = properties.Mapping(properties.String, properties.Integer,
                                    'config.grid_dimensions')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info', serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id', default=UUID('272791a5-5468-4344-ac9f-2811d9266a4d'))

    def __init__(self,
                 name: str,
                 description: str,
                 grid_sizes: Mapping[str, int],
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.grid_sizes: Mapping[str, int] = grid_sizes
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<GridProcessor {!r}>'.format(self.name)
class EnumeratedProcessor(Serializable['EnumeratedProcessor'], Processor):
    """Process a design space by enumerating up to `max_size` materials from the domain
    and processing each independently.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    max_size: int
        maximum number of samples that can be enumerated over

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    max_size = properties.Integer('config.max_size')
    typ = properties.String('config.type', default='Enumerated', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info', serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id', default=UUID('307b88a2-fd50-4d27-ae91-b8d6282f68f7'))

    def __init__(self,
                 name: str,
                 description: str,
                 max_size: Optional[int] = None,
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        # Default cap is the max 32-bit signed integer (2147483647); note the falsy
        # check also maps an explicit max_size=0 to this cap.
        self.max_size: int = max_size or 2**31 - 1
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<EnumeratedProcessor {!r}>'.format(self.name)
class PredictedVsActualRealPoint(Serializable["PredictedVsActualRealPoint"]):
    """Predicted vs. actual data for a single real-valued data point."""

    uuid = properties.UUID("uuid")
    """:UUID: Unique Citrine id given to the candidate"""
    identifiers = properties.Set(properties.String, "identifiers")
    """:Set[str]: Set of globally unique identifiers given to the candidate"""
    trial = properties.Integer("trial")
    """:int: 1-based index of the trial this candidate belonged to"""
    fold = properties.Integer("fold")
    """:int: 1-based index of the fold this candidate belonged to"""
    predicted = properties.Object(RealMetricValue, "predicted")
    """:RealMetricValue: Predicted value"""
    actual = properties.Object(RealMetricValue, "actual")
    """:RealMetricValue: Actual value"""

    def __init__(self):
        # Instances are built via deserialization only; nothing to initialize here.
        pass  # pragma: no cover
class GemTable(Resource['Table']):
    """A 2-dimensional projection of data.

    GEM Tables are the basic unit used to flatten and manipulate data objects.
    While data objects can represent complex materials data, the format is NOT
    conducive to analysis and machine learning. GEM Tables, however, can be used
    to 'flatten' data objects into useful projections.

    Attributes
    ----------
    uid: UUID
        Unique uuid4 identifier of this GEM Table.
    version: str
        Version number of the GEM Table
    download_url: str
        Url pointing to the location of the GEM Table's contents.
        This is an expiring download link and is not unique.

    """

    _response_key = 'table'

    uid = properties.Optional(properties.UUID(), 'id')
    version = properties.Optional(properties.Integer, 'version')
    download_url = properties.Optional(properties.String, 'signed_download_url')

    def __init__(self):
        # Fields are populated by deserialization (build), not by the constructor.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        return '<GEM Table {!r}, version {}>'.format(self.uid, self.version)

    def resource_type(self) -> str:
        """Get the access control resource type of this resource."""
        return 'TABLE'

    @deprecation.deprecated(deprecated_in="0.16.0",
                            details="Use TableCollection.read() instead")
    def read(self, local_path):
        """[DEPRECATED] Use TableCollection.read() instead."""  # noqa: D402
        # Rewrites the signed S3 link for local/proxy access before downloading.
        data_location = self.download_url
        data_location = rewrite_s3_links_locally(data_location)
        response = requests.get(data_location)
        write_file_locally(response.content, local_path)
class DesignCandidate(Serializable["DesignCandidate"]):
    """A candidate material computed by a design execution.

    This class represents the candidate computed by a design execution.
    """

    material_id = properties.UUID('material_id')
    """:UUID: unique Citrine id of the material"""
    identifiers = properties.List(properties.String(), 'identifiers')
    """:List[str]: globally unique identifiers assigned to the material"""
    primary_score = properties.Float('primary_score')
    """:float: numerical score describing how well the candidate satisfies
    the objectives and constraints (higher is better)"""
    material = properties.Object(DesignMaterial, 'material')
    """:DesignMaterial: the material returned by the design workflow"""

    def __init__(self):
        # Instances are built via deserialization only; nothing to initialize here.
        pass  # pragma: no cover
class Table(Resource['Table']):
    """A 2-dimensional projection of data.

    Tables are the basic unit used to flatten and manipulate data objects.
    While data objects can represent complex materials data, the format is NOT
    conducive to analysis and machine learning. Tables, however, can be used to
    'flatten' data objects into useful projections.

    Attributes
    ----------
    uid: UUID
        Unique uuid4 identifier of this table.
    version: str
        Version number of the Table
    download_url: str
        Url pointing to the location of the Table's contents

    """

    _response_key = 'table'

    uid = properties.Optional(properties.UUID(), 'id')
    version = properties.Optional(properties.Integer, 'version')
    download_url = properties.Optional(properties.String, 'signed_download_url')

    def __init__(self):
        # Fields are populated by deserialization (build), not by the constructor.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        # TODO: Change this to name once that's added to the table model
        return '<Table {!r}>'.format(self.uid)

    def read(self, local_path):
        """Read the Table file from S3."""
        # Rewrites the signed S3 link for local/proxy access before downloading.
        data_location = self.download_url
        data_location = rewrite_s3_links_locally(data_location)
        response = requests.get(data_location)
        write_file_locally(response.content, local_path)
class PredictedVsActualCategoricalPoint(
        Serializable["PredictedVsActualCategoricalPoint"]):
    """Predicted vs. actual data for a single categorical data point."""

    uuid = properties.UUID("uuid")
    """:UUID: Unique Citrine id given to the candidate"""
    identifiers = properties.Set(properties.String, "identifiers")
    """:Set[str]: Set of globally unique identifiers given to the candidate"""
    trial = properties.Integer("trial")
    """:int: 1-based index of the trial this candidate belonged to"""
    fold = properties.Integer("fold")
    """:int: 1-based index of the fold this candidate belonged to"""
    predicted = properties.Mapping(properties.String, properties.Float, "predicted")
    """:Dict[str, float]: Predicted class probabilities defined as a map from
    each class name to its relative frequency"""
    actual = properties.Mapping(properties.String, properties.Float, "actual")
    """:Dict[str, float]: Actual class probabilities defined as a map from
    each class name to its relative frequency"""

    def __init__(self):
        # Instances are built via deserialization only; nothing to initialize here.
        pass  # pragma: no cover
class GemTable(Resource['Table']):
    """A 2-dimensional projection of data.

    GEM Tables are the basic unit used to flatten and manipulate data objects.
    While data objects can represent complex materials data, the format is NOT
    conducive to analysis and machine learning. GEM Tables, however, can be used
    to 'flatten' data objects into useful projections.
    """

    _response_key = 'table'
    _resource_type = ResourceTypeEnum.TABLE

    uid = properties.Optional(properties.UUID(), 'id')
    """:Optional[UUID]: unique Citrine id of this GEM Table"""
    version = properties.Optional(properties.Integer, 'version')
    """:Optional[int]: Version number of the GEM Table.
    The first table built from a given config is version 1."""
    download_url = properties.Optional(properties.String, 'signed_download_url')
    """:Optional[str]: Url pointing to the location of the GEM Table's contents.
    This is an expiring download link and is not unique."""

    def __init__(self):
        # Fields are populated by deserialization (build), not by the constructor.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        return '<GEM Table {!r}, version {}>'.format(self.uid, self.version)

    @deprecation.deprecated(deprecated_in="0.16.0",
                            details="Use TableCollection.read() instead")
    def read(self, local_path):
        """[DEPRECATED] Use TableCollection.read() instead."""  # noqa: D402
        # Rewrites the signed S3 link for local/proxy access before downloading.
        data_location = self.download_url
        data_location = rewrite_s3_links_locally(data_location)
        response = requests.get(data_location)
        write_file_locally(response.content, local_path)
class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
    """A data source based on a GEM Table hosted on the data platform.

    Parameters
    ----------
    table_id: UUID
        Unique identifier for the GEM Table
    table_version: Union[str,int]
        Version number for the GEM Table. The first GEM table built from a
        configuration has version = 1. Strings are cast to ints.
    formulation_descriptor: Optional[FormulationDescriptor]
        Optional descriptor used to store formulations emitted by the data source.
        If the data source emits a formulation but this argument is not provided,
        then a default formulation descriptor will be generated. The formulations
        descriptor, and other descriptors, can be retrieved using
        :func:`~citrine.resources.descriptors.DescriptorMethods.descriptors_from_data_source`.

    """

    typ = properties.String('type', default='hosted_table_data_source',
                            deserializable=False)
    table_id = properties.UUID("table_id")
    table_version = properties.Integer("table_version")
    formulation_descriptor = properties.Optional(
        properties.Object(FormulationDescriptor), "formulation_descriptor"
    )

    def _attrs(self) -> List[str]:
        # Fields that identify this data source for comparison/serialization.
        return ["table_id", "table_version", "typ"]

    def __init__(self,
                 table_id: UUID,
                 table_version: Union[int, str],
                 formulation_descriptor: Optional[FormulationDescriptor] = None):
        self.table_id: UUID = table_id
        self.table_version: Union[int, str] = table_version
        self.formulation_descriptor: Optional[FormulationDescriptor] = \
            formulation_descriptor
class Dataset(Resource['Dataset']):
    """
    A collection of data objects.

    Datasets are the basic unit of access control. A user with read access to a dataset can
    view every object in that dataset. A user with write access to a dataset can create,
    update, and delete objects in the dataset.

    Parameters
    ----------
    name: str
        Name of the dataset. Can be used for searching.
    summary: str
        A summary of this dataset.
    description: str
        Long-form description of the dataset.
    unique_name: Optional[str]
        An optional, globally unique name that can be used to retrieve the dataset.

    Attributes
    ----------
    uid: UUID
        Unique uuid4 identifier of this dataset.
    deleted: bool
        Flag indicating whether or not this dataset has been deleted.
    created_by: UUID
        ID of the user who created the dataset.
    updated_by: UUID
        ID of the user who last updated the dataset.
    deleted_by: UUID
        ID of the user who deleted the dataset, if it is deleted.
    create_time: int
        Time the dataset was created, in seconds since epoch.
    update_time: int
        Time the dataset was most recently updated, in seconds since epoch.
    delete_time: int
        Time the dataset was deleted, in seconds since epoch, if it is deleted.
    public: bool
        Flag indicating whether the dataset is publicly readable.

    """

    _response_key = 'dataset'
    _resource_type = ResourceTypeEnum.DATASET

    uid = properties.Optional(properties.UUID(), 'id')
    name = properties.String('name')
    unique_name = properties.Optional(properties.String(), 'unique_name')
    summary = properties.String('summary')
    description = properties.String('description')
    deleted = properties.Optional(properties.Boolean(), 'deleted')
    created_by = properties.Optional(properties.UUID(), 'created_by')
    updated_by = properties.Optional(properties.UUID(), 'updated_by')
    deleted_by = properties.Optional(properties.UUID(), 'deleted_by')
    create_time = properties.Optional(properties.Datetime(), 'create_time')
    update_time = properties.Optional(properties.Datetime(), 'update_time')
    delete_time = properties.Optional(properties.Datetime(), 'delete_time')
    public = properties.Optional(properties.Boolean(), 'public')

    def __init__(self, name: str, summary: str, description: str,
                 unique_name: Optional[str] = None):
        self.name: str = name
        self.summary: str = summary
        self.description: str = description
        self.unique_name = unique_name

        # The attributes below should not be set by the user. Instead they will be updated
        # as the dataset interacts with the backend data service.
        self.uid = None
        self.deleted = None
        self.created_by = None
        self.updated_by = None
        self.deleted_by = None
        self.create_time = None
        self.update_time = None
        self.delete_time = None
        self.public = None

    def __str__(self):
        return '<Dataset {!r}>'.format(self.name)

    # NOTE(review): the collection properties below read self.project_id and self.session,
    # which are not set in __init__ — presumably they are attached by the collection that
    # builds this resource; confirm against DatasetCollection.

    @property
    def property_templates(self) -> PropertyTemplateCollection:
        """Return a resource representing all property templates in this dataset."""
        return PropertyTemplateCollection(self.project_id, self.uid, self.session)

    @property
    def condition_templates(self) -> ConditionTemplateCollection:
        """Return a resource representing all condition templates in this dataset."""
        return ConditionTemplateCollection(self.project_id, self.uid, self.session)

    @property
    def parameter_templates(self) -> ParameterTemplateCollection:
        """Return a resource representing all parameter templates in this dataset."""
        return ParameterTemplateCollection(self.project_id, self.uid, self.session)

    @property
    def material_templates(self) -> MaterialTemplateCollection:
        """Return a resource representing all material templates in this dataset."""
        return MaterialTemplateCollection(self.project_id, self.uid, self.session)

    @property
    def measurement_templates(self) -> MeasurementTemplateCollection:
        """Return a resource representing all measurement templates in this dataset."""
        return MeasurementTemplateCollection(self.project_id, self.uid, self.session)

    @property
    def process_templates(self) -> ProcessTemplateCollection:
        """Return a resource representing all process templates in this dataset."""
        return ProcessTemplateCollection(self.project_id, self.uid, self.session)

    @property
    def process_runs(self) -> ProcessRunCollection:
        """Return a resource representing all process runs in this dataset."""
        return ProcessRunCollection(self.project_id, self.uid, self.session)

    @property
    def measurement_runs(self) -> MeasurementRunCollection:
        """Return a resource representing all measurement runs in this dataset."""
        return MeasurementRunCollection(self.project_id, self.uid, self.session)

    @property
    def material_runs(self) -> MaterialRunCollection:
        """Return a resource representing all material runs in this dataset."""
        return MaterialRunCollection(self.project_id, self.uid, self.session)

    @property
    def ingredient_runs(self) -> IngredientRunCollection:
        """Return a resource representing all ingredient runs in this dataset."""
        return IngredientRunCollection(self.project_id, self.uid, self.session)

    @property
    def process_specs(self) -> ProcessSpecCollection:
        """Return a resource representing all process specs in this dataset."""
        return ProcessSpecCollection(self.project_id, self.uid, self.session)

    @property
    def measurement_specs(self) -> MeasurementSpecCollection:
        """Return a resource representing all measurement specs in this dataset."""
        return MeasurementSpecCollection(self.project_id, self.uid, self.session)

    @property
    def material_specs(self) -> MaterialSpecCollection:
        """Return a resource representing all material specs in this dataset."""
        return MaterialSpecCollection(self.project_id, self.uid, self.session)

    @property
    def ingredient_specs(self) -> IngredientSpecCollection:
        """Return a resource representing all ingredient specs in this dataset."""
        return IngredientSpecCollection(self.project_id, self.uid, self.session)

    @property
    def files(self) -> FileCollection:
        """Return a resource representing all files in the dataset."""
        return FileCollection(self.project_id, self.uid, self.session)

    def _collection_for(self, data_concepts_resource):
        """
        Return the collection in this dataset that stores the given resource's type.

        Raises
        ------
        ValueError
            If the resource is not a recognized data concepts type.
        """
        if isinstance(data_concepts_resource, MeasurementTemplate):
            return self.measurement_templates
        if isinstance(data_concepts_resource, MeasurementSpec):
            return self.measurement_specs
        if isinstance(data_concepts_resource, MeasurementRun):
            return self.measurement_runs
        if isinstance(data_concepts_resource, MaterialTemplate):
            return self.material_templates
        if isinstance(data_concepts_resource, MaterialSpec):
            return self.material_specs
        if isinstance(data_concepts_resource, MaterialRun):
            return self.material_runs
        if isinstance(data_concepts_resource, ProcessTemplate):
            return self.process_templates
        if isinstance(data_concepts_resource, ProcessSpec):
            return self.process_specs
        if isinstance(data_concepts_resource, ProcessRun):
            return self.process_runs
        if isinstance(data_concepts_resource, IngredientSpec):
            return self.ingredient_specs
        if isinstance(data_concepts_resource, IngredientRun):
            return self.ingredient_runs
        if isinstance(data_concepts_resource, PropertyTemplate):
            return self.property_templates
        if isinstance(data_concepts_resource, ParameterTemplate):
            return self.parameter_templates
        if isinstance(data_concepts_resource, ConditionTemplate):
            return self.condition_templates
        # Previously an unrecognized type fell through to an implicit None, which surfaced
        # later as an opaque "'NoneType' object has no attribute ..." error. Fail fast with
        # a clear message instead.
        raise ValueError(
            "Unrecognized data concepts resource type: {}".format(
                type(data_concepts_resource).__name__))

    def register(self, data_concepts_resource: ResourceType, dry_run=False) -> ResourceType:
        """Register a data concepts resource to the appropriate collection."""
        return self._collection_for(data_concepts_resource)\
            .register(data_concepts_resource, dry_run=dry_run)

    def register_all(self, data_concepts_resources: List[ResourceType],
                     dry_run=False) -> List[ResourceType]:
        """
        Register multiple data concepts resources to each of their appropriate collections.

        Does so in an order that is guaranteed to store all linked items before the item
        that references them.

        The uids of the input data concepts resources are updated with their on-platform
        uids. This supports storing an object that has a reference to an object that
        doesn't have a uid.

        Parameters
        ----------
        data_concepts_resources: List[ResourceType]
            The resources to register. Can be different types.
        dry_run: bool
            Whether to actually register the item or run a dry run of the register
            operation. Dry run is intended to be used for validation. Default: false

        Returns
        -------
        List[ResourceType]
            The registered versions

        """
        resources = list()
        by_type = defaultdict(list)
        for obj in data_concepts_resources:
            by_type[obj.typ].append(obj)
        # Groups are ordered so that linked items are written before items referencing them.
        typ_groups = sorted(list(by_type.values()), key=lambda x: writable_sort_order(x[0]))
        batch_size = 50
        for typ_group in typ_groups:
            # Stepped range never produces an empty batch, so no guard is needed.
            for batch_start in range(0, len(typ_group), batch_size):
                batch = typ_group[batch_start:batch_start + batch_size]
                registered = self._collection_for(batch[0])\
                    .register_all(batch, dry_run=dry_run)
                # Copy the on-platform uids back onto the caller's objects.
                for prewrite, postwrite in zip(batch, registered):
                    if isinstance(postwrite, BaseEntity):
                        prewrite.uids = postwrite.uids
                resources.extend(registered)
        return resources

    def update(self, model: ResourceType) -> ResourceType:
        """Update a data concepts resource using the appropriate collection."""
        return self._collection_for(model).update(model)

    def delete(self, data_concepts_resource: ResourceType, dry_run=False) -> ResourceType:
        """Delete a data concepts resource using the appropriate collection."""
        scoped_id = next(iter(data_concepts_resource.uids.items()), None)
        if scoped_id is None:
            raise ValueError(
                "Only objects that contain identifiers can be deleted.")
        scope, entity_id = scoped_id
        return self._collection_for(data_concepts_resource) \
            .delete(entity_id, scope=scope, dry_run=dry_run)

    def delete_contents(self, *, timeout: float = 2 * 60, polling_delay: float = 1.0):
        """
        Delete all the GEMD objects from within a single Dataset.

        Parameters
        ----------
        timeout: float
            Amount of time to wait on the job (in seconds) before giving up. Note that
            this number has no effect on the underlying job itself, which can also time
            out server-side.
        polling_delay: float
            How long to delay between each polling retry attempt.

        Returns
        -------
        List[Tuple[LinkByUID, ApiError]]
            A list of (LinkByUID, api_error) for each failure to delete an object.
            Note that this method doesn't raise an exception if an object fails to be
            deleted.

        """
        path = 'projects/{project_id}/datasets/{dataset_uid}/contents'.format(
            dataset_uid=self.uid, project_id=self.project_id)
        response = self.session.delete_resource(path)
        # The deletion runs asynchronously server-side; poll the returned job until done.
        job_id = response["job_id"]
        return _poll_for_async_batch_delete_result(self.project_id, self.session, job_id,
                                                   timeout, polling_delay)

    def gemd_batch_delete(
            self,
            id_list: List[Union[LinkByUID, UUID, str, BaseEntity]],
            *,
            timeout: float = 2 * 60,
            polling_delay: float = 1.0) -> List[Tuple[LinkByUID, ApiError]]:
        """
        Remove a set of GEMD objects.

        You may provide GEMD objects that reference each other, and the objects will be
        removed in the appropriate order.

        A failure will be returned if the object cannot be deleted due to an external
        reference.

        All data objects must be associated with this dataset resource. You must also
        have write access on this dataset.

        If you wish to delete more than 50 objects, queuing of deletes requires that the
        types of objects be known, and thus you _must_ provide ids in the form of
        BaseEntities.

        Also note that Attribute Templates cannot be deleted at present.

        Parameters
        ----------
        id_list: List[Union[LinkByUID, UUID, str, BaseEntity]]
            A list of the IDs of data objects to be removed. They can be passed
            as a LinkByUID tuple, a UUID, a string, or the object itself. A UUID
            or string is assumed to be a Citrine ID, whereas a LinkByUID or
            BaseEntity can also be used to provide an external ID.

        Returns
        -------
        List[Tuple[LinkByUID, ApiError]]
            A list of (LinkByUID, api_error) for each failure to delete an object.
            Note that this method doesn't raise an exception if an object fails to be
            deleted.

        """
        return _async_gemd_batch_delete(id_list, self.project_id, self.session, self.uid,
                                        timeout=timeout, polling_delay=polling_delay)
class Project(Resource['Project']):
    """
    A Citrine Project.

    A project is a collection of datasets, some of which belong directly to the project
    and some of which have been shared with the project.

    Parameters
    ----------
    name: str
        Name of the project.
    description: str
        Long-form description of the project.
    session: Session, optional
        The Citrine session used to connect to the database.

    Attributes
    ----------
    uid: UUID
        Unique uuid4 identifier of this project.
    status: str
        Status of the project.
    created_at: int
        Time the project was created, in seconds since epoch.

    """

    _response_key = 'project'

    name = properties.String('name')
    description = properties.Optional(properties.String(), 'description')
    uid = properties.Optional(properties.UUID(), 'id')
    status = properties.Optional(properties.String(), 'status')
    created_at = properties.Optional(properties.Datetime(), 'created_at')

    def __init__(self,
                 name: str,
                 description: Optional[str] = None,
                 session: Optional[Session] = Session()):
        # NOTE(review): the default Session() is evaluated once at import time and shared
        # by every Project constructed without an explicit session — confirm this sharing
        # is intentional before changing it.
        self.name: str = name
        self.description: Optional[str] = description
        self.session: Session = session

    def __str__(self):
        return '<Project {!r}>'.format(self.name)

    def _path(self):
        """Return the REST path for this project."""
        return '/projects/{project_id}'.format(**{"project_id": self.uid})

    @property
    def design_spaces(self) -> DesignSpaceCollection:
        """Return a resource representing all visible design spaces."""
        return DesignSpaceCollection(self.uid, self.session)

    @property
    def processors(self) -> ProcessorCollection:
        """Return a resource representing all visible processors."""
        return ProcessorCollection(self.uid, self.session)

    @property
    def predictors(self) -> PredictorCollection:
        """Return a resource representing all visible predictors."""
        return PredictorCollection(self.uid, self.session)

    @property
    def workflows(self) -> WorkflowCollection:
        """Return a resource representing all visible workflows."""
        return WorkflowCollection(self.uid, self.session)

    @property
    def datasets(self) -> DatasetCollection:
        """Return a resource representing all visible datasets."""
        return DatasetCollection(self.uid, self.session)

    @property
    def tables(self) -> TableCollection:
        """Return a resource representing all visible Tables."""
        return TableCollection(self.uid, self.session)

    # The collections below are constructed with a dataset id of None, giving
    # project-scoped access to each object type.

    @property
    def property_templates(self) -> PropertyTemplateCollection:
        """Return a resource representing all property templates in this project."""
        return PropertyTemplateCollection(self.uid, None, self.session)

    @property
    def condition_templates(self) -> ConditionTemplateCollection:
        """Return a resource representing all condition templates in this project."""
        return ConditionTemplateCollection(self.uid, None, self.session)

    @property
    def parameter_templates(self) -> ParameterTemplateCollection:
        """Return a resource representing all parameter templates in this project."""
        return ParameterTemplateCollection(self.uid, None, self.session)

    @property
    def material_templates(self) -> MaterialTemplateCollection:
        """Return a resource representing all material templates in this project."""
        return MaterialTemplateCollection(self.uid, None, self.session)

    @property
    def measurement_templates(self) -> MeasurementTemplateCollection:
        """Return a resource representing all measurement templates in this project."""
        return MeasurementTemplateCollection(self.uid, None, self.session)

    @property
    def process_templates(self) -> ProcessTemplateCollection:
        """Return a resource representing all process templates in this project."""
        return ProcessTemplateCollection(self.uid, None, self.session)

    @property
    def process_runs(self) -> ProcessRunCollection:
        """Return a resource representing all process runs in this project."""
        return ProcessRunCollection(self.uid, None, self.session)

    @property
    def measurement_runs(self) -> MeasurementRunCollection:
        """Return a resource representing all measurement runs in this project."""
        return MeasurementRunCollection(self.uid, None, self.session)

    @property
    def material_runs(self) -> MaterialRunCollection:
        """Return a resource representing all material runs in this project."""
        return MaterialRunCollection(self.uid, None, self.session)

    @property
    def ingredient_runs(self) -> IngredientRunCollection:
        """Return a resource representing all ingredient runs in this project."""
        return IngredientRunCollection(self.uid, None, self.session)

    @property
    def process_specs(self) -> ProcessSpecCollection:
        """Return a resource representing all process specs in this project."""
        return ProcessSpecCollection(self.uid, None, self.session)

    @property
    def measurement_specs(self) -> MeasurementSpecCollection:
        """Return a resource representing all measurement specs in this project."""
        return MeasurementSpecCollection(self.uid, None, self.session)

    @property
    def material_specs(self) -> MaterialSpecCollection:
        """Return a resource representing all material specs in this project."""
        return MaterialSpecCollection(self.uid, None, self.session)

    @property
    def ingredient_specs(self) -> IngredientSpecCollection:
        """Return a resource representing all ingredient specs in this project."""
        return IngredientSpecCollection(self.uid, None, self.session)

    def share(self,
              project_id: str,
              resource_type: str,
              resource_id: str) -> Dict[str, str]:
        """Share a resource with another project."""
        return self.session.post_resource(self._path() + "/share", {
            "project_id": project_id,
            "resource": {"type": resource_type, "id": resource_id}
        })

    def make_public(self, resource: Resource) -> bool:
        """
        Grant public access to a resource owned by this project.

        Parameters
        ----------
        resource: Resource
            An instance of a resource owned by this project (e.g. a dataset).

        Returns
        -------
        bool
            True if the action was performed successfully

        """
        self.session.checked_post(self._path() + "/make-public", {
            "resource": resource.as_entity_dict()
        })
        return True

    def make_private(self, resource: Resource) -> bool:
        """
        Remove public access for a resource owned by this project.

        Parameters
        ----------
        resource: Resource
            An instance of a resource owned by this project (e.g. a dataset).

        Returns
        -------
        bool
            True if the action was performed successfully

        """
        self.session.checked_post(self._path() + "/make-private", {
            "resource": resource.as_entity_dict()
        })
        return True

    def list_members(self) -> List[ProjectMember]:
        """
        List all of the members in the current project.

        Returns
        -------
        List[ProjectMember]
            The members of the current project

        """
        members = self.session.get_resource(self._path() + "/users")["users"]
        return [ProjectMember(user=User.build(m), project=self, role=m["role"])
                for m in members]

    def update_user_role(self, user_uid: Union[str, UUID], role: ROLES,
                         actions: ACTIONS = ()):
        """
        Update a User's role and action permissions in the Project.

        Valid roles are MEMBER or LEAD.

        WRITE is the only action available for specification.

        Returns
        -------
        bool
            Returns True if user role successfully updated

        """
        # The default is an immutable tuple (instead of a shared mutable list) to avoid
        # the mutable-default-argument pitfall; list() keeps the request payload unchanged.
        self.session.checked_post(self._path() + "/users/{}".format(user_uid),
                                  {'role': role, 'actions': list(actions)})
        return True

    def add_user(self, user_uid: Union[str, UUID]):
        """
        Add a User to a Project.

        Adds User with MEMBER role to the Project. Use the update_user_role method to
        change a User's role.

        Returns
        -------
        bool
            Returns True if user successfully added

        """
        self.session.checked_post(self._path() + "/users/{}".format(user_uid),
                                  {'role': MEMBER, 'actions': []})
        return True

    def remove_user(self, user_uid: Union[str, UUID]) -> bool:
        """
        Remove a User from a Project.

        Returns
        -------
        bool
            Returns True if user successfully removed

        """
        self.session.checked_delete(
            self._path() + "/users/{}".format(user_uid)
        )
        return True
class SimpleMLPredictor(Serializable['SimplePredictor'], Predictor):
    """A predictor interface connecting inputs to outputs through latent variables.

    Builds a graphical model connecting the set of inputs through latent variables to
    the outputs. Supported complex inputs (such as chemical formulas) are auto-featurized
    and machine learning models are built for each latent variable and output.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        the description of the predictor
    inputs: list[Descriptor]
        Descriptors that represent inputs to relations
    outputs: list[Descriptor]
        Descriptors that represent outputs of relations
    latent_variables: list[Descriptor]
        Descriptors that are predicted from inputs and used when predicting the outputs
    training_data: str
        UUID of the table that contains the training data

    """

    # Server-assigned identifier; never serialized back to the platform.
    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    # Configuration fields, nested under the 'config' key of the serialized form.
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    inputs = properties.List(properties.Object(Descriptor), 'config.inputs')
    outputs = properties.List(properties.Object(Descriptor), 'config.outputs')
    latent_variables = properties.List(properties.Object(Descriptor),
                                       'config.latent_variables')
    training_data = properties.String('config.training_data')
    typ = properties.String('config.type', default='Simple', deserializable=False)
    # Read-only build-state fields reported by the platform.
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info',
                                      serializable=False)
    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PREDICTOR')
    schema_id = properties.UUID(
        'schema_id', default=UUID('08d20e5f-e329-4de0-a90a-4b5e36b91703'))

    def __init__(self,
                 name: str,
                 description: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 latent_variables: List[Descriptor],
                 training_data: str,
                 session: Optional[Session] = None,
                 report: Optional[Report] = None):
        self.name: str = name
        self.description: str = description
        self.inputs: List[Descriptor] = inputs
        self.outputs: List[Descriptor] = outputs
        self.latent_variables: List[Descriptor] = latent_variables
        self.training_data: str = training_data
        self.session: Optional[Session] = session
        self.report: Optional[Report] = report

    def _post_dump(self, data: dict) -> dict:
        # Mirror the config name as a top-level display name in the serialized payload.
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<SimplePredictor {!r}>'.format(self.name)

    def post_build(self, project_id: UUID, data: dict):
        """Create the predictor report object by fetching it from the report resource."""
        self.report = ReportResource(project_id, self.session).get(data['id'])
class WorkflowExecution(Resource['WorkflowExecution'], Pageable):
    """[DEPRECATED] A Citrine Workflow Execution.

    Parameters
    ----------
    uid: str
        Unique identifier of the workflow execution
    project_id: str
        Unique identifier of the project that contains the workflow execution
    workflow_id: str
        Unique identifier of the workflow that was executed
    version_number: int
        Integer identifier that increases each time the workflow is executed. The first
        execution has version_number = 1.

    """

    _response_key = 'WorkflowExecutions'
    _paginator: Paginator = Paginator()
    _collection_key = 'response'

    uid = properties.UUID('id')
    project_id = properties.UUID('project_id', deserializable=False)
    workflow_id = properties.UUID('workflow_id', deserializable=False)
    version_number = properties.Integer("version_number")

    def __init__(self,
                 uid: Optional[str] = None,
                 project_id: Optional[str] = None,
                 workflow_id: Optional[str] = None,
                 session: Optional[Session] = None,
                 version_number: Optional[int] = None):
        # Constructing this class at all is deprecated; warn immediately.
        warn(
            "{this_class} is deprecated. Please use {dw_replacement} instead for "
            "Design Workflows and {pew_replacement} for Predictor Evaluation "
            "Workflows".format(
                this_class=self.__class__.__name__,
                dw_replacement=DesignExecution.__name__,
                pew_replacement=PredictorEvaluationExecution.__name__),
            category=DeprecationWarning)
        self.uid: str = uid
        self.project_id: str = project_id
        self.workflow_id: str = workflow_id
        self.session: Session = session
        self.version_number = version_number

    def __str__(self):
        return '<WorkflowExecution {!r}>'.format(str(self.uid))

    def _path(self):
        template = '/projects/{project_id}/workflows/{workflow_id}/executions/{execution_id}'
        return template.format(project_id=self.project_id,
                               workflow_id=self.workflow_id,
                               execution_id=self.uid)

    def status(self):
        """Get the current status of this execution."""
        return WorkflowExecutionStatus.build(
            self.session.get_resource(self._path() + "/status"))

    def results(self):
        """Get the results of this execution."""
        return self.session.get_resource(self._path() + "/results")

    @classmethod
    def _build_candidates(cls,
                          subset_collection: Iterable[dict]) -> Iterable[DesignCandidate]:
        # Lazily convert each raw dict into a structured DesignCandidate.
        for raw in subset_collection:
            yield DesignCandidate.build(raw)

    def candidates(self,
                   page: Optional[int] = None,
                   per_page: int = 100,
                   ) -> Iterable[DesignCandidate]:
        """Fetch the Design Candidates for the particular execution, paginated.

        Gets candidates from the new candidates API for a workflow executed by the old
        api. New candidates are paginated and have structured types.
        """
        path = '/projects/{p_id}/design-workflows/{w_id}/executions/{e_id}/candidates'.format(
            p_id=self.project_id, w_id=self.workflow_id, e_id=self.uid)
        return self._paginator.paginate(
            page_fetcher=partial(self._fetch_page, path=path),
            collection_builder=self._build_candidates,
            page=page,
            per_page=per_page)
class Project(Resource['Project']): """ A Citrine Project. A project is a collection of datasets, some of which belong directly to the project and some of which have been shared with the project. Parameters ---------- name: str Name of the project. description: str Long-form description of the project. session: Session, optional The Citrine session used to connect to the database. Attributes ---------- uid: UUID Unique uuid4 identifier of this project. status: str Status of the project. created_at: int Time the project was created, in seconds since epoch. """ _response_key = 'project' _resource_type = ResourceTypeEnum.PROJECT name = properties.String('name') description = properties.Optional(properties.String(), 'description') uid = properties.Optional(properties.UUID(), 'id') status = properties.Optional(properties.String(), 'status') created_at = properties.Optional(properties.Datetime(), 'created_at') def __init__(self, name: str, description: Optional[str] = None, session: Optional[Session] = Session()): self.name: str = name self.description: Optional[str] = description self.session: Session = session def __str__(self): return '<Project {!r}>'.format(self.name) def _path(self): return '/projects/{project_id}'.format(**{"project_id": self.uid}) @property def modules(self) -> ModuleCollection: """Return a resource representing all visible design spaces.""" return ModuleCollection(self.uid, self.session) @property def design_spaces(self) -> DesignSpaceCollection: """Return a resource representing all visible design spaces.""" return DesignSpaceCollection(self.uid, self.session) @property def processors(self) -> ProcessorCollection: """Return a resource representing all visible processors.""" return ProcessorCollection(self.uid, self.session) @property def predictors(self) -> PredictorCollection: """Return a resource representing all visible predictors.""" return PredictorCollection(self.uid, self.session) @property def descriptors(self) -> DescriptorMethods: 
"""Return a resource containing a set of methods returning descriptors.""" return DescriptorMethods(self.uid, self.session) @property @deprecated( deprecated_in="0.101.0", details="Use design_workflows or predictor_evaluation_workflows instead" ) def workflows(self) -> WorkflowCollection: """Return a resource representing all visible workflows.""" return WorkflowCollection(self.uid, self.session) @property def predictor_evaluation_workflows( self) -> PredictorEvaluationWorkflowCollection: """Return a collection representing all visible predictor evaluation workflows.""" return PredictorEvaluationWorkflowCollection(self.uid, self.session) @property def predictor_evaluation_executions( self) -> PredictorEvaluationExecutionCollection: """Return a collection representing all visible predictor evaluation executions.""" return PredictorEvaluationExecutionCollection(project_id=self.uid, session=self.session) @property def design_workflows(self) -> DesignWorkflowCollection: """Return a collection representing all visible design workflows.""" return DesignWorkflowCollection(self.uid, self.session) @property def datasets(self) -> DatasetCollection: """Return a resource representing all visible datasets.""" return DatasetCollection(self.uid, self.session) @property def tables(self) -> GemTableCollection: """Return a resource representing all visible Tables.""" return GemTableCollection(self.uid, self.session) @property def property_templates(self) -> PropertyTemplateCollection: """Return a resource representing all property templates in this dataset.""" return PropertyTemplateCollection(self.uid, None, self.session) @property def condition_templates(self) -> ConditionTemplateCollection: """Return a resource representing all condition templates in this dataset.""" return ConditionTemplateCollection(self.uid, None, self.session) @property def parameter_templates(self) -> ParameterTemplateCollection: """Return a resource representing all parameter templates in this dataset.""" 
return ParameterTemplateCollection(self.uid, None, self.session) @property def material_templates(self) -> MaterialTemplateCollection: """Return a resource representing all material templates in this dataset.""" return MaterialTemplateCollection(self.uid, None, self.session) @property def measurement_templates(self) -> MeasurementTemplateCollection: """Return a resource representing all measurement templates in this dataset.""" return MeasurementTemplateCollection(self.uid, None, self.session) @property def process_templates(self) -> ProcessTemplateCollection: """Return a resource representing all process templates in this dataset.""" return ProcessTemplateCollection(self.uid, None, self.session) @property def process_runs(self) -> ProcessRunCollection: """Return a resource representing all process runs in this dataset.""" return ProcessRunCollection(self.uid, None, self.session) @property def measurement_runs(self) -> MeasurementRunCollection: """Return a resource representing all measurement runs in this dataset.""" return MeasurementRunCollection(self.uid, None, self.session) @property def material_runs(self) -> MaterialRunCollection: """Return a resource representing all material runs in this dataset.""" return MaterialRunCollection(self.uid, None, self.session) @property def ingredient_runs(self) -> IngredientRunCollection: """Return a resource representing all ingredient runs in this dataset.""" return IngredientRunCollection(self.uid, None, self.session) @property def process_specs(self) -> ProcessSpecCollection: """Return a resource representing all process specs in this dataset.""" return ProcessSpecCollection(self.uid, None, self.session) @property def measurement_specs(self) -> MeasurementSpecCollection: """Return a resource representing all measurement specs in this dataset.""" return MeasurementSpecCollection(self.uid, None, self.session) @property def material_specs(self) -> MaterialSpecCollection: """Return a resource representing all material specs 
in this dataset.""" return MaterialSpecCollection(self.uid, None, self.session) @property def ingredient_specs(self) -> IngredientSpecCollection: """Return a resource representing all ingredient specs in this dataset.""" return IngredientSpecCollection(self.uid, None, self.session) @property def table_configs(self) -> TableConfigCollection: """Return a resource representing all Table Configs in the project.""" return TableConfigCollection(self.uid, self.session) @property @deprecated(deprecated_in="0.52.2", details="Use table_configs instead") def ara_definitions(self) -> TableConfigCollection: # pragma: no cover """[DEPRECATED] Use table_configs instead.""" from warnings import warn warn( "ara_definitions is deprecated and will soon be removed. " "Please call table_configs instead.", DeprecationWarning) return self.table_configs def share(self, project_id: str, resource_type: str, resource_id: str) -> Dict[str, str]: """Share a resource with another project.""" return self.session.post_resource( self._path() + "/share", { "project_id": project_id, "resource": { "type": resource_type, "id": resource_id } }) def transfer_resource(self, resource: Resource, receiving_project_uid: Union[str, UUID]) -> bool: """ Transfer ownership of a resource. The new owner of the the supplied resource becomes the project with ``uid == receiving_project_uid``. Parameters ---------- resource: Resource The resource owned by this project, which will get transferred to the project with ``uid == receiving_project_uid``. receiving_project_uid: Union[string, UUID] The uid of the project to which the resource will be transferred. Returns ------- bool Returns ``True`` upon successful resource transfer. 
""" try: self.session.checked_post( self._path() + "/transfer-resource", { "to_project_id": str(receiving_project_uid), "resource": resource.as_entity_dict() }) except AttributeError: # If _resource_type is not implemented raise RuntimeError( f"Resource of type {resource.__class__.__name__} " f"cannot be made transferred") return True def make_public(self, resource: Resource) -> bool: """ Grant public access to a resource owned by this project. Parameters ---------- resource: Resource An instance of a resource owned by this project (e.g., a dataset). Returns ------- bool ``True`` if the action was performed successfully """ try: self.session.checked_post(self._path() + "/make-public", {"resource": resource.as_entity_dict()}) except AttributeError: # If _resource_type is not implemented raise RuntimeError( f"Resource of type {resource.__class__.__name__} " f"cannot be made public") return True def make_private(self, resource: Resource) -> bool: """ Remove public access for a resource owned by this project. Parameters ---------- resource: Resource An instance of a resource owned by this project (e.g., a dataset). Returns ------- bool ``True`` if the action was performed successfully """ try: self.session.checked_post(self._path() + "/make-private", {"resource": resource.as_entity_dict()}) except AttributeError: # If _resource_type is not implemented raise RuntimeError( f"Resource of type {resource.__class__.__name__} " f"cannot be made private") return True def creator(self) -> str: """ Return the creator of this project. Returns ------- str The email of the creator of this resource. """ email = self.session.get_resource(self._path() + "/creator")["email"] return email def owned_dataset_ids(self) -> List[str]: """ List all the ids of the datasets owned by the current project. 
Returns ------- List[str] The ids of the modules owned by current project """ dataset_ids = self.session.get_resource(self._path() + "/dataset_ids")["dataset_ids"] return dataset_ids def owned_table_ids(self) -> List[str]: """ List all the ids of the tables owned by the current project. Returns ------- List[str] The ids of the tables owned by current project """ table_ids = self.session.get_resource(self._path() + "/table_ids")["table_ids"] return table_ids def owned_table_config_ids(self) -> List[str]: """ List all the ids of the table configs owned by the current project. Returns ------- List[str] The ids of the table configs owned by current project """ result = self.session.get_resource(self._path() + "/table_definition_ids") return result["table_definition_ids"] def list_members(self) -> List[ProjectMember]: """ List all of the members in the current project. Returns ------- List[ProjectMember] The members of the current project """ members = self.session.get_resource(self._path() + "/users")["users"] return [ ProjectMember(user=User.build(m), project=self, role=m["role"]) for m in members ] def update_user_role(self, user_uid: Union[str, UUID], role: ROLES, actions: ACTIONS = []): """ Update a User's role and action permissions in the Project. Valid roles are ``MEMBER`` or ``LEAD``. ``WRITE`` is the only action available for specification. Returns ------- bool Returns ``True`` if user role successfully updated """ self.session.checked_post(self._path() + "/users/{}".format(user_uid), { 'role': role, 'actions': actions }) return True def add_user(self, user_uid: Union[str, UUID]): """ Add a User to a Project. Adds User with ``MEMBER`` role to the Project. Use the ``update_user_rule`` method to change a User's role. 
Returns ------- bool Returns ``True`` if user successfully added """ self.session.checked_post(self._path() + "/users/{}".format(user_uid), { 'role': MEMBER, 'actions': [] }) return True def remove_user(self, user_uid: Union[str, UUID]) -> bool: """ Remove a User from a Project. Returns ------- bool Returns ``True`` if user successfully removed """ self.session.checked_delete(self._path() + "/users/{}".format(user_uid)) return True def gemd_batch_delete( self, id_list: List[Union[LinkByUID, UUID, str, BaseEntity]], *, timeout: float = 2 * 60, polling_delay: float = 1.0) -> List[Tuple[LinkByUID, ApiError]]: """ Remove a set of GEMD objects. You may provide GEMD objects that reference each other, and the objects will be removed in the appropriate order. A failure will be returned if the object cannot be deleted due to an external reference. You must have Write access on the associated datasets for each object. Parameters ---------- id_list: List[Union[LinkByUID, UUID, str, BaseEntity]] A list of the IDs of data objects to be removed. They can be passed as a LinkByUID tuple, a UUID, a string, or the object itself. A UUID or string is assumed to be a Citrine ID, whereas a LinkByUID or BaseEntity can also be used to provide an external ID. Returns ------- List[Tuple[LinkByUID, ApiError]] A list of (LinkByUID, api_error) for each failure to delete an object. Note that this method doesn't raise an exception if an object fails to be deleted. """ return _async_gemd_batch_delete(id_list, self.uid, self.session, None, timeout=timeout, polling_delay=polling_delay)
class DesignWorkflow(Resource['DesignWorkflow'], Workflow):
    """[ALPHA] Object that generates scored materials that may approach higher values of the score.

    Parameters
    ----------
    name: str
        the name of the workflow
    design_space_id: UUID
        the UUID corresponding to the design space to use
    processor_id: Optional[UUID]
        the UUID corresponding to the processor to use
        if none is provided, one matching your design space will be automatically generated
    predictor_id: UUID
        the UUID corresponding to the predictor to use
    project_id: UUID
        the UUID corresponding to the project to use

    """

    # Server-assigned/read-only fields: populated on deserialization, never serialized back.
    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('display_name')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()), 'status_info',
                                      serializable=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(properties.List(
        properties.String()), 'experimental_reasons', serializable=False)
    archived = properties.Boolean('archived', default=False)
    created_by = properties.Optional(properties.UUID, 'created_by', serializable=False)
    create_time = properties.Optional(properties.Datetime, 'create_time', serializable=False)
    # Module references live under the nested 'config' key in the serialized form.
    design_space_id = properties.UUID('config.design_space_id')
    processor_id = properties.Optional(properties.UUID, 'config.processor_id')
    predictor_id = properties.UUID('config.predictor_id')
    module_type = properties.String('module_type', default='DESIGN_WORKFLOW')
    # Fixed schema identifier for design workflows on the platform.
    schema_id = properties.UUID(
        'schema_id', default=UUID('8af8b007-3e81-4185-82b2-6f62f4a2e6f1'))

    def __init__(self,
                 name: str,
                 design_space_id: UUID,
                 processor_id: Optional[UUID],
                 predictor_id: UUID,
                 project_id: Optional[UUID] = None,
                 session: Session = Session()):
        # NOTE(review): the ``Session()`` default is evaluated once at import time and is
        # shared by every instance constructed without an explicit session — confirm intended.
        self.name = name
        self.design_space_id = design_space_id
        self.processor_id = processor_id
        self.predictor_id = predictor_id
        # project_id keeps a reference to the project under which the workflow was
        # created; it is required by the ``executions`` property below.
        self.project_id = project_id
        self.session = session

    def __str__(self):
        return '<DesignWorkflow {!r}>'.format(self.name)

    @property
    def executions(self) -> WorkflowExecutionCollection:
        """Return a resource representing all visible executions of this workflow."""
        # The execution collection is project-scoped, so a project reference is mandatory.
        if getattr(self, 'project_id', None) is None:
            raise AttributeError(
                'Cannot initialize execution without project reference!')
        return WorkflowExecutionCollection(self.project_id, self.uid, self.session)
class PerformanceWorkflow(Resource['PerformanceWorkflow'], Workflow):
    """[ALPHA] Object that executes performance analysis on a given module.

    Parameters
    ----------
    name: str
        the name of the workflow
    analysis: CrossValidationAnalysisConfiguration
        the configuration object

    """

    # Server-assigned/read-only fields: populated on deserialization, never serialized back.
    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('display_name')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()), 'status_info',
                                      serializable=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(properties.List(
        properties.String()), 'experimental_reasons', serializable=False)
    archived = properties.Boolean('archived', default=False)
    created_by = properties.Optional(properties.UUID, 'created_by', serializable=False)
    create_time = properties.Optional(properties.Datetime, 'create_time', serializable=False)
    # The analysis configuration is nested under the 'config' key in the serialized form.
    analysis = properties.Object(CrossValidationAnalysisConfiguration, 'config.analysis')
    module_type = properties.String('module_type', default='PERFORMANCE_WORKFLOW')
    # Fixed schema identifier for performance workflows on the platform.
    schema_id = properties.UUID(
        'schema_id', default=UUID('1d213f0a-d07c-4f70-a4d0-bda3aa951ee0'))
    # Discriminator written into the config on serialization only.
    typ = properties.String('config.type', default='PerformanceWorkflow',
                            deserializable=False)

    def __init__(self,
                 name: str,
                 analysis: CrossValidationAnalysisConfiguration,
                 project_id: Optional[UUID] = None,
                 session: Session = Session()):
        # NOTE(review): the ``Session()`` default is evaluated once at import time and is
        # shared by every instance constructed without an explicit session — confirm intended.
        self.name = name
        self.analysis = analysis
        # project_id is required by the ``executions`` property below.
        self.project_id = project_id
        self.session = session

    def __str__(self):
        return '<PerformanceWorkflow {!r}>'.format(self.name)

    @property
    def executions(self) -> WorkflowExecutionCollection:
        """Return a resource representing all visible executions of this workflow."""
        # The execution collection is project-scoped, so a project reference is mandatory.
        if getattr(self, 'project_id', None) is None:
            raise AttributeError(
                'Cannot initialize execution without project reference!')
        return WorkflowExecutionCollection(self.project_id, self.uid, self.session)
class TableConfig(Resource["TableConfig"]):
    """
    [ALPHA] The Table Configuration used to build GEM Tables.

    Parameters
    ----------
    name: str
        Name of the Table Configuration
    description: str
        Description of the Table Configuration
    datasets: list[UUID]
        Datasets that are in scope for the table, as a list of dataset uuids
    variables: list[Variable]
        Variable definitions, which define data from the material histories to use in the
        columns
    rows: list[Row]
        List of row definitions that define the rows of the table
    columns: list[Column]
        Column definitions, which describe how the variables are shaped into the table

    """

    # FIXME (DML): rename this (this is dependent on the server side)
    _response_key = "ara_definition"
    _resource_type = ResourceTypeEnum.TABLE_DEFINITION

    @staticmethod
    def _get_dups(lst: List) -> List:
        """Return every element of ``lst`` that occurs more than once (with repeats).

        Deliberately quadratic: elements may be unhashable (e.g. header lists),
        which rules out a Counter/set-based approach. The inputs here (variable
        names and headers) are small, so this is acceptable.
        """
        return [x for x in lst if lst.count(x) > 1]

    config_uid = properties.Optional(properties.UUID(), 'definition_id')
    version_uid = properties.Optional(properties.UUID(), 'id')
    version_number = properties.Optional(properties.Integer, 'version_number')
    name = properties.String("name")
    description = properties.String("description")
    datasets = properties.List(properties.UUID, "datasets")
    variables = properties.List(properties.Object(Variable), "variables")
    rows = properties.List(properties.Object(Row), "rows")
    columns = properties.List(properties.Object(Column), "columns")

    # Provide some backwards compatible support for definition_uid, redirecting to config_uid
    @property
    def definition_uid(self):
        """[DEPRECATED] This is a deprecated alias to config_uid. Please use that instead."""
        from warnings import warn
        warn(
            "definition_uid is deprecated and will soon be removed. "
            "Please use config_uid instead", DeprecationWarning)
        return self.config_uid

    @definition_uid.setter
    def definition_uid(self, value):  # pragma: no cover
        """[DEPRECATED] This is a deprecated alias to config_uid. Please use that instead."""
        from warnings import warn
        warn(
            "definition_uid is deprecated and will soon be removed. "
            "Please use config_uid instead", DeprecationWarning)
        self.config_uid = value

    def __init__(self, *, name: str, description: str, datasets: List[UUID],
                 variables: List[Variable], rows: List[Row], columns: List[Column],
                 version_uid: Optional[UUID] = None, version_number: Optional[int] = None,
                 definition_uid: Optional[UUID] = None, config_uid: Optional[UUID] = None):
        self.name = name
        self.description = description
        self.datasets = datasets
        self.rows = rows
        self.variables = variables
        self.columns = columns
        self.version_uid = version_uid
        self.version_number = version_number
        if config_uid is not None:
            # Formerly an ``assert`` — raise so the check survives ``python -O`` and
            # matches the ValueError style of the validations below.
            if definition_uid is not None:
                raise ValueError("Please supply config_uid "
                                 "instead of definition_uid, and not both")
            self.config_uid = config_uid
        else:
            self.config_uid = definition_uid

        # Note that these validations only apply at construction time. The current intended
        # usage is for this object to be created holistically; if changed, then these will
        # need to move into setters.
        names = [x.name for x in variables]
        dup_names = self._get_dups(names)
        if dup_names:
            raise ValueError("Multiple variables defined these names,"
                             " which must be unique: {}".format(dup_names))
        headers = [x.headers for x in variables]
        dup_headers = self._get_dups(headers)
        if dup_headers:
            raise ValueError("Multiple variables defined these headers,"
                             " which must be unique: {}".format(dup_headers))
        missing_variables = [x.data_source for x in columns if x.data_source not in names]
        if missing_variables:
            raise ValueError("The data_source of the columns must match one of the variable"
                             " names, but {} were missing".format(missing_variables))

    def add_columns(self, *,
                    variable: Variable,
                    columns: List[Column],
                    name: Optional[str] = None,
                    description: Optional[str] = None) -> 'TableConfig':
        """[ALPHA] Add a variable and one or more columns to this TableConfig (out-of-place).

        This method checks that the variable name is not already in use and that the columns
        only reference that variable. It is *not* able to check if the columns and the
        variable are compatible (yet, at least).

        Parameters
        ----------
        variable: Variable
            Variable to add and use in the added columns
        columns: list[Column]
            Columns to add, which must only reference the added variable
        name: Optional[str]
            Optional renaming of the table
        description: Optional[str]
            Optional re-description of the table

        """
        if variable.name in [x.name for x in self.variables]:
            raise ValueError("The variable name {} is already used".format(variable.name))
        mismatched_data_source = [x for x in columns if x.data_source != variable.name]
        if mismatched_data_source:
            raise ValueError("Column.data_source must be {} but found {}".format(
                variable.name, mismatched_data_source))

        # Out-of-place: build a new config from copies of the existing pieces.
        return TableConfig(
            name=name or self.name,
            description=description or self.description,
            datasets=copy(self.datasets),
            rows=copy(self.rows),
            variables=copy(self.variables) + [variable],
            columns=copy(self.columns) + columns,
            config_uid=copy(self.config_uid))

    def add_all_ingredients(self, *,
                            process_template: LinkByUID,
                            project,
                            quantity_dimension: IngredientQuantityDimension,
                            scope: str = CITRINE_SCOPE,
                            unit: Optional[str] = None):
        """[ALPHA] Add variables and columns for all of the possible ingredients in a process.

        For each allowed ingredient name in the process template there is a column for the id
        of the ingredient and a column for the quantity of the ingredient. If the quantities
        are given in absolute amounts then there is also a column for units.

        Parameters
        ------------
        process_template: LinkByUID
            scope and id of a registered process template
        project: Project
            a project that has access to the process template
        quantity_dimension: IngredientQuantityDimension
            the dimension in which to report ingredient quantities
        scope: Optional[str]
            the scope for which to get ingredient ids (default is Citrine scope, 'id')
        unit: Optional[str]
            the units for the quantity, if selecting Absolute Quantity

        """
        # Human-readable header text for each supported quantity dimension.
        dimension_display = {
            IngredientQuantityDimension.ABSOLUTE: "absolute quantity",
            IngredientQuantityDimension.MASS: "mass fraction",
            IngredientQuantityDimension.VOLUME: "volume fraction",
            IngredientQuantityDimension.NUMBER: "number fraction"
        }
        process: ProcessTemplate = project.process_templates.get(
            uid=process_template.id, scope=process_template.scope)
        if not process.allowed_names:
            raise RuntimeError(
                "Cannot add ingredients for process template \'{}\' because it has no defined "
                "ingredients (allowed_names is not defined).".format(process.name))

        new_variables = []
        new_columns = []
        for name in process.allowed_names:
            # NOTE(review): ``hash`` on str is randomized per interpreter session
            # (PYTHONHASHSEED), so these generated variable names are not stable
            # across runs — confirm that is intended.
            identifier_variable = IngredientIdentifierByProcessTemplateAndName(
                name='_'.join([process.name, name,
                               str(hash(process_template.id + name + scope))]),
                headers=[process.name, name, scope],
                process_template=process_template,
                ingredient_name=name,
                scope=scope)
            quantity_variable = IngredientQuantityByProcessAndName(
                name='_'.join([process.name, name,
                               str(hash(process_template.id + name
                                        + dimension_display[quantity_dimension]))]),
                headers=[process.name, name, dimension_display[quantity_dimension]],
                process_template=process_template,
                ingredient_name=name,
                quantity_dimension=quantity_dimension,
                unit=unit)

            # Skip the identifier variable if one with the same generated name was
            # already added (e.g. by an earlier call for the same process and scope).
            if identifier_variable.name not in [var.name for var in self.variables]:
                new_variables.append(identifier_variable)
                new_columns.append(IdentityColumn(data_source=identifier_variable.name))

            new_variables.append(quantity_variable)
            new_columns.append(MeanColumn(data_source=quantity_variable.name))
            if quantity_dimension == IngredientQuantityDimension.ABSOLUTE:
                new_columns.append(OriginalUnitsColumn(data_source=quantity_variable.name))

        # Out-of-place: build a new config from copies of the existing pieces.
        return TableConfig(
            name=self.name,
            description=self.description,
            datasets=copy(self.datasets),
            rows=copy(self.rows),
            variables=copy(self.variables) + new_variables,
            columns=copy(self.columns) + new_columns,
            config_uid=copy(self.config_uid))