def post_design(
    self,
    design: dict,
    allow_duplicates: bool = False,
):
    """Submits a new design into the DESIGN module.

    Args:
        design (dict): A dictionary with the design. This dictionary is very complex, but it can be generated
            easily with the `build_design_from_candidates` method at *utils*.

        allow_duplicates (bool): Set to True to avoid raising errors on detection of parts duplication
            (default value is False).

    Returns:
        dict: On success, returns a dict containing the id of the new design (ex: `{'id': 5}`).
    """
    payload = dict(designJson=design, allowDuplicates=allow_duplicates)
    reply = post(url=self.post_designs_url, headers=self.headers, json=payload)
    raw_content = reply['content']
    return json.loads(raw_content)
def get_model_info(
    self,
    model_id: ModelID,
):
    """Retrieves the general information of a model.

    The server answers with a JSON object holding the metadata of the model that matches the provided
    model ID.

    Args:
        model_id (ModelID): Model identifier. It is sent to the API as a string.

    Returns:
        (): A dict containing model info. An example is shown below:

    ```json
        {
            "id": "0",
            "labId": "1",
            "modelType": "predictive",
            "name": "My First Predictive Model",
            "description": "This is an example model",
            "status": "completed-successfully",
            "evolveModelInfo": {
                "microserviceQueueId": "1",
                "dataSchema": [{
                    "id": "1",
                    "name": "Descriptor1",
                    "value_type": "numeric",
                    "type": "descriptor"
                }, {
                    "id": "1",
                    "name": "Descriptor2",
                    "value_type": "numeric",
                    "type": "descriptor"
                }, {
                    "id": "2",
                    "name": "Target",
                    "value_type": "numeric",
                    "type": "target"
                }],
                "modelStats": {
                    "MAE": 45
                }
            }
        }
    ```
    """
    request_body = {'id': str(model_id)}
    response = post(url=self.get_model_url, headers=self.headers, json=request_body)

    # Replace the raw JSON text with its parsed form, then let the shared helper extract the payload.
    parsed_content = json.loads(response['content'])
    response['content'] = parsed_content
    return self._get_data_from_content(parsed_content)
def post_codon_optimization_job(
    self,
    algorithm='ALGORITHMS_NAME',
    parameters=None,
):
    """Posts a codon optimization job request to the URL stored in `self.post_codon_op`.

    Args:
        algorithm: Value sent under the `algorithm` key of the request body. Defaults to 'ALGORITHMS_NAME'.

        parameters: Value sent under the `parameters` key of the request body. When None (the default), an
            empty dict is sent instead.

    Returns:
        The response content, parsed from JSON.
    """
    if parameters is None:
        parameters = {}

    payload = dict(algorithm=algorithm, parameters=parameters)
    reply = post(url=self.post_codon_op, headers=self.headers, json=payload)
    return json.loads(reply['content'])
def cancel_task(
    self,
    task_id: TaskID,
) -> Any:
    """Cancels the submission of the task matching the specified `task_id`.

    Args:
        task_id (TaskID): The id of the task to be canceled. It is interpolated into `self.cancel_task_url`.

    Returns:
        (): The response content, parsed from JSON.
    """
    target_url = self.cancel_task_url.format(task_id)
    reply = post(url=target_url, headers=self.headers)
    return json.loads(reply['content'])
def register(
    self,
    username: str,
    password: str,
):
    """Registers a new user.

    NB: Registering a new user might require ADMIN privileges.

    Args:
        username (str): Sent to the API as the user's `email`.

        password (str): Sent both as `password` and as `passwordConfirm`.

    Returns:
        The response object returned by `post`, with its `content` entry replaced by the parsed JSON.
    """
    # The first and last names are fixed placeholder values; only email and password come from the caller.
    payload = dict(
        email=username,
        firstName='test',
        lastName='user',
        password=password,
        passwordConfirm=password,
    )
    # This request is sent without the client's headers.
    reply = post(url=self.register_url, json=payload)
    reply['content'] = json.loads(reply['content'])
    return reply
def cancel_model(
    self,
    model_id: ModelID,
):
    """Cancels the submission of the model matching the specified `model_id`.

    Args:
        model_id (ModelID): The id of the model to be canceled. It is sent to the API as a string.

    Returns:
        (): Whatever `self._get_data_from_content` extracts from the parsed response content.
    """
    request_body = {'id': str(model_id)}
    response = post(url=self.cancel_model_url, headers=self.headers, json=request_body)

    parsed_content = json.loads(response['content'])
    response['content'] = parsed_content
    return self._get_data_from_content(parsed_content)
def submit_prediction_task(
    self,
    data_input: List[Dict[str, Any]],
    data_schema: List[Dict[str, Any]],
    model_id: ModelID,
) -> Dict[str, Any]:
    """Submits a task that runs predictions on a list of datapoints using a pre-trained Predictive Model.

    Args:
        data_input (List[Dict[str, Any]]): Datapoints in the same format described in the submit_model
            function.

        data_schema (List[Dict[str, Any]]): Data schema in the same format described in the submit_model
            function.

        model_id (ModelID): ID of the pre-trained predictive model that will be used to run predictions for
            the datapoints in the `data_input` list.

    Returns:
        - A Task object with metadata information on the submitted task, including its ID for later
          retrieval. Its `data` entry is extended with a `pretrainedModelId` key holding `model_id`.
    """
    # The request goes to the regular model-submission endpoint; no `configs` or `description` are sent
    # and the name is fixed to 'pretrained'.
    payload = dict(
        dataInput=data_input,
        dataSchema=data_schema,
        modelType='predictive',
        predictiveModelId=model_id,
        name='pretrained',
    )
    reply = post(url=self.submit_model_url, headers=self.headers, json=payload)

    task_info: Dict[str, Any] = json.loads(reply['content'])
    # Echo the model id back so callers can tell which pre-trained model the task belongs to.
    task_info['data'].update(pretrainedModelId=model_id)
    return task_info
def import_aa_sequences(
    self,
    aa_sequences: Union[pd.DataFrame, List[List[str]], List[Tuple[str, str]], List[Dict[str, str]]],
    tags: Optional[List[int]] = None,
):
    """Imports one or many amino acid sequences by means of TeselaGen's DESIGN API.

    Args:
        aa_sequences (Union[pd.DataFrame, List[List[str]], List[Tuple[str, str]], List[Dict[str, str]]]):
            Amino acid sequences data. The data can come in any of the following ways:

            - as a pandas dataframe with 2 columns, where the first column contains the sequence names and
              the second column contains the amino acid sequence strings.
            - as a list of python dictionaries, where each dictionary is of the form
              `{"AA_NAME": SEQUENCE_NAME, "AA_SEQUENCE": SEQUENCE_STRING}`.
            - as a list of 2-element tuples or 2-element lists, where the first element is the sequence
              name and the second element is the sequence string.

        tags (Optional[List[int]]): A list of integer tag IDs with which each amino acid sequence will be
            tagged. (NOTE: tags cannot be created on-the-fly through this function, it only accepts tag IDs
            that are already created in the DESIGN Module). Anything that is not a list is ignored.

    Returns:
        A JSON object with the following two key/values (each one is only present when the API reports at
        least one matching sequence):

        - createdAminoAcidSequences(): 'id' and 'name' of the created amino acid sequences.
        - existingAminoAcidSequences(): 'id' of the updated amino acid sequences.

        NOTE: if the request itself fails, the caught exception object is *returned* (not raised). This
        behavior is kept for backward compatibility, so callers should check the type of the result.

    Raises:
        Exception: If `aa_sequences` is None.
        ValueError: If `aa_sequences` has an unsupported type, or it is a list whose elements are neither
            all 2-element lists/tuples nor all dictionaries.
    """
    if aa_sequences is None:
        raise Exception("The 'aa_sequences' argument is mandatory.")

    params = {}
    if isinstance(aa_sequences, pd.DataFrame):
        params['name'] = aa_sequences.iloc[:, 0].values.tolist()
        params['contents'] = aa_sequences.iloc[:, 1].values.tolist()
    elif isinstance(aa_sequences, list):
        # Lists and tuples are handled by a single branch. Previously the tuple check was a separate `if`
        # (instead of `elif`), so a list of 2-element lists always ended up in the final `else` and raised.
        if all(isinstance(item, (list, tuple)) and len(item) == 2 for item in aa_sequences):
            params['name'] = [item[0] for item in aa_sequences]
            params['contents'] = [item[1] for item in aa_sequences]
        elif all(isinstance(item, dict) for item in aa_sequences):
            params['name'] = [item['AA_NAME'] for item in aa_sequences]
            params['contents'] = [item['AA_SEQUENCE'] for item in aa_sequences]
        else:
            raise ValueError(
                "All elements in list argument 'aa_sequences' must either be 2-element tuples or properly "
                "formatted dictionaries according to the function's Args description."
            )
    else:
        raise ValueError(f"Type {type(aa_sequences)} for argument 'aa_sequences' is not supported.")

    if isinstance(tags, list):
        # The API expects each tag wrapped as an object holding its id.
        params['tags'] = [{'id': tag_id} for tag_id in tags]

    try:
        result = post(url=self.import_aa_url, data=json.dumps(params), headers=self.headers)
    except Exception as e:
        # Kept as-is for backward compatibility: request failures are handed back to the caller.
        return e

    parsed_api_result = json.loads(result['content'])

    created_aa_seqs_key = 'createdAminoAcidSequences'
    updated_aa_seqs_key = 'existingAminoAcidSequences'
    formatted_response = {}

    created_aa_seqs = parsed_api_result.get(created_aa_seqs_key)
    if created_aa_seqs:
        formatted_response[created_aa_seqs_key] = [{
            'id': seq['id'],
            'name': seq['name'],
        } for seq in created_aa_seqs]

    updated_aa_seqs = parsed_api_result.get(updated_aa_seqs_key)
    if updated_aa_seqs:
        formatted_response[updated_aa_seqs_key] = [{'id': seq['id']} for seq in updated_aa_seqs]

    return formatted_response
def rbs_calculator_submit_job(
    self,
    algorithm: str,
    params: Dict[str, Any],
) -> dict:
    """Submits a job to the RBS Calculator API Version v2.1.

    For deeper information on the RBS Calculator tools please refer to the following documentation:
        - Paper: https://www.researchgate.net/publication/51155303_The_Ribosome_Binding_Site_Calculator.
        - Browser Application: https://salislab.net/software/
        - Swagger API Documentation: https://app.swaggerhub.com/apis-docs/DeNovoDNA/JobControl/1.0.1

    The TeselaGen/RBS Integration currently supports the three following RBS Calculator Tools:

    - "ReverseRBS": Calls the RBS Calculator in Reverse Engineering mode to predict the translation
      initiation rate of each start codon in a mRNA sequence.
      ([Predict Translation Rates](https://salislab.net/software/predict_rbs_calculator))

        parameters:
            mRNA (str): Valid 'GATCU' mRNA sequence.
            long_UTR (boolean): Enables long UTRs.
            organism (str): Valid organism name. (for all available organism names, please call the
                'rbs_calculator_organisms' function)

    - "RBSLibraryCalculator_SearchMode": Calls the RBS Library Calculator in Search mode to design a
      ribosome binding site library to maximally cover a selected translation rate space between a targeted
      minimum and maximum rate using the fewest number of RBS variants.
      ([Optimize Expression Levels](https://salislab.net/software/design_rbs_library_calculator))

        parameters:
            CDS (str): Valid 'GATCU' coding sequence.
            RBS_Constraints (str): Either an empty string or a valid degenerate nucleotide sequence
                ('GATCURYSWKMBDHVN'). (Key spelled as in the examples below - confirm against the API
                specification.)
            initial_RBS_sequence (str): Either an empty string or a valid 'GATCU' RBS sequence. This is used
                to initialize the RBS sequence exploration algorithm. If an empty string is provided, a
                random RBS sequence will be used as the initializing sequence.
            library_size (int): Number of RBS sequences in your library.
            maximum_consecutive_degeneracy (int): The maximum number of consecutive degeneracy nucleotides
                for the RBS library designs.
            minimum_translation_initiation_rate (int): Lowest translation rate desired for your RBS library
                (proportional scale varies from 1 to 1,000,000).
            maximum_translation_initiation_rate (int): Highest translation rate desired for your RBS library
                (proportional scale varies from 1 to 1,000,000).
            organism (str): Valid organism name. (for all available organism names, please call the
                'rbs_calculator_organisms' function).
            pre_sequence (str): Either an empty string or a valid 'GATCU' mRNA sequence that is required to
                appear upstream (5') of the RBS sequence.

    - "RBSLibraryCalculator_GenomeSearchMode": Calls the RBS Library Calculator in Genome Editing mode to
      design a genomic ribosome binding site library to maximally cover a selected translation rate space
      between a targeted minimum and maximum rate, while introducing the fewest number of consecutive
      genomic mutations.
      ([Optimize Expression Levels](https://salislab.net/software/design_rbs_library_calculator))

        parameters:
            CDS (str): Valid 'GATCU' coding sequence.
            RBS_Constraints (str): Either an empty string or a valid degenerate nucleotide sequence
                ('GATCURYSWKMBDHVN'). (Key spelled as in the examples below - confirm against the API
                specification.)
            genomic_RBS_sequence (str): Genomic RBS sequence. Must be a valid 'GATCU' sequence.
            initial_RBS_sequence (str): Either an empty string or a valid 'GATCU' RBS sequence. This is used
                to initialize the RBS sequence exploration algorithm. If an empty string is provided, a
                random RBS sequence will be used as the initializing sequence.
            library_size (int): Number of RBS sequences in your library.
            maximum_consecutive_degeneracy (int): The maximum number of consecutive degeneracy nucleotides
                for the RBS library designs.
            minimum_translation_initiation_rate (int): Lowest translation rate desired for your RBS library
                (proportional scale varies from 1 to 1,000,000).
            maximum_translation_initiation_rate (int): Highest translation rate desired for your RBS library
                (proportional scale varies from 1 to 1,000,000).
            organism (str): Valid organism name. (for all available organism names, please call the
                'rbs_calculator_organisms' function).
            pre_sequence (str): Either an empty string or a valid 'GATCU' mRNA sequence that is required to
                appear upstream (5') of the RBS sequence.

    Args:
        algorithm (str): One of the three algorithms described above, currently supported by the
            TeselaGen/RBS Integration. It is added to the request under the `algorithm` key, overriding any
            `algorithm` entry present in `params`.

        params (dict): The parameters required by the chosen algorithm according to the RBS Calculator API
            Swagger specifications mentioned above. For more information on the meaning of the parameters
            refer to the https://salislab.net/software/ browser application.

            Examples for the tools parameter inputs are as follows:

            'ReverseRBS' params:
                {
                    "mRNA": "YOUR_mRNA_ SEQUENCE",
                    "long_UTR": false,
                    "organism": "Acetobacter pomorum"
                }

            'RBSLibraryCalculator_SearchMode' params:
                {
                    "CDS": "YOUR_CDS_SEQUENCE",
                    "RBS_Constraints": 'TCTAGANNNNNNNNNNNNNNNNNNNNNNNNNGAATTC',
                    "initial_RBS_sequence": "GATTGCGTGTGAGTTCTGGCACGGAGGAGCACGTA",
                    "library_size": 16,
                    "maximum_consecutive_degeneracy": 6,
                    "maximum_translation_initiation_rate": 100,
                    "minimum_translation_initiation_rate": 10,
                    "organism": "Escherichia coli str. K-12 substr. MG1655",
                    "pre_sequence": ""
                }

            'RBSLibraryCalculator_GenomeSearchMode' params:
                {
                    "CDS": "YOUR_CDS_SEQUENCE",
                    "RBS_Constraints": "",
                    "genomic_RBS_sequence": "CUCGUACGGUGCUAACGUGCUUAGU",
                    "initial_RBS_sequence": "",
                    "library_size": 16,
                    "maximum_consecutive_degeneracy": 6,
                    "maximum_translation_initiation_rate": 100,
                    "minimum_translation_initiation_rate": 10,
                    "organism": "Escherichia coli str. K-12 substr. MG1655",
                    "pre_sequence": ""
                }

    Returns:
        JSON with the RBS Calculator job response. This may depend on the chosen tool. If the request itself
        fails, a dict of the form `{'error': <exception>}` is returned instead of raising.
    """
    # `algorithm` goes last so it wins over a same-named key inside `params`.
    serialized_job: str = json.dumps({**params, 'algorithm': algorithm})

    try:
        reply = post(url=self.rbs_calculator_submit_url, data=serialized_job, headers=self.headers)
    except Exception as err:
        return {'error': err}

    return json.loads(reply['content'])
def design_crispr_grnas(
    self,
    sequence: str,
    target_indexes: Optional[Tuple[int, int]] = None,
    target_sequence: Optional[str] = None,
    pam_site: str = 'NGG',
    min_score: float = 40.0,
    max_number: Optional[int] = 50,
    wait_for_results: bool = True,
) -> Dict[str, Any]:
    """Gets CRISPR guide RNAs.

    Args:
        sequence (str): This is the genome sequence. The whole genome sequence is needed for more accurate
            on/off target score predictions.

        target_indexes (Optional[Tuple[int, int]], optional): Start and End position (indexed from 0) of the
            target sequence relative to the genome sequence. Defaults to None, meaning the
            `target_sequence` parameter will be used instead.

        target_sequence (Optional[str], optional): Sequence of the target. Defaults to None, meaning
            `target_indexes` will be used.

        pam_site (str, optional): PAM Site of your CRISPR Enzyme (default: SpyoCas9 with PAM Site: 'NGG').
            Supported CRISPR Enzymes: SpyoCas9 ('NGG'), SaurCas9 ('NNGRR'), AsCas12a ('TTTV').
            Defaults to 'NGG'.

        min_score (float, optional): Minimum on-target score desired for the designed guide RNAs.
            Defaults to 40.0.

        max_number (Optional[int], optional): Maximum number of guide RNAs expected as a response. When
            None, no limit is sent to the server. Defaults to 50.

        wait_for_results (bool, optional): If `True`, the method waits for results to be ready from the
            server and gives a complete output. If `False` it just returns a submit confirmation object
            without waiting for finalization. Defaults to `True`.

    Returns:
        dict: If `wait_for_results` is `True`, the output will contain `guides`, a list with dictionaries
            containing guide info (`sequence`, `start`, `end`, `onTargetScore` and `offTargetScore`) and
            `target_indexes`, a list with the target start, end indexes within the main sequence. If
            `wait_for_results` is `False` it will just return a dict with `taskID`, the id of the submitted
            task, and a `message` string.
    """
    data_section: Dict[str, Any] = {'sequence': sequence}
    if target_indexes is not None:
        data_section['targetStart'] = target_indexes[0]
        data_section['targetEnd'] = target_indexes[1]
    if target_sequence is not None:
        data_section['targetSequence'] = target_sequence

    options_section: Dict[str, Any] = {'pamSite': pam_site, 'minScore': min_score}
    if max_number is not None:
        options_section['maxNumber'] = max_number

    request_body: Dict[str, Any] = {'data': data_section, 'options': options_section}
    reply = post(url=self.crispr_guide_rnas_url, headers=self.headers, json=request_body)
    submission = json.loads(reply['content'])

    # Without a task id there is nothing to poll for, so the submit answer is returned untouched.
    if not wait_for_results or 'taskId' not in submission:
        return submission

    def _finished_ok(status_info):
        return status_info['status'] == 'completed-successfully'

    finished_task = wait_for_status(
        method=self._design_crispr_grnas_get_result,
        validate=_finished_ok,
        task_id=submission['taskId'],
    )
    return finished_task['data']
def submit_multi_objective_optimization(
    self,
    data_input: List[Any],
    data_schema: List[Any],
    pretrainedModelIds: Optional[List[Union[int, str]]] = None,  # noqa: N803
    configs: Optional[Any] = None,
):
    """Submits a multi objective optimization task.

    Args:
        data_input (List[Any]): This is required and must contain a JSON array of JSON objects with the
            input training data. These objects must be consistent with the `data_schema` property.

    ```json
        [{
            "Descriptor1": "A0",
            "Descriptor2": "B1",
            "Target_1": "1",
            "Target_2": "-1"
        }, {
            "Descriptor1": "A0",
            "Descriptor2": "B2",
            "Target_1": "2",
            "Target_2": "-2"
        }, {
            "Descriptor1": "A0",
            "Descriptor2": "B3",
            "Target_1": "3",
            "Target_2": "-3"
        }]
    ```

        data_schema (List[Any]): This is an array of the schema of the input data columns. The `name`
            property corresponds to the column's name. The `type` property determines whether the column is
            a "target" or a "descriptor" (feature). Only "target" and "descriptor" are supported. The
            `value_type` type determines the type of the column's values. Only "numeric" and "categoric" are
            supported.

    ```json
        [{
            "name": "Descriptor1",
            "value_type": "categoric",
            "type": "descriptor"
        }, {
            "name": "Descriptor2",
            "value_type": "categoric",
            "type": "descriptor"
        }, {
            "name": "Target_1",
            "value_type": "numeric",
            "type": "target"
        }, {
            "name": "Target_2",
            "value_type": "numeric",
            "type": "target"
        }]
    ```

            - `name` : corresponds to the name of the column (descriptor or target)
            - `type` : describes whether the field is a descriptor (feature) or a target.
            - `value_type` : defines the type of value of this column. Available types are "numeric" or
              "categoric"

        pretrainedModelIds (Optional[List[Union[int, str]]]): Sent to the API as `predictiveModelIds`.
            Presumably the IDs of already trained predictive models to be used by the optimization (TODO:
            confirm against the API documentation). Defaults to None, which is sent as-is.

        configs (Optional[Any]): This is an advanced property containing advanced configuration for the
            training execution. Please refer to Teselagen's Data Science Team. When None, an empty dict is
            sent.

    Returns:
        (dict): A dictionary containing info of the submitted job. An example is shown below:

    ```json
        {
            "authToken": "1d140371-a59f-4ad2-b57c-6fc8e0a20ff8",
            "checkInInterval": null,
            "controlToken": null,
            "id": "36",
            "input": {
                "job": "modeling-tool",
                "kwargs": {}
            },
            "lastCheckIn": null,
            "missedCheckInCount": null,
            "result": null,
            "resultStatus": null,
            "service": "ds-tools",
            "serviceUrl": null,
            "startedOn": null,
            "status": "created",
            "taskId": null,
            "trackingId": null,
            "completedOn": null,
            "createdAt": "2020-10-29T13:18:06.167Z",
            "updatedAt": "2020-10-29T13:18:06.271Z",
            "cid": null,
            "__typename": "microserviceQueue"
        }
    ```
    """
    body = {
        'dataInput': data_input,
        'dataSchema': data_schema,
        'predictiveModelIds': pretrainedModelIds,
        'configs': {} if configs is None else configs,
    }
    response = post(
        url=self.submit_multi_objective_optimization_url,
        headers=self.headers,
        json=body,
    )
    # Unlike the single-model endpoints, the whole parsed content is returned here.
    response['content'] = json.loads(response['content'])
    return response['content']
def submit_model(
    self,
    data_input: List[Any],
    data_schema: List[Any],
    model_type: ModelType,
    configs: Optional[Any] = None,
    name: str = '',
    description: Optional[str] = None,
) -> Dict[str, Any]:
    """Submits a model for training.

    Args:
        data_input (List[Any]): This is required and must contain a JSON array of JSON objects with the
            input training data. These objects must be consistent with the `data_schema` property.

    ```json
        [{
            "Descriptor1": "A0",
            "Descriptor2": "B1",
            "Target": "1"
        }, {
            "Descriptor1": "A0",
            "Descriptor2": "B2",
            "Target": "2"
        }, {
            "Descriptor1": "A0",
            "Descriptor2": "B3",
            "Target": "3"
        }]
    ```

        data_schema (List[Dict[str, Any]]): This is an array of the schema of the input data columns. The
            `name` property corresponds to the column's name. The `type` property determines whether the
            column is a "target" or a "descriptor" (feature). Only "target" and "descriptor" are supported.
            The `value_type` type determines the type of the column's values. Only "numeric" and "categoric"
            are supported.

    ```json
        [{
            "id": "1",
            "name": "Descriptor1",
            "value_type": "categoric",
            "type": "descriptor"
        }, {
            "id": "2",
            "name": "Descriptor2",
            "value_type": "categoric",
            "type": "descriptor"
        }, {
            "id": "3",
            "name": "Target",
            "value_type": "numeric",
            "type": "target"
        }]
    ```

            - `id` : corresponds to the id (position) of the column in the dataset.
            - `name` : corresponds to the name of the column (descriptor or target)
            - `type` : describes whether the field is a descriptor (feature) or a target.
            - `value_type` : defines the type of value of this column. Available types are "numeric" or
              "categoric".

        model_type (ModelType): The type of model wanting to submit. Either "predictive", "evolutive" or
            "generative".

            NOTE: If submitting a "generative" model, there's no "descriptor" column, in fact there should
            only be one "target" column with the amino acid sequence. This needs to be properly set in the
            dataSchema field according to the documentation.

        configs (Optional[Any]): This is an advanced property containing advanced configuration for the
            training execution. Please refer to Teselagen's Data Science Team. When None, an empty dict is
            sent.

        name (str): This sets the Evolve Model's name.

        description (Optional[str]): This gives the Evolve Model a description. When None, an empty string
            is sent.

    Returns:
        (dict): A dictionary containing info of the submitted job. An example is shown below:

    ```json
        {
            "authToken": "1d140371-a59f-4ad2-b57c-6fc8e0a20ff8",
            "checkInInterval": null,
            "controlToken": null,
            "id": "36",
            "input": {
                "job": "modeling-tool",
                "kwargs": {}
            },
            "lastCheckIn": null,
            "missedCheckInCount": null,
            "result": null,
            "resultStatus": null,
            "service": "ds-tools",
            "serviceUrl": null,
            "startedOn": null,
            "status": "created",
            "taskId": null,
            "trackingId": null,
            "completedOn": null,
            "createdAt": "2020-10-29T13:18:06.167Z",
            "updatedAt": "2020-10-29T13:18:06.271Z",
            "cid": null,
            "__typename": "microserviceQueue"
        }
    ```
    """
    # Optional arguments are replaced by their empty counterparts before being sent.
    effective_configs = configs if configs is not None else {}
    effective_description = description if description is not None else ''

    payload = dict(
        dataInput=data_input,
        dataSchema=data_schema,
        modelType=model_type,
        configs=effective_configs,
        name=name,
        description=effective_description,
    )
    response = post(url=self.submit_model_url, headers=self.headers, json=payload)

    parsed_content = json.loads(response['content'])
    response['content'] = parsed_content
    return self._get_data_from_content(parsed_content)
def get_model_datapoints(
    self,
    model_id: ModelID,
    datapoint_type: str,
    batch_size: int,
    batch_number: int,
) -> Dict[str, Any]:
    """Returns model datapoints.

    The server answers with a JSON object with an array of datapoints filtered by the provided model ID and
    datapoint type. This array comes in the `data` field of the response body. Each element of the array
    has a `datapoint` field, which corresponds to a JSON object with the datapoint data.

    Args:
        model_id (ModelID): ID of the model. It is sent to the API as a string.

        datapoint_type (str): Either "input" or "output". One can fetch only input datapoints (a.k.a
            training datapoints) or just fetch the output datapoints (a.k.a predicted datapoints not seen
            in the training dataset).

        batch_size (int): Number of datapoints to fetch from the database table.

        batch_number (int): Depends on `batch_size`, and determines the index position offset of length
            `batch_size` from where to start fetching datapoints.

    Returns:
        - An object with a 'data' key with the list of datapoints along with their predictions. Each
          datapoint is stripped of its `set_tag` key and of every key containing `PCA`. When the response
          has no 'data' key, an empty list is stored under it.

    ```json
        {
            "message": "Submission success.",
            "data": [{ ... }, { ... }, { ... }]
        }
    ```
    """
    payload = dict(
        modelId=str(model_id),
        datapointType=datapoint_type,
        batchSize=batch_size,
        batchNumber=batch_number,
    )
    reply = post(url=self.get_model_datapoints_url, headers=self.headers, json=payload)
    content: Dict[str, Any] = json.loads(reply['content'])

    cleaned: List[Dict[str, Any]] = []
    if 'data' in content:
        for element in content['data']:
            kept: Dict[str, Any] = {}
            for key, value in element['datapoint'].items():
                # Drop bookkeeping entries: the set tag and any PCA-related key.
                if key == 'set_tag' or 'PCA' in key:
                    continue
                kept[key] = value
            cleaned.append(kept)

    content['data'] = cleaned
    return content
def get_models_by_type(
    self,
    model_type: Optional[ModelType] = None,
):
    """Returns a JSON object with the metadata of multiple models, filtered by the provided `model_type`.

    Args:
        model_type (ModelType): One of the values listed in `ALLOWED_MODEL_TYPES`:

    ```
        "predictive"
        "evolutive"
        "generative"
         None
    ```

    Returns:
        ():

    ```json
        {
            "message": "Submission success.",
            "data": [{
                "id": "1",
                "labId": "1",
                "modelType": "evolutive",
                "name": "My First Evolutive Model",
                "description": "This is an example model",
                "status": "completed-successfully",
                "evolveModelInfo": {
                    "microserviceQueueId": "1",
                    "dataSchema": [{
                        "id": "1",
                        "name": "Descriptor1",
                        "value_type": "numeric",
                        "type": "descriptor"
                    }, {
                        "id": "1",
                        "name": "Descriptor2",
                        "value_type": "numeric",
                        "type": "descriptor"
                    }, {
                        "id": "2",
                        "name": "Target",
                        "value_type": "numeric",
                        "type": "target"
                    }],
                    "modelStats": {
                        "MAE": 45
                    }
                }
            }, {
                "id": "2",
                "labId": "1",
                "modelType": "evolutive",
                "name": "My Second Evolutive Model",
                "description": "This is an example model",
                "status": "completed-successfully",
                "evolveModelInfo": {
                    "microserviceQueueId": "1",
                    "dataSchema": [{
                        "id": "1",
                        "name": "Descriptor1",
                        "value_type": "numeric",
                        "type": "descriptor"
                    }, {
                        "id": "1",
                        "name": "Descriptor2",
                        "value_type": "numeric",
                        "type": "descriptor"
                    }, {
                        "id": "2",
                        "name": "Target",
                        "value_type": "numeric",
                        "type": "target"
                    }],
                    "modelStats": {
                        "MAE": 40
                    }
                }
            }]
        }
    ```

    Raises:
        ValueError: If `model_type` is not one of `ALLOWED_MODEL_TYPES`.
    """
    is_allowed = model_type in ALLOWED_MODEL_TYPES
    if not is_allowed:
        raise ValueError(f'Type: {model_type} not in {ALLOWED_MODEL_TYPES}')

    # The model type is forwarded unchanged.
    request_body = dict(modelType=model_type)
    response = post(url=self.get_models_by_type_url, headers=self.headers, json=request_body)

    parsed_content = json.loads(response['content'])
    response['content'] = parsed_content
    return self._get_data_from_content(parsed_content)