def _should_retry(self, data):
    if data is None:
        self.retries = 0
        return True
    self.retries += 1
    if "error" in data:
        if "message" in data["error"] and "value" in data["error"]["message"]:
            # SAP error causing troubles:
            # {'error': {'code': '/IWBEP/CM_MGW_RT/004', 'message': {'value': 'Metadata cache on
            if self.retries < self.MAX_RETRIES:
                logging.warning(
                    "Remote service error : {}. Attempt {}, trying again".format(
                        data["error"]["message"]["value"], self.retries))
                sleep(2)
                return True
            else:
                logging.error(
                    "Remote service error : {}. Attempt {}, stop trying.".format(
                        data["error"]["message"]["value"], self.retries))
                raise DataikuException("Remote service error : {}".format(
                    data["error"]["message"]["value"]))
        else:
            logging.error("Remote service error")
            raise DataikuException("Remote service error")
    return False
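# Illustrative only (not part of the original code): the nesting that
# _should_retry inspects before deciding to retry. The code and message values
# below are invented placeholders; only the {'error': {'message': {'value': ...}}}
# structure is taken from the checks above.
_EXAMPLE_RETRIABLE_ERROR = {
    "error": {
        "code": "SOME/ERROR_CODE",
        "message": {"value": "Temporary remote service error"}
    }
}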
def move_article_in_taxonomy(self, article_id, parent_article_id=None):
    """
    A helper to update the taxonomy by moving an article, with its children,
    as a child of another article

    :param str article_id: the main article ID
    :param str parent_article_id: the new parent article ID, or None for root level
    """
    old_taxonomy = copy.deepcopy(self.settings["taxonomy"])
    tax_article = self.__retrieve_article_in_taxonomy__(
        self.settings["taxonomy"], article_id, True)
    if tax_article is None:
        raise DataikuException("Article not found: %s" % (article_id))
    if parent_article_id is None:
        self.settings["taxonomy"].append(tax_article)
    else:
        tax_parent_article = self.__retrieve_article_in_taxonomy__(
            self.settings["taxonomy"], parent_article_id, False)
        if tax_parent_article is None:
            self.settings["taxonomy"] = old_taxonomy
            raise DataikuException(
                "Parent article not found (or is one of the article's descendants): %s"
                % (parent_article_id))
        tax_parent_article["children"].append(tax_article)
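# A hedged usage sketch (not part of the original class): reorganizing a wiki
# with move_article_in_taxonomy. `settings` is assumed to be an instance of the
# class defining the method above, and the article IDs are placeholders.
def _example_reorganize_wiki(settings):
    # Move "Troubleshooting" (and its children) to the root of the taxonomy...
    settings.move_article_in_taxonomy("Troubleshooting", parent_article_id=None)
    # ...then nest it under "User guide".
    settings.move_article_in_taxonomy("Troubleshooting", parent_article_id="User guide")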
def assert_response(self, response):
    status_code = response.status_code
    if status_code == 404:
        raise DataikuException("This entity does not exist")
    if status_code == 403:
        raise DataikuException("Forbidden access: {}".format(response))
    if status_code == 401:
        raise DataikuException("Unauthorized access")
def partition_exists(self, partitioning, partition_id):
    """
    Return whether the partition passed as parameter exists

    Implementation is only required if the corresponding flag is set to True
    in the connector definition
    """
    raise DataikuException("unimplemented")
def get_records_count(self, partitioning=None, partition_id=None):
    """
    Returns the count of records for the dataset (or a partition).

    Implementation is only required if the corresponding flag is set to True
    in the connector definition
    """
    raise DataikuException("unimplemented")
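# A minimal sketch (not part of the original connector) of how the two optional
# hooks above could be implemented. The class name and the `client` helper with
# list_partitions()/count_records() methods are assumptions for illustration.
class _ExamplePartitionedConnector:
    def __init__(self, client):
        self.client = client  # hypothetical API client exposing partition helpers

    def partition_exists(self, partitioning, partition_id):
        # Check the partition id against the partitions the remote side reports
        return partition_id in self.client.list_partitions(partitioning)

    def get_records_count(self, partitioning=None, partition_id=None):
        # Delegate the count to the remote side, optionally scoped to a partition
        return self.client.count_records(partition_id)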
def wait(self, no_fail=False):
    while not self.scenario_run.run.get('result', False):
        self.scenario_run = self.trigger_fire.get_scenario_run()
        time.sleep(5)
    outcome = self.scenario_run.run.get('result', None).get('outcome', 'UNKNOWN')
    if outcome == 'SUCCESS' or no_fail:
        return self.scenario_run
    else:
        raise DataikuException("Scenario run returned status %s" % outcome)
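# Hedged usage sketch (not in the original code): block until the scenario run
# finishes without raising on failure, then inspect its outcome. `waiter` is
# assumed to be an instance of the class defining wait() above.
def _example_wait_for_scenario(waiter):
    scenario_run = waiter.wait(no_fail=True)
    return scenario_run.run.get('result', {}).get('outcome', 'UNKNOWN')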
def get_writer(self, dataset_schema=None, dataset_partitioning=None, partition_id=None):
    """
    Returns a writer object to write in the dataset (or in a partition).

    The dataset_schema given here will match the rows given to the writer below.

    Note: the writer is responsible for clearing the partition, if relevant.
    """
    raise DataikuException("Unimplemented")
def wait(self, no_fail=False):
    job_state = self.job.get_status().get("baseStatus", {}).get("state", "")
    sleep_time = 2
    while job_state not in ["DONE", "ABORTED", "FAILED"]:
        # Exponential backoff between status polls, capped at 5 minutes
        sleep_time = min(sleep_time * 2, 300)
        time.sleep(sleep_time)
        job_state = self.job.get_status().get("baseStatus", {}).get("state", "")
        if job_state in ["ABORTED", "FAILED"]:
            if no_fail:
                break
            else:
                raise DataikuException("Job run did not finish. Status: %s" % (job_state))
    return job_state
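# Hedged usage sketch (not in the original code): block until the job reaches a
# terminal state without raising on failure. `job_handle` is assumed to be an
# instance of the class defining wait() above.
def _example_run_and_wait(job_handle):
    final_state = job_handle.wait(no_fail=True)
    if final_state != "DONE":
        logging.warning("Job ended in state %s", final_state)
    return final_state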
def wait_for_scenario_run(self, no_fail=False):
    scenario_run = None
    refresh_trigger_counter = 0
    while scenario_run is None:
        refresh_trigger_counter += 1
        if refresh_trigger_counter == 10:
            refresh_trigger_counter = 0
        # Only refresh the trigger state from the backend every 10th poll
        if self.is_cancelled(refresh=refresh_trigger_counter == 0):
            if no_fail:
                return None
            else:
                raise DataikuException("Scenario run has been cancelled")
        scenario_run = self.get_scenario_run()
        time.sleep(5)
    return scenario_run
def __get_logs_path(self):
    # Retrieve model managed-folder path
    folder_found = False
    project = self.client.get_project(self.project_key)
    for folder in project.list_managed_folders():
        if self.folder_name == folder['name']:
            folder_path = dataiku.Folder(
                folder['id'], project_key=self.project_key).get_path()
            folder_found = True
            break
    if not folder_found:
        raise DataikuException(
            "The folder '{}' (in project '{}') cannot be found".format(
                self.folder_name, self.project_key))
    log_path = os.path.join(folder_path, constants.TENSORBOARD_LOGS)
    return log_path
def generate_rows(self, dataset_schema=None, dataset_partitioning=None, partition_id=None, records_limit=-1):
    is_records_limit = records_limit > 0
    record_count = 0
    while self.client.has_more_data():
        json_response = self.client.paginated_api_call()
        if self.extraction_key is None:
            # Todo: check api_response key is free and add something otherwise
            if isinstance(json_response, list):
                record_count += len(json_response)
                for row in json_response:
                    yield {"api_response": row}
            else:
                record_count += 1
                yield {"api_response": json_response}
        else:
            data = json_response.get(self.extraction_key, None)
            if data is None:
                raise DataikuException(
                    "Extraction key '{}' was not found in the incoming data".format(self.extraction_key))
            record_count += len(data)
            for result in data:
                yield {"api_response": result} if self.raw_output else result
        if is_records_limit and record_count >= records_limit:
            break
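# Illustrative only (not from the original code): with extraction_key set to
# "items" and raw_output enabled, a page like the one below yields one row per
# element of "items", each wrapped under the "api_response" column. The field
# names and values are invented placeholders.
_EXAMPLE_PAGE = {"items": [{"id": 1}, {"id": 2}], "next_page": "token123"}
_EXAMPLE_YIELDED_ROWS = [{"api_response": {"id": 1}}, {"api_response": {"id": 2}}]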
def retrieve_next_page(self, is_raw_output):
    page_rows = []
    base_row = copy.deepcopy(self.initial_parameter_columns)
    logger.info("retrieve_next_page: Calling next page")
    json_response = self.client.paginated_api_call(can_raise_exeption=False)
    if self.extraction_key:
        data_rows = json_response.get(self.extraction_key, [json_response])
        if data_rows is None:
            raise DataikuException(
                "Extraction key '{}' was not found in the incoming data".format(self.extraction_key))
        page_rows.extend(self.format_page_rows(data_rows, is_raw_output))
    else:
        # Todo: check api_response key is free and add something otherwise
        if is_raw_output:
            if is_error_message(json_response):
                base_row.update(json_response)
            else:
                base_row.update({"api_response": json_response})
        else:
            base_row.update(json_response)
        page_rows.append(base_row)
    return page_rows
def write_row(self, row):
    """
    Row is a tuple with N + 1 elements matching the schema passed to get_writer.
    The last element is a dict of columns not found in the schema
    """
    raise DataikuException("unimplemented")
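# A minimal sketch (not part of the original code) of a writer honoring the
# contract documented above: the first N elements of `row` follow the schema
# passed to get_writer, and the trailing dict carries any extra columns. The
# class name and buffering strategy are assumptions for illustration.
class _ExampleBufferingWriter:
    def __init__(self, dataset_schema):
        self.columns = [col["name"] for col in dataset_schema["columns"]]
        self.buffer = []

    def write_row(self, row):
        # Map schema columns to their values, then merge the extra-columns dict
        record = dict(zip(self.columns, row[:-1]))
        record.update(row[-1])
        self.buffer.append(record)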
def get_partitioning(self):
    """
    Return the partitioning schema that the connector defines.
    """
    raise DataikuException("Unimplemented")