def save_data(id, payload, content_type, label=None, project_key=None, encoding=None):
    """
    Saves data as a DSS static insight that can be exposed on the dashboard

    :param str id: Unique identifier of the insight within the project. If an insight with the
        same identifier already exists, it is replaced
    :param payload: bytes-oriented data, or a Base64 string
    :param content_type: the MIME type of the data in ``payload`` (for example ``text/html``
        or ``image/png``)
    :param str label: Optional display label for the insight. If None, the id is used as the label
    :param str project_key: Project key in which the insight must be saved. If None, the
        contextual (current) project is used
    :param str encoding: If the payload is a Base64 string, this must be "base64".
        Otherwise, this must be None
    """
    if project_key is None:
        project_key = default_project_key()
    backend_void_call("insights/save-static-file-insight", {
        "projectKey": project_key,
        "id": id,
        "payload": _get_payload(payload, encoding),
        "contentType": content_type,
        "label": label
    })
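
# A minimal usage sketch for save_data (an assumption, not documented usage: the
# `dataiku.insights` import path, the "sales-report" id and the HTML payload are
# illustrative values):
#
#     from dataiku import insights
#
#     report_html = "<h1>Q3 sales</h1><p>Revenue up 12%</p>"
#     insights.save_data("sales-report",                    # hypothetical insight id
#                        report_html.encode("utf-8"),       # bytes payload
#                        "text/html",
#                        label="Q3 sales report")
#
# For an image already held as a Base64 string, the same call would pass the string as
# payload with encoding="base64" and content_type="image/png".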
def iter_tuples(self, log_every=-1, no_header=False):
    def none_if_throws(f):
        def aux(*args, **kwargs):
            try:
                return f(*args, **kwargs)
            except Exception:
                return None
        return aux

    if not self.streamingSession['hasResults']:
        # nothing to stream: a bare return makes this generator yield nothing
        return

    queryId = self.streamingSession['queryId']

    # open the stream
    resp_stream = backend_stream_call(
        "sql-queries/stream",
        data={
            "queryId": queryId,
            "format": "tsv-excel-noheader" if no_header else "tsv-excel-header"
        },
        err_msg="Query failed")

    # prepare the casters for the columns
    casters = [
        schema_handling.CASTERS.get(col["type"], lambda s: s)
        for col in self.streamingSession['schema']
    ]

    # parse the csv stream
    count = 0
    for row_tuple in dkuio.new_utf8_csv_reader(resp_stream, delimiter='\t', quotechar='"', doublequote=True):
        if count == 0 and not no_header:
            # the first line is the header, skip it (only present when a header was requested)
            count = 1
            continue
        yield [
            none_if_throws(caster)(val)
            for (caster, val) in base.dku_zip_longest(casters, row_tuple)
        ]
        count += 1
        if log_every > 0 and count % log_every == 0:
            print("Query - read %i lines" % count)

    # the query can still fail server-side after streaming has started, so verify once the
    # stream is exhausted. This call must come after the rows have been consumed, because the
    # stream call returns before the query is actually done.
    backend_void_call("sql-queries/verify", data={"queryId": queryId}, err_msg="Query failed")
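
# A minimal usage sketch for iter_tuples (hedged: SQLExecutor2 with query_to_iter is the
# usual way to obtain an object exposing this method; the connection name and query are
# illustrative assumptions):
#
#     from dataiku import SQLExecutor2
#
#     executor = SQLExecutor2(connection="my-postgres")     # hypothetical connection name
#     reader = executor.query_to_iter("SELECT * FROM big_table")
#     for row in reader.iter_tuples(log_every=100000):
#         # each row is a list of values, cast per the column schema (None on cast failure)
#         process(row)                                      # hypothetical downstream handler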
def add_report_item(self, object_ref, partition, report_item):
    """
    When used in the code of a custom step, adds a report item to the current step run
    """
    backend_void_call("scenarios/add-step-report-item", data={
        'objectRef': object_ref,
        'partition': partition,
        'reportItem': json.dumps(report_item)
    })
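
# A minimal usage sketch inside a custom scenario step (hedged: the Scenario handle, the
# object reference format and the report item fields are illustrative assumptions, not a
# documented contract; "NP" is the conventional partition id for non-partitioned objects):
#
#     from dataiku.scenario import Scenario
#
#     s = Scenario()
#     s.add_report_item("DATASET:mydataset", "NP",
#                       {"type": "rowcount", "value": 42})  # hypothetical report item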
def activate_version(self, version_id):
    """
    Activates a version of the saved model

    :param version_id: the unique identifier of the version to activate
    """
    backend_void_call("savedmodels/set-active", data={
        "projectKey": self.project_key,
        "smId": self.get_id(),
        "versionId": version_id
    })
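
# A minimal usage sketch (hedged: obtaining the saved model handle through dataiku.Model,
# and both ids, are illustrative assumptions):
#
#     import dataiku
#
#     model = dataiku.Model("my_saved_model_id")     # hypothetical saved model id
#     model.activate_version("1562591573491")        # hypothetical version id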
def _streamed_query_to_df(connection, query, pre_queries, post_queries, find_connection_from_dataset,
                          db_type, extra_conf=None, infer_from_schema=False, parse_dates=True,
                          bool_as_str=False, dtypes=None, script_steps=None,
                          script_input_schema=None, script_output_schema=None):
    import pandas as pd
    if extra_conf is None:
        # avoid a mutable default argument
        extra_conf = {}

    data = {
        "connection": connection,
        "query": query,
        "preQueries": json.dumps(pre_queries),
        "postQueries": json.dumps(post_queries),
        "findConnectionFromDataset": find_connection_from_dataset,
        "dbType": db_type,
        "extraConf": json.dumps(extra_conf),
        "scriptSteps": json.dumps(script_steps) if script_steps is not None else None,
        "scriptInputSchema": json.dumps(script_input_schema) if script_input_schema is not None else None,
        "scriptOutputSchema": json.dumps(script_output_schema) if script_output_schema is not None else None
    }

    logging.info("Starting SQL query reader")
    # initiate the streaming (blocks until the database says it's ready to return values)
    streamingSession = backend_json_call("sql-queries/start-streaming", data=data)
    logging.info("Got initial SQL query response")

    queryId = streamingSession['queryId']

    # handle the special case of 'nothing to stream'
    if not streamingSession['hasResults']:
        return pd.DataFrame()

    parse_date_columns = None
    if infer_from_schema and "schema" in streamingSession:
        schema_columns = streamingSession["schema"]
        (inferred_names, inferred_dtypes, inferred_parse_date_columns) = Dataset.get_dataframe_schema_st(
            schema_columns, parse_dates=parse_dates, bool_as_str=bool_as_str)
        dtypes = inferred_dtypes
        parse_date_columns = inferred_parse_date_columns

    # fetch the data...
    resp_stream = backend_stream_call("sql-queries/stream", data={"queryId": queryId},
                                      err_msg="Query failed")
    # ... and load it, still streamed, into a dataframe
    results = pd.read_table(resp_stream, sep='\t', doublequote=True, quotechar='"',
                            dtype=dtypes, parse_dates=parse_date_columns)

    # the query can still fail server-side after streaming has started, so verify once the
    # dataframe has been fully read. This call must come after the dataframe creation, because
    # the stream call returns before the query is actually done.
    backend_void_call("sql-queries/verify", data={"queryId": queryId}, err_msg="Query failed")
    return results
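
# A minimal sketch of how this private helper might be invoked directly (hedged: callers
# normally go through a higher-level wrapper such as SQLExecutor2; the connection name,
# query and db_type value are illustrative assumptions):
#
#     df = _streamed_query_to_df("my-postgres",                       # hypothetical connection
#                                "SELECT count(*) AS n FROM logs",
#                                pre_queries=[], post_queries=[],
#                                find_connection_from_dataset=False,
#                                db_type="sql",                       # assumed db_type value
#                                infer_from_schema=True)
#
# With infer_from_schema=True, the dtypes and date columns are derived from the schema the
# backend returns at start-streaming time, instead of being guessed by pandas.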