Example #1
def save_data(id,
              payload,
              content_type,
              label=None,
              project_key=None,
              encoding=None):
    """
    Saves data as a DSS static insight that can be exposed on the dashboard

    :param str id: Unique identifier of the insight within the project. If an insight with the same identifier
                   already exists, it is replaced
    :param payload: the data to save, as bytes or as a Base64-encoded string
    :param str content_type: the MIME type of the data in payload (for example text/html or image/png)
    :param str label: Optional display label for the insight. If None, the id is used as the label
    :param str project_key: Project key in which the insight must be saved. If None, the contextual (current)
                    project is used
    :param str encoding: If the payload is a Base64 string, this must be "base64". Otherwise, it must be None
    """
    if project_key is None:
        project_key = default_project_key()

    backend_void_call(
        "insights/save-static-file-insight", {
            "projectKey": project_key,
            "id": id,
            "payload": _get_payload(payload, encoding),
            "contentType": content_type,
            "label": label
        })
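
A minimal usage sketch, assuming this function is exposed as dataiku.insights.save_data (as in the DSS Python API); the insight id and HTML content are illustrative:

from dataiku import insights

html = "<h1>Model report</h1><p>AUC: 0.92</p>"
insights.save_data(
    "model-report",            # insight id; replaces any existing insight with this id
    html.encode("utf-8"),      # bytes payload, so encoding stays None
    "text/html",               # MIME type of the payload
    label="Model report")      # shown on the dashboard instead of the raw id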
Example #2
    def iter_tuples(self, log_every=-1, no_header=False):
        def none_if_throws(f):
            # wrap a caster so unparseable values become None instead of raising
            def aux(*args, **kwargs):
                try:
                    return f(*args, **kwargs)
                except Exception:
                    return None

            return aux

        if not self.streamingSession['hasResults']:
            # nothing to stream: a bare return ends the generator cleanly
            return
        else:
            queryId = self.streamingSession['queryId']
            # open the stream

            resp_stream = backend_stream_call(
                "sql-queries/stream",
                data={
                    "queryId": queryId,
                    "format": "tsv-excel-noheader" if no_header else "tsv-excel-header"
                },
                err_msg="Query failed")

            # prepare the casters for the columns
            casters = [
                schema_handling.CASTERS.get(col["type"], lambda s: s)
                for col in self.streamingSession['schema']
            ]
            # parse the csv stream
            count = 0
            for row_tuple in dkuio.new_utf8_csv_reader(resp_stream,
                                                       delimiter='\t',
                                                       quotechar='"',
                                                       doublequote=True):
                if count == 0 and not no_header:
                    # the first line is the header row, skip it
                    count = 1
                    continue
                yield [
                    none_if_throws(caster)(val)
                    for (caster, val) in base.dku_zip_longest(casters, row_tuple)
                ]
                count += 1
                if log_every > 0 and count % log_every == 0:
                    print("Query - read %i lines" % (count))

            # The stream ended, but that alone does not prove the query succeeded:
            # the stream call returns before the query is actually done, so an
            # explicit verification call is needed once all rows have been read.
            backend_void_call("sql-queries/verify",
                              data={"queryId": queryId},
                              err_msg="Query failed")
Example #3
    def add_report_item(self, object_ref, partition, report_item):
        """
        When used in the code of a custom step, adds a report item to the current step run

        :param str object_ref: reference of the object the report item is attached to
        :param str partition: identifier of the partition concerned, for partitioned objects
        :param dict report_item: JSON-serializable representation of the report item
        """
        backend_void_call("scenarios/add-step-report-item",
                          data={
                              'objectRef': object_ref,
                              'partition': partition,
                              'reportItem': json.dumps(report_item)
                          })
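
A hedged sketch of how a custom scenario step might call this, assuming the method is exposed on the Scenario handle available in step code (the object reference and report item are illustrative):

from dataiku.scenario import Scenario

scenario = Scenario()
scenario.add_report_item(
    object_ref="DATASET:sales",                       # hypothetical object reference
    partition="NP",                                   # non-partitioned
    report_item={"type": "CHECK", "outcome": "OK"})   # any JSON-serializable dict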
Example #4
    def activate_version(self, version_id):
        """
        Activate a version in the model

        :param str version_id: the unique identifier of the version to activate
        """
        backend_void_call("savedmodels/set-active",
                          data={
                              "projectKey": self.project_key,
                              "smId": self.get_id(),
                              "versionId": version_id
                          })
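
A hedged usage sketch, assuming the enclosing class is the saved-model handle obtained through dataiku.Model (the lookup and version id are illustrative):

import dataiku

model = dataiku.Model("my_saved_model")   # hypothetical saved-model lookup
model.activate_version("initial")         # make this version the active one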
Example #5
def _streamed_query_to_df(connection,
                          query,
                          pre_queries,
                          post_queries,
                          find_connection_from_dataset,
                          db_type,
                          extra_conf=None,
                          infer_from_schema=False,
                          parse_dates=True,
                          bool_as_str=False,
                          dtypes=None,
                          script_steps=None,
                          script_input_schema=None,
                          script_output_schema=None):
    import pandas as pd
    if extra_conf is None:
        # avoid the mutable-default-argument pitfall
        extra_conf = {}
    data = {
        "connection": connection,
        "query": query,
        "preQueries": json.dumps(pre_queries),
        "postQueries": json.dumps(post_queries),
        "findConnectionFromDataset": find_connection_from_dataset,
        "dbType": db_type,
        "extraConf": json.dumps(extra_conf),
        "scriptSteps": json.dumps(script_steps) if script_steps is not None else None,
        "scriptInputSchema": json.dumps(script_input_schema) if script_input_schema is not None else None,
        "scriptOutputSchema": json.dumps(script_output_schema) if script_output_schema is not None else None
    }

    logging.info("Starting SQL query reader")
    # initiate the streaming (blocks until the database says it's ready to return values)
    streamingSession = backend_json_call("sql-queries/start-streaming",
                                         data=data)

    logging.info("Got initial SQL query response")

    queryId = streamingSession['queryId']

    # handle the special case of 'nothing to stream'
    if not streamingSession['hasResults']:
        return pd.DataFrame()

    parse_date_columns = None
    if infer_from_schema and "schema" in streamingSession:
        schema_columns = streamingSession["schema"]
        (inferred_names, inferred_dtypes,
         inferred_parse_date_columns) = Dataset.get_dataframe_schema_st(
             schema_columns, parse_dates=parse_dates, bool_as_str=bool_as_str)
        dtypes = inferred_dtypes
        parse_date_columns = inferred_parse_date_columns

    # fetch the data...
    resp_stream = backend_stream_call("sql-queries/stream",
                                      data={"queryId": queryId},
                                      err_msg="Query failed")
    # ... and stuff it (streamed) in a dataframe
    results = pd.read_csv(resp_stream,
                          sep='\t',
                          doublequote=True,
                          quotechar='"',
                          dtype=dtypes,
                          parse_dates=parse_date_columns)

    # The dataframe is built, but that alone does not prove the query succeeded:
    # the stream call returns before the query is actually done, so this
    # verification call must come after the stream has been fully consumed.
    backend_void_call("sql-queries/verify",
                      data={"queryId": queryId},
                      err_msg="Query failed")

    return results
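
A hedged sketch of the public entry point that typically wraps this private helper, assuming SQLExecutor2.query_to_df delegates to it (the dataset name and queries are illustrative):

import dataiku

executor = dataiku.SQLExecutor2(dataset="sales")   # infer the connection from a dataset
df = executor.query_to_df(
    "SELECT country, SUM(price) AS total FROM sales GROUP BY country",
    pre_queries=["SET search_path TO analytics"],  # hypothetical pre-query
    infer_from_schema=True)                        # map the DSS schema to pandas dtypes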