Beispiel #1
0
    def _misc(request, context):
        """
        Execute functions that provide common data science capabilities for Qlik.

        The function id obtained from the request header selects which
        CommonFunction method is run. The result is converted to SSE rows and
        streamed back in bundles.

        :param request: an iterable sequence of RowData
        :param context: the request context; passed to ExtensionService._get_function_id and CommonFunction
        :return: a generator of SSE.BundledRows; the columns depend on the function called (see comments below)
        :raises ValueError: if the function id is not one of those handled here
        """
        # Materialize the request so that it can be iterated over multiple times
        request_list = list(request)

        # Get the function id from the header to determine the variant being called
        function = ExtensionService._get_function_id(context)

        # Create an instance of the CommonFunction class
        handle = CommonFunction(request_list, context)

        # Call the function based on the mapping in functions.json
        if function == 33:
            # Get the association rules
            response = handle.association_rules()
            # Return six columns: 'rule', 'rule_lhs', 'rule_rhs', 'support', 'confidence', 'lift'
            dtypes = ["str", "str", "str", "num", "num", "num"]
        elif function == 43:
            # Provide predictions in a chart expression based on an existing model
            response = handle.predict(load_script=False)
            # Return predictions
            dtypes = ["str"]
        elif function == 44:
            # Provide predictions in the load script based on an existing model
            response = handle.predict(load_script=True)
            # Return the model name, keys and predictions
            dtypes = ["str", "str", "str"]
        elif function == 45:
            # Get a string that can be evaluated to get the features expression for the predict function
            response = handle.get_features_expression()
            # Return the feature expression
            dtypes = ["str"]
        else:
            # Fail with a clear message instead of an UnboundLocalError further down
            raise ValueError("Unsupported function id for _misc: {0}".format(function))

        # Get the response as SSE.Rows
        response_rows = utils.get_response_rows(response.values.tolist(), dtypes)

        # Get the number of bundles in the request
        num_request_bundles = len(request_list)

        # Get the number of rows in the response
        num_rows = len(response_rows)

        # Nothing to stream. Returning here also avoids calling range() with a zero step.
        if num_rows == 0:
            return

        # Calculate the number of rows to send per bundle.
        # With no request bundles, or fewer rows than bundles, everything goes in one bundle.
        if 0 < num_request_bundles <= num_rows:
            rows_per_bundle = num_rows // num_request_bundles
        else:
            rows_per_bundle = num_rows

        # Stream response as BundledRows
        for start in range(0, num_rows, rows_per_bundle):
            yield SSE.BundledRows(rows=response_rows[start:start + rows_per_bundle])
Beispiel #2
0
    def _spacy(request, context):
        """
        Execute functions for the spaCy natural language processing library.

        The function id obtained from the request header selects which
        SpaCyForQlik method is run. The result is converted to SSE rows and
        streamed back in bundles.

        :param request: an iterable sequence of RowData
        :param context: the request context; passed to ExtensionService._get_function_id and SpaCyForQlik
        :return: a generator of SSE.BundledRows; the columns depend on the function called (see comments below)
        :raises ValueError: if the function id is not one of those handled here
        """
        # Materialize the request so that it can be iterated over multiple times
        request_list = list(request)

        # Get the function id from the header to determine the variant being called
        function = ExtensionService._get_function_id(context)

        # Create an instance of the SpaCyForQlik class
        model = SpaCyForQlik(request_list, context)

        # Call the function based on the mapping in functions.json
        # The if conditions are grouped based on similar output structure
        if function in (30, 31):
            if function == 30:
                # Get entities from the default model
                response = model.get_entities()
            else:
                # function == 31: get entities from a named model
                response = model.get_entities(default=False)

            # Return six columns: key, entity, start, end, type, description
            dtypes = ["str", "str", "num", "num", "str", "str"]

        elif function == 32:
            # Retrain a model by supplying texts and labeled entities
            response = model.retrain()

            # Return four columns: model_name, subset, metric, value
            dtypes = ["str", "str", "str", "num"]

        else:
            # Fail with a clear message instead of an UnboundLocalError further down
            raise ValueError("Unsupported function id for _spacy: {0}".format(function))

        # Get the response as SSE.Rows
        response_rows = utils.get_response_rows(response.values.tolist(), dtypes)

        # Get the number of bundles in the request
        num_request_bundles = len(request_list)

        # Get the number of rows in the response
        num_rows = len(response_rows)

        # Nothing to stream. Returning here also avoids calling range() with a zero step.
        if num_rows == 0:
            return

        # Calculate the number of rows to send per bundle.
        # With no request bundles, or fewer rows than bundles, everything goes in one bundle.
        if 0 < num_request_bundles <= num_rows:
            rows_per_bundle = num_rows // num_request_bundles
        else:
            rows_per_bundle = num_rows

        # Stream response as BundledRows
        for start in range(0, num_rows, rows_per_bundle):
            yield SSE.BundledRows(rows=response_rows[start:start + rows_per_bundle])
Beispiel #3
0
    def _sklearn(request, context):
        """
        Execute functions for the sklearn machine learning library.

        The function id obtained from the request header selects which
        SKLearnForQlik method is run. The result is converted to SSE rows and
        streamed back in bundles.

        :param request: an iterable sequence of RowData
        :param context: the request context; passed to ExtensionService._get_function_id and SKLearnForQlik
        :return: a generator of SSE.BundledRows; the columns depend on the function called (see comments below)
        :raises ValueError: if the function id is not one of those handled here, or if the
                            metrics returned for functions 18/22 contain none of the expected columns
        """
        # Materialize the request so that it can be iterated over multiple times
        request_list = list(request)

        # Get the function id from the header to determine the variant being called
        function = ExtensionService._get_function_id(context)

        # Create an instance of the SKLearnForQlik class
        model = SKLearnForQlik(request_list, context)

        # Call the function based on the mapping in functions.json
        # The if conditions are grouped based on similar output structure
        if function in (9, 10, 21, 24):
            if function == 9:
                # Set up the model and save to disk
                response = model.setup()
            elif function == 21:
                # Set up a model with specific metric and dimensionality reduction arguments and save to disk
                response = model.setup(advanced=True)
            elif function == 10:
                # Set feature definitions for an existing model
                response = model.set_features()
            else:
                # function == 24: set a parameter grid for hyperparameter optimization
                response = model.set_param_grid()

            dtypes = ["str", "str", "str"]

        elif function == 11:
            # Return the feature definitions for an existing model
            response = model.get_features()
            dtypes = ["str", "num", "str", "str", "str", "str", "str"]

        elif function == 12:
            # Train and Test an existing model, saving the sklearn pipeline for further predictions
            response = model.fit()
            dtypes = ["str", "str", "str", "str", "num"]

        elif function in (14, 16, 19, 20, 27):
            if function == 14:
                # Provide predictions in a chart expression based on an existing model
                response = model.predict(load_script=False)
            elif function == 16:
                # Provide predictions probabilities in a chart expression based on an existing model
                response = model.predict(load_script=False, variant="predict_proba")
            elif function == 19:
                # Get a list of models based on a search string
                response = model.list_models()
            elif function == 20:
                # Get a string that can be evaluated to get the features expression for the predict function
                response = model.get_features_expression()
            else:
                # function == 27: get labels for clustering
                response = model.fit_transform(load_script=False)

            dtypes = ["str"]

        elif function in (15, 17, 28):
            if function == 15:
                # Provide predictions in the load script based on an existing model
                response = model.predict(load_script=True)
            elif function == 17:
                # Provide prediction probabilities in the load script based on an existing model
                response = model.predict(load_script=True, variant="predict_proba")
            else:
                # function == 28: provide labels for clustering
                response = model.fit_transform(load_script=True)

            dtypes = ["str", "str", "str"]

        elif function in (18, 22):
            if function == 18:
                response = model.get_metrics()
            else:
                # function == 22
                response = model.calculate_metrics()

            # The column layout depends on whether the metrics are for a classifier or a regressor,
            # and whether they come from cross validation or hold-out testing.
            # Order matters: the first marker column found in the response decides the layout.
            metric_layouts = (
                # classifier, cross validation
                ("accuracy_std", ["str", "str"] + ["num"] * 8),
                # classifier, hold-out testing
                ("accuracy", ["str", "str"] + ["num"] * 5),
                # regressor, cross validation
                ("r2_score_std", ["str"] + ["num"] * 10),
                # regressor, hold-out testing
                ("r2_score", ["str"] + ["num"] * 5),
            )

            for marker_column, layout in metric_layouts:
                if marker_column in response.columns:
                    dtypes = layout
                    break
            else:
                # Fail with a clear message instead of an UnboundLocalError
                raise ValueError(
                    "Unable to determine the estimator type from the metrics columns: {0}".format(
                        list(response.columns)))

        elif function == 23:
            # Get the confusion matrix for the classifier
            response = model.get_confusion_matrix()
            dtypes = ["str", "str", "str", "num"]

        elif function == 25:
            # Get the best parameters based on a grid search cross validation
            response = model.get_best_params()
            dtypes = ["str", "str"]

        elif function == 26:
            # Provide results from dimensionality reduction
            response = model.fit_transform(load_script=True)
            # Two string columns followed by one numeric column for every remaining column
            dtypes = ["str", "str"] + ["num"] * (response.shape[1] - 2)

        elif function == 29:
            # Explain the feature importances for the model
            response = model.explain_importances()
            dtypes = ["str", "str", "num"]

        else:
            # Fail with a clear message instead of an UnboundLocalError further down
            raise ValueError("Unsupported function id for _sklearn: {0}".format(function))

        # Get the response as SSE.Rows
        response_rows = utils.get_response_rows(response.values.tolist(), dtypes)

        # Get the number of bundles in the request
        num_request_bundles = len(request_list)

        # Get the number of rows in the response
        num_rows = len(response_rows)

        # Nothing to stream. Returning here also avoids calling range() with a zero step.
        if num_rows == 0:
            return

        # Calculate the number of rows to send per bundle.
        # With no request bundles, or fewer rows than bundles, everything goes in one bundle.
        if 0 < num_request_bundles <= num_rows:
            rows_per_bundle = num_rows // num_request_bundles
        else:
            rows_per_bundle = num_rows

        # Stream response as BundledRows
        for start in range(0, num_rows, rows_per_bundle):
            yield SSE.BundledRows(rows=response_rows[start:start + rows_per_bundle])
Beispiel #4
0
    def _prophet(request, context):
        r"""
        Provide a timeseries forecast using Facebook's Prophet library. Scalar function.
        :param request: an iterable sequence of RowData
        :param context: the request context; passed to ProphetForQlik
        :return: a generator of SSE.BundledRows holding the forecasted value for each row,
        :        or one column per forecast DataFrame column when the prediction is a DataFrame
        :
        :Qlik expression example:
        :<AAI Connection Name>.Prophet(MonthStartDate, sum(Value), 'return=yhat, freq=MS, debug=true')
        :The third argument in the Qlik expression is a string of parameters.
        :This should take the form of a comma separated string:
        :e.g 'return=yhat, freq=MS, debug=true' or 'return=yhat_upper, freq=MS'
        :
        :<AAI Connection Name>.Prophet_Holidays(ForecastDate, sum(Value), Holiday, 'return=yhat, freq=D, debug=true')
        :In the holidays variant the third argument is a field containing the holiday name or NULL for each row.
        :
        :Parameters accepted for the Prophet() function are: cap, floor, changepoint_prior_scale, interval_width,
        :lower_window, upper_window
        :
        :Parameters accepted for the make_future_dataframe() function are: freq
        :
        :For more information on these parameters go here: https://facebook.github.io/prophet/docs/quick_start.html
        :
        :Additional parameters used are: return, take_log, debug, load_script
        :
        :cap = 1000 : A logistic growth model can be defined using cap and floor. Values should be double or integer
        :changepoint_prior_scale = 0.05 : Decrease if the trend changes are being overfit, increase for underfit
        :interval_width = 0.08 : Set the width of the uncertainty intervals
        :lower_window = 1 : Only used with holidays. Extend the holiday by certain no. of days prior to the date.
        :upper_window = 1 : Only used with holidays. Extend the holiday by certain no. of days after the date.
        :freq = MS : The frequency of the time series. e.g. MS for Month Start. See the possible options here:
        :          : http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
        :return = yhat : Any of the options in the forecast result. You can see these options with debug=true
        :              : yhat, yhat_upper, yhat_lower : Forecast, upper and lower limits
        :              : y_then_yhat, y_then_yhat_upper, y_then_yhat_lower : Return forecast only for forecast periods
        :              : trend, trend_upper, trend_lower : Trend component of the timeseries
        :              : seasonal, seasonal_upper, seasonal_lower: Seasonal component of the timeseries
        :take_log = false : Apply logarithm to the values before the forecast. Default is true
        :debug = true : Print execution information to the terminal and logs in ..\logs\Prophet Log <n>.txt
        """
        # NOTE: the docstring is a raw string because the log path above contains backslashes
        # that would otherwise be read as (invalid) escape sequences.

        # Materialize the request so that it can be iterated over multiple times
        request_list = list(request)

        # Calculate timings for the components of the forecasting
        # The results will be stored in ..\logs\Prophet Performance Log.txt
        # The request_list line above is not timed as the generator can only be iterated once
        # ProphetForQlik.timeit(request_list)

        # Create an instance of the ProphetForQlik class
        # This will take the request data from Qlik and prepare it for forecasting
        predictor = ProphetForQlik(request_list, context)

        # Calculate the forecast
        forecast = predictor.predict()

        # Set the data types of the output.
        # The response is a DataFrame when the load_script=true argument is passed in the Qlik expression;
        # in that case each column is typed individually, otherwise a single numeric column is returned.
        if isinstance(forecast, pd.DataFrame):
            dtypes = ['num' if is_numeric_dtype(dt) else 'str' for dt in forecast.dtypes]
        else:
            dtypes = ['num']

        # Get the response as SSE.Rows
        response_rows = utils.get_response_rows(forecast.values.tolist(), dtypes)

        # Get the number of bundles in the request
        num_request_bundles = len(request_list)

        # Get the number of rows in the response
        num_rows = len(response_rows)

        # Nothing to stream. Returning here also avoids calling range() with a zero step.
        if num_rows == 0:
            return

        # Calculate the number of rows to send per bundle.
        # With no request bundles, or fewer rows than bundles, everything goes in one bundle.
        if 0 < num_request_bundles <= num_rows:
            rows_per_bundle = num_rows // num_request_bundles
        else:
            rows_per_bundle = num_rows

        # Stream response as BundledRows
        for start in range(0, num_rows, rows_per_bundle):
            yield SSE.BundledRows(rows=response_rows[start:start + rows_per_bundle])
Beispiel #5
0
    def _sklearn(request, context):
        """
        Execute functions for the sklearn machine learning library.

        The function id obtained from the request header selects which
        SKLearnForQlik method is run. The result is converted to SSE rows and
        returned in a single bundle.

        :param request: an iterable sequence of RowData
        :param context: the request context; passed to ExtensionService._get_function_id and SKLearnForQlik
        :return: a generator yielding one SSE.BundledRows; the columns depend on the function called
        :raises ValueError: if the function id is not one of those handled here, or if the
                            metrics returned for functions 18/22 contain none of the expected columns
        """
        # Materialize the request so that it can be iterated over multiple times
        request_list = list(request)

        # Get the function id from the header to determine the variant being called
        function = ExtensionService._get_function_id(context)

        # Create an instance of the SKLearnForQlik class
        model = SKLearnForQlik(request_list, context)

        # Call the function based on the mapping in functions.json
        # The IF conditions are grouped based on similar output structure
        if function in (9, 10, 21, 24):
            if function == 9:
                # Set up the model and save to disk
                response = model.setup()
            elif function == 21:
                # Set up a model with specific metric and dimensionality reduction arguments and save to disk
                response = model.setup(advanced=True)
            elif function == 10:
                # Set feature definitions for an existing model
                response = model.set_features()
            else:
                # function == 24: set a parameter grid for hyperparameter optimization
                response = model.set_param_grid()

            dtypes = ["str", "str", "str"]

        elif function == 11:
            # Return the feature definitions for an existing model
            response = model.get_features()
            dtypes = ["str", "num", "str", "str", "str", "str", "num"]

        elif function == 12:
            # Train and Test an existing model, saving the sklearn pipeline for further predictions
            response = model.fit()
            dtypes = ["str", "str", "str", "str", "num"]

        elif function in (14, 16, 19, 20):
            if function == 14:
                # Provide predictions in a chart expression based on an existing model
                response = model.predict(load_script=False)
            elif function == 16:
                # Provide predictions probabilities in a chart expression based on an existing model
                response = model.predict(load_script=False, variant="predict_proba")
            elif function == 19:
                # Get a list of models based on a search string
                response = model.list_models()
            else:
                # function == 20: get a string that can be evaluated to get the features expression
                # for the predict function
                response = model.get_features_expression()

            dtypes = ["str"]

        elif function in (15, 17):
            if function == 15:
                # Provide predictions in the load script based on an existing model
                response = model.predict(load_script=True)
            else:
                # function == 17: provide prediction probabilities in the load script based on an existing model
                response = model.predict(load_script=True, variant="predict_proba")

            dtypes = ["str", "str", "str"]

        elif function in (18, 22):
            if function == 18:
                response = model.get_metrics()
            else:
                # function == 22
                response = model.calculate_metrics()

            # The column layout depends on whether the metrics are for a classifier or a regressor
            if "accuracy" in response.columns:
                # Classifier metrics
                dtypes = ["str", "str", "num", "num", "num", "num", "num"]
            elif "r2_score" in response.columns:
                # Regressor metrics
                dtypes = ["str", "num", "num", "num", "num", "num"]
            else:
                # Fail with a clear message instead of an UnboundLocalError
                raise ValueError(
                    "Unable to determine the estimator type from the metrics columns: {0}".format(
                        list(response.columns)))

        elif function == 23:
            # Get the confusion matrix for the classifier
            response = model.get_confusion_matrix()
            dtypes = ["str", "str", "str", "num"]

        elif function == 25:
            # Get the best parameters
            response = model.get_best_params()
            dtypes = ["str", "str"]

        else:
            # Fail with a clear message instead of an UnboundLocalError further down
            raise ValueError("Unsupported function id for _sklearn: {0}".format(function))

        # Get the response as SSE.Rows
        response_rows = utils.get_response_rows(response.values.tolist(), dtypes)

        # Yield Row data as Bundled rows
        yield SSE.BundledRows(rows=response_rows)