def _misc(request, context):
    """
    Execute functions that provide common data science capabilities for Qlik.

    :param request: an iterable sequence of RowData
    :param context: the call context; used to look up the function id and passed on to CommonFunction
    :return: a generator of SSE.BundledRows. The columns depend on the function called, refer to comments below
    :raises ValueError: if the function id is not one of those handled here
    """

    # Get a list from the generator object so that it can be iterated over multiple times
    request_list = list(request)

    # Get the function id from the header to determine the variant being called
    function = ExtensionService._get_function_id(context)

    # Create an instance of the CommonFunction class
    handle = CommonFunction(request_list, context)

    # Call the function based on the mapping in functions.json
    # The if conditions are grouped based on similar output structure
    if function == 33:
        # Get association rules
        response = handle.association_rules()
        # Return six columns: 'rule', 'rule_lhs', 'rule_rhs', 'support', 'confidence', 'lift'
        dtypes = ["str", "str", "str", "num", "num", "num"]
    elif function == 43:
        # Provide predictions in a chart expression based on an existing model
        response = handle.predict(load_script=False)
        # Return predictions
        dtypes = ["str"]
    elif function == 44:
        # Provide predictions in the load script based on an existing model
        response = handle.predict(load_script=True)
        # Return the model name, keys and predictions
        dtypes = ["str", "str", "str"]
    elif function == 45:
        # Get a string that can be evaluated to get the features expression for the predict function
        response = handle.get_features_expression()
        # Return the feature expression
        dtypes = ["str"]
    else:
        # Fail with a clear message instead of an UnboundLocalError on `response` further down
        raise ValueError("Unsupported function id for _misc: {}".format(function))

    # Get the response as SSE.Rows
    response_rows = utils.get_response_rows(response.values.tolist(), dtypes)

    # Get the number of bundles in the request
    num_request_bundles = len(request_list)

    # Get the number of rows in the response
    num_rows = len(response_rows)

    # Calculate the number of rows to send per bundle.
    # An empty request must not be used as a divisor, so it falls back to a single bundle.
    if num_request_bundles and num_rows >= num_request_bundles:
        rows_per_bundle = num_rows // num_request_bundles
    else:
        rows_per_bundle = num_rows

    # range() rejects a step of zero, so use a step of at least one.
    # With an empty response the loop simply yields nothing.
    step = max(rows_per_bundle, 1)

    # Stream response as BundledRows
    for i in range(0, num_rows, step):
        # Yield Row data as Bundled rows
        yield SSE.BundledRows(rows=response_rows[i:i + step])
def _spacy(request, context):
    """
    Execute functions for the spaCy natural language processing library.

    :param request: an iterable sequence of RowData
    :param context: the call context; used to look up the function id and passed on to SpaCyForQlik
    :return: a generator of SSE.BundledRows. The columns depend on the function called, refer to comments below
    :raises ValueError: if the function id is not one of those handled here
    """

    # Get a list from the generator object so that it can be iterated over multiple times
    request_list = list(request)

    # Get the function id from the header to determine the variant being called
    function = ExtensionService._get_function_id(context)

    # Create an instance of the SpaCyForQlik class
    model = SpaCyForQlik(request_list, context)

    # Call the function based on the mapping in functions.json
    # The if conditions are grouped based on similar output structure
    if function == 30:
        # Get entities from the default model
        response = model.get_entities()
        # Return six columns: key, entity, start, end, type, description
        dtypes = ["str", "str", "num", "num", "str", "str"]
    elif function == 31:
        # Get entities from a named model
        response = model.get_entities(default=False)
        # Return six columns: key, entity, start, end, type, description
        dtypes = ["str", "str", "num", "num", "str", "str"]
    elif function == 32:
        # Retrain a model by supplying texts and labeled entities
        response = model.retrain()
        # Return four columns: model_name, subset, metric, value
        dtypes = ["str", "str", "str", "num"]
    else:
        # Fail with a clear message instead of an UnboundLocalError on `response` further down
        raise ValueError("Unsupported function id for _spacy: {}".format(function))

    # Get the response as SSE.Rows
    response_rows = utils.get_response_rows(response.values.tolist(), dtypes)

    # Get the number of bundles in the request
    num_request_bundles = len(request_list)

    # Get the number of rows in the response
    num_rows = len(response_rows)

    # Calculate the number of rows to send per bundle.
    # An empty request must not be used as a divisor, so it falls back to a single bundle.
    if num_request_bundles and num_rows >= num_request_bundles:
        rows_per_bundle = num_rows // num_request_bundles
    else:
        rows_per_bundle = num_rows

    # range() rejects a step of zero, so use a step of at least one.
    # With an empty response the loop simply yields nothing.
    step = max(rows_per_bundle, 1)

    # Stream response as BundledRows
    for i in range(0, num_rows, step):
        # Yield Row data as Bundled rows
        yield SSE.BundledRows(rows=response_rows[i:i + step])
def _sklearn(request, context):
    """
    Execute functions for the sklearn machine learning library.

    :param request: an iterable sequence of RowData
    :param context: the call context; used to look up the function id and passed on to SKLearnForQlik
    :return: a generator of SSE.BundledRows. The columns depend on the function called, refer to comments below
    :raises ValueError: if the function id is not handled here, or if a metrics response
                        contains none of the expected accuracy / r2_score columns
    """

    # Get a list from the generator object so that it can be iterated over multiple times
    request_list = list(request)

    # Get the function id from the header to determine the variant being called
    function = ExtensionService._get_function_id(context)

    # Create an instance of the SKLearnForQlik class
    model = SKLearnForQlik(request_list, context)

    # Call the function based on the mapping in functions.json
    # The if conditions are grouped based on similar output structure
    if function in (9, 10, 21, 24):
        if function == 9:
            # Set up the model and save to disk
            response = model.setup()
        elif function == 21:
            # Set up a model with specific metric and dimensionality reduction arguments and save to disk
            response = model.setup(advanced=True)
        elif function == 10:
            # Set feature definitions for an existing model
            response = model.set_features()
        else:  # function == 24
            # Set a parameter grid for hyperparameter optimization
            response = model.set_param_grid()

        dtypes = ["str", "str", "str"]

    elif function == 11:
        # Return the feature definitions for an existing model
        response = model.get_features()
        dtypes = ["str", "num", "str", "str", "str", "str", "str"]

    elif function == 12:
        # Train and Test an existing model, saving the sklearn pipeline for further predictions
        response = model.fit()
        dtypes = ["str", "str", "str", "str", "num"]

    elif function in (14, 16, 19, 20, 27):
        if function == 14:
            # Provide predictions in a chart expression based on an existing model
            response = model.predict(load_script=False)
        elif function == 16:
            # Provide predictions probabilities in a chart expression based on an existing model
            response = model.predict(load_script=False, variant="predict_proba")
        elif function == 19:
            # Get a list of models based on a search string
            response = model.list_models()
        elif function == 20:
            # Get a string that can be evaluated to get the features expression for the predict function
            response = model.get_features_expression()
        else:  # function == 27
            # Get labels for clustering
            response = model.fit_transform(load_script=False)

        dtypes = ["str"]

    elif function in (15, 17, 28):
        if function == 15:
            # Provide predictions in the load script based on an existing model
            response = model.predict(load_script=True)
        elif function == 17:
            # Provide prediction probabilities in the load script based on an existing model
            response = model.predict(load_script=True, variant="predict_proba")
        else:  # function == 28
            # Provide labels for clustering
            response = model.fit_transform(load_script=True)

        dtypes = ["str", "str", "str"]

    elif function in (18, 22):
        if function == 18:
            response = model.get_metrics()
        else:  # function == 22
            response = model.calculate_metrics()

        # Check whether the metrics are for a classifier or regressor and whether they come from
        # cross validation or hold-out testing. The "_std" columns are tested first, and the order
        # of these checks is preserved from the original implementation.
        metric_columns = response.columns
        if "accuracy_std" in metric_columns:
            # Classifier metrics from cross validation
            dtypes = ["str", "str"] + ["num"] * 8
        elif "accuracy" in metric_columns:
            # Classifier metrics from hold-out testing
            dtypes = ["str", "str"] + ["num"] * 5
        elif "r2_score_std" in metric_columns:
            # Regressor metrics from cross validation
            dtypes = ["str"] + ["num"] * 10
        elif "r2_score" in metric_columns:
            # Regressor metrics from hold-out testing
            dtypes = ["str"] + ["num"] * 5
        else:
            # Fail with a clear message instead of an UnboundLocalError on the estimator type
            raise ValueError(
                "Unable to determine the estimator type from the metric columns: {}".format(
                    list(metric_columns)
                )
            )

    elif function == 23:
        # Get the confusion matrix for the classifier
        response = model.get_confusion_matrix()
        dtypes = ["str", "str", "str", "num"]

    elif function == 25:
        # Get the best parameters based on a grid search cross validation
        response = model.get_best_params()
        dtypes = ["str", "str"]

    elif function == 26:
        # Provide results from dimensionality reduction
        response = model.fit_transform(load_script=True)
        # Two string columns followed by one numeric column for every remaining column in the response
        dtypes = ["str", "str"] + ["num"] * (response.shape[1] - 2)

    elif function == 29:
        # Explain the feature importances for the model
        response = model.explain_importances()
        dtypes = ["str", "str", "num"]

    else:
        # Fail with a clear message instead of an UnboundLocalError on `response` further down
        raise ValueError("Unsupported function id for _sklearn: {}".format(function))

    # Get the response as SSE.Rows
    response_rows = utils.get_response_rows(response.values.tolist(), dtypes)

    # Get the number of bundles in the request
    num_request_bundles = len(request_list)

    # Get the number of rows in the response
    num_rows = len(response_rows)

    # Calculate the number of rows to send per bundle.
    # An empty request must not be used as a divisor, so it falls back to a single bundle.
    if num_request_bundles and num_rows >= num_request_bundles:
        rows_per_bundle = num_rows // num_request_bundles
    else:
        rows_per_bundle = num_rows

    # range() rejects a step of zero, so use a step of at least one.
    # With an empty response the loop simply yields nothing.
    step = max(rows_per_bundle, 1)

    # Stream response as BundledRows
    for i in range(0, num_rows, step):
        # Yield Row data as Bundled rows
        yield SSE.BundledRows(rows=response_rows[i:i + step])
def _prophet(request, context):
    r"""
    Provide a timeseries forecast using Facebook's Prophet library. Scalar function.

    :param request: an iterable sequence of RowData
    :param context: the call context, passed on to ProphetForQlik
    :return: a generator of SSE.BundledRows holding the forecasted value for each row
    :
    :Qlik expression example:
    :<AAI Connection Name>.Prophet(MonthStartDate, sum(Value), 'return=yhat, freq=MS, debug=true')
    :The third argument in the Qlik expression is a string of parameters.
    :This should take the form of a comma separated string:
    :e.g 'return=yhat, freq=MS, debug=true' or 'return=yhat_upper, freq=MS'
    :
    :<AAI Connection Name>.Prophet_Holidays(ForecastDate, sum(Value), Holiday, 'return=yhat, freq=D, debug=true')
    :In the holidays variant the third argument is a field containing the holiday name or NULL for each row.
    :
    :Parameters accepted for the Prophet() function are: cap, floor, changepoint_prior_scale, interval_width,
    :lower_window, upper_window
    :
    :Parameters accepted for the make_future_dataframe() function are: freq
    :
    :For more information on these parameters go here: https://facebook.github.io/prophet/docs/quick_start.html
    :
    :Additional parameters used are: return, take_log, debug, load_script
    :
    :cap = 1000 : A logistic growth model can be defined using cap and floor. Values should be double or integer
    :changepoint_prior_scale = 0.05 : Decrease if the trend changes are being overfit, increase for underfit
    :interval_width = 0.08 : Set the width of the uncertainty intervals
    :lower_window = 1 : Only used with holidays. Extend the holiday by certain no. of days prior to the date.
    :upper_window = 1 : Only used with holidays. Extend the holiday by certain no. of days after the date.
    :freq = MS : The frequency of the time series. e.g. MS for Month Start. See the possible options here:
    :          : http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    :return = yhat : Any of the options in the forecast result. You can see these options with debug=true
    :              : yhat, yhat_upper, yhat_lower : Forecast, upper and lower limits
    :              : y_then_yhat, y_then_yhat_upper, y_then_yhat_lower : Return forecast only for forecast periods
    :              : trend, trend_upper, trend_lower : Trend component of the timeseries
    :              : seasonal, seasonal_upper, seasonal_lower: Seasonal component of the timeseries
    :take_log = false : Apply logarithm to the values before the forecast. Default is true
    :debug = true : Print execution information to the terminal and logs in ..\logs\Prophet Log <n>.txt
    """
    # NOTE: the docstring is a raw string because the log path above contains backslashes,
    # which would otherwise be parsed as (invalid) escape sequences.

    # Get a list from the generator object so that it can be iterated over multiple times
    request_list = list(request)

    # Calculate timings for the components of the forecasting
    # The results will be stored in ..\logs\Prophet Performance Log.txt
    # The request_list line above is not timed as the generator can only be iterated once
    # ProphetForQlik.timeit(request_list)

    # Create an instance of the ProphetForQlik class
    # This will take the request data from Qlik and prepare it for forecasting
    predictor = ProphetForQlik(request_list, context)

    # Calculate the forecast and store in a Pandas series
    forecast = predictor.predict()

    # Set the data types of the output.
    # The response is a DataFrame when the load_script=true argument is passed in the Qlik expression;
    # in that case each column is typed individually. Otherwise a single numeric column is returned.
    if isinstance(forecast, pd.DataFrame):
        dtypes = ['num' if is_numeric_dtype(dt) else 'str' for dt in forecast.dtypes]
    else:
        dtypes = ['num']

    # Get the response as SSE.Rows
    response_rows = utils.get_response_rows(forecast.values.tolist(), dtypes)

    # Get the number of bundles in the request
    num_request_bundles = len(request_list)

    # Get the number of rows in the response
    num_rows = len(response_rows)

    # Calculate the number of rows to send per bundle.
    # An empty request must not be used as a divisor, so it falls back to a single bundle.
    if num_request_bundles and num_rows >= num_request_bundles:
        rows_per_bundle = num_rows // num_request_bundles
    else:
        rows_per_bundle = num_rows

    # range() rejects a step of zero, so use a step of at least one.
    # With an empty response the loop simply yields nothing.
    step = max(rows_per_bundle, 1)

    # Stream response as BundledRows
    for i in range(0, num_rows, step):
        # Yield Row data as Bundled rows
        yield SSE.BundledRows(rows=response_rows[i:i + step])
def _sklearn(request, context):
    """
    Execute functions for the sklearn machine learning library.

    :param request: an iterable sequence of RowData
    :param context: the call context; used to look up the function id and passed on to SKLearnForQlik
    :return: a generator yielding a single SSE.BundledRows with all response rows.
             The columns depend on the function called, refer to comments below
    :raises ValueError: if the function id is not handled here, or if a metrics response
                        contains neither an accuracy nor an r2_score column
    """
    # NOTE(review): another definition named `_sklearn` appears earlier in this source. If both
    # live in the same scope, this later definition replaces the earlier one - confirm which is intended.

    # Get a list from the generator object so that it can be iterated over multiple times
    request_list = list(request)

    # Get the function id from the header to determine the variant being called
    function = ExtensionService._get_function_id(context)

    # Create an instance of the SKLearnForQlik class
    model = SKLearnForQlik(request_list, context)

    # Call the function based on the mapping in functions.json
    # The if conditions are grouped based on similar output structure
    if function in (9, 10, 21, 24):
        if function == 9:
            # Set up the model and save to disk
            response = model.setup()
        elif function == 21:
            # Set up a model with specific metric and dimensionality reduction arguments and save to disk
            response = model.setup(advanced=True)
        elif function == 10:
            # Set feature definitions for an existing model
            response = model.set_features()
        else:  # function == 24
            # Set a parameter grid for hyperparameter optimization
            response = model.set_param_grid()

        dtypes = ["str", "str", "str"]

    elif function == 11:
        # Return the feature definitions for an existing model
        response = model.get_features()
        dtypes = ["str", "num", "str", "str", "str", "str", "num"]

    elif function == 12:
        # Train and Test an existing model, saving the sklearn pipeline for further predictions
        response = model.fit()
        dtypes = ["str", "str", "str", "str", "num"]

    elif function in (14, 16, 19, 20):
        if function == 14:
            # Provide predictions in a chart expression based on an existing model
            response = model.predict(load_script=False)
        elif function == 16:
            # Provide predictions probabilities in a chart expression based on an existing model
            response = model.predict(load_script=False, variant="predict_proba")
        elif function == 19:
            # Get a list of models based on a search string
            response = model.list_models()
        else:  # function == 20
            # Get a string that can be evaluated to get the features expression for the predict function
            response = model.get_features_expression()

        dtypes = ["str"]

    elif function in (15, 17):
        if function == 15:
            # Provide predictions in the load script based on an existing model
            response = model.predict(load_script=True)
        else:  # function == 17
            # Provide prediction probabilities in the load script based on an existing model
            response = model.predict(load_script=True, variant="predict_proba")

        dtypes = ["str", "str", "str"]

    elif function in (18, 22):
        if function == 18:
            response = model.get_metrics()
        else:  # function == 22
            response = model.calculate_metrics()

        if "accuracy" in response.columns:
            # The metrics are for a classifier
            dtypes = ["str", "str", "num", "num", "num", "num", "num"]
        elif "r2_score" in response.columns:
            # The metrics are for a regressor
            dtypes = ["str", "num", "num", "num", "num", "num"]
        else:
            # Fail with a clear message instead of an UnboundLocalError on the estimator type
            raise ValueError(
                "Unable to determine the estimator type from the metric columns: {}".format(
                    list(response.columns)
                )
            )

    elif function == 23:
        # Get the confusion matrix for the classifier
        response = model.get_confusion_matrix()
        dtypes = ["str", "str", "str", "num"]

    elif function == 25:
        # Get the best parameters for the model
        response = model.get_best_params()
        dtypes = ["str", "str"]

    else:
        # Fail with a clear message instead of an UnboundLocalError on `response_rows` further down
        raise ValueError("Unsupported function id for _sklearn: {}".format(function))

    # Get the response as SSE.Rows
    response_rows = utils.get_response_rows(response.values.tolist(), dtypes)

    # Yield Row data as Bundled rows
    yield SSE.BundledRows(rows=response_rows)