Example #1
 def initialize_model(self, initializationParameters):
     """
     Creates and returns a Task which initializes the model with the
     optional parameters provided
     
     :param initializationParameters: The parameters used for initialization
     :return: The created Task object
     """
     logger.debug("Initialize model called")
     try:
         params = initializationParameters
         if not isinstance(params, InitParams) and cx.request.is_json:
             params = datautils.deserialize_model(cx.request.get_json(), InitParams)
         assert isinstance(params, InitParams)
        
         task = ModelInstanceTask(operation='initialize',
             parameters={"objectives": params.objectives, 
                         "props": params.model_properties, 
                         "hparams": params.hyperparameters})
         logger.debug('Created initialize model task', extra={'model_task': task})
     except RuntimeError as inst:
         msg = "Error during model initialization: %s" % str(inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500
     
     self.add_task(task)
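
The task parameters built above come straight from the fields of InitParams. A minimal, illustrative sketch of exercising this handler in-process; the keyword-argument constructor of the generated InitParams model and the endpoint_service name are assumptions, not taken from the source:

    # Hypothetical usage sketch; InitParams kwargs and endpoint_service are assumptions.
    params = InitParams(
        objectives=["training"],                       # what the model will be asked to do
        model_properties={"heartbeat_interval": 30},   # ecosystem/endpoint settings, not hyperparameters
        hyperparameters={"learning_rate": 0.001},      # passed to the model itself
    )
    endpoint_service.initialize_model(params)          # queues the 'initialize' ModelInstanceTask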
Example #2
 def evaluate(self, initParams):  # noqa: E501
     """
     Performs the evaluation defined for this plugin
 
     :param initParams: Init Parameters for the evaluation. Based on EvaluationSpecificationInitParams specification
     :type initParams: dict | bytes
 
     :rtype: None
     """
     logger.debug("evaluation called")
      
     try:
         if not isinstance(initParams, EvaluationSpecificationInitParams) and cx.request.is_json:
             initParams = mistk.data.utils.deserialize_model(cx.request.get_json(), EvaluationSpecificationInitParams)
         assert isinstance(initParams, EvaluationSpecificationInitParams)
         
         task = EvaluationPluginTask(operation='evaluate',
             parameters={"ground_truth_path": initParams.ground_truth_path, 
                         "input_data_path": initParams.input_data_path,
                         "evaluation_input_format": initParams.evaluation_input_format,
                         "evaluation_path": initParams.evaluation_path,
                         "assessment_type": initParams.assessment_type,
                         "metrics": initParams.metrics,
                         "properties": initParams.properties})
     except RuntimeError as inst:
         msg = "Runtime Error while performing evaluation for plugin: %s" % str(inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500
     except Exception as ex:
         msg = "Exception while performing evaluation for plugin: %s" % str(ex)
         logger.exception(msg)
         return ServiceError(500, msg), 500
     
     self.add_task(task)
Example #3
 def get_status(self, watch=None, resourceVersion=None):  # noqa: E501
     """
     Retrieves the status of the evaluation plugin
 
     :rtype: EvaluationInstanceStatus
     """
     logger.debug("Get status called")
     try:
         with self._status_lock.reader_lock:
             if watch:
                 return Response(watch_manager.watch(
                     'status', resourceVersion, self._status),
                                 mimetype="application/json")
             else:
                 return self._status
     except RuntimeError as inst:
         msg = "Runtime Error while retrieving status of evaluation plugin: %s" % str(
             inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500
     except Exception as ex:
         msg = "Exception while retrieving status of evaluation plugin: %s" % str(
             ex)
         logger.exception(msg)
         return ServiceError(500, msg), 500
Example #4
    def _do_evaluate(self, assessment_type, metrics, input_data_path,
                     evaluation_input_format, ground_truth_path,
                     evaluation_path, properties):
        """
    Performs metrics' evaluation using the predictions and ground truth files provided.
    Stored the assessment results as a JSON file in the evaluation_path
    
    :param assessment_type: The evaluation type. One of {'BinaryClassification', 
        'MultilabelClassification', 'MulticlassClassification', 'Regression'}
    :param metrics: Specific metrics to evaluate against instead of all metrics defined by assessment_type
    :param input_data_path: Path to input data for the evaluation
    :param evaluation_input_format: The format of the input data
    :param ground_truth_path: The directory path where the ground_truth.csv file is located
    :param evaluation_path: A directory path to where the evaluation.json output file will be stored
    :param properties: A dictionary of key value pairs for evaluation plugin arguments. 
    """

        logger.debug("_do_evaluation started")
        try:
            logger.info("Calling do_evaluation method.")
            self.do_evaluate(assessment_type, metrics, input_data_path,
                             evaluation_input_format, ground_truth_path,
                             evaluation_path, properties)
            self.ready()
        except Exception as ex:  #pylint: disable=broad-except
            logger.exception("Error running do_evaluation")
            self.fail(str(ex))
        logger.debug("_do_evaluation complete")
Example #5
    def transform(self, initParams):  # noqa: E501
        """
        Performs the transforms defined for this plugin
    
        :param initParams: Init Parameters for the transform. Based on TransformSpecificationInitParams specification
        :type initParams: dict | bytes
    
        :rtype: None
        """
        logger.debug("transform called")
        try:
            if not isinstance(
                    initParams,
                    TransformSpecificationInitParams) and cx.request.is_json:
                initParams = mistk.data.utils.deserialize_model(
                    cx.request.get_json(), TransformSpecificationInitParams)
            assert isinstance(initParams, TransformSpecificationInitParams)

            task = TransformPluginTask(operation='transform',
                                       parameters={
                                           "inputDirs":
                                           initParams.input_datasets,
                                           "outputDir":
                                           initParams.output_dataset,
                                           "properties": initParams.properties
                                       })
        except RuntimeError as inst:
            msg = "Error during transform. %s" % str(inst)
            logger.exception(msg)
            return ServiceError(500, msg), 500

        self.add_task(task)
Example #6
 def terminate(self):
     """
     Shuts down the model and the endpoint service
     """
     logger.debug("Terminate called")
     try:
         self.add_task(ModelInstanceTask(operation="terminate"))
     except RuntimeError as inst:
         return ServiceError(500, str(inst)), 500
Example #7
 def delete_task(self, task):
     """
     Deletes the specified Task.
     This function is currently not supported
     
     :param task: The task to delete
     """
     logger.debug("Ignoring delete task request", extra={'task':task})
     return ServiceError(501, "Not currently supported"), 501
Example #8
    def initialized(self):
        """
        Triggers the model to enter the initialized state.  It is expected that this 
        method will only be called by the implementing model subclass.

        This method should not be implemented or overridden by subclasses.  It will be
        created by the state machine.
        """
        logger.debug('Called abstractModel.initialized')
        pass
Example #9
 def generate(self):
     """
     Creates and returns a Task which kicks off a generation activity
     
     :return: The created Task object
     """
     logger.debug("Generate called")
     try:
         self.add_task(ModelInstanceTask(operation="generate"))
     except RuntimeError as inst:
         return ServiceError(500, str(inst)), 500
Example #10
 def terminate(self):  # noqa: E501
     """
     Shuts down the transform plugin and cleans up any resources.
     
     :rtype: None
     """
     logger.debug("Terminate called")
     try:
         self.add_task(TransformPluginTask(operation="terminate"))
     except RuntimeError as inst:
         return ServiceError(500, str(inst)), 500
Example #11
 def terminate(self):
     """
     Shuts down the model and the endpoint service
     """
     logger.debug("Terminate called")
     try:
         self.add_task(ModelInstanceTask(operation="terminate"))
     except RuntimeError as inst:
         msg = "Error while terminating the model: %s" % str(inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500
Example #12
 def new_state_entered(self, *args, **kwargs):
     """
     Notifies the endpoint service that the current state of the state machine has been updated
     
     :param args: Optional non-keyworded variable length arguments to pass in
     :param kwargs: Optional keyworded variable length arguments to pass in
     """
     logger.debug("New state entered - %s {args: %s}", self.state, args)
     if self.state == 'failed' and len(args) > 0:
         self.endpoint_service.update_state(self.state, payload=args[0])
     else:
         self.endpoint_service.update_state(self.state)
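
new_state_entered is written as a state-change callback. A minimal sketch of how such a callback could be wired up with the transitions library; the states, FakePlugin, and FakeEndpointService names are illustrative, not taken from the source:

    from transitions import Machine

    class FakeEndpointService:          # illustrative stand-in for the real endpoint service
        def update_state(self, state, payload=None):
            print("state ->", state, payload)

    class FakePlugin:                   # illustrative stand-in for the class defining new_state_entered
        def __init__(self, endpoint_service):
            self.endpoint_service = endpoint_service

        def new_state_entered(self, *args, **kwargs):
            if self.state == 'failed' and len(args) > 0:
                self.endpoint_service.update_state(self.state, payload=args[0])
            else:
                self.endpoint_service.update_state(self.state)

    plugin = FakePlugin(FakeEndpointService())
    Machine(model=plugin, states=['started', 'ready', 'failed'],
            transitions=[{'trigger': 'fail', 'source': '*', 'dest': 'failed'}],
            initial='started', after_state_change='new_state_entered')
    plugin.fail("out of memory")        # the callback receives the trigger args; payload becomes args[0]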
Example #13
 def _do_transform(self, inputDirs, outputDir, properties):
     """
     Executes the transform activity
     """
     logger.debug("_do_transform started")
     try:
         logger.info("Calling do_transform method.")
         self.do_transform(inputDirs, outputDir, properties)
         self.ready()
     except Exception as ex:  #pylint: disable=broad-except
         logger.exception("Error running do_transform")
         self.fail(str(ex))
     logger.debug("_do_transform complete")
Example #14
 def _do_train(self):
     """
     Executes/resumes the training activity
     """
     logger.debug("_do_train started")
     try:
         logger.info("Calling do_train method.")
         self.do_train()
         self.ready()
     except Exception as ex:  #pylint: disable=broad-except
         logger.exception("Error running do_train")
         self.fail(str(ex))
     logger.debug("_do_train complete")
Example #15
    def metrics(self):
        """
        Metrics that can be performed by the evaluate method
        """
        logger.debug("metrics started")
        metrics_list = None  # guard against returning an undefined name if the lookup fails
        try:
            metrics_list = self.plugin_manager.get_metrics_list()
            logger.debug("metrics complete")
        except Exception as ex:
            logger.exception("Error running metrics")
            self.fail(str(ex))

        return metrics_list
Example #16
 def predict(self):
     """
     Creates and returns a Task which kicks off a prediction activity
     
     :return: The created Task object
     """
     logger.debug("Predict called")
     try:
         self.add_task(ModelInstanceTask(operation="predict"))
     except RuntimeError as inst:
         msg = "Error while kicking off prediction activity: %s" % str(inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500
Example #17
 def build_model(self, modelPath=None):
     """
     Creates and returns a Task which builds the model using the modelPath provided
     
     :param modelPath: The path to where the model image can be found
     :return: The created Task object
     """
     logger.debug("build model called")
     try:
         task = ModelInstanceTask(operation="build_model",
                                  parameters={"path": modelPath})
         self.add_task(task)
     except RuntimeError as inst:
         return ServiceError(500, str(inst)), 500
Example #18
 def initialize(self, objectives, props, hparams):
     """
     Triggers the model to enter the initializing state.  A subsequent call to
     do_initialize with the given parameters will be made as a result.
     This method should not be implemented or overridden by subclasses.  It will be
     created by the state machine.
     
     :param objectives: The objectives of this model.
     :param props: A dictionary that is parsed from a JSON string. These are settings that are passed from the ecosystem, 
         but are not really considered hyperparameters.  They are not used by the model, but rather the endpoint itself 
         (e.g., where should heartbeats be sent and how often, etc).
     :param hparams: A dictionary that is parsed from a JSON string. These are the hyperparameters that are used by the model.
     """
     logger.debug('Called abstractModel.initialize')
     pass
Example #19
 def reset(self):
     """
     Resets the model
     """
     logger.debug("Reset called")
     try:
         def generate():
             try:
                 yield "resetting..."
             finally:
                 os.execv(sys.executable, ['python'] + sys.argv)
         return Response(generate(), mimetype='text/plain')
     except RuntimeError as inst:
         msg = "Error while resetting the model: %s" % str(inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500 
Example #20
 def terminate(self):  # noqa: E501
     """
     Shuts down the evaluation plugin and cleans up any resources.
     
     :rtype: None
     """
     logger.debug("Terminate called")
     try:
         self.add_task(EvaluationPluginTask(operation="terminate"))
     except RuntimeError as inst:
         msg = "Runtime Error while terminating plugin evaluation and cleaning up any resources: %s" % str(inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500
     except Exception as ex:
         msg = "Exception while terminating plugin evaluation and cleaning up any resources: %s" % str(ex)
         logger.exception(msg)
         return ServiceError(500, msg), 500
Example #21
 def get_metrics(self):
     """
     Returns metrics that can be evaluated for this plugin
 
     :rtype: List[Metric]
     """
     logger.debug("get_metrics called")
     try:
         return self.plugin_manager._metric_list
     except RuntimeError as inst:
         msg = "Runtime Error while performing evaluation for plugin: %s" % str(inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500
     except Exception as ex:
         msg = "Exception while performing evaluation for plugin: %s" % str(ex)
         logger.exception(msg)
         return ServiceError(500, msg), 500
Example #22
 def get_status(self, watch=None, resourceVersion=None):  # noqa: E501
     """
     Retrieves the status of the transform plugin
 
     :rtype: TransformInstanceStatus
     """
     logger.debug("Get status called")
     try:
         with self._status_lock.reader_lock:
             if watch:
                 return Response(watch_manager.watch(
                     'status', resourceVersion, self._status),
                                 mimetype="application/json")
             else:
                 return self._status
     except RuntimeError as inst:
         return ServiceError(500, str(inst)), 500
Example #23
def initializeEndpointController(handler, *modules):
    """
    This method iterates over the functions defined in the auto-generated flask modules
    and creates a function in this package which points to the bound member methods of the
    handler
    
    :param handler: The handler object which must implement bound member methods which
        otherwise have the same signature as those defined in the controller_modules 
    :param modules: These are the autogenerated swagger controller modules
    """
    
    fns = itertools.chain(*[inspect.getmembers(m, inspect.isfunction) for m in modules])
    for name, fn1 in fns:
        sig1 = inspect.signature(fn1)
        logger.debug("Building redirect for " + name + str(sig1))
        
        fn2 = getattr(handler, name)
        sig2 = inspect.signature(fn2)
        assert sig1 == sig2, "Can't redirect " + name +" : " + str(sig1) + "-" + str(sig2)
        globals()[name] = getattr(handler, name)
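
A minimal sketch of how this wiring can be exercised; the fake controller module and FakeHandler below are illustrative stand-ins, not part of the source:

    import types

    # Build a fake "generated controller" module exposing one function.
    fake_controller = types.ModuleType("fake_controller")
    exec("def get_status(watch=None, resourceVersion=None): pass", fake_controller.__dict__)

    class FakeHandler:  # hypothetical handler; method signatures must match the controller's
        def get_status(self, watch=None, resourceVersion=None):
            return {"state": "ready"}

    # Installs 'get_status' into the package that defines initializeEndpointController,
    # bound to FakeHandler().get_status, so the Flask routes dispatch to the handler.
    initializeEndpointController(FakeHandler(), fake_controller)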
Example #24
 def load_data(self, datasets):
     """
     Creates and returns a Task which loads data into a model using the bindings provided
     
     :param datasets: dictionary mapping dataset function to dataset
     :return: The created Task object
     """
     logger.debug("Load data called")
     try:            
         if not isinstance(list(datasets.values())[0], MistkDataset) and cx.request.is_json:
             datasets = {key : datautils.deserialize_model(ds, MistkDataset) for 
                         key, ds in cx.request.get_json().items()}
                     
         task = ModelInstanceTask(operation="load_data",
                     parameters = {"dataset_map": datasets})
         self.add_task(task)
     except RuntimeError as inst:
         msg = "Error while loading data into model: %s" % str(inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500 
Example #25
 def get_status(self, watch=None, resourceVersion=None):
     """
     Retrieves the status of this ModelEndpointService
     
     :param watch: Optional flag indicating whether state changes should be monitored
     :param resourceVersion: Optional version id that will be used as the minimum version number
         for watched status changes. 
     :return: The current status of this ModelEndpointService
     """
     logger.debug("Get Status called")
     try:
         with self._status_lock.reader_lock:
             if watch:
                 return Response(watch_manager.watch(
                     'status', resourceVersion, self._status),
                                 mimetype="application/json")
             else:
                 return self._status
     except RuntimeError as inst:
         return ServiceError(500, str(inst)), 500
Example #26
 def stream_predict(self, dataMap, details=None):
     """
     Creates a Task which kicks off a stream prediction activity
     
     :param dataMap: Dictionary of IDs to b64 encoded data
     :param details: Optional details flag that is passed through to the stream prediction task
     :return: Dictionary of IDs to predictions
     """
     try:
         task = ModelInstanceTask(operation="stream_predict", 
                                  parameters = {"data_map": dataMap,
                                               "details": details})
         self.add_task(task)
         resp = self._get_response()
         logger.debug('Stream predict response ready.')
         # check if error from model queue response  
         if isinstance(resp, ServiceError):
             return resp, 500
         else:
             return resp
     except RuntimeError as inst:
         msg = "Error while kicking off stream prediction activity: %s" % str(inst)
         logger.exception(msg)
         return ServiceError(500, msg), 500
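
The dataMap described above is a plain dictionary of record IDs to base64-encoded payloads. A minimal sketch of building one from files on disk; the sample directory and the endpoint_service name are illustrative assumptions:

    import base64
    import os

    sample_dir = "/tmp/samples"                     # hypothetical input directory
    dataMap = {}
    for name in os.listdir(sample_dir):
        with open(os.path.join(sample_dir, name), "rb") as fp:
            dataMap[name] = base64.b64encode(fp.read()).decode("utf-8")

    predictions = endpoint_service.stream_predict(dataMap)   # returns IDs mapped to predictions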
Example #27
 def _do_initialize(self, objectives: list, props: dict, hparams: dict):
     """
     Called once the endpoint service has launched.  This would typically be the first call made to the service. 
     
     :param objectives: The objectives for the model.
     :param props: A dictionary that is parsed from a JSON string. These are settings that are passed from the ecosystem, 
         but are not really considered hyperparameters.  They are not used by the model, but rather the endpoint itself 
         (e.g., where should heartbeats be sent and how often, etc).
     :param hparams: A dictionary that is parsed from a JSON string. These are the hyperparameters that are used by the model.
     """
     try:
         logger.debug("_do_initialize called")
         self.do_initialize(objectives, props, hparams)
         logger.debug("Changing state to 'initialized'")
         self.initialized()
         logger.debug("_do_initialize complete")
     except Exception as ex:  #pylint: disable=broad-except
         logger.exception("Error running do_initalize")
         self.fail(str(ex))
Example #28
    def do_evaluate(self, assessment_type, metrics, input_data_path,
                    evaluation_input_format, ground_truth_path,
                    evaluation_path, properties):
        """
        Performs metrics' evaluation using the predictions and ground truth files provided.
        Stores the assessment results as a JSON file in the evaluation_path
        
        :param assessment_type: The evaluation assessment type. One of {'BinaryClassification', 
            'MultilabelClassification', 'MulticlassClassification', 'Regression'}
        :param metrics: Specific metrics to evaluate against instead of all metrics defined by assessment_type
        :param input_data_path: Path to input data for the evaluation
        :param evaluation_input_format: The format of the input data
        :param ground_truth_path: The directory path where the ground_truth.csv file is located
        :param evaluation_path: A directory path to where the evaluation.json output file will be stored
        :param properties: A dictionary of key value pairs for evaluation plugin arguments. 
        """
        if evaluation_input_format != "predictions":
            msg = "EvaluationInputFormat %s is not supported by this Metric Evaluator, only 'predictions' are supported" % evaluation_input_format
            logger.error(msg)
            raise Exception(msg)

        # load prediction results
        full_predictions_path = os.path.join(input_data_path,
                                             "predictions.csv")
        results_df = csv_Predictions_to_DataFrame(full_predictions_path)

        # load ground truth
        full_ground_truth_path = os.path.join(ground_truth_path,
                                              "ground_truth.csv")
        truth_df = csv_Groundtruth_to_DataFrame(full_ground_truth_path)

        # match ground truth to results by id
        truth_df = truth_df.loc[truth_df['rowid'].isin(results_df['rowid'])]

        # sort the rows by id
        results_df.sort_values(by='rowid', inplace=True)
        truth_df.sort_values(by='rowid', inplace=True)

        logger.debug('Running for metrics %s' % metrics)

        if assessment_type == "MultilabelClassification" or assessment_type == "MulticlassClassification":
            # create matrices for labels and confidence
            label_mlb = MultiLabelBinarizer()
            parsed_truth_labels = (
                truth_df['labels'].str.split().values.tolist()
                if truth_df['labels'].dtype == 'object' else np.array(
                    np.transpose(np.matrix(truth_df['labels'].values))))
            parsed_results_labels = (
                results_df['labels'].str.split().values.tolist()
                if results_df['labels'].dtype == 'object' else np.array(
                    np.transpose(np.matrix(results_df['labels'].values))))
            label_mlb.fit(
                np.append(parsed_truth_labels, parsed_results_labels, axis=0))
            truth_labels_matrix = label_mlb.transform(parsed_truth_labels)
            results_labels_matrix = label_mlb.transform(parsed_results_labels)

            if 'confidence' in results_df and not results_df[
                    'confidence'].hasnans:
                parsed_confidence = (
                    results_df['confidence'].str.split().values.tolist() if
                    results_df['confidence'].dtype == 'object' else np.array(
                        np.transpose(np.matrix(
                            results_df['confidence'].values))))
                confidence_matrix = np.empty(results_labels_matrix.shape)
                label_classes = label_mlb.classes_.tolist()
                for row_index, row in enumerate(parsed_results_labels):
                    confidence_row = np.zeros(results_labels_matrix.shape[1])
                    for col_index, col in enumerate(row):
                        label_pos = label_classes.index(col)
                        confidence_row[label_pos] = np.float64(
                            parsed_confidence[row_index][col_index])  #pylint: disable=no-member
                    confidence_matrix[row_index] = confidence_row
        elif assessment_type == "Regression":
            if truth_df['labels'].dtype == 'object':
                truth_labels_matrix = truth_df['labels'].str.split(
                ).values.tolist()
                for index, item in enumerate(truth_labels_matrix):
                    truth_labels_matrix[index] = np.array(item,
                                                          dtype=np.float64)  #pylint: disable=no-member
            else:
                truth_labels_matrix = truth_df['labels'].values

            if results_df['labels'].dtype == 'object':
                results_labels_matrix = results_df['labels'].str.split(
                ).values.tolist()
                for index, item in enumerate(results_labels_matrix):
                    results_labels_matrix[index] = np.array(item,
                                                            dtype=np.float64)  #pylint: disable=no-member
            else:
                results_labels_matrix = results_df['labels'].values

            if results_df['confidence'].dtype == 'object':
                confidence_matrix = results_df['confidence'].str.split(
                ).values.tolist()
                for index, item in enumerate(confidence_matrix):
                    confidence_matrix[index] = np.array(item, dtype=np.float64)  #pylint: disable=no-member
            else:
                confidence_matrix = results_df['confidence'].values
        else:
            truth_labels_matrix = (
                truth_df['labels'].str.split().values.tolist()
                if truth_df['labels'].dtype == 'object' else
                truth_df['labels'].values)
            results_labels_matrix = (
                results_df['labels'].str.split().values.tolist()
                if results_df['labels'].dtype == 'object' else
                results_df['labels'].values)
            confidence_matrix = (
                results_df['confidence'].str.split().values.tolist()
                if results_df['confidence'].dtype == 'object' else
                results_df['confidence'].values)

        eval_dict = {}
        modules_cache = {}

        for counter, metric in enumerate(metrics):
            logger.info(metric.package + " : " + metric.method)
            if metric.package not in modules_cache:
                module = None
                name = metric.package
                try:
                    importlib.invalidate_caches()
                    module = importlib.import_module(name)
                except Exception:
                    logger.exception("Exception importing plugin module " +
                                     name)
                if module:
                    modules_cache[metric.package] = module
                else:
                    logger.warn("Cannot load " + metric.package)
                    continue
            else:
                logger.debug("Loading cached module")
                module = modules_cache[metric.package]

            if hasattr(module, metric.method):
                logger.debug("Calling " + metric.method + " in " +
                             metric.package)
                method = getattr(module, metric.method)

                args = metric.default_args or {}
                if metric.data_parameters.truth_labels:
                    args[metric.data_parameters.
                         truth_labels] = truth_labels_matrix

                if metric.data_parameters.truth_bounds and not truth_df[
                        'bounds'].hasnans:
                    args[metric.data_parameters.
                         truth_bounds] = truth_df['bounds'].values

                if metric.data_parameters.prediction_labels:
                    args[metric.data_parameters.
                         prediction_labels] = results_labels_matrix

                if metric.data_parameters.prediction_scores and 'confidence' in results_df and not results_df[
                        'confidence'].hasnans:
                    args[metric.data_parameters.
                         prediction_scores] = confidence_matrix

                if metric.data_parameters.prediction_bounds and not results_df[
                        'bounds'].hasnans:
                    args[metric.data_parameters.
                         prediction_bounds] = results_df['bounds'].values

                try:
                    evalResult = method(**args)
                except Exception:
                    logger.error("Something bad happened calling " +
                                 metric.method,
                                 exc_info=True)
                else:
                    logger.debug("Result is " + str(evalResult))
                    if isinstance(evalResult, np.ndarray):
                        # convert to native types
                        evalResultAsList = evalResult.tolist()
                        if assessment_type == "MultilabelClassification" or assessment_type == "MulticlassClassification":
                            # map labels to their values in the results
                            label_classes = label_mlb.classes_.tolist()
                            if len(evalResultAsList) == len(label_classes):
                                evalResultAsDict = {}
                                for index, label in enumerate(label_classes):
                                    evalResultAsDict[str(
                                        label)] = evalResultAsList[index]
                                eval_dict[
                                    metric.object_info.name] = evalResultAsDict
                            else:
                                eval_dict[
                                    metric.object_info.name] = evalResultAsList
                        else:
                            eval_dict[
                                metric.object_info.name] = evalResultAsList
                    elif isinstance(evalResult, np.generic):
                        # convert to native type
                        evalResultAsScalar = evalResult.item()  # np.asscalar was removed in newer NumPy
                        eval_dict[metric.object_info.name] = evalResultAsScalar
                    elif isinstance(evalResult, tuple) or isinstance(
                            evalResult, list):
                        # kind of a cheat to cover the case where a native type has numpy elements
                        # which some scikit-learn methods inexplicably return
                        eval_dict[metric.object_info.name] = np.array(
                            evalResult).tolist()
                    else:
                        eval_dict[metric.object_info.name] = evalResult
            else:
                logger.warning(metric.method + " does not exist in " +
                               metric.package)

            logger.info("Completed metric " + str(counter + 1))

        eval_dict_json = json.dumps(eval_dict, indent=2)
        filename = os.path.join(
            evaluation_path, "eval_results_" + str(int(time.time())) + ".json")
        logger.info("Writing eval results to " + filename)
        with open(filename, mode='w') as writer:
            writer.write(eval_dict_json)
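
The loop above only needs each metric to expose a package/method pair plus a mapping of which keyword arguments receive the truth and prediction matrices. A minimal sketch of a metric definition this loop could consume, using SimpleNamespace as an illustrative stand-in for the mistk Metric data model:

    from types import SimpleNamespace

    # Hypothetical metric description: sklearn.metrics.accuracy_score fed with the
    # ground-truth and prediction label matrices built above.
    accuracy_metric = SimpleNamespace(
        package="sklearn.metrics",
        method="accuracy_score",
        default_args={},                                   # extra keyword arguments, if any
        object_info=SimpleNamespace(name="accuracy"),
        data_parameters=SimpleNamespace(
            truth_labels="y_true",                         # kwarg that receives truth_labels_matrix
            truth_bounds=None,
            prediction_labels="y_pred",                    # kwarg that receives results_labels_matrix
            prediction_scores=None,
            prediction_bounds=None,
        ),
    )
    # Passed as metrics=[accuracy_metric], the loop imports sklearn.metrics, calls
    # accuracy_score(y_true=..., y_pred=...), and records the result under eval_dict["accuracy"].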