Ejemplo n.º 1
0
 def retrieve(self, request, pk=None):
     """
     GET request for the data of a dataset, specified by dataset id.
     :param request: GET request, containing the dataset id
     :param pk: Dataset id
     :return: Dataset data and relevant statistics
     """
     # Guard clause: reject requests that carry no dataset id.
     if not pk:
         return Response(
             "Required id for the dataset was not found.",
             status=status.HTTP_400_BAD_REQUEST
         )
     try:
         dataset = Dataset.objects.get(pk=pk)
     except Dataset.DoesNotExist:
         return Response("No dataset found for id: {}".format(pk), status=status.HTTP_400_BAD_REQUEST)
     if not IsOwner().has_object_permission(request, self, dataset):
         return Response(status=status.HTTP_401_UNAUTHORIZED)
     response_data = self.serializer_class(dataset, many=False).data
     meta = Metadata(dataset).get_metadata("DatasetMetadata")
     # Default target column name; overridden by metadata when present.
     target_column = "Response"
     if meta:
         response_data["metadata"] = meta
         target_column = meta["target"]
     response_data["data"] = load_dataset(pk)
     # Fall back to the first column when the target is not in the data.
     if target_column not in response_data["data"]:
         target_column = response_data["data"].columns.tolist()[0]
     statistics = DatasetStatistics(response_data["data"])
     response_data["statistics"] = statistics.calculate_statistics(target_column)
     return Response(response_data, status=status.HTTP_200_OK)
Ejemplo n.º 2
0
 def update(self, request, pk=None):
     """
     PUT request to update a dataset.
     :param request: PUT request
     :param pk: dataset ID to be updated
     :return: 200/details of updated dataset, 400/bad request, or 401/unauthorized
     """
     dataset_inputs = load_request(request)
     serializer = self.serializer_class(data=dataset_inputs, context={'request': request})
     if serializer.is_valid() and pk is not None:
         try:
             original_dataset = Dataset.objects.get(id=int(pk))
         except Dataset.DoesNotExist:
             return Response(
                 "No dataset found for id: {}".format(pk),
                 status=status.HTTP_400_BAD_REQUEST
             )
         if IsOwner().has_object_permission(request, self, original_dataset):
             amodel = serializer.update(original_dataset, serializer.validated_data)
             # Guard against a missing "metadata" key (consistent with the
             # other create/update handlers) to avoid a KeyError.
             if "metadata" not in dataset_inputs.keys():
                 dataset_inputs["metadata"] = None
             m = Metadata(amodel, dataset_inputs["metadata"])
             meta = m.set_metadata("DatasetMetadata")
             if amodel:
                 response_status = status.HTTP_201_CREATED
                 response_data = serializer.data
                 response_data["id"] = amodel.id
                 # Raw data is excluded from the update response payload.
                 del response_data["data"]
                 if meta:
                     response_data["metadata"] = meta
                 # Updating in place (same id) is a 200, not a 201.
                 if int(pk) == amodel.id:
                     response_status = status.HTTP_200_OK
                 return Response(response_data, status=response_status)
         else:
             return Response(status=status.HTTP_401_UNAUTHORIZED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 3
0
    def execute_task(df, model_id, model_name, dataset_id):
        """
        Entry point for a VB background task: validates the dataset, loads
        hyper-parameters, then dispatches to the model-specific executor.
        :param df: pandas DataFrame holding the dataset to train on
        :param model_id: AnalyticalModel id being executed
        :param model_name: short model-type code (e.g. "lra"), used to key step_count
        :param dataset_id: Dataset id the DataFrame was loaded from
        """
        logger.info(
            "Starting VB task -------- Model ID: {}; Model Type: {}; step 1/{}"
            .format(model_id, model_name, step_count[model_name]))
        DaskTasks.update_status(model_id, "Loading and validating data",
                                "1/{}".format(step_count[model_name]))

        # Dataset metadata drives column selection; fall back to the
        # conventional "Response" target when none is recorded.
        dataset_m = Metadata(parent=Dataset.objects.get(
            id=dataset_id)).get_metadata("DatasetMetadata")
        target = "Response" if "response" not in dataset_m.keys(
        ) else dataset_m["response"]
        attributes = None if "attributes" not in dataset_m.keys(
        ) else dataset_m["attributes"]
        y = df[target]
        if attributes:
            # Attributes are stored as a single-quoted JSON-like string;
            # normalize quotes before parsing.
            attributes_list = json.loads(attributes.replace("\'", "\""))
            x = df[attributes_list]
        else:
            # No explicit attribute list: use every column except the target.
            x = df.drop(target, axis=1)

        logger.info("Model ID: {}, loading hyper-parameters step 2/{}".format(
            model_id, step_count[model_name]))
        DaskTasks.update_status(model_id, "Loading hyper-parameters",
                                "2/{}".format(step_count[model_name]))
        parameters = Metadata(parent=AnalyticalModel.objects.get(
            id=model_id)).get_metadata("ModelMetadata")

        # NOTE(review): only the "lra" branch is visible here — confirm other
        # model types are dispatched elsewhere.
        if model_name == "lra":
            DaskTasks.execute_lra(model_id, parameters, x, y,
                                  step_count[model_name])
Ejemplo n.º 4
0
 def create(self, request):
     """
     POST request that creates a new Pipeline.
     :param request: POST request
     :return: New pipeline object
     """
     pipeline_inputs = load_request(request)
     serializer = self.serializer_class(data=pipeline_inputs,
                                        context={'request': request})
     # The owning project must exist and belong to the requesting user.
     try:
         project = Project.objects.get(id=int(pipeline_inputs["project"]))
     except Project.DoesNotExist:
         return Response("No project found for id: {}".format(
             int(pipeline_inputs["project"])),
                         status=status.HTTP_400_BAD_REQUEST)
     if project.owner != request.user:
         return Response(status=status.HTTP_401_UNAUTHORIZED)
     if serializer.is_valid():
         serializer.save()
         pipeline = serializer.data
         if "metadata" not in pipeline_inputs.keys():
             pipeline_inputs["metadata"] = None
         # Attach any supplied metadata to the freshly created pipeline.
         created = Pipeline.objects.get(pk=int(pipeline["id"]))
         pipeline["metadata"] = Metadata(
             created, pipeline_inputs["metadata"]).set_metadata("PipelineMetadata")
         if pipeline:
             return Response(pipeline, status=status.HTTP_201_CREATED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 5
0
 def retrieve(self, request, pk=None):
     """
     GET request for the data of a dataset, specified by dataset id.
     :param request: GET request, containing the dataset id
     :param pk: Dataset id
     :return: Dataset data and relevant statistics
     """
     if pk:
         # Handle unknown ids explicitly instead of letting DoesNotExist
         # propagate as a 500 (consistent with the other retrieve handler).
         try:
             dataset = Dataset.objects.get(pk=pk)
         except Dataset.DoesNotExist:
             return Response("No dataset found for id: {}".format(pk),
                             status=status.HTTP_400_BAD_REQUEST)
         serializer = self.serializer_class(dataset, many=False)
         response_data = serializer.data
         m = Metadata(dataset)
         meta = m.get_metadata("DatasetMetadata")
         # Default target column; overridden by recorded metadata.
         response = "Response"
         if meta:
             response_data["metadata"] = meta
             response = meta["response"]
         response_data["data"] = pd.read_csv(
             StringIO(bytes(dataset.data).decode()))
         # Fall back to the first column when the target is absent.
         if response not in response_data["data"]:
             response = response_data["data"].columns.tolist()[0]
         response_data["statistics"] = DatasetStatistics(
             response_data["data"]).calculate_statistics(response)
         return Response(response_data, status=status.HTTP_200_OK)
     else:
         return Response("Required id for the dataset id was not found.",
                         status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 6
0
 def update(self, request, pk=None):
     """
     PUT request to update an analytical model.
     :param request: PUT request
     :param pk: analytical model ID to be updated
     :return: 200/details of updated model, 400/bad request, or 401/unauthorized
     """
     amodel_inputs = request.data.dict()
     serializer = self.serializer_class(data=request.data.dict(),
                                        context={'request': request})
     if serializer.is_valid() and pk is not None:
         try:
             original_amodel = AnalyticalModel.objects.get(id=int(pk))
         except AnalyticalModel.DoesNotExist:
             return Response(
                 "No analytical model found for id: {}".format(pk),
                 status=status.HTTP_400_BAD_REQUEST)
         if IsOwnerOfWorkflowChild().has_object_permission(
                 request, self, original_amodel):
             amodel = serializer.update(original_amodel,
                                        serializer.validated_data)
             if amodel:
                 response_status = status.HTTP_201_CREATED
                 response_data = serializer.data
                 response_data["id"] = amodel.id
                 if int(pk) == amodel.id:
                     response_status = status.HTTP_200_OK
                 # Default a missing "metadata" key to None instead of
                 # overwriting a supplied value, and attach the metadata to
                 # the model instance (not the inputs dict), matching the
                 # Metadata(parent, metadata) usage elsewhere in this file.
                 if "metadata" not in amodel_inputs.keys():
                     amodel_inputs["metadata"] = None
                 m = Metadata(amodel, amodel_inputs["metadata"])
                 m.set_metadata("ModelMetadata")
                 response_data["metadata"] = m.get_metadata(
                     "ModelMetadata")
                 return Response(response_data, status=response_status)
         else:
             return Response(status=status.HTTP_401_UNAUTHORIZED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 7
0
 def create(self, request):
     """
     POST request that creates a new Dataset.
     :param request: POST request
     :return: New dataset
     """
     dataset_inputs = request.data.dict()
     serializer = self.serializer_class(data=dataset_inputs,
                                        context={'request': request})
     if serializer.is_valid():
         serializer.save()
         dataset = serializer.data
         if dataset:
             created = Dataset.objects.get(id=dataset["id"])
             if "metadata" not in dataset_inputs.keys():
                 dataset_inputs["metadata"] = None
             meta = Metadata(created,
                             dataset_inputs["metadata"]).set_metadata("DatasetMetadata")
             # Default target column; metadata overrides when present.
             target_column = "Response"
             if meta:
                 dataset["metadata"] = meta
                 target_column = meta["response"]
             frame = pd.read_csv(StringIO(bytes(created.data).decode()))
             # Fall back to the first column if the target is not in the data.
             if target_column not in frame:
                 target_column = frame.columns.tolist()[0]
             dataset["statistics"] = DatasetStatistics(
                 frame).calculate_statistics(target_column)
             # Exclude the raw data blob from the response payload.
             del dataset["data"]
             return Response(dataset, status=status.HTTP_201_CREATED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 8
0
 def update(self, request, pk=None):
     """
     PUT request to update a dataset.
     :param request: PUT request
     :param pk: dataset ID to be updated
     :return: 200/details of updated dataset, 400/bad request, or 401/unauthorized
     """
     dataset_inputs = request.data.dict()
     serializer = self.serializer_class(data=dataset_inputs,
                                        context={'request': request})
     if serializer.is_valid() and pk is not None:
         try:
             original_dataset = Dataset.objects.get(id=int(pk))
         except Dataset.DoesNotExist:
             return Response("No dataset model found for id: {}".format(pk),
                             status=status.HTTP_400_BAD_REQUEST)
         if IsOwnerOfWorkflowChild().has_object_permission(
                 request, self, original_dataset):
             amodel = serializer.update(original_dataset,
                                        serializer.validated_data)
             # Guard against a missing "metadata" key (consistent with the
             # other create/update handlers) to avoid a KeyError.
             if "metadata" not in dataset_inputs.keys():
                 dataset_inputs["metadata"] = None
             m = Metadata(amodel, dataset_inputs["metadata"])
             meta = m.set_metadata("DatasetMetadata")
             if amodel:
                 response_status = status.HTTP_201_CREATED
                 response_data = serializer.data
                 response_data["id"] = amodel.id
                 # Raw data is excluded from the update response payload.
                 del response_data["data"]
                 if meta:
                     response_data["metadata"] = meta
                 if int(pk) == amodel.id:
                     response_status = status.HTTP_200_OK
                 return Response(response_data, status=response_status)
         else:
             return Response(status=status.HTTP_401_UNAUTHORIZED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 9
0
 def update(self, request, pk=None):
     """
     PUT request to update an existing location.
     :param request: PUT request
     :param pk: Location ID
     :return: 200/updated location, 400/bad request, or 401/unauthorized
     """
     dataset_inputs = load_request(request)
     serializer = self.serializer_class(data=dataset_inputs,
                                        context={'request': request})
     if serializer.is_valid() and pk is not None:
         try:
             original_location = Location.objects.get(id=int(pk))
         except Location.DoesNotExist:
             return Response("No location found for id: {}".format(pk),
                             status=status.HTTP_400_BAD_REQUEST)
         if original_location.owner == request.user:
             location = serializer.update(original_location,
                                          serializer.validated_data)
             if location:
                 response_data = serializer.data
                 # Guard against a missing "metadata" key (consistent with
                 # the other handlers) to avoid a KeyError.
                 if "metadata" not in dataset_inputs.keys():
                     dataset_inputs["metadata"] = None
                 m = Metadata(location, dataset_inputs["metadata"])
                 meta = m.set_metadata("LocationMetadata")
                 if meta:
                     response_data["metadata"] = meta
                 request_status = status.HTTP_201_CREATED
                 # Updating in place (same id) is a 200, not a 201.
                 if int(pk) == location.id:
                     request_status = status.HTTP_200_OK
                 return Response(response_data, status=request_status)
         else:
             return Response(status=status.HTTP_401_UNAUTHORIZED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 10
0
 def update(self, request, pk=None):
     """
     PUT request for updating a project.
     :param request: PUT request
     :param pk: project ID to update
     :return: The updated project/200
     """
     dataset_inputs = load_request(request)
     serializer = self.serializer_class(data=dataset_inputs,
                                        context={'request': request})
     if serializer.is_valid() and pk is not None:
         try:
             project = Project.objects.get(id=int(pk))
         except Project.DoesNotExist:
             return Response("No project found for id: {}".format(pk),
                             status=status.HTTP_400_BAD_REQUEST)
         if not IsOwner().has_object_permission(request, self, project):
             return Response(status=status.HTTP_401_UNAUTHORIZED)
         project = serializer.update(project, serializer.validated_data)
         # Default a missing metadata key to None before persisting.
         if "metadata" not in dataset_inputs.keys():
             dataset_inputs["metadata"] = None
         meta = Metadata(project,
                         dataset_inputs["metadata"]).set_metadata("ProjectMetadata")
         response_data = serializer.data
         if meta:
             response_data["metadata"] = meta
         return Response(response_data, status=status.HTTP_200_OK)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 11
0
 def set_prediction_estimators(project_id, model_id, selected_models: dict):
     """
     Refit the prediction models of an existing trained model using the
     user-selected estimators, then persist the result.
     :param project_id: Project id (looked up to validate existence)
     :param model_id: Model id whose serialized model is loaded and refit
     :param selected_models: mapping of estimator selections to apply
     """
     # Lookup validates the project exists (raises DoesNotExist otherwise).
     project = Project.objects.get(id=int(project_id))
     model = Model.objects.get(id=int(model_id))
     m = load_model(model.id, model.model)
     model_metadata = Metadata(parent=model).get_metadata("ModelMetadata")
     # "single" is the default prediction model type when none is recorded.
     m.prediction_model_type = model_metadata.get("prediction_model_type", "single")
     m.refitPredictionModels(selected_models=selected_models)
     m.save(n=4, model_id=model_id, message="Model selection")
Ejemplo n.º 12
0
    def refit_model(self, request):
        """
        POST request that kicks off a refit of the selected predictive models.
        :param request: request containing project_id, model_id and
            predictive_models (JSON list of [name, id] pairs)
        :return: 200/refit initiated, 400/bad request, or 401/unauthorized
        """
        input_data = load_request(request)
        required_parameters = ["project_id", "model_id", "predictive_models"]
        if set(required_parameters).issubset(input_data.keys()):
            permissions = []
            try:
                project = Project.objects.get(id=int(input_data["project_id"]))
                if not IsOwnerOfProject().has_object_permission(
                        request, self, project):
                    permissions.append("Unauthorized to access project.")
            except Project.DoesNotExist:
                project = None
            try:
                model = Model.objects.get(id=int(input_data["model_id"]))
                if not IsOwnerOfModel().has_object_permission(
                        request, self, model):
                    permissions.append("Unauthorized to access pipeline")
            except Model.DoesNotExist:
                model = None
            if len(permissions) > 0:
                return Response(permissions,
                                status=status.HTTP_401_UNAUTHORIZED)
            if model is None or project is None:
                message = []
                if project is None:
                    message.append("No project found for id: {}".format(
                        input_data["project_id"]))
                if model is None:
                    message.append("No model found for id: {}".format(
                        input_data["model_id"]))
                return Response(", ".join(message),
                                status=status.HTTP_400_BAD_REQUEST)
            # predictive_models arrives as a JSON list of [name, id] pairs.
            p_models = {}
            for p in json.loads(input_data["predictive_models"]):
                p_models[p[0]] = int(p[1])
            m = Metadata(model, json.dumps({"predictive_models": p_models}))
            meta = m.set_metadata("ModelMetadata")

            response = {}
            DaskTasks.refit_task(project.id, model.id, p_models)
            response["project_id"] = project.id
            response["pipeline_id"] = model.pipeline.id
            response["model_id"] = model.id
            response["dataset_id"] = project.dataset
            response["model_metadata"] = meta
            response[
                "status"] = "Initiated refit for specified models for prediction"
            return Response(response, status=status.HTTP_200_OK)
        data = "Missing required parameters: {}".format(
            ", ".join(required_parameters))
        # Missing parameters is a client error: report 400, not 200.
        return Response(data, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 13
0
 def list(self, request):
     """
     GET request that lists all the locations owned by the user.
     :param request: GET request
     :return: List of locations
     """
     owned_locations = Location.objects.filter(owner=request.user)
     # TODO: Add ACL access objects
     response_data = self.serializer_class(owned_locations, many=True).data
     # Enrich each serialized location with its stored metadata.
     for entry in response_data:
         location = Location.objects.get(pk=int(entry["id"]))
         entry["metadata"] = Metadata(location, None).get_metadata("LocationMetadata")
     return Response(response_data, status=status.HTTP_200_OK)
Ejemplo n.º 14
0
def update_status(_id, status, stage, message=None, retry=5, log=None):
    """
    Persist a pipeline status update into PipelineMetadata, retrying on
    failure.
    :param _id: Pipeline id; -1 is a sentinel meaning "no pipeline" (no-op)
    :param status: status label stored in the metadata and used as the log type
    :param stage: progress indicator string (e.g. "2/5")
    :param message: optional detail message stored alongside the status
    :param retry: remaining retry attempts; gives up when it reaches 0
    :param log: optional text to emit via logger.info on success
    """
    if _id == -1:
        return
    if retry == 0:
        return
    meta = 'PipelineMetadata'
    try:
        amodel = Pipeline.objects.get(id=int(_id))
        m = Metadata(parent=amodel, metadata=json.dumps({"status": status, "stage": stage, "message": message}))
        m.set_metadata(meta)
        if log:
            logger.info(log)
        # NOTE(review): this PipelineLog instance is constructed but never
        # saved here — confirm whether its constructor persists it; otherwise
        # a .save() call appears to be missing.
        PipelineLog(parent=amodel, logtype=status, log=f"Stage: {stage}, Message: {message}", timestamp=str(datetime.datetime.now().timestamp()))
    except Exception as ex:
        logger.warning("Error attempting to save status update: {}".format(ex))
        # Retry with the message and log dropped and the retry budget reduced.
        update_status(_id, status, stage, None, retry - 1)
Ejemplo n.º 15
0
 def list(self, request, pk=None):
     """
     GET request that lists all the projects owned by the user.
     :param request: GET request
     :param pk: unused
     :return: List of projects, each enriched with its metadata
     """
     projects = Project.objects.filter(owner=request.user)
     # TODO: Add ACL access objects
     serializer = self.serializer_class(projects, many=True)
     response_data = serializer.data
     for d in response_data:
         p = Project.objects.get(id=d["id"])
         m = Metadata(p, None)
         meta = m.get_metadata("ProjectMetadata")
         d["metadata"] = meta
     # Return the enriched list (not serializer.data) so the attached
     # metadata is guaranteed to be included, matching the other list views.
     return Response(response_data, status=status.HTTP_200_OK)
Ejemplo n.º 16
0
 def predict(self, request):
     """
     POST request that runs a prediction with an existing model.
     :param request: request containing project_id, model_id and data
     :return: 200/prediction results, 400/bad request, or 401/unauthorized
     """
     input_data = load_request(request)
     required_parameters = ["project_id", "model_id", "data"]
     if set(required_parameters).issubset(input_data.keys()):
         permissions = []
         try:
             project = Project.objects.get(id=int(input_data["project_id"]))
             if not IsOwnerOfProject().has_object_permission(
                     request, self, project):
                 permissions.append("Unauthorized to access project.")
         except Project.DoesNotExist:
             project = None
         try:
             model = Model.objects.get(id=int(input_data["model_id"]))
             if not IsOwnerOfModel().has_object_permission(
                     request, self, model):
                 permissions.append("Unauthorized to access pipeline")
         except Model.DoesNotExist:
             model = None
         if len(permissions) > 0:
             return Response(permissions,
                             status=status.HTTP_401_UNAUTHORIZED)
         if model is None or project is None:
             message = []
             if project is None:
                 message.append("No project found for id: {}".format(
                     input_data["project_id"]))
             if model is None:
                 message.append("No model found for id: {}".format(
                     input_data["model_id"]))
             return Response(", ".join(message),
                             status=status.HTTP_400_BAD_REQUEST)
         response = {}
         data = str(input_data["data"])
         results = DaskTasks.predict(project.id, model.id, data)
         response["project_id"] = project.id
         response["pipeline_id"] = model.pipeline.id
         response["model_id"] = model.id
         m = Metadata(model)
         response["model_metadata"] = m.get_metadata("ModelMetadata")
         response["dataset_id"] = project.dataset
         response["results"] = results
         return Response(response, status=status.HTTP_200_OK)
     data = "Missing required parameters: {}".format(
         ", ".join(required_parameters))
     # Missing parameters is a client error: report 400, not 200.
     return Response(data, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 17
0
    def update(self, request, pk=None):
        """
        PUT request that updates an existing pipeline.
        :param request: PUT request containing the pipeline fields
        :param pk: pipeline ID to update
        :return: updated pipeline details, 400/bad request, or 401/unauthorized
        """
        pipeline_inputs = load_request(request)
        serializer = self.serializer_class(data=pipeline_inputs,
                                           context={'request': request})

        # The owning project must exist and belong to the requesting user.
        try:
            project = Project.objects.get(id=int(pipeline_inputs["project"]))
        except Project.DoesNotExist:
            return Response("No project found for id: {}".format(
                int(pipeline_inputs["project"])),
                            status=status.HTTP_400_BAD_REQUEST)
        if project.owner != request.user:
            return Response(status=status.HTTP_401_UNAUTHORIZED)

        if serializer.is_valid() and pk is not None:
            try:
                original_pipeline = Pipeline.objects.get(id=int(pk))
            except Pipeline.DoesNotExist:
                return Response("No pipeline found for id: {}".format(pk),
                                status=status.HTTP_400_BAD_REQUEST)
            if not IsOwnerOfPipeline().has_object_permission(
                    request, self, original_pipeline):
                return Response(status=status.HTTP_401_UNAUTHORIZED)
            updated_pipeline = serializer.update(original_pipeline,
                                                 serializer.validated_data)
            if updated_pipeline:
                response_data = serializer.data
                response_data["id"] = updated_pipeline.id
                # Updating in place (same id) is a 200, otherwise a 201.
                response_status = (status.HTTP_200_OK
                                   if int(pk) == updated_pipeline.id
                                   else status.HTTP_201_CREATED)
                if "metadata" not in pipeline_inputs.keys():
                    pipeline_inputs["metadata"] = None
                target = Pipeline.objects.get(pk=updated_pipeline.id)
                response_data["metadata"] = Metadata(
                    target, pipeline_inputs["metadata"]).set_metadata(
                    "PipelineMetadata")
                return Response(response_data, status=response_status)
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 18
0
 def update_status(_id, status, stage, message=None, retry=5):
     """
     Persist a model status update into ModelMetadata, retrying on failure.
     :param _id: AnalyticalModel id
     :param status: status label stored in the metadata
     :param stage: progress indicator string (e.g. "2/5")
     :param message: optional detail message stored alongside the status
     :param retry: remaining retry attempts; gives up when it reaches 0
     """
     if retry == 0:
         # Bug fix: the original fell through on `pass`, so the retry
         # budget never stopped the recursion; return to terminate.
         return
     meta = 'ModelMetadata'
     try:
         amodel = AnalyticalModel.objects.get(id=int(_id))
         m = Metadata(parent=amodel,
                      metadata=json.dumps({
                          "status": status,
                          "stage": stage,
                          "message": message
                      }))
         m.set_metadata(meta)
     except Exception as ex:
         logger.warning(
             "Error attempting to save metadata update: {}".format(ex))
         # Retry with the message dropped and the retry budget reduced.
         DaskTasks.update_status(_id, status, stage, None, retry - 1)
Ejemplo n.º 19
0
 def destroy(self, request, pk=None):
     """
     DELETE request that removes a dataset and its metadata.
     :param request: DELETE request
     :param pk: dataset ID to delete
     :return: 200/success, 400/bad request, or 401/unauthorized
     """
     if pk is None:
         return Response("No dataset 'id' in request.",
                         status=status.HTTP_400_BAD_REQUEST)
     try:
         dataset = Dataset.objects.get(id=int(pk))
     except Dataset.DoesNotExist:
         return Response("No dataset found for id: {}".format(pk),
                         status=status.HTTP_400_BAD_REQUEST)
     if not IsOwnerOfWorkflowChild().has_object_permission(
             request, self, dataset):
         return Response(status=status.HTTP_401_UNAUTHORIZED)
     # Remove the associated metadata before deleting the dataset itself.
     Metadata(dataset).delete_metadata("DatasetMetadata")
     dataset.delete()
     return Response(status=status.HTTP_200_OK)
Ejemplo n.º 20
0
 def list(self, request):
     """
     GET request that lists all the pipelines for a specific project id.
     :param request: GET request, containing the project id as 'project'
     :return: List of analytical models
     """
     if 'project' not in self.request.query_params.keys():
         return Response(
             "Required 'project' parameter for the pipeline was not found.",
             status=status.HTTP_400_BAD_REQUEST)
     project_id = int(self.request.query_params.get('project'))
     pipelines = Pipeline.objects.filter(project=project_id)
     response_data = self.serializer_class(pipelines, many=True).data
     # Enrich each serialized pipeline with its stored metadata.
     for entry in response_data:
         pipeline = Pipeline.objects.get(pk=int(entry["id"]))
         entry["metadata"] = Metadata(pipeline, None).get_metadata("PipelineMetadata")
     return Response(response_data, status=status.HTTP_200_OK)
Ejemplo n.º 21
0
 def create(self, request):
     """
     POST request that creates a new project.
     :param request: POST request
     :return: New project object
     """
     dataset_inputs = load_request(request)
     serializer = self.serializer_class(data=dataset_inputs, context={'request': request})
     if not serializer.is_valid():
         return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
     serializer.save()
     project = serializer.data
     created = Project.objects.get(id=project["id"])
     # Default a missing metadata key to None before persisting.
     if "metadata" not in dataset_inputs.keys():
         dataset_inputs["metadata"] = None
     project["metadata"] = Metadata(
         created, dataset_inputs["metadata"]).set_metadata("ProjectMetadata")
     return Response(project, status=status.HTTP_201_CREATED)
Ejemplo n.º 22
0
    def set_prediction_estimators(project_id, model_id, selected_models: dict):
        """
        Refit the predictive models of a trained model against the project's
        dataset using the user-selected estimators, then persist the result.
        :param project_id: Project id whose dataset supplies the training data
        :param model_id: Model id whose serialized model is loaded and refit
        :param selected_models: mapping of estimator selections to apply
        """
        project = Project.objects.get(id=int(project_id))
        dataset = Dataset.objects.get(id=project.dataset)
        df = load_dataset(dataset.id, dataset)
        project_metadata = Metadata(parent=Project.objects.get(id=project.id)).get_metadata("ProjectMetadata")

        # Column selection comes from the project metadata; "response" is the
        # fallback target label when no "target" entry is recorded.
        target_label = project_metadata["target"] if "target" in project_metadata.keys() else "response"
        features_label = project_metadata["features"] if "features" in project_metadata.keys() else None
        target = df[target_label]
        if features_label:
            # Features are stored as a single-quoted JSON-like string;
            # normalize quotes before parsing.
            features = df[json.loads(features_label.replace("\'", "\""))]
        else:
            features = df.drop(target_label, axis=1)
        model = Model.objects.get(id=int(model_id))
        m = load_model(model.id, model.model)
        # TODO: update predictive_model_type from model metadata
        m.refitPredictiveModels(selected_models=selected_models, y_df=target, x_df=features)
        m.save(n=4, model_id=model_id)
Ejemplo n.º 23
0
 def create(self, request):
     """
     POST request that creates a new analytical model.
     :param request: POST request
     :return: New analytical object
     """
     amodel_inputs = request.data.dict()
     serializer = self.serializer_class(data=amodel_inputs,
                                        context={'request': request})
     if serializer.is_valid():
         serializer.save()
         # Keep the request inputs and the serializer output separate so the
         # caller-supplied metadata is not lost (the original overwrote
         # amodel_inputs with serializer.data, then nulled "metadata").
         amodel_data = serializer.data
         if "metadata" not in amodel_inputs.keys():
             amodel_inputs["metadata"] = None
         # Attach metadata to the created model instance, not a dict,
         # matching the Metadata(parent, metadata) usage elsewhere.
         amodel = AnalyticalModel.objects.get(id=amodel_data["id"])
         m = Metadata(amodel, amodel_inputs["metadata"])
         m.set_metadata("ModelMetadata")
         amodel_data["metadata"] = m.get_metadata("ModelMetadata")
         if amodel_data:
             return Response(amodel_data, status=status.HTTP_201_CREATED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 24
0
 def destroy(self, request, pk=None):
     """
     DEL to delete an existing dataset specified by dataset ID.
     :param request: DEL request
     :param pk: dataset ID to be deleted
     :return: 200/success, 400/bad request, or 401/unauthorized
     """
     if pk is None:
         return Response("No dataset 'id' in request.", status=status.HTTP_400_BAD_REQUEST)
     try:
         dataset = Dataset.objects.get(id=int(pk))
     except Dataset.DoesNotExist:
         return Response("No dataset found for id: {}".format(pk), status=status.HTTP_400_BAD_REQUEST)
     if not IsOwner().has_object_permission(request, self, dataset):
         return Response(status=status.HTTP_401_UNAUTHORIZED)
     # Remove the associated metadata before deleting the dataset itself.
     Metadata(dataset).delete_metadata("DatasetMetadata")
     dataset.delete()
     return Response(status=status.HTTP_200_OK)
Ejemplo n.º 25
0
 def data(self, request):
     """
     POST request that reports model status and, when training is complete,
     runs a prediction with the trained model.
     :param request: request containing workflow_id and model_id (optionally
         a CSV string under "data" to predict on)
     :return: 200/status and prediction data, 400/bad request
     """
     inputs = request.data.dict()
     required_parameters = ["workflow_id", "model_id"]
     if set(required_parameters).issubset(inputs.keys()):
         try:
             workflow = Workflow.objects.get(id=int(inputs["workflow_id"]))
         except ObjectDoesNotExist:
             workflow = None
         try:
             amodel = AnalyticalModel.objects.get(id=int(inputs["model_id"]))
         except ObjectDoesNotExist:
             amodel = None
         if workflow is None or amodel is None:
             # Bug fix: the original assigned the result of list.append
             # (always None) back to `message`, crashing the join; it also
             # read the nonexistent key "amodel_id" instead of "model_id".
             message = []
             if workflow is None:
                 message.append("No workflow found for id: {}".format(inputs["workflow_id"]))
             if amodel is None:
                 message.append("No analytical model found for id: {}".format(inputs["model_id"]))
             return Response(",".join(message), status=status.HTTP_400_BAD_REQUEST)
         elif IsOwnerOfLocationChild().has_object_permission(request, self, workflow):
             response = {}
             meta = Metadata(parent=amodel)
             metadata = meta.get_metadata("ModelMetadata", ['status', 'stage', 'message'])
             response["metadata"] = metadata
             # Training is complete when the stage counter reads "N/N".
             completed = False
             if "stage" in metadata.keys():
                 i = metadata["stage"].split("/")
                 if int(i[0]) == int(i[1]):
                     completed = True
             if completed:
                 if amodel.model:
                     data = None
                     if "data" in inputs.keys():
                         data = pd.read_csv(StringIO(inputs["data"]))
                     response["data"] = DaskTasks.make_prediction(amodel.id, data)
                     response["dataset_id"] = amodel.dataset
             response["analytical_model_id"] = amodel.id
             response["workflow_id"] = workflow.id
             return Response(response, status=status.HTTP_200_OK)
     data = "Missing required parameters: {}".format(", ".join(required_parameters))
     # Missing parameters is a client error: report 400, not 200.
     return Response(data, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 26
0
 def create(self, request):
     """
     POST request that creates a new location.
     :param request: POST request
     :return: New location object
     """
     inputs = load_request(request)
     serializer = self.serializer_class(data=inputs, context={'request': request})
     # TODO: Add project existence and ownership check
     if not serializer.is_valid():
         return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
     location = serializer.save()
     payload = serializer.data
     # Persist any supplied metadata alongside the new location.
     if "metadata" not in inputs.keys():
         inputs["metadata"] = None
     meta = Metadata(location, inputs["metadata"]).set_metadata("LocationMetadata")
     if meta:
         payload["metadata"] = meta
     if location:
         return Response(payload, status=status.HTTP_201_CREATED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Ejemplo n.º 27
0
    def make_prediction(amodel_id, data=None):
        """
        Produce predictions from a previously trained analytical model.
        :param amodel_id: id of the AnalyticalModel whose stored model is used
        :param data: optional DataFrame of inputs to score; when omitted, the
            stored dataset's held-out test split is scored instead
        :return: dict with "results" and "train_score"; when scoring the test
            split, also "residuals" and "test_score"
        """
        amodel = AnalyticalModel.objects.get(id=int(amodel_id))
        dataset = Dataset.objects.get(id=int(amodel.dataset))
        y_data = None

        # Dataset bytes are a CSV; decode and parse into a frame.
        df = pd.read_csv(StringIO(bytes(dataset.data).decode()))
        dataset_m = Metadata(parent=dataset).get_metadata("DatasetMetadata")
        target = dataset_m["response"] if "response" in dataset_m.keys() else "Response"
        attributes = dataset_m["attributes"] if "attributes" in dataset_m.keys() else None
        y = df[target]
        if attributes:
            # Metadata stores the attribute list with single quotes.
            x = df[json.loads(attributes.replace("\'", "\""))]
        else:
            x = df.drop(target, axis=1)

        # Recreate the train/test split used at training time.
        splitter = LinearRegressionAutomatedVB()
        splitter.set_data(x, y)
        x_train = splitter.x_train
        y_train = splitter.y_train
        x_data = splitter.x_test
        y_test = splitter.y_test.to_numpy().flatten()

        if data is not None:
            x_data = data
        # NOTE(security): pickle.loads on stored model bytes is only safe if
        # the database contents are trusted.
        model = pickle.loads(amodel.model)
        response = {
            "results": model.predict(x_data),
            "train_score": model.score(x_train, y_train)
        }
        if data is None:
            # Scoring the held-out split: include residuals and test score.
            response["residuals"] = y_test - response["results"]
            response["test_score"] = model.score(x_data, y_test)
        return response
Ejemplo n.º 28
0
    def __init__(self, dataset_id):
        """
        Load a dataset and its metadata, resolve target/feature labels, and
        prepare a VBHelper for subsequent analysis.
        :param dataset_id: primary key of the Dataset to load
        """
        # TODO: replace the need for the project_id with providing the target variable
        self.dataset_id = dataset_id
        self.dataset = Dataset.objects.get(pk=dataset_id)

        self.df = load_dataset(dataset_id, self.dataset)
        meta = Metadata(parent=self.dataset)
        self.dataset_metadata = meta.get_metadata("DatasetMetadata")

        # Fall back to the conventional label when metadata omits it.
        self.target_label = self.dataset_metadata.get("target", "target")
        self.features_label = self.dataset_metadata.get("features")
        if self.features_label in (None, "*"):
            # No explicit list: use every column except the target.
            self.features_label = list(self.df.columns)
            self.features_label.remove(self.target_label)
        else:
            self.features_label = json.loads(self.features_label)

        self.y_df = self.df[self.target_label].to_frame()
        self.X_df = self.df[self.features_label]

        self.vbhelper = VBHelper(pipeline_id=-1)
        self.vbhelper.setData(X_df=self.X_df, y_df=self.y_df)
Ejemplo n.º 29
0
    def execute_task(project_id, dataset_id, pipeline_id):
        """
        Run a complete training pipeline: load the project, dataset and
        pipeline records, resolve target/feature labels from their metadata,
        configure a VBHelper, and fit or cross-validate the configured
        estimators. Progress and failures are reported via update_status().
        :param project_id: id of the Project owning this run
        :param dataset_id: id of the Dataset to train on
        :param pipeline_id: id of the Pipeline holding estimator settings
        :return: None
        """
        # STAGE 1 - Data and parameter load from db
        update_status(pipeline_id,
                      "Data and Model Setup: Retrieving dataset and pipeline",
                      "1/{}".format(pre_processing_steps),
                      log="Pipeline: {}, Type: {}, Setup: 1/{}".format(
                          pipeline_id, None, pre_processing_steps),
                      message="Cross validation")
        project = Project.objects.get(id=int(project_id))
        dataset = Dataset.objects.get(id=int(dataset_id))
        pipeline = Pipeline.objects.get(id=int(pipeline_id))

        project.dataset = int(dataset_id)
        project.save()

        df = load_dataset(dataset_id, dataset)
        dataset_metadata = Metadata(
            parent=dataset).get_metadata("DatasetMetadata")
        # BUG FIX: get_metadata can return a falsy value (see the guard the
        # original had further down); normalize to a dict before key lookups.
        pipeline_metadata = Metadata(
            parent=pipeline).get_metadata("PipelineMetadata") or {}
        project_metadata = Metadata(
            parent=project).get_metadata("ProjectMetadata")

        # Project metadata supplies fallback labels; dataset metadata wins.
        target_label = project_metadata.get("target")
        features_label = project_metadata.get("features")

        # BUG FIX: the original conditional indexed dataset_metadata["target"]
        # whenever the project defined a target, raising KeyError when only
        # the project (and not the dataset) carried the label.
        if "target" in dataset_metadata:
            target_label = dataset_metadata["target"]
        elif target_label is None:
            target_label = "target"

        # BUG FIX: same KeyError pattern for "features"; keep the project
        # value (possibly None) when the dataset metadata has no entry.
        if "features" in dataset_metadata:
            features_label = dataset_metadata["features"]
        if features_label is None or features_label == "*":
            # No explicit feature list: every column except the target.
            features_label = list(df.columns)
            features_label.remove(target_label)
        else:
            features_label = json.loads(features_label)
        drop_vars = [] if "drop_features" not in project_metadata.keys(
        ) else json.loads(project_metadata["drop_features"].replace(
            "\'", "\""))
        for d in drop_vars:
            # Robustness: ignore drop requests for columns not selected.
            if d in features_label:
                features_label.remove(d)

        # STAGE 2 - Data prep
        update_status(pipeline_id,
                      "Data and Model Setup: Loading data",
                      "2/{}".format(pre_processing_steps),
                      log="Pipeline: {}, Type: {}, Setup: 2/{}".format(
                          pipeline_id, pipeline.name, pre_processing_steps),
                      message="Cross validation")

        target = df[target_label].to_frame()
        if features_label:
            features = df[features_label]
        else:
            features = df.drop(target_label, axis=1)

        # STAGE 3 - VBHelper execution
        update_status(
            pipeline_id,
            "Data and Model Setup: Loading all parameters and settings",
            "3/{}".format(pre_processing_steps),
            log="Pipeline: {}, Type: {}, Setup: 3/{}".format(
                pipeline_id, pipeline.name, pre_processing_steps),
            message="Cross validation")
        # BUG FIX: the original left vbhelper_parameters as None when the
        # pipeline metadata existed without a "parameters" key, then crashed
        # on the item assignment below; default to an empty dict instead.
        if "parameters" in pipeline_metadata:
            vbhelper_parameters = json.loads(
                pipeline_metadata["parameters"].replace("'", "\""))
        else:
            vbhelper_parameters = {}

        vbhelper_parameters["pipeline_id"] = pipeline_id
        outer_cv = pipeline_metadata.get("outer_cv", "True")
        # BUG FIX: pre-bind the names deleted during cleanup so the `del`
        # statements cannot raise NameError (`del model` previously failed on
        # the Model.DoesNotExist path; `del vbhelper` failed if VBHelper()
        # itself raised).
        vbhelper = None
        model = None
        try:
            vbhelper = VBHelper(**vbhelper_parameters)
            if "estimators" in pipeline_metadata.keys():
                est_str = pipeline_metadata["estimators"].replace("\'", "\"")
                estimators = json.loads(est_str)
            else:
                update_status(
                    pipeline_id,
                    "Error: VB Helper requires an estimator.",
                    "-1/{}".format(pre_processing_steps),
                    log="Pipeline: {}, Type: {}, Setup: -1/{}".format(
                        pipeline_id, pipeline.name, pre_processing_steps),
                    message="Cross validation")
                return
            vbhelper.setData(X_df=features, y_df=target)
            inner_cv_dict = {
                'cv_reps': 1,
                'cv_folds': 5,
                'cv_strategy': ('quantile', 5)
            }
            inner_cv = vbhelper.getCV(cv_dict=inner_cv_dict)
            prep_dict = {
                'cat_approach': 'together',
                'impute_strategy': 'impute_middle',
                'cat_idx': vbhelper.cat_idx
            }
            pipe_kwargs = dict(do_prep=not vbhelper.run_stacked,
                               prep_dict=prep_dict,
                               inner_cv=inner_cv,
                               cat_idx=vbhelper.cat_idx,
                               float_idx=vbhelper.float_idx,
                               bestT=False)
            # Build a uniquely-named entry per estimator configuration.
            estimators_dict = {}
            e_i = 0
            for e in estimators:
                name = e["name"] if "name" in e.keys(
                ) else e["type"] + "-{}".format(e_i)
                # De-duplicate names with an increasing numeric suffix.
                n_i = 1
                n_name = name
                while n_name in estimators_dict.keys():
                    n_name = name + "-{}".format(n_i)
                    n_i += 1
                name = n_name
                estimator = DaskTasks.get_estimator(e["type"])
                e_kwargs = copy.copy(pipe_kwargs)
                for k, p in e["parameters"].items():
                    e_kwargs[k] = p
                estimators_dict[name] = {
                    "pipe": estimator,
                    "pipe_kwargs": e_kwargs
                }
                e_i += 1
            vbhelper.setPipeDict(estimators_dict)
            vbhelper.setModelDict()
            if outer_cv == "True":
                vbhelper.runCrossValidate(verbose=True)
                vbhelper.buildCVScoreDict()
            else:
                #TODO: check processing for non-outer-cv instance for data cleanup
                vbhelper.fitEstimators()
            try:
                model = Model.objects.get(pipeline=pipeline)
                model_id = model.id  # NOTE(review): model_id currently unused
            except Model.DoesNotExist:
                model_id = None
            vbhelper.save(message="Completed.")
            del model
        except Exception as e:
            update_status(pipeline_id,
                          "Error: Unknown error executing pipeline",
                          # Consistency fix: was hard-coded "-0/16".
                          "-0/{}".format(pre_processing_steps),
                          log="Pipeline: {}, Type: {}, Error: {}".format(
                              pipeline_id, pipeline.name, e),
                          message="Cross validation")
        del vbhelper
Ejemplo n.º 30
0
class DataExploration:
    """
    Exploratory-analysis views over a stored Dataset.

    Loads the dataset and its metadata once, then exposes missing-value,
    component, kernel-density, and dendrogram summaries built from the same
    VBSummary snapshot.
    """

    def __init__(self, dataset_id):
        """
        Load the dataset and its metadata, resolve target/feature labels, and
        initialize the VBHelper used by the summary views.
        :param dataset_id: primary key of the Dataset to explore
        """
        # TODO: replace the need for the project_id with providing the target variable
        self.dataset_id = dataset_id
        self.dataset = Dataset.objects.get(pk=dataset_id)

        self.df = load_dataset(dataset_id, self.dataset)
        self.dataset_metadata = Metadata(
            parent=self.dataset).get_metadata("DatasetMetadata")

        # Fall back to conventional labels when metadata omits them.
        self.target_label = self.dataset_metadata.get("target", "target")
        self.features_label = self.dataset_metadata.get("features")
        if self.features_label is None or self.features_label == "*":
            # No explicit feature list: every column except the target.
            self.features_label = list(self.df.columns)
            self.features_label.remove(self.target_label)
        else:
            self.features_label = json.loads(self.features_label)

        self.y_df = self.df[self.target_label].to_frame()
        self.X_df = self.df[self.features_label]

        self.vbhelper = VBHelper(pipeline_id=-1)
        self.vbhelper.setData(X_df=self.X_df, y_df=self.y_df)

    def _summary(self):
        """
        Build a VBSummary loaded with the full float X/y snapshot.
        Extracted helper: the four public views previously duplicated this
        setup verbatim.
        """
        data = VBHelper.saveFullFloatXy(X_df=self.X_df,
                                        y_df=self.y_df,
                                        X_df_s=self.vbhelper.X_df_start_order,
                                        y_df_s=self.vbhelper.y_df_start_order)
        vbs = VBSummary()
        vbs.setData(data)
        return vbs

    def get_missing_vals(self):
        """Return the missing-values summary for the dataset."""
        return self._summary().missingVals()

    def get_components(self, num_cols, keep_cats=False):
        """
        Return a component view of the dataset.
        :param num_cols: int-like or comma-separated string of component
            counts; any parse failure degrades to [1]
        :param keep_cats: whether to retain categorical columns
        """
        try:
            if "," in num_cols:
                num_cols = [int(n) for n in num_cols.split(",")]
            else:
                num_cols = [int(num_cols)]
        except Exception:
            # Malformed input (including non-strings) falls back to one
            # component rather than aborting the view.
            num_cols = [1]
        return self._summary().viewComponents(num_cols=num_cols,
                                              keep_cats=keep_cats)

    def get_kerneldensity(self):
        """Return kernel-density pie data for the dataset."""
        return self._summary().kernelDensityPie()

    def get_dendrogram(self, linkage='ward', dist='spearmanr'):
        """
        Return a hierarchical-clustering dendrogram of the features.
        :param linkage: linkage method name
        :param dist: distance metric name
        """
        return self._summary().hierarchicalDendrogram(linkage=linkage,
                                                      dist=dist)