Example #1
def compute_dataset(formula,
                    datasets,
                    title,
                    unit_id,
                    time_resolution,
                    indicator_id,
                    class_id,
                    creator_path,
                    metric_id=None):
    """
    Compute and store a dataset. Returns the id of the newly created dataset.
    """

    result = compute_formula(formula, datasets)
    data = DatasetData(data_frame=result,
                       unit=unit_id,
                       resolution=time_resolution)

    dataset = Dataset(
        title=title,
        description="Computed formula '%s' with %s" % (formula, ", ".join([
            "'%s' as %s" % (title, variable)
            for variable, dataset in datasets.items()
        ])),
        keywords=", ".join(
            set(
                itertools.chain(
                    [dataset.keywords for dataset in datasets.values()]))),
        version=0,
        # resource related info
        resource_url="not available",
        resource_issued=datetime.datetime.now(),
        # metrics identifier
        is_applied=True,
        metric_id=metric_id,
        # contained data
        time_resolution=time_resolution,
        time_start=data.get_time_start(),
        time_end=data.get_time_end(),
        data=data,
        # references to other services
        # TODO add useful values here
        language_id=0,
        creator_path=creator_path,
        unit_id=unit_id,
        indicator_id=indicator_id,
        class_id=class_id)

    return dataset_api.store(dataset)
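A minimal usage sketch (the formula, dataset ids, resolution value, and creator path below are made up for illustration, and it assumes the internal `get` helper shown in the later examples lives in the same module):

result_id = compute_dataset(
    formula="__1__ / __2__",
    datasets={"__1__": get(1), "__2__": get(2)},  # hypothetical dataset ids
    title="Computed ratio",
    unit_id=0,
    time_resolution="yearly",         # assumed resolution value
    indicator_id=0,
    class_id=0,
    creator_path="/users/alice",      # hypothetical creator path
)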
Example #2
def filter(**kwargs):
    """ Get all datasets matching filter.

    Gets all dataset which fields match the provided kwargs. This might result
    in a table scan if no
    index exists for the given field.
    """
    datasets = Dataset.objects.filter(**kwargs)
    for dataset in datasets:
        dataset.data = DatasetData.from_json(dataset.data)
    return datasets
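A short usage sketch, assuming the Dataset model has a `class_id` field (the value 3 is only an illustration):

# May trigger a table scan if class_id is not indexed.
for dataset in filter(class_id=3):
    print(dataset.title, dataset.data.resolution)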
Example #3
def compute_dataset(formula, datasets, title, unit_id, time_resolution,
                    indicator_id, class_id, creator_path, metric_id=None):
    """
    Compute and store a dataset. Returns the id of the newly created dataset.
    """

    result = compute_formula(formula, datasets)
    data = DatasetData(
        data_frame=result,
        unit=unit_id,
        resolution=time_resolution)

    dataset = Dataset(
        title=title,
        description="Computed formula '%s' with %s" % (
            formula,
            ", ".join(["'%s' as %s" % (title, variable) for
                       variable, dataset in datasets.items()])),
        keywords=", ".join(set(itertools.chain(
            [dataset.keywords for dataset in datasets.values()]))),
        version=0,
        # resource related info
        resource_url="not available",
        resource_issued=datetime.datetime.now(),
        # metrics identifier
        is_applied=True,
        metric_id=metric_id,
        # contained data
        time_resolution=time_resolution,
        time_start=data.get_time_start(),
        time_end=data.get_time_end(),
        data=data,
        # references to other services
        # TODO add useful values here
        language_id=0,
        creator_path=creator_path,
        unit_id=unit_id,
        indicator_id=indicator_id,
        class_id=class_id)

    return dataset_api.store(dataset)
Example #4
def get(dataset_id: int):
    """ Internal API for datasetmanager for basic opeerations.

    The internal api for the datasetmanager supports basic CRDU-like function
    for datasets. It requires no knowledge about the marshalling of the dataset
     models and is just a thin wrapper arround the
    provided models.

    Currently it does not handle creation of datasets, for thos the models
    should be used directly.
    """
    """ Get a dataset by its id. """
    dataset = Dataset.objects.get(pk=dataset_id)
    dataset.data = DatasetData.from_json(dataset.data)
    return dataset
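A usage sketch for the lookup above; the id 1 is a placeholder, and `Dataset.DoesNotExist` is the exception Django raises when no row matches:

try:
    dataset = get(1)
except Dataset.DoesNotExist:
    dataset = None  # nothing stored under that id
else:
    # dataset.data has already been unmarshalled into a DatasetData instance
    print(dataset.title, dataset.data.get_time_start())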
Example #5
    def post(self, request, metrics_id: int):
        """
        Compute a new dataset from a given formula and mappings for variables.

        Example data:

        {
          "title" : "Some test",
          "datasets": [
            {
              "variable": "__1__",
              "dataset": 1
            }
          ],
          "unit_id": 0
        }
        """

        # check if metric exists
        try:
            metric = Metric.objects.get(pk=metrics_id)
        except Metric.DoesNotExist:
            return Response("Unable to find metric %s." % metrics_id,
                            status=status.HTTP_404_NOT_FOUND)

        # check request data
        serializer = OperationalizeSerializer(data=request.DATA,
                                              files=request.FILES)
        if not serializer.is_valid():
            return Response(serializer.errors,
                            status=status.HTTP_400_BAD_REQUEST)

        # check if a dataset with that title already exists
        title = serializer.object.get("title")
        if len(datasets.filter(title=title)) != 0:
            return Response({
                "title": "Dataset name is not unique."},
                status=status.HTTP_400_BAD_REQUEST)

        result_unit = serializer.object.get("unit_id")

        # load required data sets
        id_mapping = serializer.object.get("datasets")
        mapping = {variable: datasets.get(dataset_id) for
                   (variable, dataset_id) in id_mapping.items()}

        # ensure all datasets have the same class
        first_dataset = next(iter(mapping.values()))
        if not all([dataset.class_id == first_dataset.class_id
                    for dataset in mapping.values()]):
            return Response(
                {"datasets": "All datasets need to have the same class"},
                status=status.HTTP_400_BAD_REQUEST)
        result_class = first_dataset.class_id

        # ensure all datasets have the same time_resolution
        if not all([dataset.data.resolution == first_dataset.data.resolution
                    for dataset in mapping.values()]):
            return Response({"datasets": "All datasets need to have the same time resolution"})
        result_time_resolution = first_dataset.data.resolution

        # compute result
        result = compute_formula(metric.formula, mapping)

        # collect remaining time slots
        result = result.dropna(axis='index', how='all')
        # result_time_slots = result.index.values

        # dataset
        data = DatasetData(
            data_frame=result,
            unit=result_unit,
            resolution=result_time_resolution)

        result_time_start = data.get_time_start()
        result_time_end = data.get_time_end()

        dataset = Dataset(
            title=title,
            description="Computed formula '%s' with %s" % (
                metric.formula,
                ", ".join(["'%s' as %s" % (dataset.title, variable) for
                           variable, dataset in mapping.items()])),
            keywords=", ".join(set(itertools.chain(
                [dataset.keywords for dataset in mapping.values()]))),
            version=0,
            # resource related info
            resource_url=reverse("metrics-detail", kwargs={'pk': metrics_id}),
            resource_issued=datetime.now(),
            # metrics identifier
            is_applied=True,
            metric_id=metrics_id,
            # contained data
            time_resolution=result_time_resolution,
            time_start=result_time_start,
            time_end=result_time_end,
            data=data,
            # references to other services
            # TODO add useful values here
            language_id=0,
            creator_path=self.request.user.resource_path,
            unit_id=result_unit,
            indicator_id=metric.indicator_id,
            class_id=result_class)

        dataset_id = datasets.store(dataset)

        return Response({
            "dataset": {
                "id": dataset_id
            }
        })
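A sketch of how a client might call this endpoint, mirroring the example payload from the docstring (the URL and metric id 5 are assumptions; any HTTP client works, `requests` is used here for brevity):

import requests

payload = {
    "title": "Some test",
    "datasets": [{"variable": "__1__", "dataset": 1}],
    "unit_id": 0,
}
# Hypothetical route for metric 5; adjust to the project's actual urlconf.
response = requests.post("http://localhost:8000/metrics/5/datasets/",
                         json=payload)
print(response.status_code, response.json())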
Example #6
def filter(**kwargs):
    """ Get all datasets whose fields match the provided kwargs. """
    datasets = Dataset.objects.filter(**kwargs)
    for dataset in datasets:
        dataset.data = DatasetData.from_json(dataset.data)
    return datasets
Example #7
def get(dataset_id: int):
    """ Get a dataset by its id. """
    dataset = Dataset.objects.get(pk=dataset_id)
    dataset.data = DatasetData.from_json(dataset.data)
    return dataset