def compute_dataset(formula, datasets, title, unit_id, time_resolution,
                    indicator_id, class_id, creator_path, metric_id=None):
    """Compute a formula over the given datasets and store the result.

    Args:
        formula: formula string understood by ``compute_formula``.
        datasets: mapping of formula variable name -> source ``Dataset``.
        title: title of the dataset to create.
        unit_id: unit identifier of the computed data.
        time_resolution: time resolution shared by all source datasets.
        indicator_id: indicator the computed dataset belongs to.
        class_id: class identifier of the computed dataset.
        creator_path: resource path of the creating user.
        metric_id: optional id of the metric the formula came from.

    Returns:
        The id of the newly created dataset.
    """
    result = compute_formula(formula, datasets)
    data = DatasetData(
        data_frame=result, unit=unit_id, resolution=time_resolution)
    dataset = Dataset(
        title=title,
        # BUG FIX: the description previously interpolated the *new*
        # dataset's title for every variable (the loop variable was
        # unused); use each source dataset's own title instead, matching
        # the equivalent code in the metrics view.
        description="Computed formula '%s' with %s" % (
            formula,
            ", ".join(["'%s' as %s" % (source.title, variable)
                       for variable, source in datasets.items()])),
        keywords=", ".join(set(itertools.chain(
            [source.keywords for source in datasets.values()]))),
        version=0,
        # resource related info
        resource_url="not available",
        resource_issued=datetime.datetime.now(),
        # metrics identifier
        is_applied=True,
        metric_id=metric_id,
        # contained data
        time_resolution=time_resolution,
        time_start=data.get_time_start(),
        time_end=data.get_time_end(),
        data=data,
        # references to other services
        # TODO add useful values here
        language_id=0,
        creator_path=creator_path,
        unit_id=unit_id,
        indicator_id=indicator_id,
        class_id=class_id)
    return dataset_api.store(dataset)
def filter(**kwargs):
    """Return all datasets whose fields match the given kwargs.

    Filtering on a field without an index may trigger a table scan.
    The ``data`` attribute of every returned dataset is deserialized
    from its JSON representation into a ``DatasetData`` instance.
    """
    matches = Dataset.objects.filter(**kwargs)
    for match in matches:
        match.data = DatasetData.from_json(match.data)
    return matches
def compute_dataset(formula, datasets, title, unit_id, time_resolution,
                    indicator_id, class_id, creator_path, metric_id=None):
    """Compute a formula over the given datasets and store the result.

    Args:
        formula: formula string understood by ``compute_formula``.
        datasets: mapping of formula variable name -> source ``Dataset``.
        title: title of the dataset to create.
        unit_id: unit identifier of the computed data.
        time_resolution: time resolution shared by all source datasets.
        indicator_id: indicator the computed dataset belongs to.
        class_id: class identifier of the computed dataset.
        creator_path: resource path of the creating user.
        metric_id: optional id of the metric the formula came from.

    Returns:
        The id of the newly created dataset.
    """
    result = compute_formula(formula, datasets)
    data = DatasetData(
        data_frame=result, unit=unit_id, resolution=time_resolution)
    dataset = Dataset(
        title=title,
        # BUG FIX: the description previously interpolated the *new*
        # dataset's title for every variable (the loop variable was
        # unused); use each source dataset's own title instead, matching
        # the equivalent code in the metrics view.
        description="Computed formula '%s' with %s" % (
            formula,
            ", ".join(["'%s' as %s" % (source.title, variable)
                       for variable, source in datasets.items()])),
        keywords=", ".join(set(itertools.chain(
            [source.keywords for source in datasets.values()]))),
        version=0,
        # resource related info
        resource_url="not available",
        resource_issued=datetime.datetime.now(),
        # metrics identifier
        is_applied=True,
        metric_id=metric_id,
        # contained data
        time_resolution=time_resolution,
        time_start=data.get_time_start(),
        time_end=data.get_time_end(),
        data=data,
        # references to other services
        # TODO add useful values here
        language_id=0,
        creator_path=creator_path,
        unit_id=unit_id,
        indicator_id=indicator_id,
        class_id=class_id)
    return dataset_api.store(dataset)
def get(dataset_id: int):
    """Get a dataset by its id.

    Note: this function belongs to the internal API of the
    datasetmanager, which supports basic CRUD-like functions for
    datasets.  It requires no knowledge about the marshalling of the
    dataset models and is just a thin wrapper around the provided
    models.  Creation of datasets is not handled here; for that the
    models should be used directly.

    Raises:
        Dataset.DoesNotExist: if no dataset with that id exists.
    """
    dataset = Dataset.objects.get(pk=dataset_id)
    # deserialize the stored JSON payload into a DatasetData instance
    dataset.data = DatasetData.from_json(dataset.data)
    return dataset
def post(self, request, metrics_id: int):
    """Compute a new dataset from a given formula and mappings for variables.

    Example data:
    {
        "title" : "Some test",
        "datasets": [
            {
                "variable": "__1__",
                "dataset": 1
            }
        ],
        "unit_id": 0
    }
    """
    # check if metric exists
    # BUG FIX: Metric.objects.get() raises DoesNotExist instead of
    # returning None, so the previous "is None" check was dead code, the
    # metric was queried twice, and the 404 message referenced an
    # undefined name "pk" (NameError).
    try:
        metric = Metric.objects.get(pk=metrics_id)
    except Metric.DoesNotExist:
        return Response("Unable to find metric %s." % metrics_id,
                        status=status.HTTP_404_NOT_FOUND)

    # check request data
    serializer = OperationalizeSerializer(data=request.DATA,
                                          files=request.FILES)
    if not serializer.is_valid():
        return Response(serializer.errors,
                        status=status.HTTP_400_BAD_REQUEST)

    # check if a dataset with that title already exists
    title = serializer.object.get("title")
    if len(datasets.filter(title=title)) != 0:
        return Response({"title": "Dataset name is not unique."},
                        status=status.HTTP_400_BAD_REQUEST)

    result_unit = serializer.object.get("unit_id")

    # load required datasets
    id_mapping = serializer.object.get("datasets")
    mapping = {variable: datasets.get(dataset_id)
               for (variable, dataset_id) in id_mapping.items()}

    # ensure all datasets have the same class
    first_dataset = next(iter(mapping.values()))
    if not all(dataset.class_id == first_dataset.class_id
               for dataset in mapping.values()):
        # BUG FIX: validation errors previously returned HTTP 200
        return Response(
            {"datasets": "All datasets need to have the same class"},
            status=status.HTTP_400_BAD_REQUEST)
    result_class = first_dataset.class_id

    # ensure all datasets have the same time_resolution
    if not all(dataset.data.resolution == first_dataset.data.resolution
               for dataset in mapping.values()):
        # BUG FIX: validation errors previously returned HTTP 200
        return Response(
            {"datasets":
             "All datasets need to have the same time resolution"},
            status=status.HTTP_400_BAD_REQUEST)
    result_time_resolution = first_dataset.data.resolution

    # compute result
    result = compute_formula(metric.formula, mapping)

    # collect remaining time slots: drop rows where every value is NaN
    result = result.dropna('index', 'all')
    # result_time_slots = result.index.values

    # dataset
    data = DatasetData(
        data_frame=result,
        unit=result_unit,
        resolution=result_time_resolution)
    result_time_start = data.get_time_start()
    result_time_end = data.get_time_end()
    dataset = Dataset(
        title=title,
        description="Computed formula '%s' with %s" % (
            metric.formula,
            ", ".join(["'%s' as %s" % (dataset.title, variable)
                       for variable, dataset in mapping.items()])),
        keywords=", ".join(set(itertools.chain(
            [dataset.keywords for dataset in mapping.values()]))),
        version=0,
        # resource related info
        resource_url=reverse("metrics-detail", kwargs={'pk': metrics_id}),
        resource_issued=datetime.now(),
        # metrics identifier
        is_applied=True,
        metric_id=metrics_id,
        # contained data
        time_resolution=result_time_resolution,
        time_start=result_time_start,
        time_end=result_time_end,
        data=data,
        # references to other services
        # TODO add useful values here
        language_id=0,
        creator_path=self.request.user.resource_path,
        unit_id=result_unit,
        indicator_id=metric.indicator_id,
        class_id=result_class)
    dataset_id = datasets.store(dataset)
    return Response({
        "dataset": {
            "id": dataset_id
        }
    })
def filter(**kwargs):
    """Return all datasets matching the given field filters.

    Each returned dataset has its ``data`` attribute deserialized from
    JSON into a ``DatasetData`` object.
    """
    result = Dataset.objects.filter(**kwargs)
    for entry in result:
        entry.data = DatasetData.from_json(entry.data)
    return result
def get(dataset_id: int):
    """Fetch a single dataset by primary key.

    The stored JSON payload in ``data`` is deserialized into a
    ``DatasetData`` instance before the dataset is returned.
    """
    found = Dataset.objects.get(pk=dataset_id)
    found.data = DatasetData.from_json(found.data)
    return found