Example #1
def __propagate_column(dataset, parent_dataset):
    """Propagate columns in `parent_dataset` to `dataset`.

    When a new calculation is added to a dataset this will propagate the
    new column to all child (merged) datasets.

    :param dataset: The child dataset.
    :param parent_dataset: The parent dataset to propagate from.
    """
    # delete the rows that came from this parent from the dataset
    dataset.remove_parent_observations(parent_dataset.dataset_id)

    # get this dataset without the out-of-date parent rows
    dframe = dataset.dframe(keep_parent_ids=True)

    # create new dframe from the updated parent and add parent id
    parent_dframe = add_parent_column(parent_dataset.dframe(),
                                      parent_dataset.dataset_id)

    # merge this new dframe with the existing dframe
    updated_dframe = concat([dframe, parent_dframe])

    # save new dframe (updates schema)
    dataset.replace_observations(updated_dframe)
    dataset.clear_summary_stats()

    # recurse into any datasets merged from this one
    for merged_dataset in dataset.merged_datasets:
        __propagate_column(merged_dataset, dataset)
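
The helper every example on this page leans on is add_parent_column. A minimal sketch of what it might do, assuming rows get tagged with a reserved parent-ID column (the real column name and implementation are not shown on this page):

from pandas import DataFrame

# assumed reserved marker column; the actual name may differ
PARENT_DATASET_ID = '__parent_dataset_id'

def add_parent_column(dframe, parent_dataset_id):
    """Tag every row of `dframe` with the ID of its parent dataset."""
    dframe[PARENT_DATASET_ID] = parent_dataset_id
    return dframe

df = add_parent_column(DataFrame({'x': [1, 2]}), 'abc123')
# every row of df now carries '__parent_dataset_id' == 'abc123'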
Example #2
def calculate_updates(dataset, new_data, new_dframe_raw=None,
                      parent_dataset_id=None, update_id=None):
    """Update dataset with `new_data`.

    This can result in race-conditions when:

    - deleting ``controllers.Datasets.DELETE``
    - updating ``controllers.Datasets.POST([dataset_id])``

    Therefore, perform these actions asynchronously.

    :param new_data: Data to update this dataset with.
    :param new_dframe_raw: DataFrame to update this dataset with.
    :param parent_dataset_id: If passed, tag the new rows with this ID as
        their parent dataset ID, default is None.
    :param update_id: ID of the pending update being processed, default
        is None.
    """
    __ensure_ready(dataset, update_id)

    if new_dframe_raw is None:
        new_dframe_raw = dframe_from_update(dataset, new_data)

    new_dframe = recognize_dates(new_dframe_raw, dataset.schema)

    new_dframe = __add_calculations(dataset, new_dframe)

    # set parent id if provided
    if parent_dataset_id:
        new_dframe = add_parent_column(new_dframe, parent_dataset_id)

    dataset.append_observations(new_dframe)
    dataset.clear_summary_stats()

    propagate(dataset, new_dframe=new_dframe, update={'add': new_dframe_raw})

    dataset.update_complete(update_id)
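
The docstring's advice to perform these actions asynchronously could be realized with a task queue. A hedged sketch using Celery; the broker URL, the task wrapper, and the Dataset.find_one lookup are illustrative assumptions, not part of this codebase:

from celery import Celery

celery_app = Celery('updates', broker='redis://localhost:6379/0')

@celery_app.task
def calculate_updates_task(dataset_id, new_data, **kwargs):
    # re-fetch the dataset inside the worker so that only plain,
    # serializable values cross the queue; find_one is hypothetical
    dataset = Dataset.find_one(dataset_id)
    calculate_updates(dataset, new_data, **kwargs)

# callers enqueue the work instead of invoking calculate_updates inline:
# calculate_updates_task.delay(dataset.dataset_id, new_data)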
Example #3
def __merge_datasets(datasets, mapping):
    """Merge two or more datasets."""
    dframes = []

    if not mapping:
        mapping = {}

    for dataset in datasets:
        dframe = dataset.dframe()
        column_map = mapping.get(dataset.dataset_id)

        # align column names across datasets if a rename map is supplied
        if column_map:
            dframe = dframe.rename(columns=column_map)

        # tag each row with the ID of the dataset it came from
        dframe = add_parent_column(dframe, dataset.dataset_id)
        dframes.append(dframe)

    return concat(dframes, ignore_index=True)
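
A tiny self-contained illustration of what `mapping` buys you: columns from different datasets are renamed into agreement before concat stacks them (the dataset IDs and column names below are made up):

from pandas import DataFrame, concat

dframes_by_id = {'ds1': DataFrame({'cost': [1, 2]}),
                 'ds2': DataFrame({'amount': [3]})}
mapping = {'ds2': {'amount': 'cost'}}

dframes = [dframe.rename(columns=mapping.get(dataset_id, {}))
           for dataset_id, dframe in dframes_by_id.items()]
print(concat(dframes, ignore_index=True))
#    cost
# 0     1
# 1     2
# 2     3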
Example #4
    def save(self, dataset):
        """Save this aggregation.

        If an aggregated dataset for this aggregation's group already
        exists, store the aggregation in that dataset; otherwise create a
        new aggregated dataset and store the aggregation there.

        """
        new_dframe = self.aggregation.eval(self.columns)
        new_dframe = add_parent_column(new_dframe, dataset.dataset_id)

        a_dataset = dataset.aggregated_dataset(self.groups)

        if a_dataset is None:
            a_dataset = aggregated_dataset(dataset, new_dframe, self.groups)
        else:
            a_dframe = a_dataset.dframe()
            new_dframe = group_join(self.groups, a_dframe, new_dframe)
            a_dataset.replace_observations(new_dframe)

        self.new_dframe = new_dframe
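
group_join itself is not shown on this page. One plausible sketch, assuming `groups` is a list of group-column names and that new groups from the fresh aggregation should be kept alongside existing ones (the real join semantics may differ):

def group_join(groups, left, right):
    # outer-join the stored and freshly computed aggregates on the
    # group columns: existing groups line up, new groups are appended
    return left.merge(right, on=groups, how='outer')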
Example #5
    def update(self, dataset, child_dataset, formula, reducible):
        """Attempt to reduce an update and store."""
        parent_dataset_id = dataset.dataset_id

        # get a dframe including only the rows from this parent
        dframe = rows_for_parent_id(child_dataset.dframe(
            keep_parent_ids=True, reload_=True), parent_dataset_id)

        # remove this parent's rows from the child
        child_dataset.remove_parent_observations(parent_dataset_id)

        if reducible and self.__is_reducible():
            dframe = self.aggregation.reduce(dframe, self.columns)
        else:
            dframe = self.updated_dframe(dataset, formula, dframe)

        new_a_dframe = concat([child_dataset.dframe(), dframe])
        new_a_dframe = add_parent_column(new_a_dframe, parent_dataset_id)
        child_dataset.replace_observations(new_a_dframe)

        return child_dataset.dframe()
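
rows_for_parent_id is the read-side counterpart of add_parent_column. A sketch that reuses the assumed marker column from the sketch under Example #1 (whether the real helper also drops the marker column is not shown here):

def rows_for_parent_id(dframe, parent_dataset_id):
    # keep only the rows tagged with this parent's ID
    return dframe[dframe[PARENT_DATASET_ID] == parent_dataset_id]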
Example #6
def calculate_updates(dataset, new_data, new_dframe_raw=None,
                      parent_dataset_id=None, update_id=None):
    """Update dataset with `new_data`.

    This can result in race-conditions when:

    - deleting ``controllers.Datasets.DELETE``
    - updating ``controllers.Datasets.POST([dataset_id])``

    Therefore, perform these actions asynchronously.

    :param new_data: Data to update this dataset with.
    :param new_dframe_raw: DataFrame to update this dataset with.
    :param parent_dataset_id: If passed, tag the new rows with this ID as
        their parent dataset ID, default is None.
    :param update_id: ID of the pending update being processed, default
        is None.
    """
    # drop the pending update and bail out if it cannot be applied
    if not __update_is_valid(dataset, new_dframe_raw):
        dataset.remove_pending_update(update_id)
        return

    __ensure_ready(dataset, update_id)

    if new_dframe_raw is None:
        new_dframe_raw = dframe_from_update(dataset, new_data)

    new_dframe = recognize_dates(new_dframe_raw, dataset.schema)

    new_dframe = __add_calculations(dataset, new_dframe)

    # set parent id if provided
    if parent_dataset_id:
        new_dframe = add_parent_column(new_dframe, parent_dataset_id)

    dataset.append_observations(new_dframe)
    dataset.clear_summary_stats()

    propagate(dataset, new_dframe=new_dframe, update={'add': new_dframe_raw})

    dataset.update_complete(update_id)
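
__update_is_valid is only called, never defined, on this page. One plausible reading, assuming an update is valid when it introduces no columns outside the dataset's schema (the real check may differ):

def __update_is_valid(dataset, new_dframe):
    # a None dframe is built later from `new_data`, so let it through
    if new_dframe is None:
        return True
    return set(new_dframe.columns).issubset(dataset.schema.keys())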