Ejemplo n.º 1
0
def __build_columns(dataset, dframe, functions, name, no_index):
    """Build one named column per function by applying it across `dframe`.

    Each function is applied row-wise (`axis=1`) with `dataset` passed as
    an extra positional argument.  The resulting column's name comes from
    `make_unique`, which is given `name` and the names of the columns built
    so far (presumably to avoid clashes between them -- confirm against
    `make_unique`).

    :param dataset: Passed through to every function as an extra argument.
    :param dframe: The object whose `apply` method produces each column.
    :param functions: Iterable of callables to apply to each row.
    :param name: The base name for the resulting columns.
    :param no_index: If truthy, replace each column's index via
        `reset_index(drop=True)`.
    :returns: A list of the built columns, in the order of `functions`.
    """
    built = []

    for func in functions:
        series = dframe.apply(func, axis=1, args=(dataset,))
        # Only the names of columns produced earlier in this call are
        # considered when picking a name for the new column.
        taken = [existing.name for existing in built]
        series.name = make_unique(name, taken)

        built.append(series.reset_index(drop=True) if no_index else series)

    return built
Ejemplo n.º 2
0
def __build_columns(dataset, dframe, functions, name, no_index):
    """Apply each function to the rows of `dframe` and collect the results.

    Every function is run through `dframe.apply` with `axis=1` and receives
    `dataset` as an additional positional argument.  Each result is named
    with whatever `make_unique` returns for `name` and the names of the
    previously collected columns.

    :param dataset: Extra argument forwarded to each function.
    :param dframe: The object providing the `apply` method.
    :param functions: Iterable of callables, one per output column.
    :param name: The base name handed to `make_unique`.
    :param no_index: If truthy, each column has its index reset with
        `drop=True` before being collected.
    :returns: A list with one column per function.
    """
    results = []

    for fn in functions:
        result = dframe.apply(fn, axis=1, args=(dataset,))
        earlier_names = [prior.name for prior in results]
        result.name = make_unique(name, earlier_names)

        # Resetting happens after naming, on the already-named column.
        if no_index:
            result = result.reset_index(drop=True)

        results.append(result)

    return results
Ejemplo n.º 3
0
    def __check_name_and_make_unique(self, name, dataset):
        """Reject a name already used as a label, otherwise make it unique.

        The name is first checked against `dataset.labels`; a match is an
        error.  If it passes, it is handed to `make_unique` together with
        the keys of `dataset.schema`.

        :param name: The candidate name.
        :param dataset: The dataset supplying `labels` and `schema`.
        :raises: `UniqueCalculationError` if `name` is in `dataset.labels`.
        :returns: The result of `make_unique` for `name` against the schema
            keys.
        """
        labels = dataset.labels

        if name not in labels:
            return make_unique(name, dataset.schema.keys())

        raise UniqueCalculationError(name, labels)
Ejemplo n.º 4
0
def _check_name_and_make_unique(name, dataset):
    """Reject a name already used as a label, otherwise make it unique.

    The name is first checked against `dataset.labels`; a match is an
    error.  If it passes, it is handed to `make_unique` together with the
    keys of `dataset.schema`.

    :param name: The candidate name.
    :param dataset: The dataset supplying `labels` and `schema`.
    :raises: `UniqueCalculationError` if `name` is in `dataset.labels`.
    :returns: The result of `make_unique` for `name` against the schema
        keys.
    """
    labels = dataset.labels

    if name not in labels:
        return make_unique(name, dataset.schema.keys())

    raise UniqueCalculationError(name, labels)
Ejemplo n.º 5
0
    def parse_columns(self, formula, name, dframe=None):
        """Parse `formula` and apply the resulting functions to a dframe.

        The formula is parsed by `self.parser.parse_formula` into functions.
        Each function is applied row-wise (`axis=1`) with
        `self.parser.context` as an extra positional argument, and the
        result is named via `make_unique` using `name` and the names of the
        columns already produced in this call.

        :param formula: The formula handed to the parser.
        :param name: The base name for the resulting columns.
        :param dframe: The object to apply the functions to; when None,
            `self.dataset.dframe()` is used.
        :returns: A list with one column per parsed function.
        """
        frame = self.dataset.dframe() if dframe is None else dframe

        parsed = []

        for func in self.parser.parse_formula(formula):
            series = frame.apply(func, axis=1, args=(self.parser.context,))
            taken = [done.name for done in parsed]
            series.name = make_unique(name, taken)
            parsed.append(series)

        return parsed
Ejemplo n.º 6
0
    def save(self, dataset, formula, name, group_str=None):
        """Validate, save, and schedule the computation of a formula.

        `formula` and the groups derived from `group_str` are validated for
        `dataset` by a `Calculator`.  A record for the calculation is then
        saved under `name`, and `calculate_task` is handed to `call_async`
        to compute it.

        The record is saved with its state set to `STATE_PENDING`
        (**pending**); per the original documentation it moves to a
        **ready** state once the calculation has finished processing.

        :param dataset: The DataSet to save.
        :param formula: The formula to save.
        :param name: The name of the formula.
        :param group_str: Columns to group on.
        :type group_str: String, list of strings, or None.

        :raises: `ParseError` if an invalid formula was supplied.
        :returns: The record dict that was saved.
        """
        calculator = Calculator(dataset)

        # Validating also ensures that the formula is parsable.
        if group_str:
            groups = self.split_groups(group_str)
        else:
            groups = []

        aggregation = calculator.validate(formula, groups)

        if not aggregation:
            # Only non-aggregations have their name made unique, checked
            # against both the labels and the schema keys.
            name = make_unique(name, dataset.labels + dataset.schema.keys())
        elif not group_str:
            # An aggregation with no group is stored with an empty string
            # as its group.
            group_str = ''

        record = {
            DATASET_ID: dataset.dataset_id,
            self.AGGREGATION: aggregation,
            self.FORMULA: formula,
            self.GROUP: group_str,
            self.NAME: name,
            self.STATE: self.STATE_PENDING,
        }

        # NOTE(review): super(self.__class__, self) recurses without end if
        # this class is ever subclassed; naming the class explicitly would
        # avoid that.
        super(self.__class__, self).save(record)

        call_async(calculate_task, self, dataset)

        return record