Example #1
0
    def contains_point(self, point: Point):
        """ Checks if point belongs to the hypergrid.

        Membership requires two things. First, for every dimension of the root
        hypergrid the point must both specify the dimension and carry a value
        that is within that dimension's bounds.

        Second, for every pivot dimension present in the point we locate each
        subgrid joined on that dimension; whenever the point's pivot value
        falls inside a subgrid's join dimension, the corresponding sub-point
        must be present and belong to that subgrid. Because subgrids may
        themselves be hierarchical, the sub-point check recurses naturally.

        :param point:
        :return:
        """

        # Every root dimension must be specified and in range.
        for dimension in self._dimensions:
            value = point.get(dimension.name)
            if value is None or value not in dimension:
                return False

        # Any subgrid activated by the point's pivot value must contain
        # the corresponding sub-point.
        for pivot_dimension_name, joined_subgrids in self.joined_subgrids_by_pivot_dimension.items():
            for joined_subgrid in joined_subgrids:
                if point[pivot_dimension_name] not in joined_subgrid.join_dimension:
                    continue
                subgrid = joined_subgrid.subgrid
                if subgrid.name not in point or point[subgrid.name] not in subgrid:
                    return False

        return True
    def test_hierachical_spaces(self):
        """Verifies membership checks on a hierarchical hypergrid for valid and invalid configs."""

        # Every config shares the same root coordinates.
        common = dict(num_readers=1, log2_buffer_size=10)

        valid_config_no_emergency_buffer = Point(use_emergency_buffer=False, **common)

        valid_config_with_emergency_buffer = Point(
            use_emergency_buffer=True,
            emergency_buffer_config=Point(
                log2_emergency_buffer_size=2,
                use_colors=False
            ),
            **common
        )

        valid_config_with_emergency_buffer_colors = Point(
            use_emergency_buffer=True,
            emergency_buffer_config=Point(
                log2_emergency_buffer_size=2,
                use_colors=True,
                emergency_buffer_color=Point(color='Crimson')
            ),
            **common
        )

        # Redundant coordinates outside the active subgrid are harmless.
        valid_config_with_emergency_buffer_and_redundant_coordinates = Point(
            use_emergency_buffer=False,
            log2_emergency_buffer_size=2,
            **common
        )

        # Missing the subgrid config that use_emergency_buffer=True demands.
        another_invalid_config_with_emergency_buffer = Point(use_emergency_buffer=True, **common)

        # Subgrid config present, but with an out-of-range value.
        yet_another_invalid_config_with_emergency_buffer = Point(
            use_emergency_buffer=True,
            emergency_buffer_config=Point(
                log2_emergency_buffer_size=40
            ),
            **common
        )

        assert valid_config_no_emergency_buffer in self.root_communication_channel_parameter_space

        for valid_point in (
                valid_config_no_emergency_buffer,
                valid_config_with_emergency_buffer,
                valid_config_with_emergency_buffer_colors,
                valid_config_with_emergency_buffer_and_redundant_coordinates,
        ):
            assert valid_point in self.hierarchical_settings

        for invalid_point in (
                another_invalid_config_with_emergency_buffer,
                yet_another_invalid_config_with_emergency_buffer,
        ):
            assert invalid_point not in self.hierarchical_settings
Example #3
0
    def get_valid_rows_index(self, original_dataframe) -> pd.Index:
        """Returns an index of all rows in the dataframe that belong to this Hypergrid.

        Valid rows are rows with no NaNs and with values for all dimensions in the required ranges.

        :param original_dataframe: dataframe whose columns must be a superset of this grid's dimension names.
        :return: index of the rows that belong to this hypergrid.
        """
        assert set(original_dataframe.columns.values).issuperset(set(self.dimension_names))

        # Restrict the check to this grid's own dimensions; extra columns never invalidate a row.
        dataframe = original_dataframe[self.dimension_names]

        if self.is_hierarchical():
            # TODO: this can be optimized. Do everything we did for non-hierarchical hypergrids,
            # but also evaluate constraints imposed by join dimensions.
            #
            # Rows are looked up by label (row[dim_name]) rather than by position (row[i]):
            # positional integer access on a label-indexed Series is deprecated in modern pandas.
            return dataframe[dataframe.apply(
                lambda row: Point(**{dim_name: row[dim_name] for dim_name in self.dimension_names}) in self,
                axis=1
            )].index

        # Start from all rows without NaNs, then intersect away the rows whose
        # values fall outside any single dimension's allowed range.
        valid_rows_index = dataframe.index[dataframe.notnull().all(axis=1)]

        for dimension in self.dimensions:
            column = dataframe[dimension.name]
            if isinstance(dimension, ContinuousDimension):
                # Respect open vs. closed interval ends.
                lower_mask = column >= dimension.min if dimension.include_min else column > dimension.min
                upper_mask = column <= dimension.max if dimension.include_max else column < dimension.max
                valid_rows_index = valid_rows_index.intersection(dataframe[lower_mask].index)
                valid_rows_index = valid_rows_index.intersection(dataframe[upper_mask].index)

            elif isinstance(dimension, DiscreteDimension):
                # Discrete dimensions are closed intervals.
                valid_rows_index = valid_rows_index.intersection(dataframe[column >= dimension.min].index)
                valid_rows_index = valid_rows_index.intersection(dataframe[column <= dimension.max].index)

            elif isinstance(dimension, CategoricalDimension):
                valid_rows_index = valid_rows_index.intersection(dataframe[column.isin(dimension.values_set)].index)

            else:
                raise ValueError(f"Unsupported dimension type: {type(dimension)}")

        return valid_rows_index
Example #4
0
    def random(self, point=None):
        """Draws a random point from this hypergrid, honoring any coordinates already set.

        Root dimensions absent from the point are sampled uniformly from their
        dimension. Whenever the point's pivot value activates a joined subgrid,
        a random sub-point is drawn from that subgrid as well.
        """
        if point is None:
            point = Point()

        # Fill in any root dimensions the caller did not pin down.
        for dimension in self._dimensions:
            if dimension.name not in point:
                point[dimension.name] = dimension.random()

        # Sample a sub-point for every subgrid activated by the pivot values.
        for pivot_dimension_name, joined_subgrids in self.joined_subgrids_by_pivot_dimension.items():
            for joined_subgrid in joined_subgrids:
                if point[pivot_dimension_name] not in joined_subgrid.join_dimension:
                    continue
                point[joined_subgrid.subgrid.name] = joined_subgrid.subgrid.random()

        return point
Example #5
0
    def filter_out_invalid_rows(self,
                                original_dataframe: pd.DataFrame,
                                exclude_extra_columns=True) -> pd.DataFrame:
        """Returns a dataframe containing only valid rows from the original_dataframe.

        Valid rows are rows with no NaNs and with values for all dimensions in the required ranges.
        If there are additional columns, they will be dropped unless exclude_extra_columns == False.

        :param original_dataframe: dataframe whose columns must be a superset of this grid's dimension names.
        :param exclude_extra_columns: if True, columns that do not correspond to a dimension are dropped.
        :return: dataframe restricted to the valid rows (and optionally to the dimension columns).
        """
        assert set(original_dataframe.columns.values).issuperset(set(self.dimension_names))

        # Only this grid's own dimensions participate in validity checks.
        dataframe = original_dataframe[self.dimension_names]

        if self.is_hierarchical():
            # TODO: this can be optimized. Do everything we did for non-hierarchical hypergrids,
            # but also evaluate constraints imposed by join dimensions.
            #
            # Rows are looked up by label (row[dim_name]) rather than by position (row[i]):
            # positional integer access on a label-indexed Series is deprecated in modern pandas.
            valid_rows_index = dataframe[dataframe.apply(
                lambda row: Point(**{dim_name: row[dim_name] for dim_name in self.dimension_names}) in self,
                axis=1
            )].index
        else:
            # Start from all rows without NaNs, then intersect away the rows whose
            # values fall outside any single dimension's allowed range.
            valid_rows_index = dataframe.index[dataframe.notnull().all(axis=1)]

            for dimension in self.dimensions:
                column = dataframe[dimension.name]
                if isinstance(dimension, ContinuousDimension):
                    # Respect open vs. closed interval ends.
                    lower_mask = column >= dimension.min if dimension.include_min else column > dimension.min
                    upper_mask = column <= dimension.max if dimension.include_max else column < dimension.max
                    valid_rows_index = valid_rows_index.intersection(dataframe[lower_mask].index)
                    valid_rows_index = valid_rows_index.intersection(dataframe[upper_mask].index)

                elif isinstance(dimension, DiscreteDimension):
                    # Discrete dimensions are closed intervals.
                    valid_rows_index = valid_rows_index.intersection(dataframe[column >= dimension.min].index)
                    valid_rows_index = valid_rows_index.intersection(dataframe[column <= dimension.max].index)

                elif isinstance(dimension, CategoricalDimension):
                    valid_rows_index = valid_rows_index.intersection(dataframe[column.isin(dimension.values_set)].index)

                else:
                    raise ValueError(f"Unsupported dimension type: {type(dimension)}")

        if exclude_extra_columns:
            return dataframe.loc[valid_rows_index]
        return original_dataframe.loc[valid_rows_index]