Example #1
0
def _process_selection(selection, original_data):
    """Attach Woodwork typing information to the result of a selection.

    Args:
        selection: The object produced by the selection. Series and DataFrames
            are handled; anything else is returned untouched.
        original_data: The Series or DataFrame the selection was taken from.
            Its ``ww`` accessor is read to obtain the schema to propagate.

    Returns:
        The same ``selection`` object, with Woodwork initialized on it whenever
        a truthy schema could be derived from ``original_data``.
    """
    if _is_series(selection):
        is_dask_series = dd and isinstance(selection, dd.Series)
        if is_dask_series:
            # Dask index values are a delayed object - they have to be computed
            # before they can be compared against the column names below
            index_vals = selection.index.values.compute()
        else:
            index_vals = selection.index.values

        if not _is_dataframe(original_data):
            # A new Series selected from an existing Series
            schema = original_data.ww._schema
        elif set(index_vals) == set(original_data.columns):
            # A single DataFrame row, returned as a Series without Woodwork initialized
            schema = None
        else:
            # A single column selected from a DataFrame
            schema = original_data.ww.schema.columns[selection.name]
            # Drop the index-related tags from the column schema
            schema.semantic_tags = schema.semantic_tags - {'index'
                                                           } - {'time_index'}
            if schema.use_standard_tags:
                # Add the logical type's standard tags back in
                schema.semantic_tags |= schema.logical_type.standard_tags

        if schema:
            selection.ww.init(schema=copy.deepcopy(schema), validate=False)
        return selection

    if _is_dataframe(selection):
        # A new DataFrame selected from an existing DataFrame: keep only the
        # schema entries for the columns that are present in the selection
        kept_columns = list(selection.columns)
        subset_schema = original_data.ww.schema._get_subset_schema(kept_columns)
        selection.ww.init(schema=subset_schema, validate=False)

    # Either a single value or the Series/DataFrame handled above
    return selection
Example #2
0
def _process_selection(selection, original_data):
    """Attach Woodwork typing information to the result of a selection.

    Args:
        selection: The object produced by the selection. Series and DataFrames
            are handled; anything else is returned untouched.
        original_data: The Series or DataFrame the selection was taken from.
            Its ``ww`` accessor is read to obtain the schema to propagate.

    Returns:
        The same ``selection`` object, with Woodwork initialized on it whenever
        a truthy schema could be derived from ``original_data``.
    """
    if _is_series(selection):
        if _is_dask_series(selection):
            # Dask index values are a delayed object - they have to be computed
            # before they can be compared against the column names below
            index_vals = selection.index.values.compute()
        else:
            index_vals = selection.index.values

        if not _is_dataframe(original_data):
            # A new Series selected from an existing Series
            schema = original_data.ww._schema
        elif set(index_vals) == set(original_data.columns):
            # A single DataFrame row, returned as a Series without Woodwork initialized
            schema = None
        else:
            # A single column selected from a DataFrame
            schema = original_data.ww.schema.columns[selection.name]

        if schema:
            selection.ww.init(schema=copy.deepcopy(schema), validate=False)
        return selection

    if _is_dataframe(selection):
        # A new DataFrame selected from an existing DataFrame: keep only the
        # schema entries for the columns that are present in the selection
        kept_columns = list(selection.columns)
        subset_schema = original_data.ww.schema.get_subset_schema(kept_columns)
        selection.ww.init_with_full_schema(schema=subset_schema, validate=False)

    # Either a single value or the Series/DataFrame handled above
    return selection
Example #3
0
            def wrapper(*args, **kwargs):
                # Delegate to the underlying Series attribute and capture what it returns
                result = series_attr(*args, **kwargs)

                # Only Series results get Woodwork initialized; anything else is
                # handed back exactly as the Series operation produced it
                if not _is_series(result):
                    return result

                expected_dtype = _get_valid_dtype(type(result),
                                                  self._schema.logical_type)
                dtype_matches = str(result.dtype) == expected_dtype
                if not dtype_matches:
                    # The returned dtype does not fit the existing logical type, so
                    # warn and return the result without Woodwork initialized
                    details = ('dtype mismatch between original dtype, '
                               f'{expected_dtype}, and returned dtype, {result.dtype}')
                    message = TypingInfoMismatchWarning().get_warning_message(
                        attr, details, 'Series')
                    warnings.warn(message, TypingInfoMismatchWarning)
                    return result

                # Carry the existing typing information over, deep-copying the
                # semantic tags and metadata
                result.ww.init(
                    logical_type=self._schema.logical_type,
                    semantic_tags=copy.deepcopy(self._schema.semantic_tags),
                    description=self._schema.description,
                    metadata=copy.deepcopy(self._schema.metadata),
                    use_standard_tags=self._schema.use_standard_tags)
                return result
Example #4
0
            def wrapper(*args, **kwargs):
                # Delegate to the underlying Series attribute and capture what it returns
                result = series_attr(*args, **kwargs)

                # Only Series results get Woodwork initialized; anything else is
                # handed back exactly as the Series operation produced it
                if not _is_series(result):
                    return result

                expected_dtype = _get_valid_dtype(type(result),
                                                  self._schema.logical_type)
                dtype_matches = str(result.dtype) == expected_dtype
                if dtype_matches:
                    # Reuse the existing schema for the new Series, skipping validation
                    result.ww.init(schema=self.schema, validate=False)
                    return result

                # The returned dtype does not fit the existing logical type, so
                # warn and return the result without Woodwork initialized
                details = ('dtype mismatch between original dtype, '
                           f'{expected_dtype}, and returned dtype, {result.dtype}')
                message = TypingInfoMismatchWarning().get_warning_message(
                    attr, details, 'Series')
                warnings.warn(message, TypingInfoMismatchWarning)
                return result
Example #5
0
            def wrapper(*args, **kwargs):
                # Delegate to the underlying Series attribute and capture what it returns
                result = series_attr(*args, **kwargs)

                if _is_series(result):
                    expected_dtype = self._schema.logical_type._get_valid_dtype(
                        type(result))
                    if _check_data_type_equality(str(result.dtype),
                                                 expected_dtype):
                        # Reuse the existing schema for the new Series, skipping validation
                        result.ww.init(schema=self.schema, validate=False)
                        return result

                    # The returned dtype does not fit the existing logical type, so
                    # warn and return the result without Woodwork initialized
                    details = ("dtype mismatch between original dtype, "
                               f"{expected_dtype}, and returned dtype, {result.dtype}")
                    message = TypingInfoMismatchWarning().get_warning_message(
                        attr, details, "Series")
                    warnings.warn(message, TypingInfoMismatchWarning)
                    return result

                if _is_dataframe(result):
                    # Build a one-column table schema from this Series' schema and use it
                    # as a partial schema for the returned DataFrame
                    column_schema = self.schema
                    # Fall back to the first result column when the Series name is falsy
                    column = self.name or result.columns.to_list()[0]
                    partial_schema = TableSchema(
                        column_names=[column],
                        logical_types={column: column_schema.logical_type},
                        semantic_tags={column: column_schema.semantic_tags},
                        column_metadata={column: column_schema.metadata},
                        use_standard_tags={
                            column: column_schema.use_standard_tags
                        },
                        column_descriptions={column: column_schema.description},
                        column_origins={column: column_schema.origin},
                        validate=False,
                    )
                    result.ww.init_with_partial_schema(partial_schema)

                # The result of the Series operation is returned whether or not
                # Woodwork was initialized on it
                return result
Example #6
0
def test_is_series(sample_df):
    """Check `_is_series` accepts the 'id' selection and rejects `sample_df` itself."""
    id_selection = sample_df['id']
    assert _is_series(id_selection)

    whole_frame_is_series = _is_series(sample_df)
    assert not whole_frame_is_series
Example #7
0
def test_is_series(sample_df):
    """Check `_is_series` accepts the "id" selection and rejects `sample_df` itself."""
    id_selection = sample_df["id"]
    assert _is_series(id_selection)

    whole_frame_is_series = _is_series(sample_df)
    assert not whole_frame_is_series