def _process_selection(selection, original_data):
    """Initialize Woodwork on the result of a selection from ``original_data``.

    Args:
        selection: Result of the selection. Series and DataFrame results may
            have Woodwork initialized on them; any other value is passed
            through untouched.
        original_data: The Series or DataFrame the selection was taken from.
            Its ``ww`` accessor supplies the schema.

    Returns:
        The same ``selection`` object that was passed in.
    """
    if _is_series(selection):
        index_vals = selection.index.values
        if dd and isinstance(selection, dd.Series):
            # Dask index values are delayed and cannot be compared against the
            # column names below until they are computed.
            index_vals = index_vals.compute()

        if not _is_dataframe(original_data):
            # A new Series selected from an existing Series.
            schema = original_data.ww._schema
        elif set(index_vals) == set(original_data.columns):
            # A single row selected from a DataFrame: it comes back as a
            # Series, which is left without Woodwork initialized.
            schema = None
        else:
            # A single column selected from a DataFrame.
            schema = original_data.ww.schema.columns[selection.name]
            # NOTE(review): the tags are changed on the object returned by
            # ``ww.schema`` before it is deep-copied below - confirm that the
            # accessor hands back a copy rather than the parent's live schema.
            schema.semantic_tags = schema.semantic_tags - {'index'} - {'time_index'}
            if schema.use_standard_tags:
                schema.semantic_tags |= schema.logical_type.standard_tags

        if schema:
            selection.ww.init(schema=copy.deepcopy(schema), validate=False)
        return selection

    if _is_dataframe(selection):
        # A new DataFrame selected from an existing DataFrame: keep only the
        # schema entries for the columns that survived the selection.
        kept_columns = list(selection.columns)
        new_schema = original_data.ww.schema._get_subset_schema(kept_columns)
        selection.ww.init(schema=new_schema, validate=False)

    # Either a single value, or a DataFrame that was initialized just above.
    return selection
def _process_selection(selection, original_data):
    """Initialize Woodwork on the result of a selection from ``original_data``.

    Args:
        selection: Result of the selection. Series and DataFrame results may
            have Woodwork initialized on them; any other value is passed
            through untouched.
        original_data: The Series or DataFrame the selection was taken from.
            Its ``ww`` accessor supplies the schema.

    Returns:
        The same ``selection`` object that was passed in.
    """
    if _is_series(selection):
        index_vals = selection.index.values
        if _is_dask_series(selection):
            # Dask index values are delayed and cannot be compared against the
            # column names below until they are computed.
            index_vals = index_vals.compute()

        if not _is_dataframe(original_data):
            # A new Series selected from an existing Series.
            schema = original_data.ww._schema
        elif set(index_vals) == set(original_data.columns):
            # A single row selected from a DataFrame: it comes back as a
            # Series, which is left without Woodwork initialized.
            schema = None
        else:
            # A single column selected from a DataFrame.
            schema = original_data.ww.schema.columns[selection.name]

        if schema:
            selection.ww.init(schema=copy.deepcopy(schema), validate=False)
        return selection

    if _is_dataframe(selection):
        # A new DataFrame selected from an existing DataFrame: keep only the
        # schema entries for the columns that survived the selection.
        kept_columns = list(selection.columns)
        new_schema = original_data.ww.schema.get_subset_schema(kept_columns)
        selection.ww.init_with_full_schema(schema=new_schema, validate=False)

    # Either a single value, or a DataFrame that was initialized just above.
    return selection
def wrapper(*args, **kwargs):
    """Call the wrapped Series attribute and carry typing info onto the result.

    All positional and keyword arguments are forwarded to ``series_attr``.
    When the call returns a Series whose dtype string equals the valid dtype
    for the current logical type, Woodwork is initialized on it from the
    existing schema's fields; on a dtype mismatch a
    ``TypingInfoMismatchWarning`` is issued instead.

    Returns:
        Whatever ``series_attr`` returned, whether or not Woodwork was
        initialized on it.
    """
    # Make the Series call and intercept its result.
    result = series_attr(*args, **kwargs)
    if not _is_series(result):
        return result

    schema = self._schema
    valid_dtype = _get_valid_dtype(type(result), schema.logical_type)
    if str(result.dtype) == valid_dtype:
        # The dtype still fits the logical type, so the typing information can
        # be carried over. Tags and metadata are deep-copied so that the
        # result does not share those containers with this schema.
        init_kwargs = {
            'logical_type': schema.logical_type,
            'semantic_tags': copy.deepcopy(schema.semantic_tags),
            'description': schema.description,
            'metadata': copy.deepcopy(schema.metadata),
            'use_standard_tags': schema.use_standard_tags,
        }
        result.ww.init(**init_kwargs)
        return result

    # The dtype changed: warn, and hand back the result without Woodwork.
    invalid_schema_message = (
        'dtype mismatch between original dtype, '
        f'{valid_dtype}, and returned dtype, {result.dtype}'
    )
    mismatch_warning = TypingInfoMismatchWarning()
    warning_message = mismatch_warning.get_warning_message(
        attr, invalid_schema_message, 'Series')
    warnings.warn(warning_message, TypingInfoMismatchWarning)
    return result
def wrapper(*args, **kwargs):
    """Call the wrapped Series attribute and carry the schema onto the result.

    All positional and keyword arguments are forwarded to ``series_attr``.
    When the call returns a Series whose dtype string equals the valid dtype
    for the current logical type, Woodwork is initialized on it with
    ``self.schema`` and validation disabled; on a dtype mismatch a
    ``TypingInfoMismatchWarning`` is issued instead.

    Returns:
        Whatever ``series_attr`` returned, whether or not Woodwork was
        initialized on it.
    """
    # Make the Series call and intercept its result.
    result = series_attr(*args, **kwargs)
    if not _is_series(result):
        return result

    logical_type = self._schema.logical_type
    valid_dtype = _get_valid_dtype(type(result), logical_type)
    if str(result.dtype) == valid_dtype:
        # The dtype still fits the logical type, so the schema is reused.
        result.ww.init(schema=self.schema, validate=False)
        return result

    # The dtype changed: warn, and hand back the result without Woodwork.
    invalid_schema_message = (
        'dtype mismatch between original dtype, '
        f'{valid_dtype}, and returned dtype, {result.dtype}'
    )
    mismatch_warning = TypingInfoMismatchWarning()
    warning_message = mismatch_warning.get_warning_message(
        attr, invalid_schema_message, 'Series')
    warnings.warn(warning_message, TypingInfoMismatchWarning)
    return result
def wrapper(*args, **kwargs):
    """Call the wrapped Series attribute and carry typing info onto the result.

    All positional and keyword arguments are forwarded to ``series_attr``.

    * Series result: Woodwork is initialized with ``self.schema`` when
      ``_check_data_type_equality`` accepts the result's dtype string against
      the logical type's valid dtype; otherwise a
      ``TypingInfoMismatchWarning`` is issued.
    * DataFrame result: Woodwork is initialized with a partial, one-column
      ``TableSchema`` built from this column's schema.
    * Anything else is returned untouched.

    Returns:
        Whatever ``series_attr`` returned, whether or not Woodwork was
        initialized on it.
    """
    # Make the Series call and intercept its result.
    result = series_attr(*args, **kwargs)

    if _is_series(result):
        logical_type = self._schema.logical_type
        valid_dtype = logical_type._get_valid_dtype(type(result))
        if _check_data_type_equality(str(result.dtype), valid_dtype):
            result.ww.init(schema=self.schema, validate=False)
            return result

        # The dtype changed: warn, and hand back the result without Woodwork.
        invalid_schema_message = (
            "dtype mismatch between original dtype, "
            f"{valid_dtype}, and returned dtype, {result.dtype}"
        )
        mismatch_warning = TypingInfoMismatchWarning()
        warning_message = mismatch_warning.get_warning_message(
            attr, invalid_schema_message, "Series"
        )
        warnings.warn(warning_message, TypingInfoMismatchWarning)
        return result

    if _is_dataframe(result):
        # Initialize Woodwork with a partial schema that describes only the
        # column taken from this Series.
        col_schema = self.schema
        # NOTE(review): ``or`` also takes the first-result-column fallback
        # when the name is falsy but set (e.g. 0 or "") - confirm intended.
        col_name = self.name or result.columns.to_list()[0]
        partial_schema = TableSchema(
            column_names=[col_name],
            logical_types={col_name: col_schema.logical_type},
            semantic_tags={col_name: col_schema.semantic_tags},
            column_metadata={col_name: col_schema.metadata},
            use_standard_tags={col_name: col_schema.use_standard_tags},
            column_descriptions={col_name: col_schema.description},
            column_origins={col_name: col_schema.origin},
            validate=False,
        )
        result.ww.init_with_partial_schema(partial_schema)

    return result
def test_is_series(sample_df):
    """``_is_series`` accepts the ``id`` column but not the fixture itself."""
    id_column = sample_df['id']
    assert _is_series(id_column)
    assert not _is_series(sample_df)
def test_is_series(sample_df):
    """``_is_series`` accepts the ``id`` column but not the fixture itself."""
    id_column = sample_df["id"]
    assert _is_series(id_column)
    assert not _is_series(sample_df)