def test_setops_preserve_object_dtype(self): idx = Index([1, 2, 3], dtype=object) result = idx.intersection(idx[1:]) expected = idx[1:] tm.assert_index_equal(result, expected) # if other is not monotonic increasing, intersection goes through # a different route result = idx.intersection(idx[1:][::-1]) tm.assert_index_equal(result, expected) result = idx._union(idx[1:], sort=None) expected = idx tm.assert_numpy_array_equal(result, expected.values) result = idx.union(idx[1:], sort=None) tm.assert_index_equal(result, expected) # if other is not monotonic increasing, _union goes through # a different route result = idx._union(idx[1:][::-1], sort=None) tm.assert_numpy_array_equal(result, expected.values) result = idx.union(idx[1:][::-1], sort=None) tm.assert_index_equal(result, expected)
def test_union_sort_other_special(self, slice_): # https://github.com/pandas-dev/pandas/issues/24959 idx = Index([1, 0, 2]) # default, sort=None other = idx[slice_] tm.assert_index_equal(idx.union(other), idx) tm.assert_index_equal(other.union(idx), idx) # sort=False tm.assert_index_equal(idx.union(other, sort=False), idx)
def test_union_non_numeric(self, dtype): # corner case, non-numeric index = Index(np.arange(5, dtype=dtype), dtype=dtype) assert index.dtype == dtype other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) result = index.union(other) expected = Index(np.concatenate((index, other))) tm.assert_index_equal(result, expected) result = other.union(index) expected = Index(np.concatenate((other, index))) tm.assert_index_equal(result, expected)
def test_union_duplicate_index_different_dtypes():
    """Unsorted union of duplicated ints and duplicated strings keeps all items."""
    # GH#36289
    int_values = [1, 2, 2, 3]
    str_values = ["1", "0", "0"]

    result = Index(int_values).union(Index(str_values), sort=False)

    expected = Index(int_values + str_values)
    tm.assert_index_equal(result, expected)
def test_union_nan_in_both(dup): # GH#36289 a = Index([np.nan, 1, 2, 2]) b = Index([np.nan, dup, 1, 2]) result = a.union(b, sort=False) expected = Index([np.nan, dup, 1.0, 2.0, 2.0]) tm.assert_index_equal(result, expected)
def test_union_same_value_duplicated_in_both():
    """A value duplicated in both operands is not multiplied by the union."""
    # GH#36289
    shorter = Index([0, 0, 1])
    longer = Index([0, 0, 1, 2])

    union = shorter.union(longer)

    tm.assert_index_equal(union, Index([0, 0, 1, 2]))
def test_union_sort_other_incomparable(self): # https://github.com/pandas-dev/pandas/issues/24959 idx = Index([1, pd.Timestamp("2000")]) # default (sort=None) with tm.assert_produces_warning(RuntimeWarning): result = idx.union(idx[:1]) tm.assert_index_equal(result, idx) # sort=None with tm.assert_produces_warning(RuntimeWarning): result = idx.union(idx[:1], sort=None) tm.assert_index_equal(result, idx) # sort=False result = idx.union(idx[:1], sort=False) tm.assert_index_equal(result, idx)
def test_union_sort_special_true(self, slice_): # TODO: decide on True behaviour # sort=True idx = Index([1, 0, 2]) # default, sort=None other = idx[slice_] result = idx.union(other, sort=True) expected = Index([0, 1, 2]) tm.assert_index_equal(result, expected)
def test_union_noncomparable(self): # corner case, non-Int64Index index = self.create_index() other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) result = index.union(other) expected = Index(np.concatenate((index, other))) tm.assert_index_equal(result, expected) result = other.union(index) expected = Index(np.concatenate((other, index))) tm.assert_index_equal(result, expected)
def test_union_noncomparable(self, sort): # corner case, non-Int64Index index = RangeIndex(start=0, stop=20, step=2) other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) result = index.union(other, sort=sort) expected = Index(np.concatenate((index, other))) tm.assert_index_equal(result, expected) result = other.union(index, sort=sort) expected = Index(np.concatenate((other, index))) tm.assert_index_equal(result, expected)
def test_union_with_regular_index(idx):
    """Union between a flat string Index and the ``idx`` fixture, both ways.

    ``idx`` must contain the tuple ``("foo", "one")``. The union called on
    ``idx`` is expected to emit a RuntimeWarning about unorderable values,
    and both directions are expected to compare equal.
    """
    flat = Index(["A", "B", "C"])

    flat_first = flat.union(idx)
    assert ("foo", "one") in flat_first
    assert "B" in flat_first

    msg = "The values in the array are unorderable"
    with tm.assert_produces_warning(RuntimeWarning, match=msg):
        idx_first = idx.union(flat)

    assert flat_first.equals(idx_first)
def test_int_float_union_dtype(dtype): # https://github.com/pandas-dev/pandas/issues/26778 # [u]int | float -> float index = Index([0, 2, 3], dtype=dtype) other = pd.Float64Index([0.5, 1.5]) expected = pd.Float64Index([0.0, 0.5, 1.5, 2.0, 3.0]) result = index.union(other) tm.assert_index_equal(result, expected) result = other.union(index) tm.assert_index_equal(result, expected)
def _condition_logic(base_idx: pd.Index, sub_select_idx: pd.Index, state_idx: pd.Index, condition_idx: pd.Index, logic: str) -> pd.Index: if str(logic).upper() == 'ALL': return base_idx.intersection(condition_idx).sort_values() elif str(logic).upper() == 'ANY': return sub_select_idx.intersection(condition_idx).sort_values() elif str(logic).upper() == 'AND': return state_idx.intersection(condition_idx).sort_values() elif str(logic).upper() == 'NAND': return sub_select_idx.drop(state_idx.intersection(condition_idx)).sort_values() elif str(logic).upper() == 'OR': return state_idx.append(state_idx.union(condition_idx)).drop_duplicates().sort_values() elif str(logic).upper() == 'NOR': result = state_idx.append(state_idx.union(condition_idx)).drop_duplicates().sort_values() return sub_select_idx.drop(result) elif str(logic).upper() == 'NOT': return state_idx.difference(condition_idx) elif str(logic).upper() == 'XOR': return state_idx.union(condition_idx).difference(state_idx.intersection(condition_idx)) raise ValueError(f"The logic '{logic}' must be AND, NAND, OR, NOR, NOT, XOR ANY or ALL")
def test_union_noncomparable(self): from datetime import datetime, timedelta # corner case, non-Int64Index now = datetime.now() other = Index([now + timedelta(i) for i in range(4)], dtype=object) result = self.index.union(other) expected = Index(np.concatenate((self.index, other))) tm.assert_index_equal(result, expected) result = other.union(self.index) expected = Index(np.concatenate((other, self.index))) tm.assert_index_equal(result, expected)
def test_union_int_categorical_with_nan():
    """Union of an int Index with an int-category CategoricalIndex holding NaN.

    The result is expected to be a float64 Index in both directions.
    """
    cat_index = CategoricalIndex([1, 2, np.nan])
    # the categories themselves are integers; only the values contain NaN
    assert cat_index.categories.dtype.kind == "i"

    int_index = Index([1, 2])
    expected = Index([1, 2, np.nan], dtype=np.float64)

    tm.assert_index_equal(int_index.union(cat_index), expected)
    tm.assert_index_equal(cat_index.union(int_index), expected)
def test_union_dtypes(left, right, expected, names): left = pandas_dtype(left) right = pandas_dtype(right) a = Index([], dtype=left, name=names[0]) b = Index([], dtype=right, name=names[1]) result = a.union(b) assert result.dtype == expected assert result.name == names[2] # Testing name retention # TODO: pin down desired dtype; do we want it to be commutative? result = a.intersection(b) assert result.name == names[2]
def test_union_name_preservation(self, first_list, second_list, first_name, second_name, expected_name, sort): first = Index(first_list, name=first_name) second = Index(second_list, name=second_name) union = first.union(second, sort=sort) vals = set(first_list).union(second_list) if sort is None and len(first_list) > 0 and len(second_list) > 0: expected = Index(sorted(vals), name=expected_name) tm.assert_index_equal(union, expected) else: expected = Index(vals, name=expected_name) tm.equalContents(union, expected)
def test_chained_union(self, sort): # Chained unions handles names correctly i1 = Index([1, 2], name="i1") i2 = Index([5, 6], name="i2") i3 = Index([3, 4], name="i3") union = i1.union(i2.union(i3, sort=sort), sort=sort) expected = i1.union(i2, sort=sort).union(i3, sort=sort) tm.assert_index_equal(union, expected) j1 = Index([1, 2], name="j1") j2 = Index([], name="j2") j3 = Index([], name="j3") union = j1.union(j2.union(j3, sort=sort), sort=sort) expected = j1.union(j2, sort=sort).union(j3, sort=sort) tm.assert_index_equal(union, expected)
def test_union_sort_other_incomparable_true(self):
    """``sort=True`` on an Index of incomparable values must raise TypeError."""
    # TODO decide on True behaviour
    # sort=True
    mixed = Index([1, pd.Timestamp("2000")])

    with pytest.raises(TypeError, match=".*"):
        mixed.union(mixed[:1], sort=True)
def process_signal_from_index(
        self,
        signal: np.ndarray,
        sampling_rate: int,
        index: pd.Index,
) -> pd.Index:
    r"""Segment parts of a signal.

    Every entry of ``index`` is turned into one call of
    :meth:`process_signal` and the indexes returned by those calls
    are merged with :meth:`pandas.Index.union`.
    An empty ``index`` is returned unchanged.

    Args:
        signal: signal values
        sampling_rate: sampling rate in Hz
        index: a segmented index conform to audformat_
            or a :class:`pandas.MultiIndex` with two levels
            named `start` and `end` that hold start and end
            positions as :class:`pandas.Timedelta` objects.
            See also :func:`audinterface.utils.signal_index`

    Returns:
        Segmented index conform to audformat_

    Raises:
        RuntimeError: if sampling rates do not match
        RuntimeError: if channel selection is invalid
        ValueError: if index contains duplicates

    .. _audformat: https://audeering.github.io/audformat/data-format.html

    """
    utils.assert_index(index)

    if index.empty:
        return index

    # positional arguments are the same for every task
    task_args = (signal, sampling_rate)

    has_two_levels = isinstance(index, pd.MultiIndex) and len(index.levels) == 2
    if has_two_levels:
        # entries are (start, end) pairs
        params = [
            (task_args, {'start': start, 'end': end})
            for start, end in index
        ]
    else:
        # convert first so that entries are (file, start, end) triples
        index = audformat.utils.to_segmented_index(index)
        params = [
            (task_args, {'file': file, 'start': start, 'end': end})
            for file, start, end in index
        ]

    segments = audeer.run_tasks(
        self.process_signal,
        params,
        num_workers=self.process.num_workers,
        multiprocessing=self.process.multiprocessing,
        progress_bar=self.process.verbose,
        task_description=f'Process {len(index)} segments',
    )

    # fold the per-task indexes into a single index
    merged = segments[0]
    for segment in segments[1:]:
        merged = merged.union(segment)

    return merged
def _union(left: pd.Index, right: pd.Index) -> pd.Index: return left.union(right, sort=False)