def make_index_unique(index: pd.Index, join: str = '-') -> pd.Index:
    """Makes the index unique by appending '1', '2', etc.

    The first occurrence of a non-unique value is ignored. If a tentative
    new name (e.g. ``'b-1'``) already exists in the index, the next integer
    in the sequence is tried, so the returned index is always unique.

    Parameters
    ----------
    index
        The index to deduplicate. Returned unchanged if already unique.
    join
        The connecting string between name and integer.

    Returns
    -------
    A unique :class:`pd.Index`; the original index object is not modified.

    Examples
    --------
    >>> adata1 = sc.AnnData(np.ones((3, 2)), {'obs_names': ['a', 'b', 'c']})
    >>> adata2 = sc.AnnData(np.zeros((3, 2)), {'obs_names': ['d', 'b', 'b']})
    >>> adata = adata1.concatenate(adata2)
    >>> adata.obs_names
    Index(['a', 'b', 'c', 'd', 'b', 'b'], dtype='object')
    >>> adata.obs_names_make_unique()
    >>> adata.obs_names
    Index(['a', 'b', 'c', 'd', 'b-1', 'b-2'], dtype='object')
    """
    if index.is_unique:
        return index
    from collections import Counter

    # Copy: ``index.values`` can be the index's underlying array, and
    # writing into it would silently mutate the caller's index.
    values = index.values.copy()
    indices_dup = index.duplicated(keep='first')
    values_dup = values[indices_dup]
    # Track every name already taken so generated suffixed names
    # cannot collide with pre-existing values (e.g. an existing 'b-1').
    values_set = set(values)
    counter = Counter()
    for i, v in enumerate(values_dup):
        while True:
            counter[v] += 1
            tentative_new_name = v + join + str(counter[v])
            if tentative_new_name not in values_set:
                values_set.add(tentative_new_name)
                values_dup[i] = tentative_new_name
                break
    values[indices_dup] = values_dup
    # Preserve the original index name on the rebuilt index.
    return pd.Index(values, name=index.name)
def make_index_unique(index: pd.Index, join: str = "-") -> pd.Index:
    """
    Makes the index unique by appending a number string to each duplicate
    index element: '1', '2', etc.

    If a tentative name created by the algorithm already exists in the
    index, it tries the next integer in the sequence.

    The first occurrence of a non-unique value is ignored.

    Parameters
    ----------
    join
        The connecting string between name and integer.

    Returns
    -------
    A unique :class:`pd.Index`; the input index is not modified. A
    ``UserWarning`` is emitted when suffix collisions forced the algorithm
    to skip tentative names, since the result can be hard to interpret.

    Examples
    --------
    >>> from anndata import AnnData
    >>> adata = AnnData(np.ones((2, 3)), var=pd.DataFrame(index=["a", "a", "b"]))
    >>> adata.var_names
    Index(['a', 'a', 'b'], dtype='object')
    >>> adata.var_names_make_unique()
    >>> adata.var_names
    Index(['a', 'a-1', 'b'], dtype='object')
    """
    if index.is_unique:
        return index
    from collections import Counter

    values = index.values.copy()
    indices_dup = index.duplicated(keep="first")
    values_dup = values[indices_dup]
    # Names already taken — generated names must not collide with these.
    values_set = set(values)
    counter = Counter()
    issue_interpretation_warning = False
    example_colliding_values = []
    for i, v in enumerate(values_dup):
        while True:
            counter[v] += 1
            tentative_new_name = v + join + str(counter[v])
            if tentative_new_name not in values_set:
                values_set.add(tentative_new_name)
                values_dup[i] = tentative_new_name
                break
            # Tentative name collided with an existing value; remember a
            # few examples so the warning is actionable.
            issue_interpretation_warning = True
            if len(example_colliding_values) < 5:
                example_colliding_values.append(tentative_new_name)

    if issue_interpretation_warning:
        # Fixed grammar and missing sentence separator in the original
        # message ("There values with a similar suffixes in ...").
        warnings.warn(
            f"Suffix used ({join}[0-9]+) to deduplicate index values may make index "
            "values difficult to interpret. There are values with similar suffixes "
            "in the index. Consider using a different delimiter by passing "
            "`join={delimiter}`. "
            "Example key collisions generated by the make_index_unique algorithm: "
            f"{example_colliding_values}"
        )
    values[indices_dup] = values_dup
    index = pd.Index(values, name=index.name)
    return index
def assert_index(obj: pd.Index):
    r"""Check if index is conform to audformat."""
    # Anything that is not a two-level MultiIndex is delegated to the
    # generic audformat check.
    if not (isinstance(obj, pd.MultiIndex) and len(obj.levels) == 2):
        audformat.assert_index(obj)
        return

    # 1. No duplicated (start, end) pairs allowed.
    if obj.has_duplicates:
        max_display = 10
        duplicates = obj[obj.duplicated()]
        msg_tail = '\n...' if len(duplicates) > max_display else ''
        msg_duplicates = '\n'.join(
            str(duplicate)
            for duplicate in duplicates[:max_display].tolist()
        )
        raise ValueError('Found duplicates:\n'
                         f'{msg_duplicates}{msg_tail}')

    # 2. Levels must be named ('start', 'end') as defined by audformat.
    names_conform = (
        obj.names[0] == audformat.define.IndexField.START
        and obj.names[1] == audformat.define.IndexField.END
    )
    if not names_conform:
        expected_names = [
            audformat.define.IndexField.START,
            audformat.define.IndexField.END,
        ]
        raise ValueError('Found two levels with names '
                         f'{obj.names}, '
                         f'but expected names '
                         f'{expected_names}.')

    # 3. Both levels must hold timedelta64[ns] values.
    for position, level_name in enumerate(('start', 'end')):
        if not pd.api.types.is_timedelta64_dtype(obj.levels[position].dtype):
            raise ValueError(
                f"Level '{level_name}' must contain values of type "
                "'timedelta64[ns]'.")
def _maybe_check_integrity(self, concat_index: Index): if self.verify_integrity: if not concat_index.is_unique: overlap = concat_index[concat_index.duplicated()].unique() raise ValueError( "Indexes have overlapping values: " "{overlap!s}".format(overlap=overlap) )
def make_index_unique(index: pd.Index, join: str = "-") -> pd.Index:
    """
    Makes the index unique by appending a number string to each duplicate
    index element: '1', '2', etc.

    If a tentative name created by the algorithm already exists in the
    index, it tries the next integer in the sequence.

    The first occurrence of a non-unique value is ignored.

    Parameters
    ----------
    join
        The connecting string between name and integer.

    Returns
    -------
    A unique :class:`pd.Index`; the input index object is left untouched.

    Examples
    --------
    >>> from anndata import AnnData
    >>> adata1 = AnnData(np.ones((3, 2)), dict(obs_names=['a', 'b', 'c']))
    >>> adata2 = AnnData(np.zeros((3, 2)), dict(obs_names=['d', 'b', 'b']))
    >>> adata = adata1.concatenate(adata2)
    >>> adata.obs_names
    Index(['a', 'b', 'c', 'd', 'b', 'b'], dtype='object')
    >>> adata.obs_names_make_unique()
    >>> adata.obs_names
    Index(['a', 'b', 'c', 'd', 'b-1', 'b-2'], dtype='object')
    """
    if index.is_unique:
        return index
    from collections import defaultdict

    # Copy before writing: ``index.values`` can be the index's underlying
    # array, and assigning into it would mutate the caller's index.
    values = index.values.copy()
    values_set = set(values)
    indices_dup = index.duplicated(keep="first")
    values_dup = values[indices_dup]
    counter = defaultdict(lambda: 0)
    for i, v in enumerate(values_dup):
        while True:
            counter[v] += 1
            tentative_new_name = v + join + str(counter[v])
            if tentative_new_name not in values_set:
                values_set.add(tentative_new_name)
                values_dup[i] = tentative_new_name
                break
    values[indices_dup] = values_dup
    # Keep the original index name on the rebuilt index.
    index = pd.Index(values, name=index.name)
    return index
def check_no_dupes(idx: pd.Index, name: str) -> bool:
    """Return ``True`` when *idx* contains no duplicates.

    When duplicates exist, emit a warning listing every duplicated entry
    (sorted) and return ``False``.
    """
    has_duplicates = bool(idx.duplicated().any())
    if has_duplicates:
        # keep=False marks *all* occurrences, so the warning shows each
        # duplicated value, not just the repeats.
        warn(f"Duplicated {name}: {idx[idx.duplicated(False)].sort_values()}")
    return not has_duplicates
def index_has_duplicates(index: Index) -> bool:
    """Indicates whether a DataFrame's Index contains any duplicates."""
    # bool() so the function honors its declared return type:
    # ``.duplicated().any()`` yields numpy.bool_, not a builtin bool.
    return bool(index.duplicated().any())
def test_index_has_no_duplicates(self, index: Index): self.assertEqual(first=index.duplicated().any(), second=False, msg='The index contains duplicate values.')