Code Example #1
def test_is_dict_like():
    passes = [{}, {'A': 1}, Series([1])]
    fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])]

    for p in passes:
        assert inference.is_dict_like(p)

    for f in fails:
        assert not inference.is_dict_like(f)
Code Example #2
File: test_inference.py Project: cmazzullo/pandas
def test_is_dict_like():
    passes = [{}, {'A': 1}, Series([1])]
    fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])]

    for p in passes:
        assert inference.is_dict_like(p)

    for f in fails:
        assert not inference.is_dict_like(f)
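The two snippets above come from pandas' test suite and assume its internal imports (roughly, from pandas.core.dtypes import inference, plus Series and Index from pandas). A minimal, self-contained sketch of the same checks against the public alias pandas.api.types.is_dict_like:

from pandas import Index, Series
from pandas.api.types import is_dict_like

# mappings and Series are dict-like; scalars, sequences and Index are not
passes = [{}, {'A': 1}, Series([1])]
fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])]

assert all(is_dict_like(p) for p in passes)
assert not any(is_dict_like(f) for f in fails)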
Code Example #3
def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains):
    class DictLike:
        def __init__(self, d):
            self.d = d

        if has_keys:

            def keys(self):
                return self.d.keys()

        if has_getitem:

            def __getitem__(self, key):
                return self.d.__getitem__(key)

        if has_contains:

            def __contains__(self, key):
                return self.d.__contains__(key)

    d = DictLike({1: 2})
    result = inference.is_dict_like(d)
    expected = has_keys and has_getitem and has_contains

    assert result is expected
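The duck-typing test above shows what is_dict_like actually requires: keys, __getitem__, and __contains__ must all be present; an object missing any one of them is rejected. A short illustration of that rule against the public alias:

from pandas.api.types import is_dict_like

class KeysOnly:
    # exposes keys() but neither __getitem__ nor __contains__
    def keys(self):
        return []

assert not is_dict_like(KeysOnly())  # two of the three methods are missing
assert is_dict_like({'a': 1})        # a plain dict has all three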
Code Example #4
File: extension.py Project: venaturum/staircase
 def __init__(self, data):
     if isinstance(data, self.__class__):
         self.data = data.data
     elif isinstance(data, np.ndarray):
         if not data.ndim == 1:
             raise ValueError(
                 "'data' should be a 1-dimensional array of Stairs objects."
             )
         self.data = data
     elif is_dict_like(data):
         self.data = np.array([data[k] for k in data.keys()])
     elif isinstance(data, Stairs) or is_list_like(data):
         self.data = np.array(data, ndmin=1)
     else:
         raise TypeError("'data' should be array of Stairs objects.")
Code Example #5
File: test_inference.py Project: josham/pandas
def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains):
    class DictLike(object):
        def __init__(self, d):
            self.d = d

        if has_keys:
            def keys(self):
                return self.d.keys()

        if has_getitem:
            def __getitem__(self, key):
                return self.d.__getitem__(key)

        if has_contains:
            def __contains__(self, key):
                return self.d.__contains__(key)

    d = DictLike({1: 2})
    result = inference.is_dict_like(d)
    expected = has_keys and has_getitem and has_contains

    assert result is expected
Code Example #6
File: test_inference.py Project: yyonggonyy/pandas
def test_is_dict_like_fails(ll):
    assert not inference.is_dict_like(ll)
Code Example #7
File: test_inference.py Project: yyonggonyy/pandas
def test_is_dict_like_passes(ll):
    assert inference.is_dict_like(ll)
Code Example #8
    def _infer_columns(
        self,
    ) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]:
        names = self.names
        num_original_columns = 0
        clear_buffer = True
        unnamed_cols: set[Scalar | None] = set()
        self._header_line = None

        if self.header is not None:
            header = self.header

            if isinstance(header, (list, tuple, np.ndarray)):
                have_mi_columns = len(header) > 1
                # we have MultiIndex columns, so read an extra line
                if have_mi_columns:
                    header = list(header) + [header[-1] + 1]
            else:
                have_mi_columns = False
                header = [header]

            columns: list[list[Scalar | None]] = []
            for level, hr in enumerate(header):
                try:
                    line = self._buffered_line()

                    while self.line_pos <= hr:
                        line = self._next_line()

                except StopIteration as err:
                    if self.line_pos < hr:
                        raise ValueError(
                            f"Passed header={hr} but only {self.line_pos + 1} lines in "
                            "file") from err

                    # We have an empty file, so check
                    # if columns are provided. That will
                    # serve as the 'line' for parsing
                    if have_mi_columns and hr > 0:
                        if clear_buffer:
                            self._clear_buffer()
                        columns.append([None] * len(columns[-1]))
                        return columns, num_original_columns, unnamed_cols

                    if not self.names:
                        raise EmptyDataError(
                            "No columns to parse from file") from err

                    line = self.names[:]

                this_columns: list[Scalar | None] = []
                this_unnamed_cols = []

                for i, c in enumerate(line):
                    if c == "":
                        if have_mi_columns:
                            col_name = f"Unnamed: {i}_level_{level}"
                        else:
                            col_name = f"Unnamed: {i}"

                        this_unnamed_cols.append(i)
                        this_columns.append(col_name)
                    else:
                        this_columns.append(c)

                if not have_mi_columns and self.mangle_dupe_cols:
                    counts: DefaultDict = defaultdict(int)
                    # Ensure that regular columns are used before unnamed ones
                    # to keep given names and mangle unnamed columns
                    col_loop_order = [
                        i for i in range(len(this_columns))
                        if i not in this_unnamed_cols
                    ] + this_unnamed_cols

                    for i in col_loop_order:
                        col = this_columns[i]
                        old_col = col
                        cur_count = counts[col]

                        if cur_count > 0:
                            while cur_count > 0:
                                counts[old_col] = cur_count + 1
                                col = f"{old_col}.{cur_count}"
                                if col in this_columns:
                                    cur_count += 1
                                else:
                                    cur_count = counts[col]

                            if (self.dtype is not None
                                    and is_dict_like(self.dtype)
                                    and self.dtype.get(old_col) is not None
                                    and self.dtype.get(col) is None):
                                self.dtype.update(
                                    {col: self.dtype.get(old_col)})
                        this_columns[i] = col
                        counts[col] = cur_count + 1
                elif have_mi_columns:

                    # if we have grabbed an extra line but it is not in our
                    # format, save it in the buffer and create a blank extra
                    # line for the rest of the parsing code
                    if hr == header[-1]:
                        lc = len(this_columns)
                        # error: Cannot determine type of 'index_col'
                        sic = self.index_col  # type: ignore[has-type]
                        ic = len(sic) if sic is not None else 0
                        unnamed_count = len(this_unnamed_cols)

                        # if wrong number of blanks or no index, not our format
                        if (lc != unnamed_count
                                and lc - ic > unnamed_count) or ic == 0:
                            clear_buffer = False
                            this_columns = [None] * lc
                            self.buf = [self.buf[-1]]

                columns.append(this_columns)
                unnamed_cols.update(
                    {this_columns[i]
                     for i in this_unnamed_cols})

                if len(columns) == 1:
                    num_original_columns = len(this_columns)

            if clear_buffer:
                self._clear_buffer()

            first_line: list[Scalar] | None
            if names is not None:
                # Read first row after header to check if data are longer
                try:
                    first_line = self._next_line()
                except StopIteration:
                    first_line = None

                len_first_data_row = 0 if first_line is None else len(
                    first_line)

                if len(names) > len(
                        columns[0]) and len(names) > len_first_data_row:
                    raise ValueError("Number of passed names did not match "
                                     "number of header fields in the file")
                if len(columns) > 1:
                    raise TypeError(
                        "Cannot pass names with multi-index columns")

                if self.usecols is not None:
                    # Set _use_cols. We don't store columns because they are
                    # overwritten.
                    self._handle_usecols(columns, names, num_original_columns)
                else:
                    num_original_columns = len(names)
                if self._col_indices is not None and len(names) != len(
                        self._col_indices):
                    columns = [[names[i] for i in sorted(self._col_indices)]]
                else:
                    columns = [names]
            else:
                columns = self._handle_usecols(columns, columns[0],
                                               num_original_columns)
        else:
            try:
                line = self._buffered_line()

            except StopIteration as err:
                if not names:
                    raise EmptyDataError(
                        "No columns to parse from file") from err

                line = names[:]

            # Store line, otherwise it is lost for guessing the index
            self._header_line = line
            ncols = len(line)
            num_original_columns = ncols

            if not names:
                if self.prefix:
                    columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
                else:
                    columns = [list(range(ncols))]
                columns = self._handle_usecols(columns, columns[0],
                                               num_original_columns)
            else:
                if self.usecols is None or len(names) >= num_original_columns:
                    columns = self._handle_usecols([names], names,
                                                   num_original_columns)
                    num_original_columns = len(names)
                else:
                    if not callable(
                            self.usecols) and len(names) != len(self.usecols):
                        raise ValueError(
                            "Number of passed names did not match number of "
                            "header fields in the file")
                    # Ignore output but set used columns.
                    self._handle_usecols([names], names, ncols)
                    columns = [names]
                    num_original_columns = ncols

        return columns, num_original_columns, unnamed_cols
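In _infer_columns above, is_dict_like(self.dtype) guards the branch where dtype was passed as a mapping: when a duplicate header is mangled to name.1, the dtype entry registered for the original name is copied to the mangled name so the per-column dtype still applies. A minimal sketch of that propagation step (propagate_dtype is a hypothetical helper, not pandas API):

from pandas.api.types import is_dict_like

def propagate_dtype(dtype, old_col, new_col):
    # hypothetical helper mirroring the dict-like branch in _infer_columns
    if dtype is not None and is_dict_like(dtype):
        if dtype.get(old_col) is not None and dtype.get(new_col) is None:
            dtype[new_col] = dtype[old_col]
    return dtype

dtypes = {'a': 'int64'}
propagate_dtype(dtypes, 'a', 'a.1')
assert dtypes == {'a': 'int64', 'a.1': 'int64'}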
Code Example #9
File: test_inference.py Project: josham/pandas
def test_is_dict_like_fails(ll):
    assert not inference.is_dict_like(ll)
Code Example #10
File: test_inference.py Project: josham/pandas
def test_is_dict_like_passes(ll):
    assert inference.is_dict_like(ll)