def test_is_dict_like():
    """is_dict_like accepts mappings/Series and rejects scalars and sequences."""
    accepted = ({}, {'A': 1}, Series([1]))
    rejected = ('1', 1, [1, 2], (1, 2), range(2), Index([1]))

    assert all(inference.is_dict_like(obj) for obj in accepted)
    assert not any(inference.is_dict_like(obj) for obj in rejected)
def test_is_dict_like():
    """Dict-like detection: positives are mappings, negatives are everything else."""
    for candidate in ({}, {'A': 1}, Series([1])):
        assert inference.is_dict_like(candidate)

    for candidate in ('1', 1, [1, 2], (1, 2), range(2), Index([1])):
        assert not inference.is_dict_like(candidate)
def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains):
    """An object is dict-like only when it duck-types keys(), __getitem__
    AND __contains__ all at once — any subset is not enough."""

    class DictLike:
        def __init__(self, mapping):
            self.mapping = mapping

        # Methods are added to the class conditionally so each parametrized
        # combination of the three protocol pieces can be exercised.
        if has_keys:
            def keys(self):
                return self.mapping.keys()

        if has_getitem:
            def __getitem__(self, key):
                return self.mapping.__getitem__(key)

        if has_contains:
            def __contains__(self, key):
                return self.mapping.__contains__(key)

    duck = DictLike({1: 2})
    expected = has_keys and has_getitem and has_contains
    assert inference.is_dict_like(duck) is expected
def __init__(self, data):
    """Normalize *data* into a 1-dimensional numpy array of Stairs objects.

    Accepts another instance of this class (shares its array), a 1-D
    ndarray, a dict-like of Stairs values, a single Stairs object, or any
    list-like of Stairs objects.
    """
    # Copy construction: reuse the other instance's underlying array.
    if isinstance(data, self.__class__):
        self.data = data.data
        return

    if isinstance(data, np.ndarray):
        # Only flat arrays are meaningful here.
        if data.ndim != 1:
            raise ValueError(
                "'data' should be a 1-dimensional array of Stairs objects."
            )
        self.data = data
        return

    if is_dict_like(data):
        # Keep the mapping's own key order when collecting the values.
        self.data = np.array([data[key] for key in data.keys()])
        return

    if isinstance(data, Stairs) or is_list_like(data):
        # ndmin=1 promotes a lone Stairs object to a length-1 array.
        self.data = np.array(data, ndmin=1)
        return

    raise TypeError("'data' should be array of Stairs objects.")
def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains):
    """Duck-typed dict-likeness demands the full trio: keys(), __getitem__,
    __contains__ — is_dict_like is True iff all three are present."""

    class DictLike(object):
        def __init__(self, backing):
            self.backing = backing

        # Each protocol method is defined only when the corresponding
        # parametrized flag is set, covering all eight combinations.
        if has_keys:
            def keys(self):
                return self.backing.keys()

        if has_getitem:
            def __getitem__(self, key):
                return self.backing.__getitem__(key)

        if has_contains:
            def __contains__(self, key):
                return self.backing.__contains__(key)

    probe = DictLike({1: 2})
    assert inference.is_dict_like(probe) is (
        has_keys and has_getitem and has_contains
    )
def test_is_dict_like_fails(ll):
    # Parametrized non-mapping values must not be classified as dict-like.
    verdict = inference.is_dict_like(ll)
    assert not verdict
def test_is_dict_like_passes(ll):
    # Every parametrized mapping-like value should be recognized.
    verdict = inference.is_dict_like(ll)
    assert verdict
def _infer_columns(
    self,
) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]:
    """Infer the column names for this parser.

    Reads the header row(s) from the buffered input (or falls back to
    ``self.names``), mangles duplicate/unnamed columns, reconciles the
    result with ``names``/``usecols``, and returns a tuple of:

    - ``columns``: one list of labels per header level,
    - ``num_original_columns``: column count before ``usecols`` filtering,
    - ``unnamed_cols``: the set of generated "Unnamed: ..." labels.

    Raises ``ValueError``/``TypeError``/``EmptyDataError`` on malformed or
    empty input, mirroring pandas' read_csv error contract.
    """
    names = self.names
    num_original_columns = 0
    clear_buffer = True
    unnamed_cols: set[Scalar | None] = set()
    self._header_line = None

    if self.header is not None:
        header = self.header

        if isinstance(header, (list, tuple, np.ndarray)):
            have_mi_columns = len(header) > 1
            # we have a mi columns, so read an extra line
            if have_mi_columns:
                header = list(header) + [header[-1] + 1]
        else:
            have_mi_columns = False
            header = [header]

        columns: list[list[Scalar | None]] = []
        for level, hr in enumerate(header):
            try:
                # Advance through the buffered input until the requested
                # header row position is reached.
                line = self._buffered_line()

                while self.line_pos <= hr:
                    line = self._next_line()

            except StopIteration as err:
                # Ran out of lines before reaching the header row.
                if self.line_pos < hr:
                    raise ValueError(
                        f"Passed header={hr} but only {self.line_pos + 1} lines in "
                        "file") from err

                # We have an empty file, so check
                # if columns are provided. That will
                # serve as the 'line' for parsing
                if have_mi_columns and hr > 0:
                    if clear_buffer:
                        self._clear_buffer()
                    # Pad the extra (synthetic) MI level with blanks.
                    columns.append([None] * len(columns[-1]))
                    return columns, num_original_columns, unnamed_cols

                if not self.names:
                    raise EmptyDataError(
                        "No columns to parse from file") from err

                # Fall back to the user-provided names as the header line.
                line = self.names[:]

            this_columns: list[Scalar | None] = []
            this_unnamed_cols = []
            for i, c in enumerate(line):
                if c == "":
                    # Empty header cells get a generated placeholder label.
                    if have_mi_columns:
                        col_name = f"Unnamed: {i}_level_{level}"
                    else:
                        col_name = f"Unnamed: {i}"

                    this_unnamed_cols.append(i)
                    this_columns.append(col_name)
                else:
                    this_columns.append(c)

            if not have_mi_columns and self.mangle_dupe_cols:
                counts: DefaultDict = defaultdict(int)
                # Ensure that regular columns are used before unnamed ones
                # to keep given names and mangle unnamed columns
                col_loop_order = [
                    i for i in range(len(this_columns))
                    if i not in this_unnamed_cols
                ] + this_unnamed_cols

                for i in col_loop_order:
                    col = this_columns[i]
                    old_col = col
                    cur_count = counts[col]

                    if cur_count > 0:
                        # Duplicate label: probe "name.1", "name.2", ...
                        # until an unused suffix is found.
                        while cur_count > 0:
                            counts[old_col] = cur_count + 1
                            col = f"{old_col}.{cur_count}"
                            if col in this_columns:
                                cur_count += 1
                            else:
                                cur_count = counts[col]

                        # Propagate a per-column dtype from the original
                        # name to its mangled alias when one was given.
                        if (self.dtype is not None and is_dict_like(self.dtype)
                                and self.dtype.get(old_col) is not None
                                and self.dtype.get(col) is None):
                            self.dtype.update(
                                {col: self.dtype.get(old_col)})

                    this_columns[i] = col
                    counts[col] = cur_count + 1
            elif have_mi_columns:
                # if we have grabbed an extra line, but its not in our
                # format so save in the buffer, and create an blank extra
                # line for the rest of the parsing code
                if hr == header[-1]:
                    lc = len(this_columns)
                    # error: Cannot determine type of 'index_col'
                    sic = self.index_col  # type: ignore[has-type]
                    ic = len(sic) if sic is not None else 0
                    unnamed_count = len(this_unnamed_cols)

                    # if wrong number of blanks or no index, not our format
                    if (lc != unnamed_count and
                            lc - ic > unnamed_count) or ic == 0:
                        clear_buffer = False
                        this_columns = [None] * lc
                        self.buf = [self.buf[-1]]

            columns.append(this_columns)
            unnamed_cols.update(
                {this_columns[i] for i in this_unnamed_cols})

            # The first header level defines the original column count.
            if len(columns) == 1:
                num_original_columns = len(this_columns)

        if clear_buffer:
            self._clear_buffer()

        first_line: list[Scalar] | None
        if names is not None:
            # Read first row after header to check if data are longer
            try:
                first_line = self._next_line()
            except StopIteration:
                first_line = None

            len_first_data_row = 0 if first_line is None else len(
                first_line)

            if len(names) > len(
                    columns[0]) and len(names) > len_first_data_row:
                raise ValueError("Number of passed names did not match "
                                 "number of header fields in the file")
            if len(columns) > 1:
                raise TypeError(
                    "Cannot pass names with multi-index columns")

            if self.usecols is not None:
                # Set _use_cols. We don't store columns because they are
                # overwritten.
                self._handle_usecols(columns, names, num_original_columns)
            else:
                num_original_columns = len(names)
                if self._col_indices is not None and len(names) != len(
                        self._col_indices):
                    # Restrict the given names to the selected columns.
                    columns = [[names[i] for i in sorted(self._col_indices)]]
                else:
                    columns = [names]
        else:
            columns = self._handle_usecols(columns, columns[0],
                                           num_original_columns)
    else:
        # No header: derive the column count from the first data line.
        try:
            line = self._buffered_line()
        except StopIteration as err:
            if not names:
                raise EmptyDataError(
                    "No columns to parse from file") from err

            line = names[:]

        # Store line, otherwise it is lost for guessing the index
        self._header_line = line
        ncols = len(line)
        num_original_columns = ncols

        if not names:
            # Synthesize labels: prefixed strings or plain integers.
            if self.prefix:
                columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
            else:
                columns = [list(range(ncols))]
            columns = self._handle_usecols(columns, columns[0],
                                           num_original_columns)
        else:
            if self.usecols is None or len(names) >= num_original_columns:
                columns = self._handle_usecols([names], names,
                                               num_original_columns)
                num_original_columns = len(names)
            else:
                if not callable(
                        self.usecols) and len(names) != len(self.usecols):
                    raise ValueError(
                        "Number of passed names did not match number of "
                        "header fields in the file")
                # Ignore output but set used columns.
                self._handle_usecols([names], names, ncols)
                columns = [names]
                num_original_columns = ncols

    return columns, num_original_columns, unnamed_cols
def test_is_dict_like_fails(ll):
    """Each non-dict-like parametrized input yields a falsy result."""
    assert inference.is_dict_like(ll) is False or not inference.is_dict_like(ll)
def test_is_dict_like_passes(ll):
    """Each dict-like parametrized input yields a truthy result."""
    classified_as_dict_like = inference.is_dict_like(ll)
    assert classified_as_dict_like