def test_to_object_array_width(self): # see gh-13320 rows = [[1, 2, 3], [4, 5, 6]] expected = np.array(rows, dtype=object) out = lib.to_object_array(rows) tm.assert_numpy_array_equal(out, expected) expected = np.array(rows, dtype=object) out = lib.to_object_array(rows, min_width=1) tm.assert_numpy_array_equal(out, expected) expected = np.array([[1, 2, 3, None, None], [4, 5, 6, None, None]], dtype=object) out = lib.to_object_array(rows, min_width=5) tm.assert_numpy_array_equal(out, expected)
def _list_to_arrays(data, columns, coerce_float=False, dtype=None):
    # Convert a list of rows to per-column object arrays, then hand off to
    # _convert_object_array for dtype coercion and column alignment.
    if data and isinstance(data[0], tuple):
        converter = lib.to_object_array_tuples
    else:
        # list of lists
        converter = lib.to_object_array
    content = list(converter(data).T)
    return _convert_object_array(
        content, columns, dtype=dtype, coerce_float=coerce_float
    )
def _list_to_arrays(data: list[tuple | list]) -> np.ndarray: # Returned np.ndarray has ndim = 2 # Note: we already check len(data) > 0 before getting hre if isinstance(data[0], tuple): content = lib.to_object_array_tuples(data) else: # list of lists content = lib.to_object_array(data) return content
def getSpktSpkid(cellGids=None, timeRange=None, sim=None):
    """
    Function to efficiently get a subset of spikes based on a timeRange and cellGids list

    Parameters
    ----------
    cellGids : list
        A list of cells to include by global identifier (GID)
        **Default:** ``None`` includes all cells

    timeRange : [start, stop]
        A list of two floats specifying the time range of spikes to include
        **Default:** ``None`` includes the entire simulation time range

    sim : NetPyNE sim object
        **Default:** ``None`` uses the current NetPyNE sim object

    Returns
    -------
    (selection, spkt, spkid)
        A tuple consisting of the subset in a Pandas dataframe, a list of spike times, and a list of spike GIDs
    """

    # Avoid a mutable default argument; None means "all cells".
    if cellGids is None:
        cellGids = []

    if not sim:
        from .. import sim

    # Locate pandas' C helper module across the layouts used by
    # different pandas versions.
    try:  # Pandas 1.4.0 and later
        from pandas._libs import lib as pandaslib
    except ImportError:
        try:  # Pandas 0.24 and later
            from pandas import _lib as pandaslib
        except ImportError:  # Pandas 0.23 and earlier
            from pandas import lib as pandaslib

    df = pd.DataFrame(
        pandaslib.to_object_array(
            [sim.allSimData['spkt'], sim.allSimData['spkid']]
        ).transpose(),
        columns=['spkt', 'spkid'],
    )

    if timeRange:
        # binary search is faster than query; spike times are sorted.
        # (renamed from min/max to avoid shadowing the builtins)
        start, stop = [int(df['spkt'].searchsorted(timeRange[i])) for i in range(2)]
    else:
        start, stop = 0, len(df)

    if len(cellGids) == 0:
        sel = df[start:stop]
    else:
        sel = df[start:stop].query('spkid in @cellGids')

    spktList = sel['spkt'].tolist()
    spkidList = sel['spkid'].tolist()

    return sel, spktList, spkidList
def _list_to_arrays( data: List[Scalar], columns: Union[Index, List], ) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: # Note: we already check len(data) > 0 before getting hre if isinstance(data[0], tuple): content = lib.to_object_array_tuples(data) else: # list of lists content = lib.to_object_array(data) return content, columns
def _list_to_arrays(data, columns, coerce_float=False, dtype=None):
    # Build per-column object arrays from a list of rows (tuples or lists).
    if data and isinstance(data[0], tuple):
        arr = lib.to_object_array_tuples(data)
    else:
        # list of lists
        arr = lib.to_object_array(data)
    content = list(arr.T)

    # gh-26429 do not raise user-facing AssertionError
    try:
        return _convert_object_array(
            content, columns, dtype=dtype, coerce_float=coerce_float
        )
    except AssertionError as e:
        raise ValueError(e) from e
def _list_to_arrays(
    data: List[Scalar],
    columns: Union[Index, List],
    coerce_float: bool = False,
    dtype: Optional[DtypeObj] = None,
) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
    # Build per-column object arrays from a list of rows, then validate
    # (or synthesize) the column labels and coerce dtypes.
    if len(data) > 0 and isinstance(data[0], tuple):
        arr = lib.to_object_array_tuples(data)
    else:
        # list of lists
        arr = lib.to_object_array(data)
    content = list(arr.T)

    # gh-26429 do not raise user-facing AssertionError
    try:
        columns = _validate_or_indexify_columns(content, columns)
        result = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
    except AssertionError as e:
        raise ValueError(e) from e
    return result, columns
def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]:
    """Transpose parsed rows into per-column arrays.

    Rows shorter than the expected width are padded (via ``min_width``);
    rows longer than it are handled according to ``self.on_bad_lines``
    (callable fix-up, error, warn, or silent skip).  Returns one object
    ndarray per column, filtered down to ``usecols`` when set.
    """
    col_len = self.num_original_columns

    # An implicit index contributes extra leading columns.
    if self._implicit_index:
        col_len += len(self.index_col)

    max_len = max(len(row) for row in content)

    # Check that there are no rows with too many
    # elements in their row (rows with too few
    # elements are padded with NaN).
    # error: Non-overlapping identity check (left operand type: "List[int]",
    # right operand type: "Literal[False]")
    if (max_len > col_len and self.index_col is not False  # type: ignore[comparison-overlap]
            and self.usecols is None):
        footers = self.skipfooter if self.skipfooter else 0
        bad_lines = []

        # Rebuild ``content`` keeping only rows of acceptable width;
        # offending rows are fixed (callable), recorded, or dropped.
        iter_content = enumerate(content)
        content_len = len(content)
        content = []

        for (i, l) in iter_content:
            actual_len = len(l)

            if actual_len > col_len:
                if callable(self.on_bad_lines):
                    # User-supplied handler may return a repaired row.
                    new_l = self.on_bad_lines(l)
                    if new_l is not None:
                        content.append(new_l)
                elif (self.on_bad_lines == self.BadLineHandleMethod.ERROR
                        or self.on_bad_lines == self.BadLineHandleMethod.WARN):
                    # 1-based file position of the offending row.
                    row_num = self.pos - (content_len - i + footers)
                    bad_lines.append((row_num, actual_len))

                    if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
                        # First bad line is fatal; no need to scan further.
                        break
            else:
                content.append(l)

        # Report every recorded bad line (raises or warns depending on mode).
        for row_num, actual_len in bad_lines:
            msg = (f"Expected {col_len} fields in line {row_num + 1}, saw "
                   f"{actual_len}")
            if (self.delimiter and len(self.delimiter) > 1
                    and self.quoting != csv.QUOTE_NONE):
                # see gh-13374
                reason = ("Error could possibly be due to quotes being "
                          "ignored when a multi-char delimiter is used.")
                msg += ". " + reason

            self._alert_malformed(msg, row_num + 1)

    # see gh-13320
    # min_width pads short rows so the transpose is rectangular.
    zipped_content = list(
        lib.to_object_array(content, min_width=col_len).T)

    if self.usecols:
        assert self._col_indices is not None
        col_indices = self._col_indices

        if self._implicit_index:
            # Keep the implicit-index columns plus the requested data columns.
            zipped_content = [
                a for i, a in enumerate(zipped_content)
                if (i < len(self.index_col)
                    or i - len(self.index_col) in col_indices)
            ]
        else:
            zipped_content = [
                a for i, a in enumerate(zipped_content) if i in col_indices
            ]
    return zipped_content
def _rows_to_cols(self, content):
    """Transpose parsed rows into per-column arrays.

    Rows shorter than the expected width are padded (via ``min_width``);
    rows longer than it are recorded and reported according to
    ``self.error_bad_lines`` / ``self.warn_bad_lines``.  Returns one object
    ndarray per column, filtered down to ``usecols`` when set.
    """
    col_len = self.num_original_columns

    # An implicit index contributes extra leading columns.
    if self._implicit_index:
        col_len += len(self.index_col)

    max_len = max(len(row) for row in content)

    # Check that there are no rows with too many
    # elements in their row (rows with too few
    # elements are padded with NaN).
    if max_len > col_len and self.index_col is not False and self.usecols is None:
        footers = self.skipfooter if self.skipfooter else 0
        bad_lines = []

        # Rebuild ``content`` keeping only rows of acceptable width;
        # overlong rows are recorded for reporting below.
        iter_content = enumerate(content)
        content_len = len(content)
        content = []

        for (i, l) in iter_content:
            actual_len = len(l)

            if actual_len > col_len:
                if self.error_bad_lines or self.warn_bad_lines:
                    # 1-based file position of the offending row.
                    row_num = self.pos - (content_len - i + footers)
                    bad_lines.append((row_num, actual_len))

                    if self.error_bad_lines:
                        # First bad line is fatal; no need to scan further.
                        break
            else:
                content.append(l)

        # Report every recorded bad line (raises or warns depending on mode).
        for row_num, actual_len in bad_lines:
            msg = (
                f"Expected {col_len} fields in line {row_num + 1}, saw "
                f"{actual_len}"
            )
            if (
                self.delimiter
                and len(self.delimiter) > 1
                and self.quoting != csv.QUOTE_NONE
            ):
                # see gh-13374
                reason = (
                    "Error could possibly be due to quotes being "
                    "ignored when a multi-char delimiter is used."
                )
                msg += ". " + reason

            self._alert_malformed(msg, row_num + 1)

    # see gh-13320
    # min_width pads short rows so the transpose is rectangular.
    zipped_content = list(lib.to_object_array(content, min_width=col_len).T)

    if self.usecols:
        assert self._col_indices is not None
        col_indices = self._col_indices

        if self._implicit_index:
            # Keep the implicit-index columns plus the requested data columns.
            zipped_content = [
                a
                for i, a in enumerate(zipped_content)
                if (
                    i < len(self.index_col)
                    or i - len(self.index_col) in col_indices
                )
            ]
        else:
            zipped_content = [
                a for i, a in enumerate(zipped_content) if i in col_indices
            ]
    return zipped_content