def get_result(self):
    """Format ``self.values`` as strings (NA-aware, float-format-aware)
    and return them padded to a fixed width via ``_make_fixed_width``.
    """
    # Resolve the float formatter: the instance-level setting wins,
    # then the global print_config one, then a precision-based default.
    float_format = self.float_format
    if float_format is None:
        float_format = print_config.float_format
        if float_format is None:
            template = '%% .%dg' % print_config.precision
            float_format = lambda v: template % v

    formatter = self.formatter if self.formatter is not None else _stringify

    def _format(value):
        # NA values render as na_rep, except a literal None which is
        # always rendered as 'None'.
        if self.na_rep is not None and lib.checknull(value):
            return 'None' if value is None else self.na_rep
        # object dtype
        return '%s' % formatter(value)

    values = self.values
    is_float = lib.map_infer(values, com.is_float) & notnull(values)

    # NOTE(review): the original branched on leading_space = is_float.any(),
    # but both non-float branches emitted the identical ' %s' string, so the
    # distinction is folded away here; behavior is unchanged.
    fmt_values = [float_format(v) if is_float[i] else ' %s' % _format(v)
                  for i, v in enumerate(values)]

    return _make_fixed_width(fmt_values, self.justify)
def _dt_box_array(arr, offset=None, tz=None): if arr is None: return arr if not isinstance(arr, np.ndarray): return arr boxfunc = lambda x: Timestamp(x, offset=offset, tz=tz) return lib.map_infer(arr, boxfunc)
def auto_map(arr, f, otherargs, n_results=1, required='all'):
    """Apply ``f`` elementwise over ``arr`` with extra arguments.

    When every extra argument is a scalar, ``f`` is mapped directly with
    those same arguments for each element and a single Series is returned.
    Otherwise the extra arguments are zipped elementwise alongside ``arr``
    and one Series per result column is produced (a single Series when
    ``n_results == 1``, else a list of Series).
    """
    # Fast path: identical extra arguments on every call.
    scalars_only = all(np.isscalar(a) for a in otherargs)
    if scalars_only:
        mapped = lib.map_infer(arr, lambda v: f(v, *otherargs))
        return Series(mapped, index=arr.index, copy=False)

    n_otherargs = len(otherargs)
    if required == 'all':
        required = list(range(n_otherargs))

    columns = map_iter_args(arr, f, azip(*otherargs), n_otherargs,
                            required, n_results)
    series_list = [Series(col, index=arr.index, copy=False)
                   for col in columns]
    return series_list[0] if n_results == 1 else series_list
def _format_strings(self, use_unicode=False):
    """Format ``self.values`` as a list of strings (NA-aware and
    float-format-aware); uses ``_stringify`` for non-float values when
    ``use_unicode`` is set, plain ``str`` otherwise.
    """
    # Resolve the float formatter: instance setting, then the global
    # print_config, then a precision-based default.
    float_format = self.float_format
    if float_format is None:
        float_format = print_config.float_format
        if float_format is None:
            template = "%% .%dg" % print_config.precision
            float_format = lambda v: template % v

    if self.formatter is not None:
        formatter = self.formatter
    else:
        formatter = _stringify if use_unicode else str

    def _format(value):
        # NA values render as na_rep, except a literal None which is
        # always rendered as "None".
        if self.na_rep is not None and lib.checknull(value):
            return "None" if value is None else self.na_rep
        # object dtype
        return "%s" % formatter(value)

    values = self.values
    is_float = lib.map_infer(values, com.is_float) & notnull(values)

    # NOTE(review): the original branched on leading_space = is_float.any(),
    # but both non-float branches emitted the identical " %s" string, so the
    # distinction is folded away here; behavior is unchanged.
    return [float_format(v) if is_float[i] else " %s" % _format(v)
            for i, v in enumerate(values)]
def get_chunk(self, rows=None):
    """Parse up to ``rows`` lines from the source and return a DataFrame.

    Parameters
    ----------
    rows : int or None
        Number of rows to read; None reads everything remaining.
        Incompatible with ``skip_footer`` (raises ValueError).

    Returns
    -------
    DataFrame indexed per ``self.index_col`` (default integer range index
    when no index column is configured).
    """
    if rows is not None and self.skip_footer:
        raise ValueError('skip_footer not supported for iteration')
    try:
        content = self._get_lines(rows)
    except StopIteration:
        # An empty first read yields an empty frame below; later reads
        # propagate StopIteration to end iteration.
        if self._first_chunk:
            content = []
        else:
            raise

    # done with first read, next time raise StopIteration
    self._first_chunk = False

    if len(content) == 0:  # pragma: no cover
        # Empty input: build an empty frame with the right (Multi)Index
        # shape so callers still see the configured columns/index names.
        if self.index_col is not None:
            if np.isscalar(self.index_col):
                index = Index([], name=self.index_name)
            else:
                index = MultiIndex.from_arrays([[]] * len(self.index_col),
                                               names=self.index_name)
        else:
            index = Index([])
        return DataFrame(index=index, columns=self.columns)

    # Transpose rows into per-column object arrays.
    zipped_content = list(lib.to_object_array(content).T)

    # no index column specified, so infer that's what is wanted
    if self.index_col is not None:
        if np.isscalar(self.index_col):
            index = zipped_content.pop(self.index_col)
        else:  # given a list of index
            index = []
            for idx in self.index_col:
                index.append(zipped_content[idx])
            # remove index items from content and columns, don't pop in loop
            for i in reversed(sorted(self.index_col)):
                zipped_content.pop(i)

        if np.isscalar(self.index_col):
            # Single index column: optional date parsing, then NA/type
            # conversion before wrapping in an Index.
            if self.parse_dates:
                index = lib.try_parse_dates(index, parser=self.date_parser)
            index, na_count = _convert_types(index, self.na_values)
            index = Index(index, name=self.index_name)
            if self.verbose and na_count:
                print 'Found %d NA values in the index' % na_count
        else:
            # Multiple index columns: convert each level, then combine.
            arrays = []
            for arr in index:
                if self.parse_dates:
                    arr = lib.try_parse_dates(arr, parser=self.date_parser)
                arr, _ = _convert_types(arr, self.na_values)
                arrays.append(arr)
            index = MultiIndex.from_arrays(arrays, names=self.index_name)
    else:
        index = Index(np.arange(len(content)))

    # NOTE(review): _verify_integrity/implicit_idx semantics come from the
    # Index class and parser setup elsewhere — presumably a duplicate-label
    # check; confirm against their definitions.
    if not index._verify_integrity():
        dups = index.get_duplicates()
        idx_str = 'Index' if not self.implicit_idx else 'Implicit index'
        err_msg = ('%s (columns %s) have duplicate values %s'
                   % (idx_str, self.index_col, str(dups)))
        raise Exception(err_msg)

    if len(self.columns) != len(zipped_content):
        raise Exception('wrong number of columns')

    data = dict((k, v) for k, v in izip(self.columns, zipped_content))

    # apply converters
    for col, f in self.converters.iteritems():
        # Integer keys that are not column labels are positional.
        if isinstance(col, int) and col not in self.columns:
            col = self.columns[col]
        data[col] = lib.map_infer(data[col], f)

    data = _convert_to_ndarrays(data, self.na_values, self.verbose)

    return DataFrame(data=data, columns=self.columns, index=index)
def _have_unicode(self):
    """Return whether any element of ``self.values`` is a unicode string."""
    def _is_unicode(value):
        return isinstance(value, unicode)

    return lib.map_infer(self.values, _is_unicode).any()
def get_chunk(self, rows=None):
    """Parse up to ``rows`` lines and return a DataFrame (or a Series when
    ``self.squeeze`` is set and exactly one column results).

    Parameters
    ----------
    rows : int or None
        Number of rows to read; None reads everything remaining.
        Incompatible with ``skip_footer`` (raises ValueError).

    Raises
    ------
    ValueError
        If ``skip_footer`` is set with ``rows``, or if a row's field count
        does not match the expected number of columns.
    """
    if rows is not None and self.skip_footer:
        raise ValueError("skip_footer not supported for iteration")
    try:
        content = self._get_lines(rows)
    except StopIteration:
        # An empty first read yields an empty frame below; later reads
        # propagate StopIteration to end iteration.
        if self._first_chunk:
            content = []
        else:
            raise

    # done with first read, next time raise StopIteration
    self._first_chunk = False

    if len(content) == 0:  # pragma: no cover
        # Empty input: build an empty frame with the right (Multi)Index
        # shape so callers still see the configured columns/index names.
        if self.index_col is not None:
            if np.isscalar(self.index_col):
                index = Index([], name=self.index_name)
            else:
                index = MultiIndex.from_arrays([[]] * len(self.index_col),
                                               names=self.index_name)
        else:
            index = Index([])
        return DataFrame(index=index, columns=self.columns)

    # Transpose rows into per-column object arrays.
    zipped_content = list(lib.to_object_array(content).T)

    # Build the index up front unless a composite/complex date column must
    # be assembled after date conversion (handled at the bottom).
    if not self._has_complex_date_col and self.index_col is not None:
        index = self._get_index(zipped_content)
    else:
        index = Index(np.arange(len(content)))

    col_len, zip_len = len(self.columns), len(zipped_content)
    if col_len != zip_len:
        # Locate the first malformed row to produce a useful error message;
        # self.pos is assumed to be the parser's current line position.
        row_num = -1
        for (i, l) in enumerate(content):
            if len(l) != col_len:
                break
        footers = 0
        if self.skip_footer:
            footers = self.skip_footer
        row_num = self.pos - (len(content) - i + footers)
        msg = "Expecting %d columns, got %d in row %d" % (col_len, zip_len, row_num)
        raise ValueError(msg)

    data = dict((k, v) for k, v in izip(self.columns, zipped_content))

    # apply converters
    for col, f in self.converters.iteritems():
        # Integer keys that are not column labels are positional.
        if isinstance(col, int) and col not in self.columns:
            col = self.columns[col]
        data[col] = lib.map_infer(data[col], f)

    columns = self.columns
    if self.parse_dates is not None:
        # May add/replace columns (e.g. combined date columns).
        data, columns = self._process_date_conversion(data)

    data = _convert_to_ndarrays(data, self.na_values, self.verbose)

    df = DataFrame(data=data, columns=columns, index=index)

    if self._has_complex_date_col and self.index_col is not None:
        # Rebuild the index now that date conversion has produced the
        # column(s) it is based on; skip date re-parsing the second time.
        if not self._name_processed:
            self.index_name = self._get_index_name()
            self._name_processed = True
        data = dict(((k, v) for k, v in df.iteritems()))
        columns = list(columns)
        index = self._get_index(data, col_order=columns, parse_dates=False)
        # Unbox Series back to their underlying ndarrays for the rebuild.
        data = dict(((k, v.values) for k, v in data.iteritems()))
        df = DataFrame(data=data, columns=columns, index=index)

    if self.squeeze and len(df.columns) == 1:
        return df[df.columns[0]]

    return df
def get_chunk(self, rows=None):
    """Parse up to ``rows`` lines from the source and return a DataFrame.

    Parameters
    ----------
    rows : int or None
        Number of rows to read; None reads everything remaining.
        Incompatible with ``skip_footer`` (raises ValueError).

    Raises
    ------
    ValueError
        If ``skip_footer`` is set with ``rows``, or if a row's field count
        does not match the expected number of columns.
    """
    if rows is not None and self.skip_footer:
        raise ValueError('skip_footer not supported for iteration')
    try:
        content = self._get_lines(rows)
    except StopIteration:
        # An empty first read yields an empty frame below; later reads
        # propagate StopIteration to end iteration.
        if self._first_chunk:
            content = []
        else:
            raise

    # done with first read, next time raise StopIteration
    self._first_chunk = False

    if len(content) == 0:  # pragma: no cover
        # Empty input: build an empty frame with the right (Multi)Index
        # shape so callers still see the configured columns/index names.
        if self.index_col is not None:
            if np.isscalar(self.index_col):
                index = Index([], name=self.index_name)
            else:
                index = MultiIndex.from_arrays([[]] * len(self.index_col),
                                               names=self.index_name)
        else:
            index = Index([])
        return DataFrame(index=index, columns=self.columns)

    # Transpose rows into per-column object arrays.
    zipped_content = list(lib.to_object_array(content).T)

    if self.index_col is not None:
        if np.isscalar(self.index_col):
            index = zipped_content.pop(self.index_col)
        else:  # given a list of index
            index = []
            for idx in self.index_col:
                index.append(zipped_content[idx])
            # remove index items from content and columns, don't pop in
            # loop
            for i in reversed(sorted(self.index_col)):
                zipped_content.pop(i)

        if np.isscalar(self.index_col):
            # Single index column: per-column date parsing decision, then
            # NA/type conversion before wrapping in an Index.
            if self._should_parse_dates(self.index_col):
                index = self._conv_date(index)
            index, na_count = _convert_types(index, self.na_values)
            index = Index(index, name=self.index_name)
            if self.verbose and na_count:
                print 'Found %d NA values in the index' % na_count
        else:
            # Multiple index columns: convert each level, then combine.
            arrays = []
            for i, arr in enumerate(index):
                if self._should_parse_dates(self.index_col[i]):
                    arr = self._conv_date(arr)
                arr, _ = _convert_types(arr, self.na_values)
                arrays.append(arr)
            index = MultiIndex.from_arrays(arrays, names=self.index_name)
    else:
        index = Index(np.arange(len(content)))

    # Uniqueness check deliberately disabled (kept for reference):
    # if not index.is_unique:
    #     dups = index.get_duplicates()
    #     idx_str = 'Index' if not self._implicit_index else 'Implicit index'
    #     err_msg = ('%s (columns %s) have duplicate values %s'
    #                % (idx_str, self.index_col, str(dups)))
    #     raise Exception(err_msg)

    col_len, zip_len = len(self.columns), len(zipped_content)
    if col_len != zip_len:
        # Locate the first malformed row to produce a useful error message;
        # self.pos is assumed to be the parser's current line position.
        row_num = -1
        for (i, l) in enumerate(content):
            if len(l) != col_len:
                break
        footers = 0
        if self.skip_footer:
            footers = self.skip_footer
        row_num = self.pos - (len(content) - i + footers)
        msg = ('Expecting %d columns, got %d in row %d' %
               (col_len, zip_len, row_num))
        raise ValueError(msg)

    data = dict((k, v) for k, v in izip(self.columns, zipped_content))

    # apply converters
    for col, f in self.converters.iteritems():
        # Integer keys that are not column labels are positional.
        if isinstance(col, int) and col not in self.columns:
            col = self.columns[col]
        data[col] = lib.map_infer(data[col], f)

    columns = self.columns
    if self.parse_dates is not None:
        # May add/replace columns (e.g. combined date columns).
        data, columns = self._process_date_conversion(data)

    data = _convert_to_ndarrays(data, self.na_values, self.verbose)

    return DataFrame(data=data, columns=columns, index=index)