def _create_table_statement(self): "Return a CREATE TABLE statement to suit the contents of a DataFrame." columns = list(map(str, self.frame.columns)) pat = re.compile('\s+') if any(map(pat.search, columns)): warnings.warn(_SAFE_NAMES_WARNING) column_types = [self._sql_type_name(typ) for typ in self.frame.dtypes] if self.index is not None: for i, idx_label in enumerate(self.index[::-1]): columns.insert(0, idx_label) column_types.insert(0, self._sql_type_name(self.frame.index.get_level_values(i).dtype)) flv = self.pd_sql.flavor br_l = _SQL_SYMB[flv]['br_l'] # left val quote char br_r = _SQL_SYMB[flv]['br_r'] # right val quote char col_template = br_l + '%s' + br_r + ' %s' columns = ',\n '.join(col_template % x for x in zip(columns, column_types)) template = """CREATE TABLE %(name)s ( %(columns)s )""" create_statement = template % {'name': self.name, 'columns': columns} return create_statement
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'): """ Parameters ---------- Returns ------- """ from functools import partial assert len(left_keys) == len(right_keys), \ 'left_key and right_keys must be the same length' # bind `sort` arg. of _factorize_keys fkeys = partial(_factorize_keys, sort=sort) # get left & right join labels and num. of levels at each location llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys))) # get flat i8 keys from label lists lkey, rkey = _get_join_keys(llab, rlab, shape, sort) # factorize keys to a dense i8 space # `count` is the num. of unique keys # set(lkey) | set(rkey) == range(count) lkey, rkey, count = fkeys(lkey, rkey) # preserve left frame order if how == 'left' and sort == False kwargs = {'sort': sort} if how == 'left' else {} join_func = _join_functions[how] return join_func(lkey, rkey, count, **kwargs)
def _get_multiindex_indexer(join_keys, index, sort): from functools import partial # bind `sort` argument fkeys = partial(_factorize_keys, sort=sort) # left & right join labels and num. of levels at each location rlab, llab, shape = map(list, zip(* map(fkeys, index.levels, join_keys))) if sort: rlab = list(map(np.take, rlab, index.labels)) else: i8copy = lambda a: a.astype('i8', subok=False, copy=True) rlab = list(map(i8copy, index.labels)) # fix right labels if there were any nulls for i in range(len(join_keys)): mask = index.labels[i] == -1 if mask.any(): # check if there already was any nulls at this location # if there was, it is factorized to `shape[i] - 1` a = join_keys[i][llab[i] == shape[i] - 1] if a.size == 0 or not a[0] != a[0]: shape[i] += 1 rlab[i][mask] = shape[i] - 1 # get flat i8 join keys lkey, rkey = _get_join_keys(llab, rlab, shape, sort) # factorize keys to a dense i8 space lkey, rkey, count = fkeys(lkey, rkey) return _algos.left_outer_join(lkey, rkey, count, sort=sort)
def _create_table_statement(self): from sqlalchemy import Table, Column safe_columns = map(_safe_col_name, self.frame.dtypes.index) column_types = map(self._sqlalchemy_type, self.frame.dtypes) columns = [Column(name, typ) for name, typ in zip(safe_columns, column_types)] if self.index is not None: columns.insert(0, Column(self.index, self._sqlalchemy_type(self.frame.index), index=True)) return Table(self.name, self.pd_sql.meta, *columns)
def _get_column_names_and_types(self, dtype_mapper): column_names_and_types = [] if self.index is not None: for i, idx_label in enumerate(self.index): idx_type = dtype_mapper( self.frame.index.get_level_values(i).dtype) column_names_and_types.append((idx_label, idx_type)) column_names_and_types += zip( list(map(str, self.frame.columns)), map(dtype_mapper, self.frame.dtypes) ) return column_names_and_types
def _create_table_statement(self): from sqlalchemy import Table, Column columns = list(map(str, self.frame.columns)) column_types = map(self._sqlalchemy_type, self.frame.dtypes) columns = [Column(name, typ) for name, typ in zip(columns, column_types)] if self.index is not None: for i, idx_label in enumerate(self.index[::-1]): idx_type = self._sqlalchemy_type(self.frame.index.get_level_values(i)) columns.insert(0, Column(idx_label, idx_type, index=True)) return Table(self.name, self.pd_sql.meta, *columns)
def __iter__(self): """ Return an iterator of the values. These are each a scalar type, which is a Python scalar (for str, int, float) or a pandas scalar (for Timestamp/Timedelta/Interval/Period) """ # We are explicity making element iterators. if is_datetimelike(self._values): return map(com.maybe_box_datetimelike, self._values) elif is_extension_array_dtype(self._values): return iter(self._values) else: return map(self._values.item, range(self._values.size))
def _create_table_statement(self): "Return a CREATE TABLE statement to suit the contents of a DataFrame." column_names_and_types = \ self._get_column_names_and_types(self._sql_type_name) pat = re.compile('\s+') column_names = [col_name for col_name, _ in column_names_and_types] if any(map(pat.search, column_names)): warnings.warn(_SAFE_NAMES_WARNING) flv = self.pd_sql.flavor br_l = _SQL_SYMB[flv]['br_l'] # left val quote char br_r = _SQL_SYMB[flv]['br_r'] # right val quote char col_template = br_l + '%s' + br_r + ' %s' columns = ',\n '.join(col_template % x for x in column_names_and_types) template = """CREATE TABLE %(name)s ( %(columns)s )""" create_statement = template % {'name': self.name, 'columns': columns} return create_statement
def is_instance_factory(_type): """ Parameters ---------- `_type` - the type to be checked against Returns ------- validator - a function of a single argument x , which returns the True if x is an instance of `_type` """ if isinstance(_type, (tuple, list)): _type = tuple(_type) from pandas.core.common import pprint_thing type_repr = "|".join(map(pprint_thing, _type)) else: type_repr = "'%s'" % _type def inner(x): if not isinstance(x, _type): raise ValueError("Value must be an instance of %s" % type_repr) return inner
def __init__(self, io, **kwds): import xlrd # throw an ImportError if we need to ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) if ver < (0, 9): # pragma: no cover raise ImportError("pandas requires xlrd >= 0.9.0 for excel " "support, current version " + xlrd.__VERSION__) self.io = io engine = kwds.pop('engine', None) if engine is not None and engine != 'xlrd': raise ValueError("Unknown engine: %s" % engine) if isinstance(io, compat.string_types): self.book = xlrd.open_workbook(io) elif engine == "xlrd" and isinstance(io, xlrd.Book): self.book = io elif hasattr(io, "read"): data = io.read() self.book = xlrd.open_workbook(file_contents=data) else: raise ValueError('Must explicitly set engine if not passing in' ' buffer or path for io.')
def is_instance_factory(_type): """ Parameters ---------- `_type` - the type to be checked against Returns ------- validator - a function of a single argument x , which raises ValueError if x is not an instance of `_type` """ if isinstance(_type, (tuple, list)): _type = tuple(_type) from pandas.io.formats.printing import pprint_thing type_repr = "|".join(map(pprint_thing, _type)) else: type_repr = "'{typ}'".format(typ=_type) def inner(x): if not isinstance(x, _type): msg = "Value must be an instance of {type_repr}" raise ValueError(msg.format(type_repr=type_repr)) return inner
def _denorm(queries,thing): fields = [] results = [] for q in queries: #print q r = Ql(q,thing) #print "-- result: ", r if not r: r = [default] if isinstance(r[0], type({})): fields.append(sorted(r[0].keys())) # dicty answers else: fields.append([q]) # stringy answer results.append(r) #print results #print fields flist = list(flatten(*map(iter,fields))) prod = itertools.product(*results) for p in prod: U = dict() for (ii,thing) in enumerate(p): #print ii,thing if isinstance(thing, type({})): U.update(thing) else: U[fields[ii][0]] = thing yield U
def get_schema(frame, name, flavor, keys=None): "Return a CREATE TABLE statement to suit the contents of a DataFrame." lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) # Replace spaces in DataFrame column names with _. # Also force lowercase, postgresql can be case sensitive safe_columns = [s.replace(' ', '_').strip().lower() for s in frame.dtypes.index] column_types = lzip(safe_columns, map(lookup_type, frame.dtypes)) if flavor == 'sqlite': columns = ',\n '.join('[%s] %s' % x for x in column_types) elif flavor == 'postgresql': columns = ',\n '.join('"%s" %s' % x for x in column_types) else: columns = ',\n '.join('`%s` %s' % x for x in column_types) keystr = '' if keys is not None: if isinstance(keys, compat.string_types): keys = (keys,) keystr = ', PRIMARY KEY (%s)' % ','.join(keys) template = """CREATE TABLE %(name)s ( %(columns)s %(keystr)s );""" create_statement = template % {'name': name, 'columns': columns, 'keystr': keystr} return create_statement
def insert_data(self): if self.index is not None: temp = self.frame.copy() temp.index.names = self.index try: temp.reset_index(inplace=True) except ValueError as err: raise ValueError( "duplicate name in index/columns: {0}".format(err)) else: temp = self.frame column_names = list(map(str, temp.columns)) ncols = len(column_names) data_list = [None] * ncols blocks = temp._data.blocks for i in range(len(blocks)): b = blocks[i] if b.is_datetime: # convert to microsecond resolution so this yields datetime.datetime d = b.values.astype('M8[us]').astype(object) else: d = np.array(b.values, dtype=object) # replace NaN with None if b._can_hold_na: mask = isnull(d) d[mask] = None for col_loc, col in zip(b.mgr_locs, d): data_list[col_loc] = col return column_names, data_list
def _skip_if_no_xlrd(): try: import xlrd ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) if ver < (0, 9): raise nose.SkipTest('xlrd < 0.9, skipping') except ImportError: raise nose.SkipTest('xlrd not installed, skipping')
def __unicode__(self): output = self.__class__.__name__ output += u('(') prefix = '' if compat.PY3 else 'u' mapper = "{0}'{{0}}'".format(prefix) output += '[{0}]'.format(', '.join(map(mapper.format, self))) output += ", freq='{0}'".format(self.freq) output += ')' return output
def test_map(self): func = lambda x, y, z: x + y + z lst = [builtins.range(10), builtins.range(10), builtins.range(10)] actual1 = map(func, *lst) actual2 = lmap(func, *lst) actual = [actual1, actual2], expected = list(builtins.map(func, *lst)), lengths = 10, self.check_result(actual, expected, lengths)
def __unicode__(self): output = self.__class__.__name__ output += u("(") prefix = "" if compat.PY3 else "u" mapper = "{0}'{{0}}'".format(prefix) output += "[{0}]".format(", ".join(map(mapper.format, self))) output += ", freq='{0}'".format(self.freq) output += ")" return output
def assert_framelist_equal(list1, list2, *args, **kwargs): assert len(list1) == len(list2), ( "lists are not of equal size " "len(list1) == {0}, " "len(list2) == {1}".format(len(list1), len(list2)) ) msg = "not all list elements are DataFrames" both_frames = all(map(lambda x, y: isinstance(x, DataFrame) and isinstance(y, DataFrame), list1, list2)) assert both_frames, msg for frame_i, frame_j in zip(list1, list2): tm.assert_frame_equal(frame_i, frame_j, *args, **kwargs) assert not frame_i.empty, "frames are both empty"
def assert_framelist_equal(list1, list2, *args, **kwargs): assert len(list1) == len(list2), ('lists are not of equal size ' 'len(list1) == {0}, ' 'len(list2) == {1}'.format(len(list1), len(list2))) assert all(map(lambda x, y: isframe(x) and isframe(y), list1, list2)), \ 'not all list elements are DataFrames' for frame_i, frame_j in zip(list1, list2): assert_frame_equal(frame_i, frame_j, *args, **kwargs) assert not frame_i.empty, 'frames are both empty'
def _concat_categorical(to_concat, axis=0): """Concatenate an object/categorical array of arrays, each of which is a single dtype Parameters ---------- to_concat : array of arrays axis : int Axis to provide concatenation in the current implementation this is always 0, e.g. we only have 1D categoricals Returns ------- Categorical A single array, preserving the combined dtypes """ from pandas.core.categorical import Categorical def convert_categorical(x): # coerce to object dtype if com.is_categorical_dtype(x.dtype): return x.get_values() return x.ravel() if get_dtype_kinds(to_concat) - set(['object', 'category']): # convert to object type and perform a regular concat return _concat_compat([np.array(x, copy=False, dtype=object) for x in to_concat], axis=0) # we could have object blocks and categoricals here # if we only have a single categoricals then combine everything # else its a non-compat categorical categoricals = [x for x in to_concat if com.is_categorical_dtype(x.dtype)] # validate the categories categories = categoricals[0] rawcats = categories.categories for x in categoricals[1:]: if not categories.is_dtype_equal(x): raise ValueError("incompatible categories in categorical concat") # we've already checked that all categoricals are the same, so if their # length is equal to the input then we have all the same categories if len(categoricals) == len(to_concat): # concating numeric types is much faster than concating object types # and fastpath takes a shorter path through the constructor return Categorical(np.concatenate([x.codes for x in to_concat], axis=0), rawcats, ordered=categoricals[0].ordered, fastpath=True) else: concatted = np.concatenate(list(map(convert_categorical, to_concat)), axis=0) return Categorical(concatted, rawcats)
def insert(self): ins = self.insert_statement() data_list = [] temp = self.insert_data() keys = list(map(str, temp.columns)) for t in temp.itertuples(): data = dict((k, self.maybe_asscalar(v)) for k, v in zip(keys, t[1:])) data_list.append(data) self.pd_sql.execute(ins, data_list)
def _check_columns(cols): if not len(cols): raise AssertionError("There must be at least 1 column") head, tail = cols[0], cols[1:] N = len(head) for i, n in enumerate(map(len, tail)): if n != N: raise AssertionError('All columns must have the same length: {0}; ' 'column {1} has length {2}'.format(N, i, n)) return N
def insert_statement(self): names = list(map(str, self.frame.columns)) flv = self.pd_sql.flavor br_l = _SQL_SYMB[flv]["br_l"] # left val quote char br_r = _SQL_SYMB[flv]["br_r"] # right val quote char wld = _SQL_SYMB[flv]["wld"] # wildcard char if self.index is not None: [names.insert(0, idx) for idx in self.index[::-1]] bracketed_names = [br_l + column + br_r for column in names] col_names = ",".join(bracketed_names) wildcards = ",".join([wld] * len(names)) insert_statement = "INSERT INTO %s (%s) VALUES (%s)" % (self.name, col_names, wildcards) return insert_statement
def _convert_to_stop(cls, stop_seq): """ Convert ``stop_seq`` to a list of openpyxl v2 Color objects, suitable for initializing the ``GradientFill`` ``stop`` parameter. Parameters ---------- stop_seq : iterable An iterable that yields objects suitable for consumption by ``_convert_to_color``. Returns ------- stop : list of openpyxl.styles.Color """ return map(cls._convert_to_color, stop_seq)
def _check_ne_builtin_clash(expr): """Attempt to prevent foot-shooting in a helpful way. Parameters ---------- terms : Term Terms can contain """ names = expr.names overlap = names & _ne_builtins if overlap: s = ', '.join(map(repr, overlap)) raise NumExprClobberingError('Variables in expression "%s" ' 'overlap with builtins: (%s)' % (expr, s))
def get_hits(defname,files=()): cs=set() for f in files: try: r=sh.git('blame', '-L', '/def\s*{start}/,/def/'.format(start=defname),f,_tty_out=False) except sh.ErrorReturnCode_128: logger.debug("no matches in %s" % f) continue lines = r.strip().splitlines()[:-1] # remove comment lines lines = [x for x in lines if not re.search("^\w+\s*\(.+\)\s*#",x)] hits = set(map(lambda x: x.split(" ")[0],lines)) cs.update(set([Hit(commit=c,path=f) for c in hits])) return cs
def __init__(self, path_or_buf, **kwds): import xlrd # throw an ImportError if we need to ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) if ver < (0, 9): # pragma: no cover raise ImportError("pandas requires xlrd >= 0.9.0 for excel " "support, current version " + xlrd.__VERSION__) self.path_or_buf = path_or_buf self.tmpfile = None if isinstance(path_or_buf, compat.string_types): self.book = xlrd.open_workbook(path_or_buf) else: data = path_or_buf.read() self.book = xlrd.open_workbook(file_contents=data)
def _get_schema_legacy(frame, name, flavor, keys=None): """Old function from 0.13.1. To keep backwards compatibility. When mysql legacy support is dropped, it should be possible to remove this code """ def get_sqltype(dtype, flavor): pytype = dtype.type pytype_name = "text" if issubclass(pytype, np.floating): pytype_name = "float" elif issubclass(pytype, np.integer): pytype_name = "int" elif issubclass(pytype, np.datetime64) or pytype is datetime: # Caution: np.datetime64 is also a subclass of np.number. pytype_name = "datetime" elif pytype is datetime.date: pytype_name = "date" elif issubclass(pytype, np.bool_): pytype_name = "bool" return _SQL_TYPES[pytype_name][flavor] lookup_type = lambda dtype: get_sqltype(dtype, flavor) column_types = lzip(frame.dtypes.index, map(lookup_type, frame.dtypes)) if flavor == 'sqlite': columns = ',\n '.join('[%s] %s' % x for x in column_types) else: columns = ',\n '.join('`%s` %s' % x for x in column_types) keystr = '' if keys is not None: if isinstance(keys, string_types): keys = (keys,) keystr = ', PRIMARY KEY (%s)' % ','.join(keys) template = """CREATE TABLE %(name)s ( %(columns)s %(keystr)s );""" create_statement = template % {'name': name, 'columns': columns, 'keystr': keystr} return create_statement
def __setitem__(self, key, value): shape = tuple(self.shape) if isinstance(value, self._constructor_sliced): value = value.reindex( **self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])) mat = value.values elif isinstance(value, np.ndarray): if value.shape != shape[1:]: raise ValueError( 'shape of value must be {0}, shape of given object was ' '{1}'.format(shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) elif np.isscalar(value): dtype, value = _infer_dtype_from_scalar(value) mat = np.empty(shape[1:], dtype=dtype) mat.fill(value) else: raise TypeError('Cannot set item of type: %s' % str(type(value))) mat = mat.reshape(tuple([1]) + shape[1:]) NDFrame._set_item(self, key, mat)
def __repr__(self): if not self: return '%s()' % self.__class__.__name__ items = ', '.join(map('%r: %r'.__mod__, self.most_common())) return '%s({%s})' % (self.__class__.__name__, items)
def write_result(self, buf): """ Render a DataFrame to a LaTeX tabular/longtable environment output. """ # string representation of the columns if len(self.frame.columns) == 0 or len(self.frame.index) == 0: info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') .format(name=type(self.frame).__name__, col=self.frame.columns, idx=self.frame.index)) strcols = [[info_line]] else: strcols = self.fmt._to_str_columns() def get_col_type(dtype): if issubclass(dtype.type, np.number): return 'r' else: return 'l' # reestablish the MultiIndex that has been joined by _to_str_column if self.fmt.index and isinstance(self.frame.index, MultiIndex): clevels = self.frame.columns.nlevels strcols.pop(0) name = any(self.frame.index.names) cname = any(self.frame.columns.names) lastcol = self.frame.index.nlevels - 1 previous_lev3 = None for i, lev in enumerate(self.frame.index.levels): lev2 = lev.format() blank = ' ' * len(lev2[0]) # display column names in last index-column if cname and i == lastcol: lev3 = [x if x else '{}' for x in self.frame.columns.names] else: lev3 = [blank] * clevels if name: lev3.append(lev.name) current_idx_val = None for level_idx in self.frame.index.labels[i]: if ((previous_lev3 is None or previous_lev3[len(lev3)].isspace()) and lev2[level_idx] == current_idx_val): # same index as above row and left index was the same lev3.append(blank) else: # different value than above or left index different lev3.append(lev2[level_idx]) current_idx_val = lev2[level_idx] strcols.insert(i, lev3) previous_lev3 = lev3 column_format = self.column_format if column_format is None: dtypes = self.frame.dtypes._values column_format = ''.join(map(get_col_type, dtypes)) if self.fmt.index: index_format = 'l' * self.frame.index.nlevels column_format = index_format + column_format elif not isinstance(column_format, compat.string_types): # pragma: no cover raise AssertionError('column_format must be str or unicode, ' 'not {typ}'.format(typ=type(column_format))) if not self.longtable: buf.write('\\begin{{tabular}}{{{fmt}}}\n' .format(fmt=column_format)) buf.write('\\toprule\n') else: buf.write('\\begin{{longtable}}{{{fmt}}}\n' .format(fmt=column_format)) buf.write('\\toprule\n') ilevels = self.frame.index.nlevels clevels = self.frame.columns.nlevels nlevels = clevels if any(self.frame.index.names): nlevels += 1 strrows = list(zip(*strcols)) self.clinebuf = [] for i, row in enumerate(strrows): if i == nlevels and self.fmt.header: buf.write('\\midrule\n') # End of header if self.longtable: buf.write('\\endhead\n') buf.write('\\midrule\n') buf.write('\\multicolumn{{{n}}}{{r}}{{{{Continued on next ' 'page}}}} \\\\\n'.format(n=len(row))) buf.write('\\midrule\n') buf.write('\\endfoot\n\n') buf.write('\\bottomrule\n') buf.write('\\endlastfoot\n') if self.fmt.kwds.get('escape', True): # escape backslashes first crow = [(x.replace('\\', '\\textbackslash').replace('_', '\\_') .replace('%', '\\%').replace('$', '\\$') .replace('#', '\\#').replace('{', '\\{') .replace('}', '\\}').replace('~', '\\textasciitilde') .replace('^', '\\textasciicircum').replace('&', '\\&') if (x and x != '{}') else '{}') for x in row] else: crow = [x if x else '{}' for x in row] if self.bold_rows and self.fmt.index: # bold row labels crow = ['\\textbf{{{x}}}'.format(x=x) if j < ilevels and x.strip() not in ['', '{}'] else x for j, x in enumerate(crow)] if i < clevels and self.fmt.header and self.multicolumn: # sum up columns to multicolumns crow = self._format_multicolumn(crow, ilevels) if (i >= nlevels and self.fmt.index and self.multirow and ilevels > 1): # sum up rows to multirows crow = self._format_multirow(crow, ilevels, i, strrows) buf.write(' & '.join(crow)) buf.write(' \\\\\n') if self.multirow and i < len(strrows) - 1: self._print_cline(buf, i, len(strcols)) if not self.longtable: buf.write('\\bottomrule\n') buf.write('\\end{tabular}\n') else: buf.write('\\end{longtable}\n')
def _raw_hex_id(obj): """Return the padded hexadecimal id of ``obj``.""" # interpret as a pointer since that's what really what id returns packed = struct.pack('@P', id(obj)) return ''.join(map(_replacer, packed))
def __unicode__(self): # unicode representation based upon iterating over self # (since, by definition, `PandasContainers` are iterable) prepr = '[%s]' % ','.join(map(com.pprint_thing, self)) return '%s(%s)' % (self.__class__.__name__, prepr)
def randu(n): choices = u("").join(map(unichr, lrange(1488, 1488 + 26))) choices += string.digits return ''.join([random.choice(choices) for _ in range(n)])
def test_string_factorize(self): # should this be optional? data = 'a\nb\na\nb\na' reader = TextReader(StringIO(data), header=None) result = reader.read() self.assertEqual(len(set(map(id, result[0]))), 2)
_is_str = _is_type(string_types) # partition all AST nodes _all_nodes = frozenset( filter(lambda x: isinstance(x, type) and issubclass(x, ast.AST), (getattr(ast, node) for node in dir(ast)))) def _filter_nodes(superclass, all_nodes=_all_nodes): """Filter out AST nodes that are subclasses of ``superclass``.""" node_names = (node.__name__ for node in all_nodes if issubclass(node, superclass)) return frozenset(node_names) _all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes)) _mod_nodes = _filter_nodes(ast.mod) _stmt_nodes = _filter_nodes(ast.stmt) _expr_nodes = _filter_nodes(ast.expr) _expr_context_nodes = _filter_nodes(ast.expr_context) _slice_nodes = _filter_nodes(ast.slice) _boolop_nodes = _filter_nodes(ast.boolop) _operator_nodes = _filter_nodes(ast.operator) _unary_op_nodes = _filter_nodes(ast.unaryop) _cmp_op_nodes = _filter_nodes(ast.cmpop) _comprehension_nodes = _filter_nodes(ast.comprehension) _handler_nodes = _filter_nodes(ast.excepthandler) _arguments_nodes = _filter_nodes(ast.arguments) _keyword_nodes = _filter_nodes(ast.keyword) _alias_nodes = _filter_nodes(ast.alias)
def write_result(self, buf): """ Render a DataFrame to a LaTeX tabular/longtable environment output. """ # string representation of the columns if len(self.frame.columns) == 0 or len(self.frame.index) == 0: info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') .format(name=type(self.frame).__name__, col=self.frame.columns, idx=self.frame.index)) strcols = [[info_line]] else: strcols = self.fmt._to_str_columns() def get_col_type(dtype): if issubclass(dtype.type, np.number): return 'r' else: return 'l' # reestablish the MultiIndex that has been joined by _to_str_column if self.fmt.index and isinstance(self.frame.index, MultiIndex): out = self.frame.index.format( adjoin=False, sparsify=self.fmt.sparsify, names=self.fmt.has_index_names, na_rep=self.fmt.na_rep ) # index.format will sparsify repeated entries with empty strings # so pad these with some empty space def pad_empties(x): for pad in reversed(x): if pad: break return [x[0]] + [i if i else ' ' * len(pad) for i in x[1:]] out = (pad_empties(i) for i in out) # Add empty spaces for each column level clevels = self.frame.columns.nlevels out = [[' ' * len(i[-1])] * clevels + i for i in out] # Add the column names to the last index column cnames = self.frame.columns.names if any(cnames): new_names = [i if i else '{}' for i in cnames] out[self.frame.index.nlevels - 1][:clevels] = new_names # Get rid of old multiindex column and add new ones strcols = out + strcols[1:] column_format = self.column_format if column_format is None: dtypes = self.frame.dtypes._values column_format = ''.join(map(get_col_type, dtypes)) if self.fmt.index: index_format = 'l' * self.frame.index.nlevels column_format = index_format + column_format elif not isinstance(column_format, compat.string_types): # pragma: no cover raise AssertionError('column_format must be str or unicode, ' 'not {typ}'.format(typ=type(column_format))) if not self.longtable: buf.write('\\begin{{tabular}}{{{fmt}}}\n' .format(fmt=column_format)) buf.write('\\toprule\n') else: buf.write('\\begin{{longtable}}{{{fmt}}}\n' .format(fmt=column_format)) buf.write('\\toprule\n') ilevels = self.frame.index.nlevels clevels = self.frame.columns.nlevels nlevels = clevels if self.fmt.has_index_names and self.fmt.show_index_names: nlevels += 1 strrows = list(zip(*strcols)) self.clinebuf = [] for i, row in enumerate(strrows): if i == nlevels and self.fmt.header: buf.write('\\midrule\n') # End of header if self.longtable: buf.write('\\endhead\n') buf.write('\\midrule\n') buf.write('\\multicolumn{{{n}}}{{r}}{{{{Continued on next ' 'page}}}} \\\\\n'.format(n=len(row))) buf.write('\\midrule\n') buf.write('\\endfoot\n\n') buf.write('\\bottomrule\n') buf.write('\\endlastfoot\n') if self.fmt.kwds.get('escape', True): # escape backslashes first crow = [(x.replace('\\', '\\textbackslash ') .replace('_', '\\_') .replace('%', '\\%').replace('$', '\\$') .replace('#', '\\#').replace('{', '\\{') .replace('}', '\\}').replace('~', '\\textasciitilde ') .replace('^', '\\textasciicircum ') .replace('&', '\\&') if (x and x != '{}') else '{}') for x in row] else: crow = [x if x else '{}' for x in row] if self.bold_rows and self.fmt.index: # bold row labels crow = ['\\textbf{{{x}}}'.format(x=x) if j < ilevels and x.strip() not in ['', '{}'] else x for j, x in enumerate(crow)] if i < clevels and self.fmt.header and self.multicolumn: # sum up columns to multicolumns crow = self._format_multicolumn(crow, ilevels) if (i >= nlevels and self.fmt.index and self.multirow and ilevels > 1): # sum up rows to multirows crow = self._format_multirow(crow, ilevels, i, strrows) buf.write(' & '.join(crow)) buf.write(' \\\\\n') if self.multirow and i < len(strrows) - 1: self._print_cline(buf, i, len(strcols)) if not self.longtable: buf.write('\\bottomrule\n') buf.write('\\end{tabular}\n') else: buf.write('\\end{longtable}\n')