Example #1
    def _create_table_statement(self):
        "Return a CREATE TABLE statement to suit the contents of a DataFrame."

        columns = list(map(str, self.frame.columns))
        pat = re.compile(r'\s+')
        if any(map(pat.search, columns)):
            warnings.warn(_SAFE_NAMES_WARNING)
        column_types = [self._sql_type_name(typ) for typ in self.frame.dtypes]

        if self.index is not None:
            for i, idx_label in enumerate(self.index[::-1]):
                columns.insert(0, idx_label)
                column_types.insert(0, self._sql_type_name(self.frame.index.get_level_values(i).dtype))

        flv = self.pd_sql.flavor

        br_l = _SQL_SYMB[flv]['br_l']  # left val quote char
        br_r = _SQL_SYMB[flv]['br_r']  # right val quote char

        col_template = br_l + '%s' + br_r + ' %s'

        columns = ',\n  '.join(col_template %
                               x for x in zip(columns, column_types))
        template = """CREATE TABLE %(name)s (
                      %(columns)s
                      )"""
        create_statement = template % {'name': self.name, 'columns': columns}
        return create_statement
Example #2
File: merge.py Project: clamus/pandas
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    from functools import partial

    assert len(left_keys) == len(right_keys), \
        'left_key and right_keys must be the same length'

    # bind `sort` arg. of _factorize_keys
    fkeys = partial(_factorize_keys, sort=sort)

    # get left & right join labels and num. of levels at each location
    llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys)))

    # get flat i8 keys from label lists
    lkey, rkey = _get_join_keys(llab, rlab, shape, sort)

    # factorize keys to a dense i8 space
    # `count` is the num. of unique keys
    # set(lkey) | set(rkey) == range(count)
    lkey, rkey, count = fkeys(lkey, rkey)

    # preserve left frame order if how == 'left' and sort == False
    kwargs = {'sort': sort} if how == 'left' else {}
    join_func = _join_functions[how]
    return join_func(lkey, rkey, count, **kwargs)
Example #3
File: merge.py Project: clamus/pandas
def _get_multiindex_indexer(join_keys, index, sort):
    from functools import partial

    # bind `sort` argument
    fkeys = partial(_factorize_keys, sort=sort)

    # left & right join labels and num. of levels at each location
    rlab, llab, shape = map(list, zip(* map(fkeys, index.levels, join_keys)))
    if sort:
        rlab = list(map(np.take, rlab, index.labels))
    else:
        i8copy = lambda a: a.astype('i8', subok=False, copy=True)
        rlab = list(map(i8copy, index.labels))

    # fix right labels if there were any nulls
    for i in range(len(join_keys)):
        mask = index.labels[i] == -1
        if mask.any():
            # check if there were already any nulls at this location
            # if there were, they were factorized to `shape[i] - 1`
            a = join_keys[i][llab[i] == shape[i] - 1]
            if a.size == 0 or not a[0] != a[0]:
                shape[i] += 1

            rlab[i][mask] = shape[i] - 1

    # get flat i8 join keys
    lkey, rkey = _get_join_keys(llab, rlab, shape, sort)

    # factorize keys to a dense i8 space
    lkey, rkey, count = fkeys(lkey, rkey)

    return _algos.left_outer_join(lkey, rkey, count, sort=sort)
Example #4
File: sql.py Project: hussainsultan/pandas
    def _create_table_statement(self):
        from sqlalchemy import Table, Column

        safe_columns = map(_safe_col_name, self.frame.dtypes.index)
        column_types = map(self._sqlalchemy_type, self.frame.dtypes)

        columns = [Column(name, typ) for name, typ in zip(safe_columns, column_types)]

        if self.index is not None:
            columns.insert(0, Column(self.index, self._sqlalchemy_type(self.frame.index), index=True))

        return Table(self.name, self.pd_sql.meta, *columns)
Example #5
File: sql.py Project: 5i7788/pandas
    def _get_column_names_and_types(self, dtype_mapper):
        column_names_and_types = []
        if self.index is not None:
            for i, idx_label in enumerate(self.index):
                idx_type = dtype_mapper(
                    self.frame.index.get_level_values(i).dtype)
                column_names_and_types.append((idx_label, idx_type))

        column_names_and_types += zip(
            list(map(str, self.frame.columns)),
            map(dtype_mapper, self.frame.dtypes)
            )
        return column_names_and_types
Example #6
File: sql.py Project: jakeshi/pandas
    def _create_table_statement(self):
        from sqlalchemy import Table, Column

        columns = list(map(str, self.frame.columns))
        column_types = map(self._sqlalchemy_type, self.frame.dtypes)

        columns = [Column(name, typ) for name, typ in zip(columns, column_types)]

        if self.index is not None:
            for i, idx_label in enumerate(self.index[::-1]):
                idx_type = self._sqlalchemy_type(self.frame.index.get_level_values(i))
                columns.insert(0, Column(idx_label, idx_type, index=True))

        return Table(self.name, self.pd_sql.meta, *columns)
Example #7
File: base.py Project: josham/pandas
    def __iter__(self):
        """
        Return an iterator of the values.

        These are each a scalar type, which is a Python scalar
        (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period)
        """
        # We are explicitly making element iterators.
        if is_datetimelike(self._values):
            return map(com.maybe_box_datetimelike, self._values)
        elif is_extension_array_dtype(self._values):
            return iter(self._values)
        else:
            return map(self._values.item, range(self._values.size))
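The method above is what plain `for` iteration over a Series or Index goes through. A minimal sketch of the behavior the docstring describes, assuming a reasonably recent pandas (the values are arbitrary; the element types are the point):

import pandas as pd

s = pd.Series([1, 2, 3])
print([type(x) for x in s])    # Python ints, not numpy scalars

ts = pd.Series(pd.date_range('2020-01-01', periods=2))
print([type(x) for x in ts])   # pandas Timestamps (datetime-likes get boxed)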
Example #8
File: sql.py Project: bxhunter/pandas
    def _create_table_statement(self):
        "Return a CREATE TABLE statement to suit the contents of a DataFrame."

        column_names_and_types = \
            self._get_column_names_and_types(self._sql_type_name)

        pat = re.compile(r'\s+')
        column_names = [col_name for col_name, _ in column_names_and_types]
        if any(map(pat.search, column_names)):
            warnings.warn(_SAFE_NAMES_WARNING)

        flv = self.pd_sql.flavor

        br_l = _SQL_SYMB[flv]['br_l']  # left val quote char
        br_r = _SQL_SYMB[flv]['br_r']  # right val quote char

        col_template = br_l + '%s' + br_r + ' %s'

        columns = ',\n  '.join(col_template %
                               x for x in column_names_and_types)
        template = """CREATE TABLE %(name)s (
                      %(columns)s
                      )"""
        create_statement = template % {'name': self.name, 'columns': columns}
        return create_statement
Example #9
def is_instance_factory(_type):
    """

    Parameters
    ----------
    `_type` - the type to be checked against

    Returns
    -------
    validator - a function of a single argument x, which raises
                ValueError if x is not an instance of `_type`

    """
    if isinstance(_type, (tuple, list)):
        _type = tuple(_type)
        from pandas.core.common import pprint_thing
        type_repr = "|".join(map(pprint_thing, _type))
    else:
        type_repr = "'%s'" % _type

    def inner(x):
        if not isinstance(x, _type):
            raise ValueError("Value must be an instance of %s" % type_repr)

    return inner
Example #10
    def __init__(self, io, **kwds):

        import xlrd  # throw an ImportError if we need to

        ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
        if ver < (0, 9):  # pragma: no cover
            raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
                              "support, current version " + xlrd.__VERSION__)

        self.io = io

        engine = kwds.pop('engine', None)

        if engine is not None and engine != 'xlrd':
            raise ValueError("Unknown engine: %s" % engine)

        if isinstance(io, compat.string_types):
            self.book = xlrd.open_workbook(io)
        elif engine == "xlrd" and isinstance(io, xlrd.Book):
            self.book = io
        elif hasattr(io, "read"):
            data = io.read()
            self.book = xlrd.open_workbook(file_contents=data)
        else:
            raise ValueError('Must explicitly set engine if not passing in'
                             ' buffer or path for io.')
Example #11
File: config.py Project: Axik/pandas
def is_instance_factory(_type):
    """

    Parameters
    ----------
    `_type` - the type to be checked against

    Returns
    -------
    validator - a function of a single argument x , which raises
                ValueError if x is not an instance of `_type`

    """
    if isinstance(_type, (tuple, list)):
        _type = tuple(_type)
        from pandas.io.formats.printing import pprint_thing
        type_repr = "|".join(map(pprint_thing, _type))
    else:
        type_repr = "'{typ}'".format(typ=_type)

    def inner(x):
        if not isinstance(x, _type):
            msg = "Value must be an instance of {type_repr}"
            raise ValueError(msg.format(type_repr=type_repr))

    return inner
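A hypothetical usage sketch of the factory above (assuming `is_instance_factory` as defined in this example is importable); the returned validator is the kind of callable pandas plugs into option registration:

is_str_or_int = is_instance_factory((str, int))

is_str_or_int('abc')    # passes silently (inner returns None)
is_str_or_int(42)       # passes silently
try:
    is_str_or_int(3.14)
except ValueError as err:
    print(err)          # "Value must be an instance of ..." with both types listed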
Example #12
    def _denorm(queries, thing):
        fields = []
        results = []
        for q in queries:
            #print q
            r = Ql(q, thing)
            #print "-- result: ", r
            if not r:
                r = [default]
            if isinstance(r[0], type({})):
                fields.append(sorted(r[0].keys()))  # dicty answers
            else:
                fields.append([q])  # stringy answer

            results.append(r)

        #print results
        #print fields
        flist = list(flatten(*map(iter, fields)))

        prod = itertools.product(*results)
        for p in prod:
            U = dict()
            for (ii, thing) in enumerate(p):
                #print ii,thing
                if isinstance(thing, type({})):
                    U.update(thing)
                else:
                    U[fields[ii][0]] = thing

            yield U
Example #13
File: sql.py Project: rla3rd/pandas
def get_schema(frame, name, flavor, keys=None):
    "Return a CREATE TABLE statement to suit the contents of a DataFrame."
    lookup_type = lambda dtype: get_sqltype(dtype.type, flavor)
    # Replace spaces in DataFrame column names with _.
    # Also force lowercase, postgresql can be case sensitive
    safe_columns = [s.replace(' ', '_').strip().lower() for s in frame.dtypes.index]
    column_types = lzip(safe_columns, map(lookup_type, frame.dtypes))
    if flavor == 'sqlite':
        columns = ',\n  '.join('[%s] %s' % x for x in column_types)
    elif flavor == 'postgresql':
        columns = ',\n  '.join('"%s" %s' % x for x in column_types)
    else:
        columns = ',\n  '.join('`%s` %s' % x for x in column_types)

    keystr = ''
    if keys is not None:
        if isinstance(keys, compat.string_types):
            keys = (keys,)
        keystr = ', PRIMARY KEY (%s)' % ','.join(keys)
    template = """CREATE TABLE %(name)s (
                  %(columns)s
                  %(keystr)s
                  );"""
    create_statement = template % {'name': name, 'columns': columns,
                                   'keystr': keystr}
    return create_statement
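A hypothetical call against the snippet above (the DataFrame and table name are made up, and the module-level helpers it relies on, such as get_sqltype and lzip, are assumed importable); column names are lowercased and spaces become underscores before being quoted for the chosen flavor:

import pandas as pd

df = pd.DataFrame({'Col A': [1, 2], 'b': [0.5, 1.5]})
# prints a CREATE TABLE statement whose quoted column names are
# [col_a] and [b] (sqlite-style bracket quoting)
print(get_schema(df, name='example_table', flavor='sqlite'))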
Example #14
File: sql.py Project: Jemash/pandas
    def insert_data(self):
        if self.index is not None:
            temp = self.frame.copy()
            temp.index.names = self.index
            try:
                temp.reset_index(inplace=True)
            except ValueError as err:
                raise ValueError(
                    "duplicate name in index/columns: {0}".format(err))
        else:
            temp = self.frame
        
        column_names = list(map(str, temp.columns))
        ncols = len(column_names)
        data_list = [None] * ncols
        blocks = temp._data.blocks

        for i in range(len(blocks)):
            b = blocks[i]
            if b.is_datetime:
                # convert to microsecond resolution so this yields datetime.datetime
                d = b.values.astype('M8[us]').astype(object)
            else:
                d = np.array(b.values, dtype=object)

            # replace NaN with None
            if b._can_hold_na:
                mask = isnull(d)
                d[mask] = None

            for col_loc, col in zip(b.mgr_locs, d):
                data_list[col_loc] = col

        return column_names, data_list
Example #15
File: test_excel.py Project: 5i7788/pandas
def _skip_if_no_xlrd():
    try:
        import xlrd
        ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
        if ver < (0, 9):
            raise nose.SkipTest('xlrd < 0.9, skipping')
    except ImportError:
        raise nose.SkipTest('xlrd not installed, skipping')
Example #16
    def __unicode__(self):
        output = self.__class__.__name__
        output += u('(')
        prefix = '' if compat.PY3 else 'u'
        mapper = "{0}'{{0}}'".format(prefix)
        output += '[{0}]'.format(', '.join(map(mapper.format, self)))
        output += ", freq='{0}'".format(self.freq)
        output += ')'
        return output
Example #17
    def test_map(self):
        func = lambda x, y, z: x + y + z
        lst = [builtins.range(10), builtins.range(10), builtins.range(10)]
        actual1 = map(func, *lst)
        actual2 = lmap(func, *lst)
        # trailing commas are deliberate: check_result iterates over cases,
        # so each value is wrapped in a 1-tuple
        actual = [actual1, actual2],
        expected = list(builtins.map(func, *lst)),
        lengths = 10,
        self.check_result(actual, expected, lengths)
Example #18
File: period.py Project: danbirken/pandas
    def __unicode__(self):
        output = self.__class__.__name__
        output += u("(")
        prefix = "" if compat.PY3 else "u"
        mapper = "{0}'{{0}}'".format(prefix)
        output += "[{0}]".format(", ".join(map(mapper.format, self)))
        output += ", freq='{0}'".format(self.freq)
        output += ")"
        return output
Example #19
File: test_html.py Project: nizvoo/pandas
def assert_framelist_equal(list1, list2, *args, **kwargs):
    assert len(list1) == len(list2), (
        "lists are not of equal size " "len(list1) == {0}, " "len(list2) == {1}".format(len(list1), len(list2))
    )
    msg = "not all list elements are DataFrames"
    both_frames = all(map(lambda x, y: isinstance(x, DataFrame) and isinstance(y, DataFrame), list1, list2))
    assert both_frames, msg
    for frame_i, frame_j in zip(list1, list2):
        tm.assert_frame_equal(frame_i, frame_j, *args, **kwargs)
        assert not frame_i.empty, "frames are both empty"
Example #20
File: test_html.py Project: APWaldo/pandas
def assert_framelist_equal(list1, list2, *args, **kwargs):
    assert len(list1) == len(list2), ('lists are not of equal size '
                                      'len(list1) == {0}, '
                                      'len(list2) == {1}'.format(len(list1),
                                                                 len(list2)))
    assert all(map(lambda x, y: isframe(x) and isframe(y), list1, list2)), \
        'not all list elements are DataFrames'
    for frame_i, frame_j in zip(list1, list2):
        assert_frame_equal(frame_i, frame_j, *args, **kwargs)
        assert not frame_i.empty, 'frames are both empty'
Example #21
File: concat.py Project: AbnerZheng/pandas
def _concat_categorical(to_concat, axis=0):
    """Concatenate an object/categorical array of arrays, each of which is a
    single dtype

    Parameters
    ----------
    to_concat : array of arrays
    axis : int
        Axis along which to concatenate; in the current implementation this
        is always 0, i.e. we only have 1D categoricals

    Returns
    -------
    Categorical
        A single array, preserving the combined dtypes
    """

    from pandas.core.categorical import Categorical

    def convert_categorical(x):
        # coerce to object dtype
        if com.is_categorical_dtype(x.dtype):
            return x.get_values()
        return x.ravel()

    if get_dtype_kinds(to_concat) - set(['object', 'category']):
        # convert to object type and perform a regular concat
        return _concat_compat([np.array(x, copy=False, dtype=object)
                               for x in to_concat], axis=0)

    # we could have object blocks and categoricals here
    # if we only have a single categoricals then combine everything
    # else its a non-compat categorical
    categoricals = [x for x in to_concat if com.is_categorical_dtype(x.dtype)]

    # validate the categories
    categories = categoricals[0]
    rawcats = categories.categories
    for x in categoricals[1:]:
        if not categories.is_dtype_equal(x):
            raise ValueError("incompatible categories in categorical concat")

    # we've already checked that all categoricals are the same, so if their
    # length is equal to the input then we have all the same categories
    if len(categoricals) == len(to_concat):
        # concating numeric types is much faster than concating object types
        # and fastpath takes a shorter path through the constructor
        return Categorical(np.concatenate([x.codes for x in to_concat],
                                          axis=0),
                           rawcats, ordered=categoricals[0].ordered,
                           fastpath=True)
    else:
        concatted = np.concatenate(list(map(convert_categorical, to_concat)),
                                   axis=0)
        return Categorical(concatted, rawcats)
Example #22
    def insert(self):
        ins = self.insert_statement()
        data_list = []
        temp = self.insert_data()
        keys = list(map(str, temp.columns))

        for t in temp.itertuples():
            data = dict((k, self.maybe_asscalar(v))
                        for k, v in zip(keys, t[1:]))
            data_list.append(data)

        self.pd_sql.execute(ins, data_list)
Example #23
def _check_columns(cols):
    if not len(cols):
        raise AssertionError("There must be at least 1 column")

    head, tail = cols[0], cols[1:]

    N = len(head)

    for i, n in enumerate(map(len, tail)):
        if n != N:
            raise AssertionError('All columns must have the same length: {0}; '
                                 'column {1} has length {2}'.format(N, i, n))

    return N
Example #24
File: sql.py Project: jakeshi/pandas
    def insert_statement(self):
        names = list(map(str, self.frame.columns))
        flv = self.pd_sql.flavor
        br_l = _SQL_SYMB[flv]["br_l"]  # left val quote char
        br_r = _SQL_SYMB[flv]["br_r"]  # right val quote char
        wld = _SQL_SYMB[flv]["wld"]  # wildcard char

        if self.index is not None:
            [names.insert(0, idx) for idx in self.index[::-1]]

        bracketed_names = [br_l + column + br_r for column in names]
        col_names = ",".join(bracketed_names)
        wildcards = ",".join([wld] * len(names))
        insert_statement = "INSERT INTO %s (%s) VALUES (%s)" % (self.name, col_names, wildcards)
        return insert_statement
Example #25
File: excel.py Project: legolin/pandas
    def _convert_to_stop(cls, stop_seq):
        """
        Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
        suitable for initializing the ``GradientFill`` ``stop`` parameter.
        Parameters
        ----------
        stop_seq : iterable
            An iterable that yields objects suitable for consumption by
            ``_convert_to_color``.
        Returns
        -------
        stop : list of openpyxl.styles.Color
        """

        return map(cls._convert_to_color, stop_seq)
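One detail worth noting about the snippet above: on Python 3, `map` returns a lazy iterator rather than the list the docstring promises, so a caller that needs a real list has to materialize it. A generic sketch of the distinction:

lazy = map(str.upper, ['ff0000', '00ff00'])
print(lazy)           # <map object ...>, not a list
print(list(lazy))     # ['FF0000', '00FF00']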
Example #26
def _check_ne_builtin_clash(expr):
    """Attempt to prevent foot-shooting in a helpful way.

    Parameters
    ----------
    expr : Expr
        The parsed expression; its ``names`` are checked for overlap with
        numexpr's builtin names.
    """
    names = expr.names
    overlap = names & _ne_builtins

    if overlap:
        s = ', '.join(map(repr, overlap))
        raise NumExprClobberingError('Variables in expression "%s" '
                                     'overlap with builtins: (%s)' % (expr, s))
Example #27
def get_hits(defname, files=()):
    cs = set()
    for f in files:
        try:
            r = sh.git('blame', '-L',
                       r'/def\s*{start}/,/def/'.format(start=defname),
                       f, _tty_out=False)
        except sh.ErrorReturnCode_128:
            logger.debug("no matches in %s" % f)
            continue

        lines = r.strip().splitlines()[:-1]
        # remove comment lines
        lines = [x for x in lines if not re.search(r"^\w+\s*\(.+\)\s*#", x)]
        hits = set(map(lambda x: x.split(" ")[0], lines))
        cs.update(set([Hit(commit=c, path=f) for c in hits]))

    return cs
Example #28
File: excel.py Project: ascii1011/pandas
    def __init__(self, path_or_buf, **kwds):

        import xlrd  # throw an ImportError if we need to

        ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
        if ver < (0, 9):  # pragma: no cover
            raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
                              "support, current version " + xlrd.__VERSION__)

        self.path_or_buf = path_or_buf
        self.tmpfile = None

        if isinstance(path_or_buf, compat.string_types):
            self.book = xlrd.open_workbook(path_or_buf)
        else:
            data = path_or_buf.read()
            self.book = xlrd.open_workbook(file_contents=data)
Example #29
def _get_schema_legacy(frame, name, flavor, keys=None):
    """Old function from 0.13.1. To keep backwards compatibility.
    When mysql legacy support is dropped, it should be possible to
    remove this code
    """

    def get_sqltype(dtype, flavor):
        pytype = dtype.type
        pytype_name = "text"
        if issubclass(pytype, np.floating):
            pytype_name = "float"
        elif issubclass(pytype, np.integer):
            pytype_name = "int"
        elif issubclass(pytype, np.datetime64) or pytype is datetime:
            # Caution: np.datetime64 is also a subclass of np.number.
            pytype_name = "datetime"
        elif pytype is datetime.date:
            pytype_name = "date"
        elif issubclass(pytype, np.bool_):
            pytype_name = "bool"

        return _SQL_TYPES[pytype_name][flavor]

    lookup_type = lambda dtype: get_sqltype(dtype, flavor)

    column_types = lzip(frame.dtypes.index, map(lookup_type, frame.dtypes))
    if flavor == 'sqlite':
        columns = ',\n  '.join('[%s] %s' % x for x in column_types)
    else:
        columns = ',\n  '.join('`%s` %s' % x for x in column_types)

    keystr = ''
    if keys is not None:
        if isinstance(keys, string_types):
            keys = (keys,)
        keystr = ', PRIMARY KEY (%s)' % ','.join(keys)
    template = """CREATE TABLE %(name)s (
                  %(columns)s
                  %(keystr)s
                  );"""
    create_statement = template % {'name': name, 'columns': columns,
                                   'keystr': keystr}
    return create_statement
Example #30
File: panel.py Project: hiseba/pandas
    def __setitem__(self, key, value):
        shape = tuple(self.shape)
        if isinstance(value, self._constructor_sliced):
            value = value.reindex(
                **self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]))
            mat = value.values
        elif isinstance(value, np.ndarray):
            if value.shape != shape[1:]:
                raise ValueError(
                    'shape of value must be {0}, shape of given object was '
                    '{1}'.format(shape[1:], tuple(map(int, value.shape))))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype, value = _infer_dtype_from_scalar(value)
            mat = np.empty(shape[1:], dtype=dtype)
            mat.fill(value)
        else:
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        mat = mat.reshape(tuple([1]) + shape[1:])
        NDFrame._set_item(self, key, mat)
Example #31
File: counter.py Project: wudcwctw/pandas
    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
        return '%s({%s})' % (self.__class__.__name__, items)
Example #32
File: latex.py Project: zzzzzzzzzx/pandas
    def write_result(self, buf):
        """
        Render a DataFrame to a LaTeX tabular/longtable environment output.
        """

        # string representation of the columns
        if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
            info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}')
                         .format(name=type(self.frame).__name__,
                                 col=self.frame.columns,
                                 idx=self.frame.index))
            strcols = [[info_line]]
        else:
            strcols = self.fmt._to_str_columns()

        def get_col_type(dtype):
            if issubclass(dtype.type, np.number):
                return 'r'
            else:
                return 'l'

        # reestablish the MultiIndex that has been joined by _to_str_column
        if self.fmt.index and isinstance(self.frame.index, MultiIndex):
            clevels = self.frame.columns.nlevels
            strcols.pop(0)
            name = any(self.frame.index.names)
            cname = any(self.frame.columns.names)
            lastcol = self.frame.index.nlevels - 1
            previous_lev3 = None
            for i, lev in enumerate(self.frame.index.levels):
                lev2 = lev.format()
                blank = ' ' * len(lev2[0])
                # display column names in last index-column
                if cname and i == lastcol:
                    lev3 = [x if x else '{}' for x in self.frame.columns.names]
                else:
                    lev3 = [blank] * clevels
                if name:
                    lev3.append(lev.name)
                current_idx_val = None
                for level_idx in self.frame.index.labels[i]:
                    if ((previous_lev3 is None or
                        previous_lev3[len(lev3)].isspace()) and
                            lev2[level_idx] == current_idx_val):
                        # same index as above row and left index was the same
                        lev3.append(blank)
                    else:
                        # different value than above or left index different
                        lev3.append(lev2[level_idx])
                        current_idx_val = lev2[level_idx]
                strcols.insert(i, lev3)
                previous_lev3 = lev3

        column_format = self.column_format
        if column_format is None:
            dtypes = self.frame.dtypes._values
            column_format = ''.join(map(get_col_type, dtypes))
            if self.fmt.index:
                index_format = 'l' * self.frame.index.nlevels
                column_format = index_format + column_format
        elif not isinstance(column_format,
                            compat.string_types):  # pragma: no cover
            raise AssertionError('column_format must be str or unicode, '
                                 'not {typ}'.format(typ=type(column_format)))

        if not self.longtable:
            buf.write('\\begin{{tabular}}{{{fmt}}}\n'
                      .format(fmt=column_format))
            buf.write('\\toprule\n')
        else:
            buf.write('\\begin{{longtable}}{{{fmt}}}\n'
                      .format(fmt=column_format))
            buf.write('\\toprule\n')

        ilevels = self.frame.index.nlevels
        clevels = self.frame.columns.nlevels
        nlevels = clevels
        if any(self.frame.index.names):
            nlevels += 1
        strrows = list(zip(*strcols))
        self.clinebuf = []

        for i, row in enumerate(strrows):
            if i == nlevels and self.fmt.header:
                buf.write('\\midrule\n')  # End of header
                if self.longtable:
                    buf.write('\\endhead\n')
                    buf.write('\\midrule\n')
                    buf.write('\\multicolumn{{{n}}}{{r}}{{{{Continued on next '
                              'page}}}} \\\\\n'.format(n=len(row)))
                    buf.write('\\midrule\n')
                    buf.write('\\endfoot\n\n')
                    buf.write('\\bottomrule\n')
                    buf.write('\\endlastfoot\n')
            if self.fmt.kwds.get('escape', True):
                # escape backslashes first
                crow = [(x.replace('\\', '\\textbackslash').replace('_', '\\_')
                         .replace('%', '\\%').replace('$', '\\$')
                         .replace('#', '\\#').replace('{', '\\{')
                         .replace('}', '\\}').replace('~', '\\textasciitilde')
                         .replace('^', '\\textasciicircum').replace('&', '\\&')
                         if (x and x != '{}') else '{}') for x in row]
            else:
                crow = [x if x else '{}' for x in row]
            if self.bold_rows and self.fmt.index:
                # bold row labels
                crow = ['\\textbf{{{x}}}'.format(x=x)
                        if j < ilevels and x.strip() not in ['', '{}'] else x
                        for j, x in enumerate(crow)]
            if i < clevels and self.fmt.header and self.multicolumn:
                # sum up columns to multicolumns
                crow = self._format_multicolumn(crow, ilevels)
            if (i >= nlevels and self.fmt.index and self.multirow and
                    ilevels > 1):
                # sum up rows to multirows
                crow = self._format_multirow(crow, ilevels, i, strrows)
            buf.write(' & '.join(crow))
            buf.write(' \\\\\n')
            if self.multirow and i < len(strrows) - 1:
                self._print_cline(buf, i, len(strcols))

        if not self.longtable:
            buf.write('\\bottomrule\n')
            buf.write('\\end{tabular}\n')
        else:
            buf.write('\\end{longtable}\n')
Example #33
File: scope.py Project: BRGM/Pic-EAU
def _raw_hex_id(obj):
    """Return the padded hexadecimal id of ``obj``."""
    # interpret as a pointer since that's what really what id returns
    packed = struct.pack('@P', id(obj))
    return ''.join(map(_replacer, packed))
Example #34
    def __unicode__(self):
        # unicode representation based upon iterating over self
        # (since, by definition, `PandasContainers` are iterable)
        prepr = '[%s]' % ','.join(map(com.pprint_thing, self))
        return '%s(%s)' % (self.__class__.__name__, prepr)
Example #35
def randu(n):
    choices = u("").join(map(unichr, lrange(1488, 1488 + 26)))
    choices += string.digits
    return ''.join([random.choice(choices) for _ in range(n)])
Example #36
    def test_string_factorize(self):
        # should this be optional?
        data = 'a\nb\na\nb\na'
        reader = TextReader(StringIO(data), header=None)
        result = reader.read()
        self.assertEqual(len(set(map(id, result[0]))), 2)
Example #37
File: expr.py Project: vibhormehta/pandas
_is_str = _is_type(string_types)

# partition all AST nodes
_all_nodes = frozenset(
    filter(lambda x: isinstance(x, type) and issubclass(x, ast.AST),
           (getattr(ast, node) for node in dir(ast))))


def _filter_nodes(superclass, all_nodes=_all_nodes):
    """Filter out AST nodes that are subclasses of ``superclass``."""
    node_names = (node.__name__ for node in all_nodes
                  if issubclass(node, superclass))
    return frozenset(node_names)


_all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes))
_mod_nodes = _filter_nodes(ast.mod)
_stmt_nodes = _filter_nodes(ast.stmt)
_expr_nodes = _filter_nodes(ast.expr)
_expr_context_nodes = _filter_nodes(ast.expr_context)
_slice_nodes = _filter_nodes(ast.slice)
_boolop_nodes = _filter_nodes(ast.boolop)
_operator_nodes = _filter_nodes(ast.operator)
_unary_op_nodes = _filter_nodes(ast.unaryop)
_cmp_op_nodes = _filter_nodes(ast.cmpop)
_comprehension_nodes = _filter_nodes(ast.comprehension)
_handler_nodes = _filter_nodes(ast.excepthandler)
_arguments_nodes = _filter_nodes(ast.arguments)
_keyword_nodes = _filter_nodes(ast.keyword)
_alias_nodes = _filter_nodes(ast.alias)
Example #38
    def write_result(self, buf):
        """
        Render a DataFrame to a LaTeX tabular/longtable environment output.
        """

        # string representation of the columns
        if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
            info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}')
                         .format(name=type(self.frame).__name__,
                                 col=self.frame.columns,
                                 idx=self.frame.index))
            strcols = [[info_line]]
        else:
            strcols = self.fmt._to_str_columns()

        def get_col_type(dtype):
            if issubclass(dtype.type, np.number):
                return 'r'
            else:
                return 'l'

        # reestablish the MultiIndex that has been joined by _to_str_column
        if self.fmt.index and isinstance(self.frame.index, MultiIndex):
            out = self.frame.index.format(
                adjoin=False, sparsify=self.fmt.sparsify,
                names=self.fmt.has_index_names, na_rep=self.fmt.na_rep
            )

            # index.format will sparsify repeated entries with empty strings
            # so pad these with some empty space
            def pad_empties(x):
                for pad in reversed(x):
                    if pad:
                        break
                return [x[0]] + [i if i else ' ' * len(pad) for i in x[1:]]
            out = (pad_empties(i) for i in out)

            # Add empty spaces for each column level
            clevels = self.frame.columns.nlevels
            out = [[' ' * len(i[-1])] * clevels + i for i in out]

            # Add the column names to the last index column
            cnames = self.frame.columns.names
            if any(cnames):
                new_names = [i if i else '{}' for i in cnames]
                out[self.frame.index.nlevels - 1][:clevels] = new_names

            # Get rid of old multiindex column and add new ones
            strcols = out + strcols[1:]

        column_format = self.column_format
        if column_format is None:
            dtypes = self.frame.dtypes._values
            column_format = ''.join(map(get_col_type, dtypes))
            if self.fmt.index:
                index_format = 'l' * self.frame.index.nlevels
                column_format = index_format + column_format
        elif not isinstance(column_format,
                            compat.string_types):  # pragma: no cover
            raise AssertionError('column_format must be str or unicode, '
                                 'not {typ}'.format(typ=type(column_format)))

        if not self.longtable:
            buf.write('\\begin{{tabular}}{{{fmt}}}\n'
                      .format(fmt=column_format))
            buf.write('\\toprule\n')
        else:
            buf.write('\\begin{{longtable}}{{{fmt}}}\n'
                      .format(fmt=column_format))
            buf.write('\\toprule\n')

        ilevels = self.frame.index.nlevels
        clevels = self.frame.columns.nlevels
        nlevels = clevels
        if self.fmt.has_index_names and self.fmt.show_index_names:
            nlevels += 1
        strrows = list(zip(*strcols))
        self.clinebuf = []

        for i, row in enumerate(strrows):
            if i == nlevels and self.fmt.header:
                buf.write('\\midrule\n')  # End of header
                if self.longtable:
                    buf.write('\\endhead\n')
                    buf.write('\\midrule\n')
                    buf.write('\\multicolumn{{{n}}}{{r}}{{{{Continued on next '
                              'page}}}} \\\\\n'.format(n=len(row)))
                    buf.write('\\midrule\n')
                    buf.write('\\endfoot\n\n')
                    buf.write('\\bottomrule\n')
                    buf.write('\\endlastfoot\n')
            if self.fmt.kwds.get('escape', True):
                # escape backslashes first
                crow = [(x.replace('\\', '\\textbackslash ')
                         .replace('_', '\\_')
                         .replace('%', '\\%').replace('$', '\\$')
                         .replace('#', '\\#').replace('{', '\\{')
                         .replace('}', '\\}').replace('~', '\\textasciitilde ')
                         .replace('^', '\\textasciicircum ')
                         .replace('&', '\\&')
                         if (x and x != '{}') else '{}') for x in row]
            else:
                crow = [x if x else '{}' for x in row]
            if self.bold_rows and self.fmt.index:
                # bold row labels
                crow = ['\\textbf{{{x}}}'.format(x=x)
                        if j < ilevels and x.strip() not in ['', '{}'] else x
                        for j, x in enumerate(crow)]
            if i < clevels and self.fmt.header and self.multicolumn:
                # sum up columns to multicolumns
                crow = self._format_multicolumn(crow, ilevels)
            if (i >= nlevels and self.fmt.index and self.multirow and
                    ilevels > 1):
                # sum up rows to multirows
                crow = self._format_multirow(crow, ilevels, i, strrows)
            buf.write(' & '.join(crow))
            buf.write(' \\\\\n')
            if self.multirow and i < len(strrows) - 1:
                self._print_cline(buf, i, len(strcols))

        if not self.longtable:
            buf.write('\\bottomrule\n')
            buf.write('\\end{tabular}\n')
        else:
            buf.write('\\end{longtable}\n')