Esempio n. 1
0
class Cell(object):
    """Describes cell associated properties.

    Properties of interest include style, type, value, and address.

    """
    __slots__ = ('column',
                 'row',
                 '_value',
                 '_data_type',
                 'parent',
                 'xf_index',
                 '_hyperlink_rel',
                 '_shared_date',
                 'merged',
                 '_comment')

    ERROR_CODES = {'#NULL!': 0,
                   '#DIV/0!': 1,
                   '#VALUE!': 2,
                   '#REF!': 3,
                   '#NAME?': 4,
                   '#NUM!': 5,
                   '#N/A': 6}

    TYPE_STRING = 's'
    TYPE_FORMULA = 'f'
    TYPE_NUMERIC = 'n'
    TYPE_BOOL = 'b'
    TYPE_NULL = 's'
    TYPE_INLINE = 'inlineStr'
    TYPE_ERROR = 'e'
    TYPE_FORMULA_CACHE_STRING = 'str'

    VALID_TYPES = [TYPE_STRING, TYPE_FORMULA, TYPE_NUMERIC, TYPE_BOOL,
                   TYPE_NULL, TYPE_INLINE, TYPE_ERROR, TYPE_FORMULA_CACHE_STRING]

    RE_PATTERNS = {
        'percentage': re.compile(r'^\-?[0-9]*\.?[0-9]*\s?\%$'),
        'time': re.compile(r'^(\d|[0-1]\d|2[0-3]):[0-5]\d(:[0-5]\d)?$'),
        'numeric': re.compile(r'^-?([\d]|[\d]+\.[\d]*|\.[\d]+|[1-9][\d]+\.?[\d]*)((E|e)-?[\d]+)?$'),
        }

    def __init__(self, worksheet, column, row, value=None):
        self.column = column.upper()
        self.row = row
        # _value is the stored value, while value is the displayed value
        self._value = None
        self._hyperlink_rel = None
        self._data_type = self.TYPE_NULL
        if value:
            self.value = value
        self.parent = worksheet
        self.xf_index = 0
        self._shared_date = SharedDate(base_date=worksheet.parent.excel_base_date)
        self.merged = False
        self._comment = None

    @property
    def encoding(self):
        return self.parent.encoding

    def __repr__(self):
        return unicode("<Cell %s.%s>") % (self.parent.title, self.get_coordinate())

    def check_string(self, value):
        """Check string coding, length, and line break character"""
        # convert to unicode string
        if not isinstance(value, unicode):
            value = unicode(value, self.encoding)
        value = unicode(value)
        # string must never be longer than 32,767 characters
        # truncate if necessary
        value = value[:32767]
        if ILLEGAL_CHARACTERS_RE.match(value):
            raise IllegalCharacterError
        # we require that newline is represented as "\n" in core,
        # not as "\r\n" or "\r"
        value = value.replace('\r\n', '\n')
        return value

    def check_numeric(self, value):
        """Cast value to int or float if necessary"""
        if not isinstance(value, NUMERIC_TYPES):
            try:
                value = int(value)
            except ValueError:
                value = float(value)
        return value

    def check_error(self, value):
        """Tries to convert Error" else N/A"""
        try:
            return unicode(value)
        except:
            return unicode('#N/A')

    def set_explicit_value(self, value=None, data_type=TYPE_STRING):
        """Coerce values according to their explicit type"""
        type_coercion_map = {
            self.TYPE_INLINE: self.check_string,
            self.TYPE_STRING: self.check_string,
            self.TYPE_FORMULA: self.check_string,
            self.TYPE_NUMERIC: self.check_numeric,
            self.TYPE_BOOL: bool,
            self.TYPE_ERROR: self.check_error}
        try:
            self._value = type_coercion_map[data_type](value)
        except KeyError:
            if data_type not in self.VALID_TYPES:
                msg = 'Invalid data type: %s' % data_type
                raise DataTypeException(msg)
        self._data_type = data_type

    # preserve old method name
    set_value_explicit = set_explicit_value

    def data_type_for_value(self, value):
        """Given a value, infer the correct data type"""
        if value is None:
            data_type = self.TYPE_NULL
        elif value is True or value is False:
            data_type = self.TYPE_BOOL
        elif isinstance(value, NUMERIC_TYPES):
            data_type = self.TYPE_NUMERIC
        elif isinstance(value, (datetime.datetime, datetime.date, datetime.time, datetime.timedelta)):
            data_type = self.TYPE_NUMERIC
        elif not value:
            data_type = self.TYPE_STRING
        elif isinstance(value, basestring) and value[0] == '=':
            data_type = self.TYPE_FORMULA
        elif isinstance(value, unicode) and self.RE_PATTERNS['numeric'].match(value):
            data_type = self.TYPE_NUMERIC
        elif not isinstance(value, unicode) and self.RE_PATTERNS['numeric'].match(str(value)):
            data_type = self.TYPE_NUMERIC
        elif isinstance(value, basestring) and value.strip() in self.ERROR_CODES:
            data_type = self.TYPE_ERROR
        elif isinstance(value, list):
            data_type = self.TYPE_ERROR
        else:
            data_type = self.TYPE_STRING
        return data_type

    def bind_value(self, value):
        """Given a value, infer type and display options."""
        self._data_type = self.data_type_for_value(value)
        if value is None:
            self.set_explicit_value('', self.TYPE_NULL)
            return True
        elif self._data_type == self.TYPE_STRING:
            # percentage detection
            if isinstance(value, unicode):
                percentage_search = self.RE_PATTERNS['percentage'].match(value)
            else:
                percentage_search = self.RE_PATTERNS['percentage'].match(str(value))
            if percentage_search and value.strip() != '%':
                value = float(value.replace('%', '')) / 100.0
                self.set_explicit_value(value, self.TYPE_NUMERIC)
                self._set_number_format(NumberFormat.FORMAT_PERCENTAGE)
                return True
            # time detection
            if isinstance(value, unicode):
                time_search = self.RE_PATTERNS['time'].match(value)
            else:
                time_search = self.RE_PATTERNS['time'].match(str(value))
            if time_search:
                sep_count = value.count(':')  # pylint: disable=E1103
                if sep_count == 1:
                    hours, minutes = [int(bit) for bit in value.split(':')]  # pylint: disable=E1103
                    seconds = 0
                elif sep_count == 2:
                    hours, minutes, seconds = \
                            [int(bit) for bit in value.split(':')]  # pylint: disable=E1103
                days = (hours / 24.0) + (minutes / 1440.0) + \
                        (seconds / 86400.0)
                self.set_explicit_value(days, self.TYPE_NUMERIC)
                self._set_number_format(NumberFormat.FORMAT_DATE_TIME3)
                return True
        if self._data_type == self.TYPE_NUMERIC:
            # date detection
            # if the value is a date, but not a date time, make it a
            # datetime, and set the time part to 0
            if isinstance(value, datetime.date) and not \
                    isinstance(value, datetime.datetime):
                value = datetime.datetime.combine(value, datetime.time())
            if isinstance(value, (datetime.datetime, datetime.time, datetime.timedelta)):
                if isinstance(value, datetime.datetime):
                    self._set_number_format(NumberFormat.FORMAT_DATE_YYYYMMDD2)
                elif isinstance(value, datetime.time):
                    self._set_number_format(NumberFormat.FORMAT_DATE_TIME6)
                elif isinstance(value, datetime.timedelta):
                    self._set_number_format(NumberFormat.FORMAT_DATE_TIMEDELTA)
                value = SharedDate().datetime_to_julian(date=value)
                self.set_explicit_value(value, self.TYPE_NUMERIC)
                return True
        self.set_explicit_value(value, self._data_type)

    @property
    def value(self):
        """Get or set the value held in the cell.
            ':rtype: depends on the value (string, float, int or '
            ':class:`datetime.datetime`)'"""
        value = self._value
        if self.is_date():
            value = self._shared_date.from_julian(value)
        return value

    @value.setter
    def value(self, value):
        """Set the value and infer type and display options."""
        self.bind_value(value)

    @property
    def internal_value(self):
        """Always returns the value for excel."""
        return self._value

    @property
    def hyperlink(self):
        """Return the hyperlink target or an empty string"""
        return self._hyperlink_rel is not None and \
                self._hyperlink_rel.target or ''

    @hyperlink.setter
    def hyperlink(self, val):
        """Set value and display for hyperlinks in a cell.
        Automatically setsthe `value` of the cell with link text,
        but you can modify it afterwards by setting the `value`
        property, and the hyperlink will remain.\n\n' ':rtype: string"""
        if self._hyperlink_rel is None:
            self._hyperlink_rel = self.parent.create_relationship("hyperlink")
        self._hyperlink_rel.target = val
        self._hyperlink_rel.target_mode = "External"
        if self._value is None:
            self.value = val

    @property
    def hyperlink_rel_id(self):
        """Return the id pointed to by the hyperlink, or None"""
        return self._hyperlink_rel is not None and \
                self._hyperlink_rel.id or None

    def _set_number_format(self, format_code):
        """Set a new formatting code for numeric values"""
        self.style.number_format.format_code = format_code

    @property
    def has_style(self):
        """Check if the parent worksheet has a style for this cell"""
        return self.get_coordinate() in self.parent._styles  # pylint: disable=W0212

    @property
    def style(self):
        """Returns the :class:`openpyxl.style.Style` object for this cell"""
        return self.parent.get_style(self.get_coordinate())

    @property
    def data_type(self):
        """Return the data type represented by this cell"""
        return self._data_type

    def get_coordinate(self):
        """Return the coordinate string for this cell (e.g. 'B12')

        :rtype: string
        """
        return '%s%s' % (self.column, self.row)

    @property
    def address(self):
        """Return the coordinate string for this cell (e.g. 'B12')

        :rtype: string
        """
        return self.get_coordinate()

    def offset(self, row=0, column=0):
        """Returns a cell location relative to this cell.

        :param row: number of rows to offset
        :type row: int

        :param column: number of columns to offset
        :type column: int

        :rtype: :class:`openpyxl.cell.Cell`
        """
        offset_column = get_column_letter(column_index_from_string(
                column=self.column) + column)
        offset_row = self.row + row
        return self.parent.cell('%s%s' % (offset_column, offset_row))

    def is_date(self):
        """Returns whether the value is *probably* a date or not

        :rtype: bool
        """
        return (self.has_style
                and self.style.number_format.is_date_format()
                and isinstance(self._value, NUMERIC_TYPES))

    @property
    def anchor(self):
        """ returns the expected position of a cell in pixels from the top-left
            of the sheet. For example, A1 anchor should be (0,0).

            :rtype: tuple(int, int)
        """
        left_columns = (column_index_from_string(self.column, True) - 1)
        column_dimensions = self.parent.column_dimensions
        left_anchor = 0
        default_width = points_to_pixels(DEFAULT_COLUMN_WIDTH)

        for col_idx in range(left_columns):
            letter = get_column_letter(col_idx + 1)
            if letter in column_dimensions:
                cdw = column_dimensions.get(letter).width
                if cdw > 0:
                    left_anchor += points_to_pixels(cdw)
                    continue
            left_anchor += default_width

        row_dimensions = self.parent.row_dimensions
        top_anchor = 0
        top_rows = (self.row - 1)
        default_height = points_to_pixels(DEFAULT_ROW_HEIGHT)
        for row_idx in range(1, top_rows + 1):
            if row_idx in row_dimensions:
                rdh = row_dimensions[row_idx].height
                if rdh > 0:
                    top_anchor += points_to_pixels(rdh)
                    continue
            top_anchor += default_height

        return (left_anchor, top_anchor)

    @property
    def comment(self):
        """ Returns the comment associated with this cell

            :rtype: :class:`openpyxl.comments.Comment`
        """
        return self._comment

    @comment.setter
    def comment(self, value):
        if value is not None and value._parent is not None and value is not self.comment:
            raise AttributeError(
                "Comment already assigned to %s in worksheet %s. Cannot assign a comment to more than one cell" %
                (value._parent.get_coordinate(), value._parent.parent.title)
                )

        # Ensure the number of comments for the parent worksheet is up-to-date
        if value is None and self._comment is not None:
            self.parent._comment_count -= 1
        if value is not None and self._comment is None:
            self.parent._comment_count += 1

        # orphan the old comment
        if self._comment is not None:
            self._comment._parent = None

        self._comment = value
        if value is not None:
            self._comment._parent = self
Esempio n. 2
0
class IterableWorksheet(Worksheet):

    def __init__(self, parent_workbook, title, worksheet_path,
                 xml_source, string_table, style_table):
        Worksheet.__init__(self, parent_workbook, title)
        self.worksheet_path = worksheet_path
        self._string_table = string_table
        self._style_table = style_table

        min_col, min_row, max_col, max_row = read_dimension(xml_source=self.xml_source)
        self.min_col = min_col
        self.min_row = min_row
        self.max_row = max_row
        self.max_col = max_col

        self._shared_date = SharedDate(base_date=parent_workbook.excel_base_date)

    @property
    def xml_source(self):
        return self.parent._archive.open(self.worksheet_path)

    @xml_source.setter
    def xml_source(self, value):
        """Base class is always supplied XML source, IteratableWorksheet obtains it on demand."""
        pass

    def __getitem__(self, key):
        if isinstance(key, slice):
            key = "{0}:{1}".format(key)
        return self.iter_rows(key)

    def iter_rows(self, range_string='', row_offset=0, column_offset=1):
        """ Returns a squared range based on the `range_string` parameter,
        using generators.

        :param range_string: range of cells (e.g. 'A1:C4')
        :type range_string: string

        :param row_offset: additional rows (e.g. 4)
        :type row: int

        :param column_offset: additonal columns (e.g. 3)
        :type column: int

        :rtype: generator

        """
        if range_string:
            min_col, min_row, max_col, max_row = get_range_boundaries(range_string, row_offset, column_offset)
        else:
            min_col = column_index_from_string(self.min_col)
            max_col = column_index_from_string(self.max_col) + 1
            min_row = self.min_row
            max_row = self.max_row + 6

        return self.get_squared_range(min_col, min_row, max_col, max_row)

    def get_squared_range(self, min_col, min_row, max_col, max_row):
        expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
        current_row = min_row

        style_table = self._style_table
        for row, cells in groupby(self.get_cells(min_row, min_col,
                                                 max_row, max_col),
                                  operator.attrgetter('row')):
            full_row = []
            if current_row < row:

                for gap_row in xrange(current_row, row):
                    dummy_cells = get_missing_cells(gap_row, expected_columns)
                    yield tuple([dummy_cells[column] for column in expected_columns])
                    current_row = row

            temp_cells = list(cells)
            retrieved_columns = dict([(c.column, c) for c in temp_cells])
            missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
            replacement_columns = get_missing_cells(row, missing_columns)

            for column in expected_columns:
                if column in retrieved_columns:
                    cell = retrieved_columns[column]
                    if cell.style_id is not None:
                        style = style_table[int(cell.style_id)]
                        cell = cell._replace(number_format=style.number_format.format_code) #pylint: disable-msg=W0212
                    if cell.internal_value is not None:
                        if cell.data_type in Cell.TYPE_STRING:
                            cell = cell._replace(internal_value=unicode(self._string_table[int(cell.internal_value)])) #pylint: disable-msg=W0212
                        elif cell.data_type == Cell.TYPE_BOOL:
                            cell = cell._replace(internal_value=cell.internal_value == '1')
                        elif cell.is_date:
                            cell = cell._replace(internal_value=self._shared_date.from_julian(float(cell.internal_value)))
                        elif cell.data_type == Cell.TYPE_NUMERIC:
                            cell = cell._replace(internal_value=float(cell.internal_value))
                        elif cell.data_type in(Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
                            cell = cell._replace(internal_value=unicode(cell.internal_value))
                    full_row.append(cell)
                else:
                    full_row.append(replacement_columns[column])
            current_row = row + 1
            yield tuple(full_row)


    def get_cells(self, min_row, min_col, max_row, max_col):
        p = iterparse(self.xml_source)

        for _event, element in p:

            if element.tag == '{%s}c' % SHEET_MAIN_NS:
                coord = element.get('r')
                column_str, row = RE_COORDINATE.match(coord).groups()

                row = int(row)
                column = column_index_from_string(column_str)

                if min_col <= column <= max_col and min_row <= row <= max_row:
                    data_type = element.get('t', 'n')
                    style_id = element.get('s')
                    formula = element.findtext('{%s}f' % SHEET_MAIN_NS)
                    value = element.findtext('{%s}v' % SHEET_MAIN_NS)
                    if formula is not None and not self.parent.data_only:
                        data_type = Cell.TYPE_FORMULA
                        value = "=" + formula
                    yield RawCell(row, column_str, coord, value, data_type, style_id, None)
            # sub-elements of cells should be skipped
            if (element.tag == '{%s}v' % SHEET_MAIN_NS
                or element.tag == '{%s}f' % SHEET_MAIN_NS):
                continue
            element.clear()


    def cell(self, *args, **kwargs):
        # TODO return an individual cell

        raise NotImplementedError("use 'iter_rows()' instead")

    def range(self, *args, **kwargs):
        # TODO return a range of cells, basically get_squared_range with same interface as Worksheet
        raise NotImplementedError("use 'iter_rows()' instead")

    def rows(self):
        return self.iter_rows()

    def calculate_dimension(self):
        return '%s%s:%s%s' % (self.min_col, self.min_row, self.max_col, self.max_row)

    def get_highest_column(self):
        return column_index_from_string(self.max_col)

    def get_highest_row(self):
        return self.max_row