예제 #1
0
    def _parse_value(self, element, data_type, value, style_id):
        if value is not None:
            if data_type == 'n':
                value = _cast_number(value)
                if style_id in self.date_formats:
                    data_type = 'd'
                    try:
                        value = from_excel(value, self.epoch)
                    except ValueError:
                        msg = """Cell {0} is marked as a date but the serial value {1} is outside the limits for dates. The cell will be treated as an error.""".format(coordinate, value)
                        warn(msg)
                        data_type = "e"
                        value = "#VALUE!"
            elif data_type == 's':
                value = self.shared_strings[int(value)]
            elif data_type == 'b':
                value = bool(int(value))
            elif data_type == "str":
                data_type = "s"
            elif data_type == 'd':
                value = from_ISO8601(value)

        elif data_type == 'inlineStr':
                child = element.find(INLINE_STRING)
                if child is not None:
                    data_type = 's'
                    richtext = Text.from_tree(child)
                    value = richtext.content

        return (data_type, value)
        
예제 #2
0
    def parse_cell(self, element):
        """Translate a cell element into a plain dict.

        The returned dict has the keys ``row``, ``column``, ``value``,
        ``data_type`` and ``style_id``.  ``self.col_counter`` is advanced by
        one on every call and is used, together with ``self.row_counter``,
        as the position of cells that carry no ``r`` attribute.
        """
        cell_type = element.get('t', 'n')
        ref = element.get('r')
        self.col_counter += 1

        style_id = element.get('s', 0)
        if style_id:
            style_id = int(style_id)

        # Inline strings are read from a child element further down.
        raw = None
        if cell_type != "inlineStr":
            raw = element.findtext(VALUE_TAG, None) or None

        if coordinate_given(ref) if False else ref:
            row, column = coordinate_to_tuple(ref)
        else:
            row, column = self.row_counter, self.col_counter

        if not self.data_only and element.find(FORMULA_TAG) is not None:
            cell_type = 'f'
            raw = self.parse_formula(element)

        elif raw is None:
            if cell_type == 'inlineStr':
                inline = element.find(INLINE_STRING)
                if inline is not None:
                    cell_type = 's'
                    raw = Text.from_tree(inline).content

        elif cell_type == 'n':
            raw = _cast_number(raw)
            if style_id in self.date_formats:
                cell_type = 'd'
                try:
                    raw = from_excel(raw, self.epoch)
                except ValueError:
                    msg = """Cell {0} is marked as a date but the serial value {1} is outside the limits for dates. The cell will be treated as an error.""".format(
                        ref, raw)
                    warn(msg)
                    cell_type = "e"
                    raw = "#VALUE!"

        elif cell_type == 's':
            # the raw value is an index into the shared strings
            raw = self.shared_strings[int(raw)]

        elif cell_type == 'b':
            raw = bool(int(raw))

        elif cell_type == "str":
            cell_type = "s"

        elif cell_type == 'd':
            raw = from_ISO8601(raw)

        return {
            'row': row,
            'column': column,
            'value': raw,
            'data_type': cell_type,
            'style_id': style_id,
        }
    def parse_cell(self, element):
        """Translate a cell element into a plain dict.

        The returned dict has the keys ``row``, ``column``, ``value``,
        ``data_type`` and ``style_id``.  ``self.max_column`` is advanced by
        one on every call and is used, together with ``self.max_row``, as
        the position of cells that carry no ``r`` attribute.

        An empty value element is treated like a missing one (value None)
        instead of being pushed through the number / index conversions,
        where the empty string would raise ValueError.
        """
        data_type = element.get('t', 'n')
        coordinate = element.get('r')
        self.max_column += 1
        # get() falls back to 0, so there is always something to convert
        style_id = int(element.get('s', 0))

        if data_type == "inlineStr":
            value = None
        else:
            # findtext() gives "" for an empty element; normalise to None
            value = element.findtext(VALUE_TAG) or None

        if coordinate:
            row, column = coordinate_to_tuple(coordinate)
        else:
            row, column = self.max_row, self.max_column

        if not self.data_only and element.find(FORMULA_TAG) is not None:
            data_type = 'f'
            value = self.parse_formula(element)

        elif value is not None:
            if data_type == 'n':
                value = _cast_number(value)
                if style_id in self.date_formats:
                    data_type = 'd'
                    value = from_excel(value, self.epoch)
            elif data_type == 's':
                # the raw value is an index into the shared strings
                value = self.shared_strings[int(value)]
            elif data_type == 'b':
                value = bool(int(value))
            elif data_type == "str":
                # text that parses as a number is reported as a number,
                # anything else stays a string
                try:
                    value = _cast_number(value)
                    data_type = "n"
                except ValueError:
                    data_type = "s"
            elif data_type == 'd':
                value = from_ISO8601(value)

        elif data_type == 'inlineStr':
            # inline strings carry their text in a child element
            child = element.find(INLINE_STRING)
            if child is not None:
                data_type = 's'
                richtext = Text.from_tree(child)
                value = richtext.content

        return {
            'row': row,
            'column': column,
            'value': value,
            'data_type': data_type,
            'style_id': style_id
        }
예제 #4
0
 def rich_content(cls, value):
     """Return the content of *value* as produced by ``Text.content``.

     A ``Text`` instance is used directly.  Anything else is iterated as a
     sequence of segments; segments with a falsy ``text`` are dropped and
     the rest is wrapped in a new ``Text``.  With nothing left the result
     is the empty string.
     """
     if type(value) is Text:
         return value.content
     kept = [run for run in value if run.text]
     if not kept:
         return ''
     return Text(r=kept).content
예제 #5
0
파일: strings.py 프로젝트: ACMH1/ExcelDB
def read_string_table(xml_source):
    """Collect every shared string of the table into an IndexedList."""
    source = _get_xml_iter(xml_source)
    si_tag = '{%s}si' % SHEET_MAIN_NS
    collected = []

    for _event, elem in iterparse(source):
        if elem.tag != si_tag:
            continue
        content = Text.from_tree(elem).content
        # drop every literal 'x005F_' sequence from the text
        collected.append(content.replace('x005F_', ''))
        # the element is no longer needed once its text has been taken
        elem.clear()

    return IndexedList(collected)
예제 #6
0
def read_string_table(xml_source):
    """Return the shared strings of the table as a plain list."""
    result = []
    si_tag = '{%s}si' % SHEET_MAIN_NS

    for _event, elem in iterparse(xml_source):
        if elem.tag != si_tag:
            continue
        raw = Text.from_tree(elem).content
        # the element is no longer needed once its text has been taken
        elem.clear()
        # drop every literal 'x005F_' sequence from the text
        result.append(raw.replace('x005F_', ''))

    return result
예제 #7
0
def read_string_table(xml_source):
    """Gather all shared strings of the table and return an IndexedList."""
    stream = _get_xml_iter(xml_source)
    wanted = '{%s}si' % SHEET_MAIN_NS
    found = []

    for _event, item in iterparse(stream):
        if item.tag == wanted:
            plain = Text.from_tree(item).content
            # drop every literal 'x005F_' sequence from the text
            found.append(plain.replace('x005F_', ''))
            # the element is no longer needed once its text has been taken
            item.clear()

    return IndexedList(found)
예제 #8
0
    def _get_row(self, element, min_col=1, max_col=None, row_counter=None):
        """Yield the cells of one row element.

        Cells left of ``min_col`` are skipped and iteration stops at the
        first cell right of ``max_col``.  Gaps between cells, and the space
        up to ``max_col`` after the last cell, are filled with EMPTY_CELL.
        Cells without an ``r`` attribute are placed at ``row_counter`` and
        the next expected column.
        """
        next_col = min_col
        values_only = getattr(self.parent, 'data_only', False)

        for node in safe_iterator(element, CELL_TAG):
            ref = node.get('r')
            if ref:
                row, column = coordinate_to_tuple(ref)
            else:
                row, column = row_counter, next_col

            if max_col is not None and column > max_col:
                break

            if column >= min_col:
                # pad row with missing cells (empty range when none missing)
                for _ in range(max(next_col, min_col), column):
                    yield EMPTY_CELL

                data_type = node.get('t', 'n')
                style_id = int(node.get('s', 0))
                value = None

                formula = node.findtext(FORMULA_TAG)
                if formula is not None and not values_only:
                    data_type = 'f'
                    value = "=%s" % formula
                elif data_type == 'inlineStr':
                    inline = node.find(INLINE_TAG)
                    if inline is not None:
                        value = Text.from_tree(inline).content
                else:
                    value = node.findtext(VALUE_TAG) or None

                yield ReadOnlyCell(self, row, column, value, data_type,
                                   style_id)

            next_col = column + 1

        if max_col is not None:
            # fill the remainder of the requested range
            for _ in range(next_col, max_col + 1):
                yield EMPTY_CELL
예제 #9
0
    def _get_row(self, element, min_col=1, max_col=None, row_counter=None):
        """Generate the cells found in a single row element.

        Only columns from ``min_col`` up to ``max_col`` (when given) are
        produced.  Missing columns in between, and any columns left up to
        ``max_col`` after the last cell, are emitted as EMPTY_CELL.  A cell
        without an ``r`` attribute takes ``row_counter`` as its row and the
        next expected column as its column.
        """
        expected = min_col
        skip_formulae = getattr(self.parent, 'data_only', False)

        for c in safe_iterator(element, CELL_TAG):
            address = c.get('r')
            if address:
                row, column = coordinate_to_tuple(address)
            else:
                row, column = row_counter, expected

            if max_col is not None and column > max_col:
                break

            if column >= min_col:
                # pad row with missing cells (empty range when none missing)
                for _ in range(max(expected, min_col), column):
                    yield EMPTY_CELL

                data_type = c.get('t', 'n')
                style_id = int(c.get('s', 0))
                value = None

                formula = c.findtext(FORMULA_TAG)
                if formula is not None and not skip_formulae:
                    data_type = 'f'
                    value = "=%s" % formula
                elif data_type == 'inlineStr':
                    holder = c.find(INLINE_TAG)
                    if holder is not None:
                        value = Text.from_tree(holder).content
                else:
                    value = c.findtext(VALUE_TAG) or None

                yield ReadOnlyCell(self, row, column,
                                   value, data_type, style_id)

            expected = column + 1

        if max_col is not None:
            # fill the remainder of the requested range
            for _ in range(expected, max_col + 1):
                yield EMPTY_CELL
예제 #10
0
 def __init__(self,
              ref="",
              authorId=0,
              guid=None,
              shapeId=0,
              text=None,
              commentPr=None,
              author=None,
             ):
     self.ref = ref
     self.authorId = authorId
     self.guid = guid
     self.shapeId = shapeId
     if text is None:
         text = Text()
     self.text = text
     self.commentPr = commentPr
     self.author = author
예제 #11
0
 def mid(cls, rich_text, head, tail):
     """Cut the characters ``head`` .. ``tail`` (both inclusive) out of a rich text.

     The segments in ``rich_text.r`` are walked in order while tracking the
     offset of their first and last character in the concatenated text.
     Every segment overlapping the requested range is shallow-copied; the
     copy's text is trimmed where the range starts or ends inside it.

     Returns a tuple ``(Text(r=copies), joined_plain_text)``.
     """
     pieces = []
     plain = []
     last = -1
     for run in rich_text.r:
         first = last + 1
         last += len(run.text)
         if last < head:
             # segment ends before the range begins
             continue

         begins_here = first <= head <= last
         ends_here = not last < tail

         clone = copy(run)
         if begins_here and ends_here:
             chunk = run.text[head - first:tail - first + 1]
         elif begins_here:
             chunk = run.text[head - first:]
         elif ends_here:
             chunk = run.text[:tail - first + 1]
         else:
             # segment lies completely inside the range: keep it whole
             chunk = run.text

         if begins_here or ends_here:
             clone.text = chunk
         pieces.append(clone)
         plain.append(chunk)

         if ends_here:
             break
     return Text(r=pieces), ''.join(plain)
예제 #12
0
 def __init__(self,
              ref="",
              authorId=0,
              guid=None,
              shapeId=0,
              text=None,
              commentPr=None,
              author=None,
              height=79,
              width=144):
     self.ref = ref
     self.authorId = authorId
     self.guid = guid
     self.shapeId = shapeId
     if text is None:
         text = Text()
     self.text = text
     self.commentPr = commentPr
     self.author = author
     self.height = height
     self.width = width
예제 #13
0
    def parse_cell(self, element):
        """Create a Cell on ``self.ws`` from a cell element.

        Reads the value text, formula, type (``t``, default 'n'), reference
        (``r``) and style index (``s``) from *element*, creates the Cell,
        stores it in ``self.ws._cells`` under ``(row, column)`` and assigns
        the converted value to it.  Nothing is returned.

        Side effects visible here: ``self._col_count`` is incremented, and
        formula cells may add entries to ``self.ws.formula_attributes`` or
        ``self.shared_formula_masters``.
        """
        value = element.find(self.VALUE_TAG)
        if value is not None:
            value = value.text
        formula = element.find(self.FORMULA_TAG)
        data_type = element.get('t', 'n')
        coordinate = element.get('r')
        self._col_count += 1
        style_id = element.get('s')

        # assign formula to cell value unless only the data is desired
        if formula is not None and not self.data_only:
            data_type = 'f'
            if formula.text:
                value = "=" + formula.text
            else:
                value = "="
            formula_type = formula.get('t')
            if formula_type:
                if formula_type != "shared":
                    # keep the attributes of non-shared typed formulas
                    self.ws.formula_attributes[coordinate] = dict(
                        formula.attrib)

                else:
                    si = formula.get(
                        'si')  # Shared group index for shared formulas

                    # The spec (18.3.1.40) defines shared formulae in
                    # terms of the following:
                    #
                    # `master`: "The first formula in a group of shared
                    #            formulas"
                    # `ref`: "Range of cells which the formula applies
                    #        to." It's a required attribute on the master
                    #        cell, forbidden otherwise.
                    # `shared cell`: "A cell is shared only when si is
                    #                 used and t is `shared`."
                    #
                    # Whether to use the cell's given formula or the
                    # master's depends on whether the cell is shared,
                    # whether it's in the ref, and whether it defines its
                    # own formula, as follows:
                    #
                    #  Shared?   Has formula? | In ref    Not in ref
                    # ========= ==============|======== ===============
                    #   Yes          Yes      | master   impl. defined
                    #    No          Yes      |  own         own
                    #   Yes           No      | master   impl. defined
                    #    No           No      |  ??          N/A
                    #
                    # The ?? is because the spec is silent on this issue,
                    # though my inference is that the cell does not
                    # receive a formula at all.
                    #
                    # For this implementation, we are using the master
                    # formula in the two "impl. defined" cases and no
                    # formula in the "??" case. This choice of
                    # implementation allows us to disregard the `ref`
                    # parameter altogether, and does not require
                    # computing expressions like `C5 in A1:D6`.
                    # Presumably, Excel does not generate spreadsheets
                    # with such contradictions.
                    if si in self.shared_formula_masters:
                        # a master is already known for this group:
                        # translate its formula to this cell
                        trans = self.shared_formula_masters[si]
                        value = trans.translate_formula(coordinate)
                    else:
                        # first cell seen for this group becomes the master
                        self.shared_formula_masters[si] = Translator(
                            value, coordinate)

        style_array = None
        if style_id is not None:
            style_id = int(style_id)
            style_array = self.styles[style_id]

        # cells without a reference are placed using the running counters
        if coordinate:
            row, column = coordinate_to_tuple(coordinate)
        else:
            row, column = self._row_count, self._col_count

        cell = Cell(self.ws, row=row, col_idx=column, style_array=style_array)
        self.ws._cells[(row, column)] = cell

        # Formula cells have data_type 'f' at this point, so none of the
        # conversions below apply to them.
        if value is not None:
            if data_type == 'n':
                value = _cast_number(value)
                if is_date_format(cell.number_format):
                    data_type = 'd'
                    value = from_excel(value, self.epoch)

            elif data_type == 'b':
                value = bool(int(value))
            elif data_type == 's':
                # the raw value is an index into the shared strings
                value = self.shared_strings[int(value)]
            elif data_type == 'str':
                data_type = 's'
            elif data_type == 'd':
                value = from_ISO8601(value)

        else:
            if data_type == 'inlineStr':
                # inline strings carry their text in a child element
                child = element.find(self.INLINE_STRING)
                if child is not None:
                    data_type = 's'
                    richtext = Text.from_tree(child)
                    value = richtext.content

        # Either go through the ``value`` property, or set the private
        # value and the data type directly.
        if self.guess_types or value is None:
            cell.value = value
        else:
            cell._value = value
            cell.data_type = data_type
예제 #14
0
파일: worksheet.py 프로젝트: cloudera/hue
    def parse_cell(self, element):
        """Create a Cell on ``self.ws`` from a cell element.

        Reads the value text, formula, type (``t``, default 'n'), reference
        (``r``) and style index (``s``) from *element*, creates the Cell,
        stores it in ``self.ws._cells`` under ``(row, column)`` and assigns
        the converted value to it.  Nothing is returned.

        Side effects visible here: ``self._col_count`` is incremented, and
        formula cells may add entries to ``self.ws.formula_attributes`` or
        ``self.shared_formula_masters``.
        """
        value = element.find(self.VALUE_TAG)
        if value is not None:
            value = value.text
        formula = element.find(self.FORMULA_TAG)
        data_type = element.get('t', 'n')
        coordinate = element.get('r')
        self._col_count += 1
        style_id = element.get('s')

        # assign formula to cell value unless only the data is desired
        if formula is not None and not self.data_only:
            data_type = 'f'
            if formula.text:
                value = "=" + formula.text
            else:
                value = "="
            formula_type = formula.get('t')
            if formula_type:
                if formula_type != "shared":
                    # keep the attributes of non-shared typed formulas
                    self.ws.formula_attributes[coordinate] = dict(formula.attrib)

                else:
                    si = formula.get('si')  # Shared group index for shared formulas

                    # The spec (18.3.1.40) defines shared formulae in
                    # terms of the following:
                    #
                    # `master`: "The first formula in a group of shared
                    #            formulas"
                    # `ref`: "Range of cells which the formula applies
                    #        to." It's a required attribute on the master
                    #        cell, forbidden otherwise.
                    # `shared cell`: "A cell is shared only when si is
                    #                 used and t is `shared`."
                    #
                    # Whether to use the cell's given formula or the
                    # master's depends on whether the cell is shared,
                    # whether it's in the ref, and whether it defines its
                    # own formula, as follows:
                    #
                    #  Shared?   Has formula? | In ref    Not in ref
                    # ========= ==============|======== ===============
                    #   Yes          Yes      | master   impl. defined
                    #    No          Yes      |  own         own
                    #   Yes           No      | master   impl. defined
                    #    No           No      |  ??          N/A
                    #
                    # The ?? is because the spec is silent on this issue,
                    # though my inference is that the cell does not
                    # receive a formula at all.
                    #
                    # For this implementation, we are using the master
                    # formula in the two "impl. defined" cases and no
                    # formula in the "??" case. This choice of
                    # implementation allows us to disregard the `ref`
                    # parameter altogether, and does not require
                    # computing expressions like `C5 in A1:D6`.
                    # Presumably, Excel does not generate spreadsheets
                    # with such contradictions.
                    if si in self.shared_formula_masters:
                        # a master is already known for this group:
                        # translate its formula to this cell
                        trans = self.shared_formula_masters[si]
                        value = trans.translate_formula(coordinate)
                    else:
                        # first cell seen for this group becomes the master
                        self.shared_formula_masters[si] = Translator(value, coordinate)


        style_array = None
        if style_id is not None:
            style_id = int(style_id)
            style_array = self.styles[style_id]

        # cells without a reference are placed using the running counters
        if coordinate:
            row, column = coordinate_to_tuple(coordinate)
        else:
            row, column = self._row_count, self._col_count

        cell = Cell(self.ws, row=row, col_idx=column, style_array=style_array)
        self.ws._cells[(row, column)] = cell

        # Formula cells have data_type 'f' at this point, so none of the
        # conversions below apply to them.
        if value is not None:
            if data_type == 'n':
                value = _cast_number(value)
                if is_date_format(cell.number_format):
                    data_type = 'd'
                    # NOTE(review): from_excel is called without an epoch
                    # argument, so whatever default it has is used; confirm
                    # that is intended for workbooks with another date base.
                    value = from_excel(value)
            elif data_type == 'b':
                value = bool(int(value))
            elif data_type == 's':
                # the raw value is an index into the shared strings
                value = self.shared_strings[int(value)]
            elif data_type == 'str':
                data_type = 's'
            elif data_type == 'd':
                value = from_ISO8601(value)

        else:
            if data_type == 'inlineStr':
                # inline strings carry their text in a child element
                child = element.find(self.INLINE_STRING)
                if child is not None:
                    data_type = 's'
                    richtext = Text.from_tree(child)
                    value = richtext.content

        # Either go through the ``value`` property, or set the private
        # value and the data type directly.
        if self.guess_types or value is None:
            cell.value = value
        else:
            cell._value = value
            cell.data_type = data_type
예제 #15
0
 def rich_content(cls, value):
     """Wrap *value* as the ``r`` of a new ``Text`` and return its content."""
     wrapped = Text(r=value)
     return wrapped.content