Example #1
0
def norm_date(d, prefer_format):
    """Parse a loosely formatted date string into a ``datetime``.

    Dots are treated as dashes and surrounding whitespace is ignored.  The
    text is tried against a list of ``strptime`` formats in order; if none
    match, it is converted to ``int`` and handed to ``from_excel``.

    :param d: the raw date text.
    :param prefer_format: optional ``strptime`` format tried before all the
        built-in ones.  When it is given, the day/month ambiguity check is
        skipped.
    :returns: the parsed ``datetime``, or the result of ``from_excel``.
    :raises ValueError: if no ``prefer_format`` is given and the text parses
        as both ``%m/%d/%Y`` and ``%d/%m/%Y``, or if the text matches no
        format and is not an integer.
    """
    text = d.replace('.', '-').strip()

    known_formats = [
        '%Y-%m-%d', '%b-%d-%Y', '%m/%d/%Y', '%d/%m/%Y', '%B %d %Y', '%B %d %y',
        '%d %b %y', '%Y/%m/%d', '%d %B, %y', '%d %b-%y', '%d %B-%y',
        '%d %B, %Y', '%d-%b-%y', '%d / %m / %Y', '%d//%m/%Y',
    ]

    if prefer_format:
        known_formats.insert(0, prefer_format)
    else:
        # Without a hint from the caller, refuse to guess when the text is
        # valid both as month/day/year and as day/month/year.
        try:
            datetime.strptime(text, '%m/%d/%Y')
            datetime.strptime(text, '%d/%m/%Y')
        except ValueError:
            pass
        else:
            raise ValueError(
                'ambiguous date %r: valid as both %%m/%%d/%%Y and %%d/%%m/%%Y'
                % text)

    for fmt in known_formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue

    # Last resort: int() raises ValueError for non-numeric text.
    return from_excel(int(text))
Example #2
0
    def _parse_value(self, element, data_type, value, style_id):
        """Convert a raw cell value according to its declared data type.

        :param element: the cell element; used to look up the inline string
            child and, for warnings, the cell reference (``r`` attribute).
        :param data_type: the cell's type code (``'n'``, ``'s'``, ``'b'``,
            ``'str'``, ``'d'``, ``'inlineStr'``).
        :param value: the raw value text, or ``None``.
        :param style_id: the cell's style index, checked against
            ``self.date_formats`` for numeric cells.
        :returns: a ``(data_type, value)`` tuple with the possibly rewritten
            type code and the converted value.
        """
        if value is not None:
            if data_type == 'n':
                value = _cast_number(value)
                if style_id in self.date_formats:
                    data_type = 'd'
                    try:
                        value = from_excel(value, self.epoch)
                    except ValueError:
                        # The cell reference is only needed for the warning,
                        # so it is looked up here from the element.
                        coordinate = element.get('r')
                        msg = """Cell {0} is marked as a date but the serial value {1} is outside the limits for dates. The cell will be treated as an error.""".format(coordinate, value)
                        warn(msg)
                        data_type = "e"
                        value = "#VALUE!"
            elif data_type == 's':
                value = self.shared_strings[int(value)]
            elif data_type == 'b':
                value = bool(int(value))
            elif data_type == "str":
                data_type = "s"
            elif data_type == 'd':
                value = from_ISO8601(value)

        elif data_type == 'inlineStr':
            # Inline strings carry no raw value; their text lives in a child
            # element located via INLINE_STRING.
            child = element.find(INLINE_STRING)
            if child is not None:
                data_type = 's'
                richtext = Text.from_tree(child)
                value = richtext.content

        return (data_type, value)
        
Example #3
0
 def value(self):
     """Return the value held in the cell.

     ``None`` and values of non-date cells are returned as stored.  For a
     date cell the stored value is passed through ``from_excel`` together
     with ``self.base_date``.
     """
     raw = self._value
     if raw is None or not self.is_date:
         return raw
     return from_excel(raw, self.base_date)
Example #4
0
 def value(self):
     """Return the value held in the cell.

     A stored value that is not ``None`` is converted with ``from_excel``
     (using ``self.base_date``) when the cell is flagged as a date; in every
     other case the stored value is returned unchanged.
     """
     stored = self._value
     needs_conversion = stored is not None and self.is_date
     return from_excel(stored, self.base_date) if needs_conversion else stored
Example #5
0
    def parse_cell(self, element):
        """Translate a cell element into a plain dict description.

        The returned dict has the keys ``row``, ``column``, ``value``,
        ``data_type`` and ``style_id``.  When the element carries no cell
        reference the reader's own row/column counters are used instead.
        """
        kind = element.get('t', 'n')
        ref = element.get('r')
        self.col_counter += 1

        style_id = element.get('s', 0)
        if style_id:
            style_id = int(style_id)

        # Inline strings have no value text; an empty value text is
        # normalised to None as well.
        raw = None
        if kind != "inlineStr":
            raw = element.findtext(VALUE_TAG, None) or None

        if ref:
            row, column = coordinate_to_tuple(ref)
        else:
            row, column = self.row_counter, self.col_counter

        has_formula = not self.data_only and element.find(FORMULA_TAG) is not None
        if has_formula:
            kind = 'f'
            raw = self.parse_formula(element)
        elif raw is None:
            if kind == 'inlineStr':
                child = element.find(INLINE_STRING)
                if child is not None:
                    kind = 's'
                    raw = Text.from_tree(child).content
        elif kind == 'n':
            raw = _cast_number(raw)
            if style_id in self.date_formats:
                kind = 'd'
                try:
                    raw = from_excel(raw, self.epoch)
                except ValueError:
                    # An unconvertible serial turns the cell into an error
                    # cell instead of aborting the read.
                    warn("""Cell {0} is marked as a date but the serial value {1} is outside the limits for dates. The cell will be treated as an error.""".format(
                        ref, raw))
                    kind = "e"
                    raw = "#VALUE!"
        elif kind == 's':
            raw = self.shared_strings[int(raw)]
        elif kind == 'b':
            raw = bool(int(raw))
        elif kind == "str":
            kind = "s"
        elif kind == 'd':
            raw = from_ISO8601(raw)

        return dict(
            row=row,
            column=column,
            value=raw,
            data_type=kind,
            style_id=style_id,
        )
def norm_date(d):
    """Turn a loosely formatted date string into a ``datetime``.

    Dots are replaced by dashes and surrounding whitespace is stripped, then
    a few ``strptime`` formats are tried in order.  Text that matches none of
    them is converted to ``int`` and handed to ``from_excel``.
    """
    text = d.replace('.', '-').strip()
    for pattern in ('%Y-%m-%d', '%b-%d-%Y', '%m/%d/%Y'):
        try:
            parsed = datetime.strptime(text, pattern)
        except ValueError:
            continue
        return parsed
    return from_excel(int(text))
def norm_date(d):
    """Parse a date written in one of a few known layouts.

    The input has its dots replaced by dashes and is stripped of surrounding
    whitespace before being matched against each format in turn.  If every
    format fails, the text is read as an integer and passed to ``from_excel``.
    """
    cleaned = d.replace('.', '-').strip()
    layouts = ['%Y-%m-%d', '%b-%d-%Y', '%m/%d/%Y']
    for layout in layouts:
        try:
            result = datetime.strptime(cleaned, layout)
        except ValueError:
            pass
        else:
            return result
    serial = int(cleaned)
    return from_excel(serial)
    def parse_cell(self, element):
        """Describe a cell element as a dict.

        The dict holds ``row``, ``column``, ``value``, ``data_type`` and
        ``style_id``.  Cells without a reference attribute are positioned
        using ``self.max_row`` and ``self.max_column``.
        """
        cell_type = element.get('t', 'n')
        ref = element.get('r')
        self.max_column += 1

        style_id = element.get('s', 0)
        if style_id is not None:
            style_id = int(style_id)

        content = None if cell_type == "inlineStr" else element.findtext(VALUE_TAG)

        if ref:
            row, column = coordinate_to_tuple(ref)
        else:
            row, column = self.max_row, self.max_column

        formula_wanted = not self.data_only and element.find(FORMULA_TAG) is not None
        if formula_wanted:
            cell_type = 'f'
            content = self.parse_formula(element)
        elif content is None:
            if cell_type == 'inlineStr':
                child = element.find(INLINE_STRING)
                if child is not None:
                    cell_type = 's'
                    content = Text.from_tree(child).content
        elif cell_type == 'n':
            content = _cast_number(content)
            if style_id in self.date_formats:
                cell_type = 'd'
                content = from_excel(content, self.epoch)
        elif cell_type == 's':
            content = self.shared_strings[int(content)]
        elif cell_type == 'b':
            content = bool(int(content))
        elif cell_type == "str":
            # Formula string results that look numeric are stored as
            # numbers; anything else becomes a plain string.
            try:
                content = _cast_number(content)
                cell_type = "n"
            except ValueError:
                cell_type = "s"
        elif cell_type == 'd':
            content = from_ISO8601(content)

        return dict(
            row=row,
            column=column,
            value=content,
            data_type=cell_type,
            style_id=style_id,
        )
Example #9
0
 def _make_cell_value(self, cell):
     """Return the cell's value, narrowing datetimes where possible.

     Non-datetime values are returned untouched.  A datetime whose stored
     value is 0 becomes ``time(0, 0)`` (when ``from_excel(0)`` is
     1899-12-30), and a datetime at midnight becomes a plain date.
     """
     if not isinstance(cell.value, datetime):
         return cell.value

     # openpyxl has a bug that treats '12:00:00 AM'
     # as 0 seconds from the 'Windows Epoch' of 1899-12-30
     midnight = time(0, 0)
     if cell._value == 0 and from_excel(0) == datetime(1899, 12, 30, 0, 0):
         return midnight

     if cell.value.time() == midnight:
         return cell.value.date()
     return cell.value
Example #10
0
def date_format(d):
    """Report which known date format a string is written in.

    Dots are replaced by dashes and whitespace is stripped first.  Returns
    the first matching ``strptime`` format, or ``'excel'`` when the text
    matches none of them but is accepted by ``from_excel`` as an integer.

    Raises ``ValueError`` when the text parses as both ``%m/%d/%Y`` and
    ``%d/%m/%Y``, or when it is neither a known format nor an integer.
    """
    text = d.replace('.', '-').strip()

    def parses_as(pattern):
        try:
            datetime.strptime(text, pattern)
        except ValueError:
            return False
        return True

    if parses_as('%m/%d/%Y') and parses_as('%d/%m/%Y'):
        raise ValueError

    candidates = (
        '%Y-%m-%d', '%b-%d-%Y', '%m/%d/%Y', '%d/%m/%Y', '%B %d %Y',
        '%B %d %y',
    )
    for pattern in candidates:
        if parses_as(pattern):
            return pattern

    # Only called for validation; it is expected to raise on bad input.
    from_excel(int(text))
    return 'excel'
Example #11
0
 def value(self):
     """Return the cell's stored value converted according to its data type.

     ``None`` stays ``None``.  Numeric cells with a date number format go
     through ``from_excel``; boolean cells compare the stored text to
     ``'1'``; inline and cached formula strings are converted with
     ``unicode``; shared strings are looked up by index.  Anything else is
     returned as stored.
     """
     raw = self._value
     if raw is None:
         return None

     kind = self.data_type
     if kind == 'n':
         if is_date_format(self.number_format):
             return from_excel(raw, self.base_date)
         return raw
     if kind == 'b':
         return raw == '1'
     if kind in (Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING):
         return unicode(raw)
     if kind == 's':
         return unicode(self.shared_strings[int(raw)])
     return raw
Example #12
0
 def value(self):
     """Return the stored value, interpreted by the cell's data type.

     An empty cell yields ``None``.  ``'n'`` cells are converted with
     ``from_excel`` when their number format is a date format, ``'b'`` cells
     are true when the stored text equals ``'1'``, inline / cached formula
     strings are wrapped in ``unicode`` and ``'s'`` cells are resolved
     through ``self.shared_strings``.
     """
     stored = self._value
     if stored is None:
         return None

     dtype = self.data_type
     if dtype == 'n':
         is_date = is_date_format(self.number_format)
         return from_excel(stored, self.base_date) if is_date else stored

     if dtype == 'b':
         return stored == '1'

     string_types = (Cell.TYPE_INLINE, Cell.TYPE_FORMULA_CACHE_STRING)
     if dtype in string_types:
         return unicode(stored)

     if dtype == 's':
         index = int(stored)
         return unicode(self.shared_strings[index])

     return stored
Example #13
0
def norm_date(d):
    """Parse a loosely formatted date string into a ``datetime``.

    Dots become dashes, whitespace is stripped and a trailing
    ``' [this contract ...'`` annotation (matched case-insensitively) is cut
    off.  The text is then matched against each entry of ``DATE_FORMATS``;
    if none match, it is read as an integer and passed to ``from_excel``.

    Raises ``ValueError('ambiguous')`` when the text is valid as both
    ``%m/%d/%Y`` and ``%d/%m/%Y``.
    """
    text = d.replace('.', '-').strip()

    marker = ' [this contract'
    lowered = text.lower()
    if marker in lowered:
        text = lowered.split(marker)[0]

    # The date is ambiguous only if *both* readings parse.
    both_parse = True
    for probe in ('%m/%d/%Y', '%d/%m/%Y'):
        try:
            datetime.strptime(text, probe)
        except ValueError:
            both_parse = False
            break
    if both_parse:
        raise ValueError('ambiguous')

    for pattern in DATE_FORMATS:
        try:
            parsed = datetime.strptime(text, pattern)
        except ValueError:
            continue
        return parsed
    return from_excel(int(text))
Example #14
0
    def parse_cell(self, element):
        """Create a ``Cell`` for *element* and store it in ``self.ws._cells``.

        Reads the value text, formula, type (``t``), reference (``r``) and
        style (``s``) from the element, resolves shared formulae through
        ``self.shared_formula_masters``, converts the value according to its
        type and finally assigns it to the new cell.
        """
        value = element.find(self.VALUE_TAG)
        if value is not None:
            value = value.text
        formula = element.find(self.FORMULA_TAG)
        data_type = element.get('t', 'n')
        coordinate = element.get('r')
        self._col_count += 1
        style_id = element.get('s')

        # assign formula to cell value unless only the data is desired
        if formula is not None and not self.data_only:
            data_type = 'f'
            if formula.text:
                value = "=" + formula.text
            else:
                value = "="
            formula_type = formula.get('t')
            if formula_type:
                if formula_type != "shared":
                    # Non-shared typed formulae keep all their attributes on
                    # the worksheet, keyed by cell reference.
                    self.ws.formula_attributes[coordinate] = dict(
                        formula.attrib)

                else:
                    si = formula.get(
                        'si')  # Shared group index for shared formulas

                    # The spec (18.3.1.40) defines shared formulae in
                    # terms of the following:
                    #
                    # `master`: "The first formula in a group of shared
                    #            formulas"
                    # `ref`: "Range of cells which the formula applies
                    #        to." It's a required attribute on the master
                    #        cell, forbidden otherwise.
                    # `shared cell`: "A cell is shared only when si is
                    #                 used and t is `shared`."
                    #
                    # Whether to use the cell's given formula or the
                    # master's depends on whether the cell is shared,
                    # whether it's in the ref, and whether it defines its
                    # own formula, as follows:
                    #
                    #  Shared?   Has formula? | In ref    Not in ref
                    # ========= ==============|======== ===============
                    #   Yes          Yes      | master   impl. defined
                    #    No          Yes      |  own         own
                    #   Yes           No      | master   impl. defined
                    #    No           No      |  ??          N/A
                    #
                    # The ?? is because the spec is silent on this issue,
                    # though my inference is that the cell does not
                    # receive a formula at all.
                    #
                    # For this implementation, we are using the master
                    # formula in the two "impl. defined" cases and no
                    # formula in the "??" case. This choice of
                    # implementation allows us to disregard the `ref`
                    # parameter altogether, and does not require
                    # computing expressions like `C5 in A1:D6`.
                    # Presumably, Excel does not generate spreadsheets
                    # with such contradictions.
                    if si in self.shared_formula_masters:
                        # A master was already seen for this group: derive
                        # this cell's formula from it.
                        trans = self.shared_formula_masters[si]
                        value = trans.translate_formula(coordinate)
                    else:
                        # First formula of the group: remember it as master.
                        self.shared_formula_masters[si] = Translator(
                            value, coordinate)

        style_array = None
        if style_id is not None:
            style_id = int(style_id)
            style_array = self.styles[style_id]

        # Fall back to the reader's running counters when the element has
        # no cell reference.
        if coordinate:
            row, column = coordinate_to_tuple(coordinate)
        else:
            row, column = self._row_count, self._col_count

        cell = Cell(self.ws, row=row, col_idx=column, style_array=style_array)
        self.ws._cells[(row, column)] = cell

        if value is not None:
            if data_type == 'n':
                value = _cast_number(value)
                if is_date_format(cell.number_format):
                    data_type = 'd'
                    value = from_excel(value, self.epoch)

            elif data_type == 'b':
                value = bool(int(value))
            elif data_type == 's':
                value = self.shared_strings[int(value)]
            elif data_type == 'str':
                data_type = 's'
            elif data_type == 'd':
                value = from_ISO8601(value)

        else:
            if data_type == 'inlineStr':
                child = element.find(self.INLINE_STRING)
                if child is not None:
                    data_type = 's'
                    richtext = Text.from_tree(child)
                    value = richtext.content

        # With guess_types (or no value at all) the value goes through the
        # cell's `value` attribute; otherwise `_value` and `data_type` are
        # set directly.
        if self.guess_types or value is None:
            cell.value = value
        else:
            cell._value = value
            cell.data_type = data_type
    def validate(self, value):
        """Validate *value*, converting ``long`` input with ``from_excel`` first.

        The (possibly converted) value is handed to
        ``DateTimeValidator.DateTimeValidator.validate`` and its result is
        returned.
        """
        candidate = from_excel(value) if isinstance(value, long) else value
        return DateTimeValidator.DateTimeValidator.validate(self, candidate)
Example #16
0
    def parse_cell(self, element):
        """Create a ``Cell`` for *element* and store it in ``self.ws._cells``.

        Reads the value text, formula, type (``t``), reference (``r``) and
        style (``s``) from the element, resolves shared formulae through
        ``self.shared_formula_masters``, converts the value according to its
        type and finally assigns it to the new cell.
        """
        value = element.find(self.VALUE_TAG)
        if value is not None:
            value = value.text
        formula = element.find(self.FORMULA_TAG)
        data_type = element.get('t', 'n')
        coordinate = element.get('r')
        self._col_count += 1
        style_id = element.get('s')

        # assign formula to cell value unless only the data is desired
        if formula is not None and not self.data_only:
            data_type = 'f'
            if formula.text:
                value = "=" + formula.text
            else:
                value = "="
            formula_type = formula.get('t')
            if formula_type:
                if formula_type != "shared":
                    # Non-shared typed formulae keep all their attributes on
                    # the worksheet, keyed by cell reference.
                    self.ws.formula_attributes[coordinate] = dict(formula.attrib)

                else:
                    si = formula.get('si')  # Shared group index for shared formulas

                    # The spec (18.3.1.40) defines shared formulae in
                    # terms of the following:
                    #
                    # `master`: "The first formula in a group of shared
                    #            formulas"
                    # `ref`: "Range of cells which the formula applies
                    #        to." It's a required attribute on the master
                    #        cell, forbidden otherwise.
                    # `shared cell`: "A cell is shared only when si is
                    #                 used and t is `shared`."
                    #
                    # Whether to use the cell's given formula or the
                    # master's depends on whether the cell is shared,
                    # whether it's in the ref, and whether it defines its
                    # own formula, as follows:
                    #
                    #  Shared?   Has formula? | In ref    Not in ref
                    # ========= ==============|======== ===============
                    #   Yes          Yes      | master   impl. defined
                    #    No          Yes      |  own         own
                    #   Yes           No      | master   impl. defined
                    #    No           No      |  ??          N/A
                    #
                    # The ?? is because the spec is silent on this issue,
                    # though my inference is that the cell does not
                    # receive a formula at all.
                    #
                    # For this implementation, we are using the master
                    # formula in the two "impl. defined" cases and no
                    # formula in the "??" case. This choice of
                    # implementation allows us to disregard the `ref`
                    # parameter altogether, and does not require
                    # computing expressions like `C5 in A1:D6`.
                    # Presumably, Excel does not generate spreadsheets
                    # with such contradictions.
                    if si in self.shared_formula_masters:
                        # A master was already seen for this group: derive
                        # this cell's formula from it.
                        trans = self.shared_formula_masters[si]
                        value = trans.translate_formula(coordinate)
                    else:
                        # First formula of the group: remember it as master.
                        self.shared_formula_masters[si] = Translator(value, coordinate)


        style_array = None
        if style_id is not None:
            style_id = int(style_id)
            style_array = self.styles[style_id]

        # Fall back to the reader's running counters when the element has
        # no cell reference.
        if coordinate:
            row, column = coordinate_to_tuple(coordinate)
        else:
            row, column = self._row_count, self._col_count

        cell = Cell(self.ws, row=row, col_idx=column, style_array=style_array)
        self.ws._cells[(row, column)] = cell

        if value is not None:
            if data_type == 'n':
                value = _cast_number(value)
                if is_date_format(cell.number_format):
                    data_type = 'd'
                    # NOTE(review): from_excel is called without an epoch
                    # argument here - confirm whether workbooks with a
                    # non-default date system need one.
                    value = from_excel(value)
            elif data_type == 'b':
                value = bool(int(value))
            elif data_type == 's':
                value = self.shared_strings[int(value)]
            elif data_type == 'str':
                data_type = 's'
            elif data_type == 'd':
                value = from_ISO8601(value)

        else:
            if data_type == 'inlineStr':
                child = element.find(self.INLINE_STRING)
                if child is not None:
                    data_type = 's'
                    richtext = Text.from_tree(child)
                    value = richtext.content

        # With guess_types (or no value at all) the value goes through the
        # cell's `value` attribute; otherwise `_value` and `data_type` are
        # set directly.
        if self.guess_types or value is None:
            cell.value = value
        else:
            cell._value = value
            cell.data_type = data_type
    def validate(self, value):
        """Run the date/time validation, pre-converting ``long`` values.

        A ``long`` argument is replaced by ``from_excel(value)`` before being
        delegated to ``DateTimeValidator.DateTimeValidator.validate``; any
        other argument is delegated unchanged.
        """
        is_serial = isinstance(value, long)
        converted = from_excel(value) if is_serial else value
        return DateTimeValidator.DateTimeValidator.validate(self, converted)
Example #18
0
def perform_number_format(value, number_format):
    """Format ``value`` approximately the way Excel would for ``number_format``.

    This is a half-baked attempt at formatting the given value using the
    given Excel number_format.  It is used by the tests to match values.
    Handles many of the formats for numbers (int/float), datetime, date,
    time, and timedelta.

    :param value: the value to format.  Strings are returned unchanged.
    :param number_format: the Excel number format code.  ``'General'``
        returns the value unchanged and ``'@'`` returns ``str(value)``.
    :returns: the formatted string, or ``value`` itself when no formatting
        applies.
    """

    if number_format == 'General' or isinstance(value, str):
        return value
    if number_format == '@':
        return str(value)

    # Escaped characters and quoted literals are pulled out of the format and
    # replaced by `{i}` placeholders so the rewriting below cannot touch them.
    grabit = []
    def grab_escapes(number_format):
        nonlocal grabit
        def sub_grabit(m):
            i = len(grabit)
            grabit.append(m.group(1))
            return f'{{{i}}}'
        nf = re.sub(r'\\(.)', sub_grabit, number_format)
        nf = re.sub(r'"([^"]*)"', sub_grabit, nf)
        nf = re.sub(r'\[(hh|h|mm|m|ss|s)\]', r'<\1>', nf)   # So we don't match the next rule with [h]
        nf = re.sub(r'\[[^\]]+\]', '', nf)   # Remove [Blue], [$-F800], etc
        nf = re.sub(r'<(hh|h|mm|m|ss|s)>', r'[\1]', nf)   # Put back the [h] etc
        return nf

    def restore_escapes(nf):
        nonlocal grabit
        if len(grabit):
            nf = nf.format(*grabit)       # Put escaped chars back in
        return nf

    if TRACE:
        print(f'perform_number_format({value}, {number_format})')

    # A number under a date format is really a date, or an elapsed time when
    # the format uses the bracketed [h]/[m]/[s] codes.
    if (isinstance(value, int) or isinstance(value, float)) and is_date_format(number_format):
        if '[h' in number_format or '[m' in number_format or '[s' in number_format:
            value = timedelta(days=value)
        else:
            value = from_excel(value)

    if isinstance(value, int) or isinstance(value, float):
        # Note: This is NOT a full implementation of Excel int/float number formatting!
        # Format sections are positive;negative;zero.
        format_split = number_format.split(';')
        number_format = format_split[0]
        prefix = ''
        suffix = ''
        if value < 0 and len(format_split) >= 2:
            number_format = format_split[1]
            value = abs(value)
        elif value == 0 and len(format_split) >= 3:
            number_format = format_split[2]
        if not number_format:
            return ''
        nf = grab_escapes(number_format)
        fmt = 'f'
        if isinstance(value, int):
            fmt = 'd'
        if '%' in nf:
            fmt = '%'
        elif 'E' in nf:
            fmt = 'E'
            nf = re.sub(r'E[+0#?]+', 'E', nf)
        comma = ''
        pound = ''
        c_ndx = nf.find(',')
        d_ndx = nf.find('.')
        p_ndx = nf.find('#')
        if c_ndx >= 0:
            if d_ndx >=0 and c_ndx > d_ndx:
                # Commas after the decimal point are scaling markers: each
                # one divides the value by 1000.  find() returns -1 once
                # there are no more commas, which ends the loop.
                while c_ndx >= 0:
                    value /= 1000
                    c_ndx = nf.find(',', c_ndx+1)
            else:
                comma = ','
        places = ''
        if d_ndx >= 0:
            if p_ndx > d_ndx:
                pound = '#'
                nf = nf.replace('#', '0')
            places = f'.{nf[d_ndx+1:].count("0")}'
            if fmt == 'd':
                value = float(value)
                fmt = 'f'
        elif fmt == 'd':
            zeros = nf.count('0')
            if zeros:
                fmt = f'0{zeros}' + fmt
        else:
            places = '.0'
        nf = re.sub(r'_.', ' ', nf)
        nf = nf.replace('*', '')        # We can't really do this one
        # group(1): literal prefix, group(2): optional fraction part ending in
        # "/denominator", group(3): literal suffix.
        m = re.match(r'((?:[^0#.E%,?*]*{\d+}[^0#.E%,?*]*)|(?:[^0#.E%,?*]*))[0#.E%,?*]+(.*[/][0-9?#]+)?(.*)$', nf)
        prefix = restore_escapes(m.group(1))
        fraction = m.group(2)
        suffix = restore_escapes(m.group(3))
        if fraction:
            s_ndx = fraction.find('/')
            suf = restore_escapes(fraction[:s_ndx]).replace('?', '').replace('#', '').replace('0', '')
            fraction = fraction[s_ndx+1:]
            if fraction.isdigit():
                # NOTE(review): this floor-divides the fractional part by the
                # fixed denominator, and the next `if` is also entered for a
                # digit denominator - confirm the intended output.
                ival = int(value)
                value -= ival
                suf += f'{value//int(fraction)}/{fraction}'
                value = ival
            if fraction[0] != '?' or float(int(value)) != value:
                ival = int(value)
                value -= ival
                fr = Fraction.from_float(value).limit_denominator(10**(len(fraction))-1)
                suf += f'{fr.numerator}/{fr.denominator}'
            suffix = suf + suffix

        py_format = f'{prefix}{{0:{pound}{comma}{places}{fmt}}}{suffix}'
        value = py_format.format(value)
        if TRACE:
            print(f'perform_number_format: using {py_format} to produce {value}')
        return value

    # Everything below handles date/time-like values; only the first format
    # section is used.
    number_format = number_format.split(';')[0]
    if isinstance(value, tm):
        value = datetime(1, 1, 1, value.hour, value.minute, value.second)
    elif isinstance(value, date) and not isinstance(value, datetime):
        value = datetime(value.year, value.month, value.day)
    if isinstance(value, datetime) and \
      ('[h' in number_format or '[m' in number_format or '[s' in number_format):
        value = timedelta(hours=value.hour, minutes=value.minute,
                          seconds=value.second + value.microsecond / 1000000.0)
    if isinstance(value, timedelta):
        total_seconds = int(value.total_seconds())
        hours = total_seconds // 3600
        total_minutes = total_seconds // 60
        minutes = (total_seconds % 3600) // 60
        seconds = total_seconds % 60
        nf = grab_escapes(number_format)
        # Bracketed codes are totals; the bare codes are the remainder within
        # the hour/minute.  Longer codes are replaced before shorter ones.
        nf = nf.replace('[hh]', f'{hours:02d}').replace('[mm]', f'{total_minutes:02d}'). \
                replace('[ss]', f'{total_seconds:02d}').replace('[h]', str(hours)). \
                replace('[m]', str(total_minutes)).replace('[s]', str(total_seconds)). \
                replace('mm', f'{minutes:02d}').replace('ss', f'{seconds:02d}'). \
                replace('m', str(minutes)).replace('s', str(seconds))
        nf = restore_escapes(nf)
        value = nf
        if TRACE:
            print(f'perform_number_format: timedelta produced {value} (grabit = {grabit})')
    if isinstance(value, datetime):
        if value.microsecond >= 500000:        # Round up 999999 ms to the next second
            value = value.replace(microsecond=0) + timedelta(seconds=1)
        fmt = grab_escapes(number_format)
        # 'dd' becomes the temporary marker '%D' (fixed up to '%d' below) so
        # the single-'d' replacement further down cannot corrupt it.
        fmt = fmt.replace('yyyy', '%Y').replace('yy', '%y').replace('dddd', '%A').replace('ddd', '%a'). \
                replace('dd', '%D').replace('mmmm', '%B').replace('mmm', '%b').replace('AM/PM', '%p'). \
                replace('ss', '%S')
        h_ndx = fmt.find('h')
        if '%p' in fmt:
            fmt = fmt.replace('hh', '%I')
        else:
            fmt = fmt.replace('hh', '%H')
        # Now let's handle the hard ones: mm, m, d, h, a/p
        ap_ndx = fmt.find('a/p')
        if ap_ndx >= 0:
            fmt = fmt.replace('a/p', '%p')
        while True:
            m_ndx = fmt.find('mm')
            if m_ndx < 0:
                break
            if h_ndx >= 0 and m_ndx > h_ndx: # it's minutes
                fmt = fmt[:m_ndx] + '%M' + fmt[m_ndx+2:]
                continue
            fmt = fmt[:m_ndx] + '%X' + fmt[m_ndx+2:]    # it's months (corrected below)
        while True:
            m_ndx = fmt.find('m')
            if m_ndx < 0:
                break
            if h_ndx >= 0 and m_ndx > h_ndx: # it's minutes
                fmt = fmt[:m_ndx] + str(value.minute) + fmt[m_ndx+1:]
                continue
            fmt = fmt[:m_ndx] + str(value.month) + fmt[m_ndx+1:]    # it's months
        d_ndx = fmt.find('d')
        if d_ndx >= 0:
            fmt = fmt.replace('d', str(value.day))
        if h_ndx >= 0:
            if '%p' in fmt:
                # 12-hour clock: 0 -> 12, 13..23 -> 1..11
                hour = value.hour
                if hour > 12:
                    hour -= 12
                if hour == 0:
                    hour = 12
                fmt = fmt.replace('h', str(hour))
            else:
                fmt = fmt.replace('h', str(value.hour))

        fmt = fmt.replace('%D', '%d').replace('%X', '%m')
        fmt = restore_escapes(fmt)
        value = value.strftime(fmt)
        if ap_ndx >= 0:
            value = value.replace('AM', 'a').replace('PM', 'p')
        if TRACE:
            print(f'perform_number_format: using {fmt} to produce {value} (grabit={grabit})')
    return value