def test_time_as_tuple(self):
    """Pure time fractions convert to (0, 0, 0, h, m, s) tuples."""
    cases = [
        (.273611, (0, 0, 0, 6, 34, 0)),
        (.538889, (0, 0, 0, 12, 56, 0)),
        (.741123, (0, 0, 0, 17, 47, 13)),
    ]
    for serial, expected in cases:
        self.assertEqual(xldate.xldate_as_tuple(serial, DATEMODE), expected)
def test_date_as_tuple(self):
    """Whole-day serials convert to (y, m, d, 0, 0, 0) tuples."""
    cases = [
        (2741., (1907, 7, 3, 0, 0, 0)),
        (38406., (2005, 2, 23, 0, 0, 0)),
        (32266., (1988, 5, 3, 0, 0, 0)),
    ]
    for serial, expected in cases:
        self.assertEqual(xldate.xldate_as_tuple(serial, DATEMODE), expected)
def test_time_as_tuple():
    """Pure time fractions convert to (0, 0, 0, h, m, s) tuples."""
    for serial, expected in (
        (.273611, (0, 0, 0, 6, 34, 0)),
        (.538889, (0, 0, 0, 12, 56, 0)),
        (.741123, (0, 0, 0, 17, 47, 13)),
    ):
        assert xldate.xldate_as_tuple(serial, DATEMODE) == expected
def test_date_as_tuple():
    """Whole-day serials convert to (y, m, d, 0, 0, 0) tuples."""
    for serial, expected in (
        (2741., (1907, 7, 3, 0, 0, 0)),
        (38406., (2005, 2, 23, 0, 0, 0)),
        (32266., (1988, 5, 3, 0, 0, 0)),
    ):
        assert xldate.xldate_as_tuple(serial, DATEMODE) == expected
def setAttendaceStatus(c1, c2, date, userRec):
    # Classify one day's attendance for a user and accumulate the result
    # (late/early counters, total worked seconds) onto userRec in place.
    #   c1 - morning check-in: Excel time serial (float), datetime, or a
    #        unicode status word; may be empty/missing.
    #   c2 - evening check-out: same possible types as c1.
    #   date - day-of-month read from the sheet as a float.
    #   userRec - mutable per-user record; updated in place.
    # Relies on module globals: monthStr, strToTime, stateDictCn2En,
    # STATE_GAME, STATE_OUT, WORKSECONDS, WORKSECONDS_STD.
    date = str(int(date))  # date is float
    # Reference times of the day used by the late/early rules below.
    time10Clock = strToTime(monthStr + date + ' ' + '10:00:00')
    time12Clock = strToTime(monthStr + date + ' ' + '12:00:00')
    time13Clock = strToTime(monthStr + date + ' ' + '13:00:00')
    time18Clock = strToTime(monthStr + date + ' ' + '18:00:00')
    # if out in morning and back in afternoon, if offTime < 18:30, it's an early
    time1830Clock = strToTime(monthStr + date + ' ' + '18:30:00')
    time19Clock = strToTime(monthStr + date + ' ' + '19:00:00')
    if(isinstance(c1, float)):
        # use column date to be compatible with '09:11:11'
        t1 = xldate_as_tuple(c1, 0)
        c1 = strToTime(monthStr + date + ' ' + str(t1[3]) + ':' + str(t1[4]) + ':' + str(t1[5]))
    if(isinstance(c2, float)):
        t2 = xldate_as_tuple(c2, 0)
        c2 = strToTime(monthStr + date + ' ' + str(t2[3]) + ':' + str(t2[4]) + ':' + str(t2[5]))
    # if(isinstance(c1, datetime)):
    #     time1Str = c1.strftime('%Y/%m/%d %H:%M:%S')
    # if(isinstance(c2, datetime)):
    #     time2Str = c2.strftime('%Y/%m/%d %H:%M:%S')
    if(isinstance(c1, datetime)):
        # Normal working day: the check-in stamp exists.
        if(isinstance(c2, datetime)):
            userRec.totalTime += (c2 - c1).total_seconds() - 3600  # - 1 hour lunch time
        if((c1 - time10Clock).total_seconds() >= 0):
            userRec.late += 1  # checked in at/after 10:00
        flag18Clock = c2 and ((c2 - time18Clock).total_seconds() < 0)  # early
        flag19Clock = c2 and ((c2 - time19Clock).total_seconds() >= 0)  # must not early
        flag9Hours = c1 and c2 and ((c2 - c1).total_seconds() < WORKSECONDS)  # early
        # Early leave if: no check-out at all, out before 18:00, or out before
        # 19:00 with a stay shorter than WORKSECONDS.
        # NOTE(review): if c2 is a unicode status here, the flag18Clock
        # subtraction raises TypeError — confirm the data never mixes a
        # datetime check-in with a text check-out.
        if((not c2) or flag18Clock or (not flag19Clock and flag9Hours)):
            userRec.early += 1
        # userRec.recDict[date] = [time1Str, time2Str]
    elif(isinstance(c2, unicode)):
        # No check-in; the afternoon column carries a status word.
        if(stateDictCn2En.has_key(c2)):
            prop = stateDictCn2En[c2]
            setattr(userRec, prop, getattr(userRec, prop) + 1)
            if(c2 == STATE_GAME):
                userRec.totalTime += WORKSECONDS_STD
            else:
                # NOTE(review): c2 is a unicode status string here, so
                # (time12Clock - c2) raises TypeError; c1 was probably
                # intended (morning hours worked until 12:00) — confirm.
                userRec.totalTime += (time12Clock - c2).total_seconds()
    elif(isinstance(c1, unicode)):
        # The morning column carries a status word (out/leave/...).
        if(stateDictCn2En.has_key(c1)):
            prop = stateDictCn2En[c1]
            setattr(userRec, prop, getattr(userRec, prop) + 1)
        if(c2 and stateDictCn2En.has_key(c2)):
            # Afternoon is a status word too: count it as well.
            prop = stateDictCn2En[c2]
            setattr(userRec, prop, getattr(userRec, prop) + 1)
        elif(isinstance(c2, datetime)):
            # Out in the morning, back and clocked out in the afternoon.
            # NOTE(review): prop is unbound here when c1 was not found in
            # stateDictCn2En — potential NameError.
            if(prop == STATE_OUT):
                if((c2 - time1830Clock).total_seconds() < 0):
                    userRec.early += 1
                userRec.totalTime += (c2 - time13Clock).total_seconds()
def parse_excel_cell(self, book, cell, column_name):
    """Return (AccountEntry attr, value) or None.

    Raise an exception in the event of a parse error.
    """
    if column_name in ('date', 'entry date', 'entry_date'):
        raw = cell.value
        if isinstance(raw, (int, float)):
            # Numeric cell: an Excel serial date.
            ymd = xldate_as_tuple(raw, book.datemode)[:3]
            return 'entry_date', datetime.date(*ymd)
        # Anything else: let dateutil figure the textual format out.
        return 'entry_date', dateutil.parser.parse(str(raw).strip()).date()
    if column_name in ('amount', 'delta'):
        amount = parse_amount(str(cell.value).strip(), currency=self.currency)
        return 'delta', amount
    if column_name in ('description', 'desc'):
        return 'description', str(cell.value).strip()
    if column_name == 'sign':
        token = str(cell.value).strip().lower()
        # Treat it as a liability account: credit = increase.
        if token in ('+', 'c', 'cr', 'credit', 'deposit'):
            return 'sign', 1
        if token in ('-', 'd', 'dr', 'debit', 'withdrawal'):
            return 'sign', -1
    # Unknown column, or an unrecognised sign token.
    return None
def __init__(self, row, fieldnames=None, book=None):
    """Populate the mapping from parallel cell/fieldname sequences.

    Date cells (ctype 3) are decoded into datetime objects using the
    workbook's datemode; every other cell keeps its raw value.
    """
    super(Row, self).__init__()
    for name, cell in zip(fieldnames, row):
        if cell.ctype == 3:
            value = datetime(*xldate_as_tuple(cell.value, book.datemode))
        else:
            value = cell.value
        self[name] = value
def clean(cell):
    """Normalise a cell: dates -> formatted string, numbers -> int, else raw."""
    if cell.ctype == 3:
        # todo: get feedback on date formatting
        year, month, day = xldate_as_tuple(cell.value, 0)[0:3]
        return datetime(year, month, day).strftime(date_format)
    if cell.ctype == 2:
        # Numeric cells arrive as floats; truncate to int like the original.
        return int(cell.value)
    return cell.value
def convert_cell_to_time_to_float(self, time):
    """Convert an Excel time serial into fractional hours (hour + minute/60).

    :param time: Excel date/time serial number from a cell.
    :return: float hours, e.g. 9.5 for 09:30.
    NOTE(review): seconds are ignored in the returned value — confirm that
    minute precision is sufficient for callers.
    """
    year, month, day, hour, minute, second = xldate.xldate_as_tuple(
        time, 0)
    # BUG FIX: the old debug value was built with
    # datetime.time(hour // 3600, (minute % 3600) / 60, second % 60) — a
    # seconds-based formula mis-applied to already-split h/m/s fields; the
    # float minute argument makes datetime.time() raise TypeError on
    # Python 3. Log the actual wall-clock time instead.
    my_time_old = datetime.time(hour, minute, second)
    my_time = hour + minute / 60.0
    logging.info('-------------old--------------')
    logging.info(my_time_old)
    logging.info('-------------new--------------')
    logging.info(my_time)
    return my_time
def _parse_cell(cell_contents, cell_typ):
    """ converts the contents of the cell into a pandas appropriate object

    Date cells become datetime.datetime, datetime.date (when the serial has
    no time-of-day, for MySQL DATE columns) or datetime.time (serials on the
    Excel epoch). Error cells become NaN, booleans become bool, and whole
    floats are collapsed to int when convert_float is set (closure variable).
    """
    if cell_typ == XL_CELL_DATE:
        # Use the newer xlrd datetime handling.
        try:
            cell_contents_orgin = cell_contents
            cell_contents = \
                xldate.xldate_as_datetime(cell_contents, epoch1904)
        except OverflowError:
            return cell_contents
        # Excel doesn't distinguish between dates and time,
        # so we treat dates on the epoch as times only.
        # Also, Excel supports 1900 and 1904 epochs.
        year = (cell_contents.timetuple())[0:3]
        if (not epoch1904 and year == (1899, 12, 31)) or (
            epoch1904 and year == (1904, 1, 1)
        ):
            cell_contents = time(
                cell_contents.hour,
                cell_contents.minute,
                cell_contents.second,
                cell_contents.microsecond,
            )
        else:
            # distinguish between dates and time in mysql
            # BUG FIX: the date() downcast used to run BEFORE the epoch
            # check, so an epoch-midnight serial became a datetime.date and
            # the .hour access above raised AttributeError. Checking the
            # epoch first preserves all other behaviour.
            try:
                cell_contents_tuple = xldate.xldate_as_tuple(
                    cell_contents_orgin, epoch1904)
                if cell_contents_tuple[3:6] == (0, 0, 0):
                    cell_contents = cell_contents.date()
            # BUG FIX: was a bare `except:` which also swallowed
            # SystemExit/KeyboardInterrupt.
            except Exception:
                cell_contents = \
                    xldate.xldate_as_datetime(cell_contents_orgin, epoch1904)
    elif cell_typ == XL_CELL_ERROR:
        cell_contents = np.nan
    elif cell_typ == XL_CELL_BOOLEAN:
        cell_contents = bool(cell_contents)
    elif convert_float and cell_typ == XL_CELL_NUMBER:
        # GH5394 - Excel 'numbers' are always floats
        # it's a minimal perf hit and less surprising
        val = int(cell_contents)
        if val == cell_contents:
            cell_contents = val
    return cell_contents
def do_format(engine, resource_id, table_id):
    # Post-process one scraped spending table (Python 2 code):
    #   1. collect the non-empty values of every known date column,
    #   2. detect a single date format per column from those samples,
    #   3. per row, add <field>AsFloat for numeric columns and
    #      <field>AsDate (ISO "YYYY-MM-DD") for date columns,
    #   4. upsert each enriched row back into the table keyed on 'id'.
    # Relies on module globals: sl (SQL helper), DATE_FIELDS,
    # NUMERIC_FIELDS, detect_date_format.
    table_suffix = '%s_table%s' % (resource_id, table_id)
    table = sl.get_table(engine, 'spending_%s' % table_suffix)
    date_field_values = defaultdict(list)
    # Pass 1: sample every populated date cell, grouped by column.
    for row in sl.all(engine, table):
        for date_field in DATE_FIELDS:
            if date_field in row and row[date_field]:
                date_field_values[date_field].append(row[date_field])
    # One detected format (or None) per date column.
    date_field_formats = {}
    for date_field, values in date_field_values.items():
        date_field_formats[date_field] = detect_date_format(values)
    # Pass 2: enrich and persist each row.
    for row in sl.all(engine, table):
        for numeric_field in NUMERIC_FIELDS:
            try:
                val = row.get(numeric_field)
                if val is None:
                    continue
                # Strip currency symbols/separators before parsing.
                val = "".join([v for v in val if v in "-.0123456789"])
                row[numeric_field + 'AsFloat'] = float(val)
            except Exception as e:
                # Best-effort: a bad number only loses its AsFloat column.
                print e
        for date_field, format_ in date_field_formats.items():
            if format_ is None:
                continue
            try:
                if row[date_field] is None:
                    continue
                if format_ == 'excel':
                    # Deciphers excel dates that have been mangled into integers by formatting errors
                    parsed = datetime(*xldate_as_tuple(float(row[date_field].strip()), 0))
                else:
                    parsed = datetime.strptime(row[date_field].strip(), format_)
                row[date_field + 'AsDate'] = parsed.strftime("%Y-%m-%d")
            except Exception as e:
                # Best-effort: a bad date only loses its AsDate column.
                print e
        sl.upsert(engine, table, row, ['id'])
def get_seats(sloc: str) -> list:
    """Collect unique [year_month, seat, MM acronym, firm] rows.

    Reads the "OO SPX" sheet of the workbook at *sloc*; rows with an empty
    seat column are skipped, and duplicates are dropped while preserving
    first-seen order.
    """
    from xlrd.xldate import xldate_as_tuple
    workbook = xlrd.open_workbook(sloc)
    sheet = workbook.sheet_by_name("OO SPX")
    seats = []
    for rownum in range(1, sheet.nrows):
        seat = sheet.cell_value(rownum, 4)
        acronym = sheet.cell_value(rownum, 2)
        firm = sheet.cell_value(rownum, 6)
        if seat == '':
            continue
        # Column 0 holds an Excel date serial; only year/month are needed.
        serial = int(sheet.cell_value(rownum, 0))
        year, month = xldate_as_tuple(serial, 0)[0:2]
        # print(year, month, seat)
        entry = [str(year) + "_" + str(month), seat, acronym, firm]
        if entry not in seats:
            seats.append(entry)
    return seats
def importar_(archivo):
    """Reload the 'conexiones_por_deuda' table from an xls file.

    Truncates the table, then inserts one record per sheet row (from row 4
    on), cleaning the 'servicio' field and decoding the Excel date serial in
    'fecha_conexion' into a datetime.
    """
    from xlrd import open_workbook
    from xlrd import xldate
    import datetime
    tabla = "conexiones_por_deuda"
    sheet = open_workbook(archivo).sheet_by_index(0)
    # Skip the auto-id field; remaining fields line up with sheet columns 1+.
    campos = db[tabla]._fields[1:]
    db[tabla].truncate('RESTART IDENTITY CASCADE')
    for fila in range(4, sheet.nrows):
        registro = dict(zip(campos, sheet.row_values(fila, 1)))
        registro["servicio"] = registro["servicio"].replace(" ", "")
        partes_fecha = xldate.xldate_as_tuple(registro["fecha_conexion"], 0)
        registro["fecha_conexion"] = datetime.datetime(*partes_fecha)
        db[tabla].insert(**registro)
def apply(row, field_formats, stats):
    """Parse each configured date field of *row* in place.

    For every field, tries each candidate format in order and writes the
    first successful parse to row[field + 'Formatted'] as 'YYYY-MM-DD'.
    Failures (unparseable, future date, unexpected error) null the formatted
    column, set row['valid'] = False and are tallied on stats[field].
    """
    today = datetime.now()
    for field, formats in field_formats.items():
        try:
            value = row.get(field)
            if value in (None, ''):
                stats[field].add_spending('Empty', row)
                continue
            parsed = None
            # Try parsing
            for format_ in formats:
                try:
                    if format_ == 'excel':
                        # Deciphers excel dates that have been mangled into
                        # integers by formatting errors.
                        # BUG FIX: was float(field.strip()) — it parsed the
                        # column NAME instead of the cell value, so the
                        # 'excel' format could never succeed.
                        parsed = datetime(
                            *xldate_as_tuple(float(value.strip()), 0))
                    else:
                        parsed = datetime.strptime(value.strip(), format_)
                    break
                except Exception:
                    # This format did not match; try the next one.
                    pass
            if not parsed:
                row[field + 'Formatted'] = None
                row['valid'] = False
                stats[field].add_spending('Parse error', row, value)
                continue
            # Check it is not in the future - an obvious mistake
            if parsed > today:
                row[field + 'Formatted'] = None
                row['valid'] = False
                stats[field].add_spending('Date in the future', row, parsed)
                continue
            formatted_date = parsed.strftime("%Y-%m-%d")
            stats[field].add_spending('Parsed ok', row, value)
            row[field + 'Formatted'] = formatted_date
        except Exception as e:
            row[field + 'Formatted'] = None
            row['valid'] = False
            stats[field].add_spending('Exception %s' % e.__class__.__name__,
                                      row)
            log.exception(e)
def generate_list(self, book, log_out):
    # Flatten the first sheet of the badge-log workbook into log_out:
    # one list per row with columns before LOG_GATE_ACTION copied verbatim
    # (user name/surname), the gate-action column normalised to
    # MAIN_GATE_ENTRY / MAIN_GATE_EXIT / OTHER_GATES, and the date column
    # converted from an Excel serial into a datetime.
    # Relies on module globals: LOG_GATE_ACTION, LOG_DATE, user_entry,
    # user_exit, MAIN_GATE_ENTRY, MAIN_GATE_EXIT, OTHER_GATES.
    sheet = book.sheet_by_index(0)
    rowIndex = -1
    try:
        # Column 0 drives the row count; rowIndex tracks the current row.
        for cell in sheet.col(0):
            rowIndex += 1
            cells = sheet.row(rowIndex)
            row_tmp = []
            colIndex = -1
            # iterate on the row
            for cell in cells:
                if rowIndex > 1:  # skip first 2 row
                    colIndex +=1
                    # user name/surname
                    if (colIndex < LOG_GATE_ACTION):
                        row_tmp.append(cell.value)
                    # entry exit of main gate
                    elif colIndex == LOG_GATE_ACTION:
                        if cell.value == user_entry:
                            row_tmp.append(MAIN_GATE_ENTRY)
                        elif cell.value == user_exit:
                            row_tmp.append(MAIN_GATE_EXIT)
                        else:
                            row_tmp.append(OTHER_GATES)
                    # date - time log
                    elif colIndex == LOG_DATE:
                        try:
                            datetime_value = datetime(*xldate.xldate_as_tuple(cell.value, 0))
                            row_tmp.append(datetime_value)
                        except ValueError:
                            # Unparsable serial: drop the timestamp only.
                            pass
            # NOTE(review): header rows (rowIndex <= 1) append empty lists
            # to log_out — confirm downstream code expects that.
            log_out.append(row_tmp)
    except TypeError:
        # NOTE(review): a non-float date cell aborts the whole sheet walk
        # silently — confirm this is intended.
        pass
    book.unload_sheet(0)  # release the sheet's memory
def extraer_fec_nac(self, cell):
    """Extract the date of birth from a cell in the correct format.

    When self.text_dates is false the cell is assumed to be an Excel date
    serial. Otherwise, date-typed cells are decoded to a (d, m, y) list and
    re-ordered through self.pos_dia/pos_mes/pos_ano; text cells are split
    into digit groups with a regex and validated (people older than 100
    years are rejected).
    """
    if not self.text_dates:
        fec_nac = xldate_as_datetime(cell.value, self.book.datemode)
    else:
        try:
            if cell.ctype == XL_CELL_DATE:
                tup_aux_fec_nac = xldate_as_tuple(cell.value, self.book.datemode)
                # print tup_aux_fec_nac
                # Re-order (y, m, d, ...) into [day, month, year].
                tup_fec_nac = [tup_aux_fec_nac[2], tup_aux_fec_nac[1], tup_aux_fec_nac[0]]
                try:
                    dia = int(tup_fec_nac[self.pos_dia])
                    mes = int(tup_fec_nac[self.pos_mes])
                    ano = int(tup_fec_nac[self.pos_ano])
                    fec_nac = datetime(ano, mes, dia)
                except ValueError:
                    # Invalid d/m/y combination: trust the raw serial instead.
                    fec_nac = xldate_as_datetime(cell.value, self.book.datemode)
            else:
                # Cell type is text: pull the digit groups out of the string.
                tup_fec_nac = re.findall('\d+', cell.value)
                dia = int(tup_fec_nac[self.pos_dia])
                mes = int(tup_fec_nac[self.pos_mes])
                ano = int(tup_fec_nac[self.pos_ano])
                if ano <= datetime.today().year - 100:
                    raise Exception
                fec_nac = datetime(ano, mes, dia)
        except Exception:
            raise
            # NOTE(review): unreachable — the bare raise above always
            # re-raises before this fallback can run.
            return cell.value
    return fec_nac
def _parse_cell(cell_contents,cell_typ):
    """converts the contents of the cell into a pandas appropriate object

    Date cells become datetime.datetime (or datetime.time for serials on
    the Excel epoch), error cells become NaN, booleans become bool, and
    whole floats collapse to int when convert_float (closure variable) is
    set. xlrd_0_9_3 and epoch1904 are also closure variables.
    """
    if cell_typ == XL_CELL_DATE:
        if xlrd_0_9_3:
            # Use the newer xlrd datetime handling.
            cell_contents = xldate.xldate_as_datetime(cell_contents,
                                                      epoch1904)
            # Excel doesn't distinguish between dates and time,
            # so we treat dates on the epoch as times only.
            # Also, Excel supports 1900 and 1904 epochs.
            year = (cell_contents.timetuple())[0:3]
            if ((not epoch1904 and year == (1899, 12, 31)) or
                    (epoch1904 and year == (1904, 1, 1))):
                cell_contents = datetime.time(cell_contents.hour,
                                              cell_contents.minute,
                                              cell_contents.second,
                                              cell_contents.microsecond)
        else:
            # Use the xlrd <= 0.9.2 date handling.
            dt = xldate.xldate_as_tuple(cell_contents, epoch1904)
            if dt[0] < datetime.MINYEAR:
                # Year 0: the serial was a pure time-of-day fraction.
                cell_contents = datetime.time(*dt[3:])
            else:
                cell_contents = datetime.datetime(*dt)
    elif cell_typ == XL_CELL_ERROR:
        cell_contents = np.nan
    elif cell_typ == XL_CELL_BOOLEAN:
        cell_contents = bool(cell_contents)
    elif convert_float and cell_typ == XL_CELL_NUMBER:
        # GH5394 - Excel 'numbers' are always floats
        # it's a minimal perf hit and less suprising
        val = int(cell_contents)
        if val == cell_contents:
            cell_contents = val
    return cell_contents
def apply(row, field_formats, stats):
    """Parse each configured date field of *row* in place.

    For every field, tries each candidate format in order and writes the
    first successful parse to row[field + 'Formatted'] as 'YYYY-MM-DD'.
    Failures (unparseable, future date, unexpected error) null the formatted
    column, set row['valid'] = False and are tallied on stats[field].
    """
    today = datetime.now()
    for field, formats in field_formats.items():
        try:
            value = row.get(field)
            if value in (None, ''):
                stats[field].add_spending('Empty', row)
                continue
            parsed = None
            # Try parsing
            for format_ in formats:
                try:
                    if format_ == 'excel':
                        # Deciphers excel dates that have been mangled into
                        # integers by formatting errors.
                        # BUG FIX: was float(field.strip()) — it parsed the
                        # column NAME instead of the cell value, so the
                        # 'excel' format could never succeed.
                        parsed = datetime(
                            *xldate_as_tuple(float(value.strip()), 0))
                    else:
                        parsed = datetime.strptime(value.strip(), format_)
                    break
                except Exception:
                    # This format did not match; try the next one.
                    pass
            if not parsed:
                row[field + 'Formatted'] = None
                row['valid'] = False
                stats[field].add_spending('Parse error', row, value)
                continue
            # Check it is not in the future - an obvious mistake
            if parsed > today:
                row[field + 'Formatted'] = None
                row['valid'] = False
                stats[field].add_spending('Date in the future', row, parsed)
                continue
            formatted_date = parsed.strftime("%Y-%m-%d")
            stats[field].add_spending('Parsed ok', row, value)
            row[field + 'Formatted'] = formatted_date
        except Exception as e:
            row[field + 'Formatted'] = None
            row['valid'] = False
            stats[field].add_spending('Exception %s' % e.__class__.__name__,
                                      row)
            log.exception(e)
def apply(row, formats):
    """Parse each configured date field of *row* in place and return row.

    formats maps field name -> strptime format string, or the sentinel
    "excel" for Excel serials stored as text. On success, writes
    row[field + "Formatted"] = "YYYY-MM-DD"; on any failure (or a date in
    the future) nulls that column and sets row["valid"] = False.
    """
    today = datetime.now()
    for field, format_ in formats.items():
        try:
            value = row.get(field)
            if value is None:
                continue
            if format_ == "excel":
                # Deciphers excel dates that have been mangled into integers
                # by formatting errors.
                # BUG FIX: was float(field.strip()) — it stripped the column
                # NAME, so every excel-formatted value raised and was marked
                # invalid.
                parsed = datetime(*xldate_as_tuple(float(value.strip()), 0))
            else:
                parsed = datetime.strptime(value.strip(), format_)
            # Check it is not in the future - an obvious mistake
            if parsed > today:
                row[field + "Formatted"] = None
                row["valid"] = False
                continue
            row[field + "Formatted"] = parsed.strftime("%Y-%m-%d")
        except Exception as e:
            row[field + "Formatted"] = None
            row["valid"] = False
            # log.exception(e)
    return row
def apply(row, formats):
    """Parse each configured date field of *row* in place and return row.

    formats maps field name -> strptime format string, or the sentinel
    'excel' for Excel serials stored as text. On success, writes
    row[field + 'Formatted'] = 'YYYY-MM-DD'; on any failure (or a date in
    the future) nulls that column and sets row['valid'] = False.
    """
    today = datetime.now()
    for field, format_ in formats.items():
        try:
            value = row.get(field)
            if value is None:
                continue
            if format_ == 'excel':
                # Deciphers excel dates that have been mangled into integers
                # by formatting errors.
                # BUG FIX: was float(field.strip()) — it stripped the column
                # NAME, so every excel-formatted value raised and was marked
                # invalid.
                parsed = datetime(*xldate_as_tuple(float(value.strip()), 0))
            else:
                parsed = datetime.strptime(value.strip(), format_)
            # Check it is not in the future - an obvious mistake
            if parsed > today:
                row[field + 'Formatted'] = None
                row['valid'] = False
                continue
            row[field + 'Formatted'] = parsed.strftime("%Y-%m-%d")
        except Exception as e:
            row[field + 'Formatted'] = None
            row['valid'] = False
            #log.exception(e)
    return row
# Top-level GUI script: embeds a matplotlib figure in a Tk window and loads
# the holiday/consumption datasets.
# NOTE(review): in this chunk the canvas is created with master=fr3 BEFORE
# root/fr3 are defined further down — the statements appear out of execution
# order (likely a re-chunked notebook/script); confirm against the full file.
pl.set_title(name)
fig.tight_layout()
canvas = FigureCanvasTkAgg(fig, master=fr3)
canvas.get_tk_widget().pack(side=Tk.BOTTOM, expand=True)
toolbar = NavigationToolbar2TkAgg(canvas, fr3)
toolbar.update()
canvas.draw()
df = pd.read_csv("dias_festivos.csv")
df2 = pd.read_excel("datos.xlsx")  # reading the data
df2 = df2.set_index("fecha")  # set the date column as the index
from xlrd.xldate import xldate_as_tuple
from datetime import datetime
# Decode the Excel date serials in 'fecha' into datetime objects.
df['fecha_python'] = df['fecha'].apply(lambda x: datetime(*xldate_as_tuple(x, 0)))
root = Tk.Tk()
# NOTE(review): the title string looks mojibake-encoded (UTF-8 read as
# Latin-1) — left untouched here; confirm the source file's encoding.
root.title("Consumo de energÃ­a en MWh en dÃ­as festivos")
# Three side-by-side fixed-size frames: controls, list, and the plot canvas.
fr = Tk.Frame(root, borderwidth=5, relief="sunken", width=200, height=550)
fr.grid(column = 0, row = 0)
fr.pack_propagate(0)
fr2 = Tk.Frame(root, borderwidth=5, relief="sunken", width=300, height=550)
fr2.grid(column = 1, row = 0)
fr2.pack_propagate(0)
fr3 = Tk.Frame(root, borderwidth=5, relief="sunken", width=600, height=550)
fr3.grid(column = 2, row = 0)
fr3.pack_propagate(0)
def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
                 index_col=None, has_index_names=None, parse_cols=None,
                 parse_dates=False, date_parser=None, na_values=None,
                 thousands=None, chunksize=None, convert_float=True,
                 **kwds):
    """Read one sheet of self.book into a DataFrame via TextParser.

    Cell values are normalised per xlrd cell type (dates/times, NaN for
    error cells, bool, float->int collapse when convert_float) and the
    resulting rows are handed to TextParser with the remaining keyword
    options passed through unchanged.
    """
    import xlrd
    from xlrd import (xldate, XL_CELL_DATE, XL_CELL_ERROR,
                      XL_CELL_BOOLEAN, XL_CELL_NUMBER)
    # datemode doubles as the epoch flag: 1 => 1904 system.
    epoch1904 = self.book.datemode
    # xlrd >= 0.9.3 can return datetime objects directly.
    if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
        xlrd_0_9_3 = True
    else:
        xlrd_0_9_3 = False
    if isinstance(sheetname, compat.string_types):
        sheet = self.book.sheet_by_name(sheetname)
    else:  # assume an integer if not a string
        sheet = self.book.sheet_by_index(sheetname)
    data = []
    # Memoised per-column answer to "should this column be parsed?".
    should_parse = {}
    for i in range(sheet.nrows):
        row = []
        for j, (value, typ) in enumerate(
                zip(sheet.row_values(i), sheet.row_types(i))):
            if parse_cols is not None and j not in should_parse:
                should_parse[j] = self._should_parse(j, parse_cols)
            if parse_cols is None or should_parse[j]:
                if typ == XL_CELL_DATE:
                    if xlrd_0_9_3:
                        # Use the newer xlrd datetime handling.
                        value = xldate.xldate_as_datetime(value, epoch1904)
                        # Excel doesn't distinguish between dates and time,
                        # so we treat dates on the epoch as times only.
                        # Also, Excel supports 1900 and 1904 epochs.
                        year = (value.timetuple())[0:3]
                        if ((not epoch1904 and year == (1899, 12, 31)) or
                                (epoch1904 and year == (1904, 1, 1))):
                            value = datetime.time(value.hour,
                                                  value.minute,
                                                  value.second,
                                                  value.microsecond)
                    else:
                        # Use the xlrd <= 0.9.2 date handling.
                        dt = xldate.xldate_as_tuple(value, epoch1904)
                        if dt[0] < datetime.MINYEAR:
                            # Year 0: a pure time-of-day fraction.
                            value = datetime.time(*dt[3:])
                        else:
                            value = datetime.datetime(*dt)
                elif typ == XL_CELL_ERROR:
                    value = np.nan
                elif typ == XL_CELL_BOOLEAN:
                    value = bool(value)
                elif convert_float and typ == XL_CELL_NUMBER:
                    # GH5394 - Excel 'numbers' are always floats
                    # it's a minimal perf hit and less suprising
                    val = int(value)
                    if val == value:
                        value = val
                row.append(value)
        data.append(row)
    if header is not None:
        data[header] = _trim_excel_header(data[header])
    # Delegate index/NA/date handling to the shared text parsing machinery.
    parser = TextParser(data, header=header, index_col=index_col,
                        has_index_names=has_index_names,
                        na_values=na_values,
                        thousands=thousands,
                        parse_dates=parse_dates,
                        date_parser=date_parser,
                        skiprows=skiprows,
                        skip_footer=skip_footer,
                        chunksize=chunksize,
                        **kwds)
    return parser.read()
def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
                 index_col=None, has_index_names=None, parse_cols=None,
                 parse_dates=False, date_parser=None, na_values=None,
                 thousands=None, chunksize=None, convert_float=True,
                 **kwds):
    """Read one sheet of self.book into a DataFrame via TextParser.

    Cell values are normalised per xlrd cell type (dates/times, NaN for
    error cells, bool, float->int collapse when convert_float) and the
    resulting rows are handed to TextParser with the remaining keyword
    options passed through unchanged.
    """
    import xlrd
    from xlrd import (xldate, XL_CELL_DATE, XL_CELL_ERROR,
                      XL_CELL_BOOLEAN, XL_CELL_NUMBER)
    # datemode doubles as the epoch flag: 1 => 1904 system.
    epoch1904 = self.book.datemode
    # xlrd >= 0.9.3 can return datetime objects directly.
    if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
        xlrd_0_9_3 = True
    else:
        xlrd_0_9_3 = False
    if isinstance(sheetname, compat.string_types):
        sheet = self.book.sheet_by_name(sheetname)
    else:  # assume an integer if not a string
        sheet = self.book.sheet_by_index(sheetname)
    data = []
    # Memoised per-column answer to "should this column be parsed?".
    should_parse = {}
    for i in range(sheet.nrows):
        row = []
        for j, (value, typ) in enumerate(zip(sheet.row_values(i),
                                             sheet.row_types(i))):
            if parse_cols is not None and j not in should_parse:
                should_parse[j] = self._should_parse(j, parse_cols)
            if parse_cols is None or should_parse[j]:
                if typ == XL_CELL_DATE:
                    if xlrd_0_9_3:
                        # Use the newer xlrd datetime handling.
                        value = xldate.xldate_as_datetime(value, epoch1904)
                        # Excel doesn't distinguish between dates and time,
                        # so we treat dates on the epoch as times only.
                        # Also, Excel supports 1900 and 1904 epochs.
                        year = (value.timetuple())[0:3]
                        if ((not epoch1904 and year == (1899, 12, 31)) or
                                (epoch1904 and year == (1904, 1, 1))):
                            value = datetime.time(value.hour,
                                                  value.minute,
                                                  value.second,
                                                  value.microsecond)
                    else:
                        # Use the xlrd <= 0.9.2 date handling.
                        dt = xldate.xldate_as_tuple(value, epoch1904)
                        if dt[0] < datetime.MINYEAR:
                            # Year 0: a pure time-of-day fraction.
                            value = datetime.time(*dt[3:])
                        else:
                            value = datetime.datetime(*dt)
                elif typ == XL_CELL_ERROR:
                    value = np.nan
                elif typ == XL_CELL_BOOLEAN:
                    value = bool(value)
                elif convert_float and typ == XL_CELL_NUMBER:
                    # GH5394 - Excel 'numbers' are always floats
                    # it's a minimal perf hit and less suprising
                    val = int(value)
                    if val == value:
                        value = val
                row.append(value)
        data.append(row)
    if header is not None:
        data[header] = _trim_excel_header(data[header])
    # Delegate index/NA/date handling to the shared text parsing machinery.
    parser = TextParser(data, header=header, index_col=index_col,
                        has_index_names=has_index_names,
                        na_values=na_values,
                        thousands=thousands,
                        parse_dates=parse_dates,
                        date_parser=date_parser,
                        skiprows=skiprows,
                        skip_footer=skip_footer,
                        chunksize=chunksize,
                        **kwds)
    return parser.read()
def _parse_cell(xcell, epoch1904=False):
    """
    Parse a xl-xcell.

    :param xlrd.Cell xcell:
        an excel xcell
    :type xcell: xlrd.sheet.Cell

    :param epoch1904:
        Which date system was in force when this file was last saved.
        False => 1900 system (the Excel for Windows default).
        True => 1904 system (the Excel for Macintosh default).
    :type epoch1904: bool

    :return: formatted xcell value
    :rtype:
        int, float, datetime.datetime, bool, None, str, datetime.time,
        float('nan')

    Examples::

        >>> import xlrd
        >>> from xlrd.sheet import Cell
        >>> _parse_cell(Cell(xlrd.XL_CELL_NUMBER, 1.2))
        1.2
        >>> _parse_cell(Cell(xlrd.XL_CELL_DATE, 1.2))
        datetime.datetime(1900, 1, 1, 4, 48)
        >>> _parse_cell(Cell(xlrd.XL_CELL_TEXT, 'hi'))
        'hi'
    """
    kind = xcell.ctype
    value = xcell.value

    if kind == XL_CELL_NUMBER:
        # GH5394 - Excel 'numbers' are always floats
        # it's a minimal perf hit and less suprising
        as_int = int(value)
        return as_int if as_int == value else value

    if kind in (XL_CELL_EMPTY, XL_CELL_BLANK):
        return None  # RECT-LOOP NEVER USE THIS

    if kind == XL_CELL_TEXT:
        return value

    if kind == XL_CELL_BOOLEAN:
        return bool(value)

    if kind == XL_CELL_DATE:  # modified from Pandas library
        if _xlrd_0_9_3:
            # Use the newer xlrd datetime handling.
            converted = xldate.xldate_as_datetime(value, epoch1904)

            # Excel doesn't distinguish between dates and time, so we treat
            # dates on the epoch as times only. Also, Excel supports 1900
            # and 1904 epochs.
            epoch = (1904, 1, 1) if epoch1904 else (1899, 12, 31)
            if converted.timetuple()[0:3] == epoch:
                converted = datetime.time(converted.hour,
                                          converted.minute,
                                          converted.second,
                                          converted.microsecond)
        else:
            # Use the xlrd <= 0.9.2 date handling.
            parts = xldate.xldate_as_tuple(xcell.value, epoch1904)
            if parts[0] < datetime.MINYEAR:  # time
                converted = datetime.time(*parts[3:])
            else:  # date
                converted = datetime.datetime(*parts)
        return converted

    if kind == XL_CELL_ERROR:
        return float("nan")

    raise ValueError("Invalid XL-cell type(%s) for value(%s)!" % (
        xcell.ctype, xcell.value))
def parse_minutegram(msheet, csheet, sw, user):
    # Build a Work object from the "minutogram" spreadsheet (msheet), copy
    # the pre-validated scheduling data from `sw`, attach the work to a
    # WorkGroup (cancelling a previous group with the same number), load its
    # work/contingency plan rows, and finally parse the corporate clients
    # sheet (csheet). Raises IntegrityError with __cause__ set to a Spanish
    # human-readable message on every validation failure.
    work = Work()
    # Cell (0, 7) holds the mandatory work number.
    if msheet.cell(0,7).value == '':
        e = IntegrityError()
        e.__cause__="El trabajo no tiene numero"
        raise e
    else:
        work.number = msheet.cell(0, 7).value
    # Locate each mandatory section header in column 1 of the sheet.
    if column_value_search(1, 'DESCRIPCION TP:', msheet):
        drow = column_value_search(1, 'DESCRIPCION TP:', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion DESCRIPCION TP"
        raise e
    if column_value_search(1, 'JUSTIFICACION: ', msheet):
        jrow = column_value_search(1, 'JUSTIFICACION: ', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion JUSTIFICACION"
        raise e
    if column_value_search(1, 'OBSERVACIONES:', msheet):
        orow = column_value_search(1, 'OBSERVACIONES:', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion OBSERVACIONES"
        raise e
    if column_value_search(1, 'PLAN DE TRABAJO (MINUTOGRAMA):', msheet):
        wprow = column_value_search(1, 'PLAN DE TRABAJO (MINUTOGRAMA):', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion PLAN DE TRABAJO"
        raise e
    if column_value_search(1, 'PLAN DE CONTINGENCIA / ROLLBACK:', msheet):
        cprow = column_value_search(1, 'PLAN DE CONTINGENCIA / ROLLBACK:', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion PLAN DE CONTINGENCIA / ROLLBACK"
        raise e
    # This block assigns the data extracted from the form to the new work.
    work.ticketArea = sw.ticketArea
    work.department = sw.department
    work.municipality = sw.municipality
    work.impact = sw.impact
    work.ticketCause = sw.ticketCause
    work.initialDate = sw.initialDate
    work.finalDate = sw.finalDate
    work.outboundDate = sw.outboundDate
    work.createdDate = datetime.date.today()
    work.affectTime = sw.affectTime
    work.rollbackTime = sw.rollbackTime
    now = timezone.make_aware(datetime.datetime.now(), timezone.get_default_timezone())
    # If the cause's allowed response time is expressed in hours it must be
    # assigned to an internal area, never an external one.
    if sw.ticketCause.timeLapseType == Cause.HOURS and sw.ticketArea.type == Area.INTERN:
        if now + datetime.timedelta(days=1, hours=sw.ticketCause.internTimeLapse) <= sw.initialDate:
            work.limitResponseDate = now + datetime.timedelta(days=1, hours=sw.ticketCause.internTimeLapse)
        else:
            e = IntegrityError()
            e.__cause__="El tiempo maximo de respuesta de los clientes es mas tarde que la fecha de inicio del trabajo"
            raise e
    elif sw.ticketCause.timeLapseType == Cause.HOURS and sw.ticketArea.type == Area.EXTERN:
        e = IntegrityError()
        e.__cause__="La Causa del ticket no puede asignarse a un area externa"
        raise e
    elif sw.ticketCause.timeLapseType == Cause.DAYS and sw.ticketArea.type == Area.INTERN:
        if now + datetime.timedelta(days=1+sw.ticketCause.internTimeLapse) <= sw.initialDate:
            work.limitResponseDate = now + datetime.timedelta(days=1+sw.ticketCause.internTimeLapse)
        else:
            e = IntegrityError()
            e.__cause__="El tiempo maximo de respuesta de los clientes es mas tarde que la fecha de inicio del trabajo"
            raise e
    # NOTE(review): this condition repeats DAYS/Area.INTERN, so the branch
    # below is unreachable; it uses externTimeLapse, which suggests
    # Area.EXTERN was intended — confirm and fix upstream.
    elif sw.ticketCause.timeLapseType == Cause.DAYS and sw.ticketArea.type == Area.INTERN:
        if now + datetime.timedelta(days=1+sw.ticketCause.externTimeLapse) <= sw.initialDate:
            work.limitResponseDate = now + datetime.timedelta(days=1+sw.ticketCause.externTimeLapse)
        else:
            e = IntegrityError()
            e.__cause__="El tiempo maximo de respuesta de los clientes es mas tarde que la fecha de inicio del trabajo"
            raise e
    # Attach the logged-in user to the work.
    if user:
        work.userCreator = user
    #-------------------------------------------------------------------------------
    work.description = msheet.cell(drow+1, 1).value
    work.justification = msheet.cell(jrow+1, 1).value
    work.observations = msheet.cell(orow+1, 1).value
    # Reuse an existing WorkGroup with this number (reprogramming case):
    # cancel all of its previous works and invalidate their acceptances;
    # otherwise create a brand new group.
    try:
        group = WorkGroup.objects.get(number = work.number)
        for w in group.work_set.all():
            w.state = Work.CANCELED
            for acc in w.acceptance_set.all():
                acc.valid = False
                acc.save()
            w.save()
        work.group = group
        work.programmed = Work.REPROGRAMMED
    # NOTE(review): bare except — it also swallows database errors, not just
    # WorkGroup.DoesNotExist.
    except:
        group = WorkGroup()
        group.number = work.number
        group.save()
        work.group = group
    work.save()
    #loads work plans
    for i in range(wprow+2,cprow):
        if check_line(i, 2, 6, msheet):
            wp = WorkPlan()
            wp.work=work
            wp.initialDate = xldate_as_datetime(msheet.cell(i, 2).value, 0)
            wp.finalDate = xldate_as_datetime(msheet.cell(i, 3).value, 0)
            # Column 4 holds a duration as an Excel time serial.
            wp.affectation = datetime.time(*(xldate_as_tuple(msheet.cell(i, 4).value, 0))[3:])
            wp.activity = msheet.cell(i, 5).value
            wp.save()
        else:
            e = IntegrityError()
            e.__cause__="Alguno de los planes de trabajo tiene un campo vacio"
            raise e
    #loads contingency plans
    for i in range(cprow+2, drow-1):
        if check_line(i, 2, 6, msheet):
            cp = ContingencyPlan()
            cp.work=work
            cp.initialDate = xldate_as_datetime(msheet.cell(i, 2).value, 0)
            cp.finalDate = xldate_as_datetime(msheet.cell(i, 3).value, 0)
            cp.affectation = datetime.time(*(xldate_as_tuple(msheet.cell(i, 4).value, 0))[3:])
            cp.activity = msheet.cell(i, 5).value
            cp.save()
        else:
            e = IntegrityError()
            e.__cause__="Alguno de los planes de contingencia tiene un campo vacio"
            raise e
    parse_corp_clients(csheet, work)
#to get the year workbook = xlrd.open_workbook('ted_info_name_title_date.xlsx') worksheet = workbook.sheet_by_name('Sheet1') num_rows = worksheet.nrows - 1 num_cells = worksheet.ncols - 1 curr_row = 0 while curr_row < num_rows: curr_row += 1 row = worksheet.row(curr_row) author_name = worksheet.cell_value(curr_row, 0) talk_title = worksheet.cell_value(curr_row, 1) date = worksheet.cell_value(curr_row, 2) date_as_datetime = xldate.xldate_as_tuple(date, workbook.datemode) year, month, day, hour, minute, second = date_as_datetime print year try: viewcount_dict[author_name + ":" + talk_title].append(year) except: #author/title not in dictionary (because it was one of the weirdly formatted pages) print row continue print len(viewcount_dict) year_viewcount_dict = {}
def run(self):
    # Task body: load the per<MM><YY>.xls workbook from this task's input
    # directory, create a VARCHAR table from the header row (row 2), convert
    # every later row into a quoted SQL value list, and bulk-insert the lot
    # in a single INSERT statement.
    # NOTE(review): values are spliced into SQL by string formatting with
    # hand-rolled quoting ('' and ::) — confirm the spreadsheet source is
    # trusted, or switch to parameterised execution.
    book = open_workbook(os.path.join(
        self.input().path,
        'per{}{}.xls'.format(('0' + str(self.month))[-2:],
                             ('0' + str(self.year))[-2:])),
        formatting_info=True)
    sheet = book.sheets()[0]
    session = current_session()
    allvals = []
    for rownum, row in enumerate(sheet.get_rows()):
        if rownum == 2:
            # Header row: every non-empty cell becomes a VARCHAR column.
            coldefs = [
                '"{}" VARCHAR'.format(cell.value.replace('"', '').strip())
                for cell in row if cell.value
            ]
            session.execute('CREATE TABLE {output} ({coldefs})'.format(
                coldefs=', '.join(coldefs),
                output=self.output().table))
        elif rownum > 2:
            # Have to escape colons as they are interpreted as symbols otherwise
            vals = []
            for cell in row:
                # type 0 is empty
                if cell.ctype == 0:
                    pass
                # type 2 is numeric, which is always a float, even if it
                # should be an integer
                elif cell.ctype == 2:
                    if cell.value == int(cell.value):
                        vals.append(str(int(cell.value)))
                    else:
                        vals.append(str(cell.value))
                # type 3 is date
                elif cell.ctype == 3:
                    vals.append("'{}-{}-{}'".format(
                        *xldate_as_tuple(cell.value, 0)))
                # everything else just pass in as unicode string, unless
                # it's blank, in which case send in NULL
                else:
                    if cell.value:
                        vals.append("'{}'".format(
                            str(cell.value).replace(":", "::").replace(
                                "'", "''")))
                    else:
                        vals.append('NULL')
            # Kill occasional erroneous blank last column
            if vals[-1] == "NULL":
                vals = vals[0:-1]
            # Pad short rows with NULLs to match the column count.
            if len(vals) < len(coldefs):
                vals.extend(["NULL"] * (len(coldefs) - len(vals)))
            if len(vals) != len(coldefs):
                # Row cannot be reconciled with the header: skip it loudly.
                LOGGER.error('FIXME: cannot parse year %s month %s row %s',
                             self.year, self.month, rownum)
                continue
            allvals.append(', '.join(vals))
    try:
        # Single multi-row INSERT: VALUES (row1), (row2), ...
        session.execute('INSERT INTO {output} VALUES ({allvals})'.format(
            output=self.output().table,
            allvals='), ('.join(allvals)))
    except Exception as err:
        print(err)
        raise
def processLogFile(rawDataFile, xlrdLog, medianCollector):
    """Copy one field-logger Excel workbook into the summary CSV.

    Reads the site name from cell B19 of sheet 0, normalizes it via a
    site-alias map, then streams the data rows of sheet 1 into the global
    ``outputCSVSummary`` (prefixing each row with site name and file name),
    tracking the earliest/latest date seen and feeding selected columns to
    ``medianCollector``.  Records the file's stats in the global ``sites``
    dict.  Returns True on success, False when the workbook is rejected.

    Relies on module-level ``statusCallback``, ``verbose``,
    ``calculateMedians`` and ``columnHeaders`` in addition to the declared
    globals.
    """
    global sites, outputCSVSummary
    nRows = 0  # Number rows written to output
    earliestDateSeen = datetime.date.max  # set to HIGHEST date so the first one we encounter is less
    latestDateSeen = datetime.date.min
    ret = True
    if statusCallback:
        statusCallback('processLogFile: Processing '+rawDataFile)
    try:
        book = open_workbook(rawDataFile, logfile=xlrdLog)
    except XLRDError as e:
        statusCallback('Error opening workbook: %s\n' % (str(e)))
        xlrdLog.write('Error opening workbook %s: %s\n' % (rawDataFile, str(e)))
        return False
    sheet = book.sheet_by_index(0)
    # Site is always B19 per Evan
    siteName = sheet.cell_value(rowx=18, colx=1)
    # Some sanity checking - is the book in the expected format?
    if book.nsheets != 2 or not isinstance(siteName, str):
        statusCallback('Workbook not in expected format')
        xlrdLog.write('Workbook %s not in expected format\n' % (rawDataFile))
        return False
    statusCallback('Site name in data file: '+siteName)
    # Map some sitenames together (aliases / historical misspellings)
    siteName = siteName.upper()
    siteMap = {'WHISI': 'WHIRU',
               'WHRZ': 'WHISP',
               'RUTZP.OUT': 'RUTZP.outflow',
               'WHIRS': 'WHISP',
               'WHIRZ': 'WHISP',
               'RUTZPOND': 'RUTZP',
               'YELPER': 'YELPR',
               'LASMO': 'LASOM',
               'RUTZEROUT': 'RUTZP.outflow'}
    siteName = siteMap.get(siteName, siteName)
    if statusCallback:
        statusCallback('Site name: '+siteName)
    # Open the sheet and grab the data, copying it to the output CSV
    try:
        dataSheet = book.sheet_by_index(1)
        if verbose:
            # NOTE(review): statusCallback is called with two arguments
            # here but one everywhere else — looks like a leftover
            # print-style call; confirm statusCallback's signature.
            statusCallback('Sheets: ', book.sheet_names())
    except XLRDError as e:
        statusCallback('Workbook does not have correct number of sheets')
        xlrdLog.write('Error getting data sheet for '+rawDataFile+' - Skipping Workbook for "'+siteName+'"\n')
        return False
    # Switch to data sheet
    sheet = book.sheet_by_index(1)
    # Get first line from sheet and analyze
    numColumns = sheet.ncols  # Number of columns
    # Check if workbook seems to match expected format:
    #   2 sheets
    #   first cell in 2nd sheet has Date in the name
    if sheet.cell_value(rowx=0, colx=0) == u'Date' and book.nsheets == 2:
        # Print header row
        # Check to see if we have one of those books where the data doesn't match - in that case we have to do
        # some magic transposing
        # Three variations:
        # Date Time Temp.[C] pH EC[uS/cm] D.O.[%] D.O.[ppm] Turb.FNU Remarks
        # Date Time Temp.[C] pH D.O.[%] D.O.[ppm] Turb.FNU EC[uS/cm] Remarks
        # Date Time Temp.[C] pH mV[pH] EC[uS/cm] D.O.[%] D.O.[ppm] Turb.FNU Remarks
        # 0    1    2        3  4      5         6       7         8        9
        # We've adopted the third one (which is the most complete) as the standard, so if we
        # detect the others, we have to do move columns around as we emit them to the CSV
        # columns in the right order.
        columnFormatModel = None
        if numColumns > 4 and sheet.cell(0, 4).ctype == 1 and sheet.cell(0, 4).value == 'D.O.[%]':
            columnFormatModel = 1
            # Transpose columns if needed - column indices are in the
            # source sheet - they will be pulled from those source columns
            # and then written to columns 2, 3, 4 etc. - since 0 is the site name
            # and 1 is the file name.  -1 means blank column (skip)
            colOrder = [0, 1, 2, 3, -1, 7, 4, 5, 6, 8]
            statusCallback("This sheet has non-standard column ordering; adjusting columns to match standard.\n")
        elif numColumns > 4 and sheet.cell(0, 4).ctype == 1 and sheet.cell(0, 4).value == 'mV[pH]':
            columnFormatModel = 0  # already the standard layout
            colOrder = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        elif numColumns > 4 and sheet.cell(0, 5).ctype == 1 and sheet.cell(0, 5).value == 'D.O.[%]':
            columnFormatModel = 2
            colOrder = [0, 1, 2, 3, -1, 4, 5, 6, 7, 8]
        else:
            statusCallback('Workbook has non-standard column headers - see columnFormatModel in processLogFile to address - skipping workbook')
            xlrdLog.write('Workbook has non-standard column headers - see columnFormatModel in processLogFile to address - Skipping Workbook for "'+siteName+'"\n')
            return False
        nColsToWrite = len(colOrder)
        for rowIndex in range(1, sheet.nrows):  # Iterate through data rows
            if verbose:
                statusCallback('-'*40)
                statusCallback('Row: %s' % rowIndex)  # Print row number
            # Skip entirely blank rows
            rowIsBlank = True
            for columnIndex in range(1, sheet.ncols):  # Iterate through columns
                val = sheet.cell(rowIndex, columnIndex)
                if (val.ctype != 0 or (val.ctype == 1 and len(str(val)) > 0)):
                    rowIsBlank = False
            if rowIsBlank:
                if verbose:
                    statusCallback('Skipping blank row')
                continue
            else:
                nRows += 1
            nCols = 0  # Number of columns processed - so we can omit the last "," in the CSV output
            for columnIndex in colOrder:  # Iterate through columns
                nCols += 1
                if columnIndex >= numColumns:
                    continue  # don't access columns that don't exist on this sheet
                elif columnIndex == -1:
                    # emit a blank column - this isn't in source sheet
                    outputCSVSummary.write(',')
                    continue
                else:
                    cellValue = sheet.cell(rowIndex, columnIndex)  # Get cell object by row, col
                    cellType = cellValue.ctype
                    # Prefix each row with the site name of the data and the
                    # file name it came from
                    if (columnIndex == 0):
                        outputCSVSummary.write(siteName+','+rawDataFile+',')
                    if verbose:
                        statusCallback('Column: [%s] is [%s] : [%s]' %
                                       (columnIndex, cellType, cellValue))
                    # Somewhere these numeric values have to be defined, right?
                    # 3 == date per https://pythonhosted.org/xlrd3/cell.html - but there are two
                    # in the data, a date and a time. 0 is the date, 1 is the time. Sigh.
                    if (cellType == 3):
                        if (columnIndex == 0):  # Date
                            # Date
                            year, month, day, hour, minute, second = xldate.xldate_as_tuple(cellValue.value, book.datemode)
                            strDate = '%4d-%02d-%02d' % (year, month, day)
                            outputCSVSummary.write(strDate)
                            # Track the date window covered by this file.
                            d = datetime.date(year, month, day)
                            if (d < earliestDateSeen):
                                earliestDateSeen = d
                            if (d > latestDateSeen):
                                latestDateSeen = d
                        else:
                            # Only other date value in input is the Time stamp
                            # Just emit time as is. Remember since we need to add it
                            # to the median finder so we can emit the timestamp in the
                            # DoE Summary.
                            year, month, day, hour, minute, second = xldate.xldate_as_tuple(cellValue.value, book.datemode)
                            strTime = '%02d:%02d:%02d' % (hour, minute, second)
                            outputCSVSummary.write(strTime)
                    elif (cellType in [1, 2]):  # 1 = text, 2 = number
                        # Other column - e.g. ph, Turb.FNU, etc.
                        outputCSVSummary.write(str(cellValue.value))
                        # Do we need to accumulate values for this for median calculation?
                        # Note some measurements have '-----' instead of 0 for missing values
                        # so catch that here by checking only for numeric values
                        if cellType == 1:
                            measurementValue = 0
                        else:
                            measurementValue = cellValue.value
                        # NOTE(review): strDate/strTime are only bound after
                        # the date/time cells have been seen; colOrder always
                        # starts 0, 1 so this holds, but a row whose first
                        # two cells aren't dates would raise NameError here.
                        if calculateMedians[columnIndex]:
                            medianCollector.addMeasurement(siteName, strDate, strTime,
                                                           columnHeaders[columnIndex], measurementValue)
                    else:
                        xlrdLog.write('%s: Unknown value type for [%s,%s] : %s\n' % (rawDataFile, rowIndex, columnIndex, cellType))
                    if (nCols < nColsToWrite):
                        outputCSVSummary.write(',')  # append , except after last column value
            # After emitting all columns, terminate the line in the CSV file
            outputCSVSummary.write('\n')  # Terminate line
    else:
        statusCallback('Wrong # of sheets or first row of 2nd worksheet is not Date.. skipping')
        ret = False
    if statusCallback:
        statusCallback('%d data rows read.' % sheet.nrows)
    if ret:
        # Record this file's stats under its (normalized) site name.
        siteData = SiteData(rawDataFile, minDate=earliestDateSeen, maxDate=latestDateSeen, numRecs=nRows)
        if siteName not in sites:
            # Not in list yet - add a tuple
            statusCallback("This data is for a new site: " + siteName)
            sites[siteName] = [siteData]
        else:
            statusCallback("This is additional data for site: " + siteName)
            sites[siteName].append(siteData)
    return ret
def action_upload(self, action, data):
    """Create, update or delete Event objects from an uploaded Excel file.

    Each data row (from ``DATA_ROW_START`` on, in sheets listed in
    ``SHEETS_TO_HANDLE``) is matched to an existing event through the
    ``UPLOADEDEVENTSKEY`` annotation on the destination folder, which maps
    the spreadsheet's unique id to the event's UID.  Rows marked 'delete'
    remove the event; other non-blank rows create or update one, publish
    it when permitted, and record it back in the annotation.  Progress and
    problems are reported through Plone status messages.
    """
    status = IStatusMessage(self.request)
    excelfile = data.get('excelfile', None)
    num_updated = num_new = num_deleted = 0
    skipped = list()  # titles of rows without a parseable start date
    if not excelfile:
        # print "no file!"
        status.addStatusMessage(_(u"No file was selected!."), type='error')
        return
    try:
        book = xlrd.open_workbook(file_contents=excelfile)
    except:
        # print "not Excel?"
        # Deliberate best-effort: any open failure is reported as a bad file.
        status.addStatusMessage(
            _(u"File was not readable. Check that it is really "
              "in Excel format."), type='error')
        return
    relpath = data['path']
    if relpath.startswith('/'):
        relpath = relpath[1:]
    dest = self.context.restrictedTraverse(relpath, None)
    if not dest:
        status.addStatusMessage(
            u"Could not find destination folder at %s" % relpath,
            type="error")
        return
    catalog = getToolByName(self.context, 'portal_catalog')
    pwt = getToolByName(self.context, 'portal_workflow')
    pvt = getToolByName(self.context, 'portal_vocabularies')
    ann = IAnnotations(dest)
    # Excel-id -> event UID mapping persisted on the destination folder.
    existing_events = ann.get(UPLOADEDEVENTSKEY, dict())
    # NOTE(review): `keys` is a snapshot taken before the loop; ids added
    # during this run are not in it, so a duplicate id later in the file
    # would create a second event — confirm that's acceptable.
    keys = existing_events.keys()
    #valid_categories = catalog.uniqueValuesFor('Subject')
    now = DateTime()
    for sheet in book.sheets():
        if sheet.name not in SHEETS_TO_HANDLE:
            continue
        for i in range(DATA_ROW_START, sheet.nrows):
            row = sheet.row(i)
            do_upload = row[DO_UPLOAD].value
            do_upload = do_upload.strip().lower()
            if not do_upload:
                continue
            title = row[TITLE].value
            uidcell = row[UNIQUEID]
            # Numeric ids arrive as floats; normalize to an int string.
            if uidcell.ctype == XL_CELL_NUMBER:
                uid = str(int(uidcell.value))
            else:
                uid = uidcell.value
            if not title.strip():
                continue
            # Delete or upload?
            if do_upload == 'delete':
                if uid not in keys:
                    # status.addStatusMessage(
                    #     _(u"You are trying to delete an event with "\
                    #     "Excel ID %s and Title '%s', but it does not " \
                    #     "exist in the system. Please check "\
                    #     "your Excel file or delete the desired event " \
                    #     "manually" % (uid, title)), type="error")
                    continue
                brains = catalog(UID=existing_events[uid])
                if not len(brains):
                    status.addStatusMessage(
                        _(u"Existing event with Excel id %s and Title '%s'"
                          " could not be found in the system. It might "
                          "have been deleted manually." % (uid, title)),
                        type="warning")
                else:
                    event = brains[0].getObject()
                    if not event:
                        # Stale mapping: drop it and move on.
                        del existing_events[uid]
                        continue
                    event_url = event.absolute_url()
                    dest._delObject(event.getId())
                    del existing_events[uid]
                    num_deleted += 1
                    status.addStatusMessage(
                        _(u"Event with Excel id %s and Title '%s' at %s "
                          "was deleted." % (uid, title, event_url)),
                        type="warning")
            else:
                startdatecell = row[DATE_START]
                # Start date will have 0:0:0 as default time unless
                # explicitly stated
                if startdatecell.ctype == XL_CELL_DATE:
                    dtuple = xldate_as_tuple(startdatecell.value,
                                             book.datemode)
                    startDate = DateTime("%d/%d/%d %d:%d:%d" % (
                        dtuple[0], dtuple[1], dtuple[2],
                        dtuple[3], dtuple[4], dtuple[5]))
                else:
                    # Sometimes they write dates like "2011 09 12"
                    # Excel doesn't recognise this format, but
                    # DateTime will
                    elems = startdatecell.value.strip().split(' ')
                    if len(elems) == 3:
                        try:
                            startDate = DateTime(startdatecell.value)
                        except:
                            skipped.append(title)
                            continue
                    else:
                        skipped.append(title)
                        continue
                # by default, end date = start date 23:55, unless
                # otherwise stated
                endDate = DateTime(startDate.strftime('%Y/%m/%d 23:55:00'))
                enddatecell = row[DATE_END]
                if enddatecell.ctype == XL_CELL_DATE:
                    dtuple = xldate_as_tuple(enddatecell.value,
                                             book.datemode)
                    endTime = '%d:%d:%d' % (
                        dtuple[3], dtuple[4], dtuple[5])
                    if endTime == '0:0:0':
                        endTime = '23:55:00'
                    endDate = DateTime("%d/%d/%d %s" % (
                        dtuple[0], dtuple[1], dtuple[2], endTime))
                else:
                    try:
                        # Sometimes they write dates like "2011 09 12"
                        # Excel doesn't recognise this format, but
                        # DateTime will.  We try to create a DateTime out
                        # of it, but set the time to 23:55
                        endDate = DateTime(enddatecell.value)
                        endDate = DateTime(
                            endDate.strftime('%Y/%m/%d 23:55:00'))
                    # if end date can't be created, just ignore it
                    except:
                        pass
                if endDate < now:
                    # Skipping, because the event is already over
                    continue
                description = row[ORGANISATION].value
                url = row[WEBSITE].value
                country = row[COUNTRY].value
                location = row[CITY].value
                if country.strip() != '':
                    location = "%s %s" % (country, location)
                subcategory = row[CATEGORY].value
                if subcategory:
                    # make sure we don't save bogus values
                    subcategory = [x.strip() for x in subcategory.split(',')]
                    vocab = getattr(pvt, 'Subcategory', None)
                    if not vocab:
                        subcategory = ''
                    else:
                        subcategory = [x for x in subcategory
                                       if vocab.getTermByKey(x)]
                isNew = False

                def createNewEvent(dest, uid):
                    # Build a (hopefully unique) id from the timestamp + uid.
                    # NOTE(review): '%Y%M%d%H%m%s' mixes up month/minute and
                    # uses %s — likely meant '%Y%m%d%H%M%S'; confirm against
                    # existing content ids before changing.
                    id = "event-%s-%s" % (
                        DateTime().strftime('%Y%M%d%H%m%s'), uid)
                    dest.invokeFactory(type_name='Event', id=id)
                    # transaction.commit()
                    event = getattr(dest, id)
                    plt = getToolByName(dest, 'portal_languages')
                    if plt.startNeutral() and event.Language() != '':
                        event.setLanguage('')
                    return event

                if uid not in keys:
                    event = createNewEvent(dest, uid)
                    isNew = True
                else:
                    brains = catalog(UID=existing_events[uid])
                    if not len(brains):
                        status.addStatusMessage(
                            _(u"Existing event with Excel id %s (%s) could"
                              " not be found. It will be created anew." % (
                                  uid, title)), type="error")
                        del existing_events[uid]
                        event = createNewEvent(dest, uid)
                        isNew = True
                    else:
                        event = brains[0].getObject()
                        if not event:
                            status.addStatusMessage(
                                _(u"Reference to exiting event wit UID %s "
                                  "is broken." % existing_events[uid]),
                                type="error")
                            del existing_events[uid]
                            event = createNewEvent(dest, uid)
                            isNew = True
                # Push the row's values onto the event through its schema.
                values = dict(
                    title=title,
                    description=description,
                    location=location,
                    eventUrl=url,
                    country=country,
                    startDate=startDate,
                    endDate=endDate,
                    subcategory=subcategory)
                for k, v in values.items():
                    field = event.getField(k)
                    if not field:
                        continue
                    field.getMutator(event)(v)
                if isNew:
                    event.unmarkCreationFlag()
                    event._renameAfterCreation()
                    num_new += 1
                else:
                    event.reindexObject()
                    num_updated += 1
                existing_events[uid] = event.UID()
                try:
                    if (pwt.getInfoFor(event, 'review_state')
                            != 'published'):
                        pwt.doActionFor(event, 'publish')
                except:
                    # user does not have permissions for this
                    pass
                verb = isNew and 'Created' or 'Updated'
                status.addStatusMessage(
                    _(u'%(verb)s event " %(title)s" at %(url)s' % dict(
                        verb=verb, url=event.absolute_url(), title=title)),
                    type=verb)
                # print [startDate], title, uid
    # Persist the updated id->UID mapping on the folder.
    ann[UPLOADEDEVENTSKEY] = existing_events
    dest.reindexObject()
    # print "uploaded!"
    status.addStatusMessage(
        _(u"Upload and event creation was successful. %s events were "
          "created, %d updated and %d deleted." % (
              num_new, num_updated, num_deleted)), type='info')
    if len(skipped):
        status.addStatusMessage(
            u'The following entries do not have a valid start date and '
            'were skipped: %s' % ', '.join(skipped), type='warning')
def _parse_cell(xcell, epoch1904=False):
    """
    Parse a xl-xcell.

    :param xlrd.Cell xcell: an excel xcell
    :type xcell: xlrd.sheet.Cell
    :param epoch1904:
        Which date system was in force when this file was last saved.
        False => 1900 system (the Excel for Windows default).
        True => 1904 system (the Excel for Macintosh default).
    :type epoch1904: bool
    :return: formatted xcell value
    :rtype:
        int, float, datetime.datetime, bool, None, str, datetime.time,
        float('nan')

    Examples::

        >>> import xlrd
        >>> from xlrd.sheet import Cell
        >>> _parse_cell(Cell(xlrd.XL_CELL_NUMBER, 1.2))
        1.2
        >>> _parse_cell(Cell(xlrd.XL_CELL_DATE, 1.2))
        datetime.datetime(1900, 1, 1, 4, 48)
        >>> _parse_cell(Cell(xlrd.XL_CELL_TEXT, 'hi'))
        'hi'
    """
    kind = xcell.ctype
    raw = xcell.value

    if kind == XL_CELL_NUMBER:
        # GH5394 - Excel 'numbers' are always floats; collapse exact
        # integers back to int (minimal perf hit, less surprising).
        as_int = int(raw)
        return as_int if as_int == raw else raw

    if kind in (XL_CELL_EMPTY, XL_CELL_BLANK):
        return None  # RECT-LOOP NEVER USE THIS

    if kind == XL_CELL_TEXT:
        return raw

    if kind == XL_CELL_BOOLEAN:
        return bool(raw)

    if kind == XL_CELL_DATE:
        # modified from Pandas library
        if _xlrd_0_9_3:
            # Newer xlrd: decode straight to a datetime.  Excel doesn't
            # distinguish between dates and times, so a value sitting on
            # the epoch day (1900 or 1904 system) is treated as a plain
            # time-of-day.
            stamp = xldate.xldate_as_datetime(raw, epoch1904)
            epoch = (1904, 1, 1) if epoch1904 else (1899, 12, 31)
            if tuple(stamp.timetuple())[0:3] == epoch:
                return datetime.time(stamp.hour, stamp.minute,
                                     stamp.second, stamp.microsecond)
            return stamp
        # Older xlrd (<= 0.9.2): tuple API; a year below MINYEAR marks a
        # time-only serial.
        parts = xldate.xldate_as_tuple(xcell.value, epoch1904)
        if parts[0] < datetime.MINYEAR:  # time
            return datetime.time(*parts[3:])
        return datetime.datetime(*parts)  # date

    if kind == XL_CELL_ERROR:
        return float('nan')

    raise ValueError('Invalid XL-cell type(%s) for value(%s)!' %
                     (xcell.ctype, xcell.value))