Example #1
0
 def test_time_as_tuple(self):
     time = xldate.xldate_as_tuple(.273611, DATEMODE)
     self.assertEqual(time, (0, 0, 0, 6, 34, 0))
     time = xldate.xldate_as_tuple(.538889, DATEMODE)
     self.assertEqual(time, (0, 0, 0, 12, 56, 0))
     time = xldate.xldate_as_tuple(.741123, DATEMODE)
     self.assertEqual(time, (0, 0, 0, 17, 47, 13))
Example #2
0
 def test_date_as_tuple(self):
     date = xldate.xldate_as_tuple(2741., DATEMODE)
     self.assertEqual(date, (1907, 7, 3, 0, 0, 0))
     date = xldate.xldate_as_tuple(38406., DATEMODE)
     self.assertEqual(date, (2005, 2, 23, 0, 0, 0))
     date = xldate.xldate_as_tuple(32266., DATEMODE)
     self.assertEqual(date, (1988, 5, 3, 0, 0, 0))
Example #3
0
def test_time_as_tuple():
    time = xldate.xldate_as_tuple(.273611, DATEMODE)
    assert time == (0, 0, 0, 6, 34, 0)
    time = xldate.xldate_as_tuple(.538889, DATEMODE)
    assert time == (0, 0, 0, 12, 56, 0)
    time = xldate.xldate_as_tuple(.741123, DATEMODE)
    assert time == (0, 0, 0, 17, 47, 13)
Example #4
0
def test_date_as_tuple():
    date = xldate.xldate_as_tuple(2741., DATEMODE)
    assert date == (1907, 7, 3, 0, 0, 0)
    date = xldate.xldate_as_tuple(38406., DATEMODE)
    assert date == (2005, 2, 23, 0, 0, 0)
    date = xldate.xldate_as_tuple(32266., DATEMODE)
    assert date == (1988, 5, 3, 0, 0, 0)
Example #5
0
 def test_date_as_tuple(self):
     date = xldate.xldate_as_tuple(2741., DATEMODE)
     self.assertEqual(date, (1907, 7, 3, 0, 0, 0))
     date = xldate.xldate_as_tuple(38406., DATEMODE)
     self.assertEqual(date, (2005, 2, 23, 0, 0, 0))
     date = xldate.xldate_as_tuple(32266., DATEMODE)
     self.assertEqual(date, (1988, 5, 3, 0, 0, 0))
Example #6
0
 def test_time_as_tuple(self):
     time = xldate.xldate_as_tuple(.273611, DATEMODE)
     self.assertEqual(time, (0, 0, 0, 6, 34, 0))
     time = xldate.xldate_as_tuple(.538889, DATEMODE)
     self.assertEqual(time, (0, 0, 0, 12, 56, 0))
     time = xldate.xldate_as_tuple(.741123, DATEMODE)
     self.assertEqual(time, (0, 0, 0, 17, 47, 13))
Example #7
0
def setAttendaceStatus(c1, c2, date, userRec):
    date = str(int(date))  # date is float
    time10Clock = strToTime(monthStr + date + ' ' + '10:00:00')
    time12Clock = strToTime(monthStr + date + ' ' + '12:00:00')
    time13Clock = strToTime(monthStr + date + ' ' + '13:00:00')
    time18Clock = strToTime(monthStr + date + ' ' + '18:00:00')
    # if out in morning and back in afternoon, if offTime < 18:30, it's a early
    time1830Clock = strToTime(monthStr + date + ' ' + '18:30:00')
    time19Clock = strToTime(monthStr + date + ' ' + '19:00:00')
    if(isinstance(c1, float)):  # use column date to be compatible with '09:11:11'
        t1 = xldate_as_tuple(c1, 0)
        c1 = strToTime(monthStr + date + ' ' + str(t1[3]) + ':' + str(t1[4]) + ':' + str(t1[5]))
    if(isinstance(c2, float)):
        t2 = xldate_as_tuple(c2, 0)
        c2 = strToTime(monthStr + date + ' ' + str(t2[3]) + ':' + str(t2[4]) + ':' + str(t2[5]))

#    if(isinstance(c1, datetime)):
#        time1Str = c1.strftime('%Y/%m/%d %H:%M:%S')
#    if(isinstance(c2, datetime)):
#        time2Str = c2.strftime('%Y/%m/%d %H:%M:%S')

    if(isinstance(c1, datetime)):
        if(isinstance(c2, datetime)):
            userRec.totalTime += (c2 - c1).total_seconds() - 3600  # - 1 hour lunch time

            if((c1 - time10Clock).total_seconds() >= 0):
                userRec.late += 1

            flag18Clock = c2 and ((c2 - time18Clock).total_seconds() < 0)  # early
            flag19Clock = c2 and ((c2 - time19Clock).total_seconds() >= 0)  # must not early
            flag9Hours = c1 and c2 and ((c2 - c1).total_seconds() < WORKSECONDS)  #early
            if((not c2) or flag18Clock or (not flag19Clock and flag9Hours)):
                userRec.early += 1

#            userRec.recDict[date] = [time1Str, time2Str]
        elif(isinstance(c2, unicode)):
            if(stateDictCn2En.has_key(c2)):
                prop = stateDictCn2En[c2]
                setattr(userRec, prop, getattr(userRec, prop) + 1)
            if(c2 == STATE_GAME):
                userRec.totalTime += WORKSECONDS_STD
            else:
                userRec.totalTime += (time12Clock - c2).total_seconds()
    elif(isinstance(c1, unicode)):
        if(stateDictCn2En.has_key(c1)):
            prop = stateDictCn2En[c1]
            setattr(userRec, prop, getattr(userRec, prop) + 1)
        if(c2 and stateDictCn2En.has_key(c2)):
            prop = stateDictCn2En[c2]
            setattr(userRec, prop, getattr(userRec, prop) + 1)
        elif(isinstance(c2, datetime)):
            if(prop == STATE_OUT):
                if((c2 - time1830Clock).total_seconds() < 0):
                    userRec.early += 1

            userRec.totalTime += (c2 - time13Clock).total_seconds()
    def parse_excel_cell(self, book, cell, column_name):
        """Return (AccountEntry attr, value) or None.

        Raise an exception in the event of a parse error.
        """
        if column_name in ('date', 'entry date', 'entry_date'):
            if isinstance(cell.value, (int, float)):
                tup = xldate_as_tuple(cell.value, book.datemode)
                parsed = datetime.date(*tup[:3])
            else:
                parsed = dateutil.parser.parse(str(cell.value).strip()).date()
            return 'entry_date', parsed

        if column_name in ('amount', 'delta'):
            parsed = parse_amount(str(cell.value).strip(),
                                  currency=self.currency)
            return 'delta', parsed

        if column_name in ('description', 'desc'):
            return 'description', str(cell.value).strip()

        if column_name == 'sign':
            v = str(cell.value).strip().lower()
            # Treat it as a liability account: credit = increase.
            if v in ('+', 'c', 'cr', 'credit', 'deposit'):
                return 'sign', 1
            elif v in ('-', 'd', 'dr', 'debit', 'withdrawal'):
                return 'sign', -1

        return None
Example #9
0
 def __init__(self, row, fieldnames=None, book=None):
     super(Row, self).__init__()
     for fieldname, cell in zip(fieldnames, row):
         if cell.ctype == 3:
             self[fieldname] = datetime(
                 *xldate_as_tuple(cell.value, book.datemode))
         else:
             self[fieldname] = cell.value
Example #10
0
 def clean(cell):
     if cell.ctype == 3:
         # todo: get feedback on date formatting
         parts = xldate_as_tuple(cell.value, 0)[0:3]
         dt = datetime(*parts)
         val = dt.strftime(date_format)
         return val
     elif cell.ctype == 2:
         return (int(cell.value))
     else:
         return cell.value
Example #11
0
 def convert_cell_to_time_to_float(self, time):
     year, month, day, hour, minute, second = xldate.xldate_as_tuple(
         time, 0)
     #py_date = datetime.datetime(year, month, day, hour, minute)
     #logging.info(py_date)
     my_time_old = datetime.time(hour // 3600, (minute % 3600) / 60,
                                 second % 60)
     my_time = hour + minute / 60.0  #datetime.time(hour // 3600, (minute % 3600) / 60, second % 60)
     logging.info('-------------old--------------')
     logging.info(my_time_old)
     logging.info('-------------new--------------')
     logging.info(my_time)
     return my_time
Example #12
0
        def _parse_cell(cell_contents, cell_typ):
            """
            converts the contents of the cell into a pandas appropriate object
            """
            if cell_typ == XL_CELL_DATE:

                # Use the newer xlrd datetime handling.
                try:
                        cell_contents_orgin = cell_contents
                        cell_contents = \
                            xldate.xldate_as_datetime(cell_contents,
                                                      epoch1904)
                        # distinguish between dates and time in mysql
                        try:
                            cell_contents_tuple = xldate.xldate_as_tuple(cell_contents_orgin, epoch1904)
                            if cell_contents_tuple[3:6] == (0, 0, 0):
                                cell_contents = cell_contents.date()
                        except:
                            cell_contents = \
                            xldate.xldate_as_datetime(cell_contents_orgin,
                                                      epoch1904)
                except OverflowError:
                    return cell_contents

                # Excel doesn't distinguish between dates and time,
                # so we treat dates on the epoch as times only.
                # Also, Excel supports 1900 and 1904 epochs.
                year = (cell_contents.timetuple())[0:3]
                if (not epoch1904 and year == (1899, 12, 31)) or (
                    epoch1904 and year == (1904, 1, 1)
                ):
                    cell_contents = time(
                        cell_contents.hour,
                        cell_contents.minute,
                        cell_contents.second,
                        cell_contents.microsecond,
                    )

            elif cell_typ == XL_CELL_ERROR:
                cell_contents = np.nan
            elif cell_typ == XL_CELL_BOOLEAN:
                cell_contents = bool(cell_contents)
            elif convert_float and cell_typ == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less surprising
                val = int(cell_contents)
                if val == cell_contents:
                    cell_contents = val
            return cell_contents
Example #13
0
def do_format(engine, resource_id, table_id):
    table_suffix = '%s_table%s' % (resource_id, table_id)

    table = sl.get_table(engine, 'spending_%s' % table_suffix)

    date_field_values = defaultdict(list)
    for row in sl.all(engine, table):
        for date_field in DATE_FIELDS:
            if date_field in row and row[date_field]:
                date_field_values[date_field].append(row[date_field])

    date_field_formats = {}
    for date_field, values in date_field_values.items():
        date_field_formats[date_field] = detect_date_format(values)

    for row in sl.all(engine, table):

        for numeric_field in NUMERIC_FIELDS:
            try:
                val = row.get(numeric_field)
                if val is None:
                    continue
                val = "".join([v for v in val if v in "-.0123456789"])
                row[numeric_field + 'AsFloat'] = float(val)
            except Exception as e:
                print e

        for date_field, format_ in date_field_formats.items():
            if format_ is None:
                continue
            try:
                if row[date_field] is None:
                    continue
                if format_ == 'excel':
                    # Deciphers excel dates that have been mangled into integers by formatting errors
                    parsed = datetime(*xldate_as_tuple(float(row[date_field].strip()), 0))
                else:
                    parsed = datetime.strptime(row[date_field].strip(), format_)
                row[date_field + 'AsDate'] = parsed.strftime("%Y-%m-%d")
            except Exception as e:
                print e

        sl.upsert(engine, table, row, ['id'])
def get_seats(sloc: str) -> list:
    from xlrd.xldate import xldate_as_tuple
    wb = xlrd.open_workbook(sloc)
    sheet = wb.sheet_by_name("OO SPX")
    seats = []
    for row in range(1, sheet.nrows):
        seat = sheet.cell_value(row, 4)
        MM_acr = sheet.cell_value(row, 2)
        firm = sheet.cell_value(row, 6)
        if seat != '':
            adate = int(sheet.cell_value(row, 0))
            adttup = xldate_as_tuple(adate, 0)
            year, month = adttup[0:2]
            #            print(year, month, seat)
            # create a two-element list and add to seats list
            new_elem = [str(year) + "_" + str(month), seat, MM_acr, firm]
            if new_elem not in seats:
                seats.append(new_elem)
    return seats
Example #15
0
def importar_(archivo):
    from xlrd import open_workbook
    import datetime
    tabla = "conexiones_por_deuda"
    book = open_workbook(archivo)
    sheet = book.sheet_by_index(0)
    campos = db[tabla]._fields[1:]
    db[tabla].truncate('RESTART IDENTITY CASCADE')

    for row in range(4, sheet.nrows):
        valores = sheet.row_values(row, 1)
        from xlrd import xldate

        dict_ = dict(zip(campos, valores))
        dict_["servicio"] = dict_["servicio"].replace(" ", "")
        fecha = xldate.xldate_as_tuple(dict_["fecha_conexion"], 0)
        dict_["fecha_conexion"] = datetime.datetime(*fecha)

        db[tabla].insert(**dict_)
Example #16
0
def apply(row, field_formats, stats):
    today = datetime.now()
    for field, formats in field_formats.items():
        try:
            value = row.get(field)
            if value in (None, ''):
                stats[field].add_spending('Empty', row)
                continue
            parsed = None
            # Try parsing
            for format_ in formats:
                try:
                    if format_ == 'excel':
                        # Deciphers excel dates that have been mangled into integers by
                        # formatting errors
                        parsed = datetime(
                            *xldate_as_tuple(float(field.strip()), 0))
                    else:
                        parsed = datetime.strptime(value.strip(), format_)
                    break
                except Exception, e:
                    pass
            if not parsed:
                row[field + 'Formatted'] = None
                row['valid'] = False
                stats[field].add_spending('Parse error', row, value)
                continue
            # Check it is not in the future - an obvious mistake
            if parsed > today:
                row[field + 'Formatted'] = None
                row['valid'] = False
                stats[field].add_spending('Date in the future', row, parsed)
                continue
            formatted_date = parsed.strftime("%Y-%m-%d")
            stats[field].add_spending('Parsed ok', row, value)
            row[field + 'Formatted'] = formatted_date
        except Exception as e:
            row[field + 'Formatted'] = None
            row['valid'] = False
            stats[field].add_spending('Exception %s' % e.__class__.__name__,
                                      row)
            log.exception(e)
Example #17
0
 def generate_list(self, book, log_out):
     sheet = book.sheet_by_index(0)
     rowIndex = -1
 
     try:
         for cell in sheet.col(0):
             rowIndex += 1
             cells = sheet.row(rowIndex)
             row_tmp = []
             colIndex = -1
             
             # iterate on the row
             for cell in cells:
                 if rowIndex > 1: # skip first 2 row 
                     colIndex +=1
                     
                     # user name/surname
                     if (colIndex < LOG_GATE_ACTION):                
                         row_tmp.append(cell.value)
                     # entry exit of main gate
                     elif colIndex == LOG_GATE_ACTION:
                         if cell.value == user_entry:
                             row_tmp.append(MAIN_GATE_ENTRY)
                         elif cell.value == user_exit:
                             row_tmp.append(MAIN_GATE_EXIT)
                         else:
                             row_tmp.append(OTHER_GATES)
                     # date - time log
                     elif colIndex == LOG_DATE:
                         try:
                             datetime_value = datetime(*xldate.xldate_as_tuple(cell.value, 0))
                             row_tmp.append(datetime_value)
                         except ValueError:
                             pass
                     
             log_out.append(row_tmp)
             
     except TypeError:
         pass
 
     book.unload_sheet(0)        
Example #18
0
    def extraer_fec_nac(self, cell):
        """Extrae la fecha de nacimiento de una celda en el formato correcto"""

        if not self.text_dates:
            fec_nac = xldate_as_datetime(cell.value, self.book.datemode)
        else:
            try:
                if cell.ctype == XL_CELL_DATE:
                    tup_aux_fec_nac = xldate_as_tuple(cell.value,
                                                      self.book.datemode)

                    # print tup_aux_fec_nac
                    tup_fec_nac = [tup_aux_fec_nac[2], tup_aux_fec_nac[1],
                                   tup_aux_fec_nac[0]]

                    try:
                        dia = int(tup_fec_nac[self.pos_dia])
                        mes = int(tup_fec_nac[self.pos_mes])
                        ano = int(tup_fec_nac[self.pos_ano])
                        fec_nac = datetime(ano, mes, dia)
                    except ValueError:
                        fec_nac = xldate_as_datetime(cell.value,
                                                     self.book.datemode)
                else:

                    # Cell type es texto

                    tup_fec_nac = re.findall('\d+', cell.value)

                    dia = int(tup_fec_nac[self.pos_dia])
                    mes = int(tup_fec_nac[self.pos_mes])
                    ano = int(tup_fec_nac[self.pos_ano])

                    if ano <= datetime.today().year - 100:
                        raise Exception

                    fec_nac = datetime(ano, mes, dia)
            except Exception:
                raise
                return cell.value
        return fec_nac
Example #19
0
        def _parse_cell(cell_contents,cell_typ):
            """converts the contents of the cell into a pandas
               appropriate object"""

            if cell_typ == XL_CELL_DATE:
                if xlrd_0_9_3:
                    # Use the newer xlrd datetime handling.
                    cell_contents = xldate.xldate_as_datetime(cell_contents,
                                                              epoch1904)

                    # Excel doesn't distinguish between dates and time,
                    # so we treat dates on the epoch as times only.
                    # Also, Excel supports 1900 and 1904 epochs.
                    year = (cell_contents.timetuple())[0:3]
                    if ((not epoch1904 and year == (1899, 12, 31))
                            or (epoch1904 and year == (1904, 1, 1))):
                        cell_contents = datetime.time(cell_contents.hour,
                                              cell_contents.minute,
                                              cell_contents.second,
                                              cell_contents.microsecond)
                else:
                    # Use the xlrd <= 0.9.2 date handling.
                    dt = xldate.xldate_as_tuple(cell_contents, epoch1904)

                    if dt[0] < datetime.MINYEAR:
                        cell_contents = datetime.time(*dt[3:])
                    else:
                        cell_contents = datetime.datetime(*dt)

            elif cell_typ == XL_CELL_ERROR:
                cell_contents = np.nan
            elif cell_typ == XL_CELL_BOOLEAN:
                cell_contents = bool(cell_contents)
            elif convert_float and cell_typ == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less suprising
                val = int(cell_contents)
                if val == cell_contents:
                    cell_contents = val
            return cell_contents
Example #20
0
        def _parse_cell(cell_contents,cell_typ):
            """converts the contents of the cell into a pandas
               appropriate object"""
               
            if cell_typ == XL_CELL_DATE:
                if xlrd_0_9_3:
                    # Use the newer xlrd datetime handling.
                    cell_contents = xldate.xldate_as_datetime(cell_contents,
                                                              epoch1904)

                    # Excel doesn't distinguish between dates and time,
                    # so we treat dates on the epoch as times only.
                    # Also, Excel supports 1900 and 1904 epochs.
                    year = (cell_contents.timetuple())[0:3]
                    if ((not epoch1904 and year == (1899, 12, 31))
                            or (epoch1904 and year == (1904, 1, 1))):
                        cell_contents = datetime.time(cell_contents.hour,
                                              cell_contents.minute,
                                              cell_contents.second,
                                              cell_contents.microsecond)
                else:
                    # Use the xlrd <= 0.9.2 date handling.
                    dt = xldate.xldate_as_tuple(cell_contents, epoch1904)

                    if dt[0] < datetime.MINYEAR:
                        cell_contents = datetime.time(*dt[3:])
                    else:
                        cell_contents = datetime.datetime(*dt)

            elif cell_typ == XL_CELL_ERROR:
                cell_contents = np.nan
            elif cell_typ == XL_CELL_BOOLEAN:
                cell_contents = bool(cell_contents)
            elif convert_float and cell_typ == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less suprising
                val = int(cell_contents)
                if val == cell_contents:
                    cell_contents = val
            return cell_contents
Example #21
0
def apply(row, field_formats, stats):
    today = datetime.now()
    for field, formats in field_formats.items():
        try:
            value = row.get(field)
            if value in (None, ''):
                stats[field].add_spending('Empty', row)
                continue
            parsed = None
            # Try parsing
            for format_ in formats:
                try:
                    if format_ == 'excel':
                        # Deciphers excel dates that have been mangled into integers by
                        # formatting errors
                        parsed = datetime(*xldate_as_tuple(float(field.strip()), 0))
                    else:
                        parsed = datetime.strptime(value.strip(), format_)
                    break
                except Exception, e:
                    pass
            if not parsed:
                row[field + 'Formatted'] = None
                row['valid'] = False
                stats[field].add_spending('Parse error', row, value)
                continue
            # Check it is not in the future - an obvious mistake
            if parsed > today:
                row[field + 'Formatted'] = None
                row['valid'] = False
                stats[field].add_spending('Date in the future', row, parsed)
                continue
            formatted_date = parsed.strftime("%Y-%m-%d")
            stats[field].add_spending('Parsed ok', row, value)
            row[field + 'Formatted'] = formatted_date
        except Exception as e:
            row[field + 'Formatted'] = None
            row['valid'] = False
            stats[field].add_spending('Exception %s' % e.__class__.__name__, row)
            log.exception(e)
Example #22
0
def apply(row, formats):
    today = datetime.now()
    for field, format_ in formats.items():
        try:
            value = row.get(field)
            if value is None:
                continue
            if format_ == "excel":
                # Deciphers excel dates that have been mangled into integers by
                # formatting errors
                parsed = datetime(*xldate_as_tuple(float(field.strip()), 0))
            else:
                parsed = datetime.strptime(value.strip(), format_)
            # Check it is not in the future - an obvious mistake
            if parsed > today:
                row[field + "Formatted"] = None
                row["valid"] = False
                continue
            row[field + "Formatted"] = parsed.strftime("%Y-%m-%d")
        except Exception as e:
            row[field + "Formatted"] = None
            row["valid"] = False
            # log.exception(e)
    return row
def apply(row, formats):
    today = datetime.now()
    for field, format_ in formats.items():
        try:
            value = row.get(field)
            if value is None:
                continue
            if format_ == 'excel':
                # Deciphers excel dates that have been mangled into integers by
                # formatting errors
                parsed = datetime(*xldate_as_tuple(float(field.strip()), 0))
            else:
                parsed = datetime.strptime(value.strip(), format_)
            # Check it is not in the future - an obvious mistake
            if parsed > today:
                row[field + 'Formatted'] = None
                row['valid'] = False
                continue
            row[field + 'Formatted'] = parsed.strftime("%Y-%m-%d")
        except Exception as e:
            row[field + 'Formatted'] = None
            row['valid'] = False
            #log.exception(e)
    return row
Example #24
0
File: PAP.py Project: nt702596/PAP
    pl.set_title(name)
    fig.tight_layout()
    canvas = FigureCanvasTkAgg(fig, master=fr3)  
    canvas.get_tk_widget().pack(side=Tk.BOTTOM, expand=True)
    toolbar = NavigationToolbar2TkAgg(canvas, fr3)
    toolbar.update()
    canvas.draw()
       
df = pd.read_csv("dias_festivos.csv")
df2 = pd.read_excel("datos.xlsx") #leyendo los datos
df2 = df2.set_index("fecha") #establece la fecha como el index
    
from xlrd.xldate import xldate_as_tuple
from datetime import datetime

df['fecha_python'] = df['fecha'].apply(lambda x: datetime(*xldate_as_tuple(x, 0)))

root = Tk.Tk()
root.title("Consumo de energía en MWh en días festivos")

fr = Tk.Frame(root, borderwidth=5, relief="sunken", width=200, height=550)
fr.grid(column = 0, row = 0)
fr.pack_propagate(0)

fr2 = Tk.Frame(root, borderwidth=5, relief="sunken", width=300, height=550)
fr2.grid(column = 1, row = 0)
fr2.pack_propagate(0)

fr3 = Tk.Frame(root, borderwidth=5, relief="sunken", width=600, height=550)
fr3.grid(column = 2, row = 0)
fr3.pack_propagate(0)
Example #25
0
    def _parse_excel(self,
                     sheetname=0,
                     header=0,
                     skiprows=None,
                     skip_footer=0,
                     index_col=None,
                     has_index_names=None,
                     parse_cols=None,
                     parse_dates=False,
                     date_parser=None,
                     na_values=None,
                     thousands=None,
                     chunksize=None,
                     convert_float=True,
                     **kwds):
        import xlrd
        from xlrd import (xldate, XL_CELL_DATE, XL_CELL_ERROR, XL_CELL_BOOLEAN,
                          XL_CELL_NUMBER)

        epoch1904 = self.book.datemode

        # xlrd >= 0.9.3 can return datetime objects directly.
        if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
            xlrd_0_9_3 = True
        else:
            xlrd_0_9_3 = False

        if isinstance(sheetname, compat.string_types):
            sheet = self.book.sheet_by_name(sheetname)
        else:  # assume an integer if not a string
            sheet = self.book.sheet_by_index(sheetname)

        data = []
        should_parse = {}
        for i in range(sheet.nrows):
            row = []
            for j, (value, typ) in enumerate(
                    zip(sheet.row_values(i), sheet.row_types(i))):
                if parse_cols is not None and j not in should_parse:
                    should_parse[j] = self._should_parse(j, parse_cols)

                if parse_cols is None or should_parse[j]:
                    if typ == XL_CELL_DATE:
                        if xlrd_0_9_3:
                            # Use the newer xlrd datetime handling.
                            value = xldate.xldate_as_datetime(value, epoch1904)

                            # Excel doesn't distinguish between dates and time,
                            # so we treat dates on the epoch as times only.
                            # Also, Excel supports 1900 and 1904 epochs.
                            year = (value.timetuple())[0:3]
                            if ((not epoch1904 and year == (1899, 12, 31))
                                    or (epoch1904 and year == (1904, 1, 1))):
                                value = datetime.time(value.hour, value.minute,
                                                      value.second,
                                                      value.microsecond)
                        else:
                            # Use the xlrd <= 0.9.2 date handling.
                            dt = xldate.xldate_as_tuple(value, epoch1904)

                            if dt[0] < datetime.MINYEAR:
                                value = datetime.time(*dt[3:])
                            else:
                                value = datetime.datetime(*dt)

                    elif typ == XL_CELL_ERROR:
                        value = np.nan
                    elif typ == XL_CELL_BOOLEAN:
                        value = bool(value)
                    elif convert_float and typ == XL_CELL_NUMBER:
                        # GH5394 - Excel 'numbers' are always floats
                        # it's a minimal perf hit and less suprising
                        val = int(value)
                        if val == value:
                            value = val

                    row.append(value)

            data.append(row)

        if header is not None:
            data[header] = _trim_excel_header(data[header])

        parser = TextParser(data,
                            header=header,
                            index_col=index_col,
                            has_index_names=has_index_names,
                            na_values=na_values,
                            thousands=thousands,
                            parse_dates=parse_dates,
                            date_parser=date_parser,
                            skiprows=skiprows,
                            skip_footer=skip_footer,
                            chunksize=chunksize,
                            **kwds)

        return parser.read()
Example #26
0
    def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
                     index_col=None, has_index_names=None, parse_cols=None,
                     parse_dates=False, date_parser=None, na_values=None,
                     thousands=None, chunksize=None, convert_float=True,
                     **kwds):
        import xlrd
        from xlrd import (xldate, XL_CELL_DATE,
                          XL_CELL_ERROR, XL_CELL_BOOLEAN,
                          XL_CELL_NUMBER)

        epoch1904 = self.book.datemode

        # xlrd >= 0.9.3 can return datetime objects directly.
        if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
            xlrd_0_9_3 = True
        else:
            xlrd_0_9_3 = False

        if isinstance(sheetname, compat.string_types):
            sheet = self.book.sheet_by_name(sheetname)
        else:  # assume an integer if not a string
            sheet = self.book.sheet_by_index(sheetname)

        data = []
        should_parse = {}
        for i in range(sheet.nrows):
            row = []
            for j, (value, typ) in enumerate(zip(sheet.row_values(i),
                                                 sheet.row_types(i))):
                if parse_cols is not None and j not in should_parse:
                    should_parse[j] = self._should_parse(j, parse_cols)

                if parse_cols is None or should_parse[j]:
                    if typ == XL_CELL_DATE:
                        if xlrd_0_9_3:
                            # Use the newer xlrd datetime handling.
                            value = xldate.xldate_as_datetime(value, epoch1904)

                            # Excel doesn't distinguish between dates and time,
                            # so we treat dates on the epoch as times only.
                            # Also, Excel supports 1900 and 1904 epochs.
                            year = (value.timetuple())[0:3]
                            if ((not epoch1904 and year == (1899, 12, 31))
                                    or (epoch1904 and year == (1904, 1, 1))):
                                    value = datetime.time(value.hour,
                                                          value.minute,
                                                          value.second,
                                                          value.microsecond)
                        else:
                            # Use the xlrd <= 0.9.2 date handling.
                            dt = xldate.xldate_as_tuple(value, epoch1904)

                            if dt[0] < datetime.MINYEAR:
                                value = datetime.time(*dt[3:])
                            else:
                                value = datetime.datetime(*dt)

                    elif typ == XL_CELL_ERROR:
                        value = np.nan
                    elif typ == XL_CELL_BOOLEAN:
                        value = bool(value)
                    elif convert_float and typ == XL_CELL_NUMBER:
                        # GH5394 - Excel 'numbers' are always floats
                        # it's a minimal perf hit and less suprising
                        val = int(value)
                        if val == value:
                            value = val

                    row.append(value)

            data.append(row)

        if header is not None:
            data[header] = _trim_excel_header(data[header])

        parser = TextParser(data, header=header, index_col=index_col,
                            has_index_names=has_index_names,
                            na_values=na_values,
                            thousands=thousands,
                            parse_dates=parse_dates,
                            date_parser=date_parser,
                            skiprows=skiprows,
                            skip_footer=skip_footer,
                            chunksize=chunksize,
                            **kwds)

        return parser.read()
Example #27
0
def _parse_cell(xcell, epoch1904=False):
    """
    Parse a xl-xcell.

    :param xlrd.Cell xcell: an excel xcell
    :type xcell: xlrd.sheet.Cell

    :param epoch1904:
        Which date system was in force when this file was last saved.
        False => 1900 system (the Excel for Windows default).
        True => 1904 system (the Excel for Macintosh default).
    :type epoch1904: bool

    :return: formatted xcell value
    :rtype:
        int, float, datetime.datetime, bool, None, str, datetime.time,
        float('nan')


    Examples::

        >>> import xlrd
        >>> from xlrd.sheet import Cell
        >>> _parse_cell(Cell(xlrd.XL_CELL_NUMBER, 1.2))
        1.2

        >>> _parse_cell(Cell(xlrd.XL_CELL_DATE, 1.2))
        datetime.datetime(1900, 1, 1, 4, 48)

        >>> _parse_cell(Cell(xlrd.XL_CELL_TEXT, 'hi'))
        'hi'
    """

    ctype = xcell.ctype
    cvalue = xcell.value
    if ctype == XL_CELL_NUMBER:
        # GH5394 - Excel 'numbers' are always floats
        # it's a minimal perf hit and less suprising
        cint = int(cvalue)
        if cint == cvalue:
            return cint
        return cvalue
    elif ctype in (XL_CELL_EMPTY, XL_CELL_BLANK):
        return None  # RECT-LOOP NEVER USE THIS
    elif ctype == XL_CELL_TEXT:
        return cvalue
    elif ctype == XL_CELL_BOOLEAN:
        return bool(cvalue)
    elif ctype == XL_CELL_DATE:  # modified from Pandas library
        if _xlrd_0_9_3:
            # Use the newer xlrd datetime handling.
            d = xldate.xldate_as_datetime(cvalue, epoch1904)

            # Excel doesn't distinguish between dates and time, so we treat
            # dates on the epoch as times only. Also, Excel supports 1900 and
            # 1904 epochs.
            epoch = (1904, 1, 1) if epoch1904 else (1899, 12, 31)
            if (d.timetuple())[0:3] == epoch:
                d = datetime.time(d.hour, d.minute, d.second, d.microsecond)
        else:
            # Use the xlrd <= 0.9.2 date handling.
            d = xldate.xldate_as_tuple(xcell.value, epoch1904)
            if d[0] < datetime.MINYEAR:  # time
                d = datetime.time(*d[3:])
            else:  # date
                d = datetime.datetime(*d)
        return d
    elif ctype == XL_CELL_ERROR:
        return float("nan")

    raise ValueError("Invalid XL-cell type(%s) for value(%s)!" %
                     (xcell.ctype, xcell.value))
Example #28
0
def parse_minutegram(msheet, csheet, sw, user):
    work = Work()

    if msheet.cell(0,7).value == '':
        e = IntegrityError()
        e.__cause__="El trabajo no tiene numero"
        raise e
    else:
        work.number = msheet.cell(0, 7).value

    if column_value_search(1, 'DESCRIPCION TP:', msheet):
        drow = column_value_search(1, 'DESCRIPCION TP:', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion DESCRIPCION TP"
        raise e

    if column_value_search(1, 'JUSTIFICACION: ', msheet):
        jrow = column_value_search(1, 'JUSTIFICACION: ', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion JUSTIFICACION"
        raise e

    if column_value_search(1, 'OBSERVACIONES:', msheet):
        orow = column_value_search(1, 'OBSERVACIONES:', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion OBSERVACIONES"
        raise e

    if column_value_search(1, 'PLAN DE TRABAJO (MINUTOGRAMA):', msheet):
        wprow = column_value_search(1, 'PLAN DE TRABAJO (MINUTOGRAMA):', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion PLAN DE TRABAJO"
        raise e

    if column_value_search(1, 'PLAN DE CONTINGENCIA / ROLLBACK:', msheet):
        cprow = column_value_search(1, 'PLAN DE CONTINGENCIA / ROLLBACK:', msheet)
    else:
        e = IntegrityError()
        e.__cause__="El documento no tiene seccion PLAN DE CONTINGENCIA / ROLLBACK"
        raise e


    #este bloque de codigo asigna los datos extraidos del formulario al work creado
    work.ticketArea = sw.ticketArea
    work.department = sw.department
    work.municipality = sw.municipality
    work.impact = sw.impact
    work.ticketCause = sw.ticketCause
    work.initialDate = sw.initialDate
    work.finalDate = sw.finalDate
    work.outboundDate = sw.outboundDate
    work.createdDate = datetime.date.today()
    work.affectTime = sw.affectTime
    work.rollbackTime = sw.rollbackTime
    now = timezone.make_aware(datetime.datetime.now(), timezone.get_default_timezone())

    #Si el tiempo dado para la causa esta en horas se entiende que debe pasarse a areas internas y nunca externas
    if sw.ticketCause.timeLapseType == Cause.HOURS and sw.ticketArea.type == Area.INTERN:
        if now + datetime.timedelta(days=1, hours=sw.ticketCause.internTimeLapse) <= sw.initialDate:
            work.limitResponseDate = now + datetime.timedelta(days=1, hours=sw.ticketCause.internTimeLapse)
        else:
            e = IntegrityError()
            e.__cause__="El tiempo maximo de respuesta de los clientes es mas tarde que la fecha de inicio del trabajo"
            raise e
    elif sw.ticketCause.timeLapseType == Cause.HOURS and sw.ticketArea.type == Area.EXTERN:
        e = IntegrityError()
        e.__cause__="La Causa del ticket no puede asignarse a un area externa"
        raise e
    elif sw.ticketCause.timeLapseType == Cause.DAYS and sw.ticketArea.type == Area.INTERN:
        if now + datetime.timedelta(days=1+sw.ticketCause.internTimeLapse) <= sw.initialDate:
            work.limitResponseDate = now + datetime.timedelta(days=1+sw.ticketCause.internTimeLapse)
        else:
            e = IntegrityError()
            e.__cause__="El tiempo maximo de respuesta de los clientes es mas tarde que la fecha de inicio del trabajo"
            raise e
    elif sw.ticketCause.timeLapseType == Cause.DAYS and sw.ticketArea.type == Area.INTERN:
        if now + datetime.timedelta(days=1+sw.ticketCause.externTimeLapse) <= sw.initialDate:
            work.limitResponseDate = now + datetime.timedelta(days=1+sw.ticketCause.externTimeLapse)
        else:
            e = IntegrityError()
            e.__cause__="El tiempo maximo de respuesta de los clientes es mas tarde que la fecha de inicio del trabajo"
            raise e

    #se asigna el usuario loggeado al trabajo
    if user:
        work.userCreator = user
    #-------------------------------------------------------------------------------

    work.description = msheet.cell(drow+1, 1).value
    work.justification = msheet.cell(jrow+1, 1).value
    work.observations = msheet.cell(orow+1, 1).value

    try:
        group = WorkGroup.objects.get(number = work.number)
        for w in group.work_set.all():
            w.state = Work.CANCELED
            for acc in w.acceptance_set.all():
                    acc.valid = False
                    acc.save()
            w.save()

        work.group = group
        work.programmed = Work.REPROGRAMMED

    except:
        group = WorkGroup()
        group.number = work.number
        group.save()

        work.group = group


    work.save()

    #loads work plans
    for i in range(wprow+2,cprow):
        if check_line(i, 2, 6, msheet):
            wp = WorkPlan()
            wp.work=work
            wp.initialDate = xldate_as_datetime(msheet.cell(i, 2).value, 0)
            wp.finalDate = xldate_as_datetime(msheet.cell(i, 3).value, 0)
            wp.affectation = datetime.time(*(xldate_as_tuple(msheet.cell(i, 4).value, 0))[3:])
            wp.activity = msheet.cell(i, 5).value

            wp.save()
        else:
            e = IntegrityError()
            e.__cause__="Alguno de los planes de trabajo tiene un campo vacio"
            raise e

    #loads contingency plans
    for i in range(cprow+2, drow-1):
        if check_line(i, 2, 6, msheet):
            cp = ContingencyPlan()
            cp.work=work
            cp.initialDate = xldate_as_datetime(msheet.cell(i, 2).value, 0)
            cp.finalDate = xldate_as_datetime(msheet.cell(i, 3).value, 0)
            cp.affectation = datetime.time(*(xldate_as_tuple(msheet.cell(i, 4).value, 0))[3:])
            cp.activity = msheet.cell(i, 5).value

            cp.save()
        else:
            e = IntegrityError()
            e.__cause__="Alguno de los planes de contingencia tiene un campo vacio"
            raise e

    parse_corp_clients(csheet, work)

#to get the year
workbook = xlrd.open_workbook('ted_info_name_title_date.xlsx')
worksheet = workbook.sheet_by_name('Sheet1')
num_rows = worksheet.nrows - 1
num_cells = worksheet.ncols - 1
curr_row = 0
while curr_row < num_rows:
    curr_row += 1
    row = worksheet.row(curr_row)

    author_name = worksheet.cell_value(curr_row, 0)
    talk_title = worksheet.cell_value(curr_row, 1)
    date = worksheet.cell_value(curr_row, 2)
    date_as_datetime = xldate.xldate_as_tuple(date, workbook.datemode)
    year, month, day, hour, minute, second = date_as_datetime
    print year

    try:
        viewcount_dict[author_name + ":" + talk_title].append(year)
    except:
        #author/title not in dictionary (because it was one of the weirdly formatted pages)
        print row
        continue


print len(viewcount_dict)


year_viewcount_dict = {}
Example #30
0
    def run(self):
        book = open_workbook(os.path.join(
            self.input().path, 'per{}{}.xls'.format(
                ('0' + str(self.month))[-2:], ('0' + str(self.year))[-2:])),
                             formatting_info=True)

        sheet = book.sheets()[0]

        session = current_session()
        allvals = []
        for rownum, row in enumerate(sheet.get_rows()):
            if rownum == 2:
                coldefs = [
                    '"{}" VARCHAR'.format(cell.value.replace('"', '').strip())
                    for cell in row if cell.value
                ]
                session.execute('CREATE TABLE {output} ({coldefs})'.format(
                    coldefs=', '.join(coldefs), output=self.output().table))
            elif rownum > 2:
                # Have to escape colons as they are interpreted as symbols otherwise
                vals = []
                for cell in row:
                    # type 0 is empty
                    if cell.ctype == 0:
                        pass
                    # type 2 is numeric, which is always a float, even if it
                    # should be an integer
                    elif cell.ctype == 2:
                        if cell.value == int(cell.value):
                            vals.append(str(int(cell.value)))
                        else:
                            vals.append(str(cell.value))
                    # type 3 is date
                    elif cell.ctype == 3:
                        vals.append("'{}-{}-{}'".format(
                            *xldate_as_tuple(cell.value, 0)))
                    # everything else just pass in as unicode string, unless
                    # it's blank, in which case send in NULL
                    else:
                        if cell.value:
                            vals.append("'{}'".format(
                                str(cell.value).replace(":", "::").replace(
                                    "'", "''")))
                        else:
                            vals.append('NULL')

                # Kill occasional erroneous blank last column
                if vals[-1] == "NULL":
                    vals = vals[0:-1]

                if len(vals) < len(coldefs):
                    vals.extend(["NULL"] * (len(coldefs) - len(vals)))

                if len(vals) != len(coldefs):
                    LOGGER.error('FIXME: cannot parse year %s month %s row %s',
                                 self.year, self.month, rownum)
                    continue

                allvals.append(', '.join(vals))
        try:
            session.execute('INSERT INTO {output} VALUES ({allvals})'.format(
                output=self.output().table, allvals='), ('.join(allvals)))
        except Exception as err:
            print(err)
            raise
Example #31
0
def processLogFile(rawDataFile, xlrdLog, medianCollector):
    
    global sites, outputCSVSummary
    nRows = 0           # Number rows written to output
    earliestDateSeen = datetime.date.max        # set to HIGHEST date so the first one we encounter is less
    latestDateSeen = datetime.date.min
    ret = True
    
    if statusCallback:
        statusCallback('processLogFile: Processing '+rawDataFile)
    try:
        book = open_workbook(rawDataFile, logfile=xlrdLog)
    except XLRDError as e:
        statusCallback('Error opening workbook: %s\n' % (str(e)))
        xlrdLog.write('Error opening workbook %s: %s\n' % (rawDataFile, str(e)))
        return False
    
    sheet = book.sheet_by_index(0)
    # Site is always B19 per Evan
    siteName = sheet.cell_value(rowx=18, colx=1)

    # Some sanity checking - is the book in the expected format?
    if book.nsheets != 2 or not isinstance(siteName, str):
        statusCallback('Workbook not in expected format')
        xlrdLog.write('Workbook %s not in expected format\n' % (rawDataFile))
        return False

    statusCallback ('Site name in data file: '+siteName)
    
    # Map some sitenames together
    siteName = siteName.upper()
    siteMap = {'WHISI' : 'WHIRU',
               'WHRZ' : 'WHISP',
               'RUTZP.OUT' : 'RUTZP.outflow',
               'WHIRS' : 'WHISP',
               'WHIRZ' : 'WHISP',
               'RUTZPOND' : 'RUTZP',
               'YELPER' : 'YELPR',
               'LASMO' : 'LASOM',
               'RUTZEROUT' : 'RUTZP.outflow'
    }
    
    siteName = siteMap.get(siteName, siteName)
    
    if statusCallback:
        statusCallback('Site name: '+siteName)

    # Open the sheet and grab the data, copying it to the output CSV
    try:
        dataSheet = book.sheet_by_index(1)
        if verbose:
            statusCallback('Sheets: ',book.sheet_names())

    except XLRDError as e:
        statusCallback('Workbook does not have correct number of sheets')
        xlrdLog.write('Error getting data sheet for '+rawDataFile+' - Skipping Workbook for "'+siteName+'"\n')
        return False

    # Switch to data sheet
    sheet = book.sheet_by_index(1)
    
    # Get first line from sheet and analyze
    numColumns = sheet.ncols   # Number of columns
    
    # Check if workbook seems to match expected format:
    # 2 sheets
    # first cell in 2nd sheet has Date in the name
    if sheet.cell_value(rowx=0,colx=0) == u'Date' and book.nsheets == 2:
        # Print header row
        # Check to see if we have one of those books where the data doesn't match - in that case we have to do
        # some magic transposing
        # Three variations:
        # Date  Time	Temp.[C]	pH 	EC[uS/cm]	D.O.[%]	    D.O.[ppm]	Turb.FNU	Remarks
        # Date	Time    Temp.[C]	pH 	D.O.[%]	    D.O.[ppm]	Turb.FNU	EC[uS/cm]   Remarks
        # Date	Time	Temp.[C]	pH 	mV[pH]	    EC[uS/cm]	D.O.[%]	    D.O.[ppm]	Turb.FNU	Remarks
        # 0     1       2           3   4           5           6           7           8           9

        # We've adopted the third one (which is the most complete) as the standard, so if we
        # detect the others, we have to do move columns around as we emit them to the CSV
        # columns in the right order.
        
        columnFormatModel = None

        if numColumns > 4 and sheet.cell(0, 4).ctype == 1 and sheet.cell(0, 4).value == 'D.O.[%]':
            columnFormatModel = 1           
                # Transpose columns if needed - column indices are in the
                # source sheet - they will be pulled from those source columns
                # and then written to columns 2, 3, 4 etc. - since 0 is the site name
                # and 1 is the file name.  -1 means blank column (skip)
            colOrder = [0, 1, 2, 3, -1, 7, 4, 5, 6, 8]
            statusCallback("This sheet has non-standard column ordering; adjusting columns to match standard.\n")
        elif numColumns > 4 and sheet.cell(0, 4).ctype == 1 and sheet.cell(0, 4).value == 'mV[pH]':
            columnFormatModel = 0
            colOrder = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        elif numColumns > 4 and sheet.cell(0, 5).ctype == 1 and sheet.cell(0, 5).value == 'D.O.[%]':
            columnFormatModel = 2
            colOrder = [0, 1, 2, 3, -1, 4, 5, 6, 7, 8]
        else:
            statusCallback('Workbook has non-standard column headers - see columnFormatModel in processLogFile to address - skipping workbook')
            xlrdLog.write('Workbook has non-standard column headers - see columnFormatModel in processLogFile to address - Skipping Workbook for "'+siteName+'"\n')
            return False 
        
        nColsToWrite = len(colOrder)

        for rowIndex in range(1, sheet.nrows):    # Iterate through data rows
            if verbose:
                statusCallback ('-'*40)
                statusCallback ('Row: %s' % rowIndex)   # Print row number
            # Skip entirely blank rows
            rowIsBlank = True
            for columnIndex in range(1, sheet.ncols):  # Iterate through columns
                val = sheet.cell(rowIndex, columnIndex)
                if (val.ctype != 0 or (val.ctype == 1 and len(str(val)) > 0)):
                    rowIsBlank = False
                    
            if rowIsBlank:
                if verbose:
                    statusCallback('Skipping blank row')
                continue
            else:
                nRows += 1              
  
            nCols = 0           # Number of columns processed - so we can omit the last "," in the CSV output
            
            for columnIndex in colOrder:  # Iterate through columns
                nCols += 1
                if columnIndex >= numColumns:
                    continue            # don't access columns that don't exist on this sheet
                elif columnIndex == -1:     # emit a blank column - this isn't in source sheet
                    outputCSVSummary.write(',')
                    continue
                else:
                    cellValue = sheet.cell(rowIndex, columnIndex)  # Get cell object by row, col
                    cellType = cellValue.ctype
                    
                    # Prefix each row with the site name of the data and the
                    # file name it came from
                    if (columnIndex == 0):
                        outputCSVSummary.write(siteName+','+rawDataFile+',')
                        
                    if verbose:
                        statusCallback ('Column: [%s] is [%s] : [%s]' % (columnIndex, cellType, cellValue))
                        
                    # Somewhere these numeric values have to be defined, right?
                    # 3 == date per https://pythonhosted.org/xlrd3/cell.html - but there are two
                    # in the data, a date and a time.  0 is the date, 1 is the time.  Sigh.
                    if (cellType == 3):
                        if (columnIndex == 0):      # Date
                            # Date
                            year, month, day, hour, minute, second = xldate.xldate_as_tuple(cellValue.value, book.datemode)
                            strDate = '%4d-%02d-%02d' % (year, month, day)
                            outputCSVSummary.write(strDate)
                            d = datetime.date(year,month,day)
                            if (d < earliestDateSeen):
                                earliestDateSeen = d
                            if (d > latestDateSeen):
                                latestDateSeen = d
                        else:  # Only other date value in input is the Time stamp
                            # Just emit time as is.  Remember since we need to add it
                            # to the median finder so we can emit the timestamp in the
                            # DoE Summary.
                            year, month, day, hour, minute, second = xldate.xldate_as_tuple(cellValue.value, book.datemode)
                            strTime = '%02d:%02d:%02d' % (hour, minute, second)
                            outputCSVSummary.write(strTime)
                    elif (cellType in [1, 2]):   # 1 = text, 2 = number
                            # Other column - e.g. ph, Turb.FNU, etc.
                            outputCSVSummary.write(str(cellValue.value))
                            # Do we need to accumulate values for this for median calculation?
                            # Note some measurements have '-----' instead of 0 for missing values
                            # so catch that here by checking only for numeric values
                            if cellType == 1:
                                measurementValue = 0
                            else:
                                measurementValue = cellValue.value
                            if calculateMedians[columnIndex]:
                                medianCollector.addMeasurement(siteName, strDate, strTime, columnHeaders[columnIndex], measurementValue)
                    else:
                        xlrdLog.write('%s: Unknown value type for [%s,%s] : %s\n' % (rawDataFile, rowIndex, columnIndex, cellType))
                    if (nCols < nColsToWrite):
                        outputCSVSummary.write(',')        # append , except after last column value                    
                        
            # After emitting all columns, terminate the line in the CSV file
            outputCSVSummary.write('\n')       # Terminate line
    else:
        statusCallback('Wrong # of sheets or first row of 2nd worksheet is not Date.. skipping')
        ret = False
       
    if statusCallback:
        statusCallback('%d data rows read.' % sheet.nrows)
        
    if ret:
        siteData = SiteData(rawDataFile, minDate=earliestDateSeen, maxDate=latestDateSeen, numRecs=nRows)
        if siteName not in sites:
            # Not in list yet - add a tuple
            statusCallback("This data is for a new site: " + siteName)
            sites[siteName] = [ siteData ]
        else:
            statusCallback("This is additional data for site: " + siteName)
            sites[siteName].append(siteData)

    return ret
    def action_upload(self, action, data):
        status = IStatusMessage(self.request)
        excelfile = data.get('excelfile', None)
        num_updated = num_new = num_deleted = 0
        skipped = list()

        if not excelfile:
            # print "no file!"
            status.addStatusMessage(_(u"No file was selected!."), type='error')
            return
        try:
            book = xlrd.open_workbook(file_contents=excelfile)
        except:
            # print "not Excel?"
            status.addStatusMessage(
                _(u"File was not readable. Check that it is really "
                  "in Excel format."),
                type='error')
            return

        relpath = data['path']
        if relpath.startswith('/'):
            relpath = relpath[1:]
        dest = self.context.restrictedTraverse(relpath, None)
        if not dest:
            status.addStatusMessage(
                u"Could not find destination folder at %s" % relpath,
                type="error")
            return

        catalog = getToolByName(self.context, 'portal_catalog')
        pwt = getToolByName(self.context, 'portal_workflow')
        pvt = getToolByName(self.context, 'portal_vocabularies')
        ann = IAnnotations(dest)
        existing_events = ann.get(UPLOADEDEVENTSKEY, dict())
        keys = existing_events.keys()
        #valid_categories = catalog.uniqueValuesFor('Subject')
        now = DateTime()

        for sheet in book.sheets():
            if sheet.name not in SHEETS_TO_HANDLE:
                continue

            for i in range(DATA_ROW_START, sheet.nrows):
                row = sheet.row(i)
                do_upload = row[DO_UPLOAD].value
                do_upload = do_upload.strip().lower()
                if not do_upload:
                    continue

                title = row[TITLE].value
                uidcell = row[UNIQUEID]
                if uidcell.ctype == XL_CELL_NUMBER:
                    uid = str(int(uidcell.value))
                else:
                    uid = uidcell.value
                if not title.strip():
                    continue

                # Delete or upload?
                if do_upload == 'delete':
                    if uid not in keys:
                        # status.addStatusMessage(
                        # _(u"You are trying to delete an event with "\
                        # "Excel ID %s and Title '%s', but it does not " \
                        # "exist in the system. Please check "\
                        # "your Excel file or delete the desired event " \
                        # "manually" % (uid, title)), type="error")
                        continue
                    brains = catalog(UID=existing_events[uid])
                    if not len(brains):
                        status.addStatusMessage(
                            _(u"Existing event with Excel id %s and Title '%s'"
                              " could not be found in the system. It might "
                              "have been deleted manually." % (uid, title)),
                            type="warning")
                    else:
                        event = brains[0].getObject()
                        if not event:
                            del existing_events[uid]
                            continue
                        event_url = event.absolute_url()
                        dest._delObject(event.getId())
                        del existing_events[uid]
                        num_deleted += 1
                        status.addStatusMessage(
                            _(u"Event with Excel id %s and Title '%s' at %s "
                              "was deleted." % (uid, title, event_url)),
                            type="warning")
                else:
                    startdatecell = row[DATE_START]
                    # Start date will have 0:0:0 as default time unless
                    # explicitly stated
                    if startdatecell.ctype == XL_CELL_DATE:
                        dtuple = xldate_as_tuple(startdatecell.value,
                                                 book.datemode)
                        startDate = DateTime("%d/%d/%d %d:%d:%d" % (
                            dtuple[0], dtuple[1], dtuple[2],
                            dtuple[3], dtuple[4], dtuple[5]))
                    else:
                        # Sometimes they write dates like "2011 09 12"
                        # Excel doesn't recognise this format, but
                        # DateTime will
                        elems = startdatecell.value.strip().split(' ')
                        if len(elems) == 3:
                            try:
                                startDate = DateTime(startdatecell.value)
                            except:
                                skipped.append(title)
                                continue
                        else:
                            skipped.append(title)
                            continue

                    # by default, end date = start date 23:55, unless
                    # otherwise stated
                    endDate = DateTime(startDate.strftime('%Y/%m/%d 23:55:00'))
                    enddatecell = row[DATE_END]
                    if enddatecell.ctype == XL_CELL_DATE:
                        dtuple = xldate_as_tuple(enddatecell.value,
                                                 book.datemode)
                        endTime = '%d:%d:%d' % (
                            dtuple[3], dtuple[4], dtuple[5])
                        if endTime == '0:0:0':
                            endTime = '23:55:00'
                        endDate = DateTime("%d/%d/%d %s" % (
                            dtuple[0], dtuple[1], dtuple[2], endTime))
                    else:
                        try:
                            # Sometimes they write dates like "2011 09 12"
                            # Excel doesn't recognise this format, but
                            # DateTime will. We try to create a DateTime out
                            # of it, but set the time to 23:55
                            endDate = DateTime(enddatecell.value)
                            endDate = DateTime(
                                endDate.strftime('%Y/%m/%d 23:55:00'))
                        # if end date can't be created, just ignore it
                        except:
                            pass

                    if endDate < now:
                        # Skipping, because the event is already over
                        continue

                    description = row[ORGANISATION].value
                    url = row[WEBSITE].value
                    country = row[COUNTRY].value
                    location = row[CITY].value
                    if country.strip() != '':
                        location = "%s %s" % (country, location)

                    subcategory = row[CATEGORY].value
                    if subcategory:
                         # make sure we don't save bogus values
                        subcategory = [x.strip() for x in
                                       subcategory.split(',')]
                        vocab = getattr(pvt, 'Subcategory', None)
                        if not vocab:
                            subcategory = ''
                        else:
                            subcategory = [x for x in subcategory if
                                           vocab.getTermByKey(x)]

                    isNew = False

                    def createNewEvent(dest, uid):
                        id = "event-%s-%s" % (
                            DateTime().strftime('%Y%M%d%H%m%s'), uid)
                        dest.invokeFactory(type_name='Event', id=id)
                        # transaction.commit()
                        event = getattr(dest, id)
                        plt = getToolByName(dest, 'portal_languages')
                        if plt.startNeutral() and event.Language() != '':
                            event.setLanguage('')
                        return event

                    if uid not in keys:
                        event = createNewEvent(dest, uid)
                        isNew = True
                    else:
                        brains = catalog(UID=existing_events[uid])
                        if not len(brains):
                            status.addStatusMessage(
                                _(u"Existing event with Excel id %s (%s) could"
                                  " not be found. It will be created anew."
                                  % (uid, title)), type="error")
                            del existing_events[uid]
                            event = createNewEvent(dest, uid)
                            isNew = True
                        else:
                            event = brains[0].getObject()
                            if not event:
                                status.addStatusMessage(
                                    _(u"Reference to exiting event wit UID %s "
                                      "is broken." % existing_events[uid]),
                                    type="error")
                                del existing_events[uid]
                                event = createNewEvent(dest, uid)
                                isNew = True

                    values = dict(
                        title=title,
                        description=description,
                        location=location,
                        eventUrl=url,
                        country=country,
                        startDate=startDate,
                        endDate=endDate,
                        subcategory=subcategory)
                    for k, v in values.items():
                        field = event.getField(k)
                        if not field:
                            continue
                        field.getMutator(event)(v)
                    if isNew:
                        event.unmarkCreationFlag()
                        event._renameAfterCreation()
                        num_new += 1
                    else:
                        event.reindexObject()
                        num_updated += 1
                    existing_events[uid] = event.UID()
                    try:
                        if (pwt.getInfoFor(event, 'review_state') !=
                            'published'):
                            pwt.doActionFor(event, 'publish')
                    except:
                        # user does not have permissions for this
                        pass
                    verb = isNew and 'Created' or 'Updated'
                    status.addStatusMessage(
                        _(u'%(verb)s event " %(title)s" at %(url)s' % dict(
                        verb=verb, url=event.absolute_url(), title=title)),
                        type=verb)
                    # print [startDate], title, uid

        ann[UPLOADEDEVENTSKEY] = existing_events
        dest.reindexObject()

        # print "uploaded!"
        status.addStatusMessage(
            _(u"Upload and event creation was successful. %s events were "
              "created, %d updated and %d deleted." % (
                  num_new, num_updated, num_deleted)), type='info')
        if len(skipped):
            status.addStatusMessage(
                u'The following entries do not have a valid start date and '
                'were skipped: %s' % ', '.join(skipped), type='warning')
Example #33
0
def _parse_cell(xcell, epoch1904=False):
    """
    Parse a xl-xcell.

    :param xlrd.Cell xcell: an excel xcell
    :type xcell: xlrd.sheet.Cell

    :param epoch1904:
        Which date system was in force when this file was last saved.
        False => 1900 system (the Excel for Windows default).
        True => 1904 system (the Excel for Macintosh default).
    :type epoch1904: bool

    :return: formatted xcell value
    :rtype:
        int, float, datetime.datetime, bool, None, str, datetime.time,
        float('nan')


    Examples::

        >>> import xlrd
        >>> from xlrd.sheet import Cell
        >>> _parse_cell(Cell(xlrd.XL_CELL_NUMBER, 1.2))
        1.2

        >>> _parse_cell(Cell(xlrd.XL_CELL_DATE, 1.2))
        datetime.datetime(1900, 1, 1, 4, 48)

        >>> _parse_cell(Cell(xlrd.XL_CELL_TEXT, 'hi'))
        'hi'
    """

    ctype = xcell.ctype
    cvalue = xcell.value
    if ctype == XL_CELL_NUMBER:
        # GH5394 - Excel 'numbers' are always floats
        # it's a minimal perf hit and less suprising
        cint = int(cvalue)
        if cint == cvalue:
            return cint
        return cvalue
    elif ctype in (XL_CELL_EMPTY, XL_CELL_BLANK):
        return None  # RECT-LOOP NEVER USE THIS
    elif ctype == XL_CELL_TEXT:
        return cvalue
    elif ctype == XL_CELL_BOOLEAN:
        return bool(cvalue)
    elif ctype == XL_CELL_DATE:  # modified from Pandas library
        if _xlrd_0_9_3:
            # Use the newer xlrd datetime handling.
            d = xldate.xldate_as_datetime(cvalue, epoch1904)

            # Excel doesn't distinguish between dates and time, so we treat
            # dates on the epoch as times only. Also, Excel supports 1900 and
            # 1904 epochs.
            epoch = (1904, 1, 1) if epoch1904 else (1899, 12, 31)
            if (d.timetuple())[0:3] == epoch:
                d = datetime.time(d.hour, d.minute, d.second, d.microsecond)
        else:
            # Use the xlrd <= 0.9.2 date handling.
            d = xldate.xldate_as_tuple(xcell.value, epoch1904)
            if d[0] < datetime.MINYEAR:  # time
                d = datetime.time(*d[3:])
            else:  # date
                d = datetime.datetime(*d)
        return d
    elif ctype == XL_CELL_ERROR:
        return float('nan')

    raise ValueError('Invalid XL-cell type(%s) for value(%s)!' %
                     (xcell.ctype, xcell.value))