Example #1
0
def get_document_title(workbook: xlrd.book.Book,
                       orderform_sheet: xlrd.sheet.Sheet) -> str:
    """Get the document title for the order form."""
    if "information" in workbook.sheet_names():
        information_sheet = workbook.sheet_by_name("information")
        document_title = information_sheet.row(0)[2].value
        return document_title

    document_title = orderform_sheet.row(0)[1].value
    return document_title
Example #2
0
def sheet_of(
    request, workbook: xlrd.book.Book
) -> Tuple[Tuple[int, int], Callable[[int, int], str]]:
    if workbook.nsheets != 1:
        raise FileError().with_message(
            request,
            f'excel can only contain 1 sheet, sheets: {workbook.nsheets}')
    sheet = workbook.sheet_by_index(0)
    m, n = sheet.nrows, sheet.ncols
    if m == 0 or n == 0:
        raise FileError().with_message(request, 'empty excel sheet')

    def value_of(i, j) -> str:
        cell = sheet.cell(i, j)
        if cell.ctype == xlrd.XL_CELL_EMPTY:
            return ''
        elif cell.ctype == xlrd.XL_CELL_TEXT:
            return cell.value
        elif cell.ctype == xlrd.XL_CELL_NUMBER:
            return str(cell.value)
        elif cell.ctype == xlrd.XL_CELL_DATE:
            date = xlrd.xldate.xldate_as_tuple(cell.value,
                                               datemode=0)  # datemode
            return f'{date[0]}-{date[1]}-{date[2]}'
        elif cell.ctype == xlrd.XL_CELL_BOOLEAN:
            return str(True if cell.value == 1 else False)
        else:
            raise FileError().with_message(request,
                                           f'cell type error at ({i},{j})')

    return (m, n), value_of
Example #3
0
def read_worksheet_settings(wks: xlrd.book.Book) -> Dict[str, Any]:
    sheet = wks.sheet_by_name("Settings")
    result = {}
    for cur_row in range(sheet.nrows):
        key = str(sheet.cell(cur_row, 0).value)
        if key == "":
            continue

        if key == "Formulas":
            break

        values = []
        for cur_col in range(1, sheet.ncols):
            cur_value = sheet.cell(cur_row, cur_col).value
            if str(cur_value) == "":
                break

            values.append(cur_value)

        if len(values) == 1:
            values = values[0]

        result[key] = values

    return result
Example #4
0
    def _read_xls(self, wb: xlrd.book.Book):
        data = list()
        ws = wb.sheet_by_index(0)  # type: xlrd.sheet.Sheet
        row = 7
        while ws.cell_value(row, 0) != '':
            dgu_id = int(ws.cell_value(row, 0))
            node_id = int(ws.cell_value(row, 2))
            data_row = ws.row_values(row, 4, 124)
            for i in range(0, 5 * 24, 5):
                data.append(
                    (dgu_id, i // 5, node_id, data_row[i], data_row[i + 1],
                     data_row[i + 2], data_row[i + 3], data_row[i + 4]))
            row += 1

        data = pd.DataFrame(data,
                            columns=[
                                'dgu_id', 'hour', 'node_id', 'p_min_tech',
                                'p_min_techn', 'p_min', 'v_ppp', 'p_max'
                            ])
        data['date'] = pd.to_datetime(self._context[0])
        # в какой-то момент int превращаются в float
        data.node_id = data.node_id.astype(int)
        data.hour = data.hour.astype(int)
        data.dgu_id = data.dgu_id.astype(int)

        self.data.append(data)
Example #5
0
def extract_retailer(wb: xlrd.book.Book) -> str:
    sheet = wb.sheet_by_index(0)
    for index, cell in enumerate(sheet.col(0)):
        if 'PURCHASE ORDER TO' in cell.value:
            cell: xlrd.sheet.Cell = sheet.cell(index + 1, 0)
            assert ctype_text.get(cell.ctype) == 'text', 'invalid purchase order cell'
            return cell.value
    return None
Example #6
0
 def _read_xls(self, wb: xlrd.book.Book):
     ws = wb.sheet_by_index(0)
     data = pd.DataFrame(data=ws._cell_values[7:],
                         columns=['date', 'hour', 'fact'])
     data['region_id'] = region_map[ws._cell_values[1][1]]
     data['date'] = pd.to_datetime(data['date'])
     data['hour'] = data.hour.astype(int) - 1
     self.data.append(data)
Example #7
0
def extract_po(wb: xlrd.book.Book) -> str:
    sheet = wb.sheet_by_index(0)
    # try col 9, 10
    for i in range(9, 11):
        cell: xlrd.sheet.Cell = sheet.cell(2, i)
        if ctype_text.get(cell.ctype) != 'text':
            continue
        po = cell.value
        if po_re.match(po):
            return po
    return None
Example #8
0
 def _read_xls(self, wb: xlrd.book.Book):
     ws = wb.sheet_by_index(0)
     data = pd.DataFrame(
         data=ws._cell_values[6:],
         columns=['region1_id', 'region2_id', 'hour', 'flow'])
     data.replace('', null, inplace=True)
     data['region1_id'] = data.region1_id.replace(region_map).astype(int)
     data['region2_id'] = data.region2_id.replace(region_map).astype(int)
     data['hour'] = data.hour.astype(int)
     data['date'] = pd.to_datetime(self._context[0])
     self.data.append(data)
Example #9
0
 def _read_xls(self, wb: xlrd.book.Book):
     ws = wb.sheet_by_index(0)
     data = pd.DataFrame(
         data=ws._cell_values[6:],
         columns=[
             'region_id', 'hour', 'gen_ges', 'gen_aes', 'gen_tes',
             'gen_ses', 'gen_ves', 'gen_other', 'pmin_tech_ges',
             'pmin_tech_aes', 'pmin_tech_tes', 'pmin_tech_ses',
             'pmin_tech_ves', 'pmin_tech_other', 'pmin_ges', 'pmin_aes',
             'pmin_tes', 'pmin_ses', 'pmin_ves', 'pmin_other', 'pmax_ges',
             'pmax_aes', 'pmax_tes', 'pmax_ses', 'pmax_ves', 'pmax_other',
             'con', 'exp', 'imp', 'price_con', 'price_gen'
         ])
     data.replace('', null, inplace=True)
     data['region_id'] = data.region_id.replace(region_map).astype(int)
     data['hour'] = data.hour.astype(int)
     data['date'] = pd.to_datetime(self._context[0])
     self.data.append(data)
Example #10
0
def read_worksheet_table(wks: xlrd.book.Book) -> Dict[str, Any]:
    """Read a table of numbers from an Excel file saved by Keithley.

    This function reads the first worksheet in the Excel file passed as
    argument and returns a dictionary associating NumPy arrays with their
    names.
    """

    sheet = wks.sheet_by_index(0)
    result = OrderedDict()
    nrows = sheet.nrows
    for cur_col in range(sheet.ncols):
        name = sheet.cell(0, cur_col).value
        if len(name) >= len("START") and name[:5] == "START":
            # This column and the following are not useful
            break
        result[name] = np.array([sheet.cell(i, cur_col).value for i in range(1, nrows)])

    return result
Example #11
0
    def _read_xls(self, wb: xlrd.book.Book):
        data = list()
        for ws in wb.sheets():  # type: xlrd.sheet.Sheet
            hour_data = pd.DataFrame(data=ws._cell_values[3:],
                                     columns=[
                                         'node_id', 'node_name', 'u',
                                         'region_id', 'price', 'empty'
                                     ])
            hour_data.drop(['node_name', 'u', 'empty'], axis=1, inplace=True)
            hour_data['hour'] = int(ws.name)
            hour_data['region_id'] = hour_data.region_id.replace(
                region_map).astype(int)
            hour_data['price'] = pd.to_numeric(hour_data.price)
            data.append(hour_data)

        data = pd.concat(data)  # type: pd.DataFrame
        data['date'] = pd.to_datetime(self._context[0])
        # в какой-то момент int превращаются в float
        data.node_id = data.node_id.astype(int)
        data.hour = data.hour.astype(int)

        self.data.append(data)
Example #12
0
    def _read_xls(self, wb: xlrd.book.Book):
        data = list()
        for ws in wb.sheets():  # type: xlrd.sheet.Sheet
            hour_data = pd.DataFrame(
                data={
                    'hour': int(ws.name),
                    'node_from': ws.col_values(0, 5),
                    'node_to': ws.col_values(1, 5),
                    'branch_num': ws.col_values(2, 5),
                    'flow': ws.col_values(3, 5)
                })
            data.append(hour_data)

        data = pd.concat(data)  # type: pd.DataFrame
        data['date'] = pd.to_datetime(self._context[0])
        # в какой-то момент int превращаются в float
        data.node_from = data.node_from.astype(int)
        data.node_to = data.node_to.astype(int)
        data.branch_num = data.branch_num.astype(int)
        data.hour = data.hour.astype(int)

        self.data.append(data)
Example #13
0
    def _read_xls(self, wb: xlrd.book.Book):
        data = list()
        for ws in wb.sheets():  # type: xlrd.sheet.Sheet
            hour_data = pd.DataFrame(data=ws._cell_values[3:],
                                     columns=[
                                         'section_id', 'name', 'node1',
                                         'node2', 'branch_name', 'p_min',
                                         'p_max', 'v_ppp'
                                     ])
            hour_data.replace(['', ' '], [null, null], inplace=True)
            t = hour_data.p_min.isnull() & hour_data.p_max.isnull()
            hour_data.drop(hour_data.index.values[t], inplace=True)
            hour_data.p_min = hour_data.p_min.astype(str).str.replace(
                ',', '.').astype('float')
            hour_data.p_max = hour_data.p_max.astype(str).str.replace(
                ',', '.').astype('float')
            hour_data.v_ppp = hour_data.v_ppp.astype(str).str.replace(
                ',', '.').astype('float')
            hour_data['is_active'] = ((hour_data.p_min == hour_data.v_ppp) |
                                      (hour_data.p_max == hour_data.v_ppp)) * 1
            hour_data.drop([
                'name', 'node1', 'node2', 'branch_name', 'p_min', 'p_max',
                'v_ppp'
            ],
                           axis=1,
                           inplace=True)
            hour_data['hour'] = int(ws.name)
            data.append(hour_data)

        data = pd.concat(data)  # type: pd.DataFrame
        data['date'] = pd.to_datetime(self._context[0])
        # в какой-то момент int превращаются в float
        data.section_id = data.section_id.astype(int)
        data.hour = data.hour.astype(int)
        data.is_active = data.is_active.astype(int)

        self.data.append(data)
Example #14
0
    def _read_xls(self, wb: xlrd.book.Book):
        data = list()
        for ws in wb.sheets():  # type: xlrd.sheet.Sheet
            p_list = ws.col_values(3, 8)
            if p_list[0] == "*":
                p_list[0] = 0

            hour_data = pd.DataFrame(
                data={
                    'hour': int(ws.name),
                    'price': p_list,
                    'volume': ws.col_values(4, 8)
                })
            data.append(hour_data)

        data = pd.concat(data)  # type: pd.DataFrame
        date, pz = self._context
        data['date'] = pd.to_datetime(date)
        data['pz'] = pz
        # в какой-то момент int превращаются в float
        data.pz = data.pz.astype(int)
        data.hour = data.hour.astype(int)

        self.data.append(data)
Example #15
0
def _get_sheet(book: xlrd.book.Book, sheet_name=False) -> pd.DataFrame:
    """指定したsheet nameのシートをpandasで開く"""
    sheet = book.sheet_by_name(sheet_name)
    table = [sheet.row_values(row) for row in range(sheet.nrows)]
    index = _get_content_start_row_index(table)
    return pd.DataFrame(table[index + 1:], columns=table[index])