Beispiel #1
0
    def _search(self, sheet: Sheet, column: str, target: str, only_one=False):
        """
        Collect the rows whose cell in ``column`` equals ``target``.

        Rows are scanned from index 0 up to ``sheet.nrows``.
        :param sheet: A Sheet XLRD Object to scan
        :param column: Name of the column whose cells are compared
        :param target: Value to look for
        :param only_one: (boolean) stop after the first match (true) or
            collect every match (false)
        :return list: one dictionary per matching row, mapping every column
            name to the value stored in that row (empty list when no match)
        """
        name_to_index = pyrvtools.pyrvtools.PyRvtools.get_columns_names(sheet)

        hits = []
        for row_idx in range(sheet.nrows):
            if sheet.cell_value(row_idx, name_to_index[column]) != target:
                continue
            hits.append(row_idx)
            if only_one:
                break

        return [
            {
                name: sheet.cell_value(row_idx, col_idx)
                for name, col_idx in name_to_index.items()
            }
            for row_idx in hits
        ]
Beispiel #2
0
 def do_sheet(self, elem):
     """
     Register the sheet described by ``elem`` on the book being built.

     Reads the relationship id, ``sheetId`` and ``name`` attributes, resolves
     the relationship id through ``relid2reltype`` / ``relid2path`` and, for
     relationships of type ``'worksheet'`` only, appends a new ``Sheet``
     together with its visibility, name, target path and sheet id to the
     bookkeeping lists. Any other relationship type is skipped.
     """
     book = self.bk
     sheet_index = book.nsheets
     rel_id = elem.get(U_ODREL + 'id')
     sheet_id = int(elem.get('sheetId'))
     sheet_name = unescape(ensure_unicode(elem.get('name')))
     rel_type = self.relid2reltype[rel_id]
     rel_target = self.relid2path[rel_id]
     verbose = self.verbosity >= 2
     if verbose:
         self.dumpout(
             'sheetx=%d sheetId=%r rid=%r type=%r name=%r',
             sheet_index, sheet_id, rel_id, rel_type, sheet_name)
     if rel_type != 'worksheet':
         if verbose:
             self.dumpout(
                 'Ignoring sheet of type %r (name=%r)', rel_type, sheet_name)
         return
     # A missing "state" attribute (None) maps to the same code as
     # 'visible'; an unrecognised state raises KeyError.
     visibility_codes = {None: 0, 'visible': 0, 'hidden': 1, 'veryHidden': 2}
     book._sheet_visibility.append(visibility_codes[elem.get('state')])
     new_sheet = Sheet(book, position=None, name=sheet_name, number=sheet_index)
     new_sheet.utter_max_rows = X12_MAX_ROWS
     new_sheet.utter_max_cols = X12_MAX_COLS
     book._sheet_list.append(new_sheet)
     book._sheet_names.append(sheet_name)
     book.nsheets += 1
     self.sheet_targets.append(rel_target)
     self.sheetIds.append(sheet_id)
Beispiel #3
0
    def __bypass_for_spec_col(self, sheet: Sheet, row_num: int) -> None:
        """
        Store the configured columns of one row under the row's key value.

        The key is the cell in the comparison column. If ``out_data`` already
        holds a non-empty record for that key, the row is only counted (and
        the key printed when ``_info_name`` is set).
        :param sheet: Sheet to read from
        :param row_num: Number row
        :return:
        """
        key_col = self._comparison_col
        record_key = sheet.row_values(row_num, key_col, key_col + 1)[0]

        if not self.out_data.get(record_key):
            record = {}
            self.out_data[record_key] = record
            for field, col in self._dict_col.items():
                record[field] = sheet.row_values(row_num, col, col + 1)[0]
            if self._dict_col_category:
                self.__get_category(record_key, sheet, row_num)
            return

        # Repeated key: report it if requested and count it.
        if self._info_name:
            print(record_key)
        self._number_count += 1
Beispiel #4
0
    def __bypass_default(self, sheet: Sheet, row_num: int) -> None:
        """
        Store every value of one row under the row's key value.

        Used when no dictionary of specific fields was supplied: the whole row
        is stored as ``{column index: value}`` in ``out_data``, keyed by the
        cell in the comparison column. If ``out_data`` already holds a
        non-empty record for that key, the row is only counted (and the key
        printed when ``_info_name`` is set).
        :param sheet: Sheet to read from
        :param row_num: Number row
        :return:
        """
        # The key is the cell value itself. The previous code passed the list
        # returned by row_values() to dict.get(), which raises TypeError
        # because lists are unhashable.
        key = sheet.row_values(row_num, self._comparison_col,
                               self._comparison_col + 1)[0]

        if self.out_data.get(key):
            if self._info_name:
                print(key)
            self._number_count += 1
            return

        self.out_data[key] = dict(enumerate(sheet.row_values(row_num)))
 def do_sheet(self, elem):
     """
     Add the sheet described by ``elem`` to the book under construction.

     The element's relationship id is resolved to a relationship type and a
     target path. Only relationships of type ``"worksheet"`` produce a new
     ``Sheet``; for those the sheet, its visibility code, name, target path
     and sheet id are appended to the corresponding lists. Everything else
     is ignored.
     """
     workbook = self.bk
     index = workbook.nsheets
     relation_id = elem.get(U_ODREL + "id")
     numeric_sheet_id = int(elem.get("sheetId"))
     title = unescape(ensure_unicode(elem.get("name")))
     relation_type = self.relid2reltype[relation_id]
     path = self.relid2path[relation_id]
     chatty = self.verbosity >= 2
     if chatty:
         self.dumpout(
             "sheetx=%d sheetId=%r rid=%r type=%r name=%r",
             index, numeric_sheet_id, relation_id, relation_type, title,
         )
     if relation_type != "worksheet":
         if chatty:
             self.dumpout("Ignoring sheet of type %r (name=%r)", relation_type, title)
         return
     # An absent "state" attribute (None) gets the same code as "visible";
     # an unrecognised state raises KeyError.
     state_codes = {None: 0, "visible": 0, "hidden": 1, "veryHidden": 2}
     workbook._sheet_visibility.append(state_codes[elem.get("state")])
     created = Sheet(workbook, position=None, name=title, number=index)
     created.utter_max_rows = X12_MAX_ROWS
     created.utter_max_cols = X12_MAX_COLS
     workbook._sheet_list.append(created)
     workbook._sheet_names.append(title)
     workbook.nsheets += 1
     self.sheet_targets.append(path)
     self.sheetIds.append(numeric_sheet_id)
Beispiel #6
0
def xlrd_sheet_to_list_of_dict(sheet: Sheet) -> List[Dict]:
    """Return one dict per data row, keyed by the values of row 0.

    Row 0 supplies the keys; every following row becomes a dict mapping
    those keys to the row's cell values.
    """
    column_range = range(sheet.ncols)
    header = [sheet.cell(0, col).value for col in column_range]
    records = []
    for row in range(1, sheet.nrows):
        record = {}
        for col in column_range:
            record[header[col]] = sheet.cell(row, col).value
        records.append(record)
    return records
def Get_Excel_Row_Values(filepath, sheetName, uniqueValue):
    """Return the values of the row that contains ``uniqueValue``.

    Opens the workbook at ``filepath``, scans every cell of the sheet named
    ``sheetName`` and returns the full row (from column 0) in which a cell
    equals ``uniqueValue``. If several rows match, the last one is returned.

    :param filepath: path of the workbook to open
    :param sheetName: name of the sheet to scan
    :param uniqueValue: cell value to look for
    :return: list of the matching row's values, or ``None`` when no cell
        matches (previously this case raised ``UnboundLocalError``)
    """
    # Lower-case locals so the ``Sheet`` class name is not shadowed.
    book = xlrd.open_workbook(filepath)
    sheet = book.sheet_by_name(sheetName)
    row_values = None
    for row_index in range(sheet.nrows):
        for col_index in range(sheet.ncols):
            if sheet.cell_value(row_index, col_index) == uniqueValue:
                row_values = sheet.row_values(row_index, 0)
                # One hit is enough for this row; move on to the next row.
                break
    return row_values
Beispiel #8
0
def make_client_map(client_list: Sheet) -> Dict[str, str]:
    """Map each client (column 1) to its salesperson (column 3).

    Both columns are read from row 1 onwards. A client with an empty
    salesperson is logged and assigned '其他'. When a client appears again
    with a different salesperson, the first assignment is kept and the
    conflict is logged.
    """
    clients = client_list.col_values(1, 1)
    sales = client_list.col_values(3, 1)
    assigned = {}
    for idx, client in enumerate(clients):
        salesperson = sales[idx]
        if not salesperson:
            log('“{}”的业务员为空,归为“其他”'.format(client))
            salesperson = '其他'
        if client not in assigned:
            assigned[client] = salesperson
            continue
        kept = assigned[client]
        if kept != salesperson:
            log('“{}”同时属于“{}”和“{}”,自动归为“{}”'.format(
                client, kept, salesperson, kept))
    return assigned
def make_sheet(rows,book=None,name='test sheet',number=0):
    """Build a Sheet from ``rows`` and add it to ``book``.

    A ``DummyBook`` is created when ``book`` is None. Each entry of a row is
    either a ``(cell_type, value)`` tuple or a bare value, which is stored
    with cell type ``XL_CELL_TEXT``.
    """
    if book is None:
        book = DummyBook()
    book._sheet_visibility.append(0)
    sheet = Sheet(book, 0, name, number)
    book.add(sheet)
    for rowx, row in enumerate(rows):
        for colx, entry in enumerate(row):
            if isinstance(entry, tuple):
                cell_type, cell_value = entry
            else:
                cell_type, cell_value = XL_CELL_TEXT, entry
            sheet.put_cell(rowx, colx, cell_type, cell_value, 0)
    return sheet
Beispiel #10
0
def make_sheet(rows, book=None, name='test sheet', number=0):
    """Create a Sheet filled from ``rows`` and register it on ``book``.

    When ``book`` is None a fresh ``DummyBook`` is used. Row entries given as
    ``(cell_type, value)`` tuples keep their type; any other entry is written
    as ``XL_CELL_TEXT``.
    """
    target_book = DummyBook() if book is None else book
    target_book._sheet_visibility.append(0)
    result = Sheet(target_book, 0, name, number)
    target_book.add(result)
    for row_index, row_entries in enumerate(rows):
        for col_index, item in enumerate(row_entries):
            kind, payload = (
                item if isinstance(item, tuple) else (XL_CELL_TEXT, item)
            )
            result.put_cell(row_index, col_index, kind, payload, 0)
    return result
Beispiel #11
0
 def process_row(self, row_index:int, sheet:Sheet):
     """
     Handle one sheet row: a row made only of empty strings closes the
     current table (``add_table`` then ``state_process``); any other row is
     appended to the current table after passing through ``fix_floats``.
     """
     cleaned = self.fix_floats(sheet.row_values(row_index))
     blank_row = [''] * len(cleaned)
     if cleaned != blank_row:
         self.current_table.add_row(cleaned)
         return
     self.add_table()
     self.state_process()
Beispiel #12
0
def _assert_sheet_content(sheet_name: str, actual_worksheet: Sheet,
                          expected_worksheet: Sheet):
    """Assert that two worksheets have the same shape and the same cells.

    Row and column counts are compared first, then every cell of the actual
    sheet is compared with the cell at the same position in the expected
    sheet, by cell type and then by value. ``sheet_name`` is only used in the
    assertion messages.
    """
    same_row_count = actual_worksheet.nrows == expected_worksheet.nrows
    assert same_row_count, f"Different number of rows in {sheet_name} sheet"
    same_col_count = actual_worksheet.ncols == expected_worksheet.ncols
    assert same_col_count, f"Different number of columns in {sheet_name} sheet"

    for row_index, actual_cells in enumerate(actual_worksheet.get_rows()):
        expected_cells = expected_worksheet.row(row_index)
        for cell_index, got in enumerate(actual_cells):
            want = expected_cells[cell_index]
            assert got.ctype == want.ctype, (
                f"Different cell type in row {row_index}, col {cell_index} in {sheet_name} sheet"
            )
            assert got.value == want.value, (
                f"Different cell content in row {row_index}, col {cell_index} in {sheet_name} sheet"
            )
Beispiel #13
0
def get_rows_with_headers(
        sheet: Sheet) -> Tuple[List[Cell], Generator[List[Cell], None, None]]:
    """
    Split a sheet into its header values and an iterator over the other rows.

    The first row yielded by ``sheet.get_rows()`` is consumed and its cell
    values are returned as a list; the same, partly consumed, row iterator is
    returned alongside so the caller continues with the second row.
    """
    remaining_rows = sheet.get_rows()
    first_row = next(remaining_rows)
    headers = []
    for cell in first_row:
        headers.append(cell.value)
    return headers, remaining_rows
Beispiel #14
0
def _find_column_index(sheet: Sheet, column_name: str) -> int:
    """Return the index of the first column whose row-0 cell equals
    ``column_name``, or -1 when no column matches."""
    for index in range(sheet.ncols):
        if sheet.cell_value(0, index) == column_name:
            return index
    return -1
Beispiel #15
0
    def get_columns_names(sheet: Sheet):
        """
        Build a COLUMN_NAME:ID_COLUMN dictionary from row 0 of the sheet.

        When the same name appears in several columns, the last column index
        is the one kept.
        :param sheet: a Sheet object
        """
        return {
            sheet.cell_value(0, col_index): col_index
            for col_index in range(sheet.ncols)
        }
Beispiel #16
0
def get_column_values(sheet: Sheet, column_name: str) -> List:
    """Return the values of the named column, from row 1 to the last row.

    Raises ``Exception`` when ``_find_column_index`` reports (-1) that the
    sheet has no such column.
    """
    column_index = _find_column_index(sheet, column_name)
    if column_index == -1:
        raise Exception(f"Sheet does not contain column {column_name}")

    return [
        sheet.cell_value(row_index, column_index)
        for row_index in range(1, sheet.nrows)
    ]
Beispiel #17
0
 def __parse_sheet(self, sheet_id: int, sheet: Sheet) -> Table:
     """
     Copy every cell value of ``sheet`` into a list of rows and wrap it in a
     ``Table`` whose metadata carries ``sheet_id`` as ``page_id``.
     """
     row_ids = range(sheet.nrows)
     col_ids = range(sheet.ncols)
     cells = [
         [sheet.cell_value(rowx=row_id, colx=col_id) for col_id in col_ids]
         for row_id in row_ids
     ]
     return Table(cells=cells, metadata=TableMetadata(page_id=sheet_id))
Beispiel #18
0
def get_headers(sheet: Sheet):
    """Return the normalised header names of the sheet as a tuple.

    The sheet is scanned for the first row whose first cell is a string
    starting with '*' and whose other cells are all empty strings; the row
    right after it is taken as the header row. Each header value has '.'
    removed, '/' and ' ' replaced by '_', and is lower-cased.
    """
    header_row_idx = 0
    for row in sheet.get_rows():
        # Count the row first: the header sits one row below the marker row.
        header_row_idx += 1
        first_value = row[0].value
        if type(first_value) is not str or not first_value.startswith('*'):
            continue
        if all(type(cell.value) is str and len(cell.value) == 0
               for cell in row[1:]):
            break

    return tuple(
        cell.value.replace('.', '').replace('/', '_').replace(' ', '_').lower()
        for cell in sheet.row(header_row_idx)
    )
Beispiel #19
0
    def __init__(self, route_table: Sheet):
        """
        Index the schools listed in ``route_table``.

        Column 0 holds the route, column 1 the school and column 2 the
        abbreviation. Complete rows fill ``schools`` and give each new route
        and school the next free index in ``rt_idx`` / ``sc_idx``; rows with
        any empty field are logged and skipped.
        """
        self.rt_idx = {}
        self.sc_idx = {}
        self.schools = {}  # school: (route, abbr)

        routes = route_table.col_values(0)
        schools = route_table.col_values(1)
        assert len(schools) == len(routes)
        abbrs = route_table.col_values(2)
        assert len(abbrs) == len(schools)

        records = zip(schools, routes, abbrs)
        for line_no, (school, route, abbr) in enumerate(records, start=1):
            if school and route and abbr:
                self.schools[school] = (route, abbr)
                self.rt_idx.setdefault(route, len(self.rt_idx))
                self.sc_idx.setdefault(school, len(self.sc_idx))
                continue
            stm = '不完整的记录:第{}行:“{} {} {}”,已丢弃'
            log(stm.format(line_no, school, route, abbr))
Beispiel #20
0
def commit_from_sheet(ws: Sheet, model: db.Model, **kwargs):
    """Initialize DB table data from XLRD Worksheet.

    Initialize table data from source data associated with corresponding
    data model. The first worksheet row is the header; every following row
    is turned into a ``model`` instance and added to ``db.session``. For the
    ``Data`` model the survey, indicator and characteristic codes of each row
    are resolved to ids through the lookups passed in ``kwargs``.

    Args:
        ws (xlrd.sheet.Sheet): XLRD worksheet object.
        model (class): SqlAlchemy model class.
        **kwargs: For the ``Data`` model, must contain the 'survey',
            'indicator' and 'characteristic' lookups (objects with ``get``).

    Raises:
        PmaApiDbInteractionError: If a row cannot be turned into a model
            instance; the original exception is chained as the cause.
    """
    survey, indicator, characteristic = '', '', ''
    if model == Data:
        survey = kwargs['survey']
        indicator = kwargs['indicator']
        characteristic = kwargs['characteristic']
    header = None

    for i, row in enumerate(ws.get_rows()):
        row = [r.value for r in row]
        if i == 0:
            header = row
            continue

        row_dict = dict(zip(header, row))
        if model == Data:
            row_dict['survey_id'] = survey.get(row_dict.get('survey_code'))
            row_dict['indicator_id'] = \
                indicator.get(row_dict.get('indicator_code'))
            row_dict['char1_id'] = \
                characteristic.get(row_dict.get('char1_code'))
            row_dict['char2_id'] = \
                characteristic.get(row_dict.get('char2_code'))
        try:
            record = model(**row_dict)
        # The original tuple of exception types ended in ``Exception``, so
        # catching ``Exception`` alone covers exactly the same errors.
        except Exception as err:
            # Fill in the template BEFORE appending the error text: the
            # error message may itself contain '{' or '}', which would make
            # str.format() fail if it were applied afterwards.
            msg = 'Error when processing data import.\n' \
                  '- Worksheet name: {}\n' \
                  '- Row number: {}\n' \
                  '- Cell values: {}\n\n' \
                  '- Original Error:\n'.format(ws.name, i + 1, row)
            msg += type(err).__name__ + ': ' + str(err)
            logging.error(msg)
            raise PmaApiDbInteractionError(msg) from err

        db.session.add(record)
Beispiel #21
0
    def __get_category(self, key: str, sheet: Sheet, row_num: int) -> None:
        """
        Add the category fields of one row to the record stored under ``key``.

        Executed when a dictionary of category fields is specified. With
        ``_join`` set, the category cells are concatenated with
        ``_delimiter`` and stored under 'category'; otherwise each category
        cell is stored under its own name from ``_dict_col_category``.
        :param key: Key record
        :param sheet: Sheet to read from
        :param row_num: Number row
        :return:
        """
        if self._join:
            cat = ''
            for col in self._dict_col_category.values():
                part = sheet.row_values(row_num, col, col + 1)[0]
                if cat:
                    cat += f'{self._delimiter}{part}'
                else:
                    cat = part
            self.out_data[key]['category'] = cat

        else:
            # Iterate name/column pairs; iterating the dict itself yields only
            # the keys, so the ``name, col`` unpacking could not work.
            for name, col in self._dict_col_category.items():
                self.out_data[key][name] = \
                    sheet.row_values(row_num, col, col + 1)[0]
Beispiel #22
0
    def filter_data(
            self,
            sheet: Sheet,
            datetime_handler: Dict[int,
                                   str] = None) -> List[Dict[str, object]]:
        ''' Filter the data of the given sheet and return it.

        Every row is passed through ``filter_row_data``; per the original
        notes, the filtering is meant to:
            1. keep integers from being read back as decimals
            2. keep dates/times from being read back as decimals, default
               format: '%Y-%m-%d %H:%M:%S'
            3. apply a specific date/time format to the columns named in
               ``datetime_handler`` (default format when not given)
            4. convert boolean cells to true/false
        The filtered row at index 1 supplies the keys; rows from index 2
        onwards supply the values.

        :param sheet: the Sheet object to read
        :param datetime_handler: dict with the column number (starting at 0)
            as key and the date format as value
            (e.g. {0:'%Y-%m-%d %H:%M:%S',2:'%Y-%m-%d'} formats the first and
            the third column)
        :return: the rows of the sheet as
            list=[row1{param1:value1,param2:value2...},
            row2{param1:value1,param2:value2...},....]

        ctype values, for reference:
            XL_CELL_EMPTY: 'empty',0
            XL_CELL_TEXT: 'text',1
            XL_CELL_NUMBER: 'number',2
            XL_CELL_DATE: 'xldate',3
            XL_CELL_BOOLEAN: 'bool',4
            XL_CELL_ERROR: 'error',5
            XL_CELL_BLANK: 'blank',6
        '''
        row_total = sheet.nrows
        keys = self.filter_row_data(sheet.row_slice(1), datetime_handler)
        return [
            dict(zip(keys, self.filter_row_data(sheet.row_slice(row_index),
                                                datetime_handler)))
            for row_index in range(2, row_total)
        ]
Beispiel #23
0
def get_merged_cells_value(sheet: Sheet, row_index, col_index):
    """
    Check whether the given cell belongs to a merged range and, if so,
    return the value of that range's top-left cell.

    Each range is taken as (rlow, rhigh, clow, chigh) with the high bounds
    exclusive.
    :return: the merged range's value, or None when the cell is not merged
    """
    for rlow, rhigh, clow, chigh in get_merged_cells(sheet):
        inside = rlow <= row_index < rhigh and clow <= col_index < chigh
        if inside:
            return sheet.cell_value(rlow, clow)
    return None
Beispiel #24
0
def get_boundaries(sheet: Sheet) -> Tuple[int, int]:
    """Locate the rows enclosed by the first two boundary rows.

    A boundary row is one in which every cell holds a string starting with
    '*' (a row without cells counts as one). Returns the index after the
    first boundary row and the index before the second one; a boundary that
    is not found is treated as index -1 before the +1 / -1 adjustment.
    """
    boundary_rows = [
        row_idx
        for row_idx, row in enumerate(sheet.get_rows())
        if all(type(cell.value) is str and cell.value.startswith('*')
               for cell in row)
    ]
    start_row_num = boundary_rows[0] if boundary_rows else -1
    end_row_num = boundary_rows[1] if len(boundary_rows) > 1 else -1
    return start_row_num + 1, end_row_num - 1
Beispiel #25
0
def get_dividend_rows(sheet: Sheet) -> List[Dict[str, Any]]:
    """Return the rows whose 'narration' contains 'ACH' or 'DIV'.

    Rows in the range reported by ``get_boundaries`` are turned into dicts
    keyed by the names from ``get_headers``. Each dict also gets an 'index'
    entry: its 1-based position within that range.
    """
    first_row, stop_row = get_boundaries(sheet)
    header_names: Tuple[str, ...] = get_headers(sheet)

    matches: List[Dict[str, Any]] = []
    for position, row_idx in enumerate(range(first_row, stop_row), start=1):
        values = (cell.value for cell in sheet.row(row_idx))
        entry: Dict[str, Any] = dict(zip(header_names, values))
        entry['index'] = position

        narration = entry['narration']
        if narration.find('ACH') >= 0 or narration.find('DIV') >= 0:
            matches.append(entry)

    return matches
Beispiel #26
0
    def __init__(self, sheet: Sheet, keys: Iterable[str] = ..., key_row=0):
        """
        Load the rows of ``sheet`` into an in-memory SQLite table ``TEMP``.

        :param sheet: sheet whose rows after ``key_row`` are loaded; rows
            that have a second column with an empty value are skipped
        :param keys: column specs of the form 'NAME TYPE SHEET_COLUMN';
            ``self.ARGS`` is used when left at the default ``...``
        :param key_row: index of the row above the first data row

        When the default column specs are used, the 'kind', 'meal' and
        'name' orders are registered as well.
        """
        self.conn = sqlite3.connect(':memory:')
        self.cur = self.conn.cursor()
        self.where = None
        self.orders = {}

        # create table
        # NOTE: column names/types come from the specs and are interpolated
        # into the SQL text, so the specs must be trusted.
        definitions = []
        col_keys = []  # (sheet column index, table column name)
        for spec in (self.ARGS if keys is ... else keys):
            key, ktype, col = spec.split()
            definitions.append('{} {}'.format(key, ktype))
            col_keys.append((int(col), key))
        self.cur.execute('CREATE TABLE TEMP({});'.format(','.join(definitions)))

        # add records
        # Values are bound as parameters rather than spliced in with repr():
        # repr() output is Python syntax, which SQLite rejects for strings
        # that contain quote characters.
        insert = 'INSERT INTO TEMP({}) VALUES({})'.format(
            ','.join(name for _, name in col_keys),
            ','.join('?' * len(col_keys)))
        records = []
        for i in range(key_row + 1, sheet.nrows):
            row = sheet.row_values(i)
            if (len(row) > 1) and (not row[1]):
                continue
            records.append([row[index] for index, _ in col_keys])
        self.cur.executemany(insert, records)
        self.conn.commit()

        # add extra order
        if keys is not ...:
            return
        order = ['肉', '菜', '油料干货', '调料制品', '杂货类']
        self.add_order('kind', order)
        order = ['营养餐', '非营养餐', '幼儿餐', '教师餐']
        self.add_order('meal', order)

        cur = self.select('DISTINCT NAME, KIND')
        order = self.orders['kind']
        names = [(order.get(k, len(order)), n) for n, k in cur]
        names.sort()
        self.add_order('name', [i[1] for i in names])
Beispiel #27
0
def read_student_info(sheet: Sheet, file_name: str) -> dict:
    """Read one student's record and print it.

    The id is ``file_name`` with 'student_' and '.xlsx' removed, the name is
    cell (0, 4) and the score is cell (15, 4) rounded to one decimal.
    Returns a dict with the keys 'id', 'name' and 'score'.
    """
    student_id = file_name.replace('student_', '').replace('.xlsx', '')
    name_cell = sheet.cell(0, 4)
    score_cell = sheet.cell(15, 4)
    info = {
        'id': student_id,
        'name': name_cell.value,
        'score': round(score_cell.value, 1),
    }
    summary = ' : '.join((info['id'], info['name'], str(info['score'])))
    print('[read_student_info]\t' + summary)
    return info
Beispiel #28
0
 def process_schema(self, row_index:int, sheet:Sheet):
     """Store the value of the row's first cell as the current schema."""
     schema_cell = sheet.cell(row_index, 0)
     self.schema = schema_cell.value
Beispiel #29
0
 def process_table(self, row_index:int, sheet:Sheet):
     """
     Start a new table: the row's first cell gives the table name, and an
     empty ``Table`` for the current schema becomes the current table.
     """
     name_cell = sheet.cell(row_index, 0)
     self.current_table_name = name_cell.value
     schema, table_name = self.schema, self.current_table_name
     self.current_table = Table(schema, table_name, [], [])
Beispiel #30
0
def extract_header(ws: Sheet):
    """Return the values of cells (1, 1) and (1, 6) of ``ws`` as a 2-tuple."""
    first_value = ws.cell_value(1, 1)
    second_value = ws.cell_value(1, 6)
    return first_value, second_value
Beispiel #31
0
 def process_cols(self, row_index:int, sheet:Sheet):
     """Use the values of the given row as the current table's columns."""
     column_names = sheet.row_values(row_index)
     self.current_table.cols = column_names