Esempio n. 1
0
def get_head_line(sheet_data: xlrd.sheet.Sheet):
    """
    Return the cell values of the sheet's first row as a list.

    :param sheet_data: sheet whose first row (row index 0) is read
    :return: values of row 0, from column 0 up to ``sheet_data.ncols``
    """
    header_row_index = 0
    column_count = sheet_data.ncols
    return sheet_data.row_values(
        header_row_index, start_colx=0, end_colx=column_count)
Esempio n. 2
0
 def diff_sheet(self, s1: xlrd.sheet.Sheet, s2: xlrd.sheet.Sheet):
     """
     Compute the difference between two sheets.

     Columns are matched by the text of their header cell (read from row
     ``self._header_row``, starting at column ``self._start_col``). A
     header text may occur several times; the n-th occurrence in ``s1``
     is paired with the n-th occurrence in ``s2`` and surplus occurrences
     are reported as removed/added columns. The data rows (from
     ``self._start_row`` on) of the paired columns are then compared with
     ``self.diff_data``.

     :param s1: sheet 1 (source)
     :param s2: sheet 2 (destination)
     :return: dict with the keys ``'added_cols'``, ``'removed_cols'`` and
         ``'modified_data'`` describing the diff of s1 and s2, or ``None``
         when nothing differs
     """
     start_col = self._start_col
     start_row = self._start_row

     def group_cols_by_header(sheet):
         # Map header text -> list of column indices (relative to
         # start_col). Sheets may contain headers with the same name.
         grouped = {}
         headers = sheet.row_values(self._header_row,
                                    start_colx=start_col)
         for idx, value in enumerate(headers):
             grouped.setdefault(str(value), []).append(idx)
         return grouped

     def read_rows(sheet, abs_cols):
         # Read the data rows of `sheet` as lists of strings, restricted
         # to the given absolute column indices.
         if start_row > sheet.nrows:
             LOGGER.warning(
                 'Sheet %s: start row %d is larger than num rows %d!',
                 sheet.name, start_row, sheet.nrows)
             return []
         return [[str(sheet.cell_value(row, col)) for col in abs_cols]
                 for row in range(start_row, sheet.nrows)]

     # diff header
     header_cols1 = group_cols_by_header(s1)
     header_cols2 = group_cols_by_header(s2)
     removed_cols, kept_cols, added_cols = get_iter_diff(
         header_cols1.keys(), header_cols2.keys())

     # please do not change col name or switch data frequently!
     # NOTE(review): the indices reported for added/removed columns are
     # relative to start_col (unchanged from the previous behaviour),
     # while modified cells below report absolute columns - confirm
     # what consumers of the diff expect.
     sheet_diff = {
         'added_cols': [{
             'name': h,
             'indices': header_cols2[h]
         } for h in added_cols],
         'removed_cols': [{
             'name': h,
             'indices': header_cols1[h]
         } for h in removed_cols],
         'modified_data': {},
     }

     # map cols: relative col index in s1 -> relative col index in s2
     cols1_cols2 = {}
     for h in kept_cols:
         cols1, cols2 = header_cols1[h], header_cols2[h]
         paired = min(len(cols1), len(cols2))
         # Occurrences beyond the shared count exist on one side only.
         if len(cols1) > paired:
             sheet_diff['removed_cols'].append({
                 'name': h,
                 'indices': cols1[paired:]
             })
         elif len(cols2) > paired:
             sheet_diff['added_cols'].append({
                 'name': h,
                 'indices': cols2[paired:]
             })
         cols1_cols2.update(zip(cols1[:paired], cols2[:paired]))
     modified = bool(sheet_diff['removed_cols'] or sheet_diff['added_cols'])

     # Header indices are relative to start_col, but cell_value() expects
     # absolute column indices, so shift them before reading the cells.
     indices1 = sorted(cols1_cols2)
     d1 = read_rows(s1, [c + start_col for c in indices1])
     d2 = read_rows(s2, [cols1_cols2[c] + start_col for c in indices1])

     # diff data
     data_diff = self.diff_data(d1, d2)
     if data_diff:
         modified = True
         # diff_data works on positions inside d1/d2; translate them back
         # to absolute sheet columns.
         data_diff['modified_cells'] = [
             dict(d,
                  src_col=indices1[d['src_col']] + start_col,
                  dest_col=cols1_cols2[indices1[d['dest_col']]] + start_col)
             for d in data_diff['modified_cells']
         ]
         sheet_diff['modified_data'] = data_diff

     if not modified:
         return None
     # +1 to all indices if using excel
     if self._use_excel_indices:
         sheet_diff = ExcelDiffer._convert_idx_of_sheet_diff(sheet_diff)
     return sheet_diff