def _retrieve_subindex_observations(self, structure_obs_sheet, subindex_name, subindex_scaled_column, sheet_year):
    self._log.debug(
        "\t\tRetrieving subindex %s observations in sheet %s..." % (subindex_name, structure_obs_sheet.name))
    year_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_YEAR_COLUMN", sheet_year))
    iso3_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_ISO3_COLUMN", sheet_year))
    check_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_CHECK_COLUMN", sheet_year))
    observation_start_row = self._config_getint("STRUCTURE_OBSERVATIONS", "OBSERVATION_START_ROW", sheet_year)
    empty_row_error_cache = {}

    try:
        subindex_rank_column = self._find_rank_column(structure_obs_sheet, subindex_name, sheet_year)
        if not subindex_rank_column:
            self._log.warn("No rank column found for SUBINDEX '%s' while parsing %s" % (
                subindex_name, structure_obs_sheet.name))
        indicator = self._indicator_repo.find_indicator_by_code(subindex_name, 'SUBINDEX')
        for row_number in range(observation_start_row, structure_obs_sheet.nrows):  # Per country
            if not structure_obs_sheet.cell(row_number, check_column).value or row_number in empty_row_error_cache:
                if row_number not in empty_row_error_cache:
                    self._log.debug(
                        "Skipping row while parsing %s[%s] (did not detect value on check column, additional errors regarding this row will be omitted)" % (
                            structure_obs_sheet.name, row_number))
                empty_row_error_cache[row_number] = True
                continue
            try:
                year = int(structure_obs_sheet.cell(row_number, year_column).value)
                iso3 = structure_obs_sheet.cell(row_number, iso3_column).value
                area = self._area_repo.find_by_iso3(iso3)
                value = structure_obs_sheet.cell(row_number, subindex_scaled_column).value
                rank = structure_obs_sheet.cell(row_number, subindex_rank_column).value if subindex_rank_column else None
                excel_observation = ExcelObservation(iso3=iso3, indicator_code=indicator.indicator, year=year,
                                                     rank=rank, value=value)
                if [t for t in self._excel_structure_observations if
                        t[0].year == year and t[1].iso3 == iso3 and t[2].indicator == indicator.indicator]:
                    self._log.warn("Ignoring duplicate observations for SUBINDEX %s while parsing %s [%s]" % (
                        indicator.indicator, structure_obs_sheet.name, colname(subindex_scaled_column)))
                    # Will not continue parsing, we could check this also at the beginning if we extract the
                    # year from the sheet name
                    return
                else:
                    self._excel_structure_observations.append((excel_observation, area, indicator))
            except AreaRepositoryError:
                self._log.error("No area with code %s for indicator %s while parsing %s" % (
                    iso3, indicator.indicator, structure_obs_sheet.name))
            except:
                self._log.error("Unexpected error parsing %s[%s]" % (structure_obs_sheet.name, row_number))
    except IndicatorRepositoryError:
        self._log.error(
            "No SUBINDEX '%s' indicator found while parsing %s [%s]" % (
                subindex_name, structure_obs_sheet.name, colname(subindex_scaled_column)))
def show_row(bk, sh, rowx, colrange, printit):
    if printit:
        print
    if bk.formatting_info:
        for colx, ty, val, cxfx in get_row_data(bk, sh, rowx, colrange):
            if printit:
                print "cell %s%d: type=%d, data: %r, xfx: %s" \
                    % (xlrd.colname(colx), rowx+1, ty, val, cxfx)
    else:
        for colx, ty, val, _unused in get_row_data(bk, sh, rowx, colrange):
            if printit:
                print "cell %s%d: type=%d, data: %r" % (xlrd.colname(colx), rowx+1, ty, val)
def show_row(bk, sh, rowx, colrange, printit):
    if bk.ragged_rows:
        colrange = range(sh.row_len(rowx))
    if not colrange:
        return
    if printit:
        print()
    if bk.formatting_info:
        for colx, ty, val, cxfx in get_row_data(bk, sh, rowx, colrange):
            if printit:
                print("cell %s%d: type=%d, data: %r, xfx: %s"
                      % (xlrd.colname(colx), rowx+1, ty, val, cxfx))
    else:
        for colx, ty, val, _unused in get_row_data(bk, sh, rowx, colrange):
            if printit:
                print("cell %s%d: type=%d, data: %r" % (xlrd.colname(colx), rowx+1, ty, val))
def get_keys_attrs(sheet):
    """keys and attrs

    keys come from row 1, a normal string is ok
    attrs live with these keys, note text
    """
    progress["row"] = 1
    keys = list(itertools.takewhile(lambda x: isinstance(x, str) and x, sheet.row_values(0)))
    assert keys, progress
    assert len(set(keys)) == len(keys), keys
    attrs = []
    cell_note_map = sheet.cell_note_map
    for colx in range(len(keys)):
        colname = xlrd.colname(colx)
        progress["column"] = colname
        note = cell_note_map.get((0, colx))
        if note:
            txt = note.text
            out = note_text_to_attr(txt)
            logging.debug(
                "note_text_to_attr_{}({!r}) = {}".format(colname, txt, out))
            attrs.append(out)
        else:
            attrs.append({})
    return keys, attrs
def load_columns(file):
    rels = file.relationships.filter(type__name='Contains Column')
    table = load_file(file)
    matched = []
    for rel in rels:
        item = rel.right
        info = {
            'match': unicode(item),
            'rel_id': rel.pk,
        }
        if isinstance(item, UnknownItem):
            info['unknown'] = True
        if rel.range_set.filter(type='list').exists():
            col = rel.range_set.get(type='list').start_column
            info['name'] = table.field_map.keys()[col].replace('\n', ' - ')
            info['column'] = colname(col)
            info['colnum'] = col
        elif rel.range_set.filter(type='value').exists():
            info['name'] = get_range_value(table, rel.range_set.get(
                type='head'
            ))
            info['value'] = get_range_value(table, rel.range_set.get(
                type='value'
            ))
        matched.append(info)
    matched.sort(key=lambda info: info.get('colnum', -1))
    return matched
def show_row(bk, sh, rowx, colrange, printit):
    if bk.ragged_rows:
        colrange = range(sh.row_len(rowx))
    if not colrange:
        return
    for colx, ty, val, cxfx in get_row_data(bk, sh, rowx, colrange):
        if printit:
            print(json.dumps([
                "cell",
                {
                    "r": rowx,
                    "c": colx,
                    "cn": xlrd.colname(colx),
                    "t": ty,
                    "v": val
                }]))
def print_qboxes(worksheet, qboxDimensions):
    qbox_headers, qbox_footers = zip(*qboxDimensions)
    for i, qbox_header_row in enumerate(qbox_headers):
        qbox_footer_row = qbox_footers[i]
        print "\r################################"
        print "QBOX " + str(i+1) + ":"
        print "Dimensions: (" + str(qbox_header_row) + ", " + str(2) + ") -- (" + str(qbox_footer_row) + ", " + str(5) + ")"
        for curr_row_num in range(qbox_header_row-1, qbox_footer_row+1):
            if curr_row_num == qbox_header_row-1:
                print "\r[Title Row " + str(curr_row_num+1) + "]"
            elif curr_row_num == qbox_header_row:
                print "\r[Key Row " + str(curr_row_num+1) + "]"
            else:
                print "\r[Row " + str(curr_row_num+1) + "]"
            qbox_row_values = worksheet.row_values(curr_row_num)
            for curr_col_num in range(1, 6):
                output = "cell was not read"
                if worksheet.cell_type(curr_row_num, curr_col_num) == 1:
                    output = unicodedata.normalize('NFKD', worksheet.cell_value(curr_row_num, curr_col_num)).encode('ascii', 'ignore')
                elif worksheet.cell_type(curr_row_num, curr_col_num) == 0:
                    output = "(Blank)\r"
                else:
                    output = str(worksheet.cell_value(curr_row_num, curr_col_num)) + "\t"
                print "[Col " + xlrd.colname(curr_col_num) + "]\t\t" + output
    return 0
def load_columns(run):
    table = run.load_io()
    cols = list(table.field_map.keys())
    matched = []
    for rng in run.range_set.exclude(type='data'):
        ident = rng.identifier
        info = {
            'match': str(ident),
            'rel_id': rng.pk,
            'type': ident.type,
        }
        if ident.type == 'meta':
            info['field_name'] = rng.identifier.field
            info['model'] = ctid(ident.content_type)
        elif ident.type == 'instance':
            info['%s_id' % ident.content_type.model] = ident.object_id
        else:
            info['value'] = ident.name

        if rng.type == 'list':
            col = rng.start_col
            info['name'] = cols[col].replace('\n', ' - ')
            info['column'] = colname(col)
            info['colnum'] = col
        elif rng.type == 'value':
            info['name'] = get_range_value(table, rng, rng.header_col, rng.start_col - 1)
            info['value'] = get_range_value(table, rng, rng.start_col, rng.end_col)
        matched.append(info)
    matched.sort(key=lambda info: info.get('colnum', -1))
    return matched
def compareFields(tabletIID, tabletData, paperData, IIDIndex):
    ws_paper.write(IIDIndex, 0, mapIID(tabletIID))
    ws_tablet.write(IIDIndex, 0, tabletIID)
    ws_diff.write(IIDIndex, 0, tabletIID[2:])
    startTime = None
    varIndex = 1
    for field in sorted(tabletData.keys()):
        name = field[4:]
        if name == keywords['startTime']:
            startTime = datetime.datetime.strptime(tabletData[field][0], "%I:%M:%S %p")
        if IIDIndex == 1:
            ws_diff.write(0, varIndex, name)
            ws_paper.write(0, varIndex, name)
            ws_tablet.write(0, varIndex, name)
        if name[:8] == keywords['runTime']:
            if tabletData[field][0] == '':
                runtime = 'n.a.'
            else:
                timestamp = datetime.datetime.strptime(tabletData[field][0], "%I:%M:%S %p")
                runtime = (timestamp - startTime).seconds / 60.0
            ws_paper.write(IIDIndex, varIndex, runtime)
            ws_tablet.write(IIDIndex, varIndex, runtime)
            ws_diff.write(IIDIndex, varIndex, runtime)
        else:
            if name != keywords['startTime']:
                ws_paper.write(IIDIndex, varIndex, paperData[field][1])
                ws_tablet.write(IIDIndex, varIndex, tabletData[field][1])
                row = str(IIDIndex + 1)
                col = xlrd.colname(varIndex)
                ws_diff.write(
                    IIDIndex, varIndex,
                    xlwt.Formula('IF(Paper!' + col + row + '=Tablet!' + col + row +
                                 ';"";CONCATENATE(Paper!' + col + row + ';" --- ";Tablet!' + col + row + '))'))
        varIndex += 1
def _retrieve_index_observations(self, structure_obs_sheet):
    self._log.info("\t\tRetrieving index observations...")
    empty_row_error_cache = {}
    sheet_year = re.match(self._config.get("STRUCTURE_OBSERVATIONS", "SHEET_NAME_PATTERN"),
                          structure_obs_sheet.name).group("year")
    year_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_YEAR_COLUMN", sheet_year))
    iso3_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_ISO3_COLUMN", sheet_year))
    observation_name_row = self._config_getint("STRUCTURE_OBSERVATIONS", "OBSERVATION_NAME_ROW", sheet_year)
    observation_start_row = self._config_getint("STRUCTURE_OBSERVATIONS", "OBSERVATION_START_ROW", sheet_year)
    check_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_CHECK_COLUMN", sheet_year))
    index_scaled_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_INDEX_SCALED_COLUMN", sheet_year))
    index_rank_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_INDEX_RANK_COLUMN", sheet_year))
    index_rank_change_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_INDEX_RANK_CHANGE_COLUMN", sheet_year))

    try:
        column_name = structure_obs_sheet.cell(observation_name_row, index_scaled_column).value
        parsed_column = self._parse_index_scaled_column_name(column_name, sheet_year)
        # Sanity check useful if there could be more than one INDEX, otherwise this check could be relaxed
        if not parsed_column:
            raise IndicatorRepositoryError("Column name '%s' does not match INDEX pattern while parsing %s" % (
                column_name, structure_obs_sheet.name))
        indicator = self._indicator_repo.find_indicator_by_code(parsed_column.group('index'))
        for row_number in range(observation_start_row, structure_obs_sheet.nrows):  # Per country
            if not structure_obs_sheet.cell(row_number, check_column).value or row_number in empty_row_error_cache:
                if row_number not in empty_row_error_cache:
                    self._log.debug(
                        "Skipping row while parsing %s[%s] (did not detect value on check column, additional errors regarding this row will be omitted)" % (
                            structure_obs_sheet.name, row_number))
                empty_row_error_cache[row_number] = True
                continue
            try:
                year = int(structure_obs_sheet.cell(row_number, year_column).value)
                iso3 = structure_obs_sheet.cell(row_number, iso3_column).value
                area = self._area_repo.find_by_iso3(iso3)
                value = structure_obs_sheet.cell(row_number, index_scaled_column).value
                rank = structure_obs_sheet.cell(row_number, index_rank_column).value
                # Allow for empty values here
                rank_change = na_to_none(structure_obs_sheet.cell(row_number, index_rank_change_column).value) \
                    if index_rank_change_column else None
                excel_observation = ExcelObservation(iso3=iso3, indicator_code=indicator.indicator, year=year,
                                                     rank=rank, value=value, rank_change=rank_change)
                self._excel_structure_observations.append((excel_observation, area, indicator))
            except AreaRepositoryError:
                self._log.error("No area with code %s for indicator %s while parsing %s" % (
                    iso3, indicator.indicator, structure_obs_sheet.name))
            except:
                self._log.error("Unexpected error parsing %s[%s]" % (structure_obs_sheet.name, row_number))
    except IndicatorRepositoryError:
        self._log.error("No INDEX indicator found while parsing %s [%s]" % (
            structure_obs_sheet.name, colname(index_scaled_column)))
    except ParserError as pe:
        self._log.error(pe)
def get_excel_ref(row, col):
    '''
    >>> get_excel_ref(0, 0)
    'A1'
    >>> get_excel_ref(3, 2)
    'C4'
    '''
    return xlrd.colname(col) + str(row + 1)
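# Worth noting: xlrd already ships this helper as xlrd.cellname, so
# get_excel_ref above mirrors the library; a quick equivalence check:
import xlrd

assert get_excel_ref(3, 2) == xlrd.cellname(3, 2) == 'C4'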
def _parse_formula(self, formula):
    """A formula like "2col/5col + 5" is rewritten as
    "C<current_row>/F<current_row> + 5" (column indices are 0-based,
    so 2 -> C).
    """
    for column_text, col_number in re.findall(r'((\d+)col)', formula):
        letter = xlrd.colname(int(col_number))
        formula = formula.replace(column_text, '%s%s' % (letter, self.current_row_i + 1))
    return formula
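# A minimal standalone sketch of the same "Ncol" substitution for clarity.
# The regex and xlrd.colname call mirror _parse_formula above; the helper
# name, sample formula and row number are made up.
import re
import xlrd


def expand_col_refs(formula, current_row):
    # "1col" -> column letter for 0-based index 1 ("B") plus the 1-based row
    for column_text, col_number in re.findall(r'((\d+)col)', formula):
        formula = formula.replace(column_text, '%s%d' % (xlrd.colname(int(col_number)), current_row + 1))
    return formula


print(expand_col_refs('1col/4col + 5', 9))  # -> "B10/E10 + 5"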
def printRow(self, sh, rowx, colrange):
    bk = self.bk
    print
    for colx, ty, val, _unused in self.getRowData(sh, rowx, colrange):
        if val:  # print not empty cell
            if ty == 1:  # text
                print "p5: cell %s%d: type=%d, data: [%s]" \
                    % (xlrd.colname(colx), rowx+1, ty, (u''+val+u'').encode(cp))
            elif ty == 2:  # number 2.2999999999999998 / 2.3
                print "p6: cell %s%d: type=%d, data: [%s]" \
                    % (xlrd.colname(colx), rowx+1, ty, val)
            elif ty == 3:  # datetime? (1989, 10, 19, 0, 0, 0) / 19.10.1989
                print "p7: cell %s%d: type=%d, data: [%s-%s-%s]" \
                    % (xlrd.colname(colx), rowx+1, ty, val[0], val[1], val[2])
            else:
                print "p8: cell %s%d: type=%d, data: [%r]" % (
                    xlrd.colname(colx), rowx + 1, ty, val)
def FromIndices(cls, rowIndex, colIndex):
    '''
    Creates a :class:`.Cell` object from a pair of 0-indexed xlrd indices.

    :rtype: :class:`.Cell`
    '''
    row = rowIndex + 1
    col = xlrd.colname(colIndex)
    return cls(row, col)
def _retrieve_dataset_assesments(self):
    self._log.info("\tRetrieving dataset assesments")
    dataset_obs_sheets = self._get_dataset_obs_sheets()
    indicator_code_error_cache = {}

    for dataset_obs_sheet in dataset_obs_sheets:  # Per year
        sheet_year = re.match(self._config.get("DATASET_OBSERVATIONS", "SHEET_NAME_PATTERN"),
                              dataset_obs_sheet.name).group("year")
        year_column = get_column_number(
            self._config_get("DATASET_OBSERVATIONS", "OBSERVATION_YEAR_COLUMN", sheet_year))
        iso3_column = get_column_number(
            self._config_get("DATASET_OBSERVATIONS", "OBSERVATION_ISO3_COLUMN", sheet_year))
        indicator_column = get_column_number(
            self._config_get("DATASET_OBSERVATIONS", "OBSERVATION_INDICATOR_COLUMN", sheet_year))
        observation_name_row = self._config_getint("DATASET_OBSERVATIONS", "OBSERVATION_NAME_ROW", sheet_year)
        observation_start_row = self._config_getint("DATASET_OBSERVATIONS", "OBSERVATION_START_ROW", sheet_year)
        observation_start_column = get_column_number(
            self._config_get("DATASET_OBSERVATIONS", "OBSERVATION_START_COLUMN", sheet_year))

        for column_number in range(observation_start_column, dataset_obs_sheet.ncols):  # Per dataset indicator
            dataset_indicator_code = dataset_obs_sheet.cell(observation_name_row, column_number).value
            try:
                dataset_indicator = self._indicator_repo.find_indicator_by_code(dataset_indicator_code)
            except IndicatorRepositoryError:
                if dataset_indicator_code not in indicator_code_error_cache:
                    self._log.warn(
                        "No indicator with code %s found while parsing %s[%s] (additional errors regarding this indicator will be omitted)" % (
                            dataset_indicator_code, dataset_obs_sheet.name, colname(column_number)))
                    indicator_code_error_cache[dataset_indicator_code] = True
                continue
            for row_number in range(observation_start_row, dataset_obs_sheet.nrows):  # Per country and variable
                year = int(dataset_obs_sheet.cell(row_number, year_column).value)
                iso3 = dataset_obs_sheet.cell(row_number, iso3_column).value
                try:
                    indicator_code = dataset_obs_sheet.cell(row_number, indicator_column).value
                    indicator = self._indicator_repo.find_indicator_by_code(indicator_code)
                    area = self._area_repo.find_by_iso3(iso3)
                    value_retrieved = dataset_obs_sheet.cell(row_number, column_number).value
                    value = na_to_none(value_retrieved)
                    excel_dataset_observation = ExcelObservation(iso3=iso3, indicator_code=indicator_code,
                                                                 value=value, year=year,
                                                                 dataset_indicator_code=dataset_indicator_code)
                    self._excel_dataset_observations.append(
                        (excel_dataset_observation, area, indicator, dataset_indicator))
                except IndicatorRepositoryError:
                    if indicator_code not in indicator_code_error_cache:
                        self._log.warn(
                            "No indicator with code %s found while parsing %s[%s] (additional errors regarding this indicator will be omitted)" % (
                                indicator_code, dataset_obs_sheet.name,
                                cellname(row_number, indicator_column)))  # cellname takes (rowx, colx)
                        indicator_code_error_cache[indicator_code] = True
                except AreaRepositoryError:
                    self._log.error("No area found with code %s while parsing %s" % (
                        iso3, dataset_obs_sheet.name))
def column_index(self, value):
    """Small xlrd hack to get column index"""
    index = 0
    value = value.strip().upper()
    while True:
        if xlrd.colname(index) == value:
            # NB: off by one relative to xlrd's 0-based index
            # ('A' -> -1, 'B' -> 0); callers appear to rely on this offset.
            return index - 1
        index += 1
        if index > 16384:
            raise NoIndexFound
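# The loop above costs one xlrd.colname call per candidate index; a direct
# base-26 conversion is a minimal alternative sketch (plain 0-based mapping,
# 'A' -> 0, 'AA' -> 26, without the extra -1 offset used above). The helper
# name is hypothetical, not part of xlrd.
import xlrd


def letters_to_index(letters):
    index = 0
    for ch in letters.strip().upper():
        index = index * 26 + (ord(ch) - ord('A') + 1)
    return index - 1


assert xlrd.colname(letters_to_index('AA')) == 'AA'  # round-trips via xlrd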
def get_excel_ref(cell):
    """
    TODO: test below fails with strange message, need to fix
    >>> get_excel_ref((0,0))
    'A1'
    >>> get_excel_ref((1,3))
    'D2'
    """
    row = cell[0]
    col = cell[1]
    return xlrd.colname(col) + str(row + 1)
def selectData(self, event=None):
    self.resetOptions()
    self.resetView()
    self.sheetID = self.trn.getNatMnsOrZero(self.cmbSheet.get())
    if self.xlsData and len(self.xlsData[self.sheetID]):
        self.getScope()
        values = [colname(colID) for colID in range(self.colStart, self.colFinish + 1)]
        values.insert(0, '')
        self.cmbItmCol['values'] = values
        self.cmbMnfCol['values'] = values
        self.cmbPrcCol['values'] = values
        self.cmbExpDtCol['values'] = values
def process_step_files(self, form):
    files = self.get_form_step_files(form)
    if self.steps.current == 'db_select':
        xlsfile = xlrd.open_workbook(file_contents=files['db_select-file'].read())
        table = xlsfile.sheets()[0]
        # one Excel-style letter per column (the original range stopped at
        # ncols - 1 and silently dropped the last column)
        colname_list = []
        for colid in range(table.ncols):
            colname_list.append(xlrd.colname(colid))
        # preview at most the first five rows without running past the sheet
        preview_list = []
        for rowid in range(min(5, table.nrows)):
            row = table.row_values(rowid)
            if row:
                preview_list.append(row)
        self.storage.extra_data['colname_list'] = colname_list
        self.storage.extra_data['preview_list'] = preview_list
    return files
def parseRow(self, data):
    res = []
    for col, ty, val, _unused in data:
        value = ''
        clmn = '%s' % xlrd.colname(col)
        if val:  # only format non-empty cells
            if ty == 1:  # text
                value = "%s" % (u'' + val + u'').encode(cp)
                #~ value = (u'%s' % val).encode(cp)
            elif ty == 2:  # number 2.2999999999999998 / 2.3
                value = "%s" % val
            elif ty == 3:  # datetime? (1989, 10, 19, 0, 0, 0) / 19.10.1989
                value = "%s-%s-%s" % (val[0], val[1], val[2])
            else:
                value = "%r" % val
        res.append((clmn.strip(), value.strip()))
    return res
def _write_sum(self):
    for col, field in enumerate(self.parent.headers):
        if field.need_sum or field.need_count or field.need_average or field.formula:
            letter = xlrd.colname(col)
            if field.formula:
                formula = xlwt.Formula(self._parse_formula(field.formula))
            else:
                if field.need_sum:
                    function_ = u'SUM(%s%s:%s%s)'
                elif field.need_average:
                    function_ = u'ROUND(AVERAGE(%s%s:%s%s);0)'
                else:
                    function_ = u'COUNTIF(%s%s:%s%s,"{condition}")'.format(condition=field.need_count)
                formula = xlwt.Formula(
                    function_ % (
                        letter, self._data_start_row_i + 1,
                        letter, self.current_row_i
                    ))
            self.write(self.current_row_i, col, formula)
    self.current_row_i += 1
def apply_attrs(values, attrs, custom_attrs, rowx):
    """convert and check cell.value

    if error occurs, set progress["error"] and break
    """
    fmt = "{} -> {} -> {}".format
    o = []
    colx = 0
    for x, attr in zip(values, attrs):
        custom_attr = custom_attrs.get((rowx, colx))
        if custom_attr:
            attr = attr.copy()
            attr.update(custom_attr)
        colname = xlrd.colname(colx)
        progress["column"] = colname
        abs_colname = fmt(progress["xls"], progress["sheet"], colname)
        if attr:
            #
            _type = attr.get("type")
            if _type:
                x = _type(x)
            #
            _test = attr.get("test")
            if _test:
                assert eval(_test, None, locals()), _test  # `S` and `o` can be used here
            #
            if attr.get("uniq"):
                uniq_tasks[abs_colname].append(x)
            #
            if attr.get("sort"):
                sort_tasks[abs_colname].append(x)
            #
            _ref = attr.get("ref")
            if _ref:
                abs_cellname = fmt(progress["xls"], progress["sheet"], xlrd.cellname(rowx, colx))
                ref_file_tasks[_ref].append([x, abs_cellname])
        o.append(x)
        colx += 1
    return o
def xls_match(ptn, fname):
    """Print every cell in every sheet that matches ptn."""
    try:
        book = xlrd.open_workbook(fname)
        for sheet in book.sheets():
            for row in range(sheet.nrows):
                for col in range(sheet.ncols):
                    val = sheet.cell_value(row, col)
                    # coerce non-string cells to strings before matching
                    if type(val) != str:
                        val = str(val)
                    if re.match(ptn, val, flags=(re.MULTILINE | re.DOTALL)):
                        print("{0} in [{1}]{2}!{3}{4}".format(
                            val, fname, sheet.name, xlrd.colname(col), row + 1))
    except Exception as e:
        print(e)
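# Example invocation of xls_match above; the pattern and workbook name are
# made up for illustration.
xls_match(r'.*[Tt]otal.*', 'report.xls')
# matching cells print like: "Grand total in [report.xls]Sheet1!C12"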
def load_columns(run):
    table = run.load_io()
    cols = list(table.field_map.keys())
    matched = []
    for rng in run.range_set.exclude(type='data'):
        ident = rng.identifier
        info = {
            'match': str(ident),
            'mapping': ident.mapping_label,
            'rel_id': rng.pk,
            'type': ident.type,
        }
        if ident.type == 'meta':
            info['field_name'] = rng.identifier.field
        elif ident.type == 'attribute':
            info['field_name'] = rng.identifier.field
            info['attr_id'] = rng.identifier.attr_id
            info['attr_field'] = rng.identifier.attr_field
        else:
            info['value'] = ident.name

        if rng.type == 'list':
            col = rng.start_col
            info['name'] = cols[col].replace('\n', ' - ')
            info['column'] = colname(col)
            info['colnum'] = col
        elif rng.type == 'value':
            info['name'] = get_range_value(
                table, rng, rng.header_col, rng.start_col - 1
            )
            info['meta_value'] = get_range_value(
                table, rng, rng.start_col, rng.end_col
            )
            info['colnum'] = rng.start_col
            info['rownum'] = rng.start_row
        matched.append(info)
    matched.sort(key=lambda info: info.get('colnum', -1))
    return matched
def conv(letters):
    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    map = {
        "A": "1", "B": "2", "C": "3", "D": "4", "E": "5", "F": "6",
        "G": "7", "H": "8", "I": "9", "J": "A", "K": "B", "L": "C",
        "M": "D", "N": "E", "O": "F", "P": "G", "Q": "H", "R": "I",
        "S": "J", "T": "K", "U": "L", "V": "M", "W": "N", "X": "O",
        "Y": "P", "Z": "Q",
    }
    vals = ""
    for letter in letters:
        vals += map[letter.upper()]
    result = int(vals, 28) - 1
    result -= 2 * (result / 27)
    if xlrd.colname(result) != letters.upper():
        print "Error cell reference out of range"
        return None
    return result
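# Quick sanity check for conv above (Python 2, like the snippet): the base-28
# trick round-trips through xlrd.colname for these references, but it breaks
# down for some two-letter names (e.g. 'BZ'), which the built-in colname
# comparison catches by returning None.
for ref in ("A", "Z", "AA", "AZ", "BA"):
    assert conv(ref) is not None and xlrd.colname(conv(ref)) == ref
assert conv("BZ") is None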
def load_columns(instance):
    rels = instance.relationships.filter(type__name='Contains Column')
    table = instance.load_io()
    cols = list(table.field_map.keys())
    matched = []
    for rel in rels:
        item = rel.right
        info = {
            'match': str(item),
            'rel_id': rel.pk,
        }
        if isinstance(item, UnknownItem):
            info['type'] = "unknown"
            info['value'] = item.name
        elif isinstance(item, Parameter):
            info['type'] = "parameter_value"
            info['parameter_id'] = get_object_id(item)
        elif isinstance(item, MetaColumn):
            info['type'] = item.type
            info['field_name'] = item.name

        if rel.range_set.filter(type='list').exists():
            col = rel.range_set.get(type='list').start_column
            info['name'] = cols[col].replace('\n', ' - ')
            info['column'] = colname(col)
            info['colnum'] = col
        elif rel.range_set.filter(type='value').exists():
            info['name'] = get_range_value(table, rel.range_set.get(
                type='head'
            ))
            info['value'] = get_range_value(table, rel.range_set.get(
                type='value'
            ))
        matched.append(info)
    matched.sort(key=lambda info: info.get('colnum', -1))
    return matched
def toname(colx, rowy):
    """Opposite to `toindex`."""
    colname = xlrd.colname(colx)
    return colname, rowy + 1
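# toindex is referenced in the docstring but not shown here; a quick check
# that toname agrees with xlrd's own cellname helper (toname takes
# (colx, rowy) while xlrd.cellname takes (rowx, colx)):
import xlrd

assert '%s%d' % toname(2, 3) == xlrd.cellname(3, 2)  # both give "C4"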
def mostrarColumnas():
    print(colname(2), colname(35))  # -> C AJ
def get_col_name(self, col_index): return colname(col_index)
def doImport(self, params):
    workbook = xlrd.open_workbook(params.filename)

    sheet = params.field_results.get('sheet', '')
    if not sheet:
        sheet = '0'
    if sheet.isdigit():
        # If the name is entirely numeric, treat it as a zero-based sheet
        # index instead of a name
        sheet = workbook.sheet_by_index(int(sheet))
    else:
        sheet = workbook.sheet_by_name(sheet)

    ref = params.field_results.get('range', '')
    if not ref:
        ref = used_range(sheet)
    (first_row, first_col), (last_row, last_col) = parse_range(ref)

    # Extend the selected range to cover the used cells on the sheet
    last_row = max(sheet.nrows - 1, last_row)
    last_col = max(sheet.ncols - 1, last_col)

    if params.field_results.get('direction', 'Columns') == 'Columns':
        if params.field_results.get('header', False):
            names = [
                unicode(cell.value)
                for cell in sheet.row(first_row)[first_col:last_col + 1]]
            first_row += 1
        else:
            names = [
                'col{}'.format(xlrd.colname(i))
                for i in range(first_col, last_col + 1)]
        data = [
            [cell for cell in sheet.col(col)[first_row:last_row + 1]]
            for col in range(first_col, last_col + 1)]
    else:
        if params.field_results.get('header', False):
            names = [
                unicode(cell.value)
                for cell in sheet.col(first_col)[first_row:last_row + 1]]
            first_col += 1
        else:
            names = [
                'row{}'.format(i)
                for i in range(first_row, last_row + 1)]
        data = [
            [cell for cell in sheet.row(row)[first_col:last_col + 1]]
            for row in range(first_row, last_row + 1)]

    names = sanitize_names(names)
    classes = [
        ImportDatasetText
        if any(cell.ctype == xlrd.XL_CELL_TEXT for cell in col)
        else ImportDataset1D
        for col in data]

    result = []
    for (name, cls, column) in zip(names, classes, data):
        if cls is ImportDataset1D:
            result.append(ImportDataset1D(name, data=[
                # Import non-numeric cells as NaN
                float(cell.value) if cell.ctype == xlrd.XL_CELL_NUMBER
                else float('NaN')
                for cell in column]))
        else:
            result.append(ImportDatasetText(name, data=[
                cell.value if cell.ctype == xlrd.XL_CELL_TEXT else ''
                for cell in column]))
    return result
# 5. Reading specific cells
# get the value
print()
print(table1.cell(1, 2).value)
print(table1.cell_value(1, 2))
print(table1.row(1)[2].value)

# get the type
print()
print(table1.cell(1, 2).ctype)
print(table1.cell_type(1, 2))
print(table1.row(1)[2].ctype)

# 6. Common trick: converting (0, 0) into "A1"
print()
print(xlrd.cellname(0, 0))
print(xlrd.cellnameabs(0, 2))
print(xlrd.colname(30))


def read_excel(table, row, col):
    name = table.cell_value(row, col)
    type = table.cell_type(row, col)
    if type == 0:
        name = "'"
    elif type == 1:
        name = name
    elif type == 2 and name % 1 == 0:
        name = int(name)
    elif type == 3:
        # Method 1: convert to a datetime
        # date_value = xlrd.xldate.xldate_as_datetime(name, 0)
        # name = date_value
def get_name_column(self, col):
    """Return the Excel letter name for a 1-based column number."""
    name = openpyxl.get_column_letter(col) if self.xlsx else xlrd.colname(col - 1)
    return name
def main(args):
    citations.main(args)
    data = Data()

    pairs = {}
    languages = {}

    coords = {}
    for lang in dsv.rows(
            args.data_file('MB_Map_Data_Aug13WLabels'),
            namedtuples=True,
            newline='\n',
            encoding='latin1'
    ):
        coords[slug(lang.Label.split('<')[0].strip())] = (
            float(lang.y), float(lang.x))

    xls = xlrd.open_workbook(args.data_file('MB_BoCatSum_AFBO.xlsx'))
    matrix = xls.sheet_by_name('MB_BoCatSum_AFBO.txt')
    md = "area\trecipient language iso\trecipient language genus\tdonor language iso\tdonor language genus".split('\t')

    fields = []
    params = []
    for i in range(matrix.ncols):
        colname = xlrd.colname(i)
        if len(colname) == 2 and colname > 'BE':
            break
        colval = matrix.cell(0, i).value.strip()
        if (len(colname) == 1 and colname > 'G') or (len(colname) == 2 and colname < 'AY'):
            params.append(colval)
            fields.append(colval)
        else:
            fields.append(colval.lower())

    for f in fields:
        if fields.count(f) > 1:
            print(f)
    assert len(fields) == len(set(fields))

    for j in range(1, matrix.nrows):
        values = dict(zip(fields, [matrix.cell(j, i).value for i in range(matrix.ncols)]))
        try:
            id_ = int(values['perm.id'])
        except:
            continue
        pairs[id_] = values
        for type_ in ['recipient', 'donor']:
            languages[values[type_ + ' language'].strip()] = {
                'macroarea': values['area']}
            for md in ['iso', 'genus']:
                languages[values[type_ + ' language'].strip()][md] \
                    = values['%s language %s' % (type_, md)]

    for name in COORDS:
        assert name in languages

    sources = {}
    with open(args.data_file('MB_Case_List_with_links.html')) as fp:
        worddoc = fp.read()
    for m in re.finditer('\"__(?P<recid>[^_]+)__\"', worddoc):
        sources[m.group('recid').decode('utf8')] = 1
    soup = bs(worddoc)

    doc = {}
    cols = []
    table = soup.find('table')
    for tr in table.children:
        if tr.name != 'tr':
            continue
        tds = filter(lambda n: n.name == 'td', tr.children)
        if not cols:
            cols = map(text, tds)
        else:
            values = dict(zip(cols, tds))
            try:
                id_ = int(text(values['perm.id']))
                doc[id_] = values
                if id_ in pairs:
                    assert doc['Recipient lg.'] == pairs[id_][1]['recipient language']
                    assert doc['Don'] == pairs[id_][1]['donor language']
            except:
                continue

    dataset = common.Dataset(
        id='afbo',
        name="AfBo: A world-wide survey of affix borrowing",
        contact="*****@*****.**",
        domain="afbo.info",
        license='http://creativecommons.org/licenses/by/3.0/',
        jsondata={
            'license_icon': 'cc-by.png',
            'license_name': 'Creative Commons Attribution 3.0 Unported License'})

    DBSession.add(dataset)
    for i, spec in enumerate([('seifart', "Frank Seifart")]):
        DBSession.add(common.Editor(
            dataset=dataset, ord=i + 1,
            contributor=common.Contributor(id=spec[0], name=spec[1])))

    contrib = data.add(common.Contribution, 'afbo', name="AfBo", id="afbo")

    iso_map = {
        ('ron', 'Meglenite Romanian'): ('ruq', None),
        ('fra', 'Norman French'): ('xno', None),
        ('tur', 'Turkic'): (None, 'turk1311'),
        ('xuu', 'Kxoe languages'): (None, 'khoe1241'),
        ('zoc', 'Zoquean languages'): (None, 'zoqu1261'),
        ('tzm', 'Moroccan Berber languages'): (None, 'atla1275'),
        ('cvn', 'Quechua'): ('qvn', None),
        ('rop', 'Gurindji Kriol'): (None, 'guri1249'),
        ('ita', 'Sicilian Italian'): ('scn', None),
        ('srp', 'Croatian'): ('hrv', None),
        ('eme', 'Wayampi‑Emerillon‑Zo’é'): (None, 'waya1271'),
        ('ale', 'Copper Island Aleut'): ('mud', None),
        ('car', 'intermediate Proto‑Carib'): (None, 'cari1283'),
        ('ell', 'Cappadocian Greek'): ('cpg', None),
        ('eng', 'Middle English'): ('enm', None),
        ('als', 'Arvanitic Albanian'): ('aat', None),
        ('nys', 'Northern Nyungic'): (None, 'dese1234'),
        ('ron', 'Istro‑Romanian'): ('ruo', None),
        ('chf', 'Cho’ol'): ('ctu', None),
        ('tuo', 'Eastern Tucanoan languages'): (None, 'east2698'),
        ('ceb', 'Visayan'): (None, 'bisa1268'),
        ('por', 'Sri Lanka Portuguese'): (None, 'mala1544'),
        ('brx', 'Tibeto-Burman languages'): (None, 'brah1260'),
    }

    with open('name_conflicts.tab', 'w') as fp:
        fp.write('iso\tafbo\tglottolog\tproposed iso\n')
        for i, name in enumerate(languages.keys()):
            md = languages[name]
            iso = md.pop('iso')
            if iso == 'cvn' and name == 'Quechua':
                iso = 'qvn'
            kw = dict(name=name, id=str(i + 1), jsondata=md)
            if name in COORDS:
                kw['latitude'], kw['longitude'] = COORDS[name]
            elif slug(name) in coords:
                kw['latitude'], kw['longitude'] = coords[slug(name)]
            elif glottocoords.get(iso):
                kw['latitude'], kw['longitude'] = glottocoords[iso]

            if glottonames.get(iso) and slug(glottonames.get(iso)) != slug(name):
                fp.write(('%s\t%s\t%s\t%s\n' % (
                    iso, name, glottonames.get(iso), rglottonames.get(slug(name), ''))).encode('utf8'))

            if name == 'Meglenite Romanian':
                kw['name'] = 'Megleno Romanian'
            if not 'latitude' in kw:
                print(name)
            l = data.add(common.Language, name, **kw)

            iso, gc = iso_map.get((iso, name), (iso, None))

            for code, type_ in [
                (iso, common.IdentifierType.iso),
                (gc or glottocodes.get(iso), common.IdentifierType.glottolog)
            ]:
                if code:
                    identifier = data.add(
                        common.Identifier, code,
                        id=code, name=code, type=type_.value)
                    data.add(
                        common.LanguageIdentifier, '%s-%s' % (code, l.id),
                        identifier=identifier, language=l)

    include = sources.keys() + [
        'myersscottoncontact2002',
        'myersscottonlanguage2007',
        'meakinsborrowing2011',
        'seifartprinciple2012',
    ]
    refdb = bibtex.Database.from_file(args.data_file('FSeifartZoteroLibrary14Nov2013.bib'))
    for rec in refdb:
        if slug(rec.id) in include:
            data.add(common.Source, slug(rec.id), _obj=bibtex2source(rec))

    for i, name in enumerate(params):
        data.add(models.AffixFunction, name, id=str(i + 1), name=name)

    for id_, vd in pairs.items():
        assert id_ in doc
        donor = data['Language'][vd['donor language'].strip()]
        recipient = data['Language'][vd['recipient language'].strip()]

        p = data.add(
            models.Pair,
            id_,
            id=str(id_),
            name=vd['pairs'].replace('Meglenite', 'Megleno'),
            area=recipient.jsondata['macroarea'],
            description=unicode(doc[id_]['comment']).replace('<h1', '<p').replace('</h1>', '</p>').replace('Meglenite', 'Megleno'),
            reliability=vd['reliability'],
            int_reliability=['high', 'mid', 'low'].index(vd['reliability']),
            count_interrel=int(vd[u'number of interrelated affixes']),
            count_borrowed=int(vd['number of borrowed affixes']),
            donor=donor,
            recipient=recipient)
        DBSession.flush()

        for i, param in enumerate(params):
            param_id = i + 1
            value = vd[param]
            if value != '':
                vsid = '%s-%s' % (recipient.id, param_id)
                if vsid in data['ValueSet']:
                    vs = data['ValueSet'][vsid]
                else:
                    vs = data.add(
                        common.ValueSet, vsid,
                        id=vsid,
                        parameter=data['AffixFunction'][param],
                        language=recipient,
                        contribution=contrib)
                data.add(
                    models.waabValue,
                    '%s-%s' % (id_, param_id),
                    id='%s-%s' % (id_, param_id),
                    pair=p,
                    name='%s' % int(value),
                    numeric=int(value),
                    description='%s' % p,
                    valueset=vs)
print(table1.cell_value(1, 2))
print(table1.row(1)[2])
print(table1.row(1)[2].value)
print(table1.row(1)[2].ctype)

'''6. Common trick: converting (0, 0) into "A1"'''
print(xlrd.cellname(0, 0))  # A1 -- cellname turns a (row, col) index pair into an Excel cell reference
print(xlrd.cellnameabs(0, 0))  # $A$1 -- cellnameabs turns a (row, col) index pair into an absolute reference like $A$1
print(xlrd.colname(0))  # A -- colname turns a column index into an Excel column name

'''7. Reading differently typed values from a sheet'''


def read_excel(table, row, col):
    name = table.cell_value(row, col)
    ctype = table.cell_type(row, col)
    if ctype == 0:
        name = "''"
    elif ctype == 1:
        name = name
    elif ctype == 2 and name % 1 == 0:
        name = int(name)
    elif ctype == 3:
        '''Method 1'''
        date_value = xlrd.xldate_as_tuple(name,
def _retrieve_component_observations(self, structure_obs_sheet, subindex_name, component_short_name,
                                     component_scaled_column, sheet_year):
    self._log.debug("\t\tRetrieving component %s from subindex %s observations in sheet %s..." % (
        component_short_name, subindex_name, structure_obs_sheet.name))
    empty_row_error_cache = {}
    year_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_YEAR_COLUMN", sheet_year))
    iso3_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_ISO3_COLUMN", sheet_year))
    observation_start_row = self._config_getint("STRUCTURE_OBSERVATIONS", "OBSERVATION_START_ROW", sheet_year)
    check_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_CHECK_COLUMN", sheet_year))
    aliased_short_name = self._get_aliased_component(component_short_name, sheet_year)
    if aliased_short_name:
        self._log.info("Using alias %s for COMPONENT %s while parsing %s [%s]" % (
            aliased_short_name, component_short_name, structure_obs_sheet.name, colname(component_scaled_column)))
        short_name = aliased_short_name
    else:
        short_name = component_short_name

    # Set up sorted list to simplify ranking (components are not ranked in the spreadsheet)
    sorted_observations = SortedListWithKey(
        key=lambda x: x[0].value if x[0].value is not None and na_to_none(x[0].value) is not None else 0)

    try:
        indicator = self._indicator_repo.find_component_by_short_name(short_name, subindex_name)
        for row_number in range(observation_start_row, structure_obs_sheet.nrows):  # Per country
            if not structure_obs_sheet.cell(row_number, check_column).value or row_number in empty_row_error_cache:
                if row_number not in empty_row_error_cache:
                    self._log.debug(
                        "Skipping row while parsing %s[%s] (did not detect value on check column, additional errors regarding this row will be omitted)" % (
                            structure_obs_sheet.name, row_number))
                empty_row_error_cache[row_number] = True
                continue
            try:
                year = int(structure_obs_sheet.cell(row_number, year_column).value)
                iso3 = structure_obs_sheet.cell(row_number, iso3_column).value
                area = self._area_repo.find_by_iso3(iso3)
                value = structure_obs_sheet.cell(row_number, component_scaled_column).value
                excel_observation = ExcelObservation(iso3=iso3, indicator_code=indicator.indicator, year=year,
                                                     value=value)
                if [t for t in sorted_observations if
                        t[0].year == year and t[1].iso3 == iso3 and t[2].indicator == indicator.indicator]:
                    self._log.warn("Ignoring duplicate observations for COMPONENT %s while parsing %s [%s]" % (
                        indicator.indicator, structure_obs_sheet.name, colname(component_scaled_column)))
                    # Will not continue parsing, we could check this also at the beginning if we extract the
                    # year from the sheet name
                    return
                else:
                    sorted_observations.add((excel_observation, area, indicator))
            except AreaRepositoryError:
                self._log.error("No area with code %s for indicator %s while parsing %s" % (
                    iso3, indicator.indicator, structure_obs_sheet.name))
            except:
                self._log.error("Unexpected error parsing %s[%s]" % (structure_obs_sheet.name, row_number))
    except IndicatorRepositoryError:
        self._log.error(
            "No COMPONENT '%s' indicator found while parsing %s [%s]" % (
                short_name, structure_obs_sheet.name, colname(component_scaled_column)))

    # Rank them based on their scaled score
    self._update_observation_ranking(sorted_observations, observation_getter=lambda x: x[0])
    self._excel_structure_observations.extend(sorted_observations)
def _retrieve_raw_observations(self):
    self._log.info("\tRetrieving raw observations...")
    raw_obs_sheets = self._get_raw_obs_sheets()

    for raw_obs_sheet in raw_obs_sheets:  # Per year
        sheet_year = re.match(self._config.get("RAW_OBSERVATIONS", "SHEET_NAME_PATTERN"),
                              raw_obs_sheet.name).group("year")
        empty_row_error_cache = {}
        year_column = get_column_number(self._config_get("RAW_OBSERVATIONS", "OBSERVATION_YEAR_COLUMN", sheet_year))
        iso3_column = get_column_number(self._config_get("RAW_OBSERVATIONS", "OBSERVATION_ISO3_COLUMN", sheet_year))
        observation_name_row = self._config_getint("RAW_OBSERVATIONS", "OBSERVATION_NAME_ROW", sheet_year)
        observation_start_row = self._config_getint("RAW_OBSERVATIONS", "OBSERVATION_START_ROW", sheet_year)
        observation_start_column = get_column_number(
            self._config_get("RAW_OBSERVATIONS", "OBSERVATION_START_COLUMN", sheet_year))
        check_column = get_column_number(
            self._config_get("RAW_OBSERVATIONS", "OBSERVATION_CHECK_COLUMN", sheet_year))

        for column_number in range(observation_start_column, raw_obs_sheet.ncols):  # Per indicator
            # Maintain sorted list with elements sorted by value
            # Elements are tuples of the form (ExcelObservation, Area, Indicator)
            # We're using tuples just to avoid some additional round trips to the db in order to get area and indicator
            per_indicator_observations = SortedListWithKey(
                key=lambda x: x[0].value if x[0].value is not None and na_to_none(x[0].value) is not None else 0)

            # HACK: Curate data by stripping year
            indicator_code_retrieved = raw_obs_sheet.cell(observation_name_row, column_number).value
            if len(indicator_code_retrieved.split()) > 1:
                self._log.debug('Indicator %s had to be stripped of year while parsing %s',
                                indicator_code_retrieved, raw_obs_sheet.name)
            try:
                indicator_code = indicator_code_retrieved.split()[0]
            except IndexError:
                self._log.warn(
                    'Wrong Indicator name %s while parsing %s[%s], skipping column' % (
                        indicator_code_retrieved, raw_obs_sheet.name, colname(column_number)))
                continue

            try:
                indicator = self._indicator_repo.find_indicator_by_code(indicator_code)
            except IndicatorRepositoryError:
                self._log.warn(
                    "No indicator with code %s found while parsing %s" % (indicator_code, raw_obs_sheet.name))
                indicator = create_indicator(indicator=indicator_code)  # Orphan indicator

            for row_number in range(observation_start_row, raw_obs_sheet.nrows):  # Per country
                if not raw_obs_sheet.cell(row_number, check_column).value or row_number in empty_row_error_cache:
                    if row_number not in empty_row_error_cache:
                        self._log.debug(
                            "Skipping row while parsing %s[%s] (did not detect value on check column, additional errors regarding this row will be omitted)" % (
                                raw_obs_sheet.name, row_number))
                    empty_row_error_cache[row_number] = True
                    continue
                try:
                    year = int(raw_obs_sheet.cell(row_number, year_column).value)
                    iso3 = raw_obs_sheet.cell(row_number, iso3_column).value
                    area = self._area_repo.find_by_iso3(iso3)
                    value_retrieved = raw_obs_sheet.cell(row_number, column_number).value
                    value = na_to_none(value_retrieved)
                    excel_observation = ExcelObservation(iso3=iso3, indicator_code=indicator_code, value=value,
                                                         year=year)
                    per_indicator_observations.add((excel_observation, area, indicator))
                except AreaRepositoryError:
                    self._log.error("No area found with code %s for indicator %s while parsing %s" % (
                        iso3, indicator_code, raw_obs_sheet.name))
                except:
                    self._log.error("Unexpected error parsing %s[%s]" % (raw_obs_sheet.name, row_number))

            self._update_observation_ranking(per_indicator_observations, observation_getter=lambda x: x[0])
            self._excel_raw_observations.extend(per_indicator_observations)
print("获取单元格数据类型:", sheet1.row_types(0)) print("******************************") #表操作 print("获取第1行的第6-10列的值,不包括第10列:", sheet1.row_values(0, 6, 10)) print("获取第一列,第0-5行的值,不包括第5行:", sheet1.col_values(0, 0, 5)) print("获取第6列的所有的值:", sheet1.col_values(6, 1)) six_col = sheet1.col_values(6, 1) print(six_col) print(len(six_col)) print("******************************") #获取特定单元格的值和类型 print("获取第20行第一列的值:", sheet1.cell_value(19, 0)) print("获取第20行第一列的值:", sheet1.cell(19, 0).value) print("获取第20行第一列的值:", sheet1.row(19)[0].value) print("获取第20行第一列的单元格的类型:", sheet1.cell_type(19, 0)) print("获取第20行第一列的单元格的类型:", sheet1.cell(19, 0).ctype) print("获取第20行第一列的单元格的类型:", sheet1.row(19)[0].ctype) print("******************************") #(0,0)转换A1 print("(0,0)转换A1:", xlrd.cellname(0, 0)) print("(0,0)转换A1:", xlrd.cellnameabs(0, 0)) print("(0,0)转换A1:", xlrd.colname(509)) '''数据类型: 空:0 字符串:1 数字:2 日期:3 布尔:4 error:5 '''
from xlrd import cellname, cellnameabs, colname

print cellname(0, 0), cellname(10, 10), cellname(100, 100)
print cellnameabs(1, 0), cellnameabs(41, 59), cellnameabs(265, 358)
print colname(0), colname(10), colname(100)
def to_xl_ref(row, col, base=1):
    if base == 1:
        return xlrd.colname(col - 1) + str(row)
    elif base == 0:
        return xlrd.colname(col) + str(row + 1)
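# Both branches of to_xl_ref name the same cell; a quick check, with values
# worked out from xlrd.colname's 0-based indexing:
assert to_xl_ref(1, 1, base=1) == to_xl_ref(0, 0, base=0) == 'A1'
assert to_xl_ref(4, 3, base=1) == to_xl_ref(3, 2, base=0) == 'C4'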
def _getData_(self):
    # loading file metadata
    header = [h.value for h in self.datasheet.row(self.headR)]
    units = [h.value for h in self.datasheet.row(self.unitR)]
    lastMetaColumn = 20
    locator = [h.value for h in self.datasheet.row(self.locR)[:lastMetaColumn]]
    ckey = {}
    for n, l in enumerate(locator):
        if l in ['time', 't', 'Date& Time (local)']:
            ckey['time'] = n
        if l.lower() in ['lat', 'latitude']:
            ckey['lat'] = n
        if l.lower() in ['lon', 'long', 'longitude']:
            ckey['lon'] = n
        if l in ['Depth of sample [m]']:
            ckey['z'] = n
        if l in ['Depth of Sea [m]', ]:
            ckey['bathy'] = n
        if l in ['UTC offset', ]:
            ckey['tOffset'] = n
        if l in ['Institute', ]:
            ckey['Institute'] = n

    bad_cells = [xl_cellerror, xl_cellempty, xl_cellblank]
    metadataTitles = {r: h.value for r, h in enumerate(self.datasheet.col(self.metaC)[:self.maxMDR])
                      if h.ctype not in bad_cells}
    endofHeadRow = max(metadataTitles.keys())

    # create excel coordinates for netcdf.
    colnames = {h: colname(h) for h, head in enumerate(self.datasheet.row(0))}  # row number doesn't matter here

    # which columns are we saving?
    saveCols = {}
    lineTitles = {}
    unitTitles = {}
    attributes = {}
    for l, loc in enumerate(locator):
        if loc in ['', None]:
            continue
        print 'GreenSeasXLtoNC:\tInfo:\tFOUND:\t', l, '\t', loc, 'in locator'
        saveCols[l] = True
        lineTitles[l] = loc
        unitTitles[l] = ''
        if loc.find('[') > 0:
            unitTitles[l] = loc[loc.find('['):].replace(']', '')

    if header[5].find('Note') > -1:
        attributes['Note'] = header[5]
        header[5] = ''

    # flag for saving all columns:
    if 'all' in self.datanames:
        for head in header[lastMetaColumn:]:
            if head == '':
                continue
            self.datanames.append(head)

    # add data columns titles to output to netcdf.
    for h, head in enumerate(header):
        if head == '':
            continue
        if h in saveCols.keys():
            continue
        for d in self.datanames:
            if h in saveCols.keys():
                continue
            if head.lower().find(d.lower()) > -1:
                print 'GreenSeasXLtoNC:\tInfo:\tFOUND:\t', h, '\t', d, 'in ', head
                saveCols[h] = True
                lineTitles[h] = header[h]
                unitTitles[h] = units[h]
    saveCols = sorted(saveCols.keys())
    print 'GreenSeasXLtoNC:\tInfo:\tInterograting columns:', saveCols

    # Meta data for those columns with only one value:
    ncVarName = {}
    allNames = []
    for h in saveCols:
        name = self._getNCvarName_(lineTitles[h])
        # ensure netcdf variable keys are unique:
        if name in allNames:
            name += '_' + ucToStr(colnames[h])
        allNames.append(name)
        ncVarName[h] = name

    # make an index to link netcdf back to spreadsheet
    index = {}
    for r in xrange(len(self.datasheet.col(saveCols[0])[self.maxMDR:])):
        index[r] = r + self.maxMDR

    # create data dictionary
    data = {}
    tunit = 'seconds since 1900-00-00'
    unitTitles[ckey['time']] = tunit
    for d in saveCols:
        tmpdata = self.datasheet.col(d)[self.maxMDR:]
        arr = []
        if d == ckey['time']:  # time
            for a in tmpdata[:]:
                if a.ctype in bad_cells:
                    arr.append(default_fillvals['i8'])
                else:
                    try:
                        arr.append(int64(date2num(parse(a.value), units=tunit)))
                    except:
                        try:
                            arr.append(int(a.value))
                            print 'GreenSeasXLtoNC:\tWarning: Can not read time effecitvely:', int(a.value)
                        except:
                            arr.append(default_fillvals['i8'])
            data[d] = marray(arr)
            continue
        isaString = self._isaString_(lineTitles[d])
        if isaString:  # strings
            for a in tmpdata[:]:
                if a.ctype in bad_cells:
                    arr.append(default_fillvals['S1'])
                else:
                    try:
                        arr.append(ucToStr(a.value))
                    except:
                        arr.append(default_fillvals['S1'])
        else:  # data
            for a in tmpdata[:]:
                if a.ctype in bad_cells:
                    arr.append(default_fillvals['f4'])
                else:
                    try:
                        arr.append(float(a.value))
                    except:
                        arr.append(default_fillvals['f4'])
        data[d] = marray(arr)

    fillvals = default_fillvals.values()

    # count number of data in each column:
    print 'GreenSeasXLtoNC:\tInfo:\tCount number of data in each column...'  # can be slow
    datacounts = {d: 0 for d in saveCols}
    for d in saveCols:
        for i in data[d][:]:
            if i in ['', None, ]:
                continue
            if i in fillvals:
                continue
            datacounts[d] += 1
    print 'GreenSeasXLtoNC:\tInfo:\tMax number of entries to in a column:', max(datacounts.values())

    # list data columns with no data or only one value
    removeCol = []
    for h in saveCols:
        if datacounts[h] == 0:
            print 'GreenSeasXLtoNC:\tInfo:\tNo data for column ', h, lineTitles[h], '[', unitTitles[h], ']'
            removeCol.append(h)
            continue
        col = sorted(data[h])
        if col[0] == col[-1]:
            if col[0] in fillvals:
                print 'GreenSeasXLtoNC:\tInfo:\tIgnoring masked column', h, lineTitles[h], '[', unitTitles[h], ']'
                removeCol.append(h)
                continue
            print 'GreenSeasXLtoNC:\tInfo:\tonly one "data": ', lineTitles[h], '[', unitTitles[h], ']', 'value:', col[0]
            removeCol.append(h)
            attributes[makeStringSafe(ucToStr(lineTitles[h]))] = ucToStr(col[0])
    for r in removeCol:
        saveCols.remove(r)
    print 'GreenSeasXLtoNC:\tInfo:\tnew file attributes:', attributes

    print 'GreenSeasXLtoNC:\tInfo:\tFigure out which rows should be saved...'
    saveRows = {a: False for a in index.keys()}
    # index.keys() are rows in data.
    # index.values are rows in excel.
    rowcounts = {a: 0 for a in index.keys()}
    for r in sorted(saveRows.keys()):
        if data[ckey['time']][r] in ['', None, ]:
            print 'No time value:', r, data[ckey['time']][r]
            continue
        if data[ckey['time']][r] in fillvals:
            print 'No time value:', r, data[ckey['time']][r]
            continue
        for d in saveCols:
            if d < lastMetaColumn:
                continue
            if data[d][r] in ['', None, ]:
                continue
            if data[d][r] in fillvals:
                continue
            rowcounts[r] += 1
            saveRows[r] = True
    print 'GreenSeasXLtoNC:\tInfo:\tMaximum number of rows to save: ', max(rowcounts.values())

    #
    #rowcounts = {d:0 for d in saveRows.keys()}
    #for r in sorted(rowcounts.keys()):
    #    #if saveRows[r] == False: continue
    #    for d in saveCols:
    #        if d<20:continue
    #        if data[d][r] in ['', None, ]:continue
    #        if data[d][r] in fillvals: continue
    #        rowcounts[r] += 1

    # get data type (ie float, int, etc...):
    # netcdf4 requries some strange names for datatypes:
    # ie f8 instead of numpy.float64
    dataTypes = {}
    dataIsAString = []
    for h in saveCols:
        dataTypes[h] = marray(data[h]).dtype
        print 'GreenSeasXLtoNC:\tInfo:\ttype: ', ncVarName[h], h, '\t', dataTypes[h]
        if dataTypes[h] == float64:
            dataTypes[h] = 'f8'
        elif dataTypes[h] == int32:
            dataTypes[h] = 'i4'
        elif dataTypes[h] == int64:
            dataTypes[h] = 'i8'
        else:
            dataTypes[h] = 'S1'
            dataIsAString.append(h)

    print 'GreenSeasXLtoNC:\tInfo:\tCreate MetaData...'
    # create metadata.
    metadata = {}
    for h in saveCols:
        if h in dataIsAString:
            continue
        datacol = self.datasheet.col(h)[:]
        colmeta = {metadataTitles[mdk]: datacol[mdk] for mdk in metadataTitles.keys()
                   if metadataTitles[mdk] not in ['', None]}
        md = ' '
        if len(colmeta.keys()) > 20:
            print 'Too many metadata'
            print 'GreenSeasXLtoNC:\tWarning:\tMetadata reading failed. Please Consult original excel file for more info.'
            metadata[h] = 'Metadata reading failed. Please Consult original excel file for more info.'
            continue
        for mdt, mdc in zip(colmeta.keys(), colmeta.values()):
            if mdc in ['', None]:
                continue
            md += ucToStr(mdt) + ':\t' + ucToStr(mdc) + '\n '
        #print md
        metadata[h] = md

    # save all info as public variables, so that it can be accessed if netCDF creation fails:
    self.saveCols = saveCols
    self.saveRows = saveRows
    self.rowcounts = rowcounts
    self.ncVarName = ncVarName
    self.dataTypes = dataTypes
    self.dataIsAString = dataIsAString
    self.metadata = metadata
    self.colnames = colnames
    self.data = data
    self.lineTitles = lineTitles
    self.unitTitles = unitTitles
    self.attributes = attributes
    self.index = index
from collections import namedtuple
from xlrd import colname
from xlwt import Workbook
from datetime import date
import re

wrow = lambda sh, row, vals: [sh.write(row, colx, val) for colx, val in enumerate(vals)]

Cols = namedtuple("Cols", " ".join(colname(c).lower() for c in range(44)))
col = Cols(*range(44))

BA_RE = re.compile(r'(BA|CB)\d+', re.IGNORECASE)
MF_RE = re.compile(r'MF\d+', re.IGNORECASE)
LC_REF_RE = re.compile(r'(ILCL|026L)[A-Z]{2,3}\d+(?:/\d+)?', re.IGNORECASE)
DATE_RE = re.compile(
    r'(?P<d>\d{1,2})[\s\\/-](?P<m>([a-z]{3}|\d{1,2}))[\s\\/-](?P<y>\d{2,4})',
    re.IGNORECASE
)


def iso_to_date_obj(iso_date_string):
    date_re = re.compile(r'(\d{4})-(\d{1,2})-(\d{1,2})')
    searched = date_re.search(iso_date_string)
    if not searched:
        raise ValueError('Date to parse must be in format "yyyy-mm-dd"')
    grp = searched.group
    return date(int(grp(1)), int(grp(2)), int(grp(3)))
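# The Cols namedtuple above maps lower-cased Excel column letters onto their
# 0-based indices, so spreadsheet positions can be referenced by name:
assert col.a == 0 and col.z == 25 and col.aq == 42  # xlrd.colname(42) == 'AQ'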
def flush(self, params, oriented=ISpreadsheetSection.LEFT_DOWN,
          used_formulas=None, keep_text_type=KEEP_TEXT_TYPE):
    """
    Write the section into the report.

    :param params: dict of substitution parameters
    :param oriented: direction in which the section is written out
    :param used_formulas: formulas in use - needed to write simple formulas into the report
    :result: None
    """
    for k, v in params.items():
        if v is None:
            params[k] = ''
    if used_formulas is None:
        used_formulas = {}

    begin_row, begin_column = self.begin
    end_row, end_column = self.end

    book = self.sheet_data.sheet.book
    current_col, current_row = self.calc_next_cursor(oriented=oriented)
    for rdrowx in range(begin_row, end_row + 1):
        # the row index does not depend on the columns
        wtrowx = current_row + rdrowx - begin_row
        for rdcolx in range(begin_column, end_column + 1):
            # Compute the coordinates of the cell to write.
            wtcolx = current_col + rdcolx - begin_column
            try:
                cell = self.writer.rdsheet.cell(rdrowx, rdcolx)
            except IndexError:
                continue

            val = cell.value
            # pull out the cell format
            xf_index = cell.xf_index
            xf = book.xf_list[xf_index]
            format_key = xf.format_key
            format_ = book.format_map[format_key]
            format_str = format_.format_str

            cty = cell.ctype
            f_id = None
            for key, value in params.items():
                if unicode(cell.value).count(u''.join(['#', key, '#'])):
                    if used_formulas:
                        formula_id_list = used_formulas.get(key)
                        if formula_id_list:
                            for formula_id in formula_id_list:
                                self.sheet_data.formula_id_dict.setdefault(
                                    formula_id, []
                                ).append(
                                    ''.join([xlrd.colname(wtcolx), str(wtrowx + 1)])
                                )
                    if isinstance(value, FormulaWriteExcel):
                        # If a formula comes in, substitute its value
                        # together with the list of cells it refers to
                        formula = value.excel_function
                        f_id = value.formula_id
                        if formula is not None and f_id is not None:
                            formula_cells = self.sheet_data.formula_id_dict.get(f_id)
                            if formula_cells:
                                if value.ranged:
                                    val = '%s(%s)' % (formula, ':'.join(
                                        [formula_cells[0], formula_cells[-1]]))
                                else:
                                    val = '%s(%s)' % (formula, ','.join(formula_cells))
                                self.sheet_data.formula_id_dict[f_id] = []
                                cty = FORMULA_XLS_TYPE
                            else:
                                val = ''
                                cty = xlrd.XL_CELL_TEXT
                        break
                    elif isinstance(value, XLSImage):
                        cty = EXCEL_IMAGE_TYPE
                        val = value
                        break
                    # Cell type
                    cty = self.get_value_type(value=value, default_type=cell.ctype)
                    value = unicode(value)
                    val = val.replace(u'#%s#' % key, value)
            if isinstance(val, basestring):
                while u'#' in val:
                    val = re.sub(u'#.*#', '', val)
                    if len(val.split('#')) == 2:
                        break

            # Copy various properties from the template into the resulting report.
            if (wtcolx not in self.writer.wtcols and
                    rdcolx in self.writer.rdsheet.colinfo_map):
                rdcol = self.writer.rdsheet.colinfo_map[rdcolx]
                wtcol = self.writer.wtsheet.col(wtcolx)
                wtcol.width = rdcol.width
                wtcol.set_style(self.writer.style_list[rdcol.xf_index])
                wtcol.hidden = rdcol.hidden
                wtcol.level = rdcol.outline_level
                wtcol.collapsed = rdcol.collapsed
                self.writer.wtcols.add(wtcolx)

            if cty == xlrd.XL_CELL_EMPTY:
                continue
            # XF indexes
            if cell.xf_index is not None:
                style = self.writer.style_list[cell.xf_index]
            else:
                style = default_style

            rdcoords2d = rdrowx, rdcolx
            if rdcoords2d in self.writer.merged_cell_top_left_map:
                rlo, rhi, clo, chi = self.writer.merged_cell_top_left_map[rdcoords2d]
                assert (rlo, clo) == rdcoords2d
                if isinstance(val, XLSImage):
                    self.writer.wtsheet.merge(
                        wtrowx, wtrowx + rhi - rlo - 1,
                        wtcolx, wtcolx + chi - clo - 1,
                        style
                    )
                    # TODO: move into a dedicated write method
                    self.writer.wtsheet.insert_bitmap(
                        val.path, wtrowx, wtcolx
                    )
                    continue
                self.writer.wtsheet.write_merge(
                    wtrowx, wtrowx + rhi - rlo - 1,
                    wtcolx, wtcolx + chi - clo - 1,
                    val, style)
                continue
            if rdcoords2d in self.writer.merged_cell_already_set:
                continue

            # if the field is textual and the "keep text fields" setting
            # is on, do not coerce the text into a number
            if keep_text_type and format_str == TEXT_CELL_FORMAT:
                pass
            else:
                try:
                    val1 = val
                    if isinstance(val1, float):
                        val1 = str(val1)
                    decimal.Decimal(val1)
                    cty = xlrd.XL_CELL_NUMBER
                except (decimal.InvalidOperation, TypeError):
                    pass

            runlist = self.writer.rdsheet.rich_text_runlist_map.get(
                (rdrowx, rdcolx)
            )
            self.write_result((wtcolx, wtrowx), val, style, cty,
                              (runlist, rdrowx, rdcolx))

        # also carry over the current row's height
        rdrow = self.writer.rdsheet.rowinfo_map.get(rdrowx)
        wtrow = self.writer.wtsheet.rows.get(wtrowx)
        if rdrow is not None and wtrow is not None:
            wtrow.height = rdrow.height
            # height_mismatch is needed so the height actually takes effect
            wtrow.height_mismatch = rdrow.height_mismatch