def _read_placement(excel_data, sheet):
    """
    Read one placement per column and store the result on C{excel_data}.

    The header cell (row 0) of each column is the placement name.  The cells
    below it are grouped into positions: a run of consecutive non-empty
    cells forms one position and an empty cell ends the current run.  A
    leading '*' marks a person as fixed and is stripped from the stored name.

    C{excel_data.placements} is set to a list of
    C{(placement_name, positions)} tuples, where every position is a list of
    C{[person, is_fixed]} pairs.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    result = []
    for col_index in range(number_of_good_cols(sheet)):
        title = sheet.cell(0, col_index).value
        groups = []
        current = []
        for entry in sheet.col(col_index)[1:]:
            raw = entry.value
            if not raw:
                # An empty cell closes the run collected so far, if any.
                if current:
                    groups.append(current)
                    current = []
                continue
            fixed = raw.startswith('*')
            current.append([raw[1:] if fixed else raw, fixed])
        # Flush a run that reaches the bottom of the column.
        if current:
            groups.append(current)
        result.append((title, groups))
    excel_data.placements = result
def _read_placement(excel_data, sheet):
    """
    Collect the placements described by C{sheet}, one per column.

    Row 0 holds the placement name.  Below it, consecutive non-empty cells
    make up one position; an empty cell separates positions.  A person whose
    cell text starts with '*' is flagged as fixed and stored without the
    marker.

    Assigns to C{excel_data.placements} a list of
    C{(placement_name, positions)} tuples; each position is a list of
    C{[person, is_fixed]} lists.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    collected = []
    for column in range(number_of_good_cols(sheet)):
        header = sheet.cell(0, column).value
        body_cells = sheet.col(column)[1:]

        finished = []
        pending = []
        for body_cell in body_cells:
            text = body_cell.value
            if text:
                starred = text.startswith('*')
                if starred:
                    text = text[1:]
                pending.append([text, starred])
            elif pending:
                # Blank cell after at least one person: the position is complete.
                finished.append(pending)
                pending = []
        if pending:
            # The last position is not followed by a blank cell.
            finished.append(pending)

        collected.append((header, finished))
    excel_data.placements = collected
def _read_groups(excel_data, sheet):
    """
    Read person names and group membership into C{excel_data}.

    Column 0 lists the names and row 0 (from column 1 on) lists the group
    names.  A person belongs to a group when the cell where their row meets
    the group's column holds a truthy value.

    C{excel_data.names} receives every value of column 0 below the header
    row, as returned by C{sheet.col(0)}.  Membership is only evaluated for
    the rows counted by C{number_of_good_rows(sheet, ncols=1)}.
    C{excel_data.groups} receives a list of C{(group_name, members)} tuples
    in header order.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    names = [cell.value for cell in sheet.col(0)[1:]]

    group_names = [
        sheet.cell(0, col).value
        for col in range(1, number_of_good_cols(sheet, nrows=1))
    ]

    members = dict((group_name, []) for group_name in group_names)
    for row in range(1, number_of_good_rows(sheet, ncols=1)):
        person = sheet.cell(row, 0).value
        # Group columns start at 1, right after the name column.
        for col, group_name in enumerate(group_names, 1):
            if sheet.cell(row, col).value:
                members[group_name].append(person)

    excel_data.names = names
    excel_data.groups = [(group_name, members[group_name])
                         for group_name in group_names]
def extractMarkingFromXLS(self, mrkFileName):
    '''
    Process the current Excel file to extract the marking annotation and
    write the output to mrkFileName

    One line "<sheet index>;<cell name>;<style name>" is written for every
    cell whose style name starts with 'TL '.  When config.isCompress() is
    true the output is bz2-compressed and written to mrkFileName + ".bz2"
    instead of mrkFileName.

    The output file is closed even when reading the workbook raises.
    '''
    if config.isCompress():
        outputHandle = bz2.BZ2File(mrkFileName + ".bz2", 'wb', compresslevel=9)
    else:
        outputHandle = open(mrkFileName, "w")

    try:
        for n in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(n)
            colns = number_of_good_cols(sheet)
            rowns = number_of_good_rows(sheet)
            self.log.debug("Process %d columns and %d rows" % (colns, rowns))
            for i in range(0, rowns):
                for j in range(0, colns):
                    cell = sheet.cell(i, j)
                    cell_name = cellname(i, j)
                    style_name = self.styles[cell].name
                    # Only the 'TL ' styles carry marking information.
                    if style_name.startswith('TL '):
                        mrk_line = "%d;%s;%s" % (n, cell_name, style_name)
                        outputHandle.write(mrk_line)
                        outputHandle.write("\n")
    finally:
        # Release the handle (and flush the bz2 stream) on every exit path.
        outputHandle.close()
def __init__(self, data):
    '''
    Load the literal -> values mappings from the first sheet of an Excel file

    data is read with the keys 'file', 'path', 'predicate', 'mapping_type'
    and, when mapping_type is 'uri', 'prefix'.

    Row 0 of the sheet is skipped.  In every other row column 0 is an
    optional context, column 1 the literal (normalised with clean_string)
    and the remaining columns the mapped values.  Each non-empty value is
    encoded according to mapping_type ('uri' -> URIRef(prefix + value),
    'boolean' -> Literal(True/False), anything else -> Literal(value)) and
    paired with the predicate.

    The result is stored in self._mappings[literal]['default'] when the
    context is empty, otherwise in
    self._mappings[literal]['context'][context].  The stored value is a list
    of (predicate, encoded_value) pairs, or None when the row has no values.
    '''
    def as_code(raw):
        # Codes using numbers need to be seen as strings; float cell values
        # are truncated to their integer part first.
        if isinstance(raw, float):
            return str(int(raw))
        return raw

    self._mappings = {}
    self.excelFileName = data['file']
    predicate = URIRef(data['predicate'])
    mapping_type = data['mapping_type']

    # Load the mappings
    wb = open_workbook(data['path'] + "/" + self.excelFileName,
                       formatting_info=False, on_demand=True)
    sheet = wb.sheet_by_index(0)
    colns = number_of_good_cols(sheet)
    rowns = number_of_good_rows(sheet)
    for i in range(1, rowns):
        # Get the context (first column)
        context = sheet.cell(i, 0).value

        # Get the string (force reading the cell as a string)
        literal = clean_string(as_code(sheet.cell(i, 1).value))

        # Get the values
        values = []
        for j in range(2, colns):
            value = sheet.cell(i, j).value
            if value == '':
                continue
            value = as_code(value)

            # Encode the value
            if mapping_type == 'uri':
                # 'prefix' is only required when a uri value is actually encoded
                encoded_value = URIRef(data['prefix'] + value)
            elif mapping_type == 'boolean':
                # NOTE(review): only the strings '1' and 'true' count as
                # true - confirm how boolean-typed cells should be treated
                encoded_value = Literal(value in ('1', 'true'))
            else:
                encoded_value = Literal(value)

            # Pair the encoded value with the predicate
            values.append((predicate, encoded_value))
        if not values:
            values = None

        # Save the mapping
        entry = self._mappings.setdefault(literal, {})
        if context != '':
            # Store the specific context
            entry.setdefault('context', {})[context] = values
        else:
            # Store the default mappings
            entry['default'] = values
def __init__(self, data):
    '''
    Load the literal -> values mappings from the first sheet of an Excel file

    data is read with the keys 'file', 'path', 'predicate', 'mapping_type'
    and, when mapping_type is 'uri', 'prefix'.

    Row 0 of the sheet is skipped.  In every other row column 0 is an
    optional context, column 1 the literal (normalised with clean_string)
    and the remaining columns the mapped values.  Each non-empty value is
    encoded according to mapping_type ('uri' -> URIRef(prefix + value),
    'boolean' -> Literal(True/False), anything else -> Literal(value)) and
    paired with the predicate.

    The result is stored in self._mappings[literal]['default'] when the
    context is empty, otherwise in
    self._mappings[literal]['context'][context].  The stored value is a list
    of (predicate, encoded_value) pairs, or None when the row has no values.
    '''
    def as_code(raw):
        # Codes using numbers need to be seen as strings; float cell values
        # are truncated to their integer part first.
        if isinstance(raw, float):
            return str(int(raw))
        return raw

    self._mappings = {}
    self.excelFileName = data['file']
    predicate = URIRef(data['predicate'])
    mapping_type = data['mapping_type']

    # Load the mappings
    wb = open_workbook(data['path'] + "/" + self.excelFileName,
                       formatting_info=False, on_demand=True)
    sheet = wb.sheet_by_index(0)
    colns = number_of_good_cols(sheet)
    rowns = number_of_good_rows(sheet)
    for i in range(1, rowns):
        # Get the context (first column)
        context = sheet.cell(i, 0).value

        # Get the string (force reading the cell as a string)
        literal = clean_string(as_code(sheet.cell(i, 1).value))

        # Get the values
        values = []
        for j in range(2, colns):
            value = sheet.cell(i, j).value
            if value == '':
                continue
            value = as_code(value)

            # Encode the value
            if mapping_type == 'uri':
                # 'prefix' is only required when a uri value is actually encoded
                encoded_value = URIRef(data['prefix'] + value)
            elif mapping_type == 'boolean':
                # NOTE(review): only the strings '1' and 'true' count as
                # true - confirm how boolean-typed cells should be treated
                encoded_value = Literal(value in ('1', 'true'))
            else:
                encoded_value = Literal(value)

            # Pair the encoded value with the predicate
            values.append((predicate, encoded_value))
        if not values:
            values = None

        # Save the mapping
        entry = self._mappings.setdefault(literal, {})
        if context != '':
            # Store the specific context
            entry.setdefault('context', {})[context] = values
        else:
            # Store the default mappings
            entry['default'] = values
def injectMarkingIntoXLS(self, mrkFileName, targetFileName):
    '''
    Load marking instructions from mrkFileName and process the current Excel
    file to generate the annotated targetFileName

    Every non-blank line of mrkFileName has the form
    "<sheet index>;<cell name>;<style name>".  Each marked cell is rewritten
    in a copy of the workbook with the colour style registered for its style
    name, and the copy is saved to targetFileName.  Sheets without any
    marking are left as copied.

    Raises KeyError when a marking line names a style that is not one of the
    'TL ...' styles created below.
    '''
    # Prepare the output
    self.log.debug("Copy book")
    target_workbook = copy(self.workbook)

    # Load marking information: sheet index -> {cell name: style name}
    self.log.debug("Load marking from %s" % mrkFileName)
    marking = {}
    with open(mrkFileName) as mrkFile:
        for mrk in mrkFile:
            mrk = mrk.strip()
            if not mrk:
                # Tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            (index_str, cell, style) = mrk.split(';')
            marking.setdefault(int(index_str), {})[cell] = style

    # Create the styles in the target
    styles = {}
    for name, colour in [('TL Metadata', 'yellow'),
                         ('TL ColHeader', 'blue'),
                         ('TL Data', 'white'),
                         ('TL RowHeader', 'yellow'),
                         ('TL RowProperty', 'green'),
                         ('TL Title', 'pink'),
                         ('TL RowLabel', 'blue'),
                         ('TL HRowHeader', 'blue')]:
        xf = xlwt.easyxf(
            'name: test; pattern: pattern solid, fore_colour %s;' % colour)
        styles[name] = xf
        # Prints whatever add_style returns for the newly added style
        print(target_workbook.add_style(xf))

    # Process the source workbook
    for n in range(self.workbook.nsheets):
        sheet = self.workbook.sheet_by_index(n)
        target_sheet = target_workbook.get_sheet(n)
        colns = number_of_good_cols(sheet)
        rowns = number_of_good_rows(sheet)
        self.log.debug("Process %d columns and %d rows" % (colns, rowns))

        # A sheet with no marked cell has no entry in the marking dict;
        # indexing marking[n] directly would raise KeyError for it.
        sheet_marking = marking.get(n)
        if not sheet_marking:
            continue

        for i in range(0, rowns):
            for j in range(0, colns):
                cell_name = cellname(i, j)
                if cell_name in sheet_marking:
                    cell = sheet.cell(i, j)
                    style = styles[sheet_marking[cell_name]]
                    # TODO Use matching style defined earlier
                    target_sheet.write(i, j, label=cell.value, style=style)

    target_workbook.save(targetFileName)
def injectMarkingIntoXLS(self, mrkFileName, targetFileName):
    '''
    Load marking instructions from mrkFileName and process the current Excel
    file to generate the annotated targetFileName

    Every non-blank line of mrkFileName has the form
    "<sheet index>;<cell name>;<style name>".  Each marked cell is rewritten
    in a copy of the workbook with the colour style registered for its style
    name, and the copy is saved to targetFileName.  Sheets without any
    marking are left as copied.

    Raises KeyError when a marking line names a style that is not one of the
    'TL ...' styles created below.
    '''
    # Prepare the output
    self.log.debug("Copy book")
    target_workbook = copy(self.workbook)

    # Load marking information: sheet index -> {cell name: style name}
    self.log.debug("Load marking from %s" % mrkFileName)
    marking = {}
    with open(mrkFileName) as mrkFile:
        for mrk in mrkFile:
            mrk = mrk.strip()
            if not mrk:
                # Tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            (index_str, cell, style) = mrk.split(';')
            marking.setdefault(int(index_str), {})[cell] = style

    # Create the styles in the target
    styles = {}
    for name, colour in [('TL Metadata', 'yellow'),
                         ('TL ColHeader', 'blue'),
                         ('TL Data', 'white'),
                         ('TL RowHeader', 'yellow'),
                         ('TL RowProperty', 'green'),
                         ('TL Title', 'pink'),
                         ('TL RowLabel', 'blue'),
                         ('TL HRowHeader', 'blue')]:
        xf = xlwt.easyxf('name: test; pattern: pattern solid, fore_colour %s;' % colour)
        styles[name] = xf
        # Prints whatever add_style returns for the newly added style
        print(target_workbook.add_style(xf))

    # Process the source workbook
    for n in range(self.workbook.nsheets):
        sheet = self.workbook.sheet_by_index(n)
        target_sheet = target_workbook.get_sheet(n)
        colns = number_of_good_cols(sheet)
        rowns = number_of_good_rows(sheet)
        self.log.debug("Process %d columns and %d rows" % (colns, rowns))

        # A sheet with no marked cell has no entry in the marking dict;
        # indexing marking[n] directly would raise KeyError for it.
        sheet_marking = marking.get(n)
        if not sheet_marking:
            continue

        for i in range(0, rowns):
            for j in range(0, colns):
                cell_name = cellname(i, j)
                if cell_name in sheet_marking:
                    cell = sheet.cell(i, j)
                    style = styles[sheet_marking[cell_name]]
                    # TODO Use matching style defined earlier
                    target_sheet.write(i, j, label=cell.value, style=style)

    target_workbook.save(targetFileName)
def _read_tables(excel_data, sheet):
    """
    Read the table dimensions from C{sheet} into C{excel_data}.

    Every row counted by C{number_of_good_rows(sheet, ncols=1)} describes one
    entry: column 0 holds its name and the following cells hold sizes, each
    converted with C{int()}.  Reading a row stops at the first falsy cell
    (empty or zero).

    C{excel_data.dimensions} is set to a list of C{[name, sizes]} lists.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    # The column count does not depend on the row, so compute it once
    # instead of once per row.
    col_count = number_of_good_cols(sheet)

    dimensions = []
    for row in range(number_of_good_rows(sheet, ncols=1)):
        name = sheet.cell(row, 0).value
        sizes = []
        for col in range(1, col_count):
            value = sheet.cell(row, col).value
            if not value:
                break
            sizes.append(int(value))
        dimensions.append([name, sizes])
    excel_data.dimensions = dimensions
def _read_tables(excel_data, sheet):
    """
    Read the table dimensions from C{sheet} into C{excel_data}.

    Every row counted by C{number_of_good_rows(sheet, ncols=1)} describes one
    entry: column 0 holds its name and the following cells hold sizes, each
    converted with C{int()}.  Reading a row stops at the first falsy cell
    (empty or zero).

    C{excel_data.dimensions} is set to a list of C{[name, sizes]} lists.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    # The column count does not depend on the row, so compute it once
    # instead of once per row.
    col_count = number_of_good_cols(sheet)

    dimensions = []
    for row in range(number_of_good_rows(sheet, ncols=1)):
        name = sheet.cell(row, 0).value
        sizes = []
        for col in range(1, col_count):
            value = sheet.cell(row, col).value
            if not value:
                break
            sizes.append(int(value))
        dimensions.append([name, sizes])
    excel_data.dimensions = dimensions
def getValidRowsCols(self):
    """
    Determine the number of non-empty rows and columns in the Excel sheet

    Starts from the counts reported by number_of_good_rows /
    number_of_good_cols for self.r_sheet and then drops trailing rows and
    columns that isEmptyRow / isEmptyColumn report as empty.  Neither count
    is ever reduced below zero.

    Returns:
    rowns -- number of rows
    colns -- number of columns
    """
    colns = number_of_good_cols(self.r_sheet)
    rowns = number_of_good_rows(self.r_sheet)

    # Check whether the number of good columns and rows are correct.
    # The > 0 bounds keep a completely empty sheet from pushing the index
    # to -1 and beyond.
    while rowns > 0 and self.isEmptyRow(rowns - 1, colns):
        rowns = rowns - 1
    while colns > 0 and self.isEmptyColumn(colns - 1, rowns):
        colns = colns - 1

    self.log.debug('Number of rows with content: {0}'.format(rowns))
    self.log.debug('Number of columns with content: {0}'.format(colns))
    return rowns, colns
def extractMarkingFromXLS(self, mrkFileName):
    '''
    Process the current Excel file to extract the marking annotation and
    write the output to mrkFileName

    One line "<sheet index>;<cell name>;<style name>" is written for every
    cell whose style name starts with 'TL '.  When config.isCompress() is
    true the output is bz2-compressed and written to mrkFileName + ".bz2"
    instead of mrkFileName.

    The output file is closed even when reading the workbook raises.
    '''
    if config.isCompress():
        outputHandle = bz2.BZ2File(mrkFileName + ".bz2", 'wb', compresslevel=9)
    else:
        outputHandle = open(mrkFileName, "w")

    try:
        for n in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(n)
            colns = number_of_good_cols(sheet)
            rowns = number_of_good_rows(sheet)
            self.log.debug("Process %d columns and %d rows" % (colns, rowns))
            for i in range(0, rowns):
                for j in range(0, colns):
                    cell = sheet.cell(i, j)
                    cell_name = cellname(i, j)
                    style_name = self.styles[cell].name
                    # Only the 'TL ' styles carry marking information.
                    if style_name.startswith('TL '):
                        mrk_line = "%d;%s;%s" % (n, cell_name, style_name)
                        outputHandle.write(mrk_line)
                        outputHandle.write("\n")
    finally:
        # Release the handle (and flush the bz2 stream) on every exit path.
        outputHandle.close()
def _read_groups(excel_data, sheet):
    """
    Extract the list of people and their group membership from C{sheet}.

    The first column holds the names and the first row (from the second
    column on) holds the group names.  A truthy cell at the crossing of a
    person's row and a group's column puts that person in the group.

    C{excel_data.names} is set to all values of column 0 below the header
    row, as returned by C{sheet.col(0)}.  Membership is only evaluated for
    the rows counted by C{number_of_good_rows(sheet, ncols=1)}.
    C{excel_data.groups} is set to a list of C{(group_name, members)} tuples
    in header order.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    people = []
    for name_cell in sheet.col(0)[1:]:
        people.append(name_cell.value)

    headers = []
    header_end = number_of_good_cols(sheet, nrows=1)
    for header_col in range(1, header_end):
        headers.append(sheet.cell(0, header_col).value)

    membership = {}
    for header in headers:
        membership[header] = []

    row_end = number_of_good_rows(sheet, ncols=1)
    for row_index in range(1, row_end):
        person = sheet.cell(row_index, 0).value
        for position, header in enumerate(headers):
            # Group columns are shifted by one because of the name column.
            flag = sheet.cell(row_index, position + 1).value
            if flag:
                membership[header].append(person)

    excel_data.names = people
    excel_data.groups = [(header, membership[header]) for header in headers]
Utility function to clean a string TODO speed this up """ # Lower and remove new lines text_clean = text.lower().replace("\n", " ").replace("\r", " ") # Shrink spaces text_clean = re.sub(r"\s+", " ", text_clean) # Remove lead and trailing whitespace text_clean = text_clean.strip() return text_clean # Load the current file mappings wb = open_workbook(CURRENT_TYPES, formatting_info=False, on_demand=True) sheet = wb.sheet_by_index(0) colns = number_of_good_cols(sheet) rowns = number_of_good_rows(sheet) mappings = {} for i in range(1, rowns): # Get the string literal = clean_string(sheet.cell(i, 1).value) mappings.setdefault(literal, set()) print ">" + literal # Get the values for j in range(2, colns): value = sheet.cell(i, j).value if value != "": mappings[literal].add(value)
Utility function to clean a string TODO speed this up """ # Lower and remove new lines text_clean = text.lower().replace('\n', ' ').replace('\r', ' ') # Shrink spaces text_clean = re.sub(r'\s+', ' ', text_clean) # Remove lead and trailing whitespace text_clean = text_clean.strip() return text_clean # Load the current file mappings wb = open_workbook(CURRENT_TYPES, formatting_info=False, on_demand=True) sheet = wb.sheet_by_index(0) colns = number_of_good_cols(sheet) rowns = number_of_good_rows(sheet) mappings = {} for i in range(1, rowns): # Get the string literal = clean_string(sheet.cell(i, 1).value) mappings.setdefault(literal, set()) print '>' + literal # Get the values for j in range(2, colns): value = sheet.cell(i, j).value if value != '': mappings[literal].add(value) # Load data from Ashkan