def extractMarkingFromXLS(self, mrkFileName):
    '''
    Process the current Excel file to extract the marking annotation
    and write the output to mrkFileName

    One line is written for every cell whose style name starts with
    'TL ', in the form "<sheet index>;<cell name>;<style name>".
    When config.isCompress() is true the output is bz2-compressed and
    written to mrkFileName + ".bz2" instead of mrkFileName.
    '''
    if config.isCompress():
        outputHandle = bz2.BZ2File(mrkFileName + ".bz2", 'wb',
                                   compresslevel=9)
    else:
        outputHandle = open(mrkFileName, "w")

    # try/finally guarantees the handle is closed (and the bz2 stream
    # finalised) even if a sheet or style lookup raises part-way through.
    try:
        for n in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(n)
            colns = number_of_good_cols(sheet)
            rowns = number_of_good_rows(sheet)
            self.log.debug("Process %d columns and %d rows" % (colns, rowns))
            for i in range(rowns):
                for j in range(colns):
                    cell = sheet.cell(i, j)
                    style_name = self.styles[cell].name
                    # Only the 'TL ' styles carry marking information
                    if style_name.startswith('TL '):
                        outputHandle.write(
                            "%d;%s;%s\n" % (n, cellname(i, j), style_name))
    finally:
        outputHandle.close()
def _read_groups(excel_data, sheet):
    """
    Read the names and the group memberships from a sheet and store them
    on excel_data as ``names`` and ``groups``.

    Column 0 (below the header row) holds the names and row 0 (right of
    the first column) holds the group names. A name is added to a group
    when the cell at their intersection has a truthy value.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    member_names = [cell.value for cell in sheet.col(0)[1:]]

    header_width = number_of_good_cols(sheet, nrows=1)
    group_names = [sheet.cell(0, col).value for col in range(1, header_width)]

    members_of = dict((group_name, []) for group_name in group_names)
    row_count = number_of_good_rows(sheet, ncols=1)
    for row in range(1, row_count):
        member = sheet.cell(row, 0).value
        # Group columns start at column 1, right after the name column
        for col, group_name in enumerate(group_names, 1):
            if sheet.cell(row, col).value:
                members_of[group_name].append(member)

    excel_data.names = member_names
    # Keep the groups in header order
    excel_data.groups = [(group_name, members_of[group_name])
                         for group_name in group_names]
def __init__(self, data):
    """
    Load the literal-to-value mappings described by data.

    data is indexed with the keys 'file', 'path', 'predicate' and
    'mapping_type'; 'prefix' is additionally read when mapping_type is
    'uri'.

    The first sheet of the workbook is read, skipping row 0. For every
    other row, column 0 is the context, column 1 the literal (float
    cells are converted to an integer string, then everything goes
    through clean_string) and columns 2 onwards the values.

    The result is stored in self._mappings, keyed by literal. Each entry
    is a dict that may hold 'default' (values for rows with an empty
    context) and 'context' (a dict from context to values). The values
    are a list of (predicate, encoded value) pairs, or None when the row
    has no non-empty value cell.
    """
    self._mappings = {}
    self.excelFileName = data['file']
    predicate = URIRef(data['predicate'])
    mapping_type = data['mapping_type']

    # Load the mappings
    wb = open_workbook(data['path'] + "/" + self.excelFileName,
                       formatting_info=False, on_demand=True)
    # With on_demand=True the workbook keeps its resources until they are
    # explicitly released, so release them even if parsing fails.
    try:
        sheet = wb.sheet_by_index(0)
        colns = number_of_good_cols(sheet)
        rowns = number_of_good_rows(sheet)
        for i in range(1, rowns):
            # Get the context (first column)
            context = sheet.cell(i, 0).value

            # Get the string (force reading the cell as a string)
            literal = sheet.cell(i, 1).value
            if isinstance(literal, float):
                literal = str(int(literal))
            literal = clean_string(literal)

            # Get the values
            values = []
            for j in range(2, colns):
                value = sheet.cell(i, j).value
                if value == '':
                    continue
                # Codes using numbers need to be seen as string
                if isinstance(value, float):
                    value = str(int(value))
                # Encode the value
                if mapping_type == 'uri':
                    # 'prefix' is only looked up for URI mappings
                    encoded_value = URIRef(data['prefix'] + value)
                elif mapping_type == 'boolean':
                    encoded_value = Literal(value in ('1', 'true'))
                else:
                    encoded_value = Literal(value)
                # Pair the encoded value with the predicate
                values.append((predicate, encoded_value))
            if not values:
                values = None

            # Save the mapping
            entry = self._mappings.setdefault(literal, {})
            if context != '':
                # Store the specific context
                entry.setdefault('context', {})[context] = values
            else:
                # Store the default mappings
                entry['default'] = values
    finally:
        wb.release_resources()
def __init__(self, data):
    """
    Load the literal-to-value mappings described by data.

    data is indexed with the keys 'file', 'path', 'predicate' and
    'mapping_type'; 'prefix' is additionally read when mapping_type is
    'uri'.

    The first sheet of the workbook is read, skipping row 0. For every
    other row, column 0 is the context, column 1 the literal (float
    cells are converted to an integer string, then everything goes
    through clean_string) and columns 2 onwards the values.

    The result is stored in self._mappings, keyed by literal. Each entry
    is a dict that may hold 'default' (values for rows with an empty
    context) and 'context' (a dict from context to values). The values
    are a list of (predicate, encoded value) pairs, or None when the row
    has no non-empty value cell.
    """
    self._mappings = {}
    self.excelFileName = data['file']
    predicate = URIRef(data['predicate'])
    mapping_type = data['mapping_type']

    # Load the mappings
    wb = open_workbook(data['path'] + "/" + self.excelFileName,
                       formatting_info=False, on_demand=True)
    # With on_demand=True the workbook keeps its resources until they are
    # explicitly released, so release them even if parsing fails.
    try:
        sheet = wb.sheet_by_index(0)
        colns = number_of_good_cols(sheet)
        rowns = number_of_good_rows(sheet)
        for i in range(1, rowns):
            # Get the context (first column)
            context = sheet.cell(i, 0).value

            # Get the string (force reading the cell as a string)
            literal = sheet.cell(i, 1).value
            if isinstance(literal, float):
                literal = str(int(literal))
            literal = clean_string(literal)

            # Get the values
            values = []
            for j in range(2, colns):
                value = sheet.cell(i, j).value
                if value == '':
                    continue
                # Codes using numbers need to be seen as string
                if isinstance(value, float):
                    value = str(int(value))
                # Encode the value
                if mapping_type == 'uri':
                    # 'prefix' is only looked up for URI mappings
                    encoded_value = URIRef(data['prefix'] + value)
                elif mapping_type == 'boolean':
                    encoded_value = Literal(value in ('1', 'true'))
                else:
                    encoded_value = Literal(value)
                # Pair the encoded value with the predicate
                values.append((predicate, encoded_value))
            if not values:
                values = None

            # Save the mapping
            entry = self._mappings.setdefault(literal, {})
            if context != '':
                # Store the specific context
                entry.setdefault('context', {})[context] = values
            else:
                # Store the default mappings
                entry['default'] = values
    finally:
        wb.release_resources()
def injectMarkingIntoXLS(self, mrkFileName, targetFileName):
    '''
    Load marking instructions from mrkFileName and process the current
    Excel file to generate the annotated targetFileName

    Each non-blank line of mrkFileName must have the form
    "<sheet index>;<cell name>;<style name>". Marked cells are rewritten
    in a copy of the workbook with the coloured style registered for
    their style name; the copy is then saved to targetFileName.

    Raises KeyError when a marking line names a style that is not one of
    the 'TL ...' styles created below.
    '''
    # Prepare the output
    self.log.debug("Copy book")
    target_workbook = copy(self.workbook)

    # Load marking information
    self.log.debug("Load marking from %s" % mrkFileName)
    marking = {}
    with open(mrkFileName) as mrk_file:
        for mrk in mrk_file:
            mrk = mrk.strip()
            if not mrk:
                # Tolerate blank lines (e.g. a trailing newline)
                continue
            (index_str, cell, style) = mrk.split(';')
            marking.setdefault(int(index_str), {})[cell] = style

    # Create the styles in the target
    styles = {}
    for name, colour in [('TL Metadata', 'yellow'),
                         ('TL ColHeader', 'blue'),
                         ('TL Data', 'white'),
                         ('TL RowHeader', 'yellow'),
                         ('TL RowProperty', 'green'),
                         ('TL Title', 'pink'),
                         ('TL RowLabel', 'blue'),
                         ('TL HRowHeader', 'blue')]:
        xf = xlwt.easyxf(
            'name: test; pattern: pattern solid, fore_colour %s;' % colour)
        styles[name] = xf
        # Single-argument form prints the same under Python 2 and 3
        print(target_workbook.add_style(xf))

    # Process the source workbook
    for n in range(self.workbook.nsheets):
        sheet = self.workbook.sheet_by_index(n)
        target_sheet = target_workbook.get_sheet(n)
        colns = number_of_good_cols(sheet)
        rowns = number_of_good_rows(sheet)
        self.log.debug("Process %d columns and %d rows" % (colns, rowns))
        # A sheet without any marking line has no entry in marking
        sheet_marking = marking.get(n, {})
        for i in range(rowns):
            for j in range(colns):
                cell_name = cellname(i, j)
                if cell_name in sheet_marking:
                    style = styles[sheet_marking[cell_name]]
                    target_sheet.write(i, j, label=sheet.cell(i, j).value,
                                       style=style)

    target_workbook.save(targetFileName)
def get_sheet_data(self, index):
    """
    Copy the cell values of sheet number ``index`` into self.data[index]
    as a list of rows, each row being a list of cell values.

    The number of rows comes from number_of_good_rows; the number of
    columns is the sheet's full ncols.
    """
    sheet = self.sheets_read[index]
    row_count = number_of_good_rows(sheet)
    col_count = sheet.ncols

    # Register the list first, then fill it row by row
    table = self.data[index] = []
    for row in range(row_count):
        table.append([sheet.cell_value(row, col) for col in range(col_count)])
def injectMarkingIntoXLS(self, mrkFileName, targetFileName):
    '''
    Load marking instructions from mrkFileName and process the current
    Excel file to generate the annotated targetFileName

    Each non-blank line of mrkFileName must have the form
    "<sheet index>;<cell name>;<style name>". Marked cells are rewritten
    in a copy of the workbook with the coloured style registered for
    their style name; the copy is then saved to targetFileName.

    Raises KeyError when a marking line names a style that is not one of
    the 'TL ...' styles created below.
    '''
    # Prepare the output
    self.log.debug("Copy book")
    target_workbook = copy(self.workbook)

    # Load marking information
    self.log.debug("Load marking from %s" % mrkFileName)
    marking = {}
    with open(mrkFileName) as mrk_file:
        for mrk in mrk_file:
            mrk = mrk.strip()
            if not mrk:
                # Tolerate blank lines (e.g. a trailing newline)
                continue
            (index_str, cell, style) = mrk.split(';')
            marking.setdefault(int(index_str), {})[cell] = style

    # Create the styles in the target
    styles = {}
    for name, colour in [('TL Metadata', 'yellow'),
                         ('TL ColHeader', 'blue'),
                         ('TL Data', 'white'),
                         ('TL RowHeader', 'yellow'),
                         ('TL RowProperty', 'green'),
                         ('TL Title', 'pink'),
                         ('TL RowLabel', 'blue'),
                         ('TL HRowHeader', 'blue')]:
        xf = xlwt.easyxf(
            'name: test; pattern: pattern solid, fore_colour %s;' % colour)
        styles[name] = xf
        # Single-argument form prints the same under Python 2 and 3
        print(target_workbook.add_style(xf))

    # Process the source workbook
    for n in range(self.workbook.nsheets):
        sheet = self.workbook.sheet_by_index(n)
        target_sheet = target_workbook.get_sheet(n)
        colns = number_of_good_cols(sheet)
        rowns = number_of_good_rows(sheet)
        self.log.debug("Process %d columns and %d rows" % (colns, rowns))
        # A sheet without any marking line has no entry in marking
        sheet_marking = marking.get(n, {})
        for i in range(rowns):
            for j in range(colns):
                cell_name = cellname(i, j)
                if cell_name in sheet_marking:
                    style = styles[sheet_marking[cell_name]]
                    target_sheet.write(i, j, label=sheet.cell(i, j).value,
                                       style=style)

    target_workbook.save(targetFileName)
def _read_tables(excel_data, sheet):
    """
    Read one [name, sizes] entry per sheet row and store the list on
    excel_data as ``dimensions``.

    Column 0 holds the name; the following columns hold the sizes, which
    are converted with int(). Reading a row's sizes stops at the first
    falsy cell (empty or zero).

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    # Both bounds depend only on the sheet, so compute them once instead
    # of rescanning the sheet for every row.
    row_count = number_of_good_rows(sheet, ncols=1)
    col_count = number_of_good_cols(sheet)

    dimensions = []
    for row in range(row_count):
        name = sheet.cell(row, 0).value
        sizes = []
        for col in range(1, col_count):
            value = sheet.cell(row, col).value
            if not value:
                break
            sizes.append(int(value))
        dimensions.append([name, sizes])
    excel_data.dimensions = dimensions
def _read_tables(excel_data, sheet):
    """
    Read one [name, sizes] entry per sheet row and store the list on
    excel_data as ``dimensions``.

    Column 0 holds the name; the following columns hold the sizes, which
    are converted with int(). Reading a row's sizes stops at the first
    falsy cell (empty or zero).

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    # Both bounds depend only on the sheet, so compute them once instead
    # of rescanning the sheet for every row.
    row_count = number_of_good_rows(sheet, ncols=1)
    col_count = number_of_good_cols(sheet)

    dimensions = []
    for row in range(row_count):
        name = sheet.cell(row, 0).value
        sizes = []
        for col in range(1, col_count):
            value = sheet.cell(row, col).value
            if not value:
                break
            sizes.append(int(value))
        dimensions.append([name, sizes])
    excel_data.dimensions = dimensions
def getValidRowsCols(self):
    """
    Determine the number of non-empty rows and columns in the Excel sheet

    Starts from the counts reported by number_of_good_rows /
    number_of_good_cols for self.r_sheet and trims trailing rows and
    columns that isEmptyRow / isEmptyColumn report as empty.

    Returns:
    rowns -- number of rows
    colns -- number of columns
    """
    colns = number_of_good_cols(self.r_sheet)
    rowns = number_of_good_rows(self.r_sheet)

    # Check whether the number of good columns and rows are correct.
    # The "> 0" guards stop the trimming at zero, so an entirely empty
    # sheet never passes index -1 to the emptiness checks.
    while rowns > 0 and self.isEmptyRow(rowns - 1, colns):
        rowns = rowns - 1
    while colns > 0 and self.isEmptyColumn(colns - 1, rowns):
        colns = colns - 1

    self.log.debug('Number of rows with content: {0}'.format(rowns))
    self.log.debug('Number of columns with content: {0}'.format(colns))
    return rowns, colns
def extractMarkingFromXLS(self, mrkFileName):
    '''
    Process the current Excel file to extract the marking annotation
    and write the output to mrkFileName

    One line is written for every cell whose style name starts with
    'TL ', in the form "<sheet index>;<cell name>;<style name>".
    When config.isCompress() is true the output is bz2-compressed and
    written to mrkFileName + ".bz2" instead of mrkFileName.
    '''
    if config.isCompress():
        outputHandle = bz2.BZ2File(mrkFileName + ".bz2", 'wb',
                                   compresslevel=9)
    else:
        outputHandle = open(mrkFileName, "w")

    # try/finally guarantees the handle is closed (and the bz2 stream
    # finalised) even if a sheet or style lookup raises part-way through.
    try:
        for n in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(n)
            colns = number_of_good_cols(sheet)
            rowns = number_of_good_rows(sheet)
            self.log.debug("Process %d columns and %d rows" % (colns, rowns))
            for i in range(rowns):
                for j in range(colns):
                    cell = sheet.cell(i, j)
                    style_name = self.styles[cell].name
                    # Only the 'TL ' styles carry marking information
                    if style_name.startswith('TL '):
                        outputHandle.write(
                            "%d;%s;%s\n" % (n, cellname(i, j), style_name))
    finally:
        outputHandle.close()
def _read_groups(excel_data, sheet):
    """
    Fill excel_data.names and excel_data.groups from a membership sheet.

    The first column (minus its header cell) lists the names, the first
    row (minus its first cell) lists the group names, and a truthy cell
    at a name/group intersection puts that name into that group.

    @type excel_data: ExcelData
    @type sheet: xlrd.sheet.Sheet
    """
    all_names = [entry.value for entry in sheet.col(0)[1:]]

    group_names = [
        sheet.cell(0, column).value
        for column in range(1, number_of_good_cols(sheet, nrows=1))
    ]

    membership = dict((title, []) for title in group_names)
    for row in range(1, number_of_good_rows(sheet, ncols=1)):
        person = sheet.cell(row, 0).value
        for offset, title in enumerate(group_names):
            # Group columns are shifted by one because of the name column
            if not sheet.cell(row, offset + 1).value:
                continue
            membership[title].append(person)

    excel_data.names = all_names
    excel_data.groups = [(title, membership[title]) for title in group_names]
def exel(url):
    """
    Append the records obtained from url_read(url[0]) to the workbook
    returned by file_selector(url), then rewrite the formula columns of
    its first ten sheets.

    Phase 1: merged_list is consumed in strides of five. For each record
    the workbook is re-opened, the target sheet is resolved through
    method_name / get_sheet_index, the record is written at the row given
    by row_index, and the workbook is saved again.

    Phase 2: for sheets 0-9, every row index from number_of_good_rows
    down to 1 gets the formulas for columns 10-19 and 21-23, each
    referring to Excel row number ``row + 1``.
    """
    arial = xlwt.Font()
    arial.name = 'Arial'
    arial.colour_index = 0
    arial.height = 160

    style0 = xlwt.XFStyle()
    style0.font = arial
    style1 = xlwt.XFStyle()
    style1.font = arial
    style1.num_format_str = 'D-MMM-YY'

    merged_list = url_read(url[0])
    xls_file = file_selector(url)

    # Phase 1: one open / write / save cycle per record
    for i in range(3, len(merged_list), 5):
        source_book = xlrd.open_workbook(xls_file, formatting_info=True)
        target_book = copy(source_book)
        sheet_name = method_name(i, merged_list)
        sheet_position = get_sheet_index(sheet_name)
        source_sheet = source_book.sheet_by_name(sheet_name)
        target_sheet = target_book.get_sheet(sheet_position)
        free_row = row_index(source_sheet)

        first = i - 3
        first_field = merged_list[first]
        target_sheet.write(free_row, 0, first_field, style0)
        target_sheet.write(free_row, 1, merged_list[first + 2], style0)
        target_sheet.write(free_row, 2, merged_list[first + 1], style0)
        target_sheet.write(free_row, 28, datetime.datetime.now(), style1)
        target_sheet.write(free_row, 20, merged_list[first + 4], style0)
        target_book.save(xls_file)

    # Phase 2: (target column, formula template) pairs; {r} is replaced
    # by the 1-based Excel row number.
    formula_columns = (
        (10, 'if(and(E{r}+F{r}+G{r}+H{r}+I{r}+J{r}<B{r};F{r}+E{r}>5);"TM";"")'),
        (11, 'if(E{r}+F{r}+G{r}+H{r}+I{r}+J{r}>B{r};"TB";"")'),
        (12, 'if(E{r}+G{r}+I{r}-C{r}>F{r}+H{r}+J{r};"F-";"")'),
        (13, 'if(and(E{r}+G{r}+I{r}-C{r}<F{r}+H{r}+J{r};E{r}+F{r}>5);"F+";"")'),
        (14, 'if(V{r}+W{r}+X{r}>1;"P1";"")'),
        (15, 'if(and(V{r}+W{r}+X{r}<2;E{r}+F{r}>5);"P2";"")'),
        (16, 'if(V{r}+W{r}=2;"2:0";"")'),
        (17, 'if(and(I{r}+J{r}>5;V{r}+W{r}+X{r}=2);"2:1";"")'),
        (18, 'if(and(I{r}+J{r}>5;V{r}+W{r}+X{r}=1);"1:2";"")'),
        (19, 'if(and(E{r}>0;V{r}+W{r}=0);"0:2";"")'),
        (21, 'if(E{r}>F{r};"1";"0")'),
        (22, 'if(G{r}>H{r};"1";"0")'),
        (23, 'if(I{r}>J{r};"1";"0")'),
    )

    input_Book = xlrd.open_workbook(xls_file, formatting_info=True)
    output_Book = copy(input_Book)
    for sheet_index in range(0, 10):
        formula_sheet = output_Book.get_sheet(sheet_index)
        read_sheet = input_Book.sheet_by_index(sheet_index)
        for row in xrange(number_of_good_rows(read_sheet), 0, -1):
            excel_row = str(row + 1)
            for column, template in formula_columns:
                formula = xlwt.Formula(template.format(r=excel_row))
                formula_sheet.write(row, column, formula, style0)

    # Single save once every sheet has been processed
    output_Book.save(xls_file)
TODO speed this up """ # Lower and remove new lines text_clean = text.lower().replace("\n", " ").replace("\r", " ") # Shrink spaces text_clean = re.sub(r"\s+", " ", text_clean) # Remove lead and trailing whitespace text_clean = text_clean.strip() return text_clean # Load the current file mappings wb = open_workbook(CURRENT_TYPES, formatting_info=False, on_demand=True) sheet = wb.sheet_by_index(0) colns = number_of_good_cols(sheet) rowns = number_of_good_rows(sheet) mappings = {} for i in range(1, rowns): # Get the string literal = clean_string(sheet.cell(i, 1).value) mappings.setdefault(literal, set()) print ">" + literal # Get the values for j in range(2, colns): value = sheet.cell(i, j).value if value != "": mappings[literal].add(value) # Load data from Ashkan
TODO speed this up """ # Lower and remove new lines text_clean = text.lower().replace('\n', ' ').replace('\r', ' ') # Shrink spaces text_clean = re.sub(r'\s+', ' ', text_clean) # Remove lead and trailing whitespace text_clean = text_clean.strip() return text_clean # Load the current file mappings wb = open_workbook(CURRENT_TYPES, formatting_info=False, on_demand=True) sheet = wb.sheet_by_index(0) colns = number_of_good_cols(sheet) rowns = number_of_good_rows(sheet) mappings = {} for i in range(1, rowns): # Get the string literal = clean_string(sheet.cell(i, 1).value) mappings.setdefault(literal, set()) print '>' + literal # Get the values for j in range(2, colns): value = sheet.cell(i, j).value if value != '': mappings[literal].add(value) # Load data from Ashkan add = 0