def xlsx2html(self, filePath, sheet):
    """Render one worksheet of an Excel file as an HTML table.

    Columns whose header (row 0) appears in ``self.remove_headers`` are
    omitted from both the header and the data rows. Whole-number floats
    are rendered without the trailing ``.0``.

    :param filePath: path of the workbook to open
    :param sheet: name of the worksheet to convert
    :return: HTML string ``<table>...</table>``
    """
    wb = xlrd.open_workbook(filePath)
    sht = wb.sheet_by_name(sheet)
    data = []          # one joined "<td>..</td>" string per data row
    remove_index = []  # column numbers of removable headers
    header = []
    for row in range(sht.nrows):
        dt = []
        for column in range(sht.ncols):
            value = sht.cell_value(row, column)
            # Excel returns whole numbers as floats (e.g. 3.0) -> "3"
            if type(value) == float:
                if repr(value)[-2:] == '.0':
                    value = int(value)
                value = str(value)
            if row == 0:
                if value.lower() in self.remove_headers:
                    # storing column number of removable headers in a list
                    remove_index.append(column)
                else:
                    header.append(escape(value))
            else:
                # if column is not in remove_header list than add the data in html
                if column not in remove_index:
                    dt.append(escape(value))
        # BUGFIX: skip the header row here -- previously it appended an
        # always-empty '<td></td>' row to the table body.
        if row != 0:
            # joining individual data of a row in single row
            data.append('<td>' + '</td>\n<td>'.join(dt) + '</td>')
    header = '<tr><th>' + '</th>\n<th>'.join(header) + '</th></tr>'
    # joining list of rows to make html
    html = ('<table><tbody>' + header + '<tr>'
            + '</tr>\n<tr>'.join(data) + '</tr>' + '</tbody></table>')
    return html
def test_time(self):
    """Time-formatted cells on 'Blätt1' decode to the expected times."""
    book = xlrd3.open_workbook(from_tests_dir('Formate.xls'),
                               formatting_info=True)
    sheet = book.sheet_by_name('Blätt1')
    expected = [(3, 6, 34, 0), (4, 12, 56, 0), (5, 17, 47, 13)]
    for rownum, hour, minute, second in expected:
        self.assertEqual(sheet.cell(rownum, 1).time(),
                         time(hour, minute, second))
class TestReadWorkbook(unittest.TestCase):
    """Smoke tests for the sheet-access APIs of an open Book."""

    book = xlrd3.open_workbook(from_tests_dir('profiles.xls'))
    sheetnames = ['PROFILEDEF', 'AXISDEF', 'TRAVERSALCHAINAGE',
                  'AXISDATUMLEVELS', 'PROFILELEVELS']

    def test_nsheets(self):
        self.assertEqual(self.book.nsheets, 5)

    def test_sheet_by_name(self):
        for sheet_name in self.sheetnames:
            self.assertTrue(self.book.sheet_by_name(sheet_name))

    def test_sheet_by_index(self):
        for pos in range(5):
            self.assertEqual(self.book.sheet_by_index(pos).name,
                             self.sheetnames[pos])

    def test_sheets(self):
        for pos, sheet in enumerate(self.book.sheets()):
            self.assertEqual(sheet.name, self.sheetnames[pos])

    def test_sheet_names(self):
        self.assertEqual(self.sheetnames, self.book.sheet_names())
def test_repr_without_formatting_info(self):
    """Without formatting_info the cell repr is plain and xf_index is None."""
    book = xlrd3.open_workbook(from_tests_dir('xf_class.xls'),
                               formatting_info=False)
    probe = book.sheet_by_name('table1').cell(0, 0)
    self.assertEqual(repr(probe), "text:'RED'")
    self.assertEqual(probe.xf_index, None)
def test_date(self):
    """Date-formatted cells on 'Blätt1' decode to the expected dates."""
    book = xlrd3.open_workbook(from_tests_dir('Formate.xls'),
                               formatting_info=True)
    sheet = book.sheet_by_name('Blätt1')
    for rownum, year, month, day in [(0, 1907, 7, 3),
                                     (1, 2005, 2, 23),
                                     (2, 1988, 5, 3)]:
        self.assertEqual(sheet.cell(rownum, 1).date(), date(year, month, day))
def getCellValue(sheetIndex, rowIndex, colIndex, xlsFilePath):
    """Return the value of one cell from the given sheet of an xls file."""
    book = xlrd3.open_workbook(xlsFilePath)
    table = book.sheets()[sheetIndex]
    # table.row(rowIndex)[colIndex].value and table.col(colIndex)[rowIndex].value
    # would read the same cell
    return table.cell(rowIndex, colIndex).value
def xls2csv(self, excel_filename, csv_filename):
    """Convert the first worksheet of an xls file into a csv file.

    Only the header row (row 0) and rows whose second column is numeric
    are written. Date cells are converted to ``datetime`` objects.

    :param excel_filename: source Excel file
    :param csv_filename: destination csv file
    """
    try:
        book = xlrd3.open_workbook(excel_filename)
        print("The number of worksheets: %d" % book.nsheets)
        sheet_name = book.sheet_names()[0].split(' ')[0]  #The first sheet
        sh = book.sheet_by_index(0)
        print("Sheet name:", sh.name, " | Number of rows:", sh.nrows,
              " | Number of columns:", sh.ncols)
        # BUGFIX: context manager guarantees the csv file is closed even
        # when reading a row raises (the handle was previously leaked).
        with open(csv_filename, 'w') as csv_file:
            w = csv.writer(csv_file, csv.excel)
            for row in range(sh.nrows):
                cell_types = sh.row_types(row)
                this_row = []
                for col in range(sh.ncols):
                    if cell_types[col] == xlrd3.XL_CELL_DATE:
                        cell_val = datetime.datetime(*xlrd3.xldate_as_tuple(
                            sh.cell_value(row, col), book.datemode))
                    else:
                        # NOTE(review): encoding to bytes makes csv write
                        # b'...' reprs on Python 3 -- kept for backward
                        # compatibility; confirm before changing.
                        cell_val = str(sh.cell_value(row, col)).encode('utf8')
                    this_row.append(cell_val)
                # keep header row plus rows whose 2nd column is numeric
                if row == 0 or (row > 0 and this_row[1].isdigit()):
                    w.writerow(this_row)
        print("%s has been created" % csv_filename)
    except IOError:
        print("IOError: Please ensure %s exists" % excel_filename)
def _init_reader(self, source_file: Union[io.IOBase, Path]) -> None:
    """Open *source_file* with xlrd3 and position ``self._rows`` on the
    target sheet's row iterator.

    A ``Path`` is opened by filename; a stream is mmap'ed via its fileno.
    In-memory streams (no usable fileno) are first copied to a temp file
    so they can be mmap'ed too.
    """
    wb_kwargs: Dict[str, Any] = {
        'encoding_override': self.encoding,
        'on_demand': True,  # load sheets lazily
    }
    if isinstance(source_file, Path):
        wb_kwargs['filename'] = str(source_file)
    elif isinstance(source_file, io.IOBase):
        try:
            fileno = source_file.fileno()
        except OSError:
            # We're dealing with an in-memory stream. Write it to a file
            # so it can be mmap'ed,
            self._tmpfile = tempfile.TemporaryFile()
            buf = source_file.readline()
            while buf:
                self._tmpfile.write(buf)
                buf = source_file.readline()
            self._tmpfile.seek(0)
            fileno = self._tmpfile.fileno()
        # map the whole file read-only and hand the bytes to xlrd3
        wb_kwargs['file_contents'] = mmap.mmap(
            fileno,
            os.fstat(fileno).st_size,
            access=mmap.ACCESS_READ,
        )
    else:  # pragma: no cover
        raise ValueError('Invalid source_file')
    self._book = xlrd3.open_workbook(**wb_kwargs)
    self._rows = self._find_sheet().get_rows()
def get_workbooks(self):
    """Yield ``(xlrd.Book, original-filename)`` pairs for every file
    previously dumped into ``self.temp_path``; yields nothing when no
    temp dir was set up."""
    if self.temp_path is None:
        return
    entries = []
    for stored_name in os.listdir(self.temp_path):
        parts = stored_name.split('-', 1)
        parts.append(stored_name)
        entries.append(parts)
    entries.sort()
    for index, original_name, stored_name in entries:
        # We currently don't open with on_demand=True here
        # as error filters should be lastish in the chain
        # so there's not much win.
        # However, if we did, getting rid of the temp dirs
        # becomes a problem as, on Windows, they can't be
        # deleted until the xlrd.Book object is done with
        # and we don't know when that might be :-(
        workbook = xlrd.open_workbook(
            os.path.join(self.temp_path, stored_name),
            formatting_info=1,
            on_demand=False,
            ragged_rows=True,
        )
        yield workbook, original_name
def test_merged_cells(self):
    """First merged range on 'table2' spans (3,7,2,5); anchor holds 'MERGED'."""
    book = xlrd3.open_workbook(from_tests_dir('xf_class.xls'),
                               formatting_info=True)
    table2 = book.sheet_by_name('table2')
    rlo, rhi, clo, chi = table2.merged_cells[0]
    self.assertEqual(table2.cell(rlo, clo).value, 'MERGED')
    self.assertEqual((rlo, rhi, clo, chi), (3, 7, 2, 5))
def create_body(self):
    """
    Will create body from mail by using summary tab of xlsx output file.
    Any changes in body structure should be made in this function.
    :return: (subject, body) tuple built from the Summary sheet
    """
    workbook = xlrd.open_workbook(self.xlsx_file)
    summary = workbook.sheet_by_name("Summary")
    lines = []
    for row_num in range(2, summary.nrows):
        cells = []
        for cell in summary.row(row_num):
            value = cell.value
            if type(value) == float:
                # whole-number floats render without ".0" ("3.0" -> "3")
                value = str(value)
                if value[-2:] == '.0':
                    value = value[:-2]
            else:
                value = str(value).strip()
            cells.append(value)
        # drop empty cells; skip rows that end up completely empty
        cells = [entry for entry in cells if entry != ""]
        if cells:
            lines.append(' = '.join(cells))
    subject = summary.row(0)[0].value.strip()
    body = '\n'.join(lines)
    return subject, body
def create_child_pages(self, confluence, parent_page):
    """Split the 'Output' sheet of the result workbook into chunks of
    ``self.CreateSubPagesForEachXXEntries`` rows and create one Confluence
    child page per chunk, each holding an HTML table of that chunk.

    :param confluence: Confluence client used to create pages
    :param parent_page: id of the parent page the subpages hang under
    """
    # Creating child pages if required
    wb = xlrd.open_workbook(self.fileNameAndPathToResultXLSX)
    output_xlsx = wb.sheet_by_name("Output")
    # Getting starting points for each subpage
    starting_points = [
        x for x in range(1, output_xlsx.nrows,
                         self.CreateSubPagesForEachXXEntries)
    ]
    header = []  # header used in every subpage
    remove_index = []  # headers column number which are not to be used are stored in it.
    for x in range(output_xlsx.ncols):
        value = output_xlsx.cell_value(0, x)
        if value.lower() not in self.remove_headers:
            header.append(value)
        else:
            # if header is to be removed its column number is stored here
            remove_index.append(x)
    # html table headers are formatted here
    header = '<tr><th>' + '</th>\n<th>'.join(header) + '</th></tr>'
    for starting in starting_points:
        if starting + self.CreateSubPagesForEachXXEntries < output_xlsx.nrows:
            # if it is not last sub page;
            # ending point is only used for title
            ending = starting + self.CreateSubPagesForEachXXEntries - 1
        else:
            ending = output_xlsx.nrows
        # generating title for subpage (zero-padded "start - end <title>")
        title = (
            (len(str(output_xlsx.nrows)) - len(str(starting))) * "0"
        ) + str(starting) + " - " + (
            (len(str(output_xlsx.nrows)) - len(str(ending))) * "0"
        ) + str(ending) + " " + self.pageTitle
        data = []
        # NOTE(review): range(starting, ending) excludes ``ending``; for
        # non-final pages this appears to skip the chunk's last row
        # (row starting + N - 1) entirely -- confirm intended.
        for row in range(starting, ending):
            dt = []
            for column in range(output_xlsx.ncols):
                value = output_xlsx.cell_value(row, column)
                if type(value) == float:  # coverting int and float into string
                    if repr(value)[-2:] == '.0':
                        value = int(value)
                    value = str(value)
                if column not in remove_index:
                    # if column is not in remove_header list than add the data in html
                    dt.append(escape(value))
            # generating html table row and appending it in list
            data.append('<td>' + '</td>\n<td>'.join(dt) + '</td>')
        # html table row tag is added for every row which are stored in list
        html = '<table><tbody>' + header + '<tr>' + '</tr>\n<tr>'.join(
            data) + '</tr>' + '</tbody></table>'
        # subpage is created here with the help of parent page id
        confluence.create_page(
            self.space, title,
            html, parent_id=parent_page, type='page',
            representation='storage'
        )
def peoples(file):
    """Collect column 2 (third column) of sheet 0, skipping the header row."""
    workbook = xlrd3.open_workbook(filename=file)
    first_sheet = workbook.sheet_by_index(0)
    result = [first_sheet.row(idx)[2].value
              for idx in range(1, first_sheet.nrows)]
    workbook.release_resources()
    return result
def __init__(self, excel_path, sheet_name=None):
    """Open *excel_path* and select the named sheet, or sheet 0 when no
    name is given."""
    self.excel_path = excel_path
    self.sheet_name = sheet_name
    workbook = xlrd3.open_workbook(self.excel_path)
    if self.sheet_name:
        self.sheet = workbook.sheet_by_name(self.sheet_name)
    else:
        self.sheet = workbook.sheet_by_index(0)
def test_write_excel_100():
    """write(batch_size=100) yields 100 data rows plus one header row."""
    # Tests write method of TestDataGenerator object with 100 random data in excel.
    removeFile(testOutput100xls)
    testDataGenerator.write(batch_size=100, outputfile=testOutput100xls)
    result_sheet = xlrd.open_workbook(testOutput100xls).sheet_by_index(0)
    assert result_sheet.nrows == 101
    print("Test 100 random data successful.")
def test_write_excel_all():
    """Write the full data set and report how many rows were produced."""
    removeFile(testOutputFullxls)
    # Takes too much time!!!!!
    # Tests write_excel method of TestDataGenerator object with all data.
    testDataGenerator.write(outputfile=testOutputFullxls)
    result_sheet = xlrd.open_workbook(testOutputFullxls).sheet_by_index(0)
    print(f"Total data in excel file = {str(result_sheet.nrows)}")
def parseFile(self, filePath, outputDir):
    """Parse every valid sheet of *filePath* and write each sheet's
    parsed content to its own file under *outputDir*."""
    self._xls = xlrd.open_workbook(filePath)
    for current_sheet in self._xls.sheets():
        if not self._isValidSheet(current_sheet):
            continue
        content = self.parseSheet(current_sheet, outputDir)
        file_name = self._getOutputFileName(current_sheet)
        self._saveFile(outputDir, file_name, content)
def openXls(self, xlsNames):
    """Open each workbook, register it under its detected company name,
    then rebuild the header-id mapping. Workbooks whose company cannot
    be detected are dropped."""
    for xls_name in xlsNames:
        workbook = xlrd.open_workbook(xls_name)
        company = self.detectCompany(workbook, xls_name)
        if company != "UNKNOWN":
            self.workbooks[company] = workbook
    self.fillHeaderId()
def read_excel(date_path=data_path, sheet_name='login_page'):
    """Read locator rows from an Excel sheet into a dict keyed by the
    value of column 0.

    Each data row (header skipped) becomes a dict with keys
    ``elemen_name``, ``locat_type``, ``locate_value`` and ``time_out``
    taken from columns 1..4.

    :param date_path: workbook path (defaults to module-level data_path)
    :param sheet_name: worksheet to read
    :return: dict mapping column-0 value -> locator info dict
    """
    # BUGFIX: the body previously ignored the parameter and always opened
    # the module-level ``data_path``.
    wokebook = xlrd3.open_workbook(date_path)
    sheet = wokebook.sheet_by_name(sheet_name)
    data = {}
    for i in range(1, sheet.nrows):
        data_01 = {'elemen_name': sheet.cell_value(i, 1),
                   'locat_type': sheet.cell_value(i, 2),
                   'locate_value': sheet.cell_value(i, 3),
                   'time_out': sheet.cell_value(i, 4)}
        data[sheet.cell_value(i, 0)] = data_01
    return data
def __init__(self, FileNameAndPath, testRunUtils: TestRunUtils):
    """Open the test-run Excel file and remember the run utilities.

    Raises BaseException when the file does not exist (type kept as-is
    for backward compatibility with existing callers).
    """
    self.testStepExecutionNumber = 0
    # self.testRunAttributes = testRunUtils.testRunAttributes
    self.testRunUtils = testRunUtils
    self.fileName = utils.extractFileNameFromFullPath(FileNameAndPath)
    try:
        self.excelFile = xlrd.open_workbook(FileNameAndPath)
    except FileNotFoundError as exc:
        # NOTE(review): raising BaseException is unusual; a dedicated
        # Exception subclass would be cleaner, but callers may catch it.
        raise BaseException(f"File not found - exiting {exc}")
def check_output(xlsx_file):
    """Assert that at least half of the Summary sheet's test records
    succeeded."""
    summary = xlrd.open_workbook(xlsx_file).sheet_by_name("Summary")
    # empty cells fall back to 0 via ``or``
    test_records = int(summary.row(2)[1].value or 0)
    success = int(summary.row(3)[1].value or 0)
    error = int(summary.row(5)[1].value or 0)
    assert success >= test_records / 2
def csv_from_excel(xlsx_file, sheet_index, csv_file):
    """Dump one worksheet of *xlsx_file* into *csv_file*.

    :param xlsx_file: source workbook path
    :param sheet_index: zero-based index of the sheet to export
    :param csv_file: destination csv path (every field quoted)
    """
    wb = xlrd.open_workbook(xlsx_file)
    sh = wb.sheet_by_index(sheet_index)
    # BUGFIX: context manager closes the file even when a row write
    # raises (the handle was previously leaked on error); newline='' is
    # what the csv module requires to avoid spurious blank lines.
    with open(csv_file, 'w', newline='') as your_csv_file:
        wr = csv.writer(your_csv_file, quoting=csv.QUOTE_ALL)
        for rownum in range(sh.nrows):
            wr.writerow(sh.row_values(rownum))
def getAllRowsBySheetIndex(sheetIndex, xlsFilePath):
    """Return the values of every row of the given sheet as a list of lists."""
    workBook = xlrd3.open_workbook(xlsFilePath)
    table = workBook.sheets()[sheetIndex]
    # row_values(i) yields the list of cell values of row i
    return [table.row_values(i) for i in range(table.nrows)]
def getAllColsBySheetIndex(sheetIndex, xlsFilePath):
    """Return the values of every column of the given sheet as a list of lists."""
    workBook = xlrd3.open_workbook(xlsFilePath)
    table = workBook.sheets()[sheetIndex]
    # col_values(i) yields the list of cell values of column i
    return [table.col_values(i) for i in range(table.ncols)]
def xlsfile(w_file):
    """Open *w_file* as an Excel workbook (stored in global ``wb``).

    On failure a Russian error message is printed and the process exits:
    - not a valid Excel file -> "... не является файлом в формате Excel"
    - missing file / bad path -> "... не существует, или ошибка в пути ..."
    """
    global wb
    try:
        wb = xlrd3.open_workbook(w_file)
    except xlrd3.mmap.error:
        # workbook could not be mapped -- treated as "not an Excel file"
        print("%s не является файлом в формате Excel" % w_file, end="\n")
        exit(0)
    except IOError:
        # file missing or path error
        print("%s не существует, или ошибка в пути к файлу" % w_file, end="\n")
        exit(0)
    else:
        return wb
def get_workbooks(self):
    """
    If the data to be processed is not stored in files or if special
    parameters need to be passed to :func:`xlrd.open_workbook` then this
    method must be overriden.
    Any implementation must return an iterable sequence of tuples. The
    first element of which must be an :class:`xlrd.Book` object and the
    second must be the filename of the file from which the book object
    came.
    """
    for path in self.get_filepaths():
        workbook = xlrd.open_workbook(
            path,
            formatting_info=1,
            on_demand=True,
            ragged_rows=True,
        )
        yield workbook, os.path.split(path)[1]
def get_excel_data(file_path, sheet_name, fields=None, header_rows=1,
                   footer_rows=0):
    """Yield the requested columns of each data row of *sheet_name*.

    Each entry of *fields* is either a spreadsheet column letter
    ("a".."zz", case-insensitive) or a header caption looked up in the
    last header row.

    :param file_path: workbook path
    :param sheet_name: worksheet to read
    :param fields: column letters and/or header captions to extract
    :param header_rows: number of leading header rows to skip
    :param footer_rows: number of trailing rows to skip
    :yield: list of cell values, one per requested field, per data row
    """
    # FIX: mutable default argument ([]) replaced by a None sentinel.
    if fields is None:
        fields = []
    book = xlrd.open_workbook(file_path)
    sheet = book.sheet_by_name(sheet_name)
    # map header caption -> column index, taken from the last header row
    header_map = {}
    if header_rows > 0:
        header_row = sheet.row_values(header_rows - 1)
        for index in range(len(header_row)):
            header_map[header_row[index].strip()] = index
    # resolve each requested field to a numeric column index
    target_fields = []
    reg = re.compile(r'^([a-z]?)([a-z])$')
    for field in fields:
        field = field.strip()
        match_obj = reg.match(field.lower())
        if match_obj:
            # spreadsheet-style letters: "a"->0, "z"->25, "aa"->26, ...
            col1, col2 = match_obj.groups()
            target_fields.append((col1 and (ord(col1) - 96) * 26 or 0)
                                 + ord(col2) - 97)
        else:
            target_fields.append(header_map[field])
    for row_index in range(header_rows, sheet.nrows - footer_rows):
        yield [sheet.cell_value(row_index, col_index)
               for col_index in target_fields]
def read_excel(self, fileName, sheetName):
    """Read every data row of *sheetName* into ``self.dataDict``.

    Each row becomes a dict keyed by the header captions of row 0; the
    row/column/sheet/file bookkeeping entries are appended so the source
    testrun file can be updated later.

    :param fileName: workbook name, resolved via utils.findFileAndPathFromPath
    :param sheetName: worksheet to read
    """
    fileName = utils.findFileAndPathFromPath(fileName)
    if not fileName:
        logger.critical(f"Can't open file: {fileName}")
        return
    logger.debug(f"Reading excel file {fileName}...")
    book = open_workbook(fileName)
    sheet = book.sheet_by_name(sheetName)
    # read header values into the list
    keys = [
        sheet.cell(0, col_index).value for col_index in range(sheet.ncols)
    ]
    # if testresult header is present then taking its index, which is later used as column number
    testrun_index = [
        keys.index(x) for x in keys if str(x).lower() == "testresult"
    ]
    if testrun_index:
        # adding +1 value which is the correct column position
        testrun_index = testrun_index[0] + 1
    else:
        # if list is empty that means their is no testresult header
        testrun_index = 0
    for row_index in range(1, sheet.nrows):
        temp_dic = {}
        for col_index in range(sheet.ncols):
            temp_dic[keys[col_index]] = sheet.cell(row_index, col_index).value
            # whole-number floats are stored as e.g. "3" instead of "3.0"
            if type(temp_dic[keys[col_index]]) == float:
                temp_dic[keys[col_index]] = repr(temp_dic[keys[col_index]])
                if temp_dic[keys[col_index]][-2:] == ".0":
                    temp_dic[keys[col_index]] = temp_dic[
                        keys[col_index]][:-2]
        # row, column, sheetName & fileName which are later used in updating source testrun file
        temp_dic["testcase_row"] = row_index
        temp_dic["testcase_sheet"] = sheetName
        temp_dic["testcase_file"] = fileName
        temp_dic["testcase_column"] = testrun_index
        self.dataDict.append(temp_dic)
def doOneSpreadSheet(self, spreadSheetPath):
    """Walk every cell of every sheet of *spreadSheetPath*, feed each
    cell into ``self.builder``, and let the MySqlClient emit the
    collected detail SQL once per non-header row.

    :param spreadSheetPath: path of the workbook to import
    """
    mySqlClient = MySqlClient()
    mySqlClient.queryMaxModelID()
    print("MaxModelID:", mySqlClient.getMaxModelID())
    workbook = xlrd3.open_workbook(spreadSheetPath)
    sheetNames = workbook.sheet_names()
    for sheetName in sheetNames:
        worksheet = workbook.sheet_by_name(sheetName)
        num_rows = worksheet.nrows - 1   # index of the last row
        num_cells = worksheet.ncols - 1  # index of the last column
        curr_row = -1
        mySqlClient.hasNotBeenEmitted()
        while curr_row < num_rows:
            curr_row += 1
            row = worksheet.row(curr_row)
            # print('Row:', curr_row)
            curr_cell = -1
            while curr_cell < num_cells:
                curr_cell += 1
                # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank
                cell_type = worksheet.cell_type(curr_row, curr_cell)
                cell_value = worksheet.cell_value(curr_row, curr_cell)
                print(
                    "CurrentSheet", sheetName,
                    "CurrentRow:", curr_row,
                    "CurrentCell:", curr_cell,
                    "CellType:", cell_type,
                    "CellValue:", cell_value,
                )
                self.builder(curr_row, curr_cell, cell_value, mySqlClient)
            # row 0 is treated as a header row: no SQL is emitted for it
            if curr_row != 0:
                mySqlClient.emitDetailSQL()
    mySqlClient.cleanClose()
def xls2csv(self, excel_filename, csv_filename):
    """Convert the first worksheet of an xls file into a csv file.

    Only the header row (row 0) and rows whose second column is numeric
    are written. Date cells are converted to ``datetime`` objects.

    :param excel_filename: source Excel file
    :param csv_filename: destination csv file
    """
    try:
        book = xlrd3.open_workbook(excel_filename)
        print("The number of worksheets: %d" % book.nsheets)
        sheet_name = book.sheet_names()[0].split(' ')[0]  #The first sheet
        sh = book.sheet_by_index(0)
        print("Sheet name:", sh.name, " | Number of rows:", sh.nrows,
              " | Number of columns:", sh.ncols)
        # BUGFIX: context manager guarantees the csv file is closed even
        # when reading a row raises (the handle was previously leaked).
        with open(csv_filename, 'w') as csv_file:
            w = csv.writer(csv_file, csv.excel)
            for row in range(sh.nrows):
                cell_types = sh.row_types(row)
                this_row = []
                for col in range(sh.ncols):
                    if cell_types[col] == xlrd3.XL_CELL_DATE:
                        cell_val = datetime.datetime(*xlrd3.xldate_as_tuple(
                            sh.cell_value(row, col), book.datemode))
                    else:
                        # NOTE(review): encoding to bytes makes csv write
                        # b'...' reprs on Python 3 -- kept for backward
                        # compatibility; confirm before changing.
                        cell_val = str(sh.cell_value(row, col)).encode('utf8')
                    this_row.append(cell_val)
                # keep header row plus rows whose 2nd column is numeric
                if row == 0 or (row > 0 and this_row[1].isdigit()):
                    w.writerow(this_row)
        print("%s has been created" % csv_filename)
    except IOError:
        print("IOError: Please ensure %s exists" % excel_filename)
def test_NestedLoops_and_repeat():
    """End-to-end check of a nested-loop testrun: execute the demo xlsx,
    then verify the generated output workbook and the matching
    TestrunLog database record."""
    run_file = str(input_dir.joinpath("CompleteBaangtWebdemo_nested.xlsx"))
    execute(run_file,
            globals_file=Path(input_dir).joinpath("globalsNoBrowser.json"))
    managedPaths = ManagedPaths()
    # fall back to a local sqlite db when DATABASE_URL is not configured
    DATABASE_URL = os.getenv('DATABASE_URL') or 'sqlite:///' + str(
        managedPaths.derivePathForOSAndInstallationOption().joinpath(
            'testrun.db'))
    new_file = folder_monitor.getNewFiles()
    assert new_file
    output_file = output_dir.joinpath(new_file[0][0]).as_posix()
    wb = xlrd.open_workbook(output_file)
    sheet1 = wb.sheet_by_name("Test_textarea2")
    sheet2 = wb.sheet_by_name("Test_textarea2.nested")
    # both the outer and the nested sheet must contain exactly 3 rows
    assert sheet1.nrows == 3
    assert sheet2.nrows == 3
    TestRunSheet = wb.sheet_by_name("Summary")
    # cell (8, 1) of the Summary sheet holds the testrun's UUID
    TestRunUUID = TestRunSheet.cell_value(8, 1)
    engine = create_engine(DATABASE_URL)
    Session = sessionmaker(bind=engine)
    s = Session()
    # look up the DB log record written for this run and check its payload
    data = s.query(TestrunLog).get(uuid.UUID(TestRunUUID).bytes)
    assert "textarea2" in json.loads(data.RLPJson)
def getData(file_pth=None):
    """Extract x/y positions and velocity components from every sheet of
    the given xls file.

    :param file_pth: workbook path; when None a hint is printed instead
    :return: list with one dict per sheet (keys: x, y, vx_comp, vy_comp),
             or None when no path was given
    """
    if file_pth is None:
        print("Plse input the xls-path")
        return
    workbook = xlrd3.open_workbook(file_pth)
    collected = []
    for sheet_idx in range(workbook.nsheets):
        current = workbook.sheet_by_index(sheet_idx)
        # column 0/1 hold positions, 8/9 velocity components; [1:] skips header
        collected.append({
            "x": current.col_values(0)[1:],
            "y": current.col_values(1)[1:],
            "vx_comp": current.col_values(8)[1:],
            "vy_comp": current.col_values(9)[1:],
        })
    return collected
    def getCellAsInt(self, header, row):
        '''Return the value of column *header* at *row* converted to int.'''
        return int(self.getCell(header, row))

    def getCellAsStrInt(self, header, row):
        '''Return the int value of column *header* at *row* as a string.'''
        return str(self.getCellAsInt(header, row))

    def getStartRow(self):
        '''Return the first data row number.'''
        return self._startRow

    def getEndRow(self):
        '''Return the last data row number.'''
        return self._endRow

    def getRange(self):
        '''Return the iterable range of data row numbers.'''
        return self._range

if __name__ == '__main__':
    # Ad-hoc smoke test: dump the 'name' column of the '天赋' sheet.
    import xlrd3
    configFile = 'E:\\手游文档\\配置表-录入\\技能填表.xls'
    excel = xlrd3.open_workbook(configFile)
    sheet = excel.sheet_by_name('天赋')
    configSheet = ConfigSheet(sheet)
    for i in configSheet.getRange():
        print(i, configSheet.getCell('name', i))
def getCellValue(sheetIndex, rowIndex, colIndex, xlsFilePath):
    """Read one cell value from sheet *sheetIndex* of *xlsFilePath*."""
    workBook = xlrd3.open_workbook(xlsFilePath)
    table = workBook.sheets()[sheetIndex]
    # equivalently: table.row(rowIndex)[colIndex].value
    #            or table.col(colIndex)[rowIndex].value
    cell = table.cell(rowIndex, colIndex)
    return cell.value
# Created: 03.12.2010
# Copyright (C) 2010, Manfred Moitzi
# License: GPLv3
import sys
import os
import unittest
from datetime import datetime, date, time

import xlrd3
from xlrd3 import xfconst


def from_tests_dir(filename):
    """Resolve *filename* relative to this test module's directory."""
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)


# module-level fixtures shared by the test case below
book = xlrd3.open_workbook(from_tests_dir('profiles.xls'),
                           formatting_info=True)
sheet = book.sheet_by_name('PROFILEDEF')


class TestCellValues(unittest.TestCase):
    """Cell type/value checks against the PROFILEDEF sheet."""

    def test_string_cell(self):
        text_cell = sheet.cell(0, 0)
        self.assertEqual(text_cell.ctype, xlrd3.XL_CELL_TEXT)
        self.assertEqual(text_cell.value, 'PROFIL')
        self.assertTrue(text_cell.has_xf)

    def test_number_cell(self):
        number_cell = sheet.cell(1, 1)
        self.assertEqual(number_cell.ctype, xlrd3.XL_CELL_NUMBER)
        self.assertEqual(number_cell.value, 100)
        self.assertTrue(number_cell.has_xf)
# NOTE(review): top-level script fragment; ``chemin``, ``pbar``, ``row``
# and ``col`` are defined earlier in the file (not visible here).
col2 = 1
# bold white-on-green style for header cells
header = xlwt.easyxf('font: bold 1, color white; '
                     'alignment: horizontal left, vertical center; '
                     'pattern: pattern solid, fore_colour green;')
# Creation of workbook and sheet "DATAS"
fichier = xlwt.Workbook()
sheet1 = fichier.add_sheet("DATAS")
sheet1.write(1, col2, "Date", header)
# Loop to fill the file
for root, dirs, files, in pbar(os.walk(chemin)):
    xlsfiles = [_ for _ in files if _.endswith('.xlsx')]
    for xlsfile in xlsfiles:
        wb = xlrd3.open_workbook(os.path.join(root, xlsfile))
        # filename convention appears to be <date>_<objet>_..._<auteur>.<ext>
        date = re.split("[._]", xlsfile)[0]
        objet = re.split("[_.]", xlsfile)[1]
        auteur = re.split("[_.]", xlsfile)[-2]
        sheet = wb.sheet_by_index(0)
        empty_cell = False
        num_cols = sheet.ncols
        num_rows = sheet.nrows
        for row_index in range(row, num_rows):
            # set count empty cells
            count_empty = 0
            # print('Ligne: {}'.format(row_index))
            for col_index in range(col):
                # get cell value
                # NOTE(review): reads column ``col`` on every iteration;
                # ``col_index`` was probably intended -- confirm upstream.
                data = sheet.cell_value(row_index, col)
def openxls():
    """Open the monster-fight data workbook and return its three sheets
    ('base', 'skill', 'prize') as a tuple."""
    workbook = xlrd3.open_workbook("xls/info_fight_monster.xls")
    base_sheet = workbook.sheet_by_name("base")
    skill_sheet = workbook.sheet_by_name("skill")
    prize_sheet = workbook.sheet_by_name("prize")
    return (base_sheet, skill_sheet, prize_sheet)
name = name.replace(" ", " ") name = name.replace(" ", " ") name = re.sub("^ *", "", name) name = re.sub(" *$", "", name) if len(name) == 0: print("!!!", orig) return name def int_or_string(t): try: return int(t) except: pass return str(t) wb = open_workbook("2011_trafic_annuel.xls") sheet = wb.sheets()[0] for row in range(sheet.nrows): if sheet.cell(row, 2).value != "Métro": continue name = clean(sheet.cell(row, 3).value) if len(name) == 0: continue trafic = sheet.cell(row, 4).value rank = sheet.cell(row, 1).value lines = [sheet.cell(row, i).value for i in range(5, 10)] stations[name] = {"trafic": trafic, "lines": [int_or_string(l) for l in lines if l != 0 and len(str(l)) > 0 and str(l)[0].isdigit()], "rank": rank} import csv reader = csv.reader(open("ratp_arret_graphique.csv", newline=""), delimiter='#')
def main(dir, cache_file):
    """Walk *dir* collecting .fna sample files and pair each with sample
    metadata (id + date) taken from either a sibling Excel sheet or a
    .mapper csv; the results are merged into the JSON cache.

    :param dir: root directory to walk
    :param cache_file: JSON cache of previously-resolved samples
    :return: 0 on completion
    """
    # filename patterns recognised while walking
    fna = re.compile("([0-9]+).+\.fna$")
    xls = re.compile("\.xls$")
    mapper = re.compile("\.mapper$")
    xlsx = re.compile("^[^\.]+\.xlsx$")
    taginname = re.compile("^([A-Z][0-9]+)\ ([0-9]+)")
    # resume from the cache when it exists
    if os.path.exists(cache_file):
        with open(cache_file, "r") as fh:
            list_of_fna = json.load(fh)
    else:
        list_of_fna = {}
    for root, dirs, files in os.walk(dir):
        haz_xls = False
        haz_csv = True
        xls_map = {}
        csv_map = {}
        locallist = {}
        for file in files:
            ma = fna.search(file)
            if ma is not None:
                base_path = join(root, splitext(file)[0]);
                # only samples with a .qual sibling that aren't cached yet
                if base_path not in list_of_fna and isfile(base_path + ".qual"):
                    pat = taginname.search(file)
                    if pat is not None:
                        # "<tag> <MMDDYYYY>..." style name: store id + date
                        locallist[base_path] = [pat.group(1), datetime.datetime.strptime(pat.group(2), "%m%d%Y")]
                        #print(locallist[base_path])
                    else:
                        # plain numeric name: store the well number
                        locallist[base_path] = int(ma.group(1))
                        #print (base_path)
            else:
                if xls.search(file) is not None:
                    #print ("\n\n\nHAZ excel! %s %s" % (root,file) )
                    try:
                        # build well-number -> [MBN id, sample date] from Sheet1
                        workbook = xlrd3.open_workbook(join(root, file))
                        info = workbook.sheet_by_name('Sheet1')
                        header_row = info.row_values(0)
                        mbn_id = header_row.index('MBN')
                        sample_date = header_row.index('Sample Date')
                        sample_well = header_row.index('Sample Well')
                        for rownum in range(1, info.nrows):
                            da_row = info.row_values(rownum)
                            xls_map[int(da_row[sample_well])] = [da_row[mbn_id], minimalist_xldate_as_datetime(da_row[sample_date], 1)]
                    except:
                        # NOTE(review): bare except silently ignores any
                        # malformed/unreadable spreadsheet
                        pass
                    haz_xls = True
                if mapper.search(file) is not None:
                    #print ("\n\n\nHAZ! %s %s" % (root,file) )
                    try:
                        # .mapper csv rows are: well, id, date (MM/DD/YYYY)
                        data_reader = csv.reader(open(join(root, file), 'r'))
                        for row in data_reader:
                            if len(row) == 3:
                                csv_map[int(row[0])] = [row[1], datetime.datetime.strptime(row[2], "%m/%d/%Y")]
                            else:
                                raise BaseException("FAIL")
                        #print (csv_map)
                    except:
                        raise
                    haz_csv = True
        done_by_map = False
        # prefer the xls mapping, then the csv mapping, but only when it
        # covers every local sample exactly
        for k, l in [(haz_xls, xls_map), (haz_csv, csv_map)]:
            if k and len(l) == len(locallist):
                for fname, val in locallist.items():
                    locallist[fname] = l[val]
                list_of_fna.update(locallist)
                done_by_map = True
                break
        if not done_by_map:
            # keep only samples whose metadata was embedded in the filename
            for f, v in locallist.items():
                if isinstance(v, list):
                    list_of_fna[f] = v
                else:
                    print("Unknown sample %s" % f)
    # datetimes are serialised as ISO strings
    dthandler = lambda obj: obj.isoformat() if isinstance(obj, datetime.datetime) else None
    with open(cache_file, "w") as fh:
        json.dump(list_of_fna, fh, default=dthandler, sort_keys=True, indent=4)
    return 0
# Created: 03.12.2010
# Copyright (C) 2010, Manfred Moitzi
# License: GPLv3
import sys
import os
import unittest

import xlrd3


def from_tests_dir(filename):
    """Build an absolute path for *filename* next to this test module."""
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)


# module-level fixtures shared by the test case below
book = xlrd3.open_workbook(from_tests_dir("profiles.xls"),
                           formatting_info=True)
sheet = book.sheet_by_name("PROFILEDEF")


class TestCell(unittest.TestCase):
    """Basic cell-content assertions on the PROFILEDEF sheet."""

    def test_string_cell(self):
        text_cell = sheet.cell(0, 0)
        self.assertEqual(text_cell.ctype, xlrd3.XL_CELL_TEXT)
        self.assertEqual(text_cell.value, "PROFIL")
        self.assertTrue(text_cell.xf_index > 0)

    def test_number_cell(self):
        number_cell = sheet.cell(1, 1)
        self.assertEqual(number_cell.ctype, xlrd3.XL_CELL_NUMBER)
        self.assertEqual(number_cell.value, 100)
        self.assertTrue(number_cell.xf_index > 0)
# Author: mozman -- <*****@*****.**>
# Purpose: test formula (inspired by sjmachin)
# Created: 21.01.2011
# Copyright (C) , Manfred Moitzi
# License: GPLv3
import os
import sys
import unittest

import xlrd3


def from_tests_dir(filename):
    """Build an absolute path for *filename* next to this test module."""
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)


# workbook under test: formula results from sjmachin's sample file
book = xlrd3.open_workbook(from_tests_dir('formula_test_sjmachin.xls'))
sheet = book.sheet_by_index(0)


class TestFormulas(unittest.TestCase):
    """Check formula cell results via their ascii() representation."""

    def get_value(self, col, row):
        # ascii() keeps non-ASCII content comparable as escape sequences
        return ascii(sheet.col_values(col)[row])

    def test_is_opened(self):
        self.assertIsNotNone(book)

    def test_cell_B2(self):
        expected = (r"'\u041c\u041e\u0421\u041a\u0412\u0410 "
                    r"\u041c\u043e\u0441\u043a\u0432\u0430'")
        self.assertEqual(self.get_value(1, 1), expected)

    def test_cell_B3(self):
        self.assertEqual(self.get_value(1, 2), '0.14285714285714285')
# Author: mozman <*****@*****.**> # Purpose: test cell formats # Created: 03.12.2010 # Copyright (C) 2010, Manfred Moitzi # License: GPLv3 import sys import os import unittest import xlrd3 def from_tests_dir(filename): return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename) book = xlrd3.open_workbook(from_tests_dir('Formate.xls'), formatting_info=True) class TestCellContent(unittest.TestCase): def test_text_cells(self): sheet = book.sheet_by_name('Blätt1') for row, name in enumerate(['Huber', 'Äcker', 'Öcker']): cell = sheet.cell(row, 0) self.assertEqual(cell.ctype, xlrd3.XL_CELL_TEXT) self.assertEqual(cell.value, name) self.assertTrue(cell.xf_index > 0) def test_date_cells(self): sheet = book.sheet_by_name('Blätt1') # see also 'Dates in Excel spreadsheets' in the documentation # convert: xldate_as_tuple(float, book.datemode) -> (year, month, # day, hour, minutes, seconds)
def get_sheet(self):
    """Open the configured workbook and return the sheet object selected
    by ``self.sheet_name``."""
    work_book = xlrd3.open_workbook(self.excel_file_path)
    return work_book.sheet_by_name(self.sheet_name)
def processFIle(metfile, keywordfile, outputfile):
    """ searches for keywords in filenames that are suspicious regarding pedo content"""
    # splits filenames on punctuation; NOTE(review): "[]|]" parses as a
    # character class matching ']' or '|' -- possibly not what was meant.
    regexp = re.compile("!|[]|]|;|\?|\(|\)|-|_|\.|\,")
    wb = open_workbook(metfile)
    book = Workbook()
    sheet1 = book.add_sheet('Found')
    sheet2 = book.add_sheet("NOT FOUND")
    s = wb.sheets()[0]
    sheet = wb.sheet_by_index(0)  #input sheet
    # NOTE(review): the output workbook and both sheets are re-created
    # here, discarding the ones created just above.
    book = Workbook()
    sheet1 = book.add_sheet('Found')
    sheet2 = book.add_sheet("NOT FOUND", cell_overwrite_ok=True)
    #print ("number of roews",s.nrows)
    hashCalcprog = []
    filesize = []
    knowndotMedHashes = []
    lastpostedUTC = []
    lastsharedUTC = []
    requestsAccepted = []
    bytesUploaded = []
    filenames = []
    keywords = []
    with open(keywordfile) as f:  # Returns a file object
        # keywords are compared as lowercase utf-8 bytes
        keywords = [line.replace('\n', '').encode('utf8') for line in f]  # Invokes readline() method on file
    k = 0
    # header row of the 'Found' sheet: index, keyword, then input columns
    sheet1.write(k, 0, 'Α/Α')
    sheet1.write(k, 1, 'keyword')
    for col in range(sheet.ncols):
        sheet1.write(k, col + 2, sheet.cell_value(0, col))
    for i in range(sheet.nrows):  #read hashes of met xls file
        if i > 1:
            Found = False
            knowndotMedHashes.append(sheet.cell_value(i, 17))
            filename = str(sheet.cell_value(i, 0)).encode('utf8')
            filenames.append(filename)
            filesize = sheet.cell_value(i, 2)
            lastpostedUTC = sheet.cell_value(i, 5)
            lastsharedUTC = sheet.cell_value(i, 6)
            requestsAccepted = sheet.cell_value(i, 8)
            bytesUploaded = sheet.cell_value(i, 9)
            # split the filename into terms; copy the row to 'Found' on
            # the first keyword hit
            for term in regexp.sub(' ', filename.decode()).split(' '):
                if term.encode('utf8').lower() in keywords:
                    print("FOUND", term.encode('utf8').lower(), term.encode('utf8').lower() in keywords, type(term.encode('utf8').lower()))
                    k += 1
                    sheet1.write(k, 0, k)
                    sheet1.write(k, 1, term)
                    for col in range(sheet.ncols):
                        sheet1.write(k, col + 2, sheet.cell_value(i, col))
                    Found = True
                    break
            if not Found:
                # copy unmatched rows to the 'NOT FOUND' sheet
                sheet2.write(i, 0, i)
                for col in range(sheet.ncols):
                    sheet2.write(i, col + 1, sheet.cell_value(i, col))
    book.save(outputfile)
def check_file(fname, verbose, do_punc=False, fmt_info=0, encoding='ascii',
               onesheet=''):
    """(Python 2) Report, per sheet, how much of the used RxC area is
    'waste' (rows/columns beyond the last good cell) and optionally the
    density of non-null cells.

    :param fname: workbook path
    :param verbose: 0..3, increasing detail
    :param do_punc: when True use the ``ispunc`` checker for good cells
    :param fmt_info: passed through as formatting_info where supported
    :param encoding: output encoding for sheet names
    :param onesheet: sheet index or name to restrict the scan to
    """
    print()
    print (fname)
    if do_punc:
        checker = ispunc
    else:
        checker = None
    # progressively fall back for older open_workbook signatures
    try:
        book = open_workbook(fname, formatting_info=fmt_info, on_demand=True)
    except TypeError:
        try:
            book = open_workbook(fname, formatting_info=fmt_info)
        except TypeError:
            # this is becoming ridiculous
            book = open_workbook(fname)
    totold = totnew = totnotnull = 0
    # which sheets to scan: all, by index, or by name
    if onesheet is None or onesheet == "":
        shxrange = range(book.nsheets)
    else:
        try:
            shxrange = [int(onesheet)]
        except ValueError:
            shxrange = [book.sheet_names().index(onesheet)]
    for shx in shxrange:
        sheet = book.sheet_by_index(shx)
        ngoodrows = number_of_good_rows(sheet, checker)
        ngoodcols = number_of_good_cols(sheet, checker, nrows=ngoodrows)
        oldncells = sheet.nrows * sheet.ncols
        newncells = ngoodrows * ngoodcols
        totold += oldncells
        totnew += newncells
        nnotnull = 0
        sheet_density_pct_s = ''
        if verbose >= 2:
            # count non-null cells inside the 'good' area
            colxrange = range(ngoodcols)
            for rowx in xrange(ngoodrows):
                rowtypes = sheet.row_types(rowx)
                for colx in colxrange:
                    if rowtypes[colx] not in null_cell_types:
                        nnotnull += 1
            totnotnull += nnotnull
            sheet_density_pct = (nnotnull * 100.0) / max(1, newncells)
            sheet_density_pct_s = "; den = %5.1f%%" % sheet_density_pct
        if verbose >= 3:
            # which rows have non_empty cells in the right-most column?
            lastcolx = sheet.ncols - 1
            for rowx in xrange(sheet.nrows):
                cell = sheet.cell(rowx, lastcolx)
                if cell.ctype != XL_CELL_EMPTY:
                    print "%s (%d, %d): type %d, value %r" % (
                        cellname(rowx, lastcolx),
                        rowx, lastcolx, cell.ctype, cell.value)
        # NOTE(review): ``ngoodcells`` is never assigned anywhere in this
        # function -- this condition raises NameError when verbose >= 2;
        # ``ngoodcols``/``newncells`` was probably intended. Confirm.
        if (verbose
                or ngoodrows != sheet.nrows
                or ngoodcols != sheet.ncols
                or (verbose >= 2 and ngoodcells and sheet_density_pct < 90.0)
                ):
            if oldncells:
                pctwaste = (1.0 - float(newncells) / oldncells) * 100.0
            else:
                pctwaste = 0.0
            shname_enc = safe_encode(sheet.name, encoding)
            print "sheet #%2d: RxC %5d x %3d => %5d x %3d; %4.1f%% waste%s (%s)" \
                % (shx, sheet.nrows, sheet.ncols,
                   ngoodrows, ngoodcols, pctwaste,
                   sheet_density_pct_s, shname_enc)
        # on_demand books can release the sheet we are done with
        if hasattr(book, 'unload_sheet'):
            book.unload_sheet(shx)
    if totold:
        pctwaste = (1.0 - float(totnew) / totold) * 100.0
    else:
        pctwaste = 0.0
    print "%d cells => %d cells; %4.1f%% waste" % (totold, totnew, pctwaste)
def test_open_workbook(self):
    """profiles.xls opens without raising."""
    workbook = xlrd3.open_workbook(from_tests_dir('profiles.xls'))
def extract_meta(self, filename):
    """Remember *filename* and open it as ``self.book`` for later use."""
    self.filename = filename
    self.book = xlrd3.open_workbook(filename)
import xlrd3
import os

# module-level fixture: the workbook/sheet every lookup below reads from
workbook = xlrd3.open_workbook(
    os.path.join(os.path.dirname(__file__), '..', 'data', 'TestData.xlsx'))
sheet = workbook.sheet_by_name('testcase01')


# merged_cells 获取当前表格所有合并单元格的位置信息 ,返回一个列表
# (merged_cells returns the positions of all merged ranges as a list)
def get_cell_merge_values(row_index, col_index):
    """Return the effective value of cell (row_index, col_index).

    For cells inside a merged range the value stored at the range's
    top-left anchor cell is returned; plain cells return their own value.
    """
    cell_value = None
    # BUGFIX: was ``sheet.merged_cellsSheet1`` -- no such attribute exists
    # on xlrd Sheet objects; the merged ranges live in ``merged_cells``.
    merged = sheet.merged_cells
    for (rlow, rhigh, clow, chigh) in merged:  # 遍历表格中所有合并单元格位置信息
        if (row_index >= rlow and row_index < rhigh):  # 行坐标判断
            if (col_index >= clow and col_index < chigh):  # 列坐标判断
                # 如果满足条件,就把合并单元格第一个位置的值赋给其它合并单元格
                cell_value = sheet.cell_value(rlow, clow)
                break
            else:
                cell_value = sheet.cell_value(row_index, col_index)
        else:
            cell_value = sheet.cell_value(row_index, col_index)
    return cell_value


# # for i in range(0,11):
# #     for j in range(0,3):
# #         print(get_cell_merge_values(i,j),end=' ')
# #     print()
# excel_list_data = []
def test_merged_cells(self):
    """Verify extent and anchor value of the first merged range on 'table2'."""
    book = xlrd3.open_workbook(from_tests_dir("xf_class.xls"),
                               formatting_info=True)
    merged_sheet = book.sheet_by_name("table2")
    bounds = merged_sheet.merged_cells[0]
    row_lo, row_hi, col_lo, col_hi = bounds
    self.assertEqual(merged_sheet.cell(row_lo, col_lo).value, "MERGED")
    self.assertEqual(bounds, (3, 7, 2, 5))