def save(self, *args, **kwargs): super().save(*args, **kwargs) serial_number = 0 quantity = 0 price = 0 rw = 0 with open_workbook(self.file.url) as wb: with wb.get_sheet(1) as sheet: for row in sheet.rows(): if rw: for col in row: if (col.c == 0): serial_number = col.v elif (col.c == 1): quantity = col.v elif (col.c == 2): price = col.v Product.objects.create(serial_number=serial_number, quantity=quantity, price=price) rw += 1
def ExcelFile(path): # this is a generic function to read both xlsb file and xls file # for xlsb file we need a seperate module if ".xlsb" in path: print("reading xlsb file") wb = False ret = {} with open_workbook(path) as wb: sheets = wb.sheets i = 0 for sheet_name in sheets: with wb.get_sheet(sheet_name) as sheet: df = [] for row in sheet.rows(): df.append([item.v for item in row]) df = pd.DataFrame(df[1:], columns=df[0]) ret[sheet_name] = df ret[i] = df i += 1 ret["wb"] = wb ret["sheet_names"] = wb.sheets return ret else: try: return pd.ExcelFile(path) except Exception as e: print(e) raise Exception(e)
def save_model(self, request, obj, form, change): xls = form.cleaned_data['field'] qty = 0 total_price = 0 with open_workbook(xls) as wb: row_generator = wb.get_sheet(1).rows() row_generator.__next__() for row in row_generator: item_inventory = [item.v for item in row] onj_inventory = Inventory() onj_inventory.serial_number = item_inventory[0] onj_inventory.quantity = item_inventory[1] onj_inventory.price = item_inventory[2] onj_inventory.save() for cell in row: if cell.c == 1: qty += int(cell.v) elif cell.c == 2: total_price += float(cell.v) prom = total_price / qty # obj.calculate_field = json.dumps({'elementos': qty, 'precio promedio': prom}) obj.calculate_field = {'elementos': qty, 'precio promedio': prom} super().save_model(request, obj, form, change)
def summary(self, obj): my_json = {} elements = 0 total_price = 0 average_price = 0 try: with open_workbook(obj.file.url) as wb: with wb.get_sheet(1) as sheet: for row in sheet.rows(): for col in row: # Total elements ignoring first row as header if (col.c == 1 and col.r >= 1): elements = elements + col.v # Average price ignoring first row as header if (col.c == 2 and col.r >= 1): total_price = total_price + col.v average_price = total_price / elements my_json["elements"] = elements my_json["average_price"] = average_price except: print("Could not read File") return my_json
def __init__(self, file_name, file_type, **keywords): self._native_book = open_workbook(file_name) self._keywords = keywords self.content_array = [] for sheet_index, sheet_name in enumerate(self._native_book.sheets, 1): sheet = self._native_book.get_sheet(sheet_index) self.content_array.append(NamedContent(sheet_name, sheet))
def import_xlsb(file_path, file_meta): with open_workbook(file_path) as wb: with wb.get_sheet(file_meta['sheet_name']) as sheet: for row in sheet.rows(): row_number = row[0].r + 1 if row_number < file_meta['first_data_row']: continue raw_accident = { column_name: extract_value(row, column_number - 1, column_name) for ( column_name, column_number) in file_meta['columns_mapping'].items() if column_number } raw_accident['source_row_number'] = row_number for key in [ 'source_file', 'import_timestamp', 'source_file_hash' ]: raw_accident[key] = file_meta[key] try: create_accident_raw(raw_accident) except: print( f'{file_meta["source_file"]}:{row_number} failed to import' )
def extract_io_exchanges(sourcedir, version): activities = load_metadata("activities") products = load_metadata("products") dct = VERSIONS[version]["technosphere"] wb = pyxlsb.open_workbook(str(sourcedir / dct["filename"])) sheet = iter(wb.get_sheet(dct["worksheet"])) def next_row(sheet): return [o.v for o in next(sheet)] headers = [next_row(sheet)[5:] for _ in range(4)] # activity location assert headers[0] == [x[1] for x in activities] # activity names assert headers[1] == [x[2] for x in activities] with bz2.open(DATA_DIR / "hiot.csv.bz2", "wt", newline="") as compressed: writer = csv.writer(compressed) for row_index, row_raw in enumerate(sheet): if not row_index % 250: print("{} / {}".format(row_index, len(activities))) row = [o.v for o in row_raw] assert row[0] == products[row_index][1] assert row[1] == products[row_index][2] for col_index, value in enumerate(row[5:]): if value: writer.writerow((products[row_index][0], activities[col_index][0], value))
def iterload(self): from pyxlsb import open_workbook wb = open_workbook(str(self.source)) for name in wb.sheets: vs = wb.get_sheet(name, True) vs.reload() yield vs
def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): from pyxlsb import open_workbook # TODO: hack in buffer capability # This might need some modifications to the Pyxlsb library # Actual work for opening it is in xlsbpackage.py, line 20-ish return open_workbook(filepath_or_buffer)
def read_xlsb(path: str) -> pd.DataFrame: """function to read in a xlsb format file and return it as a DataFrame""" data = [] with open_workbook(path) as wb: for sheetname in wb.sheets: with wb.get_sheet(sheetname) as sheet: for row in sheet.rows(): values = [r.v for r in row] # retrieving content data.append(values) return pd.DataFrame(data[1:], columns=data[0])
def convert_xlsb(workbook, worksheet, targetdir): wb = pyxlsb.open_workbook(workbook) sheet = wb.get_sheet(worksheet) with bz2.open(targetdir / (worksheet + ".csv.bz2"), "wt", newline="") as compressed: writer = csv.writer(compressed) for i, row in enumerate(sheet.rows()): writer.writerow([c.v for c in row]) if i and not i % 250: print(f"Row {i}")
def xlsb_to_csv(): newest = max(os.listdir(downloads), key=lambda f: os.path.getmtime("{}/{}".format(downloads, f))) with open_workbook(os.path.join(downloads, newest)) as wb, open( os.path.join(downloads, 'target.csv'), 'w') as out: writer = csv.writer(out, lineterminator='\n') # for sheetname in wb.sheets: sheet = wb.get_sheet(1) for row in sheet.rows(): values = [r.v for r in row] # retrieving content writer.writerow(values)
def convert_xlsb(workbook, worksheet, sourcedir, targetdir): wb = pyxlsb.open_workbook(workbook) sheet = wb.get_sheet(worksheet) directory = CONVERTED_DATA_DIR / Path(workbook).name.replace(".xlsb", "") directory.mkdir(mode=0o755, exist_ok=True) with bz2.open(directory / (worksheet + ".csv.bz2"), "wt", newline="") as compressed: writer = csv.writer(compressed) for i, row in enumerate(sheet.rows()): writer.writerow([c.v for c in row]) if i and not i % 250: print(f"Row {i}")
def convert_xlsb_to_csv(xlsb_file_name): print('Converting File : ' + xlsb_file_name) wb1 = open_workbook(xlsb_file_name) sheet1 = wb1.get_sheet(1) df = [] for row in sheet1.rows(sparse=True): df.append([item.v for item in row]) df = pd.DataFrame(df[1:], columns=df[0]) csv_file_name = xlsb_file_name.replace('xlsb', 'csv') print(csv_file_name) df.to_csv(csv_file_name, index=False) wb1.close()
def read_xlsb(input_file, sheet_name): book = pyxlsb.open_workbook(input_file) sheet = book.get_sheet(sheet_name) sheet_as_list = [] for row in sheet.rows(): row_for_list = [] for cell in row: row_for_list.append(cell.v) sheet_as_list.append(row_for_list) del book del sheet gc.collect() return sheet_as_list
def save_model(self, request, obj, form, change): js = [] with open_workbook(request.FILES['archivo']) as wb: with wb.get_sheet(1) as sheet: for row in sheet.rows(sparse=True): if row[0].r > 0: p = Inventario(serie=row[0].v, cantidad=row[1].v, precio=row[2].v) p.save() js.append({ "serie": row[0].v, "cantidad": row[1].v, "precio": row[2].v }) obj.my_json_field = js super(Test, self).save_model(request, obj, form, change)
def readxslb(fileName, sheet, cells): columnsRow = [] response = [] with open_workbook(fileName) as wb: with wb.get_sheet(sheet) as sheet: for row in sheet.rows(): for r in row: cellRow = { "row": r.r, "column": r.c, "value": r.v } columnsRow.append(cellRow) for cell in cells: print(cell) findCell = list(filter(lambda cellRow: (cellRow['row'] == cell['row'] and cellRow['column'] == cell['column']), columnsRow)) response.append(findCell[0]) return response
def read_xlsb(filepath, worksheet, pbar_total=None): wb = pyxlsb.open_workbook(str(filepath)) sheet = wb.get_sheet(worksheet) if not pbar_total: return [[o.v for o in row] for row in sheet.rows()] else: def iterate_rows(sheet, bar): for row in sheet.rows(): bar.update() yield row print("Loading file: ", filepath) bar = pyprind.ProgBar(pbar_total) data = [[o.v for o in row] for row in iterate_rows(sheet, bar)] print(bar) return data
def get_all_sheet(self, path): sheetnames = [] if path.endswith('xlsb'): wb = open_workbook(path) sheetnames = [ name for name in wb.sheets if name not in ['ห้ามลบ', 'List Item', 'Operator'] ] wb.close() else: wb = load_workbook(filename=path, read_only=True) for name in wb.sheetnames: if name not in ['ห้ามลบ', 'List Item', 'Operator']: sheetnames.append(name) wb.close() return sheetnames
def __init__(self, fileLocation): self.fileLocation = fileLocation if "xlsb" in fileLocation: self.fileType = XLFileType.xlsb elif "xlsm" in fileLocation: self.fileType = XLFileType.xlsm elif "xlsx" in fileLocation: self.fileType = XLFileType.xlsx elif "xls" in fileLocation: self.fileType = XLFileType.xls else: self.fileType = None if self.fileType == XLFileType.xlsb: self.workbookXLSB = pyxlsb.open_workbook(self.fileLocation) self.sheets = self.workbookXLSB.sheets else: self.workbookXLSXM = xlrd.open_workbook(self.fileLocation) self.sheets = self.workbookXLSXM.sheet_names()
def process_data(self): elements = 0 total_price = 0 count = 0 wb = open_workbook(self.cleaned_data['file'].file) with wb.get_sheet(1) as sheet: for row in sheet.rows(): if (isinstance(row[0].v, str) and isinstance(row[1].v, (float, int)) and isinstance(row[2].v, (float, int))): serial_number = row[0].v quantity = int(row[1].v) price = row[2].v try: Inventory.objects.create(serial_number=serial_number, quantity=quantity, price=price) elements += quantity total_price += price count += 1 except IntegrityError: Inventory.objects.filter( serial_number=serial_number).update( quantity=quantity, price=price) elements += quantity total_price += price count += 1 except Exception as e: print('EXCEPTION', str(e)) logger.error( "{}: Row with values ({}, {}, {}) from File ({}) \ was not created or updated".format( datetime.now(), serial_number, quantity, price, self.cleaned_data['file']._name)) else: logger.error( "{}: Row with values ({}, {}, {}) from File ({}) does \ not comply with data types".format( datetime.now(), row[0].v, row[1].v, row[2].v, self.cleaned_data['file']._name)) return {'elements': elements, 'average_price': total_price / count}
def select_item(event): wb1 = (listbox1.get(listbox1.curselection())) ww = wb1.split('.')[0] def window_enum_handler(hwnd, resultList): if win32gui.IsWindowVisible( hwnd) and win32gui.GetWindowText(hwnd) != '': resultList.append((hwnd, win32gui.GetWindowText(hwnd))) def get_app_list(handles=[]): mlst = [] win32gui.EnumWindows(window_enum_handler, handles) for handle in handles: mlst.append(handle) return mlst appwindows = dict(get_app_list()) for value in appwindows.values(): if ww in value: ww2 = value ww1 = win32gui.FindWindow(None, ww2) win32gui.SetForegroundWindow(ww1) if (wb1[-1] != 'b' and wb1[-1] != 'B'): t = xw.books(wb1) ttt = t.__str__().replace('<Book ', '').replace('>', '') kk = '<Sheet ' + ttt ss = xw.sheets ss1 = pd.DataFrame(ss) ss1[0] = ss1[0].astype('str').str[0:-1] ss1 = ss1[0].tolist() ss1 = ss1.__str__().replace('Sheets([', '').replace('])', '').replace(kk, '') \ .replace("['", '').replace("']", '').replace("', '", ',') ss1 = list(ss1.split(',')) listbox2.delete('0', 'end') for i in ss1: listbox2.insert(END, i) else: t = xw.books(wb1) listbox2.delete('0', 'end') with open_workbook(t.fullname) as wb: for sheetname in wb.sheets: listbox2.insert(END, sheetname)
def get_worksheet(excel_file, worksheet_name=False): """ Returns an OpenPyxl (or pyxlsb) worksheet object as named in worksheet name. If worksheet_name is not specified, the first worksheet in the workbook is returned. :param excel_file: the filename for a Microsoft Excel (xls, or xlsx) file. :param worksheet_name: The name of a worksheet (tab) in the workbook. If omitted, the first worksheet is returned. :return: An OpenPyxl worksheet object. """ worksheet = [] file_name, file_extension = os.path.splitext(excel_file) if os.path.exists(excel_file): if file_extension.lower() == ".xlsb": try: wb = open_workbook(excel_file) except Exception as e: print("BAD File: {}".format(excel_file)) print(e) else: if worksheet_name: worksheet = wb.get_sheet(worksheet_name) else: worksheet = wb.get_sheet(1) else: try: wb = load_workbook(filename=excel_file, data_only=True, read_only=True) except zipfile.BadZipFile: print("Bad Zip File") print(excel_file) else: worksheet = wb.worksheets[0] if worksheet_name: for ws in wb.worksheets: if ws.title == worksheet_name: worksheet = ws break return worksheet
def get_summary(file_field, save_inventary): total_cant = 0 total_price = 0 cant_rows = 0 with open_workbook(file_field) as wb: with wb.get_sheet(1) as sheet: for row in sheet.rows(): serial_name = row[0][2] cant = row[1][2] price = row[2][2] if (serial_name is not None and cant is not None and price is not None): save_inventary(int(serial_name), int(cant), int(price)) cant_rows += 1 total_cant += cant total_price += price else: break average = total_price / cant_rows return [cant_rows, int(total_cant), average]
def xlsbToXlsx(path_in, path_out, separate_sheets=False, print_interval=1000): print('parsing: ') print(path_in) print(path_out) if separate_sheets: print('Saving in differents sheets') with open_workbook(path_in) as wb: wbx = Workbook() for s in wb.sheets: wsx = wbx.create_sheet(s) print('Reading Sheet ', s) with wb.get_sheet(s) as sheet: row_count = 0 # vrow = [0 for x in range(sheet.dimension.w)] sheet = list(sheet) for row in sheet: # print(sheet.index(row)) if (row_count % print_interval == 0): print('Reading row: ', row_count) row_count += 1 for c in row: wsx.cell(row=1 + c.r, column=1 + c.c).value = c.v if (separate_sheets): print('Saving partials...') removeEmptySheet(wb, wbx) wbx.save(path_out + "." + s + ".xls") wbx = Workbook() print('Saving Completed.') try: if (~separate_sheets): print('... Saving: ', path_out) removeEmptySheet(wb, wbx) wbx.save(path_out + '.xls') print('Sucess!') except Exception as ex: print(ex) print('Saving Failed')
def list_worksheets(excel_file): sheets = [] file_name, file_extension = os.path.splitext(excel_file) if os.path.exists(excel_file): if file_extension.lower() == ".xlsb": try: wb = open_workbook(excel_file) except Exception as e: print("BAD File: {}".format(excel_file)) print(e) else: sheets = wb.sheets else: try: wb = load_workbook(filename=excel_file, data_only=True, read_only=True) except zipfile.BadZipFile: print("Bad Zip File") print(excel_file) else: sheets = wb.sheetnames return sheets
def xlsbToCSV(path_in, path_out, sep=";", print_interval=1000, date_columns=[]): print('parsing to csv..') row_count = 0 with open_workbook(path_in) as wb: for s in wb.sheets: with wb.get_sheet(s) as sheet: print('reading sheet', s) with open(path_in.replace('.xlsb', '.' + s) + '.csv', 'w', encoding='utf-8') as csvfile: spamwriter = csv.writer(csvfile, delimiter=sep, quotechar='"', quoting=csv.QUOTE_MINIMAL) sheet = list(sheet) for row in sheet: if (row_count % print_interval == 0): print('Reading row: ', row_count) row_count += 1 spamwriter.writerow([c.v for c in row])
from pyxlsb import open_workbook from console_progressbar import ProgressBar pb = ProgressBar(total=100, prefix='Status', suffix='', decimals=2, length=50, fill='X', zfill='-') with open_workbook('data_import/SortedVertices.xlsx') as wb: count_sheets = len(wb.sheets) result = [] for sh_i, sh_name in enumerate(wb.sheets): with wb.get_sheet(sh_name) as sheet: for i, row in enumerate(sheet.rows()): result.append(row[2].v) pb.print_progress_bar(((sh_i + 1) * 100) / count_sheets) print() print(result[:10], result[-1], sep=' - ') # for key, value in dct_adjacent.items(): # if len(value) > 1: # count_adjacent += 1 # print(key, '-', end=' ') # [print(x, end=' ') for x in value] # print() # print('Всего листов:', count_sheets)
import sys import time from pyxlsb import open_workbook a = time.time() print('Opening workbook... ', end='', flush=True) with open_workbook(sys.argv[1]) as wb: d = time.time() - a print('Done! ({} seconds)'.format(d)) for s in wb.sheets: print('Reading sheet {}... '.format(s), end='', flush=True) a = time.time() with wb.get_sheet(s) as sheet: for row in sheet: pass d = time.time() - a print('Done! ({} seconds)'.format(d))
def __init__(self, full_path): self.workbook = pyxlsb.open_workbook(full_path)