def get_file(path, filename, filetype):
    """Read a file of the given type into a pandas DataFrame.

    Parameters:
        path: directory containing the file.
        filename: file name (currently only used by the csv reader).
        filetype: one of 'csv', 'json' or 'xlsb'.

    Returns the DataFrame on success, or None (after printing a
    message) on read failure or unsupported format.
    """
    if filetype == 'csv':
        try:
            return readcsv(path, filename)
        except Exception:
            # was a bare except: narrowed so Ctrl-C / SystemExit escape
            print('error in reading csv')
    elif filetype == 'json':
        try:
            # NOTE(review): '(unknown)' looks like a redacted placeholder
            # for the real file name (filename is otherwise unused here)
            # — confirm against the original source.
            with open(f'{path}\\(unknown)', 'r') as f:
                return pd.DataFrame(js.load(f))
        except Exception:
            print('error in reading json')
    elif filetype == 'xlsb':
        try:
            print('reading file...')
            rows = []
            with open_xlsb(f'{path}\\(unknown)') as wb:
                with wb.get_sheet('Data') as sheet:
                    for row in sheet.rows():
                        rows.append([item.v for item in row])
            print('reading completed ...')
            # first sheet row becomes the column header
            return pd.DataFrame(rows[1:], columns=rows[0])
        except Exception:
            print('error in reading xlsb')
    else:
        print('file format not supported yet in packages')
def start_parse(self, file_path, data=None):
    """Read an xlsb sheet and serialize its rows as JSON.

    The sheet's first row is treated as the column header; every later
    row is zipped with it into a dict. The list of row dicts is
    returned as a JSON string. If any cell is NaN, None is returned
    instead.
    """
    columns = []
    raw_rows = []
    with open_xlsb(file_path) as workbook:
        with workbook.get_sheet(data) as sheet:
            for row in sheet.rows():
                values = [cell.v for cell in row]
                if not columns:
                    columns = values
                raw_rows.append(values)
    frame = pd.DataFrame(raw_rows[1:], columns=raw_rows[0])
    records = frame.to_dict("split")["data"]
    parsable = []
    for record in records:
        for value in record:
            # Abort the whole parse on the first NaN cell.
            if str(value) == "nan" and math.isnan(value):
                return None
        parsable.append(dict(zip(columns, record)))
    return json.dumps(parsable)
def xlsb_export_csv(self, sheet_number, outputname):
    """Dump one workbook sheet to a CSV file.

    sheet_number selects the sheet to read; outputname is the target
    .csv path. Call repeatedly to export several sheets. The sheet's
    first row becomes the column header.
    """
    with open_xlsb(self.inputfilename) as workbook:
        with workbook.get_sheet(sheet_number) as sheet:
            rows = [[cell.v for cell in record] for record in sheet.rows()]
    frame = pd.DataFrame(rows[1:], columns=rows[0])
    frame.to_csv(outputname)
def read_xlsb(self, sheet_number):
    """Load one workbook sheet into a pandas DataFrame.

    sheet_number selects the sheet; iterate over sheet numbers to load
    several sheets. The sheet's first row becomes the column header.
    """
    with open_xlsb(self.inputfilename) as workbook:
        with workbook.get_sheet(sheet_number) as sheet:
            rows = [[cell.v for cell in record] for record in sheet.rows()]
    return pd.DataFrame(rows[1:], columns=rows[0])
def pandas_read_xlsb_file(filepath):
    """Load sheet 1 of an xlsb file into a pandas DataFrame.

    https://stackoverflow.com/questions/45019778/read-xlsb-file-in-pandas-python

    filepath is a pathlib.Path object. The first row of the sheet
    supplies the column names; the remaining rows become the data.
    """
    with open_xlsb(filepath) as workbook:
        with workbook.get_sheet(1) as sheet:
            records = [[cell.v for cell in row] for row in sheet.rows()]
    return pd.DataFrame(records[1:], columns=records[0])
def extract(macro_module): df = [] #read xlsb with open_xlsb('./test/TCMUR.xlsb') as wb: with wb.get_sheet(1) as sheet: for row in sheet.rows(): df.append([item.v for item in row]) #get week list wk_list = [] for y in range(len(df[1])): if df[13][y] is not None and df[13][y] != 'REGION' and df[13][ y] != 'COUNTRY' and df[13][y] != 'CENTER' and df[13][ y] != 'KPI': wk_list.append(df[13][y]) #transfer list to dataframe df = (pd.DataFrame(df[14:], columns=df[13])) #drop row and column where are all NaN data df.dropna(axis=0, how='all', inplace=True) df.dropna(axis=1, how='all', inplace=True) #extract value of area dfOnlyValue = df.iloc[:, 3:] apac = df['REGION'].values.tolist().index('APAC Rate') df_Value_APAC = dfOnlyValue[:apac] #extract country apac_country = [] for i in range(len(df['COUNTRY'])): if df['COUNTRY'][i] is not None and i < apac: apac_country.append(df['COUNTRY'][i]) #deal with problem of string for j in range(len(apac_country)): if apac_country[j] == "CEETE B'IVRRE": apac_country[j] = 'CEETE B\"IVRRE' #items kpi = ['Rate', 'U Qty', 'C Qty', 'Target'] #Region total rate apac_total = df.iloc[:, -1:][apac:apac + 1].values.tolist()[0][0] #insert APAC's Rate to mysql process.drawTCMUR(df_Value_APAC.values, wk_list, 'APAC', apac_country, kpi, macro_module, apac_total)
class xlsb:
    """Thin reader/exporter for .xlsb workbooks based on pyxlsb."""

    def __init__(self, inputfilename):
        # Path of the .xlsb workbook every method operates on.
        self.inputfilename = inputfilename

    def read_xlsb(self, sheet_number):
        """Load one sheet into a DataFrame (first row = column header).

        sheet_number selects the sheet; iterate over sheet numbers to
        load several sheets.
        """
        rows = []
        with open_xlsb(self.inputfilename) as wb:
            with wb.get_sheet(sheet_number) as sheet:
                for row in sheet.rows():
                    rows.append([item.v for item in row])
        return pd.DataFrame(rows[1:], columns=rows[0])

    def xlsb_export_csv(self, sheet_number, outputname):
        """Export one sheet to a .csv file named outputname.

        Iterate over sheet numbers to export several sheets.
        """
        # Delegate to read_xlsb instead of duplicating its parsing loop.
        self.read_xlsb(sheet_number).to_csv(outputname)

    def xlsb_export_xlsx(self, sheet_number, outputname, sheet_name="Sheet1"):
        """Export one sheet to a .xlsx file named outputname under the
        given sheet_name. Iterate to export several sheets.
        """
        self.read_xlsb(sheet_number).to_excel(outputname,
                                              sheet_name=sheet_name)
def saveExcelRowsInArray():
    """Collect up to noOfRowsToCopy data rows from the configured Excel
    sheet into the module-level rowArray (one list of cell values per
    row), skipping the header row (row 0)."""
    global fileName, sheetName, fileExist, rowArray, rowNumber, helpNeeded, sqlStatement, tableName, columnNameArray, sysCliColumn, noOfRowsToCopy
    with open_xlsb(fileName) as wb:
        with wb.get_sheet(sheetName) as sheet:
            print('Reading rows of Excel sheet')
            print(
                'This could take a while, depending on how many rows you are going to use in the SQL Statement'
            )
            print('')
            for row in sheet.rows(sparse=True):
                if rowNumber != 0:
                    # Stop once the configured row limit is exceeded.
                    if rowNumber > int(noOfRowsToCopy):
                        break
                    # Store each row as its own list of cell values so
                    # the collected data stays readable.
                    rowArray.append([item.v for item in row])
                rowNumber = rowNumber + 1
def get_data_hsbc():
    """Scrape TER figures from every scheme sheet of the HSBC workbook.

    Opens '<foldername>/hsbc<file_date>.xlsb' and, for each sheet from
    index 2 onward, takes the scheme name from row 2 and builds one
    entry per data row (rows >= 6 whose REGULAR base-TER cell is
    non-empty).

    Returns a list of dicts, one per data row.
    """
    file_name = foldername + "/" + "hsbc" + file_date + ".xlsb"
    data = list()
    # Context manager releases the workbook handle (the original never
    # closed it).
    with open_xlsb(file_name) as wb:
        for sheet_index in range(2, len(wb.sheets)):
            sheet = wb.get_sheet(sheet_index)
            # Dedicated row counter — the original reused the sheet loop
            # variable for this, which was confusing.
            row_no = 0
            for row in sheet.rows():
                cells = list(row)
                if row_no == 2:
                    scheme_name = cells[2][2]
                if row_no >= 6 and cells[2][2] is not None:
                    entry = {
                        "Name of scheme": scheme_name,
                        "Date": render_date(cells[1][2]),
                        "REGULAR_Base_TER_perc": render_value(cells[2][2]),
                        "REGULAR_Addnl_52_6a_b_exp_perc": render_value(cells[3][2]),
                        "REGULAR_Addnl_52_6a_c_exp_perc": render_value(cells[4][2]),
                        "REGULAR_GST_perc": render_value(cells[5][2]),
                        "REGULAR_Total_ter_perc": render_value(cells[6][2]),
                        "DIRECT_Base_TER_perc": render_value(cells[9][2]),
                        "DIRECT_Addnl_52_6a_b_exp_perc": render_value(cells[10][2]),
                        "DIRECT_Addnl_52_6a_c_exp_perc": render_value(cells[11][2]),
                        "DIRECT_GST_perc": render_value(cells[12][2]),
                        "DIRECT_Total_ter_perc": render_value(cells[13][2])
                    }
                    data.append(entry)
                row_no += 1
    return data
import pandas as pd
import numpy as np
from pyxlsb import open_workbook as open_xlsb

# Accumulate the rows of sheet 1 of every yearly WIOT workbook into one
# flat list. NOTE(review): all 15 years land in the same list, but only
# one leading row is dropped below — confirm whether per-file headers
# should also be stripped.
df = []
Years = [
    '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
    '2009', '2010', '2011', '2012', '2013', '2014'
]
for year in Years:
    with open_xlsb('WIOT' + year + '_Nov16_ROW.xlsb') as wb:
        with wb.get_sheet(1) as sheet:
            for row in sheet.rows():
                df.append([item.v for item in row])
# Drop the first row (header of the first file) and keep default
# integer column labels.
df = pd.DataFrame(df[1:])
matrix = df.values
# Row 2 carries the sector labels; strip the entries that are not
# actual industries (empty cells, the units caption, and the final-use
# categories).
industries = list(df.iloc[2].unique())
industries.remove(None)
industries.remove('(millions of US$)')
industries.remove('Final consumption expenditure by households')
industries.remove(
    'Final consumption expenditure by non-profit organisations serving households (NPISH)'
)
industries.remove('Final consumption expenditure by government')
industries.remove('Gross fixed capital formation')
import numpy as np
import pandas as pd
from pyxlsb import open_workbook as open_xlsb

# Pull every row of the 'Raw Data' sheet into a list of cell-value
# lists, then build the DataFrame with the first row as the header.
df = []
with open_xlsb('AVS Dashboard - 2019.xlsb') as wb:
    with wb.get_sheet("Raw Data") as sheet:
        df = [[cell.v for cell in record] for record in sheet.rows()]
df1 = pd.DataFrame(df[1:], columns=df[0])
import os
import pandas as pd
from pyxlsb import open_workbook as open_xlsb

os.chdir(r'C:\Users\paulo.roberto\Downloads\Plan PNT')
path = r'C:\Users\paulo.roberto\Downloads\Plan PNT'

# Concatenate the 'Plan PNT' sheet of every workbook in the folder.
df = pd.DataFrame()
df1 = pd.DataFrame()
for name in os.listdir(path):
    # Manage both the workbook and the sheet: the original leaked the
    # workbook handle and called sheet.close() inside the with block.
    with open_xlsb(name) as wb:
        with wb.get_sheet('Plan PNT') as sheet:
            # One list of cell values per row — the original
            # 'pd.DataFrame(sheet.v for v in sheet)' never read cells.
            rows = [[cell.v for cell in row] for row in sheet.rows()]
    df1 = pd.DataFrame(rows)
    # pd.concat replaces the deprecated DataFrame.append.
    df = pd.concat([df, df1], ignore_index=True)

# Export the combined data (the original exported only the last file's
# df1 by mistake).
df.to_excel("teste.xlsx")
# In[1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# In[2]:
from pyxlsb import open_workbook as open_xlsb

# In[204]:
# Read every row of sheet 1 of the 2000 WIOD table, then drop the
# leading header row when building the DataFrame.
df = []
with open_xlsb('WIOT2000_Nov16_ROW.xlsb') as wb:
    with wb.get_sheet(1) as sheet:
        df = [[cell.v for cell in record] for record in sheet.rows()]
df = pd.DataFrame(df[1:])
matrix = df.values

# Row 2 carries the sector labels; strip the entries that are not
# actual industries.
industries = list(df.iloc[2].unique())
for label in (
    None,
    '(millions of US$)',
    'Final consumption expenditure by households',
    'Final consumption expenditure by non-profit organisations serving households (NPISH)',
):
    industries.remove(label)
def get_sheet_names(file_path):
    """Return the list of sheet names contained in the given xlsb file."""
    with open_xlsb(file_path) as workbook:
        names = workbook.sheets
    return names
#!/usr/bin/env python
# coding: utf-8

# In[1]:
import pandas as pd
from pyxlsb import open_workbook as open_xlsb

# Training data: sheet 1, first row used as the column header.
df1 = []
with open_xlsb(
        'C:/Users/HP/Desktop/EXL EQ 2020/EXL_EQ_2020_Train_datasets.xlsb'
) as wb:
    with wb.get_sheet(1) as sheet:
        df1 = [[cell.v for cell in record] for record in sheet.rows()]
df1 = pd.DataFrame(df1[1:], columns=df1[0])

# In[2]:
import pandas as pd
from pyxlsb import open_workbook as open_xlsb

# Test data: raw rows of sheet 2 (no DataFrame built in this cell).
df2 = []
with open_xlsb('C:/Users/HP/Desktop/EXL EQ 2020/EXL_EQ_2020_Test_Datasets.xlsb'
               ) as wb:
    with wb.get_sheet(2) as sheet:
        df2 = [[cell.v for cell in record] for record in sheet.rows()]
# In[261]:
from pyxlsb import open_workbook as open_xlsb

# In[262]:
# load the dataset.
df = []
with open_xlsb('WC_Data_Science_Case_Study.xlsb') as wb:
    with wb.get_sheet(1) as sheet:
        df = [[cell.v for cell in record] for record in sheet.rows()]
# First row of the sheet supplies the column names.
df = pd.DataFrame(df[1:], columns=df[0])
df.head()

# In[263]:
# list of column names.
df.columns
""" if calendar.day_name[date.today().weekday()] == "Sunday": wk = date.today().isocalendar()[1] else: wk = date.today().isocalendar()[1] - 1 """ wk = 52 fold = "wk" + str(wk) # Importing raw data os.chdir("C:/Users/" + username + "/Desktop/AVS Dashboard") file_path = "AVS Dashboard - 2019.xlsb" df = [] with open_xlsb(file_path) as wb: with wb.get_sheet("Raw Data") as sheet: for row in sheet.rows(): df.append([item.v for item in row]) df = pd.DataFrame(df[1:], columns=df[0]) # Creating week folder on Desktop source_dir = "C:/Users/" + username + "/Desktop" os.chdir(source_dir) if not os.path.exists(fold): os.makedirs(fold) os.chdir(fold) # Tasks pending from more than 50 days df["Open Ageing"] = pd.to_numeric(df["Open Ageing"], errors="coerce")