def main(): word1 = input("要搜索的职业:") word = urllib.request.quote(word1) df_all = pd.DataFrame() for page in range(1, 20): print(f'正在读取第{page}页的数据') df = get_all(word, page) df_all = df_all.append(df) df_all.to_excel('猎聘-{}.xlsx'.format(word1), encoding='utf_8_sig', index=False) print(f'已读取总共{page}页的数据') sf = StyleFrame.read_excel('猎聘-{}.xlsx'.format(word1)) print('data read successful!') #sf.set_column_width(columns=['公司名','岗位信息'],width=200) sf.set_column_width_dict(col_width_dict={ "公司名": 20, "岗位信息": 100, "招聘岗位": 20, '申请网址': 30 }) ew = StyleFrame.ExcelWriter('猎聘-{}.xlsx'.format(word1)) sf.to_excel(ew) ew.save() print("data updated successful!")
def test_read_excel_no_style(self): self.export_and_get_default_sheet(save=True) sf_from_excel = StyleFrame.read_excel(TEST_FILENAME) # making sure content is the same self.assertTrue( all( list(self.sf[col]) == list(sf_from_excel[col]) for col in self.sf.columns))
def test_read_excel_with_style_comments_styler_objects(self): self.export_and_get_default_sheet(save=True) sf_from_excel = StyleFrame.read_excel(TEST_FILENAME, read_style=True, read_comments=True) # making sure content is the same self.assertTrue(all(list(self.sf[col]) == list(sf_from_excel[col]) for col in self.sf.columns)) rows_in_excel = sf_from_excel.data_df.itertuples() rows_in_self = self.sf.data_df.itertuples() # making sure styles are the same self.assertTrue(all(excel_cell.style == self_cell.style for row_in_excel, row_in_self in zip(rows_in_excel, rows_in_self) for excel_cell, self_cell in zip(row_in_excel[1:], row_in_self[1:])))
def test_read_excel_with_string_sheet_name(self): self.export_and_get_default_sheet(save=True) sf_from_excel = StyleFrame.read_excel(TEST_FILENAME, read_style=True, sheet_name='Sheet1', use_openpyxl_styles=True) # making sure content is the same self.assertTrue(all(list(self.sf[col]) == list(sf_from_excel[col]) for col in self.sf.columns)) rows_in_excel = sf_from_excel.data_df.itertuples() rows_in_self = self.sf.data_df.itertuples() # making sure styles are the same self.assertTrue(all(self_cell.style == Styler.from_openpyxl_style(excel_cell.style, []) for row_in_excel, row_in_self in zip(rows_in_excel, rows_in_self) for excel_cell, self_cell in zip(row_in_excel[1:], row_in_self[1:])))
def test_read_excel_style(self): self.apply_headers_style() self.export_and_get_default_sheet(save=True) sf_from_excel = StyleFrame.read_excel(TEST_FILENAME, read_style=True) # making sure content is the same self.assertTrue(all(list(self.sf[col]) == list(sf_from_excel[col]) for col in self.sf.columns)) rows_in_excel = sf_from_excel.data_df.itertuples() rows_in_self = self.sf.data_df.itertuples() # making sure styles are the same self.assertTrue(all(excel_cell.value == self_cell.value for row_in_excel, row_in_self in zip(rows_in_excel, rows_in_self) for excel_cell, self_cell in zip(row_in_excel, row_in_self)))
def group_by_color( filepath, row_height, column_width, include_sources, output_name, ): from StyleFrame import StyleFrame, Styler, utils import numpy as np import pandas as pd if not filepath.endswith(".xlsx"): filepath += ".xlsx" sf = StyleFrame.read_excel(filepath, read_style=True) color_groupings = {} for c_index, c in enumerate(sf.columns): if "Cues" in c.value: for value in sf[c]: if type(value.value) is not str and np.isnan(value.value): continue if value.style.bg_color + "_value" not in color_groupings: color_groupings[value.style.bg_color + "_value"] = [] if include_sources and ( value.style.bg_color + "_source" not in color_groupings ): color_groupings[value.style.bg_color + "_source"] = [] color_groupings[value.style.bg_color + "_value"].append( value.value ) if include_sources: color_groupings[value.style.bg_color + "_source"].append( sf.iloc[0,c_index - 1].value ) # colors of our stuff for k,length in zip(color_groupings.keys(), map(len, color_groupings.values())): print( "color", k + ":", length, "entries") # pad data so it is all the same size maxlen = max(map(len, color_groupings.values())) for k,v in color_groupings.items(): color_groupings[k] = v + ['']*(maxlen - len(v)) print("old length:", len(v), "|| new length:", len(color_groupings[k])) # add padded data to df, print 'er out new_df = pd.DataFrame(color_groupings) new_df defaults = {'font': utils.fonts.calibri, 'font_size': 9} new_sf = StyleFrame(new_df, styler_obj=Styler(**defaults)) for c in new_sf.columns: # color to use color = c.value.strip("_source").strip("_value") # apply style to ALL values (incl headers) new_sf.apply_column_style( cols_to_style=[c.value], styler_obj=Styler(bold=True, font_size=10, bg_color=color, font_color=utils.colors.white), style_header=True, ) # revert style for non-headers new_sf.apply_column_style( cols_to_style=[c.value], styler_obj=Styler(**defaults), style_header=False ) # row height all_rows = new_sf.row_indexes new_sf.set_row_height_dict( row_height_dict={ all_rows[0]: 24, all_rows[1:]: row_height } ) # col width all_cols = tuple(map(lambda x: x.value, new_sf.columns)) new_sf.set_column_width_dict( col_width_dict={ all_cols: column_width } ) # save to excel file if not output_name.endswith(".xlsx"): output_name += ".xlsx" new_sf.to_excel(output_name).save() return 0
def load_training_data(excel_file): sf = StyleFrame.read_excel(excel_file, sheet_name='Training Samples', read_style=True, use_openpyxl_styles=False) return StyleFrame(sf.applymap(get_classes_from_colors)).data_df[0:20].iloc[:, 1:].to_numpy(dtype=np.str)
from StyleFrame import StyleFrame import pandas as pd sf = StyleFrame.read_excel('FreeEnglishTextbooks.xlsx', read_style=True, use_openpyxl_styles=False) ls = [] for row in sf.row_indexes: try: if sf.loc[row, 'Book Title'].style.bold: d = { 'Book Title': sf.loc[row, 'Book Title'].value, 'URL': sf.loc[row, 'OpenURL'].value } ls.append(d) except: pass pd.DataFrame(ls).to_excel('filtered-books.xlsx')
currFile.write(filterToAppend + "\r\n") print("This filter is saved to: " + filename) def only_cells_with_green_background(cell): #print(cell.value, cell.style.bg_color) #return cell if cell.style.bg_color in {utils.colors.green, 'FF93C47D'} else np.nan return cell.style.bg_color def checkVerified(): return sf = StyleFrame.read_excel(path, sheet_name=SheetName, read_style=True, use_openpyxl_styles=False) sf_bg = StyleFrame(sf.applymap(only_cells_with_green_background)) #print(sf_bg) df = pd.read_excel(path, SheetName) #print(df) #sys.exit() for idx, x in enumerate(df['Suggested filter (to be reviewed)']): # Verified : if the background cell color of filter is Green or the "New filter" column is filled. # isVerified= df['Verified'][idx] # if isVerified == 'x': # print(str(idx)+ ". Passed unverified filter.") # continue has_green_bg = False
# import openpyxl # from openpyxl import load_workbook # # workbook = load_workbook('Visão Geral Iniciativas_v2.xlsm') # worksheet = workbook.get_active_sheet() # # ws_range = worksheet.range('A1:Z20') # for row in ws_range: # print(row) # # # # # # # print(sh['A2'].fill.start_color.index) from StyleFrame import StyleFrame, Styler, utils # sf = StyleFrame.read_excel('teste.xlsx', sheet_name='Planilha1', read_style=True) # print(sf) # # # # sf = sf[[col for col in sf.columns # if col.style.fill.fgColor.rgb in ('FFFFFFFF', utils.colors.white)]] # # "white" can be represented as 'FFFFFFFF' or # # '00FFFFFF' (which is what utils.colors.white is set to) # print(sf)