Exemple #1
0
def main():
    word1 = input("要搜索的职业:")
    word = urllib.request.quote(word1)
    df_all = pd.DataFrame()

    for page in range(1, 20):
        print(f'正在读取第{page}页的数据')
        df = get_all(word, page)
        df_all = df_all.append(df)
        df_all.to_excel('猎聘-{}.xlsx'.format(word1),
                        encoding='utf_8_sig',
                        index=False)
    print(f'已读取总共{page}页的数据')

    sf = StyleFrame.read_excel('猎聘-{}.xlsx'.format(word1))
    print('data read successful!')
    #sf.set_column_width(columns=['公司名','岗位信息'],width=200)
    sf.set_column_width_dict(col_width_dict={
        "公司名": 20,
        "岗位信息": 100,
        "招聘岗位": 20,
        '申请网址': 30
    })
    ew = StyleFrame.ExcelWriter('猎聘-{}.xlsx'.format(word1))
    sf.to_excel(ew)
    ew.save()
    print("data updated successful!")
 def test_read_excel_no_style(self):
     self.export_and_get_default_sheet(save=True)
     sf_from_excel = StyleFrame.read_excel(TEST_FILENAME)
     # making sure content is the same
     self.assertTrue(
         all(
             list(self.sf[col]) == list(sf_from_excel[col])
             for col in self.sf.columns))
Exemple #3
0
    def test_read_excel_with_style_comments_styler_objects(self):
        self.export_and_get_default_sheet(save=True)
        sf_from_excel = StyleFrame.read_excel(TEST_FILENAME, read_style=True, read_comments=True)
        # making sure content is the same
        self.assertTrue(all(list(self.sf[col]) == list(sf_from_excel[col]) for col in self.sf.columns))

        rows_in_excel = sf_from_excel.data_df.itertuples()
        rows_in_self = self.sf.data_df.itertuples()

        # making sure styles are the same
        self.assertTrue(all(excel_cell.style == self_cell.style
                            for row_in_excel, row_in_self in zip(rows_in_excel, rows_in_self)
                            for excel_cell, self_cell in zip(row_in_excel[1:], row_in_self[1:])))
Exemple #4
0
    def test_read_excel_with_string_sheet_name(self):
        self.export_and_get_default_sheet(save=True)
        sf_from_excel = StyleFrame.read_excel(TEST_FILENAME, read_style=True, sheet_name='Sheet1',
                                              use_openpyxl_styles=True)
        # making sure content is the same
        self.assertTrue(all(list(self.sf[col]) == list(sf_from_excel[col]) for col in self.sf.columns))

        rows_in_excel = sf_from_excel.data_df.itertuples()
        rows_in_self = self.sf.data_df.itertuples()

        # making sure styles are the same
        self.assertTrue(all(self_cell.style == Styler.from_openpyxl_style(excel_cell.style, [])
                            for row_in_excel, row_in_self in zip(rows_in_excel, rows_in_self)
                            for excel_cell, self_cell in zip(row_in_excel[1:], row_in_self[1:])))
Exemple #5
0
    def test_read_excel_style(self):
        self.apply_headers_style()
        self.export_and_get_default_sheet(save=True)
        sf_from_excel = StyleFrame.read_excel(TEST_FILENAME, read_style=True)
        # making sure content is the same
        self.assertTrue(all(list(self.sf[col]) == list(sf_from_excel[col]) for col in self.sf.columns))

        rows_in_excel = sf_from_excel.data_df.itertuples()
        rows_in_self = self.sf.data_df.itertuples()

        # making sure styles are the same
        self.assertTrue(all(excel_cell.value == self_cell.value
                        for row_in_excel, row_in_self in zip(rows_in_excel, rows_in_self)
                        for excel_cell, self_cell in zip(row_in_excel, row_in_self)))
def group_by_color(
    filepath,
    row_height,
    column_width,
    include_sources,
    output_name,
):
    from StyleFrame import StyleFrame, Styler, utils
    import numpy as np
    import pandas as pd
   
    if not filepath.endswith(".xlsx"):
        filepath += ".xlsx"
        
    sf = StyleFrame.read_excel(filepath, read_style=True)
    color_groupings = {}

    for c_index, c in enumerate(sf.columns):
        if "Cues" in c.value:
            for value in sf[c]:
                if type(value.value) is not str and np.isnan(value.value):
                    continue
                if value.style.bg_color + "_value" not in color_groupings:
                    color_groupings[value.style.bg_color + "_value"] = []
                if include_sources and (
                    value.style.bg_color + "_source" not in color_groupings
                ):
                    color_groupings[value.style.bg_color + "_source"] = []
                color_groupings[value.style.bg_color + "_value"].append(
                    value.value
                )
                if include_sources:
                    color_groupings[value.style.bg_color + "_source"].append(
                        sf.iloc[0,c_index - 1].value
                    )

    # colors of our stuff
    for k,length in zip(color_groupings.keys(), map(len, color_groupings.values())):
        print( "color", k + ":", length, "entries")

    # pad data so it is all the same size
    maxlen = max(map(len, color_groupings.values()))
    for k,v in color_groupings.items():
        color_groupings[k] = v + ['']*(maxlen - len(v))
        print("old length:", len(v), "|| new length:", len(color_groupings[k]))

    # add padded data to df, print 'er out
    new_df = pd.DataFrame(color_groupings)
    new_df

    defaults = {'font': utils.fonts.calibri, 'font_size': 9}

    new_sf = StyleFrame(new_df, styler_obj=Styler(**defaults))

    for c in new_sf.columns:

        # color to use
        color = c.value.strip("_source").strip("_value")

        # apply style to ALL values (incl headers)
        new_sf.apply_column_style(
            cols_to_style=[c.value],
            styler_obj=Styler(bold=True, font_size=10, bg_color=color,
            font_color=utils.colors.white),
            style_header=True,
        )

        # revert style for non-headers
        new_sf.apply_column_style(
            cols_to_style=[c.value],
            styler_obj=Styler(**defaults),
            style_header=False
        )

    # row height
    all_rows = new_sf.row_indexes
    new_sf.set_row_height_dict(
        row_height_dict={
            all_rows[0]: 24,
            all_rows[1:]: row_height
        }
    )

    # col width
    all_cols = tuple(map(lambda x: x.value, new_sf.columns))
    new_sf.set_column_width_dict(
        col_width_dict={
            all_cols: column_width
        }
    )

    # save to excel file
    if not output_name.endswith(".xlsx"):
        output_name += ".xlsx"
    new_sf.to_excel(output_name).save()
    
    return 0
Exemple #7
0
def load_training_data(excel_file):
    sf = StyleFrame.read_excel(excel_file, sheet_name='Training Samples', read_style=True, use_openpyxl_styles=False)
    return StyleFrame(sf.applymap(get_classes_from_colors)).data_df[0:20].iloc[:, 1:].to_numpy(dtype=np.str)
Exemple #8
0
from StyleFrame import StyleFrame
import pandas as pd

sf = StyleFrame.read_excel('FreeEnglishTextbooks.xlsx', read_style=True, use_openpyxl_styles=False)

ls = []
for row in sf.row_indexes:
	try:
		if sf.loc[row, 'Book Title'].style.bold:
			d = { 'Book Title': sf.loc[row, 'Book Title'].value, 
				'URL': sf.loc[row, 'OpenURL'].value }
			ls.append(d)
	except:
		pass
pd.DataFrame(ls).to_excel('filtered-books.xlsx')
Exemple #9
0
        currFile.write(filterToAppend + "\r\n")
    print("This filter is saved to: " + filename)


def only_cells_with_green_background(cell):
    #print(cell.value, cell.style.bg_color)
    #return cell if cell.style.bg_color in {utils.colors.green, 'FF93C47D'} else np.nan
    return cell.style.bg_color


def checkVerified():
    return


sf = StyleFrame.read_excel(path,
                           sheet_name=SheetName,
                           read_style=True,
                           use_openpyxl_styles=False)
sf_bg = StyleFrame(sf.applymap(only_cells_with_green_background))
#print(sf_bg)

df = pd.read_excel(path, SheetName)
#print(df)
#sys.exit()
for idx, x in enumerate(df['Suggested filter (to be reviewed)']):
    # Verified : if the background cell color of filter is Green or the "New filter" column is filled.
    # isVerified= df['Verified'][idx]
    # if isVerified == 'x':
    #     print(str(idx)+ ". Passed unverified filter.")
    #     continue

    has_green_bg = False
Exemple #10
0
# import openpyxl
# from openpyxl import load_workbook
#
# workbook = load_workbook('Visão Geral Iniciativas_v2.xlsm')
# worksheet = workbook.get_active_sheet()
#
# ws_range = worksheet.range('A1:Z20')
# for row in ws_range:
#     print(row)
#
#
#
#
#
# # print(sh['A2'].fill.start_color.index)

from StyleFrame import StyleFrame, Styler, utils



#
sf = StyleFrame.read_excel('teste.xlsx', sheet_name='Planilha1', read_style=True)
# print(sf)
#
# #
# sf = sf[[col for col in sf.columns
#          if col.style.fill.fgColor.rgb in ('FFFFFFFF', utils.colors.white)]]
#          # "white" can be represented as 'FFFFFFFF' or
#          # '00FFFFFF' (which is what utils.colors.white is set to)
# print(sf)