def process_data(self, **kwargs):
        # combine data in [day_head, day_tail)
        if ("day_head" not in kwargs) or ("day_tail" not in kwargs):
            print("Arguments must contain: day_head, day_tail")
            return
        day_head = kwargs["day_head"]
        day_tail = kwargs["day_tail"]

        day_it = day_head
        cnt = 2
        day_idx = day_it.strftime("%Y-%m-%d")
        day_t = day_tail.strftime("%Y-%m-%d")

        wb = Workbook()
        ws = wb.active
        while day_idx != day_t:
            print("Processing...", day_idx)
            file_path = self.data_path + "\\" + day_idx + "--" + day_idx + ".xls"
            self.__combine_data(file_path, cnt, day_idx, ws)
            day_it = day_it + datetime.timedelta(days=1)
            cnt += 1
            day_idx = day_it.strftime("%Y-%m-%d")
        jiangsu = "剔除江苏"
        nanjing = "剔除南京"
        if self.include_nanjing:
            nanjing = "包含南京"
        if self.include_jiangsu:
            jiangsu = "包含江苏"

        t_h = day_head.strftime("%Y-%m-%d")
        t_t = (day_tail - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        wb.save(self.data_path + "\\" + "全市乡村游客源排名" + jiangsu + nanjing + t_h + "--" + t_t + ".xlsx")
Example #2
0
def bc_generate_xlsx(fn):

    wb = Workbook()
    #wb = Workbook(optimized_write = True)
    #dest_filename = r'test_book.xlsx'
    dest_filename = fn.outdir + "/" + filename_from_path(fn.fiwalk_xmlfile) + ".xlsx"
    print("Generating Excel report ", dest_filename)
    ws = wb.worksheets[0]
    #ws = wb.create_sheet()
    ws.title = "File Object Information"

    ws.cell('%s%s'%('A', '1')).value = '%s' % "Partition"
    ws.cell('%s%s'%('B', '1')).value = '%s' % "Filename"
    ws.cell('%s%s'%('C', '1')).value = '%s' % "Extension"
    ws.cell('%s%s'%('D', '1')).value = '%s' % "Filesize"
    ws.cell('%s%s'%('E', '1')).value = '%s' % "File format"
    ws.cell('%s%s'%('F', '1')).value = '%s' % "Change time"
    ws.cell('%s%s'%('G', '1')).value = '%s' % "Access time"
    ws.cell('%s%s'%('H', '1')).value = '%s' % "Create time"
    ws.cell('%s%s'%('I', '1')).value = '%s' % "Modification time"
    ws.cell('%s%s'%('J', '1')).value = '%s' % "MD5 Hash" 
    ws.cell('%s%s'%('K', '1')).value = '%s' % "SHA1 Hash"

    process_files(fn.fiwalk_xmlfile, ws)

    wb.save(filename=dest_filename)
Example #3
0
def test_write_bool():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("F42").value = False
    ws.cell("F43").value = True
    content = write_worksheet(ws, {}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_bool.xml"), content)
Example #4
0
def saveData():
    name = 'fights'
    try:
        wb = load_workbook(name+'.xlsx')
    except:
        wb = Workbook()
        ws = wb.worksheets[0]
        ws.title = "log"
        wb.save(name+'.xlsx')
        wb = load_workbook(name+'.xlsx')
    ws = wb.worksheets[0]

    i = 1
    ws.cell('A1').value = 'Name'
    ws.cell('C1').value = 'Dammage'
    ws.cell('D1').value = 'Health'
    ws.cell('B1').value = 'Ignore'
    for n in persons:
        i = i + 1
        ws.cell('A'+str(i)).value = n
        ws.cell('B'+str(i)).value = persons[n]['total']['Ignore']
        ws.cell('C'+str(i)).value = persons[n]['total']['DmgDealed']
        ws.cell('D'+str(i)).value = persons[n]['total']['DmgTake']
        for b in persons[n]:
            if b == 'total': continue
            dd = persons[n][b]['DmgDealed']
            ws.cell(row = 0, column = int(b)+3).value = "battle"+b
            ws.cell(row = i-1, column = int(b)+3).value = dd

    wb.save(name+'.xlsx')
Example #5
0
 def test_get_named_range_wrong_sheet(self, Worksheet):
     wb = Workbook()
     ws1 = Worksheet(wb)
     ws2 = Worksheet(wb)
     wb.create_named_range('wrong_sheet_range', ws1, 'C5')
     with pytest.raises(NamedRangeException):
         ws2.get_named_range('wrong_sheet_range')
Example #6
0
def test_freeze_panes_both():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("F42").value = "hello"
    ws.freeze_panes = "D4"
    content = write_worksheet(ws, {"hello": 0}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_freeze_panes_both.xml"), content)
Example #7
0
def test_write_hidden_worksheet():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.sheet_state = ws.SHEETSTATE_HIDDEN
    ws.cell("F42").value = "hello"
    content = write_worksheet(ws, {"hello": 0}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1.xml"), content)
Example #8
0
def test_write_hyperlink():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("A1").value = "test"
    ws.cell("A1").hyperlink = "http://test.com"
    content = write_worksheet(ws, {"test": 0}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_hyperlink.xml"), content)
Example #9
0
def test_hyperlink_value():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("A1").hyperlink = "http://test.com"
    eq_("http://test.com", ws.cell("A1").value)
    ws.cell("A1").value = "test"
    eq_("test", ws.cell("A1").value)
Example #10
0
def test_write_style():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("F1").value = "13%"
    style_id_by_hash = StyleWriter(wb).get_style_by_hash()
    content = write_worksheet(ws, {}, style_id_by_hash)
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_style.xml"), content)
Example #11
0
def test_write_height():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("F1").value = 10
    ws.row_dimensions[ws.cell("F1").row].height = 30
    content = write_worksheet(ws, {}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_height.xml"), content)
Example #12
0
def test_short_number():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell('A1').value = 1234567890
    content = write_worksheet(ws, {}, {})
    assert_equals_file_content(os.path.join(DATADIR, 'writer', 'expected', \
            'short_number.xml'), content)
Example #13
0
def test_write_worksheet():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell('F42').value = 'hello'
    content = write_worksheet(ws, {'hello': 0}, {})
    assert_equals_file_content(os.path.join(DATADIR, 'writer', 'expected', \
            'sheet1.xml'), content)
Example #14
0
def saveData(data):
    name = 'top_info'
    #try:
    #    wb = load_workbook(name+'.xlsx')
    #except:
    if True:
        wb = Workbook()
        ws = wb.worksheets[0]
        ws.title = "gettop"
        wb.save(name+'.xlsx')
        wb = load_workbook(name+'.xlsx')
    ws = wb.worksheets[0]
    ws.title = 'gettop'
    i = 1
    for n in data:
        i = i + 1
        putData(ws,'A',i,n,'NO',i-1)
        putData(ws,'B',i,n,'name','......')
        putData(ws,'C',i,n,'vk')
        putData(ws,'D',i,n,'rang')
        putData(ws,'E',i,n,'level')
        putData(ws,'F',i,n,'epower')
        putData(ws,'G',i,n,'clan_name')
        putData(ws,'H',i,n,'clan_owner')
        if n.has_key('adInfo'):
            n['adInfo'] = json.loads(n['adInfo'])
            putData(ws,'I',i,n['adInfo'],'currency')

    wb.save(name+'.xlsx')
Example #15
0
def test_decimal():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell('A1').value = decimal.Decimal('3.14')
    content = write_worksheet(ws, {}, {})
    assert_equals_file_content(os.path.join(DATADIR, 'writer', 'expected', \
            'decimal.xml'), content)
Example #16
0
def test_get_named_range():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    named_range = NamedRange("test_nr", [(new_sheet, "A1")])
    wb.add_named_range(named_range)
    found_named_range = wb.get_named_range("test_nr")
    eq_(named_range, found_named_range)
Example #17
0
def test_get_sheet_by_name():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    title = "my sheet"
    new_sheet.title = title
    found_sheet = wb.get_sheet_by_name(title)
    eq_(new_sheet, found_sheet)
Example #18
0
def test_get_sheet_names():
    wb = Workbook()
    names = ["Sheet", "Sheet1", "Sheet2", "Sheet3", "Sheet4", "Sheet5"]
    for count in range(5):
        wb.create_sheet(0)
    actual_names = wb.get_sheet_names()
    eq_(sorted(actual_names), sorted(names))
Example #19
0
def test_add_named_range():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    named_range = NamedRange("test_nr", [(new_sheet, "A1")])
    wb.add_named_range(named_range)
    named_ranges_list = wb.get_named_ranges()
    assert named_range in named_ranges_list
Example #20
0
def saveClansData(cdata):
    name = 'clan_info_all'
    try:
        wb = load_workbook(name+'.xlsx')
    except:
        wb = Workbook()
        ws = wb.worksheets[0]
        ws.title = "log"
        wb.save(name+'.xlsx')
        wb = load_workbook(name+'.xlsx')
    ws = wb.worksheets[0]
    i = 0
    j = 0
    clansN = []
    for c in cdata:
        for n in c:
            
            clansN.append(n)
            ws.cell(row=i, column=j).value = n
            j = j + 1
        break
    for c in cdata:
        i = i + 1
        j = 0
        for n in c:
            ws.cell(row=i, column=j).value = c[n]
            j = j + 1

    wb.save(name+'.xlsx')
Example #21
0
def test_hyperlink_value():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell('A1').hyperlink = "http://test.com"
    assert "http://test.com" == ws.cell('A1').value
    ws.cell('A1').value = "test"
    assert "test" == ws.cell('A1').value
def test_get_sheet_by_name():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    title = 'my sheet'
    new_sheet.title = title
    found_sheet = wb.get_sheet_by_name(title)
    assert new_sheet == found_sheet
def test_get_sheet_names():
    wb = Workbook()
    names = ['Sheet', 'Sheet1', 'Sheet2', 'Sheet3', 'Sheet4', 'Sheet5']
    for count in range(5):
        wb.create_sheet(0)
    actual_names = wb.get_sheet_names()
    assert sorted(actual_names) == sorted(names)
Example #24
0
def separate_xl_content(src_filepath):
    src_wb = load_workbook(src_filepath, use_iterators=True)
    src_ws = src_wb.get_sheet_by_name(name = "Sheet")

    mytree = {}
    for row in src_ws.iter_rows():
        subxlfilename = row[0].internal_value
        if not mytree.has_key(subxlfilename):
            mytree[subxlfilename] = []

        values = []
        for cell in row[1:]:
            values.append(cell.internal_value)

        mytree[subxlfilename].append(values)

    ret = []
    for subxlfilename in mytree.keys():
        wb = Workbook()
        ws = wb.get_sheet_by_name(name="Sheet")

        for values in mytree[subxlfilename]:
            ws.append(values)

        wb.save(subxlfilename)

        ret.append(subxlfilename)

    return ret
Example #25
0
class XLSXRenderer(Renderer):
    def __init__(self, renderers_func, config):
        if not has_openpyxl:
            debug.error("You must install OpenPyxl 2.1.2 for xlsx format:\n\thttps://pypi.python.org/pypi/openpyxl")
        self._config = config
        self._columns = None
        self._text_cell_renderers_func = renderers_func
        self._text_cell_renderers = None
        self._wb = Workbook(optimized_write=True)
        self._ws = self._wb.create_sheet()

    def description(self):
        output = []
        for column in self._columns:
            output.append((column.name))
        return output

    def _add_row(self, node, data):
        accumulator = data
        accumulator[node] = max(accumulator.values()) + 1
        self._ws.append(list(node.values))
        return accumulator

    def render(self, outfd, grid):
        """Renders the TreeGrid in data out to the output file from the config options"""
        if not self._config.OUTPUT_FILE:
            debug.error("Please specify a valid output file using --output-file")
        self._columns = grid.columns
        self._text_cell_renderers = self._text_cell_renderers_func(self._columns)
        self._ws.append(self.description())
        grid.visit(None, self._add_row, {None: 0})
        self._wb.save(filename=self._config.OUTPUT_FILE)
Example #26
0
 def setUp(self):
     wb = Workbook()
     ws = wb.get_active_sheet()
     for i in range(10):
         ws.cell(row=i, column=0).value = i
     self.cell = Reference(ws, (0, 0))
     self.range = Reference(ws, (0, 0), (9, 0))
Example #27
0
def test1():
    wb = Workbook()
    ws = wb.get_sheet_by_name(name=r"Sheet")

    merge_xl_content(ws, '/home/huzhennan/Works/local/Gallery2/books.xlsx', 'Sheet')

    wb.save('test.xlsx')
def test_get_named_range():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    named_range = NamedRange('test_nr', [(new_sheet, 'A1')])
    wb.add_named_range(named_range)
    found_named_range = wb.get_named_range('test_nr')
    assert named_range == found_named_range
def print_CFs(CF_matrices, EF_list, CF_categories):
    from openpyxl.workbook import Workbook
    from openpyxl.worksheet import Worksheet
    
    result_filename = 'CF_print.xlsx'
    wb = Workbook() #creating a workbook
    for method in CF_matrices:
        ws = Worksheet(wb, title = method) #creating a sheet inside the workbook
        ws.freeze_panes = 'D2'
        header = ['compartment', 
              'substance', 
              'subcompartment']
        for category in CF_categories[method]:
            header.append(category)
        ws.append(header)
        for EF in EF_list:
            matrix_column = EF_list.index(EF)
            compartment, substance, subcompartment = EF
            line = [compartment, substance, subcompartment]
            for category in CF_categories[method]:
                matrix_line = CF_categories[method].index(category)
                CF = CF_matrices[method][matrix_line, matrix_column]
                line.append(CF)
            ws.append(line)
    print 'saving in excel sheet named: ' + result_filename
    wb.add_sheet(ws)
    wb.save(result_filename)
Example #30
0
def saveData(cname, data):
    name = 'clan_info_'+str(cid)
    try:
        wb = load_workbook(name+'.xlsx')
    except:
        wb = Workbook()
        ws = wb.worksheets[0]
        ws.title = "log"
        wb.save(name+'.xlsx')
        wb = load_workbook(name+'.xlsx')
    ws = wb.worksheets[0]
    ws.title = cname
    i = 1
    ws.cell('C1').value = 'Name'
    ws.cell('E1').value = 'Level'
    ws.cell('G1').value = 'Ladder'
    ws.cell('H1').value = 'Win'
    ws.cell('I1').value = 'Kill'
    ws.cell('J1').value = 'Mission'
    for n in data:
        i = i + 1
        ws.cell('A'+str(i)).value = '1'
        ws.cell('B'+str(i)).value = '.'
        ws.cell('C'+str(i)).value = n["Name"]
        ws.cell('D'+str(i)).value = '-'
        ws.cell('E'+str(i)).value = n["Lvl"]
        ws.cell('F'+str(i)).value = ','
        ws.cell('G'+str(i)).value = n["Ladder"]
        ws.cell('H'+str(i)).value = n["WinCount"]
        ws.cell('I'+str(i)).value = n["KillCount"]
        ws.cell('J'+str(i)).value = n["DoMissionCount"]

    wb.save(name+'.xlsx')
Example #31
0
# Create blank Masterfile for recording ALL changes after particular duration
import pandas as pd
from openpyxl.workbook import Workbook

#heading = [" MASTERS File " + dttime[:11]]
filename = 'C:\\NSE\\outputs\\MASTERS.xlsx'
# Create blank Masterfile for recording ALL changes after particular duration

wb = Workbook()
ws = wb.active
ws.title = "Masters"
wb.save(filename='C:\\NSE\\outputs\Masters.xlsx')
print("Masters Excel File created in C:\\NSE\\outputs")

masterdf = pd.DataFrame(columns = ['Date', 'Time', 'Base Stike', 'Base Change','Current NIFTY',
                                   'Change','CALL Price', 'Put Price', 'Remarks'])

masterdf.to_excel("C:\\NSE\\outputs\Masters.xlsx", startrow = 0,index = False)
Example #32
0
 def test_good_encoding(self):
     wb = Workbook(encoding='latin1')
     ws = wb.active
     cell = ws['A1']
     cell.value = self.test_string
Example #33
0
def test_cell_offset():
    wb = Workbook()
    ws = Worksheet(wb)
    assert ws['B15'].offset(2, 1).coordinate == 'C17'
Example #34
0
def test_repr():
    wb = Workbook()
    ws = Worksheet(wb)
    cell = Cell(ws, 'A', 1)
    assert repr(cell), '<Cell Sheet1.A1>' == 'Got bad repr: %s' % repr(cell)
Example #35
0
 def setup_class(cls):
     wb = Workbook()
     ws = Worksheet(wb)
     cls.cell = Cell(ws, 'A', 1)
                    ci = source.rows[ri].index(cell)
                    temprow.append(source.rows[ri][ci].value)
                temprow.append(
                    temprow[-1] +
                    temprow[-2])  #Total Hours: sum of DOE & Proj Hours
                temprow.append(temprow[-3] / float(
                    temprow[-1]))  #DOE Util%; DOE Hours / newly added Total
                temprow.append(
                    temprow[-3] /
                    float(temprow[-2]))  #Proj Util%; Proj. Hours / Total
                tempTable.append(temprow)
    return tempTable


##Creating the target workbook
target = Workbook()
dest_filename = r'hdcntfunc.xlsx'


##Function that creates the individual functional area worksheets IN the workbook just created
def create_tabs(functable, tabname):
    '''
    list of lists, string --> list of lists
    
    Takes in nested list for each functional area (created by funcTable/makeSubseaTable/makeNoSubseaTable) and a string (tabname)
    each inner list reperesents a row of data; creates a spreadsheet in memory, writes those rows to the spreadsheet
    The string becomes the name of the worksheet
    '''
    #creating & naming the spreadsheet in memory
    ws = target.create_sheet(0)
    ws.title = tabname
    def to_xls_export(self, path, data, username, id_string, *args):
        xform = XForm.objects.get(user__username__iexact=username,
                                  id_string__exact=id_string)

        json_question = json.loads(xform.json)
        parsedQuestions = get_questions_and_media_attributes(
            json_question['children'])

        from django.contrib.sites.models import Site as DjangoSite
        domain = DjangoSite.objects.get_current().domain

        def write_row(data, work_sheet, fields, work_sheet_titles):
            # work_sheet_titles = work_sheet_titles.append("fs_site")
            # update parent_table with the generated sheet's title
            data[PARENT_TABLE_NAME] = work_sheet_titles.get(
                data.get(PARENT_TABLE_NAME))
            data_new = []
            for f in fields:
                if f in data and f in parsedQuestions.get('media_attributes'):
                    data_new.append('=HYPERLINK("http://' + domain +
                                    '/attachment/medium?media_file=' +
                                    xform.user.username + '/attachments/' +
                                    data.get(f) + '", "Attachment")')
                else:
                    if f == "fs_status":
                        try:
                            status = FInstance.objects.get(
                                instance_id=data.get(
                                    '_id')).get_form_status_display()
                        except:
                            status = "No Status"
                        data_new.append(status)
                    else:
                        data_new.append(data.get(f, ''))
            work_sheet.append(data_new)

        wb = Workbook(optimized_write=True)
        work_sheets = {}
        # map of section_names to generated_names
        work_sheet_titles = {}
        for section in self.sections:
            section_name = section['name']
            work_sheet_title = ExportBuilder.get_valid_sheet_name(
                "_".join(section_name.split("/")), work_sheet_titles.values())
            work_sheet_titles[section_name] = work_sheet_title
            work_sheets[section_name] = wb.create_sheet(title=work_sheet_title)

        # write the headers
        for section in self.sections:
            section_name = section['name']
            headers = [element['title']
                       for element in section['elements']] + self.EXTRA_FIELDS
            # get the worksheet
            ws = work_sheets[section_name]
            ws.append(headers)

        index = 1
        indices = {}
        survey_name = self.survey.name
        for d in data:
            joined_export = dict_to_joined_export(d, index, indices,
                                                  survey_name)
            output = ExportBuilder.decode_mongo_encoded_section_names(
                joined_export)
            # attach meta fields (index, parent_index, parent_table)
            # output has keys for every section
            if survey_name not in output:
                output[survey_name] = {}
            output[survey_name][INDEX] = index
            output[survey_name][PARENT_INDEX] = -1
            for section in self.sections:
                # get data for this section and write to xls
                section_name = section['name']
                fields = [element['xpath'] for element in section['elements']
                          ] + self.EXTRA_FIELDS

                ws = work_sheets[section_name]
                # section might not exist within the output, e.g. data was
                # not provided for said repeat - write test to check this
                row = output.get(section_name, None)
                if type(row) == dict:
                    write_row(self.pre_process_row(row, section), ws, fields,
                              work_sheet_titles)
                elif type(row) == list:
                    for child_row in row:
                        write_row(self.pre_process_row(child_row, section), ws,
                                  fields, work_sheet_titles)
            index += 1

        wb.save(filename=path)
Example #38
0
class _Openpyxl1Writer(ExcelWriter):
    engine = 'openpyxl1'
    supported_extensions = ('.xlsx', '.xlsm')
    openpyxl_majorver = 1

    def __init__(self, path, engine=None, **engine_kwargs):
        if not openpyxl_compat.is_compat(major_ver=self.openpyxl_majorver):
            raise ValueError('Installed openpyxl is not supported at this '
                             'time. Use {0}.x.y.'.format(
                                 self.openpyxl_majorver))
        # Use the openpyxl module as the Excel writer.
        from openpyxl.workbook import Workbook

        super(_Openpyxl1Writer, self).__init__(path, **engine_kwargs)

        # Create workbook object with default optimized_write=True.
        self.book = Workbook()
        # Openpyxl 1.6.1 adds a dummy sheet. We remove it.
        if self.book.worksheets:
            self.book.remove_sheet(self.book.worksheets[0])

    def save(self):
        """
        Save workbook to disk.
        """
        return self.book.save(self.path)

    def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
        # Write the frame cells using openpyxl.
        from openpyxl.cell import get_column_letter

        sheet_name = self._get_sheet_name(sheet_name)

        if sheet_name in self.sheets:
            wks = self.sheets[sheet_name]
        else:
            wks = self.book.create_sheet()
            wks.title = sheet_name
            self.sheets[sheet_name] = wks

        for cell in cells:
            colletter = get_column_letter(startcol + cell.col + 1)
            xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1))
            xcell.value = _conv_value(cell.val)
            style = None
            if cell.style:
                style = self._convert_to_style(cell.style)
                for field in style.__fields__:
                    xcell.style.__setattr__(field,
                                            style.__getattribute__(field))

            if isinstance(cell.val, datetime.datetime):
                xcell.style.number_format.format_code = self.datetime_format
            elif isinstance(cell.val, datetime.date):
                xcell.style.number_format.format_code = self.date_format

            if cell.mergestart is not None and cell.mergeend is not None:
                cletterstart = get_column_letter(startcol + cell.col + 1)
                cletterend = get_column_letter(startcol + cell.mergeend + 1)

                wks.merge_cells('%s%s:%s%s' %
                                (cletterstart, startrow + cell.row + 1,
                                 cletterend, startrow + cell.mergestart + 1))

                # Excel requires that the format of the first cell in a merged
                # range is repeated in the rest of the merged range.
                if style:
                    first_row = startrow + cell.row + 1
                    last_row = startrow + cell.mergestart + 1
                    first_col = startcol + cell.col + 1
                    last_col = startcol + cell.mergeend + 1

                    for row in range(first_row, last_row + 1):
                        for col in range(first_col, last_col + 1):
                            if row == first_row and col == first_col:
                                # Ignore first cell. It is already handled.
                                continue
                            colletter = get_column_letter(col)
                            xcell = wks.cell("%s%s" % (colletter, row))
                            for field in style.__fields__:
                                xcell.style.__setattr__(
                                    field, style.__getattribute__(field))

    @classmethod
    def _convert_to_style(cls, style_dict):
        """
        converts a style_dict to an openpyxl style object
        Parameters
        ----------
        style_dict: style dictionary to convert
        """

        from openpyxl.style import Style
        xls_style = Style()
        for key, value in style_dict.items():
            for nk, nv in value.items():
                if key == "borders":
                    (xls_style.borders.__getattribute__(nk).__setattr__(
                        'border_style', nv))
                else:
                    xls_style.__getattribute__(key).__setattr__(nk, nv)

        return xls_style
Example #39
0
from openpyxl.styles import Font, colors, Color, Alignment, PatternFill, GradientFill, Border, Side
from openpyxl.styles import NamedStyle
from openpyxl.workbook import Workbook

wb = Workbook()
ws = wb.active

for i in range(1, 20):
    ws.append(range(300))

ws.merge_cells("A1:B5")
ws.unmerge_cells("A1:B5")
ws.merge_cells(start_row=2, start_column=2, end_row=5, end_column=5)

cell = ws['B2']
cell.font = Font(color=colors.RED, size=20, italic=True)
cell.value = 'Merged Cell'
cell.alignment = Alignment(horizontal='right', vertical='bottom')
cell.fill = GradientFill(stop=("000000", "FFFFFF"))
wb.save('text.xlsx')

highlight = NamedStyle(name='highlight')
highlight.font = Font(bold=True)
bd = Side(style='thick', color='000000')
highlight.border = Border(left=bd, top=bd, right=bd, bottom=bd)
highlight.fill = PatternFill('solid', fgColor='FFFF00')

count = 0

for col in ws.iter_cols(min_col=8, min_row=1, max_col=30, max_row=30):
    col[count].style = highlight
Example #40
0
def writer():
    from .._writer import WorksheetWriter
    wb = Workbook()
    ws = wb.active
    return WorksheetWriter(ws)
Example #41
0
def test_getitem(Workbook, Worksheet):
    wb = Workbook()
    ws = wb['Sheet']
    assert isinstance(ws, Worksheet)
    with pytest.raises(KeyError):
        wb['NotThere']
Example #42
0
def test_close_write(wo):
    from openpyxl.workbook import Workbook
    wb = Workbook(write_only=wo)
    wb.close()
Example #43
0
def test_create_sheet_readonly():
    wb = Workbook()
    wb._set_optimized_read()
    wb.create_sheet()
Example #44
0
class SHSX:
    # 食行生鲜爬虫
    def __init__(self):
        self.dict = []
        self.dict = UserAgent.Android_USER_AGENT + UserAgent.iPhone_USER_AGENT
        #self.ipf = IpFilter('1')
        self.outwb = Workbook()  #Excel对象

        self.url_Class = {
            '蔬菜豆菇':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100840',
            '新鲜水果':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100559',
            '鲜肉蛋禽':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100771',
            '水产生鲜':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100591',
            '乳品烘培':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100705',
            '面点速食':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100736',
            '粮油副食':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100870',
            '休闲零食':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100935',
            '酒水饮料':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100996',
            '生活百货':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100625',
            '鲜花绿植':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=101017'
        }
        self.headers = {
            'charset': 'utf-8',
            'Accept-Encoding': 'gzip',
            'referer':
            'https://servicewechat.com/wx6e7ce0c196b0c3c2/32/page-frame.html',
            'content-type': 'application/json',
            'User-Agent':
            'Mozilla/5.0 (Linux; Android 8.0.0; FRD-AL10 Build/HUAWEIFRD-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070338) NetType/WIFI Language/zh_CN Process/toolsmp',
            'Host': 'api1.34580.com',
            'Connection': 'Keep-Alive'
        }

    def getBaseUrl(self, str_ID):
        # 每次每页取最大数据
        url_allClass = 'https://api1.34580.com/sz/ProductRequests/ProductMultiConditionRequest?sourcetype=9&OrderDirectionType=0&OrderFieldType=0&CategoryIds=%s&PageSize=%s&PageIndex=1&SourceType=9&MallTypes=&joinedproduct=true' % (
            str_ID, '1000')
        return url_allClass

    def initialization(self):  #请求初始化函数 设置useragent和代理ip
        self.headers['User-Agent'] = random.sample(self.dict, 1)[0]
        #print self.headers['User-Agent']
        #self.proxies = self.ipf.getIp()
        self.proxies = {'HTTP': 'localhost:8080'}
        #print self.proxies
        print '已初始化useragent和代理ip'

    def getExcel(self, career_Name):
        wo = self.outwb.active
        careerSheet = self.outwb.create_sheet(unicode(career_Name), 0)
        careerSheet.append(
            ['展示名称', '团购价', '标签', '划线价', '单价', '重量', '单位', '展示名称', '已售数量'])
        return careerSheet

    def SaveExcel(self, ExcelName):
        self.outwb.save(ExcelName + '.xlsx')

    def getDataByUrl(self, url):
        print '当前url' + url
        self.initialization()
        req = requests.get(url=url, headers=self.headers,
                           proxies=self.proxies).json()
        return json.dumps(req, ensure_ascii=False)

    def getAllClassID(self):
        for Key, Value in self.url_Class.items():
            urlDict = {}
            urldata_ID = json.loads(self.getDataByUrl(Value))
            for item in urldata_ID['Data']['Children']:
                urlDict[item['Name']] = self.getBaseUrl(item['Id'])
            self.url_Class[Key] = urlDict
        print '所有子类url组装到url_Class——完毕'
        #print self.url_Class

    #获得点开商品后的数据 如 销售量
    def getSingleProductData(self, id):
        data_ = ''
        try:
            url = "https://api1.34580.com/sz/product/detail?sourcetype=9&ssuId=%s" % id
            headers = {
                'charset': 'utf-8',
                'Accept-Encoding': 'gzip',
                'referer':
                'https://servicewechat.com/wx6e7ce0c196b0c3c2/32/page-frame.html',
                'content-type': 'application/json',
                'User-Agent':
                'Mozilla/5.0 (Linux; Android 8.0.0; FRD-AL10 Build/HUAWEIFRD-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070338) NetType/WIFI Language/zh_CN Process/toolsmp',
                'Host': 'api1.34580.com',
                'Connection': 'Keep-Alive'
            }
            req = requests.get(url=url, headers=headers).json()
            data = json.loads(json.dumps(req, ensure_ascii=False))
            data_ = data['result']['productInfo']['soldNumber']
        except:
            print '销售量失败'
        print data_
        return data_

    def getAllClassData(self):
        self.getAllClassID()
        for Key, Value in self.url_Class.items():
            careerSheet = self.getExcel(Key)  #以一级分类为基础建立Excel页
            for key, value in Value.items():
                careerSheet.append([key])
                try:
                    data = json.loads(self.getDataByUrl(value))
                    if data['Message'] == '返回正确':
                        for item in data['Data']['SourceData']:
                            careerSheet.append([
                                item['ProductName'],  #展示名称
                                item['UnitPeriodMoney'],  # 团购价
                                item['PriceName'],  # 标签
                                item['DefaultMoney'],  # 划线价
                                item['PvStandard'],  # 单价
                                item['Weight'],  # 重量
                                item['Unit'],  # 单位
                                item['ProductName'],  # 展示名称
                                str(
                                    self.getSingleProductData(
                                        item['ProductId']))  #已售数量
                            ])
                    print data['Message']
                except:
                    print 'Fail'

    def Crawl(self, ExcelName):
        self.getAllClassData()  #组装url
        self.SaveExcel(ExcelName)  #储存
Example #45
0
def test_add_correct_sheet():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
    wb.add_sheet(new_sheet)
    eq_(new_sheet, wb.worksheets[2])
Example #46
0
def test_remove_sheet():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
    wb.remove_sheet(new_sheet)
    assert new_sheet not in wb.worksheets
Example #47
0
def test_create_sheet():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
    eq_(new_sheet, wb.worksheets[0])
Example #48
0
def test_add_incorrect_sheet():
    wb = Workbook()
    with pytest.raises(TypeError):
        wb.add_sheet("Test")
Example #49
0
def test_add_invalid_worksheet_class_instance():
    wb = Workbook()
    ws = AlternativeWorksheet(parent_workbook=wb)
    with pytest.raises(TypeError):
        wb.add_sheet(worksheet=ws)
Example #50
0
def test_create_sheet_with_name():
    wb = Workbook()
    new_sheet = wb.create_sheet(0, title='LikeThisName')
    eq_(new_sheet, wb.worksheets[0])
Example #51
0
def test_get_named_ranges():
    wb = Workbook()
    eq_(wb.get_named_ranges(), wb._named_ranges)
Example #52
0
def test_get_active_sheet():
    wb = Workbook()
    active_sheet = wb.get_active_sheet()
    eq_(active_sheet, wb.worksheets[0])
Example #53
0
def test_get_index():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
Example #54
0
def test_worksheet_class():
    wb = Workbook(worksheet_class=AlternativeWorksheet)
    assert isinstance(wb.worksheets[0], AlternativeWorksheet)
Example #55
0
def test_contains(Workbook):
    wb = Workbook()
    assert "Sheet" in wb
    assert "NotThere" not in wb
Example #56
0
def test_get_index():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
    sheet_index = wb.get_index(new_sheet)
    eq_(sheet_index, 0)
Example #57
0
class OpenpyxlWriter(ExcelWriter):
    engine = "openpyxl"
    supported_extensions = (".xlsx", ".xlsm")

    def __init__(
        self,
        path,
        engine=None,
        date_format=None,
        datetime_format=None,
        mode: str = "w",
        storage_options: StorageOptions = None,
        if_sheet_exists: str | None = None,
        engine_kwargs: dict[str, Any] | None = None,
    ):
        # Use the openpyxl module as the Excel writer.
        from openpyxl.workbook import Workbook

        super().__init__(
            path,
            mode=mode,
            storage_options=storage_options,
            if_sheet_exists=if_sheet_exists,
            engine_kwargs=engine_kwargs,
        )

        # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
        # the file and later write to it
        if "r+" in self.mode:  # Load from existing workbook
            from openpyxl import load_workbook

            self.book = load_workbook(self.handles.handle)
            self.handles.handle.seek(0)
            self.sheets = {
                name: self.book[name]
                for name in self.book.sheetnames
            }

        else:
            # Create workbook object with default optimized_write=True.
            self.book = Workbook()

            if self.book.worksheets:
                self.book.remove(self.book.worksheets[0])

    def save(self):
        """
        Save workbook to disk.
        """
        self.book.save(self.handles.handle)
        if "r+" in self.mode and not isinstance(self.handles.handle,
                                                mmap.mmap):
            # truncate file to the written content
            self.handles.handle.truncate()

    @classmethod
    def _convert_to_style_kwargs(cls,
                                 style_dict: dict) -> dict[str, Serialisable]:
        """
        Convert a style_dict to a set of kwargs suitable for initializing
        or updating-on-copy an openpyxl v2 style object.

        Parameters
        ----------
        style_dict : dict
            A dict with zero or more of the following keys (or their synonyms).
                'font'
                'fill'
                'border' ('borders')
                'alignment'
                'number_format'
                'protection'

        Returns
        -------
        style_kwargs : dict
            A dict with the same, normalized keys as ``style_dict`` but each
            value has been replaced with a native openpyxl style object of the
            appropriate class.
        """
        _style_key_map = {"borders": "border"}

        style_kwargs: dict[str, Serialisable] = {}
        for k, v in style_dict.items():
            if k in _style_key_map:
                k = _style_key_map[k]
            _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
            new_v = _conv_to_x(v)
            if new_v:
                style_kwargs[k] = new_v

        return style_kwargs

    @classmethod
    def _convert_to_color(cls, color_spec):
        """
        Convert ``color_spec`` to an openpyxl v2 Color object.

        Parameters
        ----------
        color_spec : str, dict
            A 32-bit ARGB hex string, or a dict with zero or more of the
            following keys.
                'rgb'
                'indexed'
                'auto'
                'theme'
                'tint'
                'index'
                'type'

        Returns
        -------
        color : openpyxl.styles.Color
        """
        from openpyxl.styles import Color

        if isinstance(color_spec, str):
            return Color(color_spec)
        else:
            return Color(**color_spec)

    @classmethod
    def _convert_to_font(cls, font_dict):
        """
        Convert ``font_dict`` to an openpyxl v2 Font object.

        Parameters
        ----------
        font_dict : dict
            A dict with zero or more of the following keys (or their synonyms).
                'name'
                'size' ('sz')
                'bold' ('b')
                'italic' ('i')
                'underline' ('u')
                'strikethrough' ('strike')
                'color'
                'vertAlign' ('vertalign')
                'charset'
                'scheme'
                'family'
                'outline'
                'shadow'
                'condense'

        Returns
        -------
        font : openpyxl.styles.Font
        """
        from openpyxl.styles import Font

        _font_key_map = {
            "sz": "size",
            "b": "bold",
            "i": "italic",
            "u": "underline",
            "strike": "strikethrough",
            "vertalign": "vertAlign",
        }

        font_kwargs = {}
        for k, v in font_dict.items():
            if k in _font_key_map:
                k = _font_key_map[k]
            if k == "color":
                v = cls._convert_to_color(v)
            font_kwargs[k] = v

        return Font(**font_kwargs)

    @classmethod
    def _convert_to_stop(cls, stop_seq):
        """
        Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
        suitable for initializing the ``GradientFill`` ``stop`` parameter.

        Parameters
        ----------
        stop_seq : iterable
            An iterable that yields objects suitable for consumption by
            ``_convert_to_color``.

        Returns
        -------
        stop : list of openpyxl.styles.Color
        """
        return map(cls._convert_to_color, stop_seq)

    @classmethod
    def _convert_to_fill(cls, fill_dict):
        """
        Convert ``fill_dict`` to an openpyxl v2 Fill object.

        Parameters
        ----------
        fill_dict : dict
            A dict with one or more of the following keys (or their synonyms),
                'fill_type' ('patternType', 'patterntype')
                'start_color' ('fgColor', 'fgcolor')
                'end_color' ('bgColor', 'bgcolor')
            or one or more of the following keys (or their synonyms).
                'type' ('fill_type')
                'degree'
                'left'
                'right'
                'top'
                'bottom'
                'stop'

        Returns
        -------
        fill : openpyxl.styles.Fill
        """
        from openpyxl.styles import (
            GradientFill,
            PatternFill,
        )

        _pattern_fill_key_map = {
            "patternType": "fill_type",
            "patterntype": "fill_type",
            "fgColor": "start_color",
            "fgcolor": "start_color",
            "bgColor": "end_color",
            "bgcolor": "end_color",
        }

        _gradient_fill_key_map = {"fill_type": "type"}

        pfill_kwargs = {}
        gfill_kwargs = {}
        for k, v in fill_dict.items():
            pk = gk = None
            if k in _pattern_fill_key_map:
                pk = _pattern_fill_key_map[k]
            if k in _gradient_fill_key_map:
                gk = _gradient_fill_key_map[k]
            if pk in ["start_color", "end_color"]:
                v = cls._convert_to_color(v)
            if gk == "stop":
                v = cls._convert_to_stop(v)
            if pk:
                pfill_kwargs[pk] = v
            elif gk:
                gfill_kwargs[gk] = v
            else:
                pfill_kwargs[k] = v
                gfill_kwargs[k] = v

        try:
            return PatternFill(**pfill_kwargs)
        except TypeError:
            return GradientFill(**gfill_kwargs)

    @classmethod
    def _convert_to_side(cls, side_spec):
        """
        Convert ``side_spec`` to an openpyxl v2 Side object.

        Parameters
        ----------
        side_spec : str, dict
            A string specifying the border style, or a dict with zero or more
            of the following keys (or their synonyms).
                'style' ('border_style')
                'color'

        Returns
        -------
        side : openpyxl.styles.Side
        """
        from openpyxl.styles import Side

        _side_key_map = {"border_style": "style"}

        if isinstance(side_spec, str):
            return Side(style=side_spec)

        side_kwargs = {}
        for k, v in side_spec.items():
            if k in _side_key_map:
                k = _side_key_map[k]
            if k == "color":
                v = cls._convert_to_color(v)
            side_kwargs[k] = v

        return Side(**side_kwargs)

    @classmethod
    def _convert_to_border(cls, border_dict):
        """
        Convert ``border_dict`` to an openpyxl v2 Border object.

        Parameters
        ----------
        border_dict : dict
            A dict with zero or more of the following keys (or their synonyms).
                'left'
                'right'
                'top'
                'bottom'
                'diagonal'
                'diagonal_direction'
                'vertical'
                'horizontal'
                'diagonalUp' ('diagonalup')
                'diagonalDown' ('diagonaldown')
                'outline'

        Returns
        -------
        border : openpyxl.styles.Border
        """
        from openpyxl.styles import Border

        _border_key_map = {
            "diagonalup": "diagonalUp",
            "diagonaldown": "diagonalDown"
        }

        border_kwargs = {}
        for k, v in border_dict.items():
            if k in _border_key_map:
                k = _border_key_map[k]
            if k == "color":
                v = cls._convert_to_color(v)
            if k in ["left", "right", "top", "bottom", "diagonal"]:
                v = cls._convert_to_side(v)
            border_kwargs[k] = v

        return Border(**border_kwargs)

    @classmethod
    def _convert_to_alignment(cls, alignment_dict):
        """
        Convert ``alignment_dict`` to an openpyxl v2 Alignment object.

        Parameters
        ----------
        alignment_dict : dict
            A dict with zero or more of the following keys (or their synonyms).
                'horizontal'
                'vertical'
                'text_rotation'
                'wrap_text'
                'shrink_to_fit'
                'indent'
        Returns
        -------
        alignment : openpyxl.styles.Alignment
        """
        from openpyxl.styles import Alignment

        return Alignment(**alignment_dict)

    @classmethod
    def _convert_to_number_format(cls, number_format_dict):
        """
        Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
        initializer.

        Parameters
        ----------
        number_format_dict : dict
            A dict with zero or more of the following keys.
                'format_code' : str

        Returns
        -------
        number_format : str
        """
        return number_format_dict["format_code"]

    @classmethod
    def _convert_to_protection(cls, protection_dict):
        """
        Convert ``protection_dict`` to an openpyxl v2 Protection object.

        Parameters
        ----------
        protection_dict : dict
            A dict with zero or more of the following keys.
                'locked'
                'hidden'

        Returns
        -------
        """
        from openpyxl.styles import Protection

        return Protection(**protection_dict)

    def write_cells(self,
                    cells,
                    sheet_name=None,
                    startrow=0,
                    startcol=0,
                    freeze_panes=None):
        # Write the frame cells using openpyxl.
        sheet_name = self._get_sheet_name(sheet_name)

        _style_cache: dict[str, dict[str, Serialisable]] = {}

        if sheet_name in self.sheets and self.if_sheet_exists != "new":
            if "r+" in self.mode:
                if self.if_sheet_exists == "replace":
                    old_wks = self.sheets[sheet_name]
                    target_index = self.book.index(old_wks)
                    del self.book[sheet_name]
                    wks = self.book.create_sheet(sheet_name, target_index)
                    self.sheets[sheet_name] = wks
                elif self.if_sheet_exists == "error":
                    raise ValueError(
                        f"Sheet '{sheet_name}' already exists and "
                        f"if_sheet_exists is set to 'error'.")
                else:
                    raise ValueError(
                        f"'{self.if_sheet_exists}' is not valid for if_sheet_exists. "
                        "Valid options are 'error', 'new' and 'replace'.")
            else:
                wks = self.sheets[sheet_name]
        else:
            wks = self.book.create_sheet()
            wks.title = sheet_name
            self.sheets[sheet_name] = wks

        if validate_freeze_panes(freeze_panes):
            wks.freeze_panes = wks.cell(row=freeze_panes[0] + 1,
                                        column=freeze_panes[1] + 1)

        for cell in cells:
            xcell = wks.cell(row=startrow + cell.row + 1,
                             column=startcol + cell.col + 1)
            xcell.value, fmt = self._value_with_fmt(cell.val)
            if fmt:
                xcell.number_format = fmt

            style_kwargs: dict[str, Serialisable] | None = {}
            if cell.style:
                key = str(cell.style)
                style_kwargs = _style_cache.get(key)
                if style_kwargs is None:
                    style_kwargs = self._convert_to_style_kwargs(cell.style)
                    _style_cache[key] = style_kwargs

            if style_kwargs:
                for k, v in style_kwargs.items():
                    setattr(xcell, k, v)

            if cell.mergestart is not None and cell.mergeend is not None:

                wks.merge_cells(
                    start_row=startrow + cell.row + 1,
                    start_column=startcol + cell.col + 1,
                    end_column=startcol + cell.mergeend + 1,
                    end_row=startrow + cell.mergestart + 1,
                )

                # When cells are merged only the top-left cell is preserved
                # The behaviour of the other cells in a merged range is
                # undefined
                if style_kwargs:
                    first_row = startrow + cell.row + 1
                    last_row = startrow + cell.mergestart + 1
                    first_col = startcol + cell.col + 1
                    last_col = startcol + cell.mergeend + 1

                    for row in range(first_row, last_row + 1):
                        for col in range(first_col, last_col + 1):
                            if row == first_row and col == first_col:
                                # Ignore first cell. It is already handled.
                                continue
                            xcell = wks.cell(column=col, row=row)
                            for k, v in style_kwargs.items():
                                setattr(xcell, k, v)
Example #58
0
def test_iter(Workbook):
    wb = Workbook()
    for i, ws in enumerate(wb):
        pass
    assert i == 0
    assert ws.title == "Sheet"
Example #59
0
class boss_crawler:
    def __init__(self):
        self.headers = {
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding':
            'gzip,deflate,br',
            'Accept-Language':
            'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
            'Connecion':
            'keep-alive',
            #'Cookie':'__a=32544699.1550979725.1550979725.1550979732.57.2.56.57; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1550979723,1552831132,1552906964; lastCity=101010100; _uab_collina=155283113440544880091396; __g=-; __l=r=&l=%2Fsao.zhipin.com%2F; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1552907207; __c=1550979732; JSESSIONID=""',
            'Host':
            'www.zhipin.com',
            'Referer':
            'https://www.zhipin.com/c101010100/',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:65.0) Gecko/20100101 Firefox/65.0'
        }  # cookie会失效的,需要定期更换
        self.keyword = ['java']
        self.outwb = Workbook()

    def getHtml(self, url):
        time.sleep(2)
        response = request('get', url, headers=self.headers)
        #html = json.loads(response.text)
        html = response.text
        return html

    def getExcel(self, careerName):
        wo = self.outwb.active
        careerSheet = self.outwb.create_sheet(careerName)
        careerSheet.append(
            ['公司', '规模低', '规模顶', '职位', '最低工作经验', '学历', '底薪', '顶薪', '发布时间'])
        return careerSheet

    def saveExcel(self):
        self.outwb.save(
            "E:\DataAnalysis\\tools\python3\project\money_analysis\JodFinding\work.xlsx"
        )

    def getNum(self, str):
        pattern = re.compile(r'\d+')
        result = pattern.findall(str)
        return result

    def getSchool(self, str):
        p = re.compile(r'[年]+')
        p_ = re.compile(r'[限]+')
        p__ = re.compile(r'[生]+')
        list1 = p.split(str)
        list2 = p_.split(str)
        list3 = p__.split(str)
        if len(list1) > 1:
            print(list1)
            return list1[1]
        elif len(list2) > 1:
            print(list2)
            return list2[1]
        else:
            try:
                print(list3)
                return list3[1]
            except:
                print('error')

    def get_SaveMessage(self, careerSheet, soup):
        joblist = soup.find_all('div', {'class': 'job-primary'})
        for item in joblist:
            company = item.find('div', {
                'class': 'info-company'
            }).find('a').get_text()
            big = item.find('div', {
                'class': 'info-company'
            }).find('p').get_text()
            job = item.find('div', {
                'class': 'info-primary'
            }).find('div', {
                'class': 'job-title'
            }).get_text()
            graduate = item.find('div', {'class': 'info-primary'}).p.get_text()
            money = item.find('div', {
                'class': 'info-primary'
            }).find('span').get_text()
            time = item.find('div', {
                'class': 'info-publis'
            }).find('p').get_text()
            if len(self.getNum(big)) > 1:
                if len(self.getNum(graduate)) > 0:
                    careerSheet.append([
                        company,
                        self.getNum(big)[0],
                        self.getNum(big)[1], job,
                        self.getNum(graduate)[0],
                        self.getSchool(graduate),
                        self.getNum(money)[0],
                        self.getNum(money)[1], time
                    ])
                else:
                    careerSheet.append([
                        company,
                        self.getNum(big)[0],
                        self.getNum(big)[1], job, '无',
                        self.getSchool(graduate),
                        self.getNum(money)[0],
                        self.getNum(money)[1], time
                    ])
            else:
                if len(self.getNum(graduate)) > 0:
                    careerSheet.append([
                        company,
                        self.getNum(big)[0], '以上', job,
                        self.getNum(graduate)[0],
                        self.getSchool(graduate),
                        self.getNum(money)[0],
                        self.getNum(money)[1], time
                    ])
                else:
                    careerSheet.append([
                        company,
                        self.getNum(big)[0], '以上', job, '无',
                        self.getSchool(graduate),
                        self.getNum(money)[0],
                        self.getNum(money)[1], time
                    ])

    def startCrawl(self):
        for i in range(len(self.keyword)):  #按关键字进行爬取
            page = 0
            careerSheet = self.getExcel(self.keyword[i])
            url = 'https://www.zhipin.com/c101010100/?query=%s&page=%d&ka=page-%d' % (
                self.keyword[i], page, page)  #&period=3 是近7天招聘的意思
            soup = BeautifulSoup(self.getHtml(url), 'lxml')
            self.get_SaveMessage(careerSheet, soup)
            while soup.find('a', {'class': 'next'}):  #如果有下一页
                if soup.find('a', {'class': 'next disabled'}):
                    break
                page = page + 1
                url = 'https://www.zhipin.com/c101010100/?query=%s&page=%d&ka=page-%d' % (
                    self.keyword[i], page, page)
                soup = BeautifulSoup(self.getHtml(url), 'lxml')
                self.get_SaveMessage(careerSheet, soup)
            print(self.keyword[i] + 'is OK')
        self.saveExcel()
Example #60
0
def test_delitem(Workbook):
    wb = Workbook()
    del wb['Sheet']
    assert wb.worksheets == []