Ejemplo n.º 1
0
def test_open_too_many_files():
    test_filename = _get_test_filename()
    wb = Workbook(optimized_write=True)
    for i in range(200): # over 200 worksheets should raise an OSError ('too many open files')
        wb.create_sheet()
    wb.save(test_filename)
    os.remove(test_filename)
def process_dir(data_dir_path, output_file_path):
    
    # crate a new workbook and sheets
    wb_new = Workbook()
    ws_deliveries = wb_new.get_active_sheet()
    ws_deliveries.title = 'Deliveries'
    ws_returns = wb_new.create_sheet(1)
    ws_returns.title = 'Returns'
    ws_wastage = wb_new.create_sheet(2)
    ws_wastage.title = 'Wastage'
    ws_staff_meals = wb_new.create_sheet(3)
    ws_staff_meals.title = 'Staff Meals'
    ws_transfers_in = wb_new.create_sheet(4)
    ws_transfers_in.title = 'Transfers In'
    ws_transfers_out = wb_new.create_sheet(5)
    ws_transfers_out.title = 'Transfers Out'
    
    # get the list of files in the directory
    onlyfiles = [ f for f in listdir(data_dir_path) if isfile(join(data_dir_path,f))]

    # process each file
    for f in onlyfiles:
        process_file(data_dir_path + f, wb_new)

    # save the new workbook
    wb_new.save(output_file_path)
def test_get_sheet_names():
    wb = Workbook()
    names = ['Sheet', 'Sheet1', 'Sheet2', 'Sheet3', 'Sheet4', 'Sheet5']
    for count in range(5):
        wb.create_sheet(0)
    actual_names = wb.get_sheet_names()
    assert sorted(actual_names) == sorted(names)
Ejemplo n.º 4
0
def test_get_sheet_names():
    wb = Workbook()
    names = ["Sheet", "Sheet1", "Sheet2", "Sheet3", "Sheet4", "Sheet5"]
    for count in range(5):
        wb.create_sheet(0)
    actual_names = wb.get_sheet_names()
    eq_(sorted(actual_names), sorted(names))
Ejemplo n.º 5
0
def createReport(conn,series,dateList):
    #新建一个workbook
    wb = Workbook()
    #默认sheet"报表说明"
    sheet = wb.worksheets[0]
    sheet.title = u'报表说明'
    #生成"报表说明"内容
    createReportSpecification(sheet, conn, series, dateList)
    #创建sheet"网站经销商数量变化分析"
    sheet = wb.create_sheet(u'网站经销商数量变化分析', 1)
    #生成"网站经销商数量变化分析"内容
    createAnalysisOfWebsiteDealer(sheet, conn, series, dateList)
    #创建sheet"网站报价均值分析"
    sheet = wb.create_sheet(u'网站报价均值分析', 2)
    #生成"网站报价均值分析"内容
    createAnalysisOfNetworkOffer(sheet, conn, series, dateList)
    #创建sheet"大区报价分析"
    sheet = wb.create_sheet(u'大区报价分析', 3)
    #生成"大区报价分析"内容
    createAnalysisOfAreaPrice(sheet, conn, series, dateList)
    #创建sheet"省份报价分析"
    sheet = wb.create_sheet(u'省份报价分析', 4)
    #生成"省份报价分析"内容
    createTableAnalysisOfProvincesOffer(sheet, conn, series, dateList)
    #创建sheet"报价详细"
    sheet = wb.create_sheet(u'报价详细', 5)
    #生成"报价详细"内容
    createTableAnalysisOfDetailedQuotation(sheet, conn, series, dateList)
    file_dir = settings.filePath+'\\report\\'+series.encode('gbk')+'-报价日报_'.encode('gbk')+\
               dateList[len(dateList)-1].strftime('%Y-%m-%d')+'.xlsx'
    #保存文件
    wb.save(file_dir)
Ejemplo n.º 6
0
def test_write_content_types():
    wb = Workbook()
    wb.create_sheet()
    wb.create_sheet()
    content = write_content_types(wb)
    reference_file = os.path.join(DATADIR, 'writer', 'expected',
            '[Content_Types].xml')
    assert_equals_file_content(reference_file, content)
Ejemplo n.º 7
0
 def test_write_properties_app(self):
     wb = Workbook()
     wb.create_sheet()
     wb.create_sheet()
     content = write_properties_app(wb)
     assert_equals_file_content(
             os.path.join(DATADIR, 'writer', 'expected', 'app.xml'),
             content)
Ejemplo n.º 8
0
def FacebookProfiles(path=None , analysisid=None, delim=',', quote ='"',num_to_str=False) :
    
    IDCOL=0
    sheet_index = 0 
    if path[len(path)-1] != '/' : path+='/'
    profile_file_obj='ua_fb_user.csv'
    comment_net_files = get_comment_net_csvs(path)
    id_user_map=user_map(path+profile_file_obj)
    
    #create workbook
    workbook = Workbook()
    
    #add profiles sheet
    sheet = workbook.create_sheet(sheet_index)
    sheet_index+=1
    sheet.title="All_Users"
    for row_index, row in enumerate(
        csv.reader(open(path+profile_file_obj), delimiter=delim, quotechar = quote)):
        for col_index, col in enumerate(row):
            if col_index == IDCOL :
                col = str(col)  + "_"
            sheet.cell(row = row_index, column = col_index).value = col    
    
    #add network metrics sheet(s)
    for netfile in comment_net_files :
        uid=-1
        lastind=0
        sheet = workbook.create_sheet(sheet_index)
        sheet_index+=1
        #verify function call on sheet object
        sheet.title = netfile.replace(".csv","").replace("fb_comment_net","").strip("_")
        reader=csv.reader(open(path+netfile), delimiter=delim, quotechar = quote)
        
        print 'writing network stats...'
        for row_index, row in enumerate(
            reader):
            
            if row_index == 0 :
                sheet.cell(row = row_index, column = 0 ).value  = 'node_id'
                sheet.cell(row = row_index, column = 1 ).value  = 'name'
                for col_index in range(1,len(row)) :
                    sheet.cell(row = row_index, column = col_index + 1 ).value = row[col_index]
                sheet.cell(row = row_index, column = len(row) + 1).value  = 'link'
            else :
                for col_index, col in enumerate(row):
                    if col_index == IDCOL :
                        col_ = col + "_"  # (for print out)
                        uid = col
                        
                        sheet.cell(row = row_index, column = col_index ).value = col_
                        sheet.cell(row = row_index, column = col_index + 1 ).value = id_user_map[col]
                    else :
                        sheet.cell(row = row_index, column = col_index + 1).value = col
                        lastind = col_index + 1
                sheet.cell(row = row_index, column = lastind + 1  ).value = "http://facebook.com/" + uid
    #save workbook
    print 'writing output file ' + path + "fb_comment_network_stats" + "_" + analysisid + ".xlsx"
    workbook.save(open(path + ("fb_comment_network_stats" + "_" + analysisid ).upper()+ ".xlsx",'w') )
Ejemplo n.º 9
0
def test_write_content_types(datadir):
    datadir.chdir()
    wb = Workbook()
    wb.create_sheet()
    wb.create_sheet()
    content = write_content_types(wb)
    with open('[Content_Types].xml') as expected:
        diff = compare_xml(content, expected.read())
        assert diff is None, diff
Ejemplo n.º 10
0
def test_write_properties_app(datadir):
    datadir.join("writer").chdir()
    wb = Workbook()
    wb.create_sheet()
    wb.create_sheet()
    content = write_properties_app(wb)
    with open('app.xml') as expected:
        diff = compare_xml(content, expected.read())
        assert diff is None, diff
Ejemplo n.º 11
0
 def test_write_properties_app(self):
     wb = Workbook()
     wb.create_sheet()
     wb.create_sheet()
     content = write_properties_app(wb)
     reference_file = os.path.join(DATADIR, "writer", "expected", "app.xml")
     with open(reference_file) as expected:
         diff = compare_xml(content, expected.read())
         assert diff is None
Ejemplo n.º 12
0
def test_write_content_types():
    wb = Workbook()
    wb.create_sheet()
    wb.create_sheet()
    content = write_content_types(wb)
    reference_file = os.path.join(DATADIR, 'writer', 'expected',
            '[Content_Types].xml')
    with open(reference_file) as expected:
        diff = compare_xml(content, expected.read())
        assert diff is None, diff
def write_DataFrame(dfs, folder, filename, meta_info = [], missing = None):
    #dfs = list[info]
    #info = {'df': df, 'sheetname': sheetname, 'cols' = []}, sheet and cols optional
    writer = Workbook(optimized_write = True)
    if len(meta_info) > 0:
        ws = writer.create_sheet(title = 'read me')
        for row in meta_info:
            for i in range(len(row)):
                row[i] = str(row[i])
            ws.append(row)
    for info in dfs:
        if 'sheetname' not in info:
            ws = writer.create_sheet(title = 'Sheet' + str(len(writer.worksheets)+1))
        else:
            ws = writer.create_sheet(title = info['sheetname'])
        if 'cols' not in info:
            cols = list(info['df'].axes[1])
        else:
            cols = info['cols']
        for i in range(len(cols)):
            cols[i] = str(cols[i])
        cols_copy = deepcopy(cols)
        for i in range(len(cols)):
            if type(cols[i]) in [str, unicode]:
                cols_copy[i] = killgremlins(cols[i])
        ws.append(cols_copy)
        try:
            info['df'] = info['df'].reset_index()
        except:
            pass
        #info['df'] = info['df'].reset_index()
        for i in range(len(info['df'])):
            row = list(info['df'].iloc[i][cols])
            for j in range(len(row)):
                try:
                    row[j] = killgremlins(row[j])
                except:
                    pass
                if type(row[j]) in [float, int64, float64] and isnan(row[j]):
                    row[j] = missing
                if type(row[j]) in [str, unicode]:
                   row[j] = killgremlins(row[j])
                elif type(row[j]) == bool_:
                    if row[j]:
                        row[j] = 'TRUE'
                    else:
                        row[j] = 'FALSE'
                elif type(row[j]) == int64:
                    row[j] = float(row[j])
                elif type(row[j]) in [tuple, list]:
                    row[j] = str(row[j])
            ws.append(row)
        ws.freeze_panes = 'A2'
    writer.save(os.path.join(folder, filename))
Ejemplo n.º 14
0
def test_write_hidden_worksheet():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.sheet_state = ws.SHEETSTATE_HIDDEN
    ws.cell("F42").value = "hello"
    content = write_worksheet(ws, {"hello": 0}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1.xml"), content)
Ejemplo n.º 15
0
def test_write_height():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("F1").value = 10
    ws.row_dimensions[ws.cell("F1").row].height = 30
    content = write_worksheet(ws, {}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_height.xml"), content)
Ejemplo n.º 16
0
def test_hyperlink_value():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("A1").hyperlink = "http://test.com"
    eq_("http://test.com", ws.cell("A1").value)
    ws.cell("A1").value = "test"
    eq_("test", ws.cell("A1").value)
Ejemplo n.º 17
0
def test_write_hyperlink():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("A1").value = "test"
    ws.cell("A1").hyperlink = "http://test.com"
    content = write_worksheet(ws, {"test": 0}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_hyperlink.xml"), content)
Ejemplo n.º 18
0
def test_write_worksheet():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell('F42').value = 'hello'
    content = write_worksheet(ws, {'hello': 0}, {})
    assert_equals_file_content(os.path.join(DATADIR, 'writer', 'expected', \
            'sheet1.xml'), content)
Ejemplo n.º 19
0
def test_write_style():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("F1").value = "13%"
    style_id_by_hash = StyleWriter(wb).get_style_by_hash()
    content = write_worksheet(ws, {}, style_id_by_hash)
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_style.xml"), content)
Ejemplo n.º 20
0
def test_decimal():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell('A1').value = decimal.Decimal('3.14')
    content = write_worksheet(ws, {}, {})
    assert_equals_file_content(os.path.join(DATADIR, 'writer', 'expected', \
            'decimal.xml'), content)
Ejemplo n.º 21
0
def test_short_number():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell('A1').value = 1234567890
    content = write_worksheet(ws, {}, {})
    assert_equals_file_content(os.path.join(DATADIR, 'writer', 'expected', \
            'short_number.xml'), content)
Ejemplo n.º 22
0
def test_get_sheet_by_name():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    title = "my sheet"
    new_sheet.title = title
    found_sheet = wb.get_sheet_by_name(title)
    eq_(new_sheet, found_sheet)
Ejemplo n.º 23
0
def test_add_named_range():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    named_range = NamedRange("test_nr", [(new_sheet, "A1")])
    wb.add_named_range(named_range)
    named_ranges_list = wb.get_named_ranges()
    assert named_range in named_ranges_list
Ejemplo n.º 24
0
def test_freeze_panes_both():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("F42").value = "hello"
    ws.freeze_panes = "D4"
    content = write_worksheet(ws, {"hello": 0}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_freeze_panes_both.xml"), content)
Ejemplo n.º 25
0
def test_get_named_range():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    named_range = NamedRange("test_nr", [(new_sheet, "A1")])
    wb.add_named_range(named_range)
    found_named_range = wb.get_named_range("test_nr")
    eq_(named_range, found_named_range)
Ejemplo n.º 26
0
def test_hyperlink_value():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell('A1').hyperlink = "http://test.com"
    assert "http://test.com" == ws.cell('A1').value
    ws.cell('A1').value = "test"
    assert "test" == ws.cell('A1').value
def test_get_named_range():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    named_range = NamedRange('test_nr', [(new_sheet, 'A1')])
    wb.add_named_range(named_range)
    found_named_range = wb.get_named_range('test_nr')
    assert named_range == found_named_range
Ejemplo n.º 28
0
class XLSXRenderer(Renderer):
    def __init__(self, renderers_func, config):
        if not has_openpyxl:
            debug.error("You must install OpenPyxl 2.1.2 for xlsx format:\n\thttps://pypi.python.org/pypi/openpyxl")
        self._config = config
        self._columns = None
        self._text_cell_renderers_func = renderers_func
        self._text_cell_renderers = None
        self._wb = Workbook(optimized_write=True)
        self._ws = self._wb.create_sheet()

    def description(self):
        output = []
        for column in self._columns:
            output.append((column.name))
        return output

    def _add_row(self, node, data):
        accumulator = data
        accumulator[node] = max(accumulator.values()) + 1
        self._ws.append(list(node.values))
        return accumulator

    def render(self, outfd, grid):
        """Renders the TreeGrid in data out to the output file from the config options"""
        if not self._config.OUTPUT_FILE:
            debug.error("Please specify a valid output file using --output-file")
        self._columns = grid.columns
        self._text_cell_renderers = self._text_cell_renderers_func(self._columns)
        self._ws.append(self.description())
        grid.visit(None, self._add_row, {None: 0})
        self._wb.save(filename=self._config.OUTPUT_FILE)
Ejemplo n.º 29
0
def test_write_bool():
    wb = Workbook()
    ws = wb.create_sheet()
    ws.cell("F42").value = False
    ws.cell("F43").value = True
    content = write_worksheet(ws, {}, {})
    assert_equals_file_content(os.path.join(DATADIR, "writer", "expected", "sheet1_bool.xml"), content)
def test_get_sheet_by_name():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    title = 'my sheet'
    new_sheet.title = title
    found_sheet = wb.get_sheet_by_name(title)
    assert new_sheet == found_sheet
Ejemplo n.º 31
0
def test_create_sheet_with_name():
    wb = Workbook()
    new_sheet = wb.create_sheet(0, title='LikeThisName')
    eq_(new_sheet, wb.worksheets[0])
Ejemplo n.º 32
0
def test_add_correct_sheet():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
    wb.add_sheet(new_sheet)
    eq_(new_sheet, wb.worksheets[2])
Ejemplo n.º 33
0
def test_create_sheet_readonly():
    wb = Workbook()
    wb._set_optimized_read()
    wb.create_sheet()
Ejemplo n.º 34
0
#encoding=utf-8
import os
import json
from openpyxl.workbook import Workbook

List = os.listdir(os.getcwd())
NewList = filter(lambda x: "." not in x, List)

wb = Workbook()
sheet = wb.create_sheet("Summary")
sheet = wb["Sheet"]
wb.remove(sheet)
SumTitleLine = ["No", "Name", "Status", "Owner"]
sheet = wb["Summary"]
for index, rowValue in enumerate(["SumTitleLine"] + NewList):
    if index == 0:
        sheet.append(SumTitleLine)
    else:
        rowContent = [str(index), rowValue]
        sheet.append(rowContent)
sheet.column_dimensions['B'].width = 40
wb.save("OWASP_Scan_Staus.xlsx")
Ejemplo n.º 35
0
def test_remove_sheet():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
    wb.remove(new_sheet)
    assert new_sheet not in wb.worksheets
Ejemplo n.º 36
0
def test_create_sheet():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
    eq_(new_sheet, wb.worksheets[0])
Ejemplo n.º 37
0
 def dump_workbook():
     wb = Workbook(optimized_write=True)
     ws = wb.create_sheet()
     ws.append(range(30))
     wb.save(filename=BytesIO())
Ejemplo n.º 38
0
"""

import argparse
import csv
import sys
from openpyxl.workbook import Workbook

parser = argparse.ArgumentParser()
parser.add_argument("input_file")
parser.add_argument("-d",
                    "--delimiter",
                    default="\t",
                    help="select delimiter character")
args = parser.parse_args()

if not ".csv" in args.input_file:
    sys.stderr.write("Error: File does not have the ending \".csv\".\n")
    sys.exit(2)

input_fh = open(args.input_file)
workbook = Workbook()
sheet = workbook.create_sheet(0)

for row_index, row in enumerate(
        csv.reader(open(args.input_file), delimiter=args.delimiter)):
    for col_index, col in enumerate(row):
        sheet.cell(row=row_index, column=col_index).value = col

workbook.save(open(args.input_file.replace(".csv", ".xlsx"), "w"))
Ejemplo n.º 39
0
def test_get_index():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    sheet_index = wb.get_index(new_sheet)
    assert sheet_index == 1
Ejemplo n.º 40
0
def test_get_index():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
Ejemplo n.º 41
0
class TestStyleWriter(object):
    def setUp(self):

        self.workbook = Workbook()
        self.worksheet = self.workbook.create_sheet()

    def test_no_style(self):

        w = StyleWriter(self.workbook)
        eq_(0, len(w.style_table))

    def test_nb_style(self):

        for i in range(1, 6):
            self.worksheet.cell(row=1, column=i).style.font.size += i
        w = StyleWriter(self.workbook)
        eq_(5, len(w.style_table))

        self.worksheet.cell('A10').style.borders.top = Border.BORDER_THIN
        w = StyleWriter(self.workbook)
        eq_(6, len(w.style_table))

    def test_style_unicity(self):

        for i in range(1, 6):
            self.worksheet.cell(row=1, column=i).style.font.bold = True
        w = StyleWriter(self.workbook)
        eq_(1, len(w.style_table))

    def test_fonts(self):

        self.worksheet.cell('A1').style.font.size = 12
        self.worksheet.cell('A1').style.font.bold = True
        w = StyleWriter(self.workbook)
        w._write_fonts()
        eq_(
            get_xml(w._root),
            '<?xml version=\'1.0\' encoding=\'UTF-8\'?><styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><fonts count="2"><font><sz val="11" /><color theme="1" /><name val="Calibri" /><family val="2" /><scheme val="minor" /></font><font><sz val="12" /><color rgb="FF000000" /><name val="Calibri" /><family val="2" /><b /></font></fonts></styleSheet>'
        )

    def test_fonts_with_underline(self):
        self.worksheet.cell('A1').style.font.size = 12
        self.worksheet.cell('A1').style.font.bold = True
        self.worksheet.cell('A1').style.font.underline = Font.UNDERLINE_SINGLE
        w = StyleWriter(self.workbook)
        w._write_fonts()
        eq_(
            get_xml(w._root),
            '<?xml version=\'1.0\' encoding=\'UTF-8\'?><styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><fonts count="2"><font><sz val="11" /><color theme="1" /><name val="Calibri" /><family val="2" /><scheme val="minor" /></font><font><sz val="12" /><color rgb="FF000000" /><name val="Calibri" /><family val="2" /><b /><u /></font></fonts></styleSheet>'
        )

    def test_fills(self):

        self.worksheet.cell('A1').style.fill.fill_type = 'solid'
        self.worksheet.cell(
            'A1').style.fill.start_color.index = Color.DARKYELLOW
        w = StyleWriter(self.workbook)
        w._write_fills()
        eq_(
            get_xml(w._root),
            '<?xml version=\'1.0\' encoding=\'UTF-8\'?><styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><fills count="3"><fill><patternFill patternType="none" /></fill><fill><patternFill patternType="gray125" /></fill><fill><patternFill patternType="solid"><fgColor rgb="FF808000" /></patternFill></fill></fills></styleSheet>'
        )

    def test_borders(self):

        self.worksheet.cell(
            'A1').style.borders.top.border_style = Border.BORDER_THIN
        self.worksheet.cell(
            'A1').style.borders.top.color.index = Color.DARKYELLOW
        w = StyleWriter(self.workbook)
        w._write_borders()
        eq_(
            get_xml(w._root),
            '<?xml version=\'1.0\' encoding=\'UTF-8\'?><styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><borders count="2"><border><left /><right /><top /><bottom /><diagonal /></border><border><left /><right /><top style="thin"><color rgb="FF808000" /></top><bottom /><diagonal /></border></borders></styleSheet>'
        )

    def test_write_cell_xfs_1(self):

        self.worksheet.cell('A1').style.font.size = 12
        w = StyleWriter(self.workbook)
        ft = w._write_fonts()
        nft = w._write_number_formats()
        w._write_cell_xfs(nft, ft, {}, {})
        xml = get_xml(w._root)
        ok_('applyFont="1"' in xml)
        ok_('applyFill="1"' not in xml)
        ok_('applyBorder="1"' not in xml)
        ok_('applyAlignment="1"' not in xml)

    def test_alignment(self):
        self.worksheet.cell('A1').style.alignment.horizontal = 'center'
        self.worksheet.cell('A1').style.alignment.vertical = 'center'
        w = StyleWriter(self.workbook)
        nft = w._write_number_formats()
        w._write_cell_xfs(nft, {}, {}, {})
        xml = get_xml(w._root)
        ok_('applyAlignment="1"' in xml)
        ok_('horizontal="center"' in xml)
        ok_('vertical="center"' in xml)

    def test_alignment_rotation(self):
        self.worksheet.cell('A1').style.alignment.vertical = 'center'
        self.worksheet.cell('A1').style.alignment.text_rotation = 90
        self.worksheet.cell('A2').style.alignment.vertical = 'center'
        self.worksheet.cell('A2').style.alignment.text_rotation = 135
        self.worksheet.cell('A3').style.alignment.text_rotation = -34
        w = StyleWriter(self.workbook)
        nft = w._write_number_formats()
        w._write_cell_xfs(nft, {}, {}, {})
        xml = get_xml(w._root)
        ok_('textRotation="90"' in xml)
        ok_('textRotation="135"' in xml)
        ok_('textRotation="124"' in xml)

    def test_alignment_indent(self):
        self.worksheet.cell('A1').style.alignment.indent = 1
        self.worksheet.cell('A2').style.alignment.indent = 4
        self.worksheet.cell('A3').style.alignment.indent = 0
        self.worksheet.cell('A3').style.alignment.indent = -1
        w = StyleWriter(self.workbook)
        nft = w._write_number_formats()
        w._write_cell_xfs(nft, {}, {}, {})
        xml = get_xml(w._root)
        ok_('indent="1"' in xml)
        ok_('indent="4"' in xml)
        #Indents not greater than zero are ignored when writing
        ok_('indent="0"' not in xml)
        ok_('indent="-1"' not in xml)
Ejemplo n.º 42
0
def test_add_correct_sheet():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    wb._add_sheet(new_sheet)
    assert new_sheet == wb.worksheets[2]
Ejemplo n.º 43
0
def test_create_sheet():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    assert new_sheet == wb.worksheets[-1]
Ejemplo n.º 44
0
def test_create_sheet_readonly():
    wb = Workbook()
    wb._read_only = True
    with pytest.raises(ReadOnlyWorkbookException):
        wb.create_sheet()
Ejemplo n.º 45
0
def test_get_index():
    wb = Workbook()
    new_sheet = wb.create_sheet(0)
    sheet_index = wb.get_index(new_sheet)
    eq_(sheet_index, 0)
Ejemplo n.º 46
0
def test_create_sheet_with_name():
    wb = Workbook()
    new_sheet = wb.create_sheet(title='LikeThisName')
    assert new_sheet == wb.worksheets[-1]
Ejemplo n.º 47
0
def test_get_named_range():
    wb = Workbook()
    new_sheet = wb.create_sheet()
    wb.create_named_range('test_nr', new_sheet, 'A1')
    assert wb.defined_names['test_nr'].value == 'Sheet1!A1'
Ejemplo n.º 48
0
 def test_cannot_copy_writeonly(self):
     wb = Workbook(write_only=True)
     ws = wb.create_sheet()
     with pytest.raises(ValueError):
         wb.copy_worksheet(ws)
Ejemplo n.º 49
0
class _OpenpyxlWriter(ExcelWriter):
    engine = "openpyxl"
    supported_extensions = (".xlsx", ".xlsm")

    def __init__(self, path, engine=None, mode="w", **engine_kwargs):
        # Use the openpyxl module as the Excel writer.
        from openpyxl.workbook import Workbook

        super().__init__(path, mode=mode, **engine_kwargs)

        if self.mode == "a":  # Load from existing workbook
            from openpyxl import load_workbook

            book = load_workbook(self.path)
            self.book = book
        else:
            # Create workbook object with default optimized_write=True.
            self.book = Workbook()

            if self.book.worksheets:
                try:
                    self.book.remove(self.book.worksheets[0])
                except AttributeError:

                    # compat - for openpyxl <= 2.4
                    self.book.remove_sheet(self.book.worksheets[0])

    def save(self):
        """
        Save workbook to disk.
        """
        return self.book.save(self.path)

    @classmethod
    def _convert_to_style(cls, style_dict):
        """
        Converts a style_dict to an openpyxl style object.

        Parameters
        ----------
        style_dict : style dictionary to convert
        """
        from openpyxl.style import Style

        xls_style = Style()
        for key, value in style_dict.items():
            for nk, nv in value.items():
                if key == "borders":
                    (xls_style.borders.__getattribute__(nk).__setattr__(
                        "border_style", nv))
                else:
                    xls_style.__getattribute__(key).__setattr__(nk, nv)

        return xls_style

    @classmethod
    def _convert_to_style_kwargs(cls, style_dict):
        """
        Convert a style_dict to a set of kwargs suitable for initializing
        or updating-on-copy an openpyxl v2 style object.

        Parameters
        ----------
        style_dict : dict
            A dict with zero or more of the following keys (or their synonyms).
                'font'
                'fill'
                'border' ('borders')
                'alignment'
                'number_format'
                'protection'

        Returns
        -------
        style_kwargs : dict
            A dict with the same, normalized keys as ``style_dict`` but each
            value has been replaced with a native openpyxl style object of the
            appropriate class.
        """
        _style_key_map = {"borders": "border"}

        style_kwargs = {}
        for k, v in style_dict.items():
            if k in _style_key_map:
                k = _style_key_map[k]
            _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
            new_v = _conv_to_x(v)
            if new_v:
                style_kwargs[k] = new_v

        return style_kwargs

    @classmethod
    def _convert_to_color(cls, color_spec):
        """
        Convert ``color_spec`` to an openpyxl v2 Color object.

        Parameters
        ----------
        color_spec : str, dict
            A 32-bit ARGB hex string, or a dict with zero or more of the
            following keys.
                'rgb'
                'indexed'
                'auto'
                'theme'
                'tint'
                'index'
                'type'

        Returns
        -------
        color : openpyxl.styles.Color
        """
        from openpyxl.styles import Color

        if isinstance(color_spec, str):
            return Color(color_spec)
        else:
            return Color(**color_spec)

    @classmethod
    def _convert_to_font(cls, font_dict):
        """
        Convert ``font_dict`` to an openpyxl v2 Font object.

        Parameters
        ----------
        font_dict : dict
            A dict with zero or more of the following keys (or their synonyms).
                'name'
                'size' ('sz')
                'bold' ('b')
                'italic' ('i')
                'underline' ('u')
                'strikethrough' ('strike')
                'color'
                'vertAlign' ('vertalign')
                'charset'
                'scheme'
                'family'
                'outline'
                'shadow'
                'condense'

        Returns
        -------
        font : openpyxl.styles.Font
        """
        from openpyxl.styles import Font

        _font_key_map = {
            "sz": "size",
            "b": "bold",
            "i": "italic",
            "u": "underline",
            "strike": "strikethrough",
            "vertalign": "vertAlign",
        }

        font_kwargs = {}
        for k, v in font_dict.items():
            if k in _font_key_map:
                k = _font_key_map[k]
            if k == "color":
                v = cls._convert_to_color(v)
            font_kwargs[k] = v

        return Font(**font_kwargs)

    @classmethod
    def _convert_to_stop(cls, stop_seq):
        """
        Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
        suitable for initializing the ``GradientFill`` ``stop`` parameter.

        Parameters
        ----------
        stop_seq : iterable
            An iterable that yields objects suitable for consumption by
            ``_convert_to_color``.

        Returns
        -------
        stop : list of openpyxl.styles.Color
        """
        return map(cls._convert_to_color, stop_seq)

    @classmethod
    def _convert_to_fill(cls, fill_dict):
        """
        Convert ``fill_dict`` to an openpyxl v2 Fill object.

        Parameters
        ----------
        fill_dict : dict
            A dict with one or more of the following keys (or their synonyms),
                'fill_type' ('patternType', 'patterntype')
                'start_color' ('fgColor', 'fgcolor')
                'end_color' ('bgColor', 'bgcolor')
            or one or more of the following keys (or their synonyms).
                'type' ('fill_type')
                'degree'
                'left'
                'right'
                'top'
                'bottom'
                'stop'

        Returns
        -------
        fill : openpyxl.styles.Fill
        """
        from openpyxl.styles import GradientFill, PatternFill

        _pattern_fill_key_map = {
            "patternType": "fill_type",
            "patterntype": "fill_type",
            "fgColor": "start_color",
            "fgcolor": "start_color",
            "bgColor": "end_color",
            "bgcolor": "end_color",
        }

        _gradient_fill_key_map = {"fill_type": "type"}

        pfill_kwargs = {}
        gfill_kwargs = {}
        for k, v in fill_dict.items():
            pk = gk = None
            if k in _pattern_fill_key_map:
                pk = _pattern_fill_key_map[k]
            if k in _gradient_fill_key_map:
                gk = _gradient_fill_key_map[k]
            if pk in ["start_color", "end_color"]:
                v = cls._convert_to_color(v)
            if gk == "stop":
                v = cls._convert_to_stop(v)
            if pk:
                pfill_kwargs[pk] = v
            elif gk:
                gfill_kwargs[gk] = v
            else:
                pfill_kwargs[k] = v
                gfill_kwargs[k] = v

        try:
            return PatternFill(**pfill_kwargs)
        except TypeError:
            return GradientFill(**gfill_kwargs)

    @classmethod
    def _convert_to_side(cls, side_spec):
        """
        Convert ``side_spec`` to an openpyxl v2 Side object.

        Parameters
        ----------
        side_spec : str, dict
            A string specifying the border style, or a dict with zero or more
            of the following keys (or their synonyms).
                'style' ('border_style')
                'color'

        Returns
        -------
        side : openpyxl.styles.Side
        """
        from openpyxl.styles import Side

        _side_key_map = {"border_style": "style"}

        if isinstance(side_spec, str):
            return Side(style=side_spec)

        side_kwargs = {}
        for k, v in side_spec.items():
            if k in _side_key_map:
                k = _side_key_map[k]
            if k == "color":
                v = cls._convert_to_color(v)
            side_kwargs[k] = v

        return Side(**side_kwargs)

    @classmethod
    def _convert_to_border(cls, border_dict):
        """
        Convert ``border_dict`` to an openpyxl v2 Border object.

        Parameters
        ----------
        border_dict : dict
            A dict with zero or more of the following keys (or their synonyms).
                'left'
                'right'
                'top'
                'bottom'
                'diagonal'
                'diagonal_direction'
                'vertical'
                'horizontal'
                'diagonalUp' ('diagonalup')
                'diagonalDown' ('diagonaldown')
                'outline'

        Returns
        -------
        border : openpyxl.styles.Border
        """
        from openpyxl.styles import Border

        _border_key_map = {
            "diagonalup": "diagonalUp",
            "diagonaldown": "diagonalDown"
        }

        border_kwargs = {}
        for k, v in border_dict.items():
            if k in _border_key_map:
                k = _border_key_map[k]
            if k == "color":
                v = cls._convert_to_color(v)
            if k in ["left", "right", "top", "bottom", "diagonal"]:
                v = cls._convert_to_side(v)
            border_kwargs[k] = v

        return Border(**border_kwargs)

    @classmethod
    def _convert_to_alignment(cls, alignment_dict):
        """
        Convert ``alignment_dict`` to an openpyxl v2 Alignment object.

        Parameters
        ----------
        alignment_dict : dict
            A dict with zero or more of the following keys (or their synonyms).
                'horizontal'
                'vertical'
                'text_rotation'
                'wrap_text'
                'shrink_to_fit'
                'indent'
        Returns
        -------
        alignment : openpyxl.styles.Alignment
        """
        from openpyxl.styles import Alignment

        return Alignment(**alignment_dict)

    @classmethod
    def _convert_to_number_format(cls, number_format_dict):
        """
        Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
        initializer.

        Parameters
        ----------
        number_format_dict : dict
            A dict with zero or more of the following keys.
                'format_code' : str

        Returns
        -------
        number_format : str
        """
        return number_format_dict["format_code"]

    @classmethod
    def _convert_to_protection(cls, protection_dict):
        """
        Convert ``protection_dict`` to an openpyxl v2 Protection object.

        Parameters
        ----------
        protection_dict : dict
            A dict with zero or more of the following keys.
                'locked'
                'hidden'

        Returns
        -------
        """
        from openpyxl.styles import Protection

        return Protection(**protection_dict)

    def write_cells(self,
                    cells,
                    sheet_name=None,
                    startrow=0,
                    startcol=0,
                    freeze_panes=None):
        # Write the frame cells using openpyxl.
        sheet_name = self._get_sheet_name(sheet_name)

        _style_cache = {}

        if sheet_name in self.sheets:
            wks = self.sheets[sheet_name]
        else:
            wks = self.book.create_sheet()
            wks.title = sheet_name
            self.sheets[sheet_name] = wks

        if _validate_freeze_panes(freeze_panes):
            wks.freeze_panes = wks.cell(row=freeze_panes[0] + 1,
                                        column=freeze_panes[1] + 1)

        for cell in cells:
            xcell = wks.cell(row=startrow + cell.row + 1,
                             column=startcol + cell.col + 1)
            xcell.value, fmt = self._value_with_fmt(cell.val)
            if fmt:
                xcell.number_format = fmt

            style_kwargs = {}
            if cell.style:
                key = str(cell.style)
                style_kwargs = _style_cache.get(key)
                if style_kwargs is None:
                    style_kwargs = self._convert_to_style_kwargs(cell.style)
                    _style_cache[key] = style_kwargs

            if style_kwargs:
                for k, v in style_kwargs.items():
                    setattr(xcell, k, v)

            if cell.mergestart is not None and cell.mergeend is not None:

                wks.merge_cells(
                    start_row=startrow + cell.row + 1,
                    start_column=startcol + cell.col + 1,
                    end_column=startcol + cell.mergeend + 1,
                    end_row=startrow + cell.mergestart + 1,
                )

                # When cells are merged only the top-left cell is preserved
                # The behaviour of the other cells in a merged range is
                # undefined
                if style_kwargs:
                    first_row = startrow + cell.row + 1
                    last_row = startrow + cell.mergestart + 1
                    first_col = startcol + cell.col + 1
                    last_col = startcol + cell.mergeend + 1

                    for row in range(first_row, last_row + 1):
                        for col in range(first_col, last_col + 1):
                            if row == first_row and col == first_col:
                                # Ignore first cell. It is already handled.
                                continue
                            xcell = wks.cell(column=col, row=row)
                            for k, v in style_kwargs.items():
                                setattr(xcell, k, v)
Ejemplo n.º 50
0
def test_get_sheet_names():
    wb = Workbook()
    names = ['Sheet', 'Sheet1', 'Sheet2', 'Sheet3', 'Sheet4', 'Sheet5']
    for count in range(5):
        wb.create_sheet(0)
    assert wb.sheetnames == names
Ejemplo n.º 51
0
        for rownum in random.sample(range(1, 100), num):
            for columnnum in range(1, sheet.max_column + 1):
                data_list.append(
                    sheet.cell(row=rownum, column=columnnum).value)

    for i in data_list:
        print(i)
    a = []
    for i in range(len(data_list)):
        if i % 2 == 0:
            a.append(data_list[i])
    for i in a:
        print(i)

    wbl = Workbook()
    new_ws = wbl.create_sheet(title='test')
    key_ws = wbl.create_sheet(title='test_key')
    i = 1
    k = 1
    for data_1 in range(len(data_list)):
        key_ws.cell(column=k, row=i, value=data_list[data_l])
        if k % 2 == 1:
            k = 2
        else:
            k = 1
            i += 1

    i = 1
    for data_m in range(len(a)):
        new_ws.cell(column=1, row=i, value=a[data_m])
        i += 1
Ejemplo n.º 52
0
class CheKeShengXian:
    '''车客生鲜爬虫'''
    def __init__(self):

        self.proxies = {}
        self.ipf = IpFilter('1')
        self.dict = []
        self.dict = UserAgent.MY_USER_AGENT
        self.outwb = Workbook()
        self.BaseUrl = "https://api.jingchengcaidian.com/api/Category/getCategoryName?"
        self.headers = {
            'charset': 'utf-8',
            'Accept-Encoding': 'gzip',
            'referer':
            'https://servicewechat.com/wx2911548a18ed7d95/42/page-frame.html',
            'content-type': 'application/json',
            'User-Agent':
            'Mozilla/5.0 (Linux; Android 8.0.0; FRD-AL10 Build/HUAWEIFRD-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070337) NetType/WIFI Language/zh_CN Process/appbrand2',
            'Host': 'api.jingchengcaidian.com',
            'Connection': 'Keep-Alive'
        }
        self.PidIndex = {
            '7': '蔬菜',
            '5': '水果',
            '4': '水产',
            '8': '禽蛋',
            '1': '肉类',
            '9': '粮油',
            '2': '乳饮',
            '10': '零食',
            '3': '速食',
            '6': '生活',
            '23': '扶贫'
        }
        self.pageCount = [1, 2, 3, 4]

    def initialization(self):
        self.headers['User-Agent'] = random.sample(self.dict, 1)[0]
        #self.proxies = self.ipf.getIp()
        self.proxies = {'HTTP': 'localhost:8080'}
        #s设置请求头和ip

    def getUrlAndSheet(self):  #url工厂
        returnData = {}
        for pid, value in self.PidIndex.items():
            urlList = []
            for page in self.pageCount:
                url = self.BaseUrl + 'pid=' + pid + '&page=' + str(page)
                urlList.append(url)
            returnData[pid] = urlList
        return returnData

    def getExcel(self, careerName, outwb):
        wo = self.outwb.active
        careerSheet = self.outwb.create_sheet(unicode(careerName), 0)
        careerSheet.append([
            'id', '库存amount', '下单数buynum', '团购价格group_price',
            '划线价market_price', '名称name', '特色标签label', '描述samllpromotion'
        ])
        return careerSheet

    def SaveExcel(self, ExcelName):
        self.outwb.save(ExcelName + '.xlsx')

    def getData(self):
        urlDict = self.getUrlAndSheet()
        for key, values in urlDict.items():
            careerSheet = self.getExcel(self.PidIndex[key], self.outwb)
            for url in values:
                bool = True
                while bool:  #如果代理ip和请求头未能获得数据,则更换信息,继续请求
                    try:
                        self.initialization()

                        req = requests.get(url=url,
                                           headers=self.headers).json()
                        data1 = json.dumps(req, ensure_ascii=False)
                        data2 = json.loads(data1)
                        if data2['message'] == '列表获取成功':
                            for item in data2['data']:
                                careerSheet.append([
                                    item['id'], item['amount'], item['buynum'],
                                    item['group_price'], item['market_price'],
                                    item['name'], item['lable'],
                                    item['smallpromotion']
                                ])
                        bool = False
                        print data2['message']
                    except:
                        print '爬虫失败,继续尝试'

    def Crawl(self, ExcelName):
        self.getData()
        self.SaveExcel(ExcelName)
Ejemplo n.º 53
0
    def to_xls_export(self, path, data, *args):
        def write_row(data, work_sheet, fields, work_sheet_titles):
            # update parent_table with the generated sheet's title
            data[PARENT_TABLE_NAME] = work_sheet_titles.get(
                data.get(PARENT_TABLE_NAME))
            work_sheet.append([data.get(f) for f in fields])

        wb = Workbook(optimized_write=True)
        work_sheets = {}
        # map of section_names to generated_names
        work_sheet_titles = {}
        for section in self.sections:
            section_name = section['name']
            work_sheet_title = ExportBuilder.get_valid_sheet_name(
                "_".join(section_name.split("/")), work_sheet_titles.values())
            work_sheet_titles[section_name] = work_sheet_title
            work_sheets[section_name] = wb.create_sheet(title=work_sheet_title)

        # write the headers
        for section in self.sections:
            section_name = section['name']
            headers = [element['title']
                       for element in section['elements']] + self.EXTRA_FIELDS
            # get the worksheet
            ws = work_sheets[section_name]
            ws.append(headers)

        index = 1
        indices = {}
        survey_name = self.survey.name
        for d in data:
            joined_export = dict_to_joined_export(d, index, indices,
                                                  survey_name)
            output = ExportBuilder.decode_mongo_encoded_section_names(
                joined_export)
            # attach meta fields (index, parent_index, parent_table)
            # output has keys for every section
            if survey_name not in output:
                output[survey_name] = {}
            output[survey_name][INDEX] = index
            output[survey_name][PARENT_INDEX] = -1
            for section in self.sections:
                # get data for this section and write to xls
                section_name = section['name']
                fields = [element['xpath'] for element in section['elements']
                          ] + self.EXTRA_FIELDS

                ws = work_sheets[section_name]
                # section might not exist within the output, e.g. data was
                # not provided for said repeat - write test to check this
                row = output.get(section_name, None)
                if type(row) == dict:
                    write_row(self.pre_process_row(row, section), ws, fields,
                              work_sheet_titles)
                elif type(row) == list:
                    for child_row in row:
                        write_row(self.pre_process_row(child_row, section), ws,
                                  fields, work_sheet_titles)
            index += 1

        wb.save(filename=path)
Ejemplo n.º 54
0
    def to_xls_export(self, path, data, *args, **kwargs):
        def write_row(data, work_sheet, fields, work_sheet_titles):
            # update parent_table with the generated sheet's title
            data[PARENT_TABLE_NAME] = work_sheet_titles.get(
                data.get(PARENT_TABLE_NAME))
            work_sheet.append([data.get(f) for f in fields])

        dataview = kwargs.get('dataview')
        total_records = kwargs.get('total_records')

        wb = Workbook(write_only=True)
        work_sheets = {}
        # map of section_names to generated_names
        work_sheet_titles = {}
        for section in self.sections:
            section_name = section['name']
            work_sheet_title = ExportBuilder.get_valid_sheet_name(
                '_'.join(section_name.split('/')), work_sheet_titles.values())
            work_sheet_titles[section_name] = work_sheet_title
            work_sheets[section_name] = wb.create_sheet(
                title=work_sheet_title)

        # write the headers
        if not self.INCLUDE_LABELS_ONLY:
            for section in self.sections:
                section_name = section['name']
                headers = self.get_fields(dataview, section, 'title')

                # get the worksheet
                ws = work_sheets[section_name]
                ws.append(headers)

        # write labels
        if self.INCLUDE_LABELS or self.INCLUDE_LABELS_ONLY:
            for section in self.sections:
                section_name = section['name']
                labels = self.get_fields(dataview, section, 'label')

                # get the worksheet
                ws = work_sheets[section_name]
                ws.append(labels)

        media_xpaths = [] if not self.INCLUDE_IMAGES \
            else self.dd.get_media_survey_xpaths()

        # write hxl header
        columns_with_hxl = kwargs.get('columns_with_hxl')
        if self.INCLUDE_HXL and columns_with_hxl:
            for section in self.sections:
                section_name = section['name']
                headers = self.get_fields(dataview, section, 'title')

                # get the worksheet
                ws = work_sheets[section_name]

                hxl_row = [columns_with_hxl.get(col, '')
                           for col in headers]
                hxl_row and ws.append(hxl_row)

        index = 1
        indices = {}
        survey_name = self.survey.name
        for i, d in enumerate(data, start=1):
            joined_export = dict_to_joined_export(d, index, indices,
                                                  survey_name,
                                                  self.survey, d,
                                                  media_xpaths)
            output = decode_mongo_encoded_section_names(joined_export)
            # attach meta fields (index, parent_index, parent_table)
            # output has keys for every section
            if survey_name not in output:
                output[survey_name] = {}
            output[survey_name][INDEX] = index
            output[survey_name][PARENT_INDEX] = -1
            for section in self.sections:
                # get data for this section and write to xls
                section_name = section['name']
                fields = self.get_fields(dataview, section, 'xpath')

                ws = work_sheets[section_name]
                # section might not exist within the output, e.g. data was
                # not provided for said repeat - write test to check this
                row = output.get(section_name, None)
                if isinstance(row, dict):
                    write_row(
                        self.pre_process_row(row, section),
                        ws, fields, work_sheet_titles)
                elif isinstance(row, list):
                    for child_row in row:
                        write_row(
                            self.pre_process_row(child_row, section),
                            ws, fields, work_sheet_titles)
            index += 1
            track_task_progress(i, total_records)

        wb.save(filename=path)
Ejemplo n.º 55
0
    def to_xls_export(self, path, data, username, id_string, *args):
        xform = XForm.objects.get(user__username__iexact=username,
                                  id_string__exact=id_string)

        json_question = json.loads(xform.json)
        parsedQuestions = get_questions_and_media_attributes(
            json_question['children'])

        from django.contrib.sites.models import Site as DjangoSite
        domain = DjangoSite.objects.get_current().domain

        def write_row(data, work_sheet, fields, work_sheet_titles):
            # work_sheet_titles = work_sheet_titles.append("fs_site")
            # update parent_table with the generated sheet's title
            data[PARENT_TABLE_NAME] = work_sheet_titles.get(
                data.get(PARENT_TABLE_NAME))
            data_new = []
            for f in fields:
                if f in data and f in parsedQuestions.get('media_attributes'):
                    data_new.append('=HYPERLINK("http://' + domain +
                                    '/attachment/medium?media_file=' +
                                    xform.user.username + '/attachments/' +
                                    data.get(f) + '", "Attachment")')
                else:
                    if f == "fs_status":
                        try:
                            status = FInstance.objects.get(
                                instance_id=data.get(
                                    '_id')).get_form_status_display()
                        except:
                            status = "No Status"
                        data_new.append(status)
                    else:
                        data_new.append(data.get(f, ''))
            work_sheet.append(data_new)

        wb = Workbook(optimized_write=True)
        work_sheets = {}
        # map of section_names to generated_names
        work_sheet_titles = {}
        for section in self.sections:
            section_name = section['name']
            work_sheet_title = ExportBuilder.get_valid_sheet_name(
                "_".join(section_name.split("/")), work_sheet_titles.values())
            work_sheet_titles[section_name] = work_sheet_title
            work_sheets[section_name] = wb.create_sheet(title=work_sheet_title)

        # write the headers
        for section in self.sections:
            section_name = section['name']
            headers = [element['title']
                       for element in section['elements']] + self.EXTRA_FIELDS
            # get the worksheet
            ws = work_sheets[section_name]
            ws.append(headers)

        index = 1
        indices = {}
        survey_name = self.survey.name
        for d in data:
            joined_export = dict_to_joined_export(d, index, indices,
                                                  survey_name)
            output = ExportBuilder.decode_mongo_encoded_section_names(
                joined_export)
            # attach meta fields (index, parent_index, parent_table)
            # output has keys for every section
            if survey_name not in output:
                output[survey_name] = {}
            output[survey_name][INDEX] = index
            output[survey_name][PARENT_INDEX] = -1
            for section in self.sections:
                # get data for this section and write to xls
                section_name = section['name']
                fields = [element['xpath'] for element in section['elements']
                          ] + self.EXTRA_FIELDS

                ws = work_sheets[section_name]
                # section might not exist within the output, e.g. data was
                # not provided for said repeat - write test to check this
                row = output.get(section_name, None)
                if type(row) == dict:
                    write_row(self.pre_process_row(row, section), ws, fields,
                              work_sheet_titles)
                elif type(row) == list:
                    for child_row in row:
                        write_row(self.pre_process_row(child_row, section), ws,
                                  fields, work_sheet_titles)
            index += 1

        wb.save(filename=path)
Ejemplo n.º 56
0
class SHSX:
    # 食行生鲜爬虫
    def __init__(self):
        self.dict = []
        self.dict = UserAgent.Android_USER_AGENT + UserAgent.iPhone_USER_AGENT
        #self.ipf = IpFilter('1')
        self.outwb = Workbook()  #Excel对象

        self.url_Class = {
            '蔬菜豆菇':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100840',
            '新鲜水果':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100559',
            '鲜肉蛋禽':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100771',
            '水产生鲜':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100591',
            '乳品烘培':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100705',
            '面点速食':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100736',
            '粮油副食':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100870',
            '休闲零食':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100935',
            '酒水饮料':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100996',
            '生活百货':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=100625',
            '鲜花绿植':
            'https://api1.34580.com/sz/productcategory/firstdetail?sourcetype=9&id=101017'
        }
        self.headers = {
            'charset': 'utf-8',
            'Accept-Encoding': 'gzip',
            'referer':
            'https://servicewechat.com/wx6e7ce0c196b0c3c2/32/page-frame.html',
            'content-type': 'application/json',
            'User-Agent':
            'Mozilla/5.0 (Linux; Android 8.0.0; FRD-AL10 Build/HUAWEIFRD-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070338) NetType/WIFI Language/zh_CN Process/toolsmp',
            'Host': 'api1.34580.com',
            'Connection': 'Keep-Alive'
        }

    def getBaseUrl(self, str_ID):
        # 每次每页取最大数据
        url_allClass = 'https://api1.34580.com/sz/ProductRequests/ProductMultiConditionRequest?sourcetype=9&OrderDirectionType=0&OrderFieldType=0&CategoryIds=%s&PageSize=%s&PageIndex=1&SourceType=9&MallTypes=&joinedproduct=true' % (
            str_ID, '1000')
        return url_allClass

    def initialization(self):  #请求初始化函数 设置useragent和代理ip
        self.headers['User-Agent'] = random.sample(self.dict, 1)[0]
        #print self.headers['User-Agent']
        #self.proxies = self.ipf.getIp()
        self.proxies = {'HTTP': 'localhost:8080'}
        #print self.proxies
        print '已初始化useragent和代理ip'

    def getExcel(self, career_Name):
        wo = self.outwb.active
        careerSheet = self.outwb.create_sheet(unicode(career_Name), 0)
        careerSheet.append(
            ['展示名称', '团购价', '标签', '划线价', '单价', '重量', '单位', '展示名称', '已售数量'])
        return careerSheet

    def SaveExcel(self, ExcelName):
        self.outwb.save(ExcelName + '.xlsx')

    def getDataByUrl(self, url):
        print '当前url' + url
        self.initialization()
        req = requests.get(url=url, headers=self.headers,
                           proxies=self.proxies).json()
        return json.dumps(req, ensure_ascii=False)

    def getAllClassID(self):
        for Key, Value in self.url_Class.items():
            urlDict = {}
            urldata_ID = json.loads(self.getDataByUrl(Value))
            for item in urldata_ID['Data']['Children']:
                urlDict[item['Name']] = self.getBaseUrl(item['Id'])
            self.url_Class[Key] = urlDict
        print '所有子类url组装到url_Class——完毕'
        #print self.url_Class

    #获得点开商品后的数据 如 销售量
    def getSingleProductData(self, id):
        data_ = ''
        try:
            url = "https://api1.34580.com/sz/product/detail?sourcetype=9&ssuId=%s" % id
            headers = {
                'charset': 'utf-8',
                'Accept-Encoding': 'gzip',
                'referer':
                'https://servicewechat.com/wx6e7ce0c196b0c3c2/32/page-frame.html',
                'content-type': 'application/json',
                'User-Agent':
                'Mozilla/5.0 (Linux; Android 8.0.0; FRD-AL10 Build/HUAWEIFRD-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070338) NetType/WIFI Language/zh_CN Process/toolsmp',
                'Host': 'api1.34580.com',
                'Connection': 'Keep-Alive'
            }
            req = requests.get(url=url, headers=headers).json()
            data = json.loads(json.dumps(req, ensure_ascii=False))
            data_ = data['result']['productInfo']['soldNumber']
        except:
            print '销售量失败'
        print data_
        return data_

    def getAllClassData(self):
        self.getAllClassID()
        for Key, Value in self.url_Class.items():
            careerSheet = self.getExcel(Key)  #以一级分类为基础建立Excel页
            for key, value in Value.items():
                careerSheet.append([key])
                try:
                    data = json.loads(self.getDataByUrl(value))
                    if data['Message'] == '返回正确':
                        for item in data['Data']['SourceData']:
                            careerSheet.append([
                                item['ProductName'],  #展示名称
                                item['UnitPeriodMoney'],  # 团购价
                                item['PriceName'],  # 标签
                                item['DefaultMoney'],  # 划线价
                                item['PvStandard'],  # 单价
                                item['Weight'],  # 重量
                                item['Unit'],  # 单位
                                item['ProductName'],  # 展示名称
                                str(
                                    self.getSingleProductData(
                                        item['ProductId']))  #已售数量
                            ])
                    print data['Message']
                except:
                    print 'Fail'

    def Crawl(self, ExcelName):
        self.getAllClassData()  #组装url
        self.SaveExcel(ExcelName)  #储存
Ejemplo n.º 57
0
class _OpenpyxlWriter(ExcelWriter):
    engine = 'openpyxl'
    supported_extensions = ('.xlsx', '.xlsm')

    def __init__(self, path, engine=None, **engine_kwargs):
        if not openpyxl_compat.is_compat():
            raise ValueError('Installed openpyxl is not supported at this '
                             'time. Use >={0} and '
                             '<{1}.'.format(openpyxl_compat.start_ver,
                                            openpyxl_compat.stop_ver))
        # Use the openpyxl module as the Excel writer.
        from openpyxl.workbook import Workbook

        super(_OpenpyxlWriter, self).__init__(path, **engine_kwargs)

        # Create workbook object with default optimized_write=True.
        self.book = Workbook()
        # Openpyxl 1.6.1 adds a dummy sheet. We remove it.
        if self.book.worksheets:
            self.book.remove_sheet(self.book.worksheets[0])

    def save(self):
        """
        Save workbook to disk.
        """
        return self.book.save(self.path)

    def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
        # Write the frame cells using openpyxl.
        from openpyxl.cell import get_column_letter

        sheet_name = self._get_sheet_name(sheet_name)

        if sheet_name in self.sheets:
            wks = self.sheets[sheet_name]
        else:
            wks = self.book.create_sheet()
            wks.title = sheet_name
            self.sheets[sheet_name] = wks

        for cell in cells:
            colletter = get_column_letter(startcol + cell.col + 1)
            xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1))
            xcell.value = _conv_value(cell.val)
            style = None
            if cell.style:
                style = self._convert_to_style(cell.style)
                for field in style.__fields__:
                    xcell.style.__setattr__(field,
                                            style.__getattribute__(field))

            if isinstance(cell.val, datetime.datetime):
                xcell.style.number_format.format_code = self.datetime_format
            elif isinstance(cell.val, datetime.date):
                xcell.style.number_format.format_code = self.date_format

            if cell.mergestart is not None and cell.mergeend is not None:
                cletterstart = get_column_letter(startcol + cell.col + 1)
                cletterend = get_column_letter(startcol + cell.mergeend + 1)

                wks.merge_cells('%s%s:%s%s' %
                                (cletterstart, startrow + cell.row + 1,
                                 cletterend, startrow + cell.mergestart + 1))

                # Excel requires that the format of the first cell in a merged
                # range is repeated in the rest of the merged range.
                if style:
                    first_row = startrow + cell.row + 1
                    last_row = startrow + cell.mergestart + 1
                    first_col = startcol + cell.col + 1
                    last_col = startcol + cell.mergeend + 1

                    for row in range(first_row, last_row + 1):
                        for col in range(first_col, last_col + 1):
                            if row == first_row and col == first_col:
                                # Ignore first cell. It is already handled.
                                continue
                            colletter = get_column_letter(col)
                            xcell = wks.cell("%s%s" % (colletter, row))
                            for field in style.__fields__:
                                xcell.style.__setattr__(
                                    field, style.__getattribute__(field))

    @classmethod
    def _convert_to_style(cls, style_dict):
        """
        converts a style_dict to an openpyxl style object
        Parameters
        ----------
        style_dict: style dictionary to convert
        """

        from openpyxl.style import Style
        xls_style = Style()
        for key, value in style_dict.items():
            for nk, nv in value.items():
                if key == "borders":
                    (xls_style.borders.__getattribute__(nk).__setattr__(
                        'border_style', nv))
                else:
                    xls_style.__getattribute__(key).__setattr__(nk, nv)

        return xls_style
Ejemplo n.º 58
0
            ["help", "file=", "geolitecity=", "excelfile=", "coloring"])
    except getopt.GetoptError, err:
        print str(err)
        sys.exit(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o in ("-e", "--excelfile"):
            if not has_openpyxl:
                print "You must install OpenPyxl 2.1.2+ for xlsx format:\n\thttps://pypi.python.org/pypi/openpyxl"
                sys.exit(-1)
            excelfile = a
            wb = Workbook(optimized_write=True)
            ws = wb.create_sheet()
            ws.title = "IP Address Info"
            ws.append(header)
        elif o in ("-g", "--geolitecity"):
            GeoLite = a
        elif o in ("-f", "--file"):
            if os.path.isfile(a):
                ipfile = open(a, "r")
            else:
                print a + " is not a file"
                usage()
                sys.exit(-1)
        elif o in ("-c", "--coloring"):
            color = True
        else:
            assert False, "unhandled option\n\n"
Ejemplo n.º 59
0
class ExcelWriter(object):
    """
    Class for writing DataFrame objects into excel sheets, uses xlwt for xls,
    openpyxl for xlsx.  See DataFrame.to_excel for typical usage.

    Parameters
    ----------
    path : string
        Path to xls file
    """
    def __init__(self, path):
        self.use_xlsx = True
        if path.endswith('.xls'):
            self.use_xlsx = False
            import xlwt
            self.book = xlwt.Workbook()
            self.fm_datetime = xlwt.easyxf(num_format_str='YYYY-MM-DD HH:MM:SS')
            self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD')
        else:
            from openpyxl.workbook import Workbook
            self.book = Workbook(optimized_write = True)
        self.path = path
        self.sheets = {}
        self.cur_sheet = None

    def save(self):
        """
        Save workbook to disk
        """
        self.book.save(self.path)

    def writerow(self, row, sheet_name=None):
        """
        Write the given row into Excel an excel sheet

        Parameters
        ----------
        row : list
            Row of data to save to Excel sheet
        sheet_name : string, default None
            Name of Excel sheet, if None, then use self.cur_sheet
        """
        if sheet_name is None:
            sheet_name = self.cur_sheet
        if sheet_name is None:  # pragma: no cover
            raise Exception('Must pass explicit sheet_name or set '
                            'cur_sheet property')
        if self.use_xlsx:
            self._writerow_xlsx(row, sheet_name)
        else:
            self._writerow_xls(row, sheet_name)

    def _writerow_xls(self, row, sheet_name):
        if sheet_name in self.sheets:
            sheet, row_idx = self.sheets[sheet_name]
        else:
            sheet = self.book.add_sheet(sheet_name)
            row_idx = 0
        sheetrow = sheet.row(row_idx)
        for i, val in enumerate(row):
            if isinstance(val, (datetime.datetime, datetime.date)):
                if isinstance(val, datetime.datetime):
                    sheetrow.write(i,val, self.fm_datetime)
                else:
                    sheetrow.write(i,val, self.fm_date)
            elif isinstance(val, np.int64):
                sheetrow.write(i,int(val))
            elif isinstance(val, np.bool8):
                sheetrow.write(i,bool(val))
            else:
                sheetrow.write(i,val)
        row_idx += 1
        if row_idx == 1000:
            sheet.flush_row_data()
        self.sheets[sheet_name] = (sheet, row_idx)

    def _writerow_xlsx(self, row, sheet_name):
        if sheet_name in self.sheets:
            sheet, row_idx = self.sheets[sheet_name]
        else:
            sheet = self.book.create_sheet()
            sheet.title = sheet_name
            row_idx = 0

        conv_row = []
        for val in row:
            if isinstance(val, np.int64):
                val = int(val)
            elif isinstance(val, np.bool8):
                val = bool(val)
            conv_row.append(val)
        sheet.append(conv_row)
        row_idx += 1
        self.sheets[sheet_name] = (sheet, row_idx)
Ejemplo n.º 60
0
class boss_crawler:
    def __init__(self):
        self.headers = {
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding':
            'gzip,deflate,br',
            'Accept-Language':
            'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
            'Connecion':
            'keep-alive',
            #'Cookie':'__a=32544699.1550979725.1550979725.1550979732.57.2.56.57; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1550979723,1552831132,1552906964; lastCity=101010100; _uab_collina=155283113440544880091396; __g=-; __l=r=&l=%2Fsao.zhipin.com%2F; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1552907207; __c=1550979732; JSESSIONID=""',
            'Host':
            'www.zhipin.com',
            'Referer':
            'https://www.zhipin.com/c101010100/',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:65.0) Gecko/20100101 Firefox/65.0'
        }  # cookie会失效的,需要定期更换
        self.keyword = ['java']
        self.outwb = Workbook()

    def getHtml(self, url):
        time.sleep(2)
        response = request('get', url, headers=self.headers)
        #html = json.loads(response.text)
        html = response.text
        return html

    def getExcel(self, careerName):
        wo = self.outwb.active
        careerSheet = self.outwb.create_sheet(careerName)
        careerSheet.append(
            ['公司', '规模低', '规模顶', '职位', '最低工作经验', '学历', '底薪', '顶薪', '发布时间'])
        return careerSheet

    def saveExcel(self):
        self.outwb.save(
            "E:\DataAnalysis\\tools\python3\project\money_analysis\JodFinding\work.xlsx"
        )

    def getNum(self, str):
        pattern = re.compile(r'\d+')
        result = pattern.findall(str)
        return result

    def getSchool(self, str):
        p = re.compile(r'[年]+')
        p_ = re.compile(r'[限]+')
        p__ = re.compile(r'[生]+')
        list1 = p.split(str)
        list2 = p_.split(str)
        list3 = p__.split(str)
        if len(list1) > 1:
            print(list1)
            return list1[1]
        elif len(list2) > 1:
            print(list2)
            return list2[1]
        else:
            try:
                print(list3)
                return list3[1]
            except:
                print('error')

    def get_SaveMessage(self, careerSheet, soup):
        joblist = soup.find_all('div', {'class': 'job-primary'})
        for item in joblist:
            company = item.find('div', {
                'class': 'info-company'
            }).find('a').get_text()
            big = item.find('div', {
                'class': 'info-company'
            }).find('p').get_text()
            job = item.find('div', {
                'class': 'info-primary'
            }).find('div', {
                'class': 'job-title'
            }).get_text()
            graduate = item.find('div', {'class': 'info-primary'}).p.get_text()
            money = item.find('div', {
                'class': 'info-primary'
            }).find('span').get_text()
            time = item.find('div', {
                'class': 'info-publis'
            }).find('p').get_text()
            if len(self.getNum(big)) > 1:
                if len(self.getNum(graduate)) > 0:
                    careerSheet.append([
                        company,
                        self.getNum(big)[0],
                        self.getNum(big)[1], job,
                        self.getNum(graduate)[0],
                        self.getSchool(graduate),
                        self.getNum(money)[0],
                        self.getNum(money)[1], time
                    ])
                else:
                    careerSheet.append([
                        company,
                        self.getNum(big)[0],
                        self.getNum(big)[1], job, '无',
                        self.getSchool(graduate),
                        self.getNum(money)[0],
                        self.getNum(money)[1], time
                    ])
            else:
                if len(self.getNum(graduate)) > 0:
                    careerSheet.append([
                        company,
                        self.getNum(big)[0], '以上', job,
                        self.getNum(graduate)[0],
                        self.getSchool(graduate),
                        self.getNum(money)[0],
                        self.getNum(money)[1], time
                    ])
                else:
                    careerSheet.append([
                        company,
                        self.getNum(big)[0], '以上', job, '无',
                        self.getSchool(graduate),
                        self.getNum(money)[0],
                        self.getNum(money)[1], time
                    ])

    def startCrawl(self):
        for i in range(len(self.keyword)):  #按关键字进行爬取
            page = 0
            careerSheet = self.getExcel(self.keyword[i])
            url = 'https://www.zhipin.com/c101010100/?query=%s&page=%d&ka=page-%d' % (
                self.keyword[i], page, page)  #&period=3 是近7天招聘的意思
            soup = BeautifulSoup(self.getHtml(url), 'lxml')
            self.get_SaveMessage(careerSheet, soup)
            while soup.find('a', {'class': 'next'}):  #如果有下一页
                if soup.find('a', {'class': 'next disabled'}):
                    break
                page = page + 1
                url = 'https://www.zhipin.com/c101010100/?query=%s&page=%d&ka=page-%d' % (
                    self.keyword[i], page, page)
                soup = BeautifulSoup(self.getHtml(url), 'lxml')
                self.get_SaveMessage(careerSheet, soup)
            print(self.keyword[i] + 'is OK')
        self.saveExcel()