def genBrandList_Yesky():
    """Scrape washing-machine brand names from product.yesky.com into a CSV.

    Fetches the washing-machine landing page and takes every anchor matched
    by ``.updonwlist a``. For each anchor it requests
    ``http://product.yesky.com/list/brand/<slug>/``, where ``<slug>`` is the
    second-to-last ``/``-separated piece of the anchor's ``href``. Every text
    found under ``.rmpp>ul>li>a`` and ``.rmpp>dl>dd>a`` on that page becomes
    one ``[anchor_text, brand_text]`` row.

    The collected rows are handed to ``My_Csv.Write_Csv`` with the title
    ``['category', 'brand']`` and written via ``add_title_data()``.

    Anchors whose ``href`` is missing or has fewer than two ``/``-separated
    pieces are skipped instead of aborting the whole run.
    """
    landing = pq(requestByProxy('http://product.yesky.com/washingmachine/'))
    anchors = landing.find('.updonwlist a')
    # my_text() is a project extension; presumably it yields one text per
    # matched anchor, in document order -- TODO confirm. Pairing anchor and
    # text positionally relies on that alignment.
    anchor_texts = anchors.my_text() or []

    rows = []
    for anchor, category in zip(anchors, anchor_texts):
        href = pq(anchor).attr('href')
        parts = href.split('/') if href else []
        if len(parts) < 2:
            # No slug can be derived from this link; nothing to request.
            continue
        slug = parts[-2]

        page = pq(requestByProxy('http://product.yesky.com/list/brand/' + slug + '/'))
        # Query each selector once; a falsy result is treated as "no brands".
        list_brands = page.find('.rmpp>ul>li>a').my_text() or []
        dl_brands = page.find('.rmpp>dl>dd>a').my_text() or []
        rows.extend([category, brand] for brand in list_brands + dl_brands)

    # Write the collected rows to the CSV file.
    writer = My_Csv.Write_Csv(
        path=dirCheck.dirGen('d:/spider'),
        name='brandList_for_xiaoju',
        title=['category', 'brand'],
        result=rows,
    )
    writer.add_title_data()
# Example no. 2
# 0
def dataSaved(result):
    """Pass ``result`` to ``My_Csv.Write_Csv`` and trigger the write.

    The writer is created with the path returned by
    ``dirCheck.dirGen('d:/operationAnalysis')``, the name ``'temp'`` and an
    empty title list, then ``add_title_data()`` is called on it.

    Args:
        result: Data forwarded unchanged as the writer's ``result`` argument.
    """
    import My_Csv
    import dirCheck

    target_dir = dirCheck.dirGen('d:/operationAnalysis')
    csv_writer = My_Csv.Write_Csv(path=target_dir, name='temp', title=[], result=result)
    csv_writer.add_title_data()