def genBrandList_Yesky():
    """Scrape category/brand pairs from product.yesky.com and write them to CSV.

    Steps:
      1. Fetch the washing-machine landing page through ``requestByProxy`` and
         select every ``.updonwlist a`` anchor.
      2. For each anchor, take the second-to-last path segment of its ``href``
         as the category slug, and take the matching entry of
         ``anchors.my_text()`` as the category display name.
      3. Fetch ``http://product.yesky.com/list/brand/<slug>/`` and collect the
         ``my_text()`` of the ``.rmpp>ul>li>a`` and ``.rmpp>dl>dd>a`` anchors.
      4. Emit one ``[category_name, brand_text]`` row per collected entry and
         hand all rows to ``My_Csv.Write_Csv`` (columns ``category``/``brand``,
         file name ``brandList_for_xiaoju`` under ``dirCheck.dirGen('d:/spider')``).

    NOTE(review): ``my_text`` is a project-specific pyquery extension; it is
    presumed to return a list of strings (or a falsy value when nothing
    matches) -- confirm against its definition.

    Returns:
        None. The result is written out through ``writer.add_title_data()``.
    """
    src = requestByProxy('http://product.yesky.com/washingmachine/')
    anchors = pq(src).find('.updonwlist a')
    slugs = [pq(anchor).attr('href').split('/')[-2] for anchor in anchors]
    category_names = anchors.my_text()

    brand_selectors = ('.rmpp>ul>li>a', '.rmpp>dl>dd>a')
    result = []
    # Pair slug and display name by position instead of re-parsing the URL and
    # searching the slug list: the search was linear per category and always
    # resolved a repeated slug to its first occurrence's name.
    for position, slug in enumerate(slugs):
        url = 'http://product.yesky.com/list/brand/' + slug + '/'
        page = pq(requestByProxy(url))

        brands = []
        for selector in brand_selectors:
            # Query each selector once; fall back to an empty list when
            # my_text() yields a falsy value.
            brands.extend(page.find(selector).my_text() or [])

        # Indexing (rather than zip) keeps a length mismatch between slugs and
        # names loud instead of silently dropping categories.
        category_name = category_names[position]
        result.extend([category_name, brand] for brand in brands)

    # Write the data to a CSV file.
    title = ['category', 'brand']
    writer = My_Csv.Write_Csv(
        path=dirCheck.dirGen('d:/spider'),
        name='brandList_for_xiaoju',
        title=title,
        result=result,
    )
    writer.add_title_data()
def dataSaved(result):
    """Write ``result`` to a CSV named ``temp`` with an empty title row.

    The output directory comes from ``dirCheck.dirGen('d:/operationAnalysis')``;
    the actual writing is delegated to ``My_Csv.Write_Csv(...).add_title_data()``.

    Args:
        result: Data forwarded unchanged as the ``result`` argument of
            ``My_Csv.Write_Csv``.

    Returns:
        None.
    """
    # Project-local helpers are imported lazily, at call time.
    import My_Csv
    import dirCheck

    output_dir = dirCheck.dirGen('d:/operationAnalysis')
    csv_writer = My_Csv.Write_Csv(
        path=output_dir,
        name='temp',
        title=[],
        result=result,
    )
    csv_writer.add_title_data()