Example #1
0
def _heat_to_int(label):
    """Convert a popularity label such as '3456' or '1.2万' to an int."""
    if label.isdigit():
        return int(label)
    # '万' is the ten-thousand unit. round() keeps a float product that lands
    # slightly below the intended integer from being truncated downwards.
    return int(round(float(label.replace('万', '')) * 10000))


def _tags_from_driver(driver, position):
    """Return the impression tags of list entry *position* as '[a] [b] '.

    At most three tag slots are probed. Probing stops at the first missing
    slot, and tags found before that point are kept. The placeholder text is
    returned only when the entry has no tag at all.
    """
    css = ("#live-list-contentbox > li:nth-child({}) > a > "
           "div.impress-tag-list > span:nth-child({})")
    found = []
    for slot in range(1, 4):
        try:
            found.append(
                driver.find_element_by_css_selector(css.format(position, slot)).text)
        except Exception:
            # A missing slot means there are no further tags for this entry.
            break
    if not found:
        return '该主播没有标签!'
    return "".join("[" + tag + "] " for tag in found)


def get_info(driver):
    """Scrape every streamer entry on the current page and store each one.

    Walks list positions 1..120 (a full page holds 120 streamers), reading the
    name, category, title, popularity and tags of each entry through
    ``driver.find_element_by_css_selector``. One INSERT statement per entry is
    passed to the file-level ``insert`` helper, and the module-level counter
    ``Count_num`` is advanced once per scraped entry. Scraping stops at the
    first position whose fields cannot be read, and a summary is logged once
    the loop ends.

    Args:
        driver: object exposing ``find_element_by_css_selector`` (presumably a
            Selenium WebDriver positioned on a listing page; confirm at the
            call site).
    """
    global Count_num
    entry_css = '#live-list-contentbox > li:nth-child({}) > a > div.mes'
    type_ = ''   # category of the most recently scraped entry, for the summary
    tCount = 0   # number of entries actually scraped from this page
    for page in range(1, 121):
        mes = entry_css.format(page)
        try:
            # streamer name
            dy_name = driver.find_element_by_css_selector(
                mes + ' > p > span.dy-name.ellipsis.fl').text
            # stream category
            dy_type = driver.find_element_by_css_selector(
                mes + ' > div > span').text
            # stream title
            dy_title = driver.find_element_by_css_selector(
                mes + ' > div > h3').text
            # stream popularity, normalised to an integer
            dy_num = _heat_to_int(driver.find_element_by_css_selector(
                mes + ' > p > span.dy-num.fr').text)
        except Exception:
            # The entry is missing or unreadable: treat it as the end of the page.
            break
        tags = _tags_from_driver(driver, page)
        type_ = dy_type
        tCount += 1
        # NOTE(review): scraped text is interpolated straight into the SQL
        # string, so a quote in a name or title breaks (or injects into) the
        # statement. Switching to a parameterised query requires changing
        # insert(), which is defined outside this block.
        Sql_insert = "INSERT INTO douyu3(dy_name,dy_type,dy_title,dy_num,dy_tag)VALUES('%s','%s','%s','%d','%s')" % (
            dy_name, dy_type, dy_title, dy_num, tags)
        print("正在爬取第{}个主播:".format(Count_num))
        try:
            insert(Sql_insert)
        except Exception:
            # Best effort: report the failed row and carry on with the next one.
            print("插入错误!")
            logger.info("{}插入错误!".format(dy_name))
        Count_num += 1
    # Logged whether the page ended early or all 120 slots were filled.
    logger.info("{}类已经爬取完毕,一共有{}条数据。".format(type_, tCount))
    logger.info("一共已经爬取{}条数据!".format(Count_num))
Example #2
0
def _heat_to_int(label):
    """Convert a popularity label such as '3456' or '1.2万' to an int."""
    if label.isdigit():
        return int(label)
    # '万' is the ten-thousand unit. round() keeps a float product that lands
    # slightly below the intended integer from being truncated downwards.
    return int(round(float(label.replace('万', '')) * 10000))


def _tags_from_tree(tree, position):
    """Return the impression tags of list entry *position* as '[a] [b] '.

    At most three tag slots are probed. Probing stops at the first missing
    slot, and tags found before that point are kept. The placeholder text is
    returned only when the entry has no tag at all.
    """
    xpath = '//*[@id="live-list-contentbox"]/li[{}]/a/div[2]/span[{}]/text()'
    found = []
    for slot in range(1, 4):
        try:
            found.append(tree.xpath(xpath.format(position, slot))[0])
        except IndexError:
            # An empty result list means there are no further tags.
            break
    if not found:
        return '该主播没有标签!'
    return "".join("[" + tag + "] " for tag in found)


def get_info(source):
    """Parse one listing page and store every streamer entry found in it.

    ``source`` is parsed with ``etree.HTML`` and list positions 1..120 (a full
    page holds 120 streamers) are queried by XPath for name, category, title,
    popularity and tags. One INSERT statement per entry is passed to the
    file-level ``insert`` helper, and the module-level counter ``Count_num``
    is advanced once per scraped entry. Scraping stops at the first position
    whose fields cannot be read, and a summary is logged once the loop ends.

    Args:
        source: HTML text of the listing page, as accepted by ``etree.HTML``.
    """
    global Count_num
    selector = etree.HTML(source)
    entry_xpath = '//*[@id="live-list-contentbox"]/li[{}]/a'
    type_ = ''   # category of the most recently scraped entry, for the summary
    tCount = 0   # number of entries actually scraped from this page
    for page in range(1, 121):
        entry = entry_xpath.format(page)
        try:
            # streamer name
            dy_name = selector.xpath(entry + '/div[1]/p/span[1]/text()')[0]
            # stream category
            dy_type = selector.xpath(entry + '/div[1]/div/span/text()')[0]
            # stream title
            dy_title = selector.xpath(entry + '/div[1]/div/h3/text()')[0]
            # stream popularity, normalised to an integer
            dy_num = _heat_to_int(
                selector.xpath(entry + '/div[1]/p/span[2]/text()')[0])
        except Exception:
            # The entry is missing or unreadable (or the document failed to
            # parse): treat it as the end of the page.
            break
        tags = _tags_from_tree(selector, page)
        type_ = dy_type
        tCount += 1
        # NOTE(review): scraped text is interpolated straight into the SQL
        # string, so a quote in a name or title breaks (or injects into) the
        # statement. Switching to a parameterised query requires changing
        # insert(), which is defined outside this block.
        Sql_insert = "INSERT INTO douyu3(dy_name,dy_type,dy_title,dy_num,dy_tag)VALUES('%s','%s','%s','%d','%s')" % (
            dy_name, dy_type, dy_title, dy_num, tags)
        print("正在爬取第{}个主播!".format(Count_num))
        try:
            insert(Sql_insert)
        except Exception:
            # Best effort: report the failed row and carry on with the next one.
            print("插入错误!")
            logger.info("{}插入错误!".format(dy_name))
        Count_num += 1
    # Logged whether the page ended early or all 120 slots were filled.
    logger.info("{}类已经爬取完毕,一共有{}条数据。".format(type_, tCount))
    logger.info("一共已经爬取{}条数据!".format(Count_num))