コード例 #1
0
ファイル: ZhiSpider.py プロジェクト: yumupinglan/ZhiSpider
    def __init__(self):
        self.configs = self._getConfigs()
        self.rules = self._getRules()

        self.session = requests.session()
        self._setHeader()

        self.login()

        self.fpExcept = open(self.configs['EXCEPTIONS'], 'wb')

        # 创建数据库
        self.db = sqlite3.connect(self.configs['DB']['NAME'])
        self.dbCursor = self.db.cursor()

        # 爬问题列表
        self.questionSpider = ZhiQuestions(self)
        # 爬答案
        self.answerSpider = ZhiAnswers(self)
コード例 #2
0
ファイル: ZhiSpider.py プロジェクト: Blueve/ZhiSpider
	def __init__(self):
		self.configs = self._getConfigs()
		self.rules = self._getRules()

		self.session = requests.session()
		self._setHeader()

		self.login()

		self.fpExcept = open(self.configs['EXCEPTIONS'], 'wb')

		# 创建数据库
		self.db = sqlite3.connect(self.configs['DB']['NAME'])
		self.dbCursor = self.db.cursor()

		# 爬问题列表
		self.questionSpider = ZhiQuestions(self)
		# 爬答案
		self.answerSpider = ZhiAnswers(self)
コード例 #3
0
ファイル: ZhiSpider.py プロジェクト: Blueve/ZhiSpider
class ZhiSpider(object):

	def __init__(self):
		self.configs = self._getConfigs()
		self.rules = self._getRules()

		self.session = requests.session()
		self._setHeader()

		self.login()

		self.fpExcept = open(self.configs['EXCEPTIONS'], 'wb')

		# 创建数据库
		self.db = sqlite3.connect(self.configs['DB']['NAME'])
		self.dbCursor = self.db.cursor()

		# 爬问题列表
		self.questionSpider = ZhiQuestions(self)
		# 爬答案
		self.answerSpider = ZhiAnswers(self)


	def __del__(self):
		self.fpExcept.close()
		self.dbCursor.close()
		self.db.close()

	# 读取配置文件
	def _getConfigs(self):
		with open('Config.yaml', 'rb') as fp:
			configs = yaml.load(fp)
		return configs

	# 读取规则文件
	def _getRules(self):
		with open('Rule.yaml', 'rb') as fp:
			rules = yaml.load(fp)
		return rules

	# 设置HTTP头
	def _setHeader(self):
		self.session.headers.update(self.configs['HEADERS']['INIT'])

	# 登陆知乎
	def login(self):
		# 构造登陆表单并提交
		loginPost = {
			'email': self.configs['AUTH']['EMAIL'],
			'password': self.configs['AUTH']['PASSWORD'],
			'_xsrf': self._getXsrf(),
			'rememberme': 'y',
		}
		self.session.post(self.configs['URL']['LOGIN'], loginPost)
		# 验证登陆状态 若登陆失败则退出
		response = self.session.get(self.configs['URL']['HOME'])
		html = etree.HTML(response.content)
		assert len(html.xpath(self.rules['LOGIN']['TEST'])) == 1, "Failed to login status_code %d" % (response.status_code,)

	# 获取xsrf token
	def _getXsrf(self):
		return self.session.cookies.get('_xsrf')

	# 获取当前时间
	def _getTime(self):
		return time.strftime('%Y-%m-%d-%H%M%S',time.localtime(time.time()))

	# 抓取问题列表
	def getQuestions(self):
		self.questionSpider.start()

	def getAnswers(self):
		self.answerSpider.start()
コード例 #4
0
ファイル: ZhiSpider.py プロジェクト: yumupinglan/ZhiSpider
class ZhiSpider(object):
    def __init__(self):
        self.configs = self._getConfigs()
        self.rules = self._getRules()

        self.session = requests.session()
        self._setHeader()

        self.login()

        self.fpExcept = open(self.configs['EXCEPTIONS'], 'wb')

        # 创建数据库
        self.db = sqlite3.connect(self.configs['DB']['NAME'])
        self.dbCursor = self.db.cursor()

        # 爬问题列表
        self.questionSpider = ZhiQuestions(self)
        # 爬答案
        self.answerSpider = ZhiAnswers(self)

    def __del__(self):
        self.fpExcept.close()
        self.dbCursor.close()
        self.db.close()

    # 读取配置文件
    def _getConfigs(self):
        with open('Config.yaml', 'rb') as fp:
            configs = yaml.load(fp)
        return configs

    # 读取规则文件
    def _getRules(self):
        with open('Rule.yaml', 'rb') as fp:
            rules = yaml.load(fp)
        return rules

    # 设置HTTP头
    def _setHeader(self):
        self.session.headers.update(self.configs['HEADERS']['INIT'])

    # 登陆知乎
    def login(self):
        # 构造登陆表单并提交
        loginPost = {
            'email': self.configs['AUTH']['EMAIL'],
            'password': self.configs['AUTH']['PASSWORD'],
            '_xsrf': self._getXsrf(),
            'rememberme': 'y',
        }
        self.session.post(self.configs['URL']['LOGIN'], loginPost)
        # 验证登陆状态 若登陆失败则退出
        response = self.session.get(self.configs['URL']['HOME'])
        html = etree.HTML(response.content)
        assert len(
            html.xpath(self.rules['LOGIN']['TEST'])
        ) == 1, "Failed to login status_code %d" % (response.status_code, )

    # 获取xsrf token
    def _getXsrf(self):
        return self.session.cookies.get('_xsrf')

    # 获取当前时间
    def _getTime(self):
        return time.strftime('%Y-%m-%d-%H%M%S', time.localtime(time.time()))

    # 抓取问题列表
    def getQuestions(self):
        self.questionSpider.start()

    def getAnswers(self):
        self.answerSpider.start()