def get_content(self):
    """Fetch the article at ``self.url`` and store it as an Evernote note.

    The JSON response is expected to carry ``content`` (HTML), ``title``
    and ``id``. The HTML is parsed with html5lib, the ``html``/``head``/
    ``body`` elements are unwrapped so only the inner markup remains, and
    the soup is passed through ``self.change_img`` before the note is
    created with ``EvernoteMethod.makeNote``.
    """
    logger.info("get_content url %s" % self.url)
    response = requests.get(self.url, headers=self.headers)
    logger.info("get_content res %s " % response.text)
    payload = response.json()

    soup = BeautifulSoup(payload.get('content', ''), "html5lib")
    # The matches are discarded; presumably remove_attrs cleans each tag
    # as a side effect while acting as the filter -- TODO confirm.
    soup.find_all(self.remove_attrs)
    # Unwrap outermost first: html, then head, then body.
    for wrapper in ('html', 'head', 'body'):
        getattr(soup, wrapper).unwrap()

    raw_title = payload.get('title', '')
    article_id = payload.get('id', '')
    note_url = 'https://zhuanlan.zhihu.com/p/%s' % article_id

    # Whatever change_img returns is forwarded to makeNote below.
    resources = self.change_img(soup)

    # Collapse a multi-line title into a single line.
    title = ''.join(raw_title.split('\n'))
    logger.info("note_url %s" % note_url)
    logger.info("title %s" % title)

    note_body = str(soup)
    EvernoteMethod.makeNote(
        self.noteStore,
        title.encode('utf8'),
        note_body,
        note_url,
        resources,
        self.parent_note,
    )
def get_content(self):
    """Fetch the answer at ``self.url``, save it to Evernote, and mark it done.

    Steps:
      1. GET ``self.url`` and read ``content``, ``question.title``,
         ``question.id`` and ``id`` from the JSON response.
      2. Parse the HTML with html5lib, unwrap the ``html``/``head``/``body``
         elements, and run the soup through ``self.change_img``.
      3. Create the note with ``EvernoteMethod.makeNote``.
      4. Flag the matching ``CollectionQueue`` row (looked up by
         ``api_url == self.url``) as collected, storing the note guid and
         the current Unix time, then delete the SQS message identified by
         ``self.receipt_handle``.

    The database session is always closed, even when the commit or the SQS
    call raises.
    """
    response = requests.get(self.url, headers=self.headers)
    print(self.url)
    print(response.text)
    payload = response.json()

    soup = BeautifulSoup(payload.get('content', ''), "html5lib")
    # The matches are discarded; presumably remove_attrs cleans each tag
    # as a side effect while acting as the filter -- TODO confirm.
    soup.find_all(self.remove_attrs)
    soup.html.unwrap()
    soup.head.unwrap()
    soup.body.unwrap()

    question = payload.get('question', {})
    raw_title = question.get('title', '')
    question_id = question.get('id', '')
    answer_id = payload.get('id', '')
    note_url = 'http://www.zhihu.com/question/%s/answer/%s' % (question_id, answer_id)

    # Whatever change_img returns is forwarded to makeNote below.
    resources = self.change_img(soup)

    # Collapse a multi-line title into a single line.
    title = ''.join(raw_title.split('\n'))
    print("note_url %s" % note_url)
    print("title %s" % title)

    html_content = str(soup)
    note = EvernoteMethod.makeNote(
        self.noteStore,
        title.encode('utf8'),
        html_content,
        note_url,
        resources,
        self.parent_note,
    )

    session = create_session()
    try:
        queue_item = (
            session.query(CollectionQueue)
            .filter(CollectionQueue.api_url == self.url)
            .first()
        )
        if queue_item:
            queue_item.is_collected = 1
            queue_item.collected_time = int(time.time())
            queue_item.note_guid = note.guid
            session.commit()
        # NOTE(review): the message is acknowledged whether or not a queue
        # row was found -- confirm this matches the intended flow.
        sqs_conn.delete_message_from_handle(zhihufav_sqs, self.receipt_handle)
    finally:
        # Release the session on every path so a failed commit or SQS call
        # does not leak it.
        session.close()