def post(url, params, cookie=None, timeout=None):
    """Send an HTTP POST request to a website.

    :param url: target URL.
    :param params: form data sent as the request body (``data=``, i.e. form-encoded).
    :param cookie: optional raw cookie string; attached as the ``cookie`` header when given.
    :param timeout: optional timeout in seconds passed to ``requests``.
        Defaults to ``None`` (wait indefinitely), preserving the original behavior;
        callers should pass a value to avoid a request hanging forever.
    :return: the ``requests.Response`` object.
    """
    headers = headers_ua()
    if cookie:
        headers['cookie'] = cookie
    response = requests.post(url=url, data=params, headers=headers, timeout=timeout)
    return response
def get(url, cookie=None, timeout=None):
    """Send an HTTP GET request to a website.

    :param url: target URL.
    :param cookie: optional raw cookie string; attached as the ``cookie`` header when given.
    :param timeout: optional timeout in seconds passed to ``requests``.
        Defaults to ``None`` (wait indefinitely), preserving the original behavior;
        callers should pass a value to avoid a request hanging forever.
    :return: the ``requests.Response`` object.
    """
    headers = headers_ua()
    if cookie:
        headers['cookie'] = cookie
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response
def Load_BaiDuBaiKe(name):
    """Download the Baidu Baike article page for an entry.

    :param name: the Baike entry name (URL-quoted before the request).
    :return: the page body decoded as UTF-8 text.
    """
    page_url = f'https://baike.baidu.com/item/{quote(name)}'
    resp = requests.get(page_url, headers=headers_ua())
    return resp.content.decode('utf-8')
def _redirect_link(self, word):
    """Look up *word* via the site's search endpoint and return the final
    (post-redirect) URL of the response."""
    payload = {
        'wo': word,
        'directGo': 1,
        'nsid': 0
    }
    resp = requests.post(self.url, headers=headers_ua(), data=payload)
    return resp.url
def _not_included_data(self, word):
    """Extract sentences for a word that the site has not indexed.

    Posts the word to the search endpoint, parses the result page, and
    collects the text of each element under the ``#content`` div, skipping
    plain strings and script-like nodes.

    :param word: the word to look up.
    :return: list of cleaned sentence strings.
    """
    sentences = []
    resp = requests.post(self.url, headers=headers_ua(), data={
        'wo': word,
        'directGo': 1,
        'nsid': 0
    })
    soup = self._BeautifulSoup(resp.text, features="lxml")
    for node in soup.find('div', id='content').contents:
        # Skip bare text nodes and elements containing inline JavaScript.
        if isinstance(node, str) or ('function' in node.text):
            continue
        sentence = node.text.replace('\xa0', '').strip()
        if '造句' in sentence:
            # Strip bracketed "...造句" annotations (e.g. "(用xx造句)") from the sentence.
            sentence = re.sub('[((\\[【].*[^造]*造句.*[)】\\])]', '', sentence)
        sentences.append(sentence)
    return sentences
def _get_page_data(url):
    """Fetch *url* with the shared UA headers and return the response body as text."""
    return requests.get(url, headers=headers_ua()).text