Beispiel #1
0
def post(url, params, cookie=None, timeout=None):
    """Send a POST request using the headers produced by ``headers_ua()``.

    :param url: target URL.
    :param params: payload passed to ``requests.post`` as ``data=``.
    :param cookie: optional value for the ``cookie`` request header; the
        header is only set when this is truthy.
    :param timeout: optional timeout in seconds (or a ``(connect, read)``
        tuple) forwarded to ``requests.post``. ``None`` (the default) keeps
        the previous behaviour of waiting without a limit.
    :return: the response object returned by ``requests.post``.
    """
    headers = headers_ua()
    if cookie:
        headers['cookie'] = cookie
    # Without a timeout a stalled server blocks the caller forever, so let
    # callers opt in to one without changing the default behaviour.
    return requests.post(url=url, data=params, headers=headers,
                         timeout=timeout)
Beispiel #2
0
def get(url, cookie=None, timeout=None):
    """Send a GET request using the headers produced by ``headers_ua()``.

    :param url: target URL.
    :param cookie: optional value for the ``cookie`` request header; the
        header is only set when this is truthy.
    :param timeout: optional timeout in seconds (or a ``(connect, read)``
        tuple) forwarded to ``requests.get``. ``None`` (the default) keeps
        the previous behaviour of waiting without a limit.
    :return: the response object returned by ``requests.get``.
    """
    headers = headers_ua()
    if cookie:
        headers['cookie'] = cookie
    # Without a timeout a stalled server blocks the caller forever, so let
    # callers opt in to one without changing the default behaviour.
    return requests.get(url=url, headers=headers, timeout=timeout)
Beispiel #3
0
def Load_BaiDuBaiKe(name):
    """Download the Baidu Baike page for an entry.

    :param name: entry name; it is percent-encoded with ``quote`` before
        being appended to the item URL.
    :return: the response body decoded as UTF-8 text.
    """
    encoded_name = quote(name)
    page = requests.get(f'https://baike.baidu.com/item/{encoded_name}',
                        headers=headers_ua())
    raw_bytes = page.content
    return raw_bytes.decode('utf-8')
Beispiel #4
0
 def _redirect_link(self, word):
     """Post ``word`` to ``self.url`` and return the URL of the response.

     :param word: value sent in the ``wo`` form field.
     :return: ``response.url`` of the POST request.
     """
     form = {'wo': word, 'directGo': 1, 'nsid': 0}
     reply = requests.post(self.url, headers=headers_ua(), data=form)
     return reply.url
Beispiel #5
0
 def _not_included_data(self, word):
     """Extract sentence texts from the page returned for ``word``.

     Posts ``word`` to ``self.url``, parses the response with the ``lxml``
     parser and collects the text of every element child of
     ``<div id="content">``.

     :param word: value sent in the ``wo`` form field.
     :return: list of cleaned text strings, one per element child; an empty
         list when the page has no ``<div id="content">``.
     """
     response = requests.post(self.url,
                              headers=headers_ua(),
                              data={
                                  'wo': word,
                                  'directGo': 1,
                                  'nsid': 0
                              })
     soup = self._BeautifulSoup(response.text, features="lxml")
     content = soup.find('div', id='content')
     if content is None:
         # find() yields None when nothing matches; without this guard the
         # attribute access below would raise AttributeError.
         return []

     data_ls = []
     for node in content.contents:
         # Skip bare text nodes and any element whose text contains
         # 'function' (presumably inline script code).
         if isinstance(node, str) or 'function' in node.text:
             continue
         text = node.text.replace('\xa0', '').strip()
         if '造句' in text:
             # Drop a bracketed annotation that mentions "造句".
             text = re.sub('[((\\[【].*[^造]*造句.*[)】\\])]', '', text)
         data_ls.append(text)
     return data_ls
Beispiel #6
0
 def _get_page_data(url):
     """Fetch ``url`` with a GET request and return the response text.

     :param url: address to request.
     :return: ``response.text`` of the GET request.
     """
     return requests.get(url, headers=headers_ua()).text