def download_text(url):
    """Yield ``(link, description)`` text pairs for each ``<item>`` at *url*.

    The document is fetched with an ``XMLSession``; the session is closed
    before any pair is yielded. Nothing is yielded when *url* is falsy or
    when the response object is falsy.

    Args:
        url: Address of the XML document to fetch.

    Yields:
        Tuples ``(link, description)`` taken from the ``.text`` of the first
        ``link`` and ``description`` XPath matches for each item.

    Raises:
        IndexError: If an item has no ``description`` or ``link`` match.
    """
    if not url:
        return

    # The context manager closes the session even when the request raises,
    # and the generator never holds it open across yields.
    # Assumes XMLSession is a requests-style session (it is used with
    # ``mount``/``proxies`` elsewhere in this file) -- TODO confirm.
    with XMLSession() as session:
        response = session.get(url)

    # NOTE(review): for requests-style responses, truthiness presumably
    # reflects a non-error HTTP status -- confirm.
    if not response:
        return

    for item in response.xml.xpath('//item'):
        # NOTE(review): '//' is an absolute XPath; whether it is scoped to
        # this item depends on how the element wrapper evaluates it. Confirm
        # each item yields its own values rather than the document's first.
        description = item.xpath('//description')[0].text
        link = item.xpath('//link')[0].text
        yield link, description
def get_xml(url):
    """Fetch *url* through the configured proxies and parse the body as XML.

    The request uses ``HTTP_PROXY`` / ``HTTPS_PROXY``, browser-like headers,
    and a retry policy (``connect=3``, ``backoff_factor=1``) mounted for both
    ``http://`` and ``https://`` URLs.

    Args:
        url: Address of the XML document to fetch.

    Returns:
        The result of ``ET.fromstring`` applied to the raw response body
        (presumably the root element -- confirm which ``ET`` is imported).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
                      'AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept': '*/*',
        'Connection': 'keep-alive',
    }
    retry = Retry(connect=3, backoff_factor=1)
    adapter = HTTPAdapter(max_retries=retry)

    # The context manager guarantees the session is closed even if the
    # request raises; previously a failed request leaked the session.
    with XMLSession() as session:
        session.proxies = {'http': HTTP_PROXY, 'https': HTTPS_PROXY}
        # Mount the retrying adapter for both schemes; mounting only
        # 'http://' left HTTPS requests without the retry policy.
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        # SECURITY: verify=False disables TLS certificate verification.
        # Kept for backward compatibility; enable verification if the
        # proxy/target certificates allow it.
        r = session.get(url, headers=headers, verify=False)

    # The body has already been read, so parsing after the session closes is safe.
    return ET.fromstring(r.content)