def test_read_plain_stream(self):
    """Check that an unchunked, uncompressed body is passed through as-is.

    The fixture file is read once directly to obtain the reference bytes,
    then read again through ``httputil.read_body_stream`` with
    ``chunked=False`` and ``compression=None``; both results must be equal.
    """
    fixture = os.path.join(MY_DIR, 'http_content', 'bzipped.expected')

    # Reference bytes: the file exactly as it is stored on disk.
    with open(fixture, 'rb') as raw:
        reference = raw.read()

    # The pieces must be joined while the handle is still open, because
    # they are pulled from the stream only as the join consumes them.
    with open(fixture, 'rb') as body:
        pieces = httputil.read_body_stream(
            body, chunked=False, compression=None)
        streamed = b''.join(pieces)

    self.assertEqual(reference, streamed)
def test_read_plain_stream(self):
    """Plain stream: output of ``read_body_stream`` equals the input file.

    With ``chunked=False`` and ``compression=None`` the bytes produced by
    ``httputil.read_body_stream`` are compared against a direct read of
    the very same fixture file.
    """
    target = os.path.join(MY_DIR, 'http_content', 'bzipped.expected')

    # Two independent handles on the same file: one supplies the expected
    # bytes, the other is fed to the function under test.
    with open(target, 'rb') as plain_fh, open(target, 'rb') as stream_fh:
        wanted = plain_fh.read()
        got = b''.join(
            httputil.read_body_stream(
                stream_fh,
                chunked=False,
                compression=None,
            )
        )

    self.assertEqual(wanted, got)
def test_read_body_stream(self):
    """Decode every fixture in ``CONTENT_FILES`` and compare to its twin.

    Each entry supplies a file name plus the ``chunked`` and
    ``compression`` arguments to hand to ``httputil.read_body_stream``.
    The joined output must equal the contents of the sibling file whose
    name is the fixture name with ``.expected`` appended.  Every fixture
    runs in its own ``subTest`` labelled with the file name.
    """
    for fname, chunked, compression in CONTENT_FILES:
        encoded_path = os.path.join(MY_DIR, 'http_content', fname)
        with self.subTest(fname):
            expected_path = encoded_path + '.expected'
            with open(encoded_path, 'rb') as encoded_fh, \
                    open(expected_path, 'rb') as expected_fh:
                decoded = b''.join(
                    httputil.read_body_stream(
                        encoded_fh,
                        chunked=chunked,
                        compression=compression,
                    )
                )
                self.assertEqual(decoded, expected_fh.read())
def test_read_body_stream(self):
    """Run ``read_body_stream`` over each listed fixture file.

    ``CONTENT_FILES`` is iterated as ``(fname, chunked, compression)``
    triples.  For each one the fixture is streamed through
    ``httputil.read_body_stream`` with those settings and the result is
    compared with the bytes stored in ``<fixture>.expected``.  Fixtures
    are wrapped in ``subTest`` contexts keyed by file name.
    """
    for fname, chunked, compression in CONTENT_FILES:
        file_path = os.path.join(MY_DIR, 'http_content', fname)
        with self.subTest(fname):
            with open(file_path, 'rb') as src:
                with open(file_path + '.expected', 'rb') as reference:
                    options = {'chunked': chunked, 'compression': compression}
                    # Materialise the chunks while ``src`` is still open.
                    parts = list(httputil.read_body_stream(src, **options))
                    body = b''.join(parts)
                    self.assertEqual(body, reference.read())
# 반복... # \r\n0\r\n\r\n type = None if res.text.startswith('<!'): if res.request.url.startswith('http://hei'): type = 'hei' #한경 연예면 text = res.content.decode() elif res.request.url.startswith('http://plus'): type = 'plus' #한경 플러스 text = res.text.encode('latin-1').decode('cp949') else : text = res.text else: gzipped_bytes = res.content text = b''.join(httputil.read_body_stream(io.BytesIO(gzipped_bytes), chunked=True, compression=httputil.GZIP)).decode() bs = BeautifulSoup(text, 'html.parser') if type == None: title = bs.select('div#container > div.artlcle_top > h2.tit')[0].text base_dtm = bs.select('div#container > div.wrap_container > div > div.info_article > div.date > span')[0].text[3:] contents = bs.select('div#newsView')[0].text elif type == 'hei': title = bs.select('div#container > section > h1')[0].text base_dtm = bs.select('div#container > section > div > div.atc-info > span')[0].text[3:] contents = bs.select('article#newsView')[0].text elif type == 'plus':