def test_to_buy_goods(self):
    """GET the goods page and assert HTTP 200 plus the expected tab title."""
    self.http.set_header(cookie)
    self.params['serial'] = global_serial
    logger.info('正在发起GET请求...')
    # Encode the dict-typed params into a URL query string.
    self.params = urllib.parse.urlencode(self.params)
    response = self.http.get(self.url, self.params)
    status_code = response[2]
    response_body = response[0].decode('utf-8')
    logger.info('正在解析返回结果:%s' % response_body)
    # Parse the HTML document and scan start-tag data for the expected title.
    parser = MyHTMLParser(strict=False)
    parser.feed(response_body)
    expected_title = self.expected_result['tab_page_title']
    tab_page_title = next(
        (item[1] for item in parser.get_starttag_data() if item[1] == expected_title),
        '')
    # Assertions
    self.assertEqual(status_code, self.expected_result['status'],
                     msg='http状态码status不等于200')
    self.assertEqual(tab_page_title, self.expected_result['tab_page_title'],
                     msg='无法打开商品详情')
def test_visit_webtours(self):
    """Fetch the WebTours page and verify a <title>Web Tours</title> tag was seen."""
    # Add HTTP headers here if the interface under test requires them,
    # e.g. Accept / User-Agent entries passed to self.http.set_header.
    logger.info('正在发起GET请求...')
    response = self.http.get(self.url, self.params)
    response_body = response[0].decode('utf-8')
    logger.info('正在解析返回结果')
    # Parse the HTML document.
    parser = MyHTMLParser(strict=False)
    parser.feed(response_body)
    # Count how many start tags are a <title> whose text is 'Web Tours'.
    hits = sum(
        1 for item in parser.get_starttag_data()
        if item[0] == 'title' and item[1] == 'Web Tours'
    )
    # A count different from the expected "failure" value means the page opened.
    self.assertNotEqual(str(hits), self.expected_result['result'],
                        msg='访问WebTours失败')
class DataSetParser(MyHTMLParser):
    """Parse an HTML dataset file with MyHTMLParser and export subjects/records.

    Wraps an inner MyHTMLParser instance; after parse_HTML() the parsed
    records are available via getdataset()/filtertags() and can be written
    out as CSV or plain-text subject lists.
    """

    def __init__(self, filename):
        self.parser = MyHTMLParser()
        # Read as text inside a context manager so the handle is closed.
        # (The original opened in 'rb' and then called a str replace on the
        # bytes, which raises TypeError on Python 3, and leaked the handle.)
        with open(filename, "r") as f:
            self.HTMLdata = f.read().replace('\n', '')

    def parse_HTML(self):
        """Feed the loaded HTML to the inner parser and cache its dataset."""
        self.parser.feed(self.HTMLdata)
        self.dataset = self.parser.getdataset()

    def getdataset(self):
        return self.dataset

    def gettags(self):
        return self.parser.gettags()

    def classifydata(self, filename):
        """Write (subject, dokid) CSV rows for every filtered record tagged with each subject."""
        self.buildsubjectset()
        # newline='' is required by the csv module to avoid blank lines on Windows.
        with open(filename, "w", newline='') as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow(['subject', 'dokid'])
            for subject in self.subjectset:
                for data in self.filtered_dataset:
                    temp = [x.strip() for x in data['asca']]
                    if subject in temp:
                        writer.writerow([subject, data['dokid'][0]])

    def buildsubjectset(self):
        """Collect the parser's subjects, stripped and sorted, into self.subjectset."""
        self.subjectset = [x.strip() for x in self.parser.getsubject()]
        self.subjectset.sort()

    def writesubject(self, filename):
        """Write one subject per line to *filename*."""
        self.buildsubjectset()
        # The context manager closes the file; the original also called
        # close() redundantly inside the with block.
        with open(filename, "w") as text_file:
            for x in self.subjectset:
                text_file.write(x + '\n')

    def filtertags(self, tagname='refunifids'):
        """Keep only records that contain *tagname*; cache and return them."""
        self.filtered_dataset = [data for data in self.dataset if tagname in data]
        return self.filtered_dataset

    def writeCsv(self, attlist, output_filename):
        """Write the filtered records to CSV with *attlist* as header/columns."""
        # Text mode with newline='' — the original opened 'wb', which breaks
        # csv.writer on Python 3 (it writes str, not bytes).
        with open(output_filename, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            writer.writerow(attlist)
            for line in self.filtered_dataset:
                data = []
                for att in attlist:
                    if att in line:
                        data.append(line[att])
                    else:
                        data.append('')
                writer.writerow(data)
def from_html_to_dict(text, vars):
    """Interpret *text* as an anchor tag, a known variable placeholder, or an int.

    Anchor tags are parsed with MyHTMLParser and its extracted value returned;
    placeholder text (matching a key of *vars* once '[[' / ']]' are stripped)
    is returned unchanged; anything else is converted to int.
    """
    is_anchor = text.startswith("<a") and text.endswith("</a>")
    if is_anchor:
        parser = MyHTMLParser()
        parser.feed(text)
        return parser.value
    stripped = text.replace("[[", "").replace("]]", "")
    if stripped in vars.keys():
        return text
    return int(text)
def search_google(term):
    """Search Google for the exact-quoted *term* and return the parsed results.

    The trailing entry (Google's location-based "about" page) is removed
    from the parser's result list before returning it.
    """
    parser = MyHTMLParser()
    html, user_agent_google = google.get_search_results('"%s"' % term)
    parser.feed(html)
    # Drop the last result — the google location-based search about page.
    # NOTE(review): this relies on get_results() returning the parser's
    # internal list (mutated in place), as the original did.
    del parser.get_results()[-1]
    return parser.get_results()
def search_google(term):
    """Run an exact-phrase Google search for *term* and return parsed hits.

    Strips the final hit, which is Google's location-based "about" entry.
    """
    quoted = '"%s"' % term
    parser = MyHTMLParser()
    page_html, user_agent_google = google.get_search_results(quoted)
    parser.feed(page_html)
    # Remove the last result (google location-based search about page).
    # NOTE(review): mutates the list returned by get_results() in place,
    # exactly as the original did.
    del parser.get_results()[-1]
    return parser.get_results()
def test_visit_taobaoapi(self):
    """GET the taobao API endpoint and feed the decoded HTML body to the parser."""
    # Add HTTP headers here if the interface under test requires them,
    # e.g. Accept / User-Agent entries passed to self.http.set_header.
    logger.info('正在发起GET请求...')
    response = self.http.get(self.url, self.params)
    logger.info('正在解析返回结果')
    # Parse the HTML document.
    parser = MyHTMLParser(strict=False)
    # FIX (consistency): self.http.get returns a tuple whose first element is
    # the body bytes — every sibling test feeds response[0].decode('utf-8').
    # The original fed the raw response tuple to parser.feed, which cannot work.
    parser.feed(response[0].decode('utf-8'))
def __init__(self, str_id: str, str_name: str, str_content: str, str_link: str, str_type: str):
    """Build an item from raw fields, URL-decoding the name and stripping HTML from the content."""
    # Class vars
    self.id = str_id
    self.name = translate_url(str_name)  # parse url decoding
    self.name_small_list = list()
    self.content = ""
    self.link = str_link  # Item link / slug
    self.type = str_type  # Item type
    # Strip unneeded HTML tags from the post content.
    stripper = MyHTMLParser()
    stripper.feed(str_content)
    # Replace remaining non-HTML bracket tags (such as [h5p id="#"]) with the
    # configured h5p placeholder text.
    placeholder = " " + settings_dict["h5p"] + " "
    self.content = sub(r" ?\[[^)]+\]", placeholder, stripper.html_text)
    stripper.close()
def test_click_goods(self):
    """Open a goods detail page and check HTTP 200 plus that the DB goods name appears."""
    self.http.set_header(cookie)
    mall_goods_id = self.params['id']
    logger.info('正在发起GET请求...')
    self.params['serial'] = global_serial
    # Encode the dict-typed params into a URL query string.
    self.params = urllib.parse.urlencode(self.params)
    response = self.http.get(self.url, self.params)
    status_code = response[2]
    # Parse the HTML document.
    parser = MyHTMLParser(strict=False)
    parser.feed(response[0].decode('utf-8'))
    starttag_data = parser.get_starttag_data()
    # Look up the expected goods name in the database by id.
    query = 'SELECT name FROM mall_goods WHERE id=%s'
    record = saofudb.select_one_record(query, (mall_goods_id, ))
    mall_goods_name = record[0]
    self.expected_result['goods_name'] = mall_goods_name
    goods_name = ''
    for item in starttag_data:
        if mall_goods_name in item[1]:
            # Strip CR/LF/TAB noise; no break, so the last match wins
            # (same flow as the original).
            goods_name = item[1].replace('\r', '').replace('\n', '').replace('\t', '')
    # Assertions
    self.assertEqual(status_code, self.expected_result['status'],
                     msg='http状态码status不等于200')
    self.assertEqual(goods_name, self.expected_result['goods_name'],
                     msg='无法打开商品详情')
def test_pay5(self):
    """Open the stored-value-card payment page and verify its title and pay button."""
    self.http.set_header(cookie)
    self.params['orderId'] = CMOrder.attach
    self.params['serial'] = global_serial
    logger.info('正在发起GET请求...')
    # Encode the dict-typed params into a URL query string.
    self.params = urllib.parse.urlencode(self.params)
    response = self.http.get(self.url, self.params)
    response_headers = response[1]
    response_body = response[0].decode('utf-8')
    # Parse the HTML document.
    parser = MyHTMLParser(strict=False)
    parser.feed(response_body)
    expected_title = self.expected_result['page_title']
    expected_button = self.expected_result['button_name']
    page_title = ''
    button_name = ''
    for item in parser.get_starttag_data():
        text = item[1]
        if text == expected_title:
            page_title = text
        if text == expected_button:
            button_name = text
            # Stop once the pay button is found (matches the original flow:
            # only the button match breaks out of the scan).
            break
    # Assertions
    self.assertEqual(page_title, expected_title,
                     msg='打开页面不是储值卡支付界面')
    self.assertEqual(button_name, expected_button,
                     msg='无法打开确认支付页面')
import os
import json
import time  # FIX: the original called time.sleep(1) without importing time (NameError)

from htmlparser import MyHTMLParser

# Shared curl options (headers, captured session cookies, output file) used by
# every timeline request. Factored out: the original duplicated this blob in
# both request strings.
_CURL_OPTS = (
    " -H 'accept-encoding: gzip, deflate, sdch'"
    " -H 'x-requested-with: XMLHttpRequest'"
    " -H 'accept-language: en-US,en;q=0.8'"
    " -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
    " (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'"
    " -H 'accept: application/json, text/javascript, */*; q=0.01'"
    " -H 'referer: https://twitter.com/search?f=tweets&vertical=default&q=%23ISFP%20lang%3Aar&src=typd'"
    " -H 'authority: twitter.com'"
    " -H 'cookie: guest_id=v1%3A143863298058930111; "
    "mp_c3de24deb6a3f73fba73a616bb625130_mixpanel=%7B%22distinct_id%22%3A%20%22"
    "7851b24a2bf2c7756fe7a387d4a02f3c71ef869d436e1e29099f9a08eefb812c%22%2C%22"
    "isAdmin%22%3A%20false%2C%22isAccountSpending%22%3A%20false%2C%22"
    "serviceLevel%22%3A%20%22null%22%2C%22goalBased%22%3A%20true%7D; eu_cn=1; "
    "kdt=6CB6J5Euwi4vSd6f87cx1xUNoW5QFzRSRJalTKFv; remember_checked_on=1; "
    "auth_token=43da469332b0c6afff154a20904cf4b538412d9b; "
    "pid=\"v3:1458750681895591692855842\"; "
    "__utma=43838368.234037132.1438633240.1459465162.1459465162.1; "
    "__utmz=43838368.1459465162.1.1.utmcsr=t.co|utmccn=(referral)|utmcmd=referral|utmcct=/Hs5aCk1uqi; "
    "lang=en; twitter_ads_id=v1_716178566988214277; "
    "external_referer=padhuUp37zjgzgv1mFWxJ12Ozwit7owX|0; "
    "_ga=GA1.2.234037132.1438633240; _gat=1; "
    "_twitter_sess=BAh7CSIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNo"
    "%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCJXH09NTAToMY3NyZl9p"
    "%250AZCIlNWIxYjdjZjJjY2FmMGQ3Yzc0ZDE4MmNlMmU1OTA1ODE6B2lkIiVkOGIz"
    "%250AY2UwNWVjMTczYzUxMzUwYzc5ZGEzMTU2YmI4Yg%253D%253D--516ce1de0aff3a3c1e0181febb7482f25f4aa224; "
    "ua=\"f5,m2,m5,rweb,msw\"'"
    " --compressed -o a.txt"
)

# max_position for the very first page, captured from a real session.
_FIRST_MAX_POSITION = (
    "TWEET-711340624943906817-718124708412203008-BD1UO2FFu9QAAAAAAAAETAAAAAcAAAASAAAA"
    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
)


def _search_curl(tag, last_note_ts, max_position):
    """Build the curl command fetching one page of %23<tag> lang:ar tweet search results."""
    return (
        "curl 'https://twitter.com/i/search/timeline?f=tweets&vertical=default&q=%23"
        + tag
        + "%20lang%3Aar&src=typd&include_available_features=1&include_entities=1"
        + "&last_note_ts=" + str(last_note_ts)
        + "&max_position=" + max_position
        + "&reset_error_state=false'"
        + _CURL_OPTS
    )


for tag in [
    'INTJ', 'INTP', 'ISFJ', 'ISFP', 'ISTJ', 'ISTP', 'ENFJ', 'ENFP',
    'ENTJ', 'ENTP', 'ESFJ', 'ESFP', 'ESTJ', 'INFJ', 'INFP', 'ESTP',
]:
    ts = 2612
    # First page uses the fixed last_note_ts/max_position from the captured session.
    os.system(_search_curl(tag, 2516, _FIRST_MAX_POSITION))
    pages = 0
    while True:
        pages += 1
        with open('a.txt') as f:
            out = json.loads(f.read())
        last = out["inner"]["min_position"]
        html = out["inner"]["items_html"]
        # Stop when a page comes back empty or after 1000 pages.
        if len(html.replace('\n', '')) == 0 or pages > 1000:
            break
        # Append this page's tweets to <tag>.txt via the parser.
        with open(tag + '.txt', 'ab') as outfile:
            parser = MyHTMLParser(outfile)
            parser.feed(html)
        ts += 1
        # FIX: the original hard-coded q=%23ISFP in the follow-up request, so
        # pagination scraped ISFP tweets for every tag; use the current tag.
        os.system(_search_curl(tag, ts, last))
        time.sleep(1)
import time
import os
import json

from htmlparser import MyHTMLParser

# Shared curl options (headers, captured session cookies, output file) used by
# every timeline request. Factored out: the original duplicated this blob in
# both request strings.
_CURL_OPTS = (
    " -H 'accept-encoding: gzip, deflate, sdch'"
    " -H 'x-requested-with: XMLHttpRequest'"
    " -H 'accept-language: en-US,en;q=0.8'"
    " -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
    " (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'"
    " -H 'accept: application/json, text/javascript, */*; q=0.01'"
    " -H 'referer: https://twitter.com/search?f=tweets&vertical=default&q=%23ISFP%20lang%3Aar&src=typd'"
    " -H 'authority: twitter.com'"
    " -H 'cookie: guest_id=v1%3A143863298058930111; "
    "mp_c3de24deb6a3f73fba73a616bb625130_mixpanel=%7B%22distinct_id%22%3A%20%22"
    "7851b24a2bf2c7756fe7a387d4a02f3c71ef869d436e1e29099f9a08eefb812c%22%2C%22"
    "isAdmin%22%3A%20false%2C%22isAccountSpending%22%3A%20false%2C%22"
    "serviceLevel%22%3A%20%22null%22%2C%22goalBased%22%3A%20true%7D; eu_cn=1; "
    "kdt=6CB6J5Euwi4vSd6f87cx1xUNoW5QFzRSRJalTKFv; remember_checked_on=1; "
    "auth_token=43da469332b0c6afff154a20904cf4b538412d9b; "
    "pid=\"v3:1458750681895591692855842\"; "
    "__utma=43838368.234037132.1438633240.1459465162.1459465162.1; "
    "__utmz=43838368.1459465162.1.1.utmcsr=t.co|utmccn=(referral)|utmcmd=referral|utmcct=/Hs5aCk1uqi; "
    "lang=en; twitter_ads_id=v1_716178566988214277; "
    "external_referer=padhuUp37zjgzgv1mFWxJ12Ozwit7owX|0; "
    "_ga=GA1.2.234037132.1438633240; _gat=1; "
    "_twitter_sess=BAh7CSIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNo"
    "%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCJXH09NTAToMY3NyZl9p"
    "%250AZCIlNWIxYjdjZjJjY2FmMGQ3Yzc0ZDE4MmNlMmU1OTA1ODE6B2lkIiVkOGIz"
    "%250AY2UwNWVjMTczYzUxMzUwYzc5ZGEzMTU2YmI4Yg%253D%253D--516ce1de0aff3a3c1e0181febb7482f25f4aa224; "
    "ua=\"f5,m2,m5,rweb,msw\"'"
    " --compressed -o a.txt"
)

# max_position for the very first page, captured from a real session.
_FIRST_MAX_POSITION = (
    "TWEET-711340624943906817-718124708412203008-BD1UO2FFu9QAAAAAAAAETAAAAAcAAAASAAAA"
    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
)


def _search_curl(tag, last_note_ts, max_position):
    """Build the curl command fetching one page of %23<tag> lang:ar tweet search results."""
    return (
        "curl 'https://twitter.com/i/search/timeline?f=tweets&vertical=default&q=%23"
        + tag
        + "%20lang%3Aar&src=typd&include_available_features=1&include_entities=1"
        + "&last_note_ts=" + str(last_note_ts)
        + "&max_position=" + max_position
        + "&reset_error_state=false'"
        + _CURL_OPTS
    )


for tag in [
    'INTJ', 'INTP', 'ISFJ', 'ISFP', 'ISTJ', 'ISTP', 'ENFJ', 'ENFP',
    'ENTJ', 'ENTP', 'ESFJ', 'ESFP', 'ESTJ', 'INFJ', 'INFP', 'ESTP',
]:
    ts = 2612
    # First page uses the fixed last_note_ts/max_position from the captured session.
    os.system(_search_curl(tag, 2516, _FIRST_MAX_POSITION))
    pages = 0
    while True:
        pages += 1
        with open('a.txt') as f:
            out = json.loads(f.read())
        last = out["inner"]["min_position"]
        html = out["inner"]["items_html"]
        # Stop when a page comes back empty or after 1000 pages.
        if len(html.replace('\n', '')) == 0 or pages > 1000:
            break
        # Append this page's tweets to <tag>.txt via the parser.
        with open(tag + '.txt', 'ab') as outfile:
            parser = MyHTMLParser(outfile)
            parser.feed(html)
        ts += 1
        # FIX: the original hard-coded q=%23ISFP in the follow-up request, so
        # pagination scraped ISFP tweets for every tag; use the current tag.
        os.system(_search_curl(tag, ts, last))
        time.sleep(1)