from analyzer import KNNAnalyzer
import random
import urllib


def getImage(url, file_path):
    """Download the resource at ``url`` and save its raw bytes to ``file_path``.

    Args:
        url: Address fetched with ``urllib.urlopen``.
        file_path: Destination path, opened in binary write mode (an
            existing file is overwritten).
    """
    response = urllib.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()
    # ``with`` guarantees the file is closed even if the write raises.
    with open(file_path, 'wb') as f:
        f.write(data)


# NOTE(review): NormalSegmenter and SimpleFeatureExtractor are not imported in
# the visible code -- confirm they are in scope before running this script.
segmenter = NormalSegmenter()
extractor = SimpleFeatureExtractor(feature_size=20, stretch=False)
analyzer = KNNAnalyzer(segmenter, extractor)
analyzer.train('../data/features.jpg')

# Fetch four validation-code images, one at a time, and run the analyzer on
# each.  The random query value presumably defeats caching -- TODO confirm.
for i in range(4):
    rand = random.random()
    url = "http://202.119.113.135/validateCodeAction.do?random=" + str(rand)
    print(url)
    file_path = "../train/crawler.jpg"
    getImage(url, file_path)
    result = analyzer.analyze('../train/crawler.jpg')
    print(result)
    analyzer.display()
    analyzer.display_binary()
from analyzer import KNNAnalyzer
import random
import urllib


def getImage(url, file_path):
    """Fetch ``url`` and write the response body to ``file_path`` as bytes.

    Args:
        url: Address fetched with ``urllib.urlopen``.
        file_path: Destination path, opened in binary write mode (an
            existing file is overwritten).
    """
    response = urllib.urlopen(url)
    try:
        data = response.read()
    finally:
        # Always release the connection, including when the read fails.
        response.close()
    # The context manager closes the file even if the write raises.
    with open(file_path, 'wb') as f:
        f.write(data)


# NOTE(review): NormalSegmenter and SimpleFeatureExtractor are not imported in
# the visible code -- confirm they are in scope before running this script.
segmenter = NormalSegmenter()
extractor = SimpleFeatureExtractor(feature_size=20, stretch=False)
analyzer = KNNAnalyzer(segmenter, extractor)
analyzer.train('../data/features.jpg')

# Single pass: download one validation-code image and print the analysis
# result.  Debug output (URL echo, analyzer.display(),
# analyzer.display_binary()) is intentionally disabled in this variant.
for i in range(1):
    rand = random.random()
    url = "http://202.119.113.135/validateCodeAction.do?random=" + str(rand)
    file_path = "../train/crawler.jpg"
    getImage(url, file_path)
    result = analyzer.analyze('../train/crawler.jpg')
    print(result)
def getSource(uid, pwd):
    """Log in with the given credentials and scrape the rows of ``all_url``.

    Builds a ``KNNAnalyzer`` trained on ``../data/features.jpg``, then calls
    ``login`` up to 10 times until it stops returning ``False``.  On success
    the page at ``all_url`` is fetched with ``getHtml`` and the text of every
    ``<td>`` inside each ``<tr class="odd">`` is collected.

    Side effects: rebinds the module-level globals ``headers`` and
    ``postData``; prints the number of remaining attempts on success, or a
    hint to check the credentials after the last failed attempt.

    Args:
        uid: Account identifier, sent as the ``zjh`` form field.
        pwd: Password, sent as the ``mm`` form field.

    Returns:
        A list of rows, each row a list of UTF-8 encoded cell strings, or
        ``None`` when all login attempts fail.
    """
    # Set up the classifier handed to login().
    segmenter = NormalSegmenter()
    extractor = SimpleFeatureExtractor(feature_size=20, stretch=False)
    analyzer = KNNAnalyzer(segmenter, extractor)
    analyzer.train('../data/features.jpg')

    # Endpoints used for the simulated login and the follow-up page fetch.
    login_url = "http://202.119.113.135/loginAction.do"
    vcode_url = 'http://202.119.113.135/validateCodeAction.do?random=0.2583906068466604'
    all_url = "http://202.119.113.135/gradeLnAllAction.do?type=ln&oper=sxinfo&lnsxdm=001"

    global headers
    global postData

    # POST request headers.
    # NOTE(review): 'Host' and 'Referer' name login.taobao.com although every
    # request targets 202.119.113.135, and 'x-requestted-with', 'ContentType'
    # and 'chartset' look misspelled.  Left byte-identical because the
    # server's tolerance is unknown -- confirm before correcting.
    headers = {
        'x-requestted-with': 'XMLHttpRequest',
        'Accept-Language': 'zh-cn',
        'Accept-Encoding': 'gzip, deflate',
        'ContentType': 'application/x-www-form-urlencoded; chartset=UTF-8',
        'Host': 'login.taobao.com',
        'DNT': 1,
        'Cache-Control': 'no-cache',
        'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1',
        'Referer' : 'https://login.taobao.com/member/login.jhtml?redirectURL=http%3A%2F%2Fwww.taobao.com%2F',
        'Connection' : 'Keep-Alive'
    }

    # Form payload: user name and password.
    postData = {
        'zjh': uid,
        'mm': pwd,
    }

    # Retry the login; login() is called at most 10 times.  The comparison is
    # kept as ``== False`` so any other return value still counts as success,
    # exactly as before.
    test_number = 10
    while test_number > 0 and login(login_url, vcode_url, postData, headers, analyzer) == False:
        test_number -= 1

    if test_number == 0:
        print("please check out your uid and password!!")
        return None

    page = getHtml(all_url)
    print(test_number)
    page = page.encode('utf-8')
    soup = BeautifulSoup(page)

    lists = []
    for tr in soup.findAll('tr', {'class': 'odd'}):
        cells = []
        for td in tr.findAll('td'):
            # Prefer the text of a nested <p> when the cell has one.
            node = td.p or td
            # ``.string`` is None for empty cells or cells holding more than
            # one child; treat those as empty text instead of crashing.
            text = node.string or u''
            cells.append(text.strip().encode('utf-8'))
        lists.append(cells)
    return lists
cookieSupport= urllib2.HTTPCookieProcessor(cookiejar) opener = urllib2.build_opener(cookieSupport, urllib2.HTTPHandler) urllib2.install_opener(opener) #打开登陆页面 urllib2.urlopen(login_url) #此时直接发送post数据包登录 getCheckCode(vcode_url) success_test, failed_test = 0, 0 return sendPostData(login_url, postData, headers) if __name__ == "__main__": print "初始化分类器..." segmenter = NormalSegmenter() extractor = SimpleFeatureExtractor( feature_size=20, stretch=False ) analyzer = KNNAnalyzer( segmenter, extractor) analyzer.train('../data/features.jpg') print "开始模拟登录..." login_url = "http://202.119.113.135/loginAction.do" vcode_url = 'http://202.119.113.135/validateCodeAction.do?random=0.2583906068466604' #post请求头部 headers = { 'x-requestted-with': 'XMLHttpRequest', 'Accept-Language': 'zh-cn', 'Accept-Encoding': 'gzip, deflate', 'ContentType': 'application/x-www-form-urlencoded; chartset=UTF-8', 'Host': 'login.taobao.com', 'DNT': 1, 'Cache-Control': 'no-cache',