def get_title(domain, timeout=None):
    """Fetch the ``<title>`` element of the site behind ``domain``.

    The site is first opened with ``requests`` (following redirects) to
    learn the final URL. The page is then requested a second time through
    the project's ``get`` helper on port 80, using the ``__http_header``
    template with its ``{host}`` and ``{url}`` placeholders filled in, and
    the title is extracted from that second response.

    Args:
        domain: Host, optionally followed by a path (e.g.
            ``"example.com/index.html"``). Surrounding whitespace is
            ignored.
        timeout: Optional timeout in seconds passed to ``requests.get``.
            ``None`` (the default) keeps the previous behaviour of waiting
            indefinitely. It does not affect the ``get`` helper.

    Returns:
        A tuple ``(url, u_title, host)`` on success, where ``url`` is the
        part of the final (post-redirect) URL that follows
        ``http://<host>``, ``u_title`` is the first value returned by
        ``x2Unicode`` for the matched title text (the match includes the
        surrounding ``<title>`` tags), and ``host`` is ``domain_t[0]``.
        Returns ``0`` when anything inside the ``try`` block fails, e.g.
        the site cannot be opened, the final URL is on a different
        host/scheme, or no title is found.
    """
    domain_t = IP._getDomainStr(domain)
    try:
        host = domain_t[0]
        target = 'http://' + domain.strip()

        # Escape the host so that '.' in the domain only matches a literal
        # dot instead of any character.
        _re_url = re.compile(re.escape(u'http://' + unicode(host)) + u'(.*)')
        # NOTE(review): case-sensitive and '.' does not match newlines, so
        # upper-case or multi-line <title> elements are not found.
        _re_title = re.compile(u'<title>.*</title>')

        r = requests.get(
            target, verify=False, allow_redirects=True, timeout=timeout)
        # Raises IndexError (-> returns 0) if the redirect chain ended on a
        # URL that does not contain http://<host>.
        url = _re_url.findall(r.url)[0]

        # Path part of the requested URL; use the stripped form so stray
        # whitespace neither breaks the match nor leaks into the request.
        get1 = _re_url.findall(target)[0]
        if get1 == '':
            get1 = '/'

        r1 = get(
            host, 80,
            __http_header.replace('{host}', host).replace('{url}', get1))
        r2, _ = x2Unicode(r1)
        title = _re_title.findall(r2)[0]

        # Only decode byte strings: calling .decode() on a unicode object
        # makes Python 2 ASCII-encode it first, which fails for any
        # non-ASCII title. (Assumes x2Unicode may return either type.)
        if isinstance(title, unicode):
            shown = title
        else:
            shown = title.decode('utf-8')
        print('Get key words:\n"' + shown + '"')

        u_title, _ = x2Unicode(title)
        return url, u_title, host
    except Exception as a:
        # Best-effort: report the failure and signal it with 0.
        print(a.message)
        return 0