Esempio n. 1
0
def get_title(domain):
    """Fetch the ``<title>`` markup of *domain* through a hand-built request.

    The site is first opened with ``requests`` (redirects followed,
    certificate verification disabled) to learn the final URL.  The page is
    then requested again through the module's raw ``get`` helper on port 80,
    using ``__http_header`` with its ``{host}`` and ``{url}`` placeholders
    filled in, and the first ``<title>.*</title>`` match is taken from that
    second response.

    Args:
        domain: Site to probe, given without the ``http://`` scheme; whatever
            follows the host is used as the request path.  Surrounding
            whitespace is ignored.

    Returns:
        A tuple ``(url, title, host)`` on success:

        * ``url`` -- the part of the final (post-redirect) URL that follows
          ``http://<host>``;
        * ``title`` -- the first value returned by ``x2Unicode`` for the
          matched title markup (the ``<title>`` tags are included);
        * ``host`` -- element 0 of ``IP._getDomainStr(domain)``.

        ``0`` is returned when anything inside the lookup fails, e.g. the
        site cannot be opened, the final URL does not start with
        ``http://<host>``, or no title is found.
    """
    # Opaque project helper; only element 0 is used here (presumably the
    # bare host name -- confirm against IP._getDomainStr).
    domain_t = IP._getDomainStr(domain)

    try:
        host = domain_t[0]
        http_domain = u'http://' + unicode(host)
        # Escape the prefix so the dots in the host are matched literally
        # instead of acting as "any character" wildcards.
        _re_url = re.compile(re.escape(http_domain) + u'(.*)')
        # Greedy, single-line, case-sensitive match; kept unchanged so the
        # returned text stays comparable with titles extracted elsewhere.
        _re_title = re.compile(u'<title>.*</title>')

        # Build the URL once from the stripped input so that the request and
        # the path extraction below always agree.
        request_url = 'http://' + domain.strip()

        # NOTE(review): no timeout is passed, so an unresponsive host blocks
        # this call until the connection is dropped.
        r = requests.get(request_url, verify=False, allow_redirects=True)
        url = _re_url.findall(r.url)[0]

        # Path part of the requested URL; an empty path means the site root.
        path = _re_url.findall(request_url)[0] or '/'

        header = __http_header.replace('{host}', host).replace('{url}', path)
        raw_response = get(host, 80, header)
        page, _ = x2Unicode(raw_response)
        title = _re_title.findall(page)[0]

        # Calling .decode() on a unicode object makes Python 2 encode it to
        # ASCII first, which fails for non-ASCII titles; only decode bytes.
        if isinstance(title, unicode):
            shown = title
        else:
            shown = title.decode('utf-8')
        print('Get key words:\n"' + shown + '"')

        u_title, _ = x2Unicode(title)
        return url, u_title, host
    except Exception as err:
        # Deliberate best-effort: report the problem and signal failure with
        # 0 rather than propagating the exception to the caller.
        print(err.message)
        return 0