Example #1
0
    def _work(self, entry_url):
        """Fetch ``entry_url`` and collect the same-netloc URLs found in its HTML.

        A 301/302/307 answer is followed by hand for one hop, but only when
        the target is on the same netloc and is not yet recorded in
        ``self.done_url_list``. Any exception is logged and turned into an
        empty ``URLsimilarList``.

        Returns:
            A ``URLsimilarList`` holding a ``URLinfo`` for every extracted
            link that shares the netloc of the (possibly redirected) URL.
        """
        try:
            logger.info("[{}] req = > {}".format(len(self.done_url_list),
                                                 entry_url))
            if utils.url_ext(entry_url) in self.ignore_ext:
                return URLsimilarList()

            response = utils.http_req(entry_url)
            if response.status_code in (301, 302, 307):
                location = response.headers.get("Location", "")
                target = utils.normal_url(urljoin(entry_url, location).strip())
                if target is None:
                    return URLsimilarList()

                redirect_info = URLinfo(entry_url, target, URLTYPE.document)
                same_site = utils.same_netloc(entry_url, target)
                if same_site and redirect_info not in self.done_url_list:
                    # From here on, relative links resolve against the target.
                    entry_url = target
                    logger.info("[{}] req 302 = > {}".format(
                        len(self.done_url_list), entry_url))
                    response = utils.http_req(target)
                    self.done_url_list.add(redirect_info)
                    self.all_url_list.add(redirect_info)

            body = response.content
            content_type = response.headers.get("Content-Type", "")
            if "html" not in content_type.lower():
                return URLsimilarList()

            document = pq(body)
            found = URLsimilarList()
            for tag in self.tagMap:
                for node in document(tag['name']).items():
                    raw_link = urljoin(entry_url, node.attr(tag['attr']))
                    candidate = utils.normal_url(raw_link.strip())
                    if candidate is None:
                        continue
                    link_type = tag["type"]
                    if not utils.same_netloc(candidate, entry_url):
                        continue
                    link_info = URLinfo(entry_url, candidate, link_type)
                    found.add(link_info)
                    self.all_url_list.add(link_info)

            return found
        except Exception as err:
            logger.error("error on {} {}".format(entry_url, err))
            return URLsimilarList()
Example #2
0
 def quota(self):
     """Query ``self.quota_api`` and return the ``search_api`` usage figures.

     Returns:
         A ``(count, limit)`` tuple taken from ``user.counts.search_api`` and
         ``user.limits.search_api`` of the JSON response.
     """
     credentials = (self.auth_email, self.auth_key)
     user = utils.http_req(self.quota_api, auth=credentials).json()["user"]
     return user["counts"]["search_api"], user["limits"]["search_api"]
Example #3
0
    def work(self, site):
        """Gather basic facts about ``site`` and append them to ``self.site_info_list``.

        When the answer is a 301/302 whose resolved ``Location`` starts with
        ``site`` without being identical to it, the redirect target is
        processed recursively as well.
        """
        _, hostname, _ = get_host(site)

        resp = utils.http_req(site)
        info = dict(
            site=site,
            hostname=hostname,
            ip="",
            title=utils.get_title(resp.content),
            status=resp.status_code,
            headers=utils.get_headers(resp),
            http_server=resp.headers.get("Server", ""),
            body_length=len(resp.content),
            finger=[],
            favicon=fetch_favicon(site),
        )

        parsed = utils.domain_parsed(hostname)
        if not parsed:
            # The hostname did not parse as a domain; it is stored as the IP.
            info["ip"] = hostname
        else:
            info["fld"] = parsed["fld"]
            addresses = utils.get_ip(hostname)
            if addresses:
                info["ip"] = addresses[0]

        self.site_info_list.append(info)

        if resp.status_code in (301, 302):
            redirect = urljoin(site, resp.headers.get("Location", ""))
            if redirect != site and redirect.startswith(site):
                self.work(redirect)
Example #4
0
def get_domains():
    """Fetch the ``altdns`` domain list of a fixed task and probe it over HTTP.

    Returns:
        Whatever ``services.probe_http`` returns for the fetched domain names.
    """
    url = "http://10.0.83.77:5018/domain/?task_id=5f2298aa6591e770f69e8f62&source=altdns&size=2000"
    payload = utils.http_req(url).json()
    domains = []
    for entry in payload["items"]:
        domains.append(entry["domain"])
    print(domains)
    return services.probe_http(domains)
Example #5
0
 def __init__(self, cluster):
     """Log in to the Sunstone endpoint of ``cluster`` and keep the session cookie.

     ``self.api`` is ``cluster.sunstone_api`` with any ``http://`` removed.
     ``self.headers`` becomes a ``Cookie`` header built from the first
     ``;``-separated part of the login response's ``Set-Cookie`` value
     (the last such header wins when several are present).

     Raises:
         RuntimeError: if the login response carries no ``Set-Cookie`` header.
     """
     self.api = cluster.sunstone_api.replace('http://', '')
     cookie_req = http_req(host=self.api, uri='/login', method='POST',
                           headers={"Authorization": cluster.sunstone_auth})
     cookie = None
     for name, value in cookie_req.getheaders():
         # Header names are case-insensitive; compare lowercased so that
         # "Set-Cookie" and "set-cookie" are both recognised.
         if name.lower() == "set-cookie":
             cookie = value.split(';')[0]
     if cookie is None:
         # Previously this surfaced as an UnboundLocalError on ``cookie``.
         raise RuntimeError("login response contained no Set-Cookie header")
     self.headers = {"Cookie": cookie}
Example #6
0
    def search(self, domain):
        """Query ``self.url`` for ``domain`` and return the decoded JSON response."""
        query = {"output": "json", "q": domain}
        response = utils.http_req(self.url, 'get', params=query,
                                  timeout=(30.1, 50.1))
        return response.json()
Example #7
0
 def result_num(self):
     """Fetch the first result page and extract the total number of results.

     The page text is cached in ``self.first_html`` and the parsed number in
     ``self.search_result_num``.

     Returns:
         The result count as an ``int``; ``0`` when ``self.num_pattern`` does
         not match anything in the page.
     """
     url = self.search_url.format(page=0, keyword=quote(self.keyword))
     html = utils.http_req(url).text
     self.first_html = html
     matches = re.findall(self.num_pattern, html)
     # A page without a counter used to raise IndexError on ``matches[0]``;
     # treat it as "no results" instead.
     # Thousands separators (",") are stripped before converting.
     num = int(matches[0].replace(",", "")) if matches else 0
     self.search_result_num = num
     return num
Example #8
0
 def get_favicon_data(self, favicon_url):
     """Download ``favicon_url`` and return its encoded body when it is an image.

     For URLs containing ``/favicon.ico`` the ``Content-Type`` must be exactly
     ``image/x-icon``; for any other URL it only has to contain ``image``.

     Returns:
         The result of ``self.encode_bas64_lines`` on the response body, or
         ``None`` when the content type is not accepted.
     """
     resp = http_req(favicon_url)
     content_type = resp.headers.get("Content-Type", "")
     if "/favicon.ico" in favicon_url:
         accepted = content_type == "image/x-icon"
     else:
         accepted = "image" in content_type
     if not accepted:
         return None
     return self.encode_bas64_lines(resp.content)
Example #9
0
 def __init__(self, cluster):
     """Authenticate against the cluster's Sunstone API and store the cookie.

     ``self.api`` is ``cluster.sunstone_api`` without ``http://``. The login
     response's ``Set-Cookie`` value, cut at the first ``;``, is stored as
     the ``Cookie`` entry of ``self.headers`` (the last matching header wins).

     Raises:
         RuntimeError: if no ``Set-Cookie`` header is present in the response.
     """
     self.api = cluster.sunstone_api.replace('http://', '')
     cookie_req = http_req(host=self.api,
                           uri='/login',
                           method='POST',
                           headers={"Authorization": cluster.sunstone_auth})
     cookie = None
     for name, value in cookie_req.getheaders():
         # Match the header name case-insensitively; HTTP header names are
         # not case-sensitive and the reported casing varies.
         if name.lower() == "set-cookie":
             cookie = value.split(';')[0]
     if cookie is None:
         # Fail with a clear message instead of an UnboundLocalError.
         raise RuntimeError("login response contained no Set-Cookie header")
     self.headers = {"Cookie": cookie}
Example #10
0
    def check(self, url):
        """Probe ``url`` with a HEAD request and summarise the answer.

        Returns:
            ``None`` for status 400, for statuses 501-599, and for a 403 whose
            GET body contains a known marker fragment; otherwise a dict with
            the ``status`` code and the ``content-type`` header value.
        """
        head = utils.http_req(url, method="head", timeout=self.timeout)
        status = head.status_code
        if status == 400 or 501 <= status < 600:
            return None

        if status == 403:
            # A second, full request is needed to look at the body.
            page = utils.http_req(url)
            marker = b'</title><style type="text/css">body{margin:5% auto 0 auto;padding:0 18px}'
            if marker in page.content:
                return None

        return {
            "status": status,
            "content-type": head.headers.get("Content-Type", ""),
        }
Example #11
0
    def get_favicon_data(self, favicon_url):
        """Download ``favicon_url`` and return its encoded body when usable.

        Returns:
            The result of ``self.encode_bas64_lines`` on the body, or ``None``
            when the status is not 200, the body has 80 bytes or fewer, or the
            ``Content-Type`` does not contain ``image``.
        """
        resp = http_req(favicon_url)
        if resp.status_code != 200:
            return None

        body = resp.content
        if len(body) <= 80:
            # NOTE(review): the message says 100 but the threshold is 80.
            logger.debug("favicon content len lt 100")
            return None

        if "image" not in resp.headers.get("Content-Type", ""):
            return None
        return self.encode_bas64_lines(body)
Example #12
0
def get_urls():
    """Download a fixed site listing and write its unique URLs to a file.

    The number of returned items is printed, then every distinct ``site``
    value is written on its own line to ``../arl_tool/urls2.txt``.
    """
    url = "http://10.0.83.77:5018/site/?page=1&hostname=baidu.com&size=6000"
    items = utils.http_req(url).json()["items"]
    print(len(items))
    urls = [item["site"] for item in items]

    with open("../arl_tool/urls2.txt", "w") as f:
        f.writelines(x + "\n" for x in set(urls))
Example #13
0
 def test_fetch_fingerprint(self):
     # Checks fetch_fingerprint against four hand-written rules, one per rule
     # field (html, title, headers, favicon_hash), placed ahead of the rules
     # returned by load_fingerprint(). The page is fetched from baidu.com.
     def make_finger(name, html=(), title=(), headers=(), favicon_hash=()):
         # Build one fingerprint entry; omitted rule fields become empty lists.
         return {
             "name": name,
             "rule": {
                 "html": list(html),
                 "title": list(title),
                 "headers": list(headers),
                 "favicon_hash": list(favicon_hash),
             },
         }

     site = "https://www.baidu.com/"
     response = utils.http_req(site)
     page_headers = utils.get_headers(response)
     page_title = utils.get_title(response.content)

     finger_list = [
         make_finger("百度测试", html=["百度"]),
         make_finger("百度测试2", title=["百度222222", "百度"]),
         make_finger("百度测试3", headers=["TTTBAIDUIDTTT", "BAIDUID"]),
         make_finger("百度测试4", favicon_hash=[789, 123456]),
     ]
     finger_list.extend(load_fingerprint())

     result = fetch_fingerprint(content=response.content,
                                headers=page_headers,
                                title=page_title,
                                favicon_hash=789,
                                finger_list=finger_list)
     self.assertTrue(len(result) >= 4)
     self.assertTrue(result[0] == finger_list[0]["name"])
     self.assertTrue(result[3] == finger_list[3]["name"])
Example #14
0
 def match_urls(self, html):
     """Resolve the result links in ``html`` to their redirect targets.

     Every element selected by ``self.pq_query`` contributes its ``href``;
     each one is requested with HEAD and the ``Location`` header, when
     present, is collected. Failures are logged and skipped.

     Returns:
         A list of the distinct ``Location`` values.
     """
     hrefs = [node.attr("href") for node in pq(html)(self.pq_query).items()]
     targets = set()
     for href in hrefs:
         try:
             location = utils.http_req(href, "head").headers.get('Location')
             if location:
                 targets.add(location)
         except Exception as e:
             logger.exception(e)
     return list(targets)
Example #15
0
    def search_subdomain(self, target):
        """Query ``self.subdomain_api`` for subdomains of ``target``.

        Each entry of the response's ``subdomains`` list has leading/trailing
        ``*`` and ``.`` characters stripped and is suffixed with ``target``;
        names rejected by ``utils.domain_parsed`` are dropped.

        Returns:
            A de-duplicated list of full domain names.
        """
        resp = utils.http_req(self.subdomain_api,
                              params={"query": "*.{}".format(target)},
                              auth=(self.auth_email, self.auth_key),
                              timeout=(20, 120))
        payload = resp.json()

        candidates = ("{}.{}".format(label.strip("*."), target)
                      for label in payload['subdomains'])
        valid = [name for name in candidates if utils.domain_parsed(name)]
        return list(set(valid))
Example #16
0
    def find_icon_url_from_html(self):
        """Return the absolute icon URL declared by the page at ``self.url``.

        ``<link>`` elements that have an ``href`` and whose ``rel`` contains
        ``icon`` are candidates. A candidate whose ``rel`` also contains
        ``shortcut`` is preferred; otherwise the first candidate is used.

        Returns:
            The icon URL joined against ``self.url``, or ``None`` when the
            page has no ``<link`` markup or no icon link.
        """
        conn = http_req(self.url)
        if b"<link" not in conn.content:
            return None

        d = pq(conn.content)
        candidates = []
        for link in d('link').items():
            href = link.attr("href")
            # ``rel`` may be absent (attr() then yields None); treat it as
            # empty instead of failing on the ``in`` test. Compare lowercased
            # so that e.g. "Shortcut Icon" is recognised too.
            rel = (link.attr("rel") or "").lower()
            if href and 'icon' in rel:
                candidates.append((rel, href))

        if not candidates:
            return None

        # The original tested ``"shortcut" in link`` (the element object),
        # which never matched; the ``rel`` value is what has to be inspected.
        for rel, href in candidates:
            if "shortcut" in rel:
                return urljoin(self.url, href)

        return urljoin(self.url, candidates[0][1])
Example #17
0
 def run(self):
     """Collect result URLs across the search result pages.

     ``self.result_num()`` is called first; it fills ``self.first_html`` and
     ``self.search_result_num``. Page 1 is parsed from that cached HTML,
     later pages are fetched after sleeping ``self.default_interval``
     seconds. The number of pages is bounded by both the reported result
     count and ``self.page_num``.

     Returns:
         A list with the URLs returned by ``self.match_urls`` for every
         processed page, first page included.
     """
     self.result_num()
     logger.info("baidu search {} results found for keyword {}".format(
         self.search_result_num, self.keyword))
     urls = []
     last_page = min(int(self.search_result_num / 10) + 2, self.page_num + 1)
     for page in range(1, last_page):
         if page == 1:
             _urls = self.match_urls(self.first_html)
             logger.info("baidu firsturl result {}".format(len(_urls)))
         else:
             time.sleep(self.default_interval)
             url = self.search_url.format(page=(page - 1) * 10,
                                          keyword=quote(self.keyword))
             html = utils.http_req(url).text
             _urls = self.match_urls(html)
             logger.info("baidu search url {}, result {}".format(
                 url, len(_urls)))
         # Accumulate for every page; this used to sit inside the ``else``
         # branch, which silently dropped the first page's results.
         urls.extend(_urls)
     return urls
Example #18
0
 def match_urls(self, html):
     """Resolve the result links in ``html`` to their redirect targets.

     The ``href`` of every element selected by ``self.pq_query`` is joined
     against ``self.base_search_url`` and requested with HEAD (redirects not
     followed, certificate verification off). The ``Location`` header, when
     present, is collected; any request error skips that link.

     Returns:
         A list of the distinct ``Location`` values.
     """
     hrefs = [
         urljoin(self.base_search_url, node.attr("href"))
         for node in pq(html)(self.pq_query).items()
     ]
     targets = set()
     for href in hrefs:
         try:
             response = utils.http_req(href,
                                       "head",
                                       allow_redirects=False,
                                       verify=False)
             location = response.headers.get('Location')
             if location:
                 targets.add(location)
         except Exception:
             continue
     return list(targets)
Example #19
0
    def req(self):
        """Stream ``self.url.url`` and keep at most ``self.max_length`` bytes.

        Reading stops once ``self.read_timeout`` seconds have elapsed since
        the first read or once the size cap is reached. The response object
        is stored in ``self.conn``; its ``Content-Length`` header is filled
        in with the captured length when the server did not send one. The
        connection is closed before returning, also when reading fails.

        Returns:
            A ``(status_code, content)`` tuple, also stored on ``self``.
        """
        conn = utils.http_req(self.url.url, 'get', timeout=(3, 6), stream=True)
        self.conn = conn
        try:
            # Convert once so the loop check and the final slice agree.
            limit = int(self.max_length)
            chunks = []
            received = 0
            start_time = time.time()
            for data in conn.iter_content(chunk_size=512):
                if time.time() - start_time >= self.read_timeout:
                    break
                # Collect chunks and join once instead of re-copying the
                # whole buffer on every ``+=``.
                chunks.append(data)
                received += len(data)
                if received >= limit:
                    break

            self.status_code = conn.status_code
            self.content = b''.join(chunks)[:limit]

            content_len = conn.headers.get("Content-Length",
                                           len(self.content))
            conn.headers["Content-Length"] = content_len
        finally:
            # Release the streamed connection even if reading raised.
            conn.close()

        return self.status_code, self.content
Example #20
0
def dingding_send(msg, access_token, secret, msgtype="text", title="灯塔消息推送"):
    """Post ``msg`` to a DingTalk robot webhook using a signed URL.

    The signature is the URL-quoted base64 of an HMAC-SHA256 computed over
    ``"<timestamp>\\n<secret>"`` with ``secret`` as key, where the timestamp
    is the current time in milliseconds. The JSON body always carries both a
    ``text`` and a ``markdown`` section; ``msgtype`` is passed through as is.

    Returns:
        The decoded JSON body of the webhook response.
    """
    timestamp = str(round(time.time() * 1000))
    key = secret.encode('utf-8')
    to_sign = '{}\n{}'.format(timestamp, secret).encode('utf-8')
    digest = hmac.new(key, to_sign, digestmod=hashlib.sha256).digest()
    sign = urllib.parse.quote_plus(base64.b64encode(digest))

    base_url = "https://oapi.dingtalk.com/robot/send?access_token={}".format(access_token)
    ding_url = base_url + "&timestamp={}&sign={}".format(timestamp, sign)

    body = {
        "msgtype": msgtype,
        "text": {"content": msg},
        "markdown": {"title": title, "text": msg},
    }
    return http_req(ding_url, method='post', json=body).json()
Example #21
0
 def _api(self, url):
     """GET ``url`` with ``self.param`` as query parameters and return the decoded JSON."""
     response = utils.http_req(url, 'get', params=self.param)
     return response.json()
Example #22
0
File: occi.py Project: stamhe/apc
 def compute_delete(self, id):
     """Send a DELETE request for ``/compute/<id>`` and return the ``http_req`` result."""
     target = '/compute/%s' % id
     return http_req(host=self.api,
                     uri=target,
                     method='DELETE',
                     headers=self.headers)
Example #23
0
File: occi.py Project: stamhe/apc
 def compute_create(self, template):
     """POST ``template`` as ``params`` to ``/compute`` and return the ``http_req`` result."""
     response = http_req(host=self.api,
                         uri='/compute',
                         method='POST',
                         params=template,
                         headers=self.headers)
     return response
Example #24
0
 def host(self):
     """Request ``/host`` on ``self.api`` with the stored headers and return the result."""
     response = http_req(host=self.api,
                         uri='/host',
                         headers=self.headers)
     return response
Example #25
0
 def host(self):
     """Return the ``http_req`` result for the ``/host`` URI of ``self.api``."""
     request_args = dict(host=self.api, uri='/host', headers=self.headers)
     return http_req(**request_args)
Example #26
0
 def vm_action(self, vm_id, params):
     """POST ``params`` to ``/vm/<vm_id>/action`` and return the ``http_req`` result."""
     action_uri = '/vm/%s/action' % vm_id
     return http_req(host=self.api, uri=action_uri, method='POST',
                     params=params, headers=self.headers)
Example #27
0
 def vm_id(self, vm_id):
     """Request ``/vm/<vm_id>`` on ``self.api`` and return the ``http_req`` result."""
     vm_uri = '/vm/%s' % vm_id
     return http_req(host=self.api, uri=vm_uri, headers=self.headers)
Example #28
0
 def vm(self):
     """Request ``/vm`` on ``self.api`` with the stored headers and return the result."""
     response = http_req(host=self.api,
                         uri='/vm',
                         headers=self.headers)
     return response
Example #29
0
 def datastore_id(self, ds_id):
     """Request ``/datastore/<ds_id>`` on ``self.api`` and return the ``http_req`` result."""
     ds_uri = '/datastore/%s' % ds_id
     return http_req(host=self.api, uri=ds_uri, headers=self.headers)
Example #30
0
 def image_id(self, image_id, method='GET'):
     """Issue ``method`` (default GET) on ``/image/<image_id>`` and return the ``http_req`` result."""
     image_uri = '/image/%s' % image_id
     return http_req(host=self.api,
                     uri=image_uri,
                     method=method,
                     headers=self.headers)
Example #31
0
 def vm(self):
     """Return the ``http_req`` result for the ``/vm`` URI of ``self.api``."""
     request_args = dict(host=self.api, uri='/vm', headers=self.headers)
     return http_req(**request_args)
Example #32
0
 def datastore_id(self, ds_id):
     """Return the ``http_req`` result for ``/datastore/<ds_id>`` on ``self.api``."""
     response = http_req(host=self.api,
                         uri=('/datastore/%s' % ds_id),
                         headers=self.headers)
     return response
Example #33
0
 def vnet(self):
     """Request ``/vnet`` on ``self.api`` with the stored headers and return the result."""
     response = http_req(host=self.api,
                         uri='/vnet',
                         headers=self.headers)
     return response
Example #34
0
 def image_action(self, image_id, params, method='POST'):
     """Send ``params`` to ``/image/<image_id>/action`` using ``method`` (default POST).

     Returns:
         Whatever ``http_req`` returns for the request.
     """
     action_uri = '/image/%s/action' % image_id
     return http_req(host=self.api,
                     uri=action_uri,
                     method=method,
                     params=params,
                     headers=self.headers)
Example #35
0
 def datastore(self):
     """Request ``/datastore`` on ``self.api`` with the stored headers and return the result."""
     response = http_req(host=self.api,
                         uri='/datastore',
                         headers=self.headers)
     return response
Example #36
0
 def image(self, method='GET', params=''):
     """Issue ``method`` (default GET) on ``/image`` with ``params`` and return the ``http_req`` result."""
     response = http_req(host=self.api, uri='/image', method=method,
                         params=params, headers=self.headers)
     return response
Example #37
0
 def vm_action(self, vm_id, params):
     """Return the ``http_req`` result of POSTing ``params`` to ``/vm/<vm_id>/action``."""
     response = http_req(host=self.api,
                         uri=('/vm/%s/action' % vm_id),
                         method='POST',
                         params=params,
                         headers=self.headers)
     return response
Example #38
0
 def image(self, method='GET', params=''):
     """Return the ``http_req`` result for ``/image`` using ``method`` and ``params``.

     ``method`` defaults to ``'GET'`` and ``params`` to an empty string.
     """
     request_args = dict(host=self.api,
                         uri='/image',
                         method=method,
                         params=params,
                         headers=self.headers)
     return http_req(**request_args)
Example #39
0
 def image_id(self, image_id, method='GET'):
     """Return the ``http_req`` result for ``/image/<image_id>`` using ``method`` (default GET)."""
     response = http_req(host=self.api, uri=('/image/%s' % image_id),
                         method=method, headers=self.headers)
     return response
Example #40
0
 def image_action(self, image_id, params, method='POST'):
     """Return the ``http_req`` result of sending ``params`` to ``/image/<image_id>/action``.

     ``method`` defaults to ``'POST'``.
     """
     response = http_req(host=self.api, uri=('/image/%s/action' % image_id),
                         method=method, params=params, headers=self.headers)
     return response
Example #41
0
 def datastore(self):
     """Return the ``http_req`` result for the ``/datastore`` URI of ``self.api``."""
     request_args = dict(host=self.api, uri='/datastore', headers=self.headers)
     return http_req(**request_args)
Example #42
0
 def vnet(self):
     """Return the ``http_req`` result for the ``/vnet`` URI of ``self.api``."""
     request_args = dict(host=self.api, uri='/vnet', headers=self.headers)
     return http_req(**request_args)
Example #43
0
 def vm_id(self, vm_id):
     """Return the ``http_req`` result for ``/vm/<vm_id>`` on ``self.api``."""
     response = http_req(host=self.api,
                         uri=('/vm/%s' % vm_id),
                         headers=self.headers)
     return response