def site_spider(self):
    entry_urls_list = []
    for site in self.site_list:
        entry_urls = [site]
        entry_urls.extend(self.search_engines_result.get(site, []))
        entry_urls_list.append(entry_urls)

    site_spider_result = services.site_spider_thread(entry_urls_list)
    for site in site_spider_result:
        target_urls = site_spider_result[site]
        new_target_urls = []
        for url in target_urls:
            if url in self.page_url_list:
                continue
            new_target_urls.append(url)
            self.page_url_list.append(url)

        page_map = services.page_fetch(new_target_urls)
        for url in page_map:
            item = {
                "site": site,
                "task_id": self.task_id,
                "source": CollectSource.SITESPIDER
            }
            item.update(page_map[url])
            domain_parsed = utils.domain_parsed(site)
            if domain_parsed:
                item["fld"] = domain_parsed["fld"]
            utils.conn_db('url').insert_one(item)
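# --- Illustrative sketch: services.page_fetch is not shown in this excerpt. ---
# site_spider() and search_engines() only rely on it returning a mapping of
# url -> dict of page attributes that is merged into `item`. A minimal,
# single-threaded version built on requests could look like the sketch below;
# the field names are assumptions for illustration, not the project's schema.
import requests

def page_fetch_sketch(urls, timeout=10):
    page_map = {}
    for url in urls:
        try:
            resp = requests.get(url, timeout=timeout)
        except requests.RequestException:
            continue
        page_map[url] = {
            "url": url,
            "status": resp.status_code,
            "body_length": len(resp.content),
        }
    return page_map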
def crtsh_search(domain):
    name_list = []
    try:
        c = CrtshClient()
        items = c.search(domain)
        for item in items:
            for name in item["name_value"].split():
                name = name.strip()
                # drop wildcard prefixes such as "*." and stray leading/trailing dots
                name = name.strip("*.")
                name = name.lower()
                if "@" in name:
                    continue
                # validate the candidate name itself, not the constant query domain
                if not utils.domain_parsed(name):
                    continue
                if name.endswith("." + domain):
                    name_list.append(name)

        name_list = list(set(name_list))
        logger.info("search crtsh {} {}".format(domain, len(name_list)))
    except Exception as e:
        logger.exception(e)

    return name_list
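# --- Illustrative sketch: CrtshClient is not shown in this excerpt. ---
# crtsh_search() only relies on CrtshClient.search(domain) returning items that
# carry a "name_value" field. A minimal client against crt.sh's public JSON
# endpoint could look like this; the class name and error handling below are
# assumptions for illustration.
import requests

class CrtshClientSketch(object):
    base_url = "https://crt.sh/"

    def search(self, domain):
        params = {"q": "%.{}".format(domain), "output": "json"}
        try:
            resp = requests.get(self.base_url, params=params, timeout=(10, 60))
            if resp.status_code == 200:
                # list of dicts, each with a "name_value" key (possibly multi-line)
                return resp.json()
        except (requests.RequestException, ValueError):
            pass
        return []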
def _resolver(self):
    domains = []
    domain_cname_record = []
    for x in self.brute_out:
        if utils.check_domain_black(x["domain"]):
            continue

        domains.append(x["domain"])
        self.brute_domain_map[x["domain"]] = x["record"]

        if x["type"] == 'CNAME':
            item = x["domain"].lower()
            if utils.check_domain_black(item):
                continue
            if utils.domain_parsed(item):
                self.domain_cnames.append(item)
                domain_cname_record.append(x["record"])

    for domain in domain_cname_record:
        if not domain.endswith(self.base_domain_scope):
            continue
        if domain not in domains:
            domains.append(domain)

    start_time = time.time()
    logger.info("start resolver {} {}".format(self.base_domain, len(domains)))

    self.resolver_map = services.resolver_domain(domains)

    elapse = time.time() - start_time
    logger.info("end resolver {} result {}, elapse {}".format(
        self.base_domain, len(self.resolver_map), elapse))
def work(self, site):
    _, hostname, _ = get_host(site)
    conn = utils.http_req(site)
    item = {
        "site": site,
        "hostname": hostname,
        "ip": "",
        "title": utils.get_title(conn.content),
        "status": conn.status_code,
        "headers": utils.get_headers(conn),
        "http_server": conn.headers.get("Server", ""),
        "body_length": len(conn.content),
        "finger": [],
        "favicon": fetch_favicon(site)
    }

    domain_parsed = utils.domain_parsed(hostname)
    if domain_parsed:
        item["fld"] = domain_parsed["fld"]

    ips = utils.get_ip(hostname)
    if ips:
        item["ip"] = ips[0]
    else:
        # hostname may already be an IP literal; fall back to it directly
        item["ip"] = hostname

    self.site_info_list.append(item)

    # recursively follow redirects that stay under the same site prefix
    if conn.status_code in (301, 302):
        url_302 = urljoin(site, conn.headers.get("Location", ""))
        if url_302 != site and url_302.startswith(site):
            self.work(url_302)
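# --- Illustrative sketch: get_host is not shown in this excerpt. ---
# From the unpacking `_, hostname, _ = get_host(site)` it returns a 3-tuple with
# the hostname in the middle; the (scheme, hostname, port) layout below is an
# assumption for illustration.
from urllib.parse import urlparse

def get_host_sketch(site):
    parsed = urlparse(site)
    scheme = parsed.scheme or "http"
    port = parsed.port or (443 if scheme == "https" else 80)
    return scheme, parsed.hostname, port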
def _resolver(self):
    domains = []
    domain_cname_record = []
    for x in self.brute_out:
        current_domain = x["domain"].lower()
        if not utils.domain_parsed(current_domain):
            continue

        # skip domains that are too long
        if len(current_domain) >= Config.DOMAIN_MAX_LEN:
            continue

        if utils.check_domain_black(current_domain):
            continue

        if current_domain not in domains:
            domains.append(current_domain)

        self.brute_domain_map[current_domain] = x["record"]

        if x["type"] == 'CNAME':
            self.domain_cnames.append(current_domain)
            current_record_domain = x['record']
            if not utils.domain_parsed(current_record_domain):
                continue
            if utils.check_domain_black(current_record_domain):
                continue
            if current_record_domain not in domain_cname_record:
                domain_cname_record.append(current_record_domain)

    for domain in domain_cname_record:
        if not domain.endswith(self.base_domain_scope):
            continue
        if domain not in domains:
            domains.append(domain)

    start_time = time.time()
    logger.info("start resolver {} {}".format(self.base_domain, len(domains)))

    self.resolver_map = services.resolver_domain(domains)

    elapse = time.time() - start_time
    logger.info("end resolver {} result {}, elapse {}".format(
        self.base_domain, len(self.resolver_map), elapse))
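# --- Illustrative sketch: services.resolver_domain is not shown in this excerpt. ---
# Both _resolver() variants only need a mapping of domain -> resolved records.
# A minimal, single-threaded version using dnspython could look like this; the
# real implementation is presumably concurrent and may resolve more record types.
import dns.resolver

def resolver_domain_sketch(domains):
    resolver_map = {}
    resolver = dns.resolver.Resolver()
    resolver.timeout = 3
    resolver.lifetime = 6
    for domain in domains:
        try:
            answer = resolver.resolve(domain, "A")
            resolver_map[domain] = [rdata.to_text() for rdata in answer]
        except Exception:
            # NXDOMAIN, timeouts etc. are simply skipped in this sketch
            continue
    return resolver_map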
def is_valid_domain(domain):
    from app.utils import domain_parsed
    if "." not in domain:
        return False

    if domain_parsed(domain):
        return True

    return False
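# --- Illustrative sketch: app.utils.domain_parsed is not shown in this excerpt. ---
# Callers above treat a falsy return as "invalid" and read an "fld" key from the
# result. A minimal version built on the `tld` package could look like this;
# returning a plain dict with these keys is an assumption drawn from that usage.
from tld import get_tld

def domain_parsed_sketch(domain):
    res = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)
    if res is None:
        return {}
    return {"domain": domain, "fld": res.fld, "subdomain": res.subdomain}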
def save_domain_info_list(self, domain_info_list, source=CollectSource.DOMAIN_BRUTE):
    for domain_info_obj in domain_info_list:
        domain_info = domain_info_obj.dump_json(flag=False)
        domain_info["task_id"] = self.task_id
        domain_info["source"] = source

        domain_parsed = utils.domain_parsed(domain_info["domain"])
        if domain_parsed:
            domain_info["fld"] = domain_parsed["fld"]

        utils.conn_db('domain').insert_one(domain_info)
def search_subdomain(self, target):
    params = {"query": "*.{}".format(target)}
    auth = (self.auth_email, self.auth_key)
    conn = utils.http_req(self.subdomain_api, params=params, auth=auth, timeout=(20, 120))
    data = conn.json()
    subdomains = []
    for item in data['subdomains']:
        item = item.strip("*.")
        domain = "{}.{}".format(item, target)
        if utils.domain_parsed(domain):
            subdomains.append(domain)

    return list(set(subdomains))
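# --- Hypothetical usage of search_subdomain (names and values are placeholders) ---
# The method expects an instance carrying auth_email, auth_key and subdomain_api;
# the client class name and credentials below are illustrative only.
#
#   client = SubdomainApiClient(auth_email="user@example.com", auth_key="<api-key>")
#   subdomains = client.search_subdomain("example.com")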
def search_engines(self):
    self.search_engines_result = search_engines(self.site_list)
    for site in self.search_engines_result:
        target_urls = self.search_engines_result[site]
        page_map = services.page_fetch(target_urls)
        for url in page_map:
            self.page_url_list.append(url)
            item = {
                "site": site,
                "task_id": self.task_id,
                "source": CollectSource.SEARCHENGINE
            }
            item.update(page_map[url])
            domain_parsed = utils.domain_parsed(site)
            if domain_parsed:
                item["fld"] = domain_parsed["fld"]
            utils.conn_db('url').insert_one(item)