def convert_grp(dst, src):
    """Copy the rows of the source ``grp`` table into ``user_attrs`` on ``dst``.

    Every (username, domain, grp) row read from ``src`` is inserted as an
    attribute named "acl" with type 2, value ``grp`` and flags
    ``common.DB_LOAD_SER | common.DB_FOR_SERWEB``.  The uid for the row is
    obtained from get_uid_by_uri(dst, username, domain) after the domain has
    been passed through common.extract_domain().

    Rows whose uid cannot be found, or whose insert raises an integrity
    error, are reported on stdout and skipped.  If the ``grp`` table cannot
    be queried at all, a message is printed and nothing is converted.
    """
    if common.verbose:
        print("Converting grp table into user attributes")

    reader = src.cursor()
    try:
        reader.execute("select username, domain, grp from grp")
        rows = reader.fetchall()
    except MySQLdb.ProgrammingError:
        print("Error while querying grp table, skipping")
        return

    insert_sql = (
        "insert into user_attrs (uid, name, type, value, flags) values"
        "(%s, %s, %s, %s, %s)"
    )
    attr_flags = common.DB_LOAD_SER | common.DB_FOR_SERWEB

    for username, domain, grp in rows:
        domain = common.extract_domain(username, domain)
        try:
            uid = get_uid_by_uri(dst, username, domain)
            dst.cursor().execute(insert_sql, (uid, "acl", 2, grp, attr_flags))
        except UidNotFoundException:
            print("ERROR: Cannot find UID for group entry for '%s@%s'" % (username, domain))
        except MySQLdb.IntegrityError:
            # Echo the values that were being inserted so the clash can be
            # looked up by hand.
            print(
                "Conflicting row found in user_attrs table (%s, %s, %s, %s, %s)"
                % (uid, "acl", 2, grp, attr_flags)
            )
def crawl(self, seed_url, max_urls=30, max_depth=1, obey_robots=False, max_size=1000000, force_html=True, **kwargs):
    """Crawl website html and return list of URLs crawled

    seed_url: url to start crawling from
    max_urls: maximum number of URLs to crawl (use None for no limit)
    max_depth: maximum depth to follow links into website (use None for no limit)
    obey_robots: whether to obey robots.txt
    max_size is passed to get() and is limited to 1MB by default
    force_html is passed to get() and is set to True by default so only crawl HTML content
    **kwargs is passed to get()

    Returns the list of URLs that were downloaded, in download order.
    """
    user_agent = kwargs.get("user_agent", self.user_agent)
    server = "http://" + extract_domain(seed_url)
    robots = RobotFileParser()
    if obey_robots:
        # load robots.txt; a failed download is treated as an empty file
        # NOTE(review): assumes get() may return None/"" on failure - confirm
        robots.parse((self.get(server + "/robots.txt") or "").splitlines())

    # compiled once instead of once per downloaded page
    link_pattern = re.compile("<a[^>]+href=[\"'](.*?)[\"']", re.IGNORECASE)

    outstanding = [(seed_url, 0), (server, 0)]  # which URLs need to crawl
    crawled = []  # urls that have crawled, in order (this is the return value)
    seen = set()  # same URLs as `crawled`, kept for O(1) membership tests

    while outstanding:  # more URLs to crawl
        if len(crawled) == max_urls:
            break
        url, cur_depth = outstanding.pop(0)
        if url in seen:
            continue

        html = self.get(url, max_size=max_size, force_html=force_html, **kwargs)
        crawled.append(url)
        seen.add(url)

        if not html:
            # nothing was downloaded, so there are no links to extract
            continue
        if max_depth is not None and cur_depth >= max_depth:
            # depth limit reached: record the page but do not follow its links
            continue

        for scraped_url in link_pattern.findall(html):
            # remove internal links to prevent duplicates
            scraped_url = scraped_url.split("#", 1)[0]

            extension = os.path.splitext(scraped_url)[-1].lower()
            if extension in Download.IGNORED_EXTENSIONS:
                continue
            # Only consult robots.txt when asked to.  A RobotFileParser that
            # has never parsed a file answers False for every URL, which
            # would otherwise stop the crawl from following any link.
            if obey_robots and not robots.can_fetch(user_agent, scraped_url):
                continue

            scraped_url = urljoin(server, scraped_url)  # support relative links
            # check if same domain or sub-domain
            this_server = extract_domain(scraped_url)
            if this_server and (this_server in server or server in this_server):
                outstanding.append((scraped_url, cur_depth + 1))
    return crawled
def domain_delay(self, url, delay, proxy=None, variance=0.5):
    """Sleep so that consecutive requests to one domain via one proxy are spaced out

    The time of the last access is kept in self.cache under the key
    "<proxy>:<domain of url>".

    url is what intend to download
    delay is the minimum number of seconds to leave after the previous access to this domain
    proxy is the proxy being used (each proxy is tracked separately)
    variance is the amount of randomness applied to the delay, 0-1
    """
    key = ":".join((str(proxy), extract_domain(url)))

    if key in self.cache:
        elapsed = datetime.now() - self.cache[key]
        # whole seconds only: the microseconds part of the interval is ignored
        remaining = delay - elapsed.days * 24 * 60 * 60 - elapsed.seconds
        if remaining > 0:
            # shift the wait by a random amount within +/- variance * delay
            # so the request timing looks less mechanical
            jitter = 2 * variance * delay * random.random()
            remaining = remaining - variance * delay + jitter
            # the jitter can push the wait below zero, so clamp it
            time.sleep(max(0, remaining))

    # record this access as the new reference point for the next call
    self.cache[key] = datetime.now()
def convert_admin_privileges(dst, src):
    """Convert rows of the source ``admin_privileges`` table into ``user_attrs``.

    Each (username, domain, priv_name, priv_value) row read from ``src`` is
    translated according to ``priv_name``:

      is_admin          -> attribute "sw_is_admin",      type 0, value 1
      acl_control       -> attribute "sw_acl_control",   type 2, value priv_value
      change_privileges -> attribute "sw_is_hostmaster", type 0, value 1

    All attributes are written with flags ``common.DB_FOR_SERWEB`` for the
    uid returned by get_uid_by_uri(dst, username, domain).  Any other
    privilege name is reported and skipped, as is a row whose uid cannot be
    found.  An integrity error on insert prints a message and terminates the
    process with exit status 1.
    """
    cur = src.cursor()
    if common.verbose:
        print("Converting admin_privileges table")

    try:
        cur.execute("select username, domain, priv_name, priv_value from admin_privileges")
    except MySQLdb.ProgrammingError:
        print("Error while querying admin_privileges table, skipping")
        return

    privs = cur.fetchall()
    if common.verbose and cur.rowcount == 0:
        print("Source admin_privileges table is empty, skipping")
        return

    # The two statements differ only in the order of the type/value columns;
    # the parameter tuples below follow the column order of their statement.
    type_then_value_sql = ("insert into user_attrs (uid, name, type, value, flags) values "
                           "(%s, %s, %s, %s, %s)")
    value_then_type_sql = ("insert into user_attrs (uid, name, value, type, flags) values "
                           "(%s, %s, %s, %s, %s)")

    for username, domain, priv_name, priv_value in privs:
        domain = common.extract_domain(username, domain)
        try:
            # The uid lookup comes first so that a missing uid is reported
            # even for privilege names that are not supported.
            uid = get_uid_by_uri(dst, username, domain)

            if priv_name == "is_admin":
                sql = type_then_value_sql
                params = (uid, "sw_is_admin", 0, 1, common.DB_FOR_SERWEB)
            elif priv_name == "acl_control":
                sql = value_then_type_sql
                params = (uid, "sw_acl_control", priv_value, 2, common.DB_FOR_SERWEB)
            elif priv_name == "change_privileges":
                sql = type_then_value_sql
                params = (uid, "sw_is_hostmaster", 0, 1, common.DB_FOR_SERWEB)
            else:
                print("ERROR: Unsupported privilege '%s' in admin_privileges table, skipping" % priv_name)
                continue

            dst.cursor().execute(sql, params)
        except UidNotFoundException:
            print("ERROR: Cannot find UID for admin_privilege entry for '%s@%s'" % (username, domain))
        except MySQLdb.IntegrityError:
            print("Conflicting row found user_attrs table")
            sys.exit(1)
def lookup(target, color=common.bcolors.OKGREEN):
    """Print a table of subdomains that crt.sh lists for ``target``.

    The domain is taken from ``target`` with common.extract_domain() and
    crt.sh is queried for certificates matching ``%.<domain>``.  At most
    ROW_LIMIT names are shown, in sorted order; when there are more, a
    final "+N MORE" row gives the number left out.

    target: URL or host name to enumerate subdomains for
    color: string written before the table (the table is followed by
        common.bcolors.ENDC)

    Returns None.  A non-200 response from crt.sh is reported on stdout and
    nothing else is printed.
    """
    ROW_LIMIT = 20
    API_URL = 'https://crt.sh/?q=%.{}&output=json'

    response = requests.get(API_URL.format(common.extract_domain(target)))
    if response.status_code != 200:
        print('Got status {} from crt.sh. Skipping...'.format(
            response.status_code))
        return None

    subdomains = set()
    for entry in response.json():
        # A single crt.sh record can carry several names in name_value,
        # separated by newlines.  Split them so each name gets its own row
        # and duplicates across records collapse in the set.
        for name in entry['name_value'].splitlines():
            name = name.strip()
            if name:
                subdomains.add(name)

    table = PrettyTable()
    table.field_names = ['Enumerated Subdomains']
    for domain in sorted(subdomains)[:ROW_LIMIT]:
        table.add_row([domain])
    if len(subdomains) > ROW_LIMIT:
        table.add_row(['+{} MORE'.format(len(subdomains) - ROW_LIMIT)])

    print(color + str(table) + common.bcolors.ENDC)