class Urls(AbstractModule):
    """
    Urls module for AIL framework
    """

    def __init__(self):
        """
        Init Urls
        """
        super(Urls, self).__init__()

        self.faup = Faup()
        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

        # Protocol file path
        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "protocolsfile"))

        # Get all uri from protocolsfile (Used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1] + "|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:" + uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:[a-zA-Z]{2,15}))(?:\:[0-9]+)*(?:/?(?:[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)"

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        id, score = message.split()
        item = Item(id)
        item_content = item.get_content()

        l_urls = regex_helper.regex_findall(self.module_name, self.redis_cache_key,
                                            self.url_regex, item.get_id(), item_content)

        for url in l_urls:
            self.faup.decode(url)
            unpack_url = self.faup.get()

            to_send = f"{url} {item.get_id()}"
            print(to_send)
            self.send_message_to_queue(to_send, 'Url')
            self.redis_logger.debug(f"url_parsed: {to_send}")

        if len(l_urls) > 0:
            to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
            self.redis_logger.info(f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
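# Hedged sketch (not part of AIL): a minimal, standalone version of the same
# extract-then-decode flow used by the Urls module above, assuming only `re`
# and pyfaup. The simplified `url_regex` is a stand-in for the
# protocolsfile-driven regex built in Urls.__init__, and plain re.findall()
# replaces regex_helper.regex_findall(); `sample_text` is hypothetical.
import re
from pyfaup.faup import Faup

sample_text = "see https://drive.google.com/drive/my-drive and http://example.com/index.html"
url_regex = r"https?://[a-zA-Z0-9./_\-]+"   # simplified stand-in

faup = Faup()
for url in re.findall(url_regex, sample_text):
    faup.decode(url)
    unpack_url = faup.get()   # dict of parsed fields (scheme, domain, tld, ...)
    print(url, unpack_url.get('domain'))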
from subprocess import Popen, PIPE

from pyfaup.faup import Faup


def harvesting_google(query, numberofpage):
    listreturn = []
    result = Popen(['casperjs', 'CeleryWeb/casperjs/googlesearch.js',
                    str(query), str(numberofpage)], stdout=PIPE)
    urls = result.stdout.readlines()
    for url in urls:
        f = Faup()
        url = url.replace('\n', '')
        f.decode(url)
        listreturn.append(f.get())
    return listreturn
class SQLInjectionDetection(AbstractModule):
    """docstring for SQLInjectionDetection module."""

    # # TODO: IMPROVE ME
    # Reference: https://github.com/stamparm/maltrail/blob/master/core/settings.py
    SQLI_REGEX = r"information_schema|sysdatabases|sysusers|floor\(rand\(|ORDER BY \d+|\bUNION\s+(ALL\s+)?SELECT\b|\b(UPDATEXML|EXTRACTVALUE)\(|\bCASE[^\w]+WHEN.*THEN\b|\bWAITFOR[^\w]+DELAY\b|\bCONVERT\(|VARCHAR\(|\bCOUNT\(\*\)|\b(pg_)?sleep\(|\bSELECT\b.*\bFROM\b.*\b(WHERE|GROUP|ORDER)\b|\bSELECT \w+ FROM \w+|\b(AND|OR|SELECT)\b.*/\*.*\*/|/\*.*\*/.*\b(AND|OR|SELECT)\b|\b(AND|OR)[^\w]+\d+['\") ]?[=><]['\"( ]?\d+|ODBC;DRIVER|\bINTO\s+(OUT|DUMP)FILE"

    def __init__(self):
        super(SQLInjectionDetection, self).__init__()

        self.faup = Faup()

        config_loader = ConfigLoader()
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        url, id = message.split()

        if self.is_sql_injection(url):
            self.faup.decode(url)
            url_parsed = self.faup.get()

            item = Item(id)
            item_id = item.get_id()
            print(f"Detected SQL in URL: {item_id}")
            print(urllib.request.unquote(url))
            to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            # statistics
            tld = url_parsed['tld']
            if tld is not None:
                ## TODO: # FIXME: remove me
                try:
                    tld = tld.decode()
                except:
                    pass
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)

    # Try to detect if the given url might be an SQL injection by applying the
    # regex defined above to it.
    def is_sql_injection(self, url_parsed):
        line = urllib.request.unquote(url_parsed)

        return re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None
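# Hedged sketch: how is_sql_injection() behaves on its own, reusing the
# SQLI_REGEX class attribute defined above. The sample URL is hypothetical.
import re
import urllib.request

url = "http://example.com/item.php?id=1%20UNION%20SELECT%20password%20FROM%20users"
line = urllib.request.unquote(url)   # "...id=1 UNION SELECT password FROM users"
print(re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None)   # expected: True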
    p.populate_set_out('credential;{}'.format(filepath), 'BrowseWarningPaste')

    # Put in form, count occurrences, then send to moduleStats
    creds_sites = {}
    site_occurence = re.findall(regex_site_for_stats, content)
    for site in site_occurence:
        site_domain = site[1:-1]
        if site_domain in creds_sites.keys():
            creds_sites[site_domain] += 1
        else:
            creds_sites[site_domain] = 1

    for url in sites:
        faup.decode(url)
        domain = faup.get()['domain']
        if domain in creds_sites.keys():
            creds_sites[domain] += 1
        else:
            creds_sites[domain] = 1

    for site, num in creds_sites.items():  # Send for each different site to moduleStats
        print('credential;{};{};{}'.format(num, site, paste.p_date))
        p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')

    if sites_set:
        print("=======> Probably on : {}".format(', '.join(sites_set)))
else:
valid_mx = check_mx_record(set_mxdomains, dns_server)

item_date = Item.get_item_date(item_id)

num_valid_email = 0
for domain_mx in valid_mx:
    num_valid_email += len(dict_mxdomains_email[domain_mx])

    for email in dict_mxdomains_email[domain_mx]:
        msg = 'mail;{};{};{}'.format(1, email, item_date)
        p.populate_set_out(msg, 'ModuleStats')

        # Create country stats
        faup.decode(email)
        tld = faup.get()['tld']
        try:
            tld = tld.decode()
        except:
            pass
        server_statistics.hincrby('mail_by_tld:{}'.format(item_date), tld, 1)

msg = 'Mails;{};{};{};Checked {} e-mail(s);{}'.format(Item.get_source(item_id),
                                                      item_date,
                                                      Item.get_item_basename(item_id),
                                                      num_valid_email,
                                                      item_id)
if num_valid_email > mail_threshold:
    print('{} Checked {} e-mail(s)'.format(item_id, num_valid_email))
    publisher.warning(msg)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pprint
from pyfaup.faup import Faup

f = Faup()
f.decode("www.météo.fr")
pprint.pprint(f.get())
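# Note (hedged): f.get() returns a dict of the parsed URL fields; judging from
# the UrlsExtractor docstring further below, the key set includes 'scheme',
# 'subdomain', 'domain', 'domain_without_tld', 'host', 'tld', 'port',
# 'resource_path', 'query_string', 'fragment' and 'url'. Depending on the faup
# version, values may come back as bytes, which is why several snippets in this
# file wrap field access in a `try: field = field.decode() except: pass` block.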
now = datetime.datetime.now()
path = os.path.join('onions', str(now.year).zfill(4),
                    str(now.month).zfill(2),
                    str(now.day).zfill(2),
                    str(int(time.mktime(now.utctimetuple()))))
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name)

if activate_crawler:
    date_month = datetime.datetime.now().strftime("%Y%m")
    date = datetime.datetime.now().strftime("%Y%m%d")
    for url in urls:
        faup.decode(url)
        url_unpack = faup.get()
        ## TODO: # FIXME: remove me
        try:
            domain = url_unpack['domain'].decode().lower()
        except Exception as e:
            domain = url_unpack['domain'].lower()

        ## TODO: blacklist by port ?
        # check blacklist
        if r_onion.sismember('blacklist_onion', domain):
            continue

        subdomain = re.findall(url_regex, url)
        if len(subdomain) > 0:
            subdomain = subdomain[0][4].lower()
        else:
        publisher.warning(to_print)
        #Send to duplicate
        p.populate_set_out(filename, 'Duplicate')
        p.populate_set_out('mail;{}'.format(filename), 'alertHandler')

        msg = 'infoleak:automatic-detection="mail";{}'.format(filename)
        p.populate_set_out(msg, 'Tags')

        #create country statistics
        date = datetime.datetime.now().strftime("%Y%m")
        for mail in MX_values[1]:
            print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
            p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')

            faup.decode(mail)
            tld = faup.get()['tld']
            server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail])
    else:
        publisher.info(to_print)
        #create country statistics
        for mail in MX_values[1]:
            print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
            p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')

    prec_filename = filename
else:
    publisher.debug("Script Mails is Idling 10s")
    print('Sleeping')
    time.sleep(10)
class WebStats(AbstractModule):
    """
    WebStats module for AIL framework
    """

    # Config Var
    THRESHOLD_TOTAL_SUM = 200    # Above this value, a keyword is eligible for a progression
    THRESHOLD_INCREASE = 1.0     # The percentage representing the keyword occurrence since num_day_to_look
    MAX_SET_CARDINALITY = 10     # The cardinality of the progression set
    NUM_DAY_TO_LOOK = 5          # The detection of the progression starts num_day_to_look days in the past

    def __init__(self):
        super(WebStats, self).__init__()

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))
        # Send a description of the module to the logs
        self.redis_logger.info("Makes statistics about valid URL")

        self.pending_seconds = 5 * 60

        # REDIS #
        self.r_serv_trend = redis.StrictRedis(
            host=self.process.config.get("ARDB_Trending", "host"),
            port=self.process.config.get("ARDB_Trending", "port"),
            db=self.process.config.get("ARDB_Trending", "db"),
            decode_responses=True)

        # FILE CURVE SECTION #
        self.csv_path_proto = os.path.join(os.environ['AIL_HOME'],
                                           self.process.config.get("Directories", "protocolstrending_csv"))
        self.protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                               self.process.config.get("Directories", "protocolsfile"))

        self.csv_path_tld = os.path.join(os.environ['AIL_HOME'],
                                         self.process.config.get("Directories", "tldstrending_csv"))
        self.tldsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "tldsfile"))

        self.csv_path_domain = os.path.join(os.environ['AIL_HOME'],
                                            self.process.config.get("Directories", "domainstrending_csv"))

        self.faup = Faup()
        self.generate_new_graph = False

    def computeNone(self):
        if self.generate_new_graph:
            self.generate_new_graph = False

            today = datetime.date.today()
            year = today.year
            month = today.month

            self.redis_logger.debug('Building protocol graph')
            lib_words.create_curve_with_word_file(self.r_serv_trend, self.csv_path_proto,
                                                  self.protocolsfile_path, year, month)

            self.redis_logger.debug('Building tld graph')
            lib_words.create_curve_with_word_file(self.r_serv_trend, self.csv_path_tld,
                                                  self.tldsfile_path, year, month)

            self.redis_logger.debug('Building domain graph')
            lib_words.create_curve_from_redis_set(self.r_serv_trend, self.csv_path_domain,
                                                  "domain", year, month)
            self.redis_logger.debug('end building')

    def compute(self, message):
        self.generate_new_graph = True

        # Do something with the message from the queue
        url, date, path = message.split()
        self.faup.decode(url)
        url_parsed = self.faup.get()

        # Scheme analysis
        self.analyse('scheme', date, url_parsed)
        # Tld analysis
        self.analyse('tld', date, url_parsed)
        # Domain analysis
        self.analyse('domain', date, url_parsed)

        self.compute_progression('scheme', self.NUM_DAY_TO_LOOK, url_parsed)
        self.compute_progression('tld', self.NUM_DAY_TO_LOOK, url_parsed)
        self.compute_progression('domain', self.NUM_DAY_TO_LOOK, url_parsed)

    def analyse(self, field_name, date, url_parsed):
        field = url_parsed[field_name]

        if field is not None:
            try:  # faup version
                field = field.decode()
            except:
                pass

            self.r_serv_trend.hincrby(field, date, 1)

            if field_name == "domain":  # save domain in a set for the monthly plot
                domain_set_name = "domain_set_" + date[0:6]
                self.r_serv_trend.sadd(domain_set_name, field)
                self.redis_logger.debug("added in " + domain_set_name + ": " + field)

    def get_date_range(self, num_day):
        curr_date = datetime.date.today()
        date = Date(str(curr_date.year) + str(curr_date.month).zfill(2) + str(curr_date.day).zfill(2))
        date_list = []

        for i in range(0, num_day + 1):
            date_list.append(date.substract_day(i))

        return date_list
    def compute_progression_word(self, num_day, keyword):
        """
        Compute the progression for one keyword
        """
        date_range = self.get_date_range(num_day)
        # check if this keyword is eligible for progression
        keyword_total_sum = 0
        value_list = []
        for date in date_range:  # get value up to date_range
            curr_value = self.r_serv_trend.hget(keyword, date)
            value_list.append(int(curr_value if curr_value is not None else 0))
            keyword_total_sum += int(curr_value) if curr_value is not None else 0

        oldest_value = value_list[-1] if value_list[-1] != 0 else 1  # Avoid zero division

        # The progression is based on the ratio: value[i] / value[i-1]
        keyword_increase = 0
        value_list_reversed = value_list[:]
        value_list_reversed.reverse()
        for i in range(1, len(value_list_reversed)):
            divisor = value_list_reversed[i - 1] if value_list_reversed[i - 1] != 0 else 1
            keyword_increase += value_list_reversed[i] / divisor

        return (keyword_increase, keyword_total_sum)

    def compute_progression(self, field_name, num_day, url_parsed):
        """
        Recompute the top_progression zset:
            - Compute the current field progression
            - Re-compute the progression of the first 2*self.MAX_SET_CARDINALITY fields
              already present in the top_progression zset
        """
        redis_progression_name_set = "z_top_progression_" + field_name

        keyword = url_parsed[field_name]
        if keyword is not None:

            # compute the progression of the current word
            keyword_increase, keyword_total_sum = self.compute_progression_word(num_day, keyword)

            # re-compute the progression of 2*self.MAX_SET_CARDINALITY
            current_top = self.r_serv_trend.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf',
                                                             withscores=True, start=0,
                                                             num=2 * self.MAX_SET_CARDINALITY)

            for word, value in current_top:
                word_inc, word_tot_sum = self.compute_progression_word(num_day, word)
                self.r_serv_trend.zrem(redis_progression_name_set, word)
                if (word_tot_sum > self.THRESHOLD_TOTAL_SUM) and (word_inc > self.THRESHOLD_INCREASE):
                    self.r_serv_trend.zadd(redis_progression_name_set, float(word_inc), word)

            # filter before adding
            if (keyword_total_sum > self.THRESHOLD_TOTAL_SUM) and (keyword_increase > self.THRESHOLD_INCREASE):
                self.r_serv_trend.zadd(redis_progression_name_set, float(keyword_increase), keyword)
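# Hedged sketch: the progression score computed by compute_progression_word(),
# reproduced on a hard-coded value list instead of Redis. value_list is ordered
# most recent day first (as built from get_date_range), so it is reversed before
# summing the day-over-day ratios value[i] / value[i-1]; the counts below are
# hypothetical.
value_list = [40, 20, 10, 5, 5, 0]                 # today first
value_list_reversed = list(reversed(value_list))   # oldest first: [0, 5, 5, 10, 20, 40]

keyword_increase = 0
for i in range(1, len(value_list_reversed)):
    divisor = value_list_reversed[i - 1] if value_list_reversed[i - 1] != 0 else 1
    keyword_increase += value_list_reversed[i] / divisor

print(keyword_increase)   # 5/1 + 5/5 + 10/5 + 20/10 + 40/20 = 12.0
print(sum(value_list))    # keyword_total_sum equivalent: 80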
class Credential(AbstractModule):
    """
    Credential module for AIL framework
    """

    # Split username with spec. char or with upper case, distinguish start with upper
    REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
    REDIS_KEY_NUM_USERNAME = '******'
    REDIS_KEY_NUM_PATH = 'uniqNumForUsername'
    REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
    REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
    REDIS_KEY_ALL_PATH_SET = 'AllPath'
    REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
    REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'

    def __init__(self):
        super(Credential, self).__init__()

        self.faup = Faup()

        self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
        self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
        self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

        # Database
        config_loader = ConfigLoader.ConfigLoader()
        self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        # Config values
        self.minimumLengthThreshold = config_loader.get_config_int("Credential", "minimumLengthThreshold")
        self.criticalNumberToAlert = config_loader.get_config_int("Credential", "criticalNumberToAlert")

        self.max_execution_time = 30

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 10

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        id, count = message.split()
        item = Item(id)

        item_content = item.get_content()

        # Extract all credentials
        all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred,
                                                     item.get_id(), item_content,
                                                     max_time=self.max_execution_time)
        if all_credentials:
            nb_cred = len(all_credentials)
            message = f'Checked {nb_cred} credentials found.'
            all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web,
                                                   item.get_id(), item_content,
                                                   max_time=self.max_execution_time)
            if all_sites:
                discovered_sites = ', '.join(all_sites)
                message += f' Related websites: {discovered_sites}'

            print(message)

            to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'

            # num of creds above threshold, publish an alert
            if nb_cred > self.criticalNumberToAlert:
                print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
                self.redis_logger.warning(to_print)

                # Send to duplicate
                self.send_message_to_queue(item.get_id(), 'Duplicate')

                msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
                self.send_message_to_queue(msg, 'Tags')

                site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key,
                                                            self.regex_site_for_stats, item.get_id(), item_content,
                                                            max_time=self.max_execution_time, r_set=False)

                creds_sites = {}

                for site in site_occurence:
                    site_domain = site[1:-1].lower()
                    if site_domain in creds_sites.keys():
                        creds_sites[site_domain] += 1
                    else:
                        creds_sites[site_domain] = 1

                for url in all_sites:
                    self.faup.decode(url)
                    domain = self.faup.get()['domain']
                    ## TODO: # FIXME: remove me, check faup version
                    try:
                        domain = domain.decode()
                    except:
                        pass
                    if domain in creds_sites.keys():
                        creds_sites[domain] += 1
                    else:
                        creds_sites[domain] = 1

                for site, num in creds_sites.items():  # Send for each different site to moduleStats
                    mssg = f'credential;{num};{site};{item.get_date()}'
                    print(mssg)
                    self.send_message_to_queue(mssg, 'ModuleStats')

                if all_sites:
                    discovered_sites = ', '.join(all_sites)
                    print(f"=======> Probably on : {discovered_sites}")

                date = datetime.now().strftime("%Y%m")
                for cred in all_credentials:
                    maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
                    self.faup.decode(maildomains)
                    tld = self.faup.get()['tld']
                    ## TODO: # FIXME: remove me
                    try:
                        tld = tld.decode()
                    except:
                        pass
                    self.server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
            else:
                self.redis_logger.info(to_print)
                print(f'found {nb_cred} credentials')

            # For searching credential in termFreq
            for cred in all_credentials:
                cred = cred.split('@')[0]  # Split to ignore mail address

                # unique number attached to unique path
                uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})

                # unique number attached to unique username
                uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
                if uniq_num_cred is None:
                    # cred do not exist, create new entries
                    uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME)
                    self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred})
                    self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred})

                # Add the mapping between the credential and the path
                self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path)

                # Split credentials on capital letters, numbers, dots and so on
                # Add the split to redis, each split point towards its initial credential unique number
                splitedCred = re.findall(Credential.REGEX_CRED, cred)
                for partCred in splitedCred:
                    if len(partCred) > self.minimumLengthThreshold:
                        self.server_cred.sadd(partCred, uniq_num_cred)
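# Hedged sketch: what the termFreq split at the end of Credential.compute()
# produces for a single credential, using the class-level REGEX_CRED above.
# The credential value is hypothetical; in the real flow it comes from matches
# of self.regex_cred.
import re

cred = "john.doe@example.com:SuperSecret2024"
cred = cred.split('@')[0]                        # keep the username part: 'john.doe'
parts = re.findall(Credential.REGEX_CRED, cred)
print(parts)                                     # ['john', 'doe']
# Only the parts longer than minimumLengthThreshold are then indexed in Redis,
# each pointing back to the credential's unique number.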
print()
print()
print('\033[92m------------------START CRAWLER------------------\033[0m')
print('crawler type: {}'.format(type_hidden_service))
print('\033[92m-------------------------------------------------\033[0m')
print('url: {}'.format(url))
print('domain: {}'.format(domain))
print('domain_url: {}'.format(domain_url))

faup.decode(domain)
onion_domain = faup.get()['domain'].decode()

if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain) and \
        not r_onion.sismember('blacklist_{}'.format(type_hidden_service), onion_domain):

    date = datetime.datetime.now().strftime("%Y%m%d")
    date_month = datetime.datetime.now().strftime("%Y%m")

    if not r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and \
            not r_onion.sismember('{}_down:{}'.format(
#!/usr/bin/python

from pyfaup.faup import Faup

url = "http://www.wallinfire.net"
f = Faup()
print("We decode the url: %s" % (url))
f.decode(url)
data = f.get()
print("URL TLD: %s" % (data['tld']))
msg = 'infoleak:automatic-detection="credential";{}'.format(filepath)
p.populate_set_out(msg, 'Tags')

# Put in form, count occurrences, then send to moduleStats
creds_sites = {}
site_occurence = re.findall(regex_site_for_stats, content)
for site in site_occurence:
    site_domain = site[1:-1]
    if site_domain in creds_sites.keys():
        creds_sites[site_domain] += 1
    else:
        creds_sites[site_domain] = 1

for url in sites:
    faup.decode(url)
    domain = faup.get()['domain']
    ## TODO: # FIXME: remove me
    try:
        domain = domain.decode()
    except:
        pass
    if domain in creds_sites.keys():
        creds_sites[domain] += 1
    else:
        creds_sites[domain] = 1

for site, num in creds_sites.items():  # Send for each different site to moduleStats
    mssg = 'credential;{};{};{}'.format(num, site, paste.p_date)
    print(mssg)
    p.populate_set_out(mssg, 'ModuleStats')
                                                  month)
        print('Building domain graph')
        lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
                                              "domain", year, month)
        print('end building')

    publisher.debug("{} queue is empty, waiting".format(config_section))
    print('sleeping')
    time.sleep(5 * 60)
    continue

else:
    generate_new_graph = True
    # Do something with the message from the queue
    url, date, path = message.split()
    faup.decode(url)
    url_parsed = faup.get()

    analyse(r_serv_trend, 'scheme', date, url_parsed)  # Scheme analysis
    analyse(r_serv_trend, 'tld', date, url_parsed)     # Tld analysis
    analyse(r_serv_trend, 'domain', date, url_parsed)  # Domain analysis

    compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
    compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
    compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)
        print('Building domain graph')
        lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
                                              "domain", year, month)
        print('end building')

    publisher.debug("{} queue is empty, waiting".format(config_section))
    print('sleeping')
    time.sleep(5*60)
    continue

else:
    generate_new_graph = True
    # Do something with the message from the queue
    url, date, path = message.split()
    faup.decode(url)
    url_parsed = faup.get()

    # Scheme analysis
    analyse(r_serv_trend, 'scheme', date, url_parsed)
    # Tld analysis
    analyse(r_serv_trend, 'tld', date, url_parsed)
    # Domain analysis
    analyse(r_serv_trend, 'domain', date, url_parsed)

    compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
    compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
    compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)
if urls_file is None:
    source_info = "arg:%s" % (sys.argv[1])
else:
    source_info = "file:%s" % (sys.argv[1])

urlw_log = UrlwLog(source_info)
urlw_log.open()
urlw_log.custom_log("Starting...")
urlw_p = UrlwPlugins(urlw_log)
fauplib = Faup()

if source_info.startswith("arg:"):
    fauplib.decode(sys.argv[1])
    faup_object = fauplib.get()
    for plugin in urlw_p.plugins_list:
        urlw_p.run(plugin, sys.argv[1], faup_object)

elif source_info.startswith("file:"):
    urls = urls_file.readlines()
    for url in urls:
        fauplib.decode(url)
        faup_object = fauplib.get()
        for plugin in urlw_p.plugins_list:
            urlw_p.run(plugin, url, faup_object)
    urls_file.close()

urlw_log.custom_log("Done")
urlw_log.close()
class LibInjection(AbstractModule):
    """docstring for LibInjection module."""

    def __init__(self):
        super(LibInjection, self).__init__()

        self.faup = Faup()

        config_loader = ConfigLoader()
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        url, id = message.split()

        self.faup.decode(url)
        url_parsed = self.faup.get()
        ## TODO: # FIXME: remove me
        try:
            resource_path = url_parsed['resource_path'].encode()
        except:
            resource_path = url_parsed['resource_path']

        ## TODO: # FIXME: remove me
        try:
            query_string = url_parsed['query_string'].encode()
        except:
            query_string = url_parsed['query_string']

        result_path = {'sqli': False}
        result_query = {'sqli': False}

        if resource_path is not None:
            result_path = pylibinjection.detect_sqli(resource_path)
            #print(f'path is sqli : {result_path}')

        if query_string is not None:
            result_query = pylibinjection.detect_sqli(query_string)
            #print(f'query is sqli : {result_query}')

        if result_path['sqli'] is True or result_query['sqli'] is True:
            item = Item(id)
            item_id = item.get_id()
            print(f"Detected (libinjection) SQL in URL: {item_id}")
            print(urllib.request.unquote(url))

            to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Add tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            # statistics
            ## TODO: # FIXME: remove me
            try:
                tld = url_parsed['tld'].decode()
            except:
                tld = url_parsed['tld']

            if tld is not None:
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
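# Hedged sketch: calling pylibinjection directly on a query string, as
# LibInjection.compute() does above. Assumes pylibinjection is installed; per
# the code above, detect_sqli() is given bytes and returns a dict whose 'sqli'
# key flags a detection. The sample query string is hypothetical.
import pylibinjection

query_string = b"id=1' OR '1'='1"
result = pylibinjection.detect_sqli(query_string)
print(result['sqli'])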
    p.populate_set_out(filepath, 'Duplicate')

    #Send to BrowseWarningPaste
    p.populate_set_out('credential;{}'.format(filepath), 'BrowseWarningPaste')

    # Put in form, count occurrences, then send to moduleStats
    creds_sites = {}
    site_occurence = re.findall(regex_site_for_stats, content)
    for site in site_occurence:
        site_domain = site[1:-1]
        if site_domain in creds_sites.keys():
            creds_sites[site_domain] += 1
        else:
            creds_sites[site_domain] = 1

    for url in sites:
        faup.decode(url)
        domain = faup.get()['domain']
        if domain in creds_sites.keys():
            creds_sites[domain] += 1
        else:
            creds_sites[domain] = 1

    for site, num in creds_sites.items():  # Send for each different site to moduleStats
        print('credential;{};{};{}'.format(num, site, paste.p_date))
        p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')

    if sites_set:
        print("=======> Probably on : {}".format(', '.join(sites_set)))
else:
    publisher.info(to_print)
                                            item_content, max_time=max_execution_time, r_set=False)
creds_sites = {}

for site in site_occurence:
    site_domain = site[1:-1].lower()
    if site_domain in creds_sites.keys():
        creds_sites[site_domain] += 1
    else:
        creds_sites[site_domain] = 1

for url in all_sites:
    faup.decode(url)
    domain = faup.get()['domain']
    ## TODO: # FIXME: remove me
    try:
        domain = domain.decode()
    except:
        pass
    if domain in creds_sites.keys():
        creds_sites[domain] += 1
    else:
        creds_sites[domain] = 1

for site, num in creds_sites.items():  # Send for each different site to moduleStats
    mssg = 'credential;{};{};{}'.format(num, site, Item.get_item_date(item_id))
class UrlsExtractor(object):

    def __init__(self):
        self._url_regex = re.compile(
            r'((?:(?:ht|f)tp(?:s?)\:\/\/)'
            r'(?:[!#$&-;=?-\[\]_a-z~]|%[0-9a-f]{2})+)', re.I)
        self._faup = Faup()

    def extract(self, text):
        """This function extracts all http(s) and ftp(s) urls from text.

        It builds a dict with a key for every second-level domain and, as
        value, a list of disassembled urls (the output of the Faup tool).

        Example disassembled url https://drive.google.com/drive/my-drive:

        {
            'domain': 'google.com',
            'domain_without_tld': 'google',
            'fragment': None,
            'host': 'drive.google.com',
            'port': None,
            'query_string': None,
            'resource_path': '/drive/my-drive',
            'scheme': 'https',
            'subdomain': 'drive',
            'tld': 'com',
            'url': 'https://drive.google.com/drive/my-drive'
        }
        """
        if not isinstance(text, unicode):
            raise NotUnicodeError("The given text is not in unicode")

        self._results = dict()

        for i in self._url_regex.finditer(text):
            try:
                """
                import urlnorm
                url = urlnorm.norm(i.group(0).strip())

                Can't use urlnorm because it can't manage domains like
                http://contentsr,xn--90afavbplfx2a6a5b2a,xn--p1ai/
                After normalization it's impossible to tokenize this kind of url.
                """
                url = i.group(0).strip()
            except:
                raise FailedRegexUrl("Failed parsing regex urls")

            try:
                self._faup.decode(url)
                tokens = self._faup.get()

                # Get results for domain
                domain = self._results.get(tokens['domain'], None)

                if domain:
                    domain.append(tokens)
                else:
                    self._results[tokens['domain']] = [tokens]
            except:
                raise FailedFaupParsing("Failed tokenize url with Faup")

    @property
    def urls_obj(self):
        return self._results

    @property
    def urls_json(self):
        try:
            return json.dumps(self.urls_obj, ensure_ascii=False)
        except:
            raise FailedReturnJsonUrls("Failed make JSON from urls result")
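# Hedged usage sketch for UrlsExtractor, assuming the module's own imports and
# exception classes are available. The class is written for Python 2 (it checks
# isinstance(text, unicode)), so the input is given as a u'' literal; the text
# value is hypothetical.
extractor = UrlsExtractor()
extractor.extract(u"See https://drive.google.com/drive/my-drive for details")
print(extractor.urls_obj)     # e.g. {'google.com': [<Faup token dict>]}
print(extractor.urls_json)    # same result serialized as JSON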