def manager():
    """
    Manage (start/stop) the process (fetching/parsing) of the modules
    """
    modules = config_db.smembers('modules')
    modules_nr = len(modules)
    # Cleanup
    for module in modules:
        config_db.delete(module + '|parsing')
        config_db.delete(module + '|fetching')
    while True:
        for module in modules:
            parsing = config_db.get(module + "|" + "parsing")
            fetching = config_db.get(module + "|" + "fetching")
            if parsing is None:
                launch_parser(module)
            if fetching is None:
                launch_fetcher(module)
            parsing = config_db.get(module + "|" + "parsing")
            fetching = config_db.get(module + "|" + "fetching")
            # Redis returns the stored flags as strings ('0' when the
            # parser/fetcher of a module has finished).
            if parsing == '0' and fetching == '0':
                config_db.srem('modules', module)
        modules = config_db.smembers('modules')
        if len(modules) != modules_nr:
            modules_nr = len(modules)
            publisher.info('These modules are running: ' + str(modules))
        else:
            time.sleep(sleep_timer)
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host=cfg.get("Redis_Queues", "host"),
        port=cfg.getint("Redis_Queues", "port"),
        db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Queuing"

    # ZMQ #
    Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "onion_categ", "tor")

    # FUNCTIONS #
    publisher.info("""Subscribed to channel {0}""".format("onion_categ"))

    while True:
        Sub.get_and_lpush(r_serv)

        if r_serv.sismember("SHUTDOWN_FLAGS", "Onion_Q"):
            r_serv.srem("SHUTDOWN_FLAGS", "Onion_Q")
            print "Shutdown Flag Up: Terminating"
            publisher.warning("Shutdown Flag Up: Terminating.")
            break
def launch_fetcher(module):
    """
    Launch a process which fetches a dataset into a directory
    """
    service_fetcher = os.path.join(services_dir, "fetch_raw_files.py")
    timer = '3600'
    if module is None:
        publisher.error('Unable to start fetching: module is None')
        return
    url = config_db.get(module + "|" + "url")
    if url is None:
        publisher.info(module + ' does not have a URL, no fetcher.')
        config_db.set(module + "|" + "fetching", 0)
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_fetcher, '-n', module,
                          '-d', directory, '-u', url, '-t', timer])
        config_db.set(module + "|" + "fetching", 1)
        publisher.info('Fetching of ' + module + ' started.')
    else:
        publisher.error('Unable to start fetching of ' + module +
                        ': home_dir unknown.')
        config_db.set(module + "|" + "fetching", 0)
def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):
    proc = Proc(target=_regex_findall, args=(redis_key, regex, item_content, r_set, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return []
        else:
            if r_set:
                all_items = r_serv_cache.smembers(redis_key)
            else:
                all_items = r_serv_cache.lrange(redis_key, 0, -1)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return all_items
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host=cfg.get("Redis_Queues", "host"),
        port=cfg.getint("Redis_Queues", "port"),
        db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Queuing"

    # ZMQ #
    channel = cfg.get("PubSub_Words", "channel_0")
    subscriber_name = "curve"
    subscriber_config_section = "PubSub_Words"
    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)

    # FUNCTIONS #
    publisher.info("""Subscribed to channel {0}""".format(channel))

    while True:
        Sub.get_and_lpush(r_serv)

        if r_serv.sismember("SHUTDOWN_FLAGS", "Curve_Q"):
            r_serv.srem("SHUTDOWN_FLAGS", "Curve_Q")
            print "Shutdown Flag Up: Terminating"
            publisher.warning("Shutdown Flag Up: Terminating.")
            break
def fetcher():
    """
    Main function which fetches the datasets
    """
    while config_db.sismember('modules', module):
        try:
            urllib.urlretrieve(url, temp_filename)
        except:
            publisher.error('Unable to fetch ' + url)
            __check_exit()
            continue
        drop_file = False
        """
        Check if the file already exists: if the same file is found,
        the downloaded file is dropped. Otherwise, it is moved to its
        final directory.
        """
        to_check = glob.glob(os.path.join(old_directory, '*'))
        to_check += glob.glob(os.path.join(directory, '*'))
        for file in to_check:
            if filecmp.cmp(temp_filename, file):
                drop_file = True
                break
        if drop_file:
            os.unlink(temp_filename)
            publisher.debug('No new file on ' + url)
        else:
            os.rename(temp_filename, filename)
            publisher.info('New file on ' + url)
        __check_exit()
    config_db.delete(module + "|" + "fetching")
def launch():
    """
    Fetch all the whois entries assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected from " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(entry, server + '\n' +
                                   unicode(whois, errors="replace"), cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i % 10000 == 0:
                publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            time.sleep(sleep_timer)
            __disconnect()
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read('./packages/config.cfg')

    # REDIS #
    r_serv = redis.StrictRedis(
        host=cfg.get("Redis_Queues", "host"),
        port=cfg.getint("Redis_Queues", "port"),
        db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Global"

    # ZMQ #
    PubGlob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")

    # FUNCTIONS #
    publisher.info("Starting to publish.")

    while True:
        filename = r_serv.lpop("filelist")

        if filename is not None:
            msg = cfg.get("PubSub_Global", "channel") + " " + filename
            PubGlob.send_message(msg)
            publisher.debug("{0} Published".format(msg))
        else:
            time.sleep(10)
            publisher.debug("Nothing to publish")
def analyse(url, path):
    faup.decode(url)
    url_parsed = faup.get()

    resource_path = url_parsed['resource_path']
    query_string = url_parsed['query_string']

    result_path = 0
    result_query = 0

    if resource_path is not None:
        result_path = is_sql_injection(resource_path)

    if query_string is not None:
        result_query = is_sql_injection(query_string)

    if (result_path > 0) or (result_query > 0):
        paste = Paste.Paste(path)
        if (result_path > 1) or (result_query > 1):
            print "Detected SQL in URL: "
            print urllib2.unquote(url)
            to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
            publisher.warning(to_print)
            # Send to duplicate
            p.populate_set_out(path, 'Duplicate')
            # Send to Browse_warning_paste
            p.populate_set_out('sqlinjection;{}'.format(path), 'BrowseWarningPaste')
        else:
            print "Potential SQL injection:"
            print urllib2.unquote(url)
            to_print = 'SQLInjection;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection")
            publisher.info(to_print)
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host=cfg.get("Redis_default", "host"),
        port=cfg.getint("Redis_default", "port"),
        db=args.db)

    p_serv = r_serv.pipeline(False)

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    channel = cfg.get("PubSub_Longlines", "channel_0")
    Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Longlines", channel)

    # FUNCTIONS #
    publisher.info("Longlines subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_0")))

    while True:
        PST = P.Paste(Sub.get_message().split(" ", -1)[-1])
        r_serv.sadd("Longlines", PST.p_mime)
        PST.save_in_redis(r_serv, PST.p_mime)
def prepare_bview_file():
    publisher.info('Start converting binary bview file in plain text...')

    # create the plain text dump from the binary dump
    output = open(os.path.join(bview_dir, 'bview'), 'w')
    nul_f = open(os.devnull, 'w')
    bgpdump = os.path.join(root_dir, path_to_bgpdump_bin)
    p_bgp = Popen([bgpdump, filename], stdout=PIPE, stderr=nul_f)
    for line in p_bgp.stdout:
        output.write(line)
    nul_f.close()
    output.close()
    publisher.info('Conversion finished, start splitting...')

    # Split the plain text file
    fs = FilesSplitter(output.name, number_of_splits)
    splitted_files = fs.fplit()
    publisher.info('Splitting finished.')

    # Flush the old routing database and launch the population of
    # the new database
    routing_db.flushdb()
    publisher.info('Start pushing all routes...')
    pushing_process_service = os.path.join(services_dir, "pushing_process")
    run_splitted_processing(split_procs, pushing_process_service, splitted_files)
    publisher.info('All routes pushed.')

    # Remove the binary and the plain text files
    os.unlink(output.name)
    os.unlink(filename)
def analyse(url, path):
    faup.decode(url)
    url_parsed = faup.get()

    resource_path = url_parsed['resource_path']
    query_string = url_parsed['query_string']

    result_path = 0
    result_query = 0

    if resource_path is not None:
        result_path = is_sql_injection(resource_path)

    if query_string is not None:
        result_query = is_sql_injection(query_string)

    if (result_path > 0) or (result_query > 0):
        paste = Paste.Paste(path)
        if (result_path > 1) or (result_query > 1):
            print "Detected SQL in URL: "
            print urllib2.unquote(url)
            to_print = 'SQLInjection;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL")
            publisher.warning(to_print)
            # Send to duplicate
            p.populate_set_out(path, 'Duplicate')
            # Send to Browse_warning_paste
            p.populate_set_out('sqlinjection;{}'.format(path), 'BrowseWarningPaste')
        else:
            print "Potential SQL injection:"
            print urllib2.unquote(url)
            to_print = 'SQLInjection;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection")
            publisher.info(to_print)
def create_tld_list(url="https://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1"):
    """Recover a tld list from url.

    :param url: -- The url of the tld list.
    :return: -- list

    This function recovers from mozilla.org the list of effective tld
    names, saves it as a file, and returns a list of all the tlds.

    """
    domains = []
    htmlSource = urllib.urlopen(url).read()
    with open("ICCANdomain", 'wb') as F:
        F.write(htmlSource)

    with open("ICCANdomain", 'rb') as F:
        for num, line in enumerate(F):
            if re.match(r"^\/\/|\n", line) is None:
                domains.append(re.sub(r'\*', '', line[:-1]))
            else:
                publisher.info("Comment line ignored.")

    return domains
def refining_regex_dataset(r_serv, r_key, regex, min_match, year, month, luhn=True, dnscheck=True):
    """Refine the "raw dataset" of pastes with regular expressions

    :param r_serv: -- Redis connexion database
    :param r_key: -- (str) The name of the key read in redis (often the name of the keywords category list)
    :param min_match: -- (int) Below this number of matches the file is deleted
    :param regex: -- Regular expression which will be matched.

    This function refines the database created with the classify_token_paste
    function. It opens again the files which matched the keywords category
    list, looks for the regular expression inside them and counts how many
    times it is found. If there are not enough matches, the file is deleted
    from the list. Finally, it merges the results by day to be able to
    create a bar graph representing how many occurrences per day the regex
    matched.

    """
    for filename in r_serv.zrange(r_key, 0, -1):

        with gzip.open(filename, 'rb') as F:
            var = 0
            matchs = set([])

            for num, kword in enumerate(F):
                match = re.findall(regex, kword)
                var += len(match)

                for y in match:
                    if y != '' and len(y) < 100:
                        matchs.add(y)

        # If there are fewer matches than min_match, delete it (false positive)
        if len(matchs) <= min_match:
            r_serv.zrem(r_key, filename)
            publisher.debug("{0} deleted".format(filename))
        else:
            # else changing the score.
            if r_key == "creditcard_categ" and luhn:
                for card_number in matchs:
                    if is_luhn_valid(card_number):
                        r_serv.zincrby(r_key + '_occur', filename, 1)
                        publisher.info("{1} is valid in the file {0}".format(filename, card_number))
                    else:
                        publisher.debug("{0} card is invalid".format(card_number))

            if r_key == "mails_categ" and dnscheck:
                r_serv.zadd(r_key + '_occur', checking_MX_record(r_serv, matchs), filename)
            else:
                # LUHN NOT TRIGGERED (Other Categs)
                r_serv.zadd(r_key + '_occur', len(matchs), filename)

    create_graph_by_day_datastruct(r_serv, r_key, year, month)
def db_import(filename, day):
    with open(filename, 'r') as f:
        entry = ''
        pipeline = routing_db.pipeline()
        i = 0
        for line in f:
            # End of block, extracting the information
            if line == '\n':
                i += 1
                parsed = re.findall('(?:ASPATH|PREFIX): ([^\n{]*)', entry)
                try:
                    block = parsed[0].strip()
                    # RIPE-NCC-RIS BGP IPv6 Anchor Prefix @RRC00
                    # RIPE-NCC-RIS BGP Anchor Prefix @ rrc00 - RIPE NCC
                    if block in ['2001:7fb:ff00::/48', '84.205.80.0/24',
                                 '2001:7fb:fe00::/48', '84.205.64.0/24']:
                        asn = 12654
                    else:
                        asn = int(parsed[1].split()[-1].strip())
                    pipeline.hset(block, day, asn)
                except:
                    # FIXME: check the cause of the exception
                    publisher.warning(entry)
                entry = ''
                if i % 10000 == 0:
                    pipeline.execute()
                    pipeline = routing_db.pipeline()
            else:
                # append the line to the current block.
                entry += line
        pipeline.execute()
        publisher.info('{f} finished, {nb} entries imported.'.format(f=filename, nb=i))
def redis_interbargraph_set(r_serv, year, month, overwrite):
    """Create a Redis sorted set.

    :param r_serv: -- connexion to redis database
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process
    :param overwrite: -- (bool) trigger the overwrite mode

    This function creates inside redis the intersection of all days in
    a month, two by two.

    Example:
    For a month of 31 days it will create 30 sorted sets between day
    and day+1 until the last day.

    The overwrite mode deletes the intersets and re-creates them.

    """
    a = date(year, month, 01)
    b = date(year, month, cal.monthrange(year, month)[1])

    if overwrite:
        r_serv.delete("InterSet")

        for dt in rrule(DAILY, dtstart=a, until=b - timedelta(1)):
            dayafter = dt + timedelta(1)

            r_serv.delete(str(dt.strftime("%Y%m%d")) + str(dayafter.strftime("%Y%m%d")))

            r_serv.zinterstore(
                str(dt.strftime("%Y%m%d")) + str(dayafter.strftime("%Y%m%d")),
                {str(dt.strftime("%Y%m%d")): 1,
                 str(dayafter.strftime("%Y%m%d")): -1})

            r_serv.zadd(
                "InterSet",
                1,
                str(dt.strftime("%Y%m%d")) + str(dayafter.strftime("%Y%m%d")))
    else:
        for dt in rrule(DAILY, dtstart=a, until=b - timedelta(1)):
            dayafter = dt + timedelta(1)

            if r_serv.zcard(str(dt.strftime("%Y%m%d")) + str(dayafter.strftime("%Y%m%d"))) == 0:

                r_serv.zinterstore(
                    str(dt.strftime("%Y%m%d")) + str(dayafter.strftime("%Y%m%d")),
                    {str(dt.strftime("%Y%m%d")): 1,
                     str(dayafter.strftime("%Y%m%d")): -1})

                r_serv.zadd(
                    "InterSet",
                    1,
                    str(dt.strftime("%Y%m%d")) + str(dayafter.strftime("%Y%m%d")))

                publisher.info(str(dt.strftime("%Y%m%d")) + str(dayafter.strftime("%Y%m%d")) + " Intersection Created")
            else:
                publisher.warning("Data already exist, operation aborted.")
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host=cfg.get("Redis_Queues", "host"),
        port=cfg.getint("Redis_Queues", "port"),
        db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    channel = cfg.get("PubSub_Longlines", "channel_1")
    subscriber_name = "tokenize"
    subscriber_config_section = "PubSub_Longlines"

    # Publisher
    publisher_config_section = "PubSub_Words"
    publisher_name = "pubtokenize"

    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
    Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)

    channel_0 = cfg.get("PubSub_Words", "channel_0")

    # FUNCTIONS #
    publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))

    while True:
        message = Sub.get_msg_from_queue(r_serv)
        print message

        if message is not None:
            PST = P.Paste(message.split(" ", -1)[-1])
        else:
            if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
                r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
                print "Shutdown Flag Up: Terminating"
                publisher.warning("Shutdown Flag Up: Terminating.")
                break
            publisher.debug("Tokeniser is idling 10s")
            time.sleep(10)
            print "sleeping"
            continue

        for word, score in PST._get_top_words().items():
            if len(word) >= 4:
                msg = channel_0 + ' ' + PST.p_path + ' ' + str(word) + ' ' + str(score)
                Pub.send_message(msg)
                print msg
            else:
                pass
def service_start_multiple(servicename, number, param=None):
    """
    Start multiple services using `service_start` and save their pids
    """
    i = 0
    publisher.info('Starting ' + str(number) + ' times ' + servicename)
    while i < number:
        proc = service_start(servicename, param)
        writepid(servicename, proc)
        i += 1
def graph_categ_by_day(r_serv, filename, year, month, r_key):
    """Create a bargraph representing regex matching by day

    :param r_serv: -- Redis connexion database
    :param filename: -- (str) The absolute path where to save the figure.png
    :param r_key: -- (str) The name of the key read in redis (often the name of the keywords category list)
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process

    This function displays the amount of the category per day.

    """
    adate = []
    categ_num = []
    rcParams['figure.figsize'] = 15, 10

    a = date(year, month, 01)
    b = date(year, month, cal.monthrange(year, month)[1])

    for dt in rrule(DAILY, dtstart=a, until=b):
        adate.append(dt.strftime("%d"))
        categ_num.append(r_serv.zscore(r_key + '_by_day', dt.strftime("%Y%m%d")))

    n_groups = len(categ_num)
    adress_scores = tuple(categ_num)

    index = np.arange(n_groups)
    bar_width = 0.5
    opacity = 0.6

    ladress = plt.bar(index, adress_scores, bar_width,
                      alpha=opacity,
                      color='b',
                      label=r_key)

    plt.plot(tuple(categ_num), 'r--')
    # plt.yscale('log')
    plt.xlabel('Days')
    plt.ylabel('Amount')
    plt.title('Occurrence of ' + r_key + ' by day')
    plt.xticks(index + bar_width / 2, tuple(adate))
    plt.legend()
    plt.grid()
    plt.tight_layout()
    plt.savefig(filename + ".png", dpi=None, facecolor='w', edgecolor='b',
                orientation='portrait', papertype=None, format="png",
                transparent=False, bbox_inches=None, pad_inches=0.1,
                frameon=True)
    publisher.info(filename + ".png" + " saved!")
def analyse(url, path):
    faup.decode(url)
    url_parsed = faup.get()

    resource_path = url_parsed['resource_path']
    query_string = url_parsed['query_string']

    result_path = 0
    result_query = 0

    if resource_path is not None:
        ## TODO: # FIXME: remove me
        try:
            resource_path = resource_path.decode()
        except:
            pass
        result_path = is_sql_injection(resource_path)

    if query_string is not None:
        ## TODO: # FIXME: remove me
        try:
            query_string = query_string.decode()
        except:
            pass
        result_query = is_sql_injection(query_string)

    if (result_path > 0) or (result_query > 0):
        paste = Paste.Paste(path)
        if (result_path > 1) or (result_query > 1):
            print("Detected SQL in URL: ")
            print(urllib.request.unquote(url))
            to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path)
            publisher.warning(to_print)
            # Send to duplicate
            p.populate_set_out(path, 'Duplicate')

            msg = 'infoleak:automatic-detection="sql-injection";{}'.format(path)
            p.populate_set_out(msg, 'Tags')

            # statistics
            tld = url_parsed['tld']
            if tld is not None:
                ## TODO: # FIXME: remove me
                try:
                    tld = tld.decode()
                except:
                    pass
                date = datetime.datetime.now().strftime("%Y%m")
                server_statistics.hincrby('SQLInjection_by_tld:' + date, tld, 1)
        else:
            print("Potential SQL injection:")
            print(urllib.request.unquote(url))
            to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_rel_path)
            publisher.info(to_print)
def __query_logging(ip, user_agent, method, q_ip=None, announce_date=None, days_limit=None, level=None):
    if level == 'warning':
        publisher.warning(__csv2string([ip, user_agent, method, q_ip, announce_date, days_limit, level]))
    elif level == 'error':
        publisher.error(__csv2string([ip, user_agent, method, q_ip, announce_date, days_limit, level]))
    else:
        publisher.info(__csv2string([ip, user_agent, method, q_ip, announce_date, days_limit, level]))
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue

            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
def prepare_bview_file(filename):
    publisher.info('Start converting binary bview file in plain text...')

    # create the plain text dump from the binary dump
    with open(path_output_bviewfile, 'w') as output:
        nul_f = open(os.devnull, 'w')
        p_bgp = Popen([bgpdump, filename], stdout=PIPE, stderr=nul_f)
        for line in p_bgp.stdout:
            output.write(line)
        nul_f.close()
    publisher.info('Conversion finished, start splitting...')

    # Split the plain text file
    return file_splitter.fsplit(path_output_bviewfile)
def test_publisher(self):
    for i in range(0, 21):
        if i % 2 == 0:
            publisher.info('test' + str(i))
        elif i % 3 == 0:
            publisher.warning('test' + str(i))
        elif i % 5 == 0:
            publisher.error('test' + str(i))
        elif i % 7 == 0:
            publisher.critical('test' + str(i))
        else:
            publisher.debug('test' + str(i))
        time.sleep(1)
def add_asn_entry(asn, owner, ips_block):
    """
    Add a new subnet to the ASNs known by the system,
    only if the subnet is not already present.
    Otherwise, simply return the value from the database.
    """
    key = None
    asn_timestamps = sorted(global_db.smembers(asn), reverse=True)
    key_list = ["{asn}{sep}{timestamp}{sep}{ips_block}".format(
                    asn=asn, timestamp=asn_timestamp, sep=separator,
                    ips_block=key_ips_block)
                for asn_timestamp in asn_timestamps]
    known_asn_ips_blocks = []
    if len(key_list) != 0:
        known_asn_ips_blocks = global_db.mget(key_list)
    i = 0
    for block in known_asn_ips_blocks:
        if block == ips_block:
            asn, timestamp, b = key_list[i].split(separator)
            temp_key = "{asn}{sep}{timestamp}".format(asn=asn, sep=separator, timestamp=timestamp)
            if global_db.get("{key}{sep}{owner}".format(key=temp_key, sep=separator, owner=key_owner)) == owner:
                key = temp_key
                break
        i += 1
    if key is None:
        lock = global_db.getset('locked_new_ans', 1)
        # getset returns the previous value as a string
        if lock == '1':
            # ensure the same new entry is not inserted twice
            return None
        timestamp = datetime.datetime.utcnow().isoformat()
        key = "{asn}{sep}{timestamp}".format(asn=asn, sep=separator, timestamp=timestamp)
        to_set = {
            "{key}{sep}{owner}".format(key=key, sep=separator, owner=key_owner): owner,
            "{key}{sep}{ips_block}".format(key=key, sep=separator, ips_block=key_ips_block): ips_block}
        pipeline = global_db.pipeline(False)
        pipeline.sadd(asn, timestamp)
        pipeline.mset(to_set)
        pipeline.set('locked_new_ans', 0)
        pipeline.execute()
        publisher.info('New asn entry inserted in the database: {asn}, {owner}, {ipblock}'
                       .format(asn=asn, owner=owner, ipblock=ips_block))
    return key
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read('./packages/config.cfg')

    # SCRIPT PARSER #
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Assisted Information Leak framework.''',
        epilog='''''')

    parser.add_argument('-db', type=int, default=0,
                        help='The name of the Redis DB (default 0)',
                        choices=[0, 1, 2, 3, 4], action='store')

    # REDIS #
    r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
                               port=cfg.getint("Redis_Queues", "port"),
                               db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.port = 6380
    publisher.channel = "Queuing"

    # ZMQ #
    channel = cfg.get("PubSub_Global", "channel")

    # FUNCTIONS #
    publisher.info("""Subscribed to channel {0}""".format(channel))

    while True:
        table = texttable.Texttable()
        table.header(["Queue name", "#Items"])
        row = []
        for queue in r_serv.smembers("queues"):
            current = r_serv.llen(queue)
            current = current - r_serv.llen(queue)
            row.append((queue, r_serv.llen(queue)))

        time.sleep(0.5)
        row.sort()
        table.add_rows(row, header=False)
        os.system('clear')
        print table.draw()
def launch_parser(module):
    """
    Launch a parser on a dataset for a module
    """
    service_parser = os.path.join(services_dir, "parse_raw_files.py")
    if module is None:
        publisher.error('Unable to start parsing: module is None')
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_parser, '-n', module, '-d', directory])
        config_db.set(module + "|" + "parsing", 1)
        publisher.info('Parsing of ' + module + ' started.')
    else:
        publisher.error('Unable to start parsing of ' + module + ': home_dir unknown.')
        config_db.set(module + "|" + "parsing", 0)
def stop_services(signum, frame):
    """
    Tell the modules to stop.
    """
    config = ConfigParser.RawConfigParser()
    config_file = "/etc/bgpranking/bgpranking.conf"
    config.read(config_file)
    config_db = redis.Redis(port=int(config.get('redis', 'port_master')),
                            db=config.get('redis', 'config'))
    modules = config_db.smembers('modules')
    # Cleanup
    for module in modules:
        config_db.delete(module + '|parsing')
        config_db.delete(module + '|fetching')
    # delete the module list itself (redis expects key names, not a set object)
    config_db.delete('modules', *modules)
    publisher.info('The services will be stopped ASAP')
    exit(0)
def parse(directory):
    old_dir = os.path.join(directory, 'old')
    to_import = glob.glob(os.path.join(directory, '*'))
    to_import.sort()
    for f_name in to_import:
        if os.path.isdir(f_name):
            continue
        try:
            update = None
            f = open(f_name).read()
            data = re.findall('as=AS(.*)&.*</a> (.*)\n', f)
            update_raw = re.sub('[\n()]', '',
                                re.findall('File last modified at (.*)</I>', f, re.S)[0])
            update = dateutil.parser.parse(update_raw).isoformat()
            yield update, data
            os.rename(f_name, os.path.join(old_dir, update))
        except:
            # update may still be None when the file could not be parsed
            publisher.info('Invalid file. Update: ' + str(update))
def launch_parser(module):
    """
    Launch a parser on a dataset for a module
    """
    service_parser = os.path.join(services_dir, "parse_raw_files.py")
    timer = '60'
    if module is None:
        publisher.error('Unable to start parsing: module is None')
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_parser, '-n', module,
                          '-d', directory, '-t', timer])
        config_db.set(module + "|" + "parsing", 1)
        publisher.info('Parsing of ' + module + ' started.')
    else:
        publisher.error('Unable to start parsing of ' + module +
                        ': home_dir unknown.')
        config_db.set(module + "|" + "parsing", 0)
def fetch(url, directory):
    temp_dir = os.path.join(directory, 'temp')
    old_dir = os.path.join(directory, 'old')
    filename = os.path.join(temp_dir, 'autnums.html')
    urlretrieve('http://www.cidr-report.org/as2.0/autnums.html', filename)
    f = open(filename).read()
    update_raw = re.sub('[\n()]', '',
                        re.findall('File last modified at (.*)</I>', f, re.S)[0])
    update = dateutil.parser.parse(update_raw).isoformat()
    newfile = os.path.join(directory, update)
    oldfile = os.path.join(old_dir, update)
    if os.path.exists(newfile) or os.path.exists(oldfile):
        os.remove(filename)
        return False
    else:
        os.rename(filename, newfile)
        publisher.info('File updated at ' + update)
        return True
def launch_fetcher(module):
    """
    Launch a process which fetches a dataset into a directory
    """
    service_fetcher = os.path.join(services_dir, "fetch_raw_files.py")
    if module is None:
        publisher.error('Unable to start fetching: module is None')
        return
    url = config_db.get(module + "|" + "url")
    if url is None:
        publisher.info(module + ' does not have a URL, no fetcher.')
        config_db.set(module + "|" + "fetching", 0)
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_fetcher, '-n', module,
                          '-d', directory, '-u', url])
        config_db.set(module + "|" + "fetching", 1)
        publisher.info('Fetching of ' + module + ' started.')
    else:
        publisher.error('Unable to start fetching of ' + module +
                        ': home_dir unknown.')
        config_db.set(module + "|" + "fetching", 0)
def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
    proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return None
        else:
            first_occ = r_serv_cache.get(redis_key)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return first_occ
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
def create_dirfile(r_serv, directory, overwrite):
    """Create a file of paths.

    :param r_serv: -- connexion to redis database
    :param directory: -- The folder where to launch the listing of the .gz files

    This function creates a list in redis containing the absolute paths of
    all the pastes which need to be processed by functions running in
    parallel (like redis_words_ranking).

    """
    if overwrite:
        r_serv.delete("filelist")

        for x in listdirectory(directory):
            r_serv.lpush("filelist", x)

        publisher.info("The list was overwritten")

    else:
        if r_serv.llen("filelist") == 0:

            for x in listdirectory(directory):
                r_serv.lpush("filelist", x)

            publisher.info("New list created")
        else:

            for x in listdirectory(directory):
                r_serv.lpush("filelist", x)

            publisher.info("The list was updated with new elements")
def search_phone(message):
    paste = Paste.Paste(message)
    content = paste.get_p_content()
    # regex to find phone numbers, may raise many false positives
    # (shalt thou seek optimization, upgrading is required)
    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
    # list of the regex results in the Paste, may be null
    results = reg_phone.findall(content)

    # if the list is greater than 4, we consider the Paste may contain a list of phone numbers
    if len(results) > 4:
        print results
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))


if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Phone'

    # Setup the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logging system
    publisher.info("Run Phone module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        search_phone(message)
def analyse(url, path):
    faup.decode(url)
    url_parsed = faup.get()

    resource_path = url_parsed['resource_path']
    query_string = url_parsed['query_string']

    result_path = 0
    result_query = 0

    if resource_path is not None:
        result_path = is_sql_injection(resource_path.decode('utf8'))

    if query_string is not None:
        result_query = is_sql_injection(query_string.decode('utf8'))

    if (result_path > 0) or (result_query > 0):
        paste = Paste.Paste(path)
        if (result_path > 1) or (result_query > 1):
            print("Detected SQL in URL: ")
            print(urllib.request.unquote(url))
            to_print = 'SQLInjection;{};{};{};{};{}'.format(
                paste.p_source, paste.p_date, paste.p_name,
                "Detected SQL in URL", paste.p_path)
            publisher.warning(to_print)
            # Send to duplicate
            p.populate_set_out(path, 'Duplicate')
            # Send to Browse_warning_paste
            p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler')

            msg = 'infoleak:automatic-detection="sql-injection";{}'.format(path)
            p.populate_set_out(msg, 'Tags')
        else:
            print("Potential SQL injection:")
            print(urllib.request.unquote(url))
            to_print = 'SQLInjection;{};{};{};{};{}'.format(
                paste.p_source, paste.p_date, paste.p_name,
                "Potential SQL injection", paste.p_path)
            publisher.info(to_print)
def add_asn_entry(asn, owner, ips_block):
    """
    Add a new subnet to the ASNs known by the system,
    only if the subnet is not already present.
    Otherwise, simply return the value from the database.
    """
    key = '{asn}|{block}'.format(asn=asn, block=ips_block)
    owners = global_db.hvals(key)
    if owner not in owners:
        lock = global_db.getset('locked_new_ans', 1)
        # getset returns the previous value as a string
        if lock == '1':
            # ensure the same new entry is not inserted twice
            return None
        timestamp = datetime.datetime.utcnow().isoformat()
        p = global_db.pipeline(False)
        p.hset(key, timestamp, owner)
        p.sadd(asn, ips_block)
        p.set('locked_new_ans', 0)
        p.execute()
        publisher.info('New asn entry inserted in the database: {asn}, {owner}, {ipblock}'
                       .format(asn=asn, owner=owner, ipblock=ips_block))
    return key
def db_import(filename, day):
    routing_db = get_redis_connector()
    with open(filename, 'r') as f:
        entry = ''
        pipeline = routing_db.pipeline()
        i = 0
        for line in f:
            # End of block, extracting the information
            if line == '\n':
                i += 1
                parsed = re.findall('(?:ASPATH|PREFIX): ([^\n{]*)', entry)
                try:
                    block = parsed[0].strip()
                    # RIPE-NCC-RIS BGP IPv6 Anchor Prefix @RRC00
                    # RIPE-NCC-RIS BGP Anchor Prefix @ rrc00 - RIPE NCC
                    if block in ['2001:7fb:ff00::/48', '84.205.80.0/24',
                                 '2001:7fb:fe00::/48', '84.205.64.0/24']:
                        asn = 12654
                    else:
                        asn = int(parsed[1].split()[-1].strip())
                    pipeline.hset(block, day, asn)
                except:
                    # FIXME: check the cause of the exception
                    publisher.warning(entry)
                entry = ''
                if i % 10000 == 0:
                    pipeline.execute()
                    pipeline = routing_db.pipeline()
            else:
                # append the line to the current block.
                entry += line
        pipeline.execute()
        publisher.info('{f} finished, {nb} entries imported.'.format(f=filename, nb=i))
def launch():
    """
    Fetch all the whois entries assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected from " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(entry,
                                   server + '\n' + unicode(whois, errors="replace"),
                                   cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i % 10000 == 0:
                publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
        except Exception as e:
            publisher.error("Error on " + server + ': ' + str(e))
            publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
from packages import lib_refine
from pubsublogger import publisher

import re

from Helper import Process

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'CreditCards'

    p = Process(config_section)

    # FUNCTIONS #
    publisher.info("Creditcard script subscribed to channel creditcard_categ")

    creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"

    # FIXME For retro compatibility
    channel = 'creditcard_categ'

    # Source: http://www.richardsramblings.com/regex/credit-card-numbers/
    cards = [
        r'\b4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b',  # 16-digit VISA, with separators
        r'\b5[1-5]\d{2}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b',  # 16 digits MasterCard
        r'\b6(?:011|22(?:(?=[\ \-]?(?:2[6-9]|[3-9]))|[2-8]|9(?=[\ \-]?(?:[01]|2[0-5])))|4[4-9]\d|5\d\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b',  # Discover Card
        r'\b35(?:2[89]|[3-8]\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b',  # Japan Credit Bureau (JCB)
        r'\b3[47]\d\d(?:[\ \-]?)\d{6}(?:[\ \-]?)\d{5}\b',  # American Express
        r'\b(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}\b',  # Maestro
    ]
if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Keys'

    # Setup the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logging system
    publisher.info("Run Keys module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        paste = Paste.Paste(message)
        search_key(paste)

        # (Optional) Send that thing to the next queue
REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
REDIS_KEY_NUM_USERNAME = '******'
REDIS_KEY_NUM_PATH = 'uniqNumForUsername'
REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
REDIS_KEY_ALL_PATH_SET = 'AllPath'
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    module_name = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    faup = Faup()

    regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    redis_cache_key = regex_helper.generate_redis_cache_key(module_name)

    while True:
        message = p.get_from_set()

        if message is None:
            publisher.debug("Script Credential is Idling 10s")
# SCRIPT PARSER #
parser = argparse.ArgumentParser(
    description='Start Categ module on files.')

parser.add_argument(
    '-d', type=str, default="../files/",
    help='Path to the directory containing the category files.',
    action='store')

args = parser.parse_args()

# FUNCTIONS #
publisher.info("Script Categ started")

categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey']
tmp_dict = {}
for filename in categories:
    bname = os.path.basename(filename)
    tmp_dict[bname] = []
    with open(os.path.join(args.d, filename), 'r') as f:
        patterns = [r'%s' % (re.escape(s.strip())) for s in f]
        tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)

prec_filename = None

while True:
if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    torclient_host = '127.0.0.1'
    torclient_port = 9050

    config_section = 'Onion'

    p = Process(config_section)
    r_cache = redis.StrictRedis(host=p.config.get("Redis_Cache", "host"),
                                port=p.config.getint("Redis_Cache", "port"),
                                db=p.config.getint("Redis_Cache", "db"))

    # FUNCTIONS #
    publisher.info("Script subscribed to channel onion_categ")

    # FIXME For retro compatibility
    channel = 'onion_categ'

    # Getting the first message from redis.
    message = p.get_from_set()
    prec_filename = None

    # Thanks to Faup project for this regex
    # https://github.com/stricaud/faup
    url_regex = "((http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

    while True:
        if message is not None:
            print message
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # Redis
    r_serv1 = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
                                port=cfg.getint("Redis_Queues", "port"),
                                db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    # Subscriber
    channel = cfg.get("PubSub_Global", "channel")
    subscriber_name = "DomainClassifier"
    subscriber_config_section = "PubSub_Global"

    cc = cfg.get("PubSub_DomainClassifier", "cc")
    cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld")

    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)

    # FUNCTIONS #
    publisher.info("""ZMQ DomainClassifier is Running""")
    c = DomainClassifier.domainclassifier.Extract(rawtext="")

    while True:
        try:
            message = sub.get_msg_from_queue(r_serv1)

            if message is not None:
                PST = Paste.Paste(message.split(" ", -1)[-1])
            else:
                if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
                    r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script DomainClassifier is idling 10s")
                time.sleep(1)
                continue

            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc))
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
            pass