def main(): """Main Function""" # CONFIG # cfg = ConfigParser.ConfigParser() cfg.read(configfile) # REDIS # r_serv = redis.StrictRedis( host = cfg.get("Redis_Queues", "host"), port = cfg.getint("Redis_Queues", "port"), db = cfg.getint("Redis_Queues", "db")) r_serv1 = redis.StrictRedis( host = cfg.get("Redis_Data_Merging", "host"), port = cfg.getint("Redis_Data_Merging", "port"), db = cfg.getint("Redis_Data_Merging", "db")) r_serv2 = redis.StrictRedis( host = cfg.get("Redis_Cache", "host"), port = cfg.getint("Redis_Cache", "port"), db = cfg.getint("Redis_Cache", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "mails_categ", "emails") # FUNCTIONS # publisher.info("Suscribed to channel mails_categ") message = Sub.get_msg_from_queue(r_serv) prec_filename = None email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}" while True: try: if message != None: channel, filename, word, score = message.split() if prec_filename == None or filename != prec_filename: PST = P.Paste(filename) MX_values = lib_refine.checking_MX_record(r_serv2, PST.get_regex(email_regex)) if MX_values[0] >= 1: PST.__setattr__(channel, MX_values) PST.save_attribute_redis(r_serv1, channel, (MX_values[0], list(MX_values[1]))) pprint.pprint(MX_values) if MX_values[0] > 10: publisher.warning('{0};{1};{2};{3};{4}'.format("Mails", PST.p_source, PST.p_date, PST.p_name,"Checked "+ str(MX_values[0])+ " e-mails" )) else: publisher.info('{0};{1};{2};{3};{4}'.format("Mails", PST.p_source, PST.p_date, PST.p_name,"Checked " str(MX_values[0])+ " e-mail(s)" )) prec_filename = filename else: if r_serv.sismember("SHUTDOWN_FLAGS", "Mails"): r_serv.srem("SHUTDOWN_FLAGS", "Mails") print "Shutdown Flag Up: Terminating" publisher.warning("Shutdown Flag Up: Terminating.") break publisher.debug("Script Mails is Idling 10s") time.sleep(10) message = Sub.get_msg_from_queue(r_serv) except dns.exception.Timeout: print "dns.exception.Timeout" pass
# Processing loop: pull paste filenames from the module queue, extract
# e-mail addresses, validate their domains' MX records and persist the
# results.  Logs a warning when the count exceeds `is_critical`.
message = p.get_from_set()
prec_filename = None

# Log as critical if there are more than that amount of valid emails.
is_critical = 10

email_regex = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
MX_values = None

while True:
    if message is not None:
        filename, score = message.split()

        # Skip the paste processed on the previous iteration.
        if prec_filename is None or filename != prec_filename:
            PST = Paste.Paste(filename)
            MX_values = lib_refine.checking_MX_record(
                r_serv2, PST.get_regex(email_regex))

            if MX_values[0] >= 1:
                # Attach the MX results to the paste and persist them.
                PST.__setattr__(channel, MX_values)
                PST.save_attribute_redis(
                    channel, (MX_values[0], list(MX_values[1])))

                pprint.pprint(MX_values)
                to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.\
                    format(PST.p_source, PST.p_date, PST.p_name,
                           MX_values[0])
                if MX_values[0] > is_critical:
                    publisher.warning(to_print)
                else:
                    publisher.info(to_print)
                # NOTE(review): this fragment appears truncated — the
                # branch that updates `prec_filename`, handles an empty
                # queue and refetches `message` is not visible here;
                # confirm against the full module source.
# max execution time on regex signal.alarm(max_execution_time) try: # Transforming the set into a string MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(l_mails).lower()) except TimeoutException: p.incr_module_timeout_statistic() # add encoder type err_mess = "Mail: processing timeout: {}".format(item_id) print(err_mess) publisher.info(err_mess) continue else: signal.alarm(0) MX_values = lib_refine.checking_MX_record( r_serv_cache, MXdomains, addr_dns) if MX_values[0] >= 1: PST.__setattr__(channel, MX_values) PST.save_attribute_redis( channel, (MX_values[0], list(MX_values[1]))) to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\ format(PST.p_source, PST.p_date, PST.p_name, MX_values[0], PST.p_rel_path) if MX_values[0] > is_critical: publisher.warning(to_print) #Send to duplicate p.populate_set_out(item_id, 'Duplicate')