Code example #1
def fetcher():
    """
        Main function which fetch the datasets
    """
    while config_db.sismember('modules', module):
        try:
            urllib.urlretrieve(url, temp_filename)
        except Exception:
            publisher.error('Unable to fetch ' + url)
            __check_exit()
            continue
        drop_file = False
        """
            Check is the file already exists, if the same file is found,
            the downloaded file is dropped. Else, it is moved in his
            final directory.
        """
        to_check = glob.glob(os.path.join(old_directory, '*'))
        to_check += glob.glob(os.path.join(directory, '*'))
        for file in to_check:
            if filecmp.cmp(temp_filename, file):
                drop_file = True
                break
        if drop_file:
            os.unlink(temp_filename)
            publisher.debug('No new file on ' + url)
        else:
            os.rename(temp_filename, filename)
            publisher.info('New file on ' + url)
        __check_exit()
    config_db.delete(module + "|" + "fetching")
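
All of the examples in this listing log through pubsublogger's `publisher` object. A minimal setup sketch, inferred from the examples below (port 6380 and the 'Script' channel appear in several of them; the import path assumes the PubSubLogger package):

from pubsublogger import publisher

# Values taken from the examples in this listing; an actual
# deployment reads them from its own configuration.
publisher.port = 6380
publisher.channel = 'Script'
publisher.info('module started')
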
Code example #2
def launch():
    """
        Fetch all the whois entry assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected of " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(entry, server + '\n' + unicode(whois, errors="replace"), cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i % 10000 == 0:
                publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            time.sleep(sleep_timer)
            __disconnect()
Code example #3
 def get_asn_descriptions(self, asn):
     if not self.has_asnhistory:
         publisher.debug('ASN History not enabled.')
         return [datetime.date.today(), 'ASN History not enabled.']
     desc_history = self.asnhistory.get_all_descriptions(asn)
     return [(date.astimezone(tz.tzutc()).date(), descr)
             for date, descr in desc_history]
Code example #4
File: lib_jobs.py  Project: caar2000/AIL-framework
def display_listof_pid(r_serv, arg):
    """Display the pid list from redis

    This function display infos in the shell about lauched process

    """
    jobs = {}
    joblist = []
    try:
        for job in r_serv.smembers("pid"):
            jobs = r_serv.hgetall(job)

            if jobs:  # hgetall returns an empty dict, not None, when the key is missing
                start = datetime.strptime(r_serv.hget(job, "startime"), "%Y-%m-%d_%H:%M:%S")

                end = datetime.strptime(time.strftime("%Y-%m-%d_%H:%M:%S"), "%Y-%m-%d_%H:%M:%S")
                jobs['uptime'] = str(abs(start - end))
                joblist.append(jobs)
            else:
                publisher.debug("display_list_of_pid Aborted due to lack of Information in Redis")

        joblist = sorted(joblist, key=lambda k: k['uptime'], reverse=True)

        for job in joblist:
            print format_display_listof_pid(job, arg)

        if arg == "remain":
            print "Remaining: {0}".format(r_serv.llen("filelist"))

        if arg == "processed":
            print "processed: {0}".format(r_serv.llen("processed"))

    except TypeError:
        publisher.error("TypeError for display_listof_pid")
Code example #5
File: lib_refine.py  Project: caar2000/AIL-framework
def refining_regex_dataset(r_serv, r_key, regex, min_match, year, month, luhn=True, dnscheck=True):
    """Refine the "raw dataset" of pastes with regular expressions

    :param r_serv: -- Redis connection database
    :param r_key: -- (str) The name of the key read in redis (often the name of
        the keywords category list)
    :param min_match: -- (int) Files with fewer matches than this are deleted
    :param regex: -- Regular expression to match.

    This function refines the database created by the classify_token_paste
    function. It reopens the files that matched the keywords category list,
    searches for the regular expression inside them and counts how many times
    it is found.

    If there are not enough matches of the regular expression, the file is
    deleted from the list.

    Finally, it merges the results by day, so that a bar graph showing the
    number of occurrences of the regex per day can be created.

    """
    for filename in r_serv.zrange(r_key, 0, -1):

        with gzip.open(filename, 'rb') as F:
            var = 0
            matchs = set([])

            for num, kword in enumerate(F):

                match = re.findall(regex, kword)
                var += len(match)

                for y in match:
                    if y != '' and len(y) < 100:
                        matchs.add(y)
            # If there are fewer matches than min_match, delete it (false positive)
            if len(matchs) <= min_match:
                r_serv.zrem(r_key, filename)
                publisher.debug("{0} deleted".format(filename))
            else:
                # Otherwise, update the score.
                if r_key == "creditcard_categ" and luhn:
                    for card_number in matchs:
                        if is_luhn_valid(card_number):

                            r_serv.zincrby(r_key+'_occur', filename, 1)

                            publisher.info("{1} is valid in the file {0}".format(filename, card_number))
                        else:
                            publisher.debug("{0} card is invalid".format(card_number))

                if r_key == "mails_categ" and dnscheck:
                    r_serv.zadd(r_key+'_occur', checking_MX_record(r_serv, matchs), filename)

                else:
                    # LUHN NOT TRIGGERED (Other Categs)
                    r_serv.zadd(r_key+'_occur',
                        len(matchs),
                        filename)

    create_graph_by_day_datastruct(r_serv, r_key, year, month)
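
is_luhn_valid is not shown in this excerpt; a minimal sketch of the standard Luhn checksum it presumably implements:

def is_luhn_valid(card_number):
    """Return True if the digit string passes the Luhn checksum."""
    digits = [int(c) for c in str(card_number) if c.isdigit()]
    odd = sum(digits[-1::-2])  # every second digit, starting from the right
    even = sum(sum(divmod(2 * d, 10)) for d in digits[-2::-2])  # double, then sum the digits
    return (odd + even) % 10 == 0
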
Code example #6
def remove_pure_doppelganger(r_serv, nb):
    """Remove identic paste

    :param r_serv: -- Redis connexion database
    :param nb: -- (int) Number of execution wanted

    Add to a temporary list the hash of wholes files and compare the new hash
    to the element of this list. If the hash is already inside, the file
    is deleted otherwise the hash is added in the list.

    """
    hashlist = []
    for x in xrange(0,nb):
        filename = r_serv.lpop("filelist")

        with open(filename, 'rb') as L:
            hashline = hashlib.md5(L.read()).hexdigest()

            print len(hashlist)

            if hashline in hashlist:

                os.remove(filename)
                publisher.debug("{0} removed".format(filename))
                print "{0} removed".format(filename)
            else:
                hashlist.append(hashline)
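
Membership tests on a Python list are O(n), so the duplicate check above slows down as hashlist grows. A sketch of the same logic with a set (constant-time lookups), assuming the same "filelist" layout and publisher object as above:

import hashlib
import os

def remove_pure_doppelganger_set(r_serv, nb):
    seen = set()
    for _ in xrange(nb):
        filename = r_serv.lpop("filelist")
        if filename is None:
            break
        with open(filename, 'rb') as f:
            digest = hashlib.md5(f.read()).hexdigest()
        if digest in seen:
            os.remove(filename)
            publisher.debug("{0} removed".format(filename))
        else:
            seen.add(digest)
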
Code example #7
File: lib_words.py  Project: caar2000/AIL-framework
def recovering_longlines(r_serv, nb):
    """Get longlines with linenumbers

    """
    try:
        for n in xrange(0,nb):
            filename = r_serv.lpop("longlines")

            if filename is not None:
                # For each values in redis (longline's line number)
                for numline in r_serv.smembers(filename):

                    with gzip.open(filename,'rb') as F:

                        for num, line in enumerate(F):
                            # When the line number corresponds.
                            if int(num) == int(numline):
                                pass
                                # TREATMENT
            else:
                publisher.debug("Empty list")
                r_serv.save()
                break

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
Code example #8
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read('./packages/config.cfg')

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Global"

    # ZMQ #
    PubGlob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")

    # FUNCTIONS #
    publisher.info("Starting to publish.")

    while True:
        filename = r_serv.lpop("filelist")

        if filename is not None:

            msg = cfg.get("PubSub_Global", "channel")+" "+filename
            PubGlob.send_message(msg)
            publisher.debug("{0} Published".format(msg))
        else:
            time.sleep(10)
            publisher.debug("Nothing to publish")
Code example #9
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    channel = cfg.get("PubSub_Longlines", "channel_1")
    subscriber_name = "tokenize"
    subscriber_config_section = "PubSub_Longlines"

    #Publisher
    publisher_config_section = "PubSub_Words"
    publisher_name = "pubtokenize"

    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
    Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)

    channel_0 = cfg.get("PubSub_Words", "channel_0")

    # FUNCTIONS #
    publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))

    while True:
        message = Sub.get_msg_from_queue(r_serv)
        print message
        if message is not None:
            PST = P.Paste(message.split(" ",-1)[-1])
        else:
            if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
                r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
                print "Shutdown Flag Up: Terminating"
                publisher.warning("Shutdown Flag Up: Terminating.")
                break
            publisher.debug("Tokeniser is idling 10s")
            time.sleep(10)
            print "sleepin"
            continue

        for word, score in PST._get_top_words().items():
            if len(word) >= 4:
                msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score)
                Pub.send_message(msg)
                print msg
Code example #10
File: lib_words.py  Project: caar2000/AIL-framework
def detect_longline_from_list(r_serv, nb):
    try:
        for n in xrange(0, nb):
            if not dectect_longlines(r_serv, "filelist", True):
                break

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
Code example #11
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="",
                                                  nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {};{}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {};{}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Code example #12
File: test_publisher.py  Project: Rafiot/PubSubLogger
 def test_publisher(self):
     for i in range(0, 21):
         if i % 2 == 0:
             publisher.info('test' + str(i))
         elif i % 3 == 0:
             publisher.warning('test' + str(i))
         elif i % 5 == 0:
             publisher.error('test' + str(i))
         elif i % 7 == 0:
             publisher.critical('test' + str(i))
         else:
             publisher.debug('test' + str(i))
         time.sleep(1)
Code example #13
File: api.py  Project: CIRCL/bgpranking-redis-api
 def asn_desc_via_history(self, asn):
     if self.has_asnhistory:
         asn_descr = self.asnhistory.get_last_description(asn)
         if asn_descr is None:
             # The ASN has no description in the database
             # publisher.error(\
             #        'Unable to find the ASN description of {}. ASN History might be down.'.\
             #        format(asn))
             asn_descr = 'No ASN description has been found.'
     else:
         publisher.debug('ASN History not enabled.')
         asn_descr = 'ASN History not enabled.'
     return asn_descr
Code example #14
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Code example #15
File: Tweet.py  Project: S1sirocks/TwitterMon
    def get_TweetRawContent(self):

        publisher.port = 6380
        publisher.channel = 'Script'
        #publisher.debug("[-Tweet.py-] Requested RAW Content = " + self.p_path)
        tweetRaw = ''

        #publisher.debug("[-Tweet.py-] Reading file " + self.p_path)
        #print("[-Tweet.py-] Reading file " + self.p_path)
        try:
            with gzip.open(self.p_path, 'rb') as f:
                tweetRaw = f.read().decode('utf-8')
        except Exception as e:
            publisher.debug("error opening path: "+self.p_path + " with error "+str(e))
            paste = 'error opening path: '+self.p_path + ' with error '+str(e)

        return str(tweetRaw)
Code example #16
File: db_generator.py  Project: CIRCL/IP-ASN-history
def update_running_pids(old_procs):
    """
        Update the list of the running process and return the list
    """
    new_procs = []
    for proc in old_procs:
        if proc.poll() is None and check_pid(proc.pid):
            publisher.debug(str(proc.pid) + ' is alive')
            new_procs.append(proc)
        else:
            try:
                publisher.debug(str(proc.pid) + ' is gone')
                os.kill(proc.pid, signal.SIGKILL)
            except OSError:
                # the process is already gone
                pass
    return new_procs
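
check_pid is not part of this excerpt. A common implementation, assumed here, sends signal 0, which performs the existence and permission checks without delivering anything:

import errno
import os

def check_pid(pid):
    try:
        os.kill(pid, 0)  # signal 0: liveness/permission check only
    except OSError as e:
        return e.errno == errno.EPERM  # alive, but owned by another user
    return True
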
Code example #17
def launch():
    """
        Fetch all the whois entry assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected of " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(
                        entry,
                        server + '\n' + unicode(whois, errors="replace"),
                        cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i % 10000 == 0:
                publisher.info(
                    str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            publisher.info(
                str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
        except Exception as e:
            publisher.error("Error on " + server + ': ' + str(e))
            publisher.info(
                str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
Code example #18
File: lib_words.py  Project: caar2000/AIL-framework
def redis_words_ranking(pipe, r_serv, nb, minlength, maxlength):
    """Looping function

    :param pipe: -- Redis pipe.
    :param nb: -- (int) Number of pastes proceeded by function
    :param minlength: -- (int) passed to the next function
    :param maxlength: -- (int) passed to the next function

    """
    try:
        for n in xrange(0, nb):

            path = r_serv.lpop("filelist")

            if path is not None:
                set_listof_pid(r_serv, path, sys.argv[0])

                redis_zincr_words(pipe, path, minlength, maxlength)

                update_listof_pid(r_serv)

                r_serv.lpush("processed", path)

                publisher.debug(path)
            else:
                publisher.debug("Empty list")
                break
    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
Code example #19
def search_phone(message):
    paste = Paste.Paste(message)
    content = paste.get_p_content()
    # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
    # list of the regex results in the Paste, may be null
    results = reg_phone.findall(content)

    # if the list is greater than 4, we consider the Paste may contain a list of phone numbers
    if len(results) > 4:
        print results
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))

if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Phone'

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Run Phone module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        search_phone(message)
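
A quick, illustrative check of the phone regex with made-up numbers. Note that findall returns one tuple of the four capture groups per hit, not the full match:

import re

reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
sample = "Call 0612345678 or +3212 34 56 78 for details."
for groups in reg_phone.findall(sample):
    print(groups)  # one tuple of capture groups per match; both sample numbers match
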
Code example #20
File: Tweet.py  Project: S1sirocks/TwitterMon
    def translateTweet(self,sentence,from_lang):

        publisher.debug("[-Tweet.py-] (translateTweet) Request from "+from_lang.upper())


        sentence = re.sub("#|@|&", "", sentence)
        cfgTM = configparser.ConfigParser()
        cfgTM.read(TMconfigfile)
        emailforTranslation = cfgTM.get("TwitterAnalyzer", "email_for_translation")

        api_url = "http://mymemory.translated.net/api/get?q={}&langpair={}|{}&de={}".format(sentence,from_lang.upper(),"EN",emailforTranslation)
        hdrs = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive'}

        publisher.debug("[-Tweet.py-] (translateTweet) Request url="+api_url) 
        response = requests.get(api_url, headers=hdrs)
        response_json = json.loads(response.text)
        translation = response_json["responseData"]["translatedText"]
        return translation
Code example #21
File: lib_words.py  Project: caar2000/AIL-framework
def dectect_longlines(r_serv, r_key, store=False, maxlength=500):
    """Store the line numbers of long lines in redis

    :param r_serv: -- The redis connection database
    :param r_key: -- (str) The key name in redis
    :param store: -- (bool) Store the line numbers or not.
    :param maxlength: -- The limit between "short lines" and "long lines"

    This function connects to a redis list of filenames (paste filenames),
    opens each paste and checks whether it contains lines whose length is
    >= maxlength. If so, the paste is "tagged" as containing long lines in
    another redis structure, and the line numbers (of the long lines) can
    additionally be stored if the store argument is True.

    """
    try:
        while True:
            #r_key_list (categ)
            filename = r_serv.lpop(r_key)

            if filename is not None:

                set_listof_pid(r_serv, filename, sys.argv[0])

                # for each pastes
                with gzip.open(filename, 'rb') as F:
                    var = True
                    for num, line in enumerate(F):

                        if len(line) >= maxlength:
                            #publisher.debug("Longline:{0}".format(line))
                            if var:
                                r_serv.rpush("longlines", filename)
                                var = False

                            if store:
                                r_serv.sadd(filename, num)
                            else:
                                publisher.debug("Line numbers of longlines not stored")

                update_listof_pid(r_serv)
            else:
                publisher.debug("Empty list")
                return False

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
Code example #22
    args = parser.parse_args()
    interval_first = args.firstdate
    interval_last = args.lastdate
    if interval_last is None:
        daemon = True
    else:
        daemon = False

    unavailable = []
    while 1:
        got_new_files = False
        if daemon or interval_last is None:
            interval_last = datetime.date.today().strftime("%Y-%m-%d")

        for fname, url in to_download():
            if not already_downloaded(fname) and url not in unavailable:
                publisher.debug("Trying to download: " + url)
                if downloadURL(url, fname):
                    got_new_files = True
                    publisher.info("Downloaded:" + fname)
                elif interval_last != datetime.date.today().strftime(
                        "%Y-%m-%d"):
                    # if today's file is not available, try again later.
                    unavailable.append(url)
        if not got_new_files:
            publisher.info('No new files to download.')
            if not daemon:
                publisher.info('Exiting...')
                break
            time.sleep(3600)
Code example #23
File: lib_refine.py  Project: Rafiot/AIL-framework
def checking_MX_record(r_serv, adress_set):
    """Check if emails MX domains are responding.

    :param r_serv: -- Redis connexion database
    :param adress_set: -- (set) This is a set of emails adress
    :return: (int) Number of adress with a responding and valid MX domains

    This function will split the email adress and try to resolve their domains
    names: on [email protected] it will try to resolve gmail.com

    """
    score = 0
    num = len(adress_set)
    WalidMX = set([])
    # Transforming the set into a string
    MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
    resolver = dns.resolver.Resolver()
    resolver.nameservers = ['149.13.33.69']
    resolver.timeout = 5
    resolver.lifetime = 2
    if MXdomains != []:

            for MXdomain in set(MXdomains):
                try:
                    # Already in Redis living.
                    if r_serv.exists(MXdomain[1:]):
                        score += 1
                        WalidMX.add(MXdomain[1:])
                    # Not already in Redis
                    else:
                        # If I'm Walid MX domain
                        if resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX):
                            # Gonna be added in redis.
                            r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
                            score += 1
                            WalidMX.add(MXdomain[1:])
                        else:
                            pass

                except dns.resolver.NoNameservers:
                    publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')

                except dns.resolver.NoAnswer:
                    publisher.debug('NoAnswer, The response did not contain an answer to the question.')

                except dns.name.EmptyLabel:
                    publisher.debug('SyntaxError: EmptyLabel')

                except dns.resolver.NXDOMAIN:
                    r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
                    publisher.debug('The query name does not exist.')

                except dns.name.LabelTooLong:
                    publisher.debug('The Label is too long')

                except dns.resolver.Timeout:
                    r_serv.setex(MXdomain[1:], 1, timedelta(days=1))

                except Exception as e:
                    print e

    publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidMX)
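
For reference, the dnspython calls used above, in isolation (dnspython 1.x API, as in the example; the nameserver IP is the one hard-coded above). Note that lifetime=2 caps the whole query at two seconds, so the five-second per-server timeout rarely comes into play:

import dns.rdatatype
import dns.resolver

resolver = dns.resolver.Resolver()
resolver.nameservers = ['149.13.33.69']
resolver.timeout = 5    # per-nameserver timeout, in seconds
resolver.lifetime = 2   # total budget for the whole query

try:
    answers = resolver.query('gmail.com', rdtype=dns.rdatatype.MX)
    print([str(rdata.exchange) for rdata in answers])
except dns.resolver.NXDOMAIN:
    print('The query name does not exist.')
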
Code example #24
File: Indexer.py  Project: marcoramilli/AIL-framework
        if not exists_in(indexpath):
            ix = create_in(indexpath, schema)
        else:
            ix = open_dir(indexpath)

    # LOGGING #
    publisher.info("ZMQ Indexer is Running")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script Indexer is idling 1s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            print "Indexing :", docpath
            if indexertype == "whoosh":
                indexwriter = ix.writer()
                indexwriter.update_document(
                    title=unicode(docpath, errors='ignore'),
                    path=unicode(docpath, errors='ignore'),
                    content=unicode(paste, errors='ignore'))
                indexwriter.commit()
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Code example #25
File: Mail.py  Project: cs24/AIL-framework
        if message is not None:
            filename, score = message.split()

            if prec_filename is None or filename != prec_filename:
                PST = Paste.Paste(filename)
                MX_values = lib_refine.checking_MX_record(
                    r_serv2, PST.get_regex(email_regex))

                if MX_values[0] >= 1:

                    PST.__setattr__(channel, MX_values)
                    PST.save_attribute_redis(channel, (MX_values[0],
                                             list(MX_values[1])))

                    pprint.pprint(MX_values)
                    to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.\
                        format(PST.p_source, PST.p_date, PST.p_name,
                               MX_values[0])
                    if MX_values[0] > is_critical:
                        publisher.warning(to_print)
                    else:
                        publisher.info(to_print)
            prec_filename = filename

        else:
            publisher.debug("Script Mails is Idling 10s")
            print 'Sleeping'
            time.sleep(10)

        message = p.get_from_set()
Code example #26
File: Credential.py  Project: CIRCL/AIL-framework
        port=p.config.getint("ARDB_Statistics", "port"),
        db=p.config.getint("ARDB_Statistics", "db"),
        decode_responses=True)

    criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert")
    minTopPassList = p.config.getint("Credential", "minTopPassList")

    regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    while True:
        message = p.get_from_set()
        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            #print('sleeping 10s')
            time.sleep(10)
            continue

        filepath, count = message.split(' ')

        paste = Paste.Paste(filepath)
        content = paste.get_p_content()
        creds = set(re.findall(regex_cred, content))

        if len(creds) == 0:
            continue

        sites = re.findall(regex_web, content)  # used to count occurrences
        sites_set = set(re.findall(regex_web, content))
Code example #27
    p = Process(config_section)

    # port generated automatically depending on the date
    curYear = datetime.now().year
    server = redis.StrictRedis(
                host=p.config.get("ARDB_DB", "host"),
                port=p.config.get("ARDB_DB", "port"),
                db=curYear,
                decode_responses=True)

    # FUNCTIONS #
    publisher.info("Script duplicate started")

    while True:
        message = p.get_from_set()
        if message is not None:
            module_name, p_path = message.split(';')
            print("new alert : {}".format(module_name))
            #PST = Paste.Paste(p_path)
        else:
            publisher.debug("Script Attribute is idling 10s")
            time.sleep(10)
            continue

        # Add in redis for browseWarningPaste
        # Format in set: WARNING_moduleName -> p_path
        key = "WARNING_" + module_name
        server.sadd(key, p_path)

        publisher.info('Saved warning paste {}'.format(p_path))
Code example #28
                                # set number of files to submit
                                r_serv_log_submit.set(uuid + ':nb_total', len(files.children))
                                n = 1
                                for child in files.children:
                                    if verify_extention_filename(child.filename.decode()):
                                        create_paste(uuid, child.contents, ltags, ltagsgalaxies, uuid+'_'+ str(n) )
                                        n = n + 1
                                    else:
                                        print('bad extension')
                                        addError(uuid, 'Bad file extension: {}'.format(child.filename.decode()) )

                            except FileNotFoundError:
                                print('file not found')
                                addError(uuid, 'File not found: {}'.format(file_full_path), uuid )

                            remove_submit_uuid(uuid)



            # textarea input paste
            else:
                r_serv_log_submit.set(uuid + ':nb_total', 1)
                create_paste(uuid, paste_content.encode(), ltags, ltagsgalaxies, uuid)
                remove_submit_uuid(uuid)
                time.sleep(0.5)

        # wait for paste
        else:
            publisher.debug("Script submit_paste is Idling 10s")
            time.sleep(3)
Code example #29
File: Release.py  Project: xme/AIL-framework
    p = Process(config_section)
    max_execution_time = p.config.getint("Curve", "max_execution_time")
    publisher.info("Release scripts to find release names")

    movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
    tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
    xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"

    regexs = [movie, tv, xxx]

    regex = '|'.join(regexs)
    while True:
        signal.alarm(max_execution_time)
        filepath = p.get_from_set()
        if filepath is None:
            publisher.debug("Script Release is Idling 10s")
            print('Sleeping')
            time.sleep(10)
            continue

        paste = Paste.Paste(filepath)
        content = paste.get_p_content()

        #signal.alarm(max_execution_time)
        try:
            releases = set(re.findall(regex, content))
            if len(releases) == 0:
                continue

            to_print = 'Release;{};{};{};{} releases;{}'.format(
                paste.p_source, paste.p_date, paste.p_name, len(releases),
Code example #30
File: Duplicates.py  Project: mokaddem/AIL-framework
    publisher.info("Script duplicate started")

    while True:
        try:
            hash_dico = {}
            dupl = set()
            dico_range_list = []

            x = time.time()

            message = p.get_from_set()
            if message is not None:
                path = message
                PST = Paste.Paste(path)
            else:
                publisher.debug("Script Attribute is idling 10s")
                print('sleeping')
                time.sleep(10)
                continue

            # the paste is too small
            if (PST._get_p_size() < min_paste_size):
                continue

            PST._set_p_hash_kind("ssdeep")
            PST._set_p_hash_kind("tlsh")

            # Assign the correct redis connection
            r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]

            # Creating the dico name: yyyymm
Code example #31
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'ApiKey'

    p = Process(config_section)

    publisher.info("ApiKey started")

    message = p.get_from_set()

    # TODO improve REGEX
    regex_aws_access_key = re.compile(
        r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])')
    regex_aws_secret_key = re.compile(
        r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])')

    regex_google_api_key = re.compile(r'=AIza[0-9a-zA-Z-_]{35}')

    while True:

        message = p.get_from_set()

        if message is not None:

            search_api_key(message)

        else:
            publisher.debug("Script ApiKey is Idling 10s")
            time.sleep(10)
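
Note the leading '=' in each pattern above: as written, these regexes only fire on key=value style dumps. A small check with a fabricated key of the right shape:

import re

regex_google_api_key = re.compile(r'=AIza[0-9a-zA-Z-_]{35}')

sample = 'google_api_key=AIza' + 'A' * 35  # fabricated key, correct shape
print(bool(regex_google_api_key.search(sample)))             # True
print(bool(regex_google_api_key.search('AIza' + 'A' * 35)))  # False: no '=' prefix
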
Code example #32
File: Url.py  Project: MaximeStor/AIL-framework
    while True:
        if message is not None:
            filename, score = message.split()

            if prec_filename is None or filename != prec_filename:
                domains_list = []
                PST = Paste.Paste(filename)
                client = ip2asn()
                for x in PST.get_regex(url_regex):
                    scheme, credential, subdomain, domain, host, tld, \
                        port, resource_path, query_string, f1, f2, f3, \
                        f4 = x
                    domains_list.append(domain)
                    p.populate_set_out(x, 'Url')
                    publisher.debug('{} Published'.format(x))

                    if f1 == "onion":
                        print domain

                    hostl = unicode(subdomain+domain)
                    try:
                        socket.setdefaulttimeout(2)
                        ip = socket.gethostbyname(unicode(hostl))
                    except Exception:
                        # If the resolver is not giving any IPv4 address,
                        # the ASN/CC lookup is skipped.
                        continue

                    try:
                        l = client.lookup(ip, qType='IP')
Code example #33
if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Keys'

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Run Keys module ")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        paste = Paste.Paste(message)
        search_key(paste)

        # (Optional) Send that thing to the next queue
Code example #34
    publisher.info("Find credentials")

    faup = Faup()

    regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    redis_cache_key = regex_helper.generate_redis_cache_key(module_name)

    while True:
        message = p.get_from_set()

        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            time.sleep(10)
            continue

        item_id, count = message.split()

        item_content = Item.get_item_content(item_id)

        # Extract all credentials
        all_credentials = regex_helper.regex_findall(
            module_name,
            redis_cache_key,
            regex_cred,
            item_id,
            item_content,
            max_time=max_execution_time)
Code example #35
File: lib_refine.py  Project: Rafiot/AIL-framework
def checking_A_record(r_serv, domains_set):
    score = 0
    num = len(domains_set)
    WalidA = set([])
    resolver = dns.resolver.Resolver()
    resolver.nameservers = ['149.13.33.69']
    resolver.timeout = 5
    resolver.lifetime = 2

    for Adomain in domains_set:
        try:
            # Already in Redis living.
            if r_serv.exists(Adomain):
                score += 1
                WalidA.add(Adomain)
            # Not already in Redis
            else:
                # If I'm Walid domain
                if resolver.query(Adomain, rdtype=dns.rdatatype.A):
                    # Gonna be added in redis.
                    r_serv.setex(Adomain, 1, timedelta(days=1))
                    score += 1
                    WalidA.add(Adomain)
                else:
                    pass

        except dns.resolver.NoNameservers:
            publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')

        except dns.resolver.NoAnswer:
            publisher.debug('NoAnswer, The response did not contain an answer to the question.')

        except dns.name.EmptyLabel:
            publisher.debug('SyntaxError: EmptyLabel')

        except dns.resolver.NXDOMAIN:
            r_serv.setex(Adomain, 1, timedelta(days=1))  # unlike the MX variant, there is no leading '@' to strip
            publisher.debug('The query name does not exist.')

        except dns.name.LabelTooLong:
            publisher.debug('The Label is too long')

        except Exception as e:
            print e

    publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidA)
Code example #36
File: Categ.py  Project: MaximeStor/AIL-framework
    categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential']
    tmp_dict = {}
    for filename in categories:
        bname = os.path.basename(filename)
        tmp_dict[bname] = []
        with open(os.path.join(args.d, filename), 'r') as f:
            patterns = [r'%s' % re.escape(s.strip()) for s in f]
            tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)

    prec_filename = None

    while True:
        filename = p.get_from_set()
        if filename is None:
            publisher.debug("Script Categ is Idling 10s")
            print 'Sleeping'
            time.sleep(10)
            continue

        paste = Paste.Paste(filename)
        content = paste.get_p_content()

        for categ, pattern in tmp_dict.items():
            found = set(re.findall(pattern, content))
            if len(found) > 0:
                msg = '{} {}'.format(paste.p_path, len(found))
                print msg, categ
                p.populate_set_out(msg, categ)

                publisher.info(
Code example #37
        if not exists_in(indexpath):
            ix = create_in(indexpath, schema)
        else:
            ix = open_dir(indexpath)

    # LOGGING #
    publisher.info("ZMQ Indexer is Running")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script Indexer is idling 1s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            print "Indexing :", docpath
            if indexertype == "whoosh":
                indexwriter = ix.writer()
                indexwriter.update_document(title=unicode(docpath,
                                                          errors='ignore'),
                                            path=unicode(docpath,
                                                         errors='ignore'),
                                            content=unicode(paste,
                                                            errors='ignore'))
                indexwriter.commit()
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Code example #38
    config_section = 'SQLInjectionDetection'

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Try to detect SQL injection")

    server_statistics = redis.StrictRedis(
        host=p.config.get("ARDB_Statistics", "host"),
        port=p.config.getint("ARDB_Statistics", "port"),
        db=p.config.getint("ARDB_Statistics", "db"),
        decode_responses=True)

    faup = Faup()

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(10)
            continue

        else:
            # Do something with the message from the queue
            url, date, path = message.split()
            analyse(url, path)
Code example #39
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # Redis
    r_serv1 = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
                                port=cfg.getint("Redis_Queues", "port"),
                                db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    # Subscriber
    channel = cfg.get("PubSub_Global", "channel")
    subscriber_name = "DomainClassifier"
    subscriber_config_section = "PubSub_Global"

    cc = cfg.get("PubSub_DomainClassifier", "cc")
    cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld")

    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel,
                            subscriber_name)

    # FUNCTIONS #
    publisher.info("""ZMQ DomainClassifier is Running""")
    c = DomainClassifier.domainclassifier.Extract(rawtext="")

    while True:
        try:
            message = sub.get_msg_from_queue(r_serv1)

            if message is not None:
                PST = Paste.Paste(message.split(" ", -1)[-1])
            else:
                if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
                    r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script DomainClassifier is idling 10s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()
            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc_tld))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc))
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Code example #40
                        set_name = 'regex_' + dico_regexname_to_redis[regex_str]
                        new_to_the_set = server_term.sadd(set_name, filename)
                        new_to_the_set = True if new_to_the_set == 1 else False

                        #count the number of occurrences of this term
                        regex_value = int(
                            server_term.hincrby(
                                timestamp, dico_regexname_to_redis[regex_str],
                                int(1)))
                        #1 term per paste
                        if new_to_the_set:
                            regex_value_perPaste = int(
                                server_term.hincrby(
                                    "per_paste_" + str(timestamp),
                                    dico_regexname_to_redis[regex_str],
                                    int(1)))
                            server_term.zincrby(
                                "per_paste_" + curr_set,
                                dico_regexname_to_redis[regex_str], float(1))
                    server_term.zincrby(curr_set,
                                        dico_regexname_to_redis[regex_str],
                                        float(1))
                else:
                    pass

        else:
            publisher.debug("Script RegexForTermsFrequency is Idling")
            print "sleeping"
            time.sleep(5)
        message = p.get_from_set()
Code example #41
    args = parser.parse_args()
    interval_first = args.firstdate
    interval_last = args.lastdate
    if interval_last is None:
        daemon = True
    else:
        daemon = False

    unavailable = []
    while 1:
        got_new_files = False
        if daemon or interval_last is None:
            interval_last = datetime.date.today().strftime("%Y-%m-%d")

        for fname, url in to_download():
            if not already_downloaded(fname) and url not in unavailable:
                publisher.debug("Trying to download: " + url)
                if downloadURL(url, fname):
                    got_new_files = True
                    publisher.info("Downloaded:" + fname)
                elif interval_last != datetime.date.today().strftime("%Y-%m-%d"):
                    # if today's file is not available, try again later.
                    unavailable.append(url)
        if not got_new_files:
            publisher.info('No new files to download.')
            if not daemon:
                publisher.info('Exiting...')
                break
            time.sleep(3600)
Code example #42
    while True:
        if message is not None:
            generate_new_graph = True

            filename, word, score = message.split()
            temp = filename.split('/')
            date = temp[-4] + temp[-3] + temp[-2]

            low_word = word.lower()
            prev_score = r_serv1.hget(low_word, date)
            if prev_score is not None:
                r_serv1.hset(low_word, date, int(prev_score) + int(score))
            else:
                r_serv1.hset(low_word, date, score)

        else:
            if generate_new_graph:
                generate_new_graph = False
                print 'Building graph'
                today = datetime.date.today()
                year = today.year
                month = today.month
                lib_words.create_curve_with_word_file(r_serv1, csv_path,
                                                      wordfile_path, year,
                                                      month)

            publisher.debug("Script Curve is Idling")
            print "sleeping"
            time.sleep(10)
        message = p.get_from_set()
Code example #43
    def get_ip_info(self, ip, days_limit=None):
        """
            Return information related to an IP address.

            :param ip: The IP address
            :param days_limit: The number of days we want to check in the past
                (default: around 2 years)
            :rtype: Dictionary

                .. note:: Format of the output:

                    .. code-block:: python

                        {
                            'ip': ip,
                            'days_limit' : days_limit,
                            'ptrrecord' : 'ptr.record.com',
                            'history':
                                [
                                    {
                                        'asn': asn,
                                        'interval': [first, last],
                                        'block': block,
                                        'timestamp': timestamp,
                                        'descriptions':
                                            [
                                                [date, descr],
                                                ...
                                            ]
                                    },
                                    ...
                                ]
                        }
        """
        if days_limit is None:
            days_limit = 750
        to_return = {'ip': ip, 'days_limit': days_limit, 'history': []}
        if self.has_ptr:
            to_return['ptrrecord'] = self.get_ptr_record(ip)
        if not self.has_ipasn:
            publisher.debug('IPASN not enabled.')
            to_return['error'] = 'IPASN not enabled.'
            return to_return
        if not ip:
            to_return['error'] = 'No IP provided.'
            return to_return
        for first, last, asn, block in self.ipasn.aggregate_history(
                ip, days_limit):
            first_date = parser.parse(first).replace(tzinfo=tz.tzutc()).date()
            last_date = parser.parse(last).replace(tzinfo=tz.tzutc()).date()
            if self.has_asnhistory:
                desc_history = self.asnhistory.get_all_descriptions(asn)
                valid_descriptions = []
                for date, descr in desc_history:
                    date = date.astimezone(tz.tzutc()).date()
                    test_date = date - datetime.timedelta(days=1)
                    if last_date < test_date:
                        # Too new
                        continue
                    elif last_date >= test_date and first_date <= test_date:
                        # Changes within the interval
                        valid_descriptions.append([date.isoformat(), descr])
                    elif first_date > test_date:
                        # get the most recent change before the interval
                        valid_descriptions.append([date.isoformat(), descr])
                        break
            else:
                publisher.debug('ASN History not enabled.')
                valid_descriptions = [[
                    datetime.date.today().isoformat(),
                    'ASN History not enabled.'
                ]]
            if len(valid_descriptions) == 0:
                if len(desc_history) != 0:
                    # fallback, use the oldest description.
                    date = desc_history[-1][0].astimezone(tz.tzutc()).date()
                    descr = desc_history[-1][1]
                    valid_descriptions.append([date.isoformat(), descr])
                else:
                    # No history found for this ASN
                    if last_date > datetime.date(2013, 1, 1):
                        # The ASN has been seen recently; this should not
                        # happen, as the ASN history module has been running
                        # since early 2013.
                        publisher.error(
                            'Unable to find the ASN description of {}. IP address: {}. ASN History might be down.'
                            .format(asn, ip))
                    valid_descriptions.append(
                        ['0000-00-00', 'No ASN description has been found.'])
            entry = {}
            entry['asn'] = asn
            entry['interval'] = [first_date.isoformat(), last_date.isoformat()]
            entry['block'] = block
            entry['timestamp'] = self.get_first_seen(asn, block)
            entry['descriptions'] = valid_descriptions
            to_return['history'].append(entry)
        return to_return
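
A minimal usage sketch of the structure documented above; the record below is fabricated sample data using documentation-reserved ranges (198.51.100.0/24, AS64496), not real history:

# Walking the documented output format of get_ip_info().
sample = {
    'ip': '198.51.100.1',
    'days_limit': 750,
    'ptrrecord': 'ptr.record.com',
    'history': [
        {
            'asn': '64496',
            'interval': ['2013-01-01', '2013-06-01'],
            'block': '198.51.100.0/24',
            'timestamp': 1356998400,
            'descriptions': [['2013-01-01', 'EXAMPLE-AS']],
        },
    ],
}
for entry in sample['history']:
    first, last = entry['interval']
    print('AS{} announced {} from {} to {}'.format(
        entry['asn'], entry['block'], first, last))
    for date, descr in entry['descriptions']:
        print('  {}: {}'.format(date, descr))
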
Code Example #47
def checking_MX_record(r_serv, adress_set):
    """Check if emails MX domains are responding.

    :param r_serv: -- Redis connexion database
    :param adress_set: -- (set) This is a set of emails adress
    :return: (int) Number of adress with a responding and valid MX domains

    This function will split the email adress and try to resolve their domains
    names: on [email protected] it will try to resolve gmail.com

    """
    score = 0
    num = len(adress_set)
    WalidMX = set([])
    # Transforming the set into a string
    MXdomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}",
                            str(adress_set).lower())

    if MXdomains != []:

        for MXdomain in set(MXdomains):
            try:
                # Already in Redis living.
                if r_serv.exists(MXdomain[1:]):
                    score += 1
                    WalidMX.add(MXdomain[1:])
                # Not already in Redis
                else:
                    # If I'm Walid MX domain
                    if dns.resolver.query(MXdomain[1:],
                                          rdtype=dns.rdatatype.MX):
                        # Gonna be added in redis.
                        r_serv.setex(MXdomain[1:], timedelta(days=1), 1)
                        score += 1
                        WalidMX.add(MXdomain[1:])

            except dns.resolver.NoNameservers:
                publisher.debug(
                    'NoNameserver, No non-broken nameservers are available to answer the query.'
                )

            except dns.resolver.NoAnswer:
                publisher.debug(
                    'NoAnswer, The response did not contain an answer to the question.'
                )

            except dns.name.EmptyLabel:
                publisher.debug('SyntaxError: EmptyLabel')

            except dns.resolver.NXDOMAIN:
                publisher.debug('The query name does not exist.')

            except dns.name.LabelTooLong:
                publisher.debug('The Label is too long')

    publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidMX)
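
A hedged usage sketch, assuming the function and its imports above are available, a Redis server on localhost, and outbound DNS; the addresses are illustrative placeholders:

import redis

r_serv = redis.StrictRedis(host='localhost', port=6379, db=0)
addresses = set(['alice@example.com', 'bob@example.org'])
total, valid_mx = checking_MX_record(r_serv, addresses)
print('{} addresses, responding MX domains: {}'.format(total, sorted(valid_mx)))
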
Code Example #48
File: Onion.py Project: stedeluxe/AIL-framework
                # Saving the list of extracted onion domains.
                PST.__setattr__(channel, domains_list)
                PST.save_attribute_redis(channel, domains_list)
                to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
                                                    PST.p_name)
                if len(domains_list) > 0:

                    publisher.warning('{}Detected {} .onion(s)'.format(
                        to_print, len(domains_list)))
                    now = datetime.datetime.now()
                    path = os.path.join(
                        'onions',
                        str(now.year).zfill(4),
                        str(now.month).zfill(2),
                        str(now.day).zfill(2),
                        str(int(time.mktime(now.utctimetuple()))))
                    to_print = 'Onion;{};{};{};'.format(
                        PST.p_source, PST.p_date, PST.p_name)
                    for url in fetch(p, r_cache, urls, domains_list, path):
                        publisher.warning('{}Checked {}'.format(to_print, url))
                else:
                    publisher.info('{}Onion related'.format(to_print))

            prec_filename = filename
        else:
            publisher.debug("Script url is Idling 10s")
            print 'Sleeping'
            time.sleep(10)
        message = p.get_from_set()
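
The dated storage path built above can be reproduced standalone; this sketch only mirrors the directory layout, nothing else:

# Reproduces the onions/YYYY/MM/DD/<epoch seconds> path layout used above.
import datetime
import os
import time

now = datetime.datetime.now()
path = os.path.join('onions',
                    str(now.year).zfill(4),
                    str(now.month).zfill(2),
                    str(now.day).zfill(2),
                    str(int(time.mktime(now.utctimetuple()))))
print(path)  # onions/YYYY/MM/DD/<epoch seconds>
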
Code Example #49
File: lib_refine.py Project: Mrnmap/ALLInfo
def checking_A_record(r_serv, domains_set):
    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. '
                        'Did you set the environment variables? '
                        'Or activate the virtualenv?')
    cfg = configparser.ConfigParser()
    cfg.read(configfile)
    dns_server = cfg.get("Web", "dns")

    score = 0
    num = len(domains_set)
    WalidA = set([])
    resolver = dns.resolver.Resolver()
    resolver.nameservers = [dns_server]
    resolver.timeout = 5
    resolver.lifetime = 2

    for Adomain in domains_set:
        try:
            # Already in Redis living.
            if r_serv.exists(Adomain):
                score += 1
                WalidA.add(Adomain)
            # Not already in Redis
            else:
                # If I'm Walid domain
                if resolver.query(Adomain, rdtype=dns.rdatatype.A):
                    # Gonna be added in redis.
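                    # redis-py < 3.0 argument order: setex(name, value, time);
                    # redis-py 3.x expects setex(name, time, value).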
                    r_serv.setex(Adomain, 1, timedelta(days=1))
                    score += 1
                    WalidA.add(Adomain)

        except dns.resolver.NoNameservers:
            publisher.debug(
                'NoNameserver, No non-broken nameservers are available to answer the query.'
            )

        except dns.resolver.NoAnswer:
            publisher.debug(
                'NoAnswer, The response did not contain an answer to the question.'
            )

        except dns.name.EmptyLabel:
            publisher.debug('SyntaxError: EmptyLabel')

        except dns.resolver.NXDOMAIN:
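            # NOTE: the [1:] slice below appears copied from the MX variant,
            # where it strips a leading '@'; here it drops the domain's first
            # character, so the cached key never matches a later exists() check.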
            r_serv.setex(Adomain[1:], 1, timedelta(days=1))
            publisher.debug('The query name does not exist.')

        except dns.name.LabelTooLong:
            publisher.debug('The Label is too long')

        except Exception as e:
            print(e)

    publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidA)
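
A minimal standalone sketch of the dnspython 1.x resolver setup used above; 8.8.8.8 is only a stand-in for the server configured under the "dns" option:

import dns.rdatatype
import dns.resolver

# Point a resolver at one specific nameserver, with the same timeouts.
resolver = dns.resolver.Resolver()
resolver.nameservers = ['8.8.8.8']
resolver.timeout = 5
resolver.lifetime = 2
for rdata in resolver.query('example.com', rdtype=dns.rdatatype.A):
    print(rdata.address)
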
Code Example #50
def checking_A_record(r_serv, domains_set):
    score = 0
    num = len(domains_set)
    WalidA = set([])

    for Adomain in domains_set:
        try:
            # Already in Redis living.
            if r_serv.exists(Adomain):
                score += 1
                WalidA.add(Adomain)
            # Not already in Redis
            else:
                # If I'm Walid domain
                if dns.resolver.query(Adomain, rdtype=dns.rdatatype.A):
                    # Gonna be added in redis.
                    r_serv.setex(Adomain, timedelta(days=1), 1)
                    score += 1
                    WalidA.add(Adomain)

        except dns.resolver.NoNameservers:
            publisher.debug(
                'NoNameserver, No non-broken nameservers are available to answer the query.'
            )

        except dns.resolver.NoAnswer:
            publisher.debug(
                'NoAnswer, The response did not contain an answer to the question.'
            )

        except dns.name.EmptyLabel:
            publisher.debug('SyntaxError: EmptyLabel')

        except dns.resolver.NXDOMAIN:
            publisher.debug('The query name does not exist.')

        except dns.name.LabelTooLong:
            publisher.debug('The Label is too long')

    publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidA)
Code Example #51
File: lib_refine.py Project: Mrnmap/ALLInfo
def checking_MX_record(r_serv, adress_set, addr_dns):
    """Check if emails MX domains are responding.

    :param r_serv: -- Redis connexion database
    :param adress_set: -- (set) This is a set of emails adress
    :param adress_set: -- (str) This is a server dns address
    :return: (int) Number of adress with a responding and valid MX domains

    This function will split the email adress and try to resolve their domains
    names: on [email protected] it will try to resolve gmail.com

    """

    # Remove duplicate addresses
    adress_set = list(set(adress_set))

    score = 0
    num = len(adress_set)
    WalidMX = set([])
    validMX = {}
    # Transforming the set into a string
    MXdomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}",
                            str(adress_set).lower())
    resolver = dns.resolver.Resolver()
    resolver.nameservers = [addr_dns]
    resolver.timeout = 5
    resolver.lifetime = 2
    if MXdomains != []:

        for MXdomain in MXdomains:
            try:
                MXdomain = MXdomain[1:]
                # Already in Redis living.
                if r_serv.exists(MXdomain):
                    score += 1
                    WalidMX.add(MXdomain)
                    validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
                # Not already in Redis
                else:
                    # If I'm Walid MX domain
                    if resolver.query(MXdomain, rdtype=dns.rdatatype.MX):
                        # Gonna be added in redis.
                        r_serv.setex(MXdomain, 1, timedelta(days=1))
                        score += 1
                        WalidMX.add(MXdomain)
                        validMX[MXdomain] = validMX.get(MXdomain, 0) + 1

            except dns.resolver.NoNameservers:
                publisher.debug(
                    'NoNameserver, No non-broken nameservers are available to answer the query.'
                )
                print(
                    'NoNameserver, No non-broken nameservers are available to answer the query.'
                )

            except dns.resolver.NoAnswer:
                publisher.debug(
                    'NoAnswer, The response did not contain an answer to the question.'
                )
                print(
                    'NoAnswer, The response did not contain an answer to the question.'
                )

            except dns.name.EmptyLabel:
                publisher.debug('SyntaxError: EmptyLabel')
                print('SyntaxError: EmptyLabel')

            except dns.resolver.NXDOMAIN:
                r_serv.setex(MXdomain, 1, timedelta(days=1))
                publisher.debug('The query name does not exist.')
                print('The query name does not exist.')

            except dns.name.LabelTooLong:
                publisher.debug('The Label is too long')
                print('The Label is too long')

            except dns.resolver.Timeout:
                print('timeout')
                r_serv.setex(MXdomain, 1, timedelta(days=1))

            except Exception as e:
                print(e)

    publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
    #return (num, WalidMX)
    return (num, validMX)
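
A hedged usage sketch for this variant, assuming a local Redis server and a reachable DNS resolver; unlike the earlier version it returns per-domain hit counts instead of a set:

import redis

r_serv = redis.StrictRedis(host='localhost', port=6379, db=0)
addresses = ['alice@example.com', 'bob@example.com', 'carol@example.org']
total, valid_counts = checking_MX_record(r_serv, addresses, '8.8.8.8')
for domain, hits in sorted(valid_counts.items()):
    print('{}: {} address(es)'.format(domain, hits))
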
Code Example #52
File: Categ.py Project: Mrnmap/ALLInfo
        'CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey'
    ]
    tmp_dict = {}
    for filename in categories:
        bname = os.path.basename(filename)
        tmp_dict[bname] = []
        with open(os.path.join(args.d, filename), 'r') as f:
            patterns = [r'%s' % (re.escape(s.strip())) for s in f]
            tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)

    prec_filename = None

    while True:
        filename = p.get_from_set()
        if filename is None:
            publisher.debug("Script Categ is Idling 10s")
            print('Sleeping')
            time.sleep(10)
            continue

        paste = Paste.Paste(filename)
        content = paste.get_p_content()

        for categ, pattern in tmp_dict.items():
            found = set(re.findall(pattern, content))
            if len(found) >= matchingThreshold:
                msg = '{} {}'.format(paste.p_rel_path, len(found))

                print(msg, categ)
                p.populate_set_out(msg, categ)
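
A standalone sketch of the matching technique above: each category file becomes one case-insensitive regex, built by OR-ing the escaped terms; the word list here is a stand-in:

import re

terms = ['visa', 'mastercard', 'american express']  # stand-in word list
pattern = re.compile('|'.join(re.escape(t.strip()) for t in terms),
                     re.IGNORECASE)
content = 'dump contains Visa and MasterCard data'
found = set(re.findall(pattern, content))
print(len(found), found)  # 2 matches, case-insensitive
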
Code Example #53
    # FUNCTIONS #
    publisher.info("""ZMQ Attribute is Running""")

    while True:
        try:
            message = h.redis_rpop()

            if message is not None:
                PST = Paste.Paste(message.split(" ", -1)[-1])
            else:
                if h.redis_queue_shutdown():
                    print "Shutdown Flag Up: Terminating"
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script Attribute is idling 10s")
                time.sleep(10)
                continue

            # FIXME do it directly in the class
            PST.save_attribute_redis("p_encoding", PST._get_p_encoding())
            PST.save_attribute_redis("p_language", PST._get_p_language())
            # FIXME why not save everything there?
            PST.save_all_attributes_redis()
            # FIXME Not used.
            PST.store.sadd("Pastes_Objects", PST.p_path)
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Code Example #54
            old=timestamp, new=last_update)
        publisher.error(msg)
        continue
    else:
        msg = '===== Importing new file: {new} ====='.format(new=timestamp)
        publisher.info(msg)
        p = r.pipeline(transaction=False)
        p.set('last_update', timestamp)
        p.sadd('all_timestamps', timestamp)
        new_asns = 0
        updated_descrs = 0
        for asn, descr in data:
            all_descrs = r.hgetall(asn)
            if len(all_descrs) == 0:
                p.hset(asn, timestamp, descr)
                publisher.debug('New asn: {asn}'.format(asn=asn))
                new_asns += 1
            else:
                dates = sorted(all_descrs.keys())
                last_descr = all_descrs[dates[-1]]
                if descr != last_descr:
                    p.hset(asn, timestamp, descr)
                    msg = 'New description for {asn}. Was {old}, is {new}'.format(
                        asn=asn, old=last_descr, new=descr)
                    publisher.info(msg)
                    updated_descrs += 1
        p.execute()
        msg = '===== Import finished: {new}, new ASNs:{nb}, Updated:{up} ====='.format(
            new=timestamp, nb=new_asns, up=updated_descrs)
        publisher.info(msg)
if args.not_new:
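
A hedged readback sketch for the per-ASN hash written above: field names are import timestamps and values are descriptions, so the highest sorted field holds the current description (assumes a local Redis instance; AS64496 is a documentation ASN, illustrative only):

import redis

r = redis.StrictRedis(host='localhost', port=6379, db=0,
                      decode_responses=True)
asn = '64496'  # illustrative only
all_descrs = r.hgetall(asn)
if all_descrs:
    latest = sorted(all_descrs.keys())[-1]
    print('AS{}: {} (since {})'.format(asn, all_descrs[latest], latest))
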
Code Example #55
File: Onion.py Project: marcoramilli/AIL-framework
                    urls.append(url)

                # Saving the list of extracted onion domains.
                PST.__setattr__(channel, domains_list)
                PST.save_attribute_redis(channel, domains_list)
                to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
                                                    PST.p_name)
                if len(domains_list) > 0:

                    publisher.warning('{}Detected {} .onion(s)'.format(
                        to_print, len(domains_list)))
                    now = datetime.datetime.now()
                    path = os.path.join('onions', str(now.year).zfill(4),
                                        str(now.month).zfill(2),
                                        str(now.day).zfill(2),
                                        str(int(time.mktime(now.utctimetuple()))))
                    to_print = 'Onion;{};{};{};'.format(PST.p_source,
                                                        PST.p_date,
                                                        PST.p_name)
                    for url in fetch(p, r_cache, urls, domains_list, path):
                        publisher.warning('{}Checked {}'.format(to_print, url))
                else:
                    publisher.info('{}Onion related'.format(to_print))

            prec_filename = filename
        else:
            publisher.debug("Script url is Idling 10s")
            print 'Sleeping'
            time.sleep(10)
        message = p.get_from_set()
Code Example #56
File: CreditCard.py Project: cs24/AIL-framework
        if message is not None:
            filename, score = message.split()
            paste = Paste.Paste(filename)
            content = paste.get_p_content()
            all_cards = re.findall(regex, content)
            if len(all_cards) > 0:
                print 'All matching', all_cards
                creditcard_set = set([])

                for card in all_cards:
                    clean_card = re.sub('[^0-9]', '', card)
                    if lib_refine.is_luhn_valid(clean_card):
                        print clean_card, 'is valid'
                        creditcard_set.add(clean_card)

                paste.__setattr__(channel, creditcard_set)
                paste.save_attribute_redis(channel, creditcard_set)

                pprint.pprint(creditcard_set)
                to_print = 'CreditCard;{};{};{};'.format(
                    paste.p_source, paste.p_date, paste.p_name)
                if (len(creditcard_set) > 0):
                    publisher.warning('{}Checked {} valid number(s)'.format(
                        to_print, len(creditcard_set)))
                else:
                    publisher.info('{}CreditCard related'.format(to_print))
        else:
            publisher.debug("Script creditcard is idling 1m")
            print 'Sleeping'
            time.sleep(10)
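
A minimal sketch of a Luhn checksum test like the lib_refine.is_luhn_valid() call above; this reimplementation is an assumption, not the project's code:

def is_luhn_valid(card_number):
    digits = [int(d) for d in str(card_number)]
    checksum = 0
    for i, d in enumerate(reversed(digits)):
        if i % 2 == 1:   # double every second digit from the right
            d *= 2
            if d > 9:    # fold two-digit results back into one digit
                d -= 9
        checksum += d
    return checksum % 10 == 0

print(is_luhn_valid('4111111111111111'))  # True for this classic test number
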
Code Example #57
                creditcard_set = set([])
                PST = Paste.Paste(filename)

                for x in PST.get_regex(creditcard_regex):
                    if lib_refine.is_luhn_valid(x):
                        creditcard_set.add(x)

                PST.__setattr__(channel, creditcard_set)
                PST.save_attribute_redis(channel, creditcard_set)

                pprint.pprint(creditcard_set)
                to_print = 'CreditCard;{};{};{};'.format(
                    PST.p_source, PST.p_date, PST.p_name)
                if (len(creditcard_set) > 0):
                    publisher.critical('{}Checked {} valid number(s)'.format(
                        to_print, len(creditcard_set)))
                else:
                    publisher.info('{}CreditCard related'.format(to_print))

            prec_filename = filename

        else:
            if h.redis_queue_shutdown():
                print "Shutdown Flag Up: Terminating"
                publisher.warning("Shutdown Flag Up: Terminating.")
                break
            publisher.debug("Script creditcard is idling 1m")
            time.sleep(60)

        message = h.redis_rpop()
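
A standalone sketch of the extract-and-normalise step both CreditCard snippets share; the pattern below is a simplified stand-in for the module's creditcard_regex:

import re

content = 'leak: 4111-1111-1111-1111, junk: 1234 5678'
candidates = re.findall(r'(?:\d[ -]?){13,16}', content)
cleaned = set(re.sub('[^0-9]', '', c) for c in candidates)
print(cleaned)  # digits only, ready for a Luhn check
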