Example no. 1
class Urls(AbstractModule):
    """
    Urls module for AIL framework
    """
    def __init__(self):
        """
        Init Urls
        """
        super(Urls, self).__init__()

        self.faup = Faup()
        self.redis_cache_key = regex_helper.generate_redis_cache_key(
            self.module_name)

        # Protocol file path
        protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))
        # Get all uri from protocolsfile (Used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1] + "|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:[a-zA-Z]{2,15}))(?:\:[0-9]+)*(?:/?(?:[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)"

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        id, score = message.split()

        item = Item(id)
        item_content = item.get_content()

        l_urls = regex_helper.regex_findall(self.module_name,
                                            self.redis_cache_key,
                                            self.url_regex, item.get_id(),
                                            item_content)
        for url in l_urls:
            self.faup.decode(url)
            unpack_url = self.faup.get()

            to_send = f"{url} {item.get_id()}"
            print(to_send)
            self.send_message_to_queue(to_send, 'Url')
            self.redis_logger.debug(f"url_parsed: {to_send}")

        if len(l_urls) > 0:
            to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
            self.redis_logger.info(
                f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
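To give a feel for how the pattern built above is applied, here is a minimal sketch that hard-codes two schemes instead of reading them from the protocolsfile; the regex below is a simplified placeholder, not the module's full expression:

import re

uri_scheme = "http|https"  # hypothetical stand-in for the schemes read from protocolsfile
url_regex = r"((?i:" + uri_scheme + r")://\S+)"  # simplified placeholder, not the module's full pattern
text = "see http://example.com/a and HTTPS://test.org"
print(re.findall(url_regex, text))  # ['http://example.com/a', 'HTTPS://test.org']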
Example no. 2
def harvesting_google(query, numberofpage):
    listreturn = []
    result = Popen(['casperjs', 'CeleryWeb/casperjs/googlesearch.js', str(query), str(numberofpage)], stdout=PIPE)
    urls = result.stdout.readlines()
    for url in urls:
        f = Faup()
        url = url.replace('\n', '')  # strip the trailing newline from each casperjs output line
        f.decode(url)
        listreturn.append(f.get())
    return listreturn
class SQLInjectionDetection(AbstractModule):
    """docstring for SQLInjectionDetection module."""

    # # TODO: IMPROVE ME
    # Reference: https://github.com/stamparm/maltrail/blob/master/core/settings.py
    SQLI_REGEX = r"information_schema|sysdatabases|sysusers|floor\(rand\(|ORDER BY \d+|\bUNION\s+(ALL\s+)?SELECT\b|\b(UPDATEXML|EXTRACTVALUE)\(|\bCASE[^\w]+WHEN.*THEN\b|\bWAITFOR[^\w]+DELAY\b|\bCONVERT\(|VARCHAR\(|\bCOUNT\(\*\)|\b(pg_)?sleep\(|\bSELECT\b.*\bFROM\b.*\b(WHERE|GROUP|ORDER)\b|\bSELECT \w+ FROM \w+|\b(AND|OR|SELECT)\b.*/\*.*\*/|/\*.*\*/.*\b(AND|OR|SELECT)\b|\b(AND|OR)[^\w]+\d+['\") ]?[=><]['\"( ]?\d+|ODBC;DRIVER|\bINTO\s+(OUT|DUMP)FILE"

    def __init__(self):
        super(SQLInjectionDetection, self).__init__()

        self.faup = Faup()

        config_loader = ConfigLoader()
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        url, id = message.split()

        if self.is_sql_injection(url):
            self.faup.decode(url)
            url_parsed = self.faup.get()

            item = Item(id)
            item_id = item.get_id()
            print(f"Detected SQL in URL: {item_id}")
            print(urllib.request.unquote(url))
            to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            # statistics
            tld = url_parsed['tld']
            if tld is not None:
                ## TODO: # FIXME: remove me
                try:
                    tld = tld.decode()
                except:
                    pass
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)

    # Try to detect whether the given URL might be an SQL injection
    # by applying the regex defined above to it.
    def is_sql_injection(self, url_parsed):
        line = urllib.request.unquote(url_parsed)

        return re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None
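For reference, a hedged standalone sketch of the same check on a hypothetical URL, reusing the SQLI_REGEX class attribute shown above together with the urllib unquoting the module already relies on:

import re
import urllib.request

url = "http://example.com/item?id=1%20UNION%20SELECT%20password%20FROM%20users"
line = urllib.request.unquote(url)
print(re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None)  # True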
Example no. 4
            p.populate_set_out('credential;{}'.format(filepath),
                               'BrowseWarningPaste')

            #Put in form, count occurrences, then send to moduleStats
            creds_sites = {}
            site_occurence = re.findall(regex_site_for_stats, content)
            for site in site_occurence:
                site_domain = site[1:-1]
                if site_domain in creds_sites.keys():
                    creds_sites[site_domain] += 1
                else:
                    creds_sites[site_domain] = 1

            for url in sites:
                faup.decode(url)
                domain = faup.get()['domain']
                if domain in creds_sites.keys():
                    creds_sites[domain] += 1
                else:
                    creds_sites[domain] = 1

            for site, num in creds_sites.iteritems():  # Send for each different site to moduleStats
                print 'credential;{};{};{}'.format(num, site, paste.p_date)
                p.populate_set_out(
                    'credential;{};{};{}'.format(num, site, paste.p_date),
                    'ModuleStats')

            if sites_set:
                print("=======> Probably on : {}".format(', '.join(sites_set)))
        else:
Example no. 5
            valid_mx = check_mx_record(set_mxdomains, dns_server)

            item_date = Item.get_item_date(item_id)

            num_valid_email = 0
            for domain_mx in valid_mx:
                num_valid_email += len(dict_mxdomains_email[domain_mx])

                for email in dict_mxdomains_email[domain_mx]:
                    msg = 'mail;{};{};{}'.format(1, email, item_date)
                    p.populate_set_out(msg, 'ModuleStats')

                    # Create country stats
                    faup.decode(email)
                    tld = faup.get()['tld']
                    try:
                        tld = tld.decode()
                    except:
                        pass
                    server_statistics.hincrby(
                        'mail_by_tld:{}'.format(item_date), tld, 1)

            msg = 'Mails;{};{};{};Checked {} e-mail(s);{}'.format(
                Item.get_source(item_id), item_date,
                Item.get_item_basename(item_id), num_valid_email, item_id)

            if num_valid_email > mail_threshold:
                print('{}    Checked {} e-mail(s)'.format(
                    item_id, num_valid_email))
                publisher.warning(msg)
Example no. 6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pprint
from pyfaup.faup import Faup

f = Faup()
f.decode("www.météo.fr")
pprint.pprint(f.get())
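Building on the snippet above, a minimal sketch of reading individual fields from the decoded result; the keys and the bytes-versus-str handling mirror what the other examples in this collection assume about the installed faup version:

from pyfaup.faup import Faup

f = Faup()
f.decode("https://drive.google.com/drive/my-drive")
parsed = f.get()
tld = parsed['tld']
if isinstance(tld, bytes):  # some faup versions return bytes instead of str
    tld = tld.decode()
print(parsed['domain'], parsed['subdomain'], tld)  # google.com drive com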

Example no. 7
                    now = datetime.datetime.now()
                    path = os.path.join('onions', str(now.year).zfill(4),
                                        str(now.month).zfill(2),
                                        str(now.day).zfill(2),
                                        str(int(time.mktime(now.utctimetuple()))))
                    to_print = 'Onion;{};{};{};'.format(PST.p_source,
                                                        PST.p_date,
                                                        PST.p_name)

                    if activate_crawler:
                        date_month = datetime.datetime.now().strftime("%Y%m")
                        date = datetime.datetime.now().strftime("%Y%m%d")
                        for url in urls:

                            faup.decode(url)
                            url_unpack = faup.get()
                            ## TODO: # FIXME: remove me
                            try:
                                domain = url_unpack['domain'].decode().lower()
                            except Exception as e:
                                domain = url_unpack['domain'].lower()

                            ## TODO: blacklist by port?
                            # check blacklist
                            if r_onion.sismember('blacklist_onion', domain):
                                continue

                            subdomain = re.findall(url_regex, url)
                            if len(subdomain) > 0:
                                subdomain = subdomain[0][4].lower()
                            else:
Example no. 8
                        publisher.warning(to_print)
                        #Send to duplicate
                        p.populate_set_out(filename, 'Duplicate')
                        p.populate_set_out('mail;{}'.format(filename), 'alertHandler')

                        msg = 'infoleak:automatic-detection="mail";{}'.format(filename)
                        p.populate_set_out(msg, 'Tags')

                        #create country statistics
                        date = datetime.datetime.now().strftime("%Y%m")
                        for mail in MX_values[1]:
                            print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
                            p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')

                            faup.decode(mail)
                            tld = faup.get()['tld']
                            server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail])

                    else:
                        publisher.info(to_print)
                #create country statistics
                for mail in MX_values[1]:
                    print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
                    p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')

            prec_filename = filename

        else:
            publisher.debug("Script Mails is Idling 10s")
            print('Sleeping')
            time.sleep(10)
Example no. 9
class WebStats(AbstractModule):
    """
    WebStats module for AIL framework
    """

    # Config Var
    THRESHOLD_TOTAL_SUM = 200  # Above this value, a keyword is eligible for a progression
    THRESHOLD_INCREASE = 1.0  # The percentage representing the keyword occurrence since num_day_to_look
    MAX_SET_CARDINALITY = 10  # The cardinality of the progression set
    NUM_DAY_TO_LOOK = 5  # The detection of the progression starts num_day_to_look days in the past

    def __init__(self):
        super(WebStats, self).__init__()

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))
        # Send a description of the module to the logs
        self.redis_logger.info("Makes statistics about valid URL")

        self.pending_seconds = 5 * 60

        # REDIS #
        self.r_serv_trend = redis.StrictRedis(
            host=self.process.config.get("ARDB_Trending", "host"),
            port=self.process.config.get("ARDB_Trending", "port"),
            db=self.process.config.get("ARDB_Trending", "db"),
            decode_responses=True)

        # FILE CURVE SECTION #
        self.csv_path_proto = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolstrending_csv"))
        self.protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))

        self.csv_path_tld = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "tldstrending_csv"))
        self.tldsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "tldsfile"))

        self.csv_path_domain = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "domainstrending_csv"))

        self.faup = Faup()
        self.generate_new_graph = False

    def computeNone(self):
        if self.generate_new_graph:
            self.generate_new_graph = False

            today = datetime.date.today()
            year = today.year
            month = today.month

            self.redis_logger.debug('Building protocol graph')
            lib_words.create_curve_with_word_file(self.r_serv_trend,
                                                  self.csv_path_proto,
                                                  self.protocolsfile_path,
                                                  year, month)

            self.redis_logger.debug('Building tld graph')
            lib_words.create_curve_with_word_file(self.r_serv_trend,
                                                  self.csv_path_tld,
                                                  self.tldsfile_path, year,
                                                  month)

            self.redis_logger.debug('Building domain graph')
            lib_words.create_curve_from_redis_set(self.r_serv_trend,
                                                  self.csv_path_domain,
                                                  "domain", year, month)
            self.redis_logger.debug('end building')

    def compute(self, message):
        self.generate_new_graph = True

        # Do something with the message from the queue
        url, date, path = message.split()
        self.faup.decode(url)
        url_parsed = self.faup.get()

        # Scheme analysis
        self.analyse('scheme', date, url_parsed)
        # Tld analysis
        self.analyse('tld', date, url_parsed)
        # Domain analysis
        self.analyse('domain', date, url_parsed)

        self.compute_progression('scheme', self.NUM_DAY_TO_LOOK, url_parsed)
        self.compute_progression('tld', self.NUM_DAY_TO_LOOK, url_parsed)
        self.compute_progression('domain', self.NUM_DAY_TO_LOOK, url_parsed)

    def analyse(self, field_name, date, url_parsed):
        field = url_parsed[field_name]

        if field is not None:
            try:  # faup version
                field = field.decode()
            except:
                pass

            self.r_serv_trend.hincrby(field, date, 1)

            if field_name == "domain":  #save domain in a set for the monthly plot
                domain_set_name = "domain_set_" + date[0:6]
                self.r_serv_trend.sadd(domain_set_name, field)
                self.redis_logger.debug("added in " + domain_set_name + ": " +
                                        field)

    def get_date_range(self, num_day):
        curr_date = datetime.date.today()
        date = Date(
            str(curr_date.year) + str(curr_date.month).zfill(2) +
            str(curr_date.day).zfill(2))
        date_list = []

        for i in range(0, num_day + 1):
            date_list.append(date.substract_day(i))
        return date_list

    def compute_progression_word(self, num_day, keyword):
        """
        Compute the progression for one keyword
        """
        date_range = self.get_date_range(num_day)
        # check if this keyword is eligible for progression
        keyword_total_sum = 0
        value_list = []
        for date in date_range:  # get value up to date_range
            curr_value = self.r_serv_trend.hget(keyword, date)
            value_list.append(int(curr_value if curr_value is not None else 0))
            keyword_total_sum += int(
                curr_value) if curr_value is not None else 0
        oldest_value = value_list[-1] if value_list[-1] != 0 else 1  # Avoid zero division

        # The progression is based on the ratio: value[i] / value[i-1]
        keyword_increase = 0
        value_list_reversed = value_list[:]
        value_list_reversed.reverse()
        for i in range(1, len(value_list_reversed)):
            divisor = value_list_reversed[i - 1] if value_list_reversed[i - 1] != 0 else 1
            keyword_increase += value_list_reversed[i] / divisor

        return (keyword_increase, keyword_total_sum)

    def compute_progression(self, field_name, num_day, url_parsed):
        """
            Recompute the top_progression zset:
                - Compute the progression of the current field
                - Re-compute the progression of the first 2*self.MAX_SET_CARDINALITY entries already in the top_progression zset
        """
        redis_progression_name_set = "z_top_progression_" + field_name

        keyword = url_parsed[field_name]
        if keyword is not None:

            #compute the progression of the current word
            keyword_increase, keyword_total_sum = self.compute_progression_word(
                num_day, keyword)

            #re-compute the progression of 2*self.MAX_SET_CARDINALITY
            current_top = self.r_serv_trend.zrevrangebyscore(
                redis_progression_name_set,
                '+inf',
                '-inf',
                withscores=True,
                start=0,
                num=2 * self.MAX_SET_CARDINALITY)
            for word, value in current_top:
                word_inc, word_tot_sum = self.compute_progression_word(
                    num_day, word)
                self.r_serv_trend.zrem(redis_progression_name_set, word)
                if (word_tot_sum > self.THRESHOLD_TOTAL_SUM) and (
                        word_inc > self.THRESHOLD_INCREASE):
                    self.r_serv_trend.zadd(redis_progression_name_set,
                                           float(word_inc), word)

            # filter before adding
            if (keyword_total_sum > self.THRESHOLD_TOTAL_SUM) and (
                    keyword_increase > self.THRESHOLD_INCREASE):
                self.r_serv_trend.zadd(redis_progression_name_set,
                                       float(keyword_increase), keyword)
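To make the progression metric above concrete, a small standalone sketch with hypothetical daily counts, reproducing the ratio logic of compute_progression_word:

value_list = [12, 10, 8, 5, 2, 1]  # hypothetical counts, most recent date first
keyword_total_sum = sum(value_list)
values_oldest_first = list(reversed(value_list))
keyword_increase = sum(
    values_oldest_first[i] / (values_oldest_first[i - 1] or 1)  # 'or 1' mirrors the zero-division guard
    for i in range(1, len(values_oldest_first)))
print(keyword_increase, keyword_total_sum)  # 8.55 and 38: below THRESHOLD_TOTAL_SUM, so it would be filtered out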
Example no. 10
class Credential(AbstractModule):
    """
    Credential module for AIL framework
    """

    # Split usernames on special characters or on upper case, distinguishing parts that start with an upper-case letter
    REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
    REDIS_KEY_NUM_USERNAME = '******'
    REDIS_KEY_NUM_PATH = 'uniqNumForUsername'
    REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
    REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
    REDIS_KEY_ALL_PATH_SET = 'AllPath'
    REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
    REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'


    def __init__(self):
        super(Credential, self).__init__()

        self.faup = Faup()

        self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
        self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
        self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

        # Database
        config_loader = ConfigLoader.ConfigLoader()
        self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        # Config values
        self.minimumLengthThreshold = config_loader.get_config_int("Credential", "minimumLengthThreshold")
        self.criticalNumberToAlert = config_loader.get_config_int("Credential", "criticalNumberToAlert")

        self.max_execution_time = 30

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 10

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")


    def compute(self, message):

        id, count = message.split()
        item = Item(id)

        item_content = item.get_content()

        # Extract all credentials
        all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)

        if all_credentials:
            nb_cred = len(all_credentials)
            message = f'Checked {nb_cred} credentials found.'

            all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time)
            if all_sites:
                discovered_sites = ', '.join(all_sites)
                message += f' Related websites: {discovered_sites}'

            print(message)

            to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'

            # If the number of credentials is above the threshold, publish an alert
            if nb_cred > self.criticalNumberToAlert:
                print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
                self.redis_logger.warning(to_print)

                # Send to duplicate
                self.send_message_to_queue(item.get_id(), 'Duplicate')

                msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
                self.send_message_to_queue(msg, 'Tags')

                site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False)

                creds_sites = {}

                for site in site_occurence:
                    site_domain = site[1:-1].lower()
                    if site_domain in creds_sites.keys():
                        creds_sites[site_domain] += 1
                    else:
                        creds_sites[site_domain] = 1

                for url in all_sites:
                    self.faup.decode(url)
                    domain = self.faup.get()['domain']
                    ## TODO: # FIXME: remove me, check faup version
                    try:
                        domain = domain.decode()
                    except:
                        pass
                    if domain in creds_sites.keys():
                        creds_sites[domain] += 1
                    else:
                        creds_sites[domain] = 1

                for site, num in creds_sites.items(): # Send for each different site to moduleStats

                    mssg = f'credential;{num};{site};{item.get_date()}'
                    print(mssg)
                    self.send_message_to_queue(mssg, 'ModuleStats')

                if all_sites:
                    discovered_sites = ', '.join(all_sites)
                    print(f"=======> Probably on : {discovered_sites}")

                date = datetime.now().strftime("%Y%m")
                for cred in all_credentials:
                    maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
                    self.faup.decode(maildomains)
                    tld = self.faup.get()['tld']
                    ## TODO: # FIXME: remove me
                    try:
                        tld = tld.decode()
                    except:
                        pass
                    self.server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
            else:
                self.redis_logger.info(to_print)
                print(f'found {nb_cred} credentials')

            # For searching credential in termFreq
            for cred in all_credentials:
                cred = cred.split('@')[0] #Split to ignore mail address

                # unique number attached to unique path
                uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})

                # unique number attached to unique username
                uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
                if uniq_num_cred is None:
                    # cred does not exist, create new entries
                    uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME)
                    self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred})
                    self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred})

                # Add the mapping between the credential and the path
                self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path)

                # Split credentials on capital letters, numbers, dots and so on
                # Add the splits to redis, each part pointing towards its original credential's unique number
                splitedCred = re.findall(Credential.REGEX_CRED, cred)
                for partCred in splitedCred:
                    if len(partCred) > self.minimumLengthThreshold:
                        self.server_cred.sadd(partCred, uniq_num_cred)
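As a quick illustration of the credential-splitting regex above, applied to a hypothetical username:

import re

REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
print(re.findall(REGEX_CRED, "JohnDoe1985"))  # ['John', 'Doe', '1985']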
Example no. 12
                print()
                print()
                print(
                    '\033[92m------------------START CRAWLER------------------\033[0m'
                )
                print('crawler type:     {}'.format(type_hidden_service))
                print(
                    '\033[92m-------------------------------------------------\033[0m'
                )
                print('url:         {}'.format(url))
                print('domain:      {}'.format(domain))
                print('domain_url:  {}'.format(domain_url))

                faup.decode(domain)
                onion_domain = faup.get()['domain'].decode()

                if not r_onion.sismember(
                        'blacklist_{}'.format(type_hidden_service),
                        domain) and not r_onion.sismember(
                            'blacklist_{}'.format(type_hidden_service),
                            onion_domain):

                    date = datetime.datetime.now().strftime("%Y%m%d")
                    date_month = datetime.datetime.now().strftime("%Y%m")

                    if not r_onion.sismember(
                            'month_{}_up:{}'.format(
                                type_hidden_service,
                                date_month), domain) and not r_onion.sismember(
                                    '{}_down:{}'.format(
Example no. 13
File: test.py Project: sim0nx/faup
#!/usr/bin/python

from pyfaup.faup import Faup

url = "http://www.wallinfire.net"

f = Faup()
print("We decode the url: %s" % (url))
f.decode(url)
data = f.get()
print("URL TLD: %s" % (data['tld']))

Example no. 14
            msg = 'infoleak:automatic-detection="credential";{}'.format(filepath)
            p.populate_set_out(msg, 'Tags')

            #Put in form, count occurrences, then send to moduleStats
            creds_sites = {}
            site_occurence = re.findall(regex_site_for_stats, content)
            for site in site_occurence:
                site_domain = site[1:-1]
                if site_domain in creds_sites.keys():
                    creds_sites[site_domain] += 1
                else:
                    creds_sites[site_domain] = 1

            for url in sites:
                faup.decode(url)
                domain = faup.get()['domain']
                ## TODO: # FIXME: remove me
                try:
                    domain = domain.decode()
                except:
                    pass
                if domain in creds_sites.keys():
                    creds_sites[domain] += 1
                else:
                    creds_sites[domain] = 1

            for site, num in creds_sites.items(): # Send for each different site to moduleStats

                mssg = 'credential;{};{};{}'.format(num, site, paste.p_date)
                print(mssg)
                p.populate_set_out(mssg, 'ModuleStats')
Example no. 15
                                                      month)

                print 'Building domain graph'
                lib_words.create_curve_from_redis_set(r_serv_trend,
                                                      csv_path_domain,
                                                      "domain", year, month)
                print 'end building'

            publisher.debug(
                "{} queue is empty, waiting".format(config_section))
            print 'sleeping'
            time.sleep(5 * 60)
            continue

        else:
            generate_new_graph = True
            # Do something with the message from the queue
            url, date, path = message.split()
            faup.decode(url)
            url_parsed = faup.get()

            analyse(r_serv_trend, 'scheme', date, url_parsed)  #Scheme analysis
            analyse(r_serv_trend, 'tld', date, url_parsed)  #Tld analysis
            analyse(r_serv_trend, 'domain', date, url_parsed)  #Domain analysis
            compute_progression(r_serv_trend, 'scheme', num_day_to_look,
                                url_parsed)
            compute_progression(r_serv_trend, 'tld', num_day_to_look,
                                url_parsed)
            compute_progression(r_serv_trend, 'domain', num_day_to_look,
                                url_parsed)
Example no. 16
                print('Building domain graph')
                lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
                                                      "domain", year,
                                                      month)
                print('end building')


            publisher.debug("{} queue is empty, waiting".format(config_section))
            print('sleeping')
            time.sleep(5*60)
            continue

        else:
            generate_new_graph = True
            # Do something with the message from the queue
            url, date, path = message.split()
            faup.decode(url)
            url_parsed = faup.get()

            # Scheme analysis
            analyse(r_serv_trend, 'scheme', date, url_parsed)
            # Tld analysis
            analyse(r_serv_trend, 'tld', date, url_parsed)
            # Domain analysis
            analyse(r_serv_trend, 'domain', date, url_parsed)

            compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
            compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
            compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)
Example no. 17
    if urls_file is None:
        source_info = "arg:%s" % (sys.argv[1])
    else:
        source_info = "file:%s" % (sys.argv[1])

    urlw_log = UrlwLog(source_info)
    urlw_log.open()
    urlw_log.custom_log("Starting...")
    urlw_p = UrlwPlugins(urlw_log)

    fauplib = Faup()

    if source_info.startswith("arg:"):
        fauplib.decode(sys.argv[1])
        faup_object = fauplib.get()
        for plugin in urlw_p.plugins_list:
            urlw_p.run(plugin, sys.argv[1], faup_object)

    elif source_info.startswith("file:"):
        urls = urls_file.readlines()
        for url in urls:
            fauplib.decode(url)
            faup_object = fauplib.get()
            for plugin in urlw_p.plugins_list:
                urlw_p.run(plugin, url, faup_object)

        urls_file.close()

    urlw_log.custom_log("Done")
    urlw_log.close()
Example no. 18
class LibInjection(AbstractModule):
    """docstring for LibInjection module."""

    def __init__(self):
        super(LibInjection, self).__init__()

        self.faup = Faup()

        config_loader = ConfigLoader()
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        url, id = message.split()

        self.faup.decode(url)
        url_parsed = self.faup.get()
        ## TODO: # FIXME: remove me
        try:
            resource_path = url_parsed['resource_path'].encode()
        except:
            resource_path = url_parsed['resource_path']

        ## TODO: # FIXME: remove me
        try:
            query_string = url_parsed['query_string'].encode()
        except:
            query_string = url_parsed['query_string']

        result_path = {'sqli' : False}
        result_query = {'sqli' : False}

        if resource_path is not None:
            result_path = pylibinjection.detect_sqli(resource_path)
            #print(f'path is sqli : {result_path}')

        if query_string is not None:
            result_query = pylibinjection.detect_sqli(query_string)
            #print(f'query is sqli : {result_query}')

        if result_path['sqli'] is True or result_query['sqli'] is True:
            item = Item(id)
            item_id = item.get_id()
            print(f"Detected (libinjection) SQL in URL: {item_id}")
            print(urllib.request.unquote(url))

            to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Add tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            #statistics
            ## TODO: # FIXME: remove me
            try:
                tld = url_parsed['tld'].decode()
            except:
                tld = url_parsed['tld']
            if tld is not None:
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
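A hedged standalone sketch of the pylibinjection call used above, with a hypothetical payload; it assumes only what the module already relies on, namely that detect_sqli accepts bytes and returns a dict carrying an 'sqli' flag:

import pylibinjection

result = pylibinjection.detect_sqli(b"1' UNION SELECT password FROM users--")
print(result['sqli'])  # expected to be truthy for this payload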
Example no. 19
            p.populate_set_out(filepath, 'Duplicate')
            #Send to BrowseWarningPaste
            p.populate_set_out('credential;{}'.format(filepath), 'BrowseWarningPaste')
            
            #Put in form, count occurrences, then send to moduleStats
            creds_sites = {}
            site_occurence = re.findall(regex_site_for_stats, content)
            for site in site_occurence:
                site_domain = site[1:-1]
                if site_domain in creds_sites.keys():
                    creds_sites[site_domain] += 1
                else:
                    creds_sites[site_domain] = 1

            for url in sites:
                faup.decode(url)
                domain = faup.get()['domain']
                if domain in creds_sites.keys():
                    creds_sites[domain] += 1
                else:
                    creds_sites[domain] = 1

            for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats
                print 'credential;{};{};{}'.format(num, site, paste.p_date)
                p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')

            if sites_set:
                print("=======> Probably on : {}".format(', '.join(sites_set)))
        else:
            publisher.info(to_print)
Example no. 20
                item_content,
                max_time=max_execution_time,
                r_set=False)

            creds_sites = {}

            for site in site_occurence:
                site_domain = site[1:-1].lower()
                if site_domain in creds_sites.keys():
                    creds_sites[site_domain] += 1
                else:
                    creds_sites[site_domain] = 1

            for url in all_sites:
                faup.decode(url)
                domain = faup.get()['domain']
                ## TODO: # FIXME: remove me
                try:
                    domain = domain.decode()
                except:
                    pass
                if domain in creds_sites.keys():
                    creds_sites[domain] += 1
                else:
                    creds_sites[domain] = 1

            for site, num in creds_sites.items():  # Send for each different site to moduleStats

                mssg = 'credential;{};{};{}'.format(
                    num, site, Item.get_item_date(item_id))
Example no. 21
class UrlsExtractor(object):
    def __init__(self):
        self._url_regex = re.compile(
            r'((?:(?:ht|f)tp(?:s?)\:\/\/)'
            r'(?:[!#$&-;=?-\[\]_a-z~]|%[0-9a-f]{2})+)', re.I)
        self._faup = Faup()

    def extract(self, text):
        """This function extract all url http(s) and ftp(s) from text.
        Return a dict, with a key for every second-level domain and
        value a list of disassembled urls (output Faup tool).

        Example disassembled url https://drive.google.com/drive/my-drive:

            {
                'domain': 'google.com',
                'domain_without_tld': 'google',
                'fragment': None,
                'host': 'drive.google.com',
                'port': None,
                'query_string': None,
                'resource_path': '/drive/my-drive',
                'scheme': 'https',
                'subdomain': 'drive',
                'tld': 'com',
                'url': 'https://drive.google.com/drive/my-drive'
            }

        """

        if not isinstance(text, unicode):
            raise NotUnicodeError("The given text is not in unicode")

        self._results = dict()

        for i in self._url_regex.finditer(text):

            try:
                """
                import urlnorm
                url = urlnorm.norm(i.group(0).strip())

                Can't use urlnorm because it can't manage domains like
                http://contentsr,xn--90afavbplfx2a6a5b2a,xn--p1ai/

                After normalization it's impossible to tokenize this kind of URL
                """

                url = i.group(0).strip()
            except:
                raise FailedRegexUrl("Failed parsing regex urls")

            try:
                self._faup.decode(url)
                tokens = self._faup.get()

                # Get results for domain
                domain = self._results.get(tokens['domain'], None)

                if domain:
                    domain.append(tokens)
                else:
                    self._results[tokens['domain']] = [tokens]

            except:
                raise FailedFaupParsing("Failed tokenize url with Faup")

    @property
    def urls_obj(self):
        return self._results

    @property
    def urls_json(self):
        try:
            return json.dumps(self.urls_obj, ensure_ascii=False)
        except:
            raise FailedReturnJsonUrls("Failed make JSON from urls result")
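A minimal usage sketch for the class above (Python 2, matching the unicode check in extract); the input string is hypothetical and reuses the URL from the docstring:

extractor = UrlsExtractor()
extractor.extract(u"see https://drive.google.com/drive/my-drive for details")
print(extractor.urls_json)  # JSON keyed by second-level domain, e.g. 'google.com'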