Example #1
File: crawler.py Project: mdeous/OSINT
    def run(self):
        i = 0
        while (True):
            i = i + 1
            if i % 1000 == 0:
                time.sleep(10)
            url = self.r.rpop('crawl')
            fex = Faup()
            if url:
                print "url found: " + url
                fex.decode(url)
                domain = fex.get_host()
                entry = self.db.new_domaines.find_one({'domaine': domain})
                if entry is None:
                    print "record: " + domain
                    # First time this domain is seen: create the record so the
                    # URL list below can be populated and saved.
                    entry = {'domaine': domain, 'urls': []}
                    self.db.new_domaines.save(entry)

                urls_stored = entry['urls']
                if not url in urls_stored:
                    urls_stored.append(url)
                    entry['urls'] = urls_stored
                    self.db.new_domaines.save(entry)
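
For reference, the pyfaup calls these crawler excerpts rely on can be exercised on their own. A minimal sketch (assuming pyfaup is installed; the URL and the printed values are illustrative only):

from pyfaup.faup import Faup

fex = Faup()
fex.decode("http://blog.example.com/post/1")
# Depending on the faup version, the getters may return bytes instead of str.
print(fex.get_host())    # e.g. blog.example.com
print(fex.get_domain())  # e.g. example.com
print(fex.get_tld())     # e.g. com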
Example #2
    def run(self):
        i = 0
        while (True):
            i = i + 1
            if i % 1000 == 0:
                time.sleep(10)
            self.lock.acquire()
            self.r.switchDB(1)
            url = self.r.rpop('crawl')
            self.lock.release()
            # print url
            fex = Faup()
            if url:
                print "url found: " + url
                try:
                    fex.decode(url)
                    domain = fex.get_host()
                    entry = self.db.new_domaines.find_one({'domaine': domain})
                    if entry is None:
                        print "record: " + domain
                        # Create the record in memory so the URL list below can
                        # be populated without raising inside the try block.
                        entry = {'domaine': domain, 'urls': []}
                        self.db.new_domaines.save(entry)

                    urls_stored = entry['urls']
                    if not url in urls_stored:
                        urls_stored.append(url)
                        entry['urls'] = urls_stored
                        self.db.new_domaines.save(entry)
                except:
                    print "parsing fault " + url
Example #3
def get_urls(url, depth=1):
    if depth > 5:
        print('Too many redirects.')
        return
    fex = Faup()

    def meta_redirect(content):
        c = content.lower()
        soup = BeautifulSoup(c, "html.parser")
        for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
            if result:
                out = result["content"].split(";")
                if len(out) == 2:
                    wait, text = out
                    a, url = text.split('=', 1)
                    return url.strip()
        return None

    resolve, reason = try_resolve(fex, url)
    if not resolve:
        # FIXME: inform that the domain does not resolve
        yield url
        return

    logging.debug("Making HTTP connection to " + url)

    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'}
    try:
        response = requests.get(url, allow_redirects=True, headers=headers,
                                timeout=15, verify=False)
    except:
        # That one can fail (DNS for example)
        # FIXME: inform that the get failed
        yield url
        return
    if response.history is not None:
        for h in response.history:
            # Yield the URLs in the order we find them
            yield h.url

    yield response.url

    meta_redir_url = meta_redirect(response.content)
    if meta_redir_url is not None:
        depth += 1
        if not meta_redir_url.startswith('http'):
            fex.decode(url)
            base = '{}://{}'.format(fex.get_scheme(), fex.get_host())
            port = fex.get_port()
            if port is not None:
                base += ':{}'.format(port)
            if not meta_redir_url.startswith('/'):
                # relative redirect. resource_path has the initial '/'
                if fex.get_resource_path() is not None:
                    base += fex.get_resource_path()
            if not base.endswith('/'):
                base += '/'
            meta_redir_url = base + meta_redir_url
        for url in get_urls(meta_redir_url, depth):
            yield url
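
A possible way to drive this generator, assuming the module's helpers (try_resolve, the cache functions, etc.) are importable alongside it; the last URL yielded is the final landing page:

chain = list(get_urls('http://example.com/'))
if chain:
    print("redirect chain:", chain)
    print("landing page:", chain[-1])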
Example #4
def whois(server, port, domain, ignorelist, replacelist):
    cached = _cache_get(domain, 'whois')
    if cached is not None:
        return cached
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(15)
    try:
        s.connect((server, port))
    except Exception:
        print("Connection problems - check WHOIS server")
        print(("WHOIS request while problem occurred: ", domain))
        print(("WHOIS server: {}:{}".format(server, port)))
        sys.exit(0)
    if domain.startswith('http'):
        fex = Faup()
        fex.decode(domain)
        d = fex.get_domain().lower()
    else:
        d = domain
    s.send(d + "\r\n")
    response = ''
    while True:
        d = s.recv(4096)
        response += d
        if d == '':
            break
    s.close()
    match = re.findall(r'[\w\.-]+@[\w\.-]+', response)
    emails = process_emails(match, ignorelist, replacelist)
    if len(emails) == 0:
        return None
    list_mail = list(set(emails))
    _cache_set(domain, list_mail, 'whois')
    return list_mail
Example #5
 def process(self):
     list_domains = self.db['new_domaines'].distinct('domaine')
     fex = Faup()
     for domain in list_domains:
         url = 'http://' + str(domain)
         fex.decode(url, False)
         # Build one CSV-style line: tld, domain, then the subdomain labels
         # reversed, with 'www' stripped and duplicate commas collapsed.
         subs = ','.join(fex.get_subdomain().split('.')[::-1]).replace('www', '')
         line = (fex.get_tld() + ',' + fex.get_domain() + ',' + subs).replace(',,', ',')
         print line
Example #6
def get_urls(url, depth=1):
    if depth > 5:
        print('Too many redirects.')
        return
    fex = Faup()

    def meta_redirect(content):
        c = content.lower()
        soup = BeautifulSoup(c, "html.parser")
        for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
            if result:
                out = result["content"].split(";")
                if len(out) == 2:
                    wait, text = out
                    a, url = text.split('=', 1)
                    return url.strip()
        return None

    resolve, reason = try_resolve(fex, url)
    if not resolve:
        # FIXME: inform that the domain does not resolve
        yield url
        return

    logging.debug("Making HTTP connection to " + url)

    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'}
    try:
        response = requests.get(url, allow_redirects=True, headers=headers,
                                timeout=15, verify=False)
    except:
        # That one can fail (DNS for example)
        # FIXME: inform that the get failed
        yield url
        return
    if response.history is not None:
        for h in response.history:
            # Yield the URLs in the order we find them
            yield h.url

    yield response.url

    meta_redir_url = meta_redirect(response.content)
    if meta_redir_url is not None:
        depth += 1
        if not meta_redir_url.startswith('http'):
            fex.decode(url)
            base = '{}://{}'.format(fex.get_scheme(), fex.get_host())
            port = fex.get_port()
            if port is not None:
                base += ':{}'.format(port)
            if not meta_redir_url.startswith('/'):
                # relative redirect. resource_path has the initial '/'
                if fex.get_resource_path() is not None:
                    base += fex.get_resource_path()
            if not base.endswith('/'):
                base += '/'
            meta_redir_url = base + meta_redir_url
        for url in get_urls(meta_redir_url, depth):
            yield url
Example #7
def dns_resolve(url):
    cached = _cache_get(url, 'dns')
    if cached is not None:
        return cached
    fex = Faup()
    fex.decode(url)
    host = fex.get_host().lower()
    ipv4 = None
    ipv6 = None
    if is_ip(host):
        if ':' in host:
            try:
                socket.inet_pton(socket.AF_INET6, host)
                ipv6 = [host]
            except:
                pass
        else:
            try:
                socket.inet_aton(host)
                ipv4 = [host]
            except:
                pass
    else:
        try:
            ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
        except:
            logging.debug("No IPv4 address assigned to: " + host)
        try:
            ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
        except:
            logging.debug("No IPv6 address assigned to: " + host)
    _cache_set(url, (ipv4, ipv6), 'dns')
    return ipv4, ipv6
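
A small usage sketch (requires dnspython and the module's cache helpers; the URL is made up):

ipv4, ipv6 = dns_resolve('http://www.example.com/index.html')
print("A records:", ipv4)      # list of IPv4 addresses, or None
print("AAAA records:", ipv6)   # list of IPv6 addresses, or None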
Example #8
File: crawler.py Project: 5l1v3r1/OSINT-1
    def run(self):
        i = 0
        while (True):
            i = i + 1
            if i % 1000 == 0:
                time.sleep(10)
            self.lock.acquire()
            self.r.switchDB(1)
            url = self.r.rpop('crawl')
            self.lock.release()
            # print url
            fex = Faup()
            if url:
                print "url found: " + url
                try:
                    fex.decode(url)
                    domain = fex.get_host()
                    entry = self.db.new_domaines.find_one({'domaine': domain})
                    if entry is None:
                        print "record: " + domain
                        # Create the record in memory so the URL list below can
                        # be populated without raising inside the try block.
                        entry = {'domaine': domain, 'urls': []}
                        self.db.new_domaines.save(entry)

                    urls_stored = entry['urls']
                    if not url in urls_stored:
                        urls_stored.append(url)
                        entry['urls'] = urls_stored
                        self.db.new_domaines.save(entry)
                except:
                    print "parsing fault " + url
Example #9
def whois(server, port, domain, ignorelist, replacelist):
    cached = _cache_get(domain, 'whois')
    if cached is not None:
        return cached
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(15)
    try:
        s.connect((server, port))
    except Exception:
        print("Connection problems - check WHOIS server")
        print(("WHOIS request while problem occurred: ", domain))
        print(("WHOIS server: {}:{}".format(server, port)))
        sys.exit(0)
    if domain.startswith('http'):
        fex = Faup()
        fex.decode(domain)
        d = fex.get_domain().lower()
    else:
        d = domain
    s.send(d + "\r\n")
    response = ''
    while True:
        d = s.recv(4096)
        response += d
        if d == '':
            break
    s.close()
    match = re.findall(r'[\w\.-]+@[\w\.-]+', response)
    emails = process_emails(match, ignorelist, replacelist)
    if len(emails) == 0:
        return None
    list_mail = list(set(emails))
    _cache_set(domain, list_mail, 'whois')
    return list_mail
Example #10
def getmisp_urls(key, url, timeframe):
    response_domains = []
    headers = {
        'Authorization': '{}'.format(key),
        'Content-type': 'application/json',
        'Accept': 'application/json'
    }
    payload = '{ "returnFormat": "json", "type": "url", "last": "%s", "enforceWarninglist": true }' % timeframe
    response = requests.post(url, headers=headers, data=payload, verify=False)
    json_response = json.loads(response.text)
    fp = Faup()
    try:
        for attr in json_response['response']['Attribute']:
            url = attr['value']
            eventid = attr['event_id']
            if eventid not in ignore_eventid:
                category = attr['category']
                timestamp = datetime.datetime.utcfromtimestamp(
                    int(attr['timestamp'])).strftime('%Y-%m-%d')
                fp.decode(url)
                domain = fp.get_domain()
                if re.match(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", domain):
                    response_domains.append({
                        'domain': domain,
                        'eventid': eventid,
                        'category': category,
                        'timestamp': timestamp
                    })

        return response_domains
    except:
        return response_domains
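
The hand-written JSON payload above can just as well be produced with json.dumps, which avoids quoting mistakes; a small equivalent sketch (not from the source):

import json

payload = json.dumps({
    "returnFormat": "json",
    "type": "url",
    "last": timeframe,          # e.g. "7d"
    "enforceWarninglist": True,
})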
Example #11
 def __post_init__(self):
     if self.domain is None:
         f = Faup(
         )  # Example code at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
         f.decode(self.address.split("@")[-1])
         self.top_level_domain = f.get_tld()
         self.domain = f.get_domain()
         self.subdomain = f.get_subdomain()
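
The fragment above omits the surrounding dataclass; a self-contained sketch of the shape it implies (the class name and the field defaults are assumptions, not from the source):

from dataclasses import dataclass
from typing import Optional

from pyfaup.faup import Faup


@dataclass
class EmailAddress:
    address: str
    domain: Optional[str] = None
    top_level_domain: Optional[str] = None
    subdomain: Optional[str] = None

    def __post_init__(self):
        if self.domain is None:
            f = Faup()
            f.decode(self.address.split("@")[-1])  # parse only the host part
            self.top_level_domain = f.get_tld()
            self.domain = f.get_domain()
            self.subdomain = f.get_subdomain()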
Example #12
class Urls(AbstractModule):
    """
    Urls module for AIL framework
    """
    def __init__(self):
        """
        Init Urls
        """
        super(Urls, self).__init__()

        self.faup = Faup()
        self.redis_cache_key = regex_helper.generate_redis_cache_key(
            self.module_name)

        # Protocol file path
        protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))
        # Get all uri from protocolsfile (Used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1] + "|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:[a-zA-Z]{2,15}))(?:\:[0-9]+)*(?:/?(?:[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)"

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        id, score = message.split()

        item = Item(id)
        item_content = item.get_content()

        l_urls = regex_helper.regex_findall(self.module_name,
                                            self.redis_cache_key,
                                            self.url_regex, item.get_id(),
                                            item_content)
        for url in l_urls:
            self.faup.decode(url)
            unpack_url = self.faup.get()

            to_send = f"{url} {item.get_id()}"
            print(to_send)
            self.send_message_to_queue(to_send, 'Url')
            self.redis_logger.debug(f"url_parsed: {to_send}")

        if len(l_urls) > 0:
            to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
            self.redis_logger.info(
                f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
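
Outside the AIL framework, the core of this module is a findall over a scheme-anchored regex followed by a Faup decode per hit. A rough standalone sketch (the simplified regex and the sample text are assumptions, not the module's full pattern):

import re
from pyfaup.faup import Faup

URL_REGEX = r"(?i)\b(?:https?|ftp)://[^\s<>]+"

faup = Faup()
text = "mirror at http://example.com/dump.txt and https://files.example.org/x"
for url in re.findall(URL_REGEX, text):
    faup.decode(url)
    print(url, faup.get())  # faup.get() returns the parsed fields as a dict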
Example #13
 def process(self):
     list_domains = self.db['new_domaines'].distinct('domaine')
     fex = Faup()
     for domain in list_domains:
         url = 'http://' + str(domain)
         fex.decode(url, False)
         # Same output as Example #5: tld, domain, reversed subdomain labels,
         # with 'www' stripped and duplicate commas collapsed.
         subs = ','.join(fex.get_subdomain().split('.')[::-1]).replace('www', '')
         print((fex.get_tld() + ',' + fex.get_domain() + ',' + subs).replace(',,', ','))
Example #14
    def __post_init__(self):
        f = Faup(
        )  # Example code at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
        f.decode(self.url)

        self.scheme = f.get_scheme()
        self.top_level_domain = f.get_tld()
        self.domain = f.get_domain()
        self.subdomain = f.get_subdomain()
        self.path = f.get_resource_path()
Example #15
def harvesting_google(query, numberofpage):
    listreturn = []
    result = Popen(['casperjs', 'CeleryWeb/casperjs/googlesearch.js', str(query), str(numberofpage)], stdout=PIPE)
    urls = result.stdout.readlines()
    for url in urls:
        f = Faup()
        url=url.replace('\n','')
        f.decode(url)
        listreturn.append(f.get())
    return listreturn
Example #16
class SQLInjectionDetection(AbstractModule):
    """docstring for SQLInjectionDetection module."""

    # # TODO: IMPROVE ME
    # Reference: https://github.com/stamparm/maltrail/blob/master/core/settings.py
    SQLI_REGEX = r"information_schema|sysdatabases|sysusers|floor\(rand\(|ORDER BY \d+|\bUNION\s+(ALL\s+)?SELECT\b|\b(UPDATEXML|EXTRACTVALUE)\(|\bCASE[^\w]+WHEN.*THEN\b|\bWAITFOR[^\w]+DELAY\b|\bCONVERT\(|VARCHAR\(|\bCOUNT\(\*\)|\b(pg_)?sleep\(|\bSELECT\b.*\bFROM\b.*\b(WHERE|GROUP|ORDER)\b|\bSELECT \w+ FROM \w+|\b(AND|OR|SELECT)\b.*/\*.*\*/|/\*.*\*/.*\b(AND|OR|SELECT)\b|\b(AND|OR)[^\w]+\d+['\") ]?[=><]['\"( ]?\d+|ODBC;DRIVER|\bINTO\s+(OUT|DUMP)FILE"

    def __init__(self):
        super(SQLInjectionDetection, self).__init__()

        self.faup = Faup()

        config_loader = ConfigLoader()
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        url, id = message.split()

        if self.is_sql_injection(url):
            self.faup.decode(url)
            url_parsed = self.faup.get()

            item = Item(id)
            item_id = item.get_id()
            print(f"Detected SQL in URL: {item_id}")
            print(urllib.request.unquote(url))
            to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            # statistics
            tld = url_parsed['tld']
            if tld is not None:
                ## TODO: # FIXME: remove me
                try:
                    tld = tld.decode()
                except:
                    pass
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)

    # Try to detect whether the URL passed might be an SQL injection by applying
    # the regex defined above to it.
    def is_sql_injection(self, url_parsed):
        line = urllib.request.unquote(url_parsed)

        return re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None
Example #17
File: crawler.py Project: pagedegeek/OSINT
 def run(self):
     i = 0
     while True:
         i = i + 1
         if i % 1000 == 0:
             time.sleep(10)
         url = self.r.rpop("crawl")
         fex = Faup()
         if url:
             fex.decode(url)
             domain = fex.get_host()
             entry = self.db.new_domaines.find_one({"domaine": domain})
             if entry == None:
                 print "record: " + domain
                 self.db.new_domaines.save({"domaine": domain, "urls": [url]})
Example #18
def is_valid_url(url):
    cached = _cache_get(url, 'valid')
    key = date.today().isoformat() + '_submissions'
    r_cache.zincrby(key, url)
    if cached is not None:
        return cached
    fex = Faup()
    if url.startswith('hxxp'):
        url = 'http' + url[4:]
    elif not url.startswith('http'):
        url = 'http://' + url
    logging.debug("Checking validity of URL: " + url)
    fex.decode(url)
    scheme = fex.get_scheme()
    host = fex.get_host()
    if scheme is None or host is None:
        reason = "Not a valid http/https URL/URI"
        return False, url, reason
    _cache_set(url, (True, url, None), 'valid')
    return True, url, None
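
A small usage sketch (assuming the module's cache helpers are in scope and store the tuple as-is, the return value always unpacks as a 3-tuple; the input is made up):

valid, normalized, reason = is_valid_url('example.com/login')
if valid:
    print("usable URL:", normalized)
else:
    print("rejected:", reason)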
Example #19
def is_valid_url(url):
    cached = _cache_get(url, 'valid')
    key = date.today().isoformat() + '_submissions'
    r_cache.zincrby(key, url)
    if cached is not None:
        return cached
    fex = Faup()
    if url.startswith('hxxp'):
        url = 'http' + url[4:]
    elif not url.startswith('http'):
        url = 'http://' + url
    logging.debug("Checking validity of URL: " + url)
    fex.decode(url)
    scheme = fex.get_scheme()
    host = fex.get_host()
    if scheme is None or host is None:
        reason = "Not a valid http/https URL/URI"
        return False, url, reason
    _cache_set(url, (True, url, None), 'valid')
    return True, url, None
Example #20
def dns_resolve(url):
    cached = _cache_get(url, 'dns')
    if cached is not None:
        return cached
    fex = Faup()
    fex.decode(url)
    host = fex.get_host().lower()
    ipv4 = None
    ipv6 = None
    if not is_ip(host):
        try:
            ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
        except:
            logging.debug("No IPv4 address assigned to: " + host)
        try:
            ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
        except:
            logging.debug("No IPv6 address assigned to: " + host)
    _cache_set(url, (ipv4, ipv6), 'dns')
    return ipv4, ipv6
Example #21
 def run(self):
     i=0
     while(True):
         i=i+1
         if i % 1000==0:
             time.sleep(10)
         url=self.r.rpop('crawl')
         fex=Faup()
         if url:
             print "url found: "+url
             fex.decode(url)
             domain=fex.get_host()
             entry = self.db.new_domaines.find_one({'domaine':domain})
             if entry is None:
                 print "record: " + domain
                 # Create the record so the URL list below can be populated.
                 entry = {'domaine': domain, 'urls': []}
                 self.db.new_domaines.save(entry)

             urls_stored = entry['urls']
             if not url in urls_stored:
                 urls_stored.append(url)
                 entry['urls']=urls_stored
                 self.db.new_domaines.save(entry)
Example #22
File: crawler.py Project: 5l1v3r1/OSINT-1
    def sort(self, elem_links, url):
        fex = Faup()
        f = Filters()
        f.load()
        self.r.switchDB(1)
        extend = True
        domainfilter = True
        schemefilter = True
        try:
            for link in elem_links:
                new_url = link
                self.r.switchDB(2)
                if not self.r.get(new_url) and new_url:
                    self.r.switchDB(1)
                    if not self.r.get(new_url):
                        fex.decode(new_url)
                        domain = fex.get_host()
                        if f.isfilteredscheme(fex.get_scheme()):
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)
                            schemefilter = False
                        if f.isfiltereddomains(domain):
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)
                            domainfilter = False
                        if f.isfilteredextention(fex.get_resource_path()):
                            extend = False
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)

                        if extend and domainfilter and schemefilter:
                            self.r.switchDB(1)
                            self.r.rpush('crawl', new_url)
                            self.queue.append(new_url)
        except TypeError as e:
            print "TypeError"
Example #23
    def sort(self, elem_links, url):
        fex = Faup()
        f = Filters()
        f.load()
        self.r.switchDB(1)
        extend = True
        domainfilter = True
        schemefilter = True
        try:
            for link in elem_links:
                new_url = link
                self.r.switchDB(2)
                if not self.r.get(new_url) and new_url:
                    self.r.switchDB(1)
                    if not self.r.get(new_url):
                        fex.decode(new_url)
                        domain = fex.get_host()
                        if f.isfilteredscheme(fex.get_scheme()):
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)
                            schemefilter = False
                        if f.isfiltereddomains(domain):
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)
                            domainfilter = False
                        if f.isfilteredextention(fex.get_resource_path()):
                            extend = False
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)

                        if extend and domainfilter and schemefilter:
                            self.r.switchDB(1)
                            self.r.rpush('crawl', new_url)
                            self.queue.append(new_url)
        except TypeError as e:
            print "TypeError"
Example #24
File: Web.py Project: Rafiot/AIL-framework
    while True:
        if message is not None:
            filename, score = message.split()

            if prec_filename is None or filename != prec_filename:
                domains_list = []
                PST = Paste.Paste(filename)
                client = ip2asn()
                for x in PST.get_regex(url_regex):
                    matching_url = re.search(url_regex, PST.get_p_content())
                    url = matching_url.group(0)

                    to_send = "{} {} {}".format(url, PST._get_p_date(), filename)
                    p.populate_set_out(to_send, 'Url')

                    faup.decode(url)
                    domain = faup.get_domain()
                    subdomain = faup.get_subdomain()
                    f1 = None

                    domains_list.append(domain)

                    publisher.debug('{} Published'.format(url))

                    if f1 == "onion":
                        print domain

                    hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
                    try:
                        socket.setdefaulttimeout(1)
                        ip = socket.gethostbyname(unicode(hostl))
Example #25
 # The following needs fixes for ExpandedPyMisp
 for attribs in res_search['response']['Attribute']:
     uuid = attribs['uuid']
 if uuid is not None:
     print("URL is already present.")
     # add sighting
     # if MISP allows to sight on add, we should implement it here, too
     misp.sighting(uuid=uuid, source="URLabuse")
     sys.exit(0)
 # This is obsolete
 #event = misp.get(misp_id)
 #existing_event = MISPEvent()
 #existing_event.load(event)
 redirect_count = 0
 fex = Faup()
 fex.decode(url)
 hostname = fex.get_host().lower()
 screenshot = hostname.decode() + '.png'
 mispObject = MISPObject('phishing')
 mispObject.add_attribute('hostname', value=hostname.decode())
 for key in response['result']:
     u = list(key.keys())[0]
     if redirect_count == 0:
         comment = "initial URL"
         mispObject.add_attribute('url', value=u, comment=comment)
     else:
         comment = "redirect URL: {}"
         mispObject.add_attribute('url-redirect',
                                  value=u,
                                  comment=comment.format(redirect_count))
     redirect_count += 1
Example #26
class WebStats(AbstractModule):
    """
    WebStats module for AIL framework
    """

    # Config Var
    THRESHOLD_TOTAL_SUM = 200  # Above this value, a keyword is eligible for a progression
    THRESHOLD_INCREASE = 1.0  # The percentage representing the keyword occurrence since num_day_to_look
    MAX_SET_CARDINALITY = 10  # The cardinality of the progression set
    NUM_DAY_TO_LOOK = 5  # The detection of the progression starts num_day_to_look days in the past

    def __init__(self):
        super(WebStats, self).__init__()

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))
        # Sent to the logging a description of the module
        self.redis_logger.info("Makes statistics about valid URL")

        self.pending_seconds = 5 * 60

        # REDIS #
        self.r_serv_trend = redis.StrictRedis(
            host=self.process.config.get("ARDB_Trending", "host"),
            port=self.process.config.get("ARDB_Trending", "port"),
            db=self.process.config.get("ARDB_Trending", "db"),
            decode_responses=True)

        # FILE CURVE SECTION #
        self.csv_path_proto = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolstrending_csv"))
        self.protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))

        self.csv_path_tld = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "tldstrending_csv"))
        self.tldsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "tldsfile"))

        self.csv_path_domain = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "domainstrending_csv"))

        self.faup = Faup()
        self.generate_new_graph = False

    def computeNone(self):
        if self.generate_new_graph:
            self.generate_new_graph = False

            today = datetime.date.today()
            year = today.year
            month = today.month

            self.redis_logger.debug('Building protocol graph')
            lib_words.create_curve_with_word_file(self.r_serv_trend,
                                                  self.csv_path_proto,
                                                  self.protocolsfile_path,
                                                  year, month)

            self.redis_logger.debug('Building tld graph')
            lib_words.create_curve_with_word_file(self.r_serv_trend,
                                                  self.csv_path_tld,
                                                  self.tldsfile_path, year,
                                                  month)

            self.redis_logger.debug('Building domain graph')
            lib_words.create_curve_from_redis_set(self.r_serv_trend,
                                                  self.csv_path_domain,
                                                  "domain", year, month)
            self.redis_logger.debug('end building')

    def compute(self, message):
        self.generate_new_graph = True

        # Do something with the message from the queue
        url, date, path = message.split()
        self.faup.decode(url)
        url_parsed = self.faup.get()

        # Scheme analysis
        self.analyse('scheme', date, url_parsed)
        # Tld analysis
        self.analyse('tld', date, url_parsed)
        # Domain analysis
        self.analyse('domain', date, url_parsed)

        self.compute_progression('scheme', self.NUM_DAY_TO_LOOK, url_parsed)
        self.compute_progression('tld', self.NUM_DAY_TO_LOOK, url_parsed)
        self.compute_progression('domain', self.NUM_DAY_TO_LOOK, url_parsed)

    def analyse(self, field_name, date, url_parsed):
        field = url_parsed[field_name]

        if field is not None:
            try:  # faup version
                field = field.decode()
            except:
                pass

            self.r_serv_trend.hincrby(field, date, 1)

            if field_name == "domain":  #save domain in a set for the monthly plot
                domain_set_name = "domain_set_" + date[0:6]
                self.r_serv_trend.sadd(domain_set_name, field)
                self.redis_logger.debug("added in " + domain_set_name + ": " +
                                        field)

    def get_date_range(self, num_day):
        curr_date = datetime.date.today()
        date = Date(
            str(curr_date.year) + str(curr_date.month).zfill(2) +
            str(curr_date.day).zfill(2))
        date_list = []

        for i in range(0, num_day + 1):
            date_list.append(date.substract_day(i))
        return date_list

    def compute_progression_word(self, num_day, keyword):
        """
        Compute the progression for one keyword
        """
        date_range = self.get_date_range(num_day)
        # check if this keyword is eligible for progression
        keyword_total_sum = 0
        value_list = []
        for date in date_range:  # get value up to date_range
            curr_value = self.r_serv_trend.hget(keyword, date)
            value_list.append(int(curr_value if curr_value is not None else 0))
            keyword_total_sum += int(
                curr_value) if curr_value is not None else 0
        oldest_value = value_list[
            -1] if value_list[-1] != 0 else 1  #Avoid zero division

        # The progression is based on the ratio: value[i] / value[i-1]
        keyword_increase = 0
        value_list_reversed = value_list[:]
        value_list_reversed.reverse()
        for i in range(1, len(value_list_reversed)):
            divisor = value_list_reversed[
                i - 1] if value_list_reversed[i - 1] != 0 else 1
            keyword_increase += value_list_reversed[i] / divisor

        return (keyword_increase, keyword_total_sum)

    def compute_progression(self, field_name, num_day, url_parsed):
        """
            recompute the set top_progression zset
                - Compute the current field progression
                - re-compute the current progression for each first 2*self.MAX_SET_CARDINALITY fields in the top_progression_zset
        """
        redis_progression_name_set = "z_top_progression_" + field_name

        keyword = url_parsed[field_name]
        if keyword is not None:

            #compute the progression of the current word
            keyword_increase, keyword_total_sum = self.compute_progression_word(
                num_day, keyword)

            #re-compute the progression of 2*self.MAX_SET_CARDINALITY
            current_top = self.r_serv_trend.zrevrangebyscore(
                redis_progression_name_set,
                '+inf',
                '-inf',
                withscores=True,
                start=0,
                num=2 * self.MAX_SET_CARDINALITY)
            for word, value in current_top:
                word_inc, word_tot_sum = self.compute_progression_word(
                    num_day, word)
                self.r_serv_trend.zrem(redis_progression_name_set, word)
                if (word_tot_sum > self.THRESHOLD_TOTAL_SUM) and (
                        word_inc > self.THRESHOLD_INCREASE):
                    self.r_serv_trend.zadd(redis_progression_name_set,
                                           float(word_inc), word)

            # filter before adding
            if (keyword_total_sum > self.THRESHOLD_TOTAL_SUM) and (
                    keyword_increase > self.THRESHOLD_INCREASE):
                self.r_serv_trend.zadd(redis_progression_name_set,
                                       float(keyword_increase), keyword)
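
To make the progression metric concrete, a tiny standalone rework of compute_progression_word's arithmetic on made-up counts (most recent day first, matching the order produced by get_date_range):

values = [40, 20, 10, 5, 5, 5]   # hget results, most recent day first (made-up numbers)
total = sum(values)              # 85 -> below THRESHOLD_TOTAL_SUM, so not eligible
oldest_first = values[::-1]      # [5, 5, 5, 10, 20, 40]
increase = sum(
    oldest_first[i] / (oldest_first[i - 1] or 1)  # 'or 1' mirrors the zero-division guard
    for i in range(1, len(oldest_first))
)
print(total, increase)           # 85 8.0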
Example #27
class UrlsExtractor(object):
    def __init__(self):
        self._url_regex = re.compile(
            r'((?:(?:ht|f)tp(?:s?)\:\/\/)'
            r'(?:[!#$&-;=?-\[\]_a-z~]|%[0-9a-f]{2})+)', re.I)
        self._faup = Faup()

    def extract(self, text):
        """This function extract all url http(s) and ftp(s) from text.
        Return a dict, with a key for every second-level domain and
        value a list of disassembled urls (output Faup tool).

        Example disassembled url https://drive.google.com/drive/my-drive:

            {
                'domain': 'google.com',
                'domain_without_tld': 'google',
                'fragment': None,
                'host': 'drive.google.com',
                'port': None,
                'query_string': None,
                'resource_path': '/drive/my-drive',
                'scheme': 'https',
                'subdomain': 'drive',
                'tld': 'com',
                'url': 'https://drive.google.com/drive/my-drive'
            }

        """

        if not isinstance(text, unicode):
            raise NotUnicodeError("The given text is not in unicode")

        self._results = dict()

        for i in self._url_regex.finditer(text):

            try:
                """
                import urlnorm
                url = urlnorm.norm(i.group(0).strip())

                Can't use urlnorm because it can't manage domains like
                http://contentsr,xn--90afavbplfx2a6a5b2a,xn--p1ai/

                After normalization it's impossible to tokenize this kind of URL
                """

                url = i.group(0).strip()
            except:
                raise FailedRegexUrl("Failed parsing regex urls")

            try:
                self._faup.decode(url)
                tokens = self._faup.get()

                # Get results for domain
                domain = self._results.get(tokens['domain'], None)

                if domain:
                    domain.append(tokens)
                else:
                    self._results[tokens['domain']] = [tokens]

            except:
                raise FailedFaupParsing("Failed tokenize url with Faup")

    @property
    def urls_obj(self):
        return self._results

    @property
    def urls_json(self):
        try:
            return json.dumps(self.urls_obj, ensure_ascii=False)
        except:
            raise FailedReturnJsonUrls("Failed make JSON from urls result")
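
A usage sketch (Python 2, matching the unicode check in extract; the sample text is made up):

extractor = UrlsExtractor()
extractor.extract(u"backup at https://drive.google.com/drive/my-drive")
for domain, tokens in extractor.urls_obj.items():
    print("%s: %d url(s)" % (domain, len(tokens)))  # e.g. google.com: 1 url(s)
print(extractor.urls_json)                          # the same data serialized to JSON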
Example #28
            #Send to BrowseWarningPaste
            p.populate_set_out('credential;{}'.format(filepath),
                               'BrowseWarningPaste')

            #Put in form, count occurences, then send to moduleStats
            creds_sites = {}
            site_occurence = re.findall(regex_site_for_stats, content)
            for site in site_occurence:
                site_domain = site[1:-1]
                if site_domain in creds_sites.keys():
                    creds_sites[site_domain] += 1
                else:
                    creds_sites[site_domain] = 1

            for url in sites:
                faup.decode(url)
                domain = faup.get()['domain']
                if domain in creds_sites.keys():
                    creds_sites[domain] += 1
                else:
                    creds_sites[domain] = 1

            for site, num in creds_sites.iteritems(
            ):  # Send for each different site to moduleStats
                print 'credential;{};{};{}'.format(num, site, paste.p_date)
                p.populate_set_out(
                    'credential;{};{};{}'.format(num, site, paste.p_date),
                    'ModuleStats')

            if sites_set:
                print("=======> Probably on : {}".format(', '.join(sites_set)))
Example #29
class Mail2MISP():
    def __init__(self,
                 misp_url,
                 misp_key,
                 verifycert,
                 config,
                 offline=False,
                 urlsonly=False):
        self.offline = offline
        if not self.offline:
            self.misp = ExpandedPyMISP(misp_url,
                                       misp_key,
                                       verifycert,
                                       debug=config.debug)
        self.config = config
        self.urlsonly = urlsonly
        if not hasattr(self.config, 'enable_dns'):
            setattr(self.config, 'enable_dns', True)
        if self.urlsonly is False:
            setattr(self.config, 'enable_dns', False)
        self.debug = self.config.debug
        self.config_from_email_body = {}
        # Init Faup
        self.f = Faup()
        self.sightings_to_add = []

    def load_email(self, pseudofile):
        self.pseudofile = pseudofile
        self.original_mail = message_from_bytes(self.pseudofile.getvalue(),
                                                policy=policy.default)
        self.subject = self.original_mail.get('Subject')
        try:
            self.sender = self.original_mail.get('From')
        except:
            self.sender = "<unknown sender>"

        # Remove words from subject
        for removeword in self.config.removelist:
            self.subject = re.sub(removeword, "", self.subject).strip()

        # Initialize the MISP event
        self.misp_event = MISPEvent()
        self.misp_event.info = f'{self.config.email_subject_prefix} - {self.subject}'
        self.misp_event.distribution = self.config.default_distribution
        self.misp_event.threat_level_id = self.config.default_threat_level
        self.misp_event.analysis = self.config.default_analysis

    def sighting(self, value, source):
        if self.offline:
            raise Exception('The script is running in offline mode, ')
        '''Add a sighting'''
        s = MISPSighting()
        s.from_dict(value=value, source=source)
        self.misp.add_sighting(s)

    def _find_inline_forward(self):
        '''Does the body contains a forwarded email?'''
        for identifier in self.config.forward_identifiers:
            if identifier in self.clean_email_body:
                self.clean_email_body, fw_email = self.clean_email_body.split(
                    identifier)
                return self.forwarded_email(
                    pseudofile=BytesIO(fw_email.encode()))

    def _find_attached_forward(self):
        forwarded_emails = []
        for attachment in self.original_mail.iter_attachments():
            attachment_content = attachment.get_content()
            # Search for email forwarded as attachment
            # I could have more than one, attaching everything.
            if isinstance(attachment_content, message.EmailMessage):
                forwarded_emails.append(
                    self.forwarded_email(
                        pseudofile=BytesIO(attachment_content.as_bytes())))
            else:
                if isinstance(attachment_content, str):
                    attachment_content = attachment_content.encode()
                filename = attachment.get_filename()
                if not filename:
                    filename = 'missing_filename'
                if self.config_from_email_body.get(
                        'attachment'
                ) == self.config.m2m_benign_attachment_keyword:
                    # Attach sane file
                    self.misp_event.add_attribute(
                        'attachment',
                        value=filename,
                        data=BytesIO(attachment_content))
                else:
                    f_object, main_object, sections = make_binary_objects(
                        pseudofile=BytesIO(attachment_content),
                        filename=filename,
                        standalone=False)
                    self.misp_event.add_object(f_object)
                    if main_object:
                        self.misp_event.add_object(main_object)
                        [
                            self.misp_event.add_object(section)
                            for section in sections
                        ]
        return forwarded_emails

    def email_from_spamtrap(self):
        '''The email comes from a spamtrap and should be attached as-is.'''
        raw_body = self.original_mail.get_body(preferencelist=('html',
                                                               'plain'))
        if raw_body:
            self.clean_email_body = html.unescape(
                raw_body.get_payload(decode=True).decode(
                    'utf8', 'surrogateescape'))
        else:
            self.clean_email_body = ''
        return self.forwarded_email(self.pseudofile)

    def forwarded_email(self, pseudofile: BytesIO):
        '''Extracts all possible indicators out of an email and create a MISP event out of it.
        * Gets all relevant Headers
        * Attach the body
        * Create MISP file objects (uses lief if possible)
        * Set all references
        '''
        email_object = EMailObject(pseudofile=pseudofile,
                                   attach_original_mail=True,
                                   standalone=False)
        if email_object.attachments:
            # Create file objects for the attachments
            for attachment_name, attachment in email_object.attachments:
                if not attachment_name:
                    attachment_name = 'NameMissing.txt'
                if self.config_from_email_body.get(
                        'attachment'
                ) == self.config.m2m_benign_attachment_keyword:
                    a = self.misp_event.add_attribute('attachment',
                                                      value=attachment_name,
                                                      data=attachment)
                    email_object.add_reference(a.uuid, 'related-to',
                                               'Email attachment')
                else:
                    f_object, main_object, sections = make_binary_objects(
                        pseudofile=attachment,
                        filename=attachment_name,
                        standalone=False)
                    if self.config.vt_key:
                        try:
                            vt_object = VTReportObject(
                                self.config.vt_key,
                                f_object.get_attributes_by_relation(
                                    'sha256')[0].value,
                                standalone=False)
                            self.misp_event.add_object(vt_object)
                            f_object.add_reference(vt_object.uuid,
                                                   'analysed-with')
                        except InvalidMISPObject as e:
                            print(e)
                            pass
                    self.misp_event.add_object(f_object)
                    if main_object:
                        self.misp_event.add_object(main_object)
                        for section in sections:
                            self.misp_event.add_object(section)
                    email_object.add_reference(f_object.uuid, 'related-to',
                                               'Email attachment')
        self.process_body_iocs(email_object)
        if self.config.spamtrap or self.config.attach_original_mail or self.config_from_email_body.get(
                'attach_original_mail'):
            self.misp_event.add_object(email_object)
        return email_object

    def process_email_body(self):
        mail_as_bytes = self.original_mail.get_body(
            preferencelist=('html', 'plain')).get_payload(decode=True)
        if mail_as_bytes:
            self.clean_email_body = html.unescape(
                mail_as_bytes.decode('utf8', 'surrogateescape'))
            # Check if there are config lines in the body & convert them to a python dictionary:
            #   <config.body_config_prefix>:<key>:<value> => {<key>: <value>}
            self.config_from_email_body = {
                k.strip(): v.strip()
                for k, v in re.findall(
                    f'{self.config.body_config_prefix}:(.*):(.*)',
                    self.clean_email_body)
            }
            if self.config_from_email_body:
                # ... remove the config lines from the body
                self.clean_email_body = re.sub(
                    rf'^{self.config.body_config_prefix}.*\n?',
                    '',
                    html.unescape(
                        self.original_mail.get_body(
                            preferencelist=('html', 'plain')).get_payload(
                                decode=True).decode('utf8',
                                                    'surrogateescape')),
                    flags=re.MULTILINE)
            # Check if autopublish key is present and valid
            if self.config_from_email_body.get(
                    'm2mkey') == self.config.m2m_key:
                if self.config_from_email_body.get('distribution') is not None:
                    self.misp_event.distribution = self.config_from_email_body.get(
                        'distribution')
                if self.config_from_email_body.get('threat_level') is not None:
                    self.misp_event.threat_level_id = self.config_from_email_body.get(
                        'threat_level')
                if self.config_from_email_body.get('analysis') is not None:
                    self.misp_event.analysis = self.config_from_email_body.get(
                        'analysis')
                if self.config_from_email_body.get('publish'):
                    self.misp_event.publish()

            self._find_inline_forward()
        else:
            self.clean_email_body = ''
        self._find_attached_forward()

    def process_body_iocs(self, email_object=None):
        if email_object:
            body = html.unescape(
                email_object.email.get_body(
                    preferencelist=('html',
                                    'plain')).get_payload(decode=True).decode(
                                        'utf8', 'surrogateescape'))
        else:
            body = self.clean_email_body

        # Cleanup body content
        # Depending on the source of the mail, there is some cleanup to do. Ignore lines in body of message
        for ignoreline in self.config.ignorelist:
            body = re.sub(rf'^{ignoreline}.*\n?', '', body, flags=re.MULTILINE)

        # Remove everything after the stopword from the body
        body = body.split(self.config.stopword, 1)[0]

        # Add tags to the event if keywords are found in the mail
        for tag in self.config.tlptags:
            for alternativetag in self.config.tlptags[tag]:
                if alternativetag in body.lower():
                    self.misp_event.add_tag(tag)

        # Prepare extraction of IOCs
        # Refang email data
        body = refang(body)

        # Extract and add hashes
        contains_hash = False
        for h in set(re.findall(hashmarker.MD5_REGEX, body)):
            contains_hash = True
            attribute = self.misp_event.add_attribute(
                'md5', h, enforceWarninglist=self.config.enforcewarninglist)
            if email_object:
                email_object.add_reference(attribute.uuid, 'contains')
            if self.config.sighting:
                self.sightings_to_add.append((h, self.config.sighting_source))
        for h in set(re.findall(hashmarker.SHA1_REGEX, body)):
            contains_hash = True
            attribute = self.misp_event.add_attribute(
                'sha1', h, enforceWarninglist=self.config.enforcewarninglist)
            if email_object:
                email_object.add_reference(attribute.uuid, 'contains')
            if self.config.sighting:
                self.sightings_to_add.append((h, self.config.sighting_source))
        for h in set(re.findall(hashmarker.SHA256_REGEX, body)):
            contains_hash = True
            attribute = self.misp_event.add_attribute(
                'sha256', h, enforceWarninglist=self.config.enforcewarninglist)
            if email_object:
                email_object.add_reference(attribute.uuid, 'contains')
            if self.config.sighting:
                self.sightings_to_add.append((h, self.config.sighting_source))

        if contains_hash:
            [
                self.misp_event.add_tag(tag)
                for tag in self.config.hash_only_tags
            ]

        # # Extract network IOCs
        urllist = []
        urllist += re.findall(urlmarker.WEB_URL_REGEX, body)
        urllist += re.findall(urlmarker.IP_REGEX, body)
        if self.debug:
            syslog.syslog(str(urllist))

        hostname_processed = []

        # Add IOCs and expanded information to MISP
        for entry in set(urllist):
            ids_flag = True
            self.f.decode(entry)

            domainname = self.f.get_domain()
            if domainname in self.config.excludelist:
                # Ignore the entry
                continue

            hostname = self.f.get_host()

            scheme = self.f.get_scheme()
            if scheme:
                scheme = scheme

            resource_path = self.f.get_resource_path()
            if resource_path:
                resource_path = resource_path

            if self.debug:
                syslog.syslog(domainname)

            if domainname in self.config.internallist and self.urlsonly is False:  # Add link to internal reference unless in urlsonly mode
                attribute = self.misp_event.add_attribute(
                    'link',
                    entry,
                    category='Internal reference',
                    to_ids=False,
                    enforceWarninglist=False)
                if email_object:
                    email_object.add_reference(attribute.uuid, 'contains')
            elif domainname in self.config.externallist or self.urlsonly is False:  # External analysis
                attribute = self.misp_event.add_attribute(
                    'link',
                    entry,
                    category='External analysis',
                    to_ids=False,
                    enforceWarninglist=False)
                if email_object:
                    email_object.add_reference(attribute.uuid, 'contains')
            elif domainname in self.config.externallist or self.urlsonly:  # External analysis
                if self.urlsonly:
                    comment = self.subject + " (from: " + self.sender + ")"
                else:
                    comment = ""
                attribute = self.misp.add_attribute(
                    self.urlsonly, {
                        "type": 'link',
                        "value": entry,
                        "category": 'External analysis',
                        "to_ids": False,
                        "comment": comment
                    })
                for tag in self.config.tlptags:
                    for alternativetag in self.config.tlptags[tag]:
                        if alternativetag in self.subject.lower():
                            self.misp.tag(attribute["uuid"], tag)
                            new_subject = comment.replace(alternativetag, '')
                            self.misp.change_comment(attribute["uuid"],
                                                     new_subject)

            else:  # The URL is probably an indicator.
                comment = ""
                if (domainname in self.config.noidsflaglist) or (
                        hostname in self.config.noidsflaglist):
                    ids_flag = False
                    comment = "Known host (mostly for connectivity test or IP lookup)"
                if self.debug:
                    syslog.syslog(str(entry))

                if scheme:
                    if is_ip(hostname):
                        attribute = self.misp_event.add_attribute(
                            'url',
                            entry,
                            to_ids=False,
                            enforceWarninglist=self.config.enforcewarninglist)
                        if email_object:
                            email_object.add_reference(attribute.uuid,
                                                       'contains')
                    else:
                        if resource_path:  # URL has path, ignore warning list
                            attribute = self.misp_event.add_attribute(
                                'url',
                                entry,
                                to_ids=ids_flag,
                                enforceWarninglist=False,
                                comment=comment)
                            if email_object:
                                email_object.add_reference(
                                    attribute.uuid, 'contains')
                        else:  # URL has no path
                            attribute = self.misp_event.add_attribute(
                                'url',
                                entry,
                                to_ids=ids_flag,
                                enforceWarninglist=self.config.
                                enforcewarninglist,
                                comment=comment)
                            if email_object:
                                email_object.add_reference(
                                    attribute.uuid, 'contains')
                    if self.config.sighting:
                        self.sightings_to_add.append(
                            (entry, self.config.sighting_source))

                if hostname in hostname_processed:
                    # Hostname already processed.
                    continue

                hostname_processed.append(hostname)
                if self.config.sighting:
                    self.sightings_to_add.append(
                        (hostname, self.config.sighting_source))

                if self.debug:
                    syslog.syslog(hostname)

                comment = ''
                port = self.f.get_port()
                if port:
                    comment = f'on port: {port}'

                if is_ip(hostname):
                    attribute = self.misp_event.add_attribute(
                        'ip-dst',
                        hostname,
                        to_ids=ids_flag,
                        enforceWarninglist=self.config.enforcewarninglist,
                        comment=comment)
                    if email_object:
                        email_object.add_reference(attribute.uuid, 'contains')
                else:
                    related_ips = []
                    if HAS_DNS and self.config.enable_dns:
                        try:
                            syslog.syslog(hostname)
                            for rdata in dns.resolver.query(hostname, 'A'):
                                if self.debug:
                                    syslog.syslog(str(rdata))
                                related_ips.append(rdata.to_text())
                        except Exception as e:
                            if self.debug:
                                syslog.syslog(str(e))

                    if related_ips:
                        hip = MISPObject(name='ip-port')
                        hip.add_attribute(
                            'hostname',
                            value=hostname,
                            to_ids=ids_flag,
                            enforceWarninglist=self.config.enforcewarninglist,
                            comment=comment)
                        for ip in set(related_ips):
                            hip.add_attribute('ip',
                                              type='ip-dst',
                                              value=ip,
                                              to_ids=False,
                                              enforceWarninglist=self.config.
                                              enforcewarninglist)
                        self.misp_event.add_object(hip)
                        if email_object:
                            email_object.add_reference(hip.uuid, 'contains')
                    else:
                        if self.urlsonly is False:
                            attribute = self.misp_event.add_attribute(
                                'hostname',
                                value=hostname,
                                to_ids=ids_flag,
                                enforceWarninglist=self.config.
                                enforcewarninglist,
                                comment=comment)
                        if email_object:
                            email_object.add_reference(attribute.uuid,
                                                       'contains')

    def add_event(self):
        '''Add event on the remote MISP instance.'''

        # Add additional tags depending on others
        tags = []
        for tag in [t.name for t in self.misp_event.tags]:
            if self.config.dependingtags.get(tag):
                tags += self.config.dependingtags.get(tag)

        # Add additional tags according to configuration
        for malware in self.config.malwaretags:
            if malware.lower() in self.subject.lower():
                tags += self.config.malwaretags.get(malware)
        if tags:
            for tag in tags:
                self.misp_event.add_tag(tag)

        has_tlp_tag = False
        for tag in [t.name for t in self.misp_event.tags]:
            if tag.lower().startswith('tlp'):
                has_tlp_tag = True
        if not has_tlp_tag:
            self.misp_event.add_tag(self.config.tlptag_default)

        if self.offline:
            return self.misp_event.to_json()
        event = self.misp.add_event(self.misp_event, pythonify=True)
        if self.config.sighting:
            for value, source in self.sightings_to_add:
                self.sighting(value, source)
        return event
Example #30
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pprint
from pyfaup.faup import Faup

f = Faup()
f.decode("www.météo.fr")
pprint.pprint(f.get())

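A quick follow-up sketch (not part of the original example; it only assumes the pyfaup accessors already used in the other snippets here, such as get_scheme(), get_host() and get_tld()) showing how to read individual URL components after decode() instead of pretty-printing the whole dictionary:

# Sketch only: read single components after decode();
# the values shown in the comments are illustrative.
from pyfaup.faup import Faup

f = Faup()
f.decode("https://www.météo.fr/previsions")
print(f.get_scheme())  # scheme, e.g. https
print(f.get_host())    # host part of the URL
print(f.get_tld())     # top-level domain, e.g. fr
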
Example #31
0
File: test.py Project: sebdraven/faup
#!/usr/bin/python

from pyfaup.faup import Faup
import sys
import codecs
import binascii

f = Faup()
file_urls=codecs.open(sys.argv[1],'r','ascii',errors='ignore')
urls=file_urls.readlines()
for url in urls:
    url=url.replace('\n','')
    #print("We decode the url: %s" % (url))
    #if sys.version.split('.')[0].split('.')[0]=='3':
    f.decode(bytes(url,'utf-8'), False)
    #if sys.version.split('.')[0].split('.')[0]=='2':
    #        f.decode(bytes(url),False)
    #data = f.get()
    f.get_tld()
    #f.get_domain()
    #f.get_subdomain()
    #print(f.get_tld())
    #print(f.get_domain())
    #print("URL TLD: %s" % (f.get_tld()))
Example #32
0
        url_arg = sys.argv[1]

    if urls_file is None:
        source_info = "arg:%s" % (sys.argv[1])
    else:
        source_info = "file:%s" % (sys.argv[1])

    urlw_log = UrlwLog(source_info)
    urlw_log.open()
    urlw_log.custom_log("Starting...")
    urlw_p = UrlwPlugins(urlw_log)

    fauplib = Faup()

    if source_info.startswith("arg:"):
        fauplib.decode(sys.argv[1])
        faup_object = fauplib.get()
        for plugin in urlw_p.plugins_list:
            urlw_p.run(plugin, sys.argv[1], faup_object)

    elif source_info.startswith("file:"):
        urls = urls_file.readlines()
        for url in urls:
            fauplib.decode(url)
            faup_object = fauplib.get()
            for plugin in urlw_p.plugins_list:
                urlw_p.run(plugin, url, faup_object)

        urls_file.close()

    urlw_log.custom_log("Done")
Example #33
0
    def get_port(self):
        f = Faup()
        f.decode(self.url)
        return f.get_port()
Example #34
0
class Query():
    def __init__(self, loglevel: int = logging.DEBUG):
        self.__init_logger(loglevel)
        self.fex = Faup()
        self.cache = Redis(unix_socket_path=get_socket_path('cache'),
                           db=1,
                           decode_responses=True)

    def __init_logger(self, loglevel) -> None:
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    def _cache_set(self, key, value, field=None):
        if field is None:
            # redis-py >= 3.x expects setex(name, time, value)
            self.cache.setex(key, 3600, json.dumps(value))
        else:
            self.cache.hset(key, field, json.dumps(value))
            self.cache.expire(key, 3600)

    def _cache_get(self, key, field=None):
        if field is None:
            value_json = self.cache.get(key)
        else:
            value_json = self.cache.hget(key, field)
        if value_json is not None:
            return json.loads(value_json)
        return None

    def to_bool(self, s):
        """
        Converts the given string to a boolean.
        """
        return s.lower() in ('1', 'true', 'yes', 'on')

    def get_submissions(self, url, day=None):
        if day is None:
            day = date.today().isoformat()
        else:
            day = day.isoformat()
        return self.cache.zscore(f'{day}_submissions', url)

    def get_mail_sent(self, url, day=None):
        if day is None:
            day = date.today().isoformat()
        else:
            day = day.isoformat()
        self.fex.decode(url)
        host = self.fex.get_host()
        return self.cache.sismember(f'{day}_mails', host)

    def set_mail_sent(self, url, day=None):
        if day is None:
            day = date.today().isoformat()
        else:
            day = day.isoformat()
        self.fex.decode(url)
        host = self.fex.get_host()
        return self.cache.sadd(f'{day}_mails', host)

    def is_valid_url(self, url):
        cached = self._cache_get(url, 'valid')
        key = f'{date.today().isoformat()}_submissions'
        self.cache.zincrby(key, 1, url)
        if cached is not None:
            return cached
        if url.startswith('hxxp'):
            url = 'http' + url[4:]
        elif not url.startswith('http'):
            url = 'http://' + url
        logging.debug("Checking validity of URL: " + url)
        self.fex.decode(url)
        scheme = self.fex.get_scheme()
        host = self.fex.get_host()
        if scheme is None or host is None:
            reason = "Not a valid http/https URL/URI"
            return False, url, reason
        self._cache_set(url, (True, url, None), 'valid')
        return True, url, None

    def is_ip(self, host):
        try:
            ipaddress.ip_address(host)
            return True
        except ValueError:
            return False

    def try_resolve(self, url):
        self.fex.decode(url)
        host = self.fex.get_host().lower()
        if self.is_ip(host):
            return True, None
        try:
            ipaddr = dns.resolver.query(host, 'A')
        except Exception:
            reason = "DNS server problem. Check resolver settings."
            return False, reason
        if not ipaddr:
            reason = "Host " + host + " does not exist."
            return False, reason
        return True, None

    def get_urls(self, url, depth=1):
        if depth > 5:
            print('Too many redirects.')
            return

        def meta_redirect(content):
            c = content.lower()
            soup = BeautifulSoup(c, "html.parser")
            for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
                if result:
                    out = result["content"].split(";")
                    if len(out) == 2:
                        wait, text = out
                        try:
                            a, url = text.split('=', 1)
                            return url.strip()
                        except Exception:
                            print(text)
            return None

        resolve, reason = self.try_resolve(url)
        if not resolve:
            # FIXME: inform that the domain does not resolve
            yield url
            return

        logging.debug(f"Making HTTP connection to {url}")

        headers = {
            'User-agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'
        }
        try:
            response = requests.get(url,
                                    allow_redirects=True,
                                    headers=headers,
                                    timeout=15,
                                    verify=False)
        except Exception:
            # That one can fail (DNS for example)
            # FIXME: inform that the get failed
            yield url
            return
        if response.history is not None:
            for h in response.history:
                # Yield the urls in the order we find them
                yield h.url

        yield response.url

        meta_redir_url = meta_redirect(response.content)
        if meta_redir_url is not None:
            depth += 1
            if not meta_redir_url.startswith('http'):
                self.fex.decode(url)
                base = '{}://{}'.format(self.fex.get_scheme(),
                                        self.fex.get_host())
                port = self.fex.get_port()
                if port is not None:
                    base += f':{port}'
                if not meta_redir_url.startswith('/'):
                    # relative redirect. resource_path has the initial '/'
                    if self.fex.get_resource_path() is not None:
                        base += self.fex.get_resource_path()
                if not base.endswith('/'):
                    base += '/'
                meta_redir_url = base + meta_redir_url
            for url in self.get_urls(meta_redir_url, depth):
                yield url

    def url_list(self, url):
        cached = self._cache_get(url, 'list')
        if cached is not None:
            return cached
        list_urls = []
        for u in self.get_urls(url):
            if u is None or u in list_urls:
                continue
            list_urls.append(u)
        self._cache_set(url, list_urls, 'list')
        return list_urls

    def dns_resolve(self, url):
        cached = self._cache_get(url, 'dns')
        if cached is not None:
            return cached
        self.fex.decode(url)
        host = self.fex.get_host().lower()
        ipv4 = None
        ipv6 = None
        if self.is_ip(host):
            if ':' in host:
                try:
                    socket.inet_pton(socket.AF_INET6, host)
                    ipv6 = [host]
                except Exception:
                    pass
            else:
                try:
                    socket.inet_aton(host)
                    ipv4 = [host]
                except Exception:
                    pass
        else:
            try:
                ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
            except Exception:
                logging.debug("No IPv4 address assigned to: " + host)
            try:
                ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
            except Exception:
                logging.debug("No IPv6 address assigned to: " + host)
        self._cache_set(url, (ipv4, ipv6), 'dns')
        return ipv4, ipv6

    def phish_query(self, url, key, query):
        cached = self._cache_get(query, 'phishtank')
        if cached is not None:
            return cached
        postfields = {'url': quote(query), 'format': 'json', 'app_key': key}
        response = requests.post(url, data=postfields)
        res = response.json()
        if res["meta"]["status"] == "success":
            if res["results"]["in_database"]:
                self._cache_set(query, res["results"]["phish_detail_page"],
                                'phishtank')
                return res["results"]["phish_detail_page"]
            else:
                # no information
                pass
        elif res["meta"]["status"] == 'error':
            # Inform the user?
            # errormsg = res["errortext"]
            pass
        return None

    def sphinxsearch(self, server, port, url, query):
        # WARNING: too dangerous to have on the public interface
        return ''
        """
        if not sphinx:
            return None
        cached = _cache_get(query, 'sphinx')
        if cached is not None:
            return cached
        client = sphinxapi.SphinxClient()
        client.SetServer(server, port)
        client.SetMatchMode(2)
        client.SetConnectTimeout(5.0)
        result = []
        res = client.Query(query)
        if res.get("matches") is not None:
            for ticket in res["matches"]:
                ticket_id = ticket["id"]
                ticket_link = url + str(ticket_id)
                result.append(ticket_link)
        _cache_set(query, result, 'sphinx')
        return result

        """

    def vt_query_url(self, url, url_up, key, query, upload=True):
        cached = self._cache_get(query, 'vt')
        if cached is not None and cached[2] is not None:
            return cached
        parameters = {"resource": query, "apikey": key}
        if upload:
            parameters['scan'] = 1
        response = requests.post(url, data=parameters)
        if response.text is None or len(response.text) == 0:
            return None
        res = response.json()
        msg = res["verbose_msg"]
        link = res.get("permalink")
        positives = res.get("positives")
        total = res.get("total")
        self._cache_set(query, (msg, link, positives, total), 'vt')
        return msg, link, positives, total

    def gsb_query(self, url, query):
        cached = self._cache_get(query, 'gsb')
        if cached is not None:
            return cached
        param = '1\n' + query
        response = requests.post(url, data=param)
        status = response.status_code
        if status == 200:
            self._cache_set(query, response.text, 'gsb')
            return response.text

    '''
    def urlquery_query(url, key, query):
        return None
        cached = _cache_get(query, 'urlquery')
        if cached is not None:
            return cached
        try:
            urlquery.url = url
            urlquery.key = key
            response = urlquery.search(query)
        except Exception:
            return None
        if response['_response_']['status'] == 'ok':
            if response.get('reports') is not None:
                total_alert_count = 0
                for r in response['reports']:
                    total_alert_count += r['urlquery_alert_count']
                    total_alert_count += r['ids_alert_count']
                    total_alert_count += r['blacklist_alert_count']
                    _cache_set(query, total_alert_count, 'urlquery')
                    return total_alert_count
            else:
                return None
    '''

    def process_emails(self, emails, ignorelist, replacelist):
        to_return = list(set(emails))
        for mail in reversed(to_return):
            for ignorelist_entry in ignorelist:
                if re.search(ignorelist_entry, mail, re.I):
                    if mail in to_return:
                        to_return.remove(mail)
            for k, v in list(replacelist.items()):
                if re.search(k, mail, re.I):
                    if k in to_return:
                        to_return.remove(k)
                        to_return += v
        return to_return

    def whois(self, server, port, domain, ignorelist, replacelist):
        cached = self._cache_get(domain, 'whois')
        if cached is not None:
            return cached
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(15)
        try:
            s.connect((server, port))
        except Exception:
            print("Connection problems - check WHOIS server")
            print(("WHOIS request while problem occurred: ", domain))
            print(("WHOIS server: {}:{}".format(server, port)))
            return None
        if domain.startswith('http'):
            self.fex.decode(domain)
            d = self.fex.get_domain().lower()
        else:
            d = domain
        s.send(("{}\r\n".format(d)).encode())
        response = b''
        while True:
            d = s.recv(4096)
            response += d
            if d == b'':
                break
        s.close()
        match = re.findall(r'[\w\.-]+@[\w\.-]+', response.decode())
        emails = self.process_emails(match, ignorelist, replacelist)
        if len(emails) == 0:
            return None
        list_mail = list(set(emails))
        self._cache_set(domain, list_mail, 'whois')
        return list_mail

    def pdnscircl(self, url, user, passwd, q):
        cached = self._cache_get(q, 'pdns')
        if cached is not None:
            return cached
        pdns = PyPDNS(url, basic_auth=(user, passwd))
        response = pdns.query(q)
        all_uniq = []
        for e in reversed(response):
            host = e['rrname'].lower()
            if host in all_uniq:
                continue
            else:
                all_uniq.append(host)
        response = (len(all_uniq), all_uniq[:5])
        self._cache_set(q, response, 'pdns')
        return response

    def psslcircl(self, url, user, passwd, q):
        cached = self._cache_get(q, 'pssl')
        if cached is not None:
            return cached
        pssl = PyPSSL(url, basic_auth=(user, passwd))
        response = pssl.query(q)
        if response.get(q) is not None:
            certinfo = response.get(q)
            entries = {}
            for sha1 in certinfo['certificates']:
                entries[sha1] = []
                if certinfo['subjects'].get(sha1):
                    for value in certinfo['subjects'][sha1]['values']:
                        entries[sha1].append(value)
            self._cache_set(q, entries, 'pssl')
            return entries
        return None

    def eupi(self, url, key, q):
        cached = self._cache_get(q, 'eupi')
        if cached is not None:
            return cached
        eu = PyEUPI(key, url)
        response = eu.search_url(url=q)
        if response.get('results'):
            r = response.get('results')[0]['tag_label']
            self._cache_set(q, r, 'eupi')
            return r
        eu.post_submission(q)
        return None

    def bgpranking(self, ip):
        cached = self._cache_get(ip, 'ipasn')
        if cached is not None:
            asn = cached['asn']
            prefix = cached['prefix']
        else:
            ipasn = IPASNHistory()
            response = ipasn.query(ip)
            if 'response' not in response:
                asn = None
                prefix = None
            else:
                entry = response['response'][list(response['response'].keys())[0]]
                if entry:
                    self._cache_set(ip, entry, 'ipasn')
                    asn = entry['asn']
                    prefix = entry['prefix']
                else:
                    asn = None
                    prefix = None

        if not asn or not prefix:
            # asn, prefix, asn_descr, rank, position, known_asns
            return None, None, None, None, None, None

        cached = self._cache_get(ip, 'bgpranking')
        if cached is not None:
            return cached
        bgpranking = BGPRanking()
        response = bgpranking.query(asn,
                                    date=(date.today() -
                                          timedelta(1)).isoformat())
        if 'response' not in response or not response['response']:
            return None, None, None, None, None, None
        to_return = (asn, prefix, response['response']['asn_description'],
                     response['response']['ranking']['rank'],
                     response['response']['ranking']['position'],
                     response['response']['ranking']['total_known_asns'])
        self._cache_set(ip, to_return, 'bgpranking')
        return to_return

    def lookyloo(self, url):
        cached = self._cache_get(url, 'lookyloo')
        if cached is not None:
            return cached
        lookyloo = Lookyloo()
        lookyloo_perma_url = lookyloo.enqueue(url)
        if lookyloo_perma_url:
            self._cache_set(url, lookyloo_perma_url, 'lookyloo')
            return lookyloo_perma_url
        return None

    def _deserialize_cached(self, entry):
        to_return = {}
        redirects = []
        h = self.cache.hgetall(entry)
        for key, value in h.items():
            v = json.loads(value)
            if key == 'list':
                redirects = v
                continue
            to_return[key] = v
        return to_return, redirects

    def get_url_data(self, url):
        data, redirects = self._deserialize_cached(url)
        if data.get('dns') is not None:
            ipv4, ipv6 = data['dns']
            ip_data = {}
            if ipv4 is not None:
                for ip in ipv4:
                    info, _ = self._deserialize_cached(ip)
                    ip_data[ip] = info
            if ipv6 is not None:
                for ip in ipv6:
                    info, _ = self._deserialize_cached(ip)
                    ip_data[ip] = info
            if len(ip_data) > 0:
                data.update(ip_data)
        return {url: data}, redirects

    def cached(self, url, digest=False):
        url_data, redirects = self.get_url_data(url)
        to_return = [url_data]
        for u in redirects:
            if u == url:
                continue
            data, redir = self.get_url_data(u)
            to_return.append(data)
        if digest:
            return {'result': to_return, 'digest': self.digest(to_return)}
        return {'result': to_return}

    def ip_details_digest(self, ips, all_info, all_asns, all_mails):
        to_return = ''
        for ip in ips:
            to_return += '\t' + ip + '\n'
            data = all_info[ip]
            if data.get('bgpranking'):
                to_return += '\t\tis announced by {} ({}). Position {}/{}.\n'.format(
                    data['bgpranking'][2], data['bgpranking'][0],
                    data['bgpranking'][4], data['bgpranking'][5])
                all_asns.add('{} ({})'.format(data['bgpranking'][2],
                                              data['bgpranking'][0]))
            if data.get('whois'):
                all_mails.update(data.get('whois'))
        return to_return

    def digest(self, data):
        to_return = ''
        all_mails = set()
        all_asns = set()
        for entry in data:
            # Each URL we're redirected to
            for url, info in entry.items():
                # info contains the information we got for the URL.
                to_return += '\n{}\n'.format(url)
                if 'whois' in info:
                    all_mails.update(info['whois'])
                if 'lookyloo' in info:
                    to_return += '\tLookyloo permanent URL: {}\n'.format(
                        info['lookyloo'])
                if 'vt' in info and len(info['vt']) == 4:
                    if info['vt'][2] is not None:
                        to_return += '\t{} out of {} positive detections in VT - {}\n'.format(
                            info['vt'][2], info['vt'][3], info['vt'][1])
                    else:
                        to_return += '\t{} - {}\n'.format(
                            info['vt'][0], info['vt'][1])
                if 'gsb' in info:
                    to_return += '\tKnown as malicious on Google Safe Browsing: {}\n'.format(
                        info['gsb'])
                if 'phishtank' in info:
                    to_return += '\tKnown on PhishTank: {}\n'.format(
                        info['phishtank'])

                if 'dns' in info:
                    ipv4, ipv6 = info['dns']
                    if ipv4 is not None:
                        to_return += self.ip_details_digest(
                            ipv4, info, all_asns, all_mails)
                    if ipv6 is not None:
                        to_return += self.ip_details_digest(
                            ipv6, info, all_asns, all_mails)
        return to_return, list(all_mails), list(all_asns)
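
A hypothetical usage sketch for the Query class above (not from the original project): it assumes the surrounding module imports (logging, the Redis socket helper, dns) are available and a Redis cache is reachable, and it only calls methods defined in the snippet.

# Sketch only: exercise a few Query methods under the assumptions stated above.
q = Query(loglevel=logging.INFO)

ok, normalized_url, reason = q.is_valid_url('hxxp://www.example.com/login')
if ok:
    # Resolve the host and walk the redirect chain (both results are cached).
    ipv4, ipv6 = q.dns_resolve(normalized_url)
    print(normalized_url, ipv4, ipv6)
    for redirect in q.url_list(normalized_url):
        print('redirects to:', redirect)
else:
    print('rejected:', reason)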
Example #35
0
File: test.py Project: aguinet/faup
#!/usr/bin/python

from pyfaup.faup import Faup
import sys
import codecs
import binascii

f = Faup()
file_urls=codecs.open(sys.argv[1],'r','ascii',errors='ignore')
urls=file_urls.readlines()

for url in urls:
    url=url.replace('\n','')
    print("URL:[%s]" % (url))
    f.decode(url)
    print("-----> Extracted TLD:%s" % f.get_tld())

Example #36
0
                    if MX_values[0] > is_critical:
                        publisher.warning(to_print)
                        #Send to duplicate
                        p.populate_set_out(filename, 'Duplicate')
                        p.populate_set_out('mail;{}'.format(filename), 'alertHandler')

                        msg = 'infoleak:automatic-detection="mail";{}'.format(filename)
                        p.populate_set_out(msg, 'Tags')

                        #create country statistics
                        date = datetime.datetime.now().strftime("%Y%m")
                        for mail in MX_values[1]:
                            print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
                            p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')

                            faup.decode(mail)
                            tld = faup.get()['tld']
                            server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail])

                    else:
                        publisher.info(to_print)
                #create country statistics
                for mail in MX_values[1]:
                    print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
                    p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')

            prec_filename = filename

        else:
            publisher.debug("Script Mails is Idling 10s")
            print('Sleeping')
Example #37
0
class Credential(AbstractModule):
    """
    Credential module for AIL framework
    """

    # Split usernames on special characters or on upper case; distinguish parts that start with upper case
    REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
    REDIS_KEY_NUM_USERNAME = '******'
    REDIS_KEY_NUM_PATH = 'uniqNumForUsername'
    REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
    REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
    REDIS_KEY_ALL_PATH_SET = 'AllPath'
    REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
    REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'


    def __init__(self):
        super(Credential, self).__init__()

        self.faup = Faup()

        self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
        self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
        self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

        # Database
        config_loader = ConfigLoader.ConfigLoader()
        self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        # Config values
        self.minimumLengthThreshold = config_loader.get_config_int("Credential", "minimumLengthThreshold")
        self.criticalNumberToAlert = config_loader.get_config_int("Credential", "criticalNumberToAlert")

        self.max_execution_time = 30

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 10

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")


    def compute(self, message):

        id, count = message.split()
        item = Item(id)

        item_content = item.get_content()

        # Extract all credentials
        all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)

        if all_credentials:
            nb_cred = len(all_credentials)
            message = f'Checked {nb_cred} credentials found.'

            all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time)
            if all_sites:
                discovered_sites = ', '.join(all_sites)
                message += f' Related websites: {discovered_sites}'

            print(message)

            to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'

            # num of creds above threshold, publish an alert
            if nb_cred > self.criticalNumberToAlert:
                print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
                self.redis_logger.warning(to_print)

                # Send to duplicate
                self.send_message_to_queue(item.get_id(), 'Duplicate')

                msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
                self.send_message_to_queue(msg, 'Tags')

                site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False)

                creds_sites = {}

                for site in site_occurence:
                    site_domain = site[1:-1].lower()
                    if site_domain in creds_sites.keys():
                        creds_sites[site_domain] += 1
                    else:
                        creds_sites[site_domain] = 1

                for url in all_sites:
                    self.faup.decode(url)
                    domain = self.faup.get()['domain']
                    ## TODO: # FIXME: remove me, check faup version
                    try:
                        domain = domain.decode()
                    except:
                        pass
                    if domain in creds_sites.keys():
                        creds_sites[domain] += 1
                    else:
                        creds_sites[domain] = 1

                for site, num in creds_sites.items(): # Send for each different site to moduleStats

                    mssg = f'credential;{num};{site};{item.get_date()}'
                    print(mssg)
                    self.send_message_to_queue(mssg, 'ModuleStats')

                if all_sites:
                    discovered_sites = ', '.join(all_sites)
                    print(f"=======> Probably on : {discovered_sites}")

                date = datetime.now().strftime("%Y%m")
                for cred in all_credentials:
                    maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
                    self.faup.decode(maildomains)
                    tld = self.faup.get()['tld']
                    ## TODO: # FIXME: remove me
                    try:
                        tld = tld.decode()
                    except:
                        pass
                    self.server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
            else:
                self.redis_logger.info(to_print)
                print(f'found {nb_cred} credentials')

            # For searching credential in termFreq
            for cred in all_credentials:
                cred = cred.split('@')[0] #Split to ignore mail address

                # unique number attached to unique path
                uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})

                # unique number attached to unique username
                uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
                if uniq_num_cred is None:
                    # cred do not exist, create new entries
                    uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME)
                    self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred})
                    self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred})

                # Add the mapping between the credential and the path
                self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path)

                # Split credentials on capital letters, numbers, dots and so on
                # Add the split to redis, each split point towards its initial credential unique number
                splitedCred = re.findall(Credential.REGEX_CRED, cred)
                for partCred in splitedCred:
                    if len(partCred) > self.minimumLengthThreshold:
                        self.server_cred.sadd(partCred, uniq_num_cred)
Example #38
0
                ## TODO: add MAIL trackers

            valid_mx = check_mx_record(set_mxdomains, dns_server)

            item_date = Item.get_item_date(item_id)

            num_valid_email = 0
            for domain_mx in valid_mx:
                num_valid_email += len(dict_mxdomains_email[domain_mx])

                for email in dict_mxdomains_email[domain_mx]:
                    msg = 'mail;{};{};{}'.format(1, email, item_date)
                    p.populate_set_out(msg, 'ModuleStats')

                    # Create country stats
                    faup.decode(email)
                    tld = faup.get()['tld']
                    try:
                        tld = tld.decode()
                    except:
                        pass
                    server_statistics.hincrby(
                        'mail_by_tld:{}'.format(item_date), tld, 1)

            msg = 'Mails;{};{};{};Checked {} e-mail(s);{}'.format(
                Item.get_source(item_id), item_date,
                Item.get_item_basename(item_id), num_valid_email, item_id)

            if num_valid_email > mail_threshold:
                print('{}    Checked {} e-mail(s)'.format(
                    item_id, num_valid_email))
Example #39
0
File: test.py Project: adulau/faup
#!/usr/bin/python

from pyfaup.faup import Faup
import sys
import codecs
import binascii

f = Faup()
file_urls=codecs.open(sys.argv[1],'r','ascii',errors='ignore')
urls=file_urls.readlines()
for url in urls:
    url=url.replace('\n','')
    print("URL:[%s]" % (url))
    f.decode(url, False)
    print("-----> Extracted TLD:%s" % f.get_tld())

Example #40
0
File: test.py Project: ylmrx/faup
import re
import sys
import codecs
import binascii

from pyfaup.faup import Faup

# dynamically list all Faup's methods
methods = []
for m in dir(Faup):
    if re.search("^get_", m):
        methods.append(m)
methods.remove("get_version")

# run
if len(sys.argv) != 2:
    print "%s <file containing 1 url per line>" % sys.argv[0]
    sys.exit(0)

f = Faup()
file_urls = codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore')
urls = file_urls.readlines()

for url in urls:
    url = url.replace('\n', '')
    print("URL:[%s]" % (url))
    f.decode(url)
    #    print("-----> Extracted TLD:%s" % f.get_tld())
    #    print("-----> Extracted TLD:%s" % f.get_domain_without_tld())

    for m in methods:
        fct = getattr(f, m)
        print "\t%s : %s" % (re.sub("^get_", "", m), fct())
Example #41
0
                        msg = 'infoleak:automatic-detection="mail";{}'.format(
                            filename)
                        p.populate_set_out(msg, 'Tags')

                        #create country statistics
                        date = datetime.datetime.now().strftime("%Y%m")
                        for mail in MX_values[1]:
                            print('mail;{};{};{}'.format(
                                MX_values[1][mail], mail, PST.p_date))
                            p.populate_set_out(
                                'mail;{};{};{}'.format(MX_values[1][mail],
                                                       mail, PST.p_date),
                                'ModuleStats')

                            faup.decode(mail)
                            tld = faup.get()['tld']
                            server_statistics.hincrby('mail_by_tld:' + date,
                                                      tld, MX_values[1][mail])

                    else:
                        publisher.info(to_print)
                #create country statistics
                for mail in MX_values[1]:
                    print('mail;{};{};{}'.format(MX_values[1][mail], mail,
                                                 PST.p_date))
                    p.populate_set_out(
                        'mail;{};{};{}'.format(MX_values[1][mail], mail,
                                               PST.p_date), 'ModuleStats')

            prec_filename = filename
Example #42
0
class Web(AbstractModule):
    """
    Web module for AIL framework
    """

    # Used to prevent concatenation with empty fields due to url parsing
    def avoidNone(self, a_string):
        if a_string is None:
            return ""
        else:
            return a_string

    def __init__(self):
        """
        Init Web
        """
        super(Web, self).__init__()

        # REDIS Cache
        self.r_serv2 = redis.StrictRedis(
            host=self.process.config.get("Redis_Cache", "host"),
            port=self.process.config.getint("Redis_Cache", "port"),
            db=self.process.config.getint("Redis_Cache", "db"),
            decode_responses=True)

        # Country to log as critical
        self.cc_critical = self.process.config.get("Url", "cc_critical")

        # FUNCTIONS #

        self.faup = Faup()

        # Protocol file path
        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "protocolsfile"))
        # Get all uri from protocolsfile (Used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1]+"|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

        self.prec_filename = None

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        filename, score = message.split()

        if self.prec_filename is None or filename != self.prec_filename:
            domains_list = set()
            PST = Paste.Paste(filename)
            client = ip2asn()

            detected_urls = PST.get_regex(self.url_regex)
            if len(detected_urls) > 0:
                to_print = 'Web;{};{};{};'.format(
                    PST.p_source, PST.p_date, PST.p_name)
                self.redis_logger.info('{}Detected {} URL;{}'.format(
                    to_print, len(detected_urls), PST.p_rel_path))

            for url in detected_urls:
                self.redis_logger.debug("match regex: %s" % (url))

                # self.redis_logger.debug("match regex search: %s"%(url))

                to_send = "{} {} {}".format(url, PST._get_p_date(), filename)
                self.process.populate_set_out(to_send, 'Url')
                self.redis_logger.debug("url_parsed: %s" % (to_send))

                self.faup.decode(url)
                domain = self.faup.get_domain()
                subdomain = self.faup.get_subdomain()

                self.redis_logger.debug('{} Published'.format(url))

                if subdomain is not None:
                    # TODO: # FIXME: remove me
                    try:
                        subdomain = subdomain.decode()
                    except:
                        pass

                if domain is not None:
                    # TODO: # FIXME: remove me
                    try:
                        domain = domain.decode()
                    except:
                        pass
                    domains_list.add(domain)

                hostl = self.avoidNone(subdomain) + self.avoidNone(domain)

                try:
                    socket.setdefaulttimeout(1)
                    ip = socket.gethostbyname(hostl)
                    # If the resolver does not return any IPv4 address,
                    # the ASN/CC lookup is skipped.
                    l = client.lookup(ip, qType='IP')
                except ipaddress.AddressValueError:
                    self.redis_logger.debug(
                        f'ASN/CC lookup failed for IP {ip}')
                    continue
                except:
                    self.redis_logger.debug(
                        f'Resolver IPv4 address failed for host {hostl}')
                    continue

                cc = getattr(l, 'cc')
                asn = ''
                if getattr(l, 'asn') is not None:
                    asn = getattr(l, 'asn')[2:]  # remove leading b'

                # EU is not an official ISO 3166 code (but used by RIPE
                # IP allocation)
                if cc is not None and cc != "EU":
                    self.redis_logger.debug('{};{};{};{}'.format(hostl, asn, cc,
                                                                 pycountry.countries.get(alpha_2=cc).name))
                    if cc == self.cc_critical:
                        to_print = 'Url;{};{};{};Detected {} {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            hostl, cc)
                        self.redis_logger.info(to_print)
                else:
                    self.redis_logger.debug('{};{};{}'.format(hostl, asn, cc))

            A_values = lib_refine.checking_A_record(self.r_serv2,
                                                    domains_list)

            if A_values[0] >= 1:

                pprint.pprint(A_values)
                # self.redis_logger.info('Url;{};{};{};Checked {} URL;{}'.format(
                #     PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))

        self.prec_filename = filename
Example #43
0
class LibInjection(AbstractModule):
    """docstring for LibInjection module."""

    def __init__(self):
        super(LibInjection, self).__init__()

        self.faup = Faup()

        config_loader = ConfigLoader()
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        url, id = message.split()

        self.faup.decode(url)
        url_parsed = self.faup.get()
        ## TODO: # FIXME: remove me
        try:
            resource_path = url_parsed['resource_path'].encode()
        except:
            resource_path = url_parsed['resource_path']

        ## TODO: # FIXME: remove me
        try:
            query_string = url_parsed['query_string'].encode()
        except:
            query_string = url_parsed['query_string']

        result_path = {'sqli' : False}
        result_query = {'sqli' : False}

        if resource_path is not None:
            result_path = pylibinjection.detect_sqli(resource_path)
            #print(f'path is sqli : {result_path}')

        if query_string is not None:
            result_query = pylibinjection.detect_sqli(query_string)
            #print(f'query is sqli : {result_query}')

        if result_path['sqli'] is True or result_query['sqli'] is True:
            item = Item(id)
            item_id = item.get_id()
            print(f"Detected (libinjection) SQL in URL: {item_id}")
            print(urllib.request.unquote(url))

            to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Add tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            #statistics
            ## TODO: # FIXME: remove me
            try:
                tld = url_parsed['tld'].decode()
            except:
                tld = url_parsed['tld']
            if tld is not None:
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
Example #44
0
            msg = 'infoleak:automatic-detection="credential";{}'.format(filepath)
            p.populate_set_out(msg, 'Tags')

            #Put in form, count occurences, then send to moduleStats
            creds_sites = {}
            site_occurence = re.findall(regex_site_for_stats, content)
            for site in site_occurence:
                site_domain = site[1:-1]
                if site_domain in creds_sites.keys():
                    creds_sites[site_domain] += 1
                else:
                    creds_sites[site_domain] = 1

            for url in sites:
                faup.decode(url)
                domain = faup.get()['domain']
                ## TODO: # FIXME: remove me
                try:
                    domain = domain.decode()
                except:
                    pass
                if domain in creds_sites.keys():
                    creds_sites[domain] += 1
                else:
                    creds_sites[domain] = 1

            for site, num in creds_sites.items(): # Send for each different site to moduleStats

                mssg = 'credential;{};{};{}'.format(num, site, paste.p_date)
                print(mssg)
Example #45
0
for h in hashlist_md5:
    misp.add_hashes(new_event, md5=h)
for h in hashlist_sha1:
    misp.add_hashes(new_event, sha1=h)
for h in hashlist_sha256:
    misp.add_hashes(new_event, sha256=h)

if (len(hashlist_md5) > 0) or (len(hashlist_sha1) > 0) or (len(hashlist_sha256)
                                                           > 0):
    for tag in hash_only_tags:
        misp.add_tag(new_event, tag)

# Add IOCs and expanded information to MISP
for entry in urllist:
    ids_flag = True
    f.decode(entry)
    domainname = f.get_domain().decode('utf-8', 'ignore')
    hostname = f.get_host().decode('utf-8', 'ignore')
    try:
        schema = f.get_scheme().decode('utf-8', 'ignore')
    except:
        schema = False
    if debug:
        syslog.syslog(domainname)
    if domainname not in excludelist:
        if domainname in internallist:
            misp.add_named_attribute(new_event,
                                     'link',
                                     entry,
                                     category='Internal reference',
                                     to_ids=False,
Example #46
0
                item_id,
                item_content,
                max_time=max_execution_time,
                r_set=False)

            creds_sites = {}

            for site in site_occurence:
                site_domain = site[1:-1].lower()
                if site_domain in creds_sites.keys():
                    creds_sites[site_domain] += 1
                else:
                    creds_sites[site_domain] = 1

            for url in all_sites:
                faup.decode(url)
                domain = faup.get()['domain']
                ## TODO: # FIXME: remove me
                try:
                    domain = domain.decode()
                except:
                    pass
                if domain in creds_sites.keys():
                    creds_sites[domain] += 1
                else:
                    creds_sites[domain] = 1

            for site, num in creds_sites.items(
            ):  # Send for each different site to moduleStats

                mssg = 'credential;{};{};{}'.format(
Example #47
0
                domain_url = 'http://{}'.format(domain)

                print()
                print()
                print(
                    '\033[92m------------------START CRAWLER------------------\033[0m'
                )
                print('crawler type:     {}'.format(type_hidden_service))
                print(
                    '\033[92m-------------------------------------------------\033[0m'
                )
                print('url:         {}'.format(url))
                print('domain:      {}'.format(domain))
                print('domain_url:  {}'.format(domain_url))

                faup.decode(domain)
                onion_domain = faup.get()['domain'].decode()

                if not r_onion.sismember(
                        'blacklist_{}'.format(type_hidden_service),
                        domain) and not r_onion.sismember(
                            'blacklist_{}'.format(type_hidden_service),
                            onion_domain):

                    date = datetime.datetime.now().strftime("%Y%m%d")
                    date_month = datetime.datetime.now().strftime("%Y%m")

                    if not r_onion.sismember(
                            'month_{}_up:{}'.format(
                                type_hidden_service,
                                date_month), domain) and not r_onion.sismember(