コード例 #1
0
def getmisp_urls(key, url, timeframe):
    """Fetch URL attributes from a MISP instance and return, for attributes
    whose domain is an IPv4 literal, a list of dicts with
    domain/eventid/category/timestamp.

    :param key: MISP API key (sent as the Authorization header)
    :param url: MISP search endpoint to POST to
    :param timeframe: value for the 'last' search parameter (e.g. '7d')
    """
    response_domains = []
    headers = {
        'Authorization': '{}'.format(key),
        'Content-type': 'application/json',
        'Accept': 'application/json'
    }
    payload = '{ "returnFormat": "json", "type": "url", "last": "%s", "enforceWarninglist": true }' % timeframe
    # NOTE(review): verify=False disables TLS certificate checking — confirm
    # this is intentional for the target MISP instance.
    response = requests.post(url, headers=headers, data=payload, verify=False)
    json_response = json.loads(response.text)
    fp = Faup()
    try:
        for attr in json_response['response']['Attribute']:
            url = attr['value']
            eventid = attr['event_id']
            if eventid not in ignore_eventid:
                category = attr['category']
                timestamp = datetime.datetime.utcfromtimestamp(
                    int(attr['timestamp'])).strftime('%Y-%m-%d')
                fp.decode(url)
                domain = fp.get_domain()
                # NOTE(review): this keeps only domains that look like IPv4
                # addresses — confirm the filter is not meant to be inverted.
                if re.match(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", domain):
                    response_domains.append({
                        'domain': domain,
                        'eventid': eventid,
                        'category': category,
                        'timestamp': timestamp
                    })

        return response_domains
    except (KeyError, TypeError):
        # BUG FIX: was a bare `except:` that swallowed every exception,
        # including SystemExit/KeyboardInterrupt. Only a malformed/empty MISP
        # response is tolerated now; partial results are still returned.
        return response_domains
コード例 #2
0
 def __init__(self,
              misp_url,
              misp_key,
              verifycert,
              config,
              offline=False,
              urlsonly=False):
     """Connect to MISP (unless offline) and normalize the config object.

     :param misp_url: base URL of the MISP instance
     :param misp_key: API key used to authenticate against MISP
     :param verifycert: whether to verify the MISP TLS certificate
     :param config: configuration object; missing attributes are defaulted here
     :param offline: when True, skip creating the ExpandedPyMISP client
     :param urlsonly: when True, keep the configured enable_dns value
     """
     self.offline = offline
     if not self.offline:
         self.misp = ExpandedPyMISP(misp_url,
                                    misp_key,
                                    verifycert,
                                    debug=config.debug)
     self.config = config
     self.urlsonly = urlsonly
     # Default enable_dns to True when absent, but force it off whenever we
     # are NOT in urls-only mode.
     if not hasattr(self.config, 'enable_dns'):
         setattr(self.config, 'enable_dns', True)
     if self.urlsonly is False:
         setattr(self.config, 'enable_dns', False)
     self.debug = self.config.debug
     self.config_from_email_body = {}
     # Default ignore_nullsize_attachments to False when absent from config.
     if not hasattr(self.config, 'ignore_nullsize_attachments'):
         setattr(self.config, 'ignore_nullsize_attachments', False)
     self.ignore_nullsize_attachments = self.config.ignore_nullsize_attachments
     # Init Faup
     self.f = Faup()
     self.sightings_to_add = []
コード例 #3
0
    def __init__(self):
        """Set up the Credential module: regexes, redis connections, thresholds."""
        super(Credential, self).__init__()

        # URL parser used on extracted sites.
        self.faup = Faup()

        # Detection regexes: web URLs, credential pairs, sites for statistics.
        self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
        self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
        self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

        # Database connections.
        loader = ConfigLoader.ConfigLoader()
        self.server_cred = loader.get_redis_conn("ARDB_TermCred")
        self.server_statistics = loader.get_redis_conn("ARDB_Statistics")

        # Tunable thresholds.
        self.minimumLengthThreshold = loader.get_config_int("Credential", "minimumLengthThreshold")
        self.criticalNumberToAlert = loader.get_config_int("Credential", "criticalNumberToAlert")

        # Hard limit on regex extraction time (seconds).
        self.max_execution_time = 30

        # Waiting time in seconds between two processed messages.
        self.pending_seconds = 10

        # Send module state to logs.
        self.redis_logger.info(f"Module {self.module_name} initialized")
コード例 #4
0
ファイル: crawler.py プロジェクト: mdeous/OSINT
    def run(self):
        i = 0
        while (True):
            i = i + 1
            if i % 1000 == 0:
                time.sleep(10)
            url = self.r.rpop('crawl')
            fex = Faup()
            if url:
                print "url found: " + url
                fex.decode(url)
                domain = fex.get_host()
                entry = self.db.new_domaines.find_one({'domaine': domain})
                if entry == None:
                    print "record: " + domain
                    self.db.new_domaines.save({
                        'domaine': domain,
                        'urls': [url]
                    })

                urls_stored = entry['urls']
                if not url in urls_stored:
                    urls_stored.append(url)
                    entry['urls'] = urls_stored
                    self.db.new_domaines.save(entry)
コード例 #5
0
ファイル: crawler.py プロジェクト: 5l1v3r1/OSINT-1
    def run(self):
        i = 0
        while (True):
            i = i + 1
            if i % 1000 == 0:
                time.sleep(10)
            self.lock.acquire()
            self.r.switchDB(1)
            url = self.r.rpop('crawl')
            self.lock.release()
            # print url
            fex = Faup()
            if url:
                print "url found: " + url
                try:
                    fex.decode(url)
                    domain = fex.get_host()
                    entry = self.db.new_domaines.find_one({'domaine': domain})
                    if entry == None:
                        print "record: " + domain
                        self.db.new_domaines.save({
                            'domaine': domain,
                            'urls': [url]
                        })

                    urls_stored = entry['urls']
                    if not url in urls_stored:
                        urls_stored.append(url)
                        entry['urls'] = urls_stored
                        self.db.new_domaines.save(entry)
                except:
                    print "parsing fault " + url
コード例 #6
0
def dns_resolve(url):
    """Resolve the host of `url` and return (ipv4_list, ipv6_list), caching
    the result. Either list may be None when no address of that family exists.

    :param url: URL whose host part is resolved (IP literals are validated,
                not resolved)
    """
    cached = _cache_get(url, 'dns')
    if cached is not None:
        return cached
    fex = Faup()
    fex.decode(url)
    host = fex.get_host().lower()
    ipv4 = None
    ipv6 = None
    if is_ip(host):
        # Host is already an IP literal: validate it instead of resolving.
        if ':' in host:
            try:
                socket.inet_pton(socket.AF_INET6, host)
                ipv6 = [host]
            except OSError:
                # BUG FIX: was a bare `except:`; inet_pton raises OSError
                # (socket.error) on an invalid literal.
                pass
        else:
            try:
                socket.inet_aton(host)
                ipv4 = [host]
            except OSError:
                # BUG FIX: was a bare `except:`; inet_aton raises OSError.
                pass
    else:
        try:
            ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
        except Exception:
            # BUG FIX: was a bare `except:` which also caught SystemExit/
            # KeyboardInterrupt; DNS failures are expected and logged only.
            logging.debug("No IPv4 address assigned to: " + host)
        try:
            ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
        except Exception:
            logging.debug("No IPv6 address assigned to: " + host)
    _cache_set(url, (ipv4, ipv6), 'dns')
    return ipv4, ipv6
コード例 #7
0
def whois(server, port, domain, ignorelist, replacelist):
    """Query a WHOIS server for `domain` and return the de-duplicated list of
    e-mail addresses found in the response (cached), or None when none found.

    :param server: WHOIS server hostname
    :param port: WHOIS server TCP port
    :param domain: domain or URL to look up (URLs are reduced to their domain)
    :param ignorelist: forwarded to process_emails for filtering
    :param replacelist: forwarded to process_emails for rewriting
    """
    cached = _cache_get(domain, 'whois')
    if cached is not None:
        return cached
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(15)
    try:
        s.connect((server, port))
    except Exception:
        # NOTE(review): exits with status 0 even though this is an error
        # path — confirm whether a non-zero exit code was intended.
        print("Connection problems - check WHOIS server")
        print(("WHOIS request while problem occurred: ", domain))
        print(("WHOIS server: {}:{}".format(server, port)))
        sys.exit(0)
    # URLs are reduced to their registered domain before querying.
    if domain.startswith('http'):
        fex = Faup()
        fex.decode(domain)
        d = fex.get_domain().lower()
    else:
        d = domain
    # NOTE(review): sending/accumulating str over the socket only works on
    # Python 2; Python 3 would need bytes here — confirm target version.
    s.send(d + "\r\n")
    response = ''
    while True:
        d = s.recv(4096)
        response += d
        if d == '':
            break
    s.close()
    # Extract e-mail addresses from the raw WHOIS text.
    match = re.findall(r'[\w\.-]+@[\w\.-]+', response)
    emails = process_emails(match, ignorelist, replacelist)
    if len(emails) == 0:
        return None
    # Cache and return the unique addresses.
    list_mail = list(set(emails))
    _cache_set(domain, list_mail, 'whois')
    return list_mail
コード例 #8
0
def tld_extract(domain):
    """Split `domain` (bytes) into (subdomain, domain-without-tld, tld).

    Each element is returned as bytes, with b"" standing in when faup
    yields nothing for that component.
    """
    # Cache a single Faup instance on __builtins__ so repeated calls reuse it.
    # NOTE(review): stashing state in __builtins__ is fragile (it is a module,
    # not a dict, in __main__); a module-level global would be conventional.
    if "_faup" not in __builtins__:
        __builtins__["_faup"] = Faup()
    _faup = __builtins__["_faup"]
    # BUG FIX: .decode("utf-8") yields str, so strip() must take a str
    # argument; the original passed b"." which raises TypeError on Python 3.
    _faup.decode(domain.decode("utf-8").strip("."))
    return (_faup.get_subdomain() or b"", _faup.get_domain_without_tld() or b"", _faup.get_tld() or b"")
コード例 #9
0
    def __init__(self):
        """
        Init Urls: Faup parser, regex cache key and the URL-matching regex
        built from the configured protocols file.
        """
        super(Urls, self).__init__()

        self.faup = Faup()
        self.redis_cache_key = regex_helper.generate_redis_cache_key(
            self.module_name)

        # Build the scheme alternation ("http|https|...") from the protocols
        # file; each line contributes its text minus the trailing newline.
        protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))
        with open(protocolsfile_path, 'r') as scheme_file:
            uri_scheme = "|".join(line[:-1] for line in scheme_file)

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:[a-zA-Z]{2,15}))(?:\:[0-9]+)*(?:/?(?:[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)"

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")
コード例 #10
0
def get_urls(url, depth=1):
    """Generator: yield `url` and every URL reached from it through HTTP
    redirects and <meta http-equiv="refresh"> redirects (recursive, capped
    at 5 meta-redirect levels).

    :param url: starting URL
    :param depth: current meta-redirect recursion depth
    """
    if depth > 5:
        print('Too many redirects.')
        return
    fex = Faup()

    def meta_redirect(content):
        # Return the target of a <meta http-equiv="refresh"> tag, or None.
        c = content.lower()
        soup = BeautifulSoup(c, "html.parser")
        for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
            if result:
                out = result["content"].split(";")
                if len(out) == 2:
                    wait, text = out
                    a, url = text.split('=', 1)
                    return url.strip()
        return None

    resolve, reason = try_resolve(fex, url)
    if not resolve:
        # FIXME: inform that the domain does not resolve
        yield url
        return

    logging.debug("Making HTTP connection to " + url)

    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'}
    try:
        response = requests.get(url, allow_redirects=True, headers=headers,
                                timeout=15, verify=False)
    except:
        # That one can fail (DNS for example)
        # FIXME: inform that the get failed
        yield url
        return
    # Yield every intermediate redirect hop, then the final URL.
    # NOTE(review): requests sets response.history to a list, never None.
    if response.history is not None:
        for h in response.history:
            # Yeld the urls in the order we find them
            yield h.url

    yield response.url

    # Follow an HTML meta refresh; relative targets are resolved against the
    # scheme/host/port/path of the current URL.
    meta_redir_url = meta_redirect(response.content)
    if meta_redir_url is not None:
        depth += 1
        if not meta_redir_url.startswith('http'):
            fex.decode(url)
            base = '{}://{}'.format(fex.get_scheme(), fex.get_host())
            port = fex.get_port()
            if port is not None:
                base += ':{}'.format(port)
            if not meta_redir_url.startswith('/'):
                # relative redirect. resource_path has the initial '/'
                if fex.get_resource_path() is not None:
                    base += fex.get_resource_path()
            if not base.endswith('/'):
                base += '/'
            meta_redir_url = base + meta_redir_url
        for url in get_urls(meta_redir_url, depth):
            yield url
コード例 #11
0
ファイル: datamodels.py プロジェクト: jgru/spamtrap-system
 def __post_init__(self):
     """Derive tld/domain/subdomain from the mail address host when no
     domain was supplied at construction time."""
     if self.domain is not None:
         return
     # Parse the part after the last '@' of the address.
     parser = Faup()
     parser.decode(self.address.split("@")[-1])
     self.top_level_domain = parser.get_tld()
     self.domain = parser.get_domain()
     self.subdomain = parser.get_subdomain()
コード例 #12
0
    def __init__(self):
        """Set up the LibInjection module: URL parser and statistics DB."""
        super(LibInjection, self).__init__()

        # Parser applied to incoming URLs.
        self.faup = Faup()

        # Statistics database connection.
        self.server_statistics = ConfigLoader().get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")
コード例 #13
0
 def process(self):
     """Print one 'tld,domain,reversed-subdomains' CSV line per known domain,
     with 'www' labels removed and empty fields collapsed."""
     list_domains = self.db['new_domaines'].distinct('domaine')
     fex = Faup()
     for domain in list_domains:
         url = 'http://' + str(domain)
         fex.decode(url, False)
         # BUG FIX: the original chained .replace(',,', ',') onto the result
         # of print(), which is None on Python 3 (AttributeError). Build the
         # line first, then print it.
         subdomains = ','.join(fex.get_subdomain().split('.')[::-1]).replace('www', '')
         line = (fex.get_tld() + ',' + fex.get_domain() + ',' + subdomains).replace(',,', ',')
         print(line)
コード例 #14
0
ファイル: datamodels.py プロジェクト: jgru/spamtrap-system
    def __post_init__(self):
        """Populate the URL components (scheme, tld, domain, subdomain, path)
        by parsing self.url with faup."""
        parser = Faup()
        parser.decode(self.url)
        self.scheme = parser.get_scheme()
        self.top_level_domain = parser.get_tld()
        self.domain = parser.get_domain()
        self.subdomain = parser.get_subdomain()
        self.path = parser.get_resource_path()
コード例 #15
0
ファイル: urls.py プロジェクト: AysadKozanoglu/spamscope
    def initialize(self, stormconf, context):
        """Storm bolt initialization: URL parser, upstream bolt names,
        per-mail state and keyword lists."""
        super(Urls, self).initialize(stormconf, context)

        # URL parser shared by this bolt.
        self.faup = Faup()

        # Names of the bolts feeding the Phishing bolt.
        self.input_bolts = set(context["source->stream->grouping"].keys())

        # Accumulator keyed per mail.
        self._mails = {}

        # Target/subject keyword lists.
        self._load_lists()
コード例 #16
0
ファイル: test_utils.py プロジェクト: zhanghetong/spamscope
    def test_urls_extractor(self):
        """urls_extractor must group found URLs by registered domain, and must
        tolerate bodies with IDN/punycode hosts that trigger unicode errors."""

        # Body with two URLs each for apache.org, python.org and twitter.com
        # (duplicates within a domain are expected to be kept as a list).
        body = """
        bla bla https://tweetdeck.twitter.com/random bla bla
        http://kafka.apache.org/documentation.html
        http://kafka.apache.org/documentation1.html
        bla bla bla https://docs.python.org/2/library/re.html bla bla
        bla bla bla https://docs.python.org/2/library/re_2.html> bla bla
        <p>https://tweetdeck.twitter.com/random</p> bla bla
        <p>https://tweetdeck.twitter.com/random_2</p>
        """

        # Raw mail containing a punycode (xn--) host.
        body_unicode_error = """
        Return-Path: <>
        Delivered-To: [email protected]
        Received: (qmail 15482 invoked from network); 29 Nov 2015 12:28:40 -000
        Received: from unknown (HELO 112.149.154.61) (112.149.154.61)
        by smtp.customers.net with SMTP; 29 Nov 2015 12:28:40 -0000
        Received: from unknown (HELO localhost)
            ([email protected]@110.68.103.81)
                by 112.149.154.61 with ESMTPA; Sun, 29 Nov 2015 21:29:24 +0900
                From: [email protected]
                To: [email protected]
                Subject: Gain your male attrctiveness

                Give satisfaction to your loved one
                http://contents.xn--90afavbplfx2a6a5b2a.xn--p1ai/
        """
        parser = Faup()

        urls = utils.urls_extractor(parser, body)
        self.assertIsInstance(urls, dict)
        self.assertIn("apache.org", urls)
        self.assertIn("python.org", urls)
        self.assertIn("twitter.com", urls)

        # Each domain collected exactly two URLs from `body`.
        for i in ("apache.org", "python.org", "twitter.com"):
            self.assertIsInstance(urls[i], list)
            self.assertEqual(len(urls[i]), 2)

        # The punycode host must be extracted without raising.
        urls = utils.urls_extractor(parser, body_unicode_error)
        self.assertIsInstance(urls, dict)
        self.assertIn("xn--90afavbplfx2a6a5b2a.xn--p1ai", urls)
        self.assertEqual(len(urls["xn--90afavbplfx2a6a5b2a.xn--p1ai"]), 1)
コード例 #17
0
def is_valid_url(url):
    """Normalize `url` (refang hxxp, default to http://) and check that it
    parses to a scheme and host.

    Returns (True, normalized_url, None) on success or
    (False, url, reason) on failure. Successful results are cached.
    """
    cached = _cache_get(url, 'valid')
    # Count every submission of this URL for today's statistics.
    # NOTE(review): zincrby(key, url) matches redis-py < 3 argument order;
    # redis-py >= 3 expects (name, amount, value) — verify installed version.
    key = date.today().isoformat() + '_submissions'
    r_cache.zincrby(key, url)
    if cached is not None:
        return cached
    fex = Faup()
    # Refang 'hxxp(s)' and add a scheme when none is present.
    if url.startswith('hxxp'):
        url = 'http' + url[4:]
    elif not url.startswith('http'):
        url = 'http://' + url
    logging.debug("Checking validity of URL: " + url)
    fex.decode(url)
    scheme = fex.get_scheme()
    host = fex.get_host()
    if scheme is None or host is None:
        # NOTE(review): negative results are never cached — confirm intended.
        reason = "Not a valid http/https URL/URI"
        return False, url, reason
    _cache_set(url, (True, url, None), 'valid')
    return True, url, None
コード例 #18
0
    def __init__(self):
        """Set up the WebStats module: trending redis connection and the CSV
        output paths for protocol, TLD and domain statistics."""
        super(WebStats, self).__init__()

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))
        # Sent to the logging a description of the module
        self.redis_logger.info("Makes statistics about valid URL")

        # Wake up every 5 minutes.
        self.pending_seconds = 5 * 60

        # REDIS #
        self.r_serv_trend = redis.StrictRedis(
            host=self.process.config.get("ARDB_Trending", "host"),
            port=self.process.config.get("ARDB_Trending", "port"),
            db=self.process.config.get("ARDB_Trending", "db"),
            decode_responses=True)

        # FILE CURVE SECTION #
        # Output CSV + input list for protocol trending.
        self.csv_path_proto = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolstrending_csv"))
        self.protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))

        # Output CSV + input list for TLD trending.
        self.csv_path_tld = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "tldstrending_csv"))
        self.tldsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "tldsfile"))

        # Output CSV for domain trending.
        self.csv_path_domain = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "domainstrending_csv"))

        # URL parser and graph regeneration flag.
        self.faup = Faup()
        self.generate_new_graph = False
コード例 #19
0
    def __init__(self):
        """
        Init Web: redis cache, critical-country setting, Faup parser and the
        URL-matching regex built from the configured protocols file.
        """
        super(Web, self).__init__()

        # REDIS Cache
        self.r_serv2 = redis.StrictRedis(
            host=self.process.config.get("Redis_Cache", "host"),
            port=self.process.config.getint("Redis_Cache", "port"),
            db=self.process.config.getint("Redis_Cache", "db"),
            decode_responses=True)

        # Country code to log as critical.
        self.cc_critical = self.process.config.get("Url", "cc_critical")

        # FUNCTIONS #

        self.faup = Faup()

        # Build the scheme alternation ("http|https|...") from the protocols
        # file; each line contributes its text minus the trailing newline.
        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "protocolsfile"))
        with open(protocolsfile_path, 'r') as scheme_file:
            uri_scheme = "|".join(line[:-1] for line in scheme_file)

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

        # Name of the previously processed file.
        self.prec_filename = None

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))
コード例 #20
0
ファイル: crawler.py プロジェクト: 5l1v3r1/OSINT-1
    def sort(self, elem_links, url):
        """Triage extracted links: links matching a scheme/domain/extension
        filter are remembered in redis DB 2; the rest are pushed onto the
        'crawl' queue (DB 1) and the in-memory queue.
        """
        fex = Faup()
        f = Filters()
        f.load()
        self.r.switchDB(1)
        # NOTE(review): these three flags are initialized once for the whole
        # call and never reset inside the loop, so a single filtered link
        # prevents every LATER link from being queued — confirm intended.
        extend = True
        domainfilter = True
        schemefilter = True
        try:
            for link in elem_links:
                new_url = link
                self.r.switchDB(2)
                # Skip links already recorded in either DB.
                if not self.r.get(new_url) and new_url:
                    self.r.switchDB(1)
                    if not self.r.get(new_url):
                        fex.decode(new_url)
                        domain = fex.get_host()
                        # Filtered scheme: park the link in DB 2.
                        if f.isfilteredscheme(fex.get_scheme()):
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)
                            schemefilter = False
                        # Filtered domain: park the link in DB 2.
                        if f.isfiltereddomains(domain):
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)
                            domainfilter = False
                        # Filtered file extension: park the link in DB 2.
                        if f.isfilteredextention(fex.get_resource_path()):
                            extend = False
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)

                        # Unfiltered link: queue it for crawling.
                        if extend and domainfilter and schemefilter:
                            self.r.switchDB(1)
                            self.r.rpush('crawl', new_url)
                            self.queue.append(new_url)
        except TypeError as e:
            print "TypeError"
コード例 #21
0
# Redis key names used by the Credential script's lookup sets/mappings.
REDIS_KEY_NUM_PATH = 'uniqNumForUsername'
REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
REDIS_KEY_ALL_PATH_SET = 'AllPath'
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'

if __name__ == "__main__":
    # Publisher/process wiring for the Credential module.
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    module_name = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    faup = Faup()

    # Detection regexes (web URLs, credential pairs, sites for statistics).
    regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    redis_cache_key = regex_helper.generate_redis_cache_key(module_name)

    # Main loop: poll the input set, idle 10s when nothing is queued.
    # NOTE(review): the processing of a non-None message appears truncated in
    # this excerpt.
    while True:
        message = p.get_from_set()

        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            time.sleep(10)
            continue
コード例 #22
0
 def initialize(self, stormconf, context):
     """Storm bolt initialization: load the URL whitelist and create the
     Faup parser used by URL-handling subclasses."""
     super(AbstractUrlsHandlerBolt, self).initialize(stormconf, context)
     self._load_whitelist()
     self._parser_faup = Faup()
コード例 #23
0
ファイル: mail_to_misp.py プロジェクト: jmlynch/mail_to_misp
        position = new_position
email_data = t_email_data

# Refang email data
email_data = refang(email_data)

## Extract various IOCs

# Collect candidate URLs and raw IPs from the mail body.
urllist = list()
urllist += re.findall(urlmarker.WEB_URL_REGEX, email_data)
urllist += re.findall(urlmarker.IP_REGEX, email_data)
if debug:
    syslog.syslog(str(urllist))

# Init Faup
f = Faup()

# Add tags according to configuration: any malware name found in the mail
# subject maps to a list of MISP tags.
for malware in malwaretags:
    if malware in email_subject.lower():
        for tag in malwaretags[malware]:
            misp.add_tag(new_event, tag)

# Extract and add hashes
hashlist_md5 = re.findall(hashmarker.MD5_REGEX, email_data)
hashlist_sha1 = re.findall(hashmarker.SHA1_REGEX, email_data)
hashlist_sha256 = re.findall(hashmarker.SHA256_REGEX, email_data)

for h in hashlist_md5:
    misp.add_hashes(new_event, md5=h)
for h in hashlist_sha1:
コード例 #24
0
class TestPhishing(unittest.TestCase):
    """Tests for the phishing scoring module (form detection, target URL
    checks and overall phishing score)."""

    # Shared URL parser for all tests.
    # NOTE(review): urls_extractor is called here as (body, faup) while the
    # other example calls it as (parser, body) — argument orders differ
    # between projects; verify against the local utils implementation.
    faup = Faup()

    def setUp(self):
        """Parse the fixture mails and load target/subject keyword lists."""
        parser = mailparser.parse_from_file(mail_thug)
        self.email = parser.mail
        self.attachments = parser.attachments

        parser = mailparser.parse_from_file(mail_form)
        self.email_form = parser.mail

        body = self.email_form.get("body")
        self.urls = utils.urls_extractor(body, self.faup)

        d = {
            "generic": "conf/keywords/targets.example.yml",
            "custom": "conf/keywords/targets_english.example.yml"
        }
        self.targets = utils.load_keywords_dict(d)

        d = {
            "generic": "conf/keywords/subjects.example.yml",
            "custom": "conf/keywords/subjects_english.example.yml"
        }
        self.subjects = utils.load_keywords_list(d)

    def test_ParserError(self):
        """A mail that trips the parser must not be flagged as a form."""
        parser = mailparser.parse_from_file(mail_test_6)
        body = parser.mail.get("body")
        flag_form = phishing.check_form(body)
        self.assertFalse(flag_form)

    def test_none_values(self):
        """check_phishing must cope with missing body/subject/from keys."""
        email = copy.deepcopy(self.email)
        email.pop("body", None)
        email.pop("subjects", None)
        email.pop("from", None)

        phishing.check_phishing(email=email,
                                attachments=self.attachments,
                                urls_body=self.urls,
                                urls_attachments=self.urls,
                                target_keys=self.targets,
                                subject_keys=self.subjects)

    def test_check_form(self):
        """check_form is True for the form fixture, False for the thug one."""
        body = self.email_form.get("body")
        flag_form = phishing.check_form(body)
        self.assertTrue(flag_form)

        body = self.email.get("body")
        flag_form = phishing.check_form(body)
        self.assertFalse(flag_form)

    def test_form_value_error(self):
        """A body that raises ValueError internally must yield False."""
        parser = mailparser.parse_from_file(mail_test_5)
        body = parser.mail.get("body")
        flag_form = phishing.check_form(body)
        self.assertFalse(flag_form)

    def test_check_urls(self):
        """At least one extracted URL must match a configured target."""
        flag = False
        if any(
                phishing.check_urls(self.urls, i)
                for i in self.targets.values()):
            flag = True

        self.assertTrue(flag)

    def test_check_phishing(self):
        """Full scoring run: expected score and expanded-score components."""
        results = phishing.check_phishing(email=self.email,
                                          attachments=self.attachments,
                                          urls_body=self.urls,
                                          urls_attachments=self.urls,
                                          target_keys=self.targets,
                                          subject_keys=self.subjects)

        self.assertIsInstance(results, dict)
        self.assertEqual(results["score"], 123)
        self.assertIn("filename_attachments", results["score_expanded"])
        self.assertIn("mail_subject", results["score_expanded"])
        self.assertIn("mail_body", results["score_expanded"])
        self.assertIn("mail_from", results["score_expanded"])
        self.assertIn("urls_body", results["score_expanded"])
        self.assertIn("urls_attachments", results["score_expanded"])
        self.assertIn("Test", results["targets"])
        self.assertTrue(results["with_phishing"])

    def test_check_phishing_form(self):
        """The form fixture must contribute a mail_form score component."""
        results = phishing.check_phishing(email=self.email_form,
                                          attachments=self.attachments,
                                          urls_body=self.urls,
                                          urls_attachments=self.urls,
                                          target_keys=self.targets,
                                          subject_keys=self.subjects)

        self.assertIn("mail_form", results["score_expanded"])
コード例 #25
0
ファイル: issue36.py プロジェクト: ylmrx/faup
#!/usr/bin/python
# Regression test for faup issue #36: constructing a second Faup object in
# the same process must not crash. The duplicate construction below is
# intentional — do not "clean it up".

from pyfaup.faup import Faup

f = Faup()
f = Faup()

コード例 #26
0
 # Look for an existing MISP attribute for this URL; keep the last UUID seen.
 uuid = None
 # The following needs fixes for ExpandedPyMisp
 for attribs in res_search['response']['Attribute']:
     uuid = attribs['uuid']
 if uuid is not None:
     print("URL is already present.")
     # add sighting
     # if MISP allows to sight on add, we should implement it here, too
     misp.sighting(uuid=uuid, source="URLabuse")
     sys.exit(0)
 # This is obsolete
 #event = misp.get(misp_id)
 #existing_event = MISPEvent()
 #existing_event.load(event)
 redirect_count = 0
 # Parse the URL's host; faup returns bytes here, hence the .decode() calls.
 fex = Faup()
 fex.decode(url)
 hostname = fex.get_host().lower()
 screenshot = hostname.decode() + '.png'
 # Build a 'phishing' MISP object: hostname, then the initial URL followed by
 # each redirect hop found in the URLabuse response.
 mispObject = MISPObject('phishing')
 mispObject.add_attribute('hostname', value=hostname.decode())
 for key in response['result']:
     u = list(key.keys())[0]
     if redirect_count == 0:
         comment = "initial URL"
         mispObject.add_attribute('url', value=u, comment=comment)
     else:
         comment = "redirect URL: {}"
         mispObject.add_attribute('url-redirect',
                                  value=u,
                                  comment=comment.format(redirect_count))
コード例 #27
0
class TestUtils(unittest.TestCase):
    """Unit tests for the SpamScope ``utils`` helpers.

    Relies on module-level fixtures (``mail``, ``text_file``,
    ``mail_test_7``, ``mail_test_11``, …) and on the example configuration
    files under ``conf/`` being present in the working directory.
    """

    # Shared Faup URL parser, reused by the URL-extraction tests below.
    faup = Faup()

    def setUp(self):
        # Most reformat tests call this function; keep a short alias.
        self.f = utils.reformat_output

        p = mailparser.parse_from_file(mail)
        self.mail_obj = p.mail
        # NOTE(review): 'analisys_date' is misspelled, but it is a runtime
        # dict key — changing it here would diverge from the production code.
        self.mail_obj['analisys_date'] = datetime.datetime.utcnow().isoformat()

        self.attachments = MailAttachments.withhashes(p.attachments)
        self.attachments.run()

        # Elasticsearch routing parameters expected by reformat_output.
        self.parameters = {
            'elastic_index_mail': "spamscope_mails-",
            'elastic_type_mail': "spamscope",
            'elastic_index_attach': "spamscope_attachments-",
            'elastic_type_attach': "spamscope"
        }

    def test_mail_item(self):
        """MailItem stores its fields and orders instances by priority."""
        mail = utils.MailItem(filename=text_file,
                              mail_server="test_mail_server",
                              mailbox="test_mailbox",
                              priority=1,
                              trust="test_trust",
                              mail_type=1,
                              headers=["header1", "header2"])

        self.assertEqual(mail.filename, text_file)
        self.assertEqual(mail.mail_server, "test_mail_server")
        self.assertEqual(mail.mailbox, "test_mailbox")
        self.assertEqual(mail.priority, 1)
        self.assertEqual(mail.trust, "test_trust")
        self.assertIsInstance(mail.timestamp, float)
        self.assertEqual(mail.mail_type, 1)
        self.assertIsInstance(mail.headers, list)
        self.assertEqual(mail.headers, ["header1", "header2"])

        mail_1 = utils.MailItem(filename=text_file,
                                mail_server="test_mail_server",
                                mailbox="test_mailbox",
                                priority=1,
                                trust="test_trust")

        mail_2 = utils.MailItem(filename=text_file,
                                mail_server="test_mail_server",
                                mailbox="test_mailbox",
                                priority=2,
                                trust="test_trust")

        mail_3 = utils.MailItem(filename=text_file,
                                mail_server="test_mail_server",
                                mailbox="test_mailbox",
                                priority=1,
                                trust="test_trust")

        # Lower priority value sorts first; equal priorities do not compare <.
        self.assertTrue(mail_1 < mail_2)
        self.assertFalse(mail_1 < mail_3)

    def test_load_conf(self):
        """load_config parses YAML into a dict and raises on missing files."""
        c = "conf/spamscope.example.yml"
        conf = utils.load_config(c)
        self.assertIsInstance(conf, dict)

        with self.assertRaises(RuntimeError):
            utils.load_config("conf/fake.yml")

    def test_write_payload(self):
        """write_payload round-trips a payload to disk without corruption."""
        with open(text_file) as f:
            payload = f.read()
        sha1_origin = fingerprints(payload).sha1

        # NOTE(review): str.encode("base64") is Python-2-only; this suite
        # presumably still targets Python 2 — confirm before porting.
        file_path = utils.write_payload(payload.encode("base64"), ".txt")
        self.assertEqual(os.path.splitext(file_path)[-1], ".txt")

        with open(file_path) as f:
            payload = f.read()
        sha1_clone = fingerprints(payload).sha1

        # Identical SHA1 proves the decode/write cycle preserved the bytes.
        self.assertEqual(sha1_origin, sha1_clone)
        self.assertTrue(os.path.exists(file_path))

        os.remove(file_path)
        self.assertFalse(os.path.exists(file_path))

        # Also exercise write_payload with real attachments and their
        # declared content-transfer-encoding.
        p = mailparser.parse_from_file(mail_test_11)
        attachments = MailAttachments.withhashes(p.attachments)
        attachments.run()

        for i in attachments:
            temp = utils.write_payload(
                i["payload"],
                i["extension"],
                i["content_transfer_encoding"],
            )
            os.remove(temp)

    def test_search_words_in_text(self):
        """search_words_in_text matches whole keyword phrases, not tokens."""
        with open(text_file) as f:
            text = f.read()

        keywords_1 = ["nomatch", "nomatch"]
        self.assertEqual(utils.search_words_in_text(text, keywords_1), False)

        # One matching phrase among non-matching ones is enough.
        keywords_2 = ["nomatch", "nomatch", "theophrastus rationibus"]
        self.assertEqual(utils.search_words_in_text(text, keywords_2), True)

        # All words of a multi-word keyword must match together.
        keywords_3 = ["nomatch", "theophrastus nomatch"]
        self.assertEqual(utils.search_words_in_text(text, keywords_3), False)

        keywords_4 = ["theophrastus quo vidit"]
        self.assertEqual(utils.search_words_in_text(text, keywords_4), True)

        # Non-string keywords are coerced before matching.
        keywords_5 = [12345678]
        self.assertEqual(utils.search_words_in_text(text, keywords_5), True)

        keywords_6 = [11111, 44444]
        self.assertEqual(utils.search_words_in_text(text, keywords_6), True)

    def test_reformat_output_first(self):
        """reformat_output validates its bolt/parameters arguments."""

        # No bolt given at all.
        with self.assertRaises(RuntimeError):
            self.f(mail=self.mail_obj)

        # Elasticsearch bolt without its routing parameters.
        with self.assertRaises(KeyError):
            self.f(mail=self.mail_obj, bolt="output-elasticsearch")

        m, a = self.f(mail=self.mail_obj,
                      bolt="output-elasticsearch",
                      **self.parameters)

        # Attachments
        self.assertIsInstance(a, list)
        self.assertEqual(len(a), 1)
        self.assertIsInstance(a[0], dict)
        self.assertIn('@timestamp', m)
        self.assertIn('_index', a[0])
        self.assertIn('_type', a[0])
        self.assertIn('type', a[0])

        # Mail
        self.assertIsInstance(m, dict)
        self.assertIn('@timestamp', m)
        self.assertIn('_index', m)
        self.assertIn('_type', m)
        self.assertIn('type', m)

    def test_reformat_output_second(self):
        """Elasticsearch output: attachments are split out of the mail."""
        m = copy.deepcopy(self.mail_obj)
        m['attachments'] = list(self.attachments)

        m, a = self.f(mail=m, bolt="output-elasticsearch", **self.parameters)

        # Attachments
        self.assertIsInstance(a, list)
        self.assertEqual(len(a), 2)

        self.assertIsInstance(a[0], dict)
        self.assertIn('@timestamp', a[0])
        self.assertIn('_index', a[0])
        self.assertIn('_type', a[0])
        self.assertIn('type', a[0])
        self.assertIn('payload', a[0])
        self.assertEqual(a[0]['is_archived'], True)

        self.assertIsInstance(a[1], dict)
        self.assertIn('@timestamp', a[1])
        self.assertIn('_index', a[1])
        self.assertIn('_type', a[1])
        self.assertIn('type', a[1])
        self.assertIn('files', a[1])
        self.assertIn('payload', a[1])
        # self.assertIn('tika', a[1])
        # Inner files of an archive must not carry their own payload.
        self.assertNotIn('payload', a[1]['files'][0])
        self.assertEqual(a[1]['is_archived'], False)
        self.assertEqual(a[1]['is_archive'], True)

        # Mail
        self.assertIsInstance(m, dict)
        self.assertIn('@timestamp', m)

    def test_reformat_output_third(self):
        """Redis output: no Elasticsearch metadata is attached."""
        m = copy.deepcopy(self.mail_obj)
        m['attachments'] = list(self.attachments)

        m, a = self.f(mail=m, bolt="output-redis")

        # Attachments
        self.assertIsInstance(a, list)
        self.assertEqual(len(a), 2)

        self.assertIsInstance(a[0], dict)
        self.assertNotIn('@timestamp', a[0])
        self.assertNotIn('_index', a[0])
        self.assertNotIn('_type', a[0])
        self.assertNotIn('type', a[0])
        self.assertIn('payload', a[0])
        self.assertEqual(a[0]['is_archived'], True)

        self.assertIsInstance(a[1], dict)
        self.assertNotIn('@timestamp', a[1])
        self.assertNotIn('_index', a[1])
        self.assertNotIn('_type', a[1])
        self.assertNotIn('type', a[1])
        self.assertIn('files', a[1])
        self.assertIn('payload', a[1])
        # self.assertIn('tika', a[1])
        self.assertNotIn('payload', a[1]['files'][0])
        self.assertEqual(a[1]['is_archived'], False)
        self.assertEqual(a[1]['is_archive'], True)

        # Mail
        self.assertIsInstance(m, dict)
        self.assertNotIn('@timestamp', m)
        self.assertNotIn('_index', m)
        self.assertNotIn('_type', m)
        self.assertNotIn('type', m)

    def test_load_keywords_list(self):
        """Keyword lists merge into a set of strings (never ints)."""
        d = {
            "generic": "conf/keywords/subjects.example.yml",
            "custom": "conf/keywords/subjects_english.example.yml"
        }
        results = utils.load_keywords_list(d)
        self.assertIsInstance(results, set)
        self.assertIn("fattura", results)
        self.assertIn("conferma", results)
        # Numeric keywords are stored as their string form.
        self.assertIn("123456", results)
        self.assertNotIn(123456, results)

        # A dict-shaped keywords file is the wrong type for a list loader.
        with self.assertRaises(RuntimeError):
            d = {"generic": "conf/keywords/targets.example.yml"}
            results = utils.load_keywords_list(d)

    def test_load_keywords_dict(self):
        """Keyword dicts keep target names case-sensitive, values stringified."""
        d = {
            "generic": "conf/keywords/targets.example.yml",
            "custom": "conf/keywords/targets_english.example.yml"
        }
        results = utils.load_keywords_dict(d)
        self.assertIsInstance(results, dict)
        self.assertIn("Banca Tizio", results)
        self.assertNotIn("banca tizio", results)
        self.assertIn("tizio", results["Banca Tizio"])
        self.assertIn("caio rossi", results["Banca Tizio"])
        self.assertNotIn(12345, results["Banca Tizio"])
        self.assertIn("12345", results["Banca Tizio"])
        self.assertNotIn("123", results["Banca Tizio"])
        self.assertNotIn(123, results["Banca Tizio"])
        self.assertIn("123 456", results["Banca Tizio"])

        # A list-shaped keywords file is the wrong type for a dict loader.
        with self.assertRaises(RuntimeError):
            d = {"generic": "conf/keywords/subjects.example.yml"}
            results = utils.load_keywords_dict(d)

    def test_urls_extractor(self):
        """urls_extractor groups found URLs by registered domain."""

        body = """
        bla bla https://tweetdeck.twitter.com/random bla bla
        http://kafka.apache.org/documentation.html
        http://kafka.apache.org/documentation1.html
        bla bla bla https://docs.python.org/2/library/re.html bla bla
        bla bla bla https://docs.python.org/2/library/re_2.html> bla bla
        <p>https://tweetdeck.twitter.com/random</p> bla bla
        <p>https://tweetdeck.twitter.com/random_2</p>
        """

        # Regression fixture: an IDN URL that used to trigger unicode errors.
        body_unicode_error = """
        Return-Path: <>
        Delivered-To: [email protected]
        Received: (qmail 15482 invoked from network); 29 Nov 2015 12:28:40 -000
        Received: from unknown (HELO 112.149.154.61) (112.149.154.61)
        by smtp.customers.net with SMTP; 29 Nov 2015 12:28:40 -0000
        Received: from unknown (HELO localhost)
            ([email protected]@110.68.103.81)
                by 112.149.154.61 with ESMTPA; Sun, 29 Nov 2015 21:29:24 +0900
                From: [email protected]
                To: [email protected]
                Subject: Gain your male attrctiveness

                Give satisfaction to your loved one
                http://contents.xn--90afavbplfx2a6a5b2a.xn--p1ai/
        """

        urls = utils.urls_extractor(body, self.faup)
        self.assertIsInstance(urls, dict)
        self.assertIn("apache.org", urls)
        self.assertIn("python.org", urls)
        self.assertIn("twitter.com", urls)

        # Two distinct URLs were present for each of these domains.
        for i in ("apache.org", "python.org", "twitter.com"):
            self.assertIsInstance(urls[i], list)
            self.assertEqual(len(urls[i]), 2)

        urls = utils.urls_extractor(body_unicode_error, self.faup)
        self.assertIsInstance(urls, dict)
        self.assertIn("xn--90afavbplfx2a6a5b2a.xn--p1ai", urls)
        self.assertEqual(len(urls["xn--90afavbplfx2a6a5b2a.xn--p1ai"]), 1)

    def test_load_whitelist(self):
        """load_whitelist honors the optional per-source expiry timestamp."""
        d = {"generic": {"path": "conf/whitelists/generic.example.yml"}}
        results = utils.load_whitelist(d)
        self.assertIsInstance(results, set)
        self.assertIn("google.com", results)
        self.assertIn("amazon.com", results)
        self.assertIn("facebook.com", results)

        # Explicit None expiry behaves like no expiry at all.
        d = {
            "generic": {
                "path": "conf/whitelists/generic.example.yml",
                "expiry": None
            }
        }
        results = utils.load_whitelist(d)
        self.assertIsInstance(results, set)
        self.assertIn("google.com", results)
        self.assertIn("amazon.com", results)
        self.assertIn("facebook.com", results)

        # A past expiry drops the whole whitelist source.
        d = {
            "generic": {
                "path": "conf/whitelists/generic.example.yml",
                "expiry": "2016-06-28T12:33:00.000Z"
            }
        }
        results = utils.load_whitelist(d)
        self.assertIsInstance(results, set)
        self.assertEqual(len(results), 0)

    def test_text2urls_whitelisted(self):
        """Whitelisted domains are removed from the extracted URL dict."""

        body = """
        bla bla https://tweetdeck.twitter.com/random bla bla
        http://kafka.apache.org/documentation.html
        http://kafka.apache.org/documentation1.html
        bla bla bla https://docs.python.org/2/library/re.html bla bla
        bla bla bla https://docs.python.org/2/library/re_2.html> bla bla
        <p>https://tweetdeck.twitter.com/random</p> bla bla
        <p>https://tweetdeck.twitter.com/random_2</p>
        """

        d = {"generic": {"path": "conf/whitelists/generic.example.yml"}}
        whitelist = utils.load_whitelist(d)
        urls = utils.text2urls_whitelisted(body, whitelist, self.faup)

        # apache.org is in the example whitelist; the others are not.
        self.assertIsInstance(urls, dict)
        self.assertNotIn("apache.org", urls)
        self.assertIn("python.org", urls)
        self.assertIsInstance(urls["python.org"], list)
        self.assertIn("twitter.com", urls)
        self.assertIsInstance(urls["twitter.com"], list)

    def test_text2urls_whitelisted_nonetype_error(self):
        """Regression: a body that once produced None domain keys."""
        p = mailparser.parse_from_file(mail_test_7)
        body = p.body
        urls = utils.urls_extractor(body, self.faup)

        for k in urls:
            self.assertIsNotNone(k)

        d = {"generic": {"path": "conf/whitelists/generic.example.yml"}}
        whitelist = utils.load_whitelist(d)

        # Must not raise; the return value is irrelevant here.
        utils.text2urls_whitelisted(body, whitelist, self.faup)

    def test_reformat_urls(self):
        """reformat_urls flattens the domain->urls dict into a list."""

        body = """
        bla bla https://tweetdeck.twitter.com/random bla bla
        http://kafka.apache.org/documentation.html
        http://kafka.apache.org/documentation1.html
        bla bla bla https://docs.python.org/2/library/re.html bla bla
        bla bla bla https://docs.python.org/2/library/re_2.html> bla bla
        <p>https://tweetdeck.twitter.com/random</p> bla bla
        <p>https://tweetdeck.twitter.com/random_2</p>
        """

        d = {"generic": {"path": "conf/whitelists/generic.example.yml"}}
        whitelist = utils.load_whitelist(d)
        urls = utils.text2urls_whitelisted(body, whitelist, self.faup)
        self.assertIsInstance(urls, dict)

        urls = utils.reformat_urls(urls)
        self.assertIsInstance(urls, list)

        # Passing the dict *class* instead of an instance must fail loudly.
        with self.assertRaises(TypeError):
            utils.reformat_urls(dict)

    def test_timeout(self):
        """The timeout decorator interrupts long-running callables."""
        with self.assertRaises(utils.TimeoutError):
            sleeping()

    def test_register_order(self):
        """register() keeps (function, priority) pairs sortable by priority."""
        register = utils.register
        processors = set()

        @register(processors, priority=2)
        def number_two():
            pass

        @register(processors, priority=1)
        def number_one():
            pass

        @register(processors, priority=4)
        def number_four():
            pass

        @register(processors, priority=3)
        def number_three():
            pass

        # Sort by the stored priority (index 1) and keep only the callables.
        processors = [i[0] for i in sorted(processors, key=itemgetter(1))]

        self.assertIs(processors[0], number_one)
        self.assertIs(processors[1], number_two)
        self.assertIs(processors[2], number_three)
        self.assertIs(processors[3], number_four)

    def test_is_file_older_than(self):
        """is_file_older_than compares file mtime against a seconds delta."""
        r = utils.is_file_older_than(text_file, seconds=20)
        self.assertTrue(r)
        # ~100 years in the future: no file can be older than that.
        r = utils.is_file_older_than(text_file, seconds=3153600000)
        self.assertFalse(r)

    def test_dump_load(self):
        """dump_obj/load_obj round-trip an object preserving type and value."""
        path = "/tmp/object.dump"
        d = deque(maxlen=5)
        d.append(1)
        d.append(2)
        self.assertIsInstance(d, deque)
        utils.dump_obj(path, d)
        d_dumped = utils.load_obj(path)
        self.assertIsInstance(d_dumped, deque)
        self.assertEqual(d, d_dumped)
コード例 #28
0
 def __init__(self, loglevel: int = logging.DEBUG):
     """Set up logging, a Faup URL parser, and the Redis cache connection.

     :param loglevel: logging level for this component's logger
         (defaults to ``logging.DEBUG``).
     """
     self.__init_logger(loglevel)
     # Reusable faup instance for URL parsing.
     self.fex = Faup()
     # Connect to the local Redis cache over its unix socket; strings are
     # decoded so values come back as str rather than bytes.
     self.cache = Redis(unix_socket_path=get_socket_path('cache'),
                        db=1,
                        decode_responses=True)
コード例 #29
0
 def __init__(self):
     """Compile the URL-matching regex once and create a Faup parser."""
     # Matches http/https/ftp/ftps URLs; percent-escaped bytes are allowed
     # in the path. Case-insensitive to catch mixed-case schemes/hosts.
     pattern = (
         r'((?:(?:ht|f)tp(?:s?)\:\/\/)'
         r'(?:[!#$&-;=?-\[\]_a-z~]|%[0-9a-f]{2})+)'
     )
     self._url_regex = re.compile(pattern, re.I)
     self._faup = Faup()
コード例 #30
0
ファイル: datamodels.py プロジェクト: jgru/spamtrap-system
 def get_port(self):
     """Parse ``self.url`` with faup and return its port component."""
     parser = Faup()
     parser.decode(self.url)
     return parser.get_port()