def get_urls(url, depth=1):
    if depth > 5:
        print('Too many redirects.')
        return
    fex = Faup()

    def meta_redirect(content):
        c = content.lower()
        soup = BeautifulSoup(c, "html.parser")
        for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
            if result:
                out = result["content"].split(";")
                if len(out) == 2:
                    wait, text = out
                    a, url = text.split('=', 1)
                    return url.strip()
        return None

    resolve, reason = try_resolve(fex, url)
    if not resolve:
        # FIXME: inform that the domain does not resolve
        yield url
        return

    logging.debug("Making HTTP connection to " + url)
    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'}
    try:
        response = requests.get(url, allow_redirects=True, headers=headers,
                                timeout=15, verify=False)
    except Exception:
        # The request can fail (DNS error, for example)
        # FIXME: inform that the GET failed
        yield url
        return

    if response.history is not None:
        for h in response.history:
            # Yield the URLs in the order we find them
            yield h.url
    yield response.url

    meta_redir_url = meta_redirect(response.content)
    if meta_redir_url is not None:
        depth += 1
        if not meta_redir_url.startswith('http'):
            fex.decode(url)
            base = '{}://{}'.format(fex.get_scheme(), fex.get_host())
            port = fex.get_port()
            if port is not None:
                base += ':{}'.format(port)
            if not meta_redir_url.startswith('/'):
                # Relative redirect. resource_path has the initial '/'
                if fex.get_resource_path() is not None:
                    base += fex.get_resource_path()
                if not base.endswith('/'):
                    base += '/'
            meta_redir_url = base + meta_redir_url
        for url in get_urls(meta_redir_url, depth):
            yield url
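
# A minimal usage sketch for the generator above, assuming try_resolve()
# and the Faup/requests/BeautifulSoup imports it relies on are available
# in the same module. The example URL is a placeholder.
for hop in get_urls('http://example.com/short-link'):
    print('redirected via:', hop)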
def __post_init__(self):
    f = Faup()  # Example code at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
    f.decode(self.url)
    self.scheme = f.get_scheme()
    self.top_level_domain = f.get_tld()
    self.domain = f.get_domain()
    self.subdomain = f.get_subdomain()
    self.path = f.get_resource_path()
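
# For context, a self-contained sketch of a dataclass this __post_init__
# could belong to. The `ParsedURL` name and the field layout are
# assumptions for illustration, not part of the original snippet.
from dataclasses import dataclass, field
from typing import Optional

from pyfaup.faup import Faup


@dataclass
class ParsedURL:
    # Only `url` is user-supplied; the derived fields are filled in
    # by __post_init__ via Faup.
    url: str
    scheme: Optional[str] = field(init=False, default=None)
    top_level_domain: Optional[str] = field(init=False, default=None)
    domain: Optional[str] = field(init=False, default=None)
    subdomain: Optional[str] = field(init=False, default=None)
    path: Optional[str] = field(init=False, default=None)

    def __post_init__(self):
        f = Faup()
        f.decode(self.url)
        self.scheme = f.get_scheme()
        self.top_level_domain = f.get_tld()
        self.domain = f.get_domain()
        self.subdomain = f.get_subdomain()
        self.path = f.get_resource_path()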
def sort(self, elem_links, url):
    fex = Faup()
    f = Filters()
    f.load()
    self.r.switchDB(1)
    try:
        for link in elem_links:
            new_url = link
            # Reset the filter flags for each link, otherwise one filtered
            # link would block every subsequent link from being crawled.
            extend = True
            domainfilter = True
            schemefilter = True
            self.r.switchDB(2)
            if not self.r.get(new_url) and new_url:
                self.r.switchDB(1)
                if not self.r.get(new_url):
                    fex.decode(new_url)
                    domain = fex.get_host()
                    if f.isfilteredscheme(fex.get_scheme()):
                        self.r.switchDB(2)
                        self.r.put(new_url, new_url)
                        schemefilter = False
                    if f.isfiltereddomains(domain):
                        self.r.switchDB(2)
                        self.r.put(new_url, new_url)
                        domainfilter = False
                    if f.isfilteredextention(fex.get_resource_path()):
                        extend = False
                        self.r.switchDB(2)
                        self.r.put(new_url, new_url)
                    if extend and domainfilter and schemefilter:
                        self.r.switchDB(1)
                        self.r.rpush('crawl', new_url)
                        self.queue.append(new_url)
    except TypeError as e:
        print("TypeError:", e)
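
# The Filters helper and the self.r Redis wrapper are external to the
# snippet above. A hypothetical minimal stub of the filter side, just to
# make the expected interface explicit: method names are taken from the
# calls above, the bodies and blocklist contents are assumptions.
class Filters:
    """Hypothetical stub matching the interface used by sort() above."""

    def load(self):
        # Assumed to load scheme/domain/extension blocklists.
        self.schemes = {'ftp', 'mailto'}
        self.domains = {'example-tracker.invalid'}
        self.extensions = {'.jpg', '.png', '.css'}

    def isfilteredscheme(self, scheme):
        # True means "filter this link out"
        return scheme in self.schemes

    def isfiltereddomains(self, domain):
        return domain in self.domains

    def isfilteredextention(self, resource_path):
        return bool(resource_path) and any(
            resource_path.endswith(ext) for ext in self.extensions)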
class Mail2MISP():

    def __init__(self, misp_url, misp_key, verifycert, config, offline=False, urlsonly=False):
        self.offline = offline
        if not self.offline:
            self.misp = ExpandedPyMISP(misp_url, misp_key, verifycert, debug=config.debug)
        self.config = config
        self.urlsonly = urlsonly
        if not hasattr(self.config, 'enable_dns'):
            setattr(self.config, 'enable_dns', True)
        if self.urlsonly is False:
            setattr(self.config, 'enable_dns', False)
        self.debug = self.config.debug
        self.config_from_email_body = {}
        # Init Faup
        self.f = Faup()
        self.sightings_to_add = []

    def load_email(self, pseudofile):
        self.pseudofile = pseudofile
        self.original_mail = message_from_bytes(self.pseudofile.getvalue(), policy=policy.default)
        self.subject = self.original_mail.get('Subject')
        try:
            self.sender = self.original_mail.get('From')
        except Exception:
            self.sender = "<unknown sender>"
        # Remove words from subject
        for removeword in self.config.removelist:
            self.subject = re.sub(removeword, "", self.subject).strip()

        # Initialize the MISP event
        self.misp_event = MISPEvent()
        self.misp_event.info = f'{self.config.email_subject_prefix} - {self.subject}'
        self.misp_event.distribution = self.config.default_distribution
        self.misp_event.threat_level_id = self.config.default_threat_level
        self.misp_event.analysis = self.config.default_analysis

    def sighting(self, value, source):
        '''Add a sighting'''
        if self.offline:
            raise Exception('The script is running in offline mode, ')
        s = MISPSighting()
        s.from_dict(value=value, source=source)
        self.misp.add_sighting(s)

    def _find_inline_forward(self):
        '''Does the body contain a forwarded email?'''
        for identifier in self.config.forward_identifiers:
            if identifier in self.clean_email_body:
                self.clean_email_body, fw_email = self.clean_email_body.split(identifier)
                return self.forwarded_email(pseudofile=BytesIO(fw_email.encode()))

    def _find_attached_forward(self):
        forwarded_emails = []
        for attachment in self.original_mail.iter_attachments():
            attachment_content = attachment.get_content()
            # Search for an email forwarded as attachment.
            # There could be more than one, attach everything.
            if isinstance(attachment_content, message.EmailMessage):
                forwarded_emails.append(
                    self.forwarded_email(pseudofile=BytesIO(attachment_content.as_bytes())))
            else:
                if isinstance(attachment_content, str):
                    attachment_content = attachment_content.encode()
                filename = attachment.get_filename()
                if not filename:
                    filename = 'missing_filename'
                if self.config_from_email_body.get('attachment') == self.config.m2m_benign_attachment_keyword:
                    # Attach sane file
                    self.misp_event.add_attribute('attachment', value=filename,
                                                  data=BytesIO(attachment_content))
                else:
                    f_object, main_object, sections = make_binary_objects(
                        pseudofile=BytesIO(attachment_content), filename=filename,
                        standalone=False)
                    self.misp_event.add_object(f_object)
                    if main_object:
                        self.misp_event.add_object(main_object)
                        [self.misp_event.add_object(section) for section in sections]
        return forwarded_emails

    def email_from_spamtrap(self):
        '''The email comes from a spamtrap and should be attached as-is.'''
        raw_body = self.original_mail.get_body(preferencelist=('html', 'plain'))
        if raw_body:
            self.clean_email_body = html.unescape(
                raw_body.get_payload(decode=True).decode('utf8', 'surrogateescape'))
        else:
            self.clean_email_body = ''
        return self.forwarded_email(self.pseudofile)

    def forwarded_email(self, pseudofile: BytesIO):
        '''Extracts all possible indicators out of an email and creates a MISP event out of it.
        * Gets all relevant headers
        * Attaches the body
        * Creates MISP file objects (uses lief if possible)
        * Sets all references
        '''
        email_object = EMailObject(pseudofile=pseudofile, attach_original_mail=True,
                                   standalone=False)
        if email_object.attachments:
            # Create file objects for the attachments
            for attachment_name, attachment in email_object.attachments:
                if not attachment_name:
                    attachment_name = 'NameMissing.txt'
                if self.config_from_email_body.get('attachment') == self.config.m2m_benign_attachment_keyword:
                    a = self.misp_event.add_attribute('attachment', value=attachment_name,
                                                      data=attachment)
                    email_object.add_reference(a.uuid, 'related-to', 'Email attachment')
                else:
                    f_object, main_object, sections = make_binary_objects(
                        pseudofile=attachment, filename=attachment_name, standalone=False)
                    if self.config.vt_key:
                        try:
                            vt_object = VTReportObject(
                                self.config.vt_key,
                                f_object.get_attributes_by_relation('sha256')[0].value,
                                standalone=False)
                            self.misp_event.add_object(vt_object)
                            f_object.add_reference(vt_object.uuid, 'analysed-with')
                        except InvalidMISPObject as e:
                            print(e)
                    self.misp_event.add_object(f_object)
                    if main_object:
                        self.misp_event.add_object(main_object)
                        for section in sections:
                            self.misp_event.add_object(section)
                    email_object.add_reference(f_object.uuid, 'related-to', 'Email attachment')
        self.process_body_iocs(email_object)
        if self.config.spamtrap or self.config.attach_original_mail or self.config_from_email_body.get('attach_original_mail'):
            self.misp_event.add_object(email_object)
        return email_object

    def process_email_body(self):
        mail_as_bytes = self.original_mail.get_body(preferencelist=('html', 'plain')).get_payload(decode=True)
        if mail_as_bytes:
            self.clean_email_body = html.unescape(mail_as_bytes.decode('utf8', 'surrogateescape'))
            # Check if there are config lines in the body & convert them to a python dictionary:
            #   <config.body_config_prefix>:<key>:<value> => {<key>: <value>}
            self.config_from_email_body = {
                k.strip(): v.strip() for k, v in re.findall(
                    f'{self.config.body_config_prefix}:(.*):(.*)', self.clean_email_body)}
            if self.config_from_email_body:
                # ... remove the config lines from the body
                self.clean_email_body = re.sub(
                    rf'^{self.config.body_config_prefix}.*\n?', '',
                    html.unescape(
                        self.original_mail.get_body(preferencelist=('html', 'plain')).get_payload(
                            decode=True).decode('utf8', 'surrogateescape')),
                    flags=re.MULTILINE)

            # Check if autopublish key is present and valid
            if self.config_from_email_body.get('m2mkey') == self.config.m2m_key:
                if self.config_from_email_body.get('distribution') is not None:
                    self.misp_event.distribution = self.config_from_email_body.get('distribution')
                if self.config_from_email_body.get('threat_level') is not None:
                    self.misp_event.threat_level_id = self.config_from_email_body.get('threat_level')
                if self.config_from_email_body.get('analysis') is not None:
                    self.misp_event.analysis = self.config_from_email_body.get('analysis')
                if self.config_from_email_body.get('publish'):
                    self.misp_event.publish()

            self._find_inline_forward()
        else:
            self.clean_email_body = ''
        self._find_attached_forward()

    def process_body_iocs(self, email_object=None):
        if email_object:
            body = html.unescape(
                email_object.email.get_body(preferencelist=('html', 'plain')).get_payload(
                    decode=True).decode('utf8', 'surrogateescape'))
        else:
            body = self.clean_email_body

        # Cleanup body content.
        # Depending on the source of the mail, there is some cleanup to do:
        # ignore lines in the body of the message.
        for ignoreline in self.config.ignorelist:
            body = re.sub(rf'^{ignoreline}.*\n?', '', body, flags=re.MULTILINE)

        # Remove everything after the stopword from the body
        body = body.split(self.config.stopword, 1)[0]

        # Add tags to the event if keywords are found in the mail
        for tag in self.config.tlptags:
            for alternativetag in self.config.tlptags[tag]:
                if alternativetag in body.lower():
                    self.misp_event.add_tag(tag)

        # Prepare extraction of IOCs: refang email data
        body = refang(body)

        # Extract and add hashes
        contains_hash = False
        for h in set(re.findall(hashmarker.MD5_REGEX, body)):
            contains_hash = True
            attribute = self.misp_event.add_attribute(
                'md5', h, enforceWarninglist=self.config.enforcewarninglist)
            if email_object:
                email_object.add_reference(attribute.uuid, 'contains')
            if self.config.sighting:
                self.sightings_to_add.append((h, self.config.sighting_source))
        for h in set(re.findall(hashmarker.SHA1_REGEX, body)):
            contains_hash = True
            attribute = self.misp_event.add_attribute(
                'sha1', h, enforceWarninglist=self.config.enforcewarninglist)
            if email_object:
                email_object.add_reference(attribute.uuid, 'contains')
            if self.config.sighting:
                self.sightings_to_add.append((h, self.config.sighting_source))
        for h in set(re.findall(hashmarker.SHA256_REGEX, body)):
            contains_hash = True
            attribute = self.misp_event.add_attribute(
                'sha256', h, enforceWarninglist=self.config.enforcewarninglist)
            if email_object:
                email_object.add_reference(attribute.uuid, 'contains')
            if self.config.sighting:
                self.sightings_to_add.append((h, self.config.sighting_source))

        if contains_hash:
            [self.misp_event.add_tag(tag) for tag in self.config.hash_only_tags]

        # Extract network IOCs
        urllist = []
        urllist += re.findall(urlmarker.WEB_URL_REGEX, body)
        urllist += re.findall(urlmarker.IP_REGEX, body)
        if self.debug:
            syslog.syslog(str(urllist))

        hostname_processed = []
        # Add IOCs and expanded information to MISP
        for entry in set(urllist):
            ids_flag = True
            self.f.decode(entry)
            domainname = self.f.get_domain()
            if domainname in self.config.excludelist:
                # Ignore the entry
                continue
            hostname = self.f.get_host()
            scheme = self.f.get_scheme()
            resource_path = self.f.get_resource_path()
            if self.debug:
                syslog.syslog(domainname)
            if domainname in self.config.internallist and self.urlsonly is False:
                # Add link to internal reference unless in urlsonly mode
                attribute = self.misp_event.add_attribute(
                    'link', entry, category='Internal reference',
                    to_ids=False, enforceWarninglist=False)
                if email_object:
                    email_object.add_reference(attribute.uuid, 'contains')
            elif domainname in self.config.externallist or self.urlsonly is False:
                # External analysis
                attribute = self.misp_event.add_attribute(
                    'link', entry, category='External analysis',
                    to_ids=False, enforceWarninglist=False)
                if email_object:
                    email_object.add_reference(attribute.uuid, 'contains')
            elif domainname in self.config.externallist or self.urlsonly:
                # External analysis
                if self.urlsonly:
                    comment = self.subject + " (from: " + self.sender + ")"
                else:
                    comment = ""
                attribute = self.misp.add_attribute(self.urlsonly, {
                    "type": 'link',
                    "value": entry,
                    "category": 'External analysis',
                    "to_ids": False,
                    "comment": comment})
                for tag in self.config.tlptags:
                    for alternativetag in self.config.tlptags[tag]:
                        if alternativetag in self.subject.lower():
                            self.misp.tag(attribute["uuid"], tag)
                            new_subject = comment.replace(alternativetag, '')
                            self.misp.change_comment(attribute["uuid"], new_subject)
            else:
                # The URL is probably an indicator.
                comment = ""
                if (domainname in self.config.noidsflaglist) or (hostname in self.config.noidsflaglist):
                    ids_flag = False
                    comment = "Known host (mostly for connectivity test or IP lookup)"
                if self.debug:
                    syslog.syslog(str(entry))

                if scheme:
                    if is_ip(hostname):
                        attribute = self.misp_event.add_attribute(
                            'url', entry, to_ids=False,
                            enforceWarninglist=self.config.enforcewarninglist)
                        if email_object:
                            email_object.add_reference(attribute.uuid, 'contains')
                    else:
                        if resource_path:
                            # URL has a path, ignore the warning list
                            attribute = self.misp_event.add_attribute(
                                'url', entry, to_ids=ids_flag,
                                enforceWarninglist=False, comment=comment)
                            if email_object:
                                email_object.add_reference(attribute.uuid, 'contains')
                        else:
                            # URL has no path
                            attribute = self.misp_event.add_attribute(
                                'url', entry, to_ids=ids_flag,
                                enforceWarninglist=self.config.enforcewarninglist,
                                comment=comment)
                            if email_object:
                                email_object.add_reference(attribute.uuid, 'contains')
                    if self.config.sighting:
                        self.sightings_to_add.append((entry, self.config.sighting_source))

                if hostname in hostname_processed:
                    # Hostname already processed
                    continue
                hostname_processed.append(hostname)
                if self.config.sighting:
                    self.sightings_to_add.append((hostname, self.config.sighting_source))
                if self.debug:
                    syslog.syslog(hostname)

                comment = ''
                port = self.f.get_port()
                if port:
                    comment = f'on port: {port}'

                if is_ip(hostname):
                    attribute = self.misp_event.add_attribute(
                        'ip-dst', hostname, to_ids=ids_flag,
                        enforceWarninglist=self.config.enforcewarninglist,
                        comment=comment)
                    if email_object:
                        email_object.add_reference(attribute.uuid, 'contains')
                else:
                    related_ips = []
                    if HAS_DNS and self.config.enable_dns:
                        try:
                            syslog.syslog(hostname)
                            for rdata in dns.resolver.query(hostname, 'A'):
                                if self.debug:
                                    syslog.syslog(str(rdata))
                                related_ips.append(rdata.to_text())
                        except Exception as e:
                            if self.debug:
                                syslog.syslog(str(e))

                    if related_ips:
                        hip = MISPObject(name='ip-port')
                        hip.add_attribute(
                            'hostname', value=hostname, to_ids=ids_flag,
                            enforceWarninglist=self.config.enforcewarninglist,
                            comment=comment)
                        for ip in set(related_ips):
                            hip.add_attribute(
                                'ip', type='ip-dst', value=ip, to_ids=False,
                                enforceWarninglist=self.config.enforcewarninglist)
                        self.misp_event.add_object(hip)
                        if email_object:
                            email_object.add_reference(hip.uuid, 'contains')
                    else:
                        if self.urlsonly is False:
                            attribute = self.misp_event.add_attribute(
                                'hostname', value=hostname, to_ids=ids_flag,
                                enforceWarninglist=self.config.enforcewarninglist,
                                comment=comment)
                            if email_object:
                                email_object.add_reference(attribute.uuid, 'contains')

    def add_event(self):
        '''Add event on the remote MISP instance.'''
        # Add additional tags depending on others
        tags = []
        for tag in [t.name for t in self.misp_event.tags]:
            if self.config.dependingtags.get(tag):
                tags += self.config.dependingtags.get(tag)

        # Add additional tags according to configuration
        for malware in self.config.malwaretags:
            if malware.lower() in self.subject.lower():
                tags += self.config.malwaretags.get(malware)
        if tags:
            [self.misp_event.add_tag(tag) for tag in tags]

        has_tlp_tag = False
        for tag in [t.name for t in self.misp_event.tags]:
            if tag.lower().startswith('tlp'):
                has_tlp_tag = True
        if not has_tlp_tag:
            self.misp_event.add_tag(self.config.tlptag_default)

        if self.offline:
            return self.misp_event.to_json()
        event = self.misp.add_event(self.misp_event, pythonify=True)
        if self.config.sighting:
            for value, source in self.sightings_to_add:
                self.sighting(value, source)
        return event
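
# A hedged end-to-end sketch of how the class above appears intended to be
# driven. The MISP URL, API key, email filename, and `config` module are
# placeholders, not from the original; offline mode keeps everything local.
from io import BytesIO

import config  # assumed to carry debug, removelist, tlptags, etc.

raw_email = open('suspicious.eml', 'rb').read()  # placeholder path
m2m = Mail2MISP('https://misp.example', 'MISP_API_KEY', verifycert=True,
                config=config, offline=True)  # offline: nothing is pushed
m2m.load_email(BytesIO(raw_email))
m2m.email_from_spamtrap()           # or m2m.process_email_body()
print(m2m.add_event())              # offline mode returns the event as JSON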
class Query():

    def __init__(self, loglevel: int = logging.DEBUG):
        self.__init_logger(loglevel)
        self.fex = Faup()
        self.cache = Redis(unix_socket_path=get_socket_path('cache'), db=1,
                           decode_responses=True)

    def __init_logger(self, loglevel) -> None:
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    def _cache_set(self, key, value, field=None):
        if field is None:
            # redis-py >= 3 signature is setex(name, time, value)
            self.cache.setex(key, 3600, json.dumps(value))
        else:
            self.cache.hset(key, field, json.dumps(value))
            self.cache.expire(key, 3600)

    def _cache_get(self, key, field=None):
        if field is None:
            value_json = self.cache.get(key)
        else:
            value_json = self.cache.hget(key, field)
        if value_json is not None:
            return json.loads(value_json)
        return None

    def to_bool(self, s):
        """Converts the given string to a boolean."""
        return s.lower() in ('1', 'true', 'yes', 'on')

    def get_submissions(self, url, day=None):
        if day is None:
            day = date.today().isoformat()
        else:
            day = day.isoformat()
        return self.cache.zscore(f'{day}_submissions', url)

    def get_mail_sent(self, url, day=None):
        if day is None:
            day = date.today().isoformat()
        else:
            day = day.isoformat()
        self.fex.decode(url)
        host = self.fex.get_host()
        return self.cache.sismember(f'{day}_mails', host)

    def set_mail_sent(self, url, day=None):
        if day is None:
            day = date.today().isoformat()
        else:
            day = day.isoformat()
        self.fex.decode(url)
        host = self.fex.get_host()
        return self.cache.sadd(f'{day}_mails', host)

    def is_valid_url(self, url):
        cached = self._cache_get(url, 'valid')
        key = f'{date.today().isoformat()}_submissions'
        self.cache.zincrby(key, 1, url)
        if cached is not None:
            return cached
        if url.startswith('hxxp'):
            url = 'http' + url[4:]
        elif not url.startswith('http'):
            url = 'http://' + url
        logging.debug("Checking validity of URL: " + url)
        self.fex.decode(url)
        scheme = self.fex.get_scheme()
        host = self.fex.get_host()
        if scheme is None or host is None:
            reason = "Not a valid http/https URL/URI"
            return False, url, reason
        self._cache_set(url, (True, url, None), 'valid')
        return True, url, None

    def is_ip(self, host):
        try:
            ipaddress.ip_address(host)
            return True
        except ValueError:
            return False

    def try_resolve(self, url):
        self.fex.decode(url)
        host = self.fex.get_host().lower()
        if self.is_ip(host):
            return True, None
        try:
            ipaddr = dns.resolver.query(host, 'A')
        except Exception:
            reason = "DNS server problem. Check resolver settings."
            return False, reason
        if not ipaddr:
            reason = "Host " + host + " does not exist."
            return False, reason
        return True, None

    def get_urls(self, url, depth=1):
        if depth > 5:
            print('Too many redirects.')
            return

        def meta_redirect(content):
            c = content.lower()
            soup = BeautifulSoup(c, "html.parser")
            for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
                if result:
                    out = result["content"].split(";")
                    if len(out) == 2:
                        wait, text = out
                        try:
                            a, url = text.split('=', 1)
                            return url.strip()
                        except Exception:
                            print(text)
            return None

        resolve, reason = self.try_resolve(url)
        if not resolve:
            # FIXME: inform that the domain does not resolve
            yield url
            return

        logging.debug(f"Making HTTP connection to {url}")
        headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'}
        try:
            response = requests.get(url, allow_redirects=True, headers=headers,
                                    timeout=15, verify=False)
        except Exception:
            # The request can fail (DNS error, for example)
            # FIXME: inform that the GET failed
            yield url
            return

        if response.history is not None:
            for h in response.history:
                # Yield the URLs in the order we find them
                yield h.url
        yield response.url

        meta_redir_url = meta_redirect(response.content)
        if meta_redir_url is not None:
            depth += 1
            if not meta_redir_url.startswith('http'):
                self.fex.decode(url)
                base = '{}://{}'.format(self.fex.get_scheme(), self.fex.get_host())
                port = self.fex.get_port()
                if port is not None:
                    base += f':{port}'
                if not meta_redir_url.startswith('/'):
                    # Relative redirect. resource_path has the initial '/'
                    if self.fex.get_resource_path() is not None:
                        base += self.fex.get_resource_path()
                    if not base.endswith('/'):
                        base += '/'
                meta_redir_url = base + meta_redir_url
            for url in self.get_urls(meta_redir_url, depth):
                yield url

    def url_list(self, url):
        cached = self._cache_get(url, 'list')
        if cached is not None:
            return cached
        list_urls = []
        for u in self.get_urls(url):
            if u is None or u in list_urls:
                continue
            list_urls.append(u)
        self._cache_set(url, list_urls, 'list')
        return list_urls

    def dns_resolve(self, url):
        cached = self._cache_get(url, 'dns')
        if cached is not None:
            return cached
        self.fex.decode(url)
        host = self.fex.get_host().lower()
        ipv4 = None
        ipv6 = None
        if self.is_ip(host):
            if ':' in host:
                try:
                    socket.inet_pton(socket.AF_INET6, host)
                    ipv6 = [host]
                except Exception:
                    pass
            else:
                try:
                    socket.inet_aton(host)
                    ipv4 = [host]
                except Exception:
                    pass
        else:
            try:
                ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
            except Exception:
                logging.debug("No IPv4 address assigned to: " + host)
            try:
                ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
            except Exception:
                logging.debug("No IPv6 address assigned to: " + host)
        self._cache_set(url, (ipv4, ipv6), 'dns')
        return ipv4, ipv6

    def phish_query(self, url, key, query):
        cached = self._cache_get(query, 'phishtank')
        if cached is not None:
            return cached
        postfields = {'url': quote(query), 'format': 'json', 'app_key': key}
        response = requests.post(url, data=postfields)
        res = response.json()
        if res["meta"]["status"] == "success":
            if res["results"]["in_database"]:
                self._cache_set(query, res["results"]["phish_detail_page"], 'phishtank')
                return res["results"]["phish_detail_page"]
            else:
                # no information
                pass
        elif res["meta"]["status"] == 'error':
            # Inform the user?
            # errormsg = res["errortext"]
            pass
        return None

    def sphinxsearch(self, server, port, url, query):
        # WARNING: too dangerous to have on the public interface
        return ''
        """
        if not sphinx:
            return None
        cached = _cache_get(query, 'sphinx')
        if cached is not None:
            return cached
        client = sphinxapi.SphinxClient()
        client.SetServer(server, port)
        client.SetMatchMode(2)
        client.SetConnectTimeout(5.0)
        result = []
        res = client.Query(query)
        if res.get("matches") is not None:
            for ticket in res["matches"]:
                ticket_id = ticket["id"]
                ticket_link = url + str(ticket_id)
                result.append(ticket_link)
            _cache_set(query, result, 'sphinx')
        return result
        """

    def vt_query_url(self, url, url_up, key, query, upload=True):
        cached = self._cache_get(query, 'vt')
        if cached is not None and cached[2] is not None:
            return cached
        parameters = {"resource": query, "apikey": key}
        if upload:
            parameters['scan'] = 1
        response = requests.post(url, data=parameters)
        if response.text is None or len(response.text) == 0:
            return None
        res = response.json()
        msg = res["verbose_msg"]
        link = res.get("permalink")
        positives = res.get("positives")
        total = res.get("total")
        self._cache_set(query, (msg, link, positives, total), 'vt')
        return msg, link, positives, total

    def gsb_query(self, url, query):
        cached = self._cache_get(query, 'gsb')
        if cached is not None:
            return cached
        param = '1\n' + query
        response = requests.post(url, data=param)
        status = response.status_code
        if status == 200:
            self._cache_set(query, response.text, 'gsb')
            return response.text

    '''
    def urlquery_query(url, key, query):
        return None
        cached = _cache_get(query, 'urlquery')
        if cached is not None:
            return cached
        try:
            urlquery.url = url
            urlquery.key = key
            response = urlquery.search(query)
        except Exception:
            return None
        if response['_response_']['status'] == 'ok':
            if response.get('reports') is not None:
                total_alert_count = 0
                for r in response['reports']:
                    total_alert_count += r['urlquery_alert_count']
                    total_alert_count += r['ids_alert_count']
                    total_alert_count += r['blacklist_alert_count']
                _cache_set(query, total_alert_count, 'urlquery')
                return total_alert_count
        else:
            return None
    '''

    def process_emails(self, emails, ignorelist, replacelist):
        to_return = list(set(emails))
        for mail in reversed(to_return):
            for ignorelist_entry in ignorelist:
                if re.search(ignorelist_entry, mail, re.I):
                    if mail in to_return:
                        to_return.remove(mail)
            for k, v in list(replacelist.items()):
                if re.search(k, mail, re.I):
                    if k in to_return:
                        to_return.remove(k)
                    to_return += v
        return to_return

    def whois(self, server, port, domain, ignorelist, replacelist):
        cached = self._cache_get(domain, 'whois')
        if cached is not None:
            return cached
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(15)
        try:
            s.connect((server, port))
        except Exception:
            print("Connection problems - check WHOIS server")
            print("WHOIS request while problem occurred:", domain)
            print("WHOIS server: {}:{}".format(server, port))
            return None
        if domain.startswith('http'):
            self.fex.decode(domain)
            d = self.fex.get_domain().lower()
        else:
            d = domain
        s.send(("{}\r\n".format(d)).encode())
        response = b''
        while True:
            d = s.recv(4096)
            response += d
            if d == b'':
                break
        s.close()
        match = re.findall(r'[\w\.-]+@[\w\.-]+', response.decode())
        emails = self.process_emails(match, ignorelist, replacelist)
        if len(emails) == 0:
            return None
        list_mail = list(set(emails))
        self._cache_set(domain, list_mail, 'whois')
        return list_mail

    def pdnscircl(self, url, user, passwd, q):
        cached = self._cache_get(q, 'pdns')
        if cached is not None:
            return cached
        pdns = PyPDNS(url, basic_auth=(user, passwd))
        response = pdns.query(q)
        all_uniq = []
        for e in reversed(response):
            host = e['rrname'].lower()
            if host in all_uniq:
                continue
            else:
                all_uniq.append(host)
        response = (len(all_uniq), all_uniq[:5])
        self._cache_set(q, response, 'pdns')
        return response

    def psslcircl(self, url, user, passwd, q):
        cached = self._cache_get(q, 'pssl')
        if cached is not None:
            return cached
        pssl = PyPSSL(url, basic_auth=(user, passwd))
        response = pssl.query(q)
        if response.get(q) is not None:
            certinfo = response.get(q)
            entries = {}
            for sha1 in certinfo['certificates']:
                entries[sha1] = []
                if certinfo['subjects'].get(sha1):
                    for value in certinfo['subjects'][sha1]['values']:
                        entries[sha1].append(value)
            self._cache_set(q, entries, 'pssl')
            return entries
        return None

    def eupi(self, url, key, q):
        cached = self._cache_get(q, 'eupi')
        if cached is not None:
            return cached
        eu = PyEUPI(key, url)
        response = eu.search_url(url=q)
        if response.get('results'):
            r = response.get('results')[0]['tag_label']
            self._cache_set(q, r, 'eupi')
            return r
        eu.post_submission(q)
        return None

    def bgpranking(self, ip):
        cached = self._cache_get(ip, 'ipasn')
        if cached is not None:
            asn = cached['asn']
            prefix = cached['prefix']
        else:
            ipasn = IPASNHistory()
            response = ipasn.query(ip)
            if 'response' not in response:
                asn = None
                prefix = None
            else:
                entry = response['response'][list(response['response'].keys())[0]]
                if entry:
                    self._cache_set(ip, entry, 'ipasn')
                    asn = entry['asn']
                    prefix = entry['prefix']
                else:
                    asn = None
                    prefix = None
        if not asn or not prefix:
            # asn, prefix, asn_descr, rank, position, known_asns
            return None, None, None, None, None, None

        cached = self._cache_get(ip, 'bgpranking')
        if cached is not None:
            return cached
        bgpranking = BGPRanking()
        response = bgpranking.query(asn, date=(date.today() - timedelta(1)).isoformat())
        if 'response' not in response or not response['response']:
            return None, None, None, None, None, None
        to_return = (asn, prefix, response['response']['asn_description'],
                     response['response']['ranking']['rank'],
                     response['response']['ranking']['position'],
                     response['response']['ranking']['total_known_asns'])
        self._cache_set(ip, to_return, 'bgpranking')
        return to_return

    def lookyloo(self, url):
        cached = self._cache_get(url, 'lookyloo')
        if cached is not None:
            return cached
        lookyloo = Lookyloo()
        lookyloo_perma_url = lookyloo.enqueue(url)
        if lookyloo_perma_url:
            self._cache_set(url, lookyloo_perma_url, 'lookyloo')
            return lookyloo_perma_url
        return None

    def _deserialize_cached(self, entry):
        to_return = {}
        redirects = []
        h = self.cache.hgetall(entry)
        for key, value in h.items():
            v = json.loads(value)
            if key == 'list':
                redirects = v
                continue
            to_return[key] = v
        return to_return, redirects

    def get_url_data(self, url):
        data, redirects = self._deserialize_cached(url)
        if data.get('dns') is not None:
            ipv4, ipv6 = data['dns']
            ip_data = {}
            if ipv4 is not None:
                for ip in ipv4:
                    info, _ = self._deserialize_cached(ip)
                    ip_data[ip] = info
            if ipv6 is not None:
                for ip in ipv6:
                    info, _ = self._deserialize_cached(ip)
                    ip_data[ip] = info
            if len(ip_data) > 0:
                data.update(ip_data)
        return {url: data}, redirects

    def cached(self, url, digest=False):
        url_data, redirects = self.get_url_data(url)
        to_return = [url_data]
        for u in redirects:
            if u == url:
                continue
            data, redir = self.get_url_data(u)
            to_return.append(data)
        if digest:
            return {'result': to_return, 'digest': self.digest(to_return)}
        return {'result': to_return}

    def ip_details_digest(self, ips, all_info, all_asns, all_mails):
        to_return = ''
        for ip in ips:
            to_return += '\t' + ip + '\n'
            data = all_info[ip]
            if data.get('bgpranking'):
                to_return += '\t\tis announced by {} ({}). Position {}/{}.\n'.format(
                    data['bgpranking'][2], data['bgpranking'][0],
                    data['bgpranking'][4], data['bgpranking'][5])
                all_asns.add('{} ({})'.format(data['bgpranking'][2], data['bgpranking'][0]))
            if data.get('whois'):
                all_mails.update(data.get('whois'))
        return to_return

    def digest(self, data):
        to_return = ''
        all_mails = set()
        all_asns = set()
        for entry in data:
            # Each URL we're redirected to
            for url, info in entry.items():
                # info contains the information we got for the URL.
                to_return += '\n{}\n'.format(url)
                if 'whois' in info:
                    all_mails.update(info['whois'])
                if 'lookyloo' in info:
                    to_return += '\tLookyloo permanent URL: {}\n'.format(info['lookyloo'])
                if 'vt' in info and len(info['vt']) == 4:
                    if info['vt'][2] is not None:
                        to_return += '\t{} out of {} positive detections in VT - {}\n'.format(
                            info['vt'][2], info['vt'][3], info['vt'][1])
                    else:
                        to_return += '\t{} - {}\n'.format(info['vt'][0], info['vt'][1])
                if 'gsb' in info:
                    to_return += '\tKnown as malicious on Google Safe Browsing: {}\n'.format(info['gsb'])
                if 'phishtank' in info:
                    to_return += '\tKnown on PhishTank: {}\n'.format(info['phishtank'])
                if 'dns' in info:
                    ipv4, ipv6 = info['dns']
                    if ipv4 is not None:
                        to_return += self.ip_details_digest(ipv4, info, all_asns, all_mails)
                    if ipv6 is not None:
                        to_return += self.ip_details_digest(ipv6, info, all_asns, all_mails)
        return to_return, list(all_mails), list(all_asns)
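
# A hedged sketch of how the Query class might be exercised. It assumes a
# Redis instance is reachable through get_socket_path('cache') and that the
# helpers imported above are available; the example URL is a placeholder.
q = Query(loglevel=logging.INFO)

ok, normalized, reason = q.is_valid_url('hxxp://example.com/track?id=1')
if ok:
    chain = q.url_list(normalized)       # follows redirects, cached for 1h
    ipv4, ipv6 = q.dns_resolve(normalized)
    print(chain, ipv4, ipv6)
else:
    print('Rejected:', reason)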