def update(self):
    """Fetch the threat-tracking Google Sheet and create entities per tab.

    Queries the Sheets API for sheet metadata only (``includeGridData`` is
    deliberately omitted: it would produce a ~200MB response), then walks
    every tab except the documentation/meta ones and turns each actor tab
    into entities.

    Raises:
        requests.ConnectionError: if the API does not answer with HTTP 200.
    """
    params = {'key': yeti_config.get('threattracking', 'google_api_key')}
    base = "https://sheets.googleapis.com/v4/spreadsheets/" + yeti_config.get(
        'threattracking', 'sheet_key')
    self.api = hammock.Hammock(base, params=params)
    r = self.api.GET()
    if r.status_code != 200:
        raise requests.ConnectionError(
            'Return code for {query} is {code}'.format(
                query=r.request.url, code=r.status_code))
    # Tabs that hold documentation or lookup data, not actor lists.
    skipped = ['README', 'Home', '_Malware', '_Download', '_Schemes',
               '_Sources']
    for sheet in r.json()['sheets']:
        # The unused `size = s['gridProperties']` local was removed.
        properties = sheet['properties']
        title = properties['title']
        if title in skipped:
            continue
        actors_list_info = self.each_sheet_work(properties)
        self.create_entities(title, actors_list_info)
def update(self):
    """Fetch the threat-tracking spreadsheet and build entities per tab.

    Raises:
        requests.ConnectionError: if the API does not answer with HTTP 200.
    """
    params = {"key": yeti_config.get("threattracking", "google_api_key")}
    # 'includeGridData' is deliberately NOT requested: it would produce a
    # ~200MB response; grid data is fetched per-sheet instead.
    base = "https://sheets.googleapis.com/v4/spreadsheets/" + yeti_config.get(
        "threattracking", "sheet_key")
    self.api = hammock.Hammock(base, params=params)
    # Always hit the live API. The previous 'if False:' branch was debug
    # scaffolding that made the feed read from a local 'actor.sheets.json'
    # cache on every run; it has been removed.
    r = self.api.GET()
    if r.status_code != 200:
        raise requests.ConnectionError(
            "Return code for {query} is {code}".format(
                query=r.request.url, code=r.status_code))
    sheets = r.json()["sheets"]
    for s_p in sheets:
        s = s_p["properties"]
        title = s["title"]
        # Documentation / lookup tabs are skipped.
        if title in ["Home", "_Malware", "_Download", "_Schemes",
                     "_Sources"]:
            continue
        actors_list_info = self.each_sheet_work(s)
        self.create_entities(title, actors_list_info)
def update(self):
    """Fetch the threat-tracking spreadsheet and build entities per tab.

    Raises:
        requests.ConnectionError: if the API does not answer with HTTP 200.
    """
    params = {'key': yeti_config.get('threattracking', 'google_api_key')}
    # 'includeGridData' is deliberately NOT requested: it would produce a
    # ~200MB response; grid data is fetched per-sheet instead.
    base = "https://sheets.googleapis.com/v4/spreadsheets/" + yeti_config.get(
        'threattracking', 'sheet_key')
    self.api = hammock.Hammock(base, params=params)
    # Always hit the live API. The previous 'if False:' branch was debug
    # scaffolding that made every run read a local 'actor.sheets.json'
    # cache instead of the API; removed along with the unused `size` local.
    r = self.api.GET()
    if r.status_code != 200:
        raise requests.ConnectionError(
            'Return code for {query} is {code}'.format(
                query=r.request.url, code=r.status_code))
    for s_p in r.json()['sheets']:
        s = s_p['properties']
        title = s['title']
        # Documentation / lookup tabs are skipped.
        if title in ['Home', '_Malware', '_Download', '_Schemes', '_Sources']:
            continue
        actors_list_info = self.each_sheet_work(s)
        self.create_entities(title, actors_list_info)
def __init__(self, *args, **kwargs):
    """Load the [proofpoint] section of yeti.conf into self.config."""
    super(ThreatInsight, self).__init__(*args, **kwargs)
    options = ('api_user', 'api_password', 'tenant_id',
               'import_email_metadata')
    self.config = {
        option: yeti_config.get('proofpoint', option) for option in options
    }
def __init__(self, *args, **kwargs):
    """Read the proofpoint settings from yeti.conf into self.config."""
    super(ThreatInsight, self).__init__(*args, **kwargs)
    self.config = {}
    for option in ("api_user", "api_password", "tenant_id",
                   "import_email_metadata"):
        self.config[option] = yeti_config.get("proofpoint", option)
def get_instances(self):
    """Collect configured MISP instances into self.instances.

    An instance is kept only when both its url and key are configured.
    """
    self.instances = {}
    for instance in yeti_config.get('misp', 'instances', '').split(','):
        config = dict(
            url=yeti_config.get(instance, 'url'),
            key=yeti_config.get(instance, 'key'),
            name=yeti_config.get(instance, 'name') or instance,
            organisations={})
        if config['url'] and config['key']:
            self.instances[instance] = config
def get_pulses(self):
    """Collect configured OTX pulses into self.pulses.

    A pulse entry is kept only when its 'pulse_id' option is set.
    """
    self.pulses = {}
    for pulse in yeti_config.get("otx", "pulses", "").split(","):
        config = {
            "pulse_id": yeti_config.get(pulse, "pulse_id"),
            # `x == "Y" or False` was redundant: `==` already yields a bool.
            "use_otx_tags": yeti_config.get(pulse, "use_otx_tags") == "Y",
        }
        if config["pulse_id"]:
            self.pulses[pulse] = config
def get_pulses(self):
    """Collect configured OTX pulses into self.pulses.

    A pulse entry is kept only when its 'pulse_id' option is set.
    """
    self.pulses = {}
    for pulse in yeti_config.get('otx', 'pulses', '').split(','):
        config = {
            'pulse_id': yeti_config.get(pulse, 'pulse_id'),
            # `x == "Y" or False` was redundant: `==` already yields a bool.
            'use_otx_tags': yeti_config.get(pulse, 'use_otx_tags') == "Y"
        }
        if config['pulse_id']:
            self.pulses[pulse] = config
def update(self):
    """Iterate over the configured number of OTX feed pages and analyze items."""
    otx_key = yeti_config.get('otx', 'key')
    number_page = yeti_config.get('otx', 'pages')
    # Bare `assert otx_key and number_page` gave no diagnostic on failure;
    # add the same message as the newer variant of this feed.
    # NOTE(review): assert is stripped under `python -O`; consider raising
    # a configuration error instead.
    assert otx_key and number_page, \
        "OTX key and pages not configured in yeti.conf"
    headers = {'X-OTX-API-KEY': otx_key}
    # NOTE(review): range(1, n) stops at page n-1 — presumably intentional,
    # matching the other OTX feed variants; confirm against the OTX API.
    for i in range(1, int(number_page)):
        items = self.update_json(headers=headers, params={'page': i})
        for item in items['results']:
            self.analyze(item)
def analyze(self, item):
    """Create a Hash observable (and a dropping Url) from a CleanMX RSS item.

    Items older than limits.max_age days, or whose description carries no
    md5, are ignored.
    """
    if not item:
        return
    item_date = parser.parse(item['pubDate'])
    max_age = yeti_config.get('limits', 'max_age')
    limit_date = pytz.UTC.localize(datetime.now()) - timedelta(days=max_age)
    if item_date < limit_date:
        return
    md5_search = re.search(
        r'md5:\t([a-fA-F\d]{32})<br />', item['description'])
    if md5_search is None:
        return
    context = {}
    tags = ['malware']
    if item['category'] != '':
        context['threat'] = item['category']
        # Normalize the category into a tag-safe signature.
        signature = item['category']\
            .replace(' ', '_')\
            .replace('/', '_')\
            .replace(':', '_')\
            .replace('.', '-')\
            .replace('!', '-')
        if signature == 'clean_site':
            return
        tags.append(signature)
    context['date_added'] = item_date
    context['source'] = self.name
    context['reference'] = item['link']
    try:
        sample = Hash.get_or_create(value=md5_search.group(1))
        sample.add_context(context)
        sample.add_source("feed")
        sample.tag(tags)
    except ObservableValidationError as e:
        logging.error(e)
        return
    except Exception as e:
        # BUG FIX: the original `print(e)` then fell through with `sample`
        # possibly unbound, crashing later on `sample.active_link_to`.
        # Log through the standard logger and bail out instead.
        logging.error(e)
        return
    try:
        url = Url.get_or_create(value=item['title'])
        url.add_context(context)
        url.add_source("feed")
        url.tag(tags)
        sample.active_link_to(url, 'drops', self.name)
    except ObservableValidationError as e:
        logging.error(e)
        return
def analyze(self, data):
    """Record one PhishTank CSV row as a tagged phishing Url observable."""
    # Skip empty rows and the CSV header line.
    if not data or data[0].startswith('phish_id'):
        return
    (_, url, phish_detail_url, submission_time, verified,
     verification_time, online, target) = data
    # Ignore entries older than the configured limits.max_age (days).
    item_date = dateutil.parser.parse(submission_time)
    max_age = yeti_config.get('limits', 'max_age')
    cutoff = pytz.UTC.localize(datetime.now()) - timedelta(days=max_age)
    if item_date < cutoff:
        return
    context = {
        'source': self.name,
        'phish_detail_url': phish_detail_url,
        'submission_time': submission_time,
        'verified': verified,
        'verification_time': verification_time,
        'online': online,
        'target': target,
    }
    if not url:
        return
    try:
        url_obs = Url.get_or_create(value=url)
        url_obs.add_context(context)
        url_obs.add_source('feed')
        url_obs.tag(['phishing'])
    except ObservableValidationError as e:
        logging.error(e)
def update(self):
    """Walk the configured number of OTX pages, analyzing each result row."""
    otx_key = yeti_config.get("otx", "key")
    number_page = yeti_config.get("otx", "pages")
    assert otx_key and number_page, \
        "OTX key and pages not configured in yeti.conf"
    headers = {"X-OTX-API-KEY": otx_key}
    page = 1
    last_page = int(number_page)
    while page < last_page:
        rows = self.update_json(
            headers=headers,
            params={"page": page},
            key="results",
            filter_row="created")
        for _, row in rows:
            self.analyze(row)
            # Be gentle with the OTX API between items.
            time.sleep(2)
        page += 1
def update(self):
    """Pull the CleanMX XML feed and hand each <item> element to analyze()."""
    headers = {'User-agent': yeti_config.get('cleanmx', 'user_agent')}
    fields = ["title", "category", "link", "pubDate", "description"]
    for entry in self.update_xml('item', fields, headers=headers):
        self.analyze(entry)
def update(self):
    """Walk the configured number of OTX pages and analyze each row."""
    otx_key = yeti_config.get('otx', 'key')
    number_page = yeti_config.get('otx', 'pages')
    # Bare `assert` gave no diagnostic; add the same message the other
    # variant of this feed uses. NOTE(review): assert is stripped under
    # `python -O`; consider raising a configuration error instead.
    assert otx_key and number_page, \
        "OTX key and pages not configured in yeti.conf"
    headers = {'X-OTX-API-KEY': otx_key}
    for i in range(1, int(number_page)):
        items = self.update_json(headers=headers,
                                 params={'page': i},
                                 key='results',
                                 filter_row='created')
        for index, item in items:
            self.analyze(item)
            # Throttle requests between items.
            time.sleep(2)
def update(self):
    """Poll the VT hunting notifications feed if an API key is configured."""
    api_key = yeti_config.get('vt', 'key')
    if not api_key:
        # Guard clause instead of the original if/else nesting.
        logging.error("Your VT API key is not set in the confile file")
        return
    self.source = (
        'https://www.virustotal.com/intelligence/hunting/notifications-feed/?key=%s'
        % api_key)
    for _, notification in self.update_json(key='notifications'):
        self.analyze(notification)
def update(self):
    """Poll the VT hunting notifications feed; requires a configured key.

    Raises:
        Exception: when no VT API key is present in yeti.conf.
    """
    api_key = yeti_config.get("vt", "key")
    if not api_key:
        raise Exception("Your VT API key is not set in the yeti.conf file")
    base = "https://www.virustotal.com/intelligence/hunting/notifications-feed/?key=%s"
    self.source = base % api_key
    for _, notification in self.update_json(key="notifications"):
        self.analyze(notification)
class PhishTank(Feed):
    """PhishTank community feed of suspected phishing URLs."""

    # Default feed settings; the source URL embeds the API key from yeti.conf.
    default_values = {
        "frequency": timedelta(hours=4),
        "name": "PhishTank",
        "source": "http://data.phishtank.com/data/%s/online-valid.csv"
                  % yeti_config.get("phishtank", "key"),
        "description":
            "PhishTank community feed. Contains a list of possible Phishing URLs.",
    }

    def update(self):
        """Fetch and chunk the CSV feed, one URL record per row."""
        rows = self.update_csv(
            delimiter=",",
            filter_row="submission_time",
            # Drop the numeric UTC offset before parsing the timestamp.
            date_parser=lambda x: pd.to_datetime(x.rsplit("+", 1)[0]),
            comment=None)
        for _, row in rows:
            self.analyze(row)

    def analyze(self, line):
        """Store one feed row as a Url observable tagged 'phishing'."""
        url = line["url"]
        if not url:
            return
        context = {
            "source": self.name,
            "phish_detail_url": line["phish_detail_url"],
            "submission_time": line["submission_time"],
            "verified": line["verified"],
            "verification_time": line["verification_time"],
            "online": line["online"],
            "target": line["target"],
        }
        try:
            url_obs = Url.get_or_create(value=url)
            url_obs.add_context(context)
            url_obs.add_source(self.name)
            url_obs.tag(["phishing"])
        except ObservableValidationError as e:
            logging.error(e)
def update(self):
    """Fetch the abuseIPDB blacklist if an API key is configured."""
    api_key = yeti_config.get('abuseIPDB', 'key')
    if not api_key:
        # Guard clause instead of the original if/else nesting.
        logging.error(
            "Your abuseIPDB API key is not set in the yeti.conf file")
        return
    # Free-plan limit is 10000 entries; change it on a paid plan.
    self.source = (
        "https://api.abuseipdb.com/api/v2/blacklist?&key=%s&plaintext&limit=10000"
        % (api_key))
    for entry in self.update_lines():
        self.analyze(entry)
class PhishTank(Feed):
    """PhishTank community feed of suspected phishing URLs."""

    # API key from yeti.conf, embedded in the feed URL below.
    # FIX: the original `key = otx_key = ...` double assignment was a
    # copy/paste leftover from the OTX feed; the `otx_key` alias is gone.
    key = yeti_config.get('phishtank', 'key')
    default_values = {
        'frequency': timedelta(hours=4),
        'name': 'PhishTank',
        'source': 'http://data.phishtank.com/data/%s/online-valid.csv' % key,
        'description':
            'PhishTank community feed. Contains a list of possible Phishing URLs.'
    }

    # should tell yeti how to get and chunk the feed
    def update(self):
        """Fetch and chunk the CSV feed, one URL record per row."""
        for index, line in self.update_csv(
                delimiter=',',
                filter_row='submission_time',
                # Drop the numeric UTC offset before parsing the timestamp.
                date_parser=lambda x: pd.to_datetime(x.rsplit('+', 1)[0]),
                comment=None):
            self.analyze(line)

    def analyze(self, line):
        """Store one feed row as a Url observable tagged 'phishing'."""
        tags = ['phishing']
        url = line['url']
        context = {
            'source': self.name,
            'phish_detail_url': line['phish_detail_url'],
            'submission_time': line['submission_time'],
            'verified': line['verified'],
            'verification_time': line['verification_time'],
            'online': line['online'],
            'target': line['target']
        }
        if url is not None and url != '':
            try:
                url = Url.get_or_create(value=url)
                url.add_context(context)
                url.add_source(self.name)
                url.tag(tags)
            except ObservableValidationError as e:
                logging.error(e)
def update(self):
    """Fetch the abuseIPDB blacklist; raises if no API key is configured.

    Raises:
        Exception: when no abuseIPDB API key is present in yeti.conf.
    """
    api_key = yeti_config.get("abuseIPDB", "key")
    if not api_key:
        raise Exception(
            "Your abuseIPDB API key is not set in the yeti.conf file")
    base = "https://api.abuseipdb.com/api/v2/blacklist?&key=%s&plaintext&limit=10000"
    # change the limit rate if you subscribe to a paid plan
    self.source = base % (api_key)
    for entry in self.update_lines():
        self.analyze(entry)
def analyze(self, line):
    """Import one blocklist row: Url + hostname observables, linked IPs.

    Rows older than limits.max_age days or starting with '#' are ignored.
    """
    if not line or line[0].startswith("#"):
        return
    (date, _type, family, hostname, url, status,
     registrar, ips, asns, countries) = tuple(line)
    item_date = dateutil.parser.parse(date)
    max_age = yeti_config.get('limits', 'max_age')
    limit_date = datetime.now() - timedelta(days=max_age)
    if item_date < limit_date:
        return
    tags = []
    tags += TYPE_DICT[_type]
    tags.append(family.lower())
    context = {
        "first_seen": date,
        "status": status,
        "registrar": registrar,
        "countries": countries.split("|"),
        "asns": asns.split("|"),
        "source": self.name
    }
    try:
        url = Url.get_or_create(value=url.rstrip())
        url.add_context(context)
        url.tag(tags)
        # BUG FIX: keep the raw hostname string before it is replaced by an
        # Observable — the original compared each IP string against the
        # Observable object, which was never equal.
        hostname_value = hostname
        hostname = Observable.add_text(hostname)
        hostname.tag(tags + ['blocklist'])
        for ip in ips.split("|"):
            # Skip blanks and entries identical to the hostname field.
            if ip and ip != hostname_value:
                try:
                    i = Ip.get_or_create(value=ip)
                    i.active_link_to(
                        hostname, "First seen IP", self.name,
                        clean_old=False)
                except ObservableValidationError as e:
                    logging.error("Invalid Observable: {}".format(e))
    except ObservableValidationError as e:
        logging.error(e)
def analyze(self, item):
    """Record a CleanMX phishing RSS item as a tagged Url observable."""
    if not item:
        return
    # Ignore entries older than the configured limits.max_age (days).
    item_date = parser.parse(item['pubDate'])
    max_age = yeti_config.get('limits', 'max_age')
    cutoff = pytz.UTC.localize(datetime.now()) - timedelta(days=max_age)
    if item_date < cutoff:
        return
    context = {}
    tags = ['phishing']
    category = item['category']
    if category != '':
        context['threat'] = category
        # Normalize the category into a tag-safe signature in one pass.
        signature = category.translate(
            str.maketrans({' ': '_', '/': '_', ':': '_',
                           '.': '-', '!': '-'}))
        if signature == 'clean_site':
            return
        tags.append(signature)
    context['date_added'] = item_date
    context['source'] = self.name
    context['reference'] = item['link']
    try:
        url = Url.get_or_create(value=item['title'])
        url.add_context(context)
        url.add_source("feed")
        url.tag(tags)
    except ObservableValidationError as e:
        logging.error(e)
def analyze(self, item):
    """Import one URLhaus CSV row as a tagged Url observable.

    Rows older than limits.max_age days or starting with '#' are ignored.
    """
    if not item or item[0].startswith("#"):
        return
    id_feed, dateadded, url, url_status, threat, tags, urlhaus_link = item
    item_date = dateutil.parser.parse(dateadded)
    max_age = yeti_config.get('limits', 'max_age')
    limit_date = datetime.now() - timedelta(days=max_age)
    if item_date < limit_date:
        return
    if not url:
        return
    try:
        url_obs = Url.get_or_create(value=url)
        # FIX: `tags != None` was non-idiomatic and let an empty string
        # through, which produced a single empty tag [''].
        if tags:
            tags = tags\
                .replace(' ', '_')\
                .replace('/', '_')\
                .replace(':', '_')\
                .replace('.', '-')\
                .replace('!', '-')
            url_obs.tag(tags.split(','))
        context = {
            "id_urlhaus": id_feed,
            "first_seen": dateadded,
            "status": url_status,
            "source": self.name,
            "report": urlhaus_link,
            "threat": threat
        }
        url_obs.add_context(context)
        url_obs.add_source('feed')
    except ObservableValidationError as e:
        logging.error(e)
def get_instances(self):
    """Collect configured MISP instances into self.instances.

    An instance is kept only when both its url and key are configured.
    """
    self.instances = {}
    for name in yeti_config.get("misp", "instances", "").split(","):
        url = yeti_config.get(name, "url")
        key = yeti_config.get(name, "key")
        config = {
            "url": url,
            "key": key,
            "name": yeti_config.get(name, "name") or name,
            "galaxy_filter": yeti_config.get(name, "galaxy_filter"),
            "days": yeti_config.get(name, "days"),
            "organisations": {},
        }
        if url and key:
            self.instances[name] = config
def get_instances(self):
    """Collect configured MISP instances; log and skip broken entries."""
    self.instances = {}
    for instance in yeti_config.get("misp", "instances", "").split(","):
        config = {
            "name": yeti_config.get(instance, "name") or instance,
            "galaxy_filter": yeti_config.get(instance, "galaxy_filter"),
            "days": yeti_config.get(instance, "days"),
            "organisations": {},
        }
        try:
            config["url"] = yeti_config.get(instance, "url")
            config["key"] = yeti_config.get(instance, "key")
        except Exception as e:
            # Best-effort: a misconfigured instance must not abort the rest.
            logging.error("Error Misp connection %s" % e)
            continue
        self.instances[instance] = config
from __future__ import unicode_literals
import logging
import os

import geoip2.database
from geoip2.errors import AddressNotFoundError

from core.analytics import InlineAnalytics
from core.config.config import yeti_config
from core.errors import ObservableValidationError

# Module-level GeoIP reader shared by the analytics below.
#   None  -> no [maxmind] path configured (lookups disabled)
#   False -> a path was configured but the database could not be opened
reader = None

try:
    # Path to the MaxMind GeoLite2-City database, from yeti.conf [maxmind].
    path = yeti_config.get('maxmind', 'path')
    if path:
        reader = geoip2.database.Reader(path)
except IOError as e:
    logging.info(
        "Could not open GeoLite2-City.mmdb. Will proceed without GeoIP data")
    logging.info(e)
    reader = False


class ProcessIp(InlineAnalytics):
    """Extracts information from IP addresses."""

    default_values = {
        "name": "ProcessIp",
        "description": "Extracts information from IP addresses",
    }

    # Observable type this inline analytics runs on.
    ACTS_ON = 'Ip'
def __init__(self, *args, **kwargs):
    """Build the OTXv2 client from yeti.conf and load configured pulses."""
    super(OtxFeed, self).__init__(*args, **kwargs)
    api_key = yeti_config.get('otx', 'key')
    self.otx = OTXv2(api_key)
    self.get_pulses()
from __future__ import unicode_literals
import logging

import geoip2.database
from geoip2.errors import AddressNotFoundError

from core.analytics import InlineAnalytics
from core.config.config import yeti_config
from core.errors import ObservableValidationError

# Module-level GeoIP reader shared by the analytics below.
#   None  -> no [maxmind] path configured (lookups disabled)
#   False -> a path was configured but the database could not be opened
reader = None

try:
    # Path to the MaxMind GeoLite2-City database, from yeti.conf [maxmind].
    path = yeti_config.get("maxmind", "path")
    if path:
        reader = geoip2.database.Reader(path)
except IOError as e:
    logging.info("Could not open GeoLite2-City.mmdb. Will proceed without GeoIP data")
    logging.info(e)
    reader = False


class ProcessIp(InlineAnalytics):
    """Extracts information from IP addresses."""

    default_values = {
        "name": "ProcessIp",
        "description": "Extracts information from IP addresses",
    }

    # Observable type this inline analytics runs on.
    ACTS_ON = "Ip"

    @staticmethod
from __future__ import unicode_literals
import logging

import geoip2.database
from geoip2.errors import AddressNotFoundError

from core.analytics import InlineAnalytics
from core.config.config import yeti_config
from core.errors import ObservableValidationError

# Module-level GeoIP reader shared by the analytics below.
#   None  -> no [maxmind] path configured (lookups disabled)
#   False -> a path was configured but the database could not be opened
reader = None

try:
    # Path to the MaxMind GeoLite2-City database, from yeti.conf [maxmind].
    path = yeti_config.get('maxmind', 'path')
    if path:
        reader = geoip2.database.Reader(path)
except IOError as e:
    logging.info(
        "Could not open GeoLite2-City.mmdb. Will proceed without GeoIP data")
    logging.info(e)
    reader = False


class ProcessIp(InlineAnalytics):
    """Extracts information from IP addresses."""

    default_values = {
        "name": "ProcessIp",
        "description": "Extracts information from IP addresses",
    }

    # Observable type this inline analytics runs on.
    ACTS_ON = 'Ip'
"""Set up the Yeti user logger (file-backed)."""
import logging
import os
from logging import FileHandler
from logging import Formatter

from core.config.config import yeti_config

# One-line timestamped log format shared by the handler below.
LOG_FORMAT = (
    "%(asctime)s [%(levelname)s]: %(message)s")
LOG_LEVEL = logging.INFO

# user logger
USER_LOG_FILE = yeti_config.get('logging', 'filename')

# ROBUSTNESS FIX (mirrors the newer variant of this module): fall back to
# /tmp when the configured logging directory does not exist — otherwise
# FileHandler raises at import time.
if not os.path.isdir(os.path.dirname(USER_LOG_FILE)):
    USER_LOG_FILE = "/tmp/yeti.log"

userLogger = logging.getLogger("userLogger.messaging")
userLogger.setLevel(LOG_LEVEL)

userLogger_file_handler = FileHandler(USER_LOG_FILE)
userLogger_file_handler.setLevel(LOG_LEVEL)
userLogger_file_handler.setFormatter(Formatter(LOG_FORMAT))
userLogger.addHandler(userLogger_file_handler)
def analyze(self, item):
    """Import a URL-to-payload row: Url observable, File and md5/sha256 Hashes.

    Rows older than limits.max_age days or starting with '#' are ignored.
    """
    if not item or item[0].startswith("#"):
        return
    first_seen, url, filetype, md5, sha256, signature = item
    item_date = dateutil.parser.parse(first_seen)
    max_age = yeti_config.get('limits', 'max_age')
    limit_date = datetime.now() - timedelta(days=max_age)
    if item_date < limit_date:
        return
    if not url:
        return
    # The feed encodes "no signature" as the literal string 'None'; the
    # original `signature != None` check was always true for a CSV string
    # and tagged the URL with "None". Use one consistent check everywhere.
    has_signature = bool(signature) and signature != 'None'
    try:
        url_obs = Url.get_or_create(value=url)
        if has_signature:
            # Normalize the signature into a tag-safe form.
            tag = signature\
                .replace(' ', '_')\
                .replace('/', '_')\
                .replace(':', '_')\
                .replace('.', '-')\
                .replace('!', '-')
            url_obs.tag(tag)
        context = {
            'first_seen': first_seen,
            'source': self.name
        }
        url_obs.add_context(context)
        url_obs.add_source('feed')
        context_malware = {
            'source': self.name
        }
        malware_file = File.get_or_create(
            value='FILE:{}'.format(sha256))
        malware_file.add_context(context_malware)
        sha256 = Hash.get_or_create(value=sha256)
        sha256.tag(filetype)
        sha256.add_context(context_malware)
        if has_signature:
            sha256.tag(signature)
        md5 = Hash.get_or_create(value=md5)
        md5.add_context(context_malware)
        md5.tag(filetype)
        if has_signature:
            md5.tag(signature)
        malware_file.active_link_to(md5, 'md5', self.name)
        malware_file.active_link_to(sha256, 'sha256', self.name)
        if has_signature:
            malware_file.tag(signature)
        malware_file.tag(filetype)
        url_obs.active_link_to(malware_file, 'drops', self.name)
    except ObservableValidationError as e:
        logging.error(e)
"""Set up logging for Yeti."""
import logging
import os
from logging import FileHandler
from logging import Formatter

from core.config.config import yeti_config

# One-line timestamped log format shared by the handler below.
LOG_FORMAT = "%(asctime)s [%(levelname)s]: %(message)s"
LOG_LEVEL = logging.INFO

# user logger
USER_LOG_FILE = yeti_config.get("logging", "filename")

# Fall back to tmp if the logging directory does not exist
# (FileHandler would otherwise raise at import time).
if not os.path.isdir(os.path.dirname(USER_LOG_FILE)):
    USER_LOG_FILE = "/tmp/yeti.log"

# Dedicated file-backed logger for user-facing messages.
userLogger = logging.getLogger("userLogger.messaging")
userLogger.setLevel(LOG_LEVEL)

userLogger_file_handler = FileHandler(USER_LOG_FILE)
userLogger_file_handler.setLevel(LOG_LEVEL)
userLogger_file_handler.setFormatter(Formatter(LOG_FORMAT))
userLogger.addHandler(userLogger_file_handler)