def derive(strings):
    """Normalize *strings* into observables and recursively expand them.

    Every non-empty input is parsed into its guessed observable type and
    normalized; values that fail validation are kept as plain strings.
    Registered analyzers are then run on each parsed observable to derive
    further values, which are expanded recursively.

    Returns:
        A tuple ``(values, extended)``: the set of normalized values for
        the inputs, and that set extended with everything derived.
    """
    normalized = set()
    parsed = set()
    for raw in iterify(strings):
        if not raw:
            continue
        try:
            obs = Observable.guess_type(raw)(value=raw)
            obs.normalize()
            parsed.add(obs)
            normalized.add(obs.value)
        except ObservableValidationError:
            # Unparseable strings are kept as-is.
            normalized.add(raw)
    derived = []
    for obs in parsed:
        for analyzer in analyzers.get(obs.__class__, []):
            derived.extend(
                candidate
                for candidate in analyzer.analyze_string(obs.value)
                if candidate and candidate not in normalized
            )
    if not derived:
        return normalized, normalized
    _, extended = derive(derived + list(normalized))
    return normalized, extended
def post(self):
    """Handle an analysis query over a set of observables.

    Reads a JSON body shaped like ``{"observables": [...], "params": {...}}``
    from the current request; each observable entry carries a ``value`` and
    optionally ``tags``. Values are type-guessed and cleaned; when
    ``params["save_query"]`` is truthy, each valid observable is saved,
    tagged, and given the "query" source. Invalid values are skipped.

    Returns:
        The rendered "analysis.html" payload built from match_observables().
    """
    q = request.get_json(silent=True)
    params = q.pop("params", {})
    observables = []
    for o in q["observables"]:
        try:
            obs = Observable.guess_type(o['value'])(value=o['value'])
            obs.clean()
            observables.append(obs.value)
            # Save observables & eventual tags to database
            if params.get('save_query', False):
                obs = obs.save()
                obs.tag(o.get("tags", []))
                obs.add_source("query")
        except ObservableValidationError:
            # Values that fail validation are silently dropped from the query.
            continue
    # match observables with known indicators
    data = match_observables([o for o in observables])

    # find related observables (eg. URLs for domain, etc.)
    # related_observables = [obs.get_related() for obs in observables]
    # data = self.match_observables(related_observable)
    #
    # we need to find a way to degrade the "confidence" in
    # hits obtained from related observables

    return render(data, "analysis.html")
def derive(observables):
    """Recursively derive new observable values from the input ones.

    Each input value is type-guessed, instantiated and cleaned; the cleaned
    value replaces the original in place. Every analyzer registered for the
    guessed type is then run on the cleaned value, and any new values they
    produce are themselves derived recursively until nothing new appears.
    Values that fail validation are kept untouched.

    Args:
        observables: a single value or an iterable of values to derive from.

    Returns:
        A list containing the (cleaned) input values plus all derived values.
    """
    new = []
    # Materialize so single strings become one-element lists and items can
    # be replaced in place below.
    observables = list(iterify(observables))
    for i, observable in enumerate(observables):
        try:
            t = Observable.guess_type(observable)
            temp = t(value=observable)
            temp.clean()
            observable = temp.value
            # Overwrite the raw input with its cleaned form.
            observables[i] = observable
            for a in analyzers.get(t, []):
                new.extend([
                    n for n in a.analyze_string(observable)
                    if n and n not in observables
                ])
        except ObservableValidationError:
            # Leave unparseable values as-is in the result.
            pass
    if len(new) == 0:
        return observables
    else:
        return observables + derive(new)
def _make_threat_nodes(threat, context, tags):
    """Build observable nodes (URL and/or Hash) from one threat dict.

    Args:
        threat: dict with at least 'threatStatus', 'threatID', 'threatType',
            'classification' and 'threat' keys (Proofpoint data — see the
            inline type/classification matrix below).
        context: context dict attached to each created observable.
        tags: list of dicts with a 'name' key; applied to every node.

    Returns:
        A list of created/fetched observable nodes, or None when the threat
        is not active or its type/classification is unsupported.
    """
    # extract Url and Hash info
    threats = dict()
    if threat['threatStatus'] != 'active':
        # FIXME, clear out false positive ?
        log.warning("threatStatus %s for threat %s", threat['threatStatus'],
                    threat['threatID'])
        log.debug(pprint.pformat(threat))
        return None
    log.debug('_make_threat_nodes for threat %s', threat['threatID'])
    # threattype, classification
    # url, phish: url leads to phishing page (threat is url)
    # url, malware: url leads to malware download (threat is url, threatid is maybe sha256)
    # attachment, malware: attachement is malware (threat is sha256)
    # spam, url
    if threat['threatType'] == 'url':
        if threat['classification'] == 'phish':
            pass  # just keep the url
        elif threat['classification'] == 'malware':
            # get url and hash
            threats['attachment'] = threat
        elif threat['classification'] == 'spam':
            log.info('URL threat - ignore classification %s',
                     threat['classification'])
        else:
            log.error('Type: url, Unsupported classification %s',
                      threat['classification'])
            log.debug(pprint.pformat(threat))
            return None
        # The url node is created for every supported url classification.
        threats['url'] = threat
    elif threat['threatType'] == 'attachment':
        if threat['classification'] == 'malware':
            threats['attachment'] = threat
        else:
            log.error('Type: attachment, Unsupported classification %s',
                      threat['classification'])
            log.debug(pprint.pformat(threat))
            return None
    else:
        log.error('Unsupported threatType %s classification %s',
                  threat['threatType'], threat['classification'])
        log.debug(pprint.pformat(threat))
        return None
    # FIXME check if they exist already.
    # if they do, do not parse the threat a second time ?
    threat_nodes = []
    if 'url' in threats:
        # Proofpoint sometimes supplies a hostname marked as a Url.
        # this relies on Yeti to determine the type/class and add act appropriately
        threat_nodes.append(
            Observable.guess_type(threats['url']['threat']).get_or_create(
                value=threats['url']['threat'], context=[context]))
    if 'attachment' in threats:
        threat_nodes.append(
            Hash.get_or_create(value=threats['attachment']['threatID'],
                               context=[context]))
    for o in threat_nodes:
        o.tag([t['name'] for t in tags])
    return threat_nodes
def derive(observables):
    """Recursively derive new observable values from the input ones.

    Each value's type is guessed, every analyzer registered for that type
    is run on the value, and any new values produced are derived in turn
    until a fixed point is reached. Values that fail validation are skipped.

    Args:
        observables: a single string or an iterable of strings.

    Returns:
        A list containing the input values plus every derived value.
    """
    # Bug fix: materialize as a list. When a bare string reached this
    # function, `n not in observables` was a substring test and
    # `new + observables` raised TypeError on recursion. This also matches
    # the behavior of the sibling derive() implementations.
    observables = list(iterify(observables))
    new = []
    for observable in observables:
        try:
            t = Observable.guess_type(observable)
            for a in analyzers.get(t, []):
                new.extend([
                    n for n in a.analyze_string(observable)
                    if n and n not in observables
                ])
        except ObservableValidationError:
            # Best-effort: unparseable values produce no derivations.
            pass
    if len(new) == 0:
        return observables
    else:
        return derive(new + observables)
def derive(strings):
    """Turn raw strings into normalized observable values and expand them.

    Strings that validate are normalized through their guessed observable
    type; the rest are kept verbatim. Analyzers registered for each parsed
    type derive additional values, recursively, until exhaustion.

    Returns:
        A ``(values, extended)`` tuple of sets: normalized input values,
        and the same values extended with every derived one.
    """
    values, observables = set(), set()
    for s in iterify(strings):
        if not s:
            continue
        try:
            obs = Observable.guess_type(s)(value=s)
            obs.normalize()
        except ObservableValidationError:
            # Keep strings that fail validation as plain values.
            values.add(s)
        else:
            observables.add(obs)
            values.add(obs.value)
    # Collect analyzer output for every parsed observable in one pass.
    new = [
        n
        for obs in observables
        for a in analyzers.get(obs.__class__, [])
        for n in a.analyze_string(obs.value)
        if n and n not in values
    ]
    if not new:
        return values, values
    return values, derive(new + list(values))[1]
def each(url):
    """Extract the host of a URL observable and link it to the URL node.

    Args:
        url: a URL observable; its value is parsed by ProcessUrl.
    """
    # Bug fix: if analyze_string itself raises ObservableValidationError,
    # `host` was never assigned and the logging call below crashed with a
    # NameError. Fall back to the raw URL value for the error message.
    host = url.value
    try:
        host = ProcessUrl.analyze_string(url.value)[0]
        h = Observable.guess_type(host).get_or_create(value=host)
        h.add_source("analytics")
        Link.connect(src=url, dst=h)
    except ObservableValidationError:
        logging.error(
            "An error occurred when trying to add {} to the database".format(
                host))
def each(url):
    """Extract the host of a URL observable and link it as its hostname.

    Args:
        url: a URL observable; its value is parsed by ProcessUrl.

    Returns:
        The host observable on success, None when validation fails.
    """
    # Bug fix: if analyze_string itself raises ObservableValidationError,
    # `host` was never assigned and the logging call below crashed with a
    # NameError. Fall back to the raw URL value for the error message.
    host = url.value
    try:
        host = ProcessUrl.analyze_string(url.value)[0]
        h = Observable.guess_type(host).get_or_create(value=host)
        h.add_source("analytics")
        url.active_link_to(h, "hostname", "ProcessUrl", clean_old=False)
        return h
    except ObservableValidationError:
        logging.error(
            "An error occurred when trying to add {} to the database".format(
                host))
def each(url):
    """Extract the host of a URL observable and link it as its hostname.

    Args:
        url: a URL observable; its value is parsed by ProcessUrl.

    Returns:
        The host observable on success, None when validation fails.
    """
    # Bug fix: if analyze_string itself raises ObservableValidationError,
    # `host` was never assigned and the logging call below crashed with a
    # NameError. Fall back to the raw URL value for the error message.
    host = url.value
    try:
        host = ProcessUrl.analyze_string(url.value)[0]
        h = Observable.guess_type(host).get_or_create(value=host)
        h.add_source("analytics")
        url.active_link_to(h, "hostname", "ProcessUrl", clean_old=False)
        return h
    except ObservableValidationError:
        logging.error(
            "An error occurred when trying to add {} to the database".format(
                host))
def derive(observables):
    """Recursively derive new observable values from the input ones.

    Each value's type is guessed and every analyzer registered for that
    type is run on it; new values are derived in turn until a fixed point.

    Args:
        observables: a single string or a list of strings.

    Returns:
        A list containing the input values plus every derived value.
    """
    # Bug fix: `unicode` exists only on Python 2; the original isinstance
    # check raised NameError on Python 3. Keep py2 behavior, fall back to
    # `str` alone on py3.
    try:
        string_types = (str, unicode)  # noqa: F821 (py2 only)
    except NameError:
        string_types = (str,)
    if isinstance(observables, string_types):
        observables = [observables]
    new = []
    for observable in observables:
        t = Observable.guess_type(observable)
        for a in analyzers.get(t, []):
            new.extend([
                n for n in a.analyze_string(observable)
                if n and n not in observables
            ])
    if len(new) == 0:
        return observables
    else:
        return derive(new + observables)
def derive(observables):
    """Expand observable values with everything their analyzers produce.

    For each input value, guesses the observable type and runs all
    analyzers registered for it; newly produced values are expanded
    recursively. Values that fail validation contribute nothing.

    Returns:
        The input augmented (via recursion) with all derived values.
    """
    derived = []
    for value in iterify(observables):
        try:
            obs_type = Observable.guess_type(value)
            for analyzer in analyzers.get(obs_type, []):
                derived.extend(
                    candidate
                    for candidate in analyzer.analyze_string(value)
                    if candidate and candidate not in observables
                )
        except ObservableValidationError:
            # Skip values we cannot type.
            pass
    if not derived:
        return observables
    return derive(derived + observables)
# Command-line runner for one-shot analytics: lists available analytics or
# runs one by name against a single observable value.
import sys
import logging
from os import path

# Make the Yeti project root importable when running this file directly.
YETI_ROOT = path.normpath(path.dirname(path.dirname(path.abspath(__file__))))
sys.path.append(YETI_ROOT)

from core.analytics import OneShotAnalytics
from core.scheduling import Scheduler
from core.observables import Observable

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    Scheduler()
    # No arguments: list all registered one-shot analytics by name.
    if len(sys.argv) == 1:
        print("Re-run using a analytic name as argument")
        for f in OneShotAnalytics.objects():
            print("    {}".format(f.name))
    # argv[1] is the analytic name, argv[2] the observable value to analyze.
    if len(sys.argv) > 1:
        name = sys.argv[1]
        f = OneShotAnalytics.objects.get(name=name)
        print("Running {}...".format(f.name))
        observable = Observable.guess_type(
            sys.argv[2]).get_or_create(value=sys.argv[2])
        f.analyze(observable, {})
def match_observables(observables, save_matches=False, fetch_neighbors=True):
    """Match observable values against known observables and indicators.

    Args:
        observables: values to match; expanded through derive() first.
        save_matches: when True, indicator hits are persisted via
            Observable.add_text instead of being built transiently.
        fetch_neighbors: when True, also collect tagged observable
            neighbors and related entities of known observables.

    Returns:
        A dict with keys "matches", "unknown", "entities", "known" and
        "neighbors" describing everything found.
    """
    # Remove empty observables
    observables, extended_query = derive(observables)
    observables = list(observables)
    data = {
        "matches": [],
        "unknown": set(observables),
        "entities": {},
        "known": [],
        "neighbors": [],
    }
    # add to "known"
    for o in Observable.objects(value__in=list(extended_query)):
        data['known'].append(o.info())
        del_from_set(data['unknown'], o.value)
        if fetch_neighbors:
            for link, node in (o.incoming()):
                if isinstance(node, Observable):
                    # Only keep tagged neighbors that fall outside the query.
                    if (link.src.value not in extended_query or
                            link.dst.value not in extended_query) and node.tags:
                        data['neighbors'].append((link.info(), node.info()))
            for nodes in o.neighbors("Entity").values():
                for l, node in nodes:
                    # add node name and link description to indicator
                    # NOTE(review): node_data is built but never used in this
                    # branch — looks like dead code carried over.
                    node_data = {
                        "entity": node.type,
                        "name": node.name,
                        "link_description": l.description
                    }
                    # uniquely add node information to related entitites
                    ent = data['entities'].get(node.name, node.info())
                    if 'matches' not in ent:
                        ent['matches'] = {"observables": []}
                    if 'observables' not in ent['matches']:
                        ent['matches']['observables'] = []
                    info = node.info()
                    o_info = o.info()
                    info['matched_observable'] = {
                        "value": o_info['value'],
                        "tags": [t['name'] for t in o_info['tags']],
                        "human_url": o_info['human_url'],
                        "url": o_info['url'],
                        "context": o_info['context']
                    }
                    if info not in ent['matches']['observables']:
                        ent['matches']['observables'].append(info)
                    data['entities'][node.name] = ent
    # add to "matches"
    for o, i in Indicator.search(extended_query):
        if save_matches:
            o = Observable.add_text(o)
        else:
            # Build a transient observable; prefer the persisted one if it
            # already exists in the database.
            o = Observable.guess_type(o)(value=o)
            try:
                o.validate()
            except ObservableValidationError:
                pass
            try:
                o = Observable.objects.get(value=o.value)
            except Exception:
                pass
        match = i.info()
        match.update({
            "observable": o.info() if o.id else o.value,
            "related": [],
            "suggested_tags": set()
        })
        for nodes in i.neighbors("Entity").values():
            for l, node in nodes:
                # add node name and link description to indicator
                node_data = {
                    "entity": node.type,
                    "name": node.name,
                    "link_description": l.description
                }
                match["related"].append(node_data)
                # uniquely add node information to related entitites
                ent = data['entities'].get(node.name, node.info())
                if 'matches' not in ent:
                    ent['matches'] = {"indicators": []}
                if 'indicators' not in ent['matches']:
                    ent['matches']['indicators'] = []
                info = i.info()
                info['matched_observable'] = o.value
                if info not in ent['matches']['indicators']:
                    ent['matches']['indicators'].append(info)
                data['entities'][node.name] = ent
                # Suggest entity-generated tags the observable lacks.
                o_tags = o.get_tags()
                for tag in node.generate_tags():
                    if tag not in o_tags:
                        match["suggested_tags"].add(tag)
        data["matches"].append(match)
    data['entities'] = data['entities'].values()
    return data
def match_observables(observables, save_matches=False):
    """Match observable values against known observables and indicators.

    Args:
        observables: values to match; empty ones are dropped and the query
            is extended with everything derive() can produce from them.
        save_matches: when True, indicator hits are persisted via
            Observable.add_text instead of being built transiently.

    Returns:
        A dict with keys "matches", "unknown", "entities", "known" and
        "neighbors" describing everything found.
    """
    # Remove empty observables
    observables = [observable for observable in observables if observable]
    extended_query = set(observables) | set(derive(observables))
    added_entities = set()

    data = {
        "matches": [],
        "unknown": set(observables),
        "entities": [],
        "known": [],
        "neighbors": [],
    }

    # add to "known": observables already present in the database
    for o in Observable.objects(value__in=list(extended_query)):
        data['known'].append(o.info())
        del_from_set(data['unknown'], o.value)
        for link, node in (o.incoming()):
            if isinstance(node, Observable):
                # Only keep tagged neighbors that fall outside the query.
                if (link.src.value not in extended_query or
                        link.dst.value not in extended_query) and node.tags:
                    data['neighbors'].append((link.info(), node.info()))

    # add to "matches": indicators hitting on one of the queried values
    for o, i in Indicator.search(extended_query):
        del_from_set(data["unknown"], o)
        if save_matches:
            o = Observable.add_text(o)
        else:
            # Build a transient observable; prefer the persisted one if it
            # already exists in the database.
            o = Observable.guess_type(o)(value=o)
            o.validate()
            try:
                o = Observable.objects.get(value=o.value)
            except Exception:
                pass
        match = i.info()
        match.update({
            "observable": o.info(),
            "related": [],
            "suggested_tags": set()
        })
        for nodes in i.neighbors("Entity").values():
            for l, node in nodes:
                # add node name and link description to indicator
                node_data = {
                    "entity": node.type,
                    "name": node.name,
                    "link_description": l.description
                }
                match["related"].append(node_data)
                # uniquely add node information to related entitites
                if node.name not in added_entities:
                    nodeinfo = node.info()
                    nodeinfo['type'] = node.type
                    data["entities"].append(nodeinfo)
                    added_entities.add(node.name)
                # Idiom fix: plain loop instead of a list comprehension
                # executed only for its side effects.
                o_tags = o.get_tags()
                for tag in node.generate_tags():
                    if tag not in o_tags:
                        match["suggested_tags"].add(tag)
        data["matches"].append(match)
        del_from_set(data["unknown"], o.value)

    return data
def match_observables(observables, save_matches=False, fetch_neighbors=True):
    """Match observable values against known observables and indicators.

    Args:
        observables: values to match; empty ones are dropped, the rest are
            refanged, and the query is extended with derive() output.
        save_matches: when True, indicator hits are persisted via
            Observable.add_text instead of being built transiently.
        fetch_neighbors: when True, also collect tagged observable
            neighbors and related entities of known observables.

    Returns:
        A dict with keys "matches", "unknown", "entities", "known" and
        "neighbors" describing everything found.
    """
    # Remove empty observables
    observables = [refang(observable) for observable in observables if observable]
    extended_query = set(observables) | set(derive(observables))
    data = {
        "matches": [],
        "unknown": set(observables),
        "entities": {},
        "known": [],
        "neighbors": [],
    }
    # add to "known"
    for o in Observable.objects(value__in=list(extended_query)):
        data['known'].append(o.info())
        del_from_set(data['unknown'], o.value)
        if fetch_neighbors:
            for link, node in (o.incoming()):
                if isinstance(node, Observable):
                    # Only keep tagged neighbors that fall outside the query.
                    if (link.src.value not in extended_query or
                            link.dst.value not in extended_query) and node.tags:
                        data['neighbors'].append((link.info(), node.info()))
            for nodes in o.neighbors("Entity").values():
                for l, node in nodes:
                    # add node name and link description to indicator
                    # NOTE(review): node_data is built but never used in this
                    # branch — looks like dead code carried over.
                    node_data = {
                        "entity": node.type,
                        "name": node.name,
                        "link_description": l.description
                    }
                    # uniquely add node information to related entitites
                    ent = data['entities'].get(node.name, node.info())
                    if 'matches' not in ent:
                        ent['matches'] = {"observables": []}
                    if 'observables' not in ent['matches']:
                        ent['matches']['observables'] = []
                    info = node.info()
                    o_info = o.info()
                    info['matched_observable'] = {
                        "value": o_info['value'],
                        "tags": [t['name'] for t in o_info['tags']],
                        "human_url": o_info['human_url'],
                        "url": o_info['url']
                    }
                    if info not in ent['matches']['observables']:
                        ent['matches']['observables'].append(info)
                    data['entities'][node.name] = ent
    # add to "matches"
    for o, i in Indicator.search(extended_query):
        if save_matches:
            o = Observable.add_text(o)
        else:
            # Build a transient observable; prefer the persisted one if it
            # already exists in the database.
            o = Observable.guess_type(o)(value=o)
            try:
                o.validate()
            except ObservableValidationError:
                pass
            try:
                o = Observable.objects.get(value=o.value)
            except Exception:
                pass
        match = i.info()
        match.update({
            "observable": o.info(),
            "related": [],
            "suggested_tags": set()
        })
        for nodes in i.neighbors("Entity").values():
            for l, node in nodes:
                # add node name and link description to indicator
                node_data = {
                    "entity": node.type,
                    "name": node.name,
                    "link_description": l.description
                }
                match["related"].append(node_data)
                # uniquely add node information to related entitites
                ent = data['entities'].get(node.name, node.info())
                if 'matches' not in ent:
                    ent['matches'] = {"indicators": []}
                if 'indicators' not in ent['matches']:
                    ent['matches']['indicators'] = []
                info = i.info()
                info['matched_observable'] = o.value
                if info not in ent['matches']['indicators']:
                    ent['matches']['indicators'].append(info)
                data['entities'][node.name] = ent
                # Suggest entity-generated tags the observable lacks.
                # NOTE(review): list comprehension used for side effects only.
                o_tags = o.get_tags()
                [match["suggested_tags"].add(tag)
                 for tag in node.generate_tags() if tag not in o_tags]
        data["matches"].append(match)
    data['entities'] = data['entities'].values()
    return data
def _make_threat_nodes(threat, context, tags):
    """Build observable nodes (URL and/or Hash) from one threat dict.

    Args:
        threat: dict with at least 'threatStatus', 'threatID', 'threatType',
            'classification' and 'threat' keys (Proofpoint data — see the
            inline type/classification matrix below).
        context: context dict attached to each created observable.
        tags: list of dicts with a 'name' key; applied to every node.

    Returns:
        A list of created/fetched observable nodes, or None when the threat
        is not active or its type/classification is unsupported.
    """
    # extract Url and Hash info
    threats = dict()
    if threat['threatStatus'] != 'active':
        # FIXME, clear out false positive ?
        log.warning(
            "threatStatus %s for threat %s", threat['threatStatus'],
            threat['threatID'])
        log.debug(pprint.pformat(threat))
        return None
    log.debug('_make_threat_nodes for threat %s', threat['threatID'])
    # threattype, classification
    # url, phish: url leads to phishing page (threat is url)
    # url, malware: url leads to malware download (threat is url, threatid is maybe sha256)
    # attachment, malware: attachement is malware (threat is sha256)
    # spam, url
    if threat['threatType'] == 'url':
        if threat['classification'] == 'phish':
            pass  # just keep the url
        elif threat['classification'] == 'malware':
            # get url and hash
            threats['attachment'] = threat
        elif threat['classification'] == 'spam':
            log.info(
                'URL threat - ignore classification %s',
                threat['classification'])
        else:
            log.error(
                'Type: url, Unsupported classification %s',
                threat['classification'])
            log.debug(pprint.pformat(threat))
            return None
        # The url node is created for every supported url classification.
        threats['url'] = threat
    elif threat['threatType'] == 'attachment':
        if threat['classification'] == 'malware':
            threats['attachment'] = threat
        else:
            log.error(
                'Type: attachment, Unsupported classification %s',
                threat['classification'])
            log.debug(pprint.pformat(threat))
            return None
    else:
        log.error(
            'Unsupported threatType %s classification %s',
            threat['threatType'], threat['classification'])
        log.debug(pprint.pformat(threat))
        return None
    # FIXME check if they exist already.
    # if they do, do not parse the threat a second time ?
    threat_nodes = []
    if 'url' in threats:
        # Proofpoint sometimes supplies a hostname marked as a Url.
        # this relies on Yeti to determine the type/class and add act appropriately
        threat_nodes.append(
            Observable.guess_type(threats['url']['threat']).get_or_create(
                value=threats['url']['threat'], context=[context]))
    if 'attachment' in threats:
        threat_nodes.append(
            Hash.get_or_create(
                value=threats['attachment']['threatID'], context=[context]))
    for o in threat_nodes:
        o.tag([t['name'] for t in tags])
    return threat_nodes