def _extract_phishtank(self, path_in, case_managers):
    """Collect verified PhishTank entries that match each case manager.

    Reads ``phishtank.txt`` (a JSON document) from ``path_in`` and, for every
    case manager, keeps the entries whose ``verified`` field is ``'yes'`` and
    whose brand contains one of the manager's ``cm_brands`` or whose URL
    contains one of the manager's ``cm_websites`` (plain substring tests on
    the values returned by ``Utils.sanitize_brand`` / ``Utils.sanitize_url``).

    Args:
        path_in: Directory that contains ``phishtank.txt``.
        case_managers: Mapping of case-manager name to a mapping holding the
            ``'cm_brands'`` and ``'cm_websites'`` sequences.

    Returns:
        A dict mapping case-manager name to a list of dicts with the keys
        ``brand``, ``url``, ``ip``, ``country``, ``time`` and ``domain``.
        Missing (``None``) feed values are stored as the string ``'null'``.
        Only the first entry per domain is kept for each case manager, and a
        case manager with no matching entry does not appear in the result.

    Raises:
        Exception: Any error hit while reading or processing the feed is
            logged and then re-raised unchanged.
    """
    logging.debug("Extracting phishing web-sites from PhishTank feeds...")
    ph_ws_phishtank = dict()
    try:
        with open(os.path.join(path_in, 'phishtank.txt')) as feed_file:
            entries = json.load(feed_file)

        for cm_name in case_managers:
            cm = case_managers[cm_name]
            cm_brands = cm['cm_brands']
            cm_websites = cm['cm_websites']
            # Domains already stored for this case manager; assumes
            # Utils.extract_domain returns a hashable value (e.g. a string).
            seen_domains = set()

            for entry in entries:
                if entry['verified'] != 'yes':
                    continue

                brand = entry['target']
                brand = 'null' if brand is None else Utils.sanitize_brand(brand)
                url = entry['url']
                url = 'null' if url is None else Utils.sanitize_url(url)

                # Brand match is tried first; the URL is only a fallback.
                relevant = (any(known in brand for known in cm_brands) or
                            any(site in url for site in cm_websites))
                if not relevant:
                    continue

                elem = dict()
                elem['brand'] = brand
                elem['url'] = url

                details = entry['details']
                if details:
                    # Only the first details record is used.
                    ip = details[0]['ip_address']
                    elem['ip'] = 'null' if ip is None else ip
                    country_code = details[0]['country']
                    if country_code is None:
                        elem['country'] = 'null'
                    else:
                        elem['country'] = country_code.lower()
                else:
                    elem['ip'] = 'null'
                    elem['country'] = 'null'

                time_string = entry['submission_time']
                if time_string is None:
                    elem['time'] = 'null'
                else:
                    submitted = datetime.datetime.strptime(
                        time_string, "%Y-%m-%dT%H:%M:%S+00:00")
                    elem['time'] = submitted.strftime("%Y-%m-%d %H:%M:%S")

                domain = Utils.extract_domain(url)
                elem['domain'] = domain

                # Keep only the first element seen for each domain.
                if domain in seen_domains:
                    continue
                seen_domains.add(domain)
                ph_ws_phishtank.setdefault(cm_name, []).append(elem)

        return ph_ws_phishtank
    except Exception as e:
        logging.error("Error extracting phishing web-sites from PhishTank feed: %s", e)
        # Re-raise the original error so its type and traceback survive.
        raise
def _extract_cleanmx(self, path_in, case_managers):
    """Collect alive CleanMX entries that match each case manager.

    Reads ``cleanmx.txt`` (a JSON document) from ``path_in``, takes the list
    found under ``output -> entries -> entry`` and, for every case manager,
    keeps the entries whose ``response`` field is ``'alive'`` and whose brand
    contains one of the manager's ``cm_brands`` or whose URL contains one of
    the manager's ``cm_websites`` (plain substring tests on the values
    returned by ``Utils.sanitize_brand`` / ``Utils.sanitize_url``).

    Args:
        path_in: Directory that contains ``cleanmx.txt``.
        case_managers: Mapping of case-manager name to a mapping holding the
            ``'cm_brands'`` and ``'cm_websites'`` sequences.

    Returns:
        A dict mapping case-manager name to a list of dicts with the keys
        ``brand``, ``url``, ``ip``, ``country``, ``time`` and ``domain``.
        Missing (``None``) feed values are stored as the string ``'null'``.
        ``time`` is the entry's ``first`` epoch value rendered in UTC.
        Only the first entry per domain is kept for each case manager, and a
        case manager with no matching entry does not appear in the result.

    Raises:
        Exception: Any error hit while reading or processing the feed is
            logged and then re-raised unchanged.
    """
    logging.debug("Extracting phishing web-sites from CleanMX feeds...")
    ph_ws_cleanmx = dict()
    try:
        with open(os.path.join(path_in, 'cleanmx.txt')) as feed_file:
            entries = json.load(feed_file)['output']['entries']['entry']

        # Epoch seconds are converted by plain arithmetic from this origin so
        # the result is UTC regardless of the host's local time zone.
        epoch = datetime.datetime(1970, 1, 1)

        for cm_name in case_managers:
            cm = case_managers[cm_name]
            cm_brands = cm['cm_brands']
            cm_websites = cm['cm_websites']
            # Domains already stored for this case manager; assumes
            # Utils.extract_domain returns a hashable value (e.g. a string).
            seen_domains = set()

            for entry in entries:
                if entry['response'] != 'alive':
                    continue

                brand = entry['target']
                brand = 'null' if brand is None else Utils.sanitize_brand(brand)
                url = entry['url']
                url = 'null' if url is None else Utils.sanitize_url(url)

                # Brand match is tried first; the URL is only a fallback.
                relevant = (any(known in brand for known in cm_brands) or
                            any(site in url for site in cm_websites))
                if not relevant:
                    continue

                elem = dict()
                elem['brand'] = brand
                elem['url'] = url

                ip = entry['ip']
                elem['ip'] = 'null' if ip is None else ip

                country_code = entry['country']
                if country_code is None:
                    elem['country'] = 'null'
                else:
                    elem['country'] = country_code.lower()

                time_string = entry['first']
                if time_string is None:
                    elem['time'] = 'null'
                else:
                    first_seen = epoch + datetime.timedelta(
                        seconds=int(time_string))
                    elem['time'] = first_seen.strftime("%Y-%m-%d %H:%M:%S")

                domain = Utils.extract_domain(url)
                elem['domain'] = domain

                # Keep only the first element seen for each domain.
                if domain in seen_domains:
                    continue
                seen_domains.add(domain)
                ph_ws_cleanmx.setdefault(cm_name, []).append(elem)

        return ph_ws_cleanmx
    except Exception as e:
        logging.error("Error extracting phishing web-sites from CleanMX feed: %s", e)
        # Re-raise the original error so its type and traceback survive.
        raise
def _extract_openphish(self, path_in, case_managers):
    """Collect OpenPhish entries that match each case manager.

    Reads ``openphish.txt`` (a JSON object whose values are the feed entries)
    from ``path_in`` and, for every case manager, keeps the entries whose
    brand contains one of the manager's ``cm_brands`` or whose URL contains
    one of the manager's ``cm_websites`` (plain substring tests on the values
    returned by ``Utils.sanitize_brand`` / ``Utils.sanitize_url``).

    Args:
        path_in: Directory that contains ``openphish.txt``.
        case_managers: Mapping of case-manager name to a mapping holding the
            ``'cm_brands'`` and ``'cm_websites'`` sequences.

    Returns:
        A dict mapping case-manager name to a list of dicts with the keys
        ``brand``, ``url``, ``ip``, ``country``, ``time`` and ``domain``.
        Missing (``None``) feed values are stored as the string ``'null'``.
        Only the first entry per domain is kept for each case manager, and a
        case manager with no matching entry does not appear in the result.

    Raises:
        Exception: Any error hit while reading or processing the feed is
            logged and then re-raised unchanged.
    """
    logging.debug("Extracting phishing web-sites from OpenPhish feeds...")
    ph_ws_openphish = dict()
    try:
        with open(os.path.join(path_in, 'openphish.txt')) as feed_file:
            entries = json.load(feed_file)

        for cm_name in case_managers:
            cm = case_managers[cm_name]
            cm_brands = cm['cm_brands']
            cm_websites = cm['cm_websites']
            # Domains already stored for this case manager; assumes
            # Utils.extract_domain returns a hashable value (e.g. a string).
            seen_domains = set()

            # The feed's keys are not used, only the entry records.
            for entry in entries.values():
                brand = entry['brand']
                brand = 'null' if brand is None else Utils.sanitize_brand(brand)
                url = entry['url']
                url = 'null' if url is None else Utils.sanitize_url(url)

                # Brand match is tried first; the URL is only a fallback.
                relevant = (any(known in brand for known in cm_brands) or
                            any(site in url for site in cm_websites))
                if not relevant:
                    continue

                elem = dict()
                elem['brand'] = brand
                elem['url'] = url

                ip = entry['ip']
                elem['ip'] = 'null' if ip is None else ip

                country_code = entry['country_code']
                if country_code is None:
                    elem['country'] = 'null'
                else:
                    elem['country'] = country_code.lower()

                time_string = entry['discover_time']
                if time_string is None:
                    elem['time'] = 'null'
                else:
                    discovered = datetime.datetime.strptime(
                        time_string, "%d-%m-%Y %H:%M:%S %Z")
                    elem['time'] = discovered.strftime("%Y-%m-%d %H:%M:%S")

                domain = Utils.extract_domain(url)
                elem['domain'] = domain

                # Keep only the first element seen for each domain.
                if domain in seen_domains:
                    continue
                seen_domains.add(domain)
                ph_ws_openphish.setdefault(cm_name, []).append(elem)

        return ph_ws_openphish
    except Exception as e:
        logging.error("Error extracting phishing web-sites from OpenPhish feed: %s", e)
        # Re-raise the original error so its type and traceback survive.
        raise