def main():
    """CLI driver: dump local Safe Browsing state and lookup detail for one URL.

    argv[1] is the GSB API key, argv[2] the URL to inspect. Prints each
    intermediate artifact (canonical URL, permutations, digests, hash
    prefixes) and finally the library's own lookup verdict.
    """
    api_key = sys.argv[1]
    target = sys.argv[2]
    sbl = SafeBrowsingList(api_key, db_path="../gsb_v4.db", platforms=["WINDOWS"])
    print(sbl.storage.get_threat_lists())
    u = URL(target)
    print(u.url)
    print(u.canonical)
    # Every lookup permutation of the canonical URL, with its SHA-256 digest.
    for pattern in u.url_permutations(u.canonical):
        print(pattern)
        print(u.digest(pattern))
    url_hashes = u.hashes
    print(url_hashes)
    full_hashes = list(url_hashes)
    print(full_hashes)
    # 4-byte hash prefixes ("cues") drive the local-database lookup.
    cues = [to_hex(fh[0:4]) for fh in full_hashes]
    print(cues)
    print(sbl.storage.lookup_hash_prefix(cues))
    # End-to-end lookup as the library itself performs it.
    verdict = sbl.lookup_url(target)
    print(verdict)
def URLPermutations(url):
    """Return the Safe Browsing lookup permutations of *url* as a list."""
    return list(URL(url).url_permutations(url))
def computeURLHashes(twitter_urls):
    """Map each URL in *twitter_urls* to the list of its Safe Browsing full hashes.

    Duplicate URLs are hashed only once (first occurrence wins). Progress is
    reported via progress_bar() roughly every 1000 processed URLs.

    :param twitter_urls: sized iterable of URL strings.
    :return: dict mapping url -> list of full SHA-256 digests (URL(url).hashes).
    """
    twitter_urls_hashes_dict = {}
    # BUG FIX: the original computed len(twitter_urls_dict), an undefined
    # name (NameError at runtime); the intended total is the input length.
    num_urls = len(twitter_urls)
    counter = 0
    print_counter = 0
    for url in twitter_urls:
        if url not in twitter_urls_hashes_dict:
            # URL.hashes yields the full digests of every canonicalized
            # permutation of the URL; materialize them once per unique URL.
            twitter_urls_hashes_dict[url] = list(URL(url).hashes)
        # NOTE(review): counters tracked per iteration; in the flattened
        # original the increment's nesting was ambiguous — confirm whether
        # duplicates should advance the progress bar.
        counter += 1
        print_counter += 1
        if print_counter == 1000:
            progress_bar(counter, num_urls, '%s of %s' % (counter, num_urls))
            print_counter = 0
    return twitter_urls_hashes_dict
def lookup_url(self, url):
    """Look up specified URL in Safe Browsing threat lists."""
    matches = self._lookup_hashes(URL(url).hashes)
    return matches if matches else None
def formatURL(url, permutation=False):
    """Return the canonical form of *url*.

    The URL is UTF-8 encoded before canonicalization. *permutation* is
    accepted for interface compatibility but is not used here.
    """
    encoded = url.encode('utf8')
    return URL(encoded).canonical
def lookup_url(self, url):
    """Look up specified URL in Safe Browsing threat lists.

    :param url: URL string; non-str input is UTF-8 encoded first.
    :raises ValueError: if *url* is empty or whitespace-only.
    :return: matching threat-list names from _lookup_hashes, or None.
    """
    # isinstance() is the idiomatic type check (was: type(url) is not str);
    # it also correctly accepts str subclasses without re-encoding them.
    if not isinstance(url, str):
        url = url.encode('utf8')
    if not url.strip():
        raise ValueError("Empty input string.")
    list_names = self._lookup_hashes(URL(url).hashes)
    return list_names if list_names else None
def lookup_url(self, url):
    """Look up specified URL in Safe Browsing threat lists.

    Commits the storage transaction after a successful lookup; rolls back
    and re-raises on any error.
    """
    # Coerce non-str input to UTF-8 before hashing.
    if type(url) is not str:
        url = url.encode('utf8')
    if not url.strip():
        raise ValueError("Empty input string.")
    hashes = URL(url).hashes
    try:
        # commit stays inside the try so a failing commit also rolls back.
        matches = self._lookup_hashes(hashes)
        self.storage.commit()
    except Exception:
        self.storage.rollback()
        raise
    return matches if matches else None
def lookup_urls(self, urls):
    """Look up list of URLs in Safe Browsing threat lists.

    :param urls: iterable of URL strings.
    :return: result of self._lookup_map_hashes(...) if anything matched,
             otherwise None.
    """
    # Map each full hash back to the URL that produced it; the first URL
    # producing a given hash wins.
    url_hashes = {}
    for url in urls:
        for url_hash in URL(url).hashes:
            # Idiom fix: "x not in d" (was: "not x in d").
            if url_hash not in url_hashes:
                url_hashes[url_hash] = url
    if not url_hashes:
        # Empty input produced no hashes: nothing to look up.
        # Explicit None (was a bare "return") for consistency with below.
        return None
    try:
        list_names = self._lookup_map_hashes(url_hashes)
        self.storage.commit()
    except Exception:
        self.storage.rollback()
        raise
    return list_names if list_names else None
def test_permutations(self):
    """Each fixture URL must yield exactly its expected permutation list."""
    for source, expected in self.url_permutations.items():
        generated = list(URL.url_permutations(source))
        self.assertEqual(generated, expected)
def test_canonicalize(self):
    """Canonicalizing each raw fixture URL must match the expected form."""
    for raw, expected in self.canonical_urls.items():
        self.assertEqual(URL(raw).canonical, expected)
#gsb_matches[formatURL('http://nastygirls.xyz')] = None #t_co_filtered['http://nastygirls.xyz'] = None op_urls = importOpenPhishURLs() pt_urls = importPhishTankURLs() found_ctr = 0 for key in gsb_matches: #print key if key in t_co_filtered: print key print "GSB (gglsbl) timestamps:" url = key url_hashes = URL(url).hashes hash_list = [] for h in url_hashes: hash_list.append(h) for url_hash in hash_list: hash_prefix = sqlite3.Binary(url_hash[0:4]) #hash_prefix = str(hash_prefix).encode('hex') #print hash_prefix cursor.execute(""" SELECT timestamp, threat_type, platform_type, threat_entry_type FROM hash_prefix WHERE value = ? """, (hash_prefix, )) #get all hash prefixes
def lookup_db():
    """Web endpoint: look up a URL against the local Safe Browsing database.

    Reads the API key and database path from the config file, canonicalizes
    the 'url' query parameter, computes its permutations/digests, matches
    the 4-byte prefixes against local storage, and returns the detail as
    JSON. On error returns a JSON {status, message} body with that status.

    NOTE(review): relies on module globals — config, os, ConfigParser,
    logging, request, re, jsonify — presumably a Flask app; confirm.
    """
    from gglsbl import SafeBrowsingList
    from gglsbl.protocol import URL
    from gglsbl.utils import to_hex
    res = {}
    rdict = {
        'status': '',
        'message': '',
    }
    # Defaults, overridden by the config file when present.
    key = ''
    db = '../gsb_v4.db'
    platforms = ['WINDOWS']
    if os.path.isfile(config):
        cp = ConfigParser()
        cp.read(config)
        if 'api' in cp:
            if 'key' in cp['api']:
                key = cp['api']['key']
        if 'database' in cp:
            if 'localdb' in cp['database']:
                db = cp['database']['localdb']
    if not key:
        # Without an API key the library cannot be constructed.
        logging.error('API key not found.')
        rdict['status'] = 500
        rdict['message'] = 'Internal Server Error'
    url = ''
    update = False
    if request.method == 'GET':
        url = request.args.get('url')
        # NOTE(review): 'update' is read but never used (the cache-update
        # call below is commented out).
        update = request.args.get('update')
        if not url:
            rdict['status'] = 400
            rdict['message'] = "The parameter 'url' is missing"
    if not rdict['status']:
        # Happy path: no error recorded so far.
        sbl = SafeBrowsingList(key, db_path=db, platforms=platforms)
        logging.debug(sbl.storage.get_threat_lists())
        #if update:
        #    sbl.update_hash_prefix_cache()
        u = URL(url)
        #res['url'] = {
        res = {
            'query': u.url,
            'canonical': u.canonical,
            'permutations': [],
        }
        # One entry per lookup permutation: the pattern and its digest.
        for i in u.url_permutations(u.canonical):
            p = {
                'pattern': i,
                'sha256': to_hex(u.digest(i))
            }
            #res['url']['permutations'].append(p)
            res['permutations'].append(p)
        url_hashes = u.hashes
        full_hashes = list(url_hashes)
        # 4-byte prefixes ("cues") drive the local-database lookup.
        cues = [to_hex(fh[0:4]) for fh in full_hashes]
        #res['cues'] = cues
        res['results'] = []
        matched = sbl.storage.lookup_hash_prefix(cues)
        for m in matched:
            prefix = to_hex(m[1])
            for p in res['permutations']:
                # NOTE(review): re.match used as a prefix test; harmless for
                # hex strings (no regex metacharacters) but startswith was
                # presumably intended.
                if re.match(prefix, p['sha256']):
                    result = {
                        'pattern': p['pattern'],
                        #'prefix': to_hex(m[1]),
                        'prefix': prefix,
                        'matched': str(m[0]),
                    }
                    res['results'].append(result)
        #bl = sbl.lookup_url(url)
        #res['matched'] = bl
        logging.info(res)
        res = jsonify(res)
    if not res:
        # res is still the empty dict: the happy path never ran.
        if not rdict["status"]:
            rdict["status"] = 400
            rdict["message"] = "Invalid request."
        res = jsonify(rdict)
        res.status_code = rdict["status"]
    return res
def test_permutations(self):
    """Permutation generation must reproduce the fixture's expected lists."""
    for candidate, want in self.url_permutations.items():
        self.assertEqual(list(URL.url_permutations(candidate)), want)