Ejemplo n.º 1
0
def main():
    """Print every intermediate step of a Safe Browsing lookup for one URL.

    Command-line arguments:
        sys.argv[1]: API key handed to ``SafeBrowsingList``.
        sys.argv[2]: URL to inspect.

    Output, in order: the stored threat lists, the URL as given, its
    canonical form, each permutation followed by its digest, the URL
    hashes, the hashes as a list, their 4-byte hex prefixes, the
    hash-prefix lookup result from storage, and finally the result of
    ``lookup_url``.
    """
    api_key = sys.argv[1]
    sbl = SafeBrowsingList(api_key, db_path="../gsb_v4.db", platforms=["WINDOWS"])
    # The cache refresh was already disabled in this example:
    # sbl.update_hash_prefix_cache()
    print(sbl.storage.get_threat_lists())

    target = sys.argv[2]
    parsed = URL(target)
    print(parsed.url)
    print(parsed.canonical)
    for variant in parsed.url_permutations(parsed.canonical):
        print(variant)
        print(parsed.digest(variant))
    hashes = parsed.hashes
    print(hashes)

    full_hashes = list(hashes)
    print(full_hashes)

    # Only the first four bytes of each full hash are used as the lookup cue.
    prefixes = [to_hex(full_hash[0:4]) for full_hash in full_hashes]
    print(prefixes)

    print(sbl.storage.lookup_hash_prefix(prefixes))
    verdict = sbl.lookup_url(target)
    print(verdict)
Ejemplo n.º 2
0
def URLPermutations(url):
    """Return a list of everything ``URL(url).url_permutations(url)`` yields."""
    return list(URL(url).url_permutations(url))
Ejemplo n.º 3
0
def computeURLHashes(twitter_urls):
    """Map every distinct URL in *twitter_urls* to the list of its hashes.

    Args:
        twitter_urls: A sized iterable of URLs (``len()`` is taken for the
            progress display). Repeated URLs are hashed only once.

    Returns:
        A dict whose keys are the URLs and whose values are lists holding
        the items produced by ``URL(url).hashes``.
    """
    twitter_urls_hashes_dict = {}
    # The total must come from the argument itself; the previous code read an
    # unrelated name (``twitter_urls_dict``) that this function never defines.
    num_urls = len(twitter_urls)
    for counter, url in enumerate(twitter_urls, 1):
        if url not in twitter_urls_hashes_dict:
            twitter_urls_hashes_dict[url] = list(URL(url).hashes)
        # Report progress once every 1000 processed URLs (duplicates count).
        if counter % 1000 == 0:
            progress_bar(counter, num_urls, '%s of %s' % (counter, num_urls))
    return twitter_urls_hashes_dict
Ejemplo n.º 4
0
 def lookup_url(self, url):
     """Check *url* against the Safe Browsing threat lists.

     Returns whatever ``self._lookup_hashes`` produces for the URL's
     hashes, or ``None`` when that result is empty/falsy.
     """
     matches = self._lookup_hashes(URL(url).hashes)
     return matches or None
Ejemplo n.º 5
0
def formatURL(url, permutation=False):
    """Return the canonical form of *url* after encoding it as UTF-8.

    *permutation* is accepted for interface compatibility but is not used.
    """
    return URL(url.encode('utf8')).canonical
Ejemplo n.º 6
0
 def lookup_url(self, url):
     """Look up specified URL in Safe Browsing threat lists.

     Args:
         url: The URL to check. Anything that is not a ``str`` instance is
             converted with ``url.encode('utf8')`` first.

     Returns:
         The result of ``self._lookup_hashes`` for the URL's hashes, or
         ``None`` when that result is empty/falsy.

     Raises:
         ValueError: If the URL is empty or contains only whitespace.
     """
     # isinstance (rather than an exact type() comparison) keeps str
     # subclasses from being pushed through encode() needlessly.
     if not isinstance(url, str):
         url = url.encode('utf8')
     if not url.strip():
         raise ValueError("Empty input string.")
     list_names = self._lookup_hashes(URL(url).hashes)
     return list_names or None
Ejemplo n.º 7
0
 def lookup_url(self, url):
     """Look up specified URL in Safe Browsing threat lists.

     The hash lookup and the storage commit run together; if either
     raises, the storage is rolled back and the exception propagates.

     Args:
         url: The URL to check. Anything that is not a ``str`` instance is
             converted with ``url.encode('utf8')`` first.

     Returns:
         The result of ``self._lookup_hashes`` for the URL's hashes, or
         ``None`` when that result is empty/falsy.

     Raises:
         ValueError: If the URL is empty or contains only whitespace.
     """
     # isinstance (rather than an exact type() comparison) keeps str
     # subclasses from being pushed through encode() needlessly.
     if not isinstance(url, str):
         url = url.encode('utf8')
     if not url.strip():
         raise ValueError("Empty input string.")
     url_hashes = URL(url).hashes
     try:
         list_names = self._lookup_hashes(url_hashes)
         self.storage.commit()
     except Exception:
         # Undo any partial storage changes, then let the caller see the error.
         self.storage.rollback()
         raise
     return list_names or None
Ejemplo n.º 8
0
    def lookup_urls(self, urls):
        """Look up a list of URLs in the Safe Browsing threat lists.

        Builds a mapping from each URL hash to the first URL that produced
        it, hands that mapping to ``self._lookup_map_hashes`` and commits
        the storage. On any exception the storage is rolled back and the
        exception is re-raised.

        Returns the lookup result, or ``None`` when there was nothing to
        look up or the result is empty/falsy.
        """
        hash_to_url = {}
        for candidate in urls:
            for digest in URL(candidate).hashes:
                # Keep the first URL seen for a given hash.
                hash_to_url.setdefault(digest, candidate)

        if not hash_to_url:
            return None

        try:
            found = self._lookup_map_hashes(hash_to_url)
            self.storage.commit()
        except Exception:
            self.storage.rollback()
            raise

        return found or None
Ejemplo n.º 9
0
 def test_permutations(self):
     """Each key of ``self.url_permutations`` must expand to its listed value."""
     for source_url, expected in self.url_permutations.items():
         actual = list(URL.url_permutations(source_url))
         self.assertEqual(actual, expected)
Ejemplo n.º 10
0
 def test_canonicalize(self):
     """Each key of ``self.canonical_urls`` must canonicalize to its value."""
     for raw_url, expected in self.canonical_urls.items():
         canonical = URL(raw_url).canonical
         self.assertEqual(canonical, expected)
#gsb_matches[formatURL('http://nastygirls.xyz')] = None
#t_co_filtered['http://nastygirls.xyz'] = None

# NOTE: Python 2 script fragment (print statements). It relies on names
# defined outside this excerpt: gsb_matches, t_co_filtered, cursor, URL,
# sqlite3, importOpenPhishURLs and importPhishTankURLs. The loop body is cut
# off after the query below.
op_urls = importOpenPhishURLs()
pt_urls = importPhishTankURLs()

# Initialised here; not updated anywhere in the visible part of the script.
found_ctr = 0
for key in gsb_matches:
    #print key
    # Only report keys that appear in both gsb_matches and t_co_filtered.
    if key in t_co_filtered:
        print key

        print "GSB (gglsbl) timestamps:"
        url = key
        url_hashes = URL(url).hashes
        hash_list = []

        # Collect the hashes into a list before querying.
        for h in url_hashes:
            hash_list.append(h)

        for url_hash in hash_list:
            # The database is queried by the first four bytes of each hash.
            hash_prefix = sqlite3.Binary(url_hash[0:4])
            #hash_prefix = str(hash_prefix).encode('hex')
            #print hash_prefix

            cursor.execute("""
				SELECT timestamp, threat_type, platform_type, threat_entry_type
				FROM hash_prefix
				WHERE value = ?
			""", (hash_prefix, ))  # select every hash_prefix row whose value equals this prefix
Ejemplo n.º 12
0
def lookup_db():
    """Report how a requested URL matches the local Safe Browsing database.

    Relies on module-level names defined outside this function: ``config``
    (path of an INI file), ``request`` and ``jsonify`` (presumably Flask's;
    confirm against the module imports).

    Configuration: when the file named by ``config`` exists, ``[api] key``
    supplies the API key and ``[database] localdb`` overrides the default
    database path ``'../gsb_v4.db'``.

    Request: only GET is read; the URL comes from the ``url`` query
    parameter.

    Returns:
        On success, ``jsonify`` of a dict with:
            ``query``        - the URL as held by the ``URL`` object,
            ``canonical``    - its canonical form,
            ``permutations`` - ``{'pattern', 'sha256'}`` for each permutation,
            ``results``      - ``{'pattern', 'prefix', 'matched'}`` for each
                               permutation whose SHA-256 hex starts with a
                               prefix returned by the storage lookup.
        On failure, ``jsonify`` of ``{'status', 'message'}`` with the HTTP
        status code set accordingly: 500 when no API key is configured,
        400 when the ``url`` parameter is missing.
    """
    from gglsbl import SafeBrowsingList
    from gglsbl.protocol import URL
    from gglsbl.utils import to_hex

    def error_response(status, message):
        # Build the JSON error payload and mirror its status on the response.
        response = jsonify({'status': status, 'message': message})
        response.status_code = status
        return response

    key = ''
    db = '../gsb_v4.db'
    platforms = ['WINDOWS']
    if os.path.isfile(config):
        cp = ConfigParser()
        cp.read(config)
        if 'api' in cp and 'key' in cp['api']:
            key = cp['api']['key']
        if 'database' in cp and 'localdb' in cp['database']:
            db = cp['database']['localdb']

    # A missing key is a server-side problem and must not be masked by a
    # later client-side (400) error, so fail right here.
    if not key:
        logging.error('API key not found.')
        return error_response(500, 'Internal Server Error')

    url = request.args.get('url') if request.method == 'GET' else ''
    if not url:
        return error_response(400, "The parameter 'url' is missing")

    sbl = SafeBrowsingList(key, db_path=db, platforms=platforms)
    logging.debug(sbl.storage.get_threat_lists())

    u = URL(url)
    permutations = [
        {'pattern': variant, 'sha256': to_hex(u.digest(variant))}
        for variant in u.url_permutations(u.canonical)
    ]

    # The storage is queried with the hex of the first four bytes of each hash.
    cues = [to_hex(full_hash[0:4]) for full_hash in u.hashes]

    results = []
    for match in sbl.storage.lookup_hash_prefix(cues):
        prefix = to_hex(match[1])
        for perm in permutations:
            # Plain prefix comparison; no regular expression is needed.
            if perm['sha256'].startswith(prefix):
                results.append({
                    'pattern': perm['pattern'],
                    'prefix': prefix,
                    'matched': str(match[0]),
                })

    res = {
        'query': u.url,
        'canonical': u.canonical,
        'permutations': permutations,
        'results': results,
    }
    logging.info(res)
    return jsonify(res)
Ejemplo n.º 13
0
 def test_permutations(self):
     """Verify ``URL.url_permutations`` against the expected-permutations table."""
     for candidate, wanted in self.url_permutations.items():
         produced = list(URL.url_permutations(candidate))
         self.assertEqual(produced, wanted)