Example #1
def computeURLHashes(twitter_urls):
    twitter_urls_hashes_dict = {}
    num_urls = len(twitter_urls)
    counter = 0
    printCounter = 0
    for url in twitter_urls:
        if url not in twitter_urls_hashes_dict:
            # One full SHA-256 digest per lookup permutation of the URL.
            twitter_urls_hashes_dict[url] = list(URL(url).hashes)
        counter += 1
        printCounter += 1
        if (printCounter == 1000):
            progress_bar(counter, num_urls, '%s of %s' % (counter, num_urls))
            printCounter = 0
    return twitter_urls_hashes_dict
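`URL(url).hashes` appears to come from the gglsbl client library: it yields one full SHA-256 digest per Safe Browsing lookup expression of the URL. A minimal sketch of the underlying idea, where `canonical_expressions` is a hypothetical stand-in for the canonicalisation gglsbl performs internally:

import hashlib

def sha256_hashes(url):
    # One full SHA-256 digest per host/path lookup expression of the URL.
    # canonical_expressions() is hypothetical; gglsbl derives these
    # expressions itself during URL canonicalisation.
    return [hashlib.sha256(expr).digest()
            for expr in canonical_expressions(url)]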
def importCertStreamURLs(limit):
    counter = 0
    printCounter = 0
    cur = con.cursor()
    cur.execute(
        "SELECT domain FROM certstream_domains_5 ORDER BY id DESC LIMIT " +
        str(limit))
    row = cur.fetchone()
    domains = []
    while row is not None:
        formatted_domain = formatURL("http://" + row[0])
        domains.append(formatted_domain)

        counter += 1
        printCounter += 1
        if (printCounter == 1000):
            progress_bar(counter, limit, '%s of %s' % (counter, limit))
            printCounter = 0
        row = cur.fetchone()
    cur.close()
    return domains
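The LIMIT clause is built by string concatenation. Given the `%s` placeholders used with this connection elsewhere, `con` appears to be a MySQLdb-style connection, so the same query can be parameterised (a sketch under that assumption):

cur.execute(
    "SELECT domain FROM certstream_domains_5 ORDER BY id DESC LIMIT %s",
    (int(limit),))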
def importRecentTCoFiltered(limit):
    cur = con.cursor()
    cur.execute(
        'SELECT full_url FROM t_co_all_urls_experiment ORDER BY id DESC LIMIT '
        + str(limit))
    row = cur.fetchone()
    twitter_urls_dict = {}
    printCounter = 0
    counter = 0
    while row is not None:
        formatted_url = formatURL(row[0])
        twitter_urls_dict[formatted_url] = None
        counter += 1
        printCounter += 1
        if (printCounter == 100):
            progress_bar(counter, limit, '%s of %s' % (counter, limit))
            printCounter = 0
        row = cur.fetchone()

    #print "twitter_urls_list len: ",len(twitter_urls_list)
    #print "num recent t.co urls: ",len(twitter_urls_dict)
    cur.close()
    return twitter_urls_dict
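Since every value in the returned dict is None, the same structure can be built in one step with `dict.fromkeys` (an equivalent sketch; a plain set would also serve for membership tests, but callers such as URLLookup index the dict, so the dict-of-None shape is kept):

formatted_urls = [formatURL(r[0]) for r in cur.fetchall()]
twitter_urls_dict = dict.fromkeys(formatted_urls)  # every value is None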
def URLLookup(twitter_urls, phishing_urls, blacklist):
    counter = 0
    match_counter = 0
    netloc_counter = 0
    printCounter = 0
    num_urls = len(twitter_urls)
    for url in twitter_urls:
        if url in phishing_urls:
            match_counter += 1
            print url
            logPhishingURL(url, 5, twitter_urls[url], blacklist)
            markTweetsPhishy(url, twitter_urls[url])
        url_permutations = URLPermutations(url)
        for url_p in url_permutations:
            if url_p in phishing_urls:
                match_counter += 1
                print url_p
                logPhishingURL(url, 5, twitter_urls[url], blacklist)
                markTweetsPhishy(url, twitter_urls[url])
        counter += 1
        printCounter += 1
        if (printCounter == 1000):
            progress_bar(counter, num_urls, '%s of %s ' % (counter, num_urls))
            printCounter = 0

    return (match_counter, netloc_counter)
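`URLPermutations` is defined elsewhere in the project; from its use here it presumably generates Safe Browsing-style lookup expressions, i.e. combinations of host suffixes and path prefixes. A rough sketch of that scheme (an illustration, not the project's actual implementation):

from urlparse import urlparse  # Python 2

def lookup_expressions(url):
    parsed = urlparse(url)
    host, path = parsed.netloc, parsed.path or "/"
    labels = host.split(".")
    # The exact host plus up to four trailing-label suffixes.
    hosts = [host] + [".".join(labels[i:])
                      for i in range(max(len(labels) - 5, 1), len(labels) - 1)]
    # "/", successive path prefixes, the exact path, and the path with query.
    segments = [s for s in path.split("/") if s]
    paths = ["/"] + ["/" + "/".join(segments[:i]) + "/"
                     for i in range(1, min(len(segments), 4))] + [path]
    if parsed.query:
        paths.append(path + "?" + parsed.query)
    return [h + p for h in hosts for p in set(paths)]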
Example #5
def importRecentTwitterURLs(limit, from_id=None, to_id=None):
    cur = con.cursor()
    if from_id is not None and to_id is not None:
        print "importing by range (" + str(from_id) + " to " + str(to_id) + ")"
        cur.execute('SELECT url,tweet_id FROM tweet_urls_5b WHERE id >= ' +
                    str(from_id) + ' AND id <= ' + str(to_id))
    else:
        print "importing recent by limit (" + str(limit) + ")"
        cur.execute(
            'SELECT url,tweet_id FROM tweet_urls_5b ORDER BY id DESC  LIMIT ' +
            str(limit))

    row = cur.fetchone()
    twitter_urls_dict = {}
    printCounter = 0
    counter = 0
    while row is not None:
        formatted_url = formatURL(row[0])
        tweet_id = row[1]
        if formatted_url not in twitter_urls_dict:
            # First sighting of this URL.
            twitter_urls_dict[formatted_url] = {tweet_id: None}
        else:
            # Inner dict acts as a set of tweet ids for the URL.
            twitter_urls_dict[formatted_url][tweet_id] = None

        counter += 1
        printCounter += 1
        if (printCounter == 1000):
            progress_bar(counter, limit, '%s of %s' % (counter, limit))
            printCounter = 0
        row = cur.fetchone()

    #print "twitter_urls_list len: ",len(twitter_urls_list)
    print "twitter_urls_dict len: ", len(twitter_urls_dict)
    cur.close()
    return twitter_urls_dict
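The nested dicts form a url -> {tweet_id: None} multimap, with the inner dict standing in for a set. The same accumulation, written more compactly with `collections.defaultdict` (a sketch; `rows` stands for the (url, tweet_id) result set):

from collections import defaultdict

def collect_urls(rows):
    url_to_tweet_ids = defaultdict(dict)
    for url, tweet_id in rows:
        # Inner dict keys act as a set of tweet ids per formatted URL.
        url_to_tweet_ids[formatURL(url)][tweet_id] = None
    return url_to_tweet_ids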
Example #6
def importRedirectionChainURLs(limit):
    cursor = con.cursor()
    #print "starting at "+str(start)+", batch size "+str(batch_size)
    print "importing redirection chain from most recent tweets..."
    #cursor.execute("SELECT redirection_chain,id FROM tweets_3 LIMIT "+str(start)+", "+str(batch_size)+" ")
    cursor.execute(
        "SELECT redirection_chain,id FROM tweets_5 ORDER BY id DESC limit " +
        str(limit))
    row = cursor.fetchone()
    printCounter = 0
    counter = 0
    twitter_urls_dict = {}
    while row is not None:
        tweet_id = row[1]
        if row[0] is not None:
            urls = row[0].split(" -> ")
            # Skip the first URL in each chain: importRecentTwitterURLs has already checked it.
            for url in urls[1:]:
                formatted_url = formatURL(url)

                if formatted_url not in twitter_urls_dict:
                    twitter_urls_dict[formatted_url] = {tweet_id: None}
                else:
                    # Inner dict acts as a set of tweet ids for the URL.
                    twitter_urls_dict[formatted_url][tweet_id] = None

        counter += 1
        printCounter += 1
        if (printCounter == 1000):
            progress_bar(counter, limit, '%s of %s' % (counter, limit))
            printCounter = 0
        row = cursor.fetchone()

    cursor.close()
    print "num unique urls: ", len(twitter_urls_dict)
    return twitter_urls_dict
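The `redirection_chain` column evidently stores each chain as a single string delimited by ' -> ', so splitting on that token recovers the hop list; for example (illustrative values):

chain = "http://t.co/abc -> http://bit.ly/xyz -> http://example.com/landing"
urls = chain.split(" -> ")
# ['http://t.co/abc', 'http://bit.ly/xyz', 'http://example.com/landing']
# urls[1:] drops the entry URL, which importRecentTwitterURLs already covers.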
Example #7
def produceURLPermutations(url_dict):
    print "producing URL permutations..."
    url_permutations_dict = {}
    counter = 0
    printCounter = 0
    for url in url_dict:
        url_permutations = URLPermutations(url)
        for url_p in url_permutations:
            url_permutations_dict[url_p] = False

        counter += 1
        printCounter += 1
        if (printCounter == 1000):
            progress_bar(counter, len(url_dict),
                         '%s of %s' % (counter, len(url_dict)))
            printCounter = 0
    return url_permutations_dict
def importRecentTwitterURLs(limit):
    cursor = con.cursor()
    print "importing most recently tweeted urls..."
    cursor.execute(
        "SELECT url,tweet_id FROM tweet_urls_5 ORDER BY id DESC limit " +
        str(limit))
    row = cursor.fetchone()
    printCounter = 0
    counter = 0
    twitter_urls_dict = {}
    while row is not None:
        formatted_url = formatURL(row[0])
        tweet_id = row[1]
        if formatted_url not in twitter_urls_dict:
            twitter_urls_dict[formatted_url] = {tweet_id: None}
        else:
            # Inner dict acts as a set of tweet ids for the URL.
            twitter_urls_dict[formatted_url][tweet_id] = None

        counter += 1
        printCounter += 1
        if (printCounter == 1000):
            progress_bar(counter, limit, '%s of %s' % (counter, limit))
            printCounter = 0
        row = cursor.fetchone()

    cursor.close()
    print "num unique urls: ", len(twitter_urls_dict)
    return twitter_urls_dict
Example #9
def importRedirectionChain(start, batch_size):
    cursor = con.cursor()
    print "import redirection chains\nstarting at " + str(start) + ", batch size " + str(batch_size)
    cursor.execute("SELECT redirection_chain,id FROM tweets_5 LIMIT " + str(start) + ", " + str(batch_size) + " ")
    row = cursor.fetchone()
    printCounter = 0
    counter = 0
    twitter_urls_dict = {}
    while row is not None:
        if row[0] is not None:
            urls = row[0].split(" -> ")
            for url in urls:
                formatted_url = formatURL(url)
                tweet_id = row[1]
                if formatted_url not in twitter_urls_dict:
                    twitter_urls_dict[formatted_url] = {tweet_id: None}
                else:
                    # Inner dict acts as a set of tweet ids for the URL.
                    twitter_urls_dict[formatted_url][tweet_id] = None

        counter += 1
        printCounter += 1
        if (printCounter == 1000):
            progress_bar(counter, batch_size, '%s of %s' % (counter, batch_size))
            printCounter = 0
        row = cursor.fetchone()
    cursor.close()
    print "num unique urls: ", len(twitter_urls_dict)
    return twitter_urls_dict
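A possible way to drive this batched import across a large table (a usage sketch; the total row count is assumed):

total_rows = 1000000  # assumed; in practice SELECT COUNT(*) FROM tweets_5
batch_size = 100000
for start in range(0, total_rows, batch_size):
    batch = importRedirectionChain(start, batch_size)
    # ... merge or process each batch here ...

Note that MySQL still scans past `start` rows for each OFFSET batch, so id-range pagination (as importRecentTwitterURLs does with from_id/to_id) scales better on large tables.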
Example #10
def URLLookup_v2(twitter_urls, gsb_url_hash_prefixes, twitter_urls_dict,
                 redirection_chain_url_lookup):
    # twitter_urls: url -> [sha256 full hashes]
    # twitter_urls_dict: url -> {tweet_id: None}
    counter = 0
    hash_prefix_counter = 0
    hash_prefix_no_collision_counter = 0
    printCounter = 0
    num_urls = len(twitter_urls)
    malware_count = 0
    phish_count = 0
    url_already_checked_ctr = 0
    url_matches_in_gsb = 0
    gsb_lookup_counter = 0
    malware_matches = {}
    phishing_matches = {}
    for url in twitter_urls:
        url_hashes = twitter_urls[url]
        for url_hash in url_hashes:
            hash_prefix = sqlite3.Binary(url_hash[0:4])
            hash_prefix = str(hash_prefix).encode('hex')
            if hash_prefix in gsb_url_hash_prefixes:
                #print "num hash prefixes:",gsb_url_hash_prefixes[hash_prefix]
                if gsb_url_hash_prefixes[hash_prefix] == 6:
                    hash_prefix_no_collision_counter += 1

                    if (lookup_gsb_full_hash(sqlite3.Binary(url_hash))):

                        database_lock = True
                        while database_lock:
                            try:
                                gsb_lookup_counter += 1
                                gsblookup = sbl.lookup_url(url)
                                if gsblookup:
                                    url_matches_in_gsb += 1
                                    cur = con.cursor()
                                    sql = "INSERT INTO gsb_full_hash_log_5(url, hash_prefix, full_hash) VALUES(%s, %s, %s)"
                                    cur.execute(sql,
                                                (url[0:500],
                                                 sqlite3.Binary(url_hash[0:4]),
                                                 sqlite3.Binary(url_hash)))

                                    for i in gsblookup:
                                        if str(i) == "MALWARE/ANY_PLATFORM/URL":

                                            print url
                                            cur.execute(
                                                "UPDATE gsb_full_hash_log_5 SET malware = '1' WHERE full_hash = %s",
                                                (url_hash, ))
                                            con.commit()
                                            #print "malware",url
                                            #sys.stdout.write('\r-')
                                            #sys.stdout.flush()
                                            malware_count += 1
                                            malware_matches[url] = (url_hash)
                                            logMalwareURL(
                                                url, 4, twitter_urls_dict[url],
                                                redirection_chain_url_lookup)
                                        if str(i) == "SOCIAL_ENGINEERING/ANY_PLATFORM/URL":
                                            print url
                                            cur.execute(
                                                "UPDATE gsb_full_hash_log_5 SET social_engineering = '1' WHERE full_hash = %s",
                                                (url_hash, ))
                                            con.commit()
                                            #print "phishing ",url
                                            phish_count += 1
                                            logPhishingURL(
                                                url, 4, twitter_urls_dict[url],
                                                redirection_chain_url_lookup)
                                            phishing_matches[url] = (url_hash)
                                else:  # Prefix matched but full hash not in GSB (a different URL); record it so it is not re-checked.
                                    cur = con.cursor()
                                    sql = "INSERT INTO gsb_full_hash_log_5(hash_prefix, full_hash, not_in_gsb) VALUES(%s, %s, %s)"
                                    cur.execute(sql,
                                                (sqlite3.Binary(url_hash[0:4]),
                                                 sqlite3.Binary(url_hash), 1))

                                database_lock = False

                            except KeyError:
                                # 18 Jun 2018, disabling error: 'exceptions.KeyError'>, KeyError('matches',)
                                # as appearing every minute or so
                                print "Looks like a key error:", sys.exc_info(
                                )[1]
                                log(
                                    "certstream-url-checker-v2-phishing_5.txt",
                                    "Looks like a key error: " +
                                    str(sys.exc_info()[1]))
                                print "URL:", url
                                time.sleep(5)
                                database_lock = False
                            except (RuntimeError, IntegrityError,
                                    urllib2.HTTPError, urllib2.URLError,
                                    SocketError) as e:
                                print e
                                print url
                                log(
                                    "tpl_fast_v2-output.txt", "error: " +
                                    str(e.message) + "\nURL: " + url + "\n")
                                print "waiting 5 seconds..."
                                time.sleep(5)
                                database_lock = False
                            except sqlite3.OperationalError:
                                # Leave database_lock True so the lookup is retried.
                                print("database locked, waiting 5 seconds...")
                                log("tpl_v3.2-output.txt",
                                    "gglsbl3 database is locked")
                                time.sleep(5)

                    else:
                        # Full-hash verdict already recorded; skip re-checking.
                        url_already_checked_ctr += 1

                hash_prefix_counter += 1

                #gglsbl_db = "/tmp/gsb_v4.db"
                #sql_db = sqlite3.connect(gglsbl_db)
                #cursor = sql_db.cursor()
                #cursor.execute("SELECT threat_type, platform_type, threat_entry_type from hash_prefix WHERE value = ?",(sqlite3.Binary(url_hash[0:4]),) ) #get all hash prefixes
                #cursor.execute('''SELECT value from full_hash''') #get all full hashes
                #results = cursor.fetchall()
                #for r in results:
                #	print r[0]+" "+r[1]+" "+r[2]

                # database_lock = True
                # while database_lock:
                # 	try:
                # 		gsblookup = sbl.lookup_hash(url_hash)
                # 		#gsblookup = sbl._lookup_hashes(url_hashes)
                # 		if gsblookup:
                # 			for i in gsblookup:
                # 				#print str(i)
                # 				#print type(i)
                # 				if i == "goog-malware-shavar":
                # 				#if str(i) == "MALWARE/ANY_PLATFORM/URL":
                # 					#print "malware",url
                # 					#sys.stdout.write('\r-')
                # 					#sys.stdout.flush()
                # 					malware_count += 1
                # 					malware_matches[url] = (url_hash)
                # 					logMalwareURL(url, 4, twitter_urls_dict[url], redirection_chain_url_lookup)
                # 					markTweetsMalware(url, twitter_urls_dict[url])
                # 				if i == "googpub-phish-shavar":
                # 				#if str(i) == "SOCIAL_ENGINEERING/ANY_PLATFORM/URL":
                # 					#print "phishing ",url
                # 					phish_count += 1
                # 					logPhishingURL(url, 4, twitter_urls_dict[url], redirection_chain_url_lookup)
                # 					markTweetsPhishy(url, twitter_urls_dict[url])
                # 					phishing_matches[url] = (url_hash)
                # 		database_lock = False
                # 	except (RuntimeError, IntegrityError, urllib2.HTTPError, urllib2.URLError, SocketError, sqlite3.OperationalError) as e:
                # 		print e
                # 		print url
                # 		log("tpl_fast_v2-output.txt", "error: "+str(e.message)+"\nURL: "+url+"\n")
                # 		print "waiting 5 seconds..."
                # 		time.sleep(5)
                # 		database_lock = False
                # 	except sqlite3.OperationalError:
                # 		print("database locked, waiting 5 seconds...")
                # 		log("tpl_v3.2-output.txt", "gglsbl3 database is locked")
                # 		time.sleep(5)
        counter += 1
        printCounter += 1
        if (printCounter == 1000):
            progress_bar(counter, num_urls, '%s of %s' % (counter, num_urls))
            printCounter = 0
        #print "num hash prefix matches: ", hash_prefix_counter
    return (phish_count, malware_count, phishing_matches, malware_matches,
            hash_prefix_counter, hash_prefix_no_collision_counter,
            url_already_checked_ctr, url_matches_in_gsb, gsb_lookup_counter)
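This function is the Safe Browsing v4 two-stage check: a cheap local test of each 4-byte hash prefix, then, only on a prefix hit that has not been verified before, an authoritative full-hash lookup via `sbl.lookup_url`, with `gsb_full_hash_log_5` caching verdicts. Stripped of logging and retries, the per-hash decision reduces to roughly this sketch (helper names as used above):

def check_hash(url, url_hash, gsb_url_hash_prefixes):
    # Stage 1: local 4-byte prefix test; most hashes stop here with no I/O.
    hash_prefix = str(sqlite3.Binary(url_hash[0:4])).encode('hex')
    if hash_prefix not in gsb_url_hash_prefixes:
        return None
    # Skip hashes whose full-hash verdict is already in gsb_full_hash_log_5.
    if not lookup_gsb_full_hash(sqlite3.Binary(url_hash)):
        return None
    # Stage 2: authoritative full-hash lookup against Google Safe Browsing.
    return sbl.lookup_url(url)  # threat descriptors, or falsy if clean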
def URLLookup_v2(domain_hashes, gsb_urls):
    # domain_hashes: url -> [sha256 full hashes]
    # gsb_urls: collection of hex-encoded 4-byte GSB hash prefixes
    phish_count = 0
    malware_count = 0
    phishing_matches = {}
    counter = 0
    hash_prefix_counter = 0
    printCounter = 0
    num_urls = len(domain_hashes)
    malware_domains = []
    phishing_domains = []
    for url in domain_hashes:
        url_hashes = domain_hashes[url]
        for url_hash in url_hashes:
            hash_prefix = sqlite3.Binary(url_hash[0:4])
            hash_prefix = str(hash_prefix).encode('hex')
            if hash_prefix in gsb_urls:

                if (lookup_gsb_full_hash(sqlite3.Binary(url_hash))):

                    hash_prefix_counter += 1
                    database_lock = True
                    while database_lock:
                        try:
                            print "url hash prefix match!"
                            gsblookup = sbl.lookup_url(url)
                            if gsblookup:

                                cur = con.cursor()
                                sql = "INSERT INTO gsb_full_hash_log_certstream_5(url, hash_prefix, full_hash) VALUES(%s, %s, %s)"
                                cur.execute(
                                    sql,
                                    (url[0:500], sqlite3.Binary(url_hash[0:4]),
                                     sqlite3.Binary(url_hash)))

                                print "url full hash match!"
                                for i in gsblookup:
                                    print i
                                    if str(i) == "MALWARE/ANY_PLATFORM/URL":
                                        cur.execute(
                                            "UPDATE gsb_full_hash_log_certstream_5 SET malware = '1' WHERE full_hash = %s",
                                            (url_hash, ))
                                        con.commit()
                                        print "malware"
                                        malware_count += 1
                                    if str(i) == "SOCIAL_ENGINEERING/ANY_PLATFORM/URL":
                                        cur.execute(
                                            "UPDATE gsb_full_hash_log_certstream_5 SET social_engineering = '1' WHERE full_hash = %s",
                                            (url_hash, ))
                                        con.commit()
                                        print "phishing "  #,url_data[0]
                                        phish_count += 1
                                        phishing_matches[url] = (url_hash)

                            else:  # Prefix matched but full hash not in GSB (a different URL); record it so it is not re-checked.
                                cur = con.cursor()
                                sql = "INSERT INTO gsb_full_hash_log_certstream_5(hash_prefix, full_hash, not_in_gsb) VALUES(%s, %s, %s)"
                                cur.execute(sql,
                                            (sqlite3.Binary(url_hash[0:4]),
                                             sqlite3.Binary(url_hash), 1))

                            database_lock = False
                        except (RuntimeError, IntegrityError,
                                urllib2.HTTPError, urllib2.URLError,
                                SocketError) as e:
                            print e
                            print url
                            log(
                                "tpl_v3.2-output.txt", "error: " +
                                str(e.message) + "\nURL: " + url + "\n")
                            print "waiting 5 seconds..."
                            time.sleep(5)
                            database_lock = False
                        except sqlite3.OperationalError:
                            print("database locked, waiting 5 seconds...")
                            log("tpl_v3.2-output.txt",
                                "gglsbl3 database is locked")
                            time.sleep(5)
                            database_lock = False
                        except KeyError:
                            # 18 Jun 2018, disabling error: 'exceptions.KeyError'>, KeyError('matches',)
                            # as appearing every minute or so
                            print "Looks like a key error:", sys.exc_info()[1]
                            log(
                                "certstream-url-checker-v2-phishing_5.txt",
                                "Looks like a key error: " +
                                str(sys.exc_info()[1]))
                            print "URL:", url
                            time.sleep(5)
                            database_lock = False
                        except:
                            print "We have an error:", sys.exc_info()[1]
                            print sys.exc_info()
                            import os
                            import traceback
                            from send_email import sendAdminAlert
                            script_file_name = os.path.basename(__file__)
                            error_string = ""
                            for frame in traceback.extract_tb(
                                    sys.exc_info()[2]):
                                fname, lineno, fn, text = frame
                                error_string += "\nError in %s on line %d" % (
                                    fname, lineno)
                            print error_string
                            sendAdminAlert(
                                "Error (within URLLookup_v2 loop) in " +
                                script_file_name,
                                "Python script: " + script_file_name +
                                "\nError reprted: " + str(sys.exc_info()[1]) +
                                "\nLine:" + str(error_string))
                            print "waiting 90 seconds"
                            time.sleep(90)
                            database_lock = False

                else:
                    print "URL already in gsb_full_hash_lookup"
        counter += 1
        printCounter += 1
        if (printCounter == 10):
            progress_bar(counter, num_urls, '%s of %s' % (counter, num_urls))
            printCounter = 0

    return (phish_count, malware_count, phishing_matches)
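The `while database_lock` loops retry when the gglsbl database is locked; a bounded-retry helper is one way to cap the number of attempts (a sketch, not from the original code):

import time
import sqlite3

def with_retries(operation, attempts=5, delay=5):
    # Retry a callable on transient "database is locked" errors, then re-raise.
    for attempt in range(attempts):
        try:
            return operation()
        except sqlite3.OperationalError:
            if attempt == attempts - 1:
                raise
            print "database locked, retrying in %s seconds..." % delay
            time.sleep(delay)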
Example #12
                cur.execute(
                    "UPDATE spam_urls_5 SET gglsbl_timestamp = %s WHERE url = %s AND gglsbl_timestamp IS NULL",
                    (timestamp_matches_list[0], url))
                con.commit()
            elif len(timestamp_matches_dict) > 1:
                multiple_counter += 1
            elif len(timestamp_matches_dict) == 0:
                zero_match_counter += 1

            counter += 1
            printCounter += 1
            if (printCounter == 10):
                progress_bar(counter, len(spam_urls),
                             '%s of %s' % (counter, len(spam_urls)))
                printCounter = 0

        print "\nURLs checked:", len(spam_urls)
        print "success:", success_counter
        print "zero matches:", zero_match_counter
        print "multi TSs:", multiple_counter

        conn.close()

    except:
        print "We have an error:", sys.exc_info()[1]
        import os
        import sys
        import traceback
        from send_email import sendAdminAlert