def check(urllist_path='urllist.txt'):
    """Check a list of URLs against Spamhaus's SBL.

    The file holds one URL per line.  Each entry must be a complete URL
    including the scheme (``http://``).  SBL listings are published per
    domain provider at
    http://www.spamhaus.org/sbl/listings/{domain_provider}
    (i.e. ``godaddy.com``).

    Every listed URL is reported on stdout as ``<url> is in the SBL``.

    :param urllist_path: path of the text file with the URLs to check;
        defaults to ``urllist.txt`` relative to the working directory.
    :return: list of the URLs that are listed, in file order
    """
    checker = SpamHausChecker()
    listed = []
    with open(urllist_path, 'r') as urllist:
        for line in urllist:
            url = line.strip()
            if not url:
                # Blank lines (e.g. a trailing newline at the end of the
                # file) carry no URL, so there is nothing to look up.
                continue
            # The explicit ``is True`` comparison is kept from the original:
            # only a literal True from is_spam() counts as listed.
            if checker.is_spam(url) is True:
                # A single pre-formatted argument prints the same text
                # whether ``print`` is a statement or a function.
                print('%s is in the SBL' % url)
                listed.append(url)
    return listed
def check_spamhaus(url):
    """Return the cached-or-fresh ``SpamHausChecker.is_spam`` verdict for *url*.

    The lookup is skipped (and ``False`` returned) when the module-level
    ``want_spamhaus`` flag is falsy or when *url* does not start with
    ``http`` (which also covers ``https``).

    Results are memoised in the module-level ``urlschecked`` dict under the
    key ``"sh-" + domain``, so the verdict obtained for the first URL of a
    domain is reused for every later URL of that same domain.

    :param url: the URL to check
    :return: whatever ``is_spam`` returned (or the cached value), or
        ``False`` when the check is disabled, not applicable, or failed
    """
    if not want_spamhaus:
        return False
    # 'https' starts with 'http', so one prefix test covers both schemes.
    if not url.startswith('http'):
        return False

    cache_key = "sh-" + get_domain(url)
    # short circuit (caching is good!)
    if cache_key in urlschecked:
        return urlschecked[cache_key]

    checker = SpamHausChecker()
    try:
        ret = checker.is_spam(url)
    except Exception:
        # Best effort: a failed lookup is reported as "not spam".  The
        # failure is deliberately NOT cached, so the next call for this
        # domain performs the lookup again.
        print("Whoops, trying again")
        return False

    urlschecked[cache_key] = ret
    return ret
# In[ ]:


# In[9]:

# Fit a label encoder on four string labels, then run the classes it
# learned back through transform() to get their integer codes.
from sklearn.preprocessing import LabelEncoder

data = ['first', 'second', 'third', 'fourth']
enc = LabelEncoder()
label_encoder = enc.fit(data)
integer_classes = label_encoder.transform(label_encoder.classes_)
# Bare expression: the "# In[n]:" markers suggest a notebook export, where
# this would be the cell's displayed value -- TODO confirm.
integer_classes


# In[12]:

# Ad-hoc call of the project's checker on a sample URL.
# NOTE(review): presumably this performs a live Spamhaus lookup -- confirm.
from spam.spamhaus import SpamHausChecker

checker = SpamHausChecker()
checker.is_spam("http://www.google.com/search?q=food")


# In[16]:

from sklearn.ensemble import RandomForestClassifier
import numpy as np

print('test')


# In[12]:

import pandas as pd
import numpy as np
import seaborn as sns

df = pd.DataFrame({
    'a': [1, 1, 0, 1],
    'b': [1, 1, 1, 0],
})
# Count the rows in which column 'a' and column 'b' are both equal to 1.
sum((df['a'] == 1) & (df['b'] == 1))
# In[9]:

# Fit a label encoder on four string labels, then run the classes it
# learned back through transform() to get their integer codes.
from sklearn.preprocessing import LabelEncoder

data = ['first', 'second', 'third', 'fourth']
enc = LabelEncoder()
label_encoder = enc.fit(data)
integer_classes = label_encoder.transform(label_encoder.classes_)
# Bare expression: the "# In[n]:" markers suggest a notebook export, where
# this would be the cell's displayed value -- TODO confirm.
integer_classes


# In[12]:

# Ad-hoc call of the project's checker on a sample URL.
# NOTE(review): presumably this performs a live Spamhaus lookup -- confirm.
from spam.spamhaus import SpamHausChecker

checker = SpamHausChecker()
checker.is_spam("http://www.google.com/search?q=food")


# In[16]:

from sklearn.ensemble import RandomForestClassifier
import numpy as np

print('test')


# In[12]:

import pandas as pd
import numpy as np
import seaborn as sns