def __init__(self):
    """
    Set up the database backend used for lookups.

    URL parsing follows RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
    For http://a.b.c.d.e.f.g/1.html?param=1#Tag the client probes the
    host suffixes a.b.c.d.e.f.g, c.d.e.f.g, d.e.f.g, e.f.g and f.g,
    each combined with these path variants:

        <host>/1.html?param=1#Tag
        <host>/1.html?param=1
        <host>/1.html
        <host>/

    Refer to http://code.google.com/apis/safebrowsing/ for more
    details.
    """
    self.backend = DbObj().backend
class Google_Blacklist(object):
    """
    Google Blacklist class that is used to fetch and prepare hashes
    to be stored in the database.

    Refer to http://code.google.com/apis/safebrowsing/ for the wire
    format details.
    """

    def __init__(self, badware_type="malware"):
        """
        The constructor initializes the module.

        :param badware_type: "malware" or "black"; anything else
            raises KeyError.
        :raises KeyError: for an unknown badware type, before any
            backend state is created.
        """
        badware_dict = {"malware": "M", "black": "B"}
        # Validate first so an invalid type does not leave a
        # half-initialized object behind.
        if badware_type not in badware_dict:
            raise KeyError("Invalid Badware Type")
        self.backend = DbObj().backend
        self.url = self._get_URL()
        self.badware_type = badware_type
        self.badware_code = badware_dict[badware_type]
        # Feed rows prefixed with "-" mark hashes to delete.
        self.remove_row_regexp = re.compile(r"^-\w+")

    def _get_URL(self):
        # Template URL for the hash feed; module-level constant.
        return URL

    def fetch_data(self):
        """
        Download the latest hash feed and synchronise the database.

        Returns 0 in every case, including "no new data" (which can
        mean throttling or simply no updates available).
        """
        version = self.backend.get_version(self.badware_type)
        st = string.Template(self.url)
        if not version:
            # No stored version: request the full list from scratch.
            self.version_number = "1:-1"
        else:
            self.version_number = version
        self.final_url = st.safe_substitute(key=self.backend.api_key,
                                            badware_type=self.badware_type,
                                            version=self.version_number)
        self.fetch_url_pointer = urllib2.urlopen(self.final_url)
        self.url_hashes_data = self.fetch_url_pointer.readlines()
        if not self.url_hashes_data:
            # No data, so no point checking the version number. This
            # case might be because of throttling or no updates
            # available.
            return 0
        rows_to_delete = {}
        rows_to_insert = {}
        # First line is the version header, last line the trailer;
        # only the rows in between carry hashes.
        for url_hash in self.url_hashes_data[1:-1]:
            if self.remove_row_regexp.match(url_hash):
                rows_to_delete[url_hash.strip()] = self.badware_code
            else:
                cleaned_url_hash = url_hash.strip()
                # After strip() the row can only be empty or a real
                # hash, so no separate "\n" check is needed.
                if cleaned_url_hash:
                    # Drop the leading "+" marker before storing.
                    rows_to_insert[cleaned_url_hash[1:]] = self.badware_code
        self.backend.delete_rows(rows_to_delete)
        version_number_rx = re.search(r"\d\.\d+", self.url_hashes_data[0])
        new_version_number = ":".join(version_number_rx.group().split("."))
        if self.version_number == "1:-1":
            self.version_number = new_version_number
            self.backend.insert_version_row(self.badware_type,
                                            self.version_number)
        else:
            self.backend.update_version_row(self.badware_type,
                                            new_version_number,
                                            self.version_number)
        self.backend.insert_rows(rows_to_insert)
        return 0
def __init__(self, badware_type="malware"):
    """
    The constructor initializes the module.

    :param badware_type: "malware" or "black"; anything else raises
        KeyError.
    :raises KeyError: for an unknown badware type, before any backend
        state is created.
    """
    badware_dict = {"malware": "M", "black": "B"}
    # Validate first so an invalid type does not leave a
    # half-initialized object behind.
    if badware_type not in badware_dict:
        raise KeyError("Invalid Badware Type")
    self.backend = DbObj().backend
    self.url = self._get_URL()
    self.badware_type = badware_type
    self.badware_code = badware_dict[badware_type]
    # Feed rows prefixed with "-" mark hashes to delete.
    self.remove_row_regexp = re.compile(r"^-\w+")
def __init__(self, badware_type="malware"):
    """
    The constructor initializes the module.

    :param badware_type: "malware" or "black"; anything else raises
        KeyError.
    :raises KeyError: for an unknown badware type, before any backend
        state is created.
    """
    badware_dict = {"malware": "M", "black": "B"}
    # Validate first so an invalid type does not leave a
    # half-initialized object behind.
    if badware_type not in badware_dict:
        raise KeyError("Invalid Badware Type")
    self.backend = DbObj().backend
    self.url = self._get_URL()
    self.badware_type = badware_type
    self.badware_code = badware_dict[badware_type]
    # Feed rows prefixed with "-" mark hashes to delete.
    self.remove_row_regexp = re.compile(r"^-\w+")
class Google_Blacklist(object):
    """
    Google Blacklist class that is used to fetch and prepare hashes
    to be stored in the database.

    Refer to http://code.google.com/apis/safebrowsing/ for the wire
    format details.
    """

    def __init__(self, badware_type="malware"):
        """
        The constructor initializes the module.

        :param badware_type: "malware" or "black"; anything else
            raises KeyError.
        :raises KeyError: for an unknown badware type, before any
            backend state is created.
        """
        badware_dict = {"malware": "M", "black": "B"}
        # Validate first so an invalid type does not leave a
        # half-initialized object behind.
        if badware_type not in badware_dict:
            raise KeyError("Invalid Badware Type")
        self.backend = DbObj().backend
        self.url = self._get_URL()
        self.badware_type = badware_type
        self.badware_code = badware_dict[badware_type]
        # Feed rows prefixed with "-" mark hashes to delete.
        self.remove_row_regexp = re.compile(r"^-\w+")

    def _get_URL(self):
        # Template URL for the hash feed; module-level constant.
        return URL

    def fetch_data(self):
        """
        Download the latest hash feed and synchronise the database.

        Returns 0 in every case, including "no new data" (which can
        mean throttling or simply no updates available).
        """
        version = self.backend.get_version(self.badware_type)
        st = string.Template(self.url)
        if not version:
            # No stored version: request the full list from scratch.
            self.version_number = "1:-1"
        else:
            self.version_number = version
        self.final_url = st.safe_substitute(key=self.backend.api_key,
                                            badware_type=self.badware_type,
                                            version=self.version_number)
        self.fetch_url_pointer = urllib2.urlopen(self.final_url)
        self.url_hashes_data = self.fetch_url_pointer.readlines()
        if not self.url_hashes_data:
            # No data, so no point checking the version number. This
            # case might be because of throttling or no updates
            # available.
            return 0
        rows_to_delete = {}
        rows_to_insert = {}
        # First line is the version header, last line the trailer;
        # only the rows in between carry hashes.
        for url_hash in self.url_hashes_data[1:-1]:
            if self.remove_row_regexp.match(url_hash):
                rows_to_delete[url_hash.strip()] = self.badware_code
            else:
                cleaned_url_hash = url_hash.strip()
                # After strip() the row can only be empty or a real
                # hash, so no separate "\n" check is needed.
                if cleaned_url_hash:
                    # Drop the leading "+" marker before storing.
                    rows_to_insert[cleaned_url_hash[1:]] = self.badware_code
        self.backend.delete_rows(rows_to_delete)
        version_number_rx = re.search(r"\d\.\d+", self.url_hashes_data[0])
        new_version_number = ":".join(version_number_rx.group().split("."))
        if self.version_number == "1:-1":
            self.version_number = new_version_number
            self.backend.insert_version_row(self.badware_type,
                                            self.version_number)
        else:
            self.backend.update_version_row(self.badware_type,
                                            new_version_number,
                                            self.version_number)
        self.backend.insert_rows(rows_to_insert)
        return 0
class Lookup(object):
    """
    Lookup of URLs (or raw MD5 hashes) against the blacklist database.
    """

    def __init__(self):
        """
        For URL parsing refer to RFC 2396
        http://www.ietf.org/rfc/rfc2396.txt

        For the url http://a.b.c.d.e.f.g/1.html?param=1#Tag the client
        will try the host suffixes a.b.c.d.e.f.g, c.d.e.f.g, d.e.f.g,
        e.f.g and f.g, each combined with these path variants:

            <host>/1.html?param=1#Tag
            <host>/1.html?param=1
            <host>/1.html
            <host>/

        Refer to http://code.google.com/apis/safebrowsing/ for more
        details.
        """
        self.backend = DbObj().backend

    def lookup_by_url(self, url):
        """
        Lookup Method by URL.

        :param url: URL to check; matching is case-insensitive.
        :raises AttributeError: if the URL cannot be parsed.
        """
        self.url = url.lower()
        # Break URL into components.
        match = url_re.match(self.url)
        if match is None:
            # Previously an unparseable URL crashed on .groups();
            # raise the intended error explicitly instead.
            raise AttributeError("Invalid URL.")
        url_components = match.groups()
        # Prepare the lookup list as given in the main docstring.
        self.lookup_list = set()
        hostname = url_components[3]
        if not hostname:
            # str.split(".") never returns an empty list, so the old
            # "if not hostname_comp" check could never fire; test the
            # hostname itself instead.
            raise AttributeError("Invalid URL.")
        hostname_comp = hostname.split(".")
        # max(..., 1) ensures single-label hosts (e.g. "localhost")
        # still produce one entry and leave filtered_hostname_comp
        # bound; the original raised NameError for them.
        for i in xrange(max(len(hostname_comp) - 1, 1)):
            filtered_hostname_comp = ".".join(hostname_comp[i:])
            self.lookup_list.add(filtered_hostname_comp + "/")
            if url_components[4]:
                path = url_components[4].split('/')
                for j in xrange(len(path) + 1):
                    filtered_paths = '/'.join(path[:j])
                    if '.' not in filtered_paths:
                        self.lookup_list.add(filtered_hostname_comp +
                                             "%s/" % filtered_paths)
                self.lookup_list.add(filtered_hostname_comp +
                                     url_components[4])
                if url_components[5]:
                    self.lookup_list.add(filtered_hostname_comp +
                                         ''.join(url_components[4:6]))
                if url_components[7]:
                    self.lookup_list.add(filtered_hostname_comp +
                                         ''.join(url_components[4:6]) +
                                         url_components[7])
        # Prepare the MD5 hash list for lookups.
        md5_hash_list = [md5(url_comp).hexdigest()
                         for url_comp in self.lookup_list]
        return self.backend.lookup_by_md5(md5_hash_list)

    # A helper function. Currently unused.
    def lookup_by_md5(self, md5_hash):
        """
        Lookup by MD5 hash.
        """
        return self.backend.lookup_by_md5([md5_hash, ])
class Lookup(object):
    """
    Lookup of URLs (or raw MD5 hashes) against the blacklist database.
    """

    def __init__(self):
        """
        For URL parsing refer to RFC 2396
        http://www.ietf.org/rfc/rfc2396.txt

        For the url http://a.b.c.d.e.f.g/1.html?param=1#Tag the client
        will try the host suffixes a.b.c.d.e.f.g, c.d.e.f.g, d.e.f.g,
        e.f.g and f.g, each combined with these path variants:

            <host>/1.html?param=1#Tag
            <host>/1.html?param=1
            <host>/1.html
            <host>/

        Refer to http://code.google.com/apis/safebrowsing/ for more
        details.
        """
        self.backend = DbObj().backend

    def lookup_by_url(self, url):
        """
        Lookup Method by URL.

        :param url: URL to check; matching is case-insensitive.
        :raises AttributeError: if the URL cannot be parsed.
        """
        self.url = url.lower()
        # Break URL into components.
        match = url_re.match(self.url)
        if match is None:
            # Previously an unparseable URL crashed on .groups();
            # raise the intended error explicitly instead.
            raise AttributeError("Invalid URL.")
        url_components = match.groups()
        # Prepare the lookup list as given in the main docstring.
        self.lookup_list = set()
        hostname = url_components[3]
        if not hostname:
            # str.split(".") never returns an empty list, so the old
            # "if not hostname_comp" check could never fire; test the
            # hostname itself instead.
            raise AttributeError("Invalid URL.")
        hostname_comp = hostname.split(".")
        # max(..., 1) ensures single-label hosts (e.g. "localhost")
        # still produce one entry and leave filtered_hostname_comp
        # bound; the original raised NameError for them.
        for i in xrange(max(len(hostname_comp) - 1, 1)):
            filtered_hostname_comp = ".".join(hostname_comp[i:])
            self.lookup_list.add(filtered_hostname_comp + "/")
            if url_components[4]:
                path = url_components[4].split('/')
                for j in xrange(len(path) + 1):
                    filtered_paths = '/'.join(path[:j])
                    if '.' not in filtered_paths:
                        self.lookup_list.add(filtered_hostname_comp +
                                             "%s/" % filtered_paths)
                self.lookup_list.add(filtered_hostname_comp +
                                     url_components[4])
                if url_components[5]:
                    self.lookup_list.add(filtered_hostname_comp +
                                         ''.join(url_components[4:6]))
                if url_components[7]:
                    self.lookup_list.add(filtered_hostname_comp +
                                         ''.join(url_components[4:6]) +
                                         url_components[7])
        # Prepare the MD5 hash list for lookups.
        md5_hash_list = [md5(url_comp).hexdigest()
                         for url_comp in self.lookup_list]
        return self.backend.lookup_by_md5(md5_hash_list)

    # A helper function. Currently unused.
    def lookup_by_md5(self, md5_hash):
        """
        Lookup by MD5 hash.
        """
        return self.backend.lookup_by_md5([md5_hash, ])