Ejemplo n.º 1
0
    def __init__(self):
        """
        Store the backend exposed by DbObj on this instance.

        URL parsing follows RFC 2396:
        http://www.ietf.org/rfc/rfc2396.txt

        Example: for the url http://a.b.c.d.e.f.g/1.html?param=1#Tag the
        client will try these possible strings:

            a.b.c.d.e.f.g/1.html?param=1#Tag
            a.b.c.d.e.f.g/1.html?param=1
            a.b.c.d.e.f.g/1.html
            a.b.c.d.e.f.g/
            c.d.e.f.g/1.html?param=1#Tag
            c.d.e.f.g/1.html?param=1
            c.d.e.f.g/1.html
            c.d.e.f.g/
            d.e.f.g/1.html?param=1#Tag
            d.e.f.g/1.html?param=1
            d.e.f.g/1.html
            d.e.f.g/
            e.f.g/1.html?param=1#Tag
            e.f.g/1.html?param=1
            e.f.g/1.html
            e.f.g/
            f.g/1.html?param=1#Tag
            f.g/1.html?param=1
            f.g/1.html
            f.g/

        NOTE(review): the b.c.d.e.f.g suffix is absent from the list
        above; confirm against the lookup code whether it is tried too.

        Refer to http://code.google.com/apis/safebrowsing/ for more details.
        """
        db_object = DbObj()
        self.backend = db_object.backend
Ejemplo n.º 2
0
class Google_Blacklist(object):
    """
    Google Blacklist class that is used to fetch and prepare hashes to be
    stored in the database.
    """

    def __init__(self, badware_type="malware"):
        """
        Initialize the fetcher for one badware list.

        badware_type -- "malware" or "black".

        Raises KeyError("Invalid Badware Type") for any other value.
        """
        badware_dict = {"malware": "M", "black": "B"}
        # Validate first so bad input fails before a backend is acquired.
        if badware_type not in badware_dict:
            raise KeyError("Invalid Badware Type")
        self.backend = DbObj().backend
        self.url = self._get_URL()
        self.badware_type = badware_type
        self.badware_code = badware_dict[badware_type]
        # Response rows that start with "-" are treated as removals.
        self.remove_row_regexp = re.compile(r"^-\w+")

    def _get_URL(self):
        """Return the module-level URL template."""
        return URL

    def fetch_data(self):
        """
        Download the update for this badware type and apply it to the
        backend.

        The stored version (or "1:-1" when none is stored) is substituted
        into the URL template together with the API key and badware type.
        The first response line must contain a "<major>.<minor>" version;
        the remaining lines are split into rows to delete and rows to
        insert.

        Returns 0, both when the response is empty and after a
        successful update.

        Raises ValueError when the first response line carries no version
        number; in that case the backend is left untouched.
        """
        version = self.backend.get_version(self.badware_type)
        if not version:
            # Start the version number from the beginning
            self.version_number = "1:-1"
        else:
            self.version_number = version

        st = string.Template(self.url)
        self.final_url = st.safe_substitute(key=self.backend.api_key,
                                            badware_type=self.badware_type,
                                            version=self.version_number)
        self.fetch_url_pointer = urllib2.urlopen(self.final_url)
        try:
            self.url_hashes_data = self.fetch_url_pointer.readlines()
        finally:
            # Always release the connection, even if reading fails.
            self.fetch_url_pointer.close()

        if not self.url_hashes_data:
            # No data, so no point checking version
            # number. This case might be because of
            # throttling or no updates available.
            return 0

        # Parse the header before mutating the backend so that a
        # malformed response cannot leave a half-applied update.
        header = self.url_hashes_data[0]
        version_match = re.search(r"\d+\.\d+", header)
        if version_match is None:
            raise ValueError(
                "No version number found in response header: %r" % header)
        new_version_number = version_match.group().replace(".", ":")

        rows_to_delete = {}
        rows_to_insert = {}
        # NOTE(review): the final line is skipped as well as the header;
        # confirm the feed always ends with a non-data line, otherwise the
        # last hash is dropped.
        for url_hash in self.url_hashes_data[1:-1]:
            cleaned_url_hash = url_hash.strip()
            if self.remove_row_regexp.match(url_hash):
                # NOTE(review): the key keeps its leading "-"; presumably
                # the backend strips it -- verify against delete_rows().
                rows_to_delete[cleaned_url_hash] = self.badware_code
            elif cleaned_url_hash:
                # Drop the first character (presumably a "+" marker).
                rows_to_insert[cleaned_url_hash[1:]] = self.badware_code

        self.backend.delete_rows(rows_to_delete)
        if self.version_number == "1:-1":
            self.version_number = new_version_number
            self.backend.insert_version_row(self.badware_type,
                                            self.version_number)
        else:
            self.backend.update_version_row(self.badware_type,
                                            new_version_number,
                                            self.version_number)
        self.backend.insert_rows(rows_to_insert)
        return 0
Ejemplo n.º 3
0
 def __init__(self, badware_type="malware"):
     """
     Initialize the module for one badware list.

     badware_type -- "malware" or "black".

     Raises KeyError("Invalid Badware Type") for any other value.
     """
     badware_dict = {"malware": "M", "black": "B"}
     # Validate first so bad input fails before a backend is acquired.
     if badware_type not in badware_dict:
         raise KeyError("Invalid Badware Type")
     self.backend = DbObj().backend
     self.url = self._get_URL()
     self.badware_type = badware_type
     self.badware_code = badware_dict[badware_type]
     # Raw string keeps the \w escape intact for the regex engine.
     self.remove_row_regexp = re.compile(r"^-\w+")
Ejemplo n.º 4
0
    def __init__(self):
        """
        Keep a reference to the backend provided by DbObj.

        For URL parsing refer to RFC 2396
        (http://www.ietf.org/rfc/rfc2396.txt).

        Given the url http://a.b.c.d.e.f.g/1.html?param=1#Tag the client
        will try these possible strings:

            a.b.c.d.e.f.g/1.html?param=1#Tag
            a.b.c.d.e.f.g/1.html?param=1
            a.b.c.d.e.f.g/1.html
            a.b.c.d.e.f.g/
            c.d.e.f.g/1.html?param=1#Tag
            c.d.e.f.g/1.html?param=1
            c.d.e.f.g/1.html
            c.d.e.f.g/
            d.e.f.g/1.html?param=1#Tag
            d.e.f.g/1.html?param=1
            d.e.f.g/1.html
            d.e.f.g/
            e.f.g/1.html?param=1#Tag
            e.f.g/1.html?param=1
            e.f.g/1.html
            e.f.g/
            f.g/1.html?param=1#Tag
            f.g/1.html?param=1
            f.g/1.html
            f.g/

        NOTE(review): no b.c.d.e.f.g entries appear in this list; check
        the lookup code to see whether that suffix is tried as well.

        Refer to http://code.google.com/apis/safebrowsing/ for more details.
        """
        database = DbObj()
        self.backend = database.backend
Ejemplo n.º 5
0
 def __init__(self, badware_type="malware"):
     """
     The constructor initializes the module.

     badware_type -- which list to work on: "malware" or "black".

     Raises KeyError("Invalid Badware Type") for any other value.
     """
     badware_dict = {"malware": "M", "black": "B"}
     # Reject unknown types up front, before touching the backend.
     if badware_type not in badware_dict:
         raise KeyError("Invalid Badware Type")
     self.backend = DbObj().backend
     self.url = self._get_URL()
     self.badware_type = badware_type
     self.badware_code = badware_dict[badware_type]
     # Raw string so the \w escape reaches the regex engine unchanged.
     self.remove_row_regexp = re.compile(r"^-\w+")
Ejemplo n.º 6
0
class Google_Blacklist(object):
    """
    Google Blacklist class that is used to fetch and prepare hashes to be
    stored in the database.
    """

    def __init__(self, badware_type="malware"):
        """
        The constructor initializes the module.

        badware_type -- which list to work on: "malware" or "black".

        Raises KeyError("Invalid Badware Type") for any other value.
        """
        badware_dict = {"malware": "M", "black": "B"}
        # Reject unknown types up front, before touching the backend.
        if badware_type not in badware_dict:
            raise KeyError("Invalid Badware Type")
        self.backend = DbObj().backend
        self.url = self._get_URL()
        self.badware_type = badware_type
        self.badware_code = badware_dict[badware_type]
        # Lines beginning with "-" mark rows that must be removed.
        self.remove_row_regexp = re.compile(r"^-\w+")

    def _get_URL(self):
        """Return the module-level URL template."""
        return URL

    def fetch_data(self):
        """
        Fetch the update for this badware type and store it through the
        backend.

        Steps:
          1. Read the stored version from the backend; use "1:-1" when
             there is none.
          2. Fill the URL template (key, badware_type, version) and
             download the response as a list of lines.
          3. Read the "<major>.<minor>" version from the first line.
          4. Delete the "-" rows, record the new version, insert the
             remaining rows.

        Returns 0 in every non-error case, including an empty response.

        Raises ValueError if the first response line has no version
        number; nothing is written to the backend in that case.
        """
        version = self.backend.get_version(self.badware_type)
        if not version:
            # Start the version number from the beginning
            self.version_number = "1:-1"
        else:
            self.version_number = version

        st = string.Template(self.url)
        self.final_url = st.safe_substitute(key=self.backend.api_key,
                                            badware_type=self.badware_type,
                                            version=self.version_number)
        self.fetch_url_pointer = urllib2.urlopen(self.final_url)
        try:
            self.url_hashes_data = self.fetch_url_pointer.readlines()
        finally:
            # Close the handle whether or not the read succeeded.
            self.fetch_url_pointer.close()

        if not self.url_hashes_data:
            # No data, so no point checking version
            # number. This case might be because of
            # throttling or no updates available.
            return 0

        # Validate the header before any backend write, so a malformed
        # response cannot leave the update half applied.
        header = self.url_hashes_data[0]
        version_match = re.search(r"\d+\.\d+", header)
        if version_match is None:
            raise ValueError(
                "No version number found in response header: %r" % header)
        new_version_number = version_match.group().replace(".", ":")

        rows_to_delete = {}
        rows_to_insert = {}
        # NOTE(review): both the header and the final line are skipped;
        # confirm the feed always ends with a non-data line.
        for url_hash in self.url_hashes_data[1:-1]:
            cleaned_url_hash = url_hash.strip()
            if self.remove_row_regexp.match(url_hash):
                # NOTE(review): the leading "-" stays in the key;
                # presumably delete_rows() handles it -- verify.
                rows_to_delete[cleaned_url_hash] = self.badware_code
            elif cleaned_url_hash:
                # First character dropped (presumably a "+" marker).
                rows_to_insert[cleaned_url_hash[1:]] = self.badware_code

        self.backend.delete_rows(rows_to_delete)
        if self.version_number == "1:-1":
            self.version_number = new_version_number
            self.backend.insert_version_row(self.badware_type,
                                            self.version_number)
        else:
            self.backend.update_version_row(self.badware_type,
                                            new_version_number,
                                            self.version_number)
        self.backend.insert_rows(rows_to_insert)
        return 0
Ejemplo n.º 7
0
class Lookup(object):
    """
    Look up URLs, or ready-made MD5 hashes, through the backend.
    """

    def __init__(self):
        """
        For URL parsing refer to RFC 2396
        http://www.ietf.org/rfc/rfc2396.txt

        For the url http://a.b.c.d.e.f.g/1.html?param=1#Tag,
        lookup_by_url() tries every hostname suffix that still has at
        least two labels:

            a.b.c.d.e.f.g, b.c.d.e.f.g, c.d.e.f.g, d.e.f.g, e.f.g, f.g

        and for each suffix these strings (after lowercasing the URL):

            <suffix>/1.html?param=1#tag
            <suffix>/1.html?param=1
            <suffix>/1.html
            <suffix>/

        Refer to http://code.google.com/apis/safebrowsing/ for more details.
        """
        self.backend = DbObj().backend

    def lookup_by_url(self, url):
        """
        Lookup Method by URL.

        The URL is lowercased, split into components with url_re,
        expanded into candidate strings (stored in self.lookup_list),
        and the MD5 hex digest of every candidate is handed to the
        backend.

        Returns whatever the backend's lookup_by_md5() returns.

        Raises AttributeError("Invalid URL.") when the URL cannot be
        split or has no hostname.
        """
        self.url = url.lower()

        # Break URL into components.
        # NOTE(review): the indices used below fit the RFC 2396
        # appendix B expression (3 = authority, 4 = path, 5 = "?query",
        # 7 = "#fragment") -- confirm against the definition of url_re.
        match = url_re.match(self.url)
        if match is None:
            raise AttributeError("Invalid URL.")
        url_components = match.groups()
        hostname = url_components[3]
        path = url_components[4]
        query = url_components[5]
        fragment = url_components[7]

        # str.split() never returns an empty list, so the hostname itself
        # has to be checked (it is None when the URL has no authority).
        if not hostname:
            raise AttributeError("Invalid URL.")
        hostname_comp = hostname.split(".")

        # The directory-style path prefixes do not depend on the host
        # suffix, so build them once. Prefixes containing a "." are
        # skipped.
        path_prefixes = []
        if path:
            segments = path.split('/')
            for j in range(len(segments) + 1):
                prefix = '/'.join(segments[:j])
                if '.' not in prefix:
                    path_prefixes.append(prefix + "/")

        # Prepare the lookup list as given in the constructor docstring.
        self.lookup_list = set()
        for i in range(len(hostname_comp) - 1):
            host = ".".join(hostname_comp[i:])
            self.lookup_list.add(host + "/")
            if not path:
                continue
            for prefix in path_prefixes:
                self.lookup_list.add(host + prefix)
            self.lookup_list.add(host + path)
            if query:
                self.lookup_list.add(host + path + query)
                # The fragment is only tried together with a query.
                if fragment:
                    self.lookup_list.add(host + path + query + fragment)

        # Prepare the MD5 hash list for lookups.
        md5_hash_list = [md5(url_comp).hexdigest()
                         for url_comp in self.lookup_list]
        return self.backend.lookup_by_md5(md5_hash_list)

    # A helper function. Currently unused
    def lookup_by_md5(self, md5_hash):
        """
        Lookup by MD5 hash.

        Wraps the single hash in a list and returns the backend's
        lookup_by_md5() result.
        """
        return self.backend.lookup_by_md5([md5_hash])
Ejemplo n.º 8
0
class Lookup(object):
    """
    Query the backend for a URL or for a precomputed MD5 hash.
    """

    def __init__(self):
        """
        For URL parsing refer to RFC 2396
        http://www.ietf.org/rfc/rfc2396.txt

        For the url http://a.b.c.d.e.f.g/1.html?param=1#Tag,
        lookup_by_url() walks the hostname suffixes that keep at least
        two labels:

            a.b.c.d.e.f.g, b.c.d.e.f.g, c.d.e.f.g, d.e.f.g, e.f.g, f.g

        and, for each suffix, tries these strings (the URL is lowercased
        first):

            <suffix>/1.html?param=1#tag
            <suffix>/1.html?param=1
            <suffix>/1.html
            <suffix>/

        Refer to http://code.google.com/apis/safebrowsing/ for more details.
        """
        self.backend = DbObj().backend

    def lookup_by_url(self, url):
        """
        Lookup Method by URL.

        Lowercases the URL, splits it with url_re, builds the candidate
        strings into self.lookup_list, hashes each candidate with md5 and
        passes the hex digests to the backend.

        Returns the result of the backend's lookup_by_md5().

        Raises AttributeError("Invalid URL.") if the URL cannot be split
        or contains no hostname.
        """
        self.url = url.lower()

        # Break URL into components.
        # NOTE(review): indices 3/4/5/7 are used as authority, path,
        # "?query" and "#fragment", which fits the RFC 2396 appendix B
        # expression -- confirm against the definition of url_re.
        match = url_re.match(self.url)
        if match is None:
            raise AttributeError("Invalid URL.")
        url_components = match.groups()
        hostname = url_components[3]
        path = url_components[4]
        query = url_components[5]
        fragment = url_components[7]

        # Checking the split result is useless because str.split() always
        # returns at least one element; test the hostname itself (None
        # when the URL has no authority part).
        if not hostname:
            raise AttributeError("Invalid URL.")
        hostname_comp = hostname.split(".")

        # Path prefixes are the same for every hostname suffix, so they
        # are computed once. Prefixes containing a "." are left out.
        path_prefixes = []
        if path:
            segments = path.split('/')
            for j in range(len(segments) + 1):
                prefix = '/'.join(segments[:j])
                if '.' not in prefix:
                    path_prefixes.append(prefix + "/")

        # Prepare the lookup list as given in the constructor docstring.
        self.lookup_list = set()
        for i in range(len(hostname_comp) - 1):
            host = ".".join(hostname_comp[i:])
            self.lookup_list.add(host + "/")
            if not path:
                continue
            for prefix in path_prefixes:
                self.lookup_list.add(host + prefix)
            self.lookup_list.add(host + path)
            if query:
                self.lookup_list.add(host + path + query)
                # A fragment is only considered when a query is present.
                if fragment:
                    self.lookup_list.add(host + path + query + fragment)

        # Prepare the MD5 hash list for lookups.
        md5_hash_list = [md5(url_comp).hexdigest()
                         for url_comp in self.lookup_list]
        return self.backend.lookup_by_md5(md5_hash_list)

    # A helper function. Currently unused
    def lookup_by_md5(self, md5_hash):
        """
        Lookup by MD5 hash.

        Passes the single hash to the backend as a one-element list and
        returns the backend's result.
        """
        return self.backend.lookup_by_md5([md5_hash])