Example #1
0
    def test_iterkeys(self):
        disk_dict = DiskDict()

        disk_dict['a'] = 'abc'
        disk_dict['b'] = 'abc'
        disk_dict['c'] = 'abc'

        self.assertEqual(set(disk_dict.iterkeys()), set(['a', 'b', 'c']))
Example #2
0
    def test_iterkeys(self):
        disk_dict = DiskDict()

        disk_dict['a'] = 'abc'
        disk_dict['b'] = 'abc'
        disk_dict['c'] = 'abc'

        self.assertEqual(set(disk_dict.iterkeys()), set(['a', 'b', 'c']))
Example #3
0
    def __init__(self):
        AuditPlugin.__init__(self)

        # Internal variables
        self._expected_res_mutant = DiskDict()
        self._freq_list = DiskList()

        re_str = '<!--#exec cmd="echo -n (.*?);echo -n (.*?)" -->'
        self._extract_results_re = re.compile(re_str)
Example #4
0
 def test_remove_table(self):
     disk_dict = DiskDict()
     table_name = disk_dict.table_name
     db = get_default_temp_db_instance()
     
     self.assertTrue(db.table_exists(table_name))
     
     disk_dict.cleanup()
     
     self.assertFalse(db.table_exists(table_name))
Example #5
0
    def test_remove_table(self):
        disk_dict = DiskDict()
        table_name = disk_dict.table_name
        db = get_default_temp_db_instance()

        self.assertTrue(db.table_exists(table_name))

        disk_dict.cleanup()

        self.assertFalse(db.table_exists(table_name))
Example #6
0
    def test_get(self):
        disk_dict = DiskDict()

        disk_dict[0] = 'abc'

        abc1 = disk_dict.get(0)
        abc2 = disk_dict.get(0, 1)
        two = disk_dict.get(1, 2)

        self.assertEqual(abc1, 'abc')
        self.assertEqual(abc2, 'abc')
        self.assertEqual(two, 2)
Example #7
0
    def test_get(self):
        disk_dict = DiskDict()

        disk_dict[0] = 'abc'
        
        abc1 = disk_dict.get(0)
        abc2 = disk_dict.get(0, 1)
        two = disk_dict.get(1, 2)
        
        self.assertEqual(abc1, 'abc')
        self.assertEqual(abc2, 'abc')
        self.assertEqual(two, 2)
Example #8
0
    def __init__(self):
        AuditPlugin.__init__(self)

        # Internal variables
        self._expected_res_mutant = DiskDict()
        self._freq_list = DiskList()
        
        re_str = '<!--#exec cmd="echo -n (.*?);echo -n (.*?)" -->'
        self._extract_results_re = re.compile(re_str) 
Example #9
0
    def test_int(self):
        disk_dict = DiskDict()

        for i in xrange(100):
            disk_dict[i] = i

        # Do it twice to test that it works as expected (not creating a new)
        # row in the table, but modifying the value
        for i in xrange(100):
            disk_dict[i] = i

        self.assertEqual(len(disk_dict), 100)
        self.assertEqual(disk_dict[50], 50)
        self.assertIn(50, disk_dict)
Example #10
0
 def __init__(self, max_variants=5):
     self._disk_dict = DiskDict()
     self._db_lock = threading.RLock()
     self.max_variants = max_variants
Example #11
0
    def test_not_in(self):
        disk_dict = DiskDict()

        self.assertRaises(KeyError, disk_dict.__getitem__, 'abc')
Example #12
0
class ssi(AuditPlugin):
    '''
    Find server side inclusion vulnerabilities.
    :author: Andres Riancho ([email protected])
    '''

    def __init__(self):
        AuditPlugin.__init__(self)

        # Internal variables
        self._expected_res_mutant = DiskDict()
        self._freq_list = DiskList()
        
        re_str = '<!--#exec cmd="echo -n (.*?);echo -n (.*?)" -->'
        self._extract_results_re = re.compile(re_str) 

    def audit(self, freq, orig_response):
        '''
        Tests an URL for server side inclusion vulnerabilities.

        :param freq: A FuzzableRequest
        '''
        # Create the mutants to send right now,
        ssi_strings = self._get_ssi_strings()
        mutants = create_mutants(freq, ssi_strings, orig_resp=orig_response)

        # Used in end() to detect "persistent SSI"
        for mut in mutants:
            expected_result = self._extract_result_from_payload(
                mut.get_mod_value())
            self._expected_res_mutant[expected_result] = mut

        self._freq_list.append(freq)
        # End of persistent SSI setup

        self._send_mutants_in_threads(self._uri_opener.send_mutant,
                                      mutants,
                                      self._analyze_result)

    def _get_ssi_strings(self):
        '''
        This method returns a list of server sides to try to include.

        :return: A string, see above.
        '''
        yield '<!--#exec cmd="echo -n %s;echo -n %s" -->' % (rand_alpha(5),
                                                             rand_alpha(5))

        # TODO: Add mod_perl ssi injection support
        # http://www.sens.buffalo.edu/services/webhosting/advanced/perlssi.shtml
        #yield <!--#perl sub="sub {print qq/If you see this, mod_perl is working!/;}" -->

    def _extract_result_from_payload(self, payload):
        '''
        Extract the expected result from the payload we're sending.
        '''
        match = self._extract_results_re.search(payload)
        return match.group(1) + match.group(2)

    def _analyze_result(self, mutant, response):
        '''
        Analyze the result of the previously sent request.
        :return: None, save the vuln to the kb.
        '''
        if self._has_no_bug(mutant):
            e_res = self._extract_result_from_payload(mutant.get_mod_value())
            if e_res in response and not e_res in mutant.get_original_response_body():
                
                desc = 'Server side include (SSI) was found at: %s'
                desc = desc % mutant.found_at()
                
                v = Vuln.from_mutant('Server side include vulnerability', desc,
                                     severity.HIGH, response.id, self.get_name(),
                                     mutant)

                v.add_to_highlight(e_res)
                self.kb_append_uniq(self, 'ssi', v)

    def end(self):
        '''
        This method is called when the plugin wont be used anymore and is used
        to find persistent SSI vulnerabilities.

        Example where a persistent SSI can be found:

        Say you have a "guestbook" (a CGI application that allows visitors
        to leave messages for everyone to see) on a server that has SSI
        enabled. Most such guestbooks around the Net actually allow visitors
        to enter HTML code as part of their comments. Now, what happens if a
        malicious visitor decides to do some damage by entering the following:

        <!--#exec cmd="ls" -->

        If the guestbook CGI program was designed carefully, to strip SSI
        commands from the input, then there is no problem. But, if it was not,
        there exists the potential for a major headache!

        For a working example please see moth VM.
        '''
        multi_in_inst = multi_in(self._expected_res_mutant.keys())

        def filtered_freq_generator(freq_list):
            already_tested = ScalableBloomFilter()

            for freq in freq_list:
                if freq not in already_tested:
                    already_tested.add(freq)
                    yield freq

        def analyze_persistent(freq, response):

            for matched_expected_result in multi_in_inst.query(response.get_body()):
                # We found one of the expected results, now we search the
                # self._persistent_data to find which of the mutants sent it
                # and create the vulnerability
                mutant = self._expected_res_mutant[matched_expected_result]
                
                desc = 'Server side include (SSI) was found at: %s' \
                       ' The result of that injection is shown by browsing'\
                       ' to "%s".' 
                desc = desc % (mutant.found_at(), freq.get_url())
                
                v = Vuln.from_mutant('Persistent server side include vulnerability',
                                     desc, severity.HIGH, response.id,
                                     self.get_name(), mutant)
                
                v.add_to_highlight(matched_expected_result)
                self.kb_append(self, 'ssi', v)

        self._send_mutants_in_threads(self._uri_opener.send_mutant,
                                      filtered_freq_generator(self._freq_list),
                                      analyze_persistent,
                                      cache=False)
        
        self._expected_res_mutant.cleanup()
        self._freq_list.cleanup()

    def get_long_desc(self):
        '''
        :return: A DETAILED description of the plugin functions and features.
        '''
        return '''
Example #13
0
    def __init__(self):
        GrepPlugin.__init__(self)

        # Internal variables
        self._comments = DiskDict()
        self._already_reported_interesting = ScalableBloomFilter()
Example #14
0
 def __init__(self, max_variants=5):
     self._disk_dict = DiskDict()
     self._db_lock = threading.RLock()
     self.max_variants = max_variants
Example #15
0
class ssi(AuditPlugin):
    '''
    Find server side inclusion vulnerabilities.
    :author: Andres Riancho ([email protected])
    '''
    def __init__(self):
        AuditPlugin.__init__(self)

        # Internal variables
        self._expected_res_mutant = DiskDict()
        self._freq_list = DiskList()

        re_str = '<!--#exec cmd="echo -n (.*?);echo -n (.*?)" -->'
        self._extract_results_re = re.compile(re_str)

    def audit(self, freq, orig_response):
        '''
        Tests an URL for server side inclusion vulnerabilities.

        :param freq: A FuzzableRequest
        '''
        # Create the mutants to send right now,
        ssi_strings = self._get_ssi_strings()
        mutants = create_mutants(freq, ssi_strings, orig_resp=orig_response)

        # Used in end() to detect "persistent SSI"
        for mut in mutants:
            expected_result = self._extract_result_from_payload(
                mut.get_mod_value())
            self._expected_res_mutant[expected_result] = mut

        self._freq_list.append(freq)
        # End of persistent SSI setup

        self._send_mutants_in_threads(self._uri_opener.send_mutant, mutants,
                                      self._analyze_result)

    def _get_ssi_strings(self):
        '''
        This method returns a list of server sides to try to include.

        :return: A string, see above.
        '''
        yield '<!--#exec cmd="echo -n %s;echo -n %s" -->' % (rand_alpha(5),
                                                             rand_alpha(5))

        # TODO: Add mod_perl ssi injection support
        # http://www.sens.buffalo.edu/services/webhosting/advanced/perlssi.shtml
        #yield <!--#perl sub="sub {print qq/If you see this, mod_perl is working!/;}" -->

    def _extract_result_from_payload(self, payload):
        '''
        Extract the expected result from the payload we're sending.
        '''
        match = self._extract_results_re.search(payload)
        return match.group(1) + match.group(2)

    def _analyze_result(self, mutant, response):
        '''
        Analyze the result of the previously sent request.
        :return: None, save the vuln to the kb.
        '''
        if self._has_no_bug(mutant):
            e_res = self._extract_result_from_payload(mutant.get_mod_value())
            if e_res in response and not e_res in mutant.get_original_response_body(
            ):

                desc = 'Server side include (SSI) was found at: %s'
                desc = desc % mutant.found_at()

                v = Vuln.from_mutant('Server side include vulnerability',
                                     desc, severity.HIGH, response.id,
                                     self.get_name(), mutant)

                v.add_to_highlight(e_res)
                self.kb_append_uniq(self, 'ssi', v)

    def end(self):
        '''
        This method is called when the plugin wont be used anymore and is used
        to find persistent SSI vulnerabilities.

        Example where a persistent SSI can be found:

        Say you have a "guestbook" (a CGI application that allows visitors
        to leave messages for everyone to see) on a server that has SSI
        enabled. Most such guestbooks around the Net actually allow visitors
        to enter HTML code as part of their comments. Now, what happens if a
        malicious visitor decides to do some damage by entering the following:

        <!--#exec cmd="ls" -->

        If the guestbook CGI program was designed carefully, to strip SSI
        commands from the input, then there is no problem. But, if it was not,
        there exists the potential for a major headache!

        For a working example please see moth VM.
        '''
        multi_in_inst = multi_in(self._expected_res_mutant.keys())

        def filtered_freq_generator(freq_list):
            already_tested = ScalableBloomFilter()

            for freq in freq_list:
                if freq not in already_tested:
                    already_tested.add(freq)
                    yield freq

        def analyze_persistent(freq, response):

            for matched_expected_result in multi_in_inst.query(
                    response.get_body()):
                # We found one of the expected results, now we search the
                # self._persistent_data to find which of the mutants sent it
                # and create the vulnerability
                mutant = self._expected_res_mutant[matched_expected_result]

                desc = 'Server side include (SSI) was found at: %s' \
                       ' The result of that injection is shown by browsing'\
                       ' to "%s".'
                desc = desc % (mutant.found_at(), freq.get_url())

                v = Vuln.from_mutant(
                    'Persistent server side include vulnerability', desc,
                    severity.HIGH, response.id, self.get_name(), mutant)

                v.add_to_highlight(matched_expected_result)
                self.kb_append(self, 'ssi', v)

        self._send_mutants_in_threads(self._uri_opener.send_mutant,
                                      filtered_freq_generator(self._freq_list),
                                      analyze_persistent,
                                      cache=False)

        self._expected_res_mutant.cleanup()
        self._freq_list.cleanup()

    def get_long_desc(self):
        '''
        :return: A DETAILED description of the plugin functions and features.
        '''
        return '''
Example #16
0
class VariantDB(object):
    def __init__(self, max_variants=5):
        self._disk_dict = DiskDict()
        self._db_lock = threading.RLock()
        self.max_variants = max_variants

    def append(self, reference):
        '''
        Called when a new reference is found and we proved that new
        variants are still needed.

        :param reference: The reference (as a URL object) to add. This method
                          will "normalize" it before adding it to the internal
                          shelve.
        '''
        clean_reference = self._clean_reference(reference)

        with self._db_lock:
            count = self._disk_dict.get(clean_reference, None)

            if count is not None:
                self._disk_dict[clean_reference] = count + 1
            else:
                self._disk_dict[clean_reference] = 1

    def _clean_reference(self, reference):
        '''
        This method is VERY dependent on the are_variants method from
        core.data.request.variant_identification , make sure to remember that
        when changing stuff here or there.

        What this method does is to "normalize" any input reference string so
        that they can be compared very simply using string match.

        '''
        res = reference.get_domain_path() + reference.get_file_name()

        if reference.has_query_string():

            res += '?'
            qs = reference.querystring.copy()

            for key in qs:
                value_list = qs[key]
                for i, value in enumerate(value_list):
                    if value.isdigit():
                        qs[key][i] = 'number'
                    else:
                        qs[key][i] = 'string'

            res += str(qs)

        return res

    def need_more_variants(self, reference):
        '''
        :return: True if there are not enough variants associated with
        this reference in the DB.
        '''
        clean_reference = self._clean_reference(reference)
        # I believe this is atomic enough...
        count = self._disk_dict.get(clean_reference, 0)
        if count >= self.max_variants:
            return False
        else:
            return True
Example #17
0
    def __init__(self):
        GrepPlugin.__init__(self)

        # Internal variables
        self._comments = DiskDict()
        self._already_reported_interesting = ScalableBloomFilter()
Example #18
0
class html_comments(GrepPlugin):
    '''
    Extract and analyze HTML comments.

    :author: Andres Riancho ([email protected])
    '''

    HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>')

    INTERESTING_WORDS = (
        'user',
        'pass',
        'xxx',
        'fix',
        'bug',
        'broken',
        'oops',
        'hack',
        'caution',
        'todo',
        'note',
        'warning',
        '!!!',
        '???',
        'shit',
        'stupid',
        'tonto',
        'porqueria',
        'ciudado',
        'usuario',
        'contrase',
        'puta',
        'secret',
        '@',
        'email',
        'security',
        'captcha',
        'pinga',
        'cojones',
        # some in Portuguese
        'banco',
        'bradesco',
        'itau',
        'visa',
        'bancoreal',
        u'transfêrencia',
        u'depósito',
        u'cartão',
        u'crédito',
        'dados pessoais')

    _multi_in = multi_in(INTERESTING_WORDS)

    def __init__(self):
        GrepPlugin.__init__(self)

        # Internal variables
        self._comments = DiskDict()
        self._already_reported_interesting = ScalableBloomFilter()

    def grep(self, request, response):
        '''
        Plugin entry point, parse those comments!

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        '''
        if not response.is_text_or_html():
            return

        try:
            dp = parser_cache.dpc.get_document_parser_for(response)
        except w3afException:
            return

        for comment in dp.get_comments():
            # These next two lines fix this issue:
            # audit.ssi + grep.html_comments + web app with XSS = false positive
            if request.sent(comment):
                continue

            # show nice comments ;)
            comment = comment.strip()

            if self._is_new(comment, response):

                self._interesting_word(comment, request, response)
                self._html_in_comment(comment, request, response)

    def _interesting_word(self, comment, request, response):
        '''
        Find interesting words in HTML comments
        '''
        comment = comment.lower()
        for word in self._multi_in.query(response.body):
            if (word, response.get_url()
                ) not in self._already_reported_interesting:
                desc = 'A comment with the string "%s" was found in: "%s".'\
                       ' This could be interesting.'
                desc = desc % (word, response.get_url())

                i = Info('Interesting HTML comment', desc, response.id,
                         self.get_name())
                i.set_dc(request.get_dc())
                i.set_uri(response.get_uri())
                i.add_to_highlight(word)

                kb.kb.append(self, 'interesting_comments', i)
                om.out.information(i.get_desc())

                self._already_reported_interesting.add(
                    (word, response.get_url()))

    def _html_in_comment(self, comment, request, response):
        '''
        Find HTML code in HTML comments
        '''
        html_in_comment = self.HTML_RE.search(comment)

        if html_in_comment and \
        (comment, response.get_url()) not in self._already_reported_interesting:
            # There is HTML code in the comment.
            comment = comment.replace('\n', '')
            comment = comment.replace('\r', '')
            desc = 'A comment with the string "%s" was found in: "%s".'\
                   ' This could be interesting.'
            desc = desc % (comment, response.get_url())

            i = Info('HTML comment contains HTML code', desc, response.id,
                     self.get_name())
            i.set_dc(request.get_dc())
            i.set_uri(response.get_uri())
            i.add_to_highlight(html_in_comment.group(0))

            kb.kb.append(self, 'html_comment_hides_html', i)
            om.out.information(i.get_desc())
            self._already_reported_interesting.add(
                (comment, response.get_url()))

    def _is_new(self, comment, response):
        '''
        Make sure that we perform a thread safe check on the self._comments dict,
        in order to avoid duplicates.
        '''
        with self._plugin_lock:

            #pylint: disable=E1103
            comment_data = self._comments.get(comment, None)

            if comment_data is None:
                self._comments[comment] = [
                    (response.get_url(), response.id),
                ]
                return True
            else:
                if response.get_url() not in [x[0] for x in comment_data]:
                    comment_data.append((response.get_url(), response.id))
                    self._comments[comment] = comment_data
                    return True
            #pylint: enable=E1103

        return False

    def end(self):
        '''
        This method is called when the plugin wont be used anymore.
        :return: None
        '''
        inform = []
        for comment in self._comments.iterkeys():
            urls_with_this_comment = self._comments[comment]
            stick_comment = ' '.join(comment.split())
            if len(stick_comment) > 40:
                msg = 'A comment with the string "%s..." (and %s more bytes)'\
                      ' was found on these URL(s):'
                om.out.information(
                    msg % (stick_comment[:40], str(len(stick_comment) - 40)))
            else:
                msg = 'A comment containing "%s" was found on these URL(s):'
                om.out.information(msg % (stick_comment))

            for url, request_id in urls_with_this_comment:
                inform.append('- ' + url + ' (request with id: ' +
                              str(request_id) + ')')

            inform.sort()
            for i in inform:
                om.out.information(i)

        self._comments.cleanup()

    def get_long_desc(self):
        '''
        :return: A DETAILED description of the plugin functions and features.
        '''
        return '''
Example #19
0
class html_comments(GrepPlugin):
    '''
    Extract and analyze HTML comments.

    :author: Andres Riancho ([email protected])
    '''

    HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>')

    INTERESTING_WORDS = (
        'user', 'pass', 'xxx', 'fix', 'bug', 'broken', 'oops', 'hack',
        'caution', 'todo', 'note', 'warning', '!!!', '???', 'shit',
        'stupid', 'tonto', 'porqueria', 'ciudado', 'usuario', 'contrase',
        'puta', 'secret', '@', 'email', 'security', 'captcha', 'pinga',
        'cojones',
        # some in Portuguese
        'banco', 'bradesco', 'itau', 'visa', 'bancoreal', u'transfêrencia',
        u'depósito', u'cartão', u'crédito', 'dados pessoais'
    )

    _multi_in = multi_in(INTERESTING_WORDS)

    def __init__(self):
        GrepPlugin.__init__(self)

        # Internal variables
        self._comments = DiskDict()
        self._already_reported_interesting = ScalableBloomFilter()

    def grep(self, request, response):
        '''
        Plugin entry point, parse those comments!

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        '''
        if not response.is_text_or_html():
            return
        
        try:
            dp = parser_cache.dpc.get_document_parser_for(response)
        except w3afException:
            return
        
        for comment in dp.get_comments():
            # These next two lines fix this issue:
            # audit.ssi + grep.html_comments + web app with XSS = false positive
            if request.sent(comment):
                continue

            # show nice comments ;)
            comment = comment.strip()

            if self._is_new(comment, response):

                self._interesting_word(comment, request, response)
                self._html_in_comment(comment, request, response)

    def _interesting_word(self, comment, request, response):
        '''
        Find interesting words in HTML comments
        '''
        comment = comment.lower()
        for word in self._multi_in.query(response.body):
            if (word, response.get_url()) not in self._already_reported_interesting:
                desc = 'A comment with the string "%s" was found in: "%s".'\
                       ' This could be interesting.'
                desc = desc % (word, response.get_url())

                i = Info('Interesting HTML comment', desc,
                         response.id, self.get_name())
                i.set_dc(request.get_dc())
                i.set_uri(response.get_uri())
                i.add_to_highlight(word)
                
                kb.kb.append(self, 'interesting_comments', i)
                om.out.information(i.get_desc())
                
                self._already_reported_interesting.add((word,
                                                        response.get_url()))

    def _html_in_comment(self, comment, request, response):
        '''
        Find HTML code in HTML comments
        '''
        html_in_comment = self.HTML_RE.search(comment)
        
        if html_in_comment and \
        (comment, response.get_url()) not in self._already_reported_interesting:
            # There is HTML code in the comment.
            comment = comment.replace('\n', '')
            comment = comment.replace('\r', '')
            desc = 'A comment with the string "%s" was found in: "%s".'\
                   ' This could be interesting.'
            desc = desc % (comment, response.get_url())

            i = Info('HTML comment contains HTML code', desc,
                     response.id, self.get_name())
            i.set_dc(request.get_dc())
            i.set_uri(response.get_uri())
            i.add_to_highlight(html_in_comment.group(0))
            
            kb.kb.append(self, 'html_comment_hides_html', i)
            om.out.information(i.get_desc())
            self._already_reported_interesting.add(
                (comment, response.get_url()))

    def _is_new(self, comment, response):
        '''
        Make sure that we perform a thread safe check on the self._comments dict,
        in order to avoid duplicates.
        '''
        with self._plugin_lock:
            
            #pylint: disable=E1103
            comment_data = self._comments.get(comment, None)
            
            if comment_data is None:
                self._comments[comment] = [(response.get_url(), response.id), ]
                return True
            else:
                if response.get_url() not in [x[0] for x in comment_data]:
                    comment_data.append((response.get_url(), response.id))
                    self._comments[comment] = comment_data
                    return True
            #pylint: enable=E1103
            
        return False

    def end(self):
        '''
        This method is called when the plugin wont be used anymore.
        :return: None
        '''
        inform = []
        for comment in self._comments.iterkeys():
            urls_with_this_comment = self._comments[comment]
            stick_comment = ' '.join(comment.split())
            if len(stick_comment) > 40:
                msg = 'A comment with the string "%s..." (and %s more bytes)'\
                      ' was found on these URL(s):'
                om.out.information(
                    msg % (stick_comment[:40], str(len(stick_comment) - 40)))
            else:
                msg = 'A comment containing "%s" was found on these URL(s):'
                om.out.information(msg % (stick_comment))

            for url, request_id in urls_with_this_comment:
                inform.append('- ' + url +
                              ' (request with id: ' + str(request_id) + ')')

            inform.sort()
            for i in inform:
                om.out.information(i)
        
        self._comments.cleanup()

    def get_long_desc(self):
        '''
        :return: A DETAILED description of the plugin functions and features.
        '''
        return '''
Example #20
0
class VariantDB(object):

    def __init__(self, max_variants=5):
        self._disk_dict = DiskDict()
        self._db_lock = threading.RLock()
        self.max_variants = max_variants

    def append(self, reference):
        '''
        Called when a new reference is found and we proved that new
        variants are still needed.

        :param reference: The reference (as a URL object) to add. This method
                          will "normalize" it before adding it to the internal
                          shelve.
        '''
        clean_reference = self._clean_reference(reference)

        with self._db_lock:
            count = self._disk_dict.get(clean_reference, None)

            if count is not None:
                self._disk_dict[clean_reference] = count + 1
            else:
                self._disk_dict[clean_reference] = 1

    def _clean_reference(self, reference):
        '''
        This method is VERY dependent on the are_variants method from
        core.data.request.variant_identification , make sure to remember that
        when changing stuff here or there.

        What this method does is to "normalize" any input reference string so
        that they can be compared very simply using string match.

        '''
        res = reference.get_domain_path() + reference.get_file_name()

        if reference.has_query_string():

            res += '?'
            qs = reference.querystring.copy()

            for key in qs:
                value_list = qs[key]
                for i, value in enumerate(value_list):
                    if value.isdigit():
                        qs[key][i] = 'number'
                    else:
                        qs[key][i] = 'string'

            res += str(qs)

        return res

    def need_more_variants(self, reference):
        '''
        :return: True if there are not enough variants associated with
        this reference in the DB.
        '''
        clean_reference = self._clean_reference(reference)
        # I believe this is atomic enough...
        count = self._disk_dict.get(clean_reference, 0)
        if count >= self.max_variants:
            return False
        else:
            return True