Beispiel #1
0
    def _get_extensions(self):
        """Build a mapping of file extensions to the spidered links using them.

        Every distinct ``path`` stored in the ``spider_urls`` collection is
        inspected; the text from its last dot to the end is treated as an
        extension when it is at most 5 characters long (dot included) and
        does not contain a ``/``.

        Returns:
            dict: extension (with its leading dot) -> list of links whose
            path ends with that extension. A link is ``path?query`` when the
            document's ``query`` is truthy, otherwise just ``path``.
        """
        coll = Registry().get('mongo').spider_urls
        # NOTE(review): Collection.group() is deprecated in newer PyMongo
        # releases -- confirm the driver version before upgrading.
        grouped = coll.group({'path': True}, '', {}, 'function () {}')

        exts = set()
        for link in mongo_result_to_list(grouped):
            path = link['path']
            dot_pos = path.rfind('.')
            if dot_pos == -1 or len(path) - dot_pos > 5:
                continue
            ext = path[dot_pos:]
            # The last dot sits in a directory name (e.g. "/dir.v2/a"), so
            # the suffix is not a file extension.
            if '/' in ext:
                continue
            exts.add(ext)

        result = {}
        for ext in exts:
            # Escape the whole extension, not only its leading dot, so regex
            # metacharacters inside it are matched literally.
            pattern = re.compile(re.escape(ext) + '$')
            matched = mongo_result_to_list(coll.find({'path': pattern}))
            result[ext] = [
                link['path'] + '?' + link['query']
                if link['query'] else link['path']
                for link in matched
            ]

        return result
Beispiel #2
0
    def _get_codes_stat(self):
        """Collect the spidered links grouped by their HTTP response code.

        Returns:
            dict: integer code -> list of links stored with that code. A
            link is ``path?query`` when the document's ``query`` is truthy,
            otherwise just ``path``.
        """
        coll = Registry().get('mongo').spider_urls
        stat = {}

        # One grouped document per distinct value of the "code" field.
        for group in coll.group({'code': True}, '', {}, 'function () {}'):
            status = group['code']
            urls = []
            cursor = coll.find({'code': status}, {'path': 1, 'query': 1})
            for doc in mongo_result_to_list(cursor):
                if doc['query']:
                    urls.append(doc['path'] + '?' + doc['query'])
                else:
                    urls.append(doc['path'])
            stat[int(status)] = urls

        return stat
Beispiel #3
0
    def _get_codes_stat(self):
        """Build a dict mapping each HTTP code to the links that returned it.

        The keys are the stored codes converted with ``int``; every value is
        a list of strings, ``path?query`` for documents with a truthy
        ``query`` and plain ``path`` for the rest.
        """
        coll = Registry().get('mongo').spider_urls
        by_code = {}

        code_groups = coll.group({'code': True}, '', {}, 'function () {}')
        for entry in code_groups:
            http_code = entry['code']
            # Only the two fields needed to rebuild the link are requested.
            docs = mongo_result_to_list(
                coll.find({'code': http_code}, {'path': 1, 'query': 1}))
            by_code[int(http_code)] = [
                doc['path'] + '?' + doc['query']
                if doc['query'] else doc['path']
                for doc in docs
            ]

        return by_code
Beispiel #4
0
    def _get_extensions(self):
        """Group spidered links by the file extension of their path.

        The candidate extension of a path is everything from its last dot
        onward. It is accepted only when it is no longer than 5 characters
        (including the dot) and contains no ``/``.

        Returns:
            dict: extension (leading dot included) -> list of links whose
            path ends with it. Each link is ``path?query`` if the stored
            ``query`` is truthy, else ``path`` alone.
        """
        coll = Registry().get('mongo').spider_urls
        # NOTE(review): Collection.group() is deprecated in newer PyMongo
        # releases -- confirm the driver version before upgrading.
        distinct_paths = mongo_result_to_list(
            coll.group({'path': True}, '', {}, 'function () {}'))

        exts = set()
        for link in distinct_paths:
            path = link['path']
            dot_pos = path.rfind('.')
            if dot_pos > -1 and len(path) - dot_pos <= 5:
                ext = path[dot_pos:]
                # A "/" after the dot means the dot is part of a directory
                # name, so there is no file extension here.
                if '/' not in ext:
                    exts.add(ext)

        result = {}
        for ext in exts:
            # re.escape covers every metacharacter in the extension; the
            # previous hand-written backslash only protected the dot.
            suffix_re = re.compile(re.escape(ext) + '$')
            found = mongo_result_to_list(coll.find({'path': suffix_re}))

            result[ext] = []
            for link in found:
                if link['query']:
                    result[ext].append(link['path'] + '?' + link['query'])
                else:
                    result[ext].append(link['path'])

        return result