def _get_extensions(self):
    """Build a mapping of file extensions to the spidered URLs using them.

    Distinct paths are read from the ``spider_urls`` collection. A path
    contributes an extension when it contains a dot and the suffix starting
    at the last dot is at most five characters long (dot included).

    :return: dict whose keys are extensions with their leading dot
        (e.g. ``'.php'``) and whose values are lists of URL strings;
        each URL is ``path`` followed by ``'?' + query`` when the stored
        query is non-empty, otherwise just ``path``.
    """
    # NOTE(review): an identical `_get_extensions` appears again later in
    # this source; if both live in the same class the later one shadows
    # this one -- confirm and drop the duplicate.
    coll = Registry().get('mongo').spider_urls
    distinct_paths = mongo_result_to_list(
        coll.group({'path': True}, '', {}, 'function () {}'))

    exts = set()
    for link in distinct_paths:
        path = link['path']
        dot = path.rfind('.')
        # Keep short suffixes only: the dot plus at most four characters.
        if dot > -1 and len(path) - dot <= 5:
            exts.add(path[dot:])

    result = {}
    for ext in exts:
        # The extension comes from crawled URLs, so escape every regex
        # metacharacter (not just the leading dot); otherwise something
        # like '.c++' fails to compile or matches the wrong paths.
        pattern = re.compile(re.escape(ext) + '$')
        matches = mongo_result_to_list(coll.find({'path': pattern}))
        result[ext] = [
            link['path'] + '?' + link['query'] if link['query'] else link['path']
            for link in matches
        ]
    return result
def _get_codes_stat(self):
    """Group spidered URLs by their HTTP response code.

    :return: dict keyed by the response code converted to ``int``; each
        value is the list of URLs stored with that code, where a URL is
        ``path`` plus ``'?' + query`` when the query is non-empty.
    """
    urls = Registry().get('mongo').spider_urls
    stat = {}
    for group in urls.group({'code': True}, '', {}, 'function () {}'):
        http_code = group['code']
        # Only the fields needed to rebuild the URL are requested.
        cursor = urls.find({'code': http_code}, {'path': 1, 'query': 1})
        stat[int(http_code)] = [
            row['path'] + '?' + row['query'] if row['query'] else row['path']
            for row in mongo_result_to_list(cursor)
        ]
    return stat
def _get_extensions(self):
    """Build a mapping of file extensions to the spidered URLs using them.

    Distinct paths are read from the ``spider_urls`` collection. A path
    contributes an extension when it contains a dot and the suffix starting
    at the last dot is at most five characters long (dot included).

    :return: dict whose keys are extensions with their leading dot
        (e.g. ``'.php'``) and whose values are lists of URL strings;
        each URL is ``path`` followed by ``'?' + query`` when the stored
        query is non-empty, otherwise just ``path``.
    """
    # NOTE(review): an identical `_get_extensions` appears earlier in this
    # source; if both live in the same class this later definition is the
    # one that takes effect -- confirm and drop the duplicate.
    coll = Registry().get('mongo').spider_urls
    distinct_paths = mongo_result_to_list(
        coll.group({'path': True}, '', {}, 'function () {}'))

    exts = set()
    for link in distinct_paths:
        path = link['path']
        dot = path.rfind('.')
        # Keep short suffixes only: the dot plus at most four characters.
        if dot > -1 and len(path) - dot <= 5:
            exts.add(path[dot:])

    result = {}
    for ext in exts:
        # The extension comes from crawled URLs, so escape every regex
        # metacharacter (not just the leading dot); otherwise something
        # like '.c++' fails to compile or matches the wrong paths.
        pattern = re.compile(re.escape(ext) + '$')
        matches = mongo_result_to_list(coll.find({'path': pattern}))
        result[ext] = [
            link['path'] + '?' + link['query'] if link['query'] else link['path']
            for link in matches
        ]
    return result