# NOTE(review): `hosts_handler` is defined again further down in this file;
# a later definition replaces this one at import time -- confirm which
# version is meant to be live.
def hosts_handler(page=1, page_size=10, current_host=None,
                  which_collection="crawl-data", filter_field=None,
                  filter_regex=None, show_all=None):
    """Put together host documents for use with hosts endpoint.

    Each returned host dict has its "_id" key removed and gains two keys:
    "is_known_host" (result of KnownHostsCompare.is_known_host) and
    "hsu_screenshot_path" (relative screenshot path of the host's highest
    scoring URL with a screenshot, or None when there is no such URL).

    Args:
        page: page number forwarded to ``list_hosts``.
        page_size: number of hosts per page. Must be non-zero when
            ``current_host`` is given, because it is used as a modulus.
        current_host: when truthy, a single oversized page (up to 1000
            hosts) is requested and the result is cut right after the
            ``page_size``-sized chunk that contains this host. If the host
            is never encountered, everything that was fetched is returned.
        which_collection: collection name passed to MemexMongoUtils.
        filter_field: forwarded to ``list_hosts``.
        filter_regex: forwarded to ``list_hosts``.
        show_all: forwarded to ``list_hosts``.

    Returns:
        The list of host dicts, decorated in place.
    """
    mmu = MemexMongoUtils(which_collection=which_collection)
    khc = KnownHostsCompare()

    if current_host:
        max_page_size = 10 * 100  # max results per page
        host_dics = mmu.list_hosts(page=page, page_size=max_page_size,
                                   filter_field=filter_field,
                                   filter_regex=filter_regex,
                                   show_all=show_all)

        # Walk forward until the chunk containing current_host is complete;
        # `kept` ends up as the number of leading entries to retain.
        matched = False
        kept = 0
        for kept, host_dic in enumerate(host_dics, 1):
            if host_dic["host"] == current_host:
                matched = True
            if matched and kept % page_size == 0:
                break

        # clean the leftovers
        del host_dics[kept:]
    else:
        host_dics = mmu.list_hosts(page=page, page_size=page_size,
                                   filter_field=filter_field,
                                   filter_regex=filter_regex,
                                   show_all=show_all)

    for host_dic in host_dics:
        # Known-hostedness and the best screenshot are attached here.
        # The per-host score lookup (mmu.get_host_score) is not performed
        # in this version of the handler.
        host_dic.pop("_id")
        host_dic["is_known_host"] = khc.is_known_host(host_dic["host"])

        hsu = mmu.get_highest_scoring_url_with_screenshot(host_dic["host"])
        if hsu:
            host_dic["hsu_screenshot_path"] = get_screenshot_relative_path(
                hsu['screenshot_path'])
        else:
            host_dic["hsu_screenshot_path"] = None

    return host_dics
# NOTE(review): `get_page_number_for_host` is defined again further down in
# this file; the later definition replaces this one at import time.
def get_page_number_for_host(path, page_size, current_host,
                             filter_field=None, filter_regex=None,
                             show_all=None):
    """Return the zero-based page index on which ``current_host`` is listed.

    The collection is resolved from ``path`` via ``get_collection_by_path``;
    a single page of up to 10000 hosts is then requested and scanned in
    order for an entry whose "host" equals ``current_host``.

    Args:
        path: value handed to ``get_collection_by_path``.
        page_size: number of hosts per page; must be non-zero.
        current_host: host name to look for.
        filter_field: forwarded to ``list_hosts``.
        filter_regex: forwarded to ``list_hosts``.
        show_all: forwarded to ``list_hosts``.

    Returns:
        ``position // page_size`` for the first matching host, where
        ``position`` is its zero-based index in the listing. Returns 0 when
        the host is not found, which is indistinguishable from a match on
        the first page.
    """
    which_collection = get_collection_by_path(path)
    mmu = MemexMongoUtils(which_collection=which_collection)

    max_page_size = 100 * 100  # max results per page
    host_dics = mmu.list_hosts(page=1, page_size=max_page_size,
                               filter_field=filter_field,
                               filter_regex=filter_regex,
                               show_all=show_all)

    for position, host_dic in enumerate(host_dics):
        if host_dic["host"] == current_host:
            # Floor division keeps the page number an integer under both
            # Python 2 and Python 3 division semantics.
            return position // page_size

    return 0
# NOTE(review): `hosts_handler` is defined more than once in this file; a
# later definition replaces this one at import time -- confirm which version
# is meant to be live.
def hosts_handler(page=1, which_collection="crawl-data", filter_field=None,
                  filter_regex=None):
    """Put together host documents for use with hosts endpoint.

    Fetches one page of hosts and decorates every host dict in place:
    "_id" is removed, and "is_known_host", "host_score" and
    "hsu_screenshot_path" are added. "hsu_screenshot_path" is None when the
    host has no highest-scoring URL with a screenshot.

    Returns the decorated list obtained from ``list_hosts``.
    """
    mongo = MemexMongoUtils(which_collection=which_collection)
    known_hosts = KnownHostsCompare()

    documents = mongo.list_hosts(page=page, filter_field=filter_field,
                                 filter_regex=filter_regex)

    for doc in documents:
        # host scoring is added here, as is known-hostedness
        doc.pop("_id")
        hostname = doc["host"]

        doc["is_known_host"] = known_hosts.is_known_host(hostname)

        best_url = mongo.get_highest_scoring_url_with_screenshot(hostname)
        doc["host_score"] = mongo.get_host_score(hostname)

        doc["hsu_screenshot_path"] = (
            get_screenshot_relative_path(best_url['screenshot_path'])
            if best_url else None
        )

    return documents
# NOTE(review): this file also contains an earlier definition of
# `get_page_number_for_host`; being later, this one overrides it.
def get_page_number_for_host(path, page_size, current_host,
                             filter_field=None, filter_regex=None,
                             show_all=None):
    """Return the zero-based page index on which ``current_host`` is listed.

    The collection is resolved from ``path`` via ``get_collection_by_path``;
    a single page of up to 10000 hosts is then requested and scanned in
    order for an entry whose "host" equals ``current_host``.

    Args:
        path: value handed to ``get_collection_by_path``.
        page_size: number of hosts per page; must be non-zero.
        current_host: host name to look for.
        filter_field: forwarded to ``list_hosts``.
        filter_regex: forwarded to ``list_hosts``.
        show_all: forwarded to ``list_hosts``.

    Returns:
        ``position // page_size`` for the first matching host, where
        ``position`` is its zero-based index in the listing. Returns 0 when
        the host is not found, which is indistinguishable from a match on
        the first page.
    """
    which_collection = get_collection_by_path(path)
    mmu = MemexMongoUtils(which_collection=which_collection)

    max_page_size = 100 * 100  # max results per page
    host_dics = mmu.list_hosts(page=1, page_size=max_page_size,
                               filter_field=filter_field,
                               filter_regex=filter_regex,
                               show_all=show_all)

    for position, host_dic in enumerate(host_dics):
        if host_dic["host"] == current_host:
            # Floor division keeps the page number an integer under both
            # Python 2 and Python 3 division semantics.
            return position // page_size

    return 0
# NOTE(review): this file also contains earlier definitions of
# `hosts_handler`; being later, this one overrides them.
def hosts_handler(page=1, page_size=10, current_host=None,
                  which_collection="crawl-data", filter_field=None,
                  filter_regex=None, show_all=None):
    """Put together host documents for use with hosts endpoint.

    Each returned host dict has its "_id" key removed and gains two keys:
    "is_known_host" (result of KnownHostsCompare.is_known_host) and
    "hsu_screenshot_path" (relative screenshot path of the host's highest
    scoring URL with a screenshot, or None when there is no such URL).

    Args:
        page: page number forwarded to ``list_hosts``.
        page_size: number of hosts per page. Must be non-zero when
            ``current_host`` is given, because it is used as a modulus.
        current_host: when truthy, a single oversized page (up to 1000
            hosts) is requested and the result is cut right after the
            ``page_size``-sized chunk that contains this host. If the host
            is never encountered, everything that was fetched is returned.
        which_collection: collection name passed to MemexMongoUtils.
        filter_field: forwarded to ``list_hosts``.
        filter_regex: forwarded to ``list_hosts``.
        show_all: forwarded to ``list_hosts``.

    Returns:
        The list of host dicts, decorated in place.
    """
    mmu = MemexMongoUtils(which_collection=which_collection)
    khc = KnownHostsCompare()

    if current_host:
        max_page_size = 10 * 100  # max results per page
        host_dics = mmu.list_hosts(page=page, page_size=max_page_size,
                                   filter_field=filter_field,
                                   filter_regex=filter_regex,
                                   show_all=show_all)

        # Walk forward until the chunk containing current_host is complete;
        # `kept` ends up as the number of leading entries to retain.
        matched = False
        kept = 0
        for kept, host_dic in enumerate(host_dics, 1):
            if host_dic["host"] == current_host:
                matched = True
            if matched and kept % page_size == 0:
                break

        # clean the leftovers
        del host_dics[kept:]
    else:
        host_dics = mmu.list_hosts(page=page, page_size=page_size,
                                   filter_field=filter_field,
                                   filter_regex=filter_regex,
                                   show_all=show_all)

    for host_dic in host_dics:
        # Known-hostedness and the best screenshot are attached here.
        # The per-host score lookup (mmu.get_host_score) is not performed
        # in this version of the handler.
        host_dic.pop("_id")
        host_dic["is_known_host"] = khc.is_known_host(host_dic["host"])

        hsu = mmu.get_highest_scoring_url_with_screenshot(host_dic["host"])
        if hsu:
            host_dic["hsu_screenshot_path"] = get_screenshot_relative_path(
                hsu['screenshot_path'])
        else:
            host_dic["hsu_screenshot_path"] = None

    return host_dics