def is_similar_page(res1, res2, radio=0.85):
    """Report whether two responses carry similar bodies.

    The ``text`` attribute of each response is tokenised with ``split()`` and
    the token lists are hashed with ``Simhash``; the two hashes are then
    compared through ``similarity``.

    Args:
        res1: First response object exposing ``text``, or ``None``.
        res2: Second response object exposing ``text``, or ``None``.
        radio: Similarity threshold. The pages count as similar only when
            the computed similarity is strictly greater than this value.

    Returns:
        ``False`` when either response is ``None``; otherwise ``True`` if
        the similarity exceeds ``radio`` and ``False`` if it does not.
    """
    for response in (res1, res2):
        if response is None:
            return False

    # Read both bodies before hashing, as the hashing step needs the tokens.
    first_text, second_text = res1.text, res2.text
    first_hash = Simhash(first_text.split())
    second_hash = Simhash(second_text.split())

    score = first_hash.similarity(second_hash)
    return bool(score > radio)
def IsSimilarPage(res1, res2, radio):
    """Compute whether two pages are similar.

    Both arguments are converted with ``str()`` and hashed with ``Simhash``;
    the hashes are compared through ``similarity``.

    Args:
        res1: First page (any object; its ``str()`` form is hashed), or
            ``None``.
        res2: Second page, handled the same way as ``res1``.
        radio: Lower similarity bound (inclusive).

    Returns:
        ``False`` when either page is ``None``. Otherwise ``True`` only when
        the similarity is at least ``radio`` and strictly below 0.99, so a
        score of 0.99 or more is reported as not similar.
    """
    if res1 is None:
        return False
    if res2 is None:
        return False

    left_hash = Simhash(str(res1))
    right_hash = Simhash(str(res2))
    score = left_hash.similarity(right_hash)

    # Accept only scores inside the half-open window [radio, 0.99).
    return bool(score >= radio and score < 0.99)
def print_guesses(self, name, threshold=0.75):
    """Print the plugins whose stored similarity hash resembles a connection.

    The connection's hash is obtained from ``self.get_hash(name)`` and
    compared against the ``sim_hash`` value stored under each plugin's
    ``config['target']``.

    Args:
        name: Connection name passed to ``self.get_hash``.
        threshold: Similarity a plugin must strictly exceed to be reported.
            Defaults to 0.75.

    Returns:
        None. One line is printed per matching plugin, or a single
        "No matching plugin found" line when nothing matches.
    """
    connection_hash = self.get_hash(name)
    matched = False

    for plugin in self.core.plugin_manager.plugins:
        stored_value = int(plugin.config['target']['sim_hash'])

        # The plugin stores only the integer hash value, so build an empty
        # 64-bit Simhash and overwrite its ``hash`` attribute with it.
        candidate = Simhash('', hashbits=64)
        candidate.hash = stored_value

        similarity = connection_hash.similarity(candidate)
        if similarity > threshold:
            matched = True
            print("Found plugin {} with {:.1f}% similarity.".format(
                plugin.config['name'], similarity * 100))

    if not matched:
        print("No matching plugin found")
def is_similar_page(res1, res2, radio):
    """Compute whether two pages are similar.

    Both arguments are converted with ``str()`` and hashed with ``Simhash``;
    the hashes are compared through ``similarity``.

    Args:
        res1: First page (any object; its ``str()`` form is hashed), or
            ``None``.
        res2: Second page, handled the same way as ``res1``.
        radio: Similarity threshold (inclusive). It is passed through
            ``float()`` before the comparison, so a numeric string works.

    Returns:
        ``False`` when either page is ``None``; otherwise whether the
        similarity is greater than or equal to ``float(radio)``.
    """
    for page in (res1, res2):
        if page is None:
            return False

    left_hash = Simhash(str(res1))
    right_hash = Simhash(str(res2))
    score = left_hash.similarity(right_hash)

    return bool(score >= float(radio))
def is_similar_page(res1, res2, radio):
    """Compute whether two pages are similar.

    Both arguments are converted with ``str()`` and hashed with ``Simhash``;
    the hashes are compared through ``similarity``.

    Args:
        res1: First page (any object; its ``str()`` form is hashed), or
            ``None``.
        res2: Second page, handled the same way as ``res1``.
        radio: Similarity threshold (inclusive).

    Returns:
        ``False`` when either page is ``None``; otherwise whether the
        similarity is greater than or equal to ``radio``.
    """
    if res1 is None:
        return False
    if res2 is None:
        return False

    left_hash = Simhash(str(res1))
    right_hash = Simhash(str(res2))
    score = left_hash.similarity(right_hash)

    return bool(score >= radio)
def get_hash(self, name):
    """Build the similarity hash for a connection.

    Args:
        name: Connection name passed to ``self.serialize_database``.

    Returns:
        A ``Simhash`` created with ``hashbits=64`` from whatever
        ``self.serialize_database(name)`` returns.
    """
    return Simhash(self.serialize_database(name), hashbits=64)