def machine_similarities(self, machine1, machine2):
    """Collect similarity features for a pair of machines.

    Starts from the mapping returned by ``self.zero_similarities()`` and
    fills in the following keys:

    * ``"links_contain"`` -- set to 1 if ``self.contains(links1, machine2)``
      or ``self.contains(links2, machine1)`` is truthy.
    * ``"nodes_contain"`` -- same check, on the node collections.
    * ``"0-connected"`` -- set to 1 if either machine's printname occurs
      among the other machine's links.
    * ``"entities_jaccard"`` -- ``jaccard`` of the links containing ``"@"``.
    * ``"links_jaccard"`` / ``"nodes_jaccard"`` -- ``jaccard`` of all links
      and of all nodes respectively.

    Links and nodes come from ``self.get_links_nodes``; intermediate values
    are reported through ``self.log``.

    Returns the filled-in mapping.
    """
    pn1, pn2 = machine1.printname(), machine2.printname()
    self.log(u"machine1: {0}, machine2: {1}".format(pn1, pn2))

    sims = self.zero_similarities()
    links1, nodes1 = self.get_links_nodes(machine1)
    links2, nodes2 = self.get_links_nodes(machine2)
    self.log("links1: {0}, links2: {1}".format(links1, links2))
    self.log("nodes1: {0}, nodes2: {1}".format(nodes1, nodes2))

    if self.contains(links1, machine2) or self.contains(links2, machine1):
        sims["links_contain"] = 1
    if self.contains(nodes1, machine2) or self.contains(nodes2, machine1):
        sims["nodes_contain"] = 1

    # The printnames computed at the top are reused here; the original
    # recomputed them at this point and carried an unexplained TODO.
    # TODO: clarify what was still pending for this check.
    if pn1 in links2 or pn2 in links1:
        sims["0-connected"] = 1

    # Build real lists (not ``filter`` objects) so the values handed to
    # ``jaccard`` are reusable sequences on Python 3 as well as Python 2.
    entities1 = [link for link in links1 if "@" in link]
    entities2 = [link for link in links2 if "@" in link]

    sims["entities_jaccard"] = jaccard(entities1, entities2)
    sims["links_jaccard"] = jaccard(links1, links2)
    sims["nodes_jaccard"] = jaccard(nodes1, nodes2)
    return sims
def _links_and_nodes_similarity(self, machine1, machine2,
                                exclude_nodes=False, no_contain_score=False):
    """Score the similarity of two machines from their links and nodes.

    The score is built up as follows:

    1. Unless ``no_contain_score`` is set, a baseline of 0.35 is used when
       ``self.contains`` is truthy for either machine against the other's
       links; otherwise 0.25 when it is truthy against the other's nodes
       (the node check is skipped if ``exclude_nodes`` is set).
    2. If either machine's printname occurs among the other's links, the
       machines are treated as connected by a 0-path and 1 is returned
       immediately.
    3. If either machine has links containing ``"@"``, the score is raised
       to the ``jaccard`` of those entity links.
    4. Otherwise the score is raised to the ``jaccard`` of all links and,
       unless ``exclude_nodes`` is set, to the ``jaccard`` of the nodes if
       that is larger.

    Returns the resulting score (``0``, ``1`` or a value produced by
    ``jaccard`` / the fixed baselines).
    """
    sim = 0
    links1, nodes1 = self.get_links_nodes(machine1)
    links2, nodes2 = self.get_links_nodes(machine2)

    if not no_contain_score:
        if (self.contains(links1, machine2) or
                self.contains(links2, machine1)):
            sim = max(sim, 0.35)
        elif (not exclude_nodes) and (self.contains(nodes1, machine2) or
                                      self.contains(nodes2, machine1)):
            sim = max(sim, 0.25)

    self.log('links1: {0}, links2: {1}'.format(links1, links2))
    self.log('nodes1: {0}, nodes2: {1}'.format(nodes1, nodes2))

    pn1, pn2 = machine1.printname(), machine2.printname()
    if pn1 in links2 or pn2 in links1:
        self.log("{0} and {1} connected by 0-path, returning 1".format(
            pn1, pn2))
        return 1

    # Materialize as lists: a ``filter`` object is always truthy on
    # Python 3, which would make the emptiness test below meaningless and
    # the link-based fallback unreachable.
    entities1 = [link for link in links1 if "@" in link]
    entities2 = [link for link in links2 if "@" in link]

    if entities1 or entities2:
        sim = max(sim, jaccard(entities1, entities2))
    else:
        sim = max(sim, jaccard(links1, links2))
        # NOTE(review): node similarity is only consulted on this
        # no-entity path -- confirm this nesting is intended.
        if not exclude_nodes:
            node_sim = jaccard(nodes1, nodes2)
            if node_sim > sim:
                self.log(
                    'picking node sim ({0}) over link sim ({1})'.format(
                        node_sim, sim))
                sim = node_sim

    return sim
def _links_and_nodes_similarity(self, machine1, machine2,
                                exclude_nodes=False, no_contain_score=False):
    """Score the similarity of two machines from their links and nodes.

    The score is built up as follows:

    1. Unless ``no_contain_score`` is set, a baseline of 0.35 is used when
       ``self.contains`` is truthy for either machine against the other's
       links; otherwise 0.25 when it is truthy against the other's nodes
       (the node check is skipped if ``exclude_nodes`` is set).
    2. If either machine's printname occurs among the other's links, the
       machines are treated as connected by a 0-path and 1 is returned
       immediately.
    3. If either machine has links containing ``"@"``, the score is raised
       to the ``jaccard`` of those entity links.
    4. Otherwise the score is raised to the ``jaccard`` of all links and,
       unless ``exclude_nodes`` is set, to the ``jaccard`` of the nodes if
       that is larger.

    Returns the resulting score (``0``, ``1`` or a value produced by
    ``jaccard`` / the fixed baselines).
    """
    sim = 0
    links1, nodes1 = self.get_links_nodes(machine1)
    links2, nodes2 = self.get_links_nodes(machine2)

    if not no_contain_score:
        if (self.contains(links1, machine2) or
                self.contains(links2, machine1)):
            sim = max(sim, 0.35)
        elif (not exclude_nodes) and (self.contains(nodes1, machine2) or
                                      self.contains(nodes2, machine1)):
            sim = max(sim, 0.25)

    self.log('links1: {0}, links2: {1}'.format(links1, links2))
    self.log('nodes1: {0}, nodes2: {1}'.format(nodes1, nodes2))

    pn1, pn2 = machine1.printname(), machine2.printname()
    if pn1 in links2 or pn2 in links1:
        self.log(
            "{0} and {1} connected by 0-path, returning 1".format(
                pn1, pn2))
        return 1

    # Materialize as lists: a ``filter`` object is always truthy on
    # Python 3, which would make the emptiness test below meaningless and
    # the link-based fallback unreachable.
    entities1 = [link for link in links1 if "@" in link]
    entities2 = [link for link in links2 if "@" in link]

    if entities1 or entities2:
        sim = max(sim, jaccard(entities1, entities2))
    else:
        sim = max(sim, jaccard(links1, links2))
        # NOTE(review): node similarity is only consulted on this
        # no-entity path -- confirm this nesting is intended.
        if not exclude_nodes:
            node_sim = jaccard(nodes1, nodes2)
            if node_sim > sim:
                self.log(
                    'picking node sim ({0}) over link sim ({1})'.format(
                        node_sim, sim))
                sim = node_sim

    return sim
def nodes_jaccard(self, nodes1, nodes2):
    """Return ``{"nodes_jaccard": score}`` for two node collections.

    ``score`` is whatever ``jaccard(nodes1, nodes2)`` yields.
    """
    score = jaccard(nodes1, nodes2)
    return {"nodes_jaccard": score}
def entitiess_jaccard(self, links1, links2):
    """Return ``{'entities_jaccard': score}`` for two link collections.

    Only links containing ``"@"`` are kept from each side; ``score`` is the
    ``jaccard`` of those two filtered lists.
    """
    # List comprehensions rather than ``filter``: on Python 3 ``filter``
    # yields a lazy, single-use iterator, whereas ``jaccard`` is handed
    # plain lists here on every Python version.
    entities1 = [link for link in links1 if "@" in link]
    entities2 = [link for link in links2 if "@" in link]
    return {'entities_jaccard': jaccard(entities1, entities2)}
def links_jaccard(self, links1, links2):
    """Return ``{"links_jaccard": score}`` for two link collections.

    ``score`` is whatever ``jaccard(links1, links2)`` yields.
    """
    score = jaccard(links1, links2)
    return {"links_jaccard": score}
def graph_similarity(graph1, graph2):
    """Return the ``jaccard`` score of the two graphs' ``edges`` attributes."""
    edges1 = graph1.edges
    edges2 = graph2.edges
    return jaccard(edges1, edges2)
def nodes_jaccard(self, nodes1, nodes2):
    """Wrap the ``jaccard`` score of two node collections in a dict.

    The single key of the returned dict is ``"nodes_jaccard"``.
    """
    result = {}
    result["nodes_jaccard"] = jaccard(nodes1, nodes2)
    return result
def entitiess_jaccard(self, links1, links2):
    """Return ``{'entities_jaccard': score}`` for two link collections.

    Only links containing ``"@"`` are kept from each side; ``score`` is the
    ``jaccard`` of those two filtered lists.
    """
    # List comprehensions rather than ``filter``: on Python 3 ``filter``
    # yields a lazy, single-use iterator, whereas ``jaccard`` is handed
    # plain lists here on every Python version.
    entities1 = [link for link in links1 if "@" in link]
    entities2 = [link for link in links2 if "@" in link]
    return {'entities_jaccard': jaccard(entities1, entities2)}
def links_jaccard(self, links1, links2):
    """Wrap the ``jaccard`` score of two link collections in a dict.

    The single key of the returned dict is ``"links_jaccard"``.
    """
    result = {}
    result["links_jaccard"] = jaccard(links1, links2)
    return result