class WebPageSimilarity:
    """Combine several per-aspect similarity analysers for a pair of pages.

    Each analyser is stored as a public attribute and is expected to expose
    a ``main(url1, url2)`` method; this class only forwards the two
    arguments and collects the results.
    """

    def __init__(self):
        """Instantiate the structural, content and link analysers."""
        self.structuralSimilarity = StructuralSimilarity()
        self.contentSimilarity = ContentSimilarity()
        # NOTE: visual similarity is currently disabled, so no
        # ``visualSimilarity`` attribute is created.
        self.linkSimilarity = LinkSimilarity()

    def calculate_similarity(self, url1, url2):
        """Run every enabled analyser on the two arguments.

        Args:
            url1: First value, passed unchanged to each analyser's ``main``.
            url2: Second value, passed unchanged to each analyser's ``main``.

        Returns:
            dict: Keys ``'content_similarity'``, ``'link_similarity'`` and
            ``'structural_similarity'`` mapped to whatever the matching
            analyser's ``main(url1, url2)`` returned.
        """
        # The analysers are invoked in the same order as the keys appear:
        # content, then link, then structural.
        return {
            'content_similarity': self.contentSimilarity.main(url1, url2),
            'link_similarity': self.linkSimilarity.main(url1, url2),
            'structural_similarity': self.structuralSimilarity.main(url1, url2),
        }

    def calculate_weights(self, listOfCalculatedSimilarities,
                          listOfActualSimilarities):
        """Least-squares fit of calculated similarities to actual ones.

        Solves ``a @ x ~= b`` with ``numpy.linalg.lstsq`` where ``a`` is
        built from ``listOfCalculatedSimilarities`` and ``b`` from
        ``listOfActualSimilarities``.

        Args:
            listOfCalculatedSimilarities: Coefficient matrix; must convert
                to a 2-D array (one row per observation).
            listOfActualSimilarities: Target values with one entry (or row)
                per observation.

        Returns:
            tuple: The unmodified ``numpy.linalg.lstsq`` result
            ``(solution, residuals, rank, singular_values)``; the fitted
            weights are element ``0``.

        Raises:
            numpy.linalg.LinAlgError: If the input shapes are incompatible
                or the computation does not converge.
        """
        a = np.asarray(listOfCalculatedSimilarities)
        b = np.asarray(listOfActualSimilarities)
        # rcond=None selects the machine-precision based cutoff explicitly;
        # omitting it triggers a FutureWarning (and the legacy cutoff) on
        # NumPy releases before 2.0.
        return np.linalg.lstsq(a, b, rcond=None)
def __init__(self):
    """Attach the structural, content and link analysers to ``self``.

    NOTE(review): this module-level definition repeats the statements of
    ``WebPageSimilarity.__init__``; confirm whether it is intentional.
    """
    # Construction order is kept as-is: structural, content, link.
    self.structuralSimilarity = StructuralSimilarity()
    self.contentSimilarity = ContentSimilarity()
    # Visual similarity is currently disabled; no attribute is set for it.
    self.linkSimilarity = LinkSimilarity()