def content(self):
    """Return the body text of the best-scoring cluster of ``self.html``.

    The normalized body of ``self.html`` is decomposed with
    ``parser.decompose`` and grouped with ``cluster.lbcluster``; the
    cluster with the highest ``points`` value is taken as the main content.

    Returns:
        The entity-decoded ``body`` of the best cluster, or ``False`` when
        there are no clusters or the best cluster has an empty body.
    """
    sects = parser.decompose(extract_normed_body(self.html))
    clusts = cluster.lbcluster(sects)
    if not clusts:
        # Nothing was clustered; report "no content" instead of letting
        # clusts[0] raise IndexError.
        return False

    # Highest score first. key/reverse works on both Python 2 and 3
    # (the cmp= argument is Python 2 only) and keeps the sort stable, so
    # ties resolve exactly as the cmp-based sort did.
    clusts.sort(key=lambda c: c.points, reverse=True)
    best = clusts[0]
    if best.body:
        return decode_entities(best.body)
    return False
def title(self):
    """Pick the most title-like text that precedes the main content.

    The best-scoring cluster (highest ``points``) is located as in
    ``content``. The sections that come before that cluster's first block
    are then scanned from nearest to farthest, and each non-empty one is
    scored with ``lbttlscore``. Two weights shrink as the scan moves away
    from the content:

    * ``factor`` is multiplied by ``self.decay_factor`` for every non-empty
      section and passed to ``lbttlscore``.
    * ``continuous`` is divided by ``self.continuous_factor`` on every
      section visited once a candidate has been found, and scales the
      score returned by ``lbttlscore``.

    Returns:
        The ``text`` of the highest-scoring candidate; ``u''`` when no
        candidate scores above zero; ``False`` when there are no clusters
        or the best cluster has no blocks.

    Raises:
        ValueError: if the best cluster's first block is not in the
            decomposed section list (raised by ``list.index``).
    """
    sects = parser.decompose(extract_normed_body(self.html))
    clusts = cluster.lbcluster(sects)
    if not clusts:
        # No clusters at all: same "not found" result as an empty cluster,
        # rather than an IndexError from clusts[0].
        return False

    # Highest score first. key/reverse works on both Python 2 and 3
    # (the cmp= argument is Python 2 only) and keeps the sort stable.
    clusts.sort(key=lambda c: c.points, reverse=True)
    best = clusts[0]
    if not best.blocks:
        return False

    factor = 1.0
    continuous = 1.0
    best_text = u''
    best_score = 0

    # Only sections located before the main content are title candidates;
    # walk them backwards so the closest section is considered first.
    preceding = sects[:sects.index(best.blocks[0])]
    for block in reversed(preceding):
        if best_text:
            # A candidate already exists: every further step away from it
            # is penalized, including steps over empty sections.
            continuous /= self.continuous_factor
        if not block.text:
            continue
        factor *= self.decay_factor
        score = lbttlscore(block, factor) * continuous
        if score > best_score:
            best_text = block.text
            best_score = score
    return best_text