예제 #1
0
 def content(self):
     """Return the decoded body of the highest-scoring cluster, or False.

     ``self.html`` is normalised, decomposed into sections and clustered.
     The cluster with the largest ``points`` value is selected and its
     ``body`` is passed through ``decode_entities``.

     Returns:
         The decoded body of the best cluster, or ``False`` when no
         cluster was produced or the best cluster has an empty body.
     """
     sects = parser.decompose(extract_normed_body(self.html))
     clusts = cluster.lbcluster(sects)
     if not clusts:
         # Nothing was clustered: report failure the same way an empty
         # body does instead of raising IndexError on clusts[0].
         return False
     # Highest score first.  key=/reverse= replaces the cmp= argument that
     # Python 3 removed; the sort is stable, so clusters with equal points
     # keep the relative order they had under the cmp-based sort.
     clusts.sort(key=lambda c: c.points, reverse=True)
     best = clusts[0]
     if len(best.body) > 0:
         return decode_entities(best.body)
     return False
예제 #2
0
 def title(self):
     """Return the text of the best title candidate preceding the main cluster.

     ``self.html`` is normalised, decomposed into sections and clustered.
     The sections located before the first block of the highest-scoring
     cluster are then walked backwards (nearest first) and scored with
     ``lbttlscore``; the text of the best-scoring section is returned.

     Scoring weights, as applied by the loop below:
       * ``factor`` is multiplied by ``self.decay_factor`` for every
         non-empty section visited, and is passed to ``lbttlscore``.
       * ``continuous`` is divided by ``self.continuous_factor`` for every
         section visited (empty or not) once a candidate has been found,
         and multiplies the value returned by ``lbttlscore``.

     Returns:
         The text of the best candidate, ``u''`` when no section scores
         above zero, or ``False`` when there is no cluster or the best
         cluster has no blocks.
     """
     sects = parser.decompose(extract_normed_body(self.html))
     clusts = cluster.lbcluster(sects)
     if not clusts:
         # Nothing was clustered: fail the same way an empty best cluster
         # does instead of raising IndexError on clusts[0].
         return False
     # Highest score first.  key=/reverse= replaces the cmp= argument that
     # Python 3 removed; the sort is stable, so ties keep their order.
     clusts.sort(key=lambda c: c.points, reverse=True)
     # Pick the highest-scoring cluster.
     best = clusts[0]
     if len(best.blocks) == 0:
         return False
     factor = 1.0
     continuous = 1.0
     best_text = u''
     best_score = 0
     # Only sections that come before the main cluster's first block are
     # candidates; visit them from the nearest one backwards.
     first_block = sects.index(best.blocks[0])
     for b in reversed(sects[:first_block]):
         if best_text:
             # A candidate already exists: weaken everything farther away.
             continuous /= self.continuous_factor
         if not b.text:
             continue
         factor *= self.decay_factor
         # Score once and reuse it for both the comparison and the update.
         score = lbttlscore(b, factor) * continuous
         if score > best_score:
             best_text = b.text
             best_score = score
     return best_text