def calculate_best_node(self, doc):
    """Pick the node to treat as the main content of ``doc``.

    Nodes matching the CSS selector ``#sina_keyword_ad_area2`` take
    priority: when at least one is found, the first match is returned.
    Otherwise the decision is delegated to
    ``ContentExtractor.calculate_best_node``.

    :param doc: document object handed to ``Parser.css_select``.
    :return: the first matching node, or whatever
        ``ContentExtractor.calculate_best_node`` returns when nothing
        matches.
    """
    matches = Parser.css_select(doc, '#sina_keyword_ad_area2')
    if len(matches) >= 1:
        return matches[0]
    # Nothing matched the dedicated selector; defer to ContentExtractor.
    return ContentExtractor.calculate_best_node(self, doc)
def __init__(self, config):
    """Set up the instance by delegating to ``ContentExtractor.__init__``.

    :param config: configuration object, forwarded unchanged.
    """
    ContentExtractor.__init__(self, config)