def getComment(mid, uid):
    """Download the Weibo page for one message and extract its raw data.

    The message id is converted with ``base62.mid2str`` and combined with the
    user id into ``http://weibo.com/<uid>/<wid>``. That URL is printed,
    fetched, and the response body is handed to ``extractRawData``.

    Args:
        mid: Message id, passed unchanged to ``base62.mid2str``.
        uid: User id; converted with ``str()`` when the URL is built.

    Returns:
        Whatever ``extractRawData`` returns for the downloaded body.

    Raises:
        Nothing is caught here: errors from ``urllib2.urlopen``, from reading
        the response, or from ``extractRawData`` propagate to the caller.
    """
    wid = base62.mid2str(mid)
    url1 = "http://weibo.com/" + str(uid) + "/" + wid
    # Parenthesised so the line prints the same text under the Python 2
    # print statement and also parses as a function call.
    print(url1)
    req = urllib2.Request(url=url1)
    result = urllib2.urlopen(req)
    try:
        text = result.read()
    finally:
        # Always release the connection, even when read() raises.
        result.close()
    return extractRawData(text)
def parse_node(self, node, parse_user_info=True):
    """Turn one comment node into a flat dict of its fields.

    Args:
        node: Object exposing ``xpath()``; it must have a ``mid`` attribute,
            a ``dd`` child, and an ``a`` element under that ``dd``
            (presumably an lxml element -- confirm against the caller).
        parse_user_info: When true, ``self.parse_user_info(uid, wid)`` is
            called and its result is merged into the returned dict.

    Returns:
        A new dict with the keys ``content``, ``mid``, ``nick``, ``uid`` and
        ``wid``, plus every key of the user-info mapping. On a key clash the
        user-info value wins.

    Raises:
        IndexError: If an XPath query returns no match, if the text of
            ``dd`` does not contain ``'\\n\\t'``, or if the ``usercard``
            attribute contains no ``'='``.
    """
    di = {}
    dd = node.xpath('./dd')[0]
    user_node = node.xpath('./dd/a')[0]

    # The comment text is the second piece when the text of <dd> is split on
    # newline + tab.
    di['content'] = dd.text_content().split('\n\t')[1]
    di['mid'] = node.xpath('./@mid')[0]
    di['nick'] = user_node.xpath('./@title')[0]
    # The uid is whatever follows the first '=' in the usercard attribute
    # (presumably of the form "id=<uid>" -- TODO confirm).
    di['uid'] = user_node.xpath('./@usercard')[0].split('=')[1]
    di['wid'] = base62.mid2str(di['mid'])

    if parse_user_info:
        user_info = self.parse_user_info(di['uid'], di['wid'])
    else:
        user_info = {}

    # Copy then update instead of concatenating items() lists: same result
    # and precedence, no temporary lists, and it still works when items()
    # returns a view rather than a list.
    merged = dict(di)
    merged.update(user_info)
    return merged