def _parse(self, data):
    """Populate ``self.ID`` and ``self.parsed`` from one raw status mapping.

    Reads ``id``, ``user``, ``created_at``, ``title``, ``text`` and
    ``attachments`` from ``data``. When a ``reshared_status`` entry is
    present, its author's screen name and share text are appended to the
    parsed text.

    :param data: mapping describing a single status, subscriptable by key.
    :raises KeyError: if one of the mandatory keys is missing.
    """
    self.ID.status_id = data['id']
    self.ID.id = data['id']
    self.ID.source_user_id = self.parsed.userid = str(data['user']['id'])
    # NOTE(review): the second argument looks like a timezone suffix handed
    # to str2utc -- confirm against utils.
    self.parsed.time = utils.str2utc(data['created_at'], " +08:00")
    self.parsed.text = data['title'] + u" "
    self.parsed.text += data['text']
    self.parsed.username = data['user']['screen_name']
    # Test key membership instead of hasattr(): hasattr() looks at
    # attributes, so for a plain dict it is always False and the reshared
    # part of the message was silently dropped.
    if 'reshared_status' in data:
        reshared = data['reshared_status']
        self.parsed.text += u"// " + reshared['user']['screen_name'] \
            + self._get_share_text(reshared)
    if data['attachments']:
        for at in data['attachments']:
            if str(at['type']) in ['image', 'photos']:
                # Every media item of a picture attachment becomes its own
                # 'picture' entry.
                for media in at['media']:
                    self.parsed.attachments.append({
                        'type': 'picture',
                        'format': ['link'],
                        # FIXME: page photo don't have raw_src
                        'data': media['src'].replace("small", "raw", 1)
                    })
            else:
                self.parsed.text += at['title'] + u" : " + at['description']
                self.parsed.attachments.append({
                    'type': at['type'],
                    'format': ['link'],
                    # 'expaned_href' (sic) is the key exactly as this code
                    # reads it; do not "correct" the spelling here.
                    'data': at['expaned_href']
                })
def _parse_feed_status(self, dct):
    """Fill ``self.ID`` and ``self.parsed`` from one status news-feed entry.

    The first attachment, when it carries ``owner_name`` and ``content``,
    is treated as the original message and appended to the parsed text.
    Any failure while reading it is ignored (best effort).

    :param dct: mapping describing a single news-feed entry.
    :raises KeyError: if a mandatory key (``source_id``, ``actor_id``,
        ``name``, ``update_time``, ``message``, ``comments``) is missing.
    """
    # By trial, it seems:
    #    * 'post_id' : the id of news feeds
    #    * 'source_id' : the id of status
    #      equal to 'status_id' returned by
    #      'status.get' interface
    self.ID.status_id = dct["source_id"]
    self.ID.source_user_id = dct["actor_id"]

    self.parsed.userid = str(dct['actor_id'])
    self.parsed.username = dct['name']
    self.parsed.time = utils.str2utc(dct["update_time"], " +08:00")
    self.parsed.text = dct['message']
    try:
        self.parsed.username_orig = dct['attachment'][0]['owner_name']
        self.parsed.text_orig = dct['attachment'][0]['content']
        self.parsed.text += " || " + "@" + self.parsed.username_orig \
            + " : " + self.parsed.text_orig
    except Exception:
        # Deliberately best effort: entries without a usable attachment
        # simply keep the plain message. Catch Exception rather than using
        # a bare except so SystemExit / KeyboardInterrupt still propagate.
        pass
    self.parsed.text_trace = dct['message']
    self.parsed.reposts_count = 'N/A'
    self.parsed.comments_count = dct['comments']['count']
def _parse(self, dct):
    """Translate a raw status dict into ``self.ID`` / ``self.parsed`` fields.

    A status flagged ``deleted`` gets placeholder values, sets
    ``self.deleted`` and stops there. Otherwise the basic fields are copied
    and, for a retweet, the original author's name and text are merged into
    ``self.parsed.text``.

    :param dct: mapping describing one status.
    """
    flagged_deleted = 'deleted' in dct and dct['deleted']
    if flagged_deleted:
        logger.debug("This is a deleted message %s of SinaWeiboStatusMessage", dct["id"])
        self.parsed.time = "unknown"
        self.parsed.username = "******"
        self.parsed.userid = "unknown"
        self.parsed.text = "unknown"
        self.deleted = True
        return

    self.ID.id = dct["id"]
    self.parsed.time = utils.str2utc(dct["created_at"])
    author = dct['user']
    self.parsed.username = author['name']
    self.parsed.userid = author['id']
    self.parsed.reposts_count = dct['reposts_count']
    self.parsed.comments_count = dct['comments_count']

    # Plain (non-retweet) status: the text is its own original.
    if 'retweeted_status' not in dct:
        self.parsed.text_orig = dct['text']
        self.parsed.text_trace = None
        self.parsed.text = self.parsed.text_orig
        return

    retweeted = dct['retweeted_status']
    # Default kept when the retweeted status has no usable 'user' entry.
    self.parsed.username_orig = "unknown"
    try:
        self.parsed.username_orig = retweeted['user']['name']
    except KeyError:
        logger.warning('KeyError when parsing SinaWeiboStatus. May be deleted original message')
    self.parsed.text_orig = retweeted['text']
    self.parsed.text_trace = dct['text']
    self.parsed.text = (self.parsed.text_trace
                        + " || " + "@" + self.parsed.username_orig
                        + " : " + self.parsed.text_orig)
def _parse_feed_share(self, dct):
    """Fill ``self.ID`` and ``self.parsed`` from a 'share' news-feed entry.

    Feed types 33 and 32 are delegated to ``_parse_feed_33`` and
    ``_parse_feed_32``. Every other type is parsed generically from the
    ``message``, ``trace``, ``title``, ``description`` and ``comments``
    keys.

    :param dct: mapping describing a single news-feed entry.
    """
    self.ID.status_id = dct["source_id"]
    actor_id = dct["actor_id"]
    self.ID.source_user_id = actor_id

    parsed = self.parsed
    parsed.userid = str(actor_id)
    parsed.username = dct['name']
    parsed.time = utils.str2utc(dct["update_time"], " +08:00")
    parsed.attachments = []

    # NOTE(review): the generic extraction below is treated as belonging
    # entirely to the fallback branch, i.e. it is skipped for types 33/32.
    # Confirm against the original layout.
    feed_type = dct['feed_type']
    if feed_type == 33:
        self._parse_feed_33(dct)
        return
    if feed_type == 32:
        self._parse_feed_32(dct)
        return

    parsed.text_last = dct['message']
    parsed.text_trace = dct['trace']['text']
    parsed.title = dct['title']
    parsed.description = dct['description']
    parsed.reposts_count = 'N/A'
    parsed.comments_count = dct['comments']['count']
    parsed.text_orig = parsed.title + "||" + parsed.description
    # One flat message: trace, title and description joined by "||".
    parsed.text = (parsed.text_trace
                   + "||" + parsed.title
                   + "||" + parsed.description)
def _parse_feed_share(self, dct):
    """Parse a 'share' news-feed entry into ``self.ID`` / ``self.parsed``.

    ``feed_type`` 33 and 32 have dedicated parsers. Any other value falls
    back to a generic layout built from ``message``, ``trace``, ``title``,
    ``description`` and ``comments``.

    :param dct: mapping describing a single news-feed entry.
    """
    ids = self.ID
    ids.status_id = dct["source_id"]
    ids.source_user_id = dct["actor_id"]

    out = self.parsed
    out.userid = dct['actor_id']
    out.username = dct['name']
    out.time = utils.str2utc(dct["update_time"], " +08:00")

    kind = dct['feed_type']
    if kind == 33:
        self._parse_feed_33(dct)
    elif kind == 32:
        self._parse_feed_32(dct)
    else:
        # NOTE(review): all of the generic extraction is assumed to live in
        # this fallback branch -- confirm against the original layout.
        out.text_last = dct['message']
        out.text_trace = dct['trace']['text']
        out.title = dct['title']
        out.description = dct['description']
        out.reposts_count = 'N/A'
        out.comments_count = dct['comments']['count']
        out.text_orig = out.title + "||" + out.description
        # General-format message: trace || title || description.
        out.text = out.text_trace + "||" + out.title + "||" + out.description
def _parse(self, data):
    """Fill ``self.ID`` and ``self.parsed`` from a raw status mapping.

    The parsed text is ``title + " " + text``. If ``data`` carries a
    ``reshared_status`` entry, the reshared author's screen name and the
    result of ``self._get_share_text`` are appended. Attachments are
    converted into entries of ``self.parsed.attachments``.

    :param data: mapping describing one status, subscriptable by key.
    :raises KeyError: if a mandatory key is missing from ``data``.
    """
    status_id = data['id']
    self.ID.status_id = status_id
    self.ID.id = status_id
    self.ID.source_user_id = self.parsed.userid = str(data['user']['id'])
    # NOTE(review): " +08:00" is presumably a timezone suffix for str2utc;
    # verify in utils.
    self.parsed.time = utils.str2utc(data['created_at'], " +08:00")
    self.parsed.text = data['title'] + u" "
    self.parsed.text += data['text']
    self.parsed.username = data['user']['screen_name']

    # hasattr() checks attributes, not mapping keys, so with a plain dict
    # this branch could never run. Key membership is the intended test.
    if 'reshared_status' in data:
        shared = data['reshared_status']
        self.parsed.text += u"// " + shared['user']['screen_name'] \
            + self._get_share_text(shared)

    if data['attachments']:
        for at in data['attachments']:
            if str(at['type']) in ['image', 'photos']:
                # One 'picture' attachment per media item.
                for item in at['media']:
                    self.parsed.attachments.append(
                        {
                            'type': 'picture',
                            'format': ['link'],
                            # FIXME: page photo don't have raw_src
                            'data': item['src'].replace("small", "raw", 1)
                        }
                    )
            else:
                self.parsed.text += at['title'] + u" : " + at['description']
                self.parsed.attachments.append(
                    {
                        'type': at['type'],
                        'format': ['link'],
                        # Key spelling 'expaned_href' (sic) is kept exactly
                        # as the code reads it.
                        'data': at['expaned_href']
                    })
def _parse(self, dct):
    """Copy the fields of a raw status dict onto ``self.ID`` / ``self.parsed``.

    Deleted statuses (truthy ``deleted`` key) are given placeholder values
    and ``self.deleted`` is set. For retweets the original author and text
    are appended to the retweeting comment.

    :param dct: mapping describing one status.
    """
    if "deleted" in dct and dct["deleted"]:
        logger.debug("This is a deleted message %s of SinaWeiboStatusMessage", dct["id"])
        parsed = self.parsed
        parsed.time = "unknown"
        parsed.username = "******"
        parsed.userid = "unknown"
        parsed.text = "unknown"
        self.deleted = True
        return

    self.ID.id = dct["id"]

    parsed = self.parsed
    parsed.time = utils.str2utc(dct["created_at"])
    parsed.username = dct["user"]["name"]
    parsed.userid = dct["user"]["id"]
    parsed.reposts_count = dct["reposts_count"]
    parsed.comments_count = dct["comments_count"]

    if "retweeted_status" in dct:
        original = dct["retweeted_status"]
        # Placeholder survives when the original status has no 'user'/'name'.
        parsed.username_orig = "unknown"
        try:
            parsed.username_orig = original["user"]["name"]
        except KeyError:
            logger.warning("KeyError when parsing SinaWeiboStatus. May be deleted original message")
        parsed.text_orig = original["text"]
        parsed.text_trace = dct["text"]
        # Comment first, then "@author : original text".
        quoted = "@" + parsed.username_orig + " : " + parsed.text_orig
        parsed.text = parsed.text_trace + " || " + quoted
    else:
        parsed.text_orig = dct["text"]
        parsed.text_trace = None
        parsed.text = parsed.text_orig
def _parse_feed_status(self, dct):
    """Parse one status news-feed entry into ``self.ID`` / ``self.parsed``.

    If the first attachment provides ``owner_name`` and ``content``, they
    are recorded as the original author/text and appended to the parsed
    text. Problems while reading the attachment are ignored.

    :param dct: mapping describing a single news-feed entry.
    :raises KeyError: if a mandatory key (``source_id``, ``actor_id``,
        ``name``, ``update_time``, ``message``, ``comments``) is missing.
    """
    # By trial, it seems:
    #    * 'post_id' : the id of news feeds
    #    * 'source_id' : the id of status
    #      equal to 'status_id' returned by
    #      'status.get' interface
    self.ID.status_id = dct["source_id"]
    self.ID.source_user_id = dct["actor_id"]

    self.parsed.userid = dct['actor_id']
    self.parsed.username = dct['name']
    self.parsed.time = utils.str2utc(dct["update_time"], " +08:00")
    self.parsed.text = dct['message']
    try:
        first = dct['attachment'][0]
        self.parsed.username_orig = first['owner_name']
        self.parsed.text_orig = first['content']
        self.parsed.text += " || " + "@" + self.parsed.username_orig \
            + " : " + self.parsed.text_orig
    except Exception:
        # Best effort on purpose: a missing or malformed attachment leaves
        # the plain message untouched. Using Exception instead of a bare
        # except lets SystemExit / KeyboardInterrupt propagate.
        pass
    self.parsed.text_trace = dct['message']
    self.parsed.reposts_count = 'N/A'
    self.parsed.comments_count = dct['comments']['count']
def _parse(self, dct):
    """Translate a raw status dict into ``self.ID`` / ``self.parsed`` fields.

    Handles three shapes: a deleted status (placeholders only), a plain
    status, and a retweet (original author/text merged into the text).
    Pictures listed under ``pic_urls`` of the status and of the retweeted
    status are added to ``self.parsed.attachments``.

    :param dct: mapping describing one status.
    """

    def collect_pictures(status):
        # Append one 'picture' attachment per entry of status['pic_urls'],
        # rewriting the thumbnail path segment to '/woriginal/'.
        if 'pic_urls' in status:
            for pic in status['pic_urls']:
                self.parsed.attachments.append({
                    'type': 'picture',
                    'format': ['link'],
                    'data': pic['thumbnail_pic'].replace('/thumbnail/', '/woriginal/')
                })

    if 'deleted' in dct and dct['deleted']:
        logger.debug(
            "This is a deleted message %s of SinaWeiboStatusMessage",
            dct["id"])
        self.parsed.time = "unknown"
        self.parsed.username = "******"
        self.parsed.userid = "unknown"
        self.parsed.text = "unknown"
        self.deleted = True
        return

    self.ID.id = dct["id"]
    self.parsed.time = utils.str2utc(dct["created_at"])
    poster = dct['user']
    self.parsed.username = poster['name']
    self.parsed.userid = poster['id']
    self.parsed.reposts_count = dct['reposts_count']
    self.parsed.comments_count = dct['comments_count']
    collect_pictures(dct)

    if 'retweeted_status' not in dct:
        self.parsed.text_orig = dct['text']
        self.parsed.text_trace = None
        self.parsed.text = self.parsed.text_orig
        return

    source = dct['retweeted_status']
    self.parsed.username_orig = "unknown"
    collect_pictures(source)
    try:
        self.parsed.username_orig = source['user']['name']
    except KeyError:
        logger.warning(
            'KeyError when parsing SinaWeiboStatus. May be deleted original message'
        )
    self.parsed.text_orig = source['text']
    self.parsed.text_trace = dct['text']
    self.parsed.text = (self.parsed.text_trace
                        + " || " + "@" + self.parsed.username_orig
                        + " : " + self.parsed.text_orig)
def _parse(self, dct):
    """Fill ``self.ID`` / ``self.parsed`` from a raw status dict.

    A deleted status only receives placeholder values. Otherwise basic
    fields and picture attachments are copied, and a retweet is rendered
    as ``<comment>//@<original author>: <original text>``.

    :param dct: mapping describing one status.
    """
    parsed = self.parsed

    if 'deleted' in dct and dct['deleted']:
        logger.debug(
            "This is a deleted message %s of SinaWeiboStatusMessage",
            dct["id"])
        parsed.time = "unknown"
        parsed.username = "******"
        parsed.userid = "unknown"
        parsed.text = "unknown"
        self.deleted = True
        return

    self.ID.id = dct["id"]
    parsed.time = utils.str2utc(dct["created_at"])
    parsed.username = dct['user']['name']
    parsed.userid = dct['user']['id']
    parsed.reposts_count = dct['reposts_count']
    parsed.comments_count = dct['comments_count']

    # Pictures attached directly to this status.
    if 'pic_urls' in dct:
        for picture in dct['pic_urls']:
            full_size = picture['thumbnail_pic'].replace('/thumbnail/', '/woriginal/')
            parsed.attachments.append({
                'type': 'picture',
                'format': ['link'],
                'data': full_size
            })

    if 'retweeted_status' in dct:
        retweeted = dct['retweeted_status']
        parsed.username_orig = "unknown"
        # Pictures carried by the retweeted (original) status.
        if 'pic_urls' in retweeted:
            for picture in retweeted['pic_urls']:
                full_size = picture['thumbnail_pic'].replace('/thumbnail/', '/woriginal/')
                parsed.attachments.append({
                    'type': 'picture',
                    'format': ['link'],
                    'data': full_size
                })
        try:
            parsed.username_orig = retweeted['user']['name']
        except KeyError:
            logger.warning(
                'KeyError when parsing SinaWeiboStatus. May be deleted original message'
            )
        parsed.text_orig = retweeted['text']
        parsed.text_trace = dct['text']
        parsed.text = (parsed.text_trace
                       + "//@" + parsed.username_orig
                       + ": " + parsed.text_orig)
    else:
        parsed.text_orig = dct['text']
        parsed.text_trace = None
        parsed.text = parsed.text_orig
def _parse(self, dct):
    """Copy a status dict (``id``/``ownerId``/``name``/``createTime``/
    ``content``) onto ``self.ID`` and ``self.parsed``.

    The feed type is fixed to ``'STATUS'``.

    :param dct: mapping describing one status.
    """
    self.ID.status_id = dct['id']
    owner_id = dct['ownerId']
    self.ID.source_user_id = owner_id
    self.ID.feed_type = 'STATUS'

    # The parsed user id is the string form of the owner id.
    self.parsed.userid = str(owner_id)
    self.parsed.username = dct['name']
    created = dct['createTime']
    self.parsed.time = utils.str2utc(created, " +08:00")
    self.parsed.text = dct['content']
def _parse(self, dct):
    """Copy a status dict (``status_id``/``uid``/``name``/``time``/
    ``message``) onto ``self.ID`` and ``self.parsed``.

    The feed type is fixed to ``'STATUS'``.

    :param dct: mapping describing one status.
    """
    ids = self.ID
    ids.status_id = dct['status_id']
    uid = dct['uid']
    ids.source_user_id = uid
    ids.feed_type = 'STATUS'

    parsed = self.parsed
    # The parsed user id is the string form of the uid.
    parsed.userid = str(uid)
    parsed.username = dct['name']
    parsed.time = utils.str2utc(dct['time'], " +08:00")
    parsed.text = dct['message']
def _parse(self, dct):
    """Fill ``self.ID`` / ``self.parsed`` from a raw status dict.

    Deleted statuses get placeholder values only. Otherwise basic fields,
    a constant ``liked = False`` and picture attachments are recorded, and
    a retweet is rendered as
    ``<comment>//@<original author>: <original text>``.

    :param dct: mapping describing one status.
    """

    def attach_pictures(status):
        # One 'picture' attachment per entry in status['pic_urls'], with
        # the thumbnail path segment swapped for '/woriginal/'.
        if 'pic_urls' in status:
            for pic in status['pic_urls']:
                self.parsed.attachments.append(
                    {
                        'type': 'picture',
                        'format': ['link'],
                        'data': pic['thumbnail_pic'].replace('/thumbnail/', '/woriginal/')
                    })

    is_deleted = 'deleted' in dct and dct['deleted']
    if is_deleted:
        logger.debug("This is a deleted message %s of SinaWeiboStatusMessage", dct["id"])
        self.parsed.time = "unknown"
        self.parsed.username = "******"
        self.parsed.userid = "unknown"
        self.parsed.text = "unknown"
        self.deleted = True
        return

    self.ID.id = dct["id"]
    self.parsed.time = utils.str2utc(dct["created_at"])
    user = dct['user']
    self.parsed.username = user['name']
    self.parsed.userid = user['id']
    self.parsed.reposts_count = dct['reposts_count']
    self.parsed.comments_count = dct['comments_count']
    # According to
    # http://open.weibo.com/qa/index.php?qa=448&qa_1=v2-%E5%B7%B2%E6%94%B6%E8%97%8F%E5%BE%AE%E5%8D%9A-%E6%8E%A5%E5%8F%A3statuses-friends-timeline%E8%BF%94%E5%9B%9E%E5%AD%97%E6%AE%B5-favorited-%E4%B8%BAfalse
    # there is currently no way to tell whether a weibo message is
    # favorited, although a specious property for it exists. Hence the
    # constant below.
    self.parsed.liked = False
    attach_pictures(dct)

    if 'retweeted_status' in dct:
        origin = dct['retweeted_status']
        self.parsed.username_orig = "unknown"
        attach_pictures(origin)
        try:
            self.parsed.username_orig = origin['user']['name']
        except KeyError:
            logger.warning('KeyError when parsing SinaWeiboStatus. May be deleted original message')
        self.parsed.text_orig = origin['text']
        self.parsed.text_trace = dct['text']
        self.parsed.text = (self.parsed.text_trace
                            + "//@" + self.parsed.username_orig
                            + ": " + self.parsed.text_orig)
    else:
        self.parsed.text_orig = dct['text']
        self.parsed.text_trace = None
        self.parsed.text = self.parsed.text_orig
def _parse_feed_blog(self, dct):
    """Parse a blog news-feed entry into ``self.ID`` / ``self.parsed``.

    The actor id is stored as ``source_user_id`` when ``actor_type`` is
    ``"user"`` and as ``source_page_id`` otherwise.

    :param dct: mapping describing a single news-feed entry.
    """
    ids = self.ID
    ids.feed_id = dct["post_id"]
    actor_type = dct["actor_type"]
    ids.user_type = actor_type
    ids.blog_id = dct["source_id"]

    posted_by_user = dct["actor_type"] == "user"
    actor_id = dct["actor_id"]
    if posted_by_user:
        ids.source_user_id = actor_id
    else:
        # Any non-user actor is treated as a page.
        ids.source_page_id = actor_id

    parsed = self.parsed
    parsed.userid = dct['actor_id']
    parsed.username = dct['name']
    parsed.time = utils.str2utc(dct["update_time"], " +08:00")
    parsed.text = dct['description']
    parsed.title = dct['title']
def _parse(self, dct):
    """Fill ``self.ID`` and ``self.parsed`` from one feed entry.

    ``resource_id`` is taken from the first attachment for photo feeds and
    from ``dct['resource']`` otherwise. If that lookup fails for any
    reason, the status id is used instead and the error is logged.

    :param dct: mapping describing a single feed entry.
    :raises KeyError: if ``id``, ``sourceUser``, ``time`` or ``type`` is
        missing.
    """
    self.ID.status_id = dct['id']
    self.ID.source_user_id = self.parsed.userid = str(dct['sourceUser']['id'])
    self.parsed.username = dct['sourceUser']['name']
    self.parsed.time = utils.str2utc(dct['time'], " +08:00")
    self.parsed.text = ""
    self.ID.feed_type = self.parsed.feed_type = dct['type']
    try:
        if self.ID.feed_type in ("PUBLISH_ONE_PHOTO", "PUBLISH_MORE_PHOTO"):
            self.ID.resource_id = dct["attachment"][0]["id"]
        else:
            self.ID.resource_id = dct["resource"]["id"]
    # 'except X as e' is valid on Python 2.6+ and Python 3; the old
    # 'except X, e' spelling is a SyntaxError on Python 3.
    except Exception as e:
        logger.warning(str(e))
        # Fall back to the status id so resource_id is always set.
        self.ID.resource_id = self.ID.status_id
def _parse_feed_blog(self, dct):
    """Parse a blog news-feed entry into ``self.ID`` / ``self.parsed``.

    The actor id is stored under ``source_user_id`` for ``"user"`` actors
    and under ``source_page_id`` for everything else. The description is
    used both as ``description`` and as ``text``.

    :param dct: mapping describing a single news-feed entry.
    """
    self.ID.feed_id = dct["post_id"]
    self.ID.user_type = dct["actor_type"]
    self.ID.blog_id = dct["source_id"]

    is_user = dct["actor_type"] == "user"
    actor_id = dct["actor_id"]
    if not is_user:
        # page
        self.ID.source_page_id = actor_id
    else:
        self.ID.source_user_id = actor_id

    self.parsed.userid = str(dct['actor_id'])
    self.parsed.username = dct['name']
    self.parsed.time = utils.str2utc(dct["update_time"], " +08:00")

    # This is the news feed of blogs, so the body is not available here;
    # the description doubles as the text.
    summary = dct['description']
    self.parsed.description = summary
    self.parsed.text = dct['description']
    self.parsed.title = dct['title']
def _parse(self, dct):
    """Populate ``self.ID`` and ``self.parsed`` from one feed entry.

    For the two photo feed types the resource id comes from the first
    attachment; for all other types it comes from ``dct['resource']``.
    Any error during that lookup is logged and the status id is used as
    the resource id instead.

    :param dct: mapping describing a single feed entry.
    :raises KeyError: if ``id``, ``sourceUser``, ``time`` or ``type`` is
        missing.
    """
    self.ID.status_id = dct['id']
    self.ID.source_user_id = self.parsed.userid = str(
        dct['sourceUser']['id'])
    self.parsed.username = dct['sourceUser']['name']
    self.parsed.time = utils.str2utc(dct['time'], " +08:00")
    self.parsed.text = ""
    self.ID.feed_type = self.parsed.feed_type = dct['type']

    photo_feed_types = ("PUBLISH_ONE_PHOTO", "PUBLISH_MORE_PHOTO")
    try:
        if self.ID.feed_type in photo_feed_types:
            self.ID.resource_id = dct["attachment"][0]["id"]
        else:
            self.ID.resource_id = dct["resource"]["id"]
    # The comma form 'except Exception, e' only parses on Python 2; the
    # 'as' form works on Python 2.6+ and Python 3 alike.
    except Exception as e:
        logger.warning(str(e))
        # Guarantee a resource id even when the entry lacks one.
        self.ID.resource_id = self.ID.status_id
def _parse(self, dct):
    """Translate a raw status dict into ``self.ID`` / ``self.parsed`` fields.

    A status whose ``deleted`` key is truthy gets placeholder values, sets
    ``self.deleted`` and returns early. ``dct['user']`` is read
    unconditionally for every other status. Retweets are flattened to
    ``<comment> || @<original author> : <original text>``.

    :param dct: mapping describing one status.
    """
    if 'deleted' in dct and dct['deleted']:
        logger.debug("This is a deleted message %s of SinaWeiboStatusMessage", dct["id"])
        for field, placeholder in (('time', "unknown"),
                                   ('username', "******"),
                                   ('userid', "unknown"),
                                   ('text', "unknown")):
            setattr(self.parsed, field, placeholder)
        self.deleted = True
        return

    self.ID.id = dct["id"]
    self.parsed.time = utils.str2utc(dct["created_at"])
    # No guard for a missing 'user' entry: a KeyError propagates.
    sender = dct['user']
    self.parsed.username = sender['name']
    self.parsed.userid = sender['id']
    self.parsed.reposts_count = dct['reposts_count']
    self.parsed.comments_count = dct['comments_count']

    is_retweet = 'retweeted_status' in dct
    if not is_retweet:
        self.parsed.text_orig = dct['text']
        self.parsed.text_trace = None
        self.parsed.text = self.parsed.text_orig
        return

    original = dct['retweeted_status']
    # Stays "unknown" if the original status carries no user name.
    self.parsed.username_orig = "unknown"
    try:
        self.parsed.username_orig = original['user']['name']
    except KeyError:
        logger.warning('KeyError when parsing SinaWeiboStatus. May be deleted original message')
    self.parsed.text_orig = original['text']
    self.parsed.text_trace = dct['text']
    self.parsed.text = (self.parsed.text_trace
                        + " || " + "@" + self.parsed.username_orig
                        + " : " + self.parsed.text_orig)
def _parse_feed_blog(self, dct):
    """Parse a blog news-feed entry into ``self.ID`` / ``self.parsed``.

    The actor id goes to ``source_user_id`` for ``"user"`` actors and to
    ``source_page_id`` otherwise. The text is the quoted title followed by
    the description, and the blog link becomes the single attachment.

    :param dct: mapping describing a single news-feed entry.
    """
    ids = self.ID
    ids.feed_id = dct["post_id"]
    ids.user_type = dct["actor_type"]
    ids.blog_id = dct["source_id"]

    from_user = dct["actor_type"] == "user"
    actor_id = dct["actor_id"]
    if from_user:
        ids.source_user_id = actor_id
    else:
        # page
        ids.source_page_id = actor_id

    parsed = self.parsed
    parsed.userid = str(dct['actor_id'])
    parsed.username = dct['name']
    parsed.time = utils.str2utc(dct["update_time"], " +08:00")

    # This is the news feed of blogs, so the body cannot be obtained here;
    # title and description stand in for it.
    parsed.title = dct['title']
    parsed.description = dct['description']
    quoted_title = '"' + parsed.title + '" '
    parsed.text = quoted_title + parsed.description

    blog_link = {
        'type': 'blog',
        'format': ['link'],
        'data': dct['href'],
    }
    parsed.attachments = [blog_link]
def _parse_feed_blog(self, dct):
    """Fill ``self.ID`` / ``self.parsed`` from a blog news-feed entry.

    ``"user"`` actors are recorded under ``source_user_id`` and any other
    actor type under ``source_page_id``. ``self.parsed.text`` is built as
    ``"<title>" <description>`` and the entry's ``href`` is attached as a
    blog link.

    :param dct: mapping describing a single news-feed entry.
    """
    self.ID.feed_id = dct["post_id"]
    self.ID.user_type = dct["actor_type"]
    self.ID.blog_id = dct["source_id"]

    if dct["actor_type"] != "user":
        # page
        self.ID.source_page_id = dct["actor_id"]
    else:
        self.ID.source_user_id = dct["actor_id"]

    self.parsed.userid = str(dct['actor_id'])
    self.parsed.username = dct['name']
    timestamp = dct["update_time"]
    self.parsed.time = utils.str2utc(timestamp, " +08:00")

    # Only the feed summary is available (not the blog body), so the text
    # is assembled from title and description.
    self.parsed.title = dct['title']
    self.parsed.description = dct['description']
    self.parsed.text = ('"' + self.parsed.title + '" '
                        + self.parsed.description)

    self.parsed.attachments = [
        {'type': 'blog', 'format': ['link'], 'data': dct['href']}
    ]
def _parse(self, dct):
    """Fill ``self.ID`` and ``self.parsed`` from one generic news-feed entry.

    The numeric ``feed_type`` is mapped to a coarse label (STATUS, BLOG,
    SHARE, PHOTO, OTHER). The text is assembled, in order, from the
    optional ``message``, an original-author marker for share-like feed
    types, the optional ``title`` and ``description``, and the ``content``
    of each attachment. Photo and link attachments are appended to
    ``self.parsed.attachments``.

    NOTE(review): the source was available only with its indentation
    flattened; the nesting below is the most plausible reading -- confirm
    against the original layout.

    :param dct: mapping describing a single news-feed entry.
    :raises KeyError: if a mandatory key is missing or ``feed_type`` is
        not in the mapping table.
    """
    self.ID.status_id = dct['source_id']
    self.ID.source_user_id = self.parsed.userid = str(dct['actor_id'])
    self.parsed.username = dct['name']
    self.parsed.time = utils.str2utc(dct['update_time'], " +08:00")
    self.parsed.text = ""
    # Numeric feed type -> coarse category; unknown types raise KeyError.
    self.ID.feed_type = self.parsed.feed_type = {
        10: 'STATUS',
        11: 'STATUS',
        20: 'BLOG',
        21: 'SHARE',
        22: 'BLOG',
        23: 'SHARE',
        30: 'PHOTO',
        31: 'PHOTO',
        32: 'SHARE',
        33: 'SHARE',
        34: 'OTHER',
        35: 'OTHER',
        36: 'SHARE',
        40: 'OTHER',
        41: 'OTHER',
        50: 'SHARE',
        51: 'SHARE',
        52: 'SHARE',
        53: 'SHARE',
        54: 'SHARE',
        55: 'SHARE'
    }[dct['feed_type']]
    # Placeholder author name used when no attachment names an owner.
    ORIG_USER = '******'
    if 'attachment' in dct and dct['attachment']:
        # The last attachment with a truthy 'owner_name' wins.
        for at in dct['attachment']:
            if 'owner_name' in at and at['owner_name']:
                ORIG_USER = at['owner_name']
                self.parsed.username_orig = ORIG_USER
    if 'message' in dct:
        self.parsed.text += dct['message']
    # Share-like feed types get an " //<original author>:" marker.
    if dct['feed_type'] in [21, 23, 32, 33, 36, 50, 51, 52, 53, 54, 55]:
        self.parsed.text += u" //" + ORIG_USER + ":"
    if 'title' in dct:
        # Skip the title when it merely repeats the message.
        if 'message' not in dct or dct['message'] != dct['title']:
            self.parsed.text += ' "' + dct['title'] + '" '
    if 'description' in dct:
        self.parsed.text += dct['description']
    if 'attachment' in dct and dct['attachment']:
        for at in dct['attachment']:
            if at['media_type'] == 'photo':
                self.parsed.attachments.append(
                    {
                        'type': 'picture',
                        'format': ['link'],
                        #FIXME: page photo don't have raw_src
                        # Use a truthy 'raw_src' when present; otherwise
                        # derive a URL from 'src' by replacing 'head_'
                        # with 'original_'.
                        'data': 'raw_src' in at and at['raw_src'] or at['src'].replace('head_', 'original_')
                    }
                )
            elif 'href' in at:
                # Generic link unless the media type is album or blog.
                attype = 'link'
                if at['media_type'] in ['album', 'blog']:
                    attype = at['media_type']
                self.parsed.attachments.append(
                    {
                        'type': attype,
                        'format': ['link'],
                        'data': at['href']
                    })
            # Attachment content, when present, is appended to the text
            # regardless of the media type.
            if 'content' in at:
                self.parsed.text += at['content']