コード例 #1
0
    def receive(self, message):
        """Store an inbound e-mail message as a ``Post`` entity.

        Image and video links are collected from every ``text/html`` body via
        ``self.find_image_links`` and ``self.find_video_links``; attachments
        are stored as ``db.Blob`` values; the decoded ``text/plain`` bodies
        are concatenated into the post content; the subject, when present,
        becomes the caption.

        Args:
            message: inbound mail object exposing ``bodies(content_type)`` and
                ``sender``, and optionally ``attachments`` and ``subject``.
        """
        post = Post()

        img_links = []
        video_links = []
        for _content_type, body in message.bodies('text/html'):
            decoded_body = body.decode()
            img_links.extend(self.find_image_links(decoded_body))
            video_links.extend(self.find_video_links(decoded_body))

        # The attribute may be missing entirely or present but empty.
        attachments = getattr(message, "attachments", None)
        if attachments:
            # Each attachment is indexed as a pair; element 1 is the payload.
            post.attachments = [db.Blob(item[1].decode()) for item in attachments]

        # Join once instead of growing a string with repeated "+".
        all_bodies = ''.join(
            body[1].decode() for body in message.bodies('text/plain'))

        if hasattr(message, "subject"):
            # decode_header yields (text, charset) pairs; as before, only the
            # first pair is used for the caption.
            subject, encoding = decode_header(message.subject)[0]
            if encoding:
                # Honour the charset declared in the header; calling
                # unicode() alone ignores it and fails on non-ASCII subjects.
                post.caption = subject.decode(encoding, 'replace')
            else:
                post.caption = unicode(subject)
        post.author = message.sender
        post.content = all_bodies
        post.images = img_links
        post.videos = video_links
        post.source = "email"
        post.put()
コード例 #2
0
 def receive(self, message):
     """Persist an incoming e-mail message as a ``Post``.

     Links found in the ``text/html`` bodies (via ``self.find_image_links``
     and ``self.find_video_links``) become the post's images and videos,
     attachments are wrapped in ``db.Blob``, the ``text/plain`` bodies are
     decoded and concatenated into the content, and the subject, if the
     message has one, is used as the caption.

     Args:
         message: inbound mail object providing ``bodies(content_type)`` and
             ``sender``, and optionally ``attachments`` and ``subject``.
     """
     post = Post()

     img_links = []
     video_links = []
     for _content_type, body in message.bodies('text/html'):
         decoded_body = body.decode()
         img_links.extend(self.find_image_links(decoded_body))
         video_links.extend(self.find_video_links(decoded_body))

     # Covers both a missing attribute and an empty attachment collection.
     attachments = getattr(message, "attachments", None)
     if attachments:
         # Element 1 of each attachment is the payload that gets decoded.
         post.attachments = [db.Blob(item[1].decode()) for item in attachments]

     # Build the content in one pass rather than by repeated concatenation.
     all_bodies = ''.join(
         body[1].decode() for body in message.bodies('text/plain'))

     if hasattr(message, "subject"):
         # Only the first (text, charset) pair from decode_header is used,
         # matching the previous behaviour.
         subject, encoding = decode_header(message.subject)[0]
         if encoding:
             # Decode with the charset the header declares; a bare
             # unicode() call ignores it and breaks on non-ASCII subjects.
             post.caption = subject.decode(encoding, 'replace')
         else:
             post.caption = unicode(subject)
     post.author = message.sender
     post.content = all_bodies
     post.images = img_links
     post.videos = video_links
     post.source = "email"
     post.put()
コード例 #3
0
ファイル: main.py プロジェクト: sephiria/sandbox
    def post(self):
        """Handle the homepage form submission and store it as a ``Post``.

        The request is dropped (redirect to ``/``) unless the ``bot_test``
        field equals ``orange`` after trimming and lower-casing. Otherwise a
        post is saved with the signed-in user's nickname (if any), the
        submitted image URL and the caption, and the client is redirected
        to ``/``.
        """
        bot_test = self.request.get('bot_test').strip().lower()
        if bot_test != 'orange':
            self.redirect('/')
            return

        post = Post()

        # Look the user up once instead of twice.
        current_user = users.get_current_user()
        if current_user:
            post.author = current_user.nickname()

        url = self.request.get('url').strip()
        if url:
            # Check for a real scheme, case-insensitively: a bare "http"
            # prefix test lets hosts such as "httpfoo.com" through without
            # a scheme and double-prefixes "HTTP://..." URLs.
            if not url.lower().startswith(("http://", "https://")):
                url = "http://" + url
            # Only attach an image when a URL was actually supplied.
            # NOTE(review): db.Link is expected to reject an empty string,
            # which previously made the whole request fail - confirm.
            post.images = [db.Link(url)]
        post.caption = self.request.get('caption')
        post.source = "homepage"
        post.put()
        self.redirect('/')
コード例 #4
0
ファイル: main.py プロジェクト: sephiria/sandbox
    def post(self):
        """Create a ``Post`` from the homepage form and redirect to ``/``.

        Submissions whose ``bot_test`` field is not ``orange`` (compared
        after stripping whitespace and lower-casing) are discarded with a
        redirect. Accepted submissions are stored with the signed-in user's
        nickname when available, the optional image URL, and the caption.
        """
        bot_test = self.request.get('bot_test').strip().lower()
        if bot_test != 'orange':
            self.redirect('/')
            return

        post = Post()

        # A single lookup serves both the check and the nickname call.
        current_user = users.get_current_user()
        if current_user:
            post.author = current_user.nickname()

        url = self.request.get('url').strip()
        if url:
            # Require an actual http/https scheme, ignoring case; testing
            # only for the prefix "http" mishandles "httpfoo.com" and
            # "HTTP://..." inputs.
            if not url.lower().startswith(("http://", "https://")):
                url = "http://" + url
            # Skip the image when no URL was given.
            # NOTE(review): db.Link is expected to reject an empty string,
            # so the unguarded call used to abort the request - confirm.
            post.images = [db.Link(url)]
        post.caption = self.request.get('caption')
        post.source = "homepage"
        post.put()
        self.redirect('/')
コード例 #5
0
ファイル: pttcrawler.py プロジェクト: ricky155030/ptt-crawler
    def parse_all_posts(self, author=True, title=True, date=True, contents=True, messages=True,
                        reply=True, images=True, ip=False):
        """Fetch every URL in ``self.post_urls`` and parse it into a ``Post``.

        Parsing is best effort: when a page does not have the expected
        layout, the fields filled so far are kept, a note is written to
        stderr, and the partially filled post is still returned.

        Args:
            author: set ``post.author`` from the first article-meta value.
            title: set ``post.title`` from the third article-meta value.
            date: set ``post.date`` from the fourth article-meta value.
            contents: set ``post.contents`` to the article body with spaces,
                newlines and tabs removed.
            messages: together with ``reply``, enables tallying of push tags
                into ``post.good`` / ``post.bad`` / ``post.normal``.
            reply: see ``messages``; the tally runs when either is true.
            images: set ``post.images`` to the ``src`` of every ``<img>``.
            ip: accepted but not implemented yet.

        Returns:
            A list with one ``Post`` per entry in ``self.post_urls``.

        Exits:
            Prints a message and calls ``sys.exit(1)`` when
            ``self.post_urls`` is ``None``.
        """
        if self.post_urls is None:
            # Same outcome as the former raise-and-catch: report, then exit.
            print("You must run get_posts_url first")
            sys.exit(1)

        post_objects = []
        for url in self.post_urls:
            post_url = "http://www.ptt.cc/bbs/{}/{}".format(self.board_name, url)
            soup = self._get_soup(post_url)
            post = Post(post_url)

            try:
                article_meta = soup.find_all('span', class_="article-meta-value")
                if author:
                    post.author = article_meta[0].contents[0]

                if title:
                    post.title = article_meta[2].contents[0]

                if date:
                    post.date = article_meta[3].contents[0]

                # TODO: ip
                if ip:
                    pass

                if contents:
                    # Take the markup after the header <div>s and cut it off
                    # at the board's signature line.
                    main_html = str(soup.find(id="main-container").contents[1])
                    sections = main_html.split("</div>")
                    body = sections[4].split("<span class=\"f2\">※ 發信站: 批踢踢實業坊(ptt.cc),")
                    post.contents = body[0].replace(' ', '').replace('\n', '').replace('\t', '')

                if images:
                    post.images = [img.attrs.get('src') for img in soup.find_all('img')]

                # TODO: There are some push_content lost due to hyperlink in the content
                # TODO: collect per-message details (status, commenter, comment
                # text, comment time) into post.messages; only the tag tallies
                # are recorded for now.
                if messages or reply:
                    for tag in soup.find_all("div", "push"):
                        push_tag = tag.find("span", "push-tag").string.replace(' ', '')

                        if push_tag == '推':
                            post.good += 1
                        elif push_tag == '噓':
                            post.bad += 1
                        else:
                            post.normal += 1
            except Exception as err:
                # Still best effort, but no longer silent, and no longer
                # swallowing KeyboardInterrupt / SystemExit as the bare
                # "except:" did.
                sys.stderr.write(
                    "parse_all_posts: could not fully parse {}: {!r}\n".format(post_url, err))

            post_objects.append(post)

        return post_objects