Exemple #1
0
 def test_get_post_52319(self, mock_get):
     """Parse a saved post page and check that some content was extracted.

     The HTML fixture is handed to ``mock_get.get`` as the ``content`` for
     ``test_url``; ``BerlinParser.parse_post`` must then return a mapping
     whose ``'content'`` entry is truthy.
     """
     test_url = 'http://berlinreport.com/bbs/board.php?bo_table=lifeqna&wr_id=2329'
     # Read the fixture inside a context manager so the file handle is
     # closed deterministically instead of being leaked until collection.
     with open('news/fixtures/flohmarkt_52319.html', 'r') as fixture:
         html = fixture.read()
     mock_get.get(test_url, content=html)
     bp = BerlinParser(test_url)
     rst = bp.parse_post()
     self.assertTrue(rst['content'])
Exemple #2
0
    def get_posts(self,
                  publisher_name=None,
                  url=None,
                  sleep_time=0,
                  howmany=100,
                  single=True):
        category, latest_id = get_latest(url)

        post_id = latest_id
        print 'Start at url [%s]' % url

        while post_id > 1:
            print post_id
            posts = Post.objects.filter(post_id=post_id, table_category=category)
            post_url = "http://berlinreport.com/bbs/board.php?bo_table=%s&wr_id=%d"\
                       % (category, post_id)
            post_id = post_id - 1

            if posts.exists():
                # print 'duplicated %s %s' % (post_url, posts[0].subject)
                print 'd',
                continue

            bp = BerlinParser(url=post_url)
            item = bp.parse_post(publisher_name)

            if item['subject']:
                item['table_category'] = category
                links = item.pop('links')
                emails = item.pop('emails')
                images = item.pop('images')
                post = Post.objects.create(**item)
                post.update_relates(links, emails, images)

                print "%s %s %s" % (post.subject, post.member, post.created_at)
                time.sleep(sleep_time)
            else:
                links = item.pop('links')
                emails = item.pop('emails')
                images = item.pop('images')
                item['subject'] = item['post_id']
                post = Post.objects.create(**item)

                print 'skip %s' % post_url

            if single or (howmany < 0):
                break

            howmany = howmany - 1