Python BeautifulSoup.decompose Beispiele, BeautifulSoup.BeautifulSoup.decompose Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: extract.py Projekt: ashfame/indianrail-db

def extract_trains():
    """Rebuild the TempTrain table from the stored HTML dump.

    Empties TempTrain, then walks every Raw row whose url is listed in
    ``train_urls``: the saved page is parsed, the ``SearchResultsTable``
    table is converted to records, and one TempTrain is stored (and
    committed) per record.
    """
    # Truncate TempTrain before re-importing.
    db.query(TempTrain).delete()
    db.commit()

    for record in db.query(Raw.url):
        page_url = record.url
        if page_url not in train_urls:
            continue

        debug("Processing html of %s" % page_url)
        stored = db.query(Raw.html).filter_by(url=page_url).first()
        soup = BeautifulSoup(stored.html)
        results = soup.find('table', id='SearchResultsTable')
        update_departure_days(results)

        # extract() detaches the header row from the table before the
        # table is handed to to_dict().
        header_row = results.find('tr', 'tableheader').extract()
        column_names = []
        for cell in header_row:
            column_names.append(cell.text)

        records = clean_keys(to_dict(results, column_names))

        links = get_train_links(results)
        # Only `records` and `links` are used from here on.
        soup.decompose()

        for entry in records:
            link_info = links[entry['number']]
            entry['name'] = link_info[0]
            entry['url'] = link_info[1]

            db.add(TempTrain(**entry))
            db.commit()

Beispiel #2

0

Datei anzeigen

    def testPixivImageParseInfo(self):
        # Parse the saved image-info fixture and verify the extracted
        # metadata. The fixture is read inside a with-block so the file
        # handle is closed instead of leaking.
        with open('./test/test-image-info.html', 'r') as p:
            page = BeautifulSoup(p.read())
        image2 = PixivImage(32039274, page)
        page.decompose()
        del page

        self.assertEqual(image2.imageId, 32039274)
        self.assertEqual(image2.imageTitle, u"新しいお姫様")
        self.assertTrue(len(image2.imageCaption) > 0)
        # print(u"\r\nCaption = {0}".format(image2.imageCaption))

        self.assertTrue(u'MAYU' in image2.imageTags)
        self.assertTrue(u'VOCALOID' in image2.imageTags)
        self.assertTrue(u'VOCALOID3' in image2.imageTags)
        self.assertTrue(u'なにこれかわいい' in image2.imageTags)
        self.assertTrue(u'やはり存在する斧' in image2.imageTags)

        self.assertEqual(image2.imageMode, "big")
        self.assertEqual(image2.worksDate, '12/10/12 15:23')
        self.assertEqual(image2.worksResolution, '642x900')
        # self.assertEqual(image2.worksTools, 'Photoshop SAI')
        # self.assertEqual(image2.jd_rtv, 88190)
        # self.assertEqual(image2.jd_rtc, 6711)
        # self.assertEqual(image2.jd_rtt, 66470)
        self.assertEqual(image2.artist.artistToken, 'nardack')

Beispiel #3

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: EvilNeo/PixivUtil2

    def testPixivImageParseInfo(self):
      # Parse the saved image-info fixture and verify the extracted
      # metadata. The fixture is read inside a with-block so the file
      # handle is closed instead of leaking.
      with open('./test/test-image-info.html', 'r') as p:
        page = BeautifulSoup(p.read())
      image2 = PixivImage(32039274, page)
      page.decompose()
      del page

      self.assertEqual(image2.imageId, 32039274)
      self.assertEqual(image2.imageTitle, u"新しいお姫様")

      self.assertTrue(u'MAYU' in image2.imageTags)
      self.assertTrue(u'VOCALOID' in image2.imageTags)
      self.assertTrue(u'VOCALOID3' in image2.imageTags)
      self.assertTrue(u'うさぎになりたい' in image2.imageTags)
      self.assertTrue(u'なにこれかわいい' in image2.imageTags)
      self.assertTrue(u'やはり存在する斧' in image2.imageTags)

      self.assertEqual(image2.imageMode, "bigNew")
      self.assertEqual(image2.worksDate,'12-11-2012 00:23')
      self.assertEqual(image2.worksResolution,'642x900')
      self.assertEqual(image2.worksTools, 'Photoshop SAI')
      #self.assertEqual(image2.jd_rtv, 88190)
      #self.assertEqual(image2.jd_rtc, 6711)
      #self.assertEqual(image2.jd_rtt, 66470)
      self.assertEqual(image2.artist.artistToken, 'nardack')

Beispiel #4

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: awkwardusername/PixivUtil2

    def testPixivImageParseInfo(self):
      # Parse the saved image-info fixture and verify the extracted
      # metadata, including the full caption markup. The fixture is read
      # inside a with-block so the file handle is closed instead of leaking.
      with open('./test/test-image-info.html', 'r') as p:
        page = BeautifulSoup(p.read())
      image2 = PixivImage(32039274, page)
      page.decompose()
      del page

      self.assertEqual(image2.imageId, 32039274)
      self.assertEqual(image2.imageTitle, u"新しいお姫様")
      self.assertEqual(image2.imageCaption, u'EXIT TUNES様より冬コミ発売予定の「MAYU画集(仮)」に１枚描かせて頂きました。詳しくはこちらをご確認下さい！★ <a href="/jump.php?http%3A%2F%2Fexittunes.com%2Fevent%2Fc83%2Findex.html" target="_blank">http://exittunes.com/event/c83/index.html</a> ★「MAYU」公式サイト<a href="/jump.php?http%3A%2F%2Fmayusan.jp%2F" target="_blank">http://mayusan.jp/</a>')

      self.assertTrue(u'MAYU' in image2.imageTags)
      self.assertTrue(u'VOCALOID' in image2.imageTags)
      self.assertTrue(u'VOCALOID3' in image2.imageTags)
      self.assertTrue(u'うさぎになりたい' in image2.imageTags)
      self.assertTrue(u'なにこれかわいい' in image2.imageTags)
      self.assertTrue(u'やはり存在する斧' in image2.imageTags)
      self.assertTrue(u'ヤンデレ' in image2.imageTags)
      self.assertTrue(u'吸いこまれそうな瞳の色' in image2.imageTags)

      self.assertEqual(image2.imageMode, "big")
      self.assertEqual(image2.worksDate,'12-11-2012 00:23')
      self.assertEqual(image2.worksResolution,'642x900')
      self.assertEqual(image2.worksTools, 'Photoshop SAI')
      #self.assertEqual(image2.jd_rtv, 88190)
      #self.assertEqual(image2.jd_rtc, 6711)
      #self.assertEqual(image2.jd_rtt, 66470)
      self.assertEqual(image2.artist.artistToken, 'nardack')

Beispiel #5

0

Datei anzeigen

Datei: test_PixivHelper.py Projekt: yifei-fu/PixivUtil2

    def testCreateFilenameUnicode(self):
        # Build a filename from the unicode image fixture and compare it
        # with the expected string. Both fixture files are read inside
        # with-blocks so their handles are closed instead of leaking.
        with open('./test/test-image-unicode.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        imageInfo = PixivImage(2493913, page)
        page.decompose()
        del page

        # cross check with json value for artist info
        with open('./test/detail-267014.json', 'r') as js_file:
            js = json.load(js_file)

        self.assertEqual(imageInfo.artist.artistId, str(js["user"]["id"]))
        self.assertEqual(imageInfo.artist.artistToken, js["user"]["account"])
        self.assertEqual(
            imageInfo.artist.artistAvatar,
            js["user"]["profile_image_urls"]["medium"].replace("_170", ""))

        nameFormat = '%member_token% (%member_id%)\\%urlFilename% %works_date_only% %works_res% %works_tools% %title%'
        expected = unicode(
            u'balzehn (267014)\\2493913 12/23/08 852x1200 アラクネのいる日常２.jpg')
        result = PixivHelper.makeFilename(
            nameFormat,
            imageInfo,
            artistInfo=None,
            tagsSeparator=' ',
            fileUrl='http://i2.pixiv.net/img16/img/balzehn/2493913.jpg')
        # print(result)
        self.assertEqual(result, expected)

Beispiel #6

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: yife/PixivUtil2

    def testPixivImageParseInfo(self):
        # Parse the saved image-info fixture and verify the extracted
        # metadata. The fixture is read inside a with-block so the file
        # handle is closed instead of leaking.
        with open("./test/test-image-info.html", "r") as p:
            page = BeautifulSoup(p.read())
        image2 = PixivImage(32039274, page)
        page.decompose()
        del page

        self.assertEqual(image2.imageId, 32039274)
        self.assertEqual(image2.imageTitle, u"新しいお姫様")

        self.assertTrue(u"MAYU" in image2.imageTags)
        self.assertTrue(u"VOCALOID" in image2.imageTags)
        self.assertTrue(u"VOCALOID3" in image2.imageTags)
        self.assertTrue(u"うさぎになりたい" in image2.imageTags)
        self.assertTrue(u"なにこれかわいい" in image2.imageTags)
        self.assertTrue(u"やはり存在する斧" in image2.imageTags)

        self.assertEqual(image2.imageMode, "bigNew")
        self.assertEqual(image2.worksDate, "12-11-2012 00:23")
        self.assertEqual(image2.worksResolution, "642x900")
        self.assertEqual(image2.worksTools, "Photoshop SAI")
        # self.assertEqual(image2.jd_rtv, 88190)
        # self.assertEqual(image2.jd_rtc, 6711)
        # self.assertEqual(image2.jd_rtt, 66470)
        self.assertEqual(image2.artist.artistToken, "nardack")

Beispiel #7

0

Datei anzeigen

Datei: CharityParse.py Projekt: qmorgan/InsightSoft

def _CNHistoryParse(indexpage, only_get_EIN=True):
    '''Parse a CN history index page.

    indexpage    -- path of the saved index page (A.html, etc).
    only_get_EIN -- when true, return the href of the first link whose
                    href contains 'search.irs'.

    Returns that href, or None when no such link exists or when
    only_get_EIN is false. Raises ValueError when the page file does
    not exist.
    '''
    if not os.path.exists(indexpage):
        raise ValueError(
            "Page {} was indexed but doesn't exist??".format(indexpage))

    # The with-block closes the file and the finally-clause decomposes
    # the tree on every exit path, replacing the duplicated manual cleanup.
    with open(indexpage, 'r') as html:
        soup = BeautifulSoup(html)
        try:
            if only_get_EIN:  # ONLY GET THE EIN AND THEN EXIT
                # all of the text names are in links (a href ...)
                for link in soup.findAll('a'):
                    href = link.get('href')
                    # Anchors without an href yield None; skip them
                    # explicitly instead of swallowing the TypeError
                    # with a bare except.
                    if href is not None and 'search.irs' in href:
                        return href
            return None
        finally:
            soup.decompose()

Beispiel #8

0

Datei anzeigen

Datei: CharityParse.py Projekt: qmorgan/InsightSoft

def _CNHistoryParse(indexpage,only_get_EIN=True):
    '''Parse a CN history index page.

    indexpage    -- path of the saved index page (A.html, etc).
    only_get_EIN -- when true, return the href of the first link whose
                    href contains 'search.irs'.

    Returns that href, or None when no such link exists or when
    only_get_EIN is false. Raises ValueError when the page file does
    not exist.
    '''
    if not os.path.exists(indexpage):
        raise ValueError("Page {} was indexed but doesn't exist??".format(indexpage))

    # The with-block closes the file and the finally-clause decomposes
    # the tree on every exit path, replacing the duplicated manual cleanup.
    with open(indexpage, 'r') as html:
        soup = BeautifulSoup(html)
        try:
            if only_get_EIN: # ONLY GET THE EIN AND THEN EXIT
                # all of the text names are in links (a href ...)
                for link in soup.findAll('a'):
                    href = link.get('href')
                    # Anchors without an href yield None; skip them
                    # explicitly instead of swallowing the TypeError
                    # with a bare except.
                    if href is not None and 'search.irs' in href:
                        return href
            return None
        finally:
            soup.decompose()

Beispiel #9

0

Datei anzeigen

Datei: extract.py Projekt: ashfame/indianrail-db

def extract_train_schedules():
    """Rebuild the TempSchedule table from the stored HTML dump.

    Empties TempSchedule, then walks every Raw row whose url is NOT in
    ``train_urls``: the saved page is parsed, the ``schtable`` table is
    converted to records, and one TempSchedule is added per record.
    Changes are committed once per processed page.
    """
    # Truncate TempSchedule before re-importing.
    db.query(TempSchedule).delete()
    db.commit()

    total = db.query(func.count(Raw.id)).scalar()
    for position, record in enumerate(db.query(Raw.url)):
        page_url = record.url
        if page_url in train_urls:
            continue

        remaining = total - position
        debug("Processing html of %s (%s of %s. Remaining %s)" % (page_url, position, total, remaining))
        train = db.query(TempTrain).filter_by(url=page_url).first()
        stored = db.query(Raw.html).filter_by(url=page_url).first()
        soup = BeautifulSoup(stored.html)

        # Sanity check: the page text must mention the train's number.
        assert train.number in soup.text

        timetable = soup.find('table', 'schtable')
        # extract() detaches the header row from the table before the
        # table is handed to to_dict().
        header_row = timetable.find('tr', 'first-child').extract()
        column_names = []
        for cell in header_row:
            column_names.append(cell.text)

        stops = clean_keys(to_dict(timetable, column_names))

        for stop in stops:
            stop['train_number'] = train.number
            db.add(TempSchedule(**stop))

        update_return_train(train, soup)
        soup.decompose()
        db.commit()

Beispiel #10

0

Datei anzeigen

Datei: test.PixivHelper.py Projekt: Phenrei/PixivUtil2

    def testCreateMangaFilename(self):
        # Build filenames for several manga page URLs and compare each
        # with the expected string. Both fixture files are read inside
        # with-blocks so their handles are closed instead of leaking.
        with open('./test/test-image-manga.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        imageInfo = PixivImage(28820443, page)
        imageInfo.imageCount = 100
        page.decompose()
        del page

        # cross check with json value for artist info
        with open('./test/detail-554800.json', 'r') as js_file:
            js = json.load(js_file)

        self.assertEqual(imageInfo.artist.artistId, str(js["user"]["id"]))
        self.assertEqual(imageInfo.artist.artistToken, js["user"]["account"])
        self.assertEqual(imageInfo.artist.artistAvatar, js["user"]["profile_image_urls"]["medium"].replace("_170", ""))

        nameFormat = '%member_token% (%member_id%)\\%urlFilename% %page_number% %works_date_only% %works_res% %works_tools% %title%'

        expected = unicode(u'maidoll (554800)\\28865189_p0 001 07/22/12 Multiple images: 2P C82おまけ本 「沙耶は俺の嫁」サンプル.jpg')
        result = PixivHelper.makeFilename(nameFormat, imageInfo, artistInfo=None, tagsSeparator=' ', fileUrl='http://i2.pixiv.net/img26/img/ffei/28865189_p0.jpg')
        # print(result)
        self.assertEqual(result, expected)

        expected = unicode(u'maidoll (554800)\\28865189_p14 015 07/22/12 Multiple images: 2P C82おまけ本 「沙耶は俺の嫁」サンプル.jpg')
        result = PixivHelper.makeFilename(nameFormat, imageInfo, artistInfo=None, tagsSeparator=' ', fileUrl='http://i2.pixiv.net/img26/img/ffei/28865189_p14.jpg')
        # print(result)
        self.assertEqual(result, expected)

        expected = unicode(u'maidoll (554800)\\28865189_p921 922 07/22/12 Multiple images: 2P C82おまけ本 「沙耶は俺の嫁」サンプル.jpg')
        result = PixivHelper.makeFilename(nameFormat, imageInfo, artistInfo=None, tagsSeparator=' ', fileUrl='http://i2.pixiv.net/img26/img/ffei/28865189_p921.jpg')
        # print(result)
        self.assertEqual(result, expected)

Beispiel #11

0

Datei anzeigen

    def getImagePage(self, imageId, parent=None, fromBookmark=False,
                     bookmark_count=-1, image_response_count=-1):
        """Fetch the page for ``imageId`` and build an image model from it.

        When ``self._isWhitecube`` is set, the whitecube RPC endpoint is
        queried and the raw response is handed to
        ``PixivModelWhiteCube.PixivImage``; otherwise the medium
        illustration page is fetched, parsed with BeautifulSoup and handed
        to ``PixivModel.PixivImage``.

        Returns a tuple ``(image, response)`` where ``response`` is the raw
        body read from the opened URL.
        """
        image = None
        response = None
        PixivHelper.GetLogger().debug("Getting image page: {0}".format(imageId))
        if self._isWhitecube:
            url = "https://www.pixiv.net/rpc/whitecube/index.php?mode=work_details_modal_whitecube&id={0}&tt={1}".format(imageId, self._whitecubeToken)
            response = self.open(url).read()
            PixivHelper.GetLogger().debug(response)
            image = PixivModelWhiteCube.PixivImage(imageId,
                                                   response,
                                                   parent,
                                                   fromBookmark,
                                                   bookmark_count,
                                                   image_response_count,
                                                   dateFormat=self._config.dateFormat)
            # overwrite artist info
            self.getMemberInfoWhitecube(image.artist.artistId, image.artist)
        else:
            url = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id={0}".format(imageId)
            response = self.open(url).read()
            parsed = BeautifulSoup(response)
            # try/finally guarantees the parse tree is decomposed even
            # when building the model or parsing the images raises.
            try:
                image = PixivModel.PixivImage(imageId,
                                              parsed,
                                              parent,
                                              fromBookmark,
                                              bookmark_count,
                                              image_response_count,
                                              dateFormat=self._config.dateFormat)
                if image.imageMode in ("ugoira_view", "bigNew"):
                    image.ParseImages(parsed)
            finally:
                parsed.decompose()

        return (image, response)

Beispiel #12

0

Datei anzeigen

    def testPixivImageParseInfoPixivPremiumOffer(self):
        # Parse the premium-offer image fixture and verify the extracted
        # metadata. The fixture is read inside a with-block so the file
        # handle is closed instead of leaking.
        with open('./test/test-image-parse-image-38826533-pixiv-premium.html',
                  'r') as p:
            page = BeautifulSoup(p.read())
        image2 = PixivImage(38826533, page)
        page.decompose()
        del page

        self.assertEqual(image2.imageId, 38826533)
        self.assertEqual(image2.imageTitle, u"てやり")
        self.assertEqual(image2.imageCaption, u'一応シーダ様です。')

        self.assertTrue(u'R-18' in image2.imageTags)
        self.assertTrue(u'FE' in image2.imageTags)
        self.assertTrue(u'ファイアーエムブレム' in image2.imageTags)
        self.assertTrue(u'シーダ' in image2.imageTags)

        self.assertEqual(image2.imageMode, "big")
        self.assertEqual(image2.worksDate, '9/30/2013 01:43')
        self.assertEqual(image2.worksResolution, '1000x2317')
        self.assertEqual(image2.worksTools, 'CLIP STUDIO PAINT')
        # self.assertEqual(image2.jd_rtv, 88190)
        # self.assertEqual(image2.jd_rtc, 6711)
        # self.assertEqual(image2.jd_rtt, 66470)
        self.assertEqual(image2.artist.artistToken, 'hvcv')

Beispiel #13

0

Datei anzeigen

    def testCreateMangaFilename(self):
        # Build filenames for several manga page URLs and compare each
        # with the expected string. Both fixture files are read inside
        # with-blocks so their handles are closed instead of leaking.
        with open('./test/test-image-manga.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        imageInfo = PixivImage(28820443, page)
        imageInfo.imageCount = 100
        page.decompose()
        del page

        # cross check with json value for artist info
        with open('./test/detail-554800.json', 'r') as js_file:
            js = json.load(js_file)

        self.assertEqual(imageInfo.artist.artistId, str(js["user"]["id"]))
        self.assertEqual(imageInfo.artist.artistToken, js["user"]["account"])
        self.assertEqual(imageInfo.artist.artistAvatar, js["user"]["profile_image_urls"]["medium"].replace("_170", ""))

        nameFormat = '%member_token% (%member_id%)\\%urlFilename% %page_number% %works_date_only% %works_res% %works_tools% %title%'

        expected = unicode(u'maidoll (554800)\\28865189_p0 001 07/22/12 Multiple images: 2P C82おまけ本 「沙耶は俺の嫁」サンプル.jpg')
        result = PixivHelper.makeFilename(nameFormat, imageInfo, artistInfo=None, tagsSeparator=' ', fileUrl='http://i2.pixiv.net/img26/img/ffei/28865189_p0.jpg')
        # print(result)
        self.assertEqual(result, expected)

        expected = unicode(u'maidoll (554800)\\28865189_p14 015 07/22/12 Multiple images: 2P C82おまけ本 「沙耶は俺の嫁」サンプル.jpg')
        result = PixivHelper.makeFilename(nameFormat, imageInfo, artistInfo=None, tagsSeparator=' ', fileUrl='http://i2.pixiv.net/img26/img/ffei/28865189_p14.jpg')
        # print(result)
        self.assertEqual(result, expected)

        expected = unicode(u'maidoll (554800)\\28865189_p921 922 07/22/12 Multiple images: 2P C82おまけ本 「沙耶は俺の嫁」サンプル.jpg')
        result = PixivHelper.makeFilename(nameFormat, imageInfo, artistInfo=None, tagsSeparator=' ', fileUrl='http://i2.pixiv.net/img26/img/ffei/28865189_p921.jpg')
        # print(result)
        self.assertEqual(result, expected)

Beispiel #14

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: jdstroy/PixivUtil2

    def testPixivImageParseInfo(self):
        # Parse the saved image-info fixture and verify the extracted
        # metadata. The fixture is read inside a with-block so the file
        # handle is closed instead of leaking.
        with open('./test/test-image-info.html', 'r') as p:
            page = BeautifulSoup(p.read())
        image2 = PixivImage(32039274, page)
        page.decompose()
        del page

        self.assertEqual(image2.imageId, 32039274)
        self.assertEqual(image2.imageTitle, u"新しいお姫様")

        self.assertTrue(u'MAYU' in image2.imageTags)
        self.assertTrue(u'VOCALOID' in image2.imageTags)
        self.assertTrue(u'VOCALOID3' in image2.imageTags)
        self.assertTrue(u'うさぎになりたい' in image2.imageTags)
        self.assertTrue(u'なにこれかわいい' in image2.imageTags)
        self.assertTrue(u'やはり存在する斧' in image2.imageTags)
        self.assertTrue(u'ヤンデレ' in image2.imageTags)
        self.assertTrue(u'吸いこまれそうな瞳の色' in image2.imageTags)

        self.assertEqual(image2.imageMode, "big")
        self.assertEqual(image2.worksDate, '12-11-2012 00:23')
        self.assertEqual(image2.worksResolution, '642x900')
        self.assertEqual(image2.worksTools, 'Photoshop SAI')
        #self.assertEqual(image2.jd_rtv, 88190)
        #self.assertEqual(image2.jd_rtc, 6711)
        #self.assertEqual(image2.jd_rtt, 66470)
        self.assertEqual(image2.artist.artistToken, 'nardack')

Beispiel #15

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: sanniu/PixivUtil2

    def testPixivImageParseInfoPixivPremiumOffer(self):
        # Parse the premium-offer image fixture and verify the extracted
        # metadata. The fixture is read inside a with-block so the file
        # handle is closed instead of leaking.
        with open('./test/test-image-parse-image-38826533-pixiv-premium.html',
                  'r') as p:
            page = BeautifulSoup(p.read())
        image2 = PixivImage(38826533, page)
        page.decompose()
        del page

        self.assertEqual(image2.imageId, 38826533)
        self.assertEqual(image2.imageTitle, u"てやり")
        self.assertEqual(image2.imageCaption, u'一応シーダ様です。')

        self.assertTrue(u'R-18' in image2.imageTags)
        self.assertTrue(u'FE' in image2.imageTags)
        self.assertTrue(u'ファイアーエムブレム' in image2.imageTags)
        self.assertTrue(u'シーダ' in image2.imageTags)

        self.assertEqual(image2.imageMode, "big")
        self.assertEqual(image2.worksDate, '9-30-2013 01:43')
        self.assertEqual(image2.worksResolution, '1000x2317')
        self.assertEqual(image2.worksTools, 'CLIP STUDIO PAINT')
        #self.assertEqual(image2.jd_rtv, 88190)
        #self.assertEqual(image2.jd_rtc, 6711)
        #self.assertEqual(image2.jd_rtt, 66470)
        self.assertEqual(image2.artist.artistToken, 'hvcv')

Beispiel #16

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: yife/PixivUtil2

 def testPixivArtistNoImage(self):
     # Member page with no image: constructing PixivArtist must raise
     # PixivException. The fixture is read inside a with-block so the
     # file handle is closed instead of leaking.
     with open("./test/test-noimage.htm", "r") as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException):
         PixivArtist(1233, page)
     page.decompose()
     del page

Beispiel #17

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: yife/PixivUtil2

 def testPixivImageDeleted(self):
     # Deleted image page: constructing PixivImage must raise
     # PixivException. The fixture is read inside a with-block so the
     # file handle is closed instead of leaking.
     with open("./test/test-image-deleted.htm", "r") as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException):
         PixivImage(123, page)
     page.decompose()
     del page

Beispiel #18

0

Datei anzeigen

 def testPixivArtistNotLoggedIn(self):
     # Not-logged-in member page: PixivArtist must raise PixivException
     # with errorCode 100. The fixture is read inside a with-block so the
     # file handle is closed instead of leaking.
     with open('./test/test-member-nologin.htm', 'r') as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException) as ex:
         PixivArtist(143229, page)
     self.assertEqual(ex.exception.errorCode, 100)
     page.decompose()
     del page

Beispiel #19

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: Hakus/PixivUtil2

 def testPixivArtistDeleted(self):
   # Deleted member page: constructing PixivArtist must raise
   # PixivModelException. The fixture is read inside a with-block so the
   # file handle is closed instead of leaking.
   with open('./test/test-member-deleted.htm', 'r') as p:
       page = BeautifulSoup(p.read())
   with self.assertRaises(PixivModelException):
       PixivArtist(123, page)
   page.decompose()
   del page

Beispiel #20

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: Hakus/PixivUtil2

 def testPixivArtistNoImage(self):
   # Member page with no image: constructing PixivArtist must raise
   # PixivModelException. The fixture is read inside a with-block so the
   # file handle is closed instead of leaking.
   with open('./test/test-noimage.htm', 'r') as p:
       page = BeautifulSoup(p.read())
   with self.assertRaises(PixivModelException):
       PixivArtist(363073, page)
   page.decompose()
   del page

Beispiel #21

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: EvilNeo/PixivUtil2

 def testPixivArtistNoMember(self):
   # No-member page: constructing PixivArtist must raise PixivException.
   # The fixture is read inside a with-block so the file handle is
   # closed instead of leaking.
   with open('./test/test-nouser.htm', 'r') as p:
       page = BeautifulSoup(p.read())
   with self.assertRaises(PixivException):
       PixivArtist(1, page)
   page.decompose()
   del page

Beispiel #22

0

Datei anzeigen

 def testPixivArtistNoImage(self):
   # Member page with no image: constructing PixivArtist must raise
   # PixivException. The fixture is read inside a with-block so the
   # file handle is closed instead of leaking.
   with open('./test/test-noimage.htm', 'r') as p:
       page = BeautifulSoup(p.read())
   with self.assertRaises(PixivException):
       PixivArtist(1233, page)
   page.decompose()
   del page

Beispiel #23

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: EvilNeo/PixivUtil2

 def testPixivArtistNotLoggedIn(self):
   # Not-logged-in member page: PixivArtist must raise PixivException
   # with errorCode 100. The fixture is read inside a with-block so the
   # file handle is closed instead of leaking.
   with open('./test/test-member-nologin.htm', 'r') as p:
       page = BeautifulSoup(p.read())
   with self.assertRaises(PixivException) as ex:
       PixivArtist(143229, page)
   self.assertEqual(ex.exception.errorCode, 100)
   page.decompose()
   del page

Beispiel #24

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: EvilNeo/PixivUtil2

 def testPixivImageNoImageEng(self):
   # English "no image" page: constructing PixivImage must raise
   # PixivException. The fixture is read inside a with-block so the
   # file handle is closed instead of leaking.
   with open('./test/test-image-noimage-eng.htm', 'r') as p:
       page = BeautifulSoup(p.read())
   with self.assertRaises(PixivException):
       PixivImage(123, page)
   page.decompose()
   del page

Beispiel #25

0

Datei anzeigen

 def testPixivArtistNoMember(self):
     # No-member page: constructing PixivArtist must raise PixivException.
     # The fixture is read inside a with-block so the file handle is
     # closed instead of leaking.
     with open('./test/test-nouser.htm', 'r') as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException):
         PixivArtist(1, page)
     page.decompose()
     del page

Beispiel #26

0

Datei anzeigen

 def testPixivImageDeleted(self):
     # Deleted image page: constructing PixivImage must raise
     # PixivException. The fixture is read inside a with-block so the
     # file handle is closed instead of leaking.
     with open('./test/test-image-deleted.htm', 'r') as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException):
         PixivImage(123, page)
     page.decompose()
     del page

Beispiel #27

0

Datei anzeigen

 def testPixivImageNoImageEng(self):
     # English "no image" page: constructing PixivImage must raise
     # PixivException. The fixture is read inside a with-block so the
     # file handle is closed instead of leaking.
     with open('./test/test-image-noimage-eng.htm', 'r') as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException):
         PixivImage(123, page)
     page.decompose()
     del page

Beispiel #28

0

Datei anzeigen

Datei: test_PixivModel.py Projekt: Nandaka/PixivUtil2

 def testPixivArtistNoImage(self):
     # Member page with no image: constructing PixivArtist must raise
     # PixivException. The fixture is read inside a with-block so the
     # file handle is closed instead of leaking; the result is not bound
     # because the constructor is expected to raise.
     with open('./test/test-noimage.htm', 'r') as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException):
         PixivArtist(1233, page)
     page.decompose()
     del page

Beispiel #29

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: EvilNeo/PixivUtil2

 def testPixivArtistServerError(self):
   # Server-error page: PixivArtist must raise PixivException with
   # errorCode SERVER_ERROR. The fixture is read inside a with-block so
   # the file handle is closed instead of leaking; the result is not
   # bound because the constructor is expected to raise.
   with open('./test/test-server-error.html', 'r') as p:
     page = BeautifulSoup(p.read())
   with self.assertRaises(PixivException) as ex:
     PixivArtist(234753, page)
   self.assertEqual(ex.exception.errorCode, PixivException.SERVER_ERROR)
   page.decompose()
   del page

Beispiel #30

0

Datei anzeigen

 def testPixivImageServerError2(self):
     # Generic-error image page: PixivImage must raise PixivException
     # with errorCode UNKNOWN_IMAGE_ERROR. The fixture is read inside a
     # with-block so the file handle is closed instead of leaking; the
     # result is not bound because the constructor is expected to raise.
     with open('./test/test-image-generic-error.html', 'r') as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException) as ex:
         PixivImage(37882549, page)
     self.assertEqual(ex.exception.errorCode, PixivException.UNKNOWN_IMAGE_ERROR)
     page.decompose()
     del page

Beispiel #31

0

Datei anzeigen

 def testPixivArtistServerError(self):
     # Server-error page: PixivArtist must raise PixivException with
     # errorCode SERVER_ERROR. The fixture is read inside a with-block so
     # the file handle is closed instead of leaking; the result is not
     # bound because the constructor is expected to raise.
     with open('./test/test-server-error.html', 'r') as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException) as ex:
         PixivArtist(234753, page)
     self.assertEqual(ex.exception.errorCode, PixivException.SERVER_ERROR)
     page.decompose()
     del page

Beispiel #32

0

Datei anzeigen

 def testPixivArtistSuspended(self):
     # Suspended member page: PixivArtist must raise PixivException with
     # errorCode 1002. The fixture is read inside a with-block so the
     # file handle is closed instead of leaking.
     with open('./test/test-member-suspended.htm', 'r') as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException) as ex:
         PixivArtist(123, page)
     self.assertEqual(ex.exception.errorCode, 1002)
     page.decompose()
     del page

Beispiel #33

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: yife/PixivUtil2

 def testPixivImageServerError(self):
     # Server-error page: PixivImage must raise PixivException with
     # errorCode SERVER_ERROR. The fixture is read inside a with-block so
     # the file handle is closed instead of leaking; the result is not
     # bound because the constructor is expected to raise.
     with open("./test/test-server-error.html", "r") as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException) as ex:
         PixivImage(9138317, page)
     self.assertEqual(ex.exception.errorCode, PixivException.SERVER_ERROR)
     page.decompose()
     del page

Beispiel #34

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: EvilNeo/PixivUtil2

 def testPixivImageServerError2(self):
   # Generic-error image page: PixivImage must raise PixivException
   # with errorCode SERVER_ERROR. The fixture is read inside a with-block
   # so the file handle is closed instead of leaking; the result is not
   # bound because the constructor is expected to raise.
   with open('./test/test-image-generic-error.html', 'r') as p:
     page = BeautifulSoup(p.read())
   with self.assertRaises(PixivException) as ex:
     PixivImage(37882549, page)
   self.assertEqual(ex.exception.errorCode, PixivException.SERVER_ERROR)
   page.decompose()
   del page

Beispiel #35

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: EvilNeo/PixivUtil2

 def testPixivArtistSuspended(self):
   # Suspended member page: PixivArtist must raise PixivException with
   # errorCode 1002. The fixture is read inside a with-block so the
   # file handle is closed instead of leaking.
   with open('./test/test-member-suspended.htm', 'r') as p:
       page = BeautifulSoup(p.read())
   with self.assertRaises(PixivException) as ex:
       PixivArtist(123, page)
   self.assertEqual(ex.exception.errorCode, 1002)
   page.decompose()
   del page

Beispiel #36

0

Datei anzeigen

 def testPixivImageServerError(self):
     # Server-error page: PixivImage must raise PixivException with
     # errorCode SERVER_ERROR. The fixture is read inside a with-block so
     # the file handle is closed instead of leaking; the result is not
     # bound because the constructor is expected to raise.
     with open('./test/test-server-error.html', 'r') as p:
         page = BeautifulSoup(p.read())
     with self.assertRaises(PixivException) as ex:
         PixivImage(9138317, page)
     self.assertEqual(ex.exception.errorCode, PixivException.SERVER_ERROR)
     page.decompose()
     del page

Beispiel #37

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: EvilNeo/PixivUtil2

 def testPixivImageUgoira(self):
   # Ugoira image page: after ParseImages the first image URL must
   # contain ".zip". The fixture is read inside a with-block so the file
   # handle is closed instead of leaking. The return value of ParseImages
   # is not bound because only image.imageUrls is inspected.
   with open('./test/test-image-ugoira.htm', 'r') as p:
     page = BeautifulSoup(p.read())
   image = PixivImage(46281014, page)
   image.ParseImages(page)
   # Parenthesised form prints the same text for a single argument and
   # also parses under Python 3.
   print(image.imageUrls)
   self.assertTrue(image.imageUrls[0].find(".zip") > -1)
   page.decompose()
   del page

Beispiel #38

0

Datei anzeigen

 def testPixivImageUgoira(self):
     # Ugoira image page: after ParseImages the first image URL must
     # contain ".zip". The fixture is read inside a with-block so the
     # file handle is closed instead of leaking. The return value of
     # ParseImages is not bound because only image.imageUrls is inspected.
     with open('./test/test-image-ugoira.htm', 'r') as p:
         page = BeautifulSoup(p.read())
     image = PixivImage(46281014, page)
     image.ParseImages(page)
     # print(image.imageUrls)
     self.assertTrue(image.imageUrls[0].find(".zip") > -1)
     page.decompose()
     del page

Beispiel #39

0

Datei anzeigen

Datei: test_PixivModel.py Projekt: Nandaka/PixivUtil2

    def testPixivImageModeManga(self):
        # Manga image page: the parsed image must report mode 'manga'.
        # The fixture is read inside a with-block so the file handle is
        # closed instead of leaking.
        with open('./test/test-image-manga.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        image = PixivImage(28820443, page)
        page.decompose()
        del page

        self.assertNotEqual(image, None)
        self.assertEqual(image.imageId, 28820443)
        self.assertEqual(image.imageMode, 'manga')

Beispiel #40

0

Datei anzeigen

    def testPixivImageModeManga(self):
        # Manga image page: the parsed image must report mode 'manga'.
        # The fixture is read inside a with-block so the file handle is
        # closed instead of leaking.
        with open('./test/test-image-manga.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        image = PixivImage(28820443, page)
        page.decompose()
        del page

        self.assertNotEqual(image, None)
        self.assertEqual(image.imageId, 28820443)
        self.assertEqual(image.imageMode, 'manga')

Beispiel #41

0

Datei anzeigen

    def testPixivImageRateCount(self):
        # Rate-count image page: the parsed image must expose positive
        # jd_rtv and jd_rtc values. The fixture is read inside a
        # with-block so the file handle is closed instead of leaking.
        with open('./test/test-image-rate_count.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        image = PixivImage(28865189, page)
        page.decompose()
        del page

        self.assertNotEqual(image, None)
        self.assertEqual(image.imageId, 28865189)
        self.assertEqual(image.imageMode, 'manga')
        self.assertTrue(image.jd_rtv > 0)
        self.assertTrue(image.jd_rtc > 0)

Beispiel #42

0

Datei anzeigen

Datei: test_PixivModel.py Projekt: Nandaka/PixivUtil2

    def testPixivImageRateCount(self):
        # Rate-count image page: the parsed image must expose positive
        # jd_rtv and jd_rtc values. The fixture is read inside a
        # with-block so the file handle is closed instead of leaking.
        with open('./test/test-image-rate_count.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        image = PixivImage(28865189, page)
        page.decompose()
        del page

        self.assertNotEqual(image, None)
        self.assertEqual(image.imageId, 28865189)
        self.assertEqual(image.imageMode, 'manga')
        self.assertTrue(image.jd_rtv > 0)
        self.assertTrue(image.jd_rtc > 0)

Beispiel #43

0

Datei anzeigen

Datei: test.PixivHelper.py Projekt: Wuji2000/PixivUtil2

    def testCreateFilenameUnicode(self):
        # Build a filename from the unicode image fixture and compare it
        # with the expected string. The fixture is read inside a
        # with-block so the file handle is closed instead of leaking.
        with open('./test/test-image-unicode.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        imageInfo = PixivImage(2493913, page)
        page.decompose()
        del page

        # The backslash is escaped explicitly; '\%' is an invalid escape
        # sequence that only happens to yield the same string.
        nameFormat = '%member_token% (%member_id%)\\%urlFilename% %works_date_only% %works_res% %works_tools% %title%'
        expected = unicode(u'balzehn (267014)\\2493913 12-23-2008 852x1200 Photoshop SAI つけペン アラクネのいる日常２.jpg')
        result = PixivHelper.makeFilename(nameFormat, imageInfo, artistInfo=None, tagsSeparator=' ', fileUrl='http://i2.pixiv.net/img16/img/balzehn/2493913.jpg')
        # print(result)
        self.assertEqual(result, expected)

Beispiel #44

0

Datei anzeigen

Datei: test.PixivHelper.py Projekt: MokoJumbie/PixivUtil2

  def testCreateFilenameUnicode(self):
    # Build a filename (including tags) from the unicode image fixture
    # and compare it with the expected string. The fixture is read inside
    # a with-block so the file handle is closed instead of leaking.
    with open('./test/test-image-unicode.htm', 'r') as p:
      page = BeautifulSoup(p.read())
    imageInfo = PixivImage(2493913, page)
    page.decompose()
    del page

    # The backslash is escaped explicitly; '\%' is an invalid escape
    # sequence that only happens to yield the same string.
    nameFormat = '%member_token% (%member_id%)\\%urlFilename% %works_date_only% %works_res% %works_tools% %title% - %tags%'
    expected = unicode(u'balzehn (267014)\\2493913 12-23-2008 852x1200 Photoshop SAI つけペン アラクネのいる日常２ - R-18 これは萌える アラクネ ツンデレ ピロートークの上手さに定評のある兄弟 モンスター娘 モン娘のいる日常シリーズ 人外 魔物娘 魔界全土喝采.jpg')
    result = PixivHelper.makeFilename(nameFormat, imageInfo, artistInfo=None, tagsSeparator=' ', fileUrl='http://i2.pixiv.net/img16/img/balzehn/2493913.jpg')
    # print(result)
    self.assertEqual(result, expected)

Beispiel #45

0

Datei anzeigen

Datei: crawlMarket.py Projekt: bhenne/Android-Market-Crawler

    def crawlAppsForCategory(self, url, cat, cat2):
        pageIndex = 0
        curl = url % (cat, pageIndex, self.pageIncrements)
        twice = False

        while True:
            try:
                #print curl
                request = urllib2.Request(curl)
                request.add_header("User-Agent", "PermissionCrawler")
                handle = urllib2.build_opener()
                content = handle.open(request).read()
                soup = BeautifulSoup(content)

                print " crawling next %d entries starting with #%d" % (self.pageIncrements, pageIndex+1)
                appURLS = self.extractAppUrls(soup)
                duplicates = self.extractPermissionsIntoDB(appURLS, cat, cat2)

                if len(duplicates) == 0:
                    pageIndex+=self.pageIncrements
                # if we got first full repetition of page 1, go back one page and move on slowly until second full repetition
                elif ((len(duplicates) == self.pageIncrements) or (len(duplicates) >= myThreshold)) and (twice == False):
                    print >> sys.stderr, "  ! %d duplicate entries on last iteration" % len(duplicates)
                    pageIndex = max(pageIndex-self.pageIncrements, 0)
                    twice = True
                    duplicates = set()
                elif twice == True:
                    pageIndex+=1
                # resorting of top n apps may produce 1 or 2 duplicates - ignore low number of duplicates
                else:
                    pageIndex+=self.pageIncrements

                curl = url % (cat, pageIndex, self.pageIncrements)

                soup.decompose()

                if TERMAPP == True:
                    connection.close()
                    sys.exit()

                if ((len(duplicates) == self.pageIncrements) or (len(duplicates) >= myThreshold)) and (twice == True):
                    print >> sys.stderr, "INFO: stopped crawling categrory %s due to %s duplicates at last iteration twice" % (cat, len(duplicates))
                    return False

            except urllib2.HTTPError, error:
                if error.code == 404:
                    print >> sys.stderr, "404 ERROR: %s -> %s" % (error, error.url)
                if error.code == 403:
                    print >> sys.stderr, "403 (NO MORE APP PAGES FOR THIS CATEGORY)ERROR: %s -> %s" % (error, error.url)
                else:
                    print >> sys.stderr, "ERROR: %s" % error
                break

Beispiel #46

0

Datei anzeigen

Datei: PixivBrowserFactory.py Projekt: Nandaka/PixivUtil2

    def getSearchTagPage(self, tags,
                         current_page,
                         wild_card=True,
                         title_caption=False,
                         start_date=None,
                         end_date=None,
                         member_id=None,
                         oldest_first=False,
                         start_page=1):
        """Fetch one page of tag search results.

        When ``member_id`` is given, the member page is fetched through
        ``getMemberPage`` and converted with ``parseMemberTags``. Otherwise a
        search URL is built with ``PixivHelper.generateSearchTagUrl`` from the
        remaining options, downloaded and parsed with ``parseTags``.

        ``start_page`` is accepted but not used by this method.

        Returns a ``(result, response)`` tuple: the parsed tags object and the
        response obtained for the request.

        If ``parseTags`` raises, the raw response is dumped to an HTML file and
        the exception is re-raised.
        """
        response = None
        result = None
        url = ''

        if member_id is not None:
            # from member id search by tags
            (artist, response) = self.getMemberPage(member_id, current_page, False, tags)

            # convert to PixivTags
            result = PixivModelWhiteCube.PixivTags()
            result.parseMemberTags(artist, member_id, tags)
        else:
            # search by tags
            url = PixivHelper.generateSearchTagUrl(tags, current_page,
                                                   title_caption,
                                                   wild_card,
                                                   oldest_first,
                                                   start_date,
                                                   end_date,
                                                   member_id,
                                                   self._config.r18mode)

            PixivHelper.print_and_log('info', 'Looping... for ' + url)
            response = self.getPixivPage(url, returnParsed=False).read()
            self.handleDebugTagSearchPage(response, url)

            parse_search_page = BeautifulSoup(response)

            result = PixivModel.PixivTags()
            # member_id is always None in this branch, so only parseTags applies.
            try:
                result.parseTags(parse_search_page, tags)
            except BaseException:
                PixivHelper.dumpHtml("Dump for SearchTags " + tags + ".html", response)
                raise
            finally:
                # Release the parse tree even when parsing failed.
                parse_search_page.decompose()
                del parse_search_page

        return (result, response)

Beispiel #47

0

Datei anzeigen

    def getSearchTagPage(self, tags,
                         current_page,
                         wild_card=True,
                         title_caption=False,
                         start_date=None,
                         end_date=None,
                         member_id=None,
                         oldest_first=False,
                         start_page=1):
        """Fetch one page of tag search results.

        With a ``member_id`` the member page is retrieved via
        ``getMemberPage`` and turned into tags by ``parseMemberTags``. Without
        one, ``PixivHelper.generateSearchTagUrl`` builds the search URL from
        the other options; that page is downloaded and handed to
        ``parseTags``.

        ``start_page`` is accepted but not used by this method.

        Returns a ``(result, response)`` tuple: the parsed tags object and the
        response obtained for the request.

        If ``parseTags`` raises, the raw response is dumped to an HTML file and
        the exception is re-raised.
        """
        response = None
        result = None
        url = ''

        if member_id is not None:
            # from member id search by tags
            (artist, response) = self.getMemberPage(member_id, current_page, False, tags)

            # convert to PixivTags
            result = PixivModelWhiteCube.PixivTags()
            result.parseMemberTags(artist, member_id, tags)
        else:
            # search by tags
            url = PixivHelper.generateSearchTagUrl(tags, current_page,
                                                   title_caption,
                                                   wild_card,
                                                   oldest_first,
                                                   start_date,
                                                   end_date,
                                                   member_id,
                                                   self._config.r18mode)

            PixivHelper.print_and_log('info', 'Looping... for ' + url)
            response = self.getPixivPage(url, returnParsed=False).read()
            self.handleDebugTagSearchPage(response, url)

            parse_search_page = BeautifulSoup(response)

            result = PixivModel.PixivTags()
            # The member_id check that used to sit here could never be true
            # inside this branch, so parseTags is called unconditionally.
            try:
                result.parseTags(parse_search_page, tags)
            except BaseException:
                PixivHelper.dumpHtml("Dump for SearchTags " + tags + ".html", response)
                raise
            finally:
                # Always free the parse tree, including on the error path.
                parse_search_page.decompose()
                del parse_search_page

        return (result, response)

Beispiel #48

0

Datei anzeigen

def parseJs(page):
    """Decode the payload stored in the page's ``meta-preload-data`` tag.

    ``page`` is a byte string that is decoded as UTF-8 before parsing.

    Returns the object produced by ``demjson.decode`` for the tag's
    ``content`` attribute, or ``None`` when the tag is missing or its content
    is empty (possibly an error page).
    """
    parsed = BeautifulSoup(page.decode("utf8"))
    try:
        jss = parsed.find('meta', attrs={'id': 'meta-preload-data'})
        # Read the attribute BEFORE decomposing: decompose() destroys the tree
        # that owns the tag, so the tag must not be used afterwards.
        content = None if jss is None else jss["content"]
    finally:
        # cleanup
        parsed.decompose()
        del parsed

    if content is None or len(content) == 0:
        return None  # Possibly error page

    payload = demjson.decode(content)
    return payload

Beispiel #49

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: RoxasShadow/PixivUtil2

 def testPixivArtistProfileDataSrc(self):
   """Parse the saved avatar-name page and check the artist id and token."""
   # Close the fixture file as soon as its content has been read.
   with open('./test/test-helper-avatar-name.htm', 'r') as p:
     page = BeautifulSoup(p.read())
   # Pre-bind so a parse failure shows up as a failed assertion below
   # instead of a NameError.
   artist = None
   try:
     artist = PixivArtist(1107124, page)
   except PixivModelException as ex:
     print(ex)
   page.decompose()
   del page
   self.assertNotEqual(artist, None)
   self.assertEqual(artist.artistId, 1107124)
   self.assertEqual(artist.artistToken, 'kirabara29')

Beispiel #50

0

Datei anzeigen

Datei: test_PixivModel.py Projekt: Nandaka/PixivUtil2

    def testPixivImageUnicode(self):
        """Parse the saved unicode image page and check its basic fields."""
        # Close the fixture file as soon as its content has been read.
        with open('./test/test-image-unicode.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        image = PixivImage(2493913, page)
        page.decompose()
        del page

        self.assertNotEqual(image, None)
        self.assertEqual(image.imageId, 2493913)
        self.assertEqual(image.imageMode, 'big')
        self.assertEqual(image.worksDate, '12/23/08 12:01')
        self.assertEqual(image.worksResolution, '852x1200')

Beispiel #51

0

Datei anzeigen

Datei: test.PixivModel.py Projekt: Hakus/PixivUtil2

 def testPixivArtistPage(self):
   """Parse the saved member page and check the artist id."""
   # Close the fixture file as soon as its content has been read.
   with open('./test/test.htm', 'r') as p:
     page = BeautifulSoup(p.read())
   # Pre-bind so a parse failure shows up as a failed assertion below
   # instead of a NameError.
   artist = None
   try:
     artist = PixivArtist(363073, page)
     artist.PrintInfo()
   except PixivModelException as ex:
     print(ex)
   page.decompose()
   del page
   self.assertNotEqual(artist, None)
   self.assertEqual(artist.artistId, 363073)

Beispiel #52

0

Datei anzeigen

    def testPixivArtistNoAvatar(self):
        """Parse a member page without avatar and check the fallback image."""
        # Close the fixture file as soon as its content has been read.
        with open('./test/test-member-noavatar.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        artist = PixivArtist(26357, page)

        page.decompose()
        del page
        self.assertNotEqual(artist, None)
        self.assertEqual(artist.artistId, 26357)
        self.assertEqual(artist.artistToken, 'yukimaruko')
        self.assertTrue(artist.artistAvatar.find("no_profile.png") > 0)

Beispiel #53

0

Datei anzeigen

 def testPixivArtistBookmark(self):
   """Parse the saved member bookmark page and check the artist id."""
   # Close the fixture file as soon as its content has been read.
   with open('./test/test-member-bookmark.htm', 'r') as p:
     page = BeautifulSoup(p.read())
   # Pre-bind so a parse failure shows up as a failed assertion below
   # instead of a NameError.
   artist = None
   try:
     artist = PixivArtist(3281699, page)
   except PixivException as ex:
     print(ex)
   page.decompose()
   del page
   self.assertNotEqual(artist, None)
   self.assertEqual(artist.artistId, 3281699)

Beispiel #54

0

Datei anzeigen

 def testPixivArtistProfileDataSrc(self):
   """Parse the saved avatar-name page and check the artist id and token."""
   # Close the fixture file as soon as its content has been read.
   with open('./test/test-helper-avatar-name.htm', 'r') as p:
     page = BeautifulSoup(p.read())
   # Pre-bind so a parse failure shows up as a failed assertion below
   # instead of a NameError.
   artist = None
   try:
     artist = PixivArtist(1107124, page)
   except PixivException as ex:
     print(ex)
   page.decompose()
   del page
   self.assertNotEqual(artist, None)
   self.assertEqual(artist.artistId, 1107124)
   self.assertEqual(artist.artistToken, 'kirabara29')

Beispiel #55

0

Datei anzeigen

    def testPixivImageUnicode(self):
        """Parse the saved unicode image page and check its basic fields."""
        # A context manager guarantees the fixture file handle is released.
        with open('./test/test-image-unicode.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        image = PixivImage(2493913, page)
        page.decompose()
        del page

        self.assertNotEqual(image, None)
        self.assertEqual(image.imageId, 2493913)
        self.assertEqual(image.imageMode, 'big')
        self.assertEqual(image.worksDate, '12/23/08 12:01')
        self.assertEqual(image.worksResolution, '852x1200')

Beispiel #56

0

Datei anzeigen

    def testPixivImageParseNoTags(self):
        """Parse an image page without tags and expect an empty tag list."""
        # Close the fixture file as soon as its content has been read.
        with open('./test/test-image-no_tags.htm', 'r') as p:
            page = BeautifulSoup(p.read())
        image = PixivImage(9175987, page)
        page.decompose()
        del page

        self.assertNotEqual(image, None)
        self.assertEqual(image.imageId, 9175987)
        self.assertEqual(image.worksDate, '03/05/10 18:04')
        self.assertEqual(image.worksResolution, '1155x768')
        # self.assertEqual(image.worksTools, u'SAI')
        self.assertEqual(image.imageTags, [])

Beispiel #57

0

Datei anzeigen

 def testPixivArtistNoAvatar(self):
   """Parse a member page without avatar and check the artist id and token."""
   # Close the fixture file as soon as its content has been read.
   with open('./test/test-member-noavatar.htm', 'r') as p:
     page = BeautifulSoup(p.read())
   # Pre-bind so a parse failure shows up as a failed assertion below
   # instead of a NameError.
   artist = None
   try:
     artist = PixivArtist(26357, page)
   except PixivException as ex:
     print(ex)
   page.decompose()
   del page
   self.assertNotEqual(artist, None)
   self.assertEqual(artist.artistId, 26357)
   self.assertEqual(artist.artistToken, 'yukimaruko')

Beispiel #58

0

Datei anzeigen

 def testPixivImageModeManga(self):
     """Parse the saved manga image page and check the id and image mode."""
     # Close the fixture file as soon as its content has been read.
     with open('./test/test-image-manga.htm', 'r') as p:
         page = BeautifulSoup(p.read())
     # Pre-bind so a parse failure shows up as a failed assertion below
     # instead of a NameError.
     image = None
     try:
         image = PixivImage(28820443, page)
     except PixivException as ex:
         print(ex)
     page.decompose()
     del page
     self.assertNotEqual(image, None)
     self.assertEqual(image.imageId, 28820443)
     self.assertEqual(image.imageMode, 'manga')

Beispiel #59

0

Datei anzeigen

 def testPixivImageNoAvatar(self):
     # print('\nTesting artist page without avatar image')
     p = open('./test/test-image-noavatar.htm', 'r')
     page = BeautifulSoup(p.read())
     image = PixivImage(20496355, page)
     page.decompose()
     del page
     # self.assertNotEqual(image, None)
     self.assertEqual(image.artist.artistToken, 'iymt')
     self.assertEqual(image.imageId, 20496355)
     # 07/22/2011 03:09｜512×600｜RETAS STUDIO&nbsp;
     # print(image.worksDate, image.worksResolution, image.worksTools)
     self.assertEqual(image.worksDate, '07/21/11 18:09')
     self.assertEqual(image.worksResolution, '512x600')