Esempio n. 1
0
 def print_character_pinyin(self, characters):
     """Print a mapping-style line for ``characters`` and return its pinyin syllables.

     The pinyin is looked up in ``CHARACTER_PINYIN_MAPPING`` first. On a hit,
     each space-separated entry of the stored value is passed through
     ``decode_pinyin``. On a miss (or a falsy stored value), ``Pinyin`` is
     asked for both the tone-mark and the tone-number renderings.

     A line of the form ``"<characters>": "<tone-number pinyin>",`` is always
     printed. The tone-number part is only populated on a mapping miss, so
     the printed value is empty when the mapping supplied the pinyin.

     Args:
         characters: The text to look up / convert.

     Returns:
         A list of pinyin syllables for ``characters``.
     """
     mapped = CHARACTER_PINYIN_MAPPING.get(characters, '')

     if mapped:
         # Known entry: decode every stored syllable; no numbered form here.
         syllables = [decode_pinyin(part) for part in mapped.split(' ')]
         numbered_syllables = []
     else:
         # Unknown entry: generate both renderings with space-separated syllables.
         marked = Pinyin().get_pinyin(characters, ' ', tone_marks='marks')
         numbered = Pinyin().get_pinyin(characters, ' ', tone_marks='numbers')
         syllables = marked.split(' ')
         numbered_syllables = numbered.split(' ')

     print(f'''"{characters}": "{''.join(numbered_syllables)}",''')
     return syllables
Esempio n. 2
0
    def download(self):
        """Search 818ps.com for templates matching ``self.keyword`` and download previews.

        The keyword is converted to pinyin, the search-result page is fetched,
        and template IDs are extracted from its ``/pic/<id>.html`` links. For
        each selected template the detail page is fetched and every preview
        image URL found on it is downloaded (or only reported when
        ``self.isTest`` is true).

        Reads ``self.keyword``, ``self.count`` (``0`` means "download all"),
        ``self.path``, ``self.suffix``, ``self.isTest`` and calls
        ``self.infoIsCorrect``. Paths of downloaded files are appended to
        ``self.images``.

        Side effects:
            Sets the module-level global ``condition`` to ``""`` on entry and
            then to ``"img:none"``, ``"img:overflow"`` or ``"info:error"``
            when the corresponding check fails.

        Raises:
            urllib.error.URLError: If the initial search request fails; that
                request is made outside the ``try`` block. Errors raised
                while fetching detail pages or images are caught and their
                ``code`` / ``reason`` printed instead.
        """
        global condition
        condition = ""

        # Build the search URL from the pinyin form of the keyword.
        # NOTE(review): assumes get_pinyin() joins syllables with "-" by default.
        pinyin = Pinyin().get_pinyin(self.keyword)
        keywordPinyin = "".join(pinyin.split("-"))
        URLToScrap = "https://818ps.com/muban/0-0-0-0-%s-null-52_24_0_0-0-0-0.html?route_id=&route=&after_route=" % keywordPinyin
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(URLToScrap) as response:
            URLData = response.read().decode("utf-8", "ignore")

        imageExp = "/pic/(.*?).html"
        # De-duplicate while keeping page order so serial numbers are stable
        # between runs (a plain set() would shuffle the IDs).
        imageURLList = list(dict.fromkeys(re.findall(imageExp, URLData)))

        # Validate the search result and the user-supplied download settings.
        if not imageURLList:
            print("没有搜索到相关海报")
            condition = "img:none"
            return
        if self.count > len(imageURLList):
            print("数量过多\n最多%s张" % len(imageURLList))
            condition = "img:overflow"
            return
        if not self.infoIsCorrect():
            print('下载信息填写错误!')
            condition = "info:error"
            # Must be called on the instance; the bare name is not defined.
            print(self.infoIsCorrect(printReason=True))
            return

        # Rule: a count of 0 means "download everything that was found".
        imageCount = self.count
        if self.count == 0:
            imageCount = len(imageURLList)

        detailImageExp = 'https://img.tuguaishou.com/ips_templ_preview/(.*?)"'

        # Handle request errors for the detail pages and image downloads.
        try:
            for imgSerial in range(imageCount):
                imgNumber = str(imageURLList[imgSerial])
                detailImgURL = "https://818ps.com/detail/" + imgNumber + ".html"
                with urllib.request.urlopen(detailImgURL) as response:
                    detailData = response.read().decode("utf-8", "ignore")
                detailImgURLArg = re.findall(detailImageExp, detailData)

                # The file ID and target path depend only on the template,
                # not on the individual preview match, so compute them once.
                finalImgFileID = str(imgSerial) + '-' + imgNumber
                file = self.path + str(
                    self.keyword) + "-" + finalImgFileID + "." + self.suffix

                for previewSuffix in detailImgURLArg:
                    finalImageURL = "https://img.tuguaishou.com/ips_templ_preview/" + str(
                        previewSuffix)
                    print("正在下载:编号为%s的图片..." % finalImgFileID)

                    # Save the image (skipped in test mode).
                    # Equality check kept on purpose: only a value equal to
                    # False triggers a real download, as before.
                    if self.isTest == False:
                        urllib.request.urlretrieve(finalImageURL,
                                                   filename=file)
                        # Record the file only once the download succeeded.
                        self.images.append(file)
                        print("下载完成!目录:", file)
                    else:
                        print("测试完成!测试伪目录:", file)
        except urllib.error.URLError as e:
            # HTTPError carries a status code; plain URLError only a reason.
            if hasattr(e, 'code'):
                print(e.code)
            if hasattr(e, "reason"):
                print(e.reason)