def print_character_pinyin(self, characters):
    """Return the pinyin syllables for *characters* and print a mapping line.

    If ``CHARACTER_PINYIN_MAPPING`` has a non-empty entry for *characters*,
    that entry is split on spaces and each piece is passed through
    ``decode_pinyin``.  Otherwise the pinyin is generated with ``Pinyin``
    (presumably xpinyin -- confirm against the file's imports), once with
    tone marks and once with tone numbers.

    A line of the form ``"<characters>": "<tone-number pinyin>",`` is always
    printed.  The tone-number list is only filled on the generated path, so
    for characters that are already mapped the printed value is empty.

    Args:
        characters: The text to look up or convert.

    Returns:
        list: One pinyin string per space-separated syllable.
    """
    mapped = CHARACTER_PINYIN_MAPPING.get(characters, '')
    pinyin_nums = []

    if mapped:
        pinyins = [decode_pinyin(piece) for piece in mapped.split(' ')]
    else:
        # Reuse one converter for both renderings instead of building two.
        converter = Pinyin()
        pinyins = converter.get_pinyin(
            characters, ' ', tone_marks='marks').split(' ')
        pinyin_nums = converter.get_pinyin(
            characters, ' ', tone_marks='numbers').split(' ')

    print(f'''"{characters}": "{''.join(pinyin_nums)}",''')
    return pinyins
def download(self):
    """Search 818ps.com for posters matching ``self.keyword`` and save them.

    The keyword is converted to pinyin, the ``-`` separators are removed,
    and the result is inserted into the site's search URL.  Template ids
    are scraped from the result page, then each template's detail page is
    scraped for a preview-image URL, which is saved to
    ``self.path + keyword + "-" + <serial>-<id> + "." + self.suffix``.

    Reads ``self.keyword``, ``self.count`` (0 means "download everything
    found"), ``self.path``, ``self.suffix``, ``self.isTest`` and appends
    each saved file path to ``self.images``.  When ``self.isTest`` is not
    ``False`` nothing is downloaded; the target path is only printed.

    The module-level global ``condition`` is reset to ``""`` and then set
    to ``"img:none"`` (no results), ``"img:overflow"`` (``self.count``
    exceeds the number of results) or ``"info:error"``
    (``self.infoIsCorrect()`` returned a falsy value).

    Raises:
        urllib.error.URLError: If the initial search request fails.  Errors
            raised while fetching detail pages or images are caught and
            their ``code`` / ``reason`` printed instead.
    """
    global condition
    condition = ""

    def fetch_text(url):
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(url) as response:
            return response.read().decode("utf-8", "ignore")

    # Define what to scrape.
    pinyin = Pinyin().get_pinyin(self.keyword)
    keywordPinyin = "".join(pinyin.split("-"))
    URLToScrap = "https://818ps.com/muban/0-0-0-0-%s-null-52_24_0_0-0-0-0.html?route_id=&route=&after_route=" % str(
        keywordPinyin)
    URLData = fetch_text(URLToScrap)
    imageExp = "/pic/(.*?).html"
    # De-duplicate while keeping page order, so "the first N" is stable
    # between runs (a set would give an arbitrary order).
    imageURLList = list(dict.fromkeys(re.compile(imageExp).findall(URLData)))

    # Validate the request parameters and report problems.
    if not imageURLList:
        print("没有搜索到相关海报")
        condition = "img:none"
        return
    if self.count > len(imageURLList):
        print("数量过多\n最多%s张" % len(imageURLList))
        condition = "img:overflow"
        return
    if not self.infoIsCorrect():
        print('下载信息填写错误!')
        condition = "info:error"
        # Must be called on self; the bare name is not defined here.
        print(self.infoIsCorrect(printReason=True))
        return

    # Rule: a count of 0 means download every result.
    imageCount = self.count if self.count != 0 else len(imageURLList)
    detailImageExp = 'https://img.tuguaishou.com/ips_templ_preview/(.*?)"'
    detailImagePattern = re.compile(detailImageExp)  # hoisted out of the loop

    # Handle request failures for the detail pages and image downloads.
    try:
        for imgSerial in range(imageCount):
            imgNumber = str(imageURLList[imgSerial])
            detailImgURL = "https://818ps.com/detail/" + imgNumber + ".html"
            detailImgURLArg = detailImagePattern.findall(
                fetch_text(detailImgURL))
            finalImgFileID = str(imgSerial) + '-' + str(imgNumber)

            if not detailImgURLArg:
                # No preview on this detail page: skip it rather than
                # reuse a previous URL or fail on an unbound name.
                print("未找到编号为%s的图片预览,已跳过" % finalImgFileID)
                continue

            # The last match is the one that ends up on disk.
            finalImageURL = "https://img.tuguaishou.com/ips_templ_preview/" + str(
                detailImgURLArg[-1])
            print("正在下载:编号为%s的图片..." % finalImgFileID)

            # Save the image.
            file = self.path + str(
                self.keyword) + "-" + finalImgFileID + "." + self.suffix
            # Kept as an equality test so non-bool values of isTest behave
            # exactly as before.
            if self.isTest == False:
                self.images.append(file)
                urllib.request.urlretrieve(finalImageURL, filename=file)
                print("下载完成!目录:", file)
            else:
                print("测试完成!测试伪目录:", file)
    except urllib.error.URLError as e:
        # Report whichever details the error carries.
        if hasattr(e, 'code'):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)