def name(self):
    info = self.info
    if '/detail/' in self.url:
        title = u'{} (bcy_{}) - {}'.format(clean_title(info['artist']), info['uid'], info['id'])
    else:
        title = u'{} (bcy_{})'.format(clean_title(info['artist']), info['uid'])
    return title
def __init__(self, illust, url, ugoira_data=None, format_name=None):
    self.illust = illust
    self.id = illust.id
    self.type = illust.type
    self.title = illust.title
    self.artist = illust.user.name
    self.url = url
    ps = re.findall('_p([0-9]+)', url)
    self.p = ps[-1] if ps else 0
    self.ext = os.path.splitext(self.url.split('?')[0].split('#')[0])[1]
    if self.type == 'ugoira':
        self.ugoira_data = ugoira_data
    if format_name:
        # two-pass substitution via '###...*' sentinels; see the sketch below
        name = (format_name.replace('id', '###id*')
                           .replace('page', '###page*')
                           .replace('artist', '###artist*')
                           .replace('title', '###title*'))
        name = (name.replace('###id*', str(self.id))
                    .replace('###page*', str(self.p))
                    .replace('###artist*', self.artist)
                    .replace('###title*', self.title))
        self.filename = clean_title(name.strip(), allow_dot=True, n=-len(self.ext)) + self.ext
    else:
        self.filename = os.path.basename(url.split('?')[0].split('#')[0])
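# A self-contained sketch of the two-pass placeholder substitution above
# (apply_format_name is an illustrative name, not from the original code).
# The intermediate '###...*' sentinels keep the second pass from matching
# literal words like 'id' or 'title' that happen to occur inside values
# substituted earlier.
def apply_format_name(format_name, id, page, artist, title):
    name = (format_name.replace('id', '###id*')
                       .replace('page', '###page*')
                       .replace('artist', '###artist*')
                       .replace('title', '###title*'))
    return (name.replace('###id*', str(id))
                .replace('###page*', str(page))
                .replace('###artist*', artist)
                .replace('###title*', title))

assert apply_format_name('artist - title (id)', 1234, 0, 'foo', 'bar') == 'foo - bar (1234)'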
def f(_):
    html = downloader.read_html(url, session=session)
    soup = Soup(html)
    box = soup.find('section', id='picBox')
    img = box.find('img')
    if img is None:
        raise Exception('No img')
    onclick = img.attrs.get('onclick', '')
    if onclick and '.src' in onclick:
        print('onclick', onclick)
        img = re.find('''.src *= *['"](.+?)['"]''', onclick)
    else:
        img = img.attrs['src']
    img = urljoin(url, img)
    filename = clean_title(os.path.basename(img.split('?')[0]))
    name, ext = os.path.splitext(filename)
    # https://www.hentai-foundry.com/pictures/user/DrGraevling/74069/Eversong-Interrogation-pg.-13
    if ext.lower() not in ['.bmp', '.png', '.gif', '.jpg', '.jpeg', '.webp',
                           '.webm', '.avi', '.mp4', '.mkv', '.wmv']:
        filename = u'{}.jpg'.format(name)
    self.filename = filename
    return img
def get(self, url):
    if self._url:
        return self._url
    ydl = youtube_dl.YoutubeDL()
    info = ydl.extract_info(url)
    # get best video
    fs = info['formats']
    fs = sorted(fs, key=lambda x: int(x['width']), reverse=True)
    f = fs[0]
    url_video = f['url']
    # thumb
    self.thumb_url = info['thumbnails'][0]['url']
    self.thumb = BytesIO()
    downloader.download(self.thumb_url, buffer=self.thumb)
    # m3u8
    print(f['protocol'])
    if 'm3u8' in f['protocol']:
        url_video = M3u8_stream(url_video, referer=url)
    # title & filename
    format = self.format.replace('title', '###title').replace('id', '###id')
    self.title = format.replace('###title', info['title']).replace('###id', u'{}'.format(info['id']))
    ext = '.mp4'
    self.filename = clean_title(self.title, n=-len(ext)) + ext
    self._url = url_video
    return self._url
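# A minimal standalone sketch of the "best video" selection above:
# youtube_dl reports one dict per available format, and the widest frame
# wins the sort. (The code above assumes every format carries an
# int-convertible 'width'; real extractors may report None for
# audio-only formats.)
formats = [{'width': 640, 'url': 'low'}, {'width': 1920, 'url': 'high'}]
best = sorted(formats, key=lambda f: int(f['width']), reverse=True)[0]
assert best['url'] == 'high'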
def __init__(self, title, url):
    if title.startswith('NEW'):
        title = title.replace('NEW', '', 1).strip()
    title = fix_title_page(title)
    self.title = clean_title(title)
    self.url = url
    self.id = int(re.findall('wr_id=([0-9]+)', url)[0])
def get_title(self, url: str) -> str:
    if "tags=" not in url:
        return '[N/A]' + url.split('yande.re/')[-1]
    url_tags = url.split("tags=")[-1].split('+')
    return clean_title(" ".join(url_tags))
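# Hypothetical inputs for get_title above (the result also passes through
# clean_title, which is assumed to only strip filesystem-unsafe characters):
#   get_title('https://yande.re/post?tags=landscape+scenic') -> 'landscape scenic'
#   get_title('https://yande.re/pool/show/123')              -> '[N/A]pool/show/123'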
def __init__(self, url, url_thumb, referer, title):
    self.url = LazyUrl(referer, lambda x: url, self)
    self.url_thumb = url_thumb
    self.thumb = BytesIO()
    downloader.download(url_thumb, referer=referer, buffer=self.thumb)
    self.title = title
    ext = '.mp4'
    self.filename = u'{}{}'.format(clean_title(title, n=-len(ext)), ext)
def read(self):
    cw = self.customWidget
    ui_setting = self.ui_setting
    format = compatstr(ui_setting.youtubeFormat.currentText()).lower().strip()
    cw.enableSegment(1024 * 1024 // 2)
    thumb = BytesIO()
    if '/users/' in self.url:
        info = read_channel(self.url, cw)
        urls = info['urls']
        p2f = get_p2f(cw)
        if p2f:
            self.single = False
            self.title = clean_title(info['title'])
            videos = [Video(url, format) for url in info['urls']]
            self.urls = [video.url for video in videos]
            video = videos[0]
            video.url()
            downloader.download(video.info['thumbnail'], buffer=thumb)
            self.setIcon(thumb)
            return
        else:
            cw.gal_num = self.url = urls.pop(0)
            if urls and cw.alive:
                s = u', '.join(urls)
                self.exec_queue.put((s, 'downButton(customWidget)'))
    elif '/photos/gallery/' in self.url:
        info = read_gallery(self.url, cw)
        for img in info['imgs']:
            self.urls.append(img.url)
        self.single = False
        self.title = clean_title(info['title'])
        self.url = info['url']
        cw.disableSegment()
        return
    else:
        urls = []
    video = Video(self.url, format)
    video.url()
    self.urls.append(video.url)
    downloader.download(video.info['thumbnail'], buffer=thumb)
    self.setIcon(thumb)
    self.title = video.title
def read(self):
    for page in get_pages(self.url, self.soup):
        text = Text(page, len(self.urls) + 1)
        self.urls.append(text.url)
    self.artist = self.info['artist']
    self.title = clean_title('[{}] {}'.format(self.artist, self.info['title']),
                             n=-len('[merged] .txt'))
def read(self):
    cw = self.customWidget
    info = get_imgs(self.id, cw=cw)
    for post in info['posts']:
        self.urls.append(post.url)
    self.title = u'{} (bdsmlr_{})'.format(clean_title(info['username']), self.id)
def __init__(self, stream, referer, id, title, url_thumb, format='title'):
    self.url = LazyUrl(referer, lambda x: stream, self)
    self.id = id
    format = format.replace('title', '###title').replace('id', '###id')
    title = format.replace('###title', title).replace('###id', u'{}'.format(self.id))
    self.title = title
    self.filename = u'{}.mp4'.format(clean_title(title))
    self.url_thumb = url_thumb
    self.thumb = BytesIO()
    downloader.download(url_thumb, buffer=self.thumb)
def read(self):
    cw = self.customWidget
    artist = get_artist(self.id, self.soup)
    if cw:
        cw.artist = artist
    for img in get_imgs(self.url, artist, cw=cw):
        self.urls.append(img.url)
    self.title = clean_title(artist)
def id(self):
    if self.type_sankaku == 'www':
        id = u'[www] ' + self.soup.find('h1', class_='entry-title').text.strip()
    else:
        qs = query_url(self.url)
        tags = qs.get('tags', [])
        tags.sort()
        id = u' '.join(tags)
        if not id:
            id = u'N/A'
        id = '[{}] '.format(self.type_sankaku) + id
    return clean_title(id)
def __init__(self, url, url_page, title, url_thumb, format='title'):
    self._url = url
    self.url = LazyUrl(url_page, self.get, self)
    self.id = get_id(url_page)
    ext = '.mp4'
    self.title = title = clean_title(title)
    format = format.replace('title', '###title').replace('id', '###id')
    title = format.replace('###title', title).replace('###id', u'{}'.format(self.id))
    self.filename = u'{}{}'.format(title, ext)
    f = IO()
    self.url_thumb = url_thumb
    downloader.download(url_thumb, buffer=f)
    self.thumb = f
def id(self):
    if self._id is None:
        parsed_url = urlparse(self.url)
        qs = parse_qs(parsed_url.query)
        if 'page=favorites' in self.url:
            id = qs.get('id', ['N/A'])[0]
            id = u'fav_{}'.format(id)
        else:
            tags = qs.get('tags', [])
            tags.sort()
            id = u' '.join(tags)
            if not id:
                id = u'N/A'
        self._id = id
    return clean_title(self._id)
def __init__(self, url, format='title'):
    ydl = youtube_dl.YoutubeDL()
    info = ydl.extract_info(url)
    f = info['formats'][-1]
    url_video = f['url']
    self.url = LazyUrl(url, lambda _: url_video, self)
    self.url_thumb = info['thumbnails'][0]['url']
    self.thumb = BytesIO()
    downloader.download(self.url_thumb, buffer=self.thumb)
    format = format.replace('title', '###title').replace('id', '###id')
    self.title = format.replace('###title', info['title']).replace('###id', '{}'.format(info['id']))
    ext = get_ext(url_video)
    self.filename = clean_title(self.title, n=-len(ext)) + ext
def id(self):
    if self._id is None:
        parsed_url = urlparse(self.url)
        qs = parse_qs(parsed_url.query)
        if 'donmai.us/favorites' in self.url:
            id = qs.get('user_id', [''])[0]
            print('len(id) =', len(id), u'"{}"'.format(id))
            assert len(id) > 0, '[Fav] User id is not specified'
            id = u'fav_{}'.format(id)
        else:
            tags = qs.get('tags', [])
            tags.sort()
            id = u' '.join(tags)
            if not id:
                id = u'N/A'
        self._id = id
    return clean_title(self._id)
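# A standard-library-only sketch of the id derivation above (derive_id is
# an illustrative name; the real property also caches the result and runs
# it through clean_title to make it filesystem-safe):
from urllib.parse import urlparse, parse_qs

def derive_id(url):
    qs = parse_qs(urlparse(url).query)
    if 'donmai.us/favorites' in url:
        return u'fav_{}'.format(qs.get('user_id', [''])[0])
    tags = sorted(qs.get('tags', []))
    return u' '.join(tags) or u'N/A'

# Repeated parameters are sorted, so equivalent queries map to one title:
assert derive_id('https://danbooru.donmai.us/posts?tags=b&tags=a') == 'a b'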
def get(self, url):
    if self._url:
        return self._url
    self.info = get_info(url)
    title = self.info['title']
    id = self.info['id']
    format = self.format.replace('title', '###title*').replace('id', '###id*')
    self.title = format.replace('###title*', title).replace('###id*', u'{}'.format(id))
    video_best = self.info['formats'][-1]
    self._url = video_best['url']
    ext = os.path.splitext(self._url.split('?')[0])[1]
    self.filename = u'{}{}'.format(clean_title(self.title, n=-len(ext)), ext)
    return self._url
def get_video(url, format='title'):
    options = {}
    ydl = youtube_dl.YoutubeDL(options)
    info = ydl.extract_info(url)
    video_best = info['formats'][-1]
    video = video_best['url']
    print(video)
    ext = os.path.splitext(video.split('?')[0])[1].lower()[1:]
    title = info['title']
    id = info['display_id']
    format = format.replace('title', '###title*').replace('id', '###id*')
    title = format.replace('###title*', title).replace('###id*', u'{}'.format(id))
    title = clean_title(title, allow_dot=True)
    if ext == 'm3u8':
        video = M3u8_stream(video, n_thread=4)
        video = Video(video, url, u'{}.{}'.format(title, 'mp4'))
    else:
        video = Video(video, url, u'{}.{}'.format(title, ext))
    return info, video
def __init__(self, url, page, p, img1=None):
    ext = os.path.splitext(url.split('?')[0])[1]
    if ext.lower()[1:] not in ('jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp'):
        ext = '.jpg'
    self.filename = u'{}/{:04}{}'.format(clean_title(page.title), p, ext)
    # img1, img, img1, s3, img1, img, img1, s3
    #self._url = img1 or url
    #url_alter = Url_alter(url, img1)
    # img, s3, img1, img, img1, s3, img1, img_without_//
    self._url = url or img1
    url_alter = Url_alter(url, img1)
    url_alter.c_s3 += 1
    if not self._url:
        raise Exception('no url')
    self.url = LazyUrl(page.url, lambda _: self._url, self, url_alter=url_alter)
def get_name(user_id, api=None, cw=None):
    print_ = get_print(cw)
    for try_ in range(N_TRY):  # 1450
        try:
            info = api.user_detail(user_id)
            error = info.get('error')
            if error:
                # '存在しない作品IDです' = "This work ID does not exist"
                if u'存在しない作品IDです' in error['user_message']:
                    raise Exception(u'ID does not exist')
                raise PixivError(error)
            break
        except PixivError as e:
            api = e.api
            print_(e)
            if try_ < N_TRY - 1:
                print_('retry...')
                sleep(SLEEP)
            else:
                raise
    name = info.user.name
    name = clean_title(name)
    return name
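# The loop above is a bounded-retry pattern; a generic standalone
# equivalent (with_retries, n_try and sleep_s are illustrative names, and
# the real code retries only on PixivError, swapping in the API instance
# attached to the exception):
import time

def with_retries(fn, n_try=3, sleep_s=5):
    for try_ in range(n_try):
        try:
            return fn()
        except Exception as e:
            print(e)
            if try_ < n_try - 1:
                print('retry...')
                time.sleep(sleep_s)
            else:
                raise

# e.g. name = with_retries(lambda: api.user_detail(user_id))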
def name(self):
    return clean_title(self.soup.find('title').text.replace('- ASIANSISTER.COM', '').strip())
def __init__(self, url, title):
    self.title = clean_title(title)
    self.url = url
def get_videos(url, cw=None, depth=0):
    print_ = get_print(cw)
    if utils.ui_setting:
        res_text = compatstr(utils.ui_setting.youtubeCombo_res.currentText())
        res = {'720p': 720, '1080p': 1080, '2K': 1440, '4K': 2160, '8K': 4320}[res_text]
    else:
        res = 720
    mobj = re.match(_VALID_URL, url)
    video_id = mobj.group('id')
    anime_id = mobj.group('anime_id')
    print(video_id, anime_id)
    html = downloader.read_html(url, methods={'requests'})
    soup = Soup(html)
    title = soup.find('h1').attrs['title'].strip()
    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
    p = get_page(url)
    if p is None:
        p = 1
    print('page:', p)
    if p > 1:
        pages = get_pages(html)
        cid = pages[p - 1]['cid']
    else:
        cid = re.findall(r'\bcid(?:["\']:|=)(\d+)', html)[0]
    print_('cid: {}'.format(cid))
    headers = {'Referer': url}
    entries = []
    RENDITIONS = ['qn={}&quality={}&type='.format(qlt, qlt)
                  for qlt in RESOLS.keys()]  # + ['quality=2&type=mp4']
    for num, rendition in enumerate(RENDITIONS, start=1):
        print('####', num, rendition)
        payload = 'appkey=%s&cid=%s&otype=json&%s' % (_APP_KEY, cid, rendition)
        sign = hashlib.md5((payload + _BILIBILI_KEY).encode('utf-8')).hexdigest()
        url_json = 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign)
        s_json = downloader.read_html(url_json)
        print(s_json[:1000])
        video_info = json.loads(s_json)
        if not video_info:
            continue
        if 'durl' not in video_info:
            print('#### error', num)
            if num < len(RENDITIONS):
                continue
            msg = video_info.get('message')
            if msg:
                raise Exception(msg)
        quality = video_info['quality']
        resolution = get_resolution(quality)
        print_(u'resolution: {}'.format(resolution))
        # 2184
        if int(re.find('([0-9]+)p', resolution)) > res:
            print_('skip resolution')
            continue
        for idx, durl in enumerate(video_info['durl']):
            # 1343
            if idx == 0:
                size = downloader.get_size(durl['url'], referer=url)
                if size < 1024 * 1024 and depth == 0:
                    print_('size is too small')
                    return get_videos(url, cw, depth + 1)
            formats = [{'url': durl['url'], 'filesize': int_or_none(durl['size'])}]
            for backup_url in durl.get('backup_url', []):
                formats.append({'url': backup_url,
                                'preference': -2 if 'hd.mp4' in backup_url else -3})
            for a_format in formats:
                a_format.setdefault('http_headers', {}).update({'Referer': url})
            entries.append({'id': '%s_part%s' % (video_id, idx),
                            'duration': float_or_none(durl.get('length'), 1000),
                            'formats': formats})
        break
    videos = []
    for entry in entries:
        url_video = entry['formats'][0]['url']
        video = Video(url_video, url, video_id, len(videos))
        videos.append(video)
    info = {'title': clean_title(title), 'url_thumb': url_thumb}
    return videos, info
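# A standalone sketch of the playurl signing above: the sign is simply the
# md5 hex digest of the query string concatenated with an app secret (the
# secret below is a placeholder, not the real _BILIBILI_KEY):
import hashlib

def sign_payload(payload, secret):
    return hashlib.md5((payload + secret).encode('utf-8')).hexdigest()

# e.g.
# payload = 'appkey=APPKEY&cid=123&otype=json&qn=80&quality=80&type='
# url_json = 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (
#     payload, sign_payload(payload, 'SECRET'))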
def name(self):
    title = self.soup.find('h1', class_='name').span.text.strip()
    title = u'{} (pawoo_{})'.format(title, self.id)
    return clean_title(title)
def read(self):
    type = self.pixiv_type
    cw = self.customWidget
    print_ = cw.print_
    ui_setting = self.ui_setting
    if type == 'following':
        raise NotImplementedError('following')
    self._format = [None, 'gif', 'webp', 'png'][ui_setting.ugoira_convert.currentIndex()]
    self._format_name = compatstr(ui_setting.pixivFormat.currentText())
    types = [t.lower() for t in query_url(self.url).get('type', [])]
    if types:
        s = u', '.join(sorted(types))
        types = set(types)
    else:
        s = 'all'
        types = None
    print_(u'Type: {}'.format(s))
    print_(u'info: {}'.format(self.info))
    api = self.api
    query = self.id.replace('_bmk', '').replace('_illust', '').replace('pixiv_', '').replace('search_', '')
    if type != 'search':
        query = int(query)
    print('pixiv_query:', query)
    try:
        if type in ('user', 'bookmark', 'search'):
            max_pid = get_max_range(cw, 2000)
            if ui_setting.groupBox_tag.isChecked():
                tags = [compatstr(ui_setting.tagList.item(i).text())
                        for i in range(ui_setting.tagList.count())]
            else:
                tags = []
            if type == 'search':
                query = query.replace('+', ' ')
                name = query
            else:
                id = self.id.replace('_bmk', '').replace('pixiv_', '').replace('search_', '')
                print('name', id)
                name = get_name(id, self.api, cw=cw)
                cw.artist = name
            title = u'{} ({})'.format(name, self.id)
            print_(title)
            dir = os.path.join(get_outdir('pixiv'), clean_title(title))
            imgs = get_imgs(query, type=type, api=api, n=max_pid, tags=tags,
                            types=types, format=self._format,
                            format_name=self._format_name, dir=dir,
                            cw=cw, title=title, info=self.info)
        elif type == 'illust':
            for try_ in range(N_TRY):
                try:
                    detail = api.illust_detail(query, req_auth=True)
                    error = detail.get('error')
                    if error:
                        raise PixivError(error)
                    break
                except PixivError as e:
                    api = e.api
                    print_(e)
                    if try_ < N_TRY - 1:
                        print_('retry...')
                        sleep(SLEEP)
                    else:
                        raise
            illust = detail.illust
            name = illust.title
            title = u'{} ({})'.format(name, self.id)
            dir = os.path.join(get_outdir('pixiv'), clean_title(title))
            imgs = get_imgs_from_illust(illust, api=api, format=self._format,
                                        dir=dir, cw=cw, format_name=self._format_name)
    except PixivError as e:
        msg = u'PixivError: {}'.format(e.message)
        return self.Invalid(msg)
    self.imgs = imgs
    for img in imgs:
        self.urls.append(img.url)
        self.filenames[img.url] = img.filename
    self.title = clean_title(title)  # 1390
def name(self):
    return clean_title(self.id)
def name(self):
    if self._name is None:
        self._name = clean_title(self._info.name())
    return self._name
def __init__(self, url, referer, title, id):
    self.url = LazyUrl(referer, lambda _: url, self)
    ext = os.path.splitext(url.split('?')[0])[1]
    n = len(id) + len(ext) + 3
    title = clean_title(title, n=-n)
    self.filename = u'{} - {}{}'.format(id, title, ext)
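# Worked example of the truncation budget above, assuming clean_title's
# negative n shrinks its length limit by that many characters: the budget
# reserves exactly what u'{} - {}{}'.format(id, title, ext) adds around
# the title.
id, ext = '12345', '.jpg'
n = len(id) + len(ext) + 3  # 3 covers the ' - ' separator -> 12 reserved
# title is truncated with n=-12, so the assembled filename still fits.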
def __init__(self, page, p):
    self.page = page
    self.url = LazyUrl(page.url, self.get, self)
    self.filename = clean_title('[{:04}] {}'.format(p, page.title), n=-4) + '.txt'