def select():
    """Ask the user which Youtube download format to use.

    Returns 'mp4' or 'mp3' when the ask-setting is enabled and the user
    picked an option; otherwise falls through and returns None.
    """
    if utils.ui_setting.askYoutube.isChecked():
        value = utils.messageBox(
            tr_(u'Youtube format?'),
            icon=QtGui.QMessageBox.Question,
            buttons=[tr_(u'MP4 (동영상)'), tr_(u'MP3 (음원)')])
        # value indexes the button list: 0 -> mp4, 1 -> mp3.
        # Renamed from `format` to avoid shadowing the builtin.
        fmt = ['mp4', 'mp3'][value]
        return fmt
def get_imgs(id, title, session, type=None, cw=None):
    """Collect every image for an ArtStation user feed.

    id: ArtStation user name; type: feed name ('projects' by default).
    Returns a list of images; [] if the download widget dies mid-read.
    """
    print_ = get_print(cw)
    if type is None:
        type = 'projects'
    referer = 'https://www.artstation.com/{}'.format(id)
    html = downloader.read_html(referer, session=session)
    print(session.cookies.keys())

    # Phase 1: page through the JSON feed and accumulate post records.
    items = []
    for p in range(1, 1000):
        url = 'https://www.artstation.com/users/{}/{}.json?page={}'.format(
            id, type, p)
        print(url)
        for try_ in range(4):
            try:
                html = downloader.read_html(url, session=session, referer=referer)
                break
            except Exception as e:
                print(e)
        else:
            raise
        payload = json.loads(html)
        batch = payload['data']
        if not batch:
            break
        items += batch
        if cw:
            if not cw.alive:
                return []
            cw.setTitle(('{} {} - {}').format(tr_('페이지 읽는 중...'), title, len(items)))
        else:
            print(len(items))

    # Phase 2: resolve each post into its images.
    imgs = []
    for item in items:
        date = item['created_at'][2:10]
        post_url = item['permalink']
        print('post_url', post_url)
        id_art = get_id_art(post_url)
        imgs += get_imgs_page(id_art, session, date=date, cw=cw)
        if cw:
            if not cw.alive:
                return []
            cw.setTitle(('{} {} - {}').format(tr_('이미지 읽는 중...'), title, len(imgs)))
        else:
            print(len(imgs))
    return imgs
def f(urls):
    """Prompt the user for the number of download threads (1-4).

    Returns the chosen count, or None when the dialog is cancelled.
    """
    from Qt import QInputDialog
    n_thread, ok = QInputDialog.getInt(
        Downloader.mainWindow, tr_('Set number of threads'),
        tr_('Number of threads?'), value=DEFAULT_N_THREAD,
        min=1, max=4, step=1)
    if ok:
        return n_thread
    return
def get_imgs(url, title, session, soup=None, cw=None):
    """Read every chapter of a jmana title and gather its images.

    Reuses previously downloaded chapters via get_imgs_already.
    Raises when nothing at all could be collected.
    """
    print_ = get_print(cw)
    if soup is None:
        soup = Soup(downloader.read_html(url, session=session))
    pages = get_pages(url, soup=soup)
    print_('pages: {}'.format(len(pages)))
    pages = page_selector.filter(pages, cw)
    imgs = []
    for i, page in enumerate(pages):
        cached = get_imgs_already('jmana', title, page, cw)
        if cached:
            # Chapter already on disk; skip the network round-trip.
            imgs += cached
            continue
        imgs += get_imgs_page(page, url, session, cw)
        if cw is not None:
            if not cw.alive:
                return
            cw.setTitle((u'{} {} / {} ({} / {})').format(
                tr_(u'\uc77d\ub294 \uc911...'), title, page.title,
                i + 1, len(pages)))
    if not imgs:
        raise Exception('no imgs')
    return imgs
def f(url):
    """Page-selector hook: reject viewer URLs and load the chapter list."""
    if '/viewer/' in url:
        raise Exception(tr_('목록 주소를 입력해주세요'))
    soup = Soup(read_html(url))
    return get_pages(soup, url)
def f(html, browser=None):
    # One polling step of the TikTok channel crawler: parse the rendered
    # page, harvest new video ids, scroll for more content and report
    # progress. Returns True when crawling should stop (enough items, or
    # too many consecutive empty polls).
    # NOTE(review): relies on closure/module state not visible here:
    # sd, info, ids, max_pid, cw, print_, PATTERN_VID — confirm in caller.
    soup = Soup(html)
    if is_captcha(soup):
        # Captcha shown: surface the embedded browser so the user can solve it.
        print('captcha')
        browser.show()
        sd['shown'] = True
    elif sd['shown']:
        # Captcha resolved; hide the browser again.
        browser.hide()
        sd['shown'] = False
    try:
        # Profile header fields; may be absent while the page is loading.
        info['uid'] = soup.find('h2', class_='share-title').text.strip()
        info['nickname'] = soup.find(
            'h1', class_='share-sub-title').text.strip()
    except Exception as e:
        print_(print_error(e)[0])
    c = 0  # count of NEW video ids found in this pass
    ids_now = set()  # every id visible in this pass, new or already seen
    for div in soup.findAll('div', class_='video-feed-item'):
        a = div.find('a')
        if a is None:
            continue
        href = a['href']
        if not href:
            continue
        m = re.search(PATTERN_VID, href)
        if m is None:
            continue
        id_video = int(m.group('id'))
        ids_now.add(id_video)
        if id_video in ids:
            continue  # already collected in an earlier pass
        ids.add(id_video)
        info['items'].append({'id': id_video})
        c += 1
    print_('items: {}'.format(len(info['items'])))
    if len(info['items']) >= max_pid:
        # Collected enough items; trim to the limit and stop.
        info['items'] = info['items'][:max_pid]
        return True
    # Trigger lazy-loading by scrolling to the bottom, then wait.
    browser.runJavaScript(
        'window.scrollTo(0, document.body.scrollHeight);')
    sleep(15, cw)
    if c or (ids_now and min(ids_now) > min(ids)):
        # Progress was made this pass; reset the empty-poll counter.
        sd['count_empty'] = 0
    else:
        print_('empty')
        sd['count_empty'] += 1
    msg = '{} {} (tiktok_{}) - {}'.format(tr_('읽는 중...'),
                                          info.get('nickname'),
                                          info.get('uid'),
                                          len(info['items']))
    if cw:
        if not cw.alive:
            raise Exception('cw dead')
        cw.setTitle(msg)
    else:
        print(msg)
    # Give up after more than 4 consecutive empty polls.
    return sd['count_empty'] > 4
def get_stories(url, title=None, cw=None, session=None):
    """Fetch story and highlight-reel images for an Instagram profile.

    Returns the images sorted with sort_str(); returns early (None) when
    the download widget dies while reading.
    """
    print_ = get_print(cw)
    html = downloader.read_html(url, session=session)
    data = get_sd(url, html=html, cw=cw)
    uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
    csrf_token = data['config']['csrf_token']
    # session.cookies.set(name='ig_pr', value='1', path='/', domain='.instagram.com')
    print('uploader_id:', uploader_id)
    variables = {
        'user_id': uploader_id,
        'include_chaining': True,
        'include_reel': True,
        'include_suggested_users': False,
        'include_logged_out_extras': False,
        'include_highlight_reels': True,
        'include_live_status': True,
    }
    j = get_query('d4d88dc1500312af6f937f7b804c68c3', variables, session, cw)
    imgs = []
    seen_ids = set()
    # First entry is the user's own story; the rest are highlight reels.
    edges = j['data']['user']['edge_highlight_reels']['edges']
    edges.insert(0, str(uploader_id))
    for i, edge in enumerate(edges):
        if isinstance(edge, str):
            reel_id, hid, url_str = edge, None, url
        else:
            reel_id = None
            hid = edge['node']['id']
            url_str = 'https://www.instagram.com/stories/highlights/{}/'.format(hid)
        try:
            for img in get_stories_single(url_str, id=reel_id, cw=cw,
                                          session=session):
                if img.id in seen_ids:
                    print('duplicate: {}'.format(img.id))
                    continue
                seen_ids.add(img.id)
                imgs.append(img)
            print_('stories: {}'.format(hid))
        except Exception as e:
            # Best-effort: a single failed reel does not abort the rest.
            print_(u'Failed to get stories: {}'.format(hid))
            print(e)
        msg = u'{} {} ({}/{})'.format(tr_(u'스토리 읽는 중...'), title,
                                      i+1, len(edges))
        if cw:
            if not cw.alive:
                return
            cw.setTitle(msg)
        else:
            print(msg)
    return sort_str(imgs)
def read(self):
    """Validate the list page, then collect image URLs into self.urls.

    Fix: renamed the local `list` (shadowed the builtin) to `list_body`.
    """
    list_body = self.soup.find('ul', class_='list-body')
    if list_body is None:
        return self.Invalid(tr_('목록 주소를 입력해주세요: {}').format(self.url))
    self.title = tr_('읽는 중... {}').format(self.name)
    self.artist = get_artist(self.soup)
    imgs = get_imgs(self.url, self.soup, self.session, self.customWidget)
    for img in imgs:
        # get_imgs may yield Image objects or plain URL strings.
        if isinstance(img, Image):
            self.urls.append(img.url)
        else:
            self.urls.append(img)
    self.title = self.name
def post_processing(self):
    # Convert downloaded Pixiv ugoira zip archives into the configured
    # animated format (e.g. gif) after the download finishes, updating the
    # widget's progress bar and thumbnail along the way.
    cw = self.customWidget
    ui_setting = self.ui_setting
    format = self._format  # target extension; None -> no conversion
    if cw is not None and format is not None:
        try:
            dither = ui_setting.checkDither.isChecked()
            quality = ui_setting.ugoira_quality.value()
        except Exception as e:
            # UI settings unavailable; fall back to sensible defaults.
            print(e)
            dither = True
            quality = 90
        # Collect ugoira items that were actually downloaded as .zip.
        imgs_ugoira = []
        for img in self.imgs:
            if img.url not in cw.urls:
                continue
            if img.type == 'ugoira':
                if os.path.splitext(img.url)[1].lower() == '.zip':
                    imgs_ugoira.append(img)
        for j, img in enumerate(imgs_ugoira):
            if not cw.valid or not cw.alive:
                return
            # Show conversion progress in the widget's progress bar.
            self.exec_queue.put(
                (cw, (u'customWidget.pbar.setFormat(u"[%v/%m] {} [{}/{}]")'
                 ).format(tr_(u'움짤 변환...'), j, len(imgs_ugoira))))
            filename = os.path.join(self.dir, img.filename)
            out = os.path.splitext(filename)[0] + '.' + format
            cw.print_((u'convert ugoira: {} --> {}').format(filename, out))
            try:
                # Per-frame delays drive the output animation timing.
                duration = [
                    frame.delay for frame in img.ugoira_data.frames
                ]
                self.print_((u'Duration: {}').format(duration))
                ffmpeg.gif(filename, out, duration=duration, dither=dither,
                           quality=quality, cw=cw)
            except Exception as e:
                # Conversion failure is non-fatal; keep the original zip.
                self.print_(print_error(e)[0])
                continue
            if not cw.valid or not cw.alive:
                return
            try:
                # Swap the zip for the converted file in the widget state.
                self.removeDirList.append((filename, False))
                cw.dones.add(out)
                i = cw.urls.index(img.url)
                cw.imgs[i] = out
                if i == 0:
                    cw.firstImg = out
                    cw.setIcon(out)
            except Exception as e:
                return self.Invalid(e=e)
        # Restore the default progress-bar format.
        self.exec_queue.put(
            (cw, u'customWidget.pbar.setFormat("[%v/%m]")'))
def get_imgs(url, title, soup=None, session=None, cw=None, pages=None):
    """Collect images for a manamoa title, reusing completed chapter folders.

    Fixes:
    - `cw.print_` was called in the skip branch without a None-guard even
      though every other cw use is guarded; crashes in console mode.
    - Renamed local `dir` (shadowed the builtin) to `dir_`.
    """
    if soup is None or session is None:
        data = get_soup(url, cw=cw)
        soup, session = data['soup'], data['session']
    if pages is None:
        pages = get_pages(soup, url, cw)
    pages = page_selector.filter(pages, cw)
    imgs = []
    for i, page in enumerate(pages):
        dir_ = os.path.join(get_outdir('manamoa'), title, page.title)
        print('test dir:', dir_)
        if SKIP and isDoneFolder(dir_, page, cw=cw):
            # Chapter already fully downloaded; reuse the files on disk.
            msg = (u'Skip: {}').format(page.title)
            if cw is not None:
                cw.print_(msg)
            else:
                print(msg)
            for p, img in enumerate(sorted(os.listdir(dir_))):
                img = os.path.join(dir_, img)
                imgs.append(img)
            continue
        if cw is not None:
            if not cw.alive:
                return
            cw.setTitle((u'{} {} / {} ({} / {})').format(
                tr_(u'\uc77d\ub294 \uc911...'), title, page.title,
                i + 1, len(pages)))
        imgs += get_imgs_page(page, session, cw)
    return imgs
def f(url):
    """Page-selector hook: validate the list page and return its chapters.

    Fix: renamed the local `list` (shadowed the builtin) to `list_body`.
    """
    session, soup, url = get_soup(url)
    list_body = soup.find('ul', class_='list-body')
    if list_body is None:
        raise Exception(tr_('목록 주소를 입력해주세요'))
    pages = get_pages(url, soup)
    return pages
def get_imgs(url, title, cw=None):
    """Walk the paginated photo list and collect Image entries.

    Raises errors.LoginRequired when even the first page has no gallery.
    """
    print_ = get_print(cw)
    imgs = []
    for p in range(1, 1001):
        url = setPage(url, p)
        print_(url)
        soup = read_soup(url)
        gallery = soup.find('div', class_='photos-list')
        if gallery is None:
            if p == 1:
                raise errors.LoginRequired()
            break # Guest user
        for tag in gallery.findAll('img'):
            src = tag.attrs['data-src']
            imgs.append(Image(src, url, len(imgs)))
        pgn = soup.find('ul', class_='pagination')
        ps = [getPage(a.attrs['href']) for a in pgn.findAll('a')] if pgn else []
        if not ps or p >= max(ps):
            print('max p')
            break
        msg = '{} {} ({} / {})'.format(tr_('읽는 중...'), title, p, max(ps))
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)
    return imgs
def get_imgs(url, soup=None, session=None, cw=None):
    """Collect images for a gallery, handling single- and multi-page layouts."""
    if soup is None:
        soup = Soup(read_html(url, session=session, cw=cw))
    title = get_title(soup)
    pagination = soup.find('div', class_='pagination')
    if pagination is None:
        # Single page: wrap the current soup as the only page.
        only_page = Page(None, url, soup)
        imgs = get_imgs_page(only_page, session=session, cw=cw)
    else:
        pages = get_pages(url, soup, session=session)
        imgs = []
        for i, page in enumerate(pages):
            status = '{} {} / {} ({} / {})'.format(
                tr_('읽는 중...'), title, page.title, i + 1, len(pages))
            if cw:
                if not cw.alive:
                    return
                cw.setTitle(status)
            else:
                print(status)
            imgs += get_imgs_page(page, session=session, cw=cw)
    if not imgs:
        raise Exception('no imgs')
    return imgs
def process_ids(ids, info, imgs, cw, depth=0):
    """Resolve each Pixiv illust id into its images, appending to `imgs`.

    Stops once max_pid images are collected. At depth 0, a login-required
    or "unknown error" failure aborts the whole extraction; deeper levels
    log and continue.

    Fix: re-raise with bare `raise` instead of `raise e` to preserve the
    original traceback.
    """
    print_ = get_print(cw)
    max_pid = get_max_range(cw)
    for i, id_illust in enumerate(ids):
        try:
            info_illust = get_info(
                'https://www.pixiv.net/en/artworks/{}'.format(id_illust),
                cw, depth=depth + 1)
        except Exception as e:
            if depth == 0 and (e.args and e.args[0] == '不明なエラーが発生しました'
                               or type(e) == errors.LoginRequired
                               ):  # logout during extraction
                raise
            print_('process_ids error ({}):\n{}'.format(
                depth, print_error(e)[0]))
            continue
        imgs += info_illust['imgs']
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['title'], len(imgs))
        if cw:
            cw.setTitle(s)
        else:
            print(s)
        if len(imgs) >= max_pid:
            break
    if depth == 0:
        check_alive(cw)
def get_imgs(id, api, cw=None, title=None, type='board'):
    """Collect pins from a Pinterest board or board section.

    Fix: removed a stray bare `print` expression (a Python-2 leftover that
    is a no-op expression statement in Python 3).
    """
    imgs = []
    ids = set()
    print('get_imgs: type={}'.format(type))
    if type == 'board':
        gen = api.board_pins(id)
    elif type == 'section':
        gen = api.board_section_pins(id)
    else:
        raise Exception((u'Type "{}" is not supported').format(type))
    for img in gen:
        if 'images' not in img:
            # Pin without image payload (e.g. story pin); skip it.
            print('skip img:', img['id'])
            continue
        img = Image(img)
        if img.id in ids:
            print('duplicate:', img.id)
            continue
        ids.add(img.id)
        print(img.url)
        print(img.filename)
        imgs.append(img)
        if cw is not None:
            if not cw.alive:
                return []
            cw.setTitle((u'{} {} ({})').format(tr_(u'\uc77d\ub294 \uc911...'),
                                               title, len(imgs)))
    return imgs
def get_imgs(url, soup=None, session=None, cw=None):
    """Collect images for a manatoki title, reusing non-empty chapter folders."""
    print_ = get_print(cw)
    if soup is None or session is None:
        session, soup = get_soup(url)
    pages = get_pages(url, soup)
    pages = page_selector.filter(pages, cw)
    title = get_title(soup)
    imgs = []
    for i, page in enumerate(pages):
        chapter_dir = os.path.join(get_outdir('manatoki'), title, page.title)
        print('test dir:', chapter_dir)
        if SKIP and size_folder(chapter_dir) > 0:
            # Chapter folder already populated; reuse the files on disk.
            print_('Skip: {}'.format(page.title))
            imgs.extend(os.path.join(chapter_dir, name)
                        for name in sorted(os.listdir(chapter_dir)))
            continue
        new_imgs = get_imgs_page(page, url, session, cw)
        imgs += new_imgs
        status = '{} {} / {} ({} / {})'.format(
            tr_('읽는 중...'), title, page.title, i + 1, len(pages))
        print_('{} {}'.format(page.title, len(new_imgs)))
        if cw is not None:
            if not cw.alive:
                return
            cw.setTitle(status)
        else:
            print('read page... {} ({})'.format(page.url, len(imgs)))
    return imgs
def get_imgs(url, title=None, cw=None):
    """Collect all photos from a Flickr album or user page."""
    flickr_auth.get_api(title, cw)
    if not flickr_auth.isAuth:
        raise Exception('No Auth')
    # Album URLs are read through the photoset; otherwise the whole user.
    if '/albums/' in url:
        user, ps = find_ps(url)
        handle = ps
    else:
        user = flickr_api.Person.findByUrl(url)
        handle = user
    photos = []
    per_page = 500
    for page in range(1, 200):
        batch = handle.getPhotos(per_page=per_page, page=page)
        photos += batch
        if len(batch) < per_page:
            break  # short page -> last page
        msg = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(photos))
        if cw:
            if not cw.alive:
                break
            cw.setTitle(msg)
        else:
            print(msg)
    return [Image(photo) for photo in photos]
def read(self):
    """Follow rel=next pagination, collecting direct-link image URLs."""
    cw = self.customWidget
    title = self.get_title(self.url)
    ids = set()
    url = self.url
    while True:
        html = urlopen(url)
        soup = BeautifulSoup(html, "html.parser")
        for image_html in soup.find_all(attrs={'class': 'directlink'}, href=True):
            image_url = image_html['href']
            id_ = self.get_id(image_url)
            if id_ in ids:
                self.print_('duplicate: {}'.format(id_))
                continue
            ids.add(id_)
            self.urls.append(image_url)
            self.filenames[image_url] = self.get_filename(image_url)
        if not cw.alive:
            break
        cw.setTitle('{} {} - {}'.format(tr_('읽는 중...'), title, len(self.urls)))
        next_page = soup.find('a', attrs={'rel': 'next'}, href=True)
        if not next_page:
            break
        url = urljoin(self.url, next_page['href'])
    self.title = title
def read_channel(url, cw=None):
    """Collect post URLs for a channel, page by page.

    Returns {'title': ..., 'urls': [...]}.
    """
    print_ = get_print(cw)
    username = url.split('/users/')[1].split('/')[0]
    info = {}
    soup = downloader.read_soup(url)
    display_name = soup.find('div', class_='user-name').text.strip()
    info['title'] = u'[Channel] {}'.format(display_name)
    urls = []
    seen = set()
    for page_no in range(1, 101):
        items = read_page(username, page_no, cw)
        if not items:
            print('no items')
            break
        for item in items:
            if item.find('span', class_='thumb-image-container__status-text'): #2858
                continue
            url = item.a.attrs['href']
            if url in seen:
                print('duplicate:', url)
                continue
            seen.add(url)
            urls.append(url)
        status = '{} {} - {}'.format(tr_('읽는 중...'), info['title'], len(urls))
        if cw:
            cw.setTitle(status)
        else:
            print(status)
    info['urls'] = urls
    return info
def init(self):
    """Normalize the lhscan/loveheaven URL and validate the series page."""
    self.url = self.url.replace('lhscan.net', 'loveheaven.net')
    self.session = Session()
    #clf2.solve(self.url, session=self.session, cw=self.cw)
    # Not a series page when the manga-info list is missing/empty.
    if not self.soup.find('ul', class_='manga-info'):
        self.Invalid(u'{}: {}'.format(tr_(u'목록 주소를 입력해주세요'), self.url))
def get_imgs(url, title, soup=None, session=None, cw=None):
    """Collect images for a manatoki title, reusing already-read chapters."""
    print_ = get_print(cw)
    if soup is None or session is None:
        session, soup, url = get_soup(url, session)
    pages = get_pages(url, soup)
    pages = page_selector.filter(pages, cw)
    imgs = []
    for i, page in enumerate(pages):
        cached = get_imgs_already('manatoki', title, page, cw)
        if cached:
            # Chapter already available; skip the network round-trip.
            imgs += cached
            continue
        new_imgs = get_imgs_page(page, title, url, session, cw)
        imgs += new_imgs
        status = '{} {} / {} ({} / {})'.format(
            tr_('읽는 중...'), title, page.title, i + 1, len(pages))
        print_('{} {}'.format(page.title, len(new_imgs)))
        if cw is not None:
            if not cw.alive:
                return
            cw.setTitle(status)
        else:
            print('read page... {} ({})'.format(page.url, len(imgs)))
    return imgs
def get_imgs(url, soup=None, cw=None):
    """Collect images for a comicwalker title, chapter by chapter.

    Fix: `Soup(hrml)` was a typo (NameError whenever soup was not
    supplied); corrected to `Soup(html)`.
    """
    if soup is None:
        html = downloader.read_html(url)
        soup = Soup(html)  # fixed: was Soup(hrml)
    title = get_title(soup, cw)
    pages = get_pages(url, soup)
    pages = page_selector.filter(pages, cw)
    imgs = []
    for i, page in enumerate(pages):
        imgs_already = get_imgs_already('comicwalker', title, page, cw)
        if imgs_already:
            # Chapter already downloaded previously; reuse it.
            imgs += imgs_already
            continue
        if cw is not None:
            if not cw.alive:
                return
            cw.setTitle(u'{} {} / {} ({} / {})'.format(
                tr_(u'읽는 중...'), title, page.title, i + 1, len(pages)))
        imgs += get_imgs_page(page)
    return imgs
def get_imgs(url, title=None, cw=None):
    """Scrape the Gelbooru post-list pages for images matching the URL's tags."""
    url = clean_url(url)
    if 's=view' in url and 'page=favorites' not in url:
        raise NotImplementedError('Not Implemented')
    tags = get_tags(url)
    tags = quote(tags, safe='/')
    tags = tags.replace('%20', '+')
    url = 'https://gelbooru.com/index.php?page=post&s=list&tags={}'.format(
        tags)
    print_ = get_print(cw)

    # Range
    max_pid = get_max_range(cw, 2000)

    imgs = []
    ids = set()
    empty_streak = 0
    for p in range(500): #1017
        # Paging is driven by how many unique posts we already collected.
        url = setPage(url, len(ids))
        print_(url)
        soup = Soup(downloader.read_html(url))
        batch = []
        for post in soup.findAll('div', class_='thumbnail-preview'):
            id_ = int(re.find('[0-9]+', post.find('a')['id'], err='no id'))
            if id_ in ids:
                print('duplicate:', id_)
                continue
            ids.add(id_)
            url_img = urljoin(url, post.find('a')['href'])
            batch.append(Image(id_, url_img))
        if batch:
            imgs += batch
            empty_streak = 0
        else:
            print('no imgs')
            empty_streak += 1
            if empty_streak > 1:
                # Two empty pages in a row -> assume we ran off the end.
                print('break')
                break
        if len(imgs) >= max_pid:
            break
        if cw is not None:
            if not cw.alive:
                break
            cw.setTitle(u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs)))

    if not imgs:
        raise Exception('no imgs')
    return imgs
def init(self):
    """Reject single-post URLs, then point at the blog root and prep a session."""
    if u'bdsmlr.com/post/' in self.url:
        msg = tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url)
        raise errors.Invalid(msg)
    self.url = 'https://{}.bdsmlr.com'.format(self.id_)
    self.session = Session()
    clf2.solve(self.url, session=self.session, cw=self.cw)
def get_imgs(url, title=None, customWidget=None):
    """Collect images via the Gelbooru dapi XML endpoint.

    Fix: renamed the inner local `id` (shadowed the builtin) to `id_`.
    """
    url = clean_url(url)
    if 's=view' in url and 'page=favorites' not in url:
        raise NotImplementedError('Not Implemented')

    if 'page=dapi' not in url.lower():
        tags = get_tags(url).replace(' ', '+')
        url = "https://gelbooru.com/index.php?page=dapi&s=post&q=index&tags={}&pid={}&limit={}".format(
            tags, 0, LIMIT)

    if customWidget is not None:
        print_ = customWidget.print_
    else:
        # Console fallback writer.
        def print_(*values):
            sys.stdout.writelines(values + ('\n', ))

    # Range
    if customWidget is not None:
        range_pid = customWidget.range
    else:
        range_pid = None
    if range_pid is not None:
        max_pid = max(parse_range(range_pid, max=100000))
    else:
        max_pid = 2000

    imgs = []
    url_imgs = set()
    for p in range(100):
        url = setPage(url, p)
        #print_(url)
        html = downloader.read_html(url)
        soup = BeautifulSoup(html, 'html.parser')
        posts = soup.findAll('post')
        if not posts:
            break
        for post in posts:
            url_img = post.attrs['file_url']
            if url_img not in url_imgs:
                url_imgs.add(url_img)
                id_ = post.attrs['id']
                img = Image(id_, url_img)
                imgs.append(img)
            if len(imgs) >= max_pid:
                break
        if len(imgs) >= max_pid:
            break
        if customWidget is not None and not customWidget.alive:
            break
        if customWidget is not None:
            customWidget.exec_queue.put(
                (customWidget, u"customWidget.setTitle(u'{} {} - {}')".format(
                    tr_(u'읽는 중...'), title, len(imgs))))
    return imgs
def init(self):
    """Normalize a Daum webtoon URL and pre-fetch the series info."""
    if '/viewer/' in self.url:
        return self.Invalid(tr_('목록 주소를 입력해주세요: {}').format(self.url))
    is_view = '/view/' in self.url
    if not is_view and not self.url.lower().startswith('http'):
        # Bare series id -> build the canonical view URL.
        self.url = 'http://webtoon.daum.net/webtoon/view/{}'.format(
            self.url)
    self.session = None
    self._info = get_info(self.url, self.session)
def read(self):
    """Populate self.urls from the extractor and set the final title."""
    self.title = tr_(u'읽는 중... {}').format(self.name)
    for img in get_imgs(self.url, self.name, self.session, self.soup, self.cw):
        self.urls.append(img.url)
    self.title = self.name
def f(url):
    """Page-selector hook: reject board URLs and return the chapter list."""
    if 'board.php' in url:
        raise Exception(
            tr_(u'\ubaa9\ub85d \uc8fc\uc18c\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694'
                ))
    data = get_soup(url)
    soup = data['soup']
    session = data['session']
    return get_pages(soup, url)
def init(self):
    """Strip the 'bdsmlr_' prefix, reject single-post URLs, set blog root."""
    self.url = self.url.replace('bdsmlr_', '')
    if u'bdsmlr.com/post/' in self.url:
        msg = tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url)
        return self.Invalid(msg, fail=False)
    self.url = ('https://{}.bdsmlr.com').format(self.id)
def init(self):
    """Strip the 'bdsmlr_' prefix, reject single-post URLs, prep a session."""
    self.url = self.url.replace('bdsmlr_', '')
    if u'bdsmlr.com/post/' in self.url:
        msg = tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url)
        return self.Invalid(msg, fail=False)
    self.url = 'https://{}.bdsmlr.com'.format(self.id_)
    self.session = Session()
    clf2.solve(self.url, session=self.session, cw=self.customWidget)