Example #1
def fix_url(url, session=None, cw=None):
    print_ = get_print(cw)
    if '&manga_name=' not in url:
        return url
    print_('fix url')
    qs = query_url(url)
    name = qs['manga_name'][0].replace('+', ' ')
    url_search = urljoin(url, '/bbs/search.php')
    url_search = update_url_query(url_search, {'stx': [name]})
    print_(url_search)
    html = read_html(url_search, session=session)
    soup = Soup(html)
    posts = soup.findAll('div', class_='post-row')
    print_('posts: {}'.format(len(posts)))
    if len(posts) != 1:
        return url
    for a in posts[0].findAll('a'):
        href = urljoin(url, a.attrs['href'])
        if 'manga_detail' in href:
            break
    else:
        raise Exception('Failed to find link')

    if cw is not None:
        cw.gal_num = href
    return href
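Most examples on this page rely on a query_url helper that parses a URL's query string into a dict mapping each parameter name to a list of values. The project's own implementation isn't shown here; a minimal sketch of query_url, and of the update_url_query helper used above, assuming both wrap urllib.parse:

from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

def query_url(url):
    # Parse the query string into {key: [value, ...]}.
    return parse_qs(urlparse(url).query)

def update_url_query(url, qs_new):
    # Replace or add the given parameters, keeping the rest of the URL intact.
    parts = urlparse(url)
    qs = parse_qs(parts.query)
    qs.update(qs_new)
    return urlunparse(parts._replace(query=urlencode(qs, doseq=True)))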
Example #2
def fix_url(cls, url):  # 2033
    if not re.match('https?://.+', url, re.IGNORECASE):
        url = 'https://www.youtube.com/watch?v={}'.format(url)
    qs = query_url(url)
    if 'v' in qs:
        url = url.split('?')[0] + '?v={}'.format(qs['v'][0])
    return url
Example #3
 def get_types(self):
     return set() # legacy; #2653
     types = set()
     for t in query_url(self.url).get('type', []):
         t = t.lower()
         types.add(t)
     return types
Example #4
def read(self):
    qs = query_url(self.url)
    for key in qs:
        if key.lower() in ('file', 'filename'):
            name = qs[key][-1]
            break
    else:
        name = os.path.basename(self.url)
        for esc in ['?', '#']:
            name = name.split(esc)[0]

    ext = get_ext(name)
    if not ext:
        try:
            ext = downloader.get_ext(self.url)
        except Exception:
            ext = ''
    name = os.path.splitext(name)[0]

    self.urls.append(self.url)

    id_ = md5(self.url.encode('utf8')).hexdigest()[:8]
    tail = ' ({}){}'.format(id_, ext)
    filename = clean_title(name, n=-len(tail)) + tail

    self.filenames[self.url] = filename

    self.title = filename
Example #5
 def read(self):
     if '/post/' in self.url:
         raise errors.Invalid(
             tr_('개별 다운로드는 지원하지 않습니다: {}').format(self.url))  # "Individual downloads are not supported: {}"
     self._popular = 'search-Popular.' in self.url
     self.title = clean_title(self.name)
     qs = query_url(self.url)
     q = qs['q'][0]
     for id in get_ids_multi(q, self._popular, self.cw):
         img = Image(id, self.url)
         self.urls.append(img.url)
Example #6
def id(self):
    if self.type_sankaku == 'www':
        id = u'[www] ' + self.soup.find('h1', class_='entry-title').text.strip()
    else:
        qs = query_url(self.url)
        tags = qs.get('tags', [])
        tags.sort()
        id = u' '.join(tags)
        if not id:
            id = u'N/A'
        id = '[{}] '.format(self.type_sankaku) + id
    return clean_title(id)
Example #7
def get_tags(url):
    url = clean_url(url)
    qs = query_url(url)
    if 'page=favorites' in url:
        id = qs.get('id', ['N/A'])[0]
        id = u'fav_{}'.format(id)
    else:
        tags = qs.get('tags', [])
        tags.sort()
        id = u' '.join(tags)
    if not id:
        id = u'N/A'
    return id
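For illustration, how get_tags behaves on hypothetical booru-style URLs (made-up hostnames; note that parse_qs-style parsing already decodes '+' as a space):

print(get_tags('https://example.net/index.php?page=favorites&id=1234'))   # -> 'fav_1234'
print(get_tags('https://example.net/index.php?page=post&tags=night+sky')) # -> 'night sky'
print(get_tags('https://example.net/index.php?page=post'))                # -> 'N/A'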
Example #8
def get_page(url):
    qs = query_url(url)
    page = qs.get('p')
    if page:
        page = int(page[0])
    else:
        page = re.findall('_p([0-9]+)', url)
        if page:
            page = int(page[0])
        else:
            page = None
    if page == 1:
        page = None
    return page
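A few illustrative calls (hypothetical URLs): an explicit p query parameter wins, a _p<number> suffix in the path is the fallback, and page 1 is normalized to None:

assert get_page('https://example.com/list?p=3') == 3
assert get_page('https://example.com/12345_p07.png') == 7
assert get_page('https://example.com/list?p=1') is None
assert get_page('https://example.com/list') is None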
Example #9
def get_id(url):
    if '/dashboard/blog/' in url:
        url = re.find('/dashboard/blog/([0-9a-zA-Z_-]+)', url)
    if '/login_required/' in url:
        url = url.split('/login_required/')[1].split('?')[0].split('/')[0]
    if 'tumblr.com/blog/view/' in url:
        url = url.split('tumblr.com/blog/view/')[1]
    if 'tumblr.com' in url:
        if 'www.tumblr.com' in url:
            qs = query_url(url)
            url = qs.get('url', [url])[0]
        url = url.split('.tumblr.com')[0].split('/')[(-1)]
    if url == 'www':
        raise Exception('no id')
    return url
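Tracing a few hypothetical inputs through the branches:

get_id('https://myblog.tumblr.com/archive')        # -> 'myblog' (subdomain)
get_id('https://www.tumblr.com/blog/view/myblog')  # -> 'myblog' (blog-view path)
get_id('https://www.tumblr.com/')                  # raises Exception('no id')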
Example #10
 def fix_url(cls, url):
     if 'pornhub_gif_' in url:
         url = 'https://www.pornhub.com/gif/{}'.format(
             url.replace('pornhub_gif_', ''))
     elif 'pornhub_album_' in url:
         url = 'https://www.pornhub.com/album/{}'.format(
             url.replace('pornhub_album_', ''))
     elif 'pornhub_' in url:
         url = 'https://www.pornhub.com/view_video.php?viewkey={}'\
                    .format(url.replace('pornhub_', ''))
     if '/authenticate/goToLoggedIn' in url:
         qs = utils.query_url(url)
         url = urljoin(url, qs['url'][0])
     url = url.replace('pornhubthbh7ap3u.onion', 'pornhub.com')
     return url
Example #11
def id(self):
    if self.type_sankaku == 'www':
        id = '[www] ' + self.soup.find('h1', class_='entry-title').text.strip()
    else:
        if '/post/show/' in self.url:
            id = get_id(self.url)
        else:
            qs = query_url(self.url)
            tags = qs.get('tags', [])
            tags.sort()
            id = ' '.join(tags)
            if not id:
                id = 'N/A'
        id = '[{}] {}'.format(self.type_sankaku, id)
    return clean_title(id)
Example #12
    def read(self):
        cw = self.cw
        title = self.url
        if self.url.startswith('magnet:'):
            qs = utils.query_url(self.url)
            if 'dn' in qs:
                self._dn = qs['dn'][0]
        info = getattr(cw, 'info?', None)
        if info is not None:
            self.print_('cached info')
            self._info = info
        if self._info is None:
            try:
                self._info = torrent.get_info(self.url, cw, timeout=TIMEOUT, callback=self.callback)
                if CACHE_INFO:
                    setattr(cw, 'info?', self._info)
            except Exception as e:
                self.update_pause()
                if not cw.paused:
                    raise errors.Invalid('Failed to read metadata: {}'.format(self.url), fail=True)
        if self._info is None:
            cw.paused = True
        if cw.paused:
            return
        hash_ = self._info.hash.hex()
        self.print_('v2: {}'.format(self._info.v2))
        self.print_('Hash: {}'.format(hash_))
        if not self._info.v2:
            self.url = 'magnet:?xt=urn:btih:{}'.format(hash_)
        date = datetime.fromtimestamp(self._info.creation_date())
        date = date.strftime('%y-%m-%d %H:%M:%S')
        self.print_('Created on: {}'.format(date))
        self.print_('Total size: {}'.format(fs.size(self._info.total_size())))
        self.print_('Pieces: {} x {}'.format(self._info.num_pieces(), fs.size(self._info.piece_length())))
        self.print_('Creator: {}'.format(self._info.creator()))
        self.print_('Comment: {}'.format(self._info.comment()))
        cw.setTotalFileSize(self._info.total_size())
        
        cw.imgs.clear()
        cw.dones.clear()

        self.urls = [self.url]
        self.title = self.name
        self.update_files()
        
        cw.pbar.show()
Example #13
def read(self):
    qs = query_url(self.url)
    for key in qs:
        if key.lower() in ('file', 'filename'):
            name = qs[key][-1]
            break
    else:
        name = os.path.basename(self.url)
        for esc in ['?', '#']:
            name = name.split(esc)[0]

    if not get_ext(name):
        name += downloader.get_ext(self.url)

    self.urls.append(self.url)
    self.filenames[self.url] = clean_title(name)

    self.title = name
Example #14
def get_dn(cls, url):
    if url.startswith('magnet:'):
        qs = utils.query_url(url)
        if 'dn' in qs:
            return utils.html_unescape(qs['dn'][0])
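The dn ('display name') field of a magnet link is an ordinary query parameter, so the same parsing applies. A standalone equivalent using only the standard library (hypothetical magnet link; urlparse handles the magnet: scheme):

from urllib.parse import parse_qs, urlparse

url = 'magnet:?xt=urn:btih:0123456789abcdef0123456789abcdef01234567&dn=My+File'
print(parse_qs(urlparse(url).query)['dn'][0])  # -> 'My File'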
Example #15
def get_info(url, cw=None, depth=0):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    if utils.ui_setting:
        ugoira_ext = [None, '.gif', '.webp',
                      '.png'][utils.ui_setting.ugoira_convert.currentIndex()]
    else:
        ugoira_ext = None
    if utils.ui_setting:
        format_ = compatstr(utils.ui_setting.pixivFormat.currentText())
    else:
        format_ = 'id_ppage'

    max_pid = get_max_range(cw)

    if api.illust_id(url):  # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login:  #
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])

        if tags_matched(tags_illust, cw):
            if data['illustType'] == 2:  # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'],
                            url,
                            id_,
                            0,
                            format_,
                            info,
                            cw,
                            ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs),
                                format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url:  # User bookmarks
        id_ = api.user_id(url)
        if id_ is None:  #
            id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'],
                                                   info['artist_id'])
        ids = []
        ids_set = set()
        offset = 0
        while len(ids) < max_pid:
            data = api.bookmarks(id_, offset)
            c = 0
            for id in [work['id'] for work in data['works']]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            offset += LIMIT
            if depth == 0:
                check_alive(cw)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url:  # Search
        q = unquote(
            re.find(r'/tags/([^/]+)', url)
            or re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q, order, mode, p=p)
            c = 0
            for id in [
                    illust['id'] for illust in data['illustManga']['data']
                    if 'id' in illust
            ]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url:  # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(
            info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            c = 0
            for id in api.following(p, r18=r18):
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif api.user_id(url):  # User illusts
        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'],
                                               info['artist_id'])
        ids = []
        for illusts in [data['illusts'], data['manga']]:
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    else:
        raise NotImplementedError()
    info['imgs'] = imgs[:max_pid]

    return info
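Each listing branch above repeats the same pagination idiom: request pages, collect ids not seen before, and stop as soon as a page contributes nothing new. Stripped of the pixiv specifics, the idiom looks like this sketch, where fetch_page is a stand-in for api.bookmarks / api.search / api.following:

def collect_ids(fetch_page, max_n):
    ids, seen = [], set()
    p = 1
    while len(ids) < max_n:
        new = 0
        for id_ in fetch_page(p):
            if id_ not in seen:
                seen.add(id_)
                ids.append(id_)
                new += 1
        if not new:  # a page with no fresh ids means the listing is exhausted
            break
        p += 1
    return ids[:max_n]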
Example #16
    def read(self):
        type = self.pixiv_type
        cw = self.customWidget
        print_ = cw.print_
        ui_setting = self.ui_setting

        if type == 'following':
            raise NotImplementedError('following')

        self._format = [None, 'gif', 'webp',
                        'png'][ui_setting.ugoira_convert.currentIndex()]
        self._format_name = compatstr(ui_setting.pixivFormat.currentText())
        types = [t.lower() for t in query_url(self.url).get('type', [])]
        if types:
            s = u', '.join(sorted(types))
            types = set(types)
        else:
            s = 'all'
            types = None
        print_(u'Type: {}'.format(s))
        print_(u'info: {}'.format(self.info))
        api = self.api
        query = self.id.replace('_bmk', '').replace('_illust', '').replace(
            'pixiv_', '').replace('search_', '')
        if type != 'search':
            query = int(query)
        print('pixiv_query:', query)
        try:
            if type in ('user', 'bookmark', 'search'):
                max_pid = get_max_range(cw, 2000)
                if ui_setting.groupBox_tag.isChecked():
                    tags = [
                        compatstr(ui_setting.tagList.item(i).text())
                        for i in range(ui_setting.tagList.count())
                    ]
                else:
                    tags = []
                if type == 'search':
                    query = query.replace('+', ' ')
                    name = query
                else:
                    id = self.id.replace('_bmk', '').replace('pixiv_', '').replace('search_', '')
                    print('name', id)
                    name = get_name(id, self.api, cw=cw)
                    cw.artist = name
                title = u'{} ({})'.format(name, self.id)
                print_(title)
                dir = os.path.join(get_outdir('pixiv'), clean_title(title))
                imgs = get_imgs(query,
                                type=type,
                                api=api,
                                n=max_pid,
                                tags=tags,
                                types=types,
                                format=self._format,
                                format_name=self._format_name,
                                dir=dir,
                                cw=cw,
                                title=title,
                                info=self.info)
            elif type == 'illust':
                for try_ in range(N_TRY):
                    try:
                        detail = api.illust_detail(query, req_auth=True)
                        error = detail.get('error')
                        if error:
                            raise PixivError(error)
                        break
                    except PixivError as e:
                        api = e.api
                        print_(e)
                        if try_ < N_TRY - 1:
                            print_('retry...')
                        sleep(SLEEP)
                else:
                    raise

                illust = detail.illust
                name = illust.title
                title = u'{} ({})'.format(name, self.id)
                dir = os.path.join(get_outdir('pixiv'), clean_title(title))
                imgs = get_imgs_from_illust(illust,
                                            api=api,
                                            format=self._format,
                                            dir=dir,
                                            cw=cw,
                                            format_name=self._format_name)
        except PixivError as e:
            msg = u'PixivError: {}'.format(e.message)
            return self.Invalid(msg)

        self.imgs = imgs
        for img in imgs:
            self.urls.append(img.url)
            self.filenames[img.url] = img.filename

        self.title = clean_title(title)  # 1390
Example #17
def route_search():

    # Get query
    text_query = flask.request.args.get('q', None)
    page_num = flask.request.args.get('page', 1)
    show_syntax = flask.request.args.get('show_syntax', 0)
    has_more_items = False
    expires = 0

    # Fail if bad parameters provided
    try:
        page_num = int(page_num)
        show_syntax = bool(int(show_syntax))
    except ValueError:
        flask.abort(400)

    # Search page
    if text_query is not None:
        query = Search.query(text_query,
                             page_num,
                             offset=(page_num - 1) * app.config['ITEMS_PER_PAGE'],
                             count=app.config['ITEMS_PER_PAGE'] + 1)
        items = query.all()
        if len(items) == app.config['ITEMS_PER_PAGE'] + 1:
            items = items[:-1]
            has_more_items = True
        query_title = (text_query if len(text_query) > 0 else 'HN Firehose')
        meta_og_title = u'hnapp search: "%s"' % query_title
        title = u'%s – hnapp' % query_title

    # Front page
    else:
        query = None
        items = None
        title = u'hnapp – Hacker News Search With RSS & JSON Feeds'
        meta_og_title = u'hnapp – Hacker News RSS'

    # Meta SEO tags
    meta_keywords = u'Hacker News,RSS,hnapp'
    meta_description = u'Get only the stories and comments you want. Follow users, keywords, jobs, mentions of your product, etc.'

    # Get format
    if flask.request.path == '/':
        output_format = None
        html_template = 'search.html'
    else:
        if text_query is None:
            flask.abort(400)
        output_format = flask.request.path[1:]
        if output_format == 'bare':
            html_template = 'parts/items.html'

    # Those users who created their filters on alpha version new.hnapp.com
    # don't expect comments, so we fix it for them
    if flask.request.args.get('legacy', 0) == '1':
        # Not a search – redirect to home page
        if text_query is None:
            return flask.redirect(query_url(None, output_format=None),
                                  code=302)
        else:
            return flask.redirect(query_url('type:story ' + text_query,
                                            output_format=output_format),
                                  code=302)

    # Web page or bare HTML
    if output_format in (None, 'bare'):
        page_data = {
            'is_app': True,
            'title': title,
            'meta_og_title': meta_og_title,
            'meta_keywords': meta_keywords,
            'meta_description': meta_description,
            'show_syntax': show_syntax,
            'query': text_query,
            'items': items,
            'has_more_items': int(has_more_items),
            'page_expires_at': int(time.time()) + 60 * 5,  # Cache pages for this many seconds
            'this_url': flask.request.url,
            'prev_url': query_url(text_query, page_num=page_num - 1) if query is not None else None,
            'next_url': query_url(text_query, page_num=page_num + 1) if query is not None else None,
            'rss_url': query_url(text_query, output_format='rss') if query is not None else None,
            'json_url': query_url(text_query, output_format='json') if query is not None else None,
            'page_num': page_num,
            'ga_id': app.config['GA_ID'],
            'HOST_NAME': app.config['HOST_NAME']
        }
        return flask.render_template(html_template, **page_data)

    # RSS
    elif output_format == 'rss':
        feed = AtomFeed(title=title.encode('ascii', 'xmlcharrefreplace'),
                        title_type='html',
                        feed_url=query_url(text_query, output_format='rss'),
                        author='via hnapp',
                        url=query_url(text_query),
                        generator=('hnapp', app.config['HOST_NAME'], None))
        for item in items:
            feed.add(**item.feed_entry())
        return feed.get_response()

    # JSON
    elif output_format == 'json':

        feed = {}
        feed['has_more_items'] = has_more_items
        feed['query'] = text_query
        feed['items'] = [item.json_entry() for item in items]
        return flask.jsonify(**feed)

    # output_format defined but query is not
    # not supposed to happen, we checked for this above
    else:
        flask.abort(500)
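Note that query_url in this example is hnapp's own URL builder, the inverse of the parser in the earlier examples: it constructs a search URL from a text query, page number, and output format. A minimal sketch consistent with the calls above (the real implementation may differ):

from urllib.parse import quote_plus

def query_url(text_query, page_num=1, output_format=None):
    # e.g. query_url('foo', 2, 'rss') -> '/rss?q=foo&page=2'
    path = '/' + output_format if output_format else '/'
    if text_query is None:
        return path
    url = path + '?q=' + quote_plus(text_query)
    if page_num and page_num > 1:
        url += '&page=%d' % page_num
    return url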
Example #19
    def init(self):
        self.url = clean_url(self.url)
        url = self.url

        # Determine the type
        if 'bookmark.php?type=user' in url or url.startswith(headers['following']):
            type = 'following'
        elif 'bookmark.php' in url or url.startswith(headers['bookmark']) or '/bookmarks/' in url:
            type = 'bookmark'
        elif 'illust_id=' in url or url.startswith(headers['illust']) or '/artworks/' in url:
            type = 'illust'
        elif 'search.php' in url or url.startswith(headers['search']):
            type = 'search'
            order = query_url(url).get('order', ['date_d'])[0] # date_d, date, popular_d, popular_male_d, popular_female_d
            scd = query_url(url).get('scd', [None])[0] # 2019-09-27
            ecd = query_url(url).get('ecd', [None])[0] # 2019-09-28
            blt = query_url(url).get('blt', [None])[0] # 5000
            bgt = query_url(url).get('bgt', [None])[0] # 9999
            type_ = query_url(url).get('type', [None])[0] # None (all), illust, manga, ugoira
            self.info = {'order': order,
                         'scd': scd,
                         'ecd': ecd,
                         'blt': blt,
                         'bgt': bgt,
                         'type': type_}
        elif '/tags/' in url:
            type = 'search'
            order = query_url(url).get('order', ['date_d'])[0]
            scd = query_url(url).get('scd', [None])[0]
            ecd = query_url(url).get('ecd', [None])[0]
            blt = query_url(url).get('blt', [None])[0]
            bgt = query_url(url).get('bgt', [None])[0]
            type_ = query_url(url).get('type', [None])[0] # None (all), illust, manga, ugoira
            if type_ is None:
                try:
                    type_ = url.split('/tags/')[1].split('/')[1]
                except IndexError:
                    type_ = None
                type_ = {'illustrations': 'illust'}.get(type_, type_)
            self.info = {'order': order,
                         'scd': scd,
                         'ecd': ecd,
                         'blt': blt,
                         'bgt': bgt,
                         'type': type_}
        elif 'id=' in url and 'mode=' not in url or url.startswith(headers['user']) or 'pixiv.me' in url or '/users/' in url:
            type = 'user'
        else:
            self.Invalid((u'[pixiv] Can not determine type: {}').format(url))
            return 'stop'
        header = headers[type]
        if 'pixiv.net' in url or 'pixiv.me' in url:
            if not url.startswith('http://') and not url.startswith('https://'):
                url = u'https://' + url
            self.url = url
        else:
            url = url.replace('bmk_', '').replace('illust_', '').replace('pixiv_', '').replace('search_', '')
            if type == 'user':
                url = 'https://www.pixiv.net/member_illust.php?id={}'.format(url)
            elif type == 'bookmark':
                url = 'https://www.pixiv.net/bookmark.php?id={}'.format(url)
            elif type == 'illust':
                url = 'https://www.pixiv.net/member_illust.php?mode=medium&illust_id={}'.format(url)
            elif type == 'search':
                url = 'https://www.pixiv.net/search.php?s_mode=s_tag&word={}'.format(url)
                url = clean_url(url)
            else:
                self.Invalid('{}{}: ???'.format(header, url))
                return 'stop'
            self.url = url
        self.print_('PIXIV_TYPE: {}'.format(type))
        self.pixiv_type = type
        try:
            self.api = pixiv_auth.get_api()
            if 'error' in self.api.user_detail(11):
                self.api = pixiv_auth.get_api(force=True)
        except Exception as e:
            self.print_(print_error(e)[0])
            self.Invalid(tr_('로그인 실패: {}{}\n[옵션 - 설정 - 픽시브 설정 - 로그인] 에서 설정해주세요.').format(header, url))  # "Login failed: {}{}\nPlease configure it in [Options - Settings - Pixiv Settings - Login]."
            return 'stop'
Example #20
def fix_url(cls, url):  # 2033
    qs = query_url(url)
    if 'v' in qs:
        url = url.split('?')[0] + '?v={}'.format(qs['v'][0])
    return url
Example #21
 def name(self):
     qs = query_url(self.url)
     name = qs['q'][0]
     if self._popular:
         name += ' - Popular'
     return name
Example #22
def get_info(url, cw=None, depth=0, tags_add=None):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    ugoira_ext = ([None, '.gif', '.webp', '.png']
                  [utils.ui_setting.ugoira_convert.currentIndex()]
                  if utils.ui_setting else None)
    format_ = (compatstr(utils.ui_setting.pixivFormat.currentText())
               if utils.ui_setting else 'id_ppage')

    max_pid = get_max_range(cw)

    if api.illust_id(url):  # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login:  #
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])

        if tags_matched(tags_illust, tags_add, cw):
            if data['illustType'] == 2:  # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'],
                            url,
                            id_,
                            0,
                            format_,
                            info,
                            cw,
                            ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs),
                                format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url:  # User bookmarks
        id_ = api.user_id(url)
        if id_ is None:  #
            id_ = my_id()
        if id_ == my_id():
            rests = ['show', 'hide']
        else:
            rests = ['show']
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'],
                                                   info['artist_id'])
        ids = []
        ids_set = set()
        for rest in rests:
            offset = 0
            while len(ids) < max_pid:
                data = api.bookmarks(id_, offset, rest=rest)
                c = 0
                for id in [work['id'] for work in data['works']]:
                    if id in ids_set:
                        continue
                    ids_set.add(id)
                    ids.append(id)
                    c += 1
                if not c:
                    break
                offset += LIMIT
                if depth == 0:
                    check_alive(cw)
        process_ids(ids, info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url:  # Search
        q = unquote(
            re.find(r'/tags/([^/]+)', url)
            or re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        s_mode = qs.get('s_mode', ['s_tag_full'])[0]
        scd = qs.get('scd', [None])[0]
        ecd = qs.get('ecd', [None])[0]
        type_ = qs.get('type', ['all'])[0]
        wlt = qs.get('wlt', [None])[0]
        wgt = qs.get('wgt', [None])[0]
        hlt = qs.get('hlt', [None])[0]
        hgt = qs.get('hgt', [None])[0]
        blt = qs.get('blt', [None])[0]
        bgt = qs.get('bgt', [None])[0]
        ratio = qs.get('ratio', [None])[0]
        tool = qs.get('tool', [None])[0]
        logs = [
            'order: {}'.format(order),
            'mode: {}'.format(mode),
            's_mode: {}'.format(s_mode),
            'scd / ecd: {} / {}'.format(scd, ecd),
            'type: {}'.format(type_),
            'wlt / wgt: {} / {}'.format(wlt, wgt),
            'hlt / hgt: {} / {}'.format(hlt, hgt),
            'blt / bgt: {} / {}'.format(blt, bgt),
            'ratio: {}'.format(ratio),
            'tool: {}'.format(tool),
        ]
        print_('\n'.join(logs))
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q,
                              order,
                              mode,
                              p=p,
                              s_mode=s_mode,
                              scd=scd,
                              ecd=ecd,
                              type_=type_,
                              wlt=wlt,
                              wgt=wgt,
                              hlt=hlt,
                              hgt=hgt,
                              blt=blt,
                              bgt=bgt,
                              ratio=ratio,
                              tool=tool)
            c = 0
            for id in [
                    illust['id'] for illust in data['illustManga']['data']
                    if 'id' in illust
            ]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url:  # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(
            info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.following(p, r18=r18)
            c = 0
            for id in data['page']['ids']:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif api.user_id(url):  # User illusts
        m = re.search(r'/users/[0-9]+/([\w]+)/?([^\?#/]*)', url)
        type_ = {
            'illustrations': 'illusts',
            'manga': 'manga'
        }.get(m and m.groups()[0])
        if type_:
            types = [type_]
        else:
            types = ['illusts', 'manga']
        if m:
            tag = unquote(m.groups()[1]) or None
        else:
            tag = None
        print_('types: {}, tag: {}'.format(types, tag))

        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'],
                                               info['artist_id'])

        ids = []
        for type_ in types:
            illusts = data[type_]
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        if not ids:
            raise Exception('no imgs')
        process_ids(ids,
                    info,
                    imgs,
                    cw,
                    depth,
                    tags_add=[tag] if tag else None)
    else:
        raise NotImplementedError()
    info['imgs'] = imgs[:max_pid]

    return info