Python clean_title Exemples, fucking_encoding.clean_title Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : bcy_downloader.py Projet : zerox20m/Hitomi-Downloader

 def name(self):
     """Return the title for this bcy item.

     The artist name is passed through ``clean_title``. URLs containing
     ``/detail/`` additionally get the item id appended.
     """
     info = self.info
     is_detail = '/detail/' in self.url
     artist = clean_title(info['artist'])
     if is_detail:
         return u'{} (bcy_{}) - {}'.format(artist, info['uid'], info['id'])
     return u'{} (bcy_{})'.format(artist, info['uid'])

Exemple #2

0

Afficher le fichier

 def __init__(self, illust, url, ugoira_data=None, format_name=None):
     """Describe one downloadable image of an illust.

     Copies id/type/title/artist from ``illust``, extracts the page number
     from the last ``_pN`` marker in ``url`` (0 when absent) and derives the
     file extension from the URL with query string and fragment removed.

     When ``format_name`` is given, the words ``id``, ``page``, ``artist``
     and ``title`` inside it are substituted with the matching values and
     the result is passed through ``clean_title``; otherwise the URL's
     basename is used as the filename.
     """
     self.illust = illust
     self.id = illust.id
     self.type = illust.type
     self.title = illust.title
     self.artist = illust.user.name
     self.url = url

     page_numbers = re.findall('_p([0-9]+)', url)
     self.p = page_numbers[-1] if page_numbers else 0

     # URL path without "?query" and "#fragment".
     bare_url = url.split('?')[0].split('#')[0]
     self.ext = os.path.splitext(bare_url)[1]

     if self.type == 'ugoira':
         self.ugoira_data = ugoira_data

     if not format_name:
         self.filename = os.path.basename(bare_url)
         return

     # Two passes: first turn each keyword into a unique marker so that a
     # substituted value cannot be mistaken for another keyword.
     name = format_name
     for keyword, marker in (('id', '###id*'), ('page', '###page*'),
                             ('artist', '###artist*'),
                             ('title', '###title*')):
         name = name.replace(keyword, marker)
     for marker, value in (('###id*', str(self.id)),
                           ('###page*', str(self.p)),
                           ('###artist*', self.artist),
                           ('###title*', self.title)):
         name = name.replace(marker, value)

     stem = clean_title(name.strip(), allow_dot=True, n=-len(self.ext))
     self.filename = stem + self.ext

Exemple #3

0

Afficher le fichier

        def f(_):
            """Resolve the image URL of the page and set ``self.filename``.

            Reads the page at ``url``, locates the ``<img>`` inside the
            ``picBox`` section and returns its absolute source URL. The
            argument is ignored.
            """
            soup = Soup(downloader.read_html(url, session=session))

            tag = soup.find('section', id='picBox').find('img')
            if tag is None:
                raise Exception('No img')

            # Prefer the URL assigned to ``.src`` in the onclick handler,
            # when there is one; otherwise use the plain src attribute.
            onclick = tag.attrs.get('onclick', '')
            if onclick and '.src' in onclick:
                print('onclick', onclick)
                src = re.find('''.src *= *['"](.+?)['"]''', onclick)
            else:
                src = tag.attrs['src']
            src = urljoin(url, src)

            filename = clean_title(os.path.basename(src.split('?')[0]))
            stem, ext = os.path.splitext(filename)

            # https://www.hentai-foundry.com/pictures/user/DrGraevling/74069/Eversong-Interrogation-pg.-13
            # Anything that is not a known media extension is saved as .jpg.
            known_exts = [
                '.bmp', '.png', '.gif', '.jpg', '.jpeg', '.webp', '.webm',
                '.avi', '.mp4', '.mkv', '.wmv'
            ]
            if ext.lower() not in known_exts:
                filename = u'{}.jpg'.format(stem)

            self.filename = filename
            return src

Exemple #4

0

Afficher le fichier

    def get(self, url):
        """Resolve and cache the video URL for ``url``.

        On the first call the formats are extracted with youtube_dl, the
        one with the largest ``width`` is chosen, the first thumbnail is
        downloaded into ``self.thumb`` and ``self.title`` /
        ``self.filename`` are built from ``self.format``. Later calls
        return the cached value.
        """
        cached = self._url
        if cached:
            return cached

        info = youtube_dl.YoutubeDL().extract_info(url)

        # Widest format first; element 0 is the pick.
        by_width = sorted(info['formats'],
                          key=lambda fmt: int(fmt['width']),
                          reverse=True)
        best = by_width[0]
        stream = best['url']

        # Thumbnail
        thumb_url = info['thumbnails'][0]['url']
        self.thumb_url = thumb_url
        self.thumb = BytesIO()
        downloader.download(thumb_url, buffer=self.thumb)

        # m3u8 protocols are wrapped in an M3u8_stream
        protocol = best['protocol']
        print(protocol)
        if 'm3u8' in protocol:
            stream = M3u8_stream(stream, referer=url)

        # Title and filename: mark the keywords first, then substitute.
        template = self.format.replace('title', '###title')
        template = template.replace('id', '###id')
        with_title = template.replace('###title', info['title'])
        self.title = with_title.replace('###id', u'{}'.format(info['id']))
        ext = '.mp4'
        self.filename = clean_title(self.title, n=-len(ext)) + ext

        self._url = stream
        return self._url

Exemple #5

0

Afficher le fichier

 def __init__(self, title, url):
     """Store the cleaned title, the URL and the numeric ``wr_id``.

     A leading ``NEW`` marker is removed from the title before it goes
     through ``fix_title_page`` and ``clean_title``. The id is the first
     ``wr_id=<digits>`` value found in ``url``.
     """
     marker = 'NEW'
     if title.startswith(marker):
         title = title[len(marker):].strip()
     self.title = clean_title(fix_title_page(title))
     self.url = url
     ids = re.findall('wr_id=([0-9]+)', url)
     self.id = int(ids[0])

Exemple #6

0

Afficher le fichier

Fichier : yandere_downloader.py Projet : rheehot/Hitomi-Downloader

    def get_title(self, url: str) -> str:
        """Derive a title from a yande.re URL.

        With a ``tags=`` parameter, the text after its last occurrence has
        ``+`` turned into spaces and is passed through ``clean_title``.
        Without it, the title is ``[N/A]`` followed by whatever comes after
        the last ``yande.re/`` in the URL.
        """
        if "tags=" in url:
            tag_text = url.rsplit("tags=", 1)[-1]
            return clean_title(tag_text.replace('+', " "))

        return '[N/A]' + url.rpartition('yande.re/')[2]

Exemple #7

0

Afficher le fichier

Fichier : avgle_downloader.py Projet : zerox20m/Hitomi-Downloader

 def __init__(self, url, url_thumb, referer, title):
     """Set up the lazy video URL, download the thumbnail and name the file.

     The filename is the title passed through ``clean_title`` plus a fixed
     ``.mp4`` extension.
     """
     ext = '.mp4'
     thumb = BytesIO()

     self.url = LazyUrl(referer, lambda x: url, self)
     self.url_thumb = url_thumb
     self.thumb = thumb
     downloader.download(url_thumb, referer=referer, buffer=thumb)

     self.title = title
     stem = clean_title(title, n=-len(ext))
     self.filename = u'{}{}'.format(stem, ext)

Exemple #8

0

Afficher le fichier

    def read(self):
        """Fill in title, URLs and icon for ``self.url``.

        Three URL shapes are distinguished:

        * ``/users/`` - a channel read with ``read_channel``. When
          ``get_p2f(cw)`` is truthy every video of the channel is added and
          the method returns; otherwise only the first URL is handled here
          and the remaining ones are pushed onto ``self.exec_queue``.
        * ``/photos/gallery/`` - an image gallery read with
          ``read_gallery``; the method returns after collecting the images.
        * anything else - handled as a single video.
        """
        cw = self.customWidget
        ui_setting = self.ui_setting
        # Format string selected in the UI, lower-cased and stripped.
        format = compatstr(
            ui_setting.youtubeFormat.currentText()).lower().strip()
        cw.enableSegment(1024 * 1024 // 2)
        thumb = BytesIO()

        if '/users/' in self.url:
            info = read_channel(self.url, cw)
            urls = info['urls']
            p2f = get_p2f(cw)
            if p2f:
                # All channel videos go into this one task.
                self.single = False
                self.title = clean_title(info['title'])
                videos = [Video(url, format) for url in info['urls']]
                self.urls = [video.url for video in videos]
                video = videos[0]
                # Called before video.info is read below.
                # NOTE(review): presumably this resolves the video and
                # populates video.info - confirm against Video.
                video.url()
                downloader.download(video.info['thumbnail'], buffer=thumb)
                self.setIcon(thumb)
                return
            else:
                # Keep the first URL for this task ...
                cw.gal_num = self.url = urls.pop(0)
                if urls and cw.alive:
                    # ... and queue the rest as one comma-separated string.
                    s = u', '.join(urls)
                    self.exec_queue.put((s, 'downButton(customWidget)'))
        elif '/photos/gallery/' in self.url:
            info = read_gallery(self.url, cw)
            for img in info['imgs']:
                self.urls.append(img.url)
            self.single = False
            self.title = clean_title(info['title'])
            self.url = info['url']
            cw.disableSegment()
            return
        else:
            # NOTE(review): this list is not read again in this method.
            urls = []
        # Single-video path (also reached from the non-p2f channel branch).
        video = Video(self.url, format)
        video.url()
        self.urls.append(video.url)

        downloader.download(video.info['thumbnail'], buffer=thumb)
        self.setIcon(thumb)
        self.title = video.title

Exemple #9

0

Afficher le fichier

    def read(self):
        """Collect one text URL per page and build the title.

        Each page is wrapped in a ``Text`` numbered from the current length
        of ``self.urls`` plus one. The title is ``[artist] title`` passed
        through ``clean_title``.
        """
        for page in get_pages(self.url, self.soup):
            number = len(self.urls) + 1
            self.urls.append(Text(page, number).url)

        self.artist = self.info['artist']
        raw_title = '[{}] {}'.format(self.artist, self.info['title'])
        # NOTE(review): n presumably reserves room for a "[merged] .txt"
        # prefix/suffix - confirm against clean_title.
        reserved = len('[merged] .txt')
        self.title = clean_title(raw_title, n=-reserved)

Exemple #10

0

Afficher le fichier

Fichier : bdsmlr_downloader.py Projet : zerox20m/Hitomi-Downloader

    def read(self):
        """Collect the post URLs of ``self.id`` and set the title."""
        info = get_imgs(self.id, cw=self.customWidget)

        posts = info['posts']
        for post in posts:
            self.urls.append(post.url)

        username = clean_title(info['username'])
        self.title = u'{} (bdsmlr_{})'.format(username, self.id)

Exemple #11

0

Afficher le fichier

 def __init__(self, stream, referer, id, title, url_thumb, format='title'):
     """Set up a video entry and download its thumbnail.

     ``format`` is a template in which the words ``title`` and ``id`` are
     replaced by the given title and id; the result becomes ``self.title``
     and, passed through ``clean_title``, the ``.mp4`` filename.
     """
     self.url = LazyUrl(referer, lambda x: stream, self)
     self.id = id

     # Mark the keywords first, then substitute the actual values.
     template = format.replace('title', '###title')
     template = template.replace('id', '###id')
     id_text = (u'{}').format(self.id)
     self.title = template.replace('###title', title).replace('###id',
                                                              id_text)
     self.filename = u'{}.mp4'.format(clean_title(self.title))

     thumb = BytesIO()
     self.url_thumb = url_thumb
     self.thumb = thumb
     downloader.download(url_thumb, buffer=thumb)

Exemple #12

0

Afficher le fichier

Fichier : wikiart_downloader.py Projet : rheehot/Hitomi-Downloader

    def read(self):
        """Collect the artist's image URLs; the title is the artist name."""
        widget = self.customWidget
        artist = get_artist(self.id, self.soup)

        # The widget may be absent; only publish the artist when it exists.
        if widget:
            widget.artist = artist

        images = get_imgs(self.url, artist, cw=widget)
        for image in images:
            self.urls.append(image.url)

        self.title = clean_title(artist)

Exemple #13

0

Afficher le fichier

 def id(self):
     """Return the cleaned identifier for this sankaku URL.

     For the ``www`` type it is ``[www] `` plus the text of the
     ``entry-title`` heading. Otherwise it is ``[<type>] `` plus the sorted
     ``tags`` query values joined by spaces, or ``N/A`` when there are none.
     """
     kind = self.type_sankaku
     if kind == 'www':
         heading = self.soup.find('h1', class_='entry-title')
         return clean_title(u'[www] ' + heading.text.strip())

     tags = query_url(self.url).get('tags', [])
     tags.sort()
     joined = u' '.join(tags) or u'N/A'
     return clean_title('[{}] '.format(kind) + joined)

Exemple #14

0

Afficher le fichier

 def __init__(self, url, url_page, title, url_thumb, format='title'):
     """Set up a video entry with a lazily resolved URL.

     The title is passed through ``clean_title`` first; ``format`` is a
     template in which the words ``title`` and ``id`` are replaced by the
     cleaned title and the id from ``get_id(url_page)`` to form the
     ``.mp4`` filename. The thumbnail is downloaded into ``self.thumb``.
     """
     self._url = url
     self.url = LazyUrl(url_page, self.get, self)
     self.id = get_id(url_page)

     cleaned = clean_title(title)
     self.title = cleaned

     # Mark the keywords first, then substitute the actual values.
     template = format.replace('title', '###title')
     template = template.replace('id', '###id')
     id_text = (u'{}').format(self.id)
     stem = template.replace('###title', cleaned).replace('###id', id_text)
     self.filename = (u'{}{}').format(stem, '.mp4')

     buf = IO()
     self.url_thumb = url_thumb
     downloader.download(url_thumb, buffer=buf)
     # Only published once the download call has returned.
     self.thumb = buf

Exemple #15

0

Afficher le fichier

Fichier : gelbooru_downloader.py Projet : tkekem0125/Hitomi-Downloader-issues

 def id(self):
     """Return the cleaned identifier of this gelbooru URL.

     The raw identifier is computed once and stored in ``self._id``:
     ``fav_<id>`` for favorites pages, otherwise the sorted ``tags`` query
     values joined by spaces, or ``N/A`` when that is empty. Every call
     returns it passed through ``clean_title``.
     """
     if self._id is None:
         query = parse_qs(urlparse(self.url).query)
         if 'page=favorites' in self.url:
             raw = u'fav_{}'.format(query.get('id', ['N/A'])[0])
         else:
             raw = u' '.join(sorted(query.get('tags', [])))
         self._id = raw or u'N/A'
     return clean_title(self._id)

Exemple #16

0

Afficher le fichier

Fichier : youporn_downloader.py Projet : rheehot/Hitomi-Downloader

    def __init__(self, url, format='title'):
        """Extract the video behind ``url`` and prepare its metadata.

        The last entry of the extracted ``formats`` list is used as the
        video. The first thumbnail is downloaded into ``self.thumb``.
        ``format`` is a template in which the words ``title`` and ``id``
        are replaced with the extracted values.
        """
        info = youtube_dl.YoutubeDL().extract_info(url)

        url_video = info['formats'][-1]['url']
        self.url = LazyUrl(url, lambda _: url_video, self)

        thumb = BytesIO()
        self.url_thumb = info['thumbnails'][0]['url']
        self.thumb = thumb
        downloader.download(self.url_thumb, buffer=thumb)

        # Mark the keywords first, then substitute the actual values.
        template = format.replace('title', '###title')
        template = template.replace('id', '###id')
        with_title = template.replace('###title', info['title'])
        self.title = with_title.replace('###id', '{}'.format(info['id']))

        ext = get_ext(url_video)
        self.filename = clean_title(self.title, n=-len(ext)) + ext

Exemple #17

0

Afficher le fichier

Fichier : danbooru_downloader.py Projet : rheehot/Hitomi-Downloader

 def id(self):
     """Return the cleaned identifier of this danbooru URL.

     The raw identifier is computed once and stored in ``self._id``:
     ``fav_<user_id>`` for favorites URLs (a missing ``user_id`` fails the
     assertion), otherwise the sorted ``tags`` query values joined by
     spaces, or ``N/A`` when that is empty. Every call returns it passed
     through ``clean_title``.
     """
     if self._id is None:
         query = parse_qs(urlparse(self.url).query)
         if 'donmai.us/favorites' in self.url:
             user_id = query.get('user_id', [''])[0]
             print('len(id) =', len(user_id), u'"{}"'.format(user_id))
             assert len(user_id) > 0, '[Fav] User id is not specified'
             raw = u'fav_{}'.format(user_id)
         else:
             raw = u' '.join(sorted(query.get('tags', [])))
         self._id = raw or u'N/A'
     return clean_title(self._id)

Exemple #18

0

Afficher le fichier

    def get(self, url):
        """Resolve and cache the video URL for ``url``.

        On the first call the info dict is fetched with ``get_info``, the
        title is built from ``self.format`` (the words ``title`` and ``id``
        are replaced), the last entry of ``formats`` is taken as the video
        and the filename gets the extension found in its URL. Later calls
        return the cached URL.
        """
        if self._url:
            return self._url

        info = get_info(url)
        self.info = info
        title = info['title']
        id_text = (u'{}').format(info['id'])

        # Mark the keywords first, then substitute the actual values.
        template = self.format.replace('title', '###title*')
        template = template.replace('id', '###id*')
        self.title = template.replace('###title*',
                                      title).replace('###id*', id_text)

        self._url = info['formats'][-1]['url']
        # Extension of the URL path, ignoring any query string.
        ext = os.path.splitext(self._url.split('?')[0])[1]
        stem = clean_title(self.title, n=-len(ext))
        self.filename = u'{}{}'.format(stem, ext)
        return self._url

Exemple #19

0

Afficher le fichier

Fichier : twitch_downloader.py Projet : rheehot/Hitomi-Downloader

def get_video(url, format='title'):
    """Extract the video behind ``url`` and wrap it in a ``Video``.

    The last entry of the extracted ``formats`` list is used. ``format``
    is a template in which the words ``title`` and ``id`` are replaced by
    the extracted title and ``display_id``. An ``m3u8`` source is wrapped
    in an ``M3u8_stream`` and named ``.mp4``; anything else keeps the
    extension found in its URL.

    Returns the ``(info, video)`` pair.
    """
    ydl = youtube_dl.YoutubeDL({})
    info = ydl.extract_info(url)

    source = info['formats'][-1]['url']
    print(source)
    # Lower-cased extension of the URL path, without the leading dot.
    ext = os.path.splitext(source.split('?')[0])[1].lower()[1:]

    # Mark the keywords first, then substitute the actual values.
    template = format.replace('title', '###title*')
    template = template.replace('id', '###id*')
    id_text = (u'{}').format(info['display_id'])
    title = template.replace('###title*', info['title'])
    title = clean_title(title.replace('###id*', id_text), allow_dot=True)

    if ext == 'm3u8':
        source = M3u8_stream(source, n_thread=4)
        filename = u'{}.{}'.format(title, 'mp4')
    else:
        filename = u'{}.{}'.format(title, ext)
    return info, Video(source, url, filename)

Exemple #20

0

Afficher le fichier

    def __init__(self, url, page, p, img1=None):
        """Describe page image number ``p`` of ``page``.

        The filename is ``<cleaned page title>/<p zero-padded to 4><ext>``;
        extensions outside the known image list fall back to ``.jpg``.
        ``url`` is used when truthy, otherwise ``img1``; an ``Exception``
        is raised when neither is.
        """
        image_exts = ('jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp')
        ext = os.path.splitext(url.split('?')[0])[1]
        if ext.lower()[1:] not in image_exts:
            ext = '.jpg'
        folder = clean_title(page.title)
        self.filename = (u'{}/{:04}{}').format(folder, p, ext)

        # img1, img, img1, s3, img1, img, img1, s3
        #self._url = img1 or url
        #url_alter = Url_alter(url, img1)

        # img, s3, img1, img, img1, s3, img1, img_without_//
        self._url = url or img1
        alternates = Url_alter(url, img1)
        alternates.c_s3 += 1

        if not self._url:
            raise Exception('no url')

        self.url = LazyUrl(page.url,
                           lambda _: self._url,
                           self,
                           url_alter=alternates)

Exemple #21

0

Afficher le fichier

def get_name(user_id, api=None, cw=None):
    """Return the name of a Pixiv user, passed through ``clean_title``.

    ``api.user_detail(user_id)`` is tried up to ``N_TRY`` times. A
    ``PixivError`` is logged through the widget printer, its ``api``
    attribute replaces ``api`` for the next attempt, and the loop sleeps
    ``SLEEP`` before retrying.

    Raises:
        Exception: when the API reports that the id does not exist.
        PixivError: the last error, once every attempt has failed.
    """
    print_ = get_print(cw)

    # Kept outside the handler: Python 3 unbinds the ``except ... as``
    # name as soon as the handler finishes.
    last_error = None
    for try_ in range(N_TRY):  # 1450
        try:
            info = api.user_detail(user_id)
            error = info.get('error')
            if error:
                if u'存在しない作品IDです' in error['user_message']:
                    raise Exception(u'ID does not exist')
                raise PixivError(error)
            break
        except PixivError as e:
            last_error = e
            api = e.api
            print_(e)
            if try_ < N_TRY - 1:
                print_('retry...')
            sleep(SLEEP)
    else:
        # Every attempt failed. A bare ``raise`` is not usable here: no
        # exception is being handled any more at this point, so Python 3
        # would raise RuntimeError instead of the real error.
        if last_error is None:
            raise RuntimeError('user_detail was never attempted')
        raise last_error

    name = info.user.name
    name = clean_title(name)
    return name

Exemple #22

0

Afficher le fichier

Fichier : asiansister_downloader.py Projet : zerox20m/Hitomi-Downloader

 def name(self):
     """Return the page ``<title>`` without the site suffix, cleaned."""
     page_title = self.soup.find('title').text
     without_site = page_title.replace('- ASIANSISTER.COM', '')
     return clean_title(without_site.strip())

Exemple #23

0

Afficher le fichier

Fichier : pixiv_comic_downloader.py Projet : zerox20m/Hitomi-Downloader

 def __init__(self, url, title):
     """Store the URL and the title passed through ``clean_title``."""
     cleaned = clean_title(title)
     self.title = cleaned
     self.url = url

Exemple #24

0

Afficher le fichier

def get_videos(url, cw=None, depth=0):
    """Collect the video parts of a bilibili page.

    Args:
        url: Page URL; it must match ``_VALID_URL`` and is also sent as
            the referer.
        cw: Widget handed to ``get_print`` for logging; may be None.
        depth: Recursion depth. The function calls itself once (with
            ``depth + 1``) when the first part looks too small.

    Returns:
        A ``(videos, info)`` tuple: ``videos`` is a list of ``Video``
        objects, one per entry, and ``info`` is a dict with the cleaned
        ``title`` and the ``url_thumb``.

    Raises:
        Exception: with the API's ``message`` when the last rendition has
            no ``durl`` and a message is present.
    """
    print_ = get_print(cw)
    # Maximum accepted resolution: taken from the UI combo box when the
    # UI settings exist, 720 otherwise.
    if utils.ui_setting:
        res_text = compatstr(utils.ui_setting.youtubeCombo_res.currentText())
        res = {
            '720p': 720,
            '1080p': 1080,
            '2K': 1440,
            '4K': 2160,
            '8K': 4320
        }[res_text]
    else:
        res = 720

    mobj = re.match(_VALID_URL, url)
    video_id = mobj.group('id')
    anime_id = mobj.group('anime_id')
    print(video_id, anime_id)
    # NOTE(review): print_ was already bound at the top of the function.
    print_ = get_print(cw)
    html = downloader.read_html(url, methods={'requests'})
    soup = Soup(html)
    title = soup.find('h1').attrs['title'].strip()
    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
    # Page number within the video; a missing one is treated as page 1.
    p = get_page(url)
    if p is None:
        p = 1
    print('page:', p)
    # The cid comes from the page list for later pages, and from the
    # first "cid" match in the HTML for page 1.
    if p > 1:
        pages = get_pages(html)
        cid = pages[(p - 1)]['cid']
    else:
        cid = re.findall('\\bcid(?:["\\\']:|=)(\\d+)', html)[0]
    print_('cid: {}'.format(cid))
    # NOTE(review): headers is not read again in this function.
    headers = {'Referer': url}
    entries = []

    # One query-string fragment per key of RESOLS, tried in that order.
    RENDITIONS = [
        'qn={}&quality={}&type='.format(qlt, qlt) for qlt in RESOLS.keys()
    ]  # + ['quality=2&type=mp4']

    for num, rendition in enumerate(RENDITIONS, start=1):
        print('####', num, rendition)
        # The request is signed with the MD5 of the payload followed by
        # _BILIBILI_KEY.
        payload = 'appkey=%s&cid=%s&otype=json&%s' % (_APP_KEY, cid, rendition)
        sign = hashlib.md5(
            (payload + _BILIBILI_KEY).encode('utf-8')).hexdigest()
        url_json = 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (
            payload, sign)
        s_json = downloader.read_html(url_json)
        print(s_json[:1000])
        video_info = json.loads(s_json)
        if not video_info:
            continue
        if 'durl' not in video_info:
            print('#### error', num)
            # Try the next rendition unless this was the last one.
            if num < len(RENDITIONS):
                continue
            msg = video_info.get('message')
            if msg:
                raise Exception(msg)
            # NOTE(review): with no 'durl' and no message on the last
            # rendition, execution continues to the lookups below.
        quality = video_info['quality']
        resolution = get_resolution(quality)
        s = (u'resolution: {}').format(resolution)
        print_(s)

        # 2184
        # Skip renditions above the requested maximum resolution.
        if int(re.find('([0-9]+)p', resolution)) > res:
            print_('skip resolution')
            continue

        for idx, durl in enumerate(video_info['durl']):
            # 1343
            # A first part under 1 MiB triggers one full retry
            # (only while depth == 0).
            if idx == 0:
                size = downloader.get_size(durl['url'], referer=url)
                if size < 1024 * 1024 and depth == 0:
                    print_('size is too small')
                    return get_videos(url, cw, depth + 1)

            # Main URL first, then the backup URLs with a lower preference.
            formats = [{
                'url': durl['url'],
                'filesize': int_or_none(durl['size'])
            }]
            for backup_url in durl.get('backup_url', []):
                formats.append({
                    'url':
                    backup_url,
                    'preference':
                    -2 if 'hd.mp4' in backup_url else -3
                })

            # Every format carries the page URL as its Referer header.
            for a_format in formats:
                a_format.setdefault('http_headers',
                                    {}).update({'Referer': url})

            entries.append({
                'id': '%s_part%s' % (video_id, idx),
                'duration': float_or_none(durl.get('length'), 1000),
                'formats': formats
            })

        # The first rendition that gets this far is the one used.
        break

    # Only the first format of each entry is turned into a Video.
    videos = []
    for entry in entries:
        url_video = entry['formats'][0]['url']
        video = Video(url_video, url, video_id, len(videos))
        videos.append(video)

    info = {'title': clean_title(title), 'url_thumb': url_thumb}
    return (videos, info)

Exemple #25

0

Afficher le fichier

 def name(self):
     """Return ``<display name> (pawoo_<id>)`` passed through ``clean_title``."""
     heading = self.soup.find('h1', class_='name')
     display_name = heading.span.text.strip()
     return clean_title(u'{} (pawoo_{})'.format(display_name, self.id))

Exemple #26

0

Afficher le fichier

    def read(self):
        """Collect the images for this Pixiv task and set its title.

        ``self.pixiv_type`` selects the mode:

        * ``following`` - not implemented.
        * ``user`` / ``bookmark`` / ``search`` - images are fetched with
          ``get_imgs``; the title is ``<name> (<self.id>)`` where the name
          is the search query or the user name from ``get_name``.
        * ``illust`` - one illust is fetched with ``api.illust_detail``
          (up to ``N_TRY`` attempts) and expanded with
          ``get_imgs_from_illust``.

        A ``PixivError`` makes the method return ``self.Invalid(...)``.

        Raises:
            NotImplementedError: for the ``following`` type.
        """
        type = self.pixiv_type
        cw = self.customWidget
        print_ = cw.print_
        ui_setting = self.ui_setting

        if type == 'following':
            raise NotImplementedError('following')

        # Ugoira conversion format, selected by the combo box index.
        self._format = [None, 'gif', 'webp',
                        'png'][ui_setting.ugoira_convert.currentIndex()]
        self._format_name = compatstr(ui_setting.pixivFormat.currentText())
        # Optional "type" filter from the URL query; None means all.
        types = [t.lower() for t in query_url(self.url).get('type', [])]
        if types:
            s = (u', ').join(sorted(types))
            types = set(types)
        else:
            s = 'all'
            types = None
        print_((u'Type: {}').format(s))
        print_((u'info: {}').format(self.info))
        api = self.api
        # Strip the prefixes/suffixes from the id to get the bare query.
        query = self.id.replace('_bmk', '').replace('_illust', '').replace(
            'pixiv_', '').replace('search_', '')
        if type != 'search':
            query = int(query)
        print('pixiv_query:', query)
        try:
            if type in ('user', 'bookmark', 'search'):
                max_pid = get_max_range(cw, 2000)
                # Tag filter from the UI list, only when the box is checked.
                if ui_setting.groupBox_tag.isChecked():
                    tags = [
                        compatstr(ui_setting.tagList.item(i).text())
                        for i in range(ui_setting.tagList.count())
                    ]
                else:
                    tags = []
                if type == 'search':
                    query = query.replace('+', ' ')
                    name = query
                else:
                    id = self.id.replace('_bmk', '').replace('pixiv_',
                                                             '').replace(
                                                                 'search_', '')
                    print('name', id)
                    name = get_name(id, self.api, cw=cw)
                    cw.artist = name
                title = u'{} ({})'.format(name, self.id)
                print_(title)
                dir = os.path.join(get_outdir('pixiv'), clean_title(title))
                imgs = get_imgs(query,
                                type=type,
                                api=api,
                                n=max_pid,
                                tags=tags,
                                types=types,
                                format=self._format,
                                format_name=self._format_name,
                                dir=dir,
                                cw=cw,
                                title=title,
                                info=self.info)
            elif type == 'illust':
                # Kept outside the handler: Python 3 unbinds the
                # ``except ... as`` name as soon as the handler finishes.
                last_error = None
                for try_ in range(N_TRY):
                    try:
                        detail = api.illust_detail(query, req_auth=True)
                        error = detail.get('error')
                        if error:
                            raise PixivError(error)
                        break
                    except PixivError as e:
                        last_error = e
                        api = e.api
                        print_(e)
                        if try_ < N_TRY - 1:
                            print_('retry...')
                        sleep(SLEEP)
                else:
                    # Every attempt failed. A bare ``raise`` is not usable
                    # here: no exception is being handled any more, so
                    # Python 3 would raise RuntimeError. Re-raising the
                    # last PixivError lets the handler below report it.
                    if last_error is None:
                        raise RuntimeError('illust_detail was never attempted')
                    raise last_error

                illust = detail.illust
                name = illust.title
                title = (u'{} ({})').format(name, self.id)
                dir = os.path.join(get_outdir('pixiv'), clean_title(title))
                imgs = get_imgs_from_illust(illust,
                                            api=api,
                                            format=self._format,
                                            dir=dir,
                                            cw=cw,
                                            format_name=self._format_name)
        except PixivError as e:
            # NOTE(review): relies on PixivError exposing ``message`` -
            # confirm against its definition.
            msg = (u'PixivError: {}').format(e.message)
            return self.Invalid(msg)

        # NOTE(review): imgs and title are only bound in the two branches
        # above; any other pixiv_type would fail here with NameError.
        self.imgs = imgs
        for img in imgs:
            self.urls.append(img.url)
            self.filenames[img.url] = img.filename

        self.title = clean_title(title)  # 1390

Exemple #27

0

Afficher le fichier

 def name(self):
     """Return ``self.id`` passed through ``clean_title``."""
     cleaned = clean_title(self.id)
     return cleaned

Exemple #28

0

Afficher le fichier

Fichier : torrent_downloader.py Projet : zerox20m/Hitomi-Downloader

 def name(self):
     """Return the cleaned torrent name, computing it on first use.

     The result of ``clean_title(self._info.name())`` is stored in
     ``self._name`` and reused on later calls.
     """
     if self._name is not None:
         return self._name
     self._name = clean_title(self._info.name())
     return self._name

Exemple #29

0

Afficher le fichier

Fichier : wikiart_downloader.py Projet : rheehot/Hitomi-Downloader

 def __init__(self, url, referer, title, id):
     """Set up the lazy URL and the ``<id> - <title><ext>`` filename.

     The extension is taken from the URL path without its query string.
     """
     self.url = LazyUrl(referer, lambda _: url, self)

     path = url.split('?')[0]
     ext = os.path.splitext(path)[1]
     # NOTE(review): the 3 presumably accounts for the " - " separator in
     # the filename below - confirm against clean_title's ``n`` argument.
     reserved = len(id) + len(ext) + 3
     cleaned = clean_title(title, n=-reserved)
     self.filename = u'{} - {}{}'.format(id, cleaned, ext)

Exemple #30

0

Afficher le fichier

 def __init__(self, page, p):
     """Set up the lazy URL and the ``[NNNN] <title>.txt`` filename.

     ``p`` is zero-padded to four digits in the filename.
     """
     self.page = page
     self.url = LazyUrl(page.url, self.get, self)

     numbered_title = '[{:04}] {}'.format(p, page.title)
     stem = clean_title(numbered_title, n=-4)
     self.filename = stem + '.txt'