Python Link.ImageLink 예제들

프로그래밍 언어: Python

클래스/타입: Link

메소드/함수: ImageLink

hotexamples.com에서의 예제들: 1

Python Link.ImageLink - 1개의 예제가 발견되었습니다. 아래 예제들은 오픈소스 프로젝트에서 추출된, Python의 Link.ImageLink 패키지에서 가져온 bookwyrm 관련 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Link(30)

MessageLink(2)

create_link_tuples_from_chains(2)

l2_sendto(2)

routing_flag1(2)

routing_flag2(2)

start_listener(2)

ChannelIds(1)

Firma(1)

ImageLink(1)

RSSLink(1)

garbler(1)

getLinkUrlsAndIds(1)

inhibit(1)

set_garbler(1)

setup(1)

예제 #1

파일 보기

파일: Threads.py 프로젝트: salt-lick/Imageboard-Image-Scraper

        def run(self):
            """Download the thread page, queue its images and optionally save it.

            Steps performed:

            1. Fetch the page with ``self.download()`` and create the thread
               directory (plus the static directory when ``save_page`` is set).
            2. Collect every ``//i.4cdn.org/...`` image URL found in the page.
               With ``keep_names`` set, also choose a file name for each image
               from the anchor's ``title`` attribute or its link text.
            3. Push one ``_Link.ImageLink`` per image through ``self.push``.
            4. With ``save_page`` set, replace image, thumbnail and static-file
               URLs in the HTML with their local names and write the result to
               ``<thread number>.html`` inside the thread directory.

            Nothing is returned; the method works through ``self.push`` and the
            file system.
            """
            content = self.download()
            keep_names = _Globals.globals.keep_names
            save_page = _Globals.globals.save_page

            # Set up dirs
            thread_dir = self.url.getDir()
            if not _os.path.exists(thread_dir):
                _os.makedirs(thread_dir)

            if save_page:
                static_dir = self.url.getStaticDir()
                if not _os.path.exists(static_dir):
                    _os.makedirs(static_dir)

            # Nothing to parse (e.g. 404 / deleted thread). For an empty string
            # this matches the old behaviour (no images, page not saved); for
            # None it avoids a TypeError inside re.findall.
            # NOTE(review): assumes download() yields a falsy value on failure,
            # as the old "and content" save-page guard implied -- confirm.
            if not content:
                return

            # Get images
            if keep_names:
                images = {}  # image URL -> file name to store the image under

                pairs = _re.findall(
                    r"<a (?:title=\"([^\"]*?)\" )?href=\"(//i\.4cdn\.org/\w+/\d+\.\w+).*?\>(.*?)\<\/a>",
                    content)

                used_names = set()

                for title, image_url, link_text in pairs:
                    server_name = image_url.split('/')[-1]

                    if title:
                        name = title
                        if title in used_names:
                            name = server_name  # no overlapping image names
                    elif link_text:
                        name = link_text
                        if link_text == 'Spoiler Image' or link_text in used_names:
                            # spoiler image or overlapping image name
                            name = server_name
                    else:
                        # Neither a title nor link text: fall back to the
                        # server-side file name instead of leaving the name
                        # unbound (or stale from the previous image).
                        name = server_name

                    used_names.add(name)
                    images[image_url] = name
            else:
                images = set(
                    _re.findall(r"(//i\.4cdn\.org/\w+/\d+\.\w+)", content))

            for image_url in images:
                link = _Link.ImageLink('https:' + image_url)
                link.setThread(self.url)

                if keep_names:
                    link.name = images[image_url]

                self.push(link)

            if not save_page:
                return

            # Save page HTML
            converted_content = content  # have to change all the links below

            # Replacing image URLs. While doing so, remember the local name of
            # every image keyed by its numeric id so thumbnails can be matched
            # exactly afterwards.
            local_name_by_number = {}
            for image_url in images:
                if keep_names:
                    local_name = images[image_url]
                else:
                    local_name = _Link.ImageLink('https:' + image_url).getName()

                converted_content = converted_content.replace(
                    image_url, local_name)

                # '//i.4cdn.org/<board>/<number>.<ext>' -> '<number>'
                image_number = image_url.split('/')[-1].split('.', 1)[0]
                local_name_by_number[image_number] = local_name

            # Replacing thumbnail URLs
            thumbnail_list = set(
                _re.findall(
                    r"\<img src=\"(//[0-9]+\.t\.4cdn\.org/\w+/([0-9]+)s\.jpg)\"",
                    content))

            for thumbnail_url, image_number in thumbnail_list:
                # Exact lookup on the image number: a substring test would let
                # '123' match '.../1234.jpg' and pick the wrong file.
                new_image_filename = local_name_by_number.get(image_number)

                if new_image_filename is None:
                    # shouldn't happen; one pre-formatted string prints the
                    # same text under both Python 2 and Python 3
                    print('image filename not discovered for thumbnail: {} {}'.format(
                        image_number, thumbnail_url))
                    continue

                converted_content = converted_content.replace(
                    thumbnail_url, new_image_filename)

            # Downloading static files, and replacing filenames
            static = _StaticFiles.StaticFileHandler()
            static_dir = self.url.getStaticDir()
            for static_url in static.extract_static_urls(content):
                static.download(static_url, static_dir)
                new_filename = _os.path.join('static', static_url.lstrip('/'))
                # we don't wanna load the js, doesn't add much and hits 4ch
                # servers when we load the page
                if static_url.endswith('.js'):
                    new_filename += '.dontload'
                converted_content = converted_content.replace(
                    static_url, new_filename)

            # Write file
            page_filename = _os.path.join(
                thread_dir,
                '{}.html'.format(self.url.getThreadNumber()))
            with open(page_filename, 'w') as page_file:
                page_file.write(converted_content)