class ParserBS(AbstractParser):
    """Custom HTML parser built on top of BeautifulSoup."""

    def __init__(self, html_raw: str, parser_bs_type: str = "html.parser"):
        """Parse *html_raw* eagerly with the given bs4 parser backend."""
        self.html_parsed = BeautifulSoup(html_raw, parser_bs_type)

    @property
    def html_raw(self) -> str:
        """Serialized form of the parsed document."""
        return str(self.html_parsed)  # str() over direct __str__() call

    @cached_property
    def title(self) -> str:
        """Text of the <title> tag, or "" when the document has none."""
        title = self.html_parsed.find("title")
        # Conditional expression instead of the error-prone `and/or` idiom;
        # behavior is identical (a missing title still yields "").
        return title.text if title else ""

    @cached_property
    def anchor_nodes(self) -> Iterable[ResultSet]:
        """All <a> tags that carry an href attribute."""
        return self.html_parsed.find_all("a", attrs={"href": True})

    def get_related_anchors_href(self) -> Iterable[str]:
        """Unique href values of anchors that point to related URLs.

        Filtering is delegated to `_is_href_url_related` (defined on the
        parser hierarchy, not visible in this chunk).
        """
        return {
            node.attrs.get("href")
            for node in self.anchor_nodes
            if ParserBS._is_href_url_related(node.attrs.get("href"))
        }

    def __repr__(self):
        return repr(self.html_parsed)  # repr() over direct __repr__() call
def change_encode():
    """Inject a <head> with a UTF-8 content-type meta tag into each article.

    Rewrites every article HTML file in place. Relies on the module-level
    `path` directory and `get_articles()` for the list of article names.
    """
    for item in get_articles():
        file_path = path + "/" + item + ".html"
        with open(file_path, "r") as f:
            html = f.read()
        # Explicit parser avoids bs4's GuessedAtParserWarning and keeps the
        # output independent of which optional parser libraries are installed.
        soup = BeautifulSoup(html, "html.parser")
        tag_head = soup.new_tag("head")
        tag_meta = soup.new_tag("meta")
        tag_meta["http-equiv"] = "content-type"
        tag_meta["content"] = "text/html; charset=utf-8"
        # Place the new <head> just before <body>, then put the meta inside it.
        soup.html.body.insert_before(tag_head)
        soup.html.head.append(tag_meta)
        with open(file_path, "w") as f:
            # str() serializes the tree (equivalent to the old __repr__() call).
            f.write(str(soup))
def get_page_content():
    """Pull a Confluence page, save its images locally, and wrap the HTML
    as a Polymer dom-module file under root_dir/root_folder.

    Prompts the user for the source URL and page metadata. Relies on the
    module-level `ssn` (authenticated requests session), `root_dir` and
    `root_folder`. Recurses via `addanotherpage` until the user stops.
    """
    urlinput = input("What URL do you want to pull from?")
    # Temporary random file name; renamed to "<pageid>.html" once known.
    filename = ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(1, 10))
    filename += ".html"

    page = ssn.get(urlinput)
    soup = BeautifulSoup(page.content, 'html.parser')

    def line_prepender(file, line):
        """Prepend *line* to the named file inside root_dir/root_folder."""
        with open(root_dir + root_folder + file, 'r+') as f:
            content = f.read()
            f.seek(0, 0)
            f.write(line.rstrip('\r\n') + '\n' + content)

    pagecontent = soup.find("div", {"id": "main-content"})
    # Re-parse the fragment so we work on a tree detached from the full page.
    pagecontent = BeautifulSoup(repr(pagecontent), 'html.parser')

    # Download embedded images and rewrite their tags to local static paths.
    for item in pagecontent.find_all(
            "img", {"class": "confluence-embedded-image"}):
        list_att = list(item.attrs.keys())
        imagesrc = item['src']
        imageURL = "https://opensource.ncsa.illinois.edu" + imagesrc
        r = ssn.get(imageURL, allow_redirects=True)
        picturesfilename = str(item['data-linked-resource-default-alias'])
        # `with` closes the handle (the original leaked the file object).
        with open(root_dir + "/images/" + picturesfilename, 'wb') as img_file:
            img_file.write(r.content)
        # Keep only the layout-relevant attributes on the <img> tag.
        for att in list_att:
            if att not in ['src', 'width', 'height', 'scale']:
                del item[att]
        item['src'] = '/static/images/' + picturesfilename

    # Strip Confluence CSS classes from every tag.
    for tag in pagecontent():
        del tag["class"]

    # Write the page body; the original opened this handle with "w+" and
    # never closed it before appending through a second handle on the same
    # file, risking interleaved buffered writes. `with` flushes and closes.
    with open(root_dir + root_folder + filename, "w+") as newfile:
        newfile.write(pagecontent.prettify())

    # Collect page identifiers and append the Polymer footer.
    header = input("What should the heading be? (Title of the des-card)")
    pageid = input(
        "What should the dom-module id be? (des-home, des-data, etc.)")
    pageclass = input(
        "What Polymer class should this is labeled as (desHome)?")
    endtext = """
    </div>
    </des-card>
    </template>
    <script>
    class {pageclass} extends Polymer.Element {{
    static get is() {{ return '{pageid}'; }}
    }}
    window.customElements.define({pageclass}.is,{pageclass});
    </script>
    </dom-module>
    """.format(pageclass=pageclass, pageid=pageid)
    with open(root_dir + root_folder + filename, 'a') as newfile:
        newfile.write(endtext)

    # Rename the temporary file to the user-supplied dom-module id.
    newfilename = pageid + ".html"
    os.rename(root_dir + root_folder + filename,
              root_dir + root_folder + newfilename)

    # Prepend the dom-module header now that the id and heading are known.
    initext = """\
    <dom-module id='{pageid}'>
    <template>
    <style include='shared-styles'>
    :host {{
    display: block;
    padding: 10px;
    }}
    </style>
    <des-card heading="{header}">
    <div class=card-content>
    """.format(pageid=pageid, header=header)
    line_prepender(newfilename, initext)

    def addanotherpage():
        """Ask whether to import another page; recurse until a valid answer."""
        answer = input("Do you have another page to add? Y/N")
        if answer in ("yes", "Yes", "y"):
            get_page_content()
            return
        if answer in ("no", "No", "n"):
            print(
                "Please note that locations within imported pages for images and"
                "other files will need to be changed to reflect the correct corresponding"
                "location on the user's computer "
                "(unless the file isn't imported from a local location)")
            sleep(2)
            print("Exiting program")
            # Log out of the Confluence session before exiting.
            ssn.get(
                "https://opensource.ncsa.illinois.edu/confluence/login.action?logout=true"
            )
            return
        print("Please enter Yes or No")
        addanotherpage()

    addanotherpage()