Python web2pyHTMLParser 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: gluon.html

메소드/함수: web2pyHTMLParser

hotexamples.com에서의 예제들: 3

Python web2pyHTMLParser - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python gluon.html.web2pyHTMLParser의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: default.py 프로젝트: pombredanne/phylesystem-api

    def _new_nexson_with_crossref_metadata(doi, ref_string, include_cc0=False):
        if doi:
            # use the supplied DOI to fetch study metadata
            search_term = doi
        elif ref_string:
            # use the supplied reference text to fetch study metadata
            search_term = ref_string

        # look for matching studies via CrossRef.org API
        doi_lookup_response = fetch(
            'http://search.crossref.org/dois?%s' % 
            urlencode({'q': search_term})
        )
        doi_lookup_response = unicode(doi_lookup_response, 'utf-8')   # make sure it's Unicode!
        matching_records = anyjson.loads(doi_lookup_response)

        # if we got a match, grab the first (probably only) record
        if len(matching_records) > 0:
            match = matching_records[0];

            # Convert HTML reference string to plain text
            raw_publication_reference = match.get('fullCitation', '')
            ref_element_tree = web2pyHTMLParser(raw_publication_reference).tree
            # root of this tree is the complete mini-DOM
            ref_root = ref_element_tree.elements()[0]
            # reduce this root to plain text (strip any tags)

            meta_publication_reference = ref_root.flatten().decode('utf-8')
            meta_publication_url = match.get('doi', u'')  # already in URL form
            meta_year = match.get('year', u'')
            
        else:
            # Add a bogus reference string to signal the lack of results
            if doi:
                meta_publication_reference = u'No matching publication found for this DOI!'
            else:
                meta_publication_reference = u'No matching publication found for this reference string'
            meta_publication_url = u''
            meta_year = u''

        # add any found values to a fresh NexSON template
        nexson = get_empty_nexson(BY_ID_HONEY_BADGERFISH, include_cc0=include_cc0)
        nexml_el = nexson['nexml']
        nexml_el[u'^ot:studyPublicationReference'] = meta_publication_reference
        if meta_publication_url:
            nexml_el[u'^ot:studyPublication'] = {'@href': meta_publication_url}
        if meta_year:
            nexml_el[u'^ot:studyYear'] = meta_year
        return nexson

예제 #2

파일 보기

파일: test_html.py 프로젝트: BuhtigithuB/web2py

    def test_web2pyHTMLParser(self):
        """Check parsing and re-serialization of small HTML snippets by web2pyHTMLParser."""
        # tag should not be a byte
        first_component = web2pyHTMLParser("<div></div>").tree.components[0]
        self.assertEqual(first_component.tag, 'div')

        # the unclosed <span> is expected to come back closed
        unbalanced = web2pyHTMLParser('<div>a<span>b</div>c')
        self.assertEqual(str(unbalanced.tree), "<div>a<span>b</span></div>c")

        # an attribute set on a located element must show up in the output
        dom = web2pyHTMLParser('hello<div a="b">world</div>').tree
        target = dom.element(_a='b')
        target['_c'] = 5
        self.assertEqual(str(dom), 'hello<div a="b" c="5">world</div>')

        # img is passed in `closed`; expect it rendered as a self-closing tag
        void_markup = '<div><img class="img"/></div>'
        rendered = str(web2pyHTMLParser(void_markup, closed=['img']).tree)
        self.assertEqual(rendered, '<div><img class="img" /></div>')

        # Numeric character references, decimal first and then hexadecimal:
        #   greater-than sign ( > )  --> &#62; --> &#x3E;
        #   less-than sign    ( < )  --> &#60; --> &#x3C;
        # both forms are expected to serialize as the named entities
        escaped = '<div>&lt; &gt;</div>'
        for entity_markup in ('<div>&#60; &#62;</div>', '<div>&#x3C; &#x3E;</div>'):
            self.assertEqual(str(web2pyHTMLParser(entity_markup).tree), escaped)

예제 #3

파일 보기

파일: test_html.py 프로젝트: ksuhr1/CMPS183-hw3

    def test_web2pyHTMLParser(self):
        """Check parsing and re-serialization of small HTML snippets by web2pyHTMLParser."""
        # tag should not be a byte
        first_component = web2pyHTMLParser("<div></div>").tree.components[0]
        self.assertEqual(first_component.tag, 'div')

        # the unclosed <span> is expected to come back closed
        unbalanced = web2pyHTMLParser('<div>a<span>b</div>c')
        self.assertEqual(str(unbalanced.tree), "<div>a<span>b</span></div>c")

        # an attribute set on a located element must show up in the output
        dom = web2pyHTMLParser('hello<div a="b">world</div>').tree
        target = dom.element(_a='b')
        target['_c'] = 5
        self.assertEqual(str(dom), 'hello<div a="b" c="5">world</div>')

        # img is passed in `closed`; expect it rendered as a self-closing tag
        void_markup = '<div><img class="img"/></div>'
        rendered = str(web2pyHTMLParser(void_markup, closed=['img']).tree)
        self.assertEqual(rendered, '<div><img class="img" /></div>')

        # Numeric character references, decimal first and then hexadecimal:
        #   greater-than sign ( > )  --> &#62; --> &#x3E;
        #   less-than sign    ( < )  --> &#60; --> &#x3C;
        # both forms are expected to serialize as the named entities
        escaped = '<div>&lt; &gt;</div>'
        for entity_markup in ('<div>&#60; &#62;</div>', '<div>&#x3C; &#x3E;</div>'):
            self.assertEqual(str(web2pyHTMLParser(entity_markup).tree), escaped)