def copy(element):
    """Make a shallow clone of a beautifulsoup4 element.

    Only the element's own data is duplicated (name, namespace, prefix,
    attributes and the ``can_be_empty_element`` / ``hidden`` flags).
    The children are not cloned: the very same child objects are appended
    to the clone.

    NOTE(review): bs4's ``append`` appears to detach a node from its
    current parent before attaching it, so after this call the children
    presumably live under the clone rather than under ``element``.
    Confirm against the bs4 version in use, and clone the children
    recursively instead if the original tree must stay intact.

    Works around a bug where there is no builder set:
    https://bugs.launchpad.net/beautifulsoup/+bug/1307471

    Written with reference to
    http://stackoverflow.com/questions/23057631/clone-element-with-beautifulsoup

    :param element: element to clone.
    :type element: bs4.Tag, bs4.NavigableString, bs4.Comment
    :return: the cloned element.
    """
    if isinstance(element, (NavigableString, Comment)):
        # String-like nodes are rebuilt from their own text.
        return type(element)(element)

    clone_element = Tag(None, element.builder, element.name,
                        element.namespace, element.nsprefix)
    clone_element.attrs = dict(element.attrs)
    for attr in ('can_be_empty_element', 'hidden'):
        setattr(clone_element, attr, getattr(element, attr))

    # Iterate over a snapshot of the child list: appending a child to the
    # clone can remove it from ``element.contents``, and mutating a list
    # while iterating it makes the loop skip entries.
    for child in list(element.contents):
        clone_element.append(child)
    return clone_element
def clone_beautiful_soup_tag(elements):
    """Recursively clone a bs4 node, or every item of an iterable of nodes.

    :param elements: a ``Tag``, ``NavigableString`` or ``BeautifulSoup``
        object, or an iterable whose items are cloned one by one.
    :return: the clone of a single node, or a list of clones when an
        iterable was given.
    :raises ElementTypeError: if ``elements`` is ``None``.
    """
    if elements is None:
        raise ElementTypeError('elements is None!')

    node_types = (Tag, NavigableString, BeautifulSoup)
    if not isinstance(elements, node_types):
        # Anything that is not a single node is treated as an iterable.
        return [clone_beautiful_soup_tag(item) for item in elements]

    if isinstance(elements, NavigableString):
        return type(elements)(elements)

    duplicate = Tag(None, elements.builder, elements.name,
                    elements.namespace, elements.nsprefix)
    # work around bug where there is no builder set
    # https://bugs.launchpad.net/beautifulsoup/+bug/1307471
    duplicate.attrs = dict(elements.attrs)
    duplicate.can_be_empty_element = elements.can_be_empty_element
    duplicate.hidden = elements.hidden

    for child in elements.contents:
        duplicate.append(clone_beautiful_soup_tag(child))
    return duplicate
def deepcopy(element):
    """Recursively clone a beautifulsoup4 element.

    The element's own data and the data of all of its descendants are
    cloned, so the result shares no child nodes with the original.

    According to the original author's notes, this avoids the DOM
    corruption seen with beautifulsoup4 4.0.2 when ``append`` is used
    (existing tags becoming unreachable, or methods returning only part
    of the result they would normally return).

    Written with reference to
    http://stackoverflow.com/questions/23057631/clone-element-with-beautifulsoup

    :param element: element to clone.
    :type element: bs4.Tag, bs4.NavigableString, bs4.Comment
    :return: the cloned element.
    """
    text_like = (NavigableString, Comment)
    if isinstance(element, text_like):
        node_class = type(element)
        return node_class(element)

    twin = Tag(None, element.builder, element.name,
               element.namespace, element.nsprefix)

    # work around bug where there is no builder set
    # https://bugs.launchpad.net/beautifulsoup/+bug/1307471
    twin.attrs = dict(element.attrs)
    for flag_name in ('can_be_empty_element', 'hidden'):
        flag_value = getattr(element, flag_name)
        setattr(twin, flag_name, flag_value)

    for descendant in element.contents:
        cloned_descendant = deepcopy(descendant)
        twin.append(cloned_descendant)

    return twin
def construct_xml(self):
    """Build the skeleton of an ``OTA_AirLowFareSearchRQ`` XML request.

    The document root carries the namespace/version attributes plus a
    timestamp and an echo token generated at call time. Two children are
    attached to the root:

    * ``POS/Source/RequestorID`` identifying the requester;
    * ``OriginDestinationInformation`` (``RPH`` = 1) containing a
      ``DepartureDateTime`` element.

    TODO: ``DepartureDateTime`` is still empty -- the original code
    stopped mid-statement here, so the departure date (and presumably the
    remaining itinerary fields) still has to be filled in.

    :return: the ``BeautifulSoup`` document holding the request.
    """
    soup = BeautifulSoup(
        etree.tostring(etree.Element('OTA_AirLowFareSearchRQ')), 'xml')
    query = soup.contents[0]
    query.attrs = {
        'xmlns': 'http://www.opentravel.org/OTA/2003/05',
        'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
        'PrimaryLangId': 'en',
        'Version': '2.001',
        'TimeStamp': datetime.datetime.now().isoformat(),
        'EchoToken': str(time.mktime(time.gmtime())),
        'xsi:schemaLocation':
            'http://www.opentravel.org/2006A/OTA_AirLowFareSearchRQ.xsd',
    }

    # <POS><Source><RequestorID .../></Source></POS>
    t_pos = Tag(name='POS')
    t_source = Tag(name='Source')
    t_req = Tag(name='RequestorID')
    t_req.attrs = {
        'ID': 'weathersick',
        'URL': 'http://www.weathersick.com',
        'Type': '18',
    }
    t_source.append(t_req)
    t_pos.append(t_source)
    query.append(t_pos)

    # <OriginDestinationInformation RPH="1"><DepartureDateTime/></...>
    t_odinf = Tag(name='OriginDestinationInformation')
    t_odinf.attrs = {'RPH': 1}
    t_deptime = Tag(name='DepartureDateTime')
    t_odinf.append(t_deptime)
    query.append(t_odinf)

    return soup
def __call__(self, DOM):
    """Append a ``<script>`` tag pointing at ``self.url`` to ``DOM.body``.

    When ``DOM.body`` is falsy, a new ``body`` tag is first inserted at
    position 0 of ``DOM.html``.

    :param DOM: document to modify in place (presumably a BeautifulSoup
        tree -- confirm with the caller).
    :return: the same ``DOM`` object.
    """
    script_attrs = {
        'type': 'text/javascript',
        'src': self.url,
    }
    script = Tag(name='script', builder=BUILDER)
    script.attrs = script_attrs

    body_missing = not DOM.body
    if body_missing:
        DOM.html.insert(0, Tag(name='body'))

    DOM.body.append(script)
    return DOM
def f(html):
    """Append a ``<script>`` tag whose ``src`` is ``URL`` to ``html.head``.

    When ``html.head`` is falsy, a new ``head`` tag is first inserted at
    position 0 of ``html.html``.

    :param html: document to modify in place (presumably a BeautifulSoup
        tree -- confirm with the caller).
    :return: the same ``html`` object.
    """
    script = Tag(name='script', builder=BUILDER)
    script.attrs = dict([
        ('type', 'text/javascript'),
        ('src', URL),
    ])

    head_missing = not html.head
    if head_missing:
        new_head = Tag(name='head')
        html.html.insert(0, new_head)

    html.head.append(script)
    return html
def __call__(self, DOM):
    """Append a stylesheet ``<link>`` for ``self.url`` to ``DOM.head``.

    When ``DOM.head`` is falsy, a new ``head`` tag is first inserted at
    position 0 of ``DOM.html``.

    :param DOM: document to modify in place (presumably a BeautifulSoup
        tree -- confirm with the caller).
    :return: the same ``DOM`` object.
    """
    stylesheet_attrs = {
        'type': 'text/css',
        'rel': 'stylesheet',
        'href': self.url,
    }
    link = Tag(name='link', builder=BUILDER)
    link.attrs = stylesheet_attrs

    head_missing = not DOM.head
    if head_missing:
        DOM.html.insert(0, Tag(name='head'))

    DOM.head.append(link)
    return DOM
def f(html):
    """Append a stylesheet ``<link>`` whose ``href`` is ``URL`` to ``html.head``.

    When ``html.head`` is falsy, a new ``head`` tag is first inserted at
    position 0 of ``html.html``.

    :param html: document to modify in place (presumably a BeautifulSoup
        tree -- confirm with the caller).
    :return: the same ``html`` object.
    """
    link = Tag(name='link', builder=BUILDER)
    link.attrs = dict([
        ('type', 'text/css'),
        ('rel', 'stylesheet'),
        ('href', URL),
    ])

    head_missing = not html.head
    if head_missing:
        new_head = Tag(name='head')
        html.html.insert(0, new_head)

    html.head.append(link)
    return html
def clone(el):
    """Return a recursive copy of a bs4 node.

    ``NavigableString`` instances are rebuilt through their own class.
    Any other node is rebuilt as a new ``Tag`` with the same builder,
    name, namespace, prefix, attributes and flags, and with clones of
    all of its children.

    :param el: node to copy.
    :return: the copy.
    """
    if isinstance(el, NavigableString):
        string_class = type(el)
        return string_class(el)

    replica = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
    replica.attrs = dict(el.attrs)

    # Carry over the flags explicitly.
    replica.can_be_empty_element = el.can_be_empty_element
    replica.hidden = el.hidden

    for sub_node in el.contents:
        replica.append(clone(sub_node))
    return replica
def clone(el):
    """Deep-copy a bs4 node together with all of its descendants.

    :param el: a ``NavigableString`` (copied through its own class) or a
        tag-like node (rebuilt as a fresh ``Tag``).
    :return: the detached copy.
    """
    if isinstance(el, NavigableString):
        return type(el)(el)

    # Clone the subtree first; reading ``el.contents`` does not modify ``el``.
    cloned_children = [clone(kid) for kid in el.contents]

    result = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
    # work around bug where there is no builder set
    # https://bugs.launchpad.net/beautifulsoup/+bug/1307471
    result.attrs = dict(el.attrs)
    for flag in ('can_be_empty_element', 'hidden'):
        setattr(result, flag, getattr(el, flag))

    for kid_copy in cloned_children:
        result.append(kid_copy)
    return result
def soup(self):
    """Return this object's HTML as a BeautifulSoup ``Tag``.

    A tag named ``self.tagname`` is created and given ``self.attributes``
    as its attribute mapping (the same dict object, not a copy). Each item
    produced by ``flatten(self.components)`` is then handled as follows:

    * items exposing a ``soup`` attribute contribute ``item.soup()``;
    * items whose type is exactly ``str`` are appended as text;
    * everything else is skipped.

    :return: the assembled tag.
    """
    root = Tag(name=self.tagname, builder=BUILDER)
    root.attrs = self.attributes

    for part in flatten(self.components):
        if hasattr(part, 'soup'):
            rendered = part.soup()
        elif type(part) is str:
            # Exact-type check on purpose: ``str`` subclasses are skipped.
            rendered = str(part)
        else:
            # Not a component and not plain text: leave it out.
            continue
        root.append(rendered)

    return root
def clone_bs4_elem(el):
    """Clone a bs4 tag before modifying it.

    Code from
    http://stackoverflow.com/questions/23057631/clone-element-with-beautifulsoup

    :param el: ``NavigableString`` or tag-like node to clone.
    :return: a copy of ``el`` whose children are clones as well.
    """
    if isinstance(el, NavigableString):
        return type(el)(el)

    new_tag = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)

    # work around bug where there is no builder set
    # https://bugs.launchpad.net/beautifulsoup/+bug/1307471
    new_tag.attrs = dict(el.attrs)
    new_tag.can_be_empty_element = el.can_be_empty_element
    new_tag.hidden = el.hidden

    # ``map`` is lazy, so each child is cloned right before it is appended.
    for child_copy in map(clone_bs4_elem, el.contents):
        new_tag.append(child_copy)

    return new_tag
def clean_tag_attributes(tag: Tag) -> None:
    """Drop every attribute of ``tag`` whose name is not in ``ALLOWED_ATTRIBUTES``.

    ``NavigableString`` instances are left untouched. For any other node
    the ``attrs`` mapping is replaced by a new, filtered dict.

    :param tag: node to clean in place.
    """
    if isinstance(tag, NavigableString):
        return

    kept = {}
    for key, value in tag.attrs.items():
        if key in ALLOWED_ATTRIBUTES:
            kept[key] = value
    tag.attrs = kept
def clean_tag_attributes(tag: Tag) -> None:
    """Keep only the attributes of ``tag`` named in ``allowed_attributes``.

    ``NavigableString`` instances are ignored. For any other node the
    ``attrs`` mapping is replaced by a new dict holding the surviving
    name/value pairs in their original order.

    :param tag: node to clean in place.
    """
    if isinstance(tag, NavigableString):
        return None

    surviving = [
        (name, value)
        for name, value in tag.attrs.items()
        if name in allowed_attributes
    ]
    tag.attrs = dict(surviving)