def rewriteLinksSection(dom, soup, links_table):
    """Replace a scraped links table with a bold "Links" caption and a list.

    Every ``<font size="2" face="verdana">`` element in ``links_table`` whose
    markup starts directly with an anchor contributes one ``(href, caption)``
    pair.  Captions ending in ' Translation' are skipped when the module-level
    ``OMIT_TRANSLATIONS`` flag is truthy.  The table is always removed from
    the tree; the replacement markup is appended to ``dom`` only when
    ``INCLUDE_LINKS`` is truthy and at least one link was collected.

    :param dom: node that receives the generated ``<b>`` and ``<ul>`` tags
    :param soup: soup object passed to ``Tag`` when creating new elements
    :param links_table: the table element to harvest and then decompose
    :return: None
    """
    anchor_prefix = '<font size="2" face="verdana"><a href="'
    collected = []
    for font_tag in links_table.findAll('font', {'size': '2', 'face': 'verdana'}):
        # Only fonts that wrap an anchor as their very first child qualify.
        if not str(font_tag).startswith(anchor_prefix):
            continue
        anchor = font_tag.find('a')
        text = anchor.getText('').strip()
        if text.endswith(' Translation') and OMIT_TRANSLATIONS:
            continue
        collected.append((anchor['href'], text))

    # The source table is dropped whether or not anything replaces it.
    links_table.decompose()

    if not INCLUDE_LINKS or not collected:
        return

    heading = Tag(soup, 'b')
    heading.string = 'Links'
    dom.append(heading)

    listing = Tag(soup, 'ul')
    for href, text in collected:
        item = Tag(soup, 'li')
        entry = Tag(soup, 'a', {'href': href})
        entry.string = text
        item.append(entry)
        listing.append(item)
    dom.append(listing)
def generate_table(summary):
    """Render a test summary as a bordered HTML table.

    The table starts with a fixed header row (TestSuite / Passed / Failed /
    Total) followed by one row per entry of ``summary``; every value in an
    entry becomes one cell via ``str()``.

    :param summary: iterable of rows, each an iterable of cell values
    :return: the prettified HTML as a ``str`` (also printed to stdout)
    """
    soup = BeautifulSoup()

    table = Tag(soup, "table")
    table["border"] = 1
    table["cellspacing"] = 0
    table["cellpadding"] = 0
    table["bordercolordark"] = "#000000"
    # The light border colour is the counterpart of bordercolordark.  Writing
    # it to "cellspacing" (as was done before) replaced the spacing of 0 set
    # above with a colour value.
    table["bordercolorlight"] = "#ffffff"
    soup.append(table)

    header_row = Tag(soup, "tr")
    header_row["bgcolor"] = "#0072E3"
    table.append(header_row)
    for title in ("TestSuite", "Passed", "Failed", "Total"):
        cell = Tag(soup, "td")
        cell.string = title
        header_row.append(cell)

    for record in summary:
        row = Tag(soup, "tr")
        table.append(row)
        for value in record:
            cell = Tag(soup, "td")
            cell.string = str(value)
            row.append(cell)

    # Serialise once and reuse the result for both the log line and the
    # return value.
    rendered = str(soup.prettify())
    print(rendered)
    return rendered
def ConvertToTestHtml(quest):
    """Build the HTML fragment that presents one test question.

    The fragment contains, in order: a ``<p>`` with the question text, an
    optional image element when ``quest.img`` is set, and a ``<p>`` holding
    one input per answer.  Questions whose ``type`` is 1 get text inputs; all
    other types get radio buttons.

    :param quest: question object; ``id``, ``type``, ``text`` and ``img`` are
        read here
    :return: the prettified HTML produced by BeautifulSoup
    """
    question_kind = quest.type
    answers = RETestAnswer.objects.filter(question__id__in=[quest.id])

    newbs = BeautifulSoup()
    answers_node = Tag(newbs, 'p')
    newbs.insert(0, answers_node)

    if quest.img:
        print('Image!!!')
        print(quest.img.url)
        image_node = Tag(newbs, 'image', [('src', quest.img.url)])
        newbs.insert(0, image_node)

    # Inserted last at index 0 so the question text ends up first.
    title_node = Tag(newbs, 'p')
    title_node.string = quest.text
    newbs.insert(0, title_node)

    # NOTE(review): both branches embed answer data (correctness flag /
    # answer text) in the markup itself - confirm this is intended.
    if question_kind != 1:
        for index, answer in enumerate(answers):
            # Each radio button gets a distinct id (ans0, ans1, ...).  The
            # counter used to stay at 0 and the name was emitted as a second,
            # conflicting "type" attribute.
            radio = Tag(newbs, 'input', [
                ('type', 'radio'),
                ('id', 'ans' + str(index)),
                ('name', 'answerradio'),
                ('value', str(answer.is_correct)),
            ])
            radio.string = answer.name
            answers_node.append(radio)
            answers_node.append(Tag(newbs, 'br'))
    else:
        for answer in answers:
            text_input = Tag(newbs, 'input', [
                ('type', 'text'),
                ('name', 'answertext'),
                ('ans', answer.name),
            ])
            answers_node.append(text_input)
            answers_node.append(Tag(newbs, 'br'))

    return newbs.prettify()
def AllCategories(request):
    """Render the category tree for the requesting user as HTML list items.

    Top-level categories (those without a parent) are fetched ordered by
    descending ``number``.  For each one an ``<li>`` is built holding four
    ``<div>`` cells (name, type, 3D flag, rating) and the children are
    appended through ``recurseCategories``.

    :param request: request object; only ``request.user`` is read
    :return: the prettified HTML of all generated items
    """
    print('allcat')
    x = BeautifulSoup()
    top_level = RECategory.objects.filter(parent__isnull=True).order_by('-number')

    # Logs are walked newest first and only the first entry seen per
    # (category, log type) pair is kept, i.e. the most recent one.
    AllAnswered = {}
    for entry in RELog.objects.filter(user=request.user).order_by('-date'):
        per_category = AllAnswered.setdefault(entry.category_id, {})
        per_category.setdefault(entry.type_log, entry)

    for category in top_level:
        print(category.id)
        item = Tag(x, 'li', [("id", str(category.id))])

        rating = ''
        category_logs = AllAnswered.get(category.id)
        if category_logs:
            # NOTE(review): 5 is presumably the log type that carries a
            # rating - confirm against the RELog definition.
            rated = category_logs.get(5)
            if rated:
                rating = 'Оценка: ' + str(rated.rating)

        if category.is_3d:
            has_3d = "Есть"
        else:
            has_3d = "Нет"

        # Every cell is inserted at index 0, so the rendered order is the
        # reverse of this list: name, demo, is3d, rating.
        cells = [
            (rating, "rating"),
            (has_3d, "is3d"),
            (str(category.type_category), "demo"),
            (category.name, None),
        ]
        for text, css_class in cells:
            cell = Tag(x, 'div')
            cell.string = text
            if css_class is not None:
                cell["class"] = css_class
            item.insert(0, cell)

        x.insert(0, item)
        recurseCategories(category, item, x, AllAnswered)

    res = x.prettify()
    print('endallcat')
    return res
def generate_table_of_contents(soup, prefix):
    """Build a nested table of contents and stamp ids onto the headers.

    Every element matched by the module-level ``header_re`` that has text
    receives an ``id`` derived from ``prefix`` and its text, and a matching
    ``<li><a href="#id">`` entry is added to the table of contents.  Deeper
    header levels open a nested ``<ul>``; shallower ones climb back up.

    :param soup: parsed document; its headers are modified in place
    :param prefix: string prepended to every generated id
    :return: the ``<div class="toc">`` tag, or None when no header matched
    """
    headers = soup.findAll(header_re)
    if not headers:
        return

    seen_ids = Counter()
    toc_root = Tag(soup, "div", [("class", "toc")])
    current_list = Tag(soup, "ul")
    current_list.level = 0
    toc_root.append(current_list)

    depth = 0
    last_level = 0
    for header in headers:
        text = u''.join(header.findAll(text=True))
        # Headers without any text get neither an id nor an entry.
        if not text:
            continue

        # Resolve entities first so they do not leak into the id.
        decoded = unicode(
            BeautifulSoup(text, convertEntities=BeautifulSoup.XML_ENTITIES))
        # The prefix keeps generated ids apart from the rest of the page.
        anchor = u'%s_%s' % (prefix, decoded.replace(" ", "_").lower())
        # Characters matched by title_re are replaced by ".<HEX code point>".
        anchor = str(title_re.sub(lambda m: '.%X' % ord(m.group()), anchor))

        # Repeated ids are numbered starting with the second occurrence.
        seen_ids[anchor] += 1
        occurrence = seen_ids[anchor]
        if occurrence > 1:
            anchor = '%s%d' % (anchor, occurrence)

        header['id'] = anchor

        entry = Tag(soup, "li", [("class", anchor)])
        link = Tag(soup, "a", [("href", "#%s" % anchor)])
        link.string = text
        entry.append(link)

        header_level = int(header.name[-1])
        if last_level and header_level > last_level:
            # Going deeper: open a nested list wrapped in its own item.
            nested = Tag(soup, "ul")
            nested.level = header_level
            wrapper = Tag(soup, "li", [("class", "toc_child")])
            wrapper.append(nested)
            current_list.append(wrapper)
            current_list = nested
            depth += 1
        elif depth and header_level < last_level:
            # Going shallower: climb until the enclosing list is not deeper
            # than this header (or the top list is reached).
            while depth and current_list.level > header_level:
                current_list = current_list.findParent("ul")
                depth -= 1

        last_level = header_level
        current_list.append(entry)

    return toc_root
def makeHTMLQuestion(fn, htmldata):
    """Prepare an HTML page as an MTurk external question and upload it.

    The page gets an ``onload`` hook on ``<body>``, the submit JavaScript as
    the first child of ``<head>``, and every ``<form>`` is given default
    ``method`` / ``action`` / ``onSubmit`` attributes (only where missing)
    plus a hidden ``assignmentId`` input.  The page and every image it
    references are then uploaded.

    :param fn: path of the HTML file; also the base for relative image paths
    :param htmldata: the HTML source to transform
    :return: ``ExternalQuestion`` built from the uploaded page URL
    """
    soup = BeautifulSoup(htmldata)

    # Hook up the JavaScript.  The placeholder is substituted after
    # serialisation, on the final string.
    soup.find('body')['onload'] = "populateAssignmentID('myAssignmentId')"
    scripttag = Tag(soup, "script")
    scripttag['type'] = "text/javascript"
    scripttag.string = "__SUBMIT_JS__"
    soup.find('head').insert(0, scripttag)

    # Point the forms at MTurk, respecting attributes that are already set.
    for form in soup.findAll('form'):
        if not form.has_key('method'):
            form['method'] = 'POST'
        if not form.has_key('action'):
            if testmode:
                form['action'] = 'https://workersandbox.mturk.com/mturk/externalSubmit'
            else:
                form['action'] = 'https://www.mturk.com/mturk/externalSubmit'
        if not form.has_key('onSubmit'):
            form['onSubmit'] = "return verifyTurkSubmit('myAssignmentId');"

        inputtag = Tag(soup, 'input')
        inputtag['type'] = 'hidden'
        inputtag['name'] = 'assignmentId'
        inputtag['id'] = 'myAssignmentId'
        inputtag['value'] = ''
        form.insert(0, inputtag)

    html = str(soup).replace("__SUBMIT_JS__", SUBMIT_JS)
    mainurl = uploadfile(fn, html)

    # Resolve every image against the directory of the HTML file itself.
    # Reusing ``fn`` as the loop variable made each image resolve against
    # the previous image's directory, which breaks as soon as a ``src``
    # contains a sub-directory.
    base_dir = dirname(fn)
    for image in soup.findAll('img'):
        # TODO (carried over from the original, which left it unexplained)
        uploadfile(base_dir + '/' + image['src'])

    return ExternalQuestion(escape(mainurl), frame_height)
def format_title_link(title, title_link):
    """Return an ``<a>`` element, as a string, linking to a Wikipedia page.

    :param title: text shown inside the anchor
    :param title_link: page name appended to the en.wikipedia.org wiki URL
    :return: the serialised anchor tag
    """
    target = 'http://en.wikipedia.org/wiki/%s' % title_link
    anchor = Tag(BeautifulSoup(''), 'a')
    anchor['href'] = target
    anchor.string = title
    return str(anchor)
def save():
    """Write a review or decision from the request JSON back into a paper.

    The paper named by ``json_data["doc"]`` is opened for in-place update.
    For a review, an earlier JSON-LD script by the same author is removed and
    all ``<section>`` elements are replaced by the submitted ones.  In every
    case the submitted JSON-LD script is appended to ``<head>`` and the file
    is rewritten.

    :return: JSON response ``{"result": True}``; any failure propagates as an
        exception before a response is built
    """
    json_data = request.json
    status = False
    with open(ret_url(json_data["doc"], "/papers"), "r+") as inf:
        soup = BeautifulSoup(inf.read())

        # Only a review rewrites the body; a decision goes straight to the
        # head.
        if json_data["type"] == "review":
            # Drop the previous review script by this author, if any.
            for script in soup.findAll("script", {"type": "application/ld+json"}):
                data = json.loads(script.text.strip())
                if (data[0]["@type"] == "review"
                        and data[0]["article"]["eval"]["author"]
                        == "mailto:" + json_data["author"]):
                    script.extract()
                    break

            # Remove the current sections and write the submitted ones.
            for section in soup.findAll("section"):
                section.extract()
            for section in json_data["sections"]:
                soup.body.append(BeautifulSoup(section))

        # Create the JSON-LD script and append it to the head.
        new = Tag(soup, "script")
        new.attrs.append(("type", "application/ld+json"))
        new.string = json.dumps(json_data["script"])
        soup.head.append(new)

        # Overwrite the file in place; truncate in case the new content is
        # shorter than the old.  The ``with`` block closes the file.
        html = soup.prettify("utf_8")
        inf.seek(0)
        inf.write(html)
        inf.truncate()
        status = True
    return jsonify({"result": status})
def generate_table_of_contents(soup, prefix):
    """Create a table-of-contents ``<div>`` for the headers found in ``soup``.

    Headers are located with the module-level ``header_re``.  Each non-empty
    header is assigned an ``id`` built from ``prefix`` and its text, and is
    linked from an ``<li>`` in a (possibly nested) ``<ul>`` structure.

    :param soup: parsed document whose headers receive ``id`` attributes
    :param prefix: prefix for generated ids
    :return: ``<div class="toc">`` tag, or None if there are no headers
    """
    id_counts = Counter()
    found = soup.findAll(header_re)
    if not found:
        return

    container = Tag(soup, "div", [("class", "toc")])
    active_ul = Tag(soup, "ul")
    active_ul.level = 0
    container.append(active_ul)

    nesting = 0
    prior = 0
    for heading in found:
        label = u''.join(heading.findAll(text=True))
        if not label:
            # Nothing to link to for an empty header.
            continue

        # Entities are converted so that ids are built from plain characters.
        plain = unicode(
            BeautifulSoup(label, convertEntities=BeautifulSoup.XML_ENTITIES))
        # Prefixing avoids clashes with ids elsewhere on the page.
        ident = u'%s_%s' % (prefix, plain.replace(" ", "_").lower())
        # Whatever title_re matches is rewritten as '.' plus its hex code.
        ident = str(title_re.sub(lambda hit: '.%X' % ord(hit.group()), ident))

        # The first use of an id stays bare; later uses get 2, 3, ...
        nth = id_counts[ident] + 1
        id_counts[ident] = nth
        if nth > 1:
            ident = '%s%d' % (ident, nth)

        heading['id'] = ident

        row = Tag(soup, "li", [("class", ident)])
        ref = Tag(soup, "a", [("href", "#%s" % ident)])
        ref.string = label
        row.append(ref)

        current = int(heading.name[-1])
        if prior and current > prior:
            # Descend: a new <ul> lives inside a "toc_child" <li>.
            child_ul = Tag(soup, "ul")
            child_ul.level = current
            holder = Tag(soup, "li", [("class", "toc_child")])
            holder.append(child_ul)
            active_ul.append(holder)
            active_ul = child_ul
            nesting += 1
        elif nesting and current < prior:
            # Ascend while the active list is deeper than this header.
            while nesting and active_ul.level > current:
                active_ul = active_ul.findParent("ul")
                nesting -= 1

        prior = current
        active_ul.append(row)

    return container
def recurseCategories(parentCat, root, x, AllAnswered):
    """Append the children of ``parentCat`` to ``root`` as a hidden sub-list.

    When the category has children, a ``<ul style="display:none">`` is
    appended to ``root`` and filled with one ``<li>`` per child (name, type,
    3D flag and rating cells), recursing into each child in turn.

    :param parentCat: category whose ``children()`` are rendered
    :param root: tag that receives the nested list
    :param x: soup object passed to ``Tag`` when creating elements
    :param AllAnswered: mapping ``category id -> {log type -> log}``
    :return: None
    """
    children = parentCat.children()
    if not children:
        return

    sublist = Tag(x, 'ul', [('style', 'display:none')])
    root.insert(len(root.contents), sublist)

    for category in children:
        sublist.insert(len(sublist.contents), '\n')
        item = Tag(x, "li", [("id", str(category.id))])

        rating = ''
        category_logs = AllAnswered.get(category.id)
        if category_logs:
            # NOTE(review): 5 is presumably the log type that carries a
            # rating - confirm against the log model.
            rated = category_logs.get(5)
            if rated:
                rating = 'Оценка: ' + str(rated.rating)

        has_3d = "Есть" if category.is_3d else "Нет"

        # Cells are inserted at index 0 one after another, so they render in
        # the reverse of this order: name, demo, is3d, rating.
        cells = [
            (rating, "rating"),
            (has_3d, "is3d"),
            (str(category.type_category), "demo"),
            (category.name, None),
        ]
        for text, css_class in cells:
            cell = Tag(x, 'div')
            cell.string = text
            if css_class is not None:
                cell["class"] = css_class
            item.insert(0, cell)

        sublist.insert(len(sublist.contents), item)
        recurseCategories(category, item, x, AllAnswered)
def CreateSidebar(self, tag):
    '''Fill ``tag`` with sidebar content.

    An ``<h3>`` heading is always appended.  A paragraph follows when
    ``_Probability(20)`` is truthy, and then either a list (when
    ``_Probability(90)`` is truthy) or a select element.
    '''
    heading = Tag(self.soup, 'h3')
    heading.string = self.textShortCap
    tag.append(heading)

    if self._Probability(20):
        paragraph = self.CreateParagraph()
        tag.append(paragraph)

    if self._Probability(90):
        widget = self.CreateList(0)
    else:
        widget = self.CreateSelect()
    tag.append(widget)
def CreateSelect(self):
    '''Create a ``<select>`` element holding 3 to 12 ``<option>`` children.

    The first option is marked as selected when ``_Probability(80)`` is
    truthy.  The finished element is passed through ``AppendIds`` and
    ``ShuffleAttributes`` before being returned.
    '''
    dropdown = Tag(self.soup, 'select')
    dropdown['name'] = self.GenerateName()

    option_count = random.randint(3, 12)
    for _ in range(option_count):
        choice = Tag(self.soup, 'option')
        choice['value'] = self.textShort
        choice.string = self.textShortCap
        dropdown.append(choice)

    if self._Probability(80):
        # Attribute access yields the first <option> child.
        dropdown.option['selected'] = 'selected'

    self.AppendIds(dropdown, 10, 30)
    self.ShuffleAttributes(dropdown)
    return dropdown
def replaceImageWithHeading(img, tag, title, soup):
    """Swap an image for a heading and drop up to two ``<br>`` tags after it.

    :param img: element to replace
    :param tag: name of the heading element to create (passed to ``Tag``)
    :param title: text of the new heading
    :param soup: soup object passed to ``Tag``
    :return: None
    """
    heading = Tag(soup, tag)
    heading.string = title
    img.replaceWith(heading)

    # At most two passes; each pass removes one <br>.  Blank text between the
    # heading and a <br> is skipped over; anything else ends the clean-up.
    for _pass in range(2):
        for sibling in heading.nextSiblingGenerator():
            if not isinstance(sibling, Tag):
                if str(sibling).strip() != '':
                    return
                continue
            if sibling.name != 'br':
                return
            sibling.decompose()
            # Restart from the heading: the sibling chain just changed.
            break
def CreateTitle(self):
    '''Create a ``<title>`` element from a random line of titles.txt.

    The chosen line is stripped and wrapped in {TITLECASE}...{/TITLECASE}
    markers.
    '''
    node = Tag(self.soup, 'title')
    candidates = self._GetFileLines('titles.txt')
    chosen = random.choice(candidates).strip()
    node.string = '{TITLECASE}%s{/TITLECASE}' % chosen
    return node
def CreateLinkText(self):
    '''Create an ``<a>`` element whose anchor is plain text.

    ``href`` comes from ``self.urlPage`` and the text from ``self.textShort``.
    '''
    link = Tag(self.soup, 'a')
    destination = self.urlPage
    link['href'] = destination
    caption = self.textShort
    link.string = caption
    return link
def CreateSpan(self):
    '''Create a ``<span>`` element containing ``self.textShort``.'''
    element = Tag(self.soup, 'span')
    content = self.textShort
    element.string = content
    return element
def CreateTitle(self):
    '''Build the page ``<title>`` element.

    A random line from titles.txt is stripped of surrounding whitespace and
    placed between {TITLECASE} and {/TITLECASE} markers.
    '''
    title_tag = Tag(self.soup, 'title')
    picked_line = random.choice(self._GetFileLines('titles.txt'))
    headline = picked_line.strip()
    title_tag.string = '{TITLECASE}%s{/TITLECASE}' % headline
    return title_tag
def CreateParagraph(self):
    '''Create a ``<p>`` element containing ``self.textMiddle``.'''
    paragraph = Tag(self.soup, 'p')
    body = self.textMiddle
    paragraph.string = body
    return paragraph
def embed_photo_content(article, idx, photo, soup, sizex=320, sizey=240):
    """
    Build the ``<div>`` that embeds a Flickr photo together with its
    link, vote and upload controls.

    :param article: article name; concatenated into the upload link's query
        string
    :param idx: index of the photo; used as the suffix of every generated
        element id
    :param photo: mapping with photo info (described in the original
        docstring as coming from flickr.photos.search); the keys read here
        are ``title``, ``id``, ``votes``, ``owner``, ``photo_id``, ``farm``,
        ``server`` and ``secret``
    :param soup: soup object passed to ``Tag`` when creating elements
    :param sizex: value written into the ``width`` attribute of the image
    :param sizey: accepted but not used by this function
    :return: the outer ``<div>`` tag whose string content is the assembled
        markup
    """
    tag = Tag(soup, 'div')
    tag['title'] = photo['title']
    tag['class'] = settings.SECTION_IMG_CLASS
    tag['id'] = settings.SECTION_IMG_CLASS + '-' + str(idx)
    # NOTE(review): both photo['id'] (here) and photo['photo_id'] (in the
    # URLs below) are read - confirm the two keys are meant to differ.
    tag['photo-id'] = photo['id']
    tag['votes'] = photo['votes']

    # Tag for link glyph
    tag_link_container = Tag(soup, 'div')
    tag_link_container['id'] = 'link-glyph-' + str(idx)
    tag_link_container['class'] = 'link-glyph'
    tag_link_container['style'] = 'position: absolute; top:0; float:left; ' \
        'left:100; z-index:150'
    tag_link = Tag(soup, 'a')
    tag_link['href'] = 'https://www.flickr.com/photos/%s/%s' % (
        photo['owner'], photo['photo_id'])
    # The glyph image is supplied as ready-made markup text rather than as a
    # child Tag.
    tag_link.string = '<img style="opacity:0.4; background-color:#cccccc;" ' \
        'src="/static/img/link.png" width="25" height="25">'
    tag_link_container.string = str(tag_link)

    # Format the image block
    #
    # 1. Define the outer div
    # 2. Define the img element for Flickr images
    # 3. Define the inner divs which contains the vote glyph, endorse and reject glyphs
    outer_div = '<div style="position: relative; z-index:100">%s%s%s</div>'
    outer_div += '<div style="clear:both;"> </div>%s'
    inner_div = '<div id="vote-glyph-' + str(idx) + '"' + \
        ' class="vote-glyph" style="position: absolute; bottom:0; ' \
        'left:10; z-index:150">' \
        '<div id="endorse-' + str(idx) + '" class="endorse" ' \
        'style="float:left"></div>' \
        '<div id="endorsecount-' + str(idx) + '" class="endorsecount" ' \
        'style="float:left"></div>' \
        '<div id="exclude-' + str(idx) + '" class="exclude" ' \
        'style="float:left"></div>' \
        '<div id="excludecount-' + str(idx) + '" class="excludecount" ' \
        'style="float:left"></div>' \
        '</div>'
    img_url = 'https://farm%s.staticflickr.com/%s/%s_%s.jpg' % (photo['farm'], photo['server'], photo['photo_id'], photo['secret'])
    inner_img = '<img src="' + img_url + '" width="' + str(sizex) + '">'

    # Tag for upload glyph
    tag_upload = Tag(soup, 'a')
    # NOTE(review): img_url and article are concatenated into the query
    # string without URL-encoding - confirm callers only pass safe values.
    tag_upload['href'] = settings.SITE_URL + '/mwupload?photourl=' + \
        img_url + '&article=' + article + '&' + \
        settings.GET_VAR_FLICKR_PHOTO_ID + '=' + photo['photo_id']
    tag_upload['target'] = '_blank'
    tag_upload.string = 'Upload to Wikimedia Commons?'

    # Slot order in outer_div: vote glyphs, link glyph and image inside the
    # positioned wrapper, then the upload link after the clearing div.
    tag.string = outer_div % (inner_div, str(tag_link_container), inner_img, str(tag_upload))
    return tag