def getContent(self, item, bump_headings=False):
    """Convert one parsed HTML node into a list of PDF flowables.

    ``item`` is either a ``Tag`` or a ``NavigableString``; anything else
    yields an empty list.  Tags are dispatched on their element name:

    * ``h2``-``h6``: a heading paragraph kept with the following flowable;
      ``h2`` is also followed by a horizontal rule unless
      ``bump_headings`` is true.
    * ``table``: a left-aligned table, kept together with its caption
      when ``self.getTableData`` returns one.
    * ``ul`` / ``ol``: one ``BulletList`` paragraph per ``li`` found.
    * ``p``, ``div.captionedImage`` and any remaining tag carrying the
      ``callout`` or ``pullquote`` class: images are pulled out and
      emitted first, then the text is emitted in the matching style.
    * other ``div``: each child is converted recursively.
    * ``blockquote``: a ``Blockquote`` paragraph.
    * everything else: a ``Normal`` paragraph.

    :param item: parsed HTML node to convert.
    :param bump_headings: when true, no rule is drawn under ``h2``
        headings.  The flag is passed on to nested ``div`` children.
    :returns: list of flowables, possibly empty.
    """
    pdf = []

    def class_names(tag):
        # The 'class' attribute is a plain string in BeautifulSoup 3 but a
        # list in BeautifulSoup 4; normalise both to a list of tokens.
        value = tag.get('class', '') or ''
        if hasattr(value, 'split'):
            value = value.split()
        return list(value)

    if isinstance(item, Tag):
        className = class_names(item)
        item_type = item.name

        # Same truth table as the original un-parenthesised and/or mix:
        # 'callout' and 'pullquote' qualify on any tag, not only on <div>
        # (e.g. a <blockquote class="pullquote"> is rendered as a callout).
        paragraph_like = (
            item_type == 'p'
            or (item_type == 'div' and 'captionedImage' in className)
            or 'callout' in className
            or 'pullquote' in className
        )

        if item_type in ('h2', 'h3', 'h4', 'h5', 'h6'):
            item_style = self.tag_to_style.get(item_type)
            h = Paragraph(self.getItemText(item), self.styles[item_style])
            h.keepWithNext = True
            pdf.append(h)

            if item_type == 'h2' and not bump_headings:
                hr = HRFlowable(width='100%', thickness=0.25,
                                spaceBefore=2, spaceAfter=4,
                                color=self.styles[item_style].textColor)
                hr.keepWithNext = True
                pdf.append(hr)

        elif item_type == 'table':
            (table_data, table_style, caption) = self.getTableData(item)
            table = Table(table_data)
            table.setStyle(table_style)
            table.hAlign = 'LEFT'
            table.spaceBefore = 10
            table.spaceAfter = 10

            if caption:
                caption_el = Paragraph(self.getInlineContents(caption),
                                       self.styles['Discreet'])
                pdf.append(KeepTogether([table, caption_el]))
            else:
                pdf.append(table)

        elif item_type == 'ul':
            for i in item.findAll('li'):
                pdf.append(Paragraph(
                    '<bullet>•</bullet>%s' % self.getInlineContents(i),
                    self.styles['BulletList']))

        elif item_type == 'ol':
            # Sequences were incrementing based on previous PDF generations.
            # Including explicit ID and reset
            li_uuid = uuid1().hex

            for i in item.findAll('li'):
                pdf.append(Paragraph(
                    '<seq id="%s" />. %s' % (li_uuid,
                                              self.getInlineContents(i)),
                    self.styles['BulletList']))

            pdf.append(Paragraph('<seqReset id="%s" />' % li_uuid,
                                 self.styles['Normal']))

        elif paragraph_like:
            # True only once an image flowable has really been appended, so
            # the pdf[-1] access below can never hit an empty list.
            has_image = False

            # Pull images out of items and add before
            for img in item.findAll('img'):
                img.extract()

                src = img.get('src')
                if not src:
                    # An <img> without a src cannot be resolved; drop it.
                    continue

                src = src.replace(self.site.absolute_url(), '')
                if src.startswith('/'):
                    src = src.replace('/', '', 1)

                img_obj = self.getPloneImageObject(src)
                if not img_obj:
                    continue

                img_data = img_obj.image.data
                try:
                    pil_image = self.getImageFromData(img_data)
                except IOError:
                    # Unreadable image data: best effort, skip this image.
                    continue

                pdf.append(self.getImage(pil_image))
                has_image = True

            # If we had an image, and the next paragraph has the
            # 'discreet' class (is a caption) then keep them together
            if has_image:
                s = item.findNextSiblings()
                if s and 'discreet' in class_names(s[0]):
                    pdf[-1].keepWithNext = True

            # Get paragraph contents
            p_contents = self.getInlineContents(item)

            # Don't add anything if no contents.
            if not p_contents:
                pass
            elif 'callout' in className or 'pullquote' in className:
                pdf.append(Paragraph(p_contents, self.styles['Callout']))
            elif 'discreet' in className or 'captionedImage' in className:
                # Keep a caption on the same page as the image it follows.
                if pdf and isinstance(pdf[-1], Image):
                    pdf[-1].keepWithNext = True
                pdf.append(Paragraph(p_contents, self.styles['Discreet']))
            else:
                pdf.append(Paragraph(p_contents, self.styles["Normal"]))

        elif item_type == 'div':
            # Plain container: flatten its children, carrying the heading
            # flag along so nested h2 elements are treated like top-level
            # ones.
            for i in item.contents:
                pdf.extend(self.getContent(i, bump_headings=bump_headings))

        elif item_type == 'blockquote':
            pdf.append(Paragraph(self.getItemText(item),
                                 self.styles['Blockquote']))

        else:
            pdf.append(Paragraph(self.getItemText(item),
                                 self.styles["Normal"]))

    elif isinstance(item, NavigableString):
        # Bare text node: emit only if it is not just whitespace.
        if item.strip():
            pdf.append(Paragraph(item, self.styles["Normal"]))

    return pdf