def value_from_datadict(
    self,
    data: Dict[str, Any],
    files: Mapping[str, Iterable[Any]],
    name: str,
) -> str:
    """
    Convert the value submitted by the JSON editor into the value to save.

    A value that the parent widget already returns as a dict or list is used
    directly. Anything else is reduced to text with ``soupify(...).get_text()``
    and parsed as JSON; if that fails, the error is logged and the submitted
    value is returned untouched. For dict payloads, top-level attributes whose
    value is None are dropped. The result is serialized with ``json.dumps``.
    """
    submitted = super().value_from_datadict(data, files, name)
    parsed = submitted
    if not isinstance(submitted, (dict, list)):
        logging.debug(f'JSON string from editor: {submitted}')
        try:
            editor_text = soupify(submitted).get_text()
            parsed = json.loads(editor_text)
        except Exception as err:
            # Best effort: hand back whatever the editor submitted.
            logging.error(
                f'Error loading value from JSON editor widget: {err}')
            return submitted

    # Lists (and any other JSON value) are serialized without pruning.
    if not isinstance(parsed, dict):
        return json.dumps(parsed)

    logging.debug(
        f'JSON before removing null attributes: {pformat(parsed)}')
    pruned = {}
    for key, entry in parsed.items():
        if entry is not None:
            pruned[key] = entry
    logging.debug(
        f'JSON after removing null attributes: {pformat(pruned)}')
    return json.dumps(pruned)
def get_slug(self):
    """
    Get a slug for the model instance.

    If the instance declares a ``slug_base_field``, the attribute it names
    (defaulting to the primary key when that attribute is absent) is
    stringified, reduced to plain text when it contains ``<``, and slugified.

    Returns the slug, or the primary key when no slug could be produced.
    """
    slug = None
    slug_base_field = getattr(self, 'slug_base_field', None)
    if slug_base_field:
        base_value = getattr(self, slug_base_field, self.pk)
        # A null base value must not be stringified to 'None' and slugified
        # as if it were real content; skip it so the pk fallback below
        # applies, exactly as it already does for an empty string.
        if base_value is not None:
            slug_base = str(base_value)
            if '<' in slug_base:
                # The value contains markup; keep only its text.
                slug_base = soupify(slug_base).get_text()
            slug = slugify(slug_base)
    return slug or self.pk
def truncated_description(self) -> Optional[SafeString]:
    """
    Return the occurrence's description, truncated.

    The description's HTML is parsed, all ``<img>`` elements are removed, and
    the prettified markup is truncated to ``TRUNCATED_DESCRIPTION_LENGTH``
    with ``truncatechars_html`` before being passed through ``format_html``.

    Returns None when the occurrence has no description.
    """
    if not self.description:
        return None
    description = soupify(self.description.html)
    # Strip every image, not only the first match, so no image survives into
    # the truncated preview.
    for image in description.find_all('img'):
        image.decompose()
    # NOTE(review): if `format_html` is Django's, braces in the markup would
    # be treated as format fields — confirm descriptions never contain them.
    return format_html(
        truncatechars_html(description.prettify(), TRUNCATED_DESCRIPTION_LENGTH)
    )
def format_html(self, html: str) -> str:
    """
    Add or remove an enclosing <p> tag according to ``self.paragraphed``.

    - ``paragraphed is None``: the HTML is returned unchanged.
    - ``paragraphed`` truthy: the HTML is wrapped in ``<p>...</p>`` unless it
      already starts with ``<p`` and ends with ``</p>``.
    - otherwise: HTML that starts with ``<p`` and ends with ``</p>`` is
      replaced by the contents of its first ``<p>`` element.

    Falsy input (empty string, None) is returned as-is.
    """
    if not html:
        return html

    paragraphed = self.paragraphed
    if paragraphed is None:
        return html

    # TODO: move this to a util method
    is_wrapped = html.startswith('<p') and html.endswith('</p>')

    if paragraphed:
        return html if is_wrapped else f'<p>{html}</p>'

    # paragraphed is False
    if is_wrapped:
        # NOTE(review): only the first <p> element's contents are kept, so
        # input with several paragraphs loses the rest — confirm intended.
        return soupify(html).p.decode_contents()
    return html
def clean(self):
    """
    Prepare the source to be saved.

    Recomputes ``citation_html`` and its plain-text ``citation_string``, and
    adopts the container's file when this source has none of its own. For a
    source that already has a primary key, raises ValidationError if another
    source has the same citation string or if one of this source's containers
    is itself contained by this source.
    """
    super().clean()

    citation_html = self.calculate_citation_html()
    self.citation_html = citation_html
    self.citation_string = soupify(self.citation_html).get_text()

    # Inherit the file from the containing source when none is set here.
    if not self.file and self.containment and self.containment.container.file:
        self.file = self.containment.container.file

    # The remaining checks only apply to a source that is not newly created.
    if not self.pk:
        return

    other_sources = Source.objects.exclude(pk=self.pk)
    if other_sources.filter(citation_string=self.citation_string).exists():
        raise ValidationError(
            f'Unable to save this source because it duplicates an existing source '
            f'or has an identical string: {self.citation_string}')

    # Reject circular containment.
    for container in self.containers.all():
        if self in container.containers.all():
            raise ValidationError(
                f'This source cannot be contained by {container}, '
                f'because that source is already contained by this source.'
            )
def clean(self, html_value, model_instance: 'Model') -> HTML:
    """
    Return a cleaned, ready-to-save instance of HTML.

    Steps, in order: run the parent field's ``clean``; reject raw HTML
    containing ``{`` or ``}``; apply each regex in ``REPLACEMENTS``; remove
    the first match of each entry in ``DELETIONS``; dedupe newlines; let a
    saved model instance preprocess the HTML; update placeholders; and
    finally add or remove the enclosing <p> tag via ``format_html``.

    Raises ValidationError for illegal braces or when updating placeholders
    fails, and a plain Exception when a regex replacement fails.
    """
    cleaned = super().clean(value=html_value, model_instance=model_instance)
    markup = cleaned.raw_value

    if any(brace in markup for brace in ('{', '}')):
        raise ValidationError(
            'The "{" and "}" characters are illegal in HTML fields.')

    for pattern, replacement in REPLACEMENTS:
        try:
            substituted = re.sub(pattern, replacement, markup)
            markup = substituted.strip()
        except Exception as error:
            raise Exception(
                f'Failed to replace `{pattern}` ({type(pattern)}) '
                f'with `{replacement}` ({type(replacement)} '
                f'in {markup}\n({type(markup)})\n{error}')

    # Use html.parser to avoid adding <html> and <body> tags
    soup = soupify(markup, features='html.parser')
    for deletion in DELETIONS:
        try:
            unwanted = soup.find(deletion)
            unwanted.decompose()
        except AttributeError:
            # no match
            continue

    markup = dedupe_newlines(str(soup))
    logging.debug(f'{markup}')

    # Only an instance that already has a primary key preprocesses the HTML.
    if model_instance.pk:
        markup = model_instance.preprocess_html(markup)

    # Update obj placeholders.
    try:
        markup = self.update_placeholders(markup)
    except Exception as err:
        raise ValidationError(f'{err}')

    # Add or remove <p> tags if necessary
    cleaned.raw_value = self.format_html(markup)
    return cleaned
def __str__(self) -> str:
    """Return the repository's HTML reduced to plain text."""
    soup = soupify(self.html)
    return soup.get_text()
def __str__(self) -> str:
    """Return the collection's HTML reduced to plain text."""
    soup = soupify(self.html)
    return soup.get_text()
def text(self) -> str:
    """
    Return the textual content with all HTML tags removed.

    The text is stripped of surrounding whitespace; an empty string is
    returned when there is no raw value.
    """
    if not self.raw_value:
        return ''
    plain_text = soupify(self.raw_value).get_text()
    return plain_text.strip()
def date_string(self) -> str:
    """
    Return the string representation of the model instance's date.

    This is the text of ``date_html`` with markup removed, or an empty string
    when ``date_html`` is empty.
    """
    markup = self.date_html
    if not markup:
        return ''
    return soupify(markup).get_text()
def attributee_string(self) -> Optional[str]:
    """
    Return the text of ``attributee_html`` with markup removed.

    See the `attributee_html` property. Returns None when that property
    yields an empty value.
    """
    # Read the property once so it is not evaluated a second time.
    attributee_html = self.attributee_html
    if attributee_html:
        return soupify(attributee_html).get_text()  # type: ignore
    return None
def __str__(self) -> str:
    """
    Return the citation's string representation.

    This is the citation's HTML reduced to plain text; if producing it fails
    for any reason, a generic label built from the primary key is used.
    """
    try:
        representation = soupify(self.html).get_text()
    except Exception:
        # __str__ must not raise; fall back to a pk-based label.
        representation = f'citation {self.pk}'
    return representation
def __str__(self) -> str:
    """Return the related quote's bite HTML reduced to plain text."""
    bite_html = self.quote.bite.html
    return soupify(bite_html).get_text()