def embed(self):
    """Build the HTML used to embed this item's URL in the crowdsource.

    Resolution order:

    1. Ask oEmbed for embed code for ``self.url`` (``max_height=400``).
    2. If that raises ``PyEmbedConsumerError`` and the URL matches
       ``DOCUMENT_URL_RE``, fill in the ``DOCCLOUD_EMBED`` template with
       the matched ``doc_id``.
    3. Otherwise return a plain ``<iframe>`` pointing at the URL.
    """
    try:
        # The default discoverer is deliberately avoided: it contains a bug
        # that makes it always match spotify.  Instead, chain the provider
        # file shipped next to this module with auto-discovery.
        providers_path = resource_filename(__name__, 'oembed_providers.json')
        discoverer = ChainingDiscoverer([
            FileDiscoverer(providers_path),
            AutoDiscoverer(),
        ])
        oembed_html = PyEmbed(discoverer=discoverer).embed(
            self.url,
            max_height=400,
        )
        return mark_safe(oembed_html)
    except PyEmbedConsumerError:
        # A private document cloud document will not have an oEmbed, so
        # its embed code is assembled by hand.
        match = DOCUMENT_URL_RE.match(self.url)
        if not match:
            # Not a document cloud document: fall back to a simple iframe.
            return format_html(
                '<iframe src="{}" width="100%" height="400px"></iframe>',
                self.url,
            )
        doccloud_html = DOCCLOUD_EMBED.format(doc_id=match.group('doc_id'))
        return mark_safe(doccloud_html)
def process_data_csv(self, crowdsource):
    """Create the crowdsource data from the uploaded CSV.

    The first CSV row is the header row; header names are lower-cased and
    used as keys.  For every following row the ``url`` column is removed
    from the row and the remaining columns become that row's metadata.

    Each row is then routed by its URL:

    * matches ``DOCUMENT_URL_RE`` and ``doccloud_each_page`` is set:
      dispatched through ``datum_per_page.delay``;
    * matches ``PROJECT_URL_RE``: dispatched through
      ``import_doccloud_proj.delay``;
    * any other non-empty URL: created on ``crowdsource.data`` if it
      passes ``URLValidator``, silently skipped otherwise;
    * no URL: created on ``crowdsource.data`` with metadata only.

    Nothing is done when no CSV was uploaded or when it has no header row.
    Blank lines in the CSV are ignored.

    Args:
        crowdsource: object whose ``pk`` and ``data`` manager receive the
            rows.
    """
    data_csv = self.cleaned_data["data_csv"]
    doccloud_each_page = self.cleaned_data["doccloud_each_page"]
    if not data_csv:
        return

    url_validator = URLValidator()
    # "utf-8-sig" decodes exactly like "utf-8" but drops a leading byte
    # order mark.  Without this the first header of a BOM-prefixed CSV
    # would be "\ufeffurl" and the URL column would not be recognised.
    reader = csv.reader(codecs.iterdecode(data_csv, "utf-8-sig"))

    header_row = next(reader, None)
    if header_row is None:
        # The file has no rows at all, so there is nothing to import.
        return
    headers = [h.lower() for h in header_row]

    for line in reader:
        if not line:
            # csv.reader yields an empty list for a blank line; do not
            # create an empty data item for it.
            continue

        data = dict(zip(headers, line))
        url = data.pop("url", "")
        doc_match = DOCUMENT_URL_RE.match(url)
        proj_match = PROJECT_URL_RE.match(url)

        if doccloud_each_page and doc_match:
            datum_per_page.delay(
                crowdsource.pk,
                doc_match.group("doc_id"),
                data,
            )
        elif proj_match:
            import_doccloud_proj.delay(
                crowdsource.pk,
                proj_match.group("proj_id"),
                data,
                doccloud_each_page,
            )
        elif url:
            try:
                url_validator(url)
            except forms.ValidationError:
                # Deliberate best effort: rows with invalid URLs are
                # skipped rather than failing the whole upload.
                pass
            else:
                crowdsource.data.create(url=url, metadata=data)
        else:
            crowdsource.data.create(metadata=data)
def save(self, commit=True, doccloud_each_page=False):
    """Save the formset, applying special cases to Document Cloud URLs.

    The parent ``save`` is always called with ``commit=False`` so each
    instance can be inspected before anything is written:

    * URL matches ``DOCUMENT_URL_RE`` and ``doccloud_each_page`` is true:
      handed to ``datum_per_page.delay`` instead of being saved;
    * URL matches ``PROJECT_URL_RE``: handed to
      ``import_doccloud_proj.delay`` instead of being saved;
    * anything else: kept, and saved here when ``commit`` is true.

    Args:
        commit: whether kept instances are saved.
        doccloud_each_page: forwarded to the project import and used to
            decide whether single documents are dispatched per page.

    Returns:
        The list of instances that were kept (not handed off).
    """
    pending = super(CrowdsourceDataFormset, self).save(commit=False)
    kept = []
    for datum in pending:
        document = DOCUMENT_URL_RE.match(datum.url)
        project = PROJECT_URL_RE.match(datum.url)

        if doccloud_each_page and document:
            datum_per_page.delay(
                self.instance.pk,
                document.group('doc_id'),
                {},
            )
            continue

        if project:
            import_doccloud_proj.delay(
                self.instance.pk,
                project.group('proj_id'),
                {},
                doccloud_each_page,
            )
            continue

        # Ordinary URL: this instance is returned to the caller.
        kept.append(datum)
        if commit:
            datum.save()
    return kept
def embed(self):
    """Build the HTML used to embed this item's URL in the crowdsource.

    Resolution order:

    1. Ask oEmbed for embed code for ``self.url`` (``max_height=400``).
    2. If that raises ``PyEmbedConsumerError`` and the URL matches
       ``DOCUMENT_URL_RE``, fill in the ``DOCCLOUD_EMBED`` template with
       the matched ``doc_id``.
    3. Otherwise return a plain ``<iframe>`` pointing at the URL.
    """
    try:
        # oEmbed is the preferred source of embed code.
        oembed_html = PyEmbed().embed(self.url, max_height=400)
        return mark_safe(oembed_html)
    except PyEmbedConsumerError:
        # A private document cloud document will not have an oEmbed, so
        # its embed code is assembled by hand.
        match = DOCUMENT_URL_RE.match(self.url)
        if not match:
            # Not a document cloud document: fall back to a simple iframe.
            return format_html(
                '<iframe src="{}" width="100%" height="400px"></iframe>',
                self.url,
            )
        doccloud_html = DOCCLOUD_EMBED.format(doc_id=match.group('doc_id'))
        return mark_safe(doccloud_html)