Ejemplo n.º 1
0
 def embed(self):
     """Return the HTML used to embed this item's URL in the crowdsource.

     oEmbed is tried first.  If PyEmbed raises ``PyEmbedConsumerError``, a
     URL matching ``DOCUMENT_URL_RE`` gets a manually built embed from
     ``DOCCLOUD_EMBED``; any other URL is wrapped in a plain iframe.
     """
     try:
         # The default discoverer is not used because it contains a bug
         # that makes it always match spotify; chain the bundled provider
         # list with auto-discovery instead.
         providers_file = resource_filename(
             __name__, 'oembed_providers.json'
         )
         discoverer = ChainingDiscoverer(
             [FileDiscoverer(providers_file), AutoDiscoverer()]
         )
         oembed_html = PyEmbed(discoverer=discoverer).embed(
             self.url, max_height=400
         )
         return mark_safe(oembed_html)
     except PyEmbedConsumerError:
         match = DOCUMENT_URL_RE.match(self.url)
         if match is None:
             # not a recognized document URL: fall back to a simple iframe
             return format_html(
                 '<iframe src="{}" width="100%" height="400px"></iframe>',
                 self.url,
             )
         # a private document cloud document will not have an oEmbed, so
         # the embed is created manually from the document id
         doc_id = match.group('doc_id')
         return mark_safe(DOCCLOUD_EMBED.format(doc_id=doc_id))
Ejemplo n.º 2
0
 def process_data_csv(self, crowdsource):
     """Create the crowdsource data from the uploaded CSV.

     The first row of the CSV is treated as the header row and its values
     are lower-cased.  For every following row the ``url`` column (if
     present) is removed and the remaining columns become the metadata.
     The row is then handled according to its URL:

     * matches ``DOCUMENT_URL_RE`` and ``doccloud_each_page`` is set:
       handed to ``datum_per_page.delay``
     * matches ``PROJECT_URL_RE``: handed to ``import_doccloud_proj.delay``
     * any other non-empty URL: a data item is created if the URL passes
       ``URLValidator``; invalid URLs are skipped
     * no URL: a data item is created from the metadata alone

     Nothing happens when no CSV was uploaded or the file contains no
     header row.  Completely blank lines are ignored.

     :param crowdsource: the crowdsource the data is attached to; its
         ``pk`` and ``data`` manager are used
     """
     data_csv = self.cleaned_data["data_csv"]
     doccloud_each_page = self.cleaned_data["doccloud_each_page"]
     if not data_csv:
         return

     # "utf-8-sig" decodes plain UTF-8 unchanged but also drops a leading
     # byte order mark; with plain "utf-8" the BOM stays glued to the first
     # header, so a leading "url" column would never be recognized
     reader = csv.reader(codecs.iterdecode(data_csv, "utf-8-sig"))
     header_row = next(reader, None)
     if header_row is None:
         # the file has no rows at all, so there is nothing to import
         return
     headers = [h.lower() for h in header_row]

     url_validator = URLValidator()
     for line in reader:
         if not line:
             # csv.reader yields [] for a completely blank line; do not
             # create an empty data item for it
             continue
         data = dict(zip(headers, line))
         url = data.pop("url", "")
         doc_match = DOCUMENT_URL_RE.match(url)
         proj_match = PROJECT_URL_RE.match(url)
         if doccloud_each_page and doc_match:
             datum_per_page.delay(
                 crowdsource.pk, doc_match.group("doc_id"), data
             )
         elif proj_match:
             import_doccloud_proj.delay(
                 crowdsource.pk,
                 proj_match.group("proj_id"),
                 data,
                 doccloud_each_page,
             )
         elif url:
             # skip invalid URLs
             try:
                 url_validator(url)
             except forms.ValidationError:
                 pass
             else:
                 crowdsource.data.create(url=url, metadata=data)
         else:
             crowdsource.data.create(metadata=data)
Ejemplo n.º 3
0
 def save(self, commit=True, doccloud_each_page=False):
     """Save the formset, applying special cases to Document Cloud URLs.

     Instances whose URL matches ``DOCUMENT_URL_RE`` (when
     ``doccloud_each_page`` is set) or ``PROJECT_URL_RE`` are not saved
     here; they are handed to ``datum_per_page.delay`` or
     ``import_doccloud_proj.delay`` with empty metadata instead.  Every
     other instance is collected and, if ``commit`` is true, saved.

     Returns the list of instances that were not handed off.
     """
     kept = []
     unsaved = super(CrowdsourceDataFormset, self).save(commit=False)
     for datum in unsaved:
         document = DOCUMENT_URL_RE.match(datum.url)
         project = PROJECT_URL_RE.match(datum.url)

         if doccloud_each_page and document:
             # one datum per page of the document
             datum_per_page.delay(
                 self.instance.pk, document.group('doc_id'), {}
             )
             continue

         if project:
             # import the project's documents
             import_doccloud_proj.delay(
                 self.instance.pk,
                 project.group('proj_id'),
                 {},
                 doccloud_each_page,
             )
             continue

         # ordinary URL: keep the instance as-is
         kept.append(datum)
         if commit:
             datum.save()
     return kept
Ejemplo n.º 4
0
 def embed(self):
     """Return the HTML used to embed this item's URL in the crowdsource.

     oEmbed is tried first.  If PyEmbed raises ``PyEmbedConsumerError``, a
     URL matching ``DOCUMENT_URL_RE`` gets a manually built embed from
     ``DOCCLOUD_EMBED``; any other URL is wrapped in a plain iframe.
     """
     try:
         # preferred path: let oEmbed supply the embed code
         oembed_html = PyEmbed().embed(self.url, max_height=400)
         return mark_safe(oembed_html)
     except PyEmbedConsumerError:
         match = DOCUMENT_URL_RE.match(self.url)
         if match is None:
             # not a recognized document URL: fall back to a simple iframe
             return format_html(
                 '<iframe src="{}" width="100%" height="400px"></iframe>',
                 self.url,
             )
         # a private document cloud document will not have an oEmbed, so
         # the embed is created manually from the document id
         doc_id = match.group('doc_id')
         return mark_safe(DOCCLOUD_EMBED.format(doc_id=doc_id))