Ejemplo n.º 1
0
 def process_data_csv(self, crowdsource):
     """Create the crowdsource data from the uploaded CSV.

     The first CSV row is used as the header row (lower-cased).  Every
     following row becomes a ``header -> value`` dict, and its ``url``
     column (if any) decides how the row is handled:

     * URL matches ``DOCUMENT_URL_RE`` and ``doccloud_each_page`` is set:
       dispatched through ``datum_per_page.delay``.
     * URL matches ``PROJECT_URL_RE``: dispatched through
       ``import_doccloud_proj.delay``.
     * Any other non-empty URL: stored as a data item when it passes
       ``URLValidator``, silently skipped otherwise.
     * No URL: stored as a metadata-only data item.

     Nothing is done when no CSV was uploaded or the upload has no rows.

     Args:
         crowdsource: Object the data is attached to.  Its ``pk`` is passed
             to the dispatched calls and rows are created through
             ``crowdsource.data.create``.
     """
     data_csv = self.cleaned_data["data_csv"]
     doccloud_each_page = self.cleaned_data["doccloud_each_page"]
     if not data_csv:
         return

     # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte
     # order mark, which would otherwise stay glued to the first header
     # (e.g. "\ufeffurl") and keep it from matching "url".
     reader = csv.reader(codecs.iterdecode(data_csv, "utf-8-sig"))
     header_row = next(reader, None)
     if header_row is None:
         # Upload without any rows: a bare next(reader) would raise
         # StopIteration out of this method.
         return
     headers = [h.lower() for h in header_row]

     url_validator = URLValidator()
     for line in reader:
         if not line:
             # csv.reader returns [] for blank lines; without this guard
             # each one would create an empty metadata-only data item.
             continue
         # zip() stops at the shorter of headers/line, so extra cells or
         # missing trailing cells are dropped rather than raising.
         data = dict(zip(headers, line))
         # "url" is removed from the row; what is left is the metadata.
         url = data.pop("url", "")
         doc_match = DOCUMENT_URL_RE.match(url)
         proj_match = PROJECT_URL_RE.match(url)
         if doccloud_each_page and doc_match:
             datum_per_page.delay(
                 crowdsource.pk,
                 doc_match.group("doc_id"),
                 data,
             )
         elif proj_match:
             import_doccloud_proj.delay(
                 crowdsource.pk,
                 proj_match.group("proj_id"),
                 data,
                 doccloud_each_page,
             )
         elif url:
             # skip invalid URLs (deliberate best-effort: one bad row must
             # not abort the rest of the import)
             try:
                 url_validator(url)
             except forms.ValidationError:
                 pass
             else:
                 crowdsource.data.create(url=url, metadata=data)
         else:
             crowdsource.data.create(metadata=data)
Ejemplo n.º 2
0
 def save(self, commit=True, doccloud_each_page=False):
     """Save the formset, diverting Document Cloud URLs instead of storing them.

     The parent ``save`` is always called with ``commit=False``; each
     returned instance is then routed by its ``url``:

     * matches ``DOCUMENT_URL_RE`` and ``doccloud_each_page`` is true:
       handed to ``datum_per_page.delay`` with empty metadata,
     * matches ``PROJECT_URL_RE``: handed to ``import_doccloud_proj.delay``
       with empty metadata,
     * anything else: kept, and saved when ``commit`` is true.

     Returns:
         The list of kept instances only; diverted ones are not included.
     """
     pending = super(CrowdsourceDataFormset, self).save(commit=False)
     kept = []
     for datum in pending:
         document = DOCUMENT_URL_RE.match(datum.url)
         project = PROJECT_URL_RE.match(datum.url)

         if doccloud_each_page and document:
             datum_per_page.delay(
                 self.instance.pk, document.group('doc_id'), {}
             )
             continue

         if project:
             import_doccloud_proj.delay(
                 self.instance.pk,
                 project.group('proj_id'),
                 {},
                 doccloud_each_page,
             )
             continue

         # Ordinary URL: this one is returned to the caller.
         kept.append(datum)
         if commit:
             datum.save()
     return kept
Ejemplo n.º 3
0
 def _crowdsource_base(self, foias, user, post, split):
     """Shared implementation behind both crowdsource actions.

     Keeps only the ``foias`` for which ``has_perm(user, 'view')`` is true,
     validates ``post`` with ``CrowdsourceChoiceForm`` and, for every file
     with a ``doc_id`` on those requests' communications, either calls
     ``datum_per_page.delay`` (``split`` true) or creates one data item
     holding the document's URL (``split`` false).

     Returns:
         A status message string.
     """
     prefetched = foias.prefetch_related('communications__files')
     viewable = [foia for foia in prefetched if foia.has_perm(user, 'view')]
     choice_form = CrowdsourceChoiceForm(post, user=user)
     # NOTE(review): an invalid form skips this branch and still falls
     # through to the success message below -- confirm that is intended.
     if choice_form.is_valid():
         target = choice_form.cleaned_data['crowdsource']
         if target is None:
             return 'No crowdsource selected'
         # Lazy generator: related objects are walked in the same order as
         # nested foia -> communication -> file loops would.
         doc_ids = (
             attachment.doc_id
             for foia in viewable
             for comm in foia.communications.all()
             for attachment in comm.files.all()
         )
         for doc_id in doc_ids:
             if not doc_id:
                 # Files without a doc_id are ignored.
                 continue
             if split:
                 datum_per_page.delay(target.pk, doc_id, {})
             else:
                 document_url = (
                     'https://www.documentcloud.org/documents/{}.html'
                 ).format(doc_id)
                 target.data.create(url=document_url)
     return 'Files added to assignment'
Ejemplo n.º 4
0
 def _crowdsource_base(self, foias, user, post, split):
     """Add the files of the viewable requests to the chosen crowdsource.

     ``foias`` is narrowed to the requests where ``has_perm(user, "view")``
     is true, and ``post`` is validated with ``CrowdsourceChoiceForm``.
     Each file that has a ``doc_id`` is then passed to
     ``datum_per_page.delay`` when ``split`` is true, or stored as a single
     data item with the document's URL when it is false.

     Returns:
         A status message string.
     """
     success_message = "Files added to assignment"

     with_files = foias.prefetch_related("communications__files")
     permitted = [foia for foia in with_files if foia.has_perm(user, "view")]
     chooser = CrowdsourceChoiceForm(post, user=user)
     if not chooser.is_valid():
         # NOTE(review): an invalid form reports success without adding
         # anything -- confirm that is intended.
         return success_message

     chosen = chooser.cleaned_data["crowdsource"]
     if chosen is None:
         return "No crowdsource selected"

     for foia in permitted:
         for communication in foia.communications.all():
             for attachment in communication.files.all():
                 if not attachment.doc_id:
                     # Files without a doc_id are ignored.
                     continue
                 if split:
                     datum_per_page.delay(chosen.pk, attachment.doc_id, {})
                     continue
                 document_url = (
                     f"https://beta.documentcloud.org/documents/{attachment.doc_id}/"
                 )
                 chosen.data.create(url=document_url)
     return success_message