Example #1
0
 def process_data_csv(self, crowdsource):
     """Create the crowdsource data from the uploaded CSV.

     The first CSV row is treated as the header row; header names are
     lower-cased and every later row is zipped with them into a dict.  The
     ``url`` column (when present) is removed from that dict and decides how
     the row is handled; the remaining columns are passed on as metadata:

     * URL matches ``DOCUMENT_URL_RE`` and ``doccloud_each_page`` is set:
       handed to ``datum_per_page.delay``.
     * URL matches ``PROJECT_URL_RE``: handed to
       ``import_doccloud_proj.delay``.
     * any other non-empty URL: stored only if ``URLValidator`` accepts it,
       otherwise the row is silently skipped.
     * no URL: stored as a metadata-only row.

     A missing or empty upload, and blank lines inside the CSV, are ignored.

     Args:
         crowdsource: object whose ``pk`` is passed to the tasks and whose
             ``data`` manager receives the created rows (presumably a
             Crowdsource model instance -- confirm against the caller).
     """
     data_csv = self.cleaned_data["data_csv"]
     doccloud_each_page = self.cleaned_data["doccloud_each_page"]
     if not data_csv:
         return

     # "utf-8-sig" decodes plain UTF-8 as well, but also drops a leading
     # byte-order mark; with plain "utf-8" the BOM would stick to the first
     # header name and a leading "url" column would never be recognised.
     reader = csv.reader(codecs.iterdecode(data_csv, "utf-8-sig"))
     header_row = next(reader, None)
     if header_row is None:
         # completely empty file: nothing to import
         return
     headers = [h.lower() for h in header_row]

     url_validator = URLValidator()
     for line in reader:
         if not line:
             # csv.reader yields [] for a blank line; it carries no data
             continue
         data = dict(zip(headers, line))
         url = data.pop("url", "")
         doc_match = DOCUMENT_URL_RE.match(url)
         proj_match = PROJECT_URL_RE.match(url)
         if doccloud_each_page and doc_match:
             datum_per_page.delay(
                 crowdsource.pk,
                 doc_match.group("doc_id"),
                 data,
             )
         elif proj_match:
             import_doccloud_proj.delay(
                 crowdsource.pk,
                 proj_match.group("proj_id"),
                 data,
                 doccloud_each_page,
             )
         elif url:
             # skip invalid URLs
             try:
                 url_validator(url)
             except forms.ValidationError:
                 pass
             else:
                 crowdsource.data.create(url=url, metadata=data)
         else:
             crowdsource.data.create(metadata=data)
Example #2
0
 def save(self, commit=True, doccloud_each_page=False):
     """Save the formset, diverting Document Cloud URLs to their tasks.

     Each instance produced by the parent ``save(commit=False)`` is checked
     against ``DOCUMENT_URL_RE`` and ``PROJECT_URL_RE``.  A document URL
     (only when ``doccloud_each_page`` is true) or a project URL is handed
     to the matching ``.delay`` task with empty metadata instead of being
     saved here.  Every other instance is kept, and saved when ``commit``
     is true.

     Returns the list of instances that were kept.
     """
     kept = []
     for datum in super(CrowdsourceDataFormset, self).save(commit=False):
         document = DOCUMENT_URL_RE.match(datum.url)
         project = PROJECT_URL_RE.match(datum.url)

         if document and doccloud_each_page:
             # handed to the per-page task rather than stored directly
             datum_per_page.delay(
                 self.instance.pk, document.group('doc_id'), {}
             )
             continue

         if project:
             # handed to the project import task rather than stored directly
             import_doccloud_proj.delay(
                 self.instance.pk,
                 project.group('proj_id'),
                 {},
                 doccloud_each_page,
             )
             continue

         # ordinary URL: keep the instance and persist it if requested
         kept.append(datum)
         if commit:
             datum.save()
     return kept