# Module-level dependencies for the methods below. The Django imports are
# standard; DOCUMENT_URL_RE, PROJECT_URL_RE, datum_per_page, and
# import_doccloud_proj are defined elsewhere in this app.
import codecs
import csv

from django import forms
from django.core.validators import URLValidator


def process_data_csv(self, crowdsource):
    """Create the crowdsource data from the uploaded CSV"""
    url_validator = URLValidator()
    data_csv = self.cleaned_data["data_csv"]
    doccloud_each_page = self.cleaned_data["doccloud_each_page"]
    if data_csv:
        reader = csv.reader(codecs.iterdecode(data_csv, "utf-8"))
        headers = [h.lower() for h in next(reader)]
        for line in reader:
            data = dict(zip(headers, line))
            url = data.pop("url", "")
            doc_match = DOCUMENT_URL_RE.match(url)
            proj_match = PROJECT_URL_RE.match(url)
            if doccloud_each_page and doc_match:
                # Split a DocumentCloud document into one datum per page
                datum_per_page.delay(
                    crowdsource.pk,
                    doc_match.group("doc_id"),
                    data,
                )
            elif proj_match:
                # Import every document in a DocumentCloud project
                import_doccloud_proj.delay(
                    crowdsource.pk,
                    proj_match.group("proj_id"),
                    data,
                    doccloud_each_page,
                )
            elif url:
                # skip invalid URLs
                try:
                    url_validator(url)
                except forms.ValidationError:
                    pass
                else:
                    crowdsource.data.create(url=url, metadata=data)
            else:
                crowdsource.data.create(metadata=data)
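# A minimal sketch, assuming standard DocumentCloud URL shapes, of the two
# regexes these methods rely on. The real patterns are defined elsewhere in
# this app; only the named groups "doc_id" and "proj_id" are guaranteed by
# the .group() calls above. Hypothetical, for illustration only:
#
#     DOCUMENT_URL_RE = re.compile(
#         r"https?://www\.documentcloud\.org/documents/(?P<doc_id>[\w-]+)"
#     )
#     PROJECT_URL_RE = re.compile(
#         r"https?://www\.documentcloud\.org/projects/(?P<proj_id>[\w-]+)"
#     )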
def save(self, commit=True, doccloud_each_page=False):
    """Apply special cases to Document Cloud URLs"""
    instances = super(CrowdsourceDataFormset, self).save(commit=False)
    return_instances = []
    for instance in instances:
        doc_match = DOCUMENT_URL_RE.match(instance.url)
        proj_match = PROJECT_URL_RE.match(instance.url)
        if doccloud_each_page and doc_match:
            # Hand the document off to a celery task that creates one
            # datum per page; the instance itself is not saved
            datum_per_page.delay(
                self.instance.pk,
                doc_match.group('doc_id'),
                {},
            )
        elif proj_match:
            # Import the whole DocumentCloud project asynchronously
            import_doccloud_proj.delay(
                self.instance.pk,
                proj_match.group('proj_id'),
                {},
                doccloud_each_page,
            )
        else:
            # Only plain URLs are saved and returned directly
            return_instances.append(instance)
            if commit:
                instance.save()
    return return_instances
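# Usage sketch (hypothetical, assuming a standard Django inline-formset flow
# in a view; `request` and the parent `form` are not part of this module):
#
#     formset = CrowdsourceDataFormset(request.POST, instance=crowdsource)
#     if formset.is_valid():
#         created = formset.save(
#             doccloud_each_page=form.cleaned_data["doccloud_each_page"],
#         )
#     # `created` holds only the plain-URL data; DocumentCloud documents
#     # and projects were dispatched to celery tasks instead.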