def process_data_csv(self, crowdsource):
    """Create the crowdsource data from the uploaded CSV.

    The first CSV row is the header row; header names are lower-cased.  Each
    following row is zipped with the headers into a dict, the ``url`` entry
    (if any) is popped out of it, and the row is dispatched as follows:

    * the URL matches ``DOCUMENT_URL_RE`` and ``doccloud_each_page`` is set:
      the row is handed to ``datum_per_page.delay``;
    * the URL matches ``PROJECT_URL_RE``: the row is handed to
      ``import_doccloud_proj.delay``;
    * any other non-empty URL: it is stored together with the remaining
      columns as metadata if it passes ``URLValidator``, otherwise the row
      is skipped;
    * no URL: the remaining columns are stored as metadata only.

    Nothing is done when no CSV was uploaded or when the upload contains no
    rows at all.

    Args:
        crowdsource: object the data is attached to; its ``pk`` and
            ``data.create`` are used.
    """
    data_csv = self.cleaned_data["data_csv"]
    doccloud_each_page = self.cleaned_data["doccloud_each_page"]
    if not data_csv:
        return

    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte
    # order mark, which would otherwise stay glued to the first header and
    # hide the "url" column.
    reader = csv.reader(codecs.iterdecode(data_csv, "utf-8-sig"))
    header_row = next(reader, None)
    if header_row is None:
        # The upload has no rows, so there is nothing to import.
        return
    headers = [h.lower() for h in header_row]

    url_validator = URLValidator()
    for line in reader:
        data = dict(zip(headers, line))
        url = data.pop("url", "")
        doc_match = DOCUMENT_URL_RE.match(url)
        proj_match = PROJECT_URL_RE.match(url)
        if doccloud_each_page and doc_match:
            datum_per_page.delay(crowdsource.pk, doc_match.group("doc_id"), data)
        elif proj_match:
            import_doccloud_proj.delay(
                crowdsource.pk,
                proj_match.group("proj_id"),
                data,
                doccloud_each_page,
            )
        elif url:
            try:
                url_validator(url)
            except forms.ValidationError:
                # Deliberate best effort: rows with invalid URLs are skipped.
                continue
            crowdsource.data.create(url=url, metadata=data)
        else:
            crowdsource.data.create(metadata=data)
def save(self, commit=True, doccloud_each_page=False):
    """Save the formset, giving Document Cloud URLs special treatment.

    The parent formset is asked for its instances without committing them.
    Each instance is then routed by its ``url``:

    * it matches ``DOCUMENT_URL_RE`` and ``doccloud_each_page`` is set: the
      document id is handed to ``datum_per_page.delay``;
    * it matches ``PROJECT_URL_RE``: the project id is handed to
      ``import_doccloud_proj.delay``;
    * anything else: the instance is kept, and saved when ``commit`` is true.

    Returns:
        The list of kept instances, i.e. those not handed to either task.
    """
    pending = super(CrowdsourceDataFormset, self).save(commit=False)
    kept = []
    for datum in pending:
        document = DOCUMENT_URL_RE.match(datum.url)
        project = PROJECT_URL_RE.match(datum.url)

        if doccloud_each_page and document:
            datum_per_page.delay(self.instance.pk, document.group('doc_id'), {})
            continue

        if project:
            import_doccloud_proj.delay(
                self.instance.pk,
                project.group('proj_id'),
                {},
                doccloud_each_page,
            )
            continue

        # Ordinary URL: this is the only kind of instance that is returned.
        kept.append(datum)
        if commit:
            datum.save()
    return kept
def _crowdsource_base(self, foias, user, post, split):
    """Shared implementation behind both crowdsource actions.

    Keeps only the requests for which ``has_perm(user, 'view')`` is true,
    validates a ``CrowdsourceChoiceForm`` built from ``post`` and then walks
    every file of every communication of those requests.  Files without a
    ``doc_id`` are ignored.  For the others, ``split`` decides whether the
    document is handed to ``datum_per_page.delay`` or added to the
    crowdsource as a single documentcloud.org URL.

    Returns:
        A status message, or ``None`` when the form does not validate.
    """
    foias = foias.prefetch_related('communications__files')
    foias = [f for f in foias if f.has_perm(user, 'view')]
    form = CrowdsourceChoiceForm(post, user=user)
    if not form.is_valid():
        return None

    crowdsource = form.cleaned_data['crowdsource']
    if crowdsource is None:
        return 'No crowdsource selected'

    for foia in foias:
        for comm in foia.communications.all():
            for file_ in comm.files.all():
                doc_id = file_.doc_id
                if not doc_id:
                    continue
                if split:
                    datum_per_page.delay(crowdsource.pk, doc_id, {})
                else:
                    document_url = (
                        'https://www.documentcloud.org/documents/{}.html'
                    ).format(doc_id)
                    crowdsource.data.create(url=document_url)
    return 'Files added to assignment'
def _crowdsource_base(self, foias, user, post, split):
    """Add the files of the given requests to the chosen crowdsource.

    Only requests for which ``has_perm(user, "view")`` is true are used.  The
    crowdsource comes from a ``CrowdsourceChoiceForm`` built from ``post``.
    Every file with a ``doc_id`` is either handed to ``datum_per_page.delay``
    (``split`` true) or added as one beta.documentcloud.org URL (``split``
    false).

    Returns:
        A status message, or ``None`` when the form does not validate.
    """
    foias = foias.prefetch_related("communications__files")
    foias = [f for f in foias if f.has_perm(user, "view")]
    form = CrowdsourceChoiceForm(post, user=user)
    if not form.is_valid():
        return None

    crowdsource = form.cleaned_data["crowdsource"]
    if crowdsource is None:
        return "No crowdsource selected"

    # Lazily flatten requests -> communications -> files, dropping files
    # that have no document id.
    doc_ids = (
        file_.doc_id
        for foia in foias
        for comm in foia.communications.all()
        for file_ in comm.files.all()
        if file_.doc_id
    )
    for doc_id in doc_ids:
        if split:
            datum_per_page.delay(crowdsource.pk, doc_id, {})
        else:
            crowdsource.data.create(
                url=f"https://beta.documentcloud.org/documents/{doc_id}/"
            )
    return "Files added to assignment"