def fetch_data_from_url(url, content):
    """Build a dict of pre-filled resource data from a fetched page.

    The result always holds ``"url"``. ``"title"`` and ``"abstract"`` are
    added when ``Readability`` can process the page, ``"media_formats"`` when
    ``url`` matches ``VIDEO_URL_RE``, and ``"license_type"`` together with
    ``"license_cc"`` or ``"license_cc_old"`` when ``content`` contains a URL
    matching ``CC_LICENSE_URL_RE``.
    """
    def plain_text(html):
        # Strip markup, decode entities and collapse runs of whitespace.
        return reduce_whitespace(unescape_entities(strip_tags(html).strip()))

    data = {"url": url}

    try:
        article = Readability(url, content)
        data["title"] = reduce_whitespace(
            unescape_entities(article.get_article_title())
        )
        # The meta description is preferred; the article text is only
        # extracted when the description is empty.
        summary = plain_text(article.get_meta_description())
        if not summary:
            summary = plain_text(article.get_article_text())
        data["abstract"] = truncate_words(summary, 200)
    except ReadabilityException:
        # Best effort: whatever was extracted before the failure is kept.
        pass

    if VIDEO_URL_RE.search(url):
        data["media_formats"] = MediaFormat.objects.filter(name="Video")

    found_urls = URL_RE.findall(content)
    # The first entry of CC_OLD_LICENSES is skipped.
    old_license_urls = [entry[0] for entry in CC_OLD_LICENSES[1:]]
    for found_url in found_urls:
        if not CC_LICENSE_URL_RE.match(found_url):
            continue
        found_url = found_url.lower()
        if found_url in old_license_urls:
            license_type, url_key = "cc-old", "license_cc_old"
        else:
            license_type, url_key = "cc", "license_cc"
        # A later license URL in the page overrides an earlier one.
        data["license_type"] = license_type
        data[url_key] = found_url

    return data
def prepare_value(self, value):
    """Convert a field value into the ``{"url": ..., "name": ...}`` dict.

    A falsy value yields an empty dict. An object exposing ``_meta``
    (presumably a model instance) contributes its serializable ``url``
    value; anything else is used as the URL directly. The name is looked up
    only for URLs matching ``CC_LICENSE_URL_RE`` and is empty otherwise.
    """
    if not value:
        return {}

    url = value.serializable_value("url") if hasattr(value, '_meta') else value

    if CC_LICENSE_URL_RE.match(url):
        name = License.objects.get_cc_license_name_from_url(url)
    else:
        name = u""
    return {"url": url, "name": name}
def clean_license_cc(self):
    """Validate ``license_cc`` when the selected license type is ``"cc"``.

    Returns the cleaned license URL, or an empty string when another
    license type is selected.

    Raises ``forms.ValidationError`` if the type is ``"cc"`` and the value
    is empty or does not match ``CC_LICENSE_URL_RE``.
    """
    license_url = self.cleaned_data["license_cc"]

    # The field is irrelevant for any other license type.
    if self.cleaned_data.get("license_type") != "cc":
        return u""

    if not license_url:
        raise forms.ValidationError(u"You should select the license.")
    if not CC_LICENSE_URL_RE.match(license_url):
        raise forms.ValidationError(u"Invalid license URL.")
    return license_url
def issue(request):
    """Issue a CC license from the POSTed answers to the license questions.

    Only POST requests are accepted; anything else raises ``Http404``.
    Returns a dict with ``"status"`` set to ``"success"`` (merged with the
    issued license data plus ``"license_classes"``) or to ``"error"`` with a
    ``"message"`` when no license data could be obtained.
    """
    if request.method != "POST":
        raise Http404()

    # One answer per issue field, read from "cc-question-<id>"; a missing
    # answer defaults to an empty string.
    answers = dict(
        (field["id"], request.POST.get("cc-question-%s" % field["id"], u""))
        for field in License.objects.get_cc_issue_fields()
    )

    result = License.objects.issue(answers)
    if not result:
        return {
            "status": "error",
            "message": u"Unable to get license information from CreativeCommons.org. Try again later.",
        }

    response = {"status": "success"}
    response.update(result)
    # The "cc_type" group (e.g. hyphen-separated parts) is split into classes.
    cc_type = CC_LICENSE_URL_RE.search(result["url"]).groupdict()["cc_type"]
    response["license_classes"] = cc_type.split("-")
    return response
def render(self, name, value, attrs=None):
    """Render the license widget template as safe HTML.

    ``value`` is a dict with optional ``"url"`` and ``"name"`` keys. When the
    URL matches ``CC_LICENSE_URL_RE`` the derivatives/commercial radio
    selections are derived from its ``cc_type`` group; otherwise the URL,
    name and selections are all rendered empty. The radio selects are
    disabled when ``self.readonly`` is set.
    """
    license_url = value.get("url", u"")
    license_name = value.get("name", u"")

    match = CC_LICENSE_URL_RE.search(license_url)
    if not match:
        # Not a CC license URL: render the widget with nothing selected.
        cc_type = ""
        derivatives = u""
        commercial = u""
        license_url = u""
        license_name = u""
    else:
        cc_type = match.groupdict()["cc_type"]
        if "nd" in cc_type:
            derivatives = "n"
        elif "sa" in cc_type:
            derivatives = "sa"
        else:
            derivatives = "y"
        if "nc" in cc_type:
            commercial = "n"
        else:
            commercial = "y"

    def radio_select():
        # A fresh widget (and attrs dict) for each radio group.
        return forms.RadioSelect(
            attrs={"disabled": "disabled"} if self.readonly else None
        )

    name_widget = forms.HiddenInput().render("%s_name" % name, license_name)
    url_widget = forms.HiddenInput().render("%s_url" % name, license_url)
    derivatives_widget = radio_select().render(
        "%s_derivatives" % name,
        derivatives,
        choices=self.DERIVATIVES_CHOICES,
    )
    commercial_widget = radio_select().render(
        "%s_commercial" % name,
        commercial,
        choices=self.COMMERCIAL_CHOICES,
    )

    context = {
        "name": name,
        "cc_type": cc_type,
        "name_widget": name_widget,
        "url_widget": url_widget,
        "derivatives_widget": derivatives_widget,
        "commercial_widget": commercial_widget,
    }
    html = render_to_string("authoring/forms/license-widget.html", context)
    return mark_safe(html)
def post(self, request, *args, **kwargs):
    """Validate an uploaded CSV and import its rows as model objects.

    Flow:

    1. Bind ``ValidateCSVForm``; on form errors or CSV validation errors,
       report them and re-render via ``self.get``.
    2. If ``"validate"`` is in the request, stop after validation.
    3. Otherwise create or update one object per CSV row. Row failures are
       collected in ``self.validation_errors`` as
       ``(row number, u"", message)`` tuples.
    4. Roll back if any row failed or ``"dry_run"`` was requested; otherwise
       commit and re-index every imported object.

    The method manages the transaction explicitly through
    ``transaction.commit()`` / ``transaction.rollback()``.

    Returns whatever ``self.get(request, *args, **kwargs)`` returns.
    """
    self.form = ValidateCSVForm(request.POST, request.FILES)
    if not self.form.is_valid():
        messages.error(request, u"Data is not valid, see below.")
        return self.get(request, *args, **kwargs)

    if self.form.validation_errors:
        self.validation_errors = self.form.validation_errors
        messages.error(request, u"Data is not valid, see below.")
        return self.get(request, *args, **kwargs)

    if "validate" in request.REQUEST:
        messages.success(request, u"Data appears to be valid.")
        return self.get(request, *args, **kwargs)

    model = self.form.model
    # NOTE(review): there is no fallback branch; presumably the form only
    # ever yields Course or Library. Any other model would leave the three
    # names below unbound.
    if model == Course:
        field_name_prefix = "CR_"
        simple_fields = SIMPLE_FIELDS + COURSE_SIMPLE_FIELDS
        m2m_fields = M2M_FIELDS + COURSE_M2M_FIELDS
    elif model == Library:
        field_name_prefix = "LIB_"
        simple_fields = SIMPLE_FIELDS + LIBRARY_SIMPLE_FIELDS
        m2m_fields = M2M_FIELDS + LIBRARY_M2M_FIELDS

    # Special-cased fields are looked up by name; a field the model does not
    # define resolves to None and its CSV columns are then ignored.
    m2m_by_name = dict((f.name, f) for f in model._meta.many_to_many)
    authors_field = m2m_by_name.get("authors")
    keywords_field = m2m_by_name.get("keywords")

    fields_by_name = dict((f.name, f) for f in model._meta.fields)
    institution_field = fields_by_name.get("institution")
    collection_field = fields_by_name.get("collection")
    license_field = fields_by_name.get("license")
    derived_from_field = fields_by_name.get("derived_from")

    # (model field, object attribute, CSV title column, CSV URL column)
    related_item_specs = (
        (fields_by_name.get("prerequisite_1"), "prerequisite_1",
         "PREREQ_TITLE1", "PREREQ_URL1"),
        (fields_by_name.get("prerequisite_2"), "prerequisite_2",
         "PREREQ_TITLE2", "PREREQ_URL2"),
        (fields_by_name.get("postrequisite_1"), "postrequisite_1",
         "POSTREQ_TITLE1", "POSTREQ_URL1"),
        (fields_by_name.get("postrequisite_2"), "postrequisite_2",
         "POSTREQ_TITLE2", "POSTREQ_URL2"),
    )

    imported_objects = []

    for row_index, row in enumerate(self.form.csv_data):
        # Map each cell to its header name with the model prefix removed.
        data = {}
        for field_index, value in enumerate(row):
            field_name = self.form.header[field_index][len(field_name_prefix):]
            data[field_name] = value

        try:
            # Find the object to update by URL, or start a new one.
            check_for_unique_url = True
            try:
                obj = model.objects.get(url=data["URL"])
                new_url = data.get("NEW_URL")
                if new_url:
                    obj.url = new_url
                else:
                    # The URL is unchanged, so it trivially "exists" already.
                    check_for_unique_url = False
            except MultipleObjectsReturned:
                self.validation_errors.append(
                    (row_index + 1, u"",
                     u"URL '%s' is registered multiple times, can't find an object to update." % data["URL"])
                )
                continue
            except model.DoesNotExist:
                obj = model(creator=request.user)
                obj.url = data["URL"]

            # NOTE(review): the state is (re)set for updated objects too,
            # not only for new ones - confirm this is intended.
            obj.workflow_state = IMPORTED_STATE

            # Do not re-index the object until the transaction is finished
            obj.skip_indexing = True

            if check_for_unique_url and model.objects.filter(url=obj.url).exists():
                self.validation_errors.append(
                    (row_index + 1, u"",
                     u"URL '%s' is registered in database already." % obj.url)
                )
                continue

            for csv_field_name, obj_field_name in simple_fields:
                if csv_field_name in data:
                    setattr(obj, obj_field_name, data[csv_field_name])

            if institution_field and "INSTITUTION" in data:
                institution_field.save_form_data(obj, dict(name=data["INSTITUTION"]))

            if collection_field and "COLLECTION" in data:
                collection_field.save_form_data(obj, dict(name=data["COLLECTION"]))

            if license_field and ("COU_TITLE" in data or "COU_URL" in data):
                url = data.get("COU_URL", u"")
                name = data.get("COU_TITLE", u"")
                if url:
                    # A recognised license URL overrides the title from the CSV.
                    if CC_LICENSE_URL_RE.match(url):
                        name = License.objects.get_cc_license_name_from_url(url)
                    elif PUBLIC_DOMAIN_URL_RE.match(url):
                        name = PUBLIC_DOMAIN_NAME
                    elif GNU_FDL_URL_RE.match(url):
                        name = GNU_FDL_NAME
                description = data.get("COU_DESCRIPTION", u"")
                copyright_holder = data.get("COU_COPYRIGHT_HOLDER", u"")
                license_field.save_form_data(
                    obj,
                    dict(url=url, name=name, description=description,
                         copyright_holder=copyright_holder)
                )

            # Pre-/post-requisites: an empty title clears the relation.
            for related_field, attr_name, title_key, url_key in related_item_specs:
                if related_field and title_key in data:
                    title = data[title_key]
                    if title:
                        url = data.get(url_key, u"")
                        related_field.save_form_data(obj, dict(title=title, url=url))
                    else:
                        setattr(obj, attr_name, None)

            if derived_from_field:
                # Deliberately "== False": a missing value (None) must fall
                # through to the title check instead of clearing the parent.
                if data.get("PARENT_MODIFIED") == False:  # noqa: E712
                    obj.derived_from = None
                else:
                    title = data.get("PARENT_TITLE", u"")
                    if not title:
                        obj.derived_from = None
                    else:
                        url = data.get("PARENT_URL", u"")
                        description = data.get("PARENT_CHANGES", u"")
                        derived_from_field.save_form_data(
                            obj,
                            dict(title=title, url=url, description=description)
                        )

            # The object is saved before its many-to-many relations are set.
            obj.save()

            for csv_field_name, obj_field_name, field_model, field_model_key in m2m_fields:
                if csv_field_name in data:
                    related_manager = getattr(obj, obj_field_name)
                    related_manager.clear()
                    for value in data[csv_field_name]:
                        related_manager.add(
                            field_model.objects.get(**{field_model_key: value})
                        )

            if authors_field and "AUTHOR_NAME" in data:
                author_names = data["AUTHOR_NAME"]
                author_emails = data.get("AUTHOR_EMAIL", [])
                author_countries = data.get("AUTHOR_COUNTRY", [])
                obj.authors.clear()
                authors_data = []
                # Emails and countries are matched to names by position and
                # may be shorter than the list of names.
                for i, name in enumerate(author_names):
                    try:
                        email = author_emails[i]
                    except IndexError:
                        email = u""
                    try:
                        country_slug = author_countries[i]
                    except IndexError:
                        country = None
                    else:
                        country = Country.objects.get(slug=country_slug)
                    authors_data.append(dict(name=name, email=email, country=country))
                authors_field.save_form_data(obj, authors_data)

            if keywords_field and "KEYWORDS" in data:
                obj.keywords.clear()
                keywords_data = [dict(name=name) for name in data["KEYWORDS"]]
                keywords_field.save_form_data(obj, keywords_data)

            imported_objects.append(obj)

        except Exception:
            # Any failure while importing a row is reported as a validation
            # error for that row. Only Exception subclasses are handled, so
            # SystemExit and KeyboardInterrupt still propagate.
            transaction.rollback()
            if settings.DEBUG:
                raise
            self.validation_errors.append(
                (row_index + 1, u"", unicode(sys.exc_info()[1]))
            )

    if self.validation_errors:
        transaction.rollback()
        self.is_valid = False
        messages.error(request, u"There were some errors, see below.")
    else:
        if "dry_run" in request.REQUEST:
            transaction.rollback()
            messages.success(request, u"Data appears to be valid. "
                             "It is not imported because 'Dry run' option is selected.")
        else:
            transaction.commit()
            # Indexing was suppressed during the import; do it now that the
            # data is committed.
            for imported in imported_objects:
                imported.skip_indexing = False
                reindex(imported)
            transaction.commit()
            messages.success(request, u"Data was imported successfully.")

    return self.get(request, *args, **kwargs)
def to_python(self, value):
    """Convert a submitted license URL into a ``{"url", "name"}`` dict.

    Returns ``None`` for a falsy value.

    Raises ``forms.ValidationError`` (the field's ``"invalid"`` message)
    when the value does not match ``CC_LICENSE_URL_RE``.
    """
    if not value:
        return None

    if CC_LICENSE_URL_RE.match(value):
        license_name = License.objects.get_cc_license_name_from_url(value)
        return {"url": value, "name": license_name}

    raise forms.ValidationError(self.default_error_messages["invalid"])