def clean(self):
    ret = super().clean()
    if "file" not in ret:
        return ret
    # has to raise ValidationError on rejection
    get_settings_func(
        "SPIDER_UPLOAD_FILTER",
        "spkcspider.apps.spider.functions.allow_all_filter"
    )(self.request, ret["file"], self)
    return ret
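# A minimal sketch of a custom SPIDER_UPLOAD_FILTER, assuming the contract
# visible in clean() above: it is called with (request, file, form) and must
# raise ValidationError to reject an upload. The function name and size limit
# are hypothetical, not part of spkcspider.
from django.core.exceptions import ValidationError


def size_limited_upload_filter(request, uploaded_file, form):
    # reject uploads above 8 MiB; accepted files pass through silently
    if uploaded_file.size > 8 * 1024 * 1024:
        raise ValidationError(
            "File too large: %(size)s bytes",
            code="file_too_large",
            params={"size": uploaded_file.size}
        )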
def dispatch_extra(self, request, *args, **kwargs):
    _ = gettext
    context = self.get_context_data()
    # fall back to "intentions" if "request_intentions" is missing
    context["intentions"] = set(self.object.extra.get(
        "request_intentions", self.object.extra.get("intentions", [])
    ))
    # neither "sl" nor "domain" is allowed for token updates:
    # "sl" is only for clients, "domain" only for the initial request
    context["intentions"].difference_update({"domain", "sl"})
    context["action"] = "update"
    context["uc"] = self.object.usercomponent
    rreferrer = request.POST.get("referrer", None)
    if rreferrer:
        context["referrer"] = merge_get_url(rreferrer)
        if not get_settings_func(
            "SPIDER_URL_VALIDATOR",
            "spkcspider.apps.spider.functions.validate_url_default"
        )(context["referrer"], self):
            context["action"] = "referrer_invalid"
        # referrer unchanged: enables dontact below
        if self.object.referrer and self.object.referrer.url == rreferrer:
            rreferrer = None
    else:
        rreferrer = self.object.extra.get("request_referrer", None)
        if rreferrer:
            context["referrer"] = merge_get_url(rreferrer)
            if not get_settings_func(
                "SPIDER_URL_VALIDATOR",
                "spkcspider.apps.spider.functions.validate_url_default"
            )(context["referrer"], self):
                return HttpResponse(
                    status=400,
                    content=_('Insecure url: %(url)s') % {
                        "url": context["referrer"]
                    }
                )
        elif self.object.referrer:
            context["referrer"] = self.object.referrer.url
        else:
            context["referrer"] = ""
    context["ids"] = set(self.object.usercomponent.contents.values_list(
        "id", flat=True
    ))
    # if a referrer was requested: DO delete invalid tokens and DO act
    ret = self.handle_referrer_request(
        context, self.object, dontact=not rreferrer, no_oldtoken=True
    )
    if isinstance(ret, HttpResponseRedirect):
        if context.get("post_success", False):
            messages.success(request, _("Intention update successful"))
        else:
            messages.error(request, _("Intention update failed"))
        return HttpResponseRedirect(
            self.get_redirect_url(context["sanitized_GET"])
        )
    return ret
def post(self, request, *args, **kwargs):
    if "payload" in request.POST:
        try:
            payload = parse_qs(request.POST["payload"])
            # check that the token parameter exists (KeyError otherwise)
            request.POST["token"]
        except Exception:
            return HttpResponse(status=400)
        ob = VerifySourceObject.objects.filter(
            url=payload.get("url", [None])[0],
            token=payload.get("update_secret", ["x"])[0]
        ).first()
        if ob:
            GET = parse_qs(ob.get_params)
            GET["token"] = request.POST["token"]
            ob.get_params = urlencode(GET, doseq=True)
            ob.token = None
            ob.save(update_fields=["get_params", "token"])
            return HttpResponse(status=200)
        return HttpResponse(status=404)
    form = self.get_form()
    if get_settings_func(
        "VERIFIER_REQUEST_VALIDATOR",
        "spkcspider.apps.verifier.functions.validate_request_default"
    )(self.request, form):
        return self.form_valid(form)
    else:
        return self.form_invalid(form)
def calculate_allowed_content(self):
    ContentVariant = apps.get_model("spider_base.ContentVariant")
    allowed = []
    cfilterfunc = get_settings_func(
        "SPIDER_CONTENTVARIANT_FILTER",
        "spkcspider.apps.spider.functions.allow_all_filter"
    )
    # Content types which are not "installed" should be removed/never used.
    # "unlisted" variants can be removed as a side product, unless they are
    # also marked as a feature or machine type.
    for variant in ContentVariant.objects.exclude(
        ~(
            models.Q(ctype__contains=VariantType.content_feature) |
            models.Q(ctype__contains=VariantType.component_feature) |
            models.Q(ctype__contains=VariantType.machine)
        ),
        ctype__contains=VariantType.unlisted
    ).filter(
        code__in=registry.contents.keys()
    ):
        # always include special variants; otherwise unnecessary
        # recalculations and other bugs occur
        if variant.name in {"DomainMode", "DefaultActions"}:
            allowed.append(variant)
        elif cfilterfunc(self.user, variant):
            allowed.append(variant)
    # no save required: allowed_content is an m2m field
    self.allowed_content.set(allowed)
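# A minimal sketch of a custom SPIDER_CONTENTVVARIANT-style filter, assuming
# the call signature used above: (user, variant) -> bool. The staff-only rule
# and the "WebConfig" variant name are hypothetical examples.
def staff_only_variant_filter(user, variant):
    # hide a hypothetical "WebConfig" variant from non-staff users
    if variant.name == "WebConfig":
        return user.is_staff
    return True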
def dispatch(self, request, *args, **kwargs):
    try:
        return super().dispatch(request, *args, **kwargs)
    except Http404:
        return get_settings_func(
            "SPIDER_RATELIMIT_FUNC",
            "spkcspider.apps.spider.functions.rate_limit_default"
        )(request, self)
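# A minimal sketch of a custom SPIDER_RATELIMIT_FUNC, assuming the call
# signature used above: (request, view) -> response, invoked when dispatch
# catches Http404. Answering with a plain 404 is a hypothetical policy, not
# the spkcspider default.
from django.http import HttpResponse


def plain_404_rate_limit(request, view):
    # swallow the Http404 and answer uniformly
    return HttpResponse(status=404)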
def get_quota(self, quota_type):
    quota = get_settings_func(
        "SPIDER_GET_QUOTA",
        "spkcspider.apps.spider.functions.get_quota"
    )(self.user, quota_type)
    if quota is None:
        return math.inf
    return quota
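# A minimal sketch of a custom SPIDER_GET_QUOTA function, assuming the
# contract visible in get_quota() above: (user, quota_type) -> number or
# None, where None means "unlimited" (mapped to math.inf by the caller).
# The tier rules and numbers are hypothetical.
def tiered_get_quota(user, quota_type):
    if user.is_staff:
        return None  # unlimited
    if quota_type == "usercomponents":
        return 50
    return 10 * 1024 * 1024  # byte quota for other types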
def handle_referrer(self):
    _ = gettext
    if (
        self.request.user != self.usercomponent.user and
        not self.request.auth_token
    ):
        if self.request.user.is_authenticated:
            return self.handle_no_permission()
        return HttpResponseRedirect(redirect_to="{}?{}={}".format(
            self.get_login_url(),
            REDIRECT_FIELD_NAME,
            quote_plus(merge_get_url(
                self.request.build_absolute_uri(), token=None
            ))
        ))
    context = self.get_context_data()
    context["action"] = "create"
    context["intentions"] = set(self.request.GET.getlist("intention"))
    if "referrer" in self.request.POST:
        context["referrer"] = merge_get_url(self.request.POST["referrer"])
        if not get_settings_func(
            "SPIDER_URL_VALIDATOR",
            "spkcspider.apps.spider.functions.validate_url_default"
        )(context["referrer"], self):
            context["action"] = "referrer_invalid"
    else:
        context["referrer"] = merge_get_url(self.request.GET["referrer"])
        if not get_settings_func(
            "SPIDER_URL_VALIDATOR",
            "spkcspider.apps.spider.functions.validate_url_default"
        )(context["referrer"], self):
            return HttpResponse(
                status=400,
                content=_('Insecure url: %(url)s') % {
                    "url": context["referrer"]
                }
            )
    context["payload"] = self.request.GET.get("payload", None)
    token = self.request.auth_token
    if not token:
        token = AuthToken(
            usercomponent=self.usercomponent,
            extra={
                "strength": 10,
                "taint": False
            }
        )
    if "domain" in context["intentions"]:
        return self.handle_domain_auth(context, token)
    else:
        context["ids"] = set(self.object_list.values_list("id", flat=True))
        context["search"] = set(self.request.POST.getlist("search"))
        return self.handle_referrer_request(context, token)
def dispatch(self, request, *args, **kwargs):
    _ = gettext
    try:
        self.object = self.get_object()
        if not self.object.active:
            messages.success(self.request, _('Protection not active'))
            return redirect("home")
        return super().dispatch(request, *args, **kwargs)
    except Http404:
        return get_settings_func(
            "SPIDER_RATELIMIT_FUNC",
            "spkcspider.apps.spider.functions.rate_limit_default"
        )(request, self)
def get_abilities(self, context):
    _abilities = set()
    if (
        context["request"].auth_token and
        context["request"].auth_token.referrer
    ):
        if get_settings_func(
            "SPIDER_TAG_VERIFIER_VALIDATOR",
            "spkcspider.apps.spider.functions.clean_verifier"
        )(context["request"], self):
            _abilities.add("verify")
        if self.updateable_by.filter(
            id=context["request"].auth_token.referrer_id
        ).exists():
            _abilities.add("push_update")
    return _abilities
def get_requests_params(url):
    _url = host_tld_matcher.match(url)
    if not _url:
        raise ValidationError(
            _("Invalid URL: \"%(url)s\""),
            code="invalid_url",
            params={"url": url}
        )
    _url = _url.groupdict()
    mapper = getattr(
        settings, "VERIFIER_REQUEST_KWARGS_MAP",
        settings.SPIDER_REQUEST_KWARGS_MAP
    )
    return (
        mapper.get(
            _url["host"],
            mapper.get(
                _url["tld"],
                # _url["tld"] may be None; the lookup then falls
                # through to the default entry
                mapper[b"default"]
            )
        ),
        get_settings_func(
            "VERIFIER_INLINE",
            "SPIDER_INLINE",
            "spkcspider.apps.spider.functions.clean_spider_inline",
            exclude=frozenset({True})
        )(_url["host"])
    )
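# A hypothetical VERIFIER_REQUEST_KWARGS_MAP settings entry matching the
# lookup order above: exact host first, then TLD, then the b"default" key
# (a bytes key, so it cannot collide with any host string). Assuming the
# values are keyword arguments for requests; the proxy URLs and timeouts
# are illustrative only.
VERIFIER_REQUEST_KWARGS_MAP = {
    b"default": {"timeout": 3},
    # route .onion TLDs through a local Tor SOCKS proxy
    "onion": {
        "timeout": 120,
        "proxies": {
            "http": "socks5h://127.0.0.1:9050",
            "https": "socks5h://127.0.0.1:9050"
        }
    },
    # per-host override
    "example.com": {"timeout": 10}
}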
def clean(self):
    ret = super().clean()
    if not ret.get("url", None) and not ret.get("dvfile", None):
        raise forms.ValidationError(
            _('Require either url or dvfile'),
            code="missing_parameter"
        )
    if ret.get("url", None):
        self.cleaned_data["url"] = merge_get_url(
            self.cleaned_data["url"], raw="embed"
        )
        url = self.cleaned_data["url"]
        if not get_settings_func(
            "SPIDER_URL_VALIDATOR",
            "spkcspider.apps.spider.functions.validate_url_default"
        )(url):
            self.add_error(
                "url",
                forms.ValidationError(
                    _('invalid url: %(url)s'),
                    params={"url": url},
                    code="invalid_url"
                )
            )
    return ret
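# A minimal sketch of a custom SPIDER_URL_VALIDATOR, assuming the two call
# shapes seen in this codebase: (url) in form cleaning and (url, view)
# elsewhere, returning a truthy value for acceptable URLs. The https-only
# rule is a hypothetical policy.
def https_only_validator(url, view=None):
    return url.startswith("https://")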
def map_data(self, name, field, data, graph, context):
    if isinstance(data, File):
        return get_settings_func(
            "SPIDER_FILE_EMBED_FUNC",
            "spkcspider.apps.spider.functions.embed_file_default"
        )(name, data, self, context)
    ret = literalize(data, field, domain_base=context["hostpart"])
    if isinstance(ret, dict):
        base = ret["ref"]
        if ret["type"]:
            graph.add((base, RDF["type"], ret["type"]))
        # create subproperties
        for key, val in ret["items"].items():
            value_node = add_property(graph, key, ref=base, literal=val)
            graph.add((
                value_node,
                spkcgraph["hashable"],
                Literal(is_hashable(field, key))
            ))
        return base
    return ret
def verify_tag(tag, hostpart=None, ffrom="sync_call", task=None):
    """ for auto validation or hooks """
    if not hostpart:
        hostpart = get_anchor_domain()
    if task:
        task.update_state(
            state='VERIFY'
        )
    if get_settings_func(
        "VERIFIER_TAG_VERIFIER",
        "spkcspider.apps.verifier.functions.verify_tag_default"
    )(tag, hostpart, ffrom):
        try:
            tag.callback(hostpart)
        except exceptions.ValidationError:
            logger.exception("Error while calling back")
    if task:
        task.update_state(
            state='SUCCESS'
        )
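# A minimal sketch of a custom VERIFIER_TAG_VERIFIER, assuming the call
# signature used above: (tag, hostpart, ffrom) -> bool, gating whether
# tag.callback() runs. Auto-verifying only synchronous calls is a
# hypothetical policy.
def verify_sync_only(tag, hostpart, ffrom):
    # only let verify_tag() proceed to the callback for direct calls
    return ffrom == "sync_call"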
from spkcspider.apps.spider.views import ComponentPublicIndex
from spkcspider.utils.settings import get_settings_func

favicon_view = RedirectView.as_view(
    url='{}spider_base/favicon.svg'.format(settings.STATIC_URL),
    permanent=True
)
robots_view = RedirectView.as_view(
    url='{}spider_base/robots.txt'.format(settings.STATIC_URL),
    permanent=True
)

# disable the admin login page
admin.site.login = lambda *args, **kwargs: admin_login(
    admin.site, *args, **kwargs
)
# default: allow only non-faked users with superuser and staff permissions
admin.site.has_permission = lambda *args, **kwargs: get_settings_func(
    "HAS_ADMIN_PERMISSION_FUNC",
    "spkcspider.apps.spider.functions.has_admin_permission"
)(admin.site, *args, **kwargs)

urlpatterns = [
    path('admin/', admin.site.urls),
    path(
        '',
        ComponentPublicIndex.as_view(
            is_home=True, template_name="spider_base/home.html"
        ),
        name="home"
    ),
    path('i18n/', include('django.conf.urls.i18n'))
]

for app in apps.get_app_configs():
    url_path = getattr(app, "spider_url_path", None)
    if not url_path:
        continue
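# A minimal sketch of an app opting into the URL mounting loop above,
# assuming (from the getattr) that apps declare a spider_url_path attribute
# on their AppConfig. The app name and path are hypothetical.
from django.apps import AppConfig


class MyExtensionConfig(AppConfig):
    name = "myproject.myextension"
    # picked up by the loop above and mounted under this prefix
    spider_url_path = "myextension/"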
def validate(ob, hostpart, task=None, info_filters=None):
    dvfile = None
    source = None
    if not info_filters:
        info_filters = []
    info_filters = set(info_filters)
    g = Graph()
    g.namespace_manager.bind("spkc", spkcgraph, replace=True)
    with requests.session() as session:
        view_url = None
        if isinstance(ob, tuple):
            current_size = ob[1]
            with open(ob[0], "rb") as f:
                retrieve_object(f, [current_size], graph=g, session=session)
            if ob[2]:
                try:
                    os.unlink(ob[0])
                except FileNotFoundError:
                    pass
        else:
            current_size = 0
            view_url = ob
            retrieve_object(ob, [current_size], graph=g, session=session)
        tmp = list(g.query(
            """
                SELECT ?base ?scope ?pages ?view
                WHERE {
                    ?base spkc:scope ?scope ;
                          spkc:pages.num_pages ?pages ;
                          spkc:pages.current_page ?current_page ;
                          spkc:action:view ?view .
                }
            """,
            initNs={"spkc": spkcgraph},
            initBindings={
                "current_page": Literal(1, datatype=XSD.positiveInteger)
            }
        ))
        if len(tmp) != 1:
            raise exceptions.ValidationError(
                _('Invalid graph'),
                code="invalid_graph"
            )
        tmp = tmp[0]
        start = tmp.base
        pages = tmp.pages.toPython()
        # scope = tmp.scope.toPython()
        if not view_url:
            view_url = tmp.view.toPython()
        # normalize view_url
        splitted = view_url.split("?", 1)
        if len(splitted) != 2:
            splitted = [splitted[0], ""]
        # update/create source object
        source = VerifySourceObject.objects.update_or_create(
            url=splitted[0], defaults={"get_params": splitted[1]}
        )[0]
        # build the internal info filter (only matching contents are
        # checked); differs from the info field itself
        _filter_info = ""
        for filt in info_filters:
            fchar = filt[0]
            if fchar == "\x1e":
                _filter_info = \
                    f"{_filter_info}FILTER CONTAINS(?info, {filt})\n"
            elif fchar == "!" and not filt[1] == "!":
                _filter_info = \
                    f"{_filter_info}FILTER NOT CONTAINS(?info, {filt[1:]})\n"
            else:
                raise ValueError("Invalid filter")
        if task:
            task.update_state(
                state='RETRIEVING',
                meta={
                    'page': 1,
                    'num_pages': pages
                }
            )
        # retrieve further pages
        for page in range(2, pages + 1):
            url = merge_get_url(view_url, raw="embed", page=str(page))
            retrieve_object(url, [current_size], graph=g, session=session)
            if task:
                task.update_state(
                    state='RETRIEVING',
                    meta={
                        'page': page,
                        'num_pages': pages
                    }
                )
        # check and clean graph
        data_type = get_settings_func(
            "VERIFIER_CLEAN_GRAPH",
            "spkcspider.apps.verifier.functions.clean_graph"
        )(g, start, source, hostpart)
        if not data_type:
            raise exceptions.ValidationError(
                _('Invalid graph (Verification failed)'),
                code="graph_failed"
            )
        g.remove((None, spkcgraph["csrftoken"], None))
        # ?val_info must be projected as well: it is accessed below when
        # hashing referenced contents
        hashable_nodes = g.query(
            f"""
                SELECT DISTINCT ?base ?info ?type ?name ?value ?val_info
                WHERE {{
                    ?base spkc:type ?type ;
                          spkc:properties ?pinfo, ?pval .
                    ?pinfo spkc:name "info"^^xsd:string ;
                           spkc:value ?info .
                    ?pval spkc:hashable "true"^^xsd:boolean ;
                          spkc:name ?name ;
                          spkc:value ?value .
                    OPTIONAL {{
                        ?value spkc:properties ?prop2 ;
                               spkc:name "info"^^xsd:string ;
                               spkc:value ?val_info .
                    }}
                    {_filter_info}
                }}
            """,
            initNs={"spkc": spkcgraph}
        )
        if task:
            task.update_state(
                state='HASHING',
                meta={
                    'hashable_nodes_checked': 0
                }
            )
        # make sure triples are linked to start
        # (the user can provide arbitrary data)
        g.remove((start, spkcgraph["hashed"], None))
        # MAYBE: think about logic for incorporating hashes
        g.remove((start, spkcgraph["hash"], None))
        nodes = {}
        resources_with_hash = {}
        for count, val in enumerate(hashable_nodes, start=1):
            if isinstance(val.value, URIRef):
                assert val.val_info
                h = get_hashob()
                # should always hash with xsd:string
                h.update(XSD.string.encode("utf8"))
                # don't strip the id, as some contents differ only by id
                h.update(str(val.val_info).encode("utf8"))
                _hash = h.finalize()
            elif val.value.datatype == spkcgraph["hashableURI"]:
                _hash = resources_with_hash.get(val.value.value)
                if not _hash:
                    url = merge_get_url(val.value.value, raw="embed")
                    if not get_settings_func(
                        "SPIDER_URL_VALIDATOR",
                        "spkcspider.apps.spider.functions.validate_url_default"
                    )(url):
                        raise exceptions.ValidationError(
                            _('invalid url: %(url)s'),
                            params={"url": url},
                            code="invalid_url"
                        )
                    _hash = retrieve_object(
                        url, [current_size], session=session
                    )
                    # cache so identical resources are fetched only once
                    resources_with_hash[val.value.value] = _hash
                # do not use add, as it could be corrupted by the user
                # (the user can provide arbitrary data)
                _uri = URIRef(val.value.value)
                g.set((
                    _uri,
                    spkcgraph["hash"],
                    Literal(_hash.hex())
                ))
            else:
                h = get_hashob()
                if val.value.datatype == XSD.base64Binary:
                    h.update(val.value.datatype.encode("utf8"))
                    h.update(val.value.toPython())
                elif val.value.datatype:
                    h.update(val.value.datatype.encode("utf8"))
                    h.update(val.value.encode("utf8"))
                else:
                    h.update(XSD.string.encode("utf8"))
                    h.update(val.value.encode("utf8"))
                _hash = h.finalize()
            h = get_hashob()
            h.update(val.name.toPython().encode("utf8"))
            h.update(_hash)
            base = str(val.base)
            nodes.setdefault(base, ([], val.type))
            nodes[base][0].append(h.finalize())
            if task:
                task.update_state(
                    state='HASHING',
                    meta={
                        'hashable_nodes_checked': count
                    }
                )
        if task:
            task.update_state(
                state='HASHING',
                meta={
                    'hashable_nodes_checked': "all"
                }
            )
        # first sort the hashes per node and hash over the sorted hashes;
        # de-duplicate the super-hashes (identical nodes)
        hashes = set()
        for val, _type in nodes.values():
            h = get_hashob()
            for _hob in sorted(val):
                h.update(_hob)
            h.update(_type.encode("utf8"))
            hashes.add(h.finalize())
        # then hash over the sorted, de-duplicated node hashes
        h = get_hashob()
        for i in sorted(hashes):
            h.update(i)
        # do not use add, as it could be corrupted by the user
        # (the user can provide arbitrary data)
        digest = h.finalize().hex()
        g.set((
            start,
            spkcgraph["hash"],
            Literal(digest)
        ))
    with tempfile.NamedTemporaryFile(delete=True) as dvfile:
        # save in a temporary file
        g.serialize(
            dvfile,
            format="turtle"
        )
        result, created = DataVerificationTag.objects.get_or_create(
            defaults={
                "dvfile": File(dvfile),
                "source": source,
                "data_type": data_type
            },
            hash=digest
        )
    update_fields = set()
    # and source: cannot remove source without replacement
    if not created and source and source != result.source:
        result.source = source
        update_fields.add("source")
    if data_type != result.data_type:
        result.data_type = data_type
        update_fields.add("data_type")
    result.save(update_fields=update_fields)
    verify_tag(result, task=task, ffrom="validate")
    if task:
        task.update_state(
            state='SUCCESS'
        )
    return result
def get_component_quota(self):
    return get_settings_func(
        "SPIDER_GET_QUOTA",
        "spkcspider.apps.spider.functions.get_quota"
    )(self.user, "usercomponents")