def get(self, request, root=None, relations=None, template='graphviz/graphviz', depth=4, max_depth=5, exclude_types=None, properties=None, inverted=None, minimal=None):
    """Query the relations around ``root`` and render them with ``template``.

    Each option may be passed as a keyword argument; when the argument is
    empty the matching GET parameter is used instead (``root``,
    ``relation``, ``exclude_type``, ``property``, ``inverted``,
    ``minimal``).  ``depth`` is always read from the GET parameters, with
    the keyword argument as its default, and is capped at ``max_depth``.

    Returns an ``HttpResponseBadRequest`` when ``depth`` is not an integer,
    raises ``Http404`` when ``self.get_types(root)`` is empty or there are
    no relations, and otherwise returns the result of ``self.render``.
    """
    params = request.GET

    root = expand(root or params.get('root', ''))
    if not relations:
        relations = [expand(name) for name in params.getlist('relation')]
    if not exclude_types:
        exclude_types = [expand(name) for name in params.getlist('exclude_type')]
    if not properties:
        properties = [expand(name) for name in params.getlist('property')]

    try:
        depth = min(int(params.get('depth', depth)), max_depth)
    except (TypeError, ValueError):
        return HttpResponseBadRequest()

    # Only the literal string 'true' switches these flags on via the URL.
    if inverted is None:
        inverted = params.get('inverted') == 'true'
    if minimal is None:
        minimal = params.get('minimal') == 'true'

    # When inverted, the root sits on the object side of the pattern.
    root_term = root.n3()
    if inverted:
        subj, obj = '?entity', root_term
        relation_pattern = '?entity ?relation ?parent'
    else:
        subj, obj = root_term, '?entity'
        relation_pattern = '?parent ?relation ?entity'

    if not self.get_types(root) or not relations:
        raise Http404

    page_uri = rdflib.URIRef(request.build_absolute_uri())

    relation_terms = [relation.n3() for relation in relations]
    if exclude_types:
        exclude_filter = 'FILTER (?type not in (%s)) .' % ', '.join(t.n3() for t in exclude_types)
    else:
        exclude_filter = ''
    # Each requested property is bound to its own numbered variable (?p0, ?p1, ...).
    numbered_properties = [(index, prop.n3()) for index, prop in enumerate(properties)]

    query = self._QUERY % {
        'subject': subj,
        'object': obj,
        'depth': depth,
        'relationList': ', '.join(relation_terms),
        'excludeTypesFilter': exclude_filter,
        'relationAlternation': '|'.join(relation_terms),
        'relationPattern': relation_pattern,
        'page_uri': page_uri.n3(),
        'propertyPatterns': '\n '.join('OPTIONAL { ?entity %s ?p%s } .' % (term, index)
                                       for index, term in numbered_properties),
        'propertyTriples': ''.join(';\n %s ?p%s' % (term, index)
                                   for index, term in numbered_properties),
    }
    graph = self.endpoint.query(query)

    topic = NS['foaf'].topic
    subjects = [Resource(node, graph, self.endpoint)
                for node in set(graph.objects(page_uri, topic))]
    context = {
        'graph': graph,
        'queries': [graph.query],
        'subjects': subjects,
        'subject': Resource(root, graph, self.endpoint),
        'inverted': inverted,
        'relations': relations,
        'minimal': minimal,
    }

    for parent in subjects:
        if inverted:
            linked = (node
                      for relation in relations
                      for node in graph.subjects(relation, parent._identifier))
        else:
            linked = (node
                      for relation in relations
                      for node in graph.objects(parent._identifier, relation))
        parent.children = set(Resource(node, graph, self.endpoint) for node in linked)
        # Flag the children that are themselves topics of this page.
        for child in parent.children:
            if (page_uri, topic, child._identifier) in graph:
                child.display = True

    return self.render(request, context, template)
class JournalArticle(object):
    """Article-specific templates, query patterns and convenience properties.

    The class reads attributes such as ``self.all``, ``self.prism_doi`` and
    ``self.fabio_hasPubMedId`` that it does not define itself; presumably it
    is combined with a resource base class that supplies them -- verify
    against the code that registers this class.
    """

    template_name = 'doc/article'

    _ABSTRACT_URI = expand('fabio:Abstract')
    # The class is spelled ``biro:ReferenceList`` (as in the describe pattern
    # below); the previous ``biro:REFERENCE_LIST`` could never match an
    # rdf:type, so ``reference_list`` always came back as None.
    _REFERENCE_LIST_URI = expand('biro:ReferenceList')

    @classmethod
    def _describe_patterns(cls):
        """Return the pattern templates used when describing an article."""
        return [
            '%(uri)s frbr:part %(n0)s . %(n0)s a fabio:Abstract',
            '%(uri)s frbr:part %(n0)s . %(n0)s a biro:ReferenceList ; collections:item %(n1)s',
            '%(uri)s pro:isRelatedToRoleInTime %(n0)s . %(n1)s pro:holdsRoleInTime %(n0)s',
            '%(n0)s frbr:part+ %(uri)s',
        ]

    @classmethod
    def _construct_patterns(cls):
        """Return the construct pattern templates for an article.

        Entries are either a single template string or a two-item tuple of
        templates.
        """
        return [
            '%(uri)s cito:cites %(cited)s . %(cited)s a %(citedType)s ; dcterms:title %(citedTitle)s',
            '%(citedBy)s cito:cites %(uri)s . %(citedBy)s a %(citedByType)s ; dcterms:title %(citedByTitle)s',
            ('%(sup)s a %(supType)s ; frbr:part %(supPart)s ; dcterms:title %(supTitle)s',
             '%(sup)s frbr:part+ %(uri)s ; a %(supType)s ; frbr:part %(supPart)s . OPTIONAL { %(sup)s dcterms:title %(supTitle)s }'),
        ]

    def _part_by_type(self, type):
        """Return the first ``frbr:part`` whose rdf:type includes ``type``.

        Returns None when no part has that type.
        """
        for part in self.all.frbr_part:
            if type in (t._identifier for t in part.all.rdf_type):
                return part
        return None

    abstract = property(lambda self: self._part_by_type(self._ABSTRACT_URI))
    reference_list = property(
        lambda self: self._part_by_type(self._REFERENCE_LIST_URI))

    @property
    def identifiers(self):
        """Return ``(scheme, value, url)`` tuples for the identifiers present.

        The order is DOI, PMID, PMC; identifiers with a falsy value are
        left out.
        """
        ret = []
        if self.prism_doi:
            ret.append(('DOI', self.prism_doi,
                        'http://dx.doi.org/%s' % self.prism_doi))
        if self.fabio_hasPubMedId:
            ret.append(('PMID', self.fabio_hasPubMedId,
                        'http://www.ncbi.nlm.nih.gov/pubmed/%s' % self.fabio_hasPubMedId))
        if self.fabio_hasPubMedCentralId:
            ret.append(('PMC', self.fabio_hasPubMedCentralId,
                        'http://www.ncbi.nlm.nih.gov/sites/ppmc/articles/PMC%s/' % self.fabio_hasPubMedCentralId))
        return ret
def __init__(self, *args):
    """Build the registry mapping expanded type names to sets of classes.

    Each positional argument is either a class reference on its own, in
    which case the resolved class's ``types`` attribute lists the types to
    register it under, or a list/tuple whose first item is the class
    reference and whose remaining items are the types.  Class references
    are resolved with ``self._get_object``.
    """
    self._registry = defaultdict(set)
    for entry in args:
        reference, explicit_types = entry, None
        if isinstance(entry, (list, tuple)):
            reference, explicit_types = entry[0], entry[1:]

        klass = self._get_object(reference)
        type_names = klass.types if explicit_types is None else explicit_types
        for type_name in type_names:
            self._registry[expand(type_name)].add(klass)
def _find_inner(graph, subject, predicates, datatypes=None, all=False):
    """Look up values linked to ``subject`` through any of ``predicates``.

    A predicate prefixed with ``^`` is followed in reverse (``subject`` is
    matched in the object position); every predicate is passed through
    ``expand`` first.  ``rdflib.Literal`` results are converted with
    ``unicode``; other nodes are returned unchanged.

    With ``datatypes`` given, the datatypes are tried in order and only
    literals of the first datatype that has any match are used; if none
    matches the result is ``[]`` when ``all`` is true and ``None``
    otherwise.  Without ``datatypes``, every collected node is a candidate.
    ``all`` selects between returning all candidates and returning a single
    one (``None`` when there is none).
    """
    def coerce(node):
        if isinstance(node, rdflib.Literal):
            return unicode(node)
        return node

    collected = set()
    for predicate in predicates:
        if predicate.startswith('^'):
            matches = graph.subjects(expand(predicate[1:]), subject)
        else:
            matches = graph.objects(subject, expand(predicate))
        collected.update(matches)
    candidates = list(collected)

    if not datatypes:
        if all:
            return map(coerce, candidates)
        if candidates:
            return coerce(candidates[0])
        return None

    for datatype in datatypes:
        typed = [node for node in candidates
                 if isinstance(node, rdflib.Literal) and node.datatype == datatype]
        if typed:
            if all:
                return map(coerce, typed)
            return coerce(typed[0])

    # No requested datatype was present among the candidates.
    if all:
        return []
    return None
class RoleInTime(object):
    """Labelling and describe patterns for role-in-time resources.

    ``label`` falls back to ``super(RoleInTime, self).label``, so the class
    is presumably combined with a base class that provides ``label`` --
    verify against the code that uses it.
    """

    # Expanded role identifier -> label format taking the role holder's label.
    _LABEL_MAP = dict(
        (expand(qname), label_format)
        for qname, label_format in (
            ('pro:author', 'authorship of %s'),
            ('pro:editor', 'editorship of %s'),
            ('pro:peer-reviewer', 'peer-reviewing of %s'),
            ('pro:translator', 'role as translator of %s'),
        ))

    @property
    def label(self):
        """Return a role-specific label, or the inherited label for other roles."""
        label_format = self._LABEL_MAP.get(self.pro_withRole._identifier)
        if not label_format:
            return super(RoleInTime, self).label
        return label_format % self.pro_holdsRoleInTime_inv.rdfs_label

    @classmethod
    def _describe_patterns(cls):
        """Return the pattern templates used when describing a role in time."""
        patterns = []
        patterns.append('%(n0)s pro:isRelatedToRoleInTime %(uri)s')
        patterns.append('%(n0)s pro:holdsRoleInTime %(uri)s')
        return patterns
def get_query(self, parameters, cleaned_data, start, page_size):
    """Build the search query dictionary for the current request.

    ``parameters`` supplies ``default_operator`` (anything other than
    AND/OR, case-insensitively, falls back to AND), ``cleaned_data['q']``
    is the query string, and ``start`` / ``page_size`` become ``from`` /
    ``size``.

    Filters are read from ``self.request.GET`` keys of the form
    ``FTYPE.FIELDNAME`` where FTYPE is ``filter``, ``not``, ``gt``,
    ``gte``, ``lt`` or ``lte``; keys with any other FTYPE and empty values
    are ignored.  Several values for one key are OR-ed together, and the
    per-key clauses are AND-ed.

    When ``self.facets`` is set, a deep copy is attached as
    ``query['facets']`` and each facet receives, as ``facet_filter``, every
    active clause except those acting on the facet's own field.  The
    previous implementation attached no clauses at all to a facet whose
    field was being filtered, which contradicted that stated intent.

    Returns the query dictionary; the ``filter`` entry is omitted when no
    clause ended up in it.
    """
    default_operator = parameters.get('default_operator', '').upper()
    if default_operator not in ('AND', 'OR'):
        default_operator = 'AND'

    conjunction = []
    query = {
        'query': {'query_string': {'query': cleaned_data['q'],
                                   'default_operator': default_operator}},
        'from': start,
        'size': page_size,
        # A blank conjunctive filter. We'll remove this later if necessary.
        'filter': {'and': conjunction},
    }

    # Parse query parameters of the form 'FTYPE.FIELDNAME', remembering
    # which field each resulting clause acts on.
    active = []
    for key, values in self.request.GET.lists():
        if '.' not in key:
            continue
        ftype, field = key.split('.', 1)
        clauses = []
        for value in values:
            if not value:
                continue
            if ftype == 'filter':
                if value == '-':
                    clause = {'missing': {'field': field}}
                else:
                    if field.endswith('.uri') and ':' in value:
                        value = expand(value)
                    clause = {'term': {field: value}}
            elif ftype == 'not':
                if field.endswith('.uri') and ':' in value:
                    value = expand(value)
                clause = {'not': {'term': {field: value}}}
            elif ftype in ('gt', 'gte', 'lt', 'lte'):
                if value == 'now':
                    # Current time as milliseconds since the epoch.
                    value = int(calendar.timegm(time.gmtime()) * 1000)
                clause = {'range': {field: {ftype: value}}}
            else:
                continue
            clauses.append(clause)

        if not clauses:
            continue
        combined = clauses[0] if len(clauses) == 1 else {'or': clauses}
        conjunction.append(combined)
        active.append((field, combined))

    if self.facets:
        # Copy the facet definitions as we'll be playing with them shortly.
        facets = copy.deepcopy(self.facets)

        # Add facet filters for all active filters except any acting on
        # this particular facet.
        for facet in facets.values():
            others = [clause for clause_field, clause in active
                      if clause_field != facet['terms']['field']]
            if others:
                if 'facet_filter' not in facet:
                    facet['facet_filter'] = {'and': []}
                facet['facet_filter']['and'].extend(others)
        query['facets'] = facets

    # If default_types set, add a filter to restrict the results.
    if self.default_types and 'type' not in self.request.GET:
        conjunction.append({'or': [{'type': {'value': t}}
                                   for t in self.default_types]})

    # Drop the filter section entirely when nothing was added to it.
    if not query['filter']['and']:
        del query['filter']['and']
    if not query['filter']:
        del query['filter']

    return query
def get(self, request, root=None, base_type=None, graph=None, relations=None, template='graphviz/graphviz', depth=4, max_depth=5, excluded_types=None, properties=None, inverted=None, minimal=None):
    """Select entities by tree or by type, query their relations and render.

    Options come from the keyword arguments, falling back to the GET
    parameters ``root``, ``base_type``, ``graph``, ``relation``,
    ``exclude_type``, ``property``, ``inverted`` and ``minimal`` when the
    argument is empty.  Exactly one of ``root`` and ``base_type`` must be
    supplied:

    * with ``root``, ``self.tree_selector`` is used and ``depth`` (read
      from the GET parameters, defaulting to the keyword argument) is
      capped at ``max_depth``;
    * with ``base_type``, ``self.type_selector`` is used, optionally
      restricted to ``graph``.

    Raises ``Http404`` when there are no relations or ``self.get_types(root)``
    is empty, and ``HttpBadRequest`` when both or neither of ``root`` and
    ``base_type`` are given.  Returns an ``HttpResponseBadRequest`` when
    ``depth`` is not an integer, otherwise the result of ``self.render``.
    """
    def make_uriref(uri):
        # Missing values stay None rather than being handed to expand().
        return expand(uri) if uri else None

    params = request.GET
    root = make_uriref(root or params.get('root'))
    base_type = make_uriref(base_type or params.get('base_type'))
    graph = make_uriref(graph or params.get('graph'))
    relations = relations or [expand(relation) for relation in params.getlist('relation')]
    # Only the literal string 'true' switches these flags on via the URL.
    if inverted is None:
        inverted = params.get('inverted') == 'true'
    if minimal is None:
        minimal = params.get('minimal') == 'true'

    if not relations:
        raise Http404

    if inverted:
        relation_pattern = '?entity ?relation ?parent'
    else:
        relation_pattern = '?parent ?relation ?entity'

    # Exactly one selector must be chosen.  Supplying neither used to fall
    # through both branches and fail later on the unbound ``selector``.
    if bool(root) == bool(base_type):
        raise HttpBadRequest

    if root:
        if not self.get_types(root):
            raise Http404
        # When inverted, the root sits on the object side of the pattern.
        if inverted:
            subj, obj = '?entity', root.n3()
        else:
            subj, obj = root.n3(), '?entity'
        try:
            depth = min(int(params.get('depth', depth)), max_depth)
        except (TypeError, ValueError):
            return HttpResponseBadRequest()
        selector = self.tree_selector.format(
            subject=subj,
            object=obj,
            depth=depth,
            relationAlternation='|'.join(r.n3() for r in relations))
    else:
        selector = self.type_selector.format(
            graph=graph.n3() if graph else '?graph',
            baseType=base_type.n3())

    excluded_types = excluded_types or [expand(t) for t in params.getlist('exclude_type')]
    properties = properties or [expand(p) for p in params.getlist('property')]

    page_uri = rdflib.URIRef(request.build_absolute_uri())

    query = self.query.format(
        selector=selector,
        relations=' '.join(r.n3() for r in relations),
        excludedTypes=' '.join(t.n3() for t in excluded_types),
        relationPattern=relation_pattern,
        page_uri=page_uri.n3(),
        # Each requested property is bound to its own numbered variable.
        propertyPatterns='\n '.join('OPTIONAL { ?entity %s ?p%s } .' % (p.n3(), i)
                                    for i, p in enumerate(properties)),
        propertyTriples=''.join(';\n %s ?p%s' % (p.n3(), i)
                                for i, p in enumerate(properties)))

    # From here on ``graph`` is the query result, not the graph name.
    graph = self.endpoint.query(query)

    topic = NS['foaf'].topic
    subjects = [Resource(s, graph, self.endpoint)
                for s in set(graph.objects(page_uri, topic))]
    subjects.sort(key=lambda s: s.label)

    subject = Resource(root, graph, self.endpoint) if root else None

    context = {
        'graph': graph,
        'queries': [graph.query],
        'subjects': subjects,
        'subject': subject,
        'inverted': inverted,
        'relations': relations,
        'minimal': minimal,
        'filename_base': slugify(subject.label if subject else 'graphviz')[:32],
    }

    for parent in subjects:
        if not inverted:
            parent.children = set(Resource(s, graph, self.endpoint)
                                  for relation in relations
                                  for s in graph.objects(parent._identifier, relation))
        else:
            parent.children = set(Resource(s, graph, self.endpoint)
                                  for relation in relations
                                  for s in graph.subjects(relation, parent._identifier))
        # Flag the children that are themselves topics of this page.
        for child in parent.children:
            if (page_uri, topic, child._identifier) in graph:
                child.display = True

    return self.render(request, context, template)
from celery.task import task
import dateutil.parser
from django.conf import settings
import rdflib
import pytz

from humfrey.utils.namespaces import NS, expand, HUMFREY
from humfrey.sparql.endpoint import Endpoint
from humfrey.sparql.utils import get_labels, label_predicates
from humfrey.streaming import parse, serialize
from humfrey.update.uploader import Uploader
from humfrey.signals import update_completed

# Optional Django settings; each of these is None when the setting is absent.
DATASET_NOTATION = getattr(settings, 'DATASET_NOTATION', None)
if DATASET_NOTATION:
    # A configured notation is passed through expand() before use.
    DATASET_NOTATION = expand(DATASET_NOTATION)

GRAPH_BASE = getattr(settings, 'GRAPH_BASE', None)
SOURCE_DIRECTORY = getattr(settings, 'SOURCE_DIRECTORY', None)
SOURCE_URL = getattr(settings, 'SOURCE_URL', None)

# NOTE(review): ``logging`` is not imported in the lines visible here;
# presumably it is imported earlier in the file -- confirm.
logger = logging.getLogger(__name__)


class DatasetArchiver(object):
    """Holds the store, dataset, notation and timestamp for one archive run."""

    def __init__(self, store, dataset, notation, updated):
        """Record the arguments and open an endpoint on the store.

        ``updated`` is stored with its microseconds set to zero, and
        ``self.endpoint`` is an ``Endpoint`` built from
        ``store.query_endpoint``.
        """
        self.store = store
        self.dataset = dataset
        self.notation = notation
        self.updated = updated.replace(microsecond=0)
        self.endpoint = Endpoint(store.query_endpoint)
def get_results(self, parameters, cleaned_data):
    """Run the search described by the form data and post-process the hits.

    ``cleaned_data`` supplies ``q`` plus optional ``page`` (default 1) and
    ``page_size`` (default ``self.page_size``).  ``parameters`` entries
    named ``filter.FIELDNAME`` become filters: ``-`` means "field missing",
    any other non-empty value is a term filter, and empty entries are
    deleted from ``parameters``.

    The query is POSTed as JSON to ``self.search_url``; the decoded
    response is wrapped in ``self.Deunderscorer`` and extended with
    pagination data, the query string, per-facet metadata, term values and
    labels, and a ``_url`` for every hit.

    Changes from the previous implementation:

    * a view without ``self.facets`` no longer fails with ``KeyError`` on
      ``query['facets']``;
    * a ``missing`` filter is now recognised as acting on its own field
      (the old test compared the facet field with the literal key
      ``'field'``), so it is left out of that facet's ``facet_filter``
      like term filters already were;
    * the HTTP response is closed once it has been read;
    * the no-op ``except KeyError: raise`` wrapper was removed.
    """
    page = cleaned_data.get('page') or 1
    page_size = cleaned_data.get('page_size') or self.page_size
    start = (page - 1) * page_size

    query = {
        'query': {'query_string': {'query': cleaned_data['q'],
                                   'default_operator': 'AND'}},
        'from': start,
        'size': page_size,
        # A blank conjunctive filter. We'll remove this later if necessary.
        'filter': {'and': []},
    }

    # Parse query parameters of the form 'filter.FIELDNAME'.
    for key in list(parameters):
        if not key.startswith('filter.'):
            continue
        parameter = parameters[key]
        field = key[7:]
        if not parameter:
            del parameters[key]
            continue
        if parameter == '-':
            clause = {'missing': {'field': field}}
        else:
            if key.endswith('.uri') and ':' in parameter:
                parameter = expand(parameter)
            clause = {'term': {field: parameter}}
        query['filter']['and'].append(clause)

    # If there aren't any filters defined, we don't want a filter part of
    # our query.
    if not query['filter']['and']:
        del query['filter']['and']
    if not query['filter']:
        del query['filter']

    def filtered_field(clause):
        # Name of the field a term / missing clause acts on.
        if clause.get('term'):
            return list(clause['term'])[0]
        return clause['missing']['field']

    if self.facets:
        # Copy the facet definitions as we'll be playing with them shortly.
        facets = copy.deepcopy(self.facets)

        # Add facet filters for all active filters except any acting on
        # this particular facet.
        if 'filter' in query:
            for facet in facets.values():
                for clause in query['filter']['and']:
                    if facet['terms']['field'] != filtered_field(clause):
                        if 'facet_filter' not in facet:
                            facet['facet_filter'] = {'and': []}
                        facet['facet_filter']['and'].append(clause)
        query['facets'] = facets

    response = urllib2.urlopen(self.search_url, json.dumps(query))
    try:
        results = self.Deunderscorer(json.load(response))
    finally:
        response.close()

    results.update(self.get_pagination(page_size, page, start, results))
    results['q'] = cleaned_data['q']

    # Empty when the view defines no facets.
    requested_facets = query.get('facets', {})

    facet_labels = set()
    for key in requested_facets:
        meta = results['facets'][key]['meta'] = requested_facets[key]
        filter_value = parameters.get('filter.%s' % meta['terms']['field'])
        results['facets'][key]['filter'] = {'present': filter_value is not None,
                                            'value': filter_value}
        if meta['terms']['field'].endswith('.uri'):
            # URI-valued terms get a contracted value and are queued for
            # a label lookup.
            for term in results['facets'][key]['terms']:
                facet_labels.add(term['term'])
                term['value'] = contract(term['term'])
        else:
            for term in results['facets'][key]['terms']:
                term['value'] = term['term']

    labels = get_labels(facet_labels, endpoint=self.endpoint)
    for key in requested_facets:
        if results['facets'][key]['meta']['terms']['field'].endswith('.uri'):
            for term in results['facets'][key]['terms']:
                uri = URIRef(term['term'])
                if uri in labels:
                    term['label'] = unicode(labels[uri])

    for hit in results['hits']['hits']:
        hit['_url'] = doc_forwards(hit['_source']['uri'])[None]

    return results
def get_query(self, parameters, cleaned_data, start, page_size):
    """Build the search query dictionary for the current request.

    ``parameters`` supplies ``default_operator`` (anything other than
    AND/OR, case-insensitively, falls back to AND), ``cleaned_data['q']``
    is the query string, and ``start`` / ``page_size`` become ``from`` /
    ``size``.

    Filters are read from ``self.request.GET`` keys of the form
    ``FTYPE.FIELDNAME`` where FTYPE is ``filter``, ``not``, ``gt``,
    ``gte``, ``lt`` or ``lte``; keys with any other FTYPE and empty values
    are ignored.  Several values for one key are OR-ed together, and the
    per-key clauses are AND-ed.

    When ``self.facets`` is set, a deep copy is attached as
    ``query['facets']`` and each facet receives, as ``facet_filter``, every
    active clause except those acting on the facet's own field.  The
    previous implementation attached no clauses at all to a facet whose
    field was being filtered, which contradicted that stated intent.

    Returns the query dictionary; the ``filter`` entry is omitted when no
    clause ended up in it.
    """
    default_operator = parameters.get('default_operator', '').upper()
    if default_operator not in ('AND', 'OR'):
        default_operator = 'AND'

    conjunction = []
    query = {
        'query': {
            'query_string': {
                'query': cleaned_data['q'],
                'default_operator': default_operator,
            },
        },
        'from': start,
        'size': page_size,
        # A blank conjunctive filter. We'll remove this later if necessary.
        'filter': {'and': conjunction},
    }

    # Parse query parameters of the form 'FTYPE.FIELDNAME', remembering
    # which field each resulting clause acts on.
    active = []
    for key, values in self.request.GET.lists():
        if '.' not in key:
            continue
        ftype, field = key.split('.', 1)
        clauses = []
        for value in values:
            if not value:
                continue
            if ftype == 'filter':
                if value == '-':
                    clause = {'missing': {'field': field}}
                else:
                    if field.endswith('.uri') and ':' in value:
                        value = expand(value)
                    clause = {'term': {field: value}}
            elif ftype == 'not':
                if field.endswith('.uri') and ':' in value:
                    value = expand(value)
                clause = {'not': {'term': {field: value}}}
            elif ftype in ('gt', 'gte', 'lt', 'lte'):
                if value == 'now':
                    # Current time as milliseconds since the epoch.
                    value = int(calendar.timegm(time.gmtime()) * 1000)
                clause = {'range': {field: {ftype: value}}}
            else:
                continue
            clauses.append(clause)

        if not clauses:
            continue
        combined = clauses[0] if len(clauses) == 1 else {'or': clauses}
        conjunction.append(combined)
        active.append((field, combined))

    if self.facets:
        # Copy the facet definitions as we'll be playing with them shortly.
        facets = copy.deepcopy(self.facets)

        # Add facet filters for all active filters except any acting on
        # this particular facet.
        for facet in facets.values():
            others = [clause for clause_field, clause in active
                      if clause_field != facet['terms']['field']]
            if others:
                if 'facet_filter' not in facet:
                    facet['facet_filter'] = {'and': []}
                facet['facet_filter']['and'].extend(others)
        query['facets'] = facets

    # If default_types set, add a filter to restrict the results.
    if self.default_types and 'type' not in self.request.GET:
        conjunction.append({'or': [{'type': {'value': t}}
                                   for t in self.default_types]})

    # Drop the filter section entirely when nothing was added to it.
    if not query['filter']['and']:
        del query['filter']['and']
    if not query['filter']:
        del query['filter']

    return query
def get(self, request, root=None, base_type=None, graph=None, relations=None, template='graphviz/graphviz', depth=4, max_depth=5, excluded_types=None, properties=None, inverted=None, minimal=None):
    """Select entities by tree or by type, query their relations and render.

    Options come from the keyword arguments, falling back to the GET
    parameters ``root``, ``base_type``, ``graph``, ``relation``,
    ``exclude_type``, ``property``, ``inverted`` and ``minimal`` when the
    argument is empty.  Exactly one of ``root`` and ``base_type`` must be
    supplied:

    * with ``root``, ``self.tree_selector`` is used and ``depth`` (read
      from the GET parameters, defaulting to the keyword argument) is
      capped at ``max_depth``;
    * with ``base_type``, ``self.type_selector`` is used, optionally
      restricted to ``graph``.

    Raises ``Http404`` when there are no relations or ``self.get_types(root)``
    is empty, and ``HttpBadRequest`` when both or neither of ``root`` and
    ``base_type`` are given.  Returns an ``HttpResponseBadRequest`` when
    ``depth`` is not an integer, otherwise the result of ``self.render``.
    """
    def make_uriref(uri):
        # Missing values stay None rather than being handed to expand().
        return expand(uri) if uri else None

    params = request.GET
    root = make_uriref(root or params.get('root'))
    base_type = make_uriref(base_type or params.get('base_type'))
    graph = make_uriref(graph or params.get('graph'))
    relations = relations or [expand(relation) for relation in params.getlist('relation')]
    # Only the literal string 'true' switches these flags on via the URL.
    if inverted is None:
        inverted = params.get('inverted') == 'true'
    if minimal is None:
        minimal = params.get('minimal') == 'true'

    if not relations:
        raise Http404

    if inverted:
        relation_pattern = '?entity ?relation ?parent'
    else:
        relation_pattern = '?parent ?relation ?entity'

    # Exactly one selector must be chosen.  Supplying neither used to fall
    # through both branches and fail later on the unbound ``selector``.
    if bool(root) == bool(base_type):
        raise HttpBadRequest

    if root:
        if not self.get_types(root):
            raise Http404
        # When inverted, the root sits on the object side of the pattern.
        if inverted:
            subj, obj = '?entity', root.n3()
        else:
            subj, obj = root.n3(), '?entity'
        try:
            depth = min(int(params.get('depth', depth)), max_depth)
        except (TypeError, ValueError):
            return HttpResponseBadRequest()
        selector = self.tree_selector.format(
            subject=subj,
            object=obj,
            depth=depth,
            relationAlternation='|'.join(r.n3() for r in relations))
    else:
        selector = self.type_selector.format(
            graph=graph.n3() if graph else '?graph',
            baseType=base_type.n3())

    excluded_types = excluded_types or [expand(t) for t in params.getlist('exclude_type')]
    properties = properties or [expand(p) for p in params.getlist('property')]

    page_uri = rdflib.URIRef(request.build_absolute_uri())

    query = self.query.format(
        selector=selector,
        relations=' '.join(r.n3() for r in relations),
        excludedTypes=' '.join(t.n3() for t in excluded_types),
        relationPattern=relation_pattern,
        page_uri=page_uri.n3(),
        # Each requested property is bound to its own numbered variable.
        propertyPatterns='\n '.join('OPTIONAL { ?entity %s ?p%s } .' % (p.n3(), i)
                                    for i, p in enumerate(properties)),
        propertyTriples=''.join(';\n %s ?p%s' % (p.n3(), i)
                                for i, p in enumerate(properties)))

    # From here on ``graph`` is the query result, not the graph name.
    graph = self.endpoint.query(query)

    topic = NS['foaf'].topic
    subjects = [Resource(s, graph, self.endpoint)
                for s in set(graph.objects(page_uri, topic))]
    subjects.sort(key=lambda s: s.label)

    subject = Resource(root, graph, self.endpoint) if root else None

    context = {
        'graph': graph,
        'queries': [graph.query],
        'subjects': subjects,
        'subject': subject,
        'inverted': inverted,
        'relations': relations,
        'minimal': minimal,
        'filename_base': slugify(subject.label if subject else 'graphviz')[:32],
    }

    for parent in subjects:
        if not inverted:
            parent.children = set(Resource(s, graph, self.endpoint)
                                  for relation in relations
                                  for s in graph.objects(parent._identifier, relation))
        else:
            parent.children = set(Resource(s, graph, self.endpoint)
                                  for relation in relations
                                  for s in graph.subjects(relation, parent._identifier))
        # Flag the children that are themselves topics of this page.
        for child in parent.children:
            if (page_uri, topic, child._identifier) in graph:
                child.display = True

    return self.render(request, context, template)
def has_type(obj, value):
    """Tell whether ``expand(value)`` is one of ``obj``'s ``rdf:type`` values.

    Only ``BaseResource`` instances are inspected; for anything else the
    function returns ``None``.
    """
    if not isinstance(obj, BaseResource):
        return None
    wanted = expand(value)
    return wanted in obj.get_all('rdf:type')