def compute_dataset(dataset):
    """Return the cached entry for *dataset*, building it on first request.

    On a cache miss the dataset is loaded with ``get_eml``, passed through
    ``vectorize_ontology`` together with the module-level ``idf`` and
    ``lsa_model``, and the resulting classes are attached to the loaded
    object as ``.classes`` before it is stored in ``dataset_cache``.

    Args:
        dataset: key used both for the cache lookup and for ``get_eml``.

    Returns:
        The ``dataset_cache`` entry for *dataset*.
    """
    # Cache hit: nothing to compute.
    if dataset in dataset_cache:
        return dataset_cache[dataset]

    graph = get_eml(dataset)
    # Only the first element of the triple is used here; the other two
    # values returned by vectorize_ontology are intentionally discarded.
    vectorized_classes, _local_idf, _lsa = vectorize_ontology(
        graph, idf, lsa_model)
    graph.classes = vectorized_classes

    dataset_cache[dataset] = graph
    return dataset_cache[dataset]
def view(name=None):
    """Build and serialize an RDF graph of suggested subjects for dataset *name*.

    The dataset is loaded with ``get_eml`` and vectorized with
    ``vectorize_ontology``.  Its classes that fall under
    ``oboe:MeasurementType`` (excluding that class itself) are compared with
    ``self.target_class_subtree`` via ``pairwise_sparsedist``.  For every
    source class in the distance result, a ``csvw:Column`` resource identified
    by an xpointer selector is added to the output graph, together with an
    ``oa:FragmentSelector`` blank node and the source class label.  Of the
    ``self.config['top_hits']`` closest targets, those whose score is below
    ``self.config['max_distance']`` are attached as ``dcterms:subject`` and
    labelled using ``self.target_graph``.

    NOTE: ``self`` is not a parameter; this function relies on it being
    available from an enclosing scope.

    Args:
        name: dataset identifier handed to ``get_eml`` and used to mint the
            dataset URI in ``self.NS.dataset``.

    Returns:
        The value returned by ``sadi.serialize`` for the result graph.
    """
    # Debug trace of the requested dataset.  The parenthesised form prints the
    # same text as the Python 2 print statement and also parses on Python 3.
    print(name)

    # NOTE(review): the output format is negotiated from the request's
    # Content-Type header rather than Accept -- confirm this is intended.
    content_type = request.headers.get('Content-Type', '*/*')

    source_graph = get_eml(name)
    # The second value returned by vectorize_ontology is not needed here.
    classes, _local_idf = vectorize_ontology(source_graph, self.idf)
    source_graph.classes = classes

    # Restrict the source classes to the MeasurementType subtree, leaving out
    # the MeasurementType root itself.
    measurement_type = self.NS.oboe.MeasurementType
    source_subtree = set(
        source_graph.transitive_subjects(self.NS.RDFS.subClassOf,
                                         measurement_type))
    source_class_subtree = [
        cls for cls in source_graph.classes
        if cls.identifier in source_subtree
        and cls.identifier != measurement_type
    ]

    distances = pairwise_sparsedist(source_class_subtree,
                                    self.target_class_subtree)

    result = rdflib.Graph()
    dataset = result.resource(self.NS.dataset[name])
    dataset.add(self.NS.RDF.type, self.NS.dcat.Dataset)

    for class_id, dist in distances.items():
        source_class = source_graph.resource(class_id)
        attr_id = source_class.value(self.urn.attributeId)
        entity_id = source_class.value(self.urn.entityId)
        selector = (
            "xpointer(/eml/dataset/dataTable[%s]/attributeList/attribute[%s])"
            % (entity_id, attr_id))

        # The column is identified by the dataset URI plus the selector as a
        # fragment.
        attribute = result.resource(dataset.identifier + '#' + selector)
        attribute.add(self.NS.RDF.type, self.NS.csvw.Column)

        # Describe the same selector explicitly as a fragment selector node.
        sel = result.resource(rdflib.BNode())
        attribute.add(self.NS.oa.hasSelector, sel)
        sel.add(self.NS.RDF.type, self.NS.oa.FragmentSelector)
        sel.add(self.NS.dcterms.conformsTo,
                rdflib.URIRef("http://tools.ietf.org/rfc/rfc3023"))
        sel.add(self.NS.RDF.value, Literal(selector))
        attribute.add(self.NS.RDFS.label, source_class.label())

        # Closest targets first; keep at most top_hits of them and attach only
        # those scoring below max_distance.
        ranked = sorted(dist.items(), key=lambda x: x[1])
        for target, score in ranked[:self.config['top_hits']]:
            if score < self.config['max_distance']:
                attribute.add(self.NS.dcterms.subject, target)
                result.add((target, self.NS.RDFS.label,
                            self.target_graph.label(target)))

    return sadi.serialize(result, accept=content_type)
def view(name=None):
    """Build and serialize an RDF graph of suggested subjects for dataset *name*.

    The dataset is loaded with ``get_eml`` and vectorized with
    ``vectorize_ontology``.  Its classes that fall under
    ``oboe:MeasurementType`` (excluding that class itself) are compared with
    ``self.target_class_subtree`` via ``pairwise_sparsedist``.  For every
    source class in the distance result, a ``csvw:Column`` resource identified
    by an xpointer selector is added to the output graph, together with an
    ``oa:FragmentSelector`` blank node and the source class label.  Of the
    ``self.config['top_hits']`` closest targets, those whose score is below
    ``self.config['max_distance']`` are attached as ``dcterms:subject`` and
    labelled using ``self.target_graph``.

    NOTE: ``self`` is not a parameter; this function relies on it being
    available from an enclosing scope.

    Args:
        name: dataset identifier handed to ``get_eml`` and used to mint the
            dataset URI in ``self.NS.dataset``.

    Returns:
        The value returned by ``sadi.serialize`` for the result graph.
    """
    # Debug trace of the requested dataset.  The parenthesised form prints the
    # same text as the Python 2 print statement and also parses on Python 3.
    print(name)

    # NOTE(review): the output format is negotiated from the request's
    # Content-Type header rather than Accept -- confirm this is intended.
    content_type = request.headers.get('Content-Type', '*/*')

    source_graph = get_eml(name)
    # The second value returned by vectorize_ontology is not needed here.
    classes, _local_idf = vectorize_ontology(source_graph, self.idf)
    source_graph.classes = classes

    # Restrict the source classes to the MeasurementType subtree, leaving out
    # the MeasurementType root itself.
    measurement_type = self.NS.oboe.MeasurementType
    source_subtree = set(
        source_graph.transitive_subjects(self.NS.RDFS.subClassOf,
                                         measurement_type))
    source_class_subtree = [
        cls for cls in source_graph.classes
        if cls.identifier in source_subtree
        and cls.identifier != measurement_type
    ]

    distances = pairwise_sparsedist(source_class_subtree,
                                    self.target_class_subtree)

    result = rdflib.Graph()
    dataset = result.resource(self.NS.dataset[name])
    dataset.add(self.NS.RDF.type, self.NS.dcat.Dataset)

    for class_id, dist in distances.items():
        source_class = source_graph.resource(class_id)
        attr_id = source_class.value(self.urn.attributeId)
        entity_id = source_class.value(self.urn.entityId)
        selector = (
            "xpointer(/eml/dataset/dataTable[%s]/attributeList/attribute[%s])"
            % (entity_id, attr_id))

        # The column is identified by the dataset URI plus the selector as a
        # fragment.
        attribute = result.resource(dataset.identifier + '#' + selector)
        attribute.add(self.NS.RDF.type, self.NS.csvw.Column)

        # Describe the same selector explicitly as a fragment selector node.
        sel = result.resource(rdflib.BNode())
        attribute.add(self.NS.oa.hasSelector, sel)
        sel.add(self.NS.RDF.type, self.NS.oa.FragmentSelector)
        sel.add(self.NS.dcterms.conformsTo,
                rdflib.URIRef("http://tools.ietf.org/rfc/rfc3023"))
        sel.add(self.NS.RDF.value, Literal(selector))
        attribute.add(self.NS.RDFS.label, source_class.label())

        # Closest targets first; keep at most top_hits of them and attach only
        # those scoring below max_distance.
        ranked = sorted(dist.items(), key=lambda x: x[1])
        for target, score in ranked[:self.config['top_hits']]:
            if score < self.config['max_distance']:
                attribute.add(self.NS.dcterms.subject, target)
                result.add((target, self.NS.RDFS.label,
                            self.target_graph.label(target)))

    return sadi.serialize(result, accept=content_type)