def show(self, id=None, format='html'):
    """GET /rdfdocs/id: show a single RDF document.

    Accepts either a numeric primary key or a dataset name; a
    non-numeric id falls back to a lookup by RDFDoc.name.
    """
    if id is None:
        abort(404)
    try:
        c.rdfdoc = Session.query(model.RDFDoc).get(int(id))
    except ValueError:
        # id is not an integer -- treat it as a dataset name instead
        c.rdfdoc = Session.query(model.RDFDoc).filter(model.RDFDoc.name == id).first()
def index(self, format='html'):
    """GET /rdfdocs: list all active datasets, filterable and sortable.

    Recognized query parameters: search, errors, valid, sparql, dumps,
    sort, page.  Returns a JSON list of dataset names when JSON is
    requested, an HTML page otherwise.
    """
    rdfdocs = Session.query(model.RDFDoc).filter(model.RDFDoc.active == True)\
        .join(model.RDFDoc.current_stats)
    c.query_string = '?'
    c.search = ''
    if 'search' in request.GET:
        rdfdocs = rdfdocs.filter(model.RDFDoc.name.ilike("%%%s%%" % request.GET['search']))
        c.query_string += 'search=%s&' % request.GET['search']
        c.search = request.GET['search']
    if 'errors' in request.GET:
        rdfdocs = rdfdocs.filter(model.StatResult.errors != None)
        c.query_string += 'errors=1&'
    if 'valid' in request.GET:
        rdfdocs = rdfdocs.filter(model.StatResult.errors == None)
        c.query_string += 'valid=1&'
    if 'sparql' in request.GET:
        rdfdocs = rdfdocs.filter(model.RDFDoc.format == 'sparql')
        c.query_string += 'sparql=1&'
    if 'dumps' in request.GET:
        rdfdocs = rdfdocs.filter(model.RDFDoc.format != 'sparql')
        c.query_string += 'dumps=1&'
    c.sort_order = request.GET.get('sort')
    if c.sort_order == 'triples':
        c.rdfdocs = rdfdocs.order_by(desc(func.coalesce(model.StatResult.triples, '0')))
    elif c.sort_order == 'warnings':
        c.rdfdocs = rdfdocs.order_by(desc(func.coalesce(model.StatResult.warnings, '0')))
    elif c.sort_order == 'format':
        c.rdfdocs = rdfdocs.order_by(func.coalesce(model.RDFDoc.format, '0'))
    elif c.sort_order == 'issue':
        c.rdfdocs = rdfdocs.order_by(model.StatResult.errors)
    elif c.sort_order == 'update':
        c.rdfdocs = rdfdocs.order_by(model.RDFDoc.last_updated.desc())
    else:
        # unknown or missing sort key: datasets being worked on first,
        # then by name and recency
        c.rdfdocs = rdfdocs.order_by(model.RDFDoc.worked_on.desc(),
                                     model.RDFDoc.name,
                                     model.RDFDoc.last_updated.desc())
    page = request.GET.get('page', 1)
    page_url = PageURL_WebOb(request)
    c.rdfdocs_page = Page(c.rdfdocs, page=page, items_per_page=50, url=page_url)
    c.rdfdoc_count = c.rdfdocs.count()
    c.workers = Session.query(model.WorkerProc).order_by(model.WorkerProc.started.desc()).all()
    if format == 'json' or 'application/json' in request.headers.get('accept', ''):
        response.content_type = 'application/json'
        # JSON clients get the (unsorted) filtered list of dataset names
        return json.dumps([r.name for r in rdfdocs])
    return render('/rdfdoc/index.html')
def create(self): """POST /rdfdocs: Create a new item""" # url('rdfdocs') abort(403) rdfdoc_f = model.RDFDoc_fa.bind(data=request.POST) rdfdoc_f.configure(include=[rdfdoc_f.uri, rdfdoc_f.name, rdfdoc_f.format]) if rdfdoc_f.validate(): rdfdoc_f.sync() Session.add(rdfdoc_f.model) Session.commit() redirect(url('rdfdoc', id=rdfdoc_f.model.id)) else: c.rdfdoc_f = rdfdoc_f return render('/rdfdoc/new.html')
def show(self, id, format="html"):
    """GET /vocabulary/search/id: JSON list of datasets using property `id`.

    Returns a list of {"id": ..., "uri": ...} dicts, or '""' on failure
    (preserving the original best-effort contract).
    """
    import re
    # Routes collapses the '//' in the embedded URI; restore the scheme.
    id = re.sub("http:/", "http://", id)
    # Use a bind parameter instead of string interpolation: `id` comes
    # straight from the URL, so the old "...uri='%s'" form was injectable.
    query = """
    SELECT DISTINCT rdfdoc.id, rdfdoc.uri
    FROM rdfdoc, stat_result, rdf_property_stat, rdf_property
    WHERE rdf_property.uri = :uri
      AND rdf_property.id = rdf_property_stat.rdf_property_id
      AND rdf_property_stat.stat_result_id = stat_result.id
      AND stat_result.rdfdoc_id = rdfdoc.id
    ORDER BY rdfdoc.id
    """
    try:
        result = Session.execute(query, {'uri': id})
        datasets = [{"id": row[0], "uri": row[1]} for row in result]
        return json.dumps(datasets)
    except Exception:
        # Narrowed from a bare except; any DB failure still yields '""'.
        return json.dumps("")
def show(self, id, format='html'):
    """GET /stat_result/id: Show a specific item"""
    # url('stat_result', id=ID)
    if id is None:
        abort(404)
    try:
        c.stats = Session.query(model.StatResult).get(int(id))
    except ValueError:
        # non-numeric id -> not found
        abort(404)
def _getProperties(self, searchString, limit=20):
    """Full-text search over labeled properties, most frequently used first.

    The space-separated terms are OR-combined into a Postgres tsquery.
    Returns the (unevaluated) query, capped at `limit` rows.
    """
    tsquery = '|'.join(searchString.split(' '))
    matches = (Session.query(model.PropertyLabeled)
               .filter('label_en_index_col @@ to_tsquery(:terms)')
               .params(terms=tsquery)
               .order_by('count DESC')
               .limit(limit))
    return matches
def show(self, id, format='html'):
    """GET /rdf_classes/id: Show a specific item"""
    # url('rdf_class', id=ID)
    if id is None:
        abort(404)
    try:
        c.rdf_class = Session.query(model.RDFClass).get(int(id))
    except ValueError:
        # non-numeric id -> not found
        abort(404)
def show(self, id, format="html"):
    """GET /datatypes/id: Show a specific item"""
    # url('datatype', id=ID)
    if id is None:
        abort(404)
    try:
        c.datatype = Session.query(model.RDFDatatype).get(int(id))
    except ValueError:
        # non-numeric id -> not found
        abort(404)
def show(self, id, format='html'):
    """GET /vocabularies/id: Show info and current_of-usage about Vocabulary"""
    # url('vocabulary', id=ID)
    if id is None:
        abort(404)
    try:
        c.vocab = Session.query(model.Vocab).get(int(id))
    except ValueError:
        # non-numeric id -> not found
        abort(404)
def show(self, id, format="html"):
    """GET /properties/id: Show a specific item"""
    # url('property', id=ID)
    if id is None:
        abort(404)
    try:
        c.prop = Session.query(model.RDFProperty).get(int(id))
    except ValueError:
        # non-numeric id -> not found
        abort(404)
def show(self, id, format='html'):
    """GET /links/id: Show a specific item"""
    # url('link', id=ID)
    if id is None:
        abort(404)
    try:
        c.link = Session.query(model.Link).get(int(id))
    except ValueError:
        # non-numeric id -> not found
        abort(404)
def index(self, format='html'):
    """GET /languages: languages with literal totals and dataset counts.

    Supports 'search', 'sort' (uri | overall | datasets) and 'page'
    query parameters.
    """
    # url('languages')
    # NOTE: the original ran an extra Language query whose results
    # (c.languages / c.count) were immediately overwritten below; that
    # dead query has been removed.
    # Row layout: (code, id, sum(count), count(datasets)).
    languages = Session.query(model.Language.code, model.Language.id,
                              func.sum(model.LanguageStat.count),
                              func.count(model.StatResult.id))\
        .join(model.LanguageStat).join(model.StatResult)\
        .filter(model.StatResult.current_of != None)\
        .group_by(model.Language.code, model.Language.id)
    c.query_string = '?'
    # optional search
    c.search = ''
    if 'search' in request.GET:
        languages = languages.filter(model.Language.code.ilike("%%%s%%" % request.GET['search']))
        c.query_string += 'search=%s&' % request.GET['search']
        c.search = request.GET['search']
    # sort results
    c.sort_order = request.GET.get('sort')
    if c.sort_order == 'uri':
        c.languages = languages.order_by(model.Language.code)
    elif c.sort_order == 'overall':
        c.languages = languages.order_by(desc(func.sum(model.LanguageStat.count)),
                                         desc(func.count(model.StatResult.id)),
                                         model.Language.code)
    else:
        # 'datasets', unknown keys and no sort all share this ordering
        c.languages = languages.order_by(desc(func.count(model.StatResult.id)),
                                         desc(func.sum(model.LanguageStat.count)),
                                         model.Language.code)
    page = request.GET.get('page', 1)
    page_url = PageURL_WebOb(request)
    c.languages_page = Page(c.languages, page=page, items_per_page=50, url=page_url)
    c.count = c.languages_page.item_count
    return render('/languages/index.html')
def _rankSuggestionLodstats(self, suggestionUri, entities):
    """Rank a suggested property by co-occurrence with the given entities.

    Returns the number of datasets that use both the suggested property
    and at least one of the entity classes/properties.  The (expensive)
    result is cached on disk under /tmp, keyed by a deterministic UUID.
    """
    # The most time consuming step - results are cached to /tmp for now.
    # NOTE(review): a world-writable, predictable cache path is unsafe on
    # shared hosts -- consider a dedicated cache dir.  Kept for
    # compatibility with existing cache entries.
    onlyclasses = sorted(set(entity['class'] for entity in entities))
    cacheId = uuid.uuid5(uuid.NAMESPACE_URL,
                         suggestionUri.join(onlyclasses).encode('utf-8'))
    cacheEntry = '/tmp/' + 'suggestionsCache' + str(cacheId)
    if os.path.exists(cacheEntry):
        with open(cacheEntry, 'rb') as f:
            return pickle.load(f)
    # Bind parameters instead of string interpolation: the URIs come from
    # request data, so the old "...uri='%s'" queries were injectable.
    propertyQuery = ("SELECT stat_result_id FROM rdf_property_stat, rdf_property "
                     "WHERE rdf_property.id=rdf_property_stat.rdf_property_id "
                     "AND rdf_property.uri=:uri")
    classQuery = ("SELECT stat_result_id FROM rdf_class_stat_result, rdf_class "
                  "WHERE rdf_class_stat_result.rdf_class_id=rdf_class.id "
                  "AND rdf_class.uri=:uri")
    propertyDatasets = set(row[0] for row in
                           Session.execute(propertyQuery, {'uri': suggestionUri}))
    entitiesDatasets = set()
    for entityUrl in onlyclasses:
        # Each entity may appear as a class or as a property.
        for row in Session.execute(classQuery, {'uri': entityUrl}):
            entitiesDatasets.add(row[0])
        for row in Session.execute(propertyQuery, {'uri': entityUrl}):
            entitiesDatasets.add(row[0])
    common = len(propertyDatasets.intersection(entitiesDatasets))
    with open(cacheEntry, 'wb') as f:
        pickle.dump(common, f, protocol=pickle.HIGHEST_PROTOCOL)
    return common
def show(self, id, format='html'):
    """GET /datasets/id: map a dataset URI to its rdfdocs show URL (JSON).

    Returns the qualified URL as a JSON string, or '""' when the URI does
    not match exactly one dataset.
    """
    # url('dataset', id=ID)
    import re
    # Routes collapses the '//' in the embedded URI; restore the scheme.
    id = re.sub("http:/", "http://", id)
    try:
        dataset = Session.query(model.RDFDoc).filter(model.RDFDoc.uri == id).one()
        output_url = h.url(controller="rdfdocs", action="show", id=dataset.id, qualified=True)
        return json.dumps(output_url)
    except Exception:
        # Narrowed from a bare except (which also caught KeyboardInterrupt);
        # .one() raises when there are zero or multiple matches.
        return json.dumps('')
def edit(self, id=None): """GET /rdfdocs/id/edit: Form to edit an existing item""" # url('edit_rdfdoc', id=ID) abort(403) if id is None: abort(404) rdfdoc_q = Session.query(model.RDFDoc) c.rdfdoc = rdfdoc_q.get(int(id)) if c.rdfdoc is None: abort(404) c.rdfdoc_f = model.RDFDoc_fa.bind(c.rdfdoc) return render('/rdfdoc/edit.html')
def void(self):
    """send VoID of every dataset in a ZIP file"""
    # Active non-SPARQL datasets that actually have triples.
    docs = (Session.query(model.RDFDoc)
            .filter(model.RDFDoc.active == True)
            .join(model.RDFDoc.current_stats)
            .filter(and_(model.StatResult.triples > 0,
                         model.RDFDoc.format != 'sparql')))
    # Build the archive in a temporary file, one .ttl entry per dataset.
    tmp = tempfile.NamedTemporaryFile(prefix='lodstatswww_voidzip')
    archive = zipfile.ZipFile(tmp, 'w', zipfile.ZIP_DEFLATED)
    for doc in docs:
        archive.writestr("%s.ttl" % doc.name, doc.current_stats.void)
    archive.close()
    tmp.seek(0)
    response.content_type = 'application/zip'
    response.headers['Content-Disposition'] = "filename=LODStats_all_void.zip"
    # FIXME: use paste.fileapp if this ever gets too large
    for chunk in tmp:
        response.write(chunk)
def index(self, format='html'):
    """GET /rdf_classes: all classes with overall usage and dataset counts.

    Supports 'search', 'sort' (uri | overall | datasets) and 'page'
    query parameters; JSON output lists uri/overall_sum/datasets.
    """
    # url('rdf_classes')
    # Row layout: (uri, id, sum(count), count(datasets)).
    rdf_classes = Session.query(model.RDFClass.uri, model.RDFClass.id,
                                func.sum(model.RDFClassStat.count),
                                func.count(model.StatResult.id))\
        .join(model.RDFClassStat).join(model.StatResult)\
        .filter(model.StatResult.current_of != None)\
        .group_by(model.RDFClass.uri, model.RDFClass.id)
    c.query_string = '?'
    # optional search
    c.search = ''
    if 'search' in request.GET:
        rdf_classes = rdf_classes.filter(model.RDFClass.uri.ilike("%%%s%%" % request.GET['search']))
        c.query_string += 'search=%s&' % request.GET['search']
        c.search = request.GET['search']
    # json
    if format == 'json' or 'application/json' in request.headers.get('accept', ''):
        response.content_type = 'application/json'
        # BUG FIX: the row is (uri, id, sum, count); the old code emitted
        # the id as overall_sum and the sum as datasets.
        json_rdf_classes = [{'uri': cl.uri,
                             'overall_sum': int(cl[2]),
                             'datasets': int(cl[3])}
                            for cl in rdf_classes]
        return json.dumps(json_rdf_classes)
    # sort results
    c.sort_order = request.GET.get('sort')
    if c.sort_order == 'uri':
        c.rdf_classes = rdf_classes.order_by(model.RDFClass.uri)
    elif c.sort_order == 'overall':
        c.rdf_classes = rdf_classes.order_by(desc(func.sum(model.RDFClassStat.count)),
                                             desc(func.count(model.StatResult.id)),
                                             model.RDFClass.uri)
    else:
        # 'datasets', unknown keys and no sort all share this ordering
        c.rdf_classes = rdf_classes.order_by(desc(func.count(model.StatResult.id)),
                                             desc(func.sum(model.RDFClassStat.count)),
                                             model.RDFClass.uri)
    page = request.GET.get('page', 1)
    page_url = PageURL_WebOb(request)
    c.rdf_classes_page = Page(c.rdf_classes, page=page, items_per_page=50, url=page_url)
    c.count = c.rdf_classes_page.item_count
    return render('/rdf_classes/index.html')
def index(self, format="html"):
    """GET /properties: all properties with overall usage and dataset counts.

    Supports 'search', 'sort' (uri | overall | datasets) and 'page'
    query parameters; JSON output lists uri/overall_sum/datasets.
    """
    # url('properties')
    # Row layout: (uri, id, sum(count), count(datasets)).
    rdf_properties = (
        Session.query(
            model.RDFProperty.uri,
            model.RDFProperty.id,
            func.sum(model.RDFPropertyStat.count),
            func.count(model.StatResult.id),
        )
        .join(model.RDFPropertyStat)
        .join(model.StatResult)
        .filter(model.StatResult.current_of != None)
        .group_by(model.RDFProperty.uri, model.RDFProperty.id)
    )
    c.query_string = "?"
    # optional search
    c.search = ""
    if "search" in request.GET:
        rdf_properties = rdf_properties.filter(model.RDFProperty.uri.ilike("%%%s%%" % request.GET["search"]))
        c.query_string += "search=%s&" % request.GET["search"]
        c.search = request.GET["search"]
    # json
    if format == "json" or "application/json" in request.headers.get("accept", ""):
        response.content_type = "application/json"
        # BUG FIX: the row is (uri, id, sum, count); the old code emitted
        # the id as overall_sum and the sum as datasets.
        json_rdf_properties = [{"uri": p.uri,
                                "overall_sum": int(p[2]),
                                "datasets": int(p[3])}
                               for p in rdf_properties]
        return json.dumps(json_rdf_properties)
    # sort results
    c.sort_order = request.GET.get("sort")
    if c.sort_order == "uri":
        c.rdf_properties = rdf_properties.order_by(model.RDFProperty.uri)
    elif c.sort_order == "overall":
        c.rdf_properties = rdf_properties.order_by(
            desc(func.sum(model.RDFPropertyStat.count)),
            desc(func.count(model.StatResult.id)),
            model.RDFProperty.uri,
        )
    else:
        # 'datasets', unknown keys and no sort all share this ordering
        c.rdf_properties = rdf_properties.order_by(
            desc(func.count(model.StatResult.id)),
            desc(func.sum(model.RDFPropertyStat.count)),
            model.RDFProperty.uri,
        )
    page = request.GET.get("page", 1)
    page_url = PageURL_WebOb(request)
    c.rdf_properties_page = Page(c.rdf_properties, page=page, items_per_page=50, url=page_url)
    c.count = c.rdf_properties_page.item_count
    return render("/properties/index.html")
def _usage_top5(self, agg, stat_table, fk_col, entity_table, label='uri'):
    """Top 5 entities by `agg` ('count' or 'sum') of their per-dataset
    stat rows, over datasets' current stats.  Returns (label, id, value) rows."""
    alias = 'counter' if agg == 'count' else 'sum'
    stmt = ("SELECT %(agg)s(%(st)s.count) AS %(al)s, "
            "%(et)s.%(lb)s AS %(lb)s, %(et)s.id AS id "
            "FROM %(st)s, stat_result, rdfdoc, %(et)s "
            "WHERE %(st)s.stat_result_id=stat_result.id "
            "AND rdfdoc.current_stats_id=stat_result.id "
            "AND %(et)s.id=%(st)s.%(fk)s "
            "GROUP BY %(et)s.%(lb)s, %(et)s.id "
            "ORDER BY %(al)s DESC LIMIT 5"
            % {'agg': agg, 'al': alias, 'st': stat_table,
               'et': entity_table, 'fk': fk_col, 'lb': label})
    return Session.query(label, 'id', alias).from_statement(stmt).all()

def _stat_summary(self, column, with_sum=True, extra=''):
    """avg/min/max/median (and optionally sum) of a stat_result column
    over current stats with non-NULL entities; `extra` appends SQL filters."""
    fields = ['avg', 'min', 'max', 'median']
    select = ", ".join("%s(stat_result.%s) AS %s" % (f, column, f) for f in fields)
    if with_sum:
        select += ", sum(stat_result.%s) AS sum" % column
        fields.append('sum')
    stmt = ("SELECT " + select + " FROM stat_result, rdfdoc "
            "WHERE rdfdoc.current_stats_id=stat_result.id "
            "AND stat_result.entities is not NULL" + extra)
    return Session.query(*fields).from_statement(stmt).one()

def _per_dataset_summary(self, stat_table, fk_col, entity_table):
    """avg/min/max/median/sum of the number of distinct entities used
    per dataset (over current stats with non-NULL entities)."""
    stmt = ("SELECT avg(count), min(count), max(count), median(count), sum(count) FROM "
            "(SELECT count(%(st)s.count) AS count "
            "FROM %(st)s, stat_result, rdfdoc, %(et)s "
            "WHERE %(st)s.stat_result_id=stat_result.id "
            "AND rdfdoc.current_stats_id=stat_result.id "
            "AND %(et)s.id=%(st)s.%(fk)s "
            "AND stat_result.entities is not NULL "
            "GROUP BY rdfdoc.id) AS counter"
            % {'st': stat_table, 'et': entity_table, 'fk': fk_col})
    return Session.query('avg', 'min', 'max', 'median', 'sum').from_statement(stmt).one()

def stats(self):
    """GET /stats: aggregate statistics over all datasets' current stats.

    Fills the template context with package counts, triple totals,
    top-5 usage lists and basic per-column summaries, then renders
    /rdfdoc/stats.html.  The near-identical raw SQL of the original has
    been factored into _usage_top5/_stat_summary/_per_dataset_summary.
    """
    c.rdfdocs = Session.query(model.RDFDoc).filter(model.RDFDoc.active == True)\
        .order_by(model.RDFDoc.worked_on.desc(), model.RDFDoc.name,
                  model.RDFDoc.last_updated.desc()).all()
    c.rdfdoc_count = len(c.rdfdocs)
    c.workers = Session.query(model.WorkerProc).order_by(model.WorkerProc.started.desc()).all()
    # Active datasets joined to their current stats (generative query,
    # safe to reuse for the various counts below).
    with_stats = Session.query(model.RDFDoc).filter(model.RDFDoc.active == True)\
        .join(model.RDFDoc.current_stats)
    c.no_of_rdfdocs_with_triples = with_stats.filter(model.StatResult.triples > 0).count()
    c.sparql_packages = with_stats.filter(model.RDFDoc.format == "sparql").count()
    c.dump_packages = with_stats.filter(model.RDFDoc.format != "sparql").count()
    c.error_packages_dump = with_stats.filter(and_(
        model.StatResult.errors != None,
        model.RDFDoc.format != 'sparql',
        model.RDFDoc.format != None)).count()
    c.error_packages_sparql = with_stats.filter(and_(
        model.StatResult.errors != None,
        model.RDFDoc.format == 'sparql',
        model.RDFDoc.format != None)).count()
    c.problem_packages = c.error_packages_sparql + c.error_packages_dump
    c.touched_packages = c.no_of_rdfdocs_with_triples + c.problem_packages
    # Triple totals; sum() yields NULL (None) on an empty set -> 0.
    c.triples_dump = Session.query(func.sum(model.StatResult.triples))\
        .join(model.RDFDoc.current_stats)\
        .filter(model.RDFDoc.format != "sparql").scalar() or 0
    c.triples_sparql = Session.query(func.sum(model.StatResult.triples))\
        .join(model.RDFDoc.current_stats)\
        .filter(model.RDFDoc.format == "sparql").scalar() or 0
    # Distinct entity counts over current stats (count() instead of the
    # original len(.all()) -- same value without materializing rows).
    c.vocabs = Session.query(model.Vocab).join(model.RDFVocabStat)\
        .join(model.StatResult).filter(model.StatResult.current_of != None).count()
    c.rdf_classes = Session.query(model.RDFClass).join(model.RDFClassStat)\
        .join(model.StatResult).filter(model.StatResult.current_of != None).count()
    c.properties = Session.query(model.RDFProperty).join(model.RDFPropertyStat)\
        .join(model.StatResult).filter(model.StatResult.current_of != None).count()
    c.datatypes = Session.query(model.RDFDatatype).join(model.RDFDatatypeStat)\
        .join(model.StatResult).filter(model.StatResult.current_of != None).count()
    c.link_count = Session.query(model.Link).join(model.LinkStat)\
        .join(model.StatResult).filter(model.StatResult.current_of != None).count()
    c.languages = Session.query(func.count(model.Language.id)).join(model.LanguageStat)\
        .join(model.StatResult).filter(model.StatResult.current_of != None).scalar()
    # Most common entities by number of datasets using them ...
    c.v_usage = self._usage_top5('count', 'rdf_vocab_stat', 'vocab_id', 'vocab')
    c.c_usage = self._usage_top5('count', 'rdf_class_stat_result', 'rdf_class_id', 'rdf_class')
    c.p_usage = self._usage_top5('count', 'rdf_property_stat', 'rdf_property_id', 'rdf_property')
    c.t_usage = self._usage_top5('count', 'rdf_datatype_stat', 'rdf_datatype_id', 'rdf_datatype')
    c.l_usage = self._usage_top5('count', 'language_stat', 'language_id', 'language', label='code')
    c.link_usage = self._usage_top5('count', 'link_stat', 'link_id', 'link', label='code')
    # ... and by absolute number of occurrences.
    c.v_sum = self._usage_top5('sum', 'rdf_vocab_stat', 'vocab_id', 'vocab')
    c.c_sum = self._usage_top5('sum', 'rdf_class_stat_result', 'rdf_class_id', 'rdf_class')
    c.p_sum = self._usage_top5('sum', 'rdf_property_stat', 'rdf_property_id', 'rdf_property')
    c.t_sum = self._usage_top5('sum', 'rdf_datatype_stat', 'rdf_datatype_id', 'rdf_datatype')
    c.l_sum = self._usage_top5('sum', 'language_stat', 'language_id', 'language', label='code')
    c.link_sum = self._usage_top5('sum', 'link_stat', 'link_id', 'link', label='code')
    # Basic per-column summaries.
    c.entities = self._stat_summary('entities')
    c.literals = self._stat_summary('literals')
    c.blanks = self._stat_summary('blanks')
    c.blanks_as_subject = self._stat_summary('blanks_as_subject')
    c.blanks_as_object = self._stat_summary('blanks_as_object')
    c.subclasses = self._stat_summary('subclasses')
    c.typed_subjects = self._stat_summary('typed_subjects')
    c.labeled_subjects = self._stat_summary('labeled_subjects')
    c.properties_per_entity = self._stat_summary('properties_per_entity', with_sum=False)
    c.string_length_typed = self._stat_summary('string_length_typed', with_sum=False)
    c.string_length_untyped = self._stat_summary('string_length_untyped', with_sum=False)
    c.class_hierarchy_depth = self._stat_summary('class_hierarchy_depth', with_sum=False)
    c.property_hierarchy_depth = self._stat_summary('property_hierarchy_depth', with_sum=False)
    c.links = self._stat_summary('links')
    # Triples: SPARQL endpoints report no dump size, exclude them here.
    c.triples = self._stat_summary('triples', extra=" AND rdfdoc.format!='sparql'")
    # Entities used per dataset.
    c.dataset_vocab = self._per_dataset_summary('rdf_vocab_stat', 'vocab_id', 'vocab')
    c.dataset_classes = self._per_dataset_summary('rdf_class_stat_result', 'rdf_class_id', 'rdf_class')
    c.dataset_props = self._per_dataset_summary('rdf_property_stat', 'rdf_property_id', 'rdf_property')
    return render('/rdfdoc/stats.html')
def home(self):
    """Landing page: dataset counts, triple totals and recent updates."""
    active = Session.query(model.RDFDoc).filter(model.RDFDoc.active == True)
    with_stats = active.join(model.RDFDoc.current_stats)
    c.rdfdocs = active.order_by(model.RDFDoc.worked_on.desc(),
                                model.RDFDoc.name,
                                model.RDFDoc.last_updated.desc()).all()
    c.rdfdoc_count = len(c.rdfdocs)
    c.workers = Session.query(model.WorkerProc).order_by(model.WorkerProc.started.desc()).all()
    c.no_of_rdfdocs_with_triples = with_stats.filter(model.StatResult.triples > 0).count()
    c.sparql_packages = with_stats.filter(model.RDFDoc.format == "sparql").count()
    c.dump_packages = with_stats.filter(model.RDFDoc.format != "sparql").count()
    c.error_packages_dump = with_stats.filter(and_(
        model.StatResult.errors != None,
        model.RDFDoc.format != 'sparql',
        model.RDFDoc.format != None)).count()
    c.error_packages_sparql = with_stats.filter(and_(
        model.StatResult.errors != None,
        model.RDFDoc.format == 'sparql',
        model.RDFDoc.format != None)).count()
    c.problem_packages = c.error_packages_sparql + c.error_packages_dump
    c.touched_packages = c.no_of_rdfdocs_with_triples + c.problem_packages
    # Triple totals split by access method; NULL sums become 0.
    triple_sum = Session.query(func.sum(model.StatResult.triples))\
        .join(model.RDFDoc.current_stats)
    c.triples_dump = triple_sum.filter(model.RDFDoc.format != "sparql").scalar()
    c.triples_sparql = triple_sum.filter(model.RDFDoc.format == "sparql").scalar()
    if c.triples_dump is None:
        c.triples_dump = 0
    if c.triples_sparql is None:
        c.triples_sparql = 0
    # Five most recently updated datasets, split by success vs. error.
    recent = with_stats.filter(and_(
        model.StatResult.last_updated != None,
        model.RDFDoc.last_updated != None))
    c.recent_updates = recent.filter(model.StatResult.errors == None)\
        .order_by(desc(model.RDFDoc.last_updated))[:5]
    c.recent_updates_errors = recent.filter(model.StatResult.errors != None)\
        .order_by(desc(model.RDFDoc.last_updated))[:5]
    return render('/home.html')
def valid_and_available(self):
    """Plain-text list of active non-SPARQL datasets with at least one triple."""
    docs = (Session.query(model.RDFDoc)
            .filter(model.RDFDoc.active == True)
            .join(model.RDFDoc.current_stats)
            .filter(and_(model.StatResult.triples > 0,
                         model.RDFDoc.format != 'sparql')))
    c.rdfdocs = docs.all()
    response.content_type = 'text/plain'
    return render('/rdfdoc/txtlist.txt')
def index(self, format="html"):
    """GET /datatypes: all datatypes with overall usage and dataset counts.

    Supports 'search', 'sort' (uri | overall | datasets) and 'page'
    query parameters.
    """
    # url('datatypes')
    # Row layout: (uri, id, sum(count), count(datasets)).
    datatypes = (
        Session.query(
            model.RDFDatatype.uri,
            model.RDFDatatype.id,
            func.sum(model.RDFDatatypeStat.count),
            func.count(model.StatResult.id),
        )
        .join(model.RDFDatatypeStat)
        .join(model.StatResult)
        .filter(model.StatResult.current_of != None)
        .group_by(model.RDFDatatype.uri, model.RDFDatatype.id)
    )
    c.query_string = "?"
    # optional search
    c.search = ""
    if "search" in request.GET:
        datatypes = datatypes.filter(model.RDFDatatype.uri.ilike("%%%s%%" % request.GET["search"]))
        c.query_string += "search=%s&" % request.GET["search"]
        c.search = request.GET["search"]
    # sort results
    c.sort_order = request.GET.get("sort")
    if c.sort_order == "uri":
        c.datatypes = datatypes.order_by(model.RDFDatatype.uri)
    elif c.sort_order == "overall":
        c.datatypes = datatypes.order_by(
            desc(func.sum(model.RDFDatatypeStat.count)),
            desc(func.count(model.StatResult.id)),
            model.RDFDatatype.uri,
        )
    else:
        # 'datasets', unknown keys and no sort all share this ordering
        c.datatypes = datatypes.order_by(
            desc(func.count(model.StatResult.id)),
            desc(func.sum(model.RDFDatatypeStat.count)),
            model.RDFDatatype.uri,
        )
    page = request.GET.get("page", 1)
    page_url = PageURL_WebOb(request)
    c.datatypes_page = Page(c.datatypes, page=page, items_per_page=50, url=page_url)
    c.count = c.datatypes_page.item_count
    return render("/datatypes/index.html")