Example #1
0
 def show(self, id=None, format='html'):
     """Load the RDFDoc addressed by ``id`` into ``c.rdfdoc``.

     ``id`` is tried as an integer primary key first.  When it is not
     numeric it is treated as a document name and the first RDFDoc with
     that name is used.  A missing ``id`` aborts with 404.
     """
     if id is None:
         abort(404)
     try:
         c.rdfdoc = Session.query(model.RDFDoc).get(int(id))
     except ValueError:
         # Not an integer: fall back to a lookup by document name.
         c.rdfdoc = Session.query(model.RDFDoc).filter(model.RDFDoc.name==id).first()
Example #2
0
 def index(self, format='html'):
     """GET /rdfdocs: All items in the collection

     Lists the active RDFDocs joined with their current stats.  Optional
     GET parameters narrow the listing (``search``, ``errors``, ``valid``,
     ``sparql``, ``dumps``), select the ordering (``sort``) and the page
     (``page``).

     When ``format`` is ``'json'`` or the client accepts
     ``application/json``, the names of the matching documents are returned
     as a JSON list.  Otherwise the HTML index template is rendered.
     """
     # url('rdfdocs')
     params = request.GET
     rdfdocs = Session.query(model.RDFDoc).filter(model.RDFDoc.active==True).join(model.RDFDoc.current_stats)
     c.query_string = '?'
     c.search = ''
     if 'search' in params:
         search = params['search']
         rdfdocs = rdfdocs.filter(model.RDFDoc.name.ilike("%%%s%%" % search))
         # NOTE(review): the search term goes into the query string
         # without URL-encoding.
         c.query_string += 'search=%s&' % search
         c.search = search
     if 'errors' in params:
         rdfdocs = rdfdocs.filter(model.StatResult.errors!=None)
         c.query_string += 'errors=1&'
     if 'valid' in params:
         rdfdocs = rdfdocs.filter(model.StatResult.errors==None)
         c.query_string += 'valid=1&'
     if 'sparql' in params:
         rdfdocs = rdfdocs.filter(model.RDFDoc.format=='sparql')
         c.query_string += 'sparql=1&'
     if 'dumps' in params:
         rdfdocs = rdfdocs.filter(model.RDFDoc.format!='sparql')
         c.query_string += 'dumps=1&'
     # The JSON answer only needs the filtered names, so return it before
     # running the sorting, paging, counting and worker queries.
     if format=='json' or 'application/json' in request.headers.get('accept', ''):
         response.content_type = 'application/json'
         return json.dumps([r.name for r in rdfdocs])
     # Unknown or missing sort keys fall back to the default ordering.
     default_order = (model.RDFDoc.worked_on.desc(), model.RDFDoc.name, model.RDFDoc.last_updated.desc())
     sort_orders = {
         'triples': (desc(func.coalesce(model.StatResult.triples, '0')),),
         'warnings': (desc(func.coalesce(model.StatResult.warnings, '0')),),
         'format': (func.coalesce(model.RDFDoc.format, '0'),),
         'issue': (model.StatResult.errors,),
         'update': (model.RDFDoc.last_updated.desc(),),
     }
     c.sort_order = params.get('sort')
     c.rdfdocs = rdfdocs.order_by(*sort_orders.get(c.sort_order, default_order))
     page = params.get('page', 1)
     page_url = PageURL_WebOb(request)
     c.rdfdocs_page = Page(c.rdfdocs, page=page, items_per_page=50, url=page_url)
     c.rdfdoc_count = c.rdfdocs.count()
     c.workers = Session.query(model.WorkerProc).order_by(model.WorkerProc.started.desc()).all()
     return render('/rdfdoc/index.html')
Example #3
0
 def create(self):
     """POST /rdfdocs: Create a new item"""
     # url('rdfdocs')
     # Creation is rejected up front.  The form handling below is kept, but
     # presumably never runs, assuming abort() raises -- TODO confirm.
     abort(403)
     form = model.RDFDoc_fa.bind(data=request.POST)
     form.configure(include=[form.uri, form.name, form.format])
     if not form.validate():
         # Invalid input: show the form again with the bound data.
         c.rdfdoc_f = form
         return render('/rdfdoc/new.html')
     form.sync()
     Session.add(form.model)
     Session.commit()
     redirect(url('rdfdoc', id=form.model.id))
Example #4
0
    def show(self, id, format="html"):
        """GET /vocabulary/search/id: Show a specific item

        Returns a JSON list of ``{"id": ..., "uri": ...}`` objects, one for
        each rdfdoc that has a stat result referencing the property whose
        URI is ``id``.  If the lookup fails, a JSON empty string is returned
        instead.
        """
        # url('vocabulary_search', id=ID)
        import re

        # Restore a scheme separator that arrived as a single slash
        # (presumably collapsed on the way in -- TODO confirm).  The
        # lookahead leaves an already intact "http://" alone.
        id = re.sub(r"(https?):/(?!/)", r"\1://", id)
        # The URI is passed as a bound parameter, never formatted into the
        # statement, because it comes straight from the request path.
        query = """
            SELECT DISTINCT rdfdoc.id, rdfdoc.uri
            FROM rdfdoc, stat_result, rdf_property_stat, rdf_property
            WHERE rdf_property.uri=:uri AND
            rdf_property.id=rdf_property_stat.rdf_property_id AND
            rdf_property_stat.stat_result_id=stat_result.id AND
            stat_result.rdfdoc_id=rdfdoc.id
            ORDER BY rdfdoc.id;
        """
        try:
            result = Session.execute(query, {"uri": id})
            datasets = [{"id": row[0], "uri": row[1]} for row in result]
            return json.dumps(datasets)
        except Exception:
            # Best effort: any failure is reported as an empty JSON string.
            return json.dumps("")
Example #5
0
 def show(self, id, format='html'):
     """GET /stat_result/id: Show a specific item

     Loads the StatResult with primary key ``id`` into ``c.stats``.
     Aborts with 404 when ``id`` is missing or not an integer.
     """
     # url('stat_result', id=ID)
     if id is None:
         abort(404)
     try:
         c.stats = Session.query(model.StatResult).get(int(id))
     except ValueError:
         # Non-numeric ids cannot address a stat result.
         abort(404)
Example #6
0
 def _getProperties(self, searchString, limit=20):
     """Build the full-text query for labeled properties.

     The whitespace-separated words of ``searchString`` are OR-ed together
     and matched against ``label_en_index_col`` with ``to_tsquery``.  The
     result is ordered by ``count`` descending and capped at ``limit``.

     Returns the (not yet executed) query object.
     """
     # split() without an argument drops the empty strings that leading,
     # trailing or repeated blanks would produce.  An empty operand
     # ("a||b") is a syntax error for to_tsquery.
     searchterms = '|'.join(searchString.split())
     q = Session.query(model.PropertyLabeled).filter('label_en_index_col ' \
                                                     '@@ to_tsquery(:terms)')
     q = q.params(terms=searchterms)
     q = q.order_by('count DESC')
     q = q.limit(limit)
     return q
Example #7
0
 def show(self, id, format='html'):
     """GET /rdf_classes/id: Show a specific item

     Loads the RDFClass with primary key ``id`` into ``c.rdf_class``.
     Aborts with 404 when ``id`` is missing or not an integer.
     """
     # url('rdf_class', id=ID)
     if id is None:
         abort(404)
     try:
         c.rdf_class = Session.query(model.RDFClass).get(int(id))
     except ValueError:
         # Non-numeric ids cannot address a class.
         abort(404)
Example #8
0
 def show(self, id, format="html"):
     """GET /datatypes/id: Show a specific item

     Loads the RDFDatatype with primary key ``id`` into ``c.datatype``.
     Aborts with 404 when ``id`` is missing or not an integer.
     """
     # url('datatype', id=ID)
     if id is None:
         abort(404)
     try:
         c.datatype = Session.query(model.RDFDatatype).get(int(id))
     except ValueError:
         # Non-numeric ids cannot address a datatype.
         abort(404)
Example #9
0
 def show(self, id, format='html'):
     """GET /vocabularies/id: Show info and current_of-usage about Vocabulary

     Loads the Vocab with primary key ``id`` into ``c.vocab``.
     Aborts with 404 when ``id`` is missing or not an integer.
     """
     # url('vocabulary', id=ID)
     if id is None:
         abort(404)
     try:
         c.vocab = Session.query(model.Vocab).get(int(id))
     except ValueError:
         # Non-numeric ids cannot address a vocabulary.
         abort(404)
Example #10
0
 def show(self, id, format="html"):
     """GET /properties/id: Show a specific item

     Loads the RDFProperty with primary key ``id`` into ``c.prop``.
     Aborts with 404 when ``id`` is missing or not an integer.
     """
     # url('property', id=ID)
     if id is None:
         abort(404)
     try:
         c.prop = Session.query(model.RDFProperty).get(int(id))
     except ValueError:
         # Non-numeric ids cannot address a property.
         abort(404)
Example #11
0
 def show(self, id, format='html'):
     """GET /links/id: Show a specific item

     Loads the Link with primary key ``id`` into ``c.link``.
     Aborts with 404 when ``id`` is missing or not an integer.
     """
     # url('link', id=ID)
     if id is None:
         abort(404)
     try:
         c.link = Session.query(model.Link).get(int(id))
     except ValueError:
         # Non-numeric ids cannot address a link.
         abort(404)
Example #12
0
    def index(self, format='html'):
        """GET /languages: All items in the collection

        Builds one row per language (code, id, summed usage count, number
        of stat results) over stat results that have ``current_of`` set.
        The GET parameters ``search``, ``sort`` and ``page`` filter, order
        and page the rows.  The page is stored in ``c.languages_page`` and
        the HTML index template is rendered.
        """
        # url('languages')
        params = request.GET
        languages = Session.query(model.Language.code, model.Language.id, func.sum(model.LanguageStat.count),
                                    func.count(model.StatResult.id))\
                                .join(model.LanguageStat).join(model.StatResult)\
                                .filter(model.StatResult.current_of!=None)\
                                .group_by(model.Language.code, model.Language.id)
        c.query_string = '?'
        # optional search
        c.search = ''
        if 'search' in params:
            search = params['search']
            languages = languages.filter(model.Language.code.ilike("%%%s%%" % search))
            # NOTE(review): the search term goes into the query string
            # without URL-encoding.
            c.query_string += 'search=%s&' % search
            c.search = search
        # sort results; 'datasets', unknown keys and a missing key all use
        # the by-datasets ordering
        by_datasets = (desc(func.count(model.StatResult.id)),
                       desc(func.sum(model.LanguageStat.count)), model.Language.code)
        sort_orders = {
            'uri': (model.Language.code,),
            'overall': (desc(func.sum(model.LanguageStat.count)),
                        desc(func.count(model.StatResult.id)), model.Language.code),
        }
        c.sort_order = params.get('sort')
        c.languages = languages.order_by(*sort_orders.get(c.sort_order, by_datasets))
        page = params.get('page', 1)
        page_url = PageURL_WebOb(request)
        c.languages_page = Page(c.languages, page=page, items_per_page=50, url=page_url)
        c.count = c.languages_page.item_count
        return render('/languages/index.html')
Example #13
0
    def _rankSuggestionLodstats(self, suggestionUri, entities):
        """Rank a suggestion by the stat results it shares with the entities.

        The rank is the number of ``stat_result_id`` values that reference
        ``suggestionUri`` as a property and also reference at least one of
        the entities' ``'class'`` URIs, as a class or as a property.

        ``entities`` is an iterable of mappings with a ``'class'`` key.
        Results are cached as pickle files under /tmp, keyed by the
        suggestion URI together with the sorted class URIs.
        """
        #The most time consuming - implement caching here
        #just cache to /tmp for now
        onlyclasses = sorted(set(entity['class'] for entity in entities))
        # The suggestion URI is an element of the key, not the separator.
        # Otherwise it drops out of the key with fewer than two classes and
        # different suggestions share one cache entry.
        cacheKey = '\n'.join([suggestionUri] + onlyclasses)
        if not isinstance(cacheKey, str):
            # Python 2 unicode text: uuid5 wants a byte string there.
            cacheKey = cacheKey.encode('utf-8')
        cacheId = uuid.uuid5(uuid.NAMESPACE_URL, cacheKey)
        cachePath = '/tmp/'
        cacheNamespace = 'suggestionsCache'
        cacheEntry = str(cachePath) + str(cacheNamespace) + str(cacheId)
        if os.path.exists(cacheEntry):
            # NOTE(review): this unpickles a file from a shared directory;
            # pickle must not be fed files other users can write.  Consider
            # a private cache directory or a plain-text format.
            with open(cacheEntry, 'rb') as cacheFile:
                return pickle.load(cacheFile)

        # URIs are passed as bound parameters instead of being formatted
        # into the SQL text.
        propertyQuery = """SELECT stat_result_id
                   FROM rdf_property_stat, rdf_property
                   WHERE rdf_property.id=rdf_property_stat.rdf_property_id
                   AND rdf_property.uri=:uri;"""
        classQuery = """SELECT stat_result_id
                        FROM rdf_class_stat_result, rdf_class
                        WHERE rdf_class_stat_result.rdf_class_id=rdf_class.id
                        AND rdf_class.uri=:uri;"""

        propertyDatasets = set(
            row[0] for row in Session.execute(propertyQuery, {'uri': suggestionUri}))

        entitiesDatasets = set()
        for entityUrl in onlyclasses:
            # An entity URI may occur as a class or as a property.
            for query in (classQuery, propertyQuery):
                entitiesDatasets.update(
                    row[0] for row in Session.execute(query, {'uri': entityUrl}))

        rank = len(propertyDatasets & entitiesDatasets)
        with open(cacheEntry, 'wb') as cacheFile:
            pickle.dump(rank, cacheFile, protocol=pickle.HIGHEST_PROTOCOL)
        return rank
Example #14
0
 def show(self, id, format='html'):
     """GET /datasets/id: Show a specific item

     Looks up the single RDFDoc whose URI is ``id`` and returns the
     qualified URL of its rdfdocs ``show`` page as a JSON string.  If the
     lookup fails, a JSON empty string is returned instead.
     """
     # url('dataset', id=ID)
     import re
     # Restore a scheme separator that arrived as a single slash
     # (presumably collapsed on the way in -- TODO confirm).  The lookahead
     # leaves an already intact "http://" alone.
     id = re.sub(r"(https?):/(?!/)", r"\1://", id)
     try:
         dataset = Session.query(model.RDFDoc).filter(model.RDFDoc.uri==id).one()
         output_url = h.url(controller="rdfdocs", action="show", id=dataset.id, qualified=True)
         return json.dumps(output_url)
     except Exception:
         # Best effort: any failure is reported as an empty JSON string.
         return json.dumps('')
Example #15
0
 def edit(self, id=None):
     """GET /rdfdocs/id/edit: Form to edit an existing item"""
     # url('edit_rdfdoc', id=ID)
     # Editing is rejected up front.  The form setup below is kept, but
     # presumably never runs, assuming abort() raises -- TODO confirm.
     abort(403)
     if id is None:
         abort(404)
     document = Session.query(model.RDFDoc).get(int(id))
     c.rdfdoc = document
     if document is None:
         # No document with that primary key.
         abort(404)
     c.rdfdoc_f = model.RDFDoc_fa.bind(document)
     return render('/rdfdoc/edit.html')
Example #16
0
 def void(self):
     """send VoID of every dataset in a ZIP file

     Writes one ``<name>.ttl`` entry per active, non-SPARQL RDFDoc whose
     current stats have more than zero triples.  The archive is built in a
     named temporary file and then streamed into the response.
     """
     rdfdocs = Session.query(model.RDFDoc).filter(model.RDFDoc.active==True).join(model.RDFDoc.current_stats).filter(and_(model.StatResult.triples > 0, model.RDFDoc.format != 'sparql'))
     zip_temp_file = tempfile.NamedTemporaryFile(prefix='lodstatswww_voidzip')
     try:
         zip_temp = zipfile.ZipFile(zip_temp_file, 'w', zipfile.ZIP_DEFLATED)
         try:
             for r in rdfdocs:
                 zip_temp.writestr("%s.ttl" % r.name, r.current_stats.void)
         finally:
             # Closing the archive writes its central directory.
             zip_temp.close()
         zip_temp_file.seek(0)
         response.content_type = 'application/zip'
         # The header needs a disposition type in front of the filename
         # parameter.
         response.headers['Content-Disposition'] = "attachment; filename=LODStats_all_void.zip"
         # FIXME: use paste.fileapp if this ever gets too large
         # Copy in fixed-size chunks; iterating a binary file "by line"
         # yields arbitrarily sized pieces.
         while True:
             data = zip_temp_file.read(64 * 1024)
             if not data:
                 break
             response.write(data)
     finally:
         # Close the temporary file explicitly instead of leaving it to
         # garbage collection.
         zip_temp_file.close()
Example #17
0
 def index(self, format='html'):
     """GET /rdf_classes: All items in the collection

     Builds one row per RDF class (uri, id, summed usage count, number of
     stat results) over stat results that have ``current_of`` set.  The
     GET parameter ``search`` filters the rows by URI.

     When ``format`` is ``'json'`` or the client accepts
     ``application/json``, the rows are returned as a JSON list of
     ``{'uri', 'overall_sum', 'datasets'}`` objects.  Otherwise the rows
     are ordered by ``sort``, paged by ``page`` and the HTML index
     template is rendered.
     """
     # url('rdf_classes')
     params = request.GET
     rdf_classes = Session.query(model.RDFClass.uri, model.RDFClass.id, func.sum(model.RDFClassStat.count),
                                 func.count(model.StatResult.id))\
                                 .join(model.RDFClassStat).join(model.StatResult)\
                                 .filter(model.StatResult.current_of!=None)\
                                 .group_by(model.RDFClass.uri, model.RDFClass.id)
     c.query_string = '?'
     # optional search
     c.search = ''
     if 'search' in params:
         search = params['search']
         rdf_classes = rdf_classes.filter(model.RDFClass.uri.ilike("%%%s%%" % search))
         # NOTE(review): the search term goes into the query string
         # without URL-encoding.
         c.query_string += 'search=%s&' % search
         c.search = search
     # json
     if format=='json' or 'application/json' in request.headers.get('accept', ''):
         response.content_type = 'application/json'
         # Row layout is (uri, id, sum, count): the sum is at index 2 and
         # the stat-result count at index 3.  Index 1 is the class id.
         return json.dumps([{'uri': cl.uri, 'overall_sum': int(cl[2]), 'datasets': int(cl[3])}
                            for cl in rdf_classes])
     # sort results; 'datasets', unknown keys and a missing key all use the
     # by-datasets ordering
     by_datasets = (desc(func.count(model.StatResult.id)),
                    desc(func.sum(model.RDFClassStat.count)), model.RDFClass.uri)
     sort_orders = {
         'uri': (model.RDFClass.uri,),
         'overall': (desc(func.sum(model.RDFClassStat.count)),
                     desc(func.count(model.StatResult.id)), model.RDFClass.uri),
     }
     c.sort_order = params.get('sort')
     c.rdf_classes = rdf_classes.order_by(*sort_orders.get(c.sort_order, by_datasets))
     page = params.get('page', 1)
     page_url = PageURL_WebOb(request)
     c.rdf_classes_page = Page(c.rdf_classes, page=page, items_per_page=50, url=page_url)
     c.count = c.rdf_classes_page.item_count
     return render('/rdf_classes/index.html')
Example #18
0
 def index(self, format="html"):
     """GET /properties: All items in the collection

     Builds one row per RDF property (uri, id, summed usage count, number
     of stat results) over stat results that have ``current_of`` set.  The
     GET parameter ``search`` filters the rows by URI.

     When ``format`` is ``"json"`` or the client accepts
     ``application/json``, the rows are returned as a JSON list of
     ``{"uri", "overall_sum", "datasets"}`` objects.  Otherwise the rows
     are ordered by ``sort``, paged by ``page`` and the HTML index
     template is rendered.
     """
     # url('properties')
     params = request.GET
     rdf_properties = (
         Session.query(
             model.RDFProperty.uri,
             model.RDFProperty.id,
             func.sum(model.RDFPropertyStat.count),
             func.count(model.StatResult.id),
         )
         .join(model.RDFPropertyStat)
         .join(model.StatResult)
         .filter(model.StatResult.current_of != None)
         .group_by(model.RDFProperty.uri, model.RDFProperty.id)
     )
     c.query_string = "?"
     # optional search
     c.search = ""
     if "search" in params:
         search = params["search"]
         rdf_properties = rdf_properties.filter(model.RDFProperty.uri.ilike("%%%s%%" % search))
         # NOTE(review): the search term goes into the query string
         # without URL-encoding.
         c.query_string += "search=%s&" % search
         c.search = search
     # json
     if format == "json" or "application/json" in request.headers.get("accept", ""):
         response.content_type = "application/json"
         # Row layout is (uri, id, sum, count): the sum is at index 2 and
         # the stat-result count at index 3.  Index 1 is the property id.
         return json.dumps(
             [{"uri": p.uri, "overall_sum": int(p[2]), "datasets": int(p[3])} for p in rdf_properties]
         )
     # sort results; "datasets", unknown keys and a missing key all use the
     # by-datasets ordering
     by_datasets = (
         desc(func.count(model.StatResult.id)),
         desc(func.sum(model.RDFPropertyStat.count)),
         model.RDFProperty.uri,
     )
     sort_orders = {
         "uri": (model.RDFProperty.uri,),
         "overall": (
             desc(func.sum(model.RDFPropertyStat.count)),
             desc(func.count(model.StatResult.id)),
             model.RDFProperty.uri,
         ),
     }
     c.sort_order = params.get("sort")
     c.rdf_properties = rdf_properties.order_by(*sort_orders.get(c.sort_order, by_datasets))
     page = params.get("page", 1)
     page_url = PageURL_WebOb(request)
     c.rdf_properties_page = Page(c.rdf_properties, page=page, items_per_page=50, url=page_url)
     c.count = c.rdf_properties_page.item_count
     return render("/properties/index.html")
Example #19
0
 def stats(self):
     """Collect the figures for the statistics overview into ``c``.

     Sets, in this order: the active documents and worker processes;
     document counts by format and error state; triple totals for dumps
     and SPARQL endpoints; the numbers of vocabularies, classes,
     properties, datatypes, links and languages in current stat results;
     the top five of each kind by ``count()`` and by ``sum()`` of the
     per-result usage count; and avg/min/max/median (plus sum for the
     additive measures) of the per-document columns of ``stat_result``.
     """
     def top_five(aggregate, alias, stat_table, entity_table, label):
         """Top five ``entity_table`` rows by ``aggregate(stat_table.count)``.

         Only stat results that are some rdfdoc's current stats are
         considered.  Rows are returned as (label, id, alias).  All
         arguments are literals from this method, never request data.
         """
         stmt = (
             "SELECT %(aggregate)s(%(stat)s.count) AS %(alias)s, "
             "%(entity)s.%(label)s AS %(label)s, %(entity)s.id AS id "
             "FROM %(stat)s, stat_result, rdfdoc, %(entity)s "
             "WHERE %(stat)s.stat_result_id=stat_result.id "
             "AND rdfdoc.current_stats_id=stat_result.id "
             "AND %(entity)s.id=%(stat)s.%(entity)s_id "
             "GROUP BY %(entity)s.%(label)s, %(entity)s.id "
             "ORDER BY %(alias)s DESC LIMIT 5"
         ) % {
             'aggregate': aggregate,
             'alias': alias,
             'stat': stat_table,
             'entity': entity_table,
             'label': label,
         }
         return Session.query(label, 'id', alias).from_statement(stmt).all()

     def summary(column, with_sum=True, extra_condition=''):
         """avg/min/max/median (and optionally sum) of ``stat_result.<column>``.

         Restricted to current stat results whose ``entities`` is not NULL.
         ``extra_condition`` is appended verbatim to the WHERE clause.
         """
         aggregates = ['avg', 'min', 'max', 'median']
         if with_sum:
             aggregates.append('sum')
         select_list = ', '.join(
             '%s(stat_result.%s) AS %s' % (agg, column, agg) for agg in aggregates)
         stmt = (
             "SELECT %s FROM stat_result, rdfdoc "
             "WHERE rdfdoc.current_stats_id=stat_result.id "
             "AND stat_result.entities IS NOT NULL%s"
         ) % (select_list, extra_condition)
         return Session.query(*aggregates).from_statement(stmt).one()

     active_docs = Session.query(model.RDFDoc).filter(model.RDFDoc.active==True)
     c.rdfdocs = active_docs.order_by(model.RDFDoc.worked_on.desc(), model.RDFDoc.name, model.RDFDoc.last_updated.desc(), ).all()
     c.rdfdoc_count = len(c.rdfdocs)
     c.workers = Session.query(model.WorkerProc).order_by(model.WorkerProc.started.desc()).all()

     # document counts; every figure starts from active docs with current stats
     with_stats = active_docs.join(model.RDFDoc.current_stats)
     c.no_of_rdfdocs_with_triples = with_stats.filter(model.StatResult.triples > 0).count()
     c.sparql_packages = with_stats.filter(model.RDFDoc.format == "sparql").count()
     c.dump_packages = with_stats.filter(model.RDFDoc.format != "sparql").count()
     c.error_packages_dump = with_stats.filter(
             and_(
                 model.StatResult.errors != None,
                 model.RDFDoc.format != 'sparql',
                 model.RDFDoc.format != None
             )).count()
     c.error_packages_sparql = with_stats.filter(
             and_(
                 model.StatResult.errors != None,
                 model.RDFDoc.format == 'sparql',
                 model.RDFDoc.format != None
             )).count()
     c.problem_packages = c.error_packages_sparql + c.error_packages_dump
     c.touched_packages = c.no_of_rdfdocs_with_triples+c.problem_packages

     # triple totals; sum() over no rows yields None
     c.triples_dump = Session.query(func.sum(model.StatResult.triples)).join(model.RDFDoc.current_stats).filter(model.RDFDoc.format != "sparql").scalar()
     c.triples_sparql = Session.query(func.sum(model.StatResult.triples)).join(model.RDFDoc.current_stats).filter(model.RDFDoc.format == "sparql").scalar()
     if c.triples_dump is None:
         c.triples_dump = 0
     if c.triples_sparql is None:
         c.triples_sparql = 0

     # NOTE(review): vocabularies and classes are counted with len(.all())
     # while the kinds below use .count(); kept as found -- confirm whether
     # the difference is intended.
     vocabs = Session.query(model.Vocab).join(model.RDFVocabStat).join(model.StatResult).filter(
             model.StatResult.current_of!=None).all()
     c.vocabs = len(vocabs)
     rdf_classes = Session.query(model.RDFClass).join(model.RDFClassStat).join(model.StatResult).filter(
             model.StatResult.current_of!=None).all()
     c.rdf_classes = len(rdf_classes)
     c.properties = Session.query(model.RDFProperty).join(model.RDFPropertyStat).join(model.StatResult).filter(
         model.StatResult.current_of!=None).count()
     c.datatypes = Session.query(model.RDFDatatype).join(model.RDFDatatypeStat).join(model.StatResult).filter(
         model.StatResult.current_of!=None).count()
     c.link_count = Session.query(model.Link).join(model.LinkStat).join(model.StatResult).filter(
         model.StatResult.current_of!=None).count()
     c.languages = Session.query(func.count(model.Language.id)).join(model.LanguageStat).join(model.StatResult).filter(
         model.StatResult.current_of!=None).scalar()

     # most common vocabularies, classes, properties, datatypes, languages
     # and link sets, by number of counted stat rows
     c.v_usage = top_five('count', 'counter', 'rdf_vocab_stat', 'vocab', 'uri')
     c.c_usage = top_five('count', 'counter', 'rdf_class_stat_result', 'rdf_class', 'uri')
     c.p_usage = top_five('count', 'counter', 'rdf_property_stat', 'rdf_property', 'uri')
     c.t_usage = top_five('count', 'counter', 'rdf_datatype_stat', 'rdf_datatype', 'uri')
     c.l_usage = top_five('count', 'counter', 'language_stat', 'language', 'code')
     c.link_usage = top_five('count', 'counter', 'link_stat', 'link', 'code')

     # absolute sums: the same rankings by summed usage count
     c.v_sum = top_five('sum', 'sum', 'rdf_vocab_stat', 'vocab', 'uri')
     c.c_sum = top_five('sum', 'sum', 'rdf_class_stat_result', 'rdf_class', 'uri')
     c.p_sum = top_five('sum', 'sum', 'rdf_property_stat', 'rdf_property', 'uri')
     c.t_sum = top_five('sum', 'sum', 'rdf_datatype_stat', 'rdf_datatype', 'uri')
     c.l_sum = top_five('sum', 'sum', 'language_stat', 'language', 'code')
     c.link_sum = top_five('sum', 'sum', 'link_stat', 'link', 'code')

     # basics: per-document measures
     c.entities = summary('entities')
     c.literals = summary('literals')
     c.blanks = summary('blanks')
     c.blanks_as_subject = summary('blanks_as_subject')
     c.blanks_as_object = summary('blanks_as_object')
     c.subclasses = summary('subclasses')
     c.typed_subjects = summary('typed_subjects')
     c.labeled_subjects = summary('labeled_subjects')
     # these measures are queried without a sum
     c.properties_per_entity = summary('properties_per_entity', with_sum=False)
     c.string_length_typed = summary('string_length_typed', with_sum=False)
     c.string_length_untyped = summary('string_length_untyped', with_sum=False)
     c.class_hierarchy_depth = summary('class_hierarchy_depth', with_sum=False)
     c.property_hierarchy_depth = summary('property_hierarchy_depth', with_sum=False)
     c.links = summary('links')
     # triples are summarised for dumps only
     c.triples = summary('triples', extra_condition=" AND rdfdoc.format!='sparql'")
     # # langs per dataset
     # stmt = "SELECT avg(count), min(count), max(count), median(count) FROM \
     #     (SELECT count(language_stat.count) as count,rdfdoc.id AS rdfd FROM \
     #     language_stat,stat_result,rdfdoc,language WHERE \
     #     language_stat.stat_result_id=stat_result.id and rdfdoc.current_stats_id=stat_result.id \
     #     and language.id=language_stat.language_id and stat_result.entities is not NULL GROUP BY rdfdoc.id) as counter"
     # c.dataset_lang = Session.query('avg', 'min', 'max', 'median').from_statement(stmt).one()
     # vocabs per dataset
     stmt = "SELECT avg(count), min(count), max(count), median(count), sum(count) FROM \
         (SELECT count(rdf_vocab_stat.count) as count,rdfdoc.id AS rdfd FROM \
         rdf_vocab_stat,stat_result,rdfdoc,vocab WHERE \
         rdf_vocab_stat.stat_result_id=stat_result.id and rdfdoc.current_stats_id=stat_result.id \
         and vocab.id=rdf_vocab_stat.vocab_id and stat_result.entities is not NULL GROUP BY rdfdoc.id) as counter"
     c.dataset_vocab = Session.query('avg', 'min', 'max', 'median', 'sum').from_statement(stmt).one()
     # classes per dataset
     stmt = "SELECT avg(count), min(count), max(count), median(count), sum(count) FROM \
         (SELECT count(rdf_class_stat_result.count) as count,rdfdoc.id AS rdfd FROM \
         rdf_class_stat_result,stat_result,rdfdoc,rdf_class WHERE \
         rdf_class_stat_result.stat_result_id=stat_result.id and rdfdoc.current_stats_id=stat_result.id \
         and rdf_class.id=rdf_class_stat_result.rdf_class_id and stat_result.entities is not NULL GROUP BY rdfdoc.id) as counter"
     c.dataset_classes = Session.query('avg', 'min', 'max', 'median', 'sum').from_statement(stmt).one()
     # properties per dataset
     stmt = "SELECT avg(count), min(count), max(count), median(count), sum(count) FROM \
         (SELECT count(rdf_property_stat.count) as count,rdfdoc.id AS rdfd FROM \
         rdf_property_stat,stat_result,rdfdoc,rdf_property WHERE \
         rdf_property_stat.stat_result_id=stat_result.id and rdfdoc.current_stats_id=stat_result.id \
         and rdf_property.id=rdf_property_stat.rdf_property_id and stat_result.entities is not NULL GROUP BY rdfdoc.id) as counter"
     c.dataset_props = Session.query('avg', 'min', 'max', 'median', 'sum').from_statement(stmt).one()
     return render('/rdfdoc/stats.html')
Example #20
0
 def home(self):
     """Populate the template context with dashboard figures and render '/home.html'.

     Sets on ``c``: the list of active documents and its length, the worker
     processes, per-format package and error counts, triple totals for
     dumps and SPARQL endpoints, and the five most recently updated
     documents with and without errors.
     """
     # `== True` / `!= None` below are SQLAlchemy column expressions, not
     # Python comparisons, so they must not be rewritten with `is`.
     active_docs = Session.query(model.RDFDoc).filter(model.RDFDoc.active==True)

     c.rdfdocs = active_docs.order_by(
         model.RDFDoc.worked_on.desc(),
         model.RDFDoc.name,
         model.RDFDoc.last_updated.desc(),
     ).all()
     c.rdfdoc_count = len(c.rdfdocs)
     c.workers = Session.query(model.WorkerProc).order_by(
         model.WorkerProc.started.desc()).all()

     # Base query shared by every figure that needs the current stats row.
     with_stats = active_docs.join(model.RDFDoc.current_stats)

     c.no_of_rdfdocs_with_triples = with_stats.filter(
         model.StatResult.triples > 0).count()
     c.sparql_packages = with_stats.filter(
         model.RDFDoc.format == "sparql").count()
     c.dump_packages = with_stats.filter(
         model.RDFDoc.format != "sparql").count()

     def count_with_errors(format_clause):
         # Number of active documents with recorded errors matching the
         # given format condition (documents without a format are excluded).
         return with_stats.filter(
             and_(
                 model.StatResult.errors != None,
                 format_clause,
                 model.RDFDoc.format != None
             )).count()

     c.error_packages_dump = count_with_errors(model.RDFDoc.format != 'sparql')
     c.error_packages_sparql = count_with_errors(model.RDFDoc.format == 'sparql')
     c.problem_packages = c.error_packages_sparql + c.error_packages_dump
     c.touched_packages = c.no_of_rdfdocs_with_triples+c.problem_packages

     # NOTE(review): unlike the counts above, the triple totals are not
     # restricted to active documents -- confirm that this is intended.
     triple_total = Session.query(
         func.sum(model.StatResult.triples)).join(model.RDFDoc.current_stats)
     dump_total = triple_total.filter(model.RDFDoc.format != "sparql").scalar()
     sparql_total = triple_total.filter(model.RDFDoc.format == "sparql").scalar()
     # SUM() over zero rows yields NULL; present that as 0.
     c.triples_dump = 0 if dump_total is None else dump_total
     c.triples_sparql = 0 if sparql_total is None else sparql_total

     def latest_five(errors_clause):
         # Five most recently updated active documents whose current stats
         # satisfy the given condition on the errors column.
         return with_stats.filter(
             and_(
                 model.StatResult.last_updated!=None,
                 errors_clause,
                 model.RDFDoc.last_updated!=None,
             )
             ).order_by(desc(model.RDFDoc.last_updated))[:5]

     # most recent successful updates
     c.recent_updates = latest_five(model.StatResult.errors==None)
     # most recent updates with errors
     c.recent_updates_errors = latest_five(model.StatResult.errors!=None)
     return render('/home.html')
Example #21
0
 def valid_and_available(self):
     """Render '/rdfdoc/txtlist.txt' as plain text for usable dump documents.

     ``c.rdfdocs`` receives every active document whose current stats
     report more than zero triples and whose format is not 'sparql'.
     """
     # SQLAlchemy column expressions: `== True` must stay as written.
     has_triples_and_is_dump = and_(
         model.StatResult.triples > 0,
         model.RDFDoc.format != 'sparql',
     )
     docs = Session.query(model.RDFDoc).filter(model.RDFDoc.active==True)
     docs = docs.join(model.RDFDoc.current_stats)
     docs = docs.filter(has_triples_and_is_dump)
     c.rdfdocs = docs.all()
     response.content_type = 'text/plain'
     return render('/rdfdoc/txtlist.txt')
Example #22
0
    def index(self, format="html"):
        """GET /datatypes: All items in the collection

        Builds one row per datatype (uri, id, summed usage count, number of
        stat results) restricted to stat results whose ``current_of`` is set,
        applies the optional ``search`` / ``sort`` / ``page`` GET parameters,
        and renders '/datatypes/index.html' with a 50-item page.
        """
        # url('datatypes')
        params = request.GET

        datatypes = Session.query(
            model.RDFDatatype.uri,
            model.RDFDatatype.id,
            func.sum(model.RDFDatatypeStat.count),
            func.count(model.StatResult.id),
        )
        datatypes = datatypes.join(model.RDFDatatypeStat).join(model.StatResult)
        # `!= None` is a SQLAlchemy column expression (IS NOT NULL).
        datatypes = datatypes.filter(model.StatResult.current_of != None)
        datatypes = datatypes.group_by(model.RDFDatatype.uri, model.RDFDatatype.id)

        # optional search
        c.query_string = "?"
        c.search = ""
        if "search" in params:
            term = params["search"]
            # NOTE(review): the term is interpolated unescaped into both the
            # LIKE pattern and the query string -- confirm that '%', '&' etc.
            # are handled acceptably downstream.
            datatypes = datatypes.filter(model.RDFDatatype.uri.ilike("%%%s%%" % term))
            c.query_string += "search=%s&" % term
            c.search = term

        # sort results: "uri" and "overall" have their own ordering; every
        # other value (including "datasets" and a missing parameter) falls
        # back to ordering by dataset count first.
        sort_key = params.get("sort")
        c.sort_order = sort_key
        orderings = {
            "uri": (model.RDFDatatype.uri,),
            "overall": (
                desc(func.sum(model.RDFDatatypeStat.count)),
                desc(func.count(model.StatResult.id)),
                model.RDFDatatype.uri,
            ),
        }
        by_datasets = (
            desc(func.count(model.StatResult.id)),
            desc(func.sum(model.RDFDatatypeStat.count)),
            model.RDFDatatype.uri,
        )
        c.datatypes = datatypes.order_by(*orderings.get(sort_key, by_datasets))

        # pagination
        page = params.get("page", 1)
        page_url = PageURL_WebOb(request)
        c.datatypes_page = Page(c.datatypes, page=page, items_per_page=50, url=page_url)
        c.count = c.datatypes_page.item_count
        return render("/datatypes/index.html")