def portalResBody(snapshot, portalid):
    """Render the resource list of a portal for one snapshot.

    Collects the resource overview (``getResourceInfo``), the resource rows
    (``getResourceInfos`` routed through ``withView``), the general portal
    information and the portal's resource count, then renders
    ``odpw_portal_resources_list.jinja``.

    :param snapshot: snapshot identifier handed to the query helpers.
    :param portalid: id of the portal whose resources are listed.
    :return: whatever ``render`` returns for the resource list template.
    """
    with Timer(key="get_portalRes", verbose=True):
        Session = current_app.config['dbsession']
        dbc = current_app.config['dbc']

        with Timer(key="query_portalRes", verbose=True):
            data = getResourceInfo(Session, dbc, portalid, snapshot)

        with Timer(key="query_getMetaResource", verbose=True):
            viewName = "view_{}_{}_{}".format('resinfo', portalid, snapshot)
            qorg = getResourceInfos(Session, snapshot, portalid)
            q = withView(qorg, viewName, Session, dbc)

            start = time.time()
            data['uris'] = [row2dict(i) for i in q]
            elapsed = time.time() - start

            # Reading the rows took more than 5 seconds: create a view for
            # this portal/snapshot.
            if elapsed > 5:
                print("Create View {}".format(viewName))
                try:
                    createView(qorg, viewName, Session)
                except Exception as e:
                    # The view may already be there (e.g. created by an
                    # earlier or concurrent request); only that case is
                    # tolerated, everything else propagates.
                    if 'already exists' not in str(e):
                        raise

        data.update(getPortalInfos(Session, portalid, snapshot))

        with Timer(key="query_portalResourceCount", verbose=True):
            r = Session.query(Portal.resourcecount).filter(
                Portal.id == portalid)
            # Each row is a 1-tuple; the last row seen wins.
            for P in r:
                data['resources'] = P[0]

        return render("odpw_portal_resources_list.jinja",
                      data=data,
                      snapshot=snapshot,
                      portalid=portalid)
def portalLinkCheckCSV(portalid):
    """Export the link-check URLs of a portal's organisations as CSV.

    For the current snapshot (``getCurrentSnapshot``) every distinct
    ``Dataset.organisation`` of the portal becomes one CSV row
    ``organisation, link``; the link is the ``.orga_resources`` URL
    prefixed with ``https://tools.adequate.at``.

    :param portalid: id of the portal to export.
    :return: response carrying the CSV as attachment ``export.csv``.
    """
    with Timer(key="get_portalLinkCheckCSV", verbose=True):
        buf = StringIO.StringIO()
        writer = csv.writer(buf)

        snapshot = getCurrentSnapshot()
        Session = current_app.config['dbsession']
        data = getPortalInfos(Session, portalid, snapshot)

        with Timer(key="query_portalorgas", verbose=True):
            orgaQuery = Session.query(Dataset.organisation)
            orgaQuery = orgaQuery.filter(Dataset.portalid == portalid)
            orgaQuery = orgaQuery.filter(Dataset.snapshot == snapshot)
            orgaQuery = orgaQuery.distinct(Dataset.organisation)
            data['organisations'] = [row2dict(row) for row in orgaQuery]

        # One CSV line per organisation: its name and its link-check page.
        for entry in data['organisations']:
            orga = entry['organisation']
            path = url_for('.orga_resources',
                           portalid=portalid,
                           snapshot=snapshot,
                           orga=orga)
            writer.writerow([orga, 'https://tools.adequate.at' + path])

        response = make_response(buf.getvalue())
        response.headers[
            "Content-Disposition"] = "attachment; filename=export.csv"
        response.headers["Content-type"] = "text/csv"
        return response
def orga_resource(portalid, snapshot, orga):
    """Render the resource list of one organisation within a portal.

    Each row returned by ``getResourceInfos`` is read positionally: index 0
    is converted with ``row2dict``, index 1 is used as dataset id and
    index 2 as dataset title.

    :param portalid: id of the portal.
    :param snapshot: snapshot identifier handed to the query helpers.
    :param orga: organisation passed to the resource helpers.
    :return: whatever ``render`` returns for the resource list template.
    """
    with Timer(key="get_orga_resource", verbose=True):
        Session = current_app.config['dbsession']
        dbc = current_app.config['dbc']

        data = getResourceInfo(Session, dbc, portalid, snapshot, orga)

        entries = []
        data['resList'] = entries
        for row in getResourceInfos(Session, snapshot, portalid, orga):
            datasetLink = getDatasetURI(row[1], portalid)
            entries.append({
                'uri': row2dict(row[0]),
                'dataset': {
                    'uri': datasetLink,
                    'title': row[2]
                }
            })

        data.update(getPortalInfos(Session, portalid, snapshot))

        countRows = current_app.config['dbsession'].query(
            Portal.resourcecount).filter(Portal.id == portalid)
        # Rows are 1-tuples; the last one seen provides the count.
        for countRow in countRows:
            data['resources'] = countRow[0]

        return render("odpw_portal_resources_list.jinja",
                      data=data,
                      snapshot=snapshot,
                      portalid=portalid)
def portalsdynamicy():
    """Render the dynamicity overview of all portals for the current snapshot.

    Joins ``Portal`` with ``PortalSnapshotDynamicity`` for the snapshot
    derived from the current time and copies the dynamicity attributes
    listed in ``keys`` onto each portal dict.

    A pandas DataFrame used to be built here and coerced with
    ``convert_objects``, but it was never passed to the template; it has
    been removed because it only added failure modes (KeyError on an empty
    result, AttributeError on pandas versions without ``convert_objects``).

    :return: whatever ``render`` returns for ``odpw_portals_dynamics.jinja``.
    """
    with Timer(key="get_portalsdynamicy", verbose=True):
        snapshot = getSnapshotfromTime(datetime.datetime.now())
        Session = current_app.config['dbsession']

        with Timer(key="query_portalsdynamicy", verbose=True):
            res = list(
                Session.query(Portal).join(PortalSnapshotDynamicity).filter(
                    PortalSnapshotDynamicity.snapshot == snapshot).add_entity(
                        PortalSnapshotDynamicity))

        keys = [
            'dindex', 'changefrequ', 'adddelratio', 'dyratio', 'staticRatio',
            'addRatio', 'delRatio', 'updatedRatio'
        ]

        results = []
        for r in res:
            d = row2dict(r)
            # r[1] is the PortalSnapshotDynamicity entity added to the query.
            for k in keys:
                d[k] = getattr(r[1], k)
            results.append(d)

        return render('odpw_portals_dynamics.jinja',
                      data={'portals': results},
                      keys=keys,
                      snapshot=snapshot)
def systemevolv():
    """Render the evolution of portal software systems over the snapshots.

    Groups the portal snapshots by snapshot and portal software, counting
    the rows and summing resource and dataset counts per group, then plots
    the result with ``systemEvolutionPlot``.

    :return: whatever ``render`` returns for ``odpw_system_evolution.jinja``.
    """
    with Timer(key="get_systemevolv", verbose=True):
        Session = current_app.config['dbsession']

        with Timer(key="query_systemevolv", verbose=True):
            perPortal = Session.query(
                PortalSnapshot.snapshot.label('snapshot'),
                Portal.software,
                PortalSnapshot.datasetcount,
                PortalSnapshot.resourcecount).join(Portal).subquery()
            cols = perPortal.c

            grouped = Session.query(
                cols.snapshot,
                cols.software,
                func.count().label('count'),
                func.sum(cols.resourcecount).label('resources'),
                func.sum(cols.datasetcount).label('datasets'))
            grouped = grouped.group_by(cols.snapshot, cols.software)

            records = []
            for row in grouped.all():
                records.append(row2dict(row))
            df = pd.DataFrame(records)

        with Timer(key="plot_systemevolv", verbose=True):
            plot = systemEvolutionPlot(df)
            script, div = components(plot)
            jsInline = INLINE.render_js()
            cssInline = INLINE.render_css()

        return render("odpw_system_evolution.jinja",
                      plot_script=script,
                      plot_div=div,
                      js_resources=jsInline,
                      css_resources=cssInline)
def portalSnapshotQualityDF(self, portalid, snapshot):
    """Build a DataFrame with one row per available quality metric.

    Only the first row of ``self.portalSnapshotQuality(portalid, snapshot)``
    is used. For every metric declared in the module-level ``qa`` list whose
    value in that row is neither ``None`` nor the string ``'None'``, one
    record is produced holding the metric name, its dimension and colour,
    the value as float and ``perc`` (the ``<metric>N`` count divided by the
    number of datasets, or 0 when there are no datasets), extended by the
    metric's own description dict from ``qa``.

    :param portalid: id of the portal.
    :param snapshot: snapshot identifier.
    :return: ``pandas.DataFrame`` of metric records; empty when the quality
        query yields no row (previously this raised a TypeError).
    """
    data = None
    for r in self.portalSnapshotQuality(portalid, snapshot):
        data = row2dict(r)
        break

    # No quality row for this portal/snapshot: nothing to report.
    if data is None:
        return pd.DataFrame([])

    d = []
    datasets = int(data['datasets'])
    for inD in qa:
        for k, v in inD['metrics'].items():
            k = k.lower()
            # TODO what to do if metric has no value?
            if data[k] is not None and data[k] != 'None':
                value = float(data[k])
                if datasets > 0:
                    perc = int(data[k + 'N']) / float(datasets)
                else:
                    perc = 0
                c = {
                    'Metric': k,
                    'Dimension': inD['dimension'],
                    'dim_color': inD['color'],
                    'value': value,
                    'perc': perc
                }
                c.update(v)
                d.append(c)
    return pd.DataFrame(d)
def get(self, portalid):
    """Return the snapshots stored for a portal as a JSON list.

    :param portalid: id of the portal whose ``PortalSnapshot.snapshot``
        values are selected.
    :return: ``jsonify`` response with one ``row2dict`` entry per row.
    """
    with Timer(key="PortalSnapshots.get", verbose=True):
        db = current_app.config['dbsession']
        snapshotQuery = db.query(PortalSnapshot.snapshot).filter(
            PortalSnapshot.portalid == portalid)

        payload = []
        for row in snapshotQuery.all():
            payload.append(row2dict(row))
        return jsonify(payload)
def get(self, portalid, snapshot):
    """Return the quality rows of a portal for one snapshot as JSON.

    :param portalid: id of the portal.
    :param snapshot: snapshot the ``PortalSnapshotQuality`` rows must match.
    :return: ``jsonify`` response with one ``row2dict`` entry per row.
    """
    db = current_app.config['dbsession']

    qualityQuery = db.query(PortalSnapshotQuality)
    qualityQuery = qualityQuery.filter(
        PortalSnapshotQuality.portalid == portalid)
    qualityQuery = qualityQuery.filter(
        PortalSnapshotQuality.snapshot == snapshot)

    payload = []
    for row in qualityQuery.all():
        payload.append(row2dict(row))
    return jsonify(payload)
def portal(portalid, snapshot=None):
    """Render the overview page of a single portal.

    Looks up the joined portal / fetch / quality / dynamicity row for the
    requested snapshot and, while none exists, steps back one week at a
    time with ``getPreviousWeek``. The page is rendered with the originally
    requested snapshot, even when older data had to be used.

    :param portalid: id of the portal to show.
    :param snapshot: snapshot identifier; defaults to the snapshot of the
        current time. The default is resolved per call - it used to be
        evaluated once at import time and went stale in a long-running
        process.
    :return: whatever ``render`` returns for ``odpw_portal.jinja``.
    :raises: the HTTP 404 error of ``flask.abort`` when no data is found
        within ``_PORTAL_MAX_LOOKBACK_WEEKS`` weeks (previously the lookup
        looped forever, e.g. for an unknown portal id).
    """
    # Local import: only needed on the "no data at all" path.
    from flask import abort

    if snapshot is None:
        snapshot = getSnapshotfromTime(datetime.datetime.now())

    with Timer(key="get_portal", verbose=True):
        current_sn = snapshot
        Session = current_app.config['dbsession']
        data = getPortalInfos(Session, portalid, snapshot)

        with Timer(key="query_portal", verbose=True):
            r = _portalSnapshotRow(Session, portalid, snapshot)
            weeksBack = 0
            while r is None:
                if weeksBack >= _PORTAL_MAX_LOOKBACK_WEEKS:
                    abort(404)
                weeksBack += 1
                snapshot = getPreviousWeek(snapshot)
                r = _portalSnapshotRow(Session, portalid, snapshot)

        # Row layout follows the add_entity order of the query:
        # Portal, PortalSnapshot, PortalSnapshotQuality,
        # PortalSnapshotDynamicity.
        data['portal'] = row2dict(r[0])
        data['fetchInfo'] = row2dict(r[1])
        data['fetchInfo']['duration'] = data['fetchInfo']['end'] - data[
            'fetchInfo']['start']
        data['dynamicity'] = row2dict(r[3])
        data['quality'] = row2dict(r[2])

        return render("odpw_portal.jinja",
                      snapshot=current_sn,
                      portalid=portalid,
                      data=data)


# Upper bound for stepping back through weekly snapshots (about 20 years);
# it only exists to guarantee that the lookup in portal() terminates.
_PORTAL_MAX_LOOKBACK_WEEKS = 1040


def _portalSnapshotRow(Session, portalid, snapshot):
    """Return the joined portal row for one snapshot, or None if missing."""
    return Session.query(Portal).filter(Portal.id == portalid) \
        .join(PortalSnapshotQuality, PortalSnapshotQuality.portalid == Portal.id) \
        .filter(PortalSnapshotQuality.snapshot == snapshot) \
        .join(PortalSnapshot, PortalSnapshot.portalid == Portal.id) \
        .filter(PortalSnapshot.snapshot == snapshot) \
        .join(PortalSnapshotDynamicity, PortalSnapshotDynamicity.portalid == Portal.id) \
        .filter(PortalSnapshotDynamicity.snapshot == snapshot) \
        .add_entity(PortalSnapshot) \
        .add_entity(PortalSnapshotQuality) \
        .add_entity(PortalSnapshotDynamicity).first()
def resourceInfo(snapshot, portalid, uri):
    """Render the detail page of a single resource URI.

    Loads the portal information, the portal's resource rows
    (``getResourceInfos`` routed through ``withView``) and all
    ``ResourceInfo`` rows for ``uri``. A stored ``header`` value is parsed
    from its string form with ``ast.literal_eval``; a ``None`` header
    becomes an empty string.

    :param snapshot: snapshot identifier handed to the query helpers.
    :param portalid: id of the portal.
    :param uri: resource URI to look up in ``ResourceInfo``.
    :return: whatever ``render`` returns for ``odpw_portal_resource.jinja``.
    """
    with Timer(key="get_resourceInfo", verbose=True):
        Session = current_app.config['dbsession']
        dbc = current_app.config['dbc']
        data = getPortalInfos(Session, portalid, snapshot)

        with Timer(key="query_resources", verbose=True):
            viewName = "view_{}_{}_{}".format('resinfo', portalid, snapshot)
            qorg = getResourceInfos(Session, snapshot, portalid)
            q = withView(qorg, viewName, Session, dbc)

            start = time.time()
            data['resources'] = [row2dict(r) for r in q.all()]
            elapsed = time.time() - start

            # Reading the rows took more than 5 seconds: create a view.
            if elapsed > 5:
                print("Create View {}".format(viewName))
                try:
                    createView(qorg, viewName, Session)
                except Exception as e:
                    # str(e) instead of e.message: the latter is a
                    # Python 2-only attribute. An existing view is fine;
                    # anything else is re-raised with its traceback intact.
                    if 'already exists' not in str(e):
                        raise

        with Timer(key="query_resourceInfo", verbose=True):
            q = Session.query(ResourceInfo) \
                .filter(ResourceInfo.uri == uri)
            data['resourceInfo'] = [row2dict(r) for r in q.all()]

        for r in data['resourceInfo']:
            if 'header' in r:
                if r['header'] is None:
                    r['header'] = ""
                else:
                    # literal_eval only accepts Python literals, so the
                    # stored text is parsed without being executed.
                    r['header'] = ast.literal_eval(str(r['header']))

        return render("odpw_portal_resource.jinja",
                      snapshot=snapshot,
                      portalid=portalid,
                      uri=uri,
                      data=data)
def portaldash():
    """Render the portal dashboard listing every portal.

    :return: whatever ``render`` returns for ``odpw_portaldash.jinja``,
        given all ``Portal`` rows and the snapshot of the current time.
    """
    with Timer(key="get_portaldash", verbose=True):
        cursn = getSnapshotfromTime(datetime.datetime.now())
        Session = current_app.config['dbsession']

        with Timer(key="query_portaldash", verbose=True):
            portals = []
            for row in Session.query(Portal).all():
                portals.append(row2dict(row))

        return render("odpw_portaldash.jinja",
                      data={'portals': portals},
                      snapshot=cursn)
def getPortalDatasets(Session, portalid, snapshot):
    """Return title and id of all datasets of a portal in one snapshot.

    :param Session: database session used for the query.
    :param portalid: id of the portal.
    :param snapshot: snapshot the datasets must belong to.
    :return: dict with the single key ``"datasets"`` holding one
        ``row2dict`` entry per ``(Dataset.title, Dataset.id)`` row.
    """
    with Timer(key="getPortalDatasets", verbose=True):
        datasetQuery = Session.query(Dataset.title, Dataset.id)
        datasetQuery = datasetQuery.filter(Dataset.portalid == portalid)
        datasetQuery = datasetQuery.filter(Dataset.snapshot == snapshot)

        datasets = []
        for row in datasetQuery.all():
            datasets.append(row2dict(row))
        return {"datasets": datasets}
def get(self):
    """
    Returns list of portals.

    Every ``Portal`` row is converted with ``row2dict`` and the list is
    sent as an ``application/json`` response.
    """
    db = current_app.config['dbsession']

    portals = []
    for row in db.query(Portal).all():
        portals.append(row2dict(row))

    body = json.dumps(portals)
    return Response(body, mimetype='application/json')
def get(self, portalid, snapshot):
    """Return the snapshot rows of a portal, with quality and portal data.

    ``PortalSnapshot`` is outer-joined with ``PortalSnapshotQuality`` on
    portal id and snapshot and joined with ``Portal``; both are added to
    the result rows.

    :param portalid: id of the portal.
    :param snapshot: when not ``None``, restricts
        ``PortalSnapshotQuality.snapshot`` to this value.
    :return: ``jsonify`` response with one ``row2dict`` entry per row.
    """
    query = PortalSnapshot.query

    # NOTE(review): the restriction is on the outer-joined quality entity,
    # not on PortalSnapshot.snapshot - confirm this is intended, since it
    # presumably drops snapshots that have no quality row.
    if snapshot is not None:
        query = query.filter(PortalSnapshotQuality.snapshot == snapshot)

    sameSnapshot = and_(
        PortalSnapshot.portalid == PortalSnapshotQuality.portalid,
        PortalSnapshot.snapshot == PortalSnapshotQuality.snapshot)

    query = query.filter(PortalSnapshot.portalid == portalid)
    query = query.outerjoin(PortalSnapshotQuality, sameSnapshot)
    query = query.join(Portal)
    query = query.add_entity(PortalSnapshotQuality)
    query = query.add_entity(Portal)

    payload = []
    for row in query.all():
        payload.append(row2dict(row))
    return jsonify(payload)
def get(self, portalid, snapshot, datasetid):
    """Return the stored data of one dataset in one snapshot as JSON.

    ``DatasetData`` is joined with ``Dataset`` via their ``md5`` columns
    and restricted to the given snapshot, portal and dataset id.

    :param portalid: id of the portal.
    :param snapshot: snapshot the dataset must belong to.
    :param datasetid: id of the dataset.
    :return: ``jsonify`` response with one ``row2dict`` entry per row.
    """
    with Timer(key="PortalDatasetData.get", verbose=True):
        db = current_app.config['dbsession']

        dataQuery = db.query(DatasetData)
        dataQuery = dataQuery.join(Dataset, DatasetData.md5 == Dataset.md5)
        dataQuery = dataQuery.filter(Dataset.snapshot == snapshot)
        dataQuery = dataQuery.filter(Dataset.portalid == portalid)
        dataQuery = dataQuery.filter(Dataset.id == datasetid)

        payload = []
        for row in dataQuery.all():
            payload.append(row2dict(row))

    return jsonify(payload)
def portalCSVW(snapshot, portalid):
    """Render the CSVW page of a portal.

    Besides the general portal information, the list of all portals and
    the portal's datasets, the page receives every ``(Dataset,
    MetaResource)`` pair of the portal/snapshot whose resource format is
    ``'csv'``.

    :param snapshot: snapshot identifier.
    :param portalid: id of the portal.
    :return: whatever ``render`` returns for ``odpw_portal_csvw.jinja``.
    """
    with Timer(key="portalCSVW", verbose=True):
        Session = current_app.config['dbsession']

        data = getPortalInfos(Session, portalid, snapshot)

        allPortals = []
        for row in Session.query(Portal).all():
            allPortals.append(row2dict(row))
        data['portals'] = allPortals

        data.update(getPortalDatasets(Session, portalid, snapshot))

        csvQuery = Session.query(Dataset, MetaResource)
        csvQuery = csvQuery.filter(MetaResource.md5 == Dataset.md5)
        csvQuery = csvQuery.filter(MetaResource.format == 'csv')
        csvQuery = csvQuery.filter(Dataset.portalid == portalid)
        csvQuery = csvQuery.filter(Dataset.snapshot == snapshot)

        csvResources = []
        for row in csvQuery.all():
            csvResources.append(row2dict(row))
        data['resources'] = csvResources

    return render("odpw_portal_csvw.jinja",
                  data=data,
                  snapshot=snapshot,
                  portalid=portalid,
                  qa=qa,
                  error=errorStatus)
def portalOrganisations(snapshot, portalid):
    """Render the distribution page of a portal.

    The template data consists of the general portal information, the list
    of all portals and the result of ``aggregatePortalInfo`` called with
    ``limit=None``.

    :param snapshot: snapshot identifier.
    :param portalid: id of the portal.
    :return: whatever ``render`` returns for ``odpw_portal_dist.jinja``.
    """
    with Timer(key="portalRes", verbose=True):
        Session = current_app.config['dbsession']
        dbc = current_app.config['dbc']

        data = getPortalInfos(Session, portalid, snapshot)

        allPortals = []
        for row in Session.query(Portal).all():
            allPortals.append(row2dict(row))
        data['portals'] = allPortals

        aggregated = aggregatePortalInfo(Session,
                                         portalid,
                                         snapshot,
                                         dbc,
                                         limit=None)
        data.update(aggregated)

    return render("odpw_portal_dist.jinja",
                  data=data,
                  snapshot=snapshot,
                  portalid=portalid)
def portalLinkCheck(snapshot, portalid):
    """Render the link checker page of a portal.

    The distinct ``Dataset.organisation`` values of the portal in the
    given snapshot are added to the template data as ``organisations``.

    :param snapshot: snapshot identifier.
    :param portalid: id of the portal.
    :return: whatever ``render`` returns for
        ``odpw_portal_linkchecker.jinja``.
    """
    Session = current_app.config['dbsession']
    data = getPortalInfos(Session, portalid, snapshot)

    orgaQuery = Session.query(Dataset.organisation)
    orgaQuery = orgaQuery.filter(Dataset.portalid == portalid)
    orgaQuery = orgaQuery.filter(Dataset.snapshot == snapshot)
    orgaQuery = orgaQuery.distinct(Dataset.organisation)

    organisations = []
    for row in orgaQuery:
        organisations.append(row2dict(row))
    data['organisations'] = organisations

    return render("odpw_portal_linkchecker.jinja",
                  snapshot=snapshot,
                  portalid=portalid,
                  data=data)
def portalDynamicy(snapshot, portalid):
    """Render the dynamicity plot page of a portal.

    Loads every ``PortalSnapshotDynamicity`` row of the portal up to and
    including ``snapshot``, copies the attributes listed in ``keys`` onto
    each row dict and plots the resulting DataFrame with
    ``portalDynamicity``.

    :param snapshot: latest snapshot to include.
    :param portalid: id of the portal.
    :return: whatever ``render`` returns for
        ``odpw_portal_dynamicity.jinja``.
    """
    Session = current_app.config['dbsession']

    dynQuery = Session.query(PortalSnapshotDynamicity)
    dynQuery = dynQuery.filter(PortalSnapshotDynamicity.portalid == portalid)
    dynQuery = dynQuery.filter(PortalSnapshotDynamicity.snapshot <= snapshot)

    keys = [
        'dindex', 'changefrequ', 'adddelratio', 'dyratio', 'staticRatio',
        'addRatio', 'delRatio', 'updatedRatio'
    ]

    records = []
    for psd in dynQuery:
        record = row2dict(psd)
        # Add/overwrite the dynamicity attributes explicitly.
        record.update((k, getattr(psd, k)) for k in keys)
        records.append(record)
    df = pd.DataFrame(records)

    with Timer(key="dynPlot", verbose=True):
        plot = portalDynamicity(df)
        script, div = components(plot)
        jsInline = INLINE.render_js()
        cssInline = INLINE.render_css()

    data = getPortalInfos(Session, portalid, snapshot)
    data['portals'] = [row2dict(row) for row in Session.query(Portal).all()]

    return render("odpw_portal_dynamicity.jinja",
                  plot_script=script,
                  plot_div=div,
                  js_resources=jsInline,
                  css_resources=cssInline,
                  snapshot=snapshot,
                  portalid=portalid,
                  data=data)
def portalreport(portalid, snapshot=None):
    """Render the report page of a portal.

    The distinct ``Dataset.organisation`` values of the portal in the
    snapshot are added to the template data as ``organisations``.

    :param portalid: id of the portal.
    :param snapshot: snapshot identifier; defaults to the snapshot of the
        current time. The default is resolved per call - it used to be
        evaluated once at import time and went stale in a long-running
        process.
    :return: whatever ``render`` returns for ``odpw_portal_report.jinja``.
    """
    if snapshot is None:
        snapshot = getSnapshotfromTime(datetime.datetime.now())

    with Timer(key="get_portal", verbose=True):
        Session = current_app.config['dbsession']
        data = getPortalInfos(Session, portalid, snapshot)

        with Timer(key="query_portalreport", verbose=True):
            q = Session.query(Dataset.organisation) \
                .filter(Dataset.portalid == portalid) \
                .filter(Dataset.snapshot == snapshot) \
                .distinct(Dataset.organisation)
            data['organisations'] = [row2dict(res) for res in q]

        return render("odpw_portal_report.jinja",
                      snapshot=snapshot,
                      portalid=portalid,
                      data=data)
def get(self):
    """
    get list of portals with their current quality assessment metrics.

    ``Portal`` (plus its dataset and resource counts) is joined with
    ``PortalSnapshotQuality``; no snapshot restriction is applied here.
    """
    db = current_app.config['dbsession']

    qualityQuery = db.query(Portal, Portal.datasetcount,
                            Portal.resourcecount)
    qualityQuery = qualityQuery.join(PortalSnapshotQuality)
    qualityQuery = qualityQuery.add_entity(PortalSnapshotQuality)

    payload = []
    for row in qualityQuery:
        payload.append(row2dict(row))

    return Response(json.dumps(payload), mimetype='application/json')
def get(self):
    """
    Returns list of portals with additional stats.

    Each row combines ``Portal`` with its snapshot count, first and last
    snapshot, dataset count and resource count.
    """
    db = current_app.config['dbsession']

    statsQuery = db.query(Portal, Portal.snapshot_count,
                          Portal.first_snapshot, Portal.last_snapshot,
                          Portal.datasetcount, Portal.resourcecount)

    payload = []
    for row in statsQuery:
        payload.append(row2dict(row))

    return Response(json.dumps(payload), mimetype='application/json')
def get(self, snapshot):
    """
    get list of portals with their quality assessment metrics for the specified snapshot

    ``Portal`` (plus its dataset and resource counts) is joined with
    ``PortalSnapshotQuality`` restricted to ``snapshot``.
    """
    db = current_app.config['dbsession']

    qualityQuery = db.query(Portal, Portal.datasetcount,
                            Portal.resourcecount)
    qualityQuery = qualityQuery.join(PortalSnapshotQuality)
    qualityQuery = qualityQuery.filter(
        PortalSnapshotQuality.snapshot == snapshot)
    qualityQuery = qualityQuery.add_entity(PortalSnapshotQuality)

    payload = []
    for row in qualityQuery:
        payload.append(row2dict(row))

    return Response(json.dumps(payload), mimetype='application/json')
def portalsquality():
    """Render the quality overview of all portals for the current snapshot.

    Loads every portal joined with its ``PortalSnapshotQuality`` row for
    the snapshot of the current time, coerces the metric columns (the
    lower-cased metric names from the module-level ``qa`` list) to numbers
    and aggregates them per ``iso`` and per ``software`` value.

    Changes over the previous version: ``Series.convert_objects`` (removed
    from pandas) is replaced by ``pandas.to_numeric(errors='coerce')``, and
    an empty query result yields empty aggregations instead of a KeyError.

    :return: whatever ``render`` returns for ``odpw_portals_quality.jinja``.
    """
    with Timer(key="get_portalsquality", verbose=True):
        Session = current_app.config['dbsession']
        snapshot = getSnapshotfromTime(datetime.datetime.now())

        with Timer(key="query_portalsquality"):
            results = [
                row2dict(r) for r in Session.query(
                    Portal, Portal.datasetcount,
                    Portal.resourcecount).join(PortalSnapshotQuality).filter(
                        PortalSnapshotQuality.snapshot == snapshot).add_entity(
                            PortalSnapshotQuality)
            ]

        keys = [i.lower() for q in qa for i in q['metrics']]

        df = pd.DataFrame(results)
        if df.empty:
            # No portal has quality data for this snapshot; none of the
            # expected columns exist, so there is nothing to aggregate.
            resultsIso = []
            resultSoft = []
        else:
            for c in keys:
                # Unparseable values become NaN.
                df[c] = pd.to_numeric(df[c], errors='coerce')
            resultsIso = _groupMeansWithCount(df, 'iso')
            resultSoft = _groupMeansWithCount(df, 'software')

        return render('odpw_portals_quality.jinja',
                      data={
                          'portals': results,
                          'iso': resultsIso,
                          'soft': resultSoft
                      },
                      keys=keys,
                      snapshot=snapshot)


def _groupMeansWithCount(df, column):
    """Return per-group means plus a ``count`` column as a list of records."""
    grouped = df.groupby([column])
    means = grouped.agg('mean') \
        .join(pd.DataFrame(grouped.size(), columns=['count']))
    return means.reset_index().to_dict(orient='records')
def get(self, portalid, snapshot):
    """Return the resource URIs of a portal in one snapshot as a JSON list.

    ``MetaResource`` is joined with ``Dataset`` via their ``md5`` columns
    and restricted to the given snapshot and portal. Two optional request
    arguments narrow the result:

    * ``format`` - only resources whose ``MetaResource.format`` equals it.
    * ``size`` - only resources whose size is at most this value or whose
      size is NULL. The value is parsed as an integer; a value that is not
      an integer is ignored rather than handed to the database as a raw
      string.

    :param portalid: id of the portal.
    :param snapshot: snapshot the datasets must belong to.
    :return: ``jsonify`` response with the list of URIs.
    """
    with Timer(key="PortalSnapshotResources.get", verbose=True):
        session = current_app.config['dbsession']
        q = session.query(MetaResource.uri) \
            .join(Dataset, Dataset.md5 == MetaResource.md5) \
            .filter(Dataset.snapshot == snapshot) \
            .filter(Dataset.portalid == portalid)

        # Named resFormat so the builtin format() is not shadowed.
        resFormat = request.args.get("format")
        if resFormat:
            q = q.filter(MetaResource.format == resFormat)

        # type=int makes request.args.get return None for non-integers.
        maxSize = request.args.get("size", type=int)
        if maxSize is not None:
            # "== None" is intentional: SQLAlchemy renders it as IS NULL.
            q = q.filter((MetaResource.size <= maxSize) |
                         (MetaResource.size == None))  # noqa: E711

        data = [row2dict(r)['uri'] for r in q.all()]
        return jsonify(data)
def getPortalsInfo():
    """Return one summary dict per active portal.

    Each dict holds the ``row2dict`` fields of the ``Portal`` row plus
    ``snCount``, ``snFirst``, ``snLast``, ``datasets`` and ``resources``,
    taken from the matching query columns.

    :return: list of portal summary dicts.
    """
    # Extra keys in the order of the query columns following Portal.
    extraKeys = ('snCount', 'snFirst', 'snLast', 'datasets', 'resources')

    with Timer(key="getPortalsInfo", verbose=True):
        rows = current_app.config['dbsession'].query(
            Portal, Portal.snapshot_count, Portal.first_snapshot,
            Portal.last_snapshot, Portal.datasetcount,
            Portal.resourcecount).filter(Portal.active == True)

        summaries = []
        for row in rows:
            info = {}
            info.update(row2dict(row[0]))
            for pos, key in enumerate(extraKeys, 1):
                info[key] = row[pos]
            summaries.append(info)
    return summaries
def contactPerOrga(Session, portal, snapshot, orga):
    """Collect contact values of an organisation's datasets in a portal.

    For every dataset of ``orga`` in the portal/snapshot (joined with its
    ``DatasetData`` and ``DatasetQuality`` rows) a
    ``portal_fetch_processors.Dataset`` is rebuilt from the stored raw
    data, converted with ``dict_to_dcat`` and passed to
    ``getContactPointValues``. Whenever that result has more than one
    element, its second element is collected.

    :param Session: database session used for the query.
    :param portal: portal object; its ``id`` and ``software`` are read.
    :param snapshot: snapshot the datasets must belong to.
    :param orga: organisation the datasets must belong to.
    :return: set of collected contact values.
    """
    datasetQuery = Session.query(Dataset)
    datasetQuery = datasetQuery.filter(Dataset.portalid == portal.id)
    datasetQuery = datasetQuery.filter(Dataset.snapshot == snapshot)
    datasetQuery = datasetQuery.filter(Dataset.organisation == orga)
    datasetQuery = datasetQuery.join(DatasetData,
                                     DatasetData.md5 == Dataset.md5)
    datasetQuery = datasetQuery.join(DatasetQuality,
                                     DatasetQuality.md5 == Dataset.md5)
    datasetQuery = datasetQuery.add_entity(DatasetData)
    datasetQuery = datasetQuery.add_entity(DatasetQuality)

    contacts = set()
    for row in datasetQuery:
        # row holds Dataset, DatasetData, DatasetQuality
        fields = row2dict(row)
        rebuilt = portal_fetch_processors.Dataset(snapshot=snapshot,
                                                  portalID=portal.id,
                                                  did=fields['id'],
                                                  data=fields['raw'],
                                                  status=200,
                                                  software=portal.software)
        rebuilt.dcat = dict_to_dcat(fields['raw'], portal)

        contact = getContactPointValues(rebuilt)
        if len(contact) > 1:
            contacts.add(contact[1])
    return contacts
def get_dataset(portalid, snapshot, datasetid):
    """Return the newest stored version of a dataset up to a snapshot.

    Selects the ``DatasetData`` of the dataset from the most recent
    snapshot that is not newer than ``snapshot``. On success the JSON
    response is passed through ``add_memento_header``; otherwise a JSON
    error with status code 404 is returned.

    :param portalid: id of the portal.
    :param snapshot: latest snapshot to consider.
    :param datasetid: id of the dataset.
    :return: the response object (dataset JSON or 404 error JSON).
    """
    session = current_app.config['dbsession']

    versionQuery = session.query(DatasetData)
    versionQuery = versionQuery.join(Dataset, DatasetData.md5 == Dataset.md5)
    versionQuery = versionQuery.filter(Dataset.snapshot <= snapshot)
    versionQuery = versionQuery.filter(Dataset.portalid == portalid)
    versionQuery = versionQuery.filter(Dataset.id == datasetid)
    versionQuery = versionQuery.order_by(Dataset.snapshot.desc())
    data = versionQuery.first()

    # Guard clause: nothing stored at or before the requested snapshot.
    if not data:
        message = ('There is no version of dataset ' + datasetid +
                   ' available that is older than ' +
                   str(tofirstdayinisoweek(snapshot)))
        resp = jsonify({'error': message, 'portalid': portalid})
        resp.status_code = 404
        return resp

    resp = jsonify(row2dict(data))
    portal = session.query(Portal).filter(Portal.id == portalid).first()
    graph = rdflib.Graph()
    dataset_ref = add_dcat_to_graph(data.raw, portal, graph, None)
    timegate = '<' + HOST + '/' + portalid + '/' + datasetid + '>'
    return add_memento_header(resp, dataset_ref.n3(), timegate, snapshot)
def licensesearch(uri=None):
    """Render the license search page, optionally with results for a URI.

    When ``uri`` is given, the ``(Dataset, DatasetData)`` pairs whose
    resources (``MetaResource``) have that URI are looked up in the
    snapshot one week before the snapshot of the current time.

    :param uri: resource URI to search for; ``None`` renders the page with
        empty data.
    :return: whatever ``render`` returns for ``odpw_license_search.jinja``.
    """
    with Timer(key="get_licensesearch", verbose=True):
        data = {}

        if uri is not None:
            cursn = getPreviousWeek(
                getSnapshotfromTime(datetime.datetime.now()))
            Session = current_app.config['dbsession']

            with Timer(key="query_licensesearch"):
                matchQuery = Session.query(Dataset, DatasetData)
                matchQuery = matchQuery.join(
                    MetaResource, Dataset.md5 == MetaResource.md5)
                matchQuery = matchQuery.join(
                    DatasetData, Dataset.md5 == DatasetData.md5)
                matchQuery = matchQuery.filter(Dataset.snapshot == cursn)
                matchQuery = matchQuery.filter(MetaResource.uri == uri)
                results = [row2dict(row) for row in matchQuery]

            data['uri'] = uri
            data['snapshot'] = cursn
            data['results'] = results

        return render("odpw_license_search.jinja", data=data)
def portalssize():
    """Render the scatter plot of portal sizes.

    Loads every portal together with its snapshot count, first and last
    snapshot, dataset count and resource count, and plots the resulting
    DataFrame with ``portalsScatter``.

    :return: whatever ``render`` returns for ``odpw_portals_stats.jinja``.
    """
    with Timer(key="get_portalsstats", verbose=True):
        Session = current_app.config['dbsession']

        with Timer(key="query_portalsstats", verbose=True):
            statsQuery = Session.query(Portal, Portal.snapshot_count,
                                       Portal.first_snapshot,
                                       Portal.last_snapshot,
                                       Portal.datasetcount,
                                       Portal.resourcecount)
            records = []
            for row in statsQuery:
                records.append(row2dict(row))
            df = pd.DataFrame(records)

        with Timer(key="plot_portalsstats", verbose=True):
            plot = portalsScatter(df)
            script, div = components(plot)
            jsInline = INLINE.render_js()
            cssInline = INLINE.render_css()

        return render("odpw_portals_stats.jinja",
                      plot_script=script,
                      plot_div=div,
                      js_resources=jsInline,
                      css_resources=cssInline)