def evaluate(w: Union[str, dict], allow_run=True):
    """Return a stored result for a workflow, or run the workflow and store it.

    Args:
        w: either a goal URI (``str``) or the workflow itself (``dict``).
            A URI is resolved to a workflow by looking up its ``oda:bucket``
            and restoring that bucket from the datalake.
        allow_run: whether the workflow may be run when no stored result
            is found.

    Returns:
        ``('restored', result)`` when a stored result exists;
        ``('ran', result)`` after a fresh run, where ``result`` is the run
        output merged over ``{'origin': "run"}``;
        ``None`` when nothing is stored and ``allow_run`` is false.

    Raises:
        Exception: ``"inconsistent storage"`` when the goal URI reported by
            ``store`` does not match the requested one.
        Any error raised by ``jsonschema.validate`` for a workflow that does
        not match ``workflow-schema.json``.
    """
    goal_uri = None

    if isinstance(w, str):
        goal_uri = w
        b = odakb.sparql.select_one(
            "{} oda:bucket ?b".format(render_uri(w)))['b']
        w = odakb.datalake.restore(b)

    # Read the schema inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("workflow-schema.json") as schema_file:
        schema = json.load(schema_file)
    jsonschema.validate(w, schema)

    print("evaluate this", w)

    r = restore(w)
    if r is not None:
        return 'restored', r

    if not allow_run:
        return None

    r = {'origin': "run", **odarun.run(w)}
    s = store(w, r)

    # NOTE(review): when `w` was passed as a dict, goal_uri is still None at
    # this point, so this check compares against nuri(None) -- confirm that
    # this is the intended behaviour for dict input.
    if nuri(s['goal_uri']) != nuri(goal_uri):
        print("stored goal uri", s['goal_uri'])
        print("requested goal uri", goal_uri)
        raise Exception("inconsistent storage")

    return 'ran', r
def offer_goal():
    """Offer one unreached goal to the caller as JSON.

    Query arguments:
        n: integer index into the list of unreached goals (default ``1``).
        f: optional value passed to ``get_goals`` as ``wf``.

    Returns:
        JSON ``{"goal_uri": ..., "goal": ...}`` for the selected goal, or
        JSON ``{"warning": "no goals"}`` when the list of unreached goals
        has no entry at index ``n``.
    """
    rdf_init()

    n = request.args.get('n', 1, type=int)
    f = request.args.get('f', None)

    # Goals are (re)designed on every request, before the unreached ones
    # are listed.
    design_goals()

    unreached_goals = get_goals("unreached", wf=f)

    # NOTE(review): a negative `n` passes this length check and then indexes
    # from the end of the list -- confirm whether that should be rejected.
    if len(unreached_goals) <= n:
        return jsonify(dict(warning="no goals"))

    goal_uri = unreached_goals[n]
    print("goal to offer", goal_uri)

    goal = get_data(goal_uri)

    # The URI recomputed from the stored goal must match the URI it was
    # listed under.
    assertEqual(nuri(goal_uri), nuri(w2uri(goal, "goal")))

    print("offering goal", goal)
    print("offering goal uri", goal_uri)

    return jsonify(dict(goal_uri=goal_uri, goal=goal))
def worker(url, dry_run, one_shot):
    """Poll a goal server, run each offered goal and report the outcome.

    Each iteration fetches ``url + "/offer-goal"`` with ``n`` set to the
    current skip count, validates the offered goal, runs it with
    ``odarun.run`` and, unless ``dry_run`` is set, posts the result to
    ``url + "/report-goal"``.

    Args:
        url: base URL of the goal server.
        dry_run: when true, run the goal but do not report the result.
        one_shot: when true, stop after the first goal that was run.

    Raises:
        Exception: when the URI recomputed from the offered goal differs
            from the offered goal URI.
    """
    rdf_init()

    pause = 15  # seconds to wait after a failure and between iterations
    nskip = 0

    while True:
        started = time.time()
        offer = requests.get(url + "/offer-goal", params={"n": nskip})
        logger.info("query took %.2lg seconds", time.time() - started)

        if offer.status_code != 200:
            logger.error("problem fetching goal: %s", offer)
            print(offer.text)
            time.sleep(pause)
            continue

        payload = offer.json()
        goal = payload.get('goal', None)

        if goal is None:
            logger.warning("no more goals! sleeping")
            time.sleep(pause)
            nskip = 0
            continue

        goal_uri = payload['goal_uri']

        logger.info("goal: %s", pprint.pformat(goal))
        logger.info("got goal uri: %s", goal_uri)

        validate_workflow(goal)

        computed_uri = nuri(w2uri(goal, "goal"))
        offered_uri = nuri(goal_uri)
        if computed_uri != offered_uri:
            raise Exception("goal uri mismatch:", computed_uri, offered_uri)

        try:
            data = odarun.run(goal)
        except odarun.UnsupportedCallType:
            # Ask for the next goal in the list on the following request.
            nskip += 1
            logger.error("has been offerred unsupported call type! we must have made wrong request; skipping to %i", nskip)
            time.sleep(pause)
            continue
        else:
            nskip = 0

        worker_info = {
            "hostname": socket.gethostname(),
            "time": time.time(),
        }

        if dry_run:
            print("dry run, not reporting")
        else:
            report = requests.post(
                url + "/report-goal",
                json={
                    "goal": goal,
                    "data": data,
                    "worker": worker_info,
                    "goal_uri": goal_uri,
                },
            )
            print(report.text)
            print(pprint.pformat(report.json()))

        if one_shot:
            break

        time.sleep(pause)
def test_testgoals(client):
    """Check that an offered goal moves from "unreached" to "reached".

    Designs the goals, lists all / unreached / reached goals, requests an
    offered goal and asserts it is listed as unreached and not as reached.
    It then calls the ``evaluate_one`` endpoint, lists the goals again and
    asserts that the same goal is now reached and no longer unreached.
    """
    from odakb.sparql import nuri
    import odatestsapp

    def listing(label, **query):
        # Fetch one goal listing, print a short summary and return its JSON.
        response = client.get(url_for("goals_get", **query))
        print(label, response, len(response.json), response.json[:3], "...")
        return response.json

    odatestsapp.design_goals()

    every_goal = listing("all")
    unreached = listing("unreached", f="unreached")
    reached = listing("reached", f="reached")

    offered = client.get(url_for("offer_goal")).json

    uri = nuri(offered['goal_uri']).strip("<>")

    print("goal", uri)
    print("in unr?", uri in unreached)
    print("in r?", uri in reached)
    print("in all?", uri in every_goal)

    assert uri in unreached
    assert uri not in reached

    evaluation = client.get(url_for("evaluate_one")).json

    uri = nuri(offered['goal_uri']).strip("<>")

    # These two checks still look at the listings fetched before the
    # evaluation request.
    assert uri in unreached
    assert uri not in reached

    unreached = listing("unreached", f="unreached")
    reached = listing("reached", f="reached")

    assert uri in reached
    assert uri not in unreached
def get_tests(f=None):
    """Select the workflows of the basic testkit, with domains and expectations.

    Args:
        f: optional extra query text appended to the workflow selection.

    Returns:
        A list with one entry per selected workflow. Each entry is the row
        returned by the selection, extended with ``'domains'`` (the rows of
        the ``oda:domain`` query for that workflow) and ``'expects'`` (a
        dict mapping each binding name to its expectation type).
    """
    selection = (
        " ?workflow oda:belongsTo oda:basic_testkit;"
        " a oda:test;"
        " a oda:workflow;"
        " oda:callType ?call_type;"
        " oda:callContext ?call_context;"
        " oda:location ?location ."
        " OPTIONAL { ?workflow dc:contributor ?email }"
        " NOT EXISTS { ?workflow oda:realm oda:expired } "
    )
    if f:
        selection = selection + f

    domain_query = " {workflow} oda:domain ?domain "
    expects_query = (
        " <{workflow}> oda:expects ?expectation ."
        " ?expectation a ?ex_type . "
    )
    skip = len("input_")

    collected = []
    for entry in odakb.sparql.select(query=selection):
        logger.info("selected workflow entry: %s", entry)

        entry['domains'] = odakb.sparql.select(
            query=domain_query.format(workflow=nuri(entry['workflow'])))

        expectations = odakb.sparql.select(
            query=expects_query.format(workflow=entry['workflow']))

        # The binding name is the part of the expectation URI after "#",
        # with its first len("input_") characters dropped.
        entry['expects'] = {
            row['expectation'].split("#")[1][skip:]: row['ex_type']
            for row in expectations
        }

        logger.info("test: \n" + pprint.pformat(entry))

        collected.append(entry)

    return collected
def design_goals(f=None):
    """Design test goals, register them and give bucketless goals a bucket.

    For every test returned by ``get_tests(f)`` and every expected input of
    that test, one goal is built per available option whose URI does not
    contain ``#input_``. Each goal is then emitted twice: once with the
    ``midnight_timestamp()`` and once with ``recent_timestamp(6000)`` as
    its ``timestamp`` input.

    All goals are inserted as ``oda:workflow`` / ``oda:testgoal`` entries.
    Every test goal that has no ``oda:bucket`` yet and is among the goals
    designed in this call is stored in the datalake and linked to the
    resulting bucket.

    Args:
        f: optional filter passed through to ``get_tests``.

    Returns:
        The list of designed goals, two timestamped variants per base goal.
    """
    goals = []
    for test in get_tests(f):
        logger.info("goal for test: %s", test)
        for bind, ex in test['expects'].items():
            for option in odakb.sparql.select('?opt a <%s>' % ex):
                if '#input_' not in option['opt']:
                    goals.append({
                        "base": test,
                        'inputs': {bind: option['opt']},
                    })

    tgoals = []
    for _g in goals:
        g = copy.deepcopy(_g)
        g['inputs']['timestamp'] = midnight_timestamp()
        tgoals.append(g)

        g = copy.deepcopy(_g)
        g['inputs']['timestamp'] = recent_timestamp(6000)
        tgoals.append(g)

    byuri = {}
    statements = []
    for goal in tgoals:
        goal_uri = w2uri(goal, "goal")
        byuri[goal_uri] = goal

        statements.append(
            "\n {goal_uri} a oda:workflow; a oda:testgoal; oda:curryingOf {base_uri} .".format(
                goal_uri=goal_uri,
                base_uri=nuri(goal['base']['workflow']),
            )
        )
    toinsert = "".join(statements)

    print("toinsert", toinsert[:300])

    # The insert is issued even when no goal was designed, as before.
    odakb.sparql.insert(toinsert)

    bucketless = odakb.sparql.select(
        "?goal_uri a oda:testgoal . NOT EXISTS { ?goal_uri oda:bucket ?b }",
        form="?goal_uri")

    statements = []
    for row in bucketless:
        # byuri is keyed by the value w2uri returns; shorten the full
        # namespace to the "data:" prefix before looking the goal up.
        goal_uri = row['goal_uri'].replace(
            "http://ddahub.io/ontology/data#", "data:")

        if goal_uri not in byuri:
            # Use the same logger as the rest of this function rather than
            # the root logger.
            logger.warning(
                "bucketless goal %s not currently designable: ignoring",
                goal_uri)
            continue

        print("bucketless goal:", goal_uri)

        bucket = odakb.datalake.store(byuri[goal_uri])

        assertEqual(nuri(w2uri(byuri[goal_uri], "goal")), nuri(goal_uri))

        statements.append(
            "\n {goal_uri} oda:bucket \"{bucket}\" .".format(
                goal_uri=goal_uri, bucket=bucket)
        )
    toinsert = "".join(statements)

    print("toinsert", len(toinsert))
    odakb.sparql.insert(toinsert)

    return tgoals
def expire_uri():
    """Mark the resource named by the ``uri`` query argument as expired.

    Inserts the statement ``<uri> oda:realm oda:expired`` for the given URI.

    Returns:
        JSON ``{"status": "ok"}`` after the insert, or a JSON error with
        HTTP status 400 when the ``uri`` query argument is missing or empty.
    """
    uri = request.args.get('uri')

    # Without this guard a missing argument reaches nuri() as None and ends
    # up formatted into the inserted statement.
    if not uri:
        return jsonify(dict(
            status="error",
            message="missing required query argument: uri",
        )), 400

    # NOTE(review): `uri` is caller-supplied and is formatted directly into
    # the inserted statement -- confirm that nuri() is enough to keep
    # arbitrary text out of it.
    odakb.sparql.insert(
        "{} oda:realm oda:expired".format(nuri(uri)),
    )

    return jsonify(dict(status="ok"))