def test_jsonpointer():
    tl = TypedLeafs(nested_data, datamodel)
    for p, v in tl.leafs():
        try:
            assert tl.jsonpointer(p.path).json() == v.json()
        except AttributeError:
            assert tl.jsonpointer(p.path) == v
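# The TypedLeafs tests above and below assume shared module-level fixtures.
# A minimal sketch of what they could look like follows; the exact leafmodel
# layout and the MyClass definition are assumptions, not the repository's
# actual fixtures.
class MyClass(object):
    def __init__(self, first_attr, second_attr):
        self.first_attr = first_attr
        self.second_attr = second_attr

    def json(self):
        # serialize back to the tagged-JSON form TypedLeafs stores
        return {'$type': 'MyClass',
                'first_attr': self.first_attr,
                'second_attr': self.second_attr}

    @classmethod
    def fromJSON(cls, data):
        return cls(data['first_attr'], data['second_attr'])


# hypothetical leafmodel: maps the '$type' tag to the Python class
datamodel = {'keyword': '$type', 'types': {'MyClass': MyClass}}

simple_data = {
    'hello': {'$type': 'MyClass', 'first_attr': 'hello', 'second_attr': 'world'}
}
nested_data = {
    'single_thing': dict(simple_data['hello']),
    'list_of_things': [dict(simple_data['hello']), dict(simple_data['hello'])]
}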
def test_jsonpath():
    tl = TypedLeafs(nested_data, datamodel)
    assert tl.jsonpath('single_thing').json() == tl['single_thing'].json()
    assert tl.jsonpath('list_of_things[*]',
                       multiple_output=True)[0].json() == tl['list_of_things'][0].json()
def test_modify():
    import jq
    tl = TypedLeafs(nested_data, datamodel)
    tlnew = TypedLeafs({
        '$type': 'MyClass',
        'second_attr': 'newsecond',
        'first_attr': 'newfirst'
    }, datamodel)
    tl['single_thing'] = tlnew.typed()
    assert type(tlnew.typed()) == MyClass
    assert tl['single_thing'].json() == tlnew.typed().json()
def pub(publisher, parameters, state):
    topublish = TypedLeafs(publisher['outputmap'], parameters.leafmodel)
    for p, v in topublish.leafs():
        value = parameters[v]
        if type(value) == known_types.SimpleFile:
            if not value.path:
                ensure_publicity(value, state)
        p.set(topublish, value)
    return topublish
def test_init():
    tl = TypedLeafs(simple_data, datamodel)
    assert type(tl['hello']) == MyClass
    assert tl['hello'].first_attr == 'hello'
    assert tl['hello'].second_attr == 'world'
    assert tl.json() == simple_data

    tl = TypedLeafs.fromJSON(simple_data,
                             deserialization_opts={'leafmodel': datamodel})
    assert tl.json() == simple_data
def test_jq():
    tl = TypedLeafs(nested_data, datamodel)
    assert tl.jq('.list_of_things[]',
                 multiple_output=True)[0].json() == tl['list_of_things'][0].json()
    assert tl.jq('.list_of_things[]',
                 multiple_output=True)[1].json() == tl['list_of_things'][1].json()
    assert tl.jq('[.list_of_things[]]').json() == nested_data['list_of_things']
def test_refs():
    import jq
    refs = TypedLeafs(nested_data, datamodel).asrefs()
    assert refs['list_of_things'][0].path == '/list_of_things/0'

    import jsonpointer
    jp = jsonpointer.JsonPointer('/list_of_things/0')
    tl = TypedLeafs(nested_data, datamodel)
    assert tl.resolve_ref(jp).json() == tl['list_of_things'][0].json()
def jq_stage(stage, spec):
    '''
    :param stage: common stage parent object
    :param spec: stage JSON-like spec
    :return: None
    '''
    binds = spec['bindings']
    binds = process_jsonlike(binds, 'has("$wflowref")',
                             lambda x: process_wflowref(x, stage.view))
    log.info('transforming binds: %s', binds)

    stagescript = spec['stepscript']
    singlesteps = jq.jq(stagescript).transform(binds, multiple_output=False)
    singlesteppars = map(
        lambda x: process_jsonlike(x, 'has("$wflowpointer")',
                                   process_wflowpointer), singlesteps)

    postscript = spec['postscript']
    for i, pars in enumerate(singlesteppars):
        singlename = '{}_{}'.format(stage.name, i)
        finalized, inputs = finalize_input(pars, stage.view)
        log.info('postscripting: %s', finalized)
        after_post = jq.jq(postscript).transform(finalized, multiple_output=False)
        after_post = TypedLeafs(after_post)
        log.info('finalized to: %s', after_post)
        addStepOrWorkflow(singlename, stage, after_post, inputs, spec)
    registerExpressions(stage, spec.get('register_values'))
def multistep_stage(stage, spec):
    '''
    a stage that attaches an array of nodes to the DAG. The number of nodes
    is determined by a scattering recipe. Currently two algorithms are
    supported:

    - ``zip``: one or more arrays of length n are iterated through in
      lock-step. n nodes are added to the DAG, with the parameter values set
      to the values of the iteration
    - ``cartesian``: a cartesian product of a number of arrays (possibly of
      different sizes) adds n1 x n2 x ... x nj nodes

    Nodes are attached to the DAG based on used upstream inputs.

    :param stage: common stage parent object
    :param spec: stage JSON-like spec
    :return: None
    '''
    log.debug('scheduling multistep stage with spec:\n%s', spec)
    parameters = {
        k: select_parameter(stage.view, v)
        for k, v in get_parameters(spec['parameters']).items()
    }
    singlesteppars = scatter(parameters, spec['scatter'],
                             spec.get('batchsize'), spec.get('partitionsize'))
    for i, pars in enumerate(singlesteppars):
        singlename = '{}_{}'.format(stage.name, i)
        finalized, inputs = finalize_input(pars, stage.view)
        finalized = TypedLeafs(
            finalized, getattr(stage.state_provider, 'datamodel', None))
        addStepOrWorkflow(singlename, stage, finalized, inputs, spec)
    registerExpressions(stage, spec.get('register_values'))
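# Illustration of the two scatter algorithms named in the docstring above.
# scatter_zip / scatter_cartesian are hypothetical stand-ins for the scatter()
# helper used by multistep_stage; only the shape of the output is being shown.
import itertools

def scatter_zip(parameters, keys):
    # lock-step iteration: arrays of equal length n yield n parameter sets
    return [dict(parameters, **dict(zip(keys, values)))
            for values in zip(*(parameters[k] for k in keys))]

def scatter_cartesian(parameters, keys):
    # cartesian product: arrays of sizes n1..nj yield n1 * ... * nj parameter sets
    return [dict(parameters, **dict(zip(keys, values)))
            for values in itertools.product(*(parameters[k] for k in keys))]

# scatter_zip({'a': [1, 2], 'b': ['x', 'y']}, ['a', 'b'])
#   -> [{'a': 1, 'b': 'x'}, {'a': 2, 'b': 'y'}]
# scatter_cartesian({'a': [1, 2], 'b': ['x', 'y']}, ['a', 'b'])
#   -> [{'a': 1, 'b': 'x'}, {'a': 1, 'b': 'y'},
#       {'a': 2, 'b': 'x'}, {'a': 2, 'b': 'y'}]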
def main():
    submissionspec = sys.argv[1]
    log.info('staging out data according to specfile %s', submissionspec)
    subdata = json.load(open(submissionspec))
    pubspec = subdata['publisher_spec']
    parameters = subdata['parameters']
    state = subdata['state']
    resultfile = subdata['resultfile']

    log.info('pub: \n' + json.dumps(pubspec, indent=4))
    log.info('pars: \n' + json.dumps(parameters, indent=4))
    log.info('stat: \n' + json.dumps(state, indent=4))

    ydgconfig = json.load(open(os.environ.get('YDGCONFIG', 'ydgconfig.json')))

    state = LocalFSGlobalObjectsState.fromJSON(state)
    parameters = TypedLeafs(parameters, state.datamodel)

    teardown_spec, pubdata = publish(pubspec, parameters, state)
    for upload in teardown_spec['uploads']:
        state.put_file(upload['source'], upload['target'])

    with open('result.json', 'wb') as fl:
        fl.write(json.dumps(pubdata).encode('utf-8'))

    client = Minio(ydgconfig['resultstorage']['host'],
                   access_key=ydgconfig['resultstorage']['access_key'],
                   secret_key=ydgconfig['resultstorage']['secret_key'],
                   secure=True)
    client.fput_object(ydgconfig['resultstorage']['bucket'], resultfile,
                       'result.json')
    log.info('writing result data to: %s', resultfile)
def create(data, model=None):
    dmimpl = os.environ.get("PACKTIVITY_DATAMODEL_IMPL", "typedleafs")
    if dmimpl == "typedleafs":
        return TypedLeafs(data, model)
    elif dmimpl == "purejson":
        return PureJsonModel(data, model)
    else:
        raise RuntimeError("unknown implementation")
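# Usage sketch for the factory above: the data-model implementation is chosen
# via the PACKTIVITY_DATAMODEL_IMPL environment variable, defaulting to
# "typedleafs". The data/model values here are illustrative only.
import os

os.environ['PACKTIVITY_DATAMODEL_IMPL'] = 'purejson'
doc = create({'hello': 'world'})   # -> PureJsonModel instance

os.environ['PACKTIVITY_DATAMODEL_IMPL'] = 'typedleafs'
doc = create({'hello': 'world'})   # -> TypedLeafs instance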
def fromparpub_handler(spec, parameters, state):
    topublish = {}
    for targetname, sourcename in spec['outputmap'].items():
        value = parameters[sourcename]
        if type(value) == yadageobjstore.known_types.SimpleFile:
            value.local_path = value.local_path.format(
                workdir=state.local_workdir)
        topublish[targetname] = value
    return TypedLeafs(topublish, state.datamodel)
def teardown_spec(topublish, state):
    teardown_spec = {'uploads': []}
    for p, value in topublish.leafs():
        if type(value) == yadageobjstore.known_types.SimpleFile:
            if not value.path:
                log.info('this has no public path, so we need to upload it %s',
                         value.json())
                teardown_spec['uploads'].append(upload_spec(value, state))
        topublish.replace(p, TypedLeafs(value, state.datamodel).json())
    log.info('topublish:\n%s', topublish.json())
    return teardown_spec
def cachedresult(self, cacheid, state, silent=True):
    '''
    returns the cached result. when silent = True the method exits
    gracefully and returns None
    '''
    if silent:
        if not self.cacheexists(cacheid):
            return None
    return {
        'result': TypedLeafs(self.cache[cacheid]['result']['result'],
                             state.datamodel),
        'status': self.cache[cacheid]['result']['status']
    }
def fromJSON(cls, data, deserialization_opts=None):
    if data['task']['type'] == 'packtivity_task':
        task = packtivity_task.fromJSON(data['task'], deserialization_opts)
        result = TypedLeafs(
            data['result'],
            getattr(task.state, 'datamodel', {})) if data['result'] else None
        instance = cls(data['name'], task, data['id'], result)
        adage.serialize.set_generic_data(instance, data)
        instance.resultproxy = load_proxy(
            data['proxy'], deserialization_opts,
            best_effort_backend=False) if data['proxy'] else None
        return instance
    else:
        raise RuntimeError('unknown task type', data['task']['type'])
def singlestep_stage(stage, spec):
    '''
    a simple stage that adds a single step/workflow. The node is attached
    to the DAG based on used upstream outputs

    :param stage: common stage parent object
    :param spec: stage JSON-like spec
    :return: None
    '''
    log.debug('scheduling singlestep stage with spec:\n%s', spec)
    parameters = {
        k: select_parameter(stage.view, v)
        for k, v in get_parameters(spec['parameters']).items()
    }
    finalized, inputs = finalize_input(parameters, stage.view)
    finalized = TypedLeafs(finalized,
                           getattr(stage.state_provider, 'datamodel', None))
    addStepOrWorkflow(stage.name, stage, finalized, inputs, spec)
    registerExpressions(stage, spec.get('register_values'))
def interpolated_pub_handler(publisher, parameters, state):
    workdir = state.local_workdir
    forinterp = {}
    for p, v in parameters.leafs():
        if isinstance(v, yadageobjstore.known_types.SimpleFile):
            continue
        p.set(forinterp, v)
    log.info('interpolation dict: %s', forinterp)

    result = copy.deepcopy(publisher['publish'])
    for path, value in leaf_iterator(publisher['publish']):
        if not isinstance(value, string_types):
            continue
        resultval = value.format(**forinterp)
        resultval = resultval.format(workdir=workdir)
        globexpr = resultval
        log.info('value: %s | expression %s', value, globexpr)
        if publisher['relative_paths'] and os.path.commonprefix(
                [workdir, globexpr]) == '':
            globexpr = os.path.join(workdir, resultval)
        if publisher['glob']:
            globbed = glob2.glob(globexpr)
            if globbed:
                resultval = [
                    yadageobjstore.known_types.SimpleFile(local_path=p)
                    for p in globbed
                ]
        else:
            # if it's a string and the full path exists, replace the relative path
            resultval = yadageobjstore.known_types.SimpleFile(
                local_path=globexpr)
        log.info('result value: %s', resultval)
        path.set(result, resultval)
    log.info('returning result: %s', result)
    return TypedLeafs(result, state.datamodel)
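# A hypothetical publisher spec for interpolated_pub_handler above: string
# values under 'publish' are first interpolated with the non-file parameters,
# then with '{workdir}', and finally either globbed or wrapped as a single
# SimpleFile. The key name and glob pattern below are illustrative only.
example_publisher = {
    'publish': {'outputfiles': '{workdir}/out/*.root'},
    'relative_paths': True,
    'glob': True,
}
# interpolated_pub_handler(example_publisher, parameters, state) would then
# publish a list of SimpleFile objects, one per file matching the glob.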
class packtivity_task(object):
    '''
    packtivity task
    '''

    def __init__(self, name, spec, state, parameters=None, inputs=None):
        self.metadata = {'name': name}
        self.inputs = inputs or []
        self.parameters = TypedLeafs(parameters or {},
                                     state.datamodel if state else None)
        self.spec = spec
        self.state = state

    def pubOnlyTask(self):
        return (self.spec['environment'] is None) and (self.spec['process'] is None)

    # (de-)serialization
    @classmethod
    def fromJSON(cls, data, deserialization_opts=None):
        instance = cls(
            data['metadata']['name'],
            data['spec'],
            load_state(data['state'], deserialization_opts)
            if data['state'] else None,
            data['parameters'],
            # materialize the map so inputs stay re-iterable on Python 3
            inputs=list(map(outputReference.fromJSON, data['inputs'])))
        instance.metadata.update(**data['metadata'])
        return instance

    def json(self):
        return {
            'metadata': self.metadata,
            'parameters': self.parameters.json(),
            'inputs': [x.json() for x in self.inputs],
            'type': 'packtivity_task',
            'spec': self.spec,
            'state': self.state.json() if self.state else None,
        }
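# Round-trip sketch for the packtivity_task (de-)serialization above: json()
# output feeds fromJSON() back into an equivalent task. The spec keys and
# parameter values here are hypothetical.
task = packtivity_task(
    'mytask',
    spec={'environment': None, 'process': None, 'publisher': None},
    state=None,
    parameters={'msg': 'hello'})
assert task.pubOnlyTask()

roundtripped = packtivity_task.fromJSON(task.json())
assert roundtripped.json() == task.json()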
def result(self, resultproxy):
    return TypedLeafs(resultproxy.resultdata, resultproxy.datamodel)
def test_deepnest():
    tl = TypedLeafs(nested_data, datamodel)
    paths = [p.path for p, v in tl.leafs()]
    assert set(paths) == set(
        ['/list_of_things/0', '/list_of_things/1', '/single_thing'])