def split_job(job):
    parallel_input = None
    for input_name, input_val in six.iteritems(job.inputs):
        io = job.app.get_input(input_name)
        val_d = Executor.depth(input_val)
        if val_d == io.depth:
            continue
        if val_d > io.depth + 1:
            raise RabixError("Depth difference too large")
        if val_d < io.depth:
            raise RabixError("Insufficient dimensionality")
        if parallel_input:
            raise RabixError(
                "Already parallelized by input '%s'" % parallel_input)
        parallel_input = input_name

    if parallel_input:
        jobs = []
        for i, val in enumerate(job.inputs[parallel_input]):
            inputs = copy.deepcopy(job.inputs)
            inputs[parallel_input] = val
            jobs.append(Job(job.id + "_" + six.text_type(i),
                            job.app, inputs, {}, job.context))
        return jobs
    else:
        return job
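# Hedged usage sketch for split_job (the input name and values below are
# invented for illustration, not from the source): a job whose input value
# is one level deeper than the port's declared depth gets fanned out into
# one job per element. The same fan-out, in plain Python:
inputs = {'reads': ['a.fq', 'b.fq']}          # port depth 0, value depth 1
fanned_out = [{'reads': v} for v in inputs['reads']]
assert fanned_out == [{'reads': 'a.fq'}, {'reads': 'b.fq'}]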
def __init__(self, context, schema):
    if 'type' not in schema:
        raise RabixError(
            "Invalid JSON schema: schema doesn't have a type.")
    if schema['type'] != 'object':
        raise RabixError("Invalid JSON schema: schema type isn't 'object'")
    if 'properties' not in schema:
        raise RabixError(
            "Invalid JSON schema: schema doesn't have properties")
    self.schema = schema
    self.schema['@type'] = 'JsonSchema'
    required = schema.get('required', [])
    self.io = [
        IO.from_dict(context, {
            '@id': k,
            'schema': v,
            'required': k in required,
            'annotations': v.get('adapter')
        })
        for k, v in six.iteritems(schema['properties'])
    ]
def run_container(client, from_img, kwargs, container_kwargs):
    cmd = kwargs.pop('cmd', None)
    if not cmd:
        raise RabixError("Commands ('cmd') not specified!")
    repo, tag = parse_repository_tag(from_img)
    img = get_image(client, from_img)
    if not img:
        # report the requested image name; 'img' is None at this point
        raise RabixError("Unable to find image: %s" % from_img)
    mount_point = kwargs.pop('mount_point', MOUNT_POINT)
    run_cmd = make_cmd(cmd, join=True)
    container = Container(client, img['Id'], "{}:{}".format(repo, tag),
                          run_cmd, volumes={mount_point: {}},
                          working_dir=mount_point, **container_kwargs)
    container.start({abspath('.'): mount_point})
    container.write_stdout()
    return container
def run(self, config):
    steps = config['steps']
    for step in steps:
        step_name, step_conf = step.popitem()
        type_name = step_conf.pop('type', None)
        if not type_name:
            raise RabixError("Step type not specified!")
        step_type = self.types.get(type_name)
        if not step_type:
            raise RabixError("Unknown step type: %s" % type_name)
        resolved = {
            k: self.resolve(v) for k, v in six.iteritems(step_conf)
        }
        img = resolved.pop('from', None)
        if not img:
            raise RabixError("Base image ('from') not specified!")
        log.info("Running step: %s" % step_name)
        self.context[step_name] = step_type(self.docker, img, **resolved)
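# Illustrative config dict for run() above (the step name, type, image, and
# command are assumptions, not from the source): each step is a single-item
# mapping whose value names a 'type' registered in self.types and a base
# image under 'from'; the remaining keys are resolved and passed through.
example_config = {
    'steps': [
        {'compile': {
            'type': 'run',
            'from': 'ubuntu:14.04',
            'cmd': ['make', 'install'],
        }},
    ],
}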
def remap(self, mappings):
    if not self.islocal():
        raise RabixError("Can't remap non-local paths")
    if not isabs(self.path):
        raise RabixError("Can't remap non-absolute paths")
    for k, v in six.iteritems(mappings):
        if self.path.startswith(k):
            suffix = self.path[len(k):]
            return URL(v + suffix)
    return self
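# Standalone sketch of the prefix remapping remap() performs (the paths are
# invented): the first mapping whose key prefixes the path wins, and paths
# that match no mapping pass through unchanged.
mappings = {'/home/user/data': '/mnt/data'}
path = '/home/user/data/sample.bam'
for prefix, target in mappings.items():
    if path.startswith(prefix):
        path = target + path[len(prefix):]
        break
assert path == '/mnt/data/sample.bam'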
def __init__(self, process_id, inputs, outputs, requirements, hints,
             label, description, steps, context, data_links=None):
    super(Workflow, self).__init__(process_id, inputs, outputs,
                                   requirements, hints, label, description)
    self.graph = Graph()
    self.executor = context.executor
    self.steps = steps
    self.data_links = data_links or []
    self.context = context
    self.port_step_index = {}

    for step in steps:
        node = AppNode(step.app, {})
        self.add_node(step.id, node)
        for inp in step.inputs:
            self.port_step_index[inp.id] = step.id
            self.move_connect_to_datalink(inp)
            if inp.value:
                node.inputs[inp.id] = inp.value
        for out in step.outputs:
            self.port_step_index[out.id] = step.id

    for inp in self.inputs:
        self.add_node(inp.id, inp)

    for out in self.outputs:
        self.move_connect_to_datalink(out)
        self.add_node(out.id, out)

    for dl in self.data_links:
        dst = dl['destination'].lstrip('#')
        src = dl['source'].lstrip('#')
        if src in self.port_step_index and dst in self.port_step_index:
            rel = Relation(src, dst, dl.get('position', 0))
            src = self.port_step_index[src]
            dst = self.port_step_index[dst]
        elif src in self._inputs:
            rel = InputRelation(dst, dl.get('position', 0))
            dst = self.port_step_index[dst]
        elif dst in self._outputs:
            rel = OutputRelation(src, dl.get('position', 0))
            src = self.port_step_index[src]
        else:
            raise RabixError("Invalid data link: %s" % dl)
        self.graph.add_edge(src, dst, rel)

    if not self.graph.connected():
        pass  # TODO: disconnected workflow graphs are currently tolerated
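# Hedged example of the data_links format the constructor consumes (the
# port IDs are invented): sources and destinations are '#'-prefixed port
# IDs, and the optional 'position' orders multiple links into one port.
example_data_links = [
    {'source': '#reads', 'destination': '#align.reads'},           # workflow input -> step
    {'source': '#align.bam', 'destination': '#sort.bam',
     'position': 0},                                               # step -> step
    {'source': '#sort.sorted', 'destination': '#sorted_bam'},      # step -> workflow output
]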
def start(self, binds=None, port_bindings=None):
    try:
        self.docker_client.start(container=self.container, binds=binds,
                                 port_bindings=port_bindings)
    except APIError:
        logging.error('Failed to run container %s' % self.container)
        raise RabixError('Unable to run container from image %s'
                         % self.image_id)
def log_level(int_level):
    if int_level <= 0:
        level = logging.WARN
    elif int_level == 1:
        level = logging.INFO
    elif int_level >= 2:
        level = logging.DEBUG
    else:
        raise RabixError("Invalid log level: %s" % int_level)
    return level
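# Usage sketch: map a verbosity count (e.g. the number of -v flags on the
# command line; the CLI wiring is an assumption, not from the source) onto
# a logging level.
import logging
for verbosity, expected in [(0, logging.WARN), (1, logging.INFO),
                            (2, logging.DEBUG)]:
    assert log_level(verbosity) == expected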
def resolve_input(self, input_port, results):
    input_count = self.input_counts[input_port]
    if input_count <= 0:
        raise RabixError("Input already satisfied")
    self.input_counts[input_port] = input_count - 1
    prev_result = self.inputs.get(input_port)
    if prev_result is None:
        self.inputs[input_port] = results
    elif isinstance(prev_result, list):
        prev_result.append(results)
    else:
        self.inputs[input_port] = [prev_result, results]
    return self.resolved
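# Plain-Python sketch of the accumulation above (the values are invented):
# the first result occupies the slot directly, and later results promote it
# to a list, so a port fed by several links ends up with an ordered list.
slot = None
for incoming in ['a', 'b', 'c']:
    if slot is None:
        slot = incoming
    elif isinstance(slot, list):
        slot.append(incoming)
    else:
        slot = [slot, incoming]
assert slot == ['a', 'b', 'c']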
def run(self, cmd, job_dir):
    if not os.path.isabs(job_dir):
        raise RabixError('job_dir must be an absolute path.')
    for k, v in six.iteritems(self.binds):
        if job_dir.startswith(k):
            working_dir = '/'.join([v, job_dir[len(k):]])
            break
    else:
        raise RabixError("Invalid working dir: " + job_dir)
    cfg = {
        "Image": self.image_id,
        "User": self.user,
        "Volumes": self.volumes,
        "WorkingDir": working_dir
    }
    self.config = make_config(**cfg)
    self._start(cmd)
    self.get_stderr(file='/'.join([job_dir, 'out.err']))
    if not self.is_success():
        raise RabixError("Tool failed:\n%s" % self.get_stderr())
def build(client, from_img, **kwargs):
    container = run_container(client, from_img, kwargs, {})
    if container.is_success():
        message = kwargs.pop('message', None)
        register = kwargs.pop('register', {})
        cfg = {"Cmd": []}
        cfg.update(**kwargs)
        container.commit(message, cfg,
                         repository=register.get('repo'),
                         tag=register.get('tag'))
    else:
        raise RabixError("Build failed!")
    return container.produced_image['Id']
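# Hedged call sketch for build() above; the image, command, message, and
# registry names are invented, and docker.Client reflects the docker-py API
# of the era this code targets (newer docker-py calls it docker.APIClient).
# 'register' controls the repository and tag used when committing.
import docker
client = docker.Client()
image_id = build(
    client, 'ubuntu:14.04',
    cmd=['make', 'install'],
    message='install build tools',
    register={'repo': 'example/tools', 'tag': 'latest'},
)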
def install(self, *args, **kwargs):
    image = get_image(self.docker_client,
                      image_id=self.image_id,
                      repo=self.uri)
    if not image:
        log.info('Image %s not found' % self.image_id)
        raise RabixError('Image %s not found' % self.image_id)
    # if not image['Id'].startswith(self.image_id):
    #     raise RabixError(
    #         'Wrong id of pulled image: expected "%s", got "%s"'
    #         % (self.image_id, image['Id'])
    #     )
    self.image_id = image['Id']
def update_engines(process):
    eer = process.get_requirement(ExpressionEngineRequirement)
    if not eer:
        return
    engine = None
    if eer.id:
        engine = ExpressionEvaluator.get_engine_by_id(eer.id)
    if not engine and eer.docker_image:
        engine = ExpressionEvaluator.get_engine_by_image(eer.docker_image)
    if not engine:
        raise RabixError("Unsupported expression engine: {}".format(
            eer.id or eer.docker_image))
    engine.ids.add(eer.id)
    if eer.engine_config:
        engine.engine_config = eer.engine_config
def process_builder(context, d):
    if not isinstance(d, dict):
        return d
    # default to empty lists so documents without inputs/outputs don't crash
    inputs = d.get('inputs', [])
    outputs = d.get('outputs', [])
    schemas = []
    for s in context.get_requirement(SchemaDefRequirement):
        schemas.extend(s.types)
    for i in inputs:
        i['type'] = make_avro(i['type'], schemas)
    for o in outputs:
        o['type'] = make_avro(o['type'], schemas)
    process = context.from_dict(d)
    for req in process.requirements:
        if isinstance(req, dict):
            raise RabixError("Can't fulfill requirement: " +
                             req.get('class'))
    return process
def run(client, from_img, **kwargs):
    container = run_container(client, from_img, kwargs, kwargs)
    if not container.is_success():
        raise RabixError(container.docker_client.logs(container.container))
def build(path='.rabix.yml'):
    if not isfile(path):
        raise RabixError('Config file %s not found!' % path)
    with open(path) as cfg:
        # safe_load avoids constructing arbitrary objects from the config
        config = yaml.safe_load(cfg)
    run_steps(config)
def run(self, cmd):  # should be run(self, cmd_line, job)
    self._start(cmd)
    self.get_stderr(
        file='/'.join([os.path.abspath(self.job_dir), self.stderr]))
    if not self.is_success():
        raise RabixError("Tool failed:\n%s" % self.get_stderr())
def run(self, job, job_dir=None):
    self.load_input_content(job)
    job_dir = os.path.abspath(job_dir or job.id)
    if not job_dir.endswith('/'):
        job_dir += '/'
    if not os.path.exists(job_dir):
        os.mkdir(job_dir)
        os.chmod(job_dir, os.stat(job_dir).st_mode |
                 stat.S_IROTH | stat.S_IWOTH)
    self.cli_job = CLIJob(job)
    resolver = ValueResolver(job)  # renamed from 'eval' to avoid shadowing the builtin

    cfr = self.get_requirement_or_hint(CreateFileRequirement)
    if cfr:
        cfr.create_files(job_dir, resolver)

    env = None
    evr = self.get_requirement_or_hint(EnvVarRequirement)
    if evr:
        env = evr.var_map(resolver)

    self.ensure_files(job, job_dir)
    self.install(job=job)
    abspath_job = Job(job.id, job.app, copy.deepcopy(job.inputs),
                      job.allocated_resources, job.context)
    cmd_line = self.command_line(job, job_dir)
    log.info("Running: %s" % cmd_line)
    self.job_dump(job, job_dir)

    if self.container:
        self.container.run(cmd_line, job_dir, env)
    else:
        ret = subprocess.call(['bash', '-c', cmd_line], cwd=job_dir)
        if ret != 0:
            raise RabixError("Command failed with exit status %s" % ret)

    result_path = os.path.abspath(job_dir) + '/cwl.output.json'
    if os.path.exists(result_path):
        with open(result_path, 'r') as res:
            outputs = json.load(res)
    else:
        with open(result_path, 'w') as res:
            outputs = self.cli_job.get_outputs(os.path.abspath(job_dir),
                                               abspath_job)
            json.dump(job.context.to_primitive(outputs), res)
    self.unmap_paths(outputs)

    def write_rbx(f):
        # every output File gets a '<path>.rbx.json' sidecar with its metadata
        if isinstance(f, File):
            with open(f.path + '.rbx.json', 'w') as rbx:
                json.dump(f.to_dict(), rbx)

    map_rec_collection(write_rbx, outputs)
    return outputs