def run_copy(docstore, out_docstore): doc = FileDocStore(file_path=docstore) out_doc = FileDocStore(file_path=out_docstore) for id, entry in doc.filter(): if out_doc.get(id) is None: print "copy", id out_doc.put(id, entry) if doc.exists(entry): src_path = doc.get_filename(entry) out_doc.create(entry) dst_path = out_doc.get_filename(entry) shutil.copy(src_path, dst_path) out_doc.update_from_file(entry) else: #print "skip", id, doc.size(entry), out_doc.size(entry) if doc.size(entry) != out_doc.size(entry): print "mismatch", id
def run_workflow(args):
    """Build, submit and harvest a batch of Galaxy workflow tasks.

    Master side: scans ``args['lib_data']`` for ``*.json`` sidecar files
    to map data uuids to file paths, opens the doc store named by
    ``args['doc_store']`` (only plain file paths are supported), then
    builds one GalaxyWorkflow task per input request file. Worker side:
    starts a Galaxy service, submits every task, polls until all jobs
    reach 'ok' or 'error', then stores each visible, non-input output
    back into the doc store.

    :param args: dict-like configuration; keys used include 'lib_data',
        'doc_store', 'inputs', 'workflow', 'yaml_workflow', 'galaxy',
        'tool_dir', 'tools', 'tool_data', 'config_dir', 'sudo', 'smp',
        'cpus', 'work_dir' and 'hold'.
    :raises Exception: for unsupported doc-store URL schemes or when an
        input uuid cannot be found in the library data map.
    """
    # Map uuid -> data file path for every <name>.json/<name> pair in
    # the library directory.
    data_map = {}
    for meta_path in glob(os.path.join(args['lib_data'], "*.json")):
        data_path = re.sub(r'.json$', "", meta_path)
        if os.path.exists(data_path):
            # Best effort: skip unreadable or malformed sidecar files.
            # (Was a bare `except: pass`, which also hid real bugs.)
            try:
                with open(meta_path) as handle:
                    meta = json.loads(handle.read())
                if 'uuid' in meta:
                    data_map[meta['uuid']] = data_path
            except (IOError, ValueError):
                logging.warning("Skipping unreadable metadata: %s" % (meta_path))

    d_url = urlparse(args['doc_store'])
    if d_url.scheme == '':
        doc = FileDocStore(file_path=d_url.path)
    else:
        # BUG FIX: previously referenced undefined `o_url`, so this
        # error path raised NameError instead of the intended message.
        raise Exception("Object Store type not supported: %s" % (d_url.scheme))

    # this side happens on the master node
    tasks = {}
    task_request = {}
    input_uuids = {}
    for i, input_file in enumerate(args['inputs']):
        with open(input_file) as handle:
            meta = json.loads(handle.read())
        inputs = {}
        for k, v in meta.get('ds_map').items():
            input_uuids[v['uuid']] = True
            t = Target(v['uuid'])
            if not doc.exists(t):
                # Input not in the doc store yet — stage it from the
                # library data map, or fail loudly if we can't find it.
                if t.uuid not in data_map:
                    raise Exception("Can't find input data: %s" % (t.uuid))
                doc.update_from_file(t, data_map[t.uuid], create=True)
                doc.put(t.uuid, t.to_dict())
            inputs[k] = t
        params = meta.get("parameters", {})
        task_name = 'task_%s' % (i)
        if args['workflow'] is not None:
            task = GalaxyWorkflow(task_name, args['workflow'],
                                  inputs=inputs, parameters=params,
                                  tags=meta.get("tags", None),
                                  galaxy=args['galaxy'],
                                  tool_dir=args['tool_dir'],
                                  tool_data=args['tool_data'])
        else:
            with open(args['yaml_workflow']) as handle:
                yaml_text = handle.read()
            # NOTE(review): this branch passes docker=args['galaxy'] and
            # tool_dir=args['tools'] (not 'tool_dir') — looks asymmetric
            # with the branch above; confirm these keys are intentional.
            task = GalaxyWorkflow(task_name, yaml=yaml_text,
                                  inputs=inputs, parameters=params,
                                  tags=meta.get("tags", None),
                                  docker=args['galaxy'],
                                  tool_dir=args['tools'],
                                  tool_data=args['tool_data'])
        task_request[task_name] = meta
        tasks[task_name] = task.get_task_data()

    # this side happens on the worker node
    service = ServiceFactory('galaxy',
                             objectstore=doc,
                             lib_data=[doc.file_path],
                             tool_dir=args['tool_dir'],
                             tool_data=args['tool_data'],
                             galaxy=args['galaxy'],
                             config_dir=args['config_dir'],
                             sudo=args['sudo'],
                             force=True,
                             tool_docker=True,
                             smp=args['smp'],
                             cpus=args['cpus'],
                             work_dir=args['work_dir'])
    service.start()

    # Submit everything, then poll all jobs until none are pending.
    task_job_ids = {}
    for task_name, task_data in tasks.items():
        task = TaskJob(task_data)
        i = service.submit(task)
        task_job_ids[task_name] = i
    sleep_time = 1
    while True:
        waiting = False
        for i in task_job_ids.values():
            status = service.status(i)
            logging.info("Status check %s %s" % (status, i))
            if status not in ['ok', 'error']:
                waiting = True
        if not waiting:
            break
        # Linear backoff, capped at one poll per minute.
        time.sleep(sleep_time)
        if sleep_time < 60:
            sleep_time += 1

    # move the output data into the datastore
    for task_name, i in task_job_ids.items():
        job = service.get_job(i)
        if job.error is None:
            for a in job.get_outputs():
                meta = service.get_meta(a)
                doc.put(meta['uuid'], meta)
                if meta.get('visible', True):
                    if meta['state'] == "ok":
                        # Don't re-download files we staged as inputs.
                        if meta['uuid'] not in input_uuids:
                            logging.info("Downloading: %s" % (meta['uuid']))
                            service.store_data(a, doc)
                        else:
                            logging.info("Skipping input file %s" % (a))
                    else:
                        logging.info("Skipping non-ok file: %s" % (meta['state']))
                else:
                    logging.info("Skipping Download %s (not visible)" % (a))
    logging.info("Done")
    if not args['hold']:
        service.stop()