def link_file(container_guid, lfn):
    """
    GET: /pilot/file/<container_guid>/<lfn>/link

    Returns ftp link to file

    :param container_guid: Guid of container
    :type container_guid: str
    :param lfn: Local FileName
    :type lfn: str
    :return: lfn/guid/ftp
    :rtype: json
    :raises WebpandaError: if the container, the default storage element or
        a ready replica of the file on that storage element is not found
    """
    # Only the part after the last ':' is used as the container guid
    if ':' in container_guid:
        container_guid = container_guid.split(':')[-1]

    container = conts_.first(guid=container_guid)
    if container is None:
        raise WebpandaError('Container not found')

    site = sites_.first(se=current_app.config['DEFAULT_SE'])
    if site is None:
        raise WebpandaError('StorageElement not found')

    for item in container.files:
        f = item.file
        if f.lfn != lfn:
            continue
        # Only a ready replica on the default storage element can be linked
        for r in f.replicas:
            if r.se == site.se and r.status == 'ready':
                data = {
                    'lfn': f.lfn,
                    'guid': f.guid,
                    'ftp': getFtpLink(r.lfn),
                }
                return make_response(jsonify(data), 200)

    raise WebpandaError('File not found')
def file_info(container_guid, lfn):
    """
    GET: /pilot/file/<container_guid>/<lfn>/info

    Returns file metadata

    :param container_guid: Guid of container
    :type container_guid: str
    :param lfn: Local FileName
    :type lfn: str
    :return: lfn/guid/modification_time/fsize/adler32/md5sum
    :rtype: json
    :raises WebpandaError: if the container or the file is not found
    """
    # Only the part after the last ':' is used as the container guid
    if ':' in container_guid:
        container_guid = container_guid.split(':')[-1]

    container = conts_.first(guid=container_guid)
    if container is None:
        raise WebpandaError('Container not found')

    for item in container.files:
        f = item.file
        if f.lfn == lfn:
            data = {
                'lfn': f.lfn,
                'guid': f.guid,
                'modification_time': str(f.modification_time),
                'fsize': f.fsize,
                # The record's "checksum" field is published as adler32
                'adler32': f.checksum,
                'md5sum': f.md5sum,
            }
            return make_response(jsonify(data), 200)

    raise WebpandaError('File not found')
def get_token_by_code(code):
    """
    Exchanges an authorization code for an access token.

    Sends an ``authorization_code`` grant to ``AUTH_TOKEN_ENDPOINT`` using
    HTTP Basic client credentials taken from the application config.

    :param code: Authorization code to exchange
    :type code: str
    :return: Value of ``access_token`` from the endpoint response
    :raises WebpandaError: if the response reports an error or carries no
        access token
    """
    url = current_app.config["AUTH_TOKEN_ENDPOINT"]
    redirect_uri = current_app.config["AUTH_REDIRECT_URI"]
    client = current_app.config["AUTH_CLIENT"]
    secret = current_app.config["AUTH_SECRET"]

    headers = {
        "Authorization": "Basic {code}".format(code=encode_cred(client, secret)),
    }
    params = {
        'grant_type': "authorization_code",
        "code": code,
        "redirect_uri": redirect_uri,
    }

    # NOTE(review): verify=False disables TLS certificate verification for
    # the token request; kept as is, but should be confirmed as intended.
    rv = requests.post(url, data=params, headers=headers, verify=False)
    data = rv.json()

    # Only access_token is needed. Other fields (refresh_token, id_token, ...)
    # are not read, so a response without them no longer fails with KeyError.
    if "error" not in data and "access_token" in data:
        return data["access_token"]

    raise WebpandaError("Bad get_token response: " + str(data))
def send_job_(task, container, script):
    """
    Creates a pending job for the task and queues it for sending.

    :param task: Task obj to append a job
    :param container: Container obj for job
    :param script: str Trf script
    :return: True
    :raises WebpandaError: if the default ComputingElement is not registered
    """
    # Default ComputingElement the job will be sent to
    default_ce = sites_.first(ce=current_app.config['DEFAULT_CE'])
    if default_ce is None:
        raise WebpandaError("ComputingElement not found")

    # Distributive comes from the task type
    distributive = task.task_type.distr

    # Describe the job
    job_obj = Job()
    job_obj.pandaid = None
    job_obj.status = 'pending'
    job_obj.owner = users_.get(task.owner_id)
    # The trf script is stored base64-encoded
    job_obj.params = b64encode(script)
    job_obj.distr = distributive
    job_obj.container = container
    job_obj.creation_time = datetime.utcnow()
    job_obj.modification_time = datetime.utcnow()
    job_obj.corecount = 1
    job_obj.tags = task.tag
    jobs_.save(job_obj)

    # Hand the saved job over to the async sender
    async_send_job.delay(jobid=job_obj.id, siteid=default_ce.id)

    return True
def new_replica(container_guid, lfn, se):
    """
    POST: /pilot/file/<container_guid>/<lfn>/makereplica/<se>

    Creates task to make new file replica

    :param container_guid: Guid of container
    :type container_guid: str
    :param lfn: Local FileName
    :type lfn: str
    :param se: SE codename
    :type se: str
    :return: Id of task, or the status of a replica that already exists on se
    :rtype: json
    :raises WebpandaError: if the container or the file is not found, or the
        file has no replicas to clone from
    """
    if sites_.find(se=se).count() == 0:
        return make_response(jsonify({'error': 'SE not found'}), 400)

    # Only the part after the last ':' is used as the container guid
    if ':' in container_guid:
        container_guid = container_guid.split(':')[-1]

    container = conts_.first(guid=container_guid)
    if container is None:
        raise WebpandaError('Container not found')

    for item in container.files:
        f = item.file
        if f.lfn != lfn:
            continue

        replicas = f.replicas
        if f.replicas.count() == 0:
            raise WebpandaError('No replicas available')

        source_replica = None
        for r in replicas:
            # A replica on the target SE already exists: report its status
            if r.se == se:
                return {'status': r.status}
            # Prefer the replica on the default SE as the clone source
            # (its status is not checked here)
            if r.se == current_app.config['DEFAULT_SE']:
                source_replica = r

        # Otherwise fall back to the first replica
        if source_replica is None:
            source_replica = replicas[0]

        task = async_cloneReplica.delay(source_replica.id, se)
        return {'task_id': task.id}

    raise WebpandaError('File not found')
def sso_get_user(token):
    """
    Asks the userinfo endpoint who owns the given bearer token.

    :param token: Access token sent as ``Authorization: Bearer``
    :return: Part of the response ``sub`` field after its last ':'
    :raises WebpandaError: if the response contains an ``error`` key
    """
    endpoint = current_app.config["AUTH_USERINFO_ENDPOINT"]
    auth_headers = {"Authorization": "Bearer {code}".format(code=token)}

    # Certificate verification is switched off for this request
    response = requests.get(endpoint, headers=auth_headers, verify=False)
    payload = response.json()

    if "error" in payload.keys():
        raise WebpandaError("Bad sso_get_user response: " + str(payload))

    return payload['sub'].split(":")[-1]
def rm(self, lfn, rel=True):
    """
    Removes a file on the remote host by running ``rm`` over ssh.

    :param lfn: File path; prefixed with ``self.datadir`` when rel is true
    :param rel: Treat lfn as relative to ``self.datadir``
    :return: True
    :raises WebpandaError: if the command could not be launched
    """
    if rel:
        lfn = self.datadir + lfn
    _logger.debug('HPC: Try to rm file %s' % lfn)
    try:
        # The ssh command line is fed to a bash process through stdin
        proc = subprocess.Popen(['/bin/bash'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        proc.communicate("ssh -i %s %s@%s 'rm %s'" % (self.key, self.user, self.host, lfn))
    except Exception:
        # "except Exception" instead of a bare except, so that
        # KeyboardInterrupt/SystemExit are no longer turned into WebpandaError
        raise WebpandaError('Unable to rm:%s' % lfn)
    # NOTE(review): the exit status of ssh/rm is not checked, so True is
    # returned even when the remote command fails - confirm this is intended.
    # NOTE(review): lfn is interpolated into a shell command unquoted.
    return True
def stage_in(container_guid, lfn):
    """
    POST: /pilot/file/<container_guid>/<lfn>/copy

    Creates task to copy file in path on se

    Reads ``to_se`` and ``to_path`` from the request form.

    :param container_guid: Guid of container
    :type container_guid: str
    :param lfn: Local FileName
    :type lfn: str
    :return: Id of task
    :rtype: json
    :raises WebpandaError: if a form parameter is missing, the container or
        the file is not found, or the file has no ready replica
    """
    args = request.form
    if 'to_se' not in args or 'to_path' not in args:
        raise WebpandaError('Please specify correct request params')

    to_se = args.get('to_se', type=str)
    to_path = args.get('to_path', type=str)

    # Only the part after the last ':' is used as the container guid
    if ':' in container_guid:
        container_guid = container_guid.split(':')[-1]

    container = conts_.first(guid=container_guid)
    if container is None:
        raise WebpandaError('Container not found')

    for item in container.files:
        f = item.file
        if f.lfn != lfn:
            continue
        # Copy from the first replica that is ready
        for r in f.replicas:
            if r.status == 'ready':
                task = async_copyReplica.delay(r.id, to_se, to_path)
                return {'task_id': task.id}
        raise WebpandaError('No replicas available')

    raise WebpandaError('File not found')
def link(self, lfn, d, rel=True):
    """
    Symlinks a file into a directory on the remote host over ssh.

    Runs ``mkdir -p d && ln -s lfn d`` remotely.

    :param lfn: Path of the file to link
    :param d: Directory that receives the link (created if missing)
    :param rel: Prefix both paths with ``self.datadir``
    :return: True
    :raises WebpandaError: if the command could not be launched
    """
    if rel:
        lfn = self.datadir + lfn
        d = self.datadir + d
    _logger.debug('HPC: Try to link file from %s to %s' % (lfn, d))
    try:
        # The ssh command line is fed to a bash process through stdin
        proc = subprocess.Popen(['/bin/bash'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        proc.communicate(
            "ssh -i %s %s@%s 'mkdir -p %s && ln -s %s %s'" % (self.key, self.user, self.host, d, lfn, d))
    except Exception:
        # "except Exception" instead of a bare except, so that
        # KeyboardInterrupt/SystemExit are no longer turned into WebpandaError
        raise WebpandaError('Unable to link:%s to %s' % (lfn, d))
    # NOTE(review): the remote exit status is not checked, so True is
    # returned even when mkdir/ln fails - confirm this is intended.
    # NOTE(review): both paths are interpolated into a shell command unquoted.
    return True
def cont_open(guid):
    """
    POST: /pilot/container/<guid>/open

    Sets the container status to 'open' and saves it.

    :param guid: Container guid
    :type guid: str
    :return: Confirmation message
    :raises WebpandaError: if no container has this guid
    """
    container = conts_.first(guid=guid)
    if container is None:
        raise WebpandaError("Container not found")

    container.status = 'open'
    conts_.save(container)

    reply = {'response': 'Container status: open'}
    return reply
def mv(self, lfn, lfn2, rel=True):
    """
    Moves a file on the remote host over ssh.

    Runs ``mkdir -p <dir of lfn2> && mv lfn lfn2`` remotely.

    :param lfn: Source path
    :param lfn2: Destination path
    :param rel: Prefix both paths with ``self.datadir``
    :return: True
    :raises WebpandaError: if the command could not be launched
    """
    if rel:
        lfn = self.datadir + lfn
        lfn2 = self.datadir + lfn2
    # Everything before the last '/' is the destination directory
    lfn2dir = "/".join(lfn2.split("/")[:-1])
    # The message used to say "link" (copied from link()); it now names mv
    _logger.debug('HPC: Try to mv file from %s to %s' % (lfn, lfn2))
    try:
        # The ssh command line is fed to a bash process through stdin
        proc = subprocess.Popen(['/bin/bash'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        proc.communicate(
            "ssh -i %s %s@%s 'mkdir -p %s && mv %s %s'" % (self.key, self.user, self.host, lfn2dir, lfn, lfn2))
    except Exception:
        # "except Exception" instead of a bare except, so that
        # KeyboardInterrupt/SystemExit are no longer turned into WebpandaError
        raise WebpandaError('Unable to mv:%s to %s' % (lfn, lfn2))
    # NOTE(review): the remote exit status is not checked, so True is
    # returned even when mkdir/mv fails - confirm this is intended.
    # NOTE(review): both paths are interpolated into a shell command unquoted.
    return True
def ls(self, path, rel=True):
    """
    Lists the non-directory entries of a remote path over ssh.

    Runs ``ls -p path | grep -v /`` remotely.

    :param path: Directory to list
    :param rel: Prefix path with ``self.datadir``
    :return: Command output split on newlines
    :raises WebpandaError: if launching the command raises OSError
    """
    target = self.datadir + path if rel else path
    _logger.debug('HPC: Try to list files in path %s' % (target))
    try:
        shell = subprocess.Popen(['/bin/bash'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        # communicate() returns (stdout, stderr); only stdout is captured
        listing = shell.communicate(
            "ssh -i {key} {user}@{host} 'ls -p {path} | grep -v /'".format(
                key=self.key, user=self.user, host=self.host, path=target))[0]
        _logger.debug(listing)
        return listing.split('\n')
    except OSError:
        raise WebpandaError('Unable to list files in: %s' % (target))
def adler32(self, path, rel=True):
    """
    Placeholder for the adler32 checksum of a remote file.

    No remote command is run yet: the call only logs and returns a fixed
    string.

    :param path: File path
    :param rel: Prefix path with ``self.datadir``
    :return: The constant "testtest"
    """
    target = self.datadir + path if rel else path
    _logger.debug('HPC: Trying to get adler32 of file: %s' % (target))

    # TODO: add the remote checksum script; only a placeholder is logged now
    command = "TODO:add script"
    _logger.debug(command)

    return "testtest"
def md5sum(self, path, rel=True):
    """
    Gets the md5 checksum of a remote file by running ``md5sum`` over ssh.

    :param path: File path
    :param rel: Prefix path with ``self.datadir``
    :return: Command output up to its first space
    :raises WebpandaError: if launching the command raises OSError
    """
    target = self.datadir + path if rel else path
    _logger.debug('HPC: Trying to get md5sum of file: %s' % (target))

    command = "ssh -i {key} {user}@{host} 'md5sum {path}'".format(
        key=self.key, user=self.user, host=self.host, path=target)
    _logger.debug(command)

    try:
        shell = subprocess.Popen(['/bin/bash'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        stdout_data, stderr_data = shell.communicate(command)
        _logger.debug(stdout_data)
        # stderr is not piped, so this logs whatever communicate() gives back for it
        _logger.debug(stderr_data)
        return stdout_data.split(' ')[0]
    except OSError:
        raise WebpandaError('Unable to calculate md5: %s' % (target))
def cont_info(guid):
    """
    POST: /pilot/container/<guid>/info

    Reports the stored metadata of a container.

    :param guid: Container guid
    :type guid: str
    :return: id/guid/status/nfiles
    :rtype: json
    :raises WebpandaError: if no container has this guid
    """
    container = conts_.first(guid=guid)
    if container is None:
        raise WebpandaError("Container not found")

    return {
        'id': container.id,
        'guid': container.guid,
        'status': container.status,
        # Number of entries in the container's file list
        'nfiles': len(container.files),
    }
def cont_close(guid):
    """
    POST: /pilot/container/<guid>/close

    Walks the container's upload directory with registerLocalFile, then sets
    the container status to 'close' and saves it.

    :param guid: Container guid
    :type guid: str
    :return: Confirmation message
    :raises WebpandaError: if no container has this guid
    """
    container = conts_.first(guid=guid)
    if container is None:
        raise WebpandaError("Container not found")

    # Upload directory: <UPLOAD_FOLDER>/<user scope>/<container guid>
    upload_root = current_app.config['UPLOAD_FOLDER']
    user_scope = getScope(g.user.username)
    upload_dir = os.path.join(upload_root, user_scope, container.guid)

    # registerLocalFile is called for every directory under upload_dir
    os.path.walk(upload_dir, registerLocalFile, container.guid)

    container.status = 'close'
    conts_.save(container)

    reply = {'response': 'Container status: close'}
    return reply
def prepareInputFiles(cont_id, se):
    """
    Builds clone tasks for container files that lack a ready replica on se.

    :param cont_id: Id of the container
    :param se: Storage element the files are needed on
    :return: List of async_cloneReplica signatures, one per file without a
        ready replica on se (cloned from the file's first replica)
    :raises WebpandaError: if a file has no replicas at all
    """
    _logger.debug('prepareInputFiles')
    container = conts_.get(cont_id)
    items = container.files

    clone_tasks = []
    for item in items:
        f = item.file

        if not f.replicas.count():
            raise WebpandaError("No available replicas for file %s" % f.guid)

        file_replicas = f.replicas
        found = False
        _logger.debug('prepareInputFiles: file.lfn={}'.format(f.lfn))

        # Every replica is visited (no early exit) so each one is logged
        for candidate in file_replicas:
            if candidate.se == se and candidate.status == 'ready':
                found = True
            _logger.debug(
                'prepareInputFiles: replica.se={} replica.status={} hasReplica={}'.
                format(candidate.se, candidate.status, found))

        if found:
            continue
        clone_tasks.append(async_cloneReplica.s(file_replicas[0].id, se))

    return clone_tasks
def run(task):
    """
    Runs the payload selected by the task type method and records the result.

    The task goes 'sent' -> 'preparing' -> 'running' when the payload
    returns, or to 'failed' (with the error message as comment) when a
    WebpandaError is raised.

    :param task: Task obj, processed only when its status is 'sent'
    :return: True if the payload returned, False if the task was not 'sent'
        or a WebpandaError was raised
    """
    def _stamp(status):
        # Set the new status together with a fresh modification time
        task.status = status
        task.modification_time = datetime.utcnow()

    try:
        method = task.task_type.method

        # Only tasks that were sent for execution are processed
        if task.status != 'sent':
            return False

        # The task is 'preparing' while its payload runs
        _stamp('preparing')
        tasks_.save(task)

        # Custom payload
        if method == 'init_task':
            payload1(task)
        elif method == 'split_task':
            payload2(task)
        elif method == 'run1_task':
            payload3(task)
        elif method == 'merge_task':
            payload4(task)
        else:
            raise WebpandaError("Task payload error: method not found")

        # Payload returned: the task is now 'running'
        _stamp('running')
        tasks_.save(task)
        return True
    except WebpandaError as e:
        # Record the failure and its reason on the task
        _stamp('failed')
        task.comment = e.msg
        tasks_.save(task)
        return False
def new_pipeline_from_cont():
    """
    Renders the new-pipeline form; on POST creates a pipeline from a container.

    On POST the files of the container named in the form are registered as
    input in a fresh 'pipeline.<uuid>' container, which becomes both input
    and output of the pipeline's start task.

    :return: Redirect to the pipeline list on POST, the form page otherwise
    :raises WebpandaError: if the source container is not found
    """
    form = RunForm(request.form)

    if request.method != 'POST':
        return render_template('dashboard/pp/new.html', form=form)

    source_cont = conts_.first(guid=form.guid.data)
    if source_cont is None:
        raise WebpandaError("Container not found")

    owner = g.user

    # Prepare pipeline
    pipeline = Pipeline()
    pipeline.status = 'running'
    pipeline.type_id = pipeline_types_.get(1).id
    pipeline.owner_id = owner.id
    pipelines_.save(pipeline)

    # Prepare container
    pipeline_cont = Container()
    pipeline_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
    conts_.save(pipeline_cont)

    # Register every source file in the catalog as pipeline input
    for entry in source_cont.files:
        fc.reg_file_in_cont(entry.file, pipeline_cont, 'input')

    # The start task reads from and writes to the new container
    first_task = pclient.get_start_task(pipeline)
    first_task.input = pipeline_cont.id
    first_task.output = pipeline_cont.id
    tasks_.save(first_task)

    return redirect(url_for('pipelines.list_all'))
def file_fetch(container_guid, lfn):
    """
    GET: /pilot/file/<container_guid>/<lfn>/fetch

    Returns file in response

    Serves the replica stored on the default storage element and increments
    the download counter of the file.

    :param container_guid: Guid of container
    :type container_guid: str
    :param lfn: Local FileName
    :type lfn: str
    :return: File
    :rtype: application/octet-stream
    :raises WebpandaError: if the container, the file or its replica on the
        default storage element is not found
    """
    # Only the part after the last ':' is used as the container guid
    if ':' in container_guid:
        container_guid = container_guid.split(':')[-1]

    container = conts_.first(guid=container_guid)
    if container is None:
        raise WebpandaError('Container not found')

    for item in container.files:
        f = item.file
        if f.lfn != lfn:
            continue
        for replica in f.replicas:
            if replica.se != current_app.config['DEFAULT_SE']:
                continue

            fullpath = current_app.config['DATA_PATH'] + replica.lfn
            # The handle gets its own name: reusing "f" replaced the file
            # record, so the header and counter code below failed on it.
            # Binary mode matches the octet-stream content type.
            with open(fullpath, 'rb') as fh:
                content = fh.read()

            rr = Response(content, status=200, content_type='application/octet-stream')
            rr.headers['Content-Disposition'] = 'inline; filename="%s"' % f.lfn
            rr.headers['Content-MD5'] = f.md5sum

            f.downloaded += 1
            files_.save(f)
            return rr

    raise WebpandaError('File not found')
def new_job():
    """
    Creates new job

    Reads the job description from the ``data`` object of the JSON request
    (``sw_id``, ``script``, ``cores``, optional ``ftp_dir``, ``guids`` and
    ``tags``), fills a new 'job.<uuid>' container with input and output
    files, saves the job and schedules sending it once the input replicas
    are prepared.

    :return: id of the job and guid of its container
    :rtype: json
    :raises WebpandaError: if a listed guid does not match a known file
    """
    g.user = request.oauth.user
    scope = getScope(request.oauth.user.username)

    js = request.json
    data = js['data']

    distr_id = data['sw_id']
    params = data['script']
    corecount = data['cores']

    site = sites_.first(ce=current_app.config['DEFAULT_CE'])
    # Look up the distributive by the requested id (the builtin ``id``
    # function was passed here before)
    distr = distrs_.get(distr_id)

    container = Container()
    guid = 'job.' + commands.getoutput('uuidgen')
    container.guid = guid
    container.status = 'open'
    conts_.save(container)

    # Process ftp files
    if 'ftp_dir' in data:
        register_ftp_files(data['ftp_dir'], scope, container.guid)

    # Process guid list (empty strings are skipped)
    if 'guids' in data:
        for file_guid in data['guids']:
            if file_guid == '':
                continue
            input_file = files_.first(guid=file_guid)
            if input_file is None:
                raise WebpandaError('File with guid %s not found' % file_guid)
            # Register file in catalog
            fc.reg_file_in_cont(input_file, container, 'input')

    ofiles = ['results.tgz']

    # Starts cloneReplica tasks
    ftasks = prepareInputFiles(container.id, site.se)

    # Saves output files meta
    for lfn in ofiles:
        output_file = File()
        output_file.scope = scope
        output_file.guid = getGUID(scope, lfn)
        output_file.lfn = lfn
        output_file.status = 'defined'
        files_.save(output_file)

        # Register file in catalog
        fc.reg_file_in_cont(output_file, container, 'output')

    # Counts files
    nifiles = 0
    nofiles = 0
    for item in container.files:
        if item.type == 'input':
            nifiles += 1
        if item.type == 'output':
            nofiles += 1

    # Defines job meta
    job = Job()
    job.pandaid = None
    job.status = 'pending'
    job.owner = request.oauth.user
    job.params = params
    job.distr = distr
    job.container = container
    job.creation_time = datetime.utcnow()
    job.modification_time = datetime.utcnow()
    job.ninputfiles = nifiles
    job.noutputfiles = nofiles
    job.corecount = corecount
    job.tags = data['tags'] if 'tags' in data else ""
    jobs_.save(job)

    # Async sendjob: runs after all cloneReplica tasks have finished
    chord(ftasks)(async_send_job.s(jobid=job.id, siteid=site.id))
    return {'id': job.id, 'container_id': guid}
def mv(self, lfn, lfn2, rel=True):
    """
    Not implemented here: every call raises.

    :raises WebpandaError: always
    """
    message = "SEPlugin.mv not implemented"
    raise WebpandaError(message)
def payload2(task):
    """
    split_task
    Split input *.1.fastq and *.2.fastq into 'rn' pieces=
    run panda /bin/bash split.sh

    Creates a job container "<task.tag>.0", registers the matching input
    files and the expected output files in it, builds the trf script and
    submits the job through send_job_.

    :param task: Task obj; its task type id must be 1
    :return: True
    :raises WebpandaError: if the task type id is not 1
    """
    logger.debug("payload2: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type
    if task_type.id != 1:
        raise WebpandaError("Illegal task_type.id")

    logger.debug("payload2: tasktype " + str(task_type.id))

    # Get user
    user = users_.get(task.owner_id)
    logger.debug("payload2: user " + str(user.id))

    # Get containers
    input_cont = conts_.get(task.input)
    #TODO do smth with output container?
    # NOTE(review): output_cont is fetched but not used below
    output_cont = conts_.get(task.output)

    # Tag the task with a fresh uuid; the job container guid derives from it
    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)
    logger.debug("payload2: tag " + task.tag)

    # Get container
    container = Container()
    container.guid = task.tag + ".0"
    conts_.save(container)
    logger.debug("payload2: cont " + container.guid)

    script_add = ""

    # rn: number of pieces, taken from the size of the first fastq file seen
    rn = 0
    # Add input files to container
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        if rn == 0:
            if f.lfn.endswith('fastq'):
                rn = getn(f.fsize)
            elif f.lfn.endswith('fastq.bz2'):
                rn = getn2(f.fsize)
        for file_template in files_template_list:
            # TODO: Change file template here
            # Each template is applied as a regular expression to the lfn
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')
                # Expected outputs: one file per suffix produced by gen_sfx
                if f.lfn.endswith('.fastq'):
                    for fi in gen_sfx(f.lfn[:-5]+'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fastq.bz2'):
                    for fi in gen_sfx(f.lfn[:-9]+'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fasta'):
                    fn=f.lfn+'.'
                    # "<name>.fasta." minus its last six characters is "<name>."
                    fc.reg_file_in_cont_byname(user, fn[:-6]+'dict', container, 'output')
                    # itert: validated file has null size
                    #for sfx in ('amb','ann','bwt','fai','pac','sa','validated'):
                    for sfx in ('amb','ann','bwt','fai','pac','sa', 'validated'):
                        fc.reg_file_in_cont_byname(user, fn+sfx, container, 'output')
                    # Have the job write a non-empty "validated" file
                    script_add += "; echo 123 > ../{fname}".format(fname=fn+"validated")

    logger.debug("payload2: reg Makefile")
    #reg additional output
    for fi in gen_sfx('Makefile.a', rn, '.yaml'):
        fc.reg_file_in_cont_byname(user, fi, container, 'output')

    #guids = ["web.it_4b7d4757-9ba4-4ed7-8bc0-6edb8bcc68d2",
    #         "web.it_3bc78e60-241b-418a-a631-2461d4ba1977",
    #         "web.it_1b88049e-463b-4b4f-8454-9587301a53e5",
    #         "web.it_a02271ea-8a9b-42f3-add2-ed6d0f9ff07e",
    #         "web.it_61bb7c80-e53c-4641-88b0-fbd16b0f3d56",
    #         "web.it_3930f596-25ea-49b0-8943-7a83c84c7940",
    #         "web.it_aa7b77a3-c765-464e-a4fa-29ce6dd50346",
    #         "web.it_211f2187-41f2-489f-ba63-73f004f21c66"
    #         ]
    #for guid in guids:
    #    fc.reg_file_in_cont(files_.first(guid=guid), container, 'input')

    # Prepare trf script
    # NOTE(review): the template read here is overwritten two statements below
    script = task.task_type.trf_template
    # TO_DO just for test add "1" - script1.sh- only emulate, not real jobs
    pipeline_path_name = 'paleomix_bam'
    swdir='/s/ls2/users/poyda/swp/' + pipeline_path_name +'/'
    # NOTE(review): "& ;" is rejected by bash as a syntax error - confirm how this script is executed
    script = "/bin/bash " + swdir + "genref.sh && /bin/bash " + swdir + "runtmplgen.sh -t 1>bam.out 2>bam.err & ;"
    script += "/bin/bash " + swdir + "split.sh -t " + str(rn)
    script += script_add

    # Save rn as task param
    task.params = str(rn)
    tasks_.save(task)

    logger.debug("payload2: script " + script)
    logger.debug("payload2: send_job " + container.guid)
    send_job_(task, container, script)

    return True
def rm(self, lfn, rel=True):
    """
    Not implemented here: every call raises.

    :raises WebpandaError: always
    """
    message = "SEPlugin.rm not implemented"
    raise WebpandaError(message)
def ls(self, path, rel=True):
    """
    Not implemented here: every call raises.

    :raises WebpandaError: always
    """
    message = "SEPlugin.ls not implemented"
    raise WebpandaError(message)
def fsize(self, path, rel=True):
    """
    Not implemented here: every call raises.

    :raises WebpandaError: always
    """
    message = "SEPlugin.fsize not implemented"
    raise WebpandaError(message)
def file_save(container_guid, lfn):
    """
    POST: /pilot/file/<container_guid>/<lfn>/save

    Saves file from request, returns file guid

    The request body is written as the replica of the file on the default
    storage element. The file is registered in the container first if it is
    not there yet.

    :param container_guid: Guid of container
    :type container_guid: str
    :param lfn: Local FileName
    :type lfn: str
    :return: guid
    :rtype: json
    :raises WebpandaError: if the container or the default storage element
        is not found, the container is not open, or a replica/file already
        exists at the destination
    """
    site = sites_.first(se=current_app.config['DEFAULT_SE'])
    if site is None:
        raise WebpandaError('StorageElement not found')

    # Only the part after the last ':' is used as the container guid
    if ':' in container_guid:
        container_guid = container_guid.split(':')[-1]

    container = conts_.first(guid=container_guid)
    if container is None:
        raise WebpandaError('Container not found')
    if container.status != 'open':
        raise WebpandaError('Unable to upload: Container is not open')

    # Reuse the file already registered under this lfn (the last match wins)
    ff = None
    for item in container.files:
        f = item.file
        if f.lfn == lfn:
            ff = f

    scope = getScope(g.user.username)

    if not ff:
        ff = File()
        ff.scope = scope
        ff.lfn = lfn
        ff.guid = getGUID(ff.scope, ff.lfn)
        ff.status = 'defined'
        files_.save(ff)

        # Register file in container
        fc.reg_file_in_cont(ff, container, 'input')

    # NOTE(review): lfn comes from the URL and is joined into a filesystem
    # path unchecked - confirm that it cannot contain path traversal.
    path = os.path.join(site.datadir, scope, container.guid)
    replfn = '/' + os.path.join(scope, container.guid, ff.lfn)
    destination = os.path.join(path, ff.lfn)

    for r in ff.replicas:
        if r.se != site.se:
            continue

        # A replica record exists on this SE: its own lfn gives the target
        destination = site.datadir + r.lfn
        file_dir = '/'.join(destination.split('/')[:-1])

        if r.status == 'ready':
            if os.path.isfile(destination):
                # Check fsize, md5 or adler
                raise WebpandaError('Replica exists')
            # Marked ready, but the file is missing on disk
            r.status = 'broken'
            replicas_.save(r)
            raise WebpandaError('Broken replica')

        if r.status != 'defined':
            raise WebpandaError('Replica status: %s' % r.status)

        try:
            os.makedirs(file_dir)
        except OSError:
            # Best effort (typically the directory exists already); a real
            # failure surfaces when the file is opened below
            pass

        with open(destination, 'wb') as out:
            out.write(request.data)

        # Update file info
        setFileMeta(ff.id, destination)

        r.status = 'ready'
        replicas_.save(r)
        return {'guid': ff.guid}

    # No replica on this SE yet: write the file and create the replica record
    if os.path.isfile(destination):
        raise WebpandaError('Unable to upload: File exists')

    try:
        os.makedirs(path)
    except OSError:
        _logger.debug('Path exists: %s' % path)

    with open(destination, 'wb') as out:
        out.write(request.data)

    # Update file info
    setFileMeta(ff.id, destination)

    # Create/change replica
    replica = Replica()
    replica.se = site.se
    replica.status = 'ready'
    replica.lfn = replfn
    replica.token = ''
    replica.original = ff
    replicas_.save(replica)
    return {'guid': ff.guid}
def md5sum(self, path, rel=True):
    """
    Not implemented here: every call raises.

    :raises WebpandaError: always
    """
    message = "SEPlugin.md5sum not implemented"
    raise WebpandaError(message)
def adler32(self, path, rel=True):
    """
    Not implemented here: every call raises.

    :raises WebpandaError: always
    """
    message = "SEPlugin.adler32 not implemented"
    raise WebpandaError(message)
def run():
    """
    Starts current defined task

    Goes over every pipeline that is not finished, failed or cancelled and
    advances it by one step according to the status of its current task:

    * failed / cancelled task - the pipeline takes the same status;
    * finished task - the next task becomes the current one;
    * defined 'start' task - marked finished;
    * defined 'finish' task - task and pipeline are marked finished;
    * any other defined task - sent to paleomix.run if its input check
      passes, otherwise task and pipeline are marked failed.

    :return: None
    :raises WebpandaError: if a pipeline has no current task
    """
    print("main started")

    # Fetch pipelines (init state)
    # TODO: add SQL filter on status if possible
    for pipeline in pipelines_.all():
        # Check if finished
        if pipeline.status in ('finished', 'failed', 'cancelled'):
            continue

        # Fetch task object
        current_task = pclient.get_current_task(pipeline)
        if current_task is None:
            # The error used to be returned, so callers never saw a failure
            raise WebpandaError('Illegal task ID')

        if current_task.status in ('failed', 'cancelled'):
            # TODO: What to do if failed?
            # TODO: What to do if cancelled? Who or by whom?
            #       If by system - resubmit, if by user - nothing?
            pipeline.status = current_task.status
            # It is the pipeline that changes and is saved here, so the
            # timestamp goes on the pipeline (it used to go on the task)
            pipeline.modification_time = datetime.utcnow()
            pipelines_.save(pipeline)
            continue

        if current_task.status == 'finished':
            # Get next_task
            current_task = pclient.get_next_task(pipeline)

        # Only tasks that are still 'defined' need an action
        if current_task.status != 'defined':
            continue

        method = current_task.task_type.method

        if method == 'start':
            # Do some general pipeline checks
            current_task.status = 'finished'
            current_task.modification_time = datetime.utcnow()
            tasks_.save(current_task)
            continue

        if method == 'finish':
            current_task.status = 'finished'
            current_task.modification_time = datetime.utcnow()
            tasks_.save(current_task)

            # Process system finish task
            pipeline.status = 'finished'
            pipeline.modification_time = datetime.utcnow()
            pipelines_.save(pipeline)
            continue

        # Any other task: check whether all input files are in place
        if not paleomix.has_input(current_task):
            current_task.status = "failed"
            current_task.modification_time = datetime.utcnow()
            current_task.comment = "Input files check failed"
            tasks_.save(current_task)

            pipeline.status = 'failed'
            pipeline.modification_time = datetime.utcnow()
            pipelines_.save(pipeline)
            continue

        # Run task if defined
        current_task.status = 'sent'
        tasks_.save(current_task)

        # TODO: Run async regime. For an async run all params must be
        # serializable (BaseQuery is not).
        paleomix.run(current_task)