def payload4(task):
    """Run the merge step over N inputs.

    input: Makefile.all, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq,
           {N}reads.tgz, {N}maps.tgz
    output: bam file + results.tgz?

    :param task: task whose input container holds the pieces to merge
    :return: True once the merge job has been submitted
    """
    logger.debug("payload4: Start")

    #### Prepare
    # Task type carries the input-file templates.
    task_type = task.task_type
    user = users_.get(task.owner_id)

    # Tag the task with a fresh uuid before creating its container.
    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    # Piece count: task.params overrides the default of 10 (0 means default).
    n = 10
    if task.params is not None:
        n = int(task.params)
        if n == 0:
            n = 10

    # Get containers
    input_cont = conts_.get(task.input)
    #TO_DO do smth with output container?
    output_cont = conts_.get(task.output)

    # Fresh container for this job, named after the task tag.
    container = Container()
    container.guid = task.tag
    conts_.save(container)

    # Register every input file whose lfn matches one of the templates
    # (one registration per matching template, as before).
    templates = task_type.ifiles_template.split(',')
    for entry in input_cont.files:
        fobj = entry.file
        for pattern in templates:
            # TO_DO: Change file template here
            if re.match(pattern, fobj.lfn) is not None:
                fc.reg_file_in_cont(fobj, container, 'input')

    # reg additional output
    fc.reg_file_in_cont_byname(user, 'output.bam', container, 'output')
    fc.reg_file_in_cont_byname(user, 'myresults.bz2', container, 'output')

    # Prepare trf script
    script = task.task_type.trf_template
    # TO_DO just for test - only emulate, not real jobs
    swdir = '/s/ls2/users/poyda/swp/' + 'paleomix_bam' + '/'
    script = "/bin/bash " + swdir + "runmerge.sh -t " + str(n)

    send_job_(task, container, script)
    return True
def container():
    """New container form view.

    :return: Response obj
    """
    form = NewContainerForm()
    if request.method != 'POST':
        # Plain GET: just render the form.
        return render_template("dashboard/cont/cont_new.html", form=form)

    user = g.user
    scope = getScope(user.username)
    ftpdir = form.ftpdir.data

    #Create a unique container quid for this particular batch of uploads.
    cguid = 'job.' + commands.getoutput('uuidgen')

    # Persist the new, open container (renamed local to avoid shadowing
    # this view function).
    cont = Container()
    cont.guid = cguid
    cont.status = 'open'
    conts_.save(cont)

    # Kick off the async upload only when an ftp dir was supplied.
    if ftpdir and len(ftpdir) > 0:
        async_uploadContainer.delay(ftpdir, scope, cont.guid)

    return redirect(url_for('conts.cont_info', guid=cont.guid))
def new_cont():
    """Creates new Container object.

    :return: New container
    :rtype: Container
    """
    # Build the container with a fresh guid and open status.
    cont = Container()
    cont.guid = 'cont.' + commands.getoutput('uuidgen')
    cont.status = "open"
    # Save to fc
    conts_.save(cont)
    return cont
def new_pipeline():
    """Create a new pipeline from a list of input file guids posted by the form.

    Registers each posted guid's file into a fresh pipeline container, then
    schedules the pipeline's start task.

    :return: redirect to the pipeline list on success, a JSON error response
        when a guid cannot be resolved, or the form template on GET.
    """
    form = NewPipelineForm(request.form)
    if request.method == 'POST':
        ifiles = request.form.getlist('iguids[]')
        current_user = g.user

        # Prepare pipeline
        pp = Pipeline()
        pp.status = 'running'
        pp.type_id = pipeline_types_.get(1).id
        pp.owner_id = current_user.id
        pipelines_.save(pp)

        # Prepare container
        pp_cont = Container()
        pp_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
        conts_.save(pp_cont)

        # Add guids to container
        for item in ifiles:
            if item != '':
                f = files_.first(guid=item)
                if f is not None:
                    # Register file in catalog
                    fc.reg_file_in_cont(f, pp_cont, 'input')
                else:
                    # Mark the container broken and report the offending guid.
                    pp_cont.status = 'broken'
                    conts_.save(pp_cont)
                    # BUG FIX: report the guid that was not found (item);
                    # the original formatted f, which is always None here.
                    return make_response(jsonify({'error': "GUID {} not found".format(item)}))

        # Set current task
        start_task = pclient.get_start_task(pp)
        start_task.input = pp_cont.id
        start_task.output = pp_cont.id
        tasks_.save(start_task)
        return redirect(url_for('pipelines.list_all'))
    return render_template('dashboard/pp/new.html', form=form)
def new_cont():
    """POST: /pilot/container

    Saves new container.

    :return: ftp/guid
    :rtype: json
    """
    guid = 'job.' + commands.getoutput('uuidgen')

    # Persist the new, open container.
    cont = Container()
    cont.guid = guid
    cont.status = 'open'
    conts_.save(cont)

    # FTP URL under which the container contents will be reachable.
    url = '%s/%s' % (current_app.config['FTP'], guid)

    # Create the per-user upload directory for this container.
    upload_dir = os.path.join(
        current_app.config['UPLOAD_FOLDER'],
        getScope(g.user.username),
        cont.guid)
    os.makedirs(upload_dir)

    return {'ftp': url, 'guid': cont.guid}
def new_pipeline_from_cont():
    """Start a pipeline whose inputs are the files of an existing container.

    :return: redirect to the pipeline list on POST, form template on GET
    :raises WebpandaError: when the referenced container does not exist
    """
    form = RunForm(request.form)
    if request.method != 'POST':
        # Plain GET: just render the form.
        return render_template('dashboard/pp/new.html', form=form)

    icont = conts_.first(guid=form.guid.data)
    if icont is None:
        raise WebpandaError("Container not found")
    current_user = g.user

    # Prepare pipeline
    pp = Pipeline()
    pp.status = 'running'
    pp.type_id = pipeline_types_.get(1).id
    pp.owner_id = current_user.id
    pipelines_.save(pp)

    # Prepare container
    pp_cont = Container()
    pp_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
    conts_.save(pp_cont)

    # Register every file of the source container as pipeline input.
    for entry in icont.files:
        fc.reg_file_in_cont(entry.file, pp_cont, 'input')

    # Set current task
    start_task = pclient.get_start_task(pp)
    start_task.input = pp_cont.id
    start_task.output = pp_cont.id
    tasks_.save(start_task)
    return redirect(url_for('pipelines.list_all'))
def payload3(task):
    """run1 - N parallel jobs.

    {N} = sequence 0..01,0..02,...,N, not less than 2 placeholders
    #TODO deal with {N}.fastq.bz2 ??
    input: Makefile.{N}, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq
    output: likely reads{N}.tgz, maps{N}.tgz

    :param task: task whose input container holds the files to fan out
    :return: True once all parallel jobs have been submitted
    """
    logger.debug("payload3: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type
    # if task_type.id != 3or6?:
    #     raise WebpandaError("Illegal task_type.id")

    # Get user
    user = users_.get(task.owner_id)

    # Piece count: task.params overrides the default of 10 (0 means default).
    n = 10
    if task.params is not None:
        n = int(task.params)
        if n == 0:
            n = 10

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    # Get containers
    input_cont = conts_.get(task.input)
    #TO_DO do smth with output container?
    output_cont = conts_.get(task.output)

    # Template list is loop-invariant, so split it once up front.
    templates = task_type.ifiles_template.split(',')

    for jobname in gen_sfx("a", n):
        # One container per parallel job, suffixed with the job name.
        container = Container()
        container.guid = task.tag + "." + jobname
        conts_.save(container)

        # Register every matching input file into the job container
        # (one registration per matching template, as before).
        for entry in input_cont.files:
            fobj = entry.file
            for pattern in templates:
                # TO_DO: Change file template here
                if re.match(pattern, fobj.lfn) is not None:
                    fc.reg_file_in_cont(fobj, container, 'input')

        # reg additional output
        fc.reg_file_in_cont_byname(user, jobname + '.reads.bz2', container, 'output')
        fc.reg_file_in_cont_byname(user, jobname + '.maps.bz2', container, 'output')

        # Prepare trf script
        script = task.task_type.trf_template
        # TO_DO just for test - only emulate, not real jobs
        swdir = '/s/ls2/users/poyda/swp/' + 'paleomix_bam' + '/'
        script = "/bin/bash " + swdir + "run11.sh -t " + jobname

        send_job_(task, container, script)

    return True
def payload2(task):
    """split_task

    Split input *.1.fastq and *.2.fastq into 'rn' pieces=
    run panda /bin/bash split.sh

    :param task: split task; its task_type.id must be 1
    :return: True once the split job has been submitted
    :raises WebpandaError: on an illegal task type
    """
    logger.debug("payload2: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type
    if task_type.id != 1:
        raise WebpandaError("Illegal task_type.id")
    logger.debug("payload2: tasktype " + str(task_type.id))

    # Get user
    user = users_.get(task.owner_id)
    logger.debug("payload2: user " + str(user.id))

    # Get containers
    input_cont = conts_.get(task.input)
    #TODO do smth with output container?
    output_cont = conts_.get(task.output)

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)
    logger.debug("payload2: tag " + task.tag)

    # Get container
    container = Container()
    container.guid = task.tag + ".0"
    conts_.save(container)
    logger.debug("payload2: cont " + container.guid)

    script_add = ""
    rn = 0

    # Add input files to container
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        # Derive the piece count from the first fastq file seen.
        if rn == 0:
            if f.lfn.endswith('fastq'):
                rn = getn(f.fsize)
            elif f.lfn.endswith('fastq.bz2'):
                rn = getn2(f.fsize)
        for file_template in files_template_list:
            # TODO: Change file template here
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')
                # Pre-register the pieces/derived files this input produces.
                # NOTE(review): the original flattened source makes this
                # nesting ambiguous — assumed to apply per matched input;
                # confirm against revision history.
                if f.lfn.endswith('.fastq'):
                    for fi in gen_sfx(f.lfn[:-5] + 'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fastq.bz2'):
                    for fi in gen_sfx(f.lfn[:-9] + 'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fasta'):
                    fn = f.lfn + '.'
                    fc.reg_file_in_cont_byname(user, fn[:-6] + 'dict', container, 'output')
                    # itert: validated file has null size
                    for sfx in ('amb', 'ann', 'bwt', 'fai', 'pac', 'sa', 'validated'):
                        fc.reg_file_in_cont_byname(user, fn + sfx, container, 'output')
                    script_add += "; echo 123 > ../{fname}".format(fname=fn + "validated")

    logger.debug("payload2: reg Makefile")
    #reg additional output
    for fi in gen_sfx('Makefile.a', rn, '.yaml'):
        fc.reg_file_in_cont_byname(user, fi, container, 'output')

    # Prepare trf script
    script = task.task_type.trf_template
    # TO_DO just for test add "1" - script1.sh- only emulate, not real jobs
    pipeline_path_name = 'paleomix_bam'
    swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
    # BUG FIX: the original ended the backgrounded command with "& ;",
    # which is a bash syntax error ("&" already terminates the command,
    # the dangling ";" is then an empty command). "& " alone both
    # backgrounds runtmplgen.sh and separates it from the next command.
    script = "/bin/bash " + swdir + "genref.sh && /bin/bash " + swdir + "runtmplgen.sh -t 1>bam.out 2>bam.err & "
    script += "/bin/bash " + swdir + "split.sh -t " + str(rn)
    script += script_add

    # Save rn as task param
    task.params = str(rn)
    tasks_.save(task)
    logger.debug("payload2: script " + script)
    logger.debug("payload2: send_job " + container.guid)
    send_job_(task, container, script)
    return True
def upload():
    """Handle a batch file upload (Ajax or direct form POST).

    Each uploaded file is stored under the site data dir, deduplicated
    against the catalog by size/adler32/md5, and registered as an input
    of a freshly created container.

    :return: ajax_response / plain string / redirect depending on the
        request type and outcome.
    """
    form = request.form

    # Create a unique container quid for this particular batch of uploads.
    cguid = 'job.' + commands.getoutput('uuidgen')

    # Is the upload using Ajax, or a direct POST by the form?
    is_ajax = form.get("__ajax", None) == "true"

    # Create new container
    container = Container()
    container.guid = cguid
    container.status = 'open'
    conts_.save(container)

    # Process files in request
    for upload_item in request.files.getlist("file"):
        # Define file params.
        # BUG FIX: take the basename (last path component) of the client
        # filename; the original rsplit("/")[0] returned the FIRST
        # component (e.g. "a" for "a/b/c.txt").
        lfn = upload_item.filename.rsplit("/", 1)[-1]
        scope = getScope(g.user.username)
        guid = getGUID(scope, lfn)
        site = sites_.first(se=current_app.config['DEFAULT_SE'])

        # Target folder for these uploads (renamed from dir/file to avoid
        # shadowing the builtins).
        dir_ = '/' + os.path.join('system', scope, guid)
        target = site.datadir + dir_
        try:
            os.makedirs(target)
        except OSError:
            # Narrowed from a bare except: only filesystem failures
            # (including pre-existing directory) reach here.
            if is_ajax:
                return ajax_response(
                    False,
                    "Couldn't create upload directory: %s" % target)
            else:
                return "Couldn't create upload directory: %s" % target

        replfn = os.path.join(dir_, lfn)
        destination = os.path.join(target, lfn)
        upload_item.save(destination)

        if os.path.isfile(destination):
            # Check file existence in catalog
            adler = adler32(destination)
            md5 = md5sum(destination)
            size = fsize(destination)
            file_id = ddm_checkifexists(lfn, size, adler, md5)
            if file_id:
                # If file exists, reuse the catalog entry
                file_ = files_.get(file_id)
            else:
                # Otherwise create new catalog entry + replica.
                # NOTE(review): the flattened original leaves it ambiguous
                # whether the replica was created for pre-existing files
                # too; creating it only for new files avoids duplicate
                # replicas — confirm against revision history.
                file_ = File()
                file_.scope = scope
                file_.guid = guid
                file_.type = 'input'
                file_.lfn = lfn
                file_.token = ''
                file_.status = 'defined'
                files_.save(file_)
                setFileMeta(file_.id, destination)

                replica = Replica()
                replica.se = site.se
                replica.status = 'ready'
                replica.lfn = replfn
                replica.original = file_
                replicas_.save(replica)

            # Register file in container
            fc.reg_file_in_cont(file_, container, 'input')
        else:
            return ajax_response(False, "Couldn't save file: %s" % target)

    if is_ajax:
        return ajax_response(True, cguid)
    else:
        return redirect(url_for("jobs.jobs"))
def new_job():
    """Creates new job from an OAuth API request.

    Reads the job description from the JSON payload (software id, script,
    core count, optional ftp dir / input guids), registers input and output
    files into a new container, and schedules the job asynchronously.

    :return: dict with the new job id and its container guid
    :raises WebpandaError: when a supplied file guid is unknown
    """
    g.user = request.oauth.user
    scope = getScope(request.oauth.user.username)

    js = request.json
    data = js['data']
    distr_id = data['sw_id']
    params = data['script']
    corecount = data['cores']

    site = sites_.first(ce=current_app.config['DEFAULT_CE'])
    # BUG FIX: look up the distributive by the requested sw_id; the
    # original passed the builtin `id` function instead of distr_id.
    distr = distrs_.get(distr_id)

    container = Container()
    guid = 'job.' + commands.getoutput('uuidgen')
    container.guid = guid
    container.status = 'open'
    conts_.save(container)

    # Process ftp files
    if 'ftp_dir' in data.keys():
        ftp_dir = data['ftp_dir']
        register_ftp_files(ftp_dir, scope, container.guid)

    # Process guid list
    if 'guids' in data.keys():
        guids = data['guids']
        for f in guids:
            if f != '':
                file_ = files_.first(guid=f)
                if file_ is not None:
                    # Register file in catalog
                    fc.reg_file_in_cont(file_, container, 'input')
                else:
                    raise WebpandaError('File with guid %s not found' % f)

    ofiles = ['results.tgz']

    # Starts cloneReplica tasks
    ftasks = prepareInputFiles(container.id, site.se)

    # Saves output files meta
    for lfn in ofiles:
        ofile = File()
        ofile.scope = scope
        ofile.guid = getGUID(scope, lfn)
        ofile.lfn = lfn
        ofile.status = 'defined'
        files_.save(ofile)
        # Register file in catalog
        fc.reg_file_in_cont(ofile, container, 'output')

    # Counts files
    nifiles = 0
    nofiles = 0
    for f in container.files:
        if f.type == 'input':
            nifiles += 1
        if f.type == 'output':
            nofiles += 1

    # Defines job meta
    job = Job()
    job.pandaid = None
    job.status = 'pending'
    job.owner = request.oauth.user
    job.params = params
    job.distr = distr
    job.container = container
    job.creation_time = datetime.utcnow()
    job.modification_time = datetime.utcnow()
    job.ninputfiles = nifiles
    job.noutputfiles = nofiles
    job.corecount = corecount
    job.tags = data['tags'] if 'tags' in data.keys() else ""
    jobs_.save(job)

    # Async sendjob
    res = chord(ftasks)(async_send_job.s(jobid=job.id, siteid=site.id))

    return {'id': job.id, 'container_id': guid}