def pause_pipeline(self, run_id, user):
    """
    Interrupt a pipeline by signalling the children of its worker process.

    The worker PID registered for ``run_id`` is looked up in ``self.pids``;
    ``kill <pid>`` is then executed as ``user`` (through ``run_as``) for
    every descendant of that worker. An unknown run id or a worker process
    that no longer exists is reported through ``pretty_print``.

    :param run_id: numeric id of the pipeline run (formatted with ``%d``)
    :param user: account under which the ``kill`` commands are executed
    :return: None
    """
    pid = self.pids.get(run_id)
    if not pid:
        pretty_print("Error pausing pipeline: ID %d not found" % run_id)
        return

    pretty_print("Pausing pipeline: id=%d, user=%s" % (run_id, user))
    try:
        children = psutil.Process(pid).children(recursive=True)
        for child in children:
            # psutil exposes the PID as an int, but command-line arguments
            # have to be strings (as done for the watchdog PID elsewhere).
            run_as(cmd=['kill', str(child.pid)], user=user)
    except psutil.NoSuchProcess:
        pretty_print("Error pausing pipeline: no process with ID %d" % int(pid))
def resume(user, cfg, run_id, pids):
    """
    Resume, as user 'user', the pipeline described by the config 'cfg'.

    The PID of the current process (mp.current_process()) is stored in
    pids[run_id] before np_submit.py is launched through run_as.

    Returns the (err, out) pair produced by run_as when the exit code is 0
    and raises Exception otherwise.
    """
    pids[run_id] = mp.current_process().pid

    command = [which('np_submit.py'), cfg]
    exit_code, err, out = run_as(cmd=command, user=user)
    if exit_code != 0:
        raise Exception('Unable to execute cmd %s:\n %s' % (command, err))
    return (err, out)
def delete(user, output_dir, work_dir):
    """
    Delete the output directory of a pipeline and, when one is given and it
    differs from the output directory, its work directory as well.

    The removal is executed as 'user' through run_as, using a single
    'rm -rf' invocation that lists every directory to delete.

    Returns the pair (err, out) from run_as, where 'out' is prefixed with the
    command line that was executed. Raises Exception when the exit code is
    not 0.
    """
    if work_dir and work_dir != output_dir:
        targets = [work_dir, output_dir]
    else:
        targets = [output_dir]

    # All paths go to one rm call: a ';' element in an argument list is only
    # a command separator under a shell; otherwise rm receives ';' and 'rm'
    # as additional paths to delete.
    cmd = ['rm', '-rf'] + targets
    (ec, err, out) = run_as(cmd, user=user)
    out = ' '.join(cmd) + '\n' + out
    if ec != 0:
        raise Exception('Unable to execute cmd %s:\n %s' % (cmd, err))
    return (err, out)
def submit(config, user, run_id, pids):
    """
    Submit the pipeline described by 'config' on behalf of user 'user'.

    The PID of the current process is recorded in pids[run_id]. The config
    is written as JSON to a temporary file (mode 0644) that is passed to
    np_submit.py; the file is deleted only when the submission exits with
    code 0.

    Returns the (err, out) pair produced by run_as and raises Exception when
    the exit code is not 0.
    """
    pids[run_id] = mp.current_process().pid

    handle, cfg_path = tempfile.mkstemp(prefix='pypers_', suffix='.cfg', text=True)
    os.fchmod(handle, 0o644)
    with os.fdopen(handle, 'w') as cfg_file:
        json.dump(config, cfg_file)

    command = [which('np_submit.py'), '-i', cfg_path]
    exit_code, err, out = run_as(cmd=command, user=user)
    if exit_code != 0:
        raise Exception('Unable to execute cmd %s:\n%s\n%s' % (command, err, out))

    os.unlink(cfg_path)
    return (err, out)
# Load the step described by the config file, clean its output directory,
# start the watchdog and run the step.
args = parser.parse_args()
user = getpass.getuser()

# create a step
step = Step.load_step(args.cfg_file)
if not os.path.exists(step.output_dir):
    os.makedirs(step.output_dir, 0o775)

# remove existing files, except step config and condor files
# NOTE(review): the pattern is searched against the whole path and 'job\.*'
# means "job" followed by any number of dots -- confirm this is intended.
full_list = glob.glob(step.output_dir + "/*")
regex = re.compile(r"(job\.*|condor\.*|.*\.cfg)")
to_remove = [f for f in full_list if not regex.search(f)]
for entry in to_remove:
    cmd = ['rm', '-rvf', entry]
    (ec, err, out) = run_as(cmd=cmd, user=user)
    if ec:
        print("WARNING: failed to remove file %s: %s, %s" % (entry, err, out))
    else:
        print("Removed %s" % entry)

# launch watchdog
mypid = os.getpid()
cmd = [WATCHDOG_EXE, str(mypid)]
watchlog = os.path.join(step.output_dir, 'watchdog.log')
# Open the log once and send stderr to the same descriptor as stdout: two
# separate handles ('w' and 'a') keep independent offsets, so one stream can
# overwrite the other. The child keeps its own copy of the descriptor, so
# the parent's handle can be closed right after the launch.
with open(watchlog, 'w') as watchlog_fh:
    wp = subprocess.Popen(cmd, stdout=watchlog_fh, stderr=subprocess.STDOUT)

# run step
step.run()

# stop watchdog
def exec_monitoring(self):
    """
    Check if all the flow cell IDs have been demultiplexed and submit the
    demultiplexing pipeline for each flow cell which has not been.

    An entry of one of self.hiseq_dirs is considered when its name matches
    ".+_.+_.+_.+", does not contain "Temp" and it holds an "RTAComplete.txt"
    file. Entries without a sample sheet (a file whose name contains both
    "SampleSheet" and ".csv") are only reported. The remaining ones are
    compared by name with the entries of self.demu_dir and, for each one
    that is absent, np_submit.py is executed as self.user through run_as.
    The outcome of every submission is printed.
    """
    banner = "******************************************************"

    # Dictionary {"Fw cell ID": "path"} of the completed runs with a sample sheet
    fw_cell_dirs = {}
    missing_ss_list = []
    for hiseq_dir in self.hiseq_dirs:
        for fwcell in os.listdir(hiseq_dir):
            fwcell_path = os.path.join(hiseq_dir, fwcell)
            # Only the directories with the "RTAComplete.txt" file are considered
            if not (re.search(".+_.+_.+_.+", fwcell)
                    and "Temp" not in fwcell
                    and os.path.exists(os.path.join(fwcell_path, "RTAComplete.txt"))):
                continue

            # search for the sample sheet in the fwcell_path
            ss_found = any(
                "SampleSheet" in filename and ".csv" in filename
                for filename in os.listdir(fwcell_path)
            )
            if ss_found:
                fw_cell_dirs[fwcell] = fwcell_path
            else:
                missing_ss_list.append(fwcell_path)

    # log all the missing sample sheets detected
    if missing_ss_list:
        print(banner)
        for missing_ss in missing_ss_list:
            print("Missing sample sheet in %s " % missing_ss)

    # flow cells found in the hiseq dirs but absent from the demultiplexed dir
    fwcell_diff = set(fw_cell_dirs).difference(os.listdir(self.demu_dir))
    if not fwcell_diff:
        return

    submit_cmd = which('np_submit.py')
    # sorted() keeps the submission (and log) order stable between runs
    for fwcell_id in sorted(fwcell_diff):
        input_dir = fw_cell_dirs[fwcell_id]
        output_dir = os.path.join(self.demu_dir, fwcell_id)
        cmd = [
            submit_cmd,
            pipeline_names['demultiplexing'],
            'pipeline.output_dir=%s' % output_dir,
            'pipeline.project_name=Demux',
            'pipeline.description=Demultiplexing',
            'steps.inputs.input_dir=%s' % input_dir
        ]
        # Do not report a flow cell as queued when the submission failed
        (ec, err, out) = run_as(cmd=cmd, user=self.user)
        print(banner)
        if ec:
            print(" %s Failed to queue demux: %s, %s" % (time.ctime(), err, out))
        else:
            print(" %s Queued demux with:" % time.ctime())
        print(" Input dir : %s" % input_dir)
        print(" Output dir : %s" % output_dir)
        print(" Cmd : %s" % ' '.join(cmd))
        print(banner)
# Load the step described by the config file, clean its output directory
# and start the watchdog before the step is run.
args = parser.parse_args()
user = getpass.getuser()

# create a step
step = Step.load_step(args.cfg_file)
if not os.path.exists(step.output_dir):
    os.makedirs(step.output_dir, 0o775)

# remove existing files, except step config and condor files
# NOTE(review): the pattern is searched against the whole path and 'job\.*'
# means "job" followed by any number of dots -- confirm this is intended.
full_list = glob.glob(step.output_dir + "/*")
regex = re.compile(r"(job\.*|condor\.*|.*\.cfg)")
to_remove = [f for f in full_list if not regex.search(f)]
for entry in to_remove:
    cmd = ['rm', '-rvf', entry]
    (ec, err, out) = run_as(cmd=cmd, user=user)
    if ec:
        print("WARNING: failed to remove file %s: %s, %s" % (entry, err, out))
    else:
        print("Removed %s" % entry)

# launch watchdog
mypid = os.getpid()
cmd = [WATCHDOG_EXE, str(mypid)]
watchlog = os.path.join(step.output_dir, 'watchdog.log')
# A single handle shared by stdout and stderr: two handles on the same file
# ('w' and 'a') keep independent offsets, so one stream can overwrite the
# other. The child holds its own copy of the descriptor, so the parent's
# handle is closed as soon as the process has been started.
with open(watchlog, 'w') as watchlog_fh:
    wp = subprocess.Popen(cmd, stdout=watchlog_fh, stderr=subprocess.STDOUT)

# run step
def exec_monitoring(self):
    """
    Check if all the flow cell IDs have been demultiplexed and submit the
    demultiplexing pipeline for each flow cell which has not been.

    An entry of one of self.hiseq_dirs is considered when its name matches
    ".+_.+_.+_.+", does not contain "Temp" and it holds an "RTAComplete.txt"
    file. Entries without a sample sheet (a file whose name contains both
    "SampleSheet" and ".csv") are only reported. The remaining ones are
    compared by name with the entries of self.demu_dir and, for each one
    that is absent, np_submit.py is executed as self.user through run_as.
    The outcome of every submission is printed.
    """
    separator = "******************************************************"

    # Dictionary {"Fw cell ID": "path"} of the completed runs with a sample sheet
    fw_cell_dirs = {}
    missing_ss_list = []
    for hiseq_dir in self.hiseq_dirs:
        for fwcell in os.listdir(hiseq_dir):
            fwcell_path = os.path.join(hiseq_dir, fwcell)
            # Only the directories with the "RTAComplete.txt" file are considered
            completed = (re.search(".+_.+_.+_.+", fwcell)
                         and "Temp" not in fwcell
                         and os.path.exists(os.path.join(fwcell_path, "RTAComplete.txt")))
            if not completed:
                continue

            # search for the sample sheet in the fwcell_path
            ss_found = any(
                "SampleSheet" in filename and ".csv" in filename
                for filename in os.listdir(fwcell_path)
            )
            if ss_found:
                fw_cell_dirs[fwcell] = fwcell_path
            else:
                # otherwise remember the directory as missing its sample sheet
                missing_ss_list.append(fwcell_path)

    # log all the missing sample sheets detected
    if missing_ss_list:
        print(separator)
        for missing_ss in missing_ss_list:
            print("Missing sample sheet in %s " % missing_ss)

    # flow cells found in the hiseq dirs but absent from the demultiplexed dir
    fwcell_diff = set(fw_cell_dirs).difference(os.listdir(self.demu_dir))
    if not fwcell_diff:
        return

    submit_cmd = which('np_submit.py')
    # sorted() keeps the submission (and log) order stable between runs
    for fwcell_id in sorted(fwcell_diff):
        input_dir = fw_cell_dirs[fwcell_id]
        output_dir = os.path.join(self.demu_dir, fwcell_id)
        cmd = [
            submit_cmd,
            pipeline_names['demultiplexing'],
            'pipeline.output_dir=%s' % output_dir,
            'pipeline.project_name=Demux',
            'pipeline.description=Demultiplexing',
            'steps.inputs.input_dir=%s' % input_dir
        ]
        # Do not report a flow cell as queued when the submission failed
        (ec, err, out) = run_as(cmd=cmd, user=self.user)
        print(separator)
        if ec:
            print(" %s Failed to queue demux: %s, %s" % (time.ctime(), err, out))
        else:
            print(" %s Queued demux with:" % time.ctime())
        print(" Input dir : %s" % input_dir)
        print(" Output dir : %s" % output_dir)
        print(" Cmd : %s" % ' '.join(cmd))
        print(separator)