Beispiel #1
0
 def pause_pipeline(self, run_id, user):
     """
     Interrupt a pipeline by killing the children of its worker process.

     The worker PID registered for 'run_id' is looked up in self.pids and
     'kill <pid>' is run as 'user' for every descendant of that process
     (the worker process itself is not signalled). Problems are reported
     through pretty_print; nothing is returned.

     :param run_id: numeric ID of the pipeline run, used as key in self.pids
     :param user: user name handed to run_as for the kill commands
     """
     pid = self.pids.get(run_id)
     if not pid:
         pretty_print("Error pausing pipeline: ID %d not found" % run_id)
         return

     pretty_print("Pausing pipeline: id=%d, user=%s" % (run_id, user))
     try:
         # Only the psutil look-ups can raise NoSuchProcess
         children = psutil.Process(pid).children(recursive=True)
     except psutil.NoSuchProcess:
         pretty_print("Error pausing pipeline: no process with ID %d" % int(pid))
         return

     for child in children:
         # Command-line arguments must be strings, so the integer PID is
         # converted explicitly before being handed to run_as.
         run_as(cmd=['kill', str(child.pid)], user=user)
Beispiel #2
0
def resume(user, cfg, run_id, pids):
    """
    Resume, on behalf of 'user', the pipeline described by config 'cfg'.

    The PID reported by mp.current_process() is stored in pids[run_id]
    before np_submit.py is launched through run_as.

    Returns the tuple (err, out) taken from the run_as result when the exit
    code is 0; raises Exception with the command and 'err' otherwise.
    """
    pids[run_id] = mp.current_process().pid

    submit_cmd = [which('np_submit.py'), cfg]
    exit_code, err, out = run_as(cmd=submit_cmd, user=user)

    # Fail loudly on any non-zero exit code
    if exit_code != 0:
        raise Exception('Unable to execute cmd %s:\n %s' % (submit_cmd, err))
    return (err, out)
Beispiel #3
0
def delete(user, output_dir, work_dir):
    """
    Delete the output directory of a pipeline and, if given, its work directory.

    A single 'rm -rf' is run as 'user'. When 'work_dir' is set and differs
    from 'output_dir', both directories are passed to that one command (work
    directory first); otherwise only 'output_dir' is removed.

    Returns the tuple (err, out) taken from the run_as result, with the
    executed command line prepended to 'out'.
    Raises Exception if the command exits with a non-zero code.
    """
    targets = [output_dir]
    if work_dir and work_dir != output_dir:
        targets.insert(0, work_dir)

    # One rm invocation with several operands: a ';' element in an argument
    # list is not a command separator, it would be treated as a file name.
    cmd = ['rm', '-rf'] + targets

    (ec, err, out) = run_as(cmd, user=user)
    out = ' '.join(cmd) + '\n' + out
    if ec != 0:
        raise Exception('Unable to execute cmd %s:\n %s' % (cmd, err))
    return (err, out)
Beispiel #4
0
def submit(config, user, run_id, pids):
    """
    Submit the pipeline defined by 'config' as user 'user'.

    The PID reported by mp.current_process() is stored in pids[run_id].
    'config' is dumped as JSON into a temporary '.cfg' file which is passed
    to np_submit.py; the file is removed only after a successful submission
    and is left in place when the command fails.

    Returns the tuple (err, out) taken from the run_as result.
    Raises Exception if the command exits with a non-zero code.
    """
    pids[run_id] = mp.current_process().pid

    (fd, tmp_cfg) = tempfile.mkstemp(prefix='pypers_', suffix='.cfg', text=True)
    # '0o644' is the octal spelling accepted by both Python 2.6+ and Python 3
    # (the bare '0644' form is a syntax error on Python 3). The file is made
    # world-readable, presumably so that 'user' can read it.
    os.fchmod(fd, 0o644)
    with os.fdopen(fd, 'w') as fh:
        json.dump(config, fh)

    cmd = [which('np_submit.py'), '-i', tmp_cfg]
    (ec, err, out) = run_as(cmd=cmd, user=user)
    if ec != 0:
        raise Exception('Unable to execute cmd %s:\n%s\n%s' % (cmd, err, out))

    os.unlink(tmp_cfg)
    return (err, out)
Beispiel #5
0
    args = parser.parse_args()
    user = getpass.getuser()

    # create a step    
    step = Step.load_step(args.cfg_file)
    if not os.path.exists(step.output_dir):
        os.makedirs(step.output_dir, 0775)

    # remove existing files, except step config and condor files
    full_list = glob.glob(step.output_dir + "/*")
    regex = re.compile("(job\.*|condor\.*|.*\.cfg)")
    to_remove = filter(lambda f: not regex.search(f), full_list)
    for entry in to_remove:
        cmd = ['rm', '-rvf', entry]
        (ec, err, out) = run_as(cmd=cmd, user=user)
        if ec:
            print "WARNING: failed to remove file %s: %s, %s" % (entry, err, out)
        else:
            print "Removed %s" % entry

    # launch watchdog
    mypid = os.getpid()
    cmd = [WATCHDOG_EXE, str(mypid)]
    watchlog = os.path.join(step.output_dir, 'watchdog.log')
    wp = subprocess.Popen(cmd, stdout=open(watchlog,'w'), stderr=open(watchlog,'a'))
    
    # run step
    step.run()

    # stop watchdog
Beispiel #6
0
    def exec_monitoring(self):
        """
        Submit the demultiplexing pipeline for flow cells not yet processed.

        Every directory in self.hiseq_dirs is scanned for flow cell runs whose
        name matches ".+_.+_.+_.+", does not contain "Temp", and which hold an
        "RTAComplete.txt" file. Runs without a sample sheet are reported on
        stdout. The remaining runs are compared with the entries of
        self.demu_dir, and np_submit.py is launched as self.user for each run
        that has no entry there.
        """
        ready_runs = {}       # flow cell ID -> run directory
        without_sheet = []    # run directories lacking a sample sheet

        for hiseq_dir in self.hiseq_dirs:
            for fwcell in os.listdir(hiseq_dir):
                run_dir = os.path.join(hiseq_dir, fwcell)

                # Only completed runs with a flow-cell-like name are considered
                if not re.search(".+_.+_.+_.+", fwcell):
                    continue
                if "Temp" in fwcell:
                    continue
                if not os.path.exists(os.path.join(run_dir, "RTAComplete.txt")):
                    continue

                has_sheet = any(
                    ("SampleSheet" in name) and (".csv" in name)
                    for name in os.listdir(run_dir)
                )
                if has_sheet:
                    ready_runs[fwcell] = run_dir
                else:
                    without_sheet.append(run_dir)

        banner = "******************************************************"

        # Report every run directory that has no sample sheet
        if without_sheet:
            print(banner)
            for run_dir in without_sheet:
                print("Missing sample sheet in %s " % run_dir)

        # Flow cells seen on the sequencer side but absent from the demux dir
        candidates = set(ready_runs.keys())
        already_done = set(os.listdir(self.demu_dir))
        for fwcell_id in candidates.difference(already_done):
            source_dir = ready_runs[fwcell_id]
            target_dir = os.path.join(self.demu_dir, fwcell_id)
            cmd = [
                which('np_submit.py'),
                pipeline_names['demultiplexing'],
                'pipeline.output_dir=%s' % target_dir,
                'pipeline.project_name=Demux',
                'pipeline.description=Demultiplexing',
                'steps.inputs.input_dir=%s' % source_dir,
            ]
            run_as(cmd=cmd, user=self.user)

            print(banner)
            print(" %s Queued demux  with:" % time.ctime())
            print("   Input dir  : %s" % source_dir)
            print("   Output dir : %s" % target_dir)
            print("   Cmd : %s" % ' '.join(cmd))
            print(banner)
Beispiel #7
0
    args = parser.parse_args()
    user = getpass.getuser()

    # create a step
    step = Step.load_step(args.cfg_file)
    if not os.path.exists(step.output_dir):
        os.makedirs(step.output_dir, 0775)

    # remove existing files, except step config and condor files
    full_list = glob.glob(step.output_dir + "/*")
    regex = re.compile("(job\.*|condor\.*|.*\.cfg)")
    to_remove = filter(lambda f: not regex.search(f), full_list)
    for entry in to_remove:
        cmd = ['rm', '-rvf', entry]
        (ec, err, out) = run_as(cmd=cmd, user=user)
        if ec:
            print "WARNING: failed to remove file %s: %s, %s" % (entry, err,
                                                                 out)
        else:
            print "Removed %s" % entry

    # launch watchdog
    mypid = os.getpid()
    cmd = [WATCHDOG_EXE, str(mypid)]
    watchlog = os.path.join(step.output_dir, 'watchdog.log')
    wp = subprocess.Popen(cmd,
                          stdout=open(watchlog, 'w'),
                          stderr=open(watchlog, 'a'))

    # run step
Beispiel #8
0
    def exec_monitoring(self):
        """
        Check which flow cells still have to be demultiplexed and submit them.

        Flow cell directories are collected from self.hiseq_dirs: a directory
        qualifies when its name matches ".+_.+_.+_.+", does not contain
        "Temp", and it holds an "RTAComplete.txt" file. Qualifying directories
        without a sample sheet are listed on stdout. For every other
        qualifying flow cell that has no entry in self.demu_dir, the
        demultiplexing pipeline is submitted with np_submit.py as self.user.
        """
        sheet_dirs = {}      # flow cell ID -> directory holding a sample sheet
        sheetless_dirs = []  # qualifying directories with no sample sheet

        for base_dir in self.hiseq_dirs:
            for fwcell in os.listdir(base_dir):
                cell_dir = os.path.join(base_dir, fwcell)

                is_candidate = (
                    re.search(".+_.+_.+_.+", fwcell)
                    and "Temp" not in fwcell
                    and os.path.exists(os.path.join(cell_dir, "RTAComplete.txt"))
                )
                if not is_candidate:
                    continue

                # Look for a sample sheet; the else branch runs when none is found
                for entry in os.listdir(cell_dir):
                    if "SampleSheet" in entry and ".csv" in entry:
                        sheet_dirs[fwcell] = cell_dir
                        break
                else:
                    sheetless_dirs.append(cell_dir)

        separator = "******************************************************"

        # Log all the missing sample sheets detected
        if sheetless_dirs:
            print(separator)
            for cell_dir in sheetless_dirs:
                print("Missing sample sheet in %s " % cell_dir)

        # Whatever is known on the sequencer side but not in the demux dir
        sequenced = set(sheet_dirs.keys())
        demultiplexed = set(os.listdir(self.demu_dir))
        to_submit = sequenced.difference(demultiplexed)

        for fwcell_id in to_submit:
            input_dir = sheet_dirs[fwcell_id]
            output_dir = os.path.join(self.demu_dir, fwcell_id)

            cmd = [which('np_submit.py'), pipeline_names['demultiplexing']]
            cmd.append('pipeline.output_dir=%s' % output_dir)
            cmd.append('pipeline.project_name=Demux')
            cmd.append('pipeline.description=Demultiplexing')
            cmd.append('steps.inputs.input_dir=%s' % input_dir)
            run_as(cmd=cmd, user=self.user)

            print(separator)
            print(" %s Queued demux  with:" % time.ctime())
            print("   Input dir  : %s" % input_dir)
            print("   Output dir : %s" % output_dir)
            print("   Cmd : %s" % ' '.join(cmd))
            print(separator)