Example #1
0
def status(simfolder):
    """
    Print a status report for the simulation in simfolder.

    When the configured scheduler is PBS, the number of our PBS jobs that
    are waiting, queued and running is printed first (counted by piping
    ``qselect`` into ``wc -l``). After that, for every scheduler, the
    fjd-dispatcher is called in status-only mode.

    :param string simfolder: relative path to simfolder
    :returns: True (exit codes of the called commands are not inspected)
    """
    scheduler = utils.get_scheduler(simfolder)
    conf_path = "{}/stosim.conf".format(simfolder)
    sim_name = utils.get_simulation_name(simfolder, conf_path)

    if scheduler == 'pbs':
        # every .pbs file in the jobs folder counts as one computing node
        pbs_files = [entry for entry in os.listdir('{}/jobs'.format(simfolder))
                     if entry.endswith('.pbs')]
        if not pbs_files:
            print("[StoSim] No PBS computing nodes seem to be configured...")
        else:
            print("[StoSim] State of our {} PBS computing nodes:".format(len(pbs_files)))
            # one shell call per job state; the shell does the counting
            for count_cmd in (
                    'echo "Waiting: $(qselect -u $USER -s W | wc -l)"',
                    'echo "Queued: $(qselect -u $USER -s Q | wc -l)"',
                    'echo "Running: $(qselect -u $USER -s R | wc -l)"'):
                subprocess.call(count_cmd, shell=True)

    print("[StoSim] State of workers and jobs:")
    dispatcher_cmd = 'fjd-dispatcher --project {} --status_only --interval {}'\
        .format(sim_name, utils.get_interval(simfolder))
    subprocess.call(dispatcher_cmd, shell=True)
    return True
Example #2
0
def resume(simfolder):
    """
    (Re)start dispatching jobs by calling the fjd-dispatcher for the
    project named in the simulation's stosim.conf.

    :param string simfolder: relative path to simfolder
    :returns: True (the exit code of the dispatcher is not inspected)
    """
    conf_path = "{}/stosim.conf".format(simfolder)
    project = utils.get_simulation_name(simfolder, conf_path)
    interval = utils.get_interval(simfolder)
    dispatcher_cmd = 'fjd-dispatcher --project {} --interval {}'.format(project, interval)
    subprocess.call(dispatcher_cmd, shell=True)
    return True
Example #3
0
def run(simfolder):
    ''' The main function to start running simulations.

        Copies every ``.conf`` job found in ``<simfolder>/jobs`` into the
        ``jobqueue`` folder of the FJD working directory, then gets workers
        going (via the fjd-recruiter for the 'fjd' scheduler, or by
        submitting the prepared ``.pbs`` files with ``qsub`` for the 'pbs'
        scheduler) and starts the fjd-dispatcher.

        :param string simfolder: relative path to simfolder
        :returns: False if <simfolder>/stosim.conf does not exist,
                  True otherwise (exit codes of the called commands are
                  not inspected)
    '''
    conf_file = "{}/stosim.conf".format(simfolder)
    # Make sure the configuration exists *before* anything is read from it,
    # so that a wrong simfolder only produces the usage message.
    if not os.path.exists(conf_file):
        print("[StoSim] %s/stosim.conf does not exist!" % simfolder)
        utils.usage()
        return False

    sim_name = utils.get_simulation_name(simfolder, conf_file)
    print('*' * 80)
    print("Running simulation {}".format(sim_name))
    print('*' * 80)
    print('')

    # prepare all jobs to be run by FJD
    fjd_dir = fjd.utils.ensure_wdir(sim_name)
    fjd.utils.empty_queues(sim_name)
    jobs_dir = "{}/jobs".format(simfolder)
    for job in [j for j in os.listdir(jobs_dir) if j.endswith('.conf')]:
        copy("{}/{}".format(jobs_dir, job), "{}/jobqueue".format(fjd_dir))
    dispatch_cmd = 'fjd-dispatcher --project {} --end_when_jobs_are_done '\
                   ' --callback "stosim --kill" --interval {}'\
                   .format(sim_name, utils.get_interval(simfolder))

    # now decide if recruiting is done in a local network or on a PBS cluster
    scheduler = utils.get_scheduler(simfolder)
    if scheduler == 'fjd':
        # let FJD handle it in local network (default: only local PC)
        remote_conf = '{}/remote.conf'.format(simfolder)
        # remember whether we handed a copy to FJD, so that cleanup below
        # does not depend on the state of the simfolder after the run
        copied_remote_conf = os.path.exists(remote_conf)
        if copied_remote_conf:
            copy(remote_conf, fjd_dir)
        subprocess.call('fjd-recruiter --project {} hire'.format(sim_name), shell=True)
        # we_exited is defined elsewhere in this module; presumably it flags
        # that the user aborted while recruiting -- TODO confirm
        if not we_exited[0]:
            subprocess.call(dispatch_cmd, shell=True)
        # when recruiter got remote.conf, clean up in fjd dir (only if the
        # copy is still there, otherwise os.remove would raise)
        fjd_remote_conf = '{}/remote.conf'.format(fjd_dir)
        if copied_remote_conf and os.path.exists(fjd_remote_conf):
            os.remove(fjd_remote_conf)

    elif scheduler == 'pbs':
        # queue the PBS jobs we created on a PBS job scheduler (e.g. clusters
        # running Torque or PBS Pro). These simply start FJD workers.
        for job in [j for j in os.listdir(jobs_dir) if j.endswith('.pbs')]:
            subprocess.call('qsub {}/{}'.format(jobs_dir, job), shell=True)
        # Now we start dispatching
        subprocess.call(dispatch_cmd, shell=True)

    return True