def number_job_user(bj_check_user=None, verbose=True):
    """
    Internal function for sleep_job_user

    Run the site-specific ``e8_bjobs_local.pl`` listing script and count the
    output lines that match the pattern ``"<user> *<user>"``.

    Parameters
    ----------
    bj_check_user : str, optional
        User name searched for in the listing. When empty or None,
        ``utils.get_username()`` is used.
    verbose : bool, optional
        Print the number of matching lines. The default is True.

    Returns
    -------
    n_matching : numpy integer
        Number of output lines in which the pattern was found.
    line_flags : list of bool
        One flag per output line, True where the pattern was found.
    pattern : str
        The regular expression that was searched for.
    """
    # The script path depends on the account running Python, which is not
    # necessarily the user whose jobs are counted.
    script_owner = getpass.getuser()

    if not bj_check_user:
        bj_check_user = utils.get_username()

    listing_cmd = ("perl /dsk/igs2/soft_wrk/" + script_owner
                   + "/SOFT_EPOS8_BIN_TOOLS/SCRIPTS/e8_bjobs_local.pl")

    # NOTE(review): `shell` is a boolean flag for subprocess; the string is
    # only truthy, so the command runs through the default shell. Selecting
    # csh would need executable='/bin/csh' -- confirm the intent.
    raw_listing = subprocess.check_output(listing_cmd, shell='/bin/csh')
    listing_lines = raw_listing.decode("utf-8").split("\n")

    pattern = bj_check_user + ' *' + bj_check_user
    find_in = re.compile(pattern).search

    line_flags = [find_in(line) is not None for line in listing_lines]
    n_matching = np.sum(line_flags)

    if verbose:
        print("INFO: ", n_matching, "running jobs found for",
              bj_check_user)

    return n_matching, line_flags, pattern
Exemple #2
0
def sleep_job_user(bj_check_user=None, maxjob=20, bj_check_wait_time=20):
    """
    Sleep once if the user has ``maxjob`` or more jobs in the listing.

    Parameters
    ----------
    bj_check_user : str, optional
        User name whose jobs are counted. When empty or None,
        ``utils.get_username()`` is used.
    maxjob : int, optional
        Job count from which the function sleeps. The default is 20.
    bj_check_wait_time : int, optional
        Sleep duration in sec. The default is 20.

    Returns
    -------
    None.
    """
    if not bj_check_user:
        bj_check_user = utils.get_username()

    # number_job_user returns (count, per-line flags, pattern). Only the
    # count is wanted here: comparing the whole tuple with an int raises a
    # TypeError. A bare count is still accepted for safety.
    job_info = number_job_user(bj_check_user)
    n_job = job_info[0] if isinstance(job_info, tuple) else job_info

    if n_job >= maxjob:
        print("INFO : sleeping @ " + str(dt.datetime.now()) + " for " +
              str(bj_check_wait_time) + "s b.c." + str(n_job) +
              " jobs are runing")
        time.sleep(bj_check_wait_time)

    return None
def sleep_job_user(bj_check_user=None, minjob=20, bj_check_wait_time=20):
    """
    Internal function for cluster_GFZ_run

    Count the jobs of a user with ``number_job_user`` and, when ``minjob`` or
    more are found, sleep for ``bj_check_wait_time`` seconds.

    Parameters
    ----------
    bj_check_user : str, optional
        User name whose jobs are counted. When empty or None,
        ``utils.get_username()`` is used.
    minjob : int, optional
        Job count from which the function sleeps. The default is 20.
    bj_check_wait_time : int, optional
        Sleep duration in sec. The default is 20.

    Returns
    -------
    bool
        False if the function had to sleep (the caller should check again),
        True if fewer than ``minjob`` jobs were found.
    """
    user = bj_check_user if bj_check_user else utils.get_username()

    n_running, _line_flags, pattern = number_job_user(user)

    if n_running >= minjob:
        # Too many jobs: report, wait, and tell the caller to poll again.
        sleep_msg = "".join([
            "INFO : sleeping @ ", str(dt.datetime.now()),
            " for ", str(bj_check_wait_time),
            "s b.c.", str(n_running),
            " jobs are runing",
        ])
        print(sleep_msg)
        time.sleep(bj_check_wait_time)
        return False

    print("INFO : let's continue, no job matchs the pattern " + pattern)
    return True
def cluster_GFZ_run(commands_list,
                    bunch_on_off=True,
                    bunch_job_nbr=10,
                    bunch_wait_time=600,
                    bj_check_on_off=True,
                    bj_check_mini_nbr=2,
                    bj_check_wait_time=120,
                    bj_check_user="******",
                    add_cjob_cmd_prefix=True,
                    wait_sleeping_before_launch=5):
    """
    Send a list of commands to the cluster, one csh process per command.

    Each command is printed and logged to ``/home/<bj_check_user>/test_tmp.log``
    and then fed to ``/bin/csh`` through its standard input.

    Parameters
    ----------
    commands_list : list of str
        List of commands you want to run (one command per job).
    bunch_on_off : bool, optional
        If False, send all the jobs to the cluster without bunch pauses.
        If True, pause after every <bunch_job_nbr> launched jobs.
        See the options below.
        The default is True.
    bunch_job_nbr : int, optional
        Number of jobs which will be sent to the cluster at once.
        The default is 10.
    bunch_wait_time : int, optional
        Pause after a bunch has been launched, in sec.
        The default is 600.
    bj_check_on_off : bool, optional
        After the bunch pause, check if previous jobs are still running.
        The default is True.
    bj_check_mini_nbr : int, optional
        The function waits <bj_check_wait_time> sec more as long as
        <bj_check_mini_nbr> or more jobs are running.
        The default is 2.
    bj_check_wait_time : int, optional
        Wait time between two checks in sec. The default is 120.
    bj_check_user : str, optional
        Username you want to check in the batch jobs; it is also used to
        build the log file path. "auto" (or an empty value) selects
        ``utils.get_username()``.
        NOTE(review): the default literal in the signature is "******",
        which looks like a redacted placeholder -- confirm the intended
        default (probably "auto").
    add_cjob_cmd_prefix : bool, optional
        If True, every command is wrapped as ``cjob -c '<command>'``.
        No check is done for a prefix that is already present.
        The default is True.
    wait_sleeping_before_launch : int, optional
        Waiting time before each launch, in sec.
        (To cancel the run if necessary)
        The default is 5.

    Returns
    -------
    None.

    """
    if not bj_check_user or bj_check_user == "auto":
        bj_check_user = utils.get_username()

    log_path = "/home/" + bj_check_user + "/test_tmp.log"

    if add_cjob_cmd_prefix:
        commands_list_opera = ["cjob -c '" + cmd + "'"
                               for cmd in commands_list]
    else:
        commands_list_opera = commands_list

    print("****** JOBS THAT WILL BE LAUNCHED ******")
    print('Number of jobs : ' + str(len(commands_list_opera)))
    print("****************************************")

    # The context manager guarantees the log is closed, even on error.
    with open(log_path, 'w+') as log_file:

        def _report(message):
            """Print a message and append it to the log file."""
            print(message)
            log_file.write(message + '\n')
            # Flush so the log can be followed while the script sleeps.
            log_file.flush()

        for i_bunch, kommand in enumerate(commands_list_opera, start=1):

            ########## LOG/PRINT command
            _report(kommand)

            ########## LOG/PRINT sleep
            _report("INFO : script is sleeping for " + str(
                wait_sleeping_before_launch) + "sec (so you can cancel it) ")
            time.sleep(wait_sleeping_before_launch)

            ########## LOG/PRINT start
            _report("INFO : script starts @ " + str(dt.datetime.now()))

            ########## RUN command here !!
            # The command is passed to csh on stdin; its output is captured
            # and discarded, as before.
            process = subprocess.Popen('',
                                       executable='/bin/csh',
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            process.communicate(kommand.encode())

            ########## Bunch On/Off : check if a bunch of job has been launched
            if not (bunch_on_off and np.mod(i_bunch, bunch_job_nbr) == 0):
                continue

            _report("INFO : sleeping @ " + str(dt.datetime.now()) +
                    " for " + str(bunch_wait_time) + "s b.c. a bunch of " +
                    str(bunch_job_nbr) + " jobs has been launched")
            time.sleep(bunch_wait_time)

            ########## Bunch On/Off Check : check if the bunch is finished
            # The polling loop lives inside this branch: it used to sit
            # outside, where its flag was undefined when the check was off.
            if bj_check_on_off:
                print(
                    "INFO : BJ Check : All jobs should be finished now, let's see if there is some latecomers"
                )
                bj_check_tigger = False
                while not bj_check_tigger:
                    bj_check_tigger = sleep_job_user(
                        bj_check_user=bj_check_user,
                        minjob=bj_check_mini_nbr,
                        bj_check_wait_time=bj_check_wait_time)

    return None
Exemple #5
0
def cluster_GFZ_run(commands_list,
                    bunch_on_off=True,
                    bunch_job_nbr=10,
                    bunch_wait_time=600,
                    bj_check_on_off=True,
                    bj_check_mini_nbr=2,
                    bj_check_wait_time=120,
                    bj_check_user="******"):
    """
    Send a list of commands to the cluster, one csh process per command.

    Each command is printed and logged to ``/home/<bj_check_user>/test_tmp.log``
    and then fed to ``/bin/csh`` through its standard input.

    Parameters
    ----------
    commands_list : list of str
        List of commands you want to run (one command per job).
    bunch_on_off : bool, optional
        If True, pause after every <bunch_job_nbr> launched jobs.
        The default is True.
    bunch_job_nbr : int, optional
        Number of jobs launched between two bunch pauses.
        The default is 10.
    bunch_wait_time : int, optional
        Pause after a bunch has been launched, in sec.
        The default is 600.
    bj_check_on_off : bool, optional
        After the bunch pause, poll the job listing until few enough jobs
        of the user remain. The default is True.
    bj_check_mini_nbr : int, optional
        The function keeps waiting as long as strictly more than
        <bj_check_mini_nbr> listing lines match the user pattern.
        The default is 2.
    bj_check_wait_time : int, optional
        Wait time between two checks in sec. The default is 120.
    bj_check_user : str, optional
        Username searched for in the job listing; it is also used to build
        the log file path. "auto" (or an empty value) selects
        ``utils.get_username()``.
        NOTE(review): the default literal in the signature is "******",
        which looks like a redacted placeholder -- confirm the intended
        default (probably "auto").

    Returns
    -------
    None.
    """
    wait_sleeping_before_launch = 5

    if not bj_check_user or bj_check_user == "auto":
        bj_check_user = utils.get_username()

    log_path = "/home/" + bj_check_user + "/test_tmp.log"

    # NOTE(review): this path is tied to one specific account -- confirm
    # whether it should follow the current user instead.
    bj_command = "perl /dsk/igs2/soft_wrk/psakicki/SOFT_EPOS8_BIN_TOOLS/SCRIPTS/e8_bjobs_local.pl"
    bj_pattern_checked = bj_check_user + ' *' + bj_check_user

    print("****** JOBS THAT WILL BE LAUNCHED ******")
    print('Number of jobs : ' + str(len(commands_list)))
    print("****************************************")

    # The context manager guarantees the log is closed, even on error.
    with open(log_path, 'w+') as log_file:

        def _report(message):
            """Print a message and append it to the log file."""
            print(message)
            log_file.write(message + '\n')
            # Flush so the log can be followed while the script sleeps.
            log_file.flush()

        for i_bunch, kommand in enumerate(commands_list, start=1):

            ########## LOG/PRINT command
            _report(kommand)

            ########## LOG/PRINT sleep
            _report("INFO : script is sleeping for " + str(
                wait_sleeping_before_launch) + "sec (so you can cancel it) ")
            time.sleep(wait_sleeping_before_launch)

            ########## LOG/PRINT start
            _report("INFO : script starts @ " + str(dt.datetime.now()))

            ########## RUN command here !!
            # The command is passed to csh on stdin; its output is captured
            # and discarded, as before.
            process = subprocess.Popen('',
                                       executable='/bin/csh',
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            process.communicate(kommand.encode())

            ########## Bunch On/Off : check if a bunch of job has been launched
            if not (bunch_on_off and np.mod(i_bunch, bunch_job_nbr) == 0):
                continue

            _report("INFO : sleeping @ " + str(dt.datetime.now()) +
                    " for " + str(bunch_wait_time) + "s b.c. a bunch of " +
                    str(bunch_job_nbr) + " jobs has been launched")
            time.sleep(bunch_wait_time)

            ########## Bunch On/Off Check : check if the bunch is finished
            # The polling loop lives inside this branch: it used to sit
            # outside, where its variables were undefined when the check
            # was off.
            if bj_check_on_off:
                print(
                    "INFO : BJ Check : All jobs should be finished now, let's see if there is some latecomers"
                )
                bj_check_tigger = False
                while not bj_check_tigger:
                    # `shell` is a boolean flag: the former '/bin/csh' value
                    # was only truthy, so the default shell is used here too.
                    bj_output = subprocess.check_output(bj_command,
                                                        shell=True)
                    # check_output returns bytes: decode and split so the
                    # pattern is searched line by line.
                    bj_list = bj_output.decode("utf-8").split("\n")

                    bj_list_checked = [
                        bool(re.search(bj_pattern_checked, line))
                        for line in bj_list
                    ]
                    bj_list_checked_sum = np.sum(bj_list_checked)

                    if bj_list_checked_sum > bj_check_mini_nbr:
                        print("INFO : sleeping @ " + str(dt.datetime.now()) +
                              " for " + str(bj_check_wait_time) + "s b.c." +
                              str(bj_list_checked_sum) +
                              "job(s) match pattern " + bj_pattern_checked)
                        print(bj_list)
                        time.sleep(bj_check_wait_time)
                    else:
                        bj_check_tigger = True
                        print("INFO : let's continue, no job matchs the pattern " +
                              bj_pattern_checked)