Example 1
def crab_resubmit(self, more_ram=False):
    try:
        if more_ram:
            out = crabCommand('resubmit', dir=self.task_dir, proxy=get_proxy_file(), maxmemory="3500")
        else:
            out = crabCommand('resubmit', dir=self.task_dir, proxy=get_proxy_file())
        return out["status"] == "SUCCESS"
    except httplib.HTTPException as e:
        self.logger.warning("got an http exception from crab resubmit")
        self.logger.warning(str(e))
        out = self.status_output.copy()
        return False
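For context, a driver around this method might look like the sketch below; the task object and the memory-failure check are assumptions made up for illustration, not part of the original class.

def maybe_resubmit(task, saw_memory_failures):
    # Ask CRAB for more RAM only when jobs were killed for exceeding memory (assumed policy)
    if saw_memory_failures:
        return task.crab_resubmit(more_ram=True)   # resubmits with maxmemory="3500"
    return task.crab_resubmit()                    # plain resubmission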
Example 2
    def crab_submit(self):
        if not self.crab_config:
            self.get_crab_config()

        # first try to see if the job already exists naively
        if self.get_unique_request_name():
            self.logger.debug("have unique_request_name so not submitting")
            return True


        def do_submit(q, config, proxy=None):
            if not proxy:
                out = crabCommand('submit', config=config)
            else:
                out = crabCommand('submit', config=config, proxy=proxy)
            q.put(out)

        mpq = multiprocessing.Queue()
        mpp = multiprocessing.Process(target=do_submit, args=(mpq, self.crab_config, get_proxy_file()))
        mpp.start()
        mpp.join()
        out = mpq.get()

        if not out:
            return False

        if "uniquerequestname" in out:
            self.unique_request_name = out["uniquerequestname"]
            self.logger.debug("submitted and found unique_request_name: {0}".format(self.unique_request_name))
            return True

        return False
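Running crabCommand in a child process and shipping the result back through a multiprocessing.Queue, as above, is a common way to shield the parent from global state changes or crashes in the callee. A minimal, self-contained sketch of that isolation idea, with a placeholder callable standing in for crabCommand (all names here are illustrative), could look like this:

import multiprocessing

def _worker(q, func, args, kwargs):
    # Child process: run the callable and send its result back through the queue
    q.put(func(*args, **kwargs))

def run_isolated(func, *args, **kwargs):
    # Run func(*args, **kwargs) in a separate process, mirroring the do_submit wrapper above
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=_worker, args=(q, func, args, kwargs))
    p.start()
    result = q.get()   # fetch before join so a full queue cannot deadlock the join
    p.join()
    return result

def fake_submit(config):
    # Placeholder standing in for crabCommand('submit', config=config)
    return {"uniquerequestname": "dummy_" + config}

if __name__ == "__main__":
    print(run_isolated(fake_submit, "my_crab_config"))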
Example 3
        def get(thedir):
            out = {}
            try:
                out = crabCommand('status', dir=thedir, long=False, proxy=get_proxy_file())
            except httplib.HTTPException as e:
                self.logger.warning("got an http exception from crab status, will use cached status_output")
                self.logger.warning(str(e))
                out = self.status_output.copy()

            # Cache the crab status output
            if out:
                self.status_output = out.copy()
            return out
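The same idea, query the server and fall back to the last cached answer on failure, can be sketched in isolation; the class and the injected fetch callable below are illustrative stand-ins for the task object and crabCommand('status', ...), not the original implementation:

class CachedStatus(object):
    # Illustrative stand-in for the object that owns status_output
    def __init__(self, fetch):
        self.fetch = fetch          # callable standing in for crabCommand('status', ...)
        self.status_output = {}

    def get(self):
        try:
            out = self.fetch()
        except IOError:
            # Server hiccup: fall back to whatever the last successful query returned
            out = self.status_output.copy()
        if out:
            self.status_output = out.copy()   # refresh the cache only on a non-empty answer
        return out

if __name__ == "__main__":
    cs = CachedStatus(lambda: {"status": "SUCCESS"})
    print(cs.get())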
Example 4
def condor_submit(**kwargs): # pragma: no cover
    """
    Takes in various keyword arguments to submit a condor job.
    Returns (succeeded:bool, cluster_id:str)
    fake=True kwarg returns (True, -1)
    multiple=True will let `arguments` and `selection_pairs` be lists (of lists)
    and will queue up one job for each element
    """

    if kwargs.get("fake",False):
        return True, -1

    for needed in ["executable","arguments","inputfiles","logdir"]:
        if needed not in kwargs:
            raise RuntimeError("To submit a proper condor job, please specify: {0}".format(needed))

    params = {}

    queue_multiple = kwargs.get("multiple",False)

    params["universe"] = kwargs.get("universe", "Vanilla")
    params["executable"] = kwargs["executable"]
    # params["inputfiles"] = ",".join(kwargs["inputfiles"])
    params["logdir"] = kwargs["logdir"]
    params["proxy"] = get_proxy_file()
    params["timestamp"] = get_timestamp()


    exe_dir = params["executable"].rsplit("/",1)[0]
    if "/" not in os.path.normpath(params["executable"]):
        exe_dir = "."

    # http://uaf-10.t2.ucsd.edu/~namin/dump/badsites.html
    good_sites = [
        "T2_US_Caltech",
        "T2_US_UCSD",
        "T2_US_MIT",
        "T2_US_Nebraska",
        # "T2_US_Purdue", # Issues with fortran?? even though we're in singularity??
        "T2_US_Vanderbilt",
    ]

    params["sites"] = kwargs.get("sites",",".join(good_sites))

    if queue_multiple:
        if len(kwargs["arguments"]) and (type(kwargs["arguments"][0]) not in [tuple,list]):
            raise RuntimeError("If queueing multiple jobs in one cluster_id, arguments must be a list of lists")
        params["arguments"] = map(lambda x: " ".join(map(str,x)), kwargs["arguments"])
        params["inputfiles"] = map(lambda x: ",".join(map(str,x)), kwargs["inputfiles"])
        params["extra"] = []
        if "selection_pairs" in kwargs:
            sps = kwargs["selection_pairs"]
            if len(sps) != len(kwargs["arguments"]):
                raise RuntimeError("Selection pairs must match argument list in length")
            for sel_pairs in sps:
                extra = ""
                for sel_pair in sel_pairs:
                    if len(sel_pair) != 2:
                        raise RuntimeError("This selection pair is not a 2-tuple: {0}".format(str(sel_pair)))
                    extra += '+{0}="{1}"\n'.format(*sel_pair)
                params["extra"].append(extra)
    else:
        params["arguments"] = " ".join(map(str,kwargs["arguments"]))
        params["inputfiles"] = ",".join(map(str,kwargs["inputfiles"]))
        params["extra"] = ""
        if "selection_pairs" in kwargs:
            for sel_pair in kwargs["selection_pairs"]:
                if len(sel_pair) != 2:
                    raise RuntimeError("This selection pair is not a 2-tuple: {0}".format(str(sel_pair)))
                params["extra"] += '+{0}="{1}"\n'.format(*sel_pair)

    params["proxyline"] = "x509userproxy={proxy}".format(proxy=params["proxy"])

    # Require singularity+cvmfs unless the machine is a uaf-* or uafino node.
    # NOTE: double {{ and }} because this string gets str.format'ted later on.
    # A slot must either have singularity and cvmfs, or be on a uaf/uafino machine; on uaf machines
    # the SlotID is also capped (3 or 7 depending on TotalSlots) so that we don't take all the cores of a uaf.
    requirements_line = 'Requirements = ((HAS_SINGULARITY=?=True) && (HAS_CVMFS_cms_cern_ch =?= true)) || (regexp("(uaf-[0-9]{{1,2}}|uafino)\.", TARGET.Machine) && !(TARGET.SlotID>(TotalSlots<14 ? 3:7) && regexp("uaf-[0-9]", TARGET.machine)))'
    if kwargs.get("universe","").strip().lower() in ["local"]:
        kwargs["requirements_line"] = "Requirements = "
    if kwargs.get("requirements_line","").strip():
        requirements_line = kwargs["requirements_line"]

    template = """
universe={universe}
+DESIRED_Sites="{sites}"
executable={executable}
transfer_executable=True
transfer_output_files = ""
+Owner = undefined
+project_Name = \"cmssurfandturf\"
log={logdir}/{timestamp}.log
output={logdir}/std_logs/1e.$(Cluster).$(Process).out
error={logdir}/std_logs/1e.$(Cluster).$(Process).err
notification=Never
should_transfer_files = YES
when_to_transfer_output = ON_EXIT
"""
    template += "{0}\n".format(params["proxyline"])
    template += "{0}\n".format(requirements_line)
    if kwargs.get("stream_logs",False):
        template += "StreamOut=True\nstream_error=True\nTransferOut=True\nTransferErr=True\n"
    for ad in kwargs.get("classads",[]):
        if len(ad) != 2:
            raise RuntimeError("This classad pair is not a 2-tuple: {0}".format(str(ad)))
        template += '+{0}="{1}"\n'.format(*ad)
    do_extra = len(params["extra"]) == len(params["arguments"])
    if queue_multiple:
        template += "\n"
        for ijob,(args,inp) in enumerate(zip(params["arguments"],params["inputfiles"])):
            template += "arguments={0}\n".format(args)
            template += "transfer_input_files={0}\n".format(inp)
            if do_extra:
                template += "{0}\n".format(params["extra"][ijob])
            template += "queue\n"
            template += "\n"
    else:
        template += "arguments={0}\n".format(params["arguments"])
        template += "transfer_input_files={0}\n".format(params["inputfiles"])
        template += "{0}\n".format(params["extra"])
        template += "queue\n"

    if kwargs.get("return_template",False):
        return template.format(**params)

    buff = template.format(**params)

    with open("{0}/submit.cmd".format(exe_dir),"w") as fhout:
        fhout.write(buff)

    extra_cli = ""
    schedd = kwargs.get("schedd","") # see note in condor_q about `schedd`
    if schedd:
        extra_cli += " -name {} ".format(schedd)
    do_cmd("mkdir -p {}/std_logs/ ".format(params["logdir"]))

    out = do_cmd("condor_submit {}/submit.cmd {}".format(exe_dir,extra_cli))


    succeeded = False
    cluster_id = -1
    if "job(s) submitted to cluster" in out:
        succeeded = True
        cluster_id = out.split("submitted to cluster ")[-1].split(".",1)[0].strip()
    else:
        raise RuntimeError("Couldn't submit job to cluster because:\n----\n{0}\n----".format(out))

    return succeeded, cluster_id
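A hedged usage sketch: the paths and the classad tag below are made up for illustration, and do_cmd, get_proxy_file, and get_timestamp are assumed to be helpers from the same module. Passing fake=True makes the call return (True, -1) before anything is written or submitted, so it doubles as a dry run:

ok, cluster_id = condor_submit(
    executable="/path/to/wrapper.sh",              # transferred to the worker node
    arguments=["file_1.root", "output_1.root"],    # joined with spaces into the arguments line
    inputfiles=["/path/to/payload.tar.gz"],        # becomes transfer_input_files
    logdir="/path/to/logs",                        # the .log file and std_logs/ end up here
    selection_pairs=[("JobTag", "v1")],            # each pair becomes a +Name="value" classad
    fake=True,                                     # dry run: short-circuits to (True, -1)
)

Dropping fake=True submits for real; alternatively, return_template=True returns the rendered submit file as a string instead of writing submit.cmd, which is a convenient way to inspect the generated Requirements and DESIRED_Sites lines.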