def GetScopes(select_user=False, select_group=False, select_official=False):
    ### Return the rucio scopes, optionally filtered to user, group or official (neither) scopes
    logging.info("Reading in the scopes:")
    Scopes = getGmdOutput("rucio list-scopes")
    ScopeList = ClearFromDuplicates([
        Entry for Entry in Scopes
        if (select_user and "user" in Entry) or (select_group and "group" in Entry) or
           (select_official and "user" not in Entry and "group" not in Entry)
    ])
    logging.info("Done. Found %d scopes" % (len(ScopeList)))
    return ScopeList
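## Minimal usage sketch (not part of the original snippets); it assumes a
## configured rucio client so that "rucio list-scopes" succeeds.
user_scopes = GetScopes(select_user=True)           # scopes containing "user"
group_scopes = GetScopes(select_group=True)         # scopes containing "group"
official_scopes = GetScopes(select_official=True)   # neither user nor group scopes
logging.info("user: %d  group: %d  official: %d" %
             (len(user_scopes), len(group_scopes), len(official_scopes)))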
Example #2
def GetDataSetReplicas(DS):
    Cmd = "rucio list-rules %s --csv" % (DS)
    logging.debug("Executing " + Cmd)
    Replicas = []
    all_rses = getRSEs()
    for line in getGmdOutput(Cmd):
        try:
            ### The fifth CSV column of the rule holds the RSE expression
            ds_rse = line.split(",")[4].strip()
        except IndexError:
            continue
        if ds_rse in all_rses: Replicas.append(ds_rse)
    return Replicas
Example #3
def ListDisk(RSE):
    if len(RSE) == 0:
        logging.error("No disk is given")
        return []

    logging.info("Read content of " + RSE)
    OnDisk = getGmdOutput("rucio list-datasets-rse " + RSE)
    MyDataSets = []
    for Candidates in OnDisk:
        if Candidates.startswith('-'): continue
        elif Candidates.startswith('SCOPE'): continue
        MyDataSets.append(Candidates)
    return MyDataSets
Example #4
def GetUserRules(user):
    Cmd = "rucio list-rules --account %s --csv" % (user)
    logging.debug("Executing " + Cmd)
    OnDisk = getGmdOutput(Cmd)
    MyRules = []
    for Rule in OnDisk:
        try:
            ### Rule id, dataset name and RSE expression from the CSV columns
            fragments = Rule.split(",")
            ID = fragments[0].strip()
            DataSet = fragments[2].strip()
            Rule_RSE = fragments[4].strip()
        except IndexError:
            continue
        MyRules.append((ID, DataSet, Rule_RSE))
    return MyRules
Example #5
def GetDataSetInfo(DS, RSE, Subscriber=None):
    Cmd = "rucio list-rules %s --csv" % (DS)
    logging.debug("Executing " + Cmd)
    Rules = getGmdOutput(Cmd)
    for Rule in Rules:
        try:
            ID = Rule.split(",")[0].strip()
            Owner = Rule.split(",")[1].strip()
            RuleRSE = Rule.split(",")[4].strip()
            if RuleRSE == RSE and (Subscriber is None or Subscriber == Owner):
                return ID, Owner
        except IndexError:
            continue
    return None, None
Example #6
def get_num_scheduled(user_name):
    cmd = "squeue --format=\"%%i\" -u %s" % (user_name)
    num_jobs = 0
    for i, line in enumerate(getGmdOutput(cmd)):
        ### The first line is the squeue header
        if i == 0: continue
        ### Plain job ids and single array members count as one job each
        if line.find("_") == -1 or (line.find("[") == -1 and line.find("]") == -1):
            num_jobs += 1
        else:
            ### Pending arrays are printed as e.g. 12345_[1-50] or 12345_[1-50%8];
            ### count the upper bound of the range between "[" and "%" (or "]")
            end = line.rfind("%") if line.rfind("%") != -1 else line.rfind("]")
            array_size = line[line.find("[") + 1:end]
            try:
                num_jobs += int(array_size.split("-")[-1])
            except ValueError:
                pass
    return num_jobs
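## Hedged usage sketch (not part of the original snippets): squeue prints plain
## job ids ("12345"), single array members ("12345_7") and pending arrays such
## as "12345_[1-50]" or "12345_[1-50%8]"; the function above counts the first two
## forms as one job each and adds the upper bound of the bracketed range otherwise.
import os
n_pending = get_num_scheduled(os.getenv("USER", "unknown"))
logging.info("Currently %d jobs are scheduled" % (n_pending))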
Example #7
def ListDiskWithSize(RSE):
    if len(RSE) == 0:
        logging.error("No disk is given")
        return []
    logging.info("Read content of %s and also save the size of each dataset" %
                 (RSE))
    OnDisk = getGmdOutput("rucio list-datasets-rse %s --long" % (RSE))
    MyDS = []
    for Candidates in OnDisk:
        try:
            DS = Candidates.split("|")[1].strip()
            Size = Candidates.split("|")[3].strip()
            ### The size column is interpreted as <stored>/<total> in bytes
            Stored = float(Size[:Size.find("/")]) / 1024 / 1024 / 1024
            TotalSize = float(Size[Size.find("/") + 1:]) / 1024 / 1024 / 1024
        except (IndexError, ValueError):
            continue
        logging.info("%s   %.2f GB   %.2f GB" % (DS, Stored, TotalSize))
        MyDS.append((DS, TotalSize))
    return sorted(MyDS, key=lambda size: size[1], reverse=True)
Example #8
    def __partition(self, RunTime):
        partition = ""
        OldTime = 1.e25
        ### No %-formatting is applied to this string, so the sinfo format
        ### specifiers must be single '%' characters
        part_cmd = "sinfo --format='%P %l %a'"
        for L in getGmdOutput(part_cmd):
            if len(L.split()) < 3: continue

            name = L.split()[0].replace("*", "")
            Time = L.split()[1]
            Mode = L.split()[2]
            t0 = TimeToSeconds(Time)
            ### Pick the shortest partition time limit that still covers the requested run time
            if t0 > 0 and t0 > TimeToSeconds(RunTime) and t0 < OldTime and Mode == "up":
                partition = name
                OldTime = t0
        if len(partition) == 0:
            logging.error("Invalid run-time given %s" % (RunTime))
            exit(1)
        return " --partition %s --time='%s' " % (partition, RunTime)
Example #9
def ListDataFilesWithSize(DS):
    file_list = []
    Cmd = "rucio list-files %s --csv" % (DS)
    for file_in_ds in getGmdOutput(Cmd):
        #  group.perf-muons:group.perf-muons.17916903.EXT0._000027.NTUP_MCPTP.root,6DD081BE-7CAE-4AAE-8F9C-E1E3612AA09C,8b139e60,3.893 GB,None
        #
        file_name = file_in_ds.split(",")[0]
        ## Skim away the scope
        file_name = file_name[file_name.find(":") + 1:]

        file_size_str = file_in_ds.split(",")[3]
        unit = file_size_str.split(" ")[1].replace("i", "").upper()
        file_size = float(file_size_str.split(" ")[0])
        if unit == "GB": file_size *= 1024 * 1024 * 1024
        elif unit == "MB": file_size *= 1024 * 1024
        elif unit == "KB": file_size *= 1024
        else:
            logging.error("Unkown file-size %s" % (file_size_str))
            exit(1)
        file_list += [(file_name, file_size)]
    return file_list
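## Illustrative sketch (not part of the original snippets) of the size parsing
## above, applied to the sample CSV line quoted in the comment of the function:
sample = "group.perf-muons:group.perf-muons.17916903.EXT0._000027.NTUP_MCPTP.root,6DD081BE-7CAE-4AAE-8F9C-E1E3612AA09C,8b139e60,3.893 GB,None"
sample_name = sample.split(",")[0]
sample_name = sample_name[sample_name.find(":") + 1:]        # scope stripped away
sample_size = float(sample.split(",")[3].split(" ")[0]) * 1024 * 1024 * 1024
logging.info("%s has %.0f bytes" % (sample_name, sample_size))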
Example #10
def GetDataSetFiles(dsname, RSE, protocols):
    logging.info("Get the files of the dataset %s at %s" % (dsname, RSE))
    logging.info("Issuing command: rucio list-file-replicas --protocols %s --rse %s %s " % (protocols, RSE, dsname))
    DSReplicas = getGmdOutput("rucio list-file-replicas --protocols %s --rse %s %s " % (protocols, RSE, dsname))
    DS = []
    for line in DSReplicas:
        Entry = None
        LineInfo = line.split()
        for i, column in enumerate(LineInfo):
            if RSE in column:
                try:
                    ### The replica path follows directly after the RSE column
                    Entry = LineInfo[i + 1]
                    break
                except IndexError:
                    logging.warning("There was some strange noise here: %s" % (column))
        if Entry:
            logging.info("Entry: " + Entry)
            ### Optionally map the storage prefix onto a locally mounted path
            ReplacePath = os.getenv("CLSUB_RUCIOREPLACEPATH")
            LocalPath = os.getenv("CLSUB_RUCIOLOCALPATH")
            if ReplacePath and LocalPath:
                Entry = Entry.replace(ReplacePath, LocalPath)
            DS.append(Entry)
    return DS
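## Hedged configuration sketch (not part of the original snippets): the two
## environment variables below are read by GetDataSetFiles to map the storage
## prefix of each replica onto a locally mounted path. All values here are
## placeholders, as are the dataset, RSE and protocol names.
os.environ["CLSUB_RUCIOREPLACEPATH"] = "root://storage.example.org:1094//pnfs/data"
os.environ["CLSUB_RUCIOLOCALPATH"] = "/cluster/localgroupdisk"
local_files = GetDataSetFiles("user.jdoe:user.jdoe.myDataset", RSE="EXAMPLE_LOCALGROUPDISK", protocols="root")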
Example #11
def getRSEs():
    Cmd = "rucio list-rses"
    logging.debug("Executing " + Cmd)
    return sorted(getGmdOutput(Cmd))
Example #12
    def __slurm_id(self, job):
        Ids = []
        jobName = ''
        ### Bare subids as requested by the user
        sub_ids = []
        if isinstance(job, str):
            jobName = job
            #### feature to get only sub id's in a certain range
        elif isinstance(job, tuple):
            jobName = job[0]
            #### Users can either pipe a plain list of job names,
            ####     [ "FirstJobToHold", "SecondJobToHold", "TheCake" ],
            #### or tuples which can take the form
            ####     [ ("MyJobsArray", [1,2,3,4,5,6,7,8,9,10,11]), "The cake" ],
            #### meaning that the job IDs are constructed following the
            #### sequence 1-11. It's important to emphasize that the array
            #### *must* start with a 1. 0's are ignored by the system. There
            #### is also a third option, where the user passes
            ####     [ ("MyJobArray", -1) ],
            #### which indicates a one-by-one dependency of the tasks in two
            #### consecutive arrays. A short sketch of these forms is given
            #### after this method.
            if isinstance(job[1], list):
                sub_ids = sorted([int(i) for i in job[1]])
                if -1 in sub_ids:
                    logging.warning(
                        "<__slurm_id>: -1 found in sub ids. If you want to pipe a 1 by 1 dependence of subjobs in two arrays please add [ (%s, -1) ]"
                        % (jobName))
        else:
            logging.error("Invalid object:")
            logging.error(job)
            exit(1)
        ### Find all associated job-ids
        ### squeue lists every job as '<job-name> <job-id>'; no %-formatting is
        ### applied here, so the format specifiers must be single '%' characters
        for J in getGmdOutput("squeue --format=\"%j %i\""):
            if len(J.strip()) == 0: continue
            fragments = J.split()
            if len(fragments) == 0: continue
            if fragments[0].strip() == jobName:
                cand_id = fragments[1].strip()
                ### The pending job is an array
                if cand_id.find("_") != -1:
                    main_job = cand_id[:cand_id.find("_")]
                    job_range = cand_id[cand_id.find("_") + 1:]
                    ### We simply do not care about particular subjobs
                    if len(sub_ids) == 0:
                        if main_job not in Ids: Ids += [main_job]
                    elif len(sub_ids) > 0:
                        Ids += [
                            "%s_%d" % (main_job, i) for i in sub_ids if i > 0
                        ]

                elif cand_id not in Ids:
                    Ids += [cand_id]
        Ids.sort()
        ### The allowed array size on the cluster is smaller than the
        ### requested size by the user --> jobs got split into multiple arrays
        if len(sub_ids) > 0 and max(sub_ids) > self.max_array_size():
            split_Ids = []
            for sub in sub_ids:
                ## How many times do we exceed the array size
                ## (integer division; the result is used as a list index below)
                n = (sub - sub % self.max_array_size()) // self.max_array_size()
                rel_jobs = [
                    int(j.split("_")[0]) for j in Ids
                    if int(j.split("_")[1]) == sub
                ]
                if len(rel_jobs) <= n:
                    logging.warning(
                        "<__slurm_id>: Failed to establish dependency on %s." %
                        (jobName))
                    continue
                split_Ids += [
                    "%d_%d" %
                    (rel_jobs[n], sub % self.max_array_size() if sub %
                     self.max_array_size() != 0 else self.max_array_size())
                ]
            return split_Ids
        return Ids
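## Hypothetical sketch (not part of the original snippets) of the three job
## specifications described in the comment block of __slurm_id. The job names
## are made up; resolving them into slurm ids is done by that (private) method
## of the surrounding scheduler class.
hold_jobs = [
    "FirstJobToHold",            # plain job name: depend on every matching slurm id
    ("MyJobsArray", [1, 2, 3]),  # depend only on these sub-ids (the sequence must start at 1, 0 is ignored)
    ("OtherArray", -1),          # one-by-one dependency between two consecutive arrays
]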