Example No. 1
def main():
    """Create file lists for one dataset or a list of datasets."""
    CheckRucioSetup()
    CheckRemainingProxyTime()
    RunOptions = getArgumentParser().parse_args()

    all_files = []
    if RunOptions.single_out_file and len(RunOptions.out_file_name) == 0:
        logging.error("Please provide a file name if you run with --single-out_file")
        exit(1)
    # Do we have one dataset, or a file with a list of them?
    if os.path.exists(RunOptions.dataset):
        with open(RunOptions.dataset) as dsfile:
            for line in dsfile:
                # Ignore comment lines and empty lines
                if line.startswith('#'): continue
                realline = line.strip()
                if realline.find("_tid") > -1: realline = realline[0:realline.find("_tid")]
                if not realline: continue  # Ignore whitespace

                if not RunOptions.single_out_file:
                    createFileList(realline, RunOptions)
                else:
                    all_files += GetDataSetFiles(realline, RunOptions.RSE, RunOptions.protocols)

    else:
        createFileList(RunOptions.dataset, RunOptions)

    if len(all_files) > 0:
        WriteList(all_files, RunOptions.out_file_name)
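
Example No. 1 relies on a getArgumentParser() helper that is not shown above. A minimal sketch of what such a parser could look like, assuming the option names used in the snippet (--dataset, --RSE, --protocols, --single_out_file, --out_file_name); the real script may define them differently:

import argparse

def getArgumentParser():
    # Hypothetical reconstruction of the parser the example assumes; defaults are guesses.
    parser = argparse.ArgumentParser(description='Create file lists for one dataset or a list of datasets.')
    parser.add_argument('--dataset', required=True, help='Dataset name, or path to a text file with one dataset per line')
    parser.add_argument('--RSE', default='', help='RSE on which the datasets are stored')
    parser.add_argument('--protocols', default='root', help='Protocol(s) used to build the file paths')
    parser.add_argument('--single_out_file', action='store_true', help='Write all files into a single output list')
    parser.add_argument('--out_file_name', default='', help='Name of the combined output file list')
    return parser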
Example No. 2
    def __init__(
        self,
        ### Container with all dataset names
        rucio_container,
        ### RSE where the container is stored
        dest_rse,
        ### Download the container to disk
        download=False,
        ### Merge the datasets into a common file
        merge=False,
        ### Download directory
        download_dir="/tmp/download",
        ### Destination directory for the merged output
        destination_dir="/tmp",
        ### Cluster engine for a potential merge
        cluster_engine=None,
        ### Maximum size per merged file (bytes)
        max_merged_size=25 * 1024 * 1024 * 1024,
        ### Logical dataset name (optional)
        logical_name="",
        ### Rucio groupdisk protocol
        protocol="root",
        ### Hold jobs
        hold_jobs=[],
        ### Files per merge job
        files_per_merge_job=20,
    ):

        self.__container_name = rucio_container
        self.__rse = dest_rse

        self.__download = download
        self.__merge = merge

        self.__download_dir = download_dir
        self.__files_per_merge = files_per_merge_job
        while self.__download_dir.find("//") != -1:
            self.__download_dir = self.__download_dir.replace("//", "/")
        self.__dest_dir = destination_dir

        self.__engine = cluster_engine
        self.__max_file_size = max_merged_size

        self.__logical_name = logical_name
        CheckRemainingProxyTime()
        self.__files_on_rse = []
        if len(rucio_container) > 0 and not self.__download:
            self.__files_on_rse = [
                f for f in GetDataSetFiles(self.container(), self.rse(), protocol) if self._is_good_file(f)
            ]
        #### List of files to be downloaded on disk
        self.__files_on_disk = []
        if self.__download:
            CreateDirectory(self.ds_download_dir(), False)
            downloadDataSets(InputDatasets=[self.container()], Destination=self.__download_dir, use_singularity=True)
            self.__files_on_disk = [
                "%s/%s" % (self.ds_download_dir(), f) for f in os.listdir(self.ds_download_dir())
                if self._is_good_file(self.ds_download_dir() + "/" + f)
            ]
        self.__merge_interfaces = []
        self.__hold_jobs = hold_jobs
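
This constructor is used in Example No. 8 below as DataSetFileHandler. A hedged usage sketch, assuming that class name, placeholder container and RSE names, and a pre-configured submit_engine object from the surrounding job-submission setup:

# Hypothetical usage; the container, RSE and submit_engine values are placeholders.
handler = DataSetFileHandler(
    rucio_container="user.jdoe:user.jdoe.my_container",
    dest_rse="MY-RSE_LOCALGROUPDISK",
    download=False,
    merge=True,
    destination_dir="/tmp",
    cluster_engine=submit_engine,
    max_merged_size=25 * 1024 * 1024 * 1024,  # 25 GB per merged file
    files_per_merge_job=20,
)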
Example No. 3
def main():
    """Request datasets to RSE location."""
    CheckRucioSetup()
    CheckRemainingProxyTime()

    RunOptions = getArgumentParser().parse_args()
    List = ClearFromDuplicates(ReadListFromFile(RunOptions.list))

    ### Start replication of the datasets
    initiateReplication(ListOfDataSets=List,
                        Rucio=RunOptions.rucio,
                        RSE=RunOptions.RSE,
                        lifeTime=RunOptions.lifetime,
                        approve=RunOptions.askapproval,
                        comment=RunOptions.comment)
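
initiateReplication is not shown in this example. A minimal sketch of what it might do, assuming it wraps the rucio add-rule command line (the real helper may use the Rucio Python client instead):

import os

def initiateReplication(ListOfDataSets, Rucio, RSE, lifeTime=-1, approve=False, comment=""):
    # Hypothetical sketch: create one replication rule per dataset via the rucio CLI.
    for ds in ListOfDataSets:
        cmd = "rucio"
        if len(Rucio) > 0: cmd += " --account %s" % Rucio
        cmd += " add-rule %s 1 %s" % (ds, RSE)
        if lifeTime > 0: cmd += " --lifetime %d" % lifeTime
        if approve: cmd += " --ask-approval"
        if len(comment) > 0: cmd += " --comment '%s'" % comment
        os.system(cmd)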
Example No. 4
def getAmiClient():
    global m_AMIClient
    if m_AMIClient: return m_AMIClient
    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI

    except ImportError:
        print('No AMI setup was found. Please set up AMI using "localSetupPyAMI"')
        sys.exit(1)

    if os.getenv("RUCIO_ACCOUNT") is None:
        print "No RUCIO ACCOUNT is available.. please define a rucio Account"
        exit(1)

    while CheckRemainingProxyTime() < 600:
        print "VOMS-PROXY is running out, renewing..."

    m_AMIClient = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    return m_AMIClient
Example No. 5
def getAmiClient():
    global m_AMIClient
    if m_AMIClient: return m_AMIClient
    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI

    except ImportError:
        logging.error(
            'No AMI setup was found. Please set up AMI using "localSetupPyAMI"')
        sys.exit(1)

    if not RUCIO_ACCOUNT:
        logging.error(
            "No RUCIO_ACCOUNT is available. Please define a rucio account")
        exit(1)

    while CheckRemainingProxyTime() < 600:
        logging.info("VOMS-PROXY is running out, renewing...")

    m_AMIClient = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    return m_AMIClient
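
Both variants of getAmiClient poll CheckRemainingProxyTime() until the VOMS proxy is valid for at least 600 seconds. Its implementation is not shown; a minimal sketch under the assumption that it wraps the standard voms-proxy-info/voms-proxy-init commands:

import subprocess

def CheckRemainingProxyTime():
    # Hypothetical sketch: return the remaining proxy lifetime in seconds, renewing if it is short.
    try:
        time_left = int(subprocess.check_output(["voms-proxy-info", "--timeleft"]).decode().strip())
    except (subprocess.CalledProcessError, ValueError, OSError):
        time_left = 0
    if time_left < 600:
        # Renew interactively; the real helper may pass a different VO than 'atlas'.
        subprocess.call(["voms-proxy-init", "--voms", "atlas"])
    return time_left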
Example No. 6
def main():
    """List datasets located at a RSE location."""
    CheckRucioSetup()
    CheckRemainingProxyTime()

    RunOptions = getArgumentParser().parse_args()

    Today = time.strftime("%Y-%m-%d")
    Patterns = RunOptions.pattern
    OutDir = RunOptions.OutDir
    RSE = RunOptions.RSE
    if ',' in RSE:
        # In case people have more than one RSE in their environment variable for grid submits
        RSE = RSE.split(',')[0]

    Prefix = ''
    if RunOptions.MyRequests:
        Prefix = 'MyRequestTo_'
        DS = ListUserRequests(RSE, RunOptions.rucio)
    else:
        DS = ListDisk(RSE)


###    MetaFile = open("Content_%s.txt"%(RSE), 'w')
###    for DataSet, Size in ListDiskWithSize(RSE):
###           Owner, ID = GetDataSetInfo(DataSet,RSE)
###           line = "%s  |   %s   | %s  | %.2f GB"%(ID, Owner,DataSet, Size)
###           MetaFile.write("%s\n"%(line))
###           print line
###    MetaFile.close()
###    exit(0)

    if len(DS) == 0:
        logging.warning("Disk is empty.")
        exit(0)
    CreateDirectory(OutDir, False)

    ###########
    #   Define the file list name
    ###########
    FileList = "%s%s_%s" % (Prefix, RSE, Today)
    if len(Patterns) > 0: FileList += "_%s" % ('_'.join(Patterns))
    if len(RunOptions.exclude) > 0:
        FileList += "_exl_%s" % ('_'.join(RunOptions.exclude))
    FileList += '.txt'
    Write = []
    for d in sorted(DS):
        allPatternsFound = True
        for Pattern in Patterns:
            if Pattern not in d:
                allPatternsFound = False
                break
        for Pattern in RunOptions.exclude:
            if Pattern in d:
                allPatternsFound = False
                break
        if allPatternsFound:
            if d.split(".")[-1].isdigit(): d = d[:d.rfind(".")]
            if d.find("_tid") != -1: d = d[0:d.rfind("_tid")]
            if len([w for w in Write if w.find(d) != -1]) > 0: continue
            logging.info("Write dataset %s" % (d))
            Write.append(d)
    if len(Write) == 0:
        logging.error("No datasets containing given pattern(s) found!")
        exit(0)

    WriteList(Write, "%s/%s" % (OutDir, FileList))
    logging.info("Datasets written to file %s/%s" % (OutDir, FileList))
Example No. 7
        if line.startswith("|"): line = line[1:]
        else: continue
        Candidate = line.split("|")[0].strip()
        if Candidate in RSE: Replicas.append(Candidate)
    return Replicas


def getRSEs():
    Cmd = "rucio list-rses"
    return commands.getoutput(Cmd).split()


if __name__ == '__main__':

    CheckRucioSetup()
    CheckRemainingProxyTime()

    OutDir = os.getcwd()

    parser = argparse.ArgumentParser(
        description='This script lists datasets located at an RSE location. Further patterns to find or exclude can be specified.',
        prog='ListDisk',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-p',
                        '-P',
                        '--pattern',
                        help='specify a pattern which is part of dataset name',
                        nargs='+',
                        default=[])
    parser.add_argument(
Example No. 8
def main():
    """Merge files from a list using the MergeClass in ClusterEngine."""
    RunOptions = getArgumentParser().parse_args()
    if RunOptions.fileListsFolder != "":
        if len(RunOptions.fileLists) > 0:
            logging.warning(
                'You gave both a folder containing filelists and separate filelists, will merge both!'
            )
        if not os.path.isdir(RunOptions.fileListsFolder):
            logging.error(' %s is not a directory, exiting...' %
                          RunOptions.fileListsFolder)
            sys.exit(1)
        for l in os.listdir(RunOptions.fileListsFolder):
            if not os.path.isdir('%s/%s' % (RunOptions.fileListsFolder, l)):
                RunOptions.fileLists.append('%s/%s' %
                                            (RunOptions.fileListsFolder, l))
    submit_engine = setup_engine(RunOptions)
    merging = [
        submit_engine.create_merge_interface(
            out_name=L[L.rfind("/") + 1:L.rfind(".")],
            files_to_merge=ReadListFromFile(L),
            files_per_job=RunOptions.nFilesPerJob,
            hold_jobs=RunOptions.HoldJob,
            final_split=RunOptions.remainingSplit)
        for L in RunOptions.fileLists
    ]
    ### Rucio lists
    if len(RunOptions.RucioDSList) > 0:
        CheckRucioSetup()
        CheckRemainingProxyTime()
        #### Check that we can actually obtain the datasets
        if len(RunOptions.RucioRSE) == 0 and not RunOptions.download:
            logging.error(
                "Please specify either the RSE on which the datasets are stored via --RucioRSE or activate the download option"
            )
            exit(1)

        ds_to_merge = ReadListFromFile(RunOptions.RucioDSList)
        download_dir = submit_engine.tmp_dir() + "TMP_DOWNLOAD/"
        if RunOptions.download:
            downloadDataSets(InputDatasets=ds_to_merge,
                             Destination=download_dir,
                             RSE=RunOptions.RucioRSE,
                             use_singularity=False)

        to_wait = []
        hold_jobs = []
        for ds in ds_to_merge:
            ds_name = ds[ds.find(":") + 1:]
            if RunOptions.batch_size <= 0:
                merging += [
                    submit_engine.create_merge_interface(
                        out_name=ds_name,
                        files_to_merge=GetDataSetFiles(dsname=ds,
                                                       RSE=RunOptions.RucioRSE,
                                                       protocols="root")
                        if not RunOptions.download else [
                            download_dir + ds_name + "/" + x
                            for x in os.listdir(download_dir + ds_name)
                        ],
                        files_per_job=RunOptions.nFilesPerJob,
                        hold_jobs=RunOptions.HoldJob + hold_jobs,
                        final_split=RunOptions.remainingSplit)
                ]

            else:
                merging += [
                    DataSetFileHandler(rucio_container=ds,
                                       dest_rse=RunOptions.RucioRSE,
                                       download=RunOptions.download,
                                       merge=True,
                                       download_dir=download_dir,
                                       destination_dir=submit_engine.out_dir(),
                                       cluster_engine=submit_engine,
                                       max_merged_size=RunOptions.batch_size * 1024 * 1024 * 1024,
                                       hold_jobs=RunOptions.HoldJob + hold_jobs,
                                       files_per_merge_job=2)
                ]
            to_wait += [submit_engine.subjob_name(merging[-1].job_name())]
            if len(to_wait) % 5 == 0:
                hold_jobs = [w for w in to_wait]
                to_wait = []
    for merge in merging:
        merge.submit_job()

    clean_hold = [
        submit_engine.subjob_name(merge.job_name()) for merge in merging
    ]

    submit_engine.submit_clean_all(clean_hold)
    submit_engine.finish()