Example #1
def list_file_replicas(run_number, dtype, hash, rse='UC_DALI_USERDISK'):

#    db = ConnectMongoDB()
    rc = RucioSummoner(helper.get_hostconfig("rucio_backend"))

#    print("Looking for run "+str(run_number)+", data type "+dtype+", hash "+hash+", in rse="+rse)

    # checks if the run is present in the run database
    # this improves the reaction speed in case the run does not exist,
    # since we then do not call Rucio commands
#    cursor = db.GetRunByNumber(run_number)
#    if len(cursor)==0:
#        print("Error. Run not existing in database")
#        return list()

    # build did
    did = make_did(run_number, dtype, hash)

    file_replicas = {}

    # check if the did exists in the given rse
    if rc.CheckRule(did, rse) != 'OK':
#        print("Error. Not found in this rse")
        return list()

    file_replicas = rc.ListFileReplicas(did, rse, localpath=True)

    return list(file_replicas.values())
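
The DID convention that list_file_replicas() relies on is not shown here; the sketch below reconstructs it from how the DID is split and renamed in the later examples, so treat the prefix and zero-padding as assumptions rather than the canonical make_did implementation.

def make_did_sketch(run_number, dtype, lineage_hash):
    # Assumed convention: scope 'xnt_<zero-padded run>' and name '<dtype>-<hash>',
    # inferred from the did.split(':') and did.replace('xnt_', '') calls in these examples
    return f"xnt_{run_number:06d}:{dtype}-{lineage_hash}"

# Hypothetical usage (run number and hash are placeholders):
# list_file_replicas(25000, 'raw_records', 'rfzvpzj4mf', rse='UC_DALI_USERDISK')
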
Example #2
    def _raw_data_exists(self, raw_type='raw_records'):
        """Property that returns a boolean for whether or not raw data exists in rucio"""
        h = self.hashes.get(raw_type)
        if not h:
            raise ValueError(f"Dtype {raw_type} does not exist for the context in question")
        # check rucio
        did = make_did(self.number, raw_type, h)
        scope, name = did.split(':')

        # returns a generator
        rules = RUCIO_CLIENT.list_did_rules(scope, name)

        rules = [r['rse_expression'] for r in rules if r['state'] == 'OK' and r['locks_ok_cnt'] > 0]
        rules = [r for r in rules if 'TAPE' not in r and r != 'LNGS_USERDISK']
        return len(rules) > 0
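
For reference, the same check can be written as a standalone function with the plain Rucio Client; this is a minimal sketch assuming a configured Rucio environment and a dataset DID built with make_did, not part of the original class.

from rucio.client import Client

def raw_data_on_disk(did):
    """Sketch: True if at least one healthy rule exists on a non-tape RSE other than LNGS_USERDISK."""
    scope, name = did.split(':')
    rules = Client().list_did_rules(scope, name)
    rses = [r['rse_expression'] for r in rules
            if r['state'] == 'OK' and r['locks_ok_cnt'] > 0]
    return any('TAPE' not in r and r != 'LNGS_USERDISK' for r in rses)
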
Example #3
def download(number, dtype, hash, chunks=None, location='.',  tries=3, metadata=True,
             num_threads=8, **kwargs):
    """Function download()

    Downloads a given run number using rucio
    :param number: A run number (integer)
    :param dtype: The datatype to download.
    :param chunks: List of integers representing the desired chunks. If None, the whole run will be downloaded.
    :param location: String for the path where you want to put the data. Defaults to current directory.
    :param tries: Integer specifying number of times to try downloading the data. Defaults to 2.
    :param version: Context version as listed in the data_hashes collection
    :param kwargs: Keyword args passed to DownloadDids
    """

    # setup rucio client
    rc = RucioSummoner()

    # get DID
    did = make_did(number, dtype, hash)

    # if we didn't pass an rse, determine the best one
    rse = kwargs.pop('rse', None)
    if not rse:
        # determine which rses this did is on
        rules = rc.ListDidRules(did)
        rses = []
        for r in rules:
            if r['state'] == 'OK':
                rses.append(r['rse_expression'])
        # find closest one, otherwise start at the US end at TAPE
        glidein_region = os.environ.get('GLIDEIN_Country', 'US')
        rse = determine_rse(rses, glidein_region)

    if chunks:
        dids = []
        for c in chunks:
            cdid = did + '-' + str(c).zfill(6)
            dids.append(cdid)
        # also download metadata
        if metadata:
            dids.append(did + '-metadata.json')

    else:
        dids = [did]

    # rename the folder that will be downloaded
    path = did.replace(':', '-')
    # drop the xnt at the beginning
    path = path.replace('xnt_', '')

    location = os.path.join(location, path)
    os.makedirs(location, exist_ok=True)

    # TODO check if files already exist?
    print(did)

    print(f"Downloading {did} from {rse}")

    _try = 1
    success = False

    while _try <= tries and not success:
        if _try == tries:
            rse = None
        result = rc.DownloadDids(dids, download_path=location, no_subdir=True, rse=rse,
                                 num_threads=num_threads, **kwargs)
        if isinstance(result, int):
            print(f"Download try #{_try} failed.")
            time.sleep(5 ** _try)
            _try += 1
        else:
            success = True

    if success:
        print(f"Download successful to {location}")
Example #4
def main():
    parser = argparse.ArgumentParser(description="Combine strax output")
    parser.add_argument('dataset', help='Run number', type=int)
    parser.add_argument('dtype', help='dtype to combine')
    parser.add_argument('--context', help='Strax context')
    parser.add_argument('--input', help='path where the temp directory is')
    parser.add_argument('--rse', help='RSE to upload to')
    parser.add_argument('--cmt', help='CMT global version')
    parser.add_argument('--update-db',
                        help='flag to update runsDB',
                        dest='update_db',
                        action='store_true')
    parser.add_argument('--upload-to-rucio',
                        help='flag to upload to rucio',
                        dest='upload_to_rucio',
                        action='store_true')

    args = parser.parse_args()

    runid = args.dataset
    runid_str = "%06d" % runid
    dtype = args.dtype
    path = args.input

    final_path = 'finished_data'

    # get context
    st = getattr(straxen.contexts, args.context)()
    st.storage = [
        strax.DataDirectory('./'),
        strax.DataDirectory(final_path)  # where we are copying data to
    ]
    apply_global_version(st, args.cmt)

    # check what data is in the output folder
    dtypes = [d.split('-')[1] for d in os.listdir(path)]

    if 'records' in dtypes:
        plugin_levels = ['records', 'peaklets']
    else:
        plugin_levels = ['peaklets']

    # merge
    for dtype in plugin_levels:
        print(f"Merging {dtype} level")
        merge(runid_str, dtype, st, path)

    print(f"Current contents of {final_path}:")
    print(os.listdir(final_path))

    # now upload the merged metadata
    # setup the rucio client(s)
    if not args.upload_to_rucio:
        print("Ignoring rucio upload. Exiting")
        return

    # need to patch the storage one last time
    st.storage = [strax.DataDirectory(final_path)]

    updonkey = UploadClient()
    donkey = Client()

    for this_dir in os.listdir(final_path):
        # prepare list of dicts to be uploaded
        _run, keystring, straxhash = this_dir.split('-')
        dataset_did = make_did(runid, keystring, straxhash)
        scope, dset_name = dataset_did.split(':')

        files = os.listdir(os.path.join(final_path, this_dir))
        to_upload = []
        existing_files = [
            f for f in donkey.list_dids(scope, {'type': 'file'}, type='file')
        ]
        existing_files = [f for f in existing_files if dset_name in f]

        try:
            existing_files_in_dataset = [
                f['name'] for f in donkey.list_files(scope, dset_name)
            ]
        except rucio.common.exception.DataIdentifierNotFound:
            existing_files_in_dataset = []

        # for some reason files get uploaded but not attached correctly
        need_attached = list(
            set(existing_files) - set(existing_files_in_dataset))

        if len(need_attached) > 0:
            dids_to_attach = [
                dict(scope=scope, name=name) for name in need_attached
            ]

            donkey.attach_dids(scope, dset_name, dids_to_attach)

        for f in files:
            if f in existing_files:
                print(f"Skipping {f} since it is already uploaded")
                continue

            this_path = os.path.join(final_path, this_dir, f)
            d = dict(path=this_path,
                     did_scope=scope,
                     did_name=f,
                     dataset_scope=scope,
                     dataset_name=dset_name,
                     rse=args.rse,
                     register_after_upload=True)
            to_upload.append(d)

        # skip if there is nothing to upload
        if len(to_upload) == 0:
            print(f"No files to upload for {this_dir}")
            continue

        # now do the upload!
        try:
            updonkey.upload(to_upload)
        except:
            print(f'Upload of {keystring} failed')
            raise
        print(
            f"Upload of {len(files)} files in {this_dir} finished successfully"
        )
        for f in files:
            print(f"{scope}:{f}")

        # now check that the rucio data matches what we expect
        rucio_files = [f for f in donkey.list_files(scope, dset_name)]

        # how many chunks?
        md = st.get_meta(runid_str, keystring)

        expected_chunks = len([c for c in md['chunks'] if c['n'] > 0])

        # we should have n+1 files in rucio (counting metadata)
        if len(rucio_files) != expected_chunks + 1:
            # we're missing some data, uh oh
            successful_chunks = set(
                int(f['name'].split('-')[-1]) for f in rucio_files
                if not f['name'].endswith('metadata.json'))
            expected_chunk_ids = set(np.arange(expected_chunks))

            missing_chunks = expected_chunk_ids - successful_chunks

            missing_chunk_str = '\n'.join(str(c) for c in sorted(missing_chunks))
            raise RuntimeError(
                f"File mismatch! There are {len(rucio_files)} files but the metadata thinks there "
                f"should be {expected_chunks} chunks + 1 metadata. "
                f"The missing chunks are:\n{missing_chunk_str}")

        chunk_mb = [chunk['nbytes'] / (1e6) for chunk in md['chunks']]
        data_size_mb = np.sum(chunk_mb)
        avg_data_size_mb = np.mean(chunk_mb)

        # let's do one last check of the rule
        rc = RucioSummoner()

        rses = [args.rse]
        if (keystring not in ['records', 'veto_regions', 'pulse_counts']
                and "UC_DALI_USERDISK" not in rses):
            rses.append('UC_DALI_USERDISK')

        for rse in rses:
            rule = rc.GetRule(dataset_did, rse)
            if rule['state'] == 'OK':
                status = 'transferred'
            elif rule['state'] == 'REPLICATING':
                status = 'transferring'
            else:
                status = 'error'

            if args.update_db:
                # update runDB
                new_data_dict = dict()
                new_data_dict['location'] = rse
                new_data_dict['did'] = dataset_did
                new_data_dict['status'] = status
                new_data_dict['host'] = "rucio-catalogue"
                new_data_dict['type'] = keystring
                new_data_dict['protocol'] = 'rucio'
                new_data_dict['creation_time'] = datetime.datetime.utcnow(
                ).isoformat()
                new_data_dict['creation_place'] = "OSG"
                #new_data_dict['file_count'] = file_count
                new_data_dict['meta'] = dict(  #lineage=plugin.lineage_hash,
                    avg_chunk_mb=avg_data_size_mb,
                    file_count=len(rucio_files),
                    size_mb=data_size_mb,
                    strax_version=strax.__version__,
                    straxen_version=straxen.__version__)

                db.update_data(runid, new_data_dict)

                print(f"Database updated for {keystring} at {rse}")
            else:
                print("Skipping database update.")

        # if everything is good, let's close the dataset
        # this will make it so no more data can be added to this dataset
        if status == 'transferred':
            try:
                donkey.close(scope, dset_name)
            except:
                print(f"Closing {scope}:{dset_name} failed")
Example #5
    def showrun(self, arg_number, arg_to, arg_dtypes, arg_compact,
                arg_dumpjson, arg_status, arg_latest, arg_pending):

        #Define data types
        RAW_RECORDS_TPC_TYPES = helper.get_hostconfig(
        )['raw_records_tpc_types']
        RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['raw_records_mv_types']
        RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['raw_records_nv_types']
        LIGHT_RAW_RECORDS_TPC_TYPES = helper.get_hostconfig(
        )['light_raw_records_tpc_types']
        LIGHT_RAW_RECORDS_MV_TYPES = helper.get_hostconfig(
        )['light_raw_records_mv_types']
        LIGHT_RAW_RECORDS_NV_TYPES = helper.get_hostconfig(
        )['light_raw_records_nv_types']
        HIGH_LEVEL_TYPES = helper.get_hostconfig()['high_level_types']
        RECORDS_TYPES = helper.get_hostconfig()['records_types']

        #Get other parameters
        DATADIR = helper.get_hostconfig()['path_data_to_upload']
        RSES = helper.get_hostconfig()['rses']

        minimum_number_acceptable_rses = 2
        minimum_deltadays_allowed = 3

        # Storing some backup hashes in case DID information is not available
        bkp_hashes = {
            'raw_records': 'rfzvpzj4mf',
            'raw_records_he': 'rfzvpzj4mf',
            'raw_records_mv': 'rfzvpzj4mf',
            'raw_records_aqmon': 'rfzvpzj4mf',
            'records': '56ausr64s7',
            'lone_hits': 'b7dgmtzaef'
        }

        context = 'xenonnt_online'

        #Init the runDB
        db = ConnectMongoDB()

        #Init Rucio for later uploads and handling:
        rc = RucioSummoner(helper.get_hostconfig("rucio_backend"))
        rc.SetRucioAccount(helper.get_hostconfig('rucio_account'))
        rc.SetConfigPath(helper.get_hostconfig("rucio_cli"))
        rc.SetProxyTicket(helper.get_hostconfig('rucio_x509'))
        rc.SetHost(helper.get_hostconfig('host'))
        rc.ConfigHost()
        rc.SetProxyTicket("rucio_x509")

        data_types = (RAW_RECORDS_TPC_TYPES + RAW_RECORDS_MV_TYPES +
                      RAW_RECORDS_NV_TYPES + LIGHT_RAW_RECORDS_TPC_TYPES +
                      LIGHT_RAW_RECORDS_MV_TYPES + LIGHT_RAW_RECORDS_NV_TYPES +
                      HIGH_LEVEL_TYPES + RECORDS_TYPES)

        # if arg_number has been given
        if arg_number != "":

            # if the "number" argument is a number, it is converted as integer
            if arg_number.isdigit():
                arg_number = int(arg_number)
            # otherwise a DID is assumed to have been given, and the run number
            # and other parameters are extracted from it
            else:
                arg_number, dtype, hash = get_did(arg_number)
                arg_dtypes = [dtype]

        # if no arg_number has been given, the "latest" option is activated (with 5 run numbers by default) in compact mode
        else:
            if arg_latest == 0:
                arg_latest = 5
                arg_compact = True

        if arg_latest > 0:
            cursor = db.db.find({}).sort('number', pymongo.DESCENDING).limit(1)
            cursor = list(cursor)
            arg_to = cursor[0]['number']
            arg_number = arg_to - arg_latest + 1
            print('Processing latest {0} runs'.format(arg_latest))

        if arg_to > arg_number:
            cursor = db.db.find({
                'number': {
                    '$gte': arg_number,
                    '$lte': arg_to
                }
            }).sort('number', pymongo.ASCENDING)
            print('Runs that will be processed are from {0} to {1}'.format(
                arg_number, arg_to))
        else:
            cursor = db.db.find({'number': arg_number})
            print('Run that will be processed is {0}'.format(arg_number))

        cursor = list(cursor)

        # Runs over all listed runs
        for run in cursor:

            print("")

            # Gets run number
            number = run['number']
            print('Run: {0}'.format(number))

            # Gets the status
            if 'status' in run:
                print('Status: {0}'.format(run['status']))
            else:
                print('Status: {0}'.format('Not available'))

            if arg_status:
                continue

            # Extracts the Event Builder machine that processed this run,
            # then the bootstrax state and, in case it was abandoned, the reason
            if 'bootstrax' in run:
                bootstrax = run['bootstrax']
                eb = bootstrax['host'].split('.')[0]
                print('Processed by: {0}'.format(eb))
                if 'state' in bootstrax:
                    print('Bootstrax state: {0}'.format(bootstrax['state']))
                    if bootstrax['state'] == 'abandoned':
                        if 'reason' in bootstrax:
                            print('Reason: {0}'.format(bootstrax['reason']))
            else:
                print('Not processed')

            # Gets the date
            if 'start' in run:
                start_time = run['start'].replace(tzinfo=timezone.utc)
                print("Date: ", start_time.astimezone(tz=None))

                # Calculates the duration
                if 'end' in run:
                    if run['end'] is not None:
                        end_time = run['end'].replace(tzinfo=timezone.utc)
                        duration = end_time - start_time
                        print("Duration: ", duration)
                    else:
                        print("Duration: ", "unknown")

                # Prints if the run is still recent enough (less than three days old)
                now_time = datetime.now().replace(tzinfo=timezone.utc)
                delta_time = now_time - start_time
                if delta_time < timedelta(days=minimum_deltadays_allowed):
                    print("Less than {0} days old".format(
                        minimum_deltadays_allowed))
            else:
                print("Warning : no time info available")

            # Gets the comments
            if 'comments' in run:
                if len(run['comments']) > 0:
                    last_comment = run['comments'][-1]
                    print("Latest comment ({0}): {1}".format(
                        last_comment['user'], last_comment['comment']))

            # Dumps the entire rundoc under json format
            if arg_dumpjson:
                print(dumps(run, indent=4))

            if arg_compact:
                continue

            # Merges data and deleted_data

    #        if 'deleted_data' in run:
    #            data = run['data'] + run['deleted_data']
    #        else:
            data = run['data']

            # Check if there are instances on additional EventBuilders
            extra_ebs = set()
            for d in data:
                if 'eb' in d['host'] and eb not in d['host']:
                    extra_ebs.add(d['host'].split('.')[0])
            if len(extra_ebs) > 0:
                print(
                    '\t\t Warning : The run has been processed by more than one EventBuilder: {0}'
                    .format(extra_ebs))

            # Runs over all data types to be monitored
            for dtype in data_types:

                if len(arg_dtypes) > 0:
                    if dtype not in arg_dtypes:
                        continue

                # Take the official number of files according to the run DB
                # and the eb status
                Nfiles = -1
                ebstatus = ""
                for d in data:
                    if d['type'] == dtype and eb in d['host']:
                        if 'file_count' in d:
                            Nfiles = d['file_count']
                        if 'status' in d:
                            ebstatus = d['status']

                if arg_pending:
                    if ebstatus in ["", "transferred"]:
                        continue

                # Data type name
                print('{0}'.format(dtype))

                if Nfiles == -1:
                    print('\t Number of files: missing in DB')
                else:
                    print('\t Number of files: {0}'.format(Nfiles))

                if ebstatus != "":
                    print('\t EB status: {0}'.format(ebstatus))
                else:
                    print('\t EB status: not available')

                # Check if data are still in the data list and not in deleted_data
                DB_InEB = False
                for d in run['data']:
                    if d['type'] == dtype and eb in d['host']:
                        DB_InEB = True
                DB_NotInEB = False
                if 'deleted_data' in run:
                    for d in run['deleted_data']:
                        if d['type'] == dtype and eb in d['host']:
                            DB_NotInEB = True
                if DB_InEB and not DB_NotInEB:
                    print('\t DB : still in EB')
                if not DB_InEB and DB_NotInEB:
                    print('\t DB : deleted from EB')
                if DB_InEB and DB_NotInEB:
                    print(
                        '\t\t Incoherency in DB: it is both in data list and in deleted_data list'
                    )
                #if (DB_InEB and DB_NotInEB) or (not DB_InEB and not DB_NotInEB):
                #  print('\t\t incoherency in DB: it is neither in data list nor in deleted_data list')

                # Check if data are still in the EB disks without using the DB
                upload_path = ""
                for d in run['data']:
                    if d['type'] == dtype and eb in d['host']:
                        file = d['location'].split('/')[-1]
                        upload_path = os.path.join(DATADIR, eb, file)
                path_exists = os.path.exists(upload_path)
                if upload_path != "" and path_exists:
                    path, dirs, files = next(os.walk(upload_path))
                    print('\t Disk: still in EB disk and with', len(files),
                          'files')
                else:
                    print('\t Disk: not in EB disk')
                if DB_InEB and not path_exists:
                    print(
                        '\t\t Incoherency in DB and disk: it is in DB data list but it is not in the disk'
                    )
                if DB_NotInEB and path_exists:
                    print(
                        '\t\t Incoherency in DB and disk: it is in DB deleted_data list but it is still in the disk'
                    )

                # The list of DIDs (usually just one)
                dids = set()
                for d in data:
                    if d['type'] == dtype and d['host'] == 'rucio-catalogue':
                        if 'did' in d:
                            dids.add(d['did'])
                print('\t DID:', dids)

                # Check the presence in each available RSE
                Nrses = 0
                for rse in RSES:
                    is_in_rse = False
                    for d in run['data']:
                        if d['type'] == dtype and rse in d['location']:
                            if 'status' in d:
                                status = d['status']
                            else:
                                status = 'Not available'
                            if 'did' in d:
                                hash = d['did'].split('-')[-1]
                                did = d['did']
                            else:
                                print(
                                    '\t\t Warning : DID information is absent in DB data list (old admix version). Using standard hashes for RSEs'
                                )
                                #hash = bkp_hashes.get(dtype)
                                #hash = utilix.db.get_hash(context, dtype)
                                hash = db.GetHashByContext(context, dtype)
                                did = make_did(number, dtype, hash)
                            rucio_rule = rc.GetRule(upload_structure=did,
                                                    rse=rse)
                            files = list_file_replicas(number, dtype, hash,
                                                       rse)
                            if rucio_rule['exists']:
                                print('\t', rse + ': DB Yes, Status', status,
                                      ', Rucio Yes, State',
                                      rucio_rule['state'], ",", len(files),
                                      'files')
                                if len(files) < Nfiles and rucio_rule[
                                        'state'] != "REPLICATING":
                                    print(
                                        '\t\t Warning : Wrong number of files in Rucio!!!'
                                    )
                            else:
                                print('\t', rse + ': DB Yes, Status', status,
                                      ', Rucio No')
                            # print(files)
                            is_in_rse = True
                            Nrses += 1
                    if not is_in_rse:
                        #                    print('\t\t Warning : data information is absent in DB data list. Trying using standard hashes to query Rucio')
                        #                    hash = bkp_hashes.get(dtype)
                        #hash = utilix.db.get_hash(context, dtype)
                        hash = db.GetHashByContext(context, dtype)
                        did = make_did(number, dtype, hash)
                        print('\t Guessed DID:', did)
                        rucio_rule = rc.GetRule(upload_structure=did, rse=rse)
                        files = list_file_replicas(number, dtype, hash, rse)
                        if rucio_rule['exists']:
                            print('\t', rse + ': DB No, Rucio Yes, State',
                                  rucio_rule['state'], ",", len(files),
                                  'files')
                            if len(files) < Nfiles and rucio_rule[
                                    'state'] != "REPLICATING":
                                print(
                                    '\t\t Warning : Wrong number of files in Rucio!!!'
                                )
                        else:
                            print('\t', rse + ': DB No, Rucio No')
                print('\t Number of sites: ', Nrses)
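
The per-RSE status that showrun() prints comes from the run document's 'data' list; the helper below is a sketch (not part of the original code) of that lookup, assuming run documents shaped like the ones queried above.

def db_entry_for_rse(rundoc, dtype, rse):
    # Return the DB-side view of one (dtype, rse) pair, or None if absent
    for d in rundoc.get('data', []):
        if d['type'] == dtype and rse in d.get('location', ''):
            return {'status': d.get('status', 'Not available'),
                    'did': d.get('did')}
    return None
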
Example #6
    def showdataset(self, run, datum):

        #print(dumps(datum, indent=4))

        # skip dataset if it does not have location
        if 'location' not in datum:
            print('Dataset: type {0} without location. Skipping'.format(
                datum['type']))
            return

        # Dataset name
        number = run['number']
        dtype = datum['type']
        hash = datum['location'].split('-')[-1]
        did = make_did(number, dtype, hash)
        print('Dataset: {0}'.format(did))

        # Event builder who treated it
        eb = datum['host'].split('.')[0]

        # Directory name
        directory = datum['location'].split('/')[-1]

        # Take the official number of files according to run DB
        Nfiles = -1
        if 'file_count' in datum:
            Nfiles = datum['file_count']
        if Nfiles == -1:
            print('\t Number of files: missing in DB')
        else:
            print('\t Number of files: {0}'.format(Nfiles))

        # Take the status of the EB dataset according to run DB
        ebstatus = ""
        if 'status' in datum:
            ebstatus = datum['status']
        if ebstatus != "":
            print('\t EB status: {0}'.format(ebstatus))
        else:
            print('\t EB status: not available')

        # Check if there are double entries in the DB
        Copies = 0
        for d in run['data']:
            if d['type'] == dtype and eb in d['host'] and hash in d['location']:
                Copies = Copies + 1
        if Copies > 1:
            print('\t\t Warning {0}: EB datum has a double entry in the DB'.
                  format(did))

        # Check if there are other entries in the deleted_data (even with different EBs)
        #DeletedCopies = []
        #for d in run['deleted_data']:
        #    if d['type'] == dtype and hash in d['location']:
        #        DeletedCopies.append(d['host'].split('.')[0])
        #if len(DeletedCopies)>0:
        #    print('\t Previously deleted data processed with those EBs: {0}'.format(DeletedCopies))

        # Read the real number of files present in EB disks
        upload_path = os.path.join(self.DATADIR, eb, directory)
        path_exists = os.path.exists(upload_path)
        Nfiles_disk = 0
        if path_exists:
            path, dirs, files = next(os.walk(upload_path))
            Nfiles_disk = len(files)

        # If data are supposed to be (according to DB) still present in EB, check if they are there
        if datum in run['data']:
            print('\t Still in EB')
            if Nfiles_disk != Nfiles:
                print(
                    '\t\t Warning {0}: number of files in EB disk ({1}) does not match with the DB info ({2})'
                    .format(did, Nfiles_disk, Nfiles))

        # Otherwise, if data are supposed to be (according to DB) deleted, check if they are really absent
        elif datum in run['deleted_data']:
            print('\t Deleted from EB')
            if Nfiles_disk > 0:
                print(
                    '\t\t Warning {0}: files are still in EB disk (nfiles={1}) while DB says they are deleted '
                    .format(did, Nfiles_disk))

        # Query rucio to see how many RSEs have those data
        rules = list(
            self.didclient.list_did_rules(
                did.split(':')[0],
                did.split(':')[1]))
        rses_with_data = []
        for rule in rules:
            rses_with_data.append(rule['rse_expression'])
        if len(rses_with_data) > 0:
            print('\t Rucio replicas in {0} RSEs : {1}'.format(
                len(rses_with_data), rses_with_data))
        else:
            print('\t No replicas in Rucio')

        # Check the presence of data in each available RSE and compatibility with DB

        # Step 1: prepare the dictionary
        rses = []
        for rse in self.RSES:
            r = {}
            r['name'] = rse
            rses.append(r)

        Nrses = 0

        # Step 2: filling the dictionary with RSEs info from DB and from Rucio
        for rse in rses:
            is_in_rse = False

            # Get info available in the DB
            rse['DBentries'] = 0
            rse['DBStatus'] = ""
            for d in run['data']:
                if 'rucio' in d['host']:
                    if d['did'] == did and rse['name'] in d['location']:
                        if 'status' in d:
                            rse['DBStatus'] = d['status']
                        rse['DBentries'] = rse['DBentries'] + 1

            # Get info available in Rucio
            rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse['name'])
            #            files = list_file_replicas(number, dtype, hash, rse['name'])
            #            files = list(self.rc.ListFileReplicas(did,rse['name'],localpath=True).values())
            did_dictionary = [{
                'scope': did.split(':')[0],
                'name': did.split(':')[1]
            }]
            replicas = list(
                self.replicaclient.list_replicas(did_dictionary,
                                                 rse_expression=rse['name']))
            #print(dumps(replicas, indent=4))
            rse['RucioExists'] = rucio_rule['exists']
            rse['RucioNFiles'] = len(replicas)

        # Step 3: analysis of data
        for rse in rses:

            #print(rse)

            # analysis specific for uploading
            if rse['name'] == self.UPLOAD_TO:

                # Case 1 : loss of Rucio connection at the end of the upload before creating the rule
                if rse['RucioNFiles'] == Nfiles and not rse[
                        'RucioExists'] and rse['DBStatus'] == "" and rse[
                            'DBentries'] == 0 and len(rses_with_data) == 0:
                    print(
                        '\t\t Warning: files have been uploaded but the rule has not been created'
                    )
                    print(
                        '\t\t Hint: create the rule manually, then continue uploading, using the following three commands:'
                    )
                    print('\t\t\t rucio add-rule {0} 1 {1}'.format(
                        did, rse['name']))
                    print('\t\t\t admix-fix --fix_upload_db {0}'.format(did))
                    print('\t\t\t admix-fix --create_upload_rules {0}'.format(
                        did))
#                    os.system('rucio add-rule {0} 1 {1}'.format(did,rse['name']))
#                    os.system('~/.local/bin/admix-fix --fix_upload_db {0}'.format(did))
#                    os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))

                # Case 2 : loss of Rucio connection at the end of the upload before updating the DB
                if rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse[
                        'DBStatus'] == "" and rse['DBentries'] == 0 and len(
                            rses_with_data) == 1:
                    print(
                        '\t\t Warning: the upload is completed, but DB needs to be updated and rules have to be created abroad'
                    )
                    print('\t\t Hint: fix it manually with the two commands:')
                    print('\t\t\t admix-fix --fix_upload_db {0}'.format(did))
                    print('\t\t\t admix-fix --create_upload_rules {0}'.format(
                        did))
#                    os.system('~/.local/bin/admix-fix --fix_upload_db {0}'.format(did))
#                    os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))

                # Case 3 : loss of Rucio connection at the end of the upload before creating the rules abroad
                if rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse[
                        'DBStatus'] == "transferred" and rse[
                            'DBentries'] == 1 and len(rses_with_data) == 1:
                    print(
                        '\t\t Warning: the upload is completed and the DB updated, but rules have to be created abroad'
                    )
                    print('\t\t Hint: fix it manually with the command:')
                    print('\t\t\t admix-fix --create_upload_rules {0}'.format(
                        did))
#                    os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))

                # Case 4 : data still to be uploaded, but the EB status is not empty, so admix cannot upload it
                if rse['RucioNFiles'] == 0 and not rse['RucioExists'] and rse[
                        'DBStatus'] == "" and rse['DBentries'] == 0 and len(
                            rses_with_data) == 0 and ebstatus not in [
                                "", "transferred"
                            ]:
                    print(
                        '\t\t Warning: the upload never started but the EB status is not empty, hence admix cannot upload it'
                    )
                    print(
                        '\t\t Hint: fix it manually with the following command to allow admix upload manager to take care of it:'
                    )
                    print(
                        '\t\t\t admix-fix --set_eb_status {0} eb_ready_to_upload'
                        .format(did))
#                    os.system('~/.local/bin/admix-fix --set_eb_status {0} eb_ready_to_upload'.format(did))

                # Case 5 : the upload is completed and copies exist abroad, but the EB datum is not yet flagged as transferred
                if rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse[
                        'DBStatus'] == "transferred" and rse[
                            'DBentries'] == 1 and len(
                                rses_with_data) > 0 and ebstatus not in [
                                    "", "transferred"
                                ]:
                    print(
                        '\t\t Warning: the upload is completed and there are also copies abroad'
                    )
                    print(
                        '\t\t Hint: fix it manually with the command below to flag the EB datum as transferred:'
                    )
                    print('\t\t\t admix-fix --set_eb_status {0} transferred'.
                          format(did))
#                    os.system('~/.local/bin/admix-fix --set_eb_status {0} transferred'.format(did))

                # Case 6 : the upload was interrupted during the copy
                if rse['RucioNFiles'] != Nfiles and rse['RucioExists'] and rse[
                        'DBStatus'] == "" and rse['DBentries'] == 0 and len(
                            rses_with_data
                        ) == 1 and ebstatus == "transferring":
                    print(
                        '\t\t Warning: the upload has been interrupted during the copy'
                    )
                    print(
                        '\t\t Hint: fix it manually with the command below to resume the upload:'
                    )
                    print('\t\t\t admix-fix --fix_upload {0}'.format(did))

            # analysis for all RSEs other than datamanager
            else:

                if not ((rse['RucioNFiles'] == Nfiles and rse['RucioExists']
                         and rse['DBentries'] == 1
                         and rse['DBStatus'] == 'transferred') or
                        (rse['RucioNFiles'] == 0 and not rse['RucioExists']
                         and rse['DBentries'] == 0
                         and rse['DBStatus'] != 'transferred')):
                    print(
                        '\t\t Warning {0}: data in RSE {1} are inconsistent:'.
                        format(did, rse['name']))
                    print('\t\t ', rse)
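
The Rucio-side half of the checks above uses the DID and replica clients; a minimal standalone sketch (assuming a configured Rucio account) looks like this.

from rucio.client.didclient import DIDClient
from rucio.client.replicaclient import ReplicaClient

def replica_summary(did, rse):
    # Rules tell us where the data should be; replicas tell us how many
    # files are actually resolvable at the given RSE expression
    scope, name = did.split(':')
    rules = [r['rse_expression'] for r in DIDClient().list_did_rules(scope, name)]
    replicas = list(ReplicaClient().list_replicas([{'scope': scope, 'name': name}],
                                                  rse_expression=rse))
    return {'rses_with_rules': rules, 'n_replicas_at_rse': len(replicas)}
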
Example #7
def main():
    parser = argparse.ArgumentParser(description="Upload combined output to rucio")
    parser.add_argument('dataset', help='Run number', type=int)
    parser.add_argument('dtype', help='dtype to upload')
    parser.add_argument('rse', help='Target RSE')
    parser.add_argument('--context', help='Strax context')

    args = parser.parse_args()

    tmp_path = tempfile.mkdtemp()


    runid = args.dataset
    runid_str = "%06d" % runid
    dtype = args.dtype
    rse = args.rse

    # get context
    st = getattr(straxen.contexts, args.context)()
    st.storage = [strax.DataDirectory(tmp_path)]

    plugin = st._get_plugins((dtype,), runid_str)[dtype]

    rc = RucioSummoner()

    for keystring in plugin.provides:
        key = strax.DataKey(runid_str, keystring, plugin.lineage)
        hash = key.lineage_hash
        # TODO check with utilix DB call that the hashes match?

        dirname = f"{runid_str}-{keystring}-{hash}"
        upload_path = os.path.join('combined', dirname)


        print(f"Uploading {dirname}")
        os.listdir(upload_path)

        # make a rucio DID
        did = make_did(runid, keystring, hash)

        # check if a rule already exists for this DID
        rucio_rule = rc.GetRule(upload_structure=did)

        # if not in rucio already and no rule exists, upload into rucio
        if not rucio_rule['exists']:
            result = rc.Upload(did,
                               upload_path,
                               rse,
                               lifetime=None)

            # check that upload was successful
            new_rule = rc.GetRule(upload_structure=did, rse=rse)

            # TODO check number of files

            new_data_dict = {}
            new_data_dict['location'] = rse
            new_data_dict['did'] = did
            new_data_dict['status'] = "transferred"
            new_data_dict['host'] = "rucio-catalogue"
            new_data_dict['type'] = keystring
            new_data_dict['lifetime'] = new_rule['expires']
            new_data_dict['protocol'] = 'rucio'
            new_data_dict['creation_time'] = datetime.datetime.utcnow().isoformat()
            new_data_dict['checksum'] = 'shit'
            db.update_data(runid, new_data_dict)
        else:
            print(f"Rucio rule already exists for {did}")
Example #8
def main():
    parser = ArgumentParser()
    parser.add_argument('runid', type=int, help='Run number')
    parser.add_argument('--dtype', help='dtype', required=True)
    parser.add_argument('--context', help='Context name', required=True)
    parser.add_argument('--rse', help='RSE to create replication rule at')
    parser.add_argument('--cmt', help='Global CMT version', default='ONLINE')
    parser.add_argument('--update-db',
                        help='flag to update runsDB',
                        dest='update_db',
                        action='store_true')
    parser.add_argument('--upload-to-rucio',
                        help='flag to upload to rucio',
                        dest='upload_to_rucio',
                        action='store_true')

    args = parser.parse_args()

    runid = args.runid
    runid_str = "%06d" % runid
    dtype = args.dtype

    dtypes = ['records', 'peaklets']

    # setup rucio client
    C = Client()

    # get context
    st = getattr(straxen.contexts, args.context)()

    # apply global version
    apply_global_version(st, args.cmt)

    for dtype in dtypes:

        # initialize plugin needed for processing this output type
        plugin = st._get_plugins((dtype, ), runid_str)[dtype]

        st._set_plugin_config(plugin, runid_str, tolerant=False)
        plugin.setup()

        for _dtype in plugin.provides:
            hash = get_hashes(st)[_dtype]

            # need to create, out on the grid, the dataset we will be uploading data to
            dataset = make_did(args.runid, _dtype, hash)
            scope, name = dataset.split(':')

            # check if this dataset exists
            existing_datasets = [
                i for i in C.list_dids(scope, filters=dict(type='dataset'))
            ]

            if name not in existing_datasets:
                C.add_dataset(scope, name)
                print(f"Dataset {dataset} created")
            else:
                print(f"Warning: The dataset {dataset} already exists!")
                #raise ValueError(f"The dataset {dataset} already exists!")

            #check if a rule already exists
            existing_rules = [
                i['rse_expression'] for i in C.list_did_rules(scope, name)
            ]

            # update runDB
            new_data_dict = dict()
            new_data_dict['location'] = args.rse
            new_data_dict['did'] = dataset
            new_data_dict['status'] = 'processing'
            new_data_dict['host'] = "rucio-catalogue"
            new_data_dict['type'] = _dtype
            new_data_dict['protocol'] = 'rucio'
            new_data_dict['creation_time'] = datetime.datetime.utcnow(
            ).isoformat()
            new_data_dict['creation_place'] = "OSG"
            new_data_dict['meta'] = dict(lineage=None,
                                         avg_chunk_mb=None,
                                         file_count=None,
                                         size_mb=None,
                                         strax_version=strax.__version__,
                                         straxen_version=straxen.__version__)

            if args.rse not in existing_rules:
                # 1 is the number of copies
                if args.upload_to_rucio:
                    C.add_replication_rule([dict(scope=scope, name=name)], 1,
                                           args.rse)
                    print(f"Replication rule at {args.rse} created")

                if args.update_db:
                    db.update_data(runid, new_data_dict)

                # send peaklets data to dali
                if dtype == 'peaklets' and args.rse != 'UC_DALI_USERDISK':
                    if args.upload_to_rucio:
                        C.add_replication_rule(
                            [dict(scope=scope, name=name)],
                            1,
                            'UC_DALI_USERDISK',
                            source_replica_expression=args.rse,
                            priority=5)
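
The runDB entry registered here shares its shape with the ones in the other scripts; the factory below is a sketch (the helper name is an assumption) of the common fields.

import datetime

def base_rundb_entry(did, dtype, rse, status):
    # Common fields of the 'data' entries these scripts write to the runDB;
    # each script adds its own 'meta' (and sometimes 'lifetime') on top
    return {'location': rse,
            'did': did,
            'status': status,
            'host': 'rucio-catalogue',
            'type': dtype,
            'protocol': 'rucio',
            'creation_time': datetime.datetime.utcnow().isoformat(),
            'creation_place': 'OSG'}
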
Example #9
def main():

    parser = argparse.ArgumentParser(description="Strax Processing With Outsource")
    parser.add_argument('dataset', help='Run number', type=int)
    parser.add_argument('--output', help='desired strax(en) output')
    parser.add_argument('--context', help='name of context')
    parser.add_argument('--chunks', nargs='*', help='chunk ids to download')
    parser.add_argument('--rse', type=str, default="UC_OSG_USERDISK")
    parser.add_argument('--cmt', type=str, default='ONLINE')
    parser.add_argument('--upload-to-rucio', action='store_true', dest='upload_to_rucio')
    parser.add_argument('--update-db', action='store_true', dest='update_db')
    parser.add_argument('--download-only', action='store_true', dest='download_only')
    parser.add_argument('--no-download', action='store_true', dest='no_download')

    args = parser.parse_args()

    # directory where we will be putting everything
    data_dir = './data'

    # make sure this is empty
    # if os.path.exists(data_dir):
    #     rmtree(data_dir)

    # get context
    st = getattr(straxen.contexts, args.context)()
    st.storage = [strax.DataDirectory(data_dir)]

    apply_global_version(st, args.cmt)

    runid = args.dataset
    runid_str = "%06d" % runid
    out_dtype = args.output

    # determine which input dtypes we need
    bottom = 'peaklets' if args.chunks is None else 'raw_records'
    to_download = find_data_to_download(runid, out_dtype, st, bottom=bottom)

    if not args.no_download:
        t0 = time.time()
        # download all the required datatypes to produce this output file
        if args.chunks:
            for in_dtype, hash in to_download:
                # download the input data
                if not os.path.exists(os.path.join(data_dir, f"{runid:06d}-{in_dtype}-{hash}")):
                    admix.download(runid, in_dtype, hash, chunks=args.chunks, location=data_dir)
        else:

            for in_dtype, hash in to_download:
                if not os.path.exists(os.path.join(data_dir, f"{runid:06d}-{in_dtype}-{hash}")):
                    admix.download(runid, in_dtype, hash, location=data_dir)
    
        download_time = time.time() - t0 # seconds
        print(f"=== Download time (minutes): {download_time/60:0.2f}")

    # initialize plugin needed for processing this output type
    plugin = st._get_plugins((out_dtype,), runid_str)[out_dtype]
    st._set_plugin_config(plugin, runid_str, tolerant=False)
    plugin.setup()

    # figure out what plugins we need to process/initialize
    to_process = [args.output]
    downloaded = [dtype for dtype, _ in to_download]
    missing = set(plugin.depends_on) - set(downloaded)
    if len(missing) > 0:
        missing_str = ', '.join(missing)
        print(f"Need to create intermediate data: {missing_str}")
        to_process = list(missing) + to_process

    # keep track of the data we just downloaded -- will be important for the upload step later
    downloaded_data = os.listdir(data_dir)
    print("--Downloaded data--")
    for dd in downloaded_data:
        print(dd)
    print("-------------------\n")

    if args.download_only:
        sys.exit(0)

    print(f"To process: {', '.join(to_process)}")

    _tmp_path = tempfile.mkdtemp()
    for dtype in to_process:
        close_savers = dtype != args.output
        process(runid,
                dtype,
                st,
                args.chunks,
                close_savers=close_savers,
                tmp_path=_tmp_path
                )

    print("Done processing. Now check if we should upload to rucio")

    # now we move the tmpfiles back to main directory, if needed
    # this is for cases where we went from raw_records-->records-->peaklets in one go
    if os.path.exists(_tmp_path):
        for dtype_path_thing in os.listdir(_tmp_path):
            tmp_path = os.path.join(_tmp_path, dtype_path_thing)
            merged_dir = os.path.join(data_dir, dtype_path_thing.split('_temp')[0])

            for file in os.listdir(tmp_path):
                copyfile(os.path.join(tmp_path, file), os.path.join(merged_dir, file))

            os.rename(merged_dir, os.path.join(data_dir, dtype_path_thing))


    # initiate the rucio client
    upload_client = UploadClient()
    rucio_client = Client()

    # if we processed the entire run, we upload everything including metadata
    # otherwise, we just upload the chunks
    upload_meta = args.chunks is None

    # now loop over datatypes we just made and upload the data
    processed_data = [d for d in os.listdir(data_dir) if d not in downloaded_data]
    print("---- Processed data ----")
    for d in processed_data:
        print(d)
    print("------------------------\n")

    if not args.upload_to_rucio:
        print("Ignoring rucio upload. Exiting. ")
        return

    for dirname in processed_data:
        # get rucio dataset
        this_run, this_dtype, this_hash = dirname.split('-')
        if this_dtype in rechunk_dtypes:
            print(f"Skipping upload of {this_dtype} since we need to rechunk it")
            continue

        # remove the _temp if we are processing chunks in parallel
        if args.chunks is not None:
            this_hash = this_hash.replace('_temp', '')
        dataset = make_did(int(this_run), this_dtype, this_hash)

        scope, dset_name = dataset.split(':')

        files = [f for f in os.listdir(os.path.join(data_dir, dirname))]

        if not upload_meta:
            files = [f for f in files if not f.endswith('.json')]

            # check that the output number of files is what we expect
            if len(files) != len(args.chunks):
                processed_chunks = set([int(f.split('-')[-1]) for f in files])
                expected_chunks = set(int(c) for c in args.chunks)
                missing_chunks = expected_chunks - processed_chunks
                missing_chunks = ' '.join(str(c) for c in sorted(missing_chunks))
                raise RuntimeError("File mismatch! We are missing output data for the following chunks: "
                                   f"{missing_chunks}"
                                   )


        # if there are no files, we can't upload them
        if len(files) == 0:
            print(f"No files to upload in {dirname}. Skipping.")
            continue

        # get list of files that have already been uploaded
        # this is to allow us re-run workflow for some chunks
        try:
            existing_files = [
                f for f in rucio_client.list_dids(scope, {'type': 'file'}, type='file')
            ]
            existing_files = [f for f in existing_files if dset_name in f]

            existing_files_in_dataset = [f['name'] for f in rucio_client.list_files(scope, dset_name)]

            # for some reason files get uploaded but not attached correctly
            need_attached = list(set(existing_files) - set(existing_files_in_dataset))

            # only consider the chunks here
            need_attached = [f for f in need_attached if str(int(f.split('-')[-1])) in args.chunks]


            if len(need_attached) > 0:
                dids_to_attach = [dict(scope=scope, name=name) for name in need_attached]

                rucio_client.attach_dids(scope, dset_name, dids_to_attach)


        except rucio.common.exception.DataIdentifierNotFound:
            existing_files = []

        # prepare list of dicts to be uploaded
        to_upload = []


        for f in files:
            path = os.path.join(data_dir, dirname, f)
            if f in existing_files:
                print(f"Skipping {f} since it is already uploaded")
                continue

            print(f"Uploading {f}")
            d = dict(path=path,
                     did_scope=scope,
                     did_name=f,
                     dataset_scope=scope,
                     dataset_name=dset_name,
                     rse=args.rse,
                     register_after_upload=True
                     )
            to_upload.append(d)

        # now do the upload!
        if len(to_upload) == 0:
            print(f"No files to upload for {dirname}")
            continue
        try:
            upload_client.upload(to_upload)
        except:
            print(f"Upload of {dset_name} failed for some reason")
            raise

        # TODO check rucio that the files are there?
        print(f"Upload of {len(files)} files in {dirname} finished successfully")

        # if we processed the whole thing, add a rule at DALI and update the runDB here
        if args.chunks is None:
            rucio_client.add_replication_rule([dict(scope=scope, name=dset_name)], 1, 'UC_DALI_USERDISK',
                                                source_replica_expression=args.rse,
                                              priority=5)
            # skip if update_db flag is false
            if args.update_db:
                md = st.get_meta(runid_str, this_dtype)
                chunk_mb = [chunk['nbytes'] / (1e6) for chunk in md['chunks']]
                data_size_mb = np.sum(chunk_mb)
                avg_data_size_mb = np.mean(chunk_mb)

                # update runDB
                new_data_dict = dict()
                new_data_dict['location'] = args.rse
                new_data_dict['did'] = dataset
                new_data_dict['status'] = 'transferred'
                new_data_dict['host'] = "rucio-catalogue"
                new_data_dict['type'] = this_dtype
                new_data_dict['protocol'] = 'rucio'
                new_data_dict['creation_time'] = datetime.datetime.utcnow().isoformat()
                new_data_dict['creation_place'] = "OSG"
                new_data_dict['meta'] = dict(lineage=md.get('lineage'),
                                             avg_chunk_mb=avg_data_size_mb,
                                             file_count=len(files),
                                             size_mb=data_size_mb,
                                             strax_version=strax.__version__,
                                             straxen_version=straxen.__version__
                                             )

                db.update_data(runid, new_data_dict)
                print(f"Database updated for {this_dtype} at {args.rse}")

                # now update dali db entry
                rule = rc.GetRule(dataset, 'UC_DALI_USERDISK')
                if rule['state'] == 'OK':
                    status = 'transferred'
                elif rule['state'] == 'REPLICATING':
                    status = 'transferring'
                elif rule['state'] == 'STUCK':
                    status = 'stuck'
                else:
                    status = 'error'
                new_data_dict['location'] = 'UC_DALI_USERDISK'
                new_data_dict['status'] = status
                db.update_data(runid, new_data_dict)

        # cleanup the files we uploaded
        # this is likely only done for records data because we will rechunk the others
        for f in files:
            print(f"Removing {f}")
            os.remove(os.path.join(data_dir, dirname, f))

    print("ALL DONE!")