def list_file_replicas(run_number, dtype, hash, rse='UC_DALI_USERDISK'):
    #    db = ConnectMongoDB()
    rc = RucioSummoner(helper.get_hostconfig("rucio_backend"))

    #    print("Looking for run "+str(run_number)+", data type "+dtype+", hash "+hash+", in rse="+rse)

    # checks if run is present in run database
    # this will improve the reaction speed in case the run does not exist,
    # since we do not call Rucio commands
    #    cursor = db.GetRunByNumber(run_number)
    #    if len(cursor)==0:
    #        print("Error. Run not existing in database")
    #        return list()

    # build did
    did = make_did(run_number, dtype, hash)

    file_replicas = {}

    # check if the did exists in the given rse
    if rc.CheckRule(did, rse) != 'OK':
        #        print("Error. Not found in this rse")
        return list()

    file_replicas = rc.ListFileReplicas(did, rse, localpath=True)

    return list(file_replicas.values())
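
# Usage sketch (hypothetical run number; the hash shown is one of the backup hashes used
# further below). Returns the local paths of the replicas on the given RSE, or an empty
# list if no rule in state 'OK' exists there.
#
#     replicas = list_file_replicas(10000, 'raw_records', 'rfzvpzj4mf', rse='UC_DALI_USERDISK')
#     for local_path in replicas:
#         print(local_path)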
def _raw_data_exists(self, raw_type='raw_records'):
    """Property that returns a boolean for whether or not raw data exists in rucio"""
    h = self.hashes.get(raw_type)
    if not h:
        raise ValueError(f"Dtype {raw_type} does not exist for the context in question")

    # check rucio
    did = make_did(self.number, raw_type, h)
    scope, name = did.split(':')
    # returns a generator
    rules = RUCIO_CLIENT.list_did_rules(scope, name)
    rules = [r['rse_expression'] for r in rules
             if r['state'] == 'OK' and r['locks_ok_cnt'] > 0]
    rules = [r for r in rules if 'TAPE' not in r and r != 'LNGS_USERDISK']
    return len(rules) > 0
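
# Usage sketch (assumes an object exposing `hashes` and `number`, as this method does;
# the object name is hypothetical). Rules on tape RSEs and on LNGS_USERDISK are
# deliberately not counted as available copies.
#
#     if job._raw_data_exists('raw_records'):
#         print("raw_records available on at least one non-tape, non-LNGS RSE")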
def download(number, dtype, hash, chunks=None, location='.', tries=3, metadata=True,
             num_threads=8, **kwargs):
    """Function download()

    Downloads a given run number using rucio
    :param number: A run number (integer)
    :param dtype: The datatype to download.
    :param hash: The hash of the dtype, as used in the rucio DID.
    :param chunks: List of integers representing the desired chunks. If None, the whole run
        will be downloaded.
    :param location: String for the path where you want to put the data. Defaults to
        current directory.
    :param tries: Integer specifying number of times to try downloading the data.
        Defaults to 3.
    :param metadata: If True, also download the metadata JSON when downloading individual chunks.
    :param num_threads: Number of download threads passed to rucio.
    :param kwargs: Keyword args passed to DownloadDids
    """
    # setup rucio client
    rc = RucioSummoner()

    # get DID
    did = make_did(number, dtype, hash)

    # if we didn't pass an rse, determine the best one
    rse = kwargs.pop('rse', None)

    if not rse:
        # determine which rses this did is on
        rules = rc.ListDidRules(did)
        rses = []
        for r in rules:
            if r['state'] == 'OK':
                rses.append(r['rse_expression'])
        # find the closest one; otherwise start at the US end at TAPE
        glidein_region = os.environ.get('GLIDEIN_Country', 'US')
        rse = determine_rse(rses, glidein_region)

    if chunks:
        dids = []
        for c in chunks:
            cdid = did + '-' + str(c).zfill(6)
            dids.append(cdid)
        # also download metadata
        if metadata:
            dids.append(did + '-metadata.json')
    else:
        dids = [did]

    # rename the folder that will be downloaded
    path = did.replace(':', '-')
    # drop the xnt at the beginning
    path = path.replace('xnt_', '')

    location = os.path.join(location, path)
    os.makedirs(location, exist_ok=True)

    # TODO check if files already exist?

    print(f"Downloading {did} from {rse}")

    _try = 1
    success = False

    while _try <= tries and not success:
        if _try == tries:
            # on the last attempt, let rucio pick any RSE
            rse = None
        result = rc.DownloadDids(dids, download_path=location, no_subdir=True, rse=rse,
                                 num_threads=num_threads, **kwargs)
        if isinstance(result, int):
            print(f"Download try #{_try} failed.")
            time.sleep(5 ** _try)
            _try += 1
        else:
            success = True

    if success:
        print(f"Download successful to {location}")
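
# Usage sketch (hypothetical run number and hash): download only the first three chunks
# of raw_records, plus the metadata JSON, into the current directory. The target folder
# is named after the DID with the scope prefix stripped.
#
#     download(10000, 'raw_records', 'rfzvpzj4mf', chunks=[0, 1, 2], location='.')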
def main():
    parser = argparse.ArgumentParser(description="Combine strax output")
    parser.add_argument('dataset', help='Run number', type=int)
    parser.add_argument('dtype', help='dtype to combine')
    parser.add_argument('--context', help='Strax context')
    parser.add_argument('--input', help='path where the temp directory is')
    parser.add_argument('--rse', help='RSE to upload to')
    parser.add_argument('--cmt', help='CMT global version')
    parser.add_argument('--update-db', help='flag to update runsDB', dest='update_db',
                        action='store_true')
    parser.add_argument('--upload-to-rucio', help='flag to upload to rucio',
                        dest='upload_to_rucio', action='store_true')

    args = parser.parse_args()

    runid = args.dataset
    runid_str = "%06d" % runid
    dtype = args.dtype
    path = args.input

    final_path = 'finished_data'

    # get context
    st = getattr(straxen.contexts, args.context)()
    st.storage = [
        strax.DataDirectory('./'),
        strax.DataDirectory(final_path)  # where we are copying data to
    ]
    apply_global_version(st, args.cmt)

    # check what data is in the output folder
    dtypes = [d.split('-')[1] for d in os.listdir(path)]

    if 'records' in dtypes:
        plugin_levels = ['records', 'peaklets']
    else:
        plugin_levels = ['peaklets']

    # merge
    for dtype in plugin_levels:
        print(f"Merging {dtype} level")
        merge(runid_str, dtype, st, path)

    print(f"Current contents of {final_path}:")
    print(os.listdir(final_path))

    # now upload the merged data
    # setup the rucio client(s)
    if not args.upload_to_rucio:
        print("Ignoring rucio upload. Exiting")
        return

    # need to patch the storage one last time
    st.storage = [strax.DataDirectory(final_path)]

    updonkey = UploadClient()
    donkey = Client()

    for this_dir in os.listdir(final_path):
        # prepare list of dicts to be uploaded
        _run, keystring, straxhash = this_dir.split('-')
        dataset_did = make_did(runid, keystring, straxhash)
        scope, dset_name = dataset_did.split(':')

        files = os.listdir(os.path.join(final_path, this_dir))
        to_upload = []

        existing_files = [f for f in donkey.list_dids(scope, {'type': 'file'}, type='file')]
        existing_files = [f for f in existing_files if dset_name in f]

        try:
            existing_files_in_dataset = [f['name'] for f in donkey.list_files(scope, dset_name)]
        except rucio.common.exception.DataIdentifierNotFound:
            existing_files_in_dataset = []

        # for some reason files get uploaded but not attached correctly
        need_attached = list(set(existing_files) - set(existing_files_in_dataset))

        if len(need_attached) > 0:
            dids_to_attach = [dict(scope=scope, name=name) for name in need_attached]
            donkey.attach_dids(scope, dset_name, dids_to_attach)

        for f in files:
            if f in existing_files:
                print(f"Skipping {f} since it is already uploaded")
                continue

            this_path = os.path.join(final_path, this_dir, f)
            d = dict(path=this_path,
                     did_scope=scope,
                     did_name=f,
                     dataset_scope=scope,
                     dataset_name=dset_name,
                     rse=args.rse,
                     register_after_upload=True)
            to_upload.append(d)

        if len(to_upload) == 0:
            print(f"No files to upload for {this_dir}")
            continue

        # now do the upload!
        try:
            updonkey.upload(to_upload)
        except:
            print(f'Upload of {keystring} failed')
            raise
        print(f"Upload of {len(files)} files in {this_dir} finished successfully")
        for f in files:
            print(f"{scope}:{f}")

        # now check that the rucio data match what we expect
        rucio_files = [f for f in donkey.list_files(scope, dset_name)]

        # how many chunks?
        md = st.get_meta(runid_str, keystring)
        expected_chunks = len([c for c in md['chunks'] if c['n'] > 0])

        # we should have n+1 files in rucio (counting metadata)
        if len(rucio_files) != expected_chunks + 1:
            # we're missing some data, uh oh
            successful_chunks = set(int(f['name'].split('-')[-1]) for f in rucio_files)
            expected_chunk_set = set(np.arange(expected_chunks))
            missing_chunks = expected_chunk_set - successful_chunks
            missing_chunk_str = '\n'.join(str(c) for c in sorted(missing_chunks))
            raise RuntimeError(
                f"File mismatch! There are {len(rucio_files)} but the metadata thinks there "
                f"should be {expected_chunks} chunks + 1 metadata. "
                f"The missing chunks are:\n{missing_chunk_str}")

        chunk_mb = [chunk['nbytes'] / (1e6) for chunk in md['chunks']]
        data_size_mb = np.sum(chunk_mb)
        avg_data_size_mb = np.mean(chunk_mb)

        # let's do one last check of the rule
        rc = RucioSummoner()
        rses = [args.rse]
        if (keystring not in ['records', 'veto_regions', 'pulse_counts']
                and "UC_DALI_USERDISK" not in rses):
            rses.append('UC_DALI_USERDISK')

        for rse in rses:
            rule = rc.GetRule(dataset_did, rse)
            if rule['state'] == 'OK':
                status = 'transferred'
            elif rule['state'] == 'REPLICATING':
                status = 'transferring'
            else:
                status = 'error'

            if args.update_db:
                # update runDB
                new_data_dict = dict()
                new_data_dict['location'] = rse
                new_data_dict['did'] = dataset_did
                new_data_dict['status'] = status
                new_data_dict['host'] = "rucio-catalogue"
                new_data_dict['type'] = keystring
                new_data_dict['protocol'] = 'rucio'
                new_data_dict['creation_time'] = datetime.datetime.utcnow().isoformat()
                new_data_dict['creation_place'] = "OSG"
                #new_data_dict['file_count'] = file_count
                new_data_dict['meta'] = dict(
                    #lineage=plugin.lineage_hash,
                    avg_chunk_mb=avg_data_size_mb,
                    file_count=len(rucio_files),
                    size_mb=data_size_mb,
                    strax_version=strax.__version__,
                    straxen_version=straxen.__version__)

                db.update_data(runid, new_data_dict)
                print(f"Database updated for {keystring} at {rse}")
            else:
                print("Skipping database update.")

        # if everything is good, let's close the dataset
        # this will make it so no more data can be added to this dataset
        if status == 'transferred':
            try:
                donkey.close(scope, dset_name)
            except:
                print(f"Closing {scope}:{dset_name} failed")
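
# Example invocation (hypothetical run number, paths, and script name):
#
#     python combine.py 10000 peaklets --context xenonnt_online --cmt ONLINE \
#         --input ./strax_data --rse UC_OSG_USERDISK --upload-to-rucio --update-db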
def showrun(self, arg_number, arg_to, arg_dtypes, arg_compact, arg_dumpjson,
            arg_status, arg_latest, arg_pending):

    # Define data types
    RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['raw_records_tpc_types']
    RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['raw_records_mv_types']
    RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['raw_records_nv_types']
    LIGHT_RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['light_raw_records_tpc_types']
    LIGHT_RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['light_raw_records_mv_types']
    LIGHT_RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['light_raw_records_nv_types']
    HIGH_LEVEL_TYPES = helper.get_hostconfig()['high_level_types']
    RECORDS_TYPES = helper.get_hostconfig()['records_types']

    # Get other parameters
    DATADIR = helper.get_hostconfig()['path_data_to_upload']
    RSES = helper.get_hostconfig()['rses']

    minimum_number_acceptable_rses = 2
    minimum_deltadays_allowed = 3

    # Storing some backup hashes in case DID information is not available
    bkp_hashes = {
        'raw_records': 'rfzvpzj4mf',
        'raw_records_he': 'rfzvpzj4mf',
        'raw_records_mv': 'rfzvpzj4mf',
        'raw_records_aqmon': 'rfzvpzj4mf',
        'records': '56ausr64s7',
        'lone_hits': 'b7dgmtzaef'
    }

    context = 'xenonnt_online'

    # Init the runDB
    db = ConnectMongoDB()

    # Init Rucio for later uploads and handling:
    rc = RucioSummoner(helper.get_hostconfig("rucio_backend"))
    rc.SetRucioAccount(helper.get_hostconfig('rucio_account'))
    rc.SetConfigPath(helper.get_hostconfig("rucio_cli"))
    rc.SetProxyTicket(helper.get_hostconfig('rucio_x509'))
    rc.SetHost(helper.get_hostconfig('host'))
    rc.ConfigHost()
    rc.SetProxyTicket("rucio_x509")

    data_types = (RAW_RECORDS_TPC_TYPES + RAW_RECORDS_MV_TYPES + RAW_RECORDS_NV_TYPES
                  + LIGHT_RAW_RECORDS_TPC_TYPES + LIGHT_RAW_RECORDS_MV_TYPES
                  + LIGHT_RAW_RECORDS_NV_TYPES + HIGH_LEVEL_TYPES + RECORDS_TYPES)

    # if arg_number has been given
    if arg_number != "":
        # if the "number" argument is a number, it is converted to an integer
        if arg_number.isdigit():
            arg_number = int(arg_number)
        # otherwise it is assumed that a DID has been given, and the run number
        # and other parameters are extracted from the DID
        else:
            arg_number, dtype, hash = get_did(arg_number)
            arg_dtypes = [dtype]

    # if no arg_number has been given, then the "latest" option is activated
    # (with 5 run numbers by default) in compact modality
    else:
        if arg_latest == 0:
            arg_latest = 5
            arg_compact = True

    if arg_latest > 0:
        cursor = db.db.find({}).sort('number', pymongo.DESCENDING).limit(1)
        cursor = list(cursor)
        arg_to = cursor[0]['number']
        arg_number = arg_to - arg_latest + 1
        print('Processing latest {0} runs'.format(arg_latest))

    if arg_to > arg_number:
        cursor = db.db.find({
            'number': {
                '$gte': arg_number,
                '$lte': arg_to
            }
        }).sort('number', pymongo.ASCENDING)
        print('Runs that will be processed are from {0} to {1}'.format(arg_number, arg_to))
    else:
        cursor = db.db.find({'number': arg_number})
        print('Run that will be processed is {0}'.format(arg_number))
    cursor = list(cursor)

    # Runs over all listed runs
    for run in cursor:

        print("")

        # Gets run number
        number = run['number']
        print('Run: {0}'.format(number))

        # Gets the status
        if 'status' in run:
            print('Status: {0}'.format(run['status']))
        else:
            print('Status: {0}'.format('Not available'))

        if arg_status:
            continue

        # Extracts the Event Builder machine that processed this run,
        # then also the bootstrax state and, in case it was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
            print('Processed by: {0}'.format(eb))
            if 'state' in bootstrax:
                print('Bootstrax state: {0}'.format(bootstrax['state']))
                if bootstrax['state'] == 'abandoned':
                    if 'reason' in bootstrax:
                        print('Reason: {0}'.format(bootstrax['reason']))
        else:
            print('Not processed')

        # Gets the date
        if 'start' in run:
            start_time = run['start'].replace(tzinfo=timezone.utc)
            print("Date: ", start_time.astimezone(tz=None))

            # Calculates the duration
            if 'end' in run:
                if run['end'] is not None:
                    end_time = run['end'].replace(tzinfo=timezone.utc)
                    duration = end_time - start_time
                    print("Duration: ", duration)
                else:
                    print("Duration: ", "unknown")

            # Prints if the run is still recent enough (less than three days old)
            now_time = datetime.now().replace(tzinfo=timezone.utc)
            delta_time = now_time - start_time
            if delta_time < timedelta(days=minimum_deltadays_allowed):
                print("Less than {0} days old".format(minimum_deltadays_allowed))
        else:
            print("Warning : no time info available")

        # Gets the comments
        if 'comments' in run:
            if len(run['comments']) > 0:
                last_comment = run['comments'][-1]
                print("Latest comment ({0}): {1}".format(last_comment['user'],
                                                         last_comment['comment']))

        # Dumps the entire rundoc in json format
        if arg_dumpjson:
            print(dumps(run, indent=4))

        if arg_compact:
            continue

        # Merges data and deleted_data
        #        if 'deleted_data' in run:
        #            data = run['data'] + run['deleted_data']
        #        else:
        data = run['data']

        # Check if there are more instances in more EventBuilders
        extra_ebs = set()
        for d in data:
            if 'eb' in d['host'] and eb not in d['host']:
                extra_ebs.add(d['host'].split('.')[0])
        if len(extra_ebs) > 0:
            print('\t\t Warning : The run has been processed by more than one EventBuilder: {0}'
                  .format(extra_ebs))

        # Runs over all data types to be monitored
        for dtype in data_types:

            if len(arg_dtypes) > 0:
                if dtype not in arg_dtypes:
                    continue

            # Take the official number of files according to the run DB
            # and the eb status
            Nfiles = -1
            ebstatus = ""
            for d in data:
                if d['type'] == dtype and eb in d['host']:
                    if 'file_count' in d:
                        Nfiles = d['file_count']
                    if 'status' in d:
                        ebstatus = d['status']

            if arg_pending:
                if ebstatus in ["", "transferred"]:
                    continue

            # Data type name
            print('{0}'.format(dtype))

            if Nfiles == -1:
                print('\t Number of files: missing in DB')
            else:
                print('\t Number of files: {0}'.format(Nfiles))

            if ebstatus != "":
                print('\t EB status: {0}'.format(ebstatus))
            else:
                print('\t EB status: not available')

            # Check if data are still in the data list and not in deleted_data
            DB_InEB = False
            for d in run['data']:
                if d['type'] == dtype and eb in d['host']:
                    DB_InEB = True
            DB_NotInEB = False
            if 'deleted_data' in run:
                for d in run['deleted_data']:
                    if d['type'] == dtype and eb in d['host']:
                        DB_NotInEB = True
            if DB_InEB and not DB_NotInEB:
                print('\t DB : still in EB')
            if not DB_InEB and DB_NotInEB:
                print('\t DB : deleted from EB')
            if DB_InEB and DB_NotInEB:
                print('\t\t Incoherency in DB: it is both in data list and in deleted_data list')
            #if (DB_InEB and DB_NotInEB) or (not DB_InEB and not DB_NotInEB):
            #    print('\t\t incoherency in DB: it is neither in data list nor in deleted_data list')

            # Check if data are still in the EB disks without using the DB
            upload_path = ""
            for d in run['data']:
                if d['type'] == dtype and eb in d['host']:
                    file = d['location'].split('/')[-1]
                    upload_path = os.path.join(DATADIR, eb, file)
            path_exists = os.path.exists(upload_path)
            if upload_path != "" and path_exists:
                path, dirs, files = next(os.walk(upload_path))
                print('\t Disk: still in EB disk and with', len(files), 'files')
            else:
                print('\t Disk: not in EB disk')
            if DB_InEB and not path_exists:
                print('\t\t Incoherency in DB and disk: it is in DB data list but it is not in the disk')
            if DB_NotInEB and path_exists:
                print('\t\t Incoherency in DB and disk: it is in DB deleted_data list but it is still in the disk')

            # The list of DIDs (usually just one)
            dids = set()
            for d in data:
                if d['type'] == dtype and d['host'] == 'rucio-catalogue':
                    if 'did' in d:
                        dids.add(d['did'])
            print('\t DID:', dids)

            # Check the presence in each available RSE
            Nrses = 0
            for rse in RSES:
                is_in_rse = False
                for d in run['data']:
                    if d['type'] == dtype and rse in d['location']:
                        if 'status' in d:
                            status = d['status']
                        else:
                            status = 'Not available'
                        if 'did' in d:
                            hash = d['did'].split('-')[-1]
                            did = d['did']
                        else:
                            print('\t\t Warning : DID information is absent in DB data list '
                                  '(old admix version). Using standard hashes for RSEs')
                            #hash = bkp_hashes.get(dtype)
                            #hash = utilix.db.get_hash(context, dtype)
                            hash = db.GetHashByContext(context, dtype)
                            did = make_did(number, dtype, hash)
                        rucio_rule = rc.GetRule(upload_structure=did, rse=rse)
                        files = list_file_replicas(number, dtype, hash, rse)
                        if rucio_rule['exists']:
                            print('\t', rse + ': DB Yes, Status', status, ', Rucio Yes, State',
                                  rucio_rule['state'], ",", len(files), 'files')
                            if len(files) < Nfiles and rucio_rule['state'] != "REPLICATING":
                                print('\t\t Warning : Wrong number of files in Rucio!!!')
                        else:
                            print('\t', rse + ': DB Yes, Status', status, ', Rucio No')
                        # print(files)
                        is_in_rse = True
                        Nrses += 1
                if not is_in_rse:
                    #print('\t\t Warning : data information is absent in DB data list. '
                    #      'Trying standard hashes to query Rucio')
                    #hash = bkp_hashes.get(dtype)
                    #hash = utilix.db.get_hash(context, dtype)
                    hash = db.GetHashByContext(context, dtype)
                    did = make_did(number, dtype, hash)
                    print('\t Guessed DID:', did)
                    rucio_rule = rc.GetRule(upload_structure=did, rse=rse)
                    files = list_file_replicas(number, dtype, hash, rse)
                    if rucio_rule['exists']:
                        print('\t', rse + ': DB No, Rucio Yes, State', rucio_rule['state'],
                              ",", len(files), 'files')
                        if len(files) < Nfiles and rucio_rule['state'] != "REPLICATING":
                            print('\t\t Warning : Wrong number of files in Rucio!!!')
                    else:
                        print('\t', rse + ': DB No, Rucio No')

            print('\t Number of sites: ', Nrses)
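
# Usage sketch (hypothetical argument values, keywords spelled out for clarity): show the
# status of a single run, restricted to one data type. Passing a DID string instead of a
# run number is also supported (run number and dtype are then parsed from the DID).
#
#     self.showrun(arg_number='10000', arg_to=0, arg_dtypes=['raw_records'],
#                  arg_compact=False, arg_dumpjson=False, arg_status=False,
#                  arg_latest=0, arg_pending=False)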
def showdataset(self, run, datum):

    #print(dumps(datum, indent=4))

    # skip dataset if it does not have a location
    if 'location' not in datum:
        print('Dataset: type {0} without location. Skipping'.format(datum['type']))
        return

    # Dataset name
    number = run['number']
    dtype = datum['type']
    hash = datum['location'].split('-')[-1]
    did = make_did(number, dtype, hash)
    print('Dataset: {0}'.format(did))

    # Event builder that treated it
    eb = datum['host'].split('.')[0]

    # Directory name
    directory = datum['location'].split('/')[-1]

    # Take the official number of files according to the run DB
    Nfiles = -1
    if 'file_count' in datum:
        Nfiles = datum['file_count']
    if Nfiles == -1:
        print('\t Number of files: missing in DB')
    else:
        print('\t Number of files: {0}'.format(Nfiles))

    # Take the status of the EB dataset according to the run DB
    ebstatus = ""
    if 'status' in datum:
        ebstatus = datum['status']
    if ebstatus != "":
        print('\t EB status: {0}'.format(ebstatus))
    else:
        print('\t EB status: not available')

    # Check if there are double entries in the DB
    Copies = 0
    for d in run['data']:
        if d['type'] == dtype and eb in d['host'] and hash in d['location']:
            Copies = Copies + 1
    if Copies > 1:
        print('\t\t Warning {0}: EB datum has a double entry in the DB'.format(did))

    # Check if there are other entries in the deleted_data (even with different EBs)
    #DeletedCopies = []
    #for d in run['deleted_data']:
    #    if d['type'] == dtype and hash in d['location']:
    #        DeletedCopies.append(d['host'].split('.')[0])
    #if len(DeletedCopies) > 0:
    #    print('\t Previously deleted data processed with those EBs: {0}'.format(DeletedCopies))

    # Read the real number of files present in EB disks
    upload_path = os.path.join(self.DATADIR, eb, directory)
    path_exists = os.path.exists(upload_path)
    Nfiles_disk = 0
    if path_exists:
        path, dirs, files = next(os.walk(upload_path))
        Nfiles_disk = len(files)

    # If data are supposed to be (according to DB) still present in EB, check if they are there
    if datum in run['data']:
        print('\t Still in EB')
        if Nfiles_disk != Nfiles:
            print('\t\t Warning {0}: number of files in EB disk ({1}) does not match with the DB info ({2})'
                  .format(did, Nfiles_disk, Nfiles))

    # Otherwise, if data are supposed to be (according to DB) deleted, check if they are really absent
    elif datum in run['deleted_data']:
        print('\t Deleted from EB')
        if Nfiles_disk > 0:
            print('\t\t Warning {0}: files are still in EB disk (nfiles={1}) while DB says they are deleted'
                  .format(did, Nfiles_disk))

    # Query rucio to see how many RSEs have those data
    rules = list(self.didclient.list_did_rules(did.split(':')[0], did.split(':')[1]))
    rses_with_data = []
    for rule in rules:
        rses_with_data.append(rule['rse_expression'])
    if len(rses_with_data) > 0:
        print('\t Rucio replicas in {0} RSEs : {1}'.format(len(rses_with_data), rses_with_data))
    else:
        print('\t No replicas in Rucio')

    # Check the presence of data in each available RSE and compatibility with the DB

    # Step 1: prepare the dictionary
    rses = []
    for rse in self.RSES:
        r = {}
        r['name'] = rse
        rses.append(r)

    Nrses = 0

    # Step 2: fill the dictionary with RSE info from the DB and from Rucio
    for rse in rses:
        is_in_rse = False

        # Get info available in the DB
        rse['DBentries'] = 0
        rse['DBStatus'] = ""
        for d in run['data']:
            if 'rucio' in d['host']:
                if d['did'] == did and rse['name'] in d['location']:
                    if 'status' in d:
                        rse['DBStatus'] = d['status']
                    rse['DBentries'] = rse['DBentries'] + 1

        # Get info available in Rucio
        rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse['name'])
        # files = list_file_replicas(number, dtype, hash, rse['name'])
        # files = list(self.rc.ListFileReplicas(did, rse['name'], localpath=True).values())
        did_dictionary = [{'scope': did.split(':')[0], 'name': did.split(':')[1]}]
        replicas = list(self.replicaclient.list_replicas(did_dictionary,
                                                         rse_expression=rse['name']))
        #print(dumps(replicas, indent=4))
        rse['RucioExists'] = rucio_rule['exists']
        rse['RucioNFiles'] = len(replicas)

    # Step 3: analysis of data
    for rse in rses:

        #print(rse)

        # analysis specific to the upload RSE
        if rse['name'] == self.UPLOAD_TO:

            # Case 1 : loss of Rucio connection at the end of the upload, before creating the rule
            if (rse['RucioNFiles'] == Nfiles and not rse['RucioExists']
                    and rse['DBStatus'] == "" and rse['DBentries'] == 0
                    and len(rses_with_data) == 0):
                print('\t\t Warning: files have been uploaded but the rule has not been created')
                print('\t\t Hint: create the rule manually, then continue uploading, using the following three commands:')
                print('\t\t\t rucio add-rule {0} 1 {1}'.format(did, rse['name']))
                print('\t\t\t admix-fix --fix_upload_db {0}'.format(did))
                print('\t\t\t admix-fix --create_upload_rules {0}'.format(did))
                # os.system('rucio add-rule {0} 1 {1}'.format(did, rse['name']))
                # os.system('~/.local/bin/admix-fix --fix_upload_db {0}'.format(did))
                # os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))

            # Case 2 : loss of Rucio connection at the end of the upload, before updating the DB
            if (rse['RucioNFiles'] == Nfiles and rse['RucioExists']
                    and rse['DBStatus'] == "" and rse['DBentries'] == 0
                    and len(rses_with_data) == 1):
                print('\t\t Warning: the upload is completed, but the DB needs to be updated and rules have to be created abroad')
                print('\t\t Hint: fix it manually with the two commands:')
                print('\t\t\t admix-fix --fix_upload_db {0}'.format(did))
                print('\t\t\t admix-fix --create_upload_rules {0}'.format(did))
                # os.system('~/.local/bin/admix-fix --fix_upload_db {0}'.format(did))
                # os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))

            # Case 3 : loss of Rucio connection at the end of the upload, before creating the rules abroad
            if (rse['RucioNFiles'] == Nfiles and rse['RucioExists']
                    and rse['DBStatus'] == "transferred" and rse['DBentries'] == 1
                    and len(rses_with_data) == 1):
                print('\t\t Warning: the upload is completed and the DB updated, but rules have to be created abroad')
                print('\t\t Hint: fix it manually with the command:')
                print('\t\t\t admix-fix --create_upload_rules {0}'.format(did))
                # os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))

            # Case 4 : data still to be uploaded, but the value of the EB status is not empty, so admix cannot upload it
            if (rse['RucioNFiles'] == 0 and not rse['RucioExists']
                    and rse['DBStatus'] == "" and rse['DBentries'] == 0
                    and len(rses_with_data) == 0
                    and ebstatus not in ["", "transferred"]):
                print('\t\t Warning: the upload never started but the EB status is not empty, hence admix cannot upload it')
                print('\t\t Hint: fix it manually with the following command to allow the admix upload manager to take care of it:')
                print('\t\t\t admix-fix --set_eb_status {0} eb_ready_to_upload'.format(did))
                # os.system('~/.local/bin/admix-fix --set_eb_status {0} eb_ready_to_upload'.format(did))

            # Case 5 : the upload is completed and there are copies abroad, but the EB datum is not flagged as transferred
            if (rse['RucioNFiles'] == Nfiles and rse['RucioExists']
                    and rse['DBStatus'] == "transferred" and rse['DBentries'] == 1
                    and len(rses_with_data) > 0
                    and ebstatus not in ["", "transferred"]):
                print('\t\t Warning: the upload is completed and there are also copies abroad')
                print('\t\t Hint: fix it manually with the command below to flag the EB datum as transferred:')
                print('\t\t\t admix-fix --set_eb_status {0} transferred'.format(did))
                # os.system('~/.local/bin/admix-fix --set_eb_status {0} transferred'.format(did))

            # Case 6 : the upload has been interrupted during the copy
            if (rse['RucioNFiles'] != Nfiles and rse['RucioExists']
                    and rse['DBStatus'] == "" and rse['DBentries'] == 0
                    and len(rses_with_data) == 1 and ebstatus == "transferring"):
                print('\t\t Warning: the upload has been interrupted during the copy')
                print('\t\t Hint: fix it manually with the command below to resume the upload:')
                print('\t\t\t admix-fix --fix_upload {0}'.format(did))

        # analysis for all RSEs other than datamanager
        else:
            if not ((rse['RucioNFiles'] == Nfiles and rse['RucioExists']
                     and rse['DBentries'] == 1 and rse['DBStatus'] == 'transferred')
                    or (rse['RucioNFiles'] == 0 and not rse['RucioExists']
                        and rse['DBentries'] == 0 and rse['DBStatus'] != 'transferred')):
                print('\t\t Warning {0}: data in RSE {1} are inconsistent:'.format(did, rse['name']))
                print('\t\t ', rse)
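
# Usage sketch (hypothetical loop; `run` is a runDB document as used above): check every
# EB-hosted datum of a run, including data already flagged as deleted.
#
#     for datum in run['data'] + run.get('deleted_data', []):
#         if 'eb' in datum['host']:
#             self.showdataset(run, datum)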
def main():
    parser = argparse.ArgumentParser(description="Upload combined output to rucio")
    parser.add_argument('dataset', help='Run number', type=int)
    parser.add_argument('dtype', help='dtype to upload')
    parser.add_argument('rse', help='Target RSE')
    parser.add_argument('--context', help='Strax context')

    args = parser.parse_args()

    tmp_path = tempfile.mkdtemp()

    runid = args.dataset
    runid_str = "%06d" % runid
    dtype = args.dtype
    rse = args.rse

    # get context
    st = getattr(straxen.contexts, args.context)()
    st.storage = [strax.DataDirectory(tmp_path)]

    plugin = st._get_plugins((dtype,), runid_str)[dtype]

    rc = RucioSummoner()

    for keystring in plugin.provides:
        key = strax.DataKey(runid_str, keystring, plugin.lineage)
        hash = key.lineage_hash
        # TODO check with utilix DB call that the hashes match?
        dirname = f"{runid_str}-{keystring}-{hash}"
        upload_path = os.path.join('combined', dirname)

        print(f"Uploading {dirname}")
        os.listdir(upload_path)

        # make a rucio DID
        did = make_did(runid, keystring, hash)

        # check if a rule already exists for this DID
        rucio_rule = rc.GetRule(upload_structure=did)

        # if not in rucio already and no rule exists, upload into rucio
        if not rucio_rule['exists']:
            result = rc.Upload(did, upload_path, rse, lifetime=None)

            # check that the upload was successful
            new_rule = rc.GetRule(upload_structure=did, rse=rse)
            # TODO check number of files

            new_data_dict = {}
            new_data_dict['location'] = rse
            new_data_dict['did'] = did
            new_data_dict['status'] = "transferred"
            new_data_dict['host'] = "rucio-catalogue"
            new_data_dict['type'] = keystring
            new_data_dict['lifetime'] = new_rule['expires']
            new_data_dict['protocol'] = 'rucio'
            new_data_dict['creation_time'] = datetime.datetime.utcnow().isoformat()
            new_data_dict['checksum'] = 'shit'

            db.update_data(runid, new_data_dict)
        else:
            print(f"Rucio rule already exists for {did}")
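
# Example invocation (hypothetical run number and script name; expects the combined data
# in ./combined/<run>-<dtype>-<hash>):
#
#     python upload.py 10000 peaklets UC_OSG_USERDISK --context xenonnt_online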
def main():
    parser = ArgumentParser()
    parser.add_argument('runid', type=int, help='Run number')
    parser.add_argument('--dtype', help='dtype', required=True)
    parser.add_argument('--context', help='Context name', required=True)
    parser.add_argument('--rse', help='RSE to create replication rule at')
    parser.add_argument('--cmt', help='Global CMT version', default='ONLINE')
    parser.add_argument('--update-db', help='flag to update runsDB', dest='update_db',
                        action='store_true')
    parser.add_argument('--upload-to-rucio', help='flag to upload to rucio',
                        dest='upload_to_rucio', action='store_true')

    args = parser.parse_args()

    runid = args.runid
    runid_str = "%06d" % runid
    dtype = args.dtype

    dtypes = ['records', 'peaklets']

    # setup rucio client
    C = Client()

    # get context
    st = getattr(straxen.contexts, args.context)()

    # apply global version
    apply_global_version(st, args.cmt)

    for dtype in dtypes:
        # initialize plugin needed for processing this output type
        plugin = st._get_plugins((dtype,), runid_str)[dtype]
        st._set_plugin_config(plugin, runid_str, tolerant=False)
        plugin.setup()

        for _dtype in plugin.provides:
            hash = get_hashes(st)[_dtype]

            # need to create the dataset we will be uploading data to out on the grid
            dataset = make_did(args.runid, _dtype, hash)
            scope, name = dataset.split(':')

            # check if this dataset exists
            existing_datasets = [i for i in C.list_dids(scope, filters=dict(type='dataset'))]

            if name not in existing_datasets:
                C.add_dataset(scope, name)
                print(f"Dataset {dataset} created")
            else:
                print(f"Warning: The dataset {dataset} already exists!")
                #raise ValueError(f"The dataset {dataset} already exists!")

            # check if a rule already exists
            existing_rules = [i['rse_expression'] for i in C.list_did_rules(scope, name)]

            # update runDB
            new_data_dict = dict()
            new_data_dict['location'] = args.rse
            new_data_dict['did'] = dataset
            new_data_dict['status'] = 'processing'
            new_data_dict['host'] = "rucio-catalogue"
            new_data_dict['type'] = _dtype
            new_data_dict['protocol'] = 'rucio'
            new_data_dict['creation_time'] = datetime.datetime.utcnow().isoformat()
            new_data_dict['creation_place'] = "OSG"
            new_data_dict['meta'] = dict(lineage=None,
                                         avg_chunk_mb=None,
                                         file_count=None,
                                         size_mb=None,
                                         strax_version=strax.__version__,
                                         straxen_version=straxen.__version__)

            if args.rse not in existing_rules:
                # 1 is the number of copies
                if args.upload_to_rucio:
                    C.add_replication_rule([dict(scope=scope, name=name)], 1, args.rse)
                    print(f"Replication rule at {args.rse} created")
                if args.update_db:
                    db.update_data(runid, new_data_dict)

            # send peaklets data to dali
            if dtype == 'peaklets' and args.rse != 'UC_DALI_USERDISK':
                if args.upload_to_rucio:
                    C.add_replication_rule([dict(scope=scope, name=name)], 1,
                                           'UC_DALI_USERDISK',
                                           source_replica_expression=args.rse,
                                           priority=5)
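
# Example invocation (hypothetical run number and script name): pre-create the rucio
# datasets and replication rules for the records/peaklets outputs before processing starts.
#
#     python create_datasets.py 10000 --dtype peaklets --context xenonnt_online --cmt ONLINE \
#         --rse UC_OSG_USERDISK --upload-to-rucio --update-db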
def main():
    parser = argparse.ArgumentParser(description="Strax Processing With Outsource")
    parser.add_argument('dataset', help='Run number', type=int)
    parser.add_argument('--output', help='desired strax(en) output')
    parser.add_argument('--context', help='name of context')
    parser.add_argument('--chunks', nargs='*', help='chunk ids to download')
    parser.add_argument('--rse', type=str, default="UC_OSG_USERDISK")
    parser.add_argument('--cmt', type=str, default='ONLINE')
    parser.add_argument('--upload-to-rucio', action='store_true', dest='upload_to_rucio')
    parser.add_argument('--update-db', action='store_true', dest='update_db')
    parser.add_argument('--download-only', action='store_true', dest='download_only')
    parser.add_argument('--no-download', action='store_true', dest='no_download')

    args = parser.parse_args()

    # directory where we will be putting everything
    data_dir = './data'

    # make sure this is empty
    # if os.path.exists(data_dir):
    #     rmtree(data_dir)

    # get context
    st = getattr(straxen.contexts, args.context)()
    st.storage = [strax.DataDirectory(data_dir)]
    apply_global_version(st, args.cmt)

    runid = args.dataset
    runid_str = "%06d" % runid
    out_dtype = args.output

    # determine which input dtypes we need
    bottom = 'peaklets' if args.chunks is None else 'raw_records'
    to_download = find_data_to_download(runid, out_dtype, st, bottom=bottom)

    if not args.no_download:
        t0 = time.time()
        # download all the required datatypes to produce this output file
        if args.chunks:
            for in_dtype, hash in to_download:
                # download the input data
                if not os.path.exists(os.path.join(data_dir, f"{runid:06d}-{in_dtype}-{hash}")):
                    admix.download(runid, in_dtype, hash, chunks=args.chunks, location=data_dir)
        else:
            for in_dtype, hash in to_download:
                if not os.path.exists(os.path.join(data_dir, f"{runid:06d}-{in_dtype}-{hash}")):
                    admix.download(runid, in_dtype, hash, location=data_dir)
        download_time = time.time() - t0  # seconds
        print(f"=== Download time (minutes): {download_time/60:0.2f}")

    # initialize plugin needed for processing this output type
    plugin = st._get_plugins((out_dtype,), runid_str)[out_dtype]
    st._set_plugin_config(plugin, runid_str, tolerant=False)
    plugin.setup()

    # figure out what plugins we need to process/initialize
    to_process = [args.output]
    downloaded = [dtype for dtype, _ in to_download]
    missing = set(plugin.depends_on) - set(downloaded)
    if len(missing) > 0:
        missing_str = ', '.join(missing)
        print(f"Need to create intermediate data: {missing_str}")
        to_process = list(missing) + to_process

    # keep track of the data we just downloaded -- will be important for the upload step later
    downloaded_data = os.listdir(data_dir)
    print("--Downloaded data--")
    for dd in downloaded_data:
        print(dd)
    print("-------------------\n")

    if args.download_only:
        sys.exit(0)

    print(f"To process: {', '.join(to_process)}")

    _tmp_path = tempfile.mkdtemp()
    for dtype in to_process:
        close_savers = dtype != args.output
        process(runid,
                dtype,
                st,
                args.chunks,
                close_savers=close_savers,
                tmp_path=_tmp_path)

    print("Done processing. Now check if we should upload to rucio")

    # now we move the tmpfiles back to the main directory, if needed
    # this is for cases where we went from raw_records-->records-->peaklets in one go
    if os.path.exists(_tmp_path):
        for dtype_path_thing in os.listdir(_tmp_path):
            tmp_path = os.path.join(_tmp_path, dtype_path_thing)
            merged_dir = os.path.join(data_dir, dtype_path_thing.split('_temp')[0])

            for file in os.listdir(tmp_path):
                copyfile(os.path.join(tmp_path, file), os.path.join(merged_dir, file))

            os.rename(merged_dir, os.path.join(data_dir, dtype_path_thing))

    # initiate the rucio clients
    upload_client = UploadClient()
    rucio_client = Client()
    rc = RucioSummoner()  # used below to check the state of the DALI rule

    # if we processed the entire run, we upload everything including metadata
    # otherwise, we just upload the chunks
    upload_meta = args.chunks is None

    # now loop over datatypes we just made and upload the data
    processed_data = [d for d in os.listdir(data_dir) if d not in downloaded_data]
    print("---- Processed data ----")
    for d in processed_data:
        print(d)
    print("------------------------\n")

    if not args.upload_to_rucio:
        print("Ignoring rucio upload. Exiting.")
        return

    for dirname in processed_data:
        # get rucio dataset
        this_run, this_dtype, this_hash = dirname.split('-')

        if this_dtype in rechunk_dtypes:
            print(f"Skipping upload of {this_dtype} since we need to rechunk it")
            continue

        # remove the _temp if we are processing chunks in parallel
        if args.chunks is not None:
            this_hash = this_hash.replace('_temp', '')

        dataset = make_did(int(this_run), this_dtype, this_hash)
        scope, dset_name = dataset.split(':')

        files = [f for f in os.listdir(os.path.join(data_dir, dirname))]

        if not upload_meta:
            files = [f for f in files if not f.endswith('.json')]

            # check that the output number of files is what we expect
            if len(files) != len(args.chunks):
                processed_chunks = set(int(f.split('-')[-1]) for f in files)
                expected_chunks = set(int(c) for c in args.chunks)
                missing_chunks = expected_chunks - processed_chunks
                missing_chunks = ' '.join(str(c) for c in sorted(missing_chunks))
                raise RuntimeError("File mismatch! We are missing output data for the following chunks: "
                                   f"{missing_chunks}")

        # if there are no files, we can't upload them
        if len(files) == 0:
            print(f"No files to upload in {dirname}. Skipping.")
            continue

        # get list of files that have already been uploaded
        # this is to allow us to re-run the workflow for some chunks
        try:
            existing_files = [f for f in rucio_client.list_dids(scope, {'type': 'file'},
                                                                type='file')]
            existing_files = [f for f in existing_files if dset_name in f]

            existing_files_in_dataset = [f['name']
                                         for f in rucio_client.list_files(scope, dset_name)]

            # for some reason files get uploaded but not attached correctly
            need_attached = list(set(existing_files) - set(existing_files_in_dataset))

            # only consider the chunks here
            if args.chunks is not None:
                need_attached = [f for f in need_attached
                                 if str(int(f.split('-')[-1])) in args.chunks]

            if len(need_attached) > 0:
                dids_to_attach = [dict(scope=scope, name=name) for name in need_attached]
                rucio_client.attach_dids(scope, dset_name, dids_to_attach)

        except rucio.common.exception.DataIdentifierNotFound:
            existing_files = []

        # prepare list of dicts to be uploaded
        to_upload = []
        for f in files:
            path = os.path.join(data_dir, dirname, f)
            if f in existing_files:
                print(f"Skipping {f} since it is already uploaded")
                continue

            print(f"Uploading {f}")
            d = dict(path=path,
                     did_scope=scope,
                     did_name=f,
                     dataset_scope=scope,
                     dataset_name=dset_name,
                     rse=args.rse,
                     register_after_upload=True)
            to_upload.append(d)

        # now do the upload!
        if len(to_upload) == 0:
            print(f"No files to upload for {dirname}")
            continue
        try:
            upload_client.upload(to_upload)
        except:
            print(f"Upload of {dset_name} failed for some reason")
            raise

        # TODO check rucio that the files are there?
        print(f"Upload of {len(files)} files in {dirname} finished successfully")

        # if we processed the whole thing, add a rule at DALI and update the runDB here
        if args.chunks is None:
            rucio_client.add_replication_rule([dict(scope=scope, name=dset_name)], 1,
                                              'UC_DALI_USERDISK',
                                              source_replica_expression=args.rse,
                                              priority=5)

            # skip if the update_db flag is false
            if args.update_db:
                md = st.get_meta(runid_str, this_dtype)
                chunk_mb = [chunk['nbytes'] / (1e6) for chunk in md['chunks']]
                data_size_mb = np.sum(chunk_mb)
                avg_data_size_mb = np.mean(chunk_mb)

                # update runDB
                new_data_dict = dict()
                new_data_dict['location'] = args.rse
                new_data_dict['did'] = dataset
                new_data_dict['status'] = 'transferred'
                new_data_dict['host'] = "rucio-catalogue"
                new_data_dict['type'] = this_dtype
                new_data_dict['protocol'] = 'rucio'
                new_data_dict['creation_time'] = datetime.datetime.utcnow().isoformat()
                new_data_dict['creation_place'] = "OSG"
                new_data_dict['meta'] = dict(lineage=md.get('lineage'),
                                             avg_chunk_mb=avg_data_size_mb,
                                             file_count=len(files),
                                             size_mb=data_size_mb,
                                             strax_version=strax.__version__,
                                             straxen_version=straxen.__version__)

                db.update_data(runid, new_data_dict)
                print(f"Database updated for {this_dtype} at {args.rse}")

                # now update the dali db entry
                rule = rc.GetRule(dataset, 'UC_DALI_USERDISK')
                if rule['state'] == 'OK':
                    status = 'transferred'
                elif rule['state'] == 'REPLICATING':
                    status = 'transferring'
                elif rule['state'] == 'STUCK':
                    status = 'stuck'
                new_data_dict['location'] = 'UC_DALI_USERDISK'
                new_data_dict['status'] = status
                db.update_data(runid, new_data_dict)

        # cleanup the files we uploaded
        # this is likely only done for records data because we will rechunk the others
        for f in files:
            print(f"Removing {f}")
            os.remove(os.path.join(data_dir, dirname, f))

    print("ALL DONE!")
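
# Example invocation (hypothetical run number, chunk list, and script name): process two
# chunks of a run up to peaklets on an OSG worker node and upload whatever output is not
# flagged for rechunking.
#
#     python runstrax.py 10000 --output peaklets --context xenonnt_online --cmt ONLINE \
#         --chunks 0 1 --rse UC_OSG_USERDISK --upload-to-rucio --update-db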