def test_get_mgr_ok_single_pfn(self):
    """(RSE/PROTOCOLS): Get a single file from storage providing the PFN (Success)

    Requests ``1_rse_remote_get.raw`` via ``mgr.download``, addressing it by
    the PFN stored in ``self.static_file`` and passing ``self.gettmpdir`` as
    the destination argument.
    """
    requested_file = {
        'name': '1_rse_remote_get.raw',
        'scope': 'user.%s' % self.user,
        'pfn': self.static_file,
    }
    mgr.download(self.rse_settings, requested_file, self.gettmpdir)
def test_get_mgr_SourceNotFound_single_pfn(self):
    """(RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)

    Resolves the PFN for ``not_existing_data.raw`` with ``mgr.lfns2pfns`` and
    then asks ``mgr.download`` to fetch the file by that PFN into
    ``self.gettmpdir``.

    NOTE(review): the SourceNotFound expectation is presumably enforced by a
    decorator that is not part of this block -- confirm.
    """
    scope = 'user.%s' % self.user
    pfns = mgr.lfns2pfns(self.rse_settings, {'name': 'not_existing_data.raw', 'scope': scope})
    # dict.values() is a non-subscriptable view on Python 3; materialise it
    # so the first (only) PFN can be picked on both Python 2 and 3.
    pfn = list(pfns.values())[0]
    mgr.download(self.rse_settings,
                 {'name': 'not_existing_data.raw', 'scope': scope, 'pfn': pfn},
                 self.gettmpdir)
def test_multi_get_mgr_ok(self):
    """(RSE/PROTOCOLS): Get multiple files from storage providing LFNs and PFNs (Success)

    Downloads four files in one ``mgr.download`` call into ``self.gettmpdir``:
    two addressed by LFN only, one by the PFN in ``self.static_file`` and one
    by a PFN resolved through ``mgr.lfns2pfns``.

    :raises Exception: if the overall status or any per-file entry in the
                       returned details is falsy
    """
    scope = 'user.%s' % self.user
    # dict.values() is a non-subscriptable view on Python 3; materialise it
    # so the first (only) PFN can be picked on both Python 2 and 3.
    pfn_b = list(mgr.lfns2pfns(self.rse_settings,
                               {'name': '4_rse_remote_get.raw', 'scope': scope}).values())[0]
    status, details = mgr.download(
        self.rse_settings,
        [{'name': '1_rse_remote_get.raw', 'scope': scope},
         {'name': '2_rse_remote_get.raw', 'scope': scope},
         {'name': '3_rse_remote_get.raw', 'scope': scope, 'pfn': self.static_file},
         {'name': '4_rse_remote_get.raw', 'scope': scope, 'pfn': pfn_b}],
        self.gettmpdir)

    expected_names = ('1_rse_remote_get.raw', '2_rse_remote_get.raw',
                      '3_rse_remote_get.raw', '4_rse_remote_get.raw')
    # Short-circuits exactly like the chained ``and``: details are only
    # inspected when the overall status is truthy.
    if not (status and all(details['%s:%s' % (scope, name)] for name in expected_names)):
        raise Exception('Return not as expected: %s, %s' % (status, details))
def test_multi_get_mgr_ok(self):
    """(RSE/PROTOCOLS): Get multiple files from storage providing LFNs and PFNs (Success)

    Downloads four files in one ``mgr.download`` call into ``self.tmpdir``:
    two addressed by LFN only, one by the PFN in ``self.static_file`` and one
    by a PFN resolved through ``mgr.lfns2pfns``.

    :raises Exception: if the overall status or any per-file entry in the
                       returned details is falsy
    """
    scope = 'user.%s' % self.user
    # dict.values() is a non-subscriptable view on Python 3; materialise it
    # so the first (only) PFN can be picked on both Python 2 and 3.
    pfn_b = list(mgr.lfns2pfns(self.rse_settings,
                               {'name': '4_rse_remote_get.raw', 'scope': scope}).values())[0]
    status, details = mgr.download(
        self.rse_settings,
        [{'name': '1_rse_remote_get.raw', 'scope': scope},
         {'name': '2_rse_remote_get.raw', 'scope': scope},
         {'name': '3_rse_remote_get.raw', 'scope': scope, 'pfn': self.static_file},
         {'name': '4_rse_remote_get.raw', 'scope': scope, 'pfn': pfn_b}],
        self.tmpdir)

    expected_names = ('1_rse_remote_get.raw', '2_rse_remote_get.raw',
                      '3_rse_remote_get.raw', '4_rse_remote_get.raw')
    # Short-circuits exactly like the chained ``and``: details are only
    # inspected when the overall status is truthy.
    if not (status and all(details['%s:%s' % (scope, name)] for name in expected_names)):
        raise Exception('Return not as expected: %s, %s' % (status, details))
def test_get_mgr_SourceNotFound_multi(self):
    """(RSE/PROTOCOLS): Get multiple files from storage providing LFNs and PFNs (SourceNotFound)

    Requests two ``*_rse_remote_get.raw`` files and two
    ``*_not_existing_data.raw`` files (one of each addressed by PFN) in a
    single ``mgr.download`` call into ``self.gettmpdir``.

    When both ``rse_remote_get`` entries are truthy and both
    ``not_existing_data`` entries are ``SourceNotFound`` instances, the first
    of those exceptions is re-raised; any other outcome raises a generic
    ``Exception``.

    NOTE(review): the re-raised SourceNotFound is presumably what a decorator
    outside this block asserts on -- confirm.
    """
    scope = 'user.%s' % self.user
    protocol = mgr.create_protocol(self.rse_settings, 'read')
    # dict.values() is a non-subscriptable view on Python 3; materialise it
    # so the first (only) PFN can be picked on both Python 2 and 3.
    pfn_a = list(protocol.lfns2pfns({'name': '2_rse_remote_get.raw', 'scope': scope}).values())[0]
    pfn_b = list(protocol.lfns2pfns({'name': '2_rse_remote_get_not_existing.raw', 'scope': scope}).values())[0]
    status, details = mgr.download(
        self.rse_settings,
        [{'name': '1_not_existing_data.raw', 'scope': scope},
         {'name': '1_rse_remote_get.raw', 'scope': scope},
         {'name': '2_not_existing_data.raw', 'scope': scope, 'pfn': pfn_b},
         {'name': '2_rse_remote_get.raw', 'scope': scope, 'pfn': pfn_a}],
        self.gettmpdir)

    if details['%s:1_rse_remote_get.raw' % scope] and details['%s:2_rse_remote_get.raw' % scope]:
        first_missing = '%s:1_not_existing_data.raw' % scope
        second_missing = '%s:2_not_existing_data.raw' % scope
        if (details[first_missing].__class__.__name__ == 'SourceNotFound'
                and details[second_missing].__class__.__name__ == 'SourceNotFound'):
            raise details[first_missing]
    # Every path that did not re-raise SourceNotFound is an unexpected result.
    raise Exception('Return not as expected: %s, %s' % (status, details))
def test_get_mgr_SourceNotFound_multi(self):
    """(RSE/PROTOCOLS): Get multiple files from storage providing LFNs and PFNs (SourceNotFound)

    Requests two ``*_rse_remote_get.raw`` files and two
    ``*_not_existing_data.raw`` files (one of each addressed by PFN) in a
    single ``mgr.download`` call into ``self.tmpdir``.

    When both ``rse_remote_get`` entries are truthy and both
    ``not_existing_data`` entries are ``SourceNotFound`` instances, the first
    of those exceptions is re-raised; any other outcome raises a generic
    ``Exception``.

    NOTE(review): the re-raised SourceNotFound is presumably what a decorator
    outside this block asserts on -- confirm.
    """
    scope = 'user.%s' % self.user
    protocol = mgr.create_protocol(self.rse_settings, 'read')
    # dict.values() is a non-subscriptable view on Python 3; materialise it
    # so the first (only) PFN can be picked on both Python 2 and 3.
    pfn_a = list(protocol.lfns2pfns({'name': '2_rse_remote_get.raw', 'scope': scope}).values())[0]
    pfn_b = list(protocol.lfns2pfns({'name': '2_rse_remote_get_not_existing.raw', 'scope': scope}).values())[0]
    status, details = mgr.download(
        self.rse_settings,
        [{'name': '1_not_existing_data.raw', 'scope': scope},
         {'name': '1_rse_remote_get.raw', 'scope': scope},
         {'name': '2_not_existing_data.raw', 'scope': scope, 'pfn': pfn_b},
         {'name': '2_rse_remote_get.raw', 'scope': scope, 'pfn': pfn_a}],
        self.tmpdir)

    if details['%s:1_rse_remote_get.raw' % scope] and details['%s:2_rse_remote_get.raw' % scope]:
        first_missing = '%s:1_not_existing_data.raw' % scope
        second_missing = '%s:2_not_existing_data.raw' % scope
        if (details[first_missing].__class__.__name__ == 'SourceNotFound'
                and details[second_missing].__class__.__name__ == 'SourceNotFound'):
            raise details[first_missing]
    # Every path that did not re-raise SourceNotFound is an unexpected result.
    raise Exception('Return not as expected: %s, %s' % (status, details))
def _downloader(self, pfn, protocol, human, input_queue, output_queue, user_agent, threadnb,
                total_threads, trace_endpoint, trace_pattern, transfer_timeout=None):
    """
    Worker loop that drains ``input_queue`` and downloads every file found in it.

    For each file the available RSEs are tried in random order; on every RSE
    the matching read protocols are tried one after another, and each
    RSE/protocol combination gets up to two attempts. A trace is sent after
    every attempt. The function returns once the input queue is empty.

    :param pfn: optional PFN; when truthy it is handed to ``rsemgr.download``,
                its scheme selects the protocol and the checksum is ignored
    :param protocol: scheme passed to ``rsemgr.get_protocols_ordered`` when no PFN is given
    :param human: forwarded to ``sizefmt`` for the final log message
    :param input_queue: queue of file dictionaries; the keys read here are
                        ``dest_dir``, ``scope``, ``name``, ``adler32``,
                        ``dataset_scope``, ``dataset_name``, ``bytes`` and ``rses``
    :param output_queue: queue receiving one dictionary per successful or
                         corrupted download (``clientState`` 'DONE' / 'CORRUPTED')
    :param user_agent: forwarded to ``self.send_trace``
    :param threadnb: number of this worker, used in log messages and traces
    :param total_threads: total number of workers, used in log messages and traces
    :param trace_endpoint: forwarded to ``self.send_trace``
    :param trace_pattern: dictionary that is deep-copied as the base of every file's trace
    :param transfer_timeout: forwarded to ``rsemgr.download``
    """
    rse_dict = {}  # per-worker cache of RSE info, keyed by RSE name
    thread_prefix = 'Thread %s/%s' % (threadnb, total_threads)
    while True:
        try:
            file_item = input_queue.get_nowait()
        except Empty:
            return

        dest_dir = file_item['dest_dir']
        file_scope = file_item['scope']
        file_name = file_item['name']
        file_didstr = '%s:%s' % (file_scope, file_name)

        # arguments for rsemgr.download already known
        dlfile = {}
        dlfile['name'] = file_name
        dlfile['scope'] = file_scope
        dlfile['adler32'] = file_item['adler32']
        ignore_checksum = bool(pfn)
        if pfn:
            dlfile['pfn'] = pfn

        logger.info('%s : Starting the download of %s' % (thread_prefix, file_didstr))
        trace = deepcopy(trace_pattern)
        trace.update({'scope': file_scope,
                      'filename': file_name,
                      'datasetScope': file_item['dataset_scope'],
                      'dataset': file_item['dataset_name'],
                      'filesize': file_item['bytes']})

        rses = list(file_item['rses'].keys())
        if not rses:
            logger.warning('%s : File %s has no available replicas. Cannot be downloaded.' % (thread_prefix, file_didstr))
            trace['clientState'] = 'FILE_NOT_FOUND'
            self.send_trace(trace, trace_endpoint, user_agent)
            input_queue.task_done()
            continue

        random.shuffle(rses)
        logger.debug('%s : Potential sources : %s' % (thread_prefix, str(rses)))
        success = False

        # retry with different RSEs if one is not available or fails
        while not success and len(rses):
            rse_name = rses.pop()
            if rse_name not in rse_dict:
                try:
                    rse_dict[rse_name] = rsemgr.get_rse_info(rse_name)
                except RSENotFound:
                    logger.warning('%s : Could not get info of RSE %s' % (thread_prefix, rse_name))
                    continue

            rse = rse_dict[rse_name]
            if not rse['availability_read']:
                logger.info('%s : %s is blacklisted for reading' % (thread_prefix, rse_name))
                continue

            try:
                if pfn:
                    protocols = [rsemgr.select_protocol(rse, operation='read', scheme=pfn.split(':')[0])]
                else:
                    protocols = rsemgr.get_protocols_ordered(rse, operation='read', scheme=protocol)
                    # reversed so that pop() below yields the protocols in their original order
                    protocols.reverse()
            except RSEProtocolNotSupported as error:
                logger.info('%s : The protocol specfied (%s) is not supported by %s' % (thread_prefix, protocol, rse_name))
                logger.debug(error)
                continue

            logger.debug('%s : %d possible protocol(s) for read' % (thread_prefix, len(protocols)))
            trace['remoteSite'] = rse_name
            trace['clientState'] = 'DOWNLOAD_ATTEMPT'

            # retry with different protocols on the given RSE
            while not success and len(protocols):
                protocol_retry = protocols.pop()
                logger.debug('%s : Trying protocol %s at %s' % (thread_prefix, protocol_retry['scheme'], rse_name))
                trace['protocol'] = protocol_retry['scheme']

                out = {}
                out['dataset_scope'] = file_item['dataset_scope']
                out['dataset_name'] = file_item['dataset_name']
                out['scope'] = file_scope
                out['name'] = file_name

                attempt = 0
                retries = 2
                # retry the same RSE and protocol if the download fails
                while not success and attempt < retries:
                    attempt += 1
                    out['attemptnr'] = attempt

                    logger.info('%s : File %s trying from %s' % (thread_prefix, file_didstr, rse_name))
                    try:
                        trace['transferStart'] = time.time()
                        rsemgr.download(rse,
                                        files=[dlfile],
                                        dest_dir=dest_dir,
                                        force_scheme=protocol_retry['scheme'],
                                        ignore_checksum=ignore_checksum,
                                        transfer_timeout=transfer_timeout)
                        trace['transferEnd'] = time.time()
                        trace['clientState'] = 'DONE'
                        out['clientState'] = 'DONE'
                        success = True
                        output_queue.put(out)
                        logger.info('%s : File %s successfully downloaded from %s' % (thread_prefix, file_didstr, rse_name))
                    except KeyboardInterrupt:
                        logger.warning('You pressed Ctrl+C! Exiting gracefully')
                        # os.getpgid() requires a pid; 0 means "the calling process".
                        os.kill(os.getpgid(0), signal.SIGINT)
                        return
                    except FileConsistencyMismatch as error:
                        logger.warning(str(error))
                        try:
                            pfns_dict = rsemgr.lfns2pfns(rse,
                                                         lfns=[{'name': file_name, 'scope': file_scope}],
                                                         operation='read',
                                                         scheme=protocol)
                            # Kept in its own local: rebinding the ``pfn`` parameter here
                            # would make every later file in this worker be fetched with
                            # this PFN and with checksum validation switched off.
                            corrupted_pfn = pfns_dict[file_didstr]

                            out['clientState'] = 'CORRUPTED'
                            out['pfn'] = corrupted_pfn
                            # NOTE(review): ``out`` is the same object that a later
                            # successful attempt mutates to 'DONE' -- confirm the
                            # consumer does not rely on this entry afterwards.
                            output_queue.put(out)
                        except Exception as lookup_error:
                            logger.debug('%s : %s' % (thread_prefix, str(lookup_error)))
                        trace['clientState'] = 'FAIL_VALIDATE'
                        logger.debug('%s : Failed attempt %s/%s' % (thread_prefix, attempt, retries))
                    except Exception as error:
                        logger.warning(str(error))
                        trace['clientState'] = str(type(error).__name__)
                        logger.debug('%s : Failed attempt %s/%s' % (thread_prefix, attempt, retries))

                    self.send_trace(trace, trace_endpoint, user_agent, threadnb=threadnb, total_threads=total_threads)

        if success:
            duration = round(trace['transferEnd'] - trace['transferStart'], 2)
            # A transfer faster than the rounding precision gives duration == 0.0;
            # skip the rate computation then instead of dividing by zero.
            if pfn or not duration:
                logger.info('%s : File %s successfully downloaded in %s seconds' % (thread_prefix, file_didstr, duration))
            else:
                logger.info('%s : File %s successfully downloaded. %s in %s seconds = %s MBps' % (thread_prefix,
                                                                                                  file_didstr,
                                                                                                  sizefmt(file_item['bytes'], human),
                                                                                                  duration,
                                                                                                  round((file_item['bytes'] / duration) * 1e-6, 2)))
        else:
            logger.error('%s : Cannot download file %s' % (thread_prefix, file_didstr))

        input_queue.task_done()
def test_get_mgr_SourceNotFound_single_lfn(self):
    """(RSE/PROTOCOLS): Get a single file from storage providing LFN (SourceNotFound)

    Asks ``mgr.download`` for ``not_existing_data.raw`` by LFN only, passing
    ``self.gettmpdir`` as the destination argument.

    NOTE(review): the SourceNotFound expectation is presumably enforced by a
    decorator that is not part of this block -- confirm.
    """
    missing_lfn = {
        'name': 'not_existing_data.raw',
        'scope': 'user.%s' % self.user,
    }
    mgr.download(self.rse_settings, missing_lfn, self.gettmpdir)
def _download_item(self, item, trace, log_prefix=''):
    """
    Download one item and send a trace for every attempt.
    (Class-internal helper, not meant to be called from outside.)

    The item's RSEs are tried in list order; on every RSE the read protocols
    are tried one after another, and each RSE/protocol pair gets up to two
    attempts. If the destination file already exists, or the item has no
    replicas, nothing is downloaded and a single trace is sent.

    :param item: dictionary describing the item to download
    :param trace: dictionary used as the trace pattern; it is updated in place
    :param log_prefix: string put in front of every log message
    :returns: the input item, extended with a ``clientState`` attribute
              ('ALREADY_DONE', 'FILE_NOT_FOUND', 'DONE' or 'FAILED')
    """
    log = self.logger

    scope = item['scope']
    name = item['name']
    did = '%s:%s' % (scope, name)

    log.info('%sPreparing download of %s' % (log_prefix, did))

    trace['scope'] = scope
    trace['filename'] = name
    # NOTE(review): key is 'dataset_scope' here -- confirm it is the key the trace consumer expects
    trace.setdefault('dataset_scope', item.get('dataset_scope', ''))
    trace.setdefault('dataset', item.get('dataset_name', ''))
    trace.setdefault('filesize', item.get('bytes'))

    # nothing to do when the target file is already on disk
    target_dir = item['dest_dir_path']
    target_file = os.path.join(target_dir, name)
    if os.path.isfile(target_file):
        log.info('%sFile exists already locally: %s' % (log_prefix, did))
        item['clientState'] = 'ALREADY_DONE'

        trace['transferStart'] = time.time()
        trace['transferEnd'] = time.time()
        trace['clientState'] = 'ALREADY_DONE'
        send_trace(trace, self.client.host, self.user_agent)
        return item

    # without replicas there is nothing to download from
    candidates = list(item['rses'].keys())
    if len(candidates) == 0:
        log.warning('%sFile %s has no available replicas. Cannot be downloaded' % (log_prefix, did))
        item['clientState'] = 'FILE_NOT_FOUND'

        trace['clientState'] = 'FILE_NOT_FOUND'
        send_trace(trace, self.client.host, self.user_agent)
        return item

    # the best RSE comes first in list_replicas order; reversing lets pop() hand it out first
    candidates.reverse()
    log.debug('%sPotential sources: %s' % (log_prefix, str(candidates)))

    success = False
    # move on to the next RSE whenever one is unavailable or fails
    while candidates and not success:
        rse_name = candidates.pop()
        try:
            rse = rsemgr.get_rse_info(rse_name)
        except RSENotFound:
            log.warning('%sCould not get info of RSE %s' % (log_prefix, rse_name))
            continue

        if not rse['availability_read']:
            log.info('%s%s is blacklisted for reading' % (log_prefix, rse_name))
            continue

        force_scheme = item.get('force_scheme')
        try:
            protocols = rsemgr.get_protocols_ordered(rse, operation='read', scheme=force_scheme)
            protocols.reverse()
        except RSEProtocolNotSupported as unsupported:
            log.info('%sThe protocol specfied (%s) is not supported by %s' % (log_prefix, force_scheme, rse_name))
            log.debug(unsupported)
            continue

        log.debug('%sPotential protocol(s) read: %s' % (log_prefix, protocols))

        trace['remoteSite'] = rse_name
        trace['clientState'] = 'DOWNLOAD_ATTEMPT'

        # move on to the next protocol of this RSE whenever one fails
        while protocols and not success:
            cur_scheme = protocols.pop()['scheme']
            trace['protocol'] = cur_scheme

            log.info('%sTrying to download with %s from %s: %s ' % (log_prefix, cur_scheme, rse_name, did))

            retries = 2
            # repeat the same RSE/protocol pair a few times before giving up on it
            for attempt in range(1, retries + 1):
                item['attemptnr'] = attempt

                try:
                    start_time = time.time()
                    rsemgr.download(rse,
                                    files=item,
                                    dest_dir=target_dir,
                                    force_scheme=cur_scheme,
                                    ignore_checksum=item.get('ignore_checksum', False),
                                    transfer_timeout=item.get('transfer_timeout'))
                    end_time = time.time()

                    trace['transferStart'] = start_time
                    trace['transferEnd'] = end_time
                    trace['clientState'] = 'DONE'
                    item['clientState'] = 'DONE'
                    success = True
                except FileConsistencyMismatch as mismatch:
                    log.warning(str(mismatch))
                    try:
                        pfn = item.get('pfn')
                        if not pfn:
                            lfn = {'name': name, 'scope': scope}
                            pfns_dict = rsemgr.lfns2pfns(rse, lfns=lfn, operation='read', scheme=cur_scheme)
                            pfn = pfns_dict[did]

                        corrupted_item = copy.deepcopy(item)
                        corrupted_item['clientState'] = 'FAIL_VALIDATE'
                        corrupted_item['pfn'] = pfn
                        # self.corrupted_files.append(corrupted_item)
                    except Exception as lookup_error:
                        log.debug('%s%s' % (log_prefix, str(lookup_error)))
                    trace['clientState'] = 'FAIL_VALIDATE'
                except Exception as failure:
                    log.warning(str(failure))
                    trace['clientState'] = str(type(failure).__name__)

                if not success:
                    log.debug('%sFailed attempt %s/%s' % (log_prefix, attempt, retries))

                # one trace per attempt, whatever its outcome
                send_trace(trace, self.client.host, self.user_agent)

                if success:
                    break

    if not success:
        log.error('%sFailed to download file %s' % (log_prefix, did))
        item['clientState'] = 'FAILED'
        return item

    duration = round(end_time - start_time, 2)
    size = item.get('bytes')
    size_str = sizefmt(size, self.is_human_readable)
    if not (size and duration):
        # size unknown or transfer too fast to measure: no rate to report
        log.info('%sFile %s successfully downloaded in %s seconds' % (log_prefix, did, duration))
    else:
        rate = round((size / duration) * 1e-6, 2)
        log.info('%sFile %s successfully downloaded. %s in %s seconds = %s MBps' % (log_prefix, did, size_str, duration, rate))
    return item
def test_get_mgr_SourceNotFound_single_pfn(self):
    """(RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)

    Resolves the PFN for ``not_existing_data.raw`` with ``mgr.lfns2pfns`` and
    then asks ``mgr.download`` to fetch the file by that PFN into
    ``self.tmpdir``.

    NOTE(review): the SourceNotFound expectation is presumably enforced by a
    decorator that is not part of this block -- confirm.
    """
    scope = 'user.%s' % self.user
    pfns = mgr.lfns2pfns(self.rse_settings, {'name': 'not_existing_data.raw', 'scope': scope})
    # dict.values() is a non-subscriptable view on Python 3; materialise it
    # so the first (only) PFN can be picked on both Python 2 and 3.
    pfn = list(pfns.values())[0]
    mgr.download(self.rse_settings,
                 {'name': 'not_existing_data.raw', 'scope': scope, 'pfn': pfn},
                 self.tmpdir)
def test_get_mgr_SourceNotFound_single_lfn(self):
    """(RSE/PROTOCOLS): Get a single file from storage providing LFN (SourceNotFound)

    Asks ``mgr.download`` for ``not_existing_data.raw`` by LFN only, passing
    ``self.tmpdir`` as the destination argument.

    NOTE(review): the SourceNotFound expectation is presumably enforced by a
    decorator that is not part of this block -- confirm.
    """
    missing_lfn = {
        'name': 'not_existing_data.raw',
        'scope': 'user.%s' % self.user,
    }
    mgr.download(self.rse_settings, missing_lfn, self.tmpdir)
def test_get_mgr_ok_single_pfn(self):
    """(RSE/PROTOCOLS): Get a single file from storage providing the PFN (Success)

    Requests ``1_rse_remote_get.raw`` via ``mgr.download``, addressing it by
    the PFN stored in ``self.static_file`` and passing ``self.tmpdir`` as the
    destination argument.
    """
    requested_file = {
        'name': '1_rse_remote_get.raw',
        'scope': 'user.%s' % self.user,
        'pfn': self.static_file,
    }
    mgr.download(self.rse_settings, requested_file, self.tmpdir)