def _downloader(self, pfn, protocol, human, input_queue, output_queue, user_agent, threadnb,
                total_threads, trace_endpoint, trace_pattern, transfer_timeout=None):
    rse_dict = {}
    thread_prefix = 'Thread %s/%s' % (threadnb, total_threads)
    while True:
        try:
            file = input_queue.get_nowait()
        except Empty:
            return

        dest_dir = file['dest_dir']
        file_scope = file['scope']
        file_name = file['name']
        file_didstr = '%s:%s' % (file_scope, file_name)

        # arguments for rsemgr.download already known
        dlfile = {}
        dlfile['name'] = file_name
        dlfile['scope'] = file_scope
        dlfile['adler32'] = file['adler32']
        ignore_checksum = True if pfn else False
        if pfn:
            dlfile['pfn'] = pfn

        logger.info('%s : Starting the download of %s' % (thread_prefix, file_didstr))
        trace = deepcopy(trace_pattern)
        trace.update({'scope': file_scope,
                      'filename': file_name,
                      'datasetScope': file['dataset_scope'],
                      'dataset': file['dataset_name'],
                      'filesize': file['bytes']})

        rses = list(file['rses'].keys())
        if rses == []:
            logger.warning('%s : File %s has no available replicas. Cannot be downloaded.' % (thread_prefix, file_didstr))
            trace['clientState'] = 'FILE_NOT_FOUND'
            self.send_trace(trace, trace_endpoint, user_agent)
            input_queue.task_done()
            continue

        random.shuffle(rses)
        logger.debug('%s : Potential sources : %s' % (thread_prefix, str(rses)))

        success = False
        while not success and len(rses):
            rse_name = rses.pop()
            if rse_name not in rse_dict:
                try:
                    rse_dict[rse_name] = rsemgr.get_rse_info(rse_name)
                except RSENotFound:
                    logger.warning('%s : Could not get info of RSE %s' % (thread_prefix, rse_name))
                    continue
            rse = rse_dict[rse_name]

            if not rse['availability_read']:
                logger.info('%s : %s is blacklisted for reading' % (thread_prefix, rse_name))
                continue

            try:
                if pfn:
                    protocols = [rsemgr.select_protocol(rse, operation='read', scheme=pfn.split(':')[0])]
                else:
                    protocols = rsemgr.get_protocols_ordered(rse, operation='read', scheme=protocol)
                    protocols.reverse()
            except RSEProtocolNotSupported as error:
                logger.info('%s : The protocol specified (%s) is not supported by %s' % (thread_prefix, protocol, rse_name))
                logger.debug(error)
                continue

            logger.debug('%s : %d possible protocol(s) for read' % (thread_prefix, len(protocols)))
            trace['remoteSite'] = rse_name
            trace['clientState'] = 'DOWNLOAD_ATTEMPT'

            while not success and len(protocols):
                protocol_retry = protocols.pop()
                logger.debug('%s : Trying protocol %s at %s' % (thread_prefix, protocol_retry['scheme'], rse_name))
                trace['protocol'] = protocol_retry['scheme']

                out = {}
                out['dataset_scope'] = file['dataset_scope']
                out['dataset_name'] = file['dataset_name']
                out['scope'] = file_scope
                out['name'] = file_name

                attempt = 0
                retries = 2
                while not success and attempt < retries:
                    attempt += 1
                    out['attemptnr'] = attempt
                    logger.info('%s : File %s trying from %s' % (thread_prefix, file_didstr, rse_name))
                    try:
                        trace['transferStart'] = time.time()
                        rsemgr.download(rse,
                                        files=[dlfile],
                                        dest_dir=dest_dir,
                                        force_scheme=protocol_retry['scheme'],
                                        ignore_checksum=ignore_checksum,
                                        transfer_timeout=transfer_timeout)
                        trace['transferEnd'] = time.time()
                        trace['clientState'] = 'DONE'
                        out['clientState'] = 'DONE'
                        success = True
                        output_queue.put(out)
                        logger.info('%s : File %s successfully downloaded from %s' % (thread_prefix, file_didstr, rse_name))
                    except KeyboardInterrupt:
                        logger.warning('You pressed Ctrl+C! Exiting gracefully')
                        os.kill(os.getpgid(0), signal.SIGINT)  # signal the process group leader so the whole client stops
                        return
                    except FileConsistencyMismatch as error:
                        logger.warning(str(error))
                        try:
                            pfns_dict = rsemgr.lfns2pfns(rse,
                                                         lfns=[{'name': file_name, 'scope': file_scope}],
                                                         operation='read',
                                                         scheme=protocol)
                            pfn = pfns_dict[file_didstr]
                            out['clientState'] = 'CORRUPTED'
                            out['pfn'] = pfn
                            output_queue.put(out)
                        except Exception as error:
                            logger.debug('%s : %s' % (thread_prefix, str(error)))
                        trace['clientState'] = 'FAIL_VALIDATE'
                        logger.debug('%s : Failed attempt %s/%s' % (thread_prefix, attempt, retries))
                    except Exception as error:
                        logger.warning(str(error))
                        trace['clientState'] = str(type(error).__name__)
                        logger.debug('%s : Failed attempt %s/%s' % (thread_prefix, attempt, retries))

                self.send_trace(trace, trace_endpoint, user_agent, threadnb=threadnb, total_threads=total_threads)

        if success:
            duration = round(trace['transferEnd'] - trace['transferStart'], 2)
            if pfn:
                logger.info('%s : File %s successfully downloaded in %s seconds' % (thread_prefix, file_didstr, duration))
            else:
                logger.info('%s : File %s successfully downloaded. %s in %s seconds = %s MBps' % (thread_prefix,
                                                                                                  file_didstr,
                                                                                                  sizefmt(file['bytes'], human),
                                                                                                  duration,
                                                                                                  round((file['bytes'] / duration) * 1e-6, 2)))
        else:
            logger.error('%s : Cannot download file %s' % (thread_prefix, file_didstr))
        input_queue.task_done()
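

# Usage sketch (illustration only, not part of the original module): _downloader is
# written to be run by several worker threads that share an input and an output queue.
# The helper below shows one plausible way to drive it; the function name, the client
# object, the trace endpoint URL and the trace_pattern contents are placeholders, and
# only the dictionary keys that _downloader reads above are assumed to be required.
def _example_run_downloaders(client, files, num_threads=3):
    import threading
    from queue import Queue

    input_queue = Queue()
    output_queue = Queue()
    for f in files:
        # each queued dict needs: dest_dir, scope, name, adler32, bytes,
        # dataset_scope, dataset_name and an 'rses' dict of candidate replicas
        input_queue.put(f)

    threads = []
    for threadnb in range(1, num_threads + 1):
        t = threading.Thread(target=client._downloader,
                             kwargs={'pfn': None,
                                     'protocol': None,  # no forced scheme
                                     'human': True,
                                     'input_queue': input_queue,
                                     'output_queue': output_queue,
                                     'user_agent': 'rucio-clients',
                                     'threadnb': threadnb,
                                     'total_threads': num_threads,
                                     'trace_endpoint': 'https://rucio-trace.example.org',  # placeholder
                                     'trace_pattern': {'clientState': 'PROCESSING'}})
        t.start()
        threads.append(t)
    for t in threads:
        t.join()

    # drain the results the workers put on the output queue
    results = []
    while not output_queue.empty():
        results.append(output_queue.get())
    return results
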
def _download_item(self, item, trace, log_prefix=''):
    """
    Downloads the given item and sends traces for success/failure.
    (This function is meant to be used as class internal only)

    :param item: dictionary that describes the item to download
    :param trace: dictionary representing a pattern of trace that will be sent
    :param log_prefix: string that will be put at the beginning of every log message

    :returns: dictionary with all attributes from the input item and a clientState attribute
    """
    logger = self.logger

    did_scope = item['scope']
    did_name = item['name']
    did_str = '%s:%s' % (did_scope, did_name)
    logger.info('%sPreparing download of %s' % (log_prefix, did_str))

    trace['scope'] = did_scope
    trace['filename'] = did_name
    trace.setdefault('datasetScope', item.get('dataset_scope', ''))
    trace.setdefault('dataset', item.get('dataset_name', ''))
    trace.setdefault('filesize', item.get('bytes'))

    # if file already exists, set state, send trace, and return
    dest_file_path = item['dest_file_path']
    if os.path.isfile(dest_file_path):
        logger.info('%sFile exists already locally: %s' % (log_prefix, did_str))
        item['clientState'] = 'ALREADY_DONE'
        trace['transferStart'] = time.time()
        trace['transferEnd'] = time.time()
        trace['clientState'] = 'ALREADY_DONE'
        send_trace(trace, self.client.host, self.client.user_agent)
        return item

    # check if file has replicas
    sources = item.get('sources')
    if not sources or not len(sources):
        logger.warning('%sNo available source found for file: %s' % (log_prefix, did_str))
        item['clientState'] = 'FILE_NOT_FOUND'
        trace['clientState'] = 'FILE_NOT_FOUND'
        send_trace(trace, self.client.host, self.client.user_agent)
        return item

    success = False
    # try different PFNs until one succeeded
    i = 0
    while not success and i < len(sources):
        pfn = sources[i]['pfn']
        rse_name = sources[i]['rse']
        i += 1
        scheme = pfn.split(':')[0]

        try:
            rse = rsemgr.get_rse_info(rse_name)
        except RSENotFound:
            logger.warning('%sCould not get info of RSE %s' % (log_prefix, rse_name))
            continue

        trace['remoteSite'] = rse_name
        trace['clientState'] = 'DOWNLOAD_ATTEMPT'
        trace['protocol'] = scheme

        logger.info('%sTrying to download with %s from %s: %s ' % (log_prefix, scheme, rse_name, did_str))

        try:
            protocol = rsemgr.create_protocol(rse, operation='read', scheme=scheme)
            protocol.connect()
        except Exception as error:
            logger.warning('%sFailed to create protocol for PFN: %s' % (log_prefix, pfn))
            logger.debug('scheme: %s, exception: %s' % (scheme, error))
            continue

        attempt = 0
        retries = 2
        # do some retries with the same PFN if the download fails
        while not success and attempt < retries:
            attempt += 1
            item['attemptnr'] = attempt

            temp_file_path = item['temp_file_path']
            if os.path.isfile(temp_file_path):
                logger.debug('%sDeleting existing temporary file: %s' % (log_prefix, temp_file_path))
                os.unlink(temp_file_path)

            start_time = time.time()

            try:
                protocol.get(pfn, temp_file_path, transfer_timeout=item.get('transfer_timeout'))
                success = True
            except Exception as error:
                logger.debug(error)
                trace['clientState'] = str(type(error).__name__)

            end_time = time.time()

            if success and not item.get('ignore_checksum', False):
                rucio_checksum = item.get('adler32')
                local_checksum = None
                if not rucio_checksum:
                    rucio_checksum = item.get('md5')
                    local_checksum = md5(temp_file_path)
                else:
                    local_checksum = adler32(temp_file_path)

                if rucio_checksum != local_checksum:
                    success = False
                    os.unlink(temp_file_path)
                    logger.warning('%sChecksum validation failed for file: %s' % (log_prefix, did_str))
                    logger.debug('Local checksum: %s, Rucio checksum: %s' % (local_checksum, rucio_checksum))
                    try:
                        self.client.declare_suspicious_file_replicas([pfn], reason='Corrupted')
                    except Exception:
                        pass
                    trace['clientState'] = 'FAIL_VALIDATE'

            if not success:
                logger.warning('%sDownload attempt failed. Try %s/%s' % (log_prefix, attempt, retries))
                send_trace(trace, self.client.host, self.client.user_agent)

        protocol.close()

    if not success:
        logger.error('%sFailed to download file %s' % (log_prefix, did_str))
        item['clientState'] = 'FAILED'
        return item

    os.rename(temp_file_path, dest_file_path)

    trace['transferStart'] = start_time
    trace['transferEnd'] = end_time
    trace['clientState'] = 'DONE'
    item['clientState'] = 'DONE'
    send_trace(trace, self.client.host, self.client.user_agent)

    duration = round(end_time - start_time, 2)
    size = item.get('bytes')
    size_str = sizefmt(size, self.is_human_readable)
    if size and duration:
        rate = round((size / duration) * 1e-6, 2)
        logger.info('%sFile %s successfully downloaded. %s in %s seconds = %s MBps' % (log_prefix, did_str, size_str, duration, rate))
    else:
        logger.info('%sFile %s successfully downloaded in %s seconds' % (log_prefix, did_str, duration))
    return item
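

# Sketch (illustration only): the checksum step above compares item['adler32'] (or
# item['md5'] as a fallback) against a checksum of the downloaded temporary file
# computed with the module's adler32()/md5() helpers. The helper below is a
# self-contained, standard-library approximation of that comparison; its name is
# hypothetical and the 8-character zero-padded hex rendering of the adler32 value
# is an assumption about how the real helper formats its result.
def _example_validate_checksum(path, expected_adler32=None, expected_md5=None, chunk_size=1024 * 1024):
    import hashlib
    import zlib

    adler = 1  # adler32 starting value
    md5sum = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            adler = zlib.adler32(chunk, adler)
            md5sum.update(chunk)

    if expected_adler32 is not None:
        # zero-padded lowercase hex, assumed to match the rucio helper's format
        return ('%08x' % (adler & 0xffffffff)) == expected_adler32.lower()
    if expected_md5 is not None:
        return md5sum.hexdigest() == expected_md5.lower()
    return False  # nothing to validate against
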
def _download_items_aria2c(self, items, aria_rpc, rpc_auth, trace_custom_fields={}):
    """
    Uses aria2c to download the given items. Aria2c needs to be started
    as RPC background process first and a RPC proxy is needed.
    (This function is meant to be used as class internal only)

    :param items: list of dictionaries containing one dict for each file to download
    :param aria_rpc: RPCProxy to the aria2c process
    :param rpc_auth: the rpc authentication token
    :param trace_custom_fields: Custom key value pairs to send with the traces

    :returns: a list of dictionaries with an entry for each file, containing the input options, the did, and the clientState
    """
    logger = self.logger

    gid_to_item = {}  # maps an aria2c download id (gid) to the download item
    pfn_to_rse = {}
    items_to_queue = [item for item in items]

    # items get removed from gid_to_item when they are complete or failed
    while len(gid_to_item) or len(items_to_queue):
        num_queued = 0

        # queue up to 100 files and then check aria's status
        while (num_queued < 100) and len(items_to_queue):
            item = items_to_queue.pop()

            file_scope = item['scope']
            file_name = item['name']
            file_did_str = '%s:%s' % (file_scope, file_name)
            trace = {'scope': file_scope,
                     'filename': file_name,
                     'datasetScope': item.get('dataset_scope', ''),
                     'dataset': item.get('dataset_name', ''),
                     'protocol': 'https',
                     'remoteSite': '',
                     'filesize': item.get('bytes', None),
                     'transferStart': time.time(),
                     'transferEnd': time.time()}
            trace.update(self.trace_tpl)
            trace.update(trace_custom_fields)

            # get pfns from all replicas
            pfns = []
            for src in item['sources']:
                pfn = src['pfn']
                if pfn[0:4].lower() == 'davs':
                    pfn = pfn.replace('davs', 'https', 1)
                pfns.append(pfn)
                pfn_to_rse[pfn] = src['rse']

            # does file exist and are sources available?
            if os.path.isfile(item['dest_file_path']):
                logger.info('File exists already locally: %s' % file_did_str)
                item['clientState'] = 'ALREADY_DONE'
                trace['clientState'] = 'ALREADY_DONE'
                send_trace(trace, self.client.host, self.client.user_agent)
            elif len(pfns) == 0:
                logger.warning('No available source found for file: %s' % file_did_str)
                item['clientState'] = 'FILE_NOT_FOUND'
                trace['clientState'] = 'FILE_NOT_FOUND'
                send_trace(trace, self.client.host, self.client.user_agent)
            else:
                item['trace'] = trace
                options = {'dir': item['dest_dir_path'],
                           'out': os.path.basename(item['temp_file_path'])}
                gid = aria_rpc.aria2.addUri(rpc_auth, pfns, options)
                gid_to_item[gid] = item
                num_queued += 1
                logger.debug('Queued file: %s' % file_did_str)

        # get some statistics
        aria_stat = aria_rpc.aria2.getGlobalStat(rpc_auth)
        num_active = int(aria_stat['numActive'])
        num_waiting = int(aria_stat['numWaiting'])
        num_stopped = int(aria_stat['numStoppedTotal'])

        # save start time if one of the active downloads has started
        active = aria_rpc.aria2.tellActive(rpc_auth, ['gid', 'completedLength'])
        for dlinfo in active:
            gid = dlinfo['gid']
            if int(dlinfo['completedLength']) > 0:
                gid_to_item[gid].setdefault('transferStart', time.time())

        stopped = aria_rpc.aria2.tellStopped(rpc_auth, -1, num_stopped, ['gid', 'status', 'files'])
        for dlinfo in stopped:
            gid = dlinfo['gid']
            item = gid_to_item[gid]

            file_scope = item['scope']
            file_name = item['name']
            file_did_str = '%s:%s' % (file_scope, file_name)
            temp_file_path = item['temp_file_path']
            dest_file_path = item['dest_file_path']

            # ensure we didn't miss the active state (e.g. a very fast download)
            start_time = item.setdefault('transferStart', time.time())
            end_time = item.setdefault('transferEnd', time.time())

            # get used pfn for traces
            trace = item['trace']
            for uri in dlinfo['files'][0]['uris']:
                if uri['status'].lower() == 'used':
                    trace['remoteSite'] = pfn_to_rse.get(uri['uri'], '')

            trace['transferStart'] = start_time
            trace['transferEnd'] = end_time

            # ensure file exists
            status = dlinfo.get('status', '').lower()
            if status == 'complete' and os.path.isfile(temp_file_path):
                # checksum check
                skip_check = item.get('ignore_checksum', False)
                rucio_checksum = 0 if skip_check else item.get('adler32')
                local_checksum = 0 if skip_check else adler32(temp_file_path)
                if rucio_checksum == local_checksum:
                    item['clientState'] = 'DONE'
                    trace['clientState'] = 'DONE'
                    # remove .part ending
                    os.rename(temp_file_path, dest_file_path)

                    # calculate duration
                    duration = round(end_time - start_time, 2)
                    duration = max(duration, 0.01)  # protect against 0 division
                    size = item.get('bytes', 0)
                    rate = round((size / duration) * 1e-6, 2)
                    size_str = sizefmt(size, self.is_human_readable)
                    logger.info('File %s successfully downloaded. %s in %s seconds = %s MBps' % (file_did_str,
                                                                                                 size_str,
                                                                                                 duration,
                                                                                                 rate))
                else:
                    os.unlink(temp_file_path)
                    logger.warning('Checksum validation failed for file: %s' % file_did_str)
                    logger.debug('Local checksum: %s, Rucio checksum: %s' % (local_checksum, rucio_checksum))
                    item['clientState'] = 'FAIL_VALIDATE'
                    trace['clientState'] = 'FAIL_VALIDATE'
            else:
                logger.error('Failed to download file: %s' % file_did_str)
                logger.debug('Aria2c status: %s' % status)
                item['clientState'] = 'FAILED'
                trace['clientState'] = 'DOWNLOAD_ATTEMPT'

            send_trace(trace, self.client.host, self.client.user_agent)
            del item['trace']

            aria_rpc.aria2.removeDownloadResult(rpc_auth, gid)
            del gid_to_item[gid]

        if len(stopped) > 0:
            logger.info('Active: %d, Waiting: %d, Stopped: %d' % (num_active, num_waiting, num_stopped))

    return items
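

# Sketch (illustration only): _download_items_aria2c expects an already running
# aria2c RPC daemon plus a proxy object and an authentication token, as the
# docstring above notes. The helper below shows one way such a proxy could be
# created with the standard library; its name, the port and the secret are
# placeholders rather than values taken from this module, and aria2c must be
# installed and on the PATH for it to work.
def _example_start_aria2c_rpc(secret='example-secret', port=6800):
    import subprocess
    import xmlrpc.client

    # start aria2c with its XML-RPC interface enabled
    process = subprocess.Popen(['aria2c',
                                '--enable-rpc',
                                '--rpc-listen-port=%d' % port,
                                '--rpc-secret=%s' % secret,
                                '--auto-file-renaming=false'])

    # aria2's XML-RPC methods take 'token:<secret>' as their first parameter,
    # which is what this module passes around as rpc_auth
    aria_rpc = xmlrpc.client.ServerProxy('http://localhost:%d/rpc' % port)
    rpc_auth = 'token:%s' % secret
    return process, aria_rpc, rpc_auth
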
def _download_item(self, item, trace, log_prefix=''):
    """
    Downloads the given item and sends traces for success/failure.
    (This function is meant to be used as class internal only)

    :param item: dictionary that describes the item to download
    :param trace: dictionary representing a pattern of trace that will be sent
    :param log_prefix: string that will be put at the beginning of every log message

    :returns: dictionary with all attributes from the input item and a clientState attribute
    """
    logger = self.logger

    did_scope = item['scope']
    did_name = item['name']
    did_str = '%s:%s' % (did_scope, did_name)
    logger.info('%sPreparing download of %s' % (log_prefix, did_str))

    trace['scope'] = did_scope
    trace['filename'] = did_name
    trace.setdefault('dataset_scope', item.get('dataset_scope', ''))
    trace.setdefault('dataset', item.get('dataset_name', ''))
    trace.setdefault('filesize', item.get('bytes'))

    # if file already exists, set state, send trace, and return
    dest_dir_path = item['dest_dir_path']
    dest_file_path = os.path.join(dest_dir_path, did_name)
    if os.path.isfile(dest_file_path):
        logger.info('%sFile exists already locally: %s' % (log_prefix, did_str))
        item['clientState'] = 'ALREADY_DONE'
        trace['transferStart'] = time.time()
        trace['transferEnd'] = time.time()
        trace['clientState'] = 'ALREADY_DONE'
        send_trace(trace, self.client.host, self.user_agent)
        return item

    # check if file has replicas
    rse_names = list(item['rses'].keys())
    if not len(rse_names):
        logger.warning('%sFile %s has no available replicas. Cannot be downloaded' % (log_prefix, did_str))
        item['clientState'] = 'FILE_NOT_FOUND'
        trace['clientState'] = 'FILE_NOT_FOUND'
        send_trace(trace, self.client.host, self.user_agent)
        return item

    # list_replicas order is: best rse at [0]
    rse_names.reverse()
    logger.debug('%sPotential sources: %s' % (log_prefix, str(rse_names)))

    success = False
    # retry with different rses if one is not available or fails
    while not success and len(rse_names):
        rse_name = rse_names.pop()
        try:
            rse = rsemgr.get_rse_info(rse_name)
        except RSENotFound:
            logger.warning('%sCould not get info of RSE %s' % (log_prefix, rse_name))
            continue

        if not rse['availability_read']:
            logger.info('%s%s is blacklisted for reading' % (log_prefix, rse_name))
            continue

        force_scheme = item.get('force_scheme')
        try:
            protocols = rsemgr.get_protocols_ordered(rse, operation='read', scheme=force_scheme)
            protocols.reverse()
        except RSEProtocolNotSupported as error:
            logger.info('%sThe protocol specified (%s) is not supported by %s' % (log_prefix, force_scheme, rse_name))
            logger.debug(error)
            continue

        logger.debug('%sPotential protocol(s) read: %s' % (log_prefix, protocols))
        trace['remoteSite'] = rse_name
        trace['clientState'] = 'DOWNLOAD_ATTEMPT'

        # retry with different protocols on the given rse
        while not success and len(protocols):
            protocol = protocols.pop()
            cur_scheme = protocol['scheme']
            trace['protocol'] = cur_scheme

            logger.info('%sTrying to download with %s from %s: %s ' % (log_prefix, cur_scheme, rse_name, did_str))

            attempt = 0
            retries = 2
            # do some retries with the same rse and protocol if the download fails
            while not success and attempt < retries:
                attempt += 1
                item['attemptnr'] = attempt

                try:
                    start_time = time.time()
                    rsemgr.download(rse,
                                    files=item,
                                    dest_dir=dest_dir_path,
                                    force_scheme=cur_scheme,
                                    ignore_checksum=item.get('ignore_checksum', False),
                                    transfer_timeout=item.get('transfer_timeout'))
                    end_time = time.time()

                    trace['transferStart'] = start_time
                    trace['transferEnd'] = end_time
                    trace['clientState'] = 'DONE'
                    item['clientState'] = 'DONE'
                    success = True
                except FileConsistencyMismatch as error:
                    logger.warning(str(error))
                    try:
                        pfn = item.get('pfn')
                        if not pfn:
                            pfns_dict = rsemgr.lfns2pfns(rse,
                                                         lfns={'name': did_name, 'scope': did_scope},
                                                         operation='read',
                                                         scheme=cur_scheme)
                            pfn = pfns_dict[did_str]

                        corrupted_item = copy.deepcopy(item)
                        corrupted_item['clientState'] = 'FAIL_VALIDATE'
                        corrupted_item['pfn'] = pfn
                        # self.corrupted_files.append(corrupted_item)
                    except Exception as error:
                        logger.debug('%s%s' % (log_prefix, str(error)))
                    trace['clientState'] = 'FAIL_VALIDATE'
                except Exception as error:
                    logger.warning(str(error))
                    trace['clientState'] = str(type(error).__name__)

                if not success:
                    logger.debug('%sFailed attempt %s/%s' % (log_prefix, attempt, retries))

            send_trace(trace, self.client.host, self.user_agent)

    if not success:
        logger.error('%sFailed to download file %s' % (log_prefix, did_str))
        item['clientState'] = 'FAILED'
        return item

    duration = round(end_time - start_time, 2)
    size = item.get('bytes')
    size_str = sizefmt(size, self.is_human_readable)
    if size and duration:
        rate = round((size / duration) * 1e-6, 2)
        logger.info('%sFile %s successfully downloaded. %s in %s seconds = %s MBps' % (log_prefix, did_str, size_str, duration, rate))
    else:
        logger.info('%sFile %s successfully downloaded in %s seconds' % (log_prefix, did_str, duration))
    return item
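

# Sketch (illustration only): the download methods above return the input item
# enriched with a 'clientState' attribute (e.g. DONE, ALREADY_DONE, FILE_NOT_FOUND
# or FAILED; the aria2c path can also return FAIL_VALIDATE). A caller that wants a
# summary of a batch could tally those states as below; the function name and the
# choice of which states count as "successful" are illustrative, not taken from
# this module.
def _example_summarise_download_states(items):
    from collections import Counter

    counts = Counter(item.get('clientState', 'UNKNOWN') for item in items)
    successful = counts.get('DONE', 0) + counts.get('ALREADY_DONE', 0)
    failed = sum(n for state, n in counts.items() if state not in ('DONE', 'ALREADY_DONE'))
    return {'counts': dict(counts), 'successful': successful, 'failed': failed}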