Beispiel #1
0
    def _downloader(self, pfn, protocol, human, input_queue, output_queue, user_agent, threadnb, total_threads, trace_endpoint, trace_pattern, transfer_timeout=None):

        rse_dict = {}
        thread_prefix = 'Thread %s/%s' % (threadnb, total_threads)
        while True:
            try:
                file = input_queue.get_nowait()
            except Empty:
                return

            dest_dir = file['dest_dir']
            file_scope = file['scope']
            file_name = file['name']
            file_didstr = '%s:%s' % (file_scope, file_name)

            # arguments for rsemgr.download already known
            dlfile = {}
            dlfile['name'] = file_name
            dlfile['scope'] = file_scope
            dlfile['adler32'] = file['adler32']
            ignore_checksum = True if pfn else False
            if pfn:
                dlfile['pfn'] = pfn

            logger.info('%s : Starting the download of %s' % (thread_prefix, file_didstr))
            trace = deepcopy(trace_pattern)
            trace.update({'scope': file_scope,
                          'filename': file_name,
                          'datasetScope': file['dataset_scope'],
                          'dataset': file['dataset_name'],
                          'filesize': file['bytes']})

            rses = list(file['rses'].keys())
            if rses == []:
                logger.warning('%s : File %s has no available replicas. Cannot be downloaded.' % (thread_prefix, file_didstr))
                trace['clientState'] = 'FILE_NOT_FOUND'
                self.send_trace(trace, trace_endpoint, user_agent)
                input_queue.task_done()
                continue
            random.shuffle(rses)

            logger.debug('%s : Potential sources : %s' % (thread_prefix, str(rses)))
            success = False
            while not success and len(rses):
                rse_name = rses.pop()
                if rse_name not in rse_dict:
                    try:
                        rse_dict[rse_name] = rsemgr.get_rse_info(rse_name)
                    except RSENotFound:
                        logger.warning('%s : Could not get info of RSE %s' % (thread_prefix, rse_name))
                        continue

                rse = rse_dict[rse_name]

                if not rse['availability_read']:
                    logger.info('%s : %s is blacklisted for reading' % (thread_prefix, rse_name))
                    continue

                try:
                    if pfn:
                        protocols = [rsemgr.select_protocol(rse, operation='read', scheme=pfn.split(':')[0])]
                    else:
                        protocols = rsemgr.get_protocols_ordered(rse, operation='read', scheme=protocol)
                        protocols.reverse()
                except RSEProtocolNotSupported as error:
                    logger.info('%s : The protocol specfied (%s) is not supported by %s' % (thread_prefix, protocol, rse_name))
                    logger.debug(error)
                    continue
                logger.debug('%s : %d possible protocol(s) for read' % (thread_prefix, len(protocols)))
                trace['remoteSite'] = rse_name
                trace['clientState'] = 'DOWNLOAD_ATTEMPT'

                while not success and len(protocols):
                    protocol_retry = protocols.pop()
                    logger.debug('%s : Trying protocol %s at %s' % (thread_prefix, protocol_retry['scheme'], rse_name))
                    trace['protocol'] = protocol_retry['scheme']
                    out = {}
                    out['dataset_scope'] = file['dataset_scope']
                    out['dataset_name'] = file['dataset_name']
                    out['scope'] = file_scope
                    out['name'] = file_name

                    attempt = 0
                    retries = 2
                    while not success and attempt < retries:
                        attempt += 1
                        out['attemptnr'] = attempt

                        logger.info('%s : File %s trying from %s' % (thread_prefix, file_didstr, rse_name))
                        try:
                            trace['transferStart'] = time.time()

                            rsemgr.download(rse,
                                            files=[dlfile],
                                            dest_dir=dest_dir,
                                            force_scheme=protocol_retry['scheme'],
                                            ignore_checksum=ignore_checksum,
                                            transfer_timeout=transfer_timeout)

                            trace['transferEnd'] = time.time()
                            trace['clientState'] = 'DONE'
                            out['clientState'] = 'DONE'
                            success = True
                            output_queue.put(out)
                            logger.info('%s : File %s successfully downloaded from %s' % (thread_prefix, file_didstr, rse_name))
                        except KeyboardInterrupt:
                            logger.warning('You pressed Ctrl+C! Exiting gracefully')
                            os.kill(os.getpgid(), signal.SIGINT)
                            return
                        except FileConsistencyMismatch as error:
                            logger.warning(str(error))
                            try:
                                pfns_dict = rsemgr.lfns2pfns(rse,
                                                             lfns=[{'name': file_name, 'scope': file_scope}],
                                                             operation='read',
                                                             scheme=protocol)
                                pfn = pfns_dict[file_didstr]

                                out['clientState'] = 'CORRUPTED'
                                out['pfn'] = pfn
                                output_queue.put(out)
                            except Exception as error:
                                logger.debug('%s : %s' % (thread_prefix, str(error)))
                            trace['clientState'] = 'FAIL_VALIDATE'
                            logger.debug('%s : Failed attempt %s/%s' % (thread_prefix, attempt, retries))
                        except Exception as error:
                            logger.warning(str(error))
                            trace['clientState'] = str(type(error).__name__)
                            logger.debug('%s : Failed attempt %s/%s' % (thread_prefix, attempt, retries))

                    self.send_trace(trace, trace_endpoint, user_agent, threadnb=threadnb, total_threads=total_threads)

            if success:
                duration = round(trace['transferEnd'] - trace['transferStart'], 2)
                if pfn:
                    logger.info('%s : File %s successfully downloaded in %s seconds' % (thread_prefix, file_didstr, duration))
                else:
                    logger.info('%s : File %s successfully downloaded. %s in %s seconds = %s MBps' % (thread_prefix,
                                                                                                      file_didstr,
                                                                                                      sizefmt(file['bytes'], human),
                                                                                                      duration,
                                                                                                      round((file['bytes'] / duration) * 1e-6, 2)))
            else:
                logger.error('%s : Cannot download file %s' % (thread_prefix, file_didstr))

            input_queue.task_done()
Beispiel #2
0
    def _download_item(self, item, trace, log_prefix=''):
        """
        Downloads the given item and sends traces for success/failure.
        (This function is meant to be used as class internal only)

        :param item: dictionary that describes the item to download
        :param trace: dictionary representing a pattern of trace that will be send
        :param log_prefix: string that will be put at the beginning of every log message

        :returns: dictionary with all attributes from the input item and a clientState attribute
        """
        logger = self.logger

        did_scope = item['scope']
        did_name = item['name']
        did_str = '%s:%s' % (did_scope, did_name)

        logger.info('%sPreparing download of %s' % (log_prefix, did_str))

        trace['scope'] = did_scope
        trace['filename'] = did_name
        trace.setdefault('datasetScope', item.get('dataset_scope', ''))
        trace.setdefault('dataset', item.get('dataset_name', ''))
        trace.setdefault('filesize', item.get('bytes'))

        # if file already exists, set state, send trace, and return
        dest_file_path = item['dest_file_path']
        if os.path.isfile(dest_file_path):
            logger.info('%sFile exists already locally: %s' % (log_prefix, did_str))
            item['clientState'] = 'ALREADY_DONE'

            trace['transferStart'] = time.time()
            trace['transferEnd'] = time.time()
            trace['clientState'] = 'ALREADY_DONE'
            send_trace(trace, self.client.host, self.client.user_agent)
            return item

        # check if file has replicas
        sources = item.get('sources')
        if not sources or not len(sources):
            logger.warning('%sNo available source found for file: %s' % (log_prefix, did_str))
            item['clientState'] = 'FILE_NOT_FOUND'

            trace['clientState'] = 'FILE_NOT_FOUND'
            send_trace(trace, self.client.host, self.client.user_agent)
            return item

        success = False
        # try different PFNs until one succeeded
        i = 0
        while not success and i < len(sources):
            pfn = sources[i]['pfn']
            rse_name = sources[i]['rse']
            i += 1
            scheme = pfn.split(':')[0]

            try:
                rse = rsemgr.get_rse_info(rse_name)
            except RSENotFound:
                logger.warning('%sCould not get info of RSE %s' % (log_prefix, rse_name))
                continue

            trace['remoteSite'] = rse_name
            trace['clientState'] = 'DOWNLOAD_ATTEMPT'
            trace['protocol'] = scheme

            logger.info('%sTrying to download with %s from %s: %s ' % (log_prefix, scheme, rse_name, did_str))

            try:
                protocol = rsemgr.create_protocol(rse, operation='read', scheme=scheme)
                protocol.connect()
            except Exception as error:
                logger.warning('%sFailed to create protocol for PFN: %s' % (log_prefix, pfn))
                logger.debug('scheme: %s, exception: %s' % (scheme, error))
                continue

            attempt = 0
            retries = 2
            # do some retries with the same PFN if the download fails
            while not success and attempt < retries:
                attempt += 1
                item['attemptnr'] = attempt

                temp_file_path = item['temp_file_path']
                if os.path.isfile(temp_file_path):
                    logger.debug('%sDeleting existing temporary file: %s' % (log_prefix, temp_file_path))
                    os.unlink(temp_file_path)

                start_time = time.time()

                try:
                    protocol.get(pfn, temp_file_path, transfer_timeout=item.get('transfer_timeout'))
                    success = True
                except Exception as error:
                    logger.debug(error)
                    trace['clientState'] = str(type(error).__name__)

                end_time = time.time()

                if success and not item.get('ignore_checksum', False):
                    rucio_checksum = item.get('adler32')
                    local_checksum = None
                    if not rucio_checksum:
                        rucio_checksum = item.get('md5')
                        local_checksum = md5(temp_file_path)
                    else:
                        local_checksum = adler32(temp_file_path)

                    if rucio_checksum != local_checksum:
                        success = False
                        os.unlink(temp_file_path)
                        logger.warning('%sChecksum validation failed for file: %s' % (log_prefix, did_str))
                        logger.debug('Local checksum: %s, Rucio checksum: %s' % (local_checksum, rucio_checksum))
                        try:
                            self.client.declare_suspicious_file_replicas([pfn], reason='Corrupted')
                        except Exception:
                            pass
                        trace['clientState'] = 'FAIL_VALIDATE'
                if not success:
                    logger.warning('%sDownload attempt failed. Try %s/%s' % (log_prefix, attempt, retries))
                    send_trace(trace, self.client.host, self.client.user_agent)

            protocol.close()

        if not success:
            logger.error('%sFailed to download file %s' % (log_prefix, did_str))
            item['clientState'] = 'FAILED'
            return item

        os.rename(temp_file_path, dest_file_path)

        trace['transferStart'] = start_time
        trace['transferEnd'] = end_time
        trace['clientState'] = 'DONE'
        item['clientState'] = 'DONE'
        send_trace(trace, self.client.host, self.client.user_agent)

        duration = round(end_time - start_time, 2)
        size = item.get('bytes')
        size_str = sizefmt(size, self.is_human_readable)
        if size and duration:
            rate = round((size / duration) * 1e-6, 2)
            logger.info('%sFile %s successfully downloaded. %s in %s seconds = %s MBps' % (log_prefix, did_str, size_str, duration, rate))
        else:
            logger.info('%sFile %s successfully downloaded in %s seconds' % (log_prefix, did_str, duration))
        return item
Beispiel #3
0
    def _download_items_aria2c(self, items, aria_rpc, rpc_auth, trace_custom_fields={}):
        """
        Uses aria2c to download the given items. Aria2c needs to be started
        as RPC background process first and a RPC proxy is needed.
        (This function is meant to be used as class internal only)

        :param items: list of dictionaries containing one dict for each file to download
        :param aria_rcp: RPCProxy to the aria2c process
        :param rpc_auth: the rpc authentication token
        :param trace_custom_fields: Custom key value pairs to send with the traces

        :returns: a list of dictionaries with an entry for each file, containing the input options, the did, and the clientState
        """
        logger = self.logger

        gid_to_item = {}  # maps an aria2c download id (gid) to the download item
        pfn_to_rse = {}
        items_to_queue = [item for item in items]

        # items get removed from gid_to_item when they are complete or failed
        while len(gid_to_item) or len(items_to_queue):
            num_queued = 0

            # queue up to 100 files and then check arias status
            while (num_queued < 100) and len(items_to_queue):
                item = items_to_queue.pop()

                file_scope = item['scope']
                file_name = item['name']
                file_did_str = '%s:%s' % (file_scope, file_name)
                trace = {'scope': file_scope,
                         'filename': file_name,
                         'datasetScope': item.get('dataset_scope', ''),
                         'dataset': item.get('dataset_name', ''),
                         'protocol': 'https',
                         'remoteSite': '',
                         'filesize': item.get('bytes', None),
                         'transferStart': time.time(),
                         'transferEnd': time.time()}
                trace.update(self.trace_tpl)
                trace.update(trace_custom_fields)

                # get pfns from all replicas
                pfns = []
                for src in item['sources']:
                    pfn = src['pfn']
                    if pfn[0:4].lower() == 'davs':
                        pfn = pfn.replace('davs', 'https', 1)
                    pfns.append(pfn)
                    pfn_to_rse[pfn] = src['rse']

                # does file exist and are sources available?
                if os.path.isfile(item['dest_file_path']):
                    logger.info('File exists already locally: %s' % file_did_str)
                    item['clientState'] = 'ALREADY_DONE'
                    trace['clientState'] = 'ALREADY_DONE'
                    send_trace(trace, self.client.host, self.client.user_agent)
                elif len(pfns) == 0:
                    logger.warning('No available source found for file: %s' % file_did_str)
                    item['clientState'] = 'FILE_NOT_FOUND'
                    trace['clientState'] = 'FILE_NOT_FOUND'
                    send_trace(trace, self.client.host, self.client.user_agent)
                else:
                    item['trace'] = trace
                    options = {'dir': item['dest_dir_path'],
                               'out': os.path.basename(item['temp_file_path'])}
                    gid = aria_rpc.aria2.addUri(rpc_auth, pfns, options)
                    gid_to_item[gid] = item
                    num_queued += 1
                    logger.debug('Queued file: %s' % file_did_str)

            # get some statistics
            aria_stat = aria_rpc.aria2.getGlobalStat(rpc_auth)
            num_active = int(aria_stat['numActive'])
            num_waiting = int(aria_stat['numWaiting'])
            num_stopped = int(aria_stat['numStoppedTotal'])

            # save start time if one of the active downloads has started
            active = aria_rpc.aria2.tellActive(rpc_auth, ['gid', 'completedLength'])
            for dlinfo in active:
                gid = dlinfo['gid']
                if int(dlinfo['completedLength']) > 0:
                    gid_to_item[gid].setdefault('transferStart', time.time())

            stopped = aria_rpc.aria2.tellStopped(rpc_auth, -1, num_stopped, ['gid', 'status', 'files'])
            for dlinfo in stopped:
                gid = dlinfo['gid']
                item = gid_to_item[gid]

                file_scope = item['scope']
                file_name = item['name']
                file_did_str = '%s:%s' % (file_scope, file_name)
                temp_file_path = item['temp_file_path']
                dest_file_path = item['dest_file_path']

                # ensure we didnt miss the active state (e.g. a very fast download)
                start_time = item.setdefault('transferStart', time.time())
                end_time = item.setdefault('transferEnd', time.time())

                # get used pfn for traces
                trace = item['trace']
                for uri in dlinfo['files'][0]['uris']:
                    if uri['status'].lower() == 'used':
                        trace['remoteSite'] = pfn_to_rse.get(uri['uri'], '')

                trace['transferStart'] = start_time
                trace['transferEnd'] = end_time

                # ensure file exists
                status = dlinfo.get('status', '').lower()
                if status == 'complete' and os.path.isfile(temp_file_path):
                    # checksum check
                    skip_check = item.get('ignore_checksum', False)
                    rucio_checksum = 0 if skip_check else item.get('adler32')
                    local_checksum = 0 if skip_check else adler32(temp_file_path)
                    if rucio_checksum == local_checksum:
                        item['clientState'] = 'DONE'
                        trace['clientState'] = 'DONE'
                        # remove .part ending
                        os.rename(temp_file_path, dest_file_path)

                        # calculate duration
                        duration = round(end_time - start_time, 2)
                        duration = max(duration, 0.01)  # protect against 0 division
                        size = item.get('bytes', 0)
                        rate = round((size / duration) * 1e-6, 2)
                        size_str = sizefmt(size, self.is_human_readable)
                        logger.info('File %s successfully downloaded. %s in %s seconds = %s MBps' % (file_did_str,
                                                                                                     size_str,
                                                                                                     duration,
                                                                                                     rate))
                    else:
                        os.unlink(temp_file_path)
                        logger.warning('Checksum validation failed for file: %s' % file_did_str)
                        logger.debug('Local checksum: %s, Rucio checksum: %s' % (local_checksum, rucio_checksum))
                        item['clientState'] = 'FAIL_VALIDATE'
                        trace['clientState'] = 'FAIL_VALIDATE'
                else:
                    logger.error('Failed to download file: %s' % file_did_str)
                    logger.debug('Aria2c status: %s' % status)
                    item['clientState'] = 'FAILED'
                    trace['clientState'] = 'DOWNLOAD_ATTEMPT'

                send_trace(trace, self.client.host, self.client.user_agent)
                del item['trace']

                aria_rpc.aria2.removeDownloadResult(rpc_auth, gid)
                del gid_to_item[gid]

            if len(stopped) > 0:
                logger.info('Active: %d, Waiting: %d, Stopped: %d' % (num_active, num_waiting, num_stopped))

        return items
Beispiel #4
0
    def _download_item(self, item, trace, log_prefix=''):
        """
        Downloads the given item and sends traces for success/failure.
        (This function is meant to be used as class internal only)

        :param item: dictionary that describes the item to download
        :param trace: dictionary representing a pattern of trace that will be send
        :param log_prefix: string that will be put at the beginning of every log message

        :returns: dictionary with all attributes from the input item and a clientState attribute
        """
        logger = self.logger

        did_scope = item['scope']
        did_name = item['name']
        did_str = '%s:%s' % (did_scope, did_name)

        logger.info('%sPreparing download of %s' % (log_prefix, did_str))

        trace['scope'] = did_scope
        trace['filename'] = did_name
        trace.setdefault('dataset_scope', item.get('dataset_scope', ''))
        trace.setdefault('dataset', item.get('dataset_name', ''))
        trace.setdefault('filesize', item.get('bytes'))

        # if file already exists, set state, send trace, and return
        dest_dir_path = item['dest_dir_path']
        dest_file_path = os.path.join(dest_dir_path, did_name)
        if os.path.isfile(dest_file_path):
            logger.info('%sFile exists already locally: %s' %
                        (log_prefix, did_str))
            item['clientState'] = 'ALREADY_DONE'

            trace['transferStart'] = time.time()
            trace['transferEnd'] = time.time()
            trace['clientState'] = 'ALREADY_DONE'
            send_trace(trace, self.client.host, self.user_agent)
            return item

        # check if file has replicas
        rse_names = list(item['rses'].keys())
        if not len(rse_names):
            logger.warning(
                '%sFile %s has no available replicas. Cannot be downloaded' %
                (log_prefix, did_str))
            item['clientState'] = 'FILE_NOT_FOUND'

            trace['clientState'] = 'FILE_NOT_FOUND'
            send_trace(trace, self.client.host, self.user_agent)
            return item

        # list_replicas order is: best rse at [0]
        rse_names.reverse()

        logger.debug('%sPotential sources: %s' % (log_prefix, str(rse_names)))

        success = False
        # retry with different rses if one is not available or fails
        while not success and len(rse_names):
            rse_name = rse_names.pop()
            try:
                rse = rsemgr.get_rse_info(rse_name)
            except RSENotFound:
                logger.warning('%sCould not get info of RSE %s' %
                               (log_prefix, rse_name))
                continue

            if not rse['availability_read']:
                logger.info('%s%s is blacklisted for reading' %
                            (log_prefix, rse_name))
                continue

            force_scheme = item.get('force_scheme')
            try:
                protocols = rsemgr.get_protocols_ordered(rse,
                                                         operation='read',
                                                         scheme=force_scheme)
                protocols.reverse()
            except RSEProtocolNotSupported as error:
                logger.info(
                    '%sThe protocol specfied (%s) is not supported by %s' %
                    (log_prefix, force_scheme, rse_name))
                logger.debug(error)
                continue

            logger.debug('%sPotential protocol(s) read: %s' %
                         (log_prefix, protocols))

            trace['remoteSite'] = rse_name
            trace['clientState'] = 'DOWNLOAD_ATTEMPT'

            # retry with different protocols on the given rse
            while not success and len(protocols):
                protocol = protocols.pop()
                cur_scheme = protocol['scheme']
                trace['protocol'] = cur_scheme

                logger.info('%sTrying to download with %s from %s: %s ' %
                            (log_prefix, cur_scheme, rse_name, did_str))

                attempt = 0
                retries = 2
                # do some retries with the same rse and protocol if the download fails
                while not success and attempt < retries:
                    attempt += 1
                    item['attemptnr'] = attempt

                    try:

                        start_time = time.time()
                        rsemgr.download(
                            rse,
                            files=item,
                            dest_dir=dest_dir_path,
                            force_scheme=cur_scheme,
                            ignore_checksum=item.get('ignore_checksum', False),
                            transfer_timeout=item.get('transfer_timeout'))
                        end_time = time.time()

                        trace['transferStart'] = start_time
                        trace['transferEnd'] = end_time
                        trace['clientState'] = 'DONE'
                        item['clientState'] = 'DONE'
                        success = True
                    except FileConsistencyMismatch as error:
                        logger.warning(str(error))
                        try:
                            pfn = item.get('pfn')
                            if not pfn:
                                pfns_dict = rsemgr.lfns2pfns(rse,
                                                             lfns={
                                                                 'name':
                                                                 did_name,
                                                                 'scope':
                                                                 did_scope
                                                             },
                                                             operation='read',
                                                             scheme=cur_scheme)
                                pfn = pfns_dict[did_str]

                            corrupted_item = copy.deepcopy(item)
                            corrupted_item['clientState'] = 'FAIL_VALIDATE'
                            corrupted_item['pfn'] = pfn
                            # self.corrupted_files.append(corrupted_item)
                        except Exception as error:
                            logger.debug('%s%s' % (log_prefix, str(error)))
                        trace['clientState'] = 'FAIL_VALIDATE'
                    except Exception as error:
                        logger.warning(str(error))
                        trace['clientState'] = str(type(error).__name__)

                    if not success:
                        logger.debug('%sFailed attempt %s/%s' %
                                     (log_prefix, attempt, retries))

                    send_trace(trace, self.client.host, self.user_agent)

        if not success:
            logger.error('%sFailed to download file %s' %
                         (log_prefix, did_str))
            item['clientState'] = 'FAILED'
            return item

        duration = round(end_time - start_time, 2)
        size = item.get('bytes')
        size_str = sizefmt(size, self.is_human_readable)
        if size and duration:
            rate = round((size / duration) * 1e-6, 2)
            logger.info(
                '%sFile %s successfully downloaded. %s in %s seconds = %s MBps'
                % (log_prefix, did_str, size_str, duration, rate))
        else:
            logger.info('%sFile %s successfully downloaded in %s seconds' %
                        (log_prefix, did_str, duration))
        return item