def test_to_repair_a_rule_with_only_1_rse_whose_transfers_failed(self):
    """ JUDGE REPAIRER: Test to repair a rule with only 1 rse whose transfers failed (lock)

    Four files are attached to a dataset and a one-copy rule is placed on
    ``self.rse1``. Two transfers are reported as successful, the other two as
    failed (and their requests cancelled). The rule must be STUCK before and
    after a repairer pass, and the two failed files must still be locked on
    the same RSE.
    """

    def lock_rse_id(did):
        # RSE id of the first replica lock held on this particular file.
        return get_replica_locks(scope=did['scope'], name=did['name'])[0].rse_id

    rule_repairer(once=True)  # Clean out the repairer
    scope = InternalScope('mock', **self.vo)
    files = create_files(4, scope, self.rse4_id, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.DATASET, self.jdoe)
    attach_dids(scope, dataset, files, self.jdoe)

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account=self.jdoe, copies=1,
                       rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None)[0]

    transferred, broken = files[:2], files[2:]

    # Each call looks up the lock of the file it acts on (the previous code
    # queried files[2] for several of them, a copy-paste slip).
    for did in transferred:
        successful_transfer(scope=scope, name=did['name'], rse_id=lock_rse_id(did), nowait=False)
    for did in broken:
        failed_transfer(scope=scope, name=did['name'], rse_id=lock_rse_id(did))
    for did in broken:
        cancel_request_did(scope=scope, name=did['name'], dest_rse_id=lock_rse_id(did))

    assert rule_id == get_rule(rule_id)['id'].replace('-', '').lower()
    assert RuleState.STUCK == get_rule(rule_id)['state']

    rule_repairer(once=True)

    # Still assert STUCK because of delays:
    assert RuleState.STUCK == get_rule(rule_id)['state']
    assert lock_rse_id(files[2]) == lock_rse_id(files[3])
def test_to_repair_a_rule_with_only_1_rse_whose_transfers_failed(self):
    """ JUDGE REPAIRER: Test to repair a rule with only 1 rse whose transfers failed (lock)

    Four files are attached to a dataset and a one-copy rule is placed on
    ``self.rse1``. Two transfers are reported as successful, the other two as
    failed (and their requests cancelled). The rule must be STUCK before and
    after a repairer pass, and the two failed files must still be locked on
    the same RSE.
    """

    def lock_rse_id(did):
        # RSE id of the first replica lock held on this particular file.
        return get_replica_locks(scope=did['scope'], name=did['name'])[0].rse_id

    rule_repairer(once=True)  # Clean out the repairer
    scope = 'mock'
    files = create_files(4, scope, self.rse4, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1,
                       rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None)[0]

    transferred, broken = files[:2], files[2:]

    # Each call looks up the lock of the file it acts on (the previous code
    # queried files[2] for several of them, a copy-paste slip).
    for did in transferred:
        successful_transfer(scope=scope, name=did['name'], rse_id=lock_rse_id(did), nowait=False)
    for did in broken:
        failed_transfer(scope=scope, name=did['name'], rse_id=lock_rse_id(did))
    for did in broken:
        cancel_request_did(scope=scope, name=did['name'], dest_rse_id=lock_rse_id(did))

    assert rule_id == get_rule(rule_id)['id'].replace('-', '').lower()
    assert RuleState.STUCK == get_rule(rule_id)['state']

    rule_repairer(once=True)

    # Still assert STUCK because of delays:
    assert RuleState.STUCK == get_rule(rule_id)['state']
    assert lock_rse_id(files[2]) == lock_rse_id(files[3])
def cancel_request_did(scope, name, dest_rse, request_type, issuer, account, vo='def'):
    """
    Cancel the request identified by a DID, a destination RSE and a request type.

    :param scope: Data identifier scope as a string.
    :param name: Data identifier name as a string.
    :param dest_rse: RSE name as a string.
    :param request_type: Type of the request as a string.
    :param issuer: Issuing account as a string.
    :param account: Account identifier as a string.
    :param vo: The VO to act on.
    :returns: The result of ``request.cancel_request_did``.
    :raises AccessDenied: If the issuer lacks the 'cancel_request_did' permission.
    """
    # The RSE name is resolved first, so a failing lookup surfaces before
    # the permission check is evaluated.
    dest_rse_id = get_rse_id(rse=dest_rse, vo=vo)

    allowed = permission.has_permission(issuer=issuer,
                                        vo=vo,
                                        action='cancel_request_did',
                                        kwargs={'account': account, 'issuer': issuer})
    if not allowed:
        message_fields = {'account': account,
                          'request_type': request_type,
                          'scope': scope,
                          'name': name}
        raise exception.AccessDenied('%(account)s cannot cancel %(request_type)s request for %(scope)s:%(name)s' % message_fields)

    # The core layer is handed an InternalScope built from the external scope and VO.
    internal_scope = InternalScope(scope, vo=vo)
    return request.cancel_request_did(internal_scope, name, dest_rse_id, request_type)
def cancel_request_did(scope, name, dest_rse, request_type, issuer, account):
    """
    Cancel the request identified by a DID, a destination RSE and a request type.

    :param scope: Data identifier scope as a string.
    :param name: Data identifier name as a string.
    :param dest_rse: RSE name as a string.
    :param request_type: Type of the request as a string.
    :param issuer: Issuing account as a string.
    :param account: Account identifier as a string.
    :returns: The result of ``request.cancel_request_did``.
    :raises AccessDenied: If the issuer lacks the 'cancel_request_did' permission.
    """
    allowed = permission.has_permission(issuer=issuer,
                                        action='cancel_request_did',
                                        kwargs={'account': account, 'issuer': issuer})
    if allowed:
        # Arguments are forwarded unchanged to the core request layer.
        return request.cancel_request_did(scope, name, dest_rse, request_type)

    message_fields = {'account': account,
                      'request_type': request_type,
                      'scope': scope,
                      'name': name}
    raise exception.AccessDenied('%(account)s cannot cancel %(request_type)s request for %(scope)s:%(name)s' % message_fields)
def submitter(once=False, rses=[], process=0, total_processes=1, thread=0, total_threads=1, mock=False, bulk=100, activities=None): """ Main loop to submit a new transfer primitive to a transfertool. """ logging.info('submitter starting - process (%i/%i) thread (%i/%i)' % (process, total_processes, thread, total_threads)) try: scheme = config_get('conveyor', 'scheme') except NoOptionError: scheme = 'srm' logging.info('submitter started - process (%i/%i) thread (%i/%i)' % (process, total_processes, thread, total_threads)) while not graceful_stop.is_set(): try: if activities is None: activities = [None] for activity in activities: if rses is None: rses = [None] for rse in rses: if rse: # run in rse list mode rse_info = rsemgr.get_rse_info(rse['rse']) logging.info("Working on RSE: %s" % rse['rse']) ts = time.time() reqs = get_requests(rse_id=rse['id'], process=process, total_processes=total_processes, thread=thread, total_threads=total_threads, mock=mock, bulk=bulk, activity=activity) record_timer('daemons.conveyor.submitter.get_requests', (time.time() - ts) * 1000) else: # no rse list, run FIFO mode rse_info = None ts = time.time() reqs = get_requests(process=process, total_processes=total_processes, thread=thread, total_threads=total_threads, mock=mock, bulk=bulk, activity=activity) record_timer('daemons.conveyor.submitter.get_requests', (time.time() - ts) * 1000) if reqs: logging.debug('%i:%i - submitting %i requests' % (process, thread, len(reqs))) if not reqs or reqs == []: time.sleep(1) continue for req in reqs: try: if not rse: # no rse list, in FIFO mode dest_rse = rse_core.get_rse(rse=None, rse_id=req['dest_rse_id']) rse_info = rsemgr.get_rse_info(dest_rse['rse']) ts = time.time() transfer = get_transfer(rse_info, req, scheme, mock) record_timer('daemons.conveyor.submitter.get_transfer', (time.time() - ts) * 1000) logging.debug('Transfer for request %s: %s' % (req['request_id'], transfer)) if transfer is None: logging.warn("Request %s DID %s:%s RSE %s failed 
to get transfer" % (req['request_id'], req['scope'], req['name'], rse_info['rse'])) # TODO: Merge these two calls request.set_request_state(req['request_id'], RequestState.LOST) # if the DID does not exist anymore request.archive_request(req['request_id']) continue ts = time.time() tmp_metadata = transfer['file_metadata'] eids = request.submit_transfers(transfers=[transfer, ], transfertool='fts3', job_metadata=tmp_metadata) record_timer('daemons.conveyor.submitter.submit_transfer', (time.time() - ts) * 1000) ts = time.time() if req['previous_attempt_id']: logging.info('COPYING RETRY %s REQUEST %s PREVIOUS %s DID %s:%s FROM %s TO %s USING %s with eid: %s' % (req['retry_count'], req['request_id'], req['previous_attempt_id'], req['scope'], req['name'], transfer['src_urls'], transfer['dest_urls'], eids[req['request_id']]['external_host'], eids[req['request_id']]['external_id'])) else: logging.info('COPYING REQUEST %s DID %s:%s FROM %s TO %s USING %s with eid: %s' % (req['request_id'], req['scope'], req['name'], transfer['src_urls'], transfer['dest_urls'], eids[req['request_id']]['external_host'], eids[req['request_id']]['external_id'])) record_counter('daemons.conveyor.submitter.submit_request') except UnsupportedOperation, e: # The replica doesn't exist, need to cancel the request logging.warning(e) logging.info('Cancelling transfer request %s' % req['request_id']) try: # TODO: for now, there is only ever one destination request.cancel_request_did(req['scope'], req['name'], transfer['dest_urls'][0]) except Exception, e: logging.warning('Cannot cancel request: %s' % str(e))