def test_put_mgr_ok_multi(self):
    """(RSE/PROTOCOLS): Put multiple files to storage (Success)"""
    scope = 'user.%s' % self.user
    # ssh1 endpoints are verified via MD5; everything else via adler32.
    if self.rse_settings['protocols'][0]['hostname'] == 'ssh1':
        lfns = [
            {'name': '1_rse_local_put.raw', 'scope': scope,
             'md5': md5(str(self.tmpdir) + '/1_rse_local_put.raw'),
             'filesize': os.stat('%s/1_rse_local_put.raw' % self.tmpdir).st_size},
            {'name': '2_rse_local_put.raw', 'scope': scope,
             'md5': md5(str(self.tmpdir) + '/2_rse_local_put.raw'),
             'filesize': os.stat('%s/2_rse_local_put.raw' % self.tmpdir).st_size},
        ]
        outcome = mgr.upload(self.rse_settings, lfns, source_dir=self.tmpdir,
                             vo=self.vo, impl=self.impl)
    else:
        lfns = [
            {'name': '1_rse_local_put.raw', 'scope': scope,
             'adler32': adler32('%s/1_rse_local_put.raw' % self.tmpdir),
             'filesize': os.stat('%s/1_rse_local_put.raw' % self.tmpdir).st_size},
            {'name': '2_rse_local_put.raw', 'scope': scope,
             'adler32': adler32('%s/2_rse_local_put.raw' % self.tmpdir),
             'filesize': os.stat('%s/2_rse_local_put.raw' % self.tmpdir).st_size},
        ]
        outcome = mgr.upload(self.rse_settings, lfns, source_dir=self.tmpdir,
                             vo=self.vo)

    status, details = outcome[0], outcome[1]
    # Both uploads must be reported successful.
    if not (status and details['user.%s:1_rse_local_put.raw' % self.user]
            and details['user.%s:2_rse_local_put.raw' % self.user]):
        raise Exception('Return not as expected: %s, %s' % (status, details))
Example #2
0
 def test_put_mgr_ok_single(self):
     """(RSE/PROTOCOLS): Put a single file to storage (Success)"""
     path = '%s/3_rse_local_put.raw' % self.tmpdir
     lfn = {'name': '3_rse_local_put.raw',
            'scope': 'user.%s' % self.user,
            'filesize': os.stat(path).st_size}
     if self.rse_settings['protocols'][0]['hostname'] == 'ssh1':
         # ssh1 endpoints are checksummed with MD5, everything else with adler32
         lfn['md5'] = md5(path)
         mgr.upload(self.rse_settings, lfn,
                    source_dir=self.tmpdir, vo=self.vo, impl=self.impl)
     else:
         lfn['adler32'] = adler32(path)
         mgr.upload(self.rse_settings, lfn,
                    source_dir=self.tmpdir, vo=self.vo)
Example #3
0
 def test_put_mgr_SourceNotFound_single(self):
     """(RSE/PROTOCOLS): Put a single file to storage (SourceNotFound)"""
     # The source file deliberately does not exist in tmpdir.
     lfn = {'name': 'not_existing_data2.raw',
            'scope': 'user.%s' % self.user,
            'adler32': 'random_stuff',
            'filesize': 0}
     mgr.upload(self.rse_settings, lfn, source_dir=self.tmpdir)
Example #4
0
 def test_put_mgr_FileReplicaAlreadyExists_single(self):
     """(RSE/PROTOCOLS): Put a single file to storage (FileReplicaAlreadyExists)"""
     # '1_rse_remote_get.raw' is expected to already be present on the RSE.
     lfn = {'name': '1_rse_remote_get.raw',
            'scope': 'user.%s' % self.user,
            'adler32': 'bla-bla',
            'filesize': 4711}
     mgr.upload(self.rse_settings, lfn, source_dir=self.tmpdir)
 def test_delete_mgr_ok_multi(self):
     """MOCK (RSE/PROTOCOLS): Delete multiple files from storage (Success)"""
     # Stage every remote test file before running the delete scenario.
     for name in MgrTestCases.files_remote:
         mgr.upload(mgr.get_rse_info(self.rse_id),
                    [{'name': name, 'scope': 'user.%s' % self.user}])
     self.mtc.test_delete_mgr_ok_multi()
Example #6
0
 def test_get_mgr_SourceNotFound_single_pfn(self):
     """MOCK (RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)"""
     # Stage every remote test file so only the PFN lookup can fail.
     for name in MgrTestCases.files_remote:
         mgr.upload(mgr.get_rse_info(self.rse_id),
                    [{'name': name, 'scope': 'user.%s' % self.user}])
     self.mtc.test_get_mgr_SourceNotFound_single_pfn()
 def test_get_mgr_SourceNotFound_multi(self):
     """MOCK (RSE/PROTOCOLS): Get multiple files from storage providing LFNs and PFNs (SourceNotFound)"""
     # Stage the remote test files, then expect the multi-get to fail.
     for name in MgrTestCases.files_remote:
         mgr.upload(mgr.get_rse_info(self.rse_id),
                    [{'name': name, 'scope': 'user.%s' % self.user}])
     with pytest.raises(exception.SourceNotFound):
         self.mtc.test_get_mgr_SourceNotFound_multi()
Example #8
0
 def test_put_mgr_ok_single(self):
     """(RSE/PROTOCOLS): Put a single file to storage (Success)"""
     path = '%s/3_rse_local_put.raw' % self.tmpdir
     mgr.upload(self.rse_settings,
                {'name': '3_rse_local_put.raw',
                 'scope': 'user.%s' % self.user,
                 'adler32': adler32(path),
                 'filesize': os.stat(path).st_size},
                source_dir=self.tmpdir)
Example #9
0
 def test_put_mgr_ok_multi(self):
     """(RSE/PROTOCOLS): Put multiple files to storage (Success)"""
     scope = 'user.%s' % self.user
     path1 = '%s/1_rse_local_put.raw' % self.tmpdir
     path2 = '%s/2_rse_local_put.raw' % self.tmpdir
     lfns = [{'name': '1_rse_local_put.raw', 'scope': scope,
              'adler32': adler32(path1), 'filesize': os.stat(path1).st_size},
             {'name': '2_rse_local_put.raw', 'scope': scope,
              'adler32': adler32(path2), 'filesize': os.stat(path2).st_size}]
     result = mgr.upload(self.rse_settings, lfns, source_dir=self.tmpdir)
     status, details = result[0], result[1]
     # Both uploads must be reported successful.
     if not (status and details['user.%s:1_rse_local_put.raw' % self.user]
             and details['user.%s:2_rse_local_put.raw' % self.user]):
         raise Exception('Return not as expected: %s, %s' %
                         (status, details))
Example #10
0
 def test_put_mgr_SourceNotFound_multi(self):
     """(RSE/PROTOCOLS): Put multiple files to storage (SourceNotFound)"""
     scope = 'user.%s' % self.user
     good_path = '%s/4_rse_local_put.raw' % self.tmpdir
     lfns = [{'name': 'not_existing_data.raw', 'scope': scope,
              'adler32': 'some_random_stuff', 'filesize': 4711},
             {'name': '4_rse_local_put.raw', 'scope': scope,
              'adler32': adler32(good_path),
              'filesize': os.stat(good_path).st_size}]
     result = mgr.upload(self.rse_settings, lfns, source_dir=self.tmpdir)
     status, details = result[0], result[1]
     # The existing file must succeed; re-raise the error recorded for the
     # missing one so the caller can assert on SourceNotFound.
     if details['user.%s:4_rse_local_put.raw' % self.user]:
         raise details['user.%s:not_existing_data.raw' % self.user]
     raise Exception('Return not as expected: %s, %s' % (status, details))
Example #11
0
 def test_put_mgr_FileReplicaAlreadyExists_multi(self):
     """(RSE/PROTOCOLS): Put multiple files to storage (FileReplicaAlreadyExists)"""
     scope = 'user.%s' % self.user
     lfns = [{'name': '1_rse_remote_get.raw', 'scope': scope,
              'adler32': "bla-bla", 'filesize': 4711},
             {'name': '2_rse_remote_get.raw', 'scope': scope,
              'adler32': "bla-bla", 'filesize': 4711}]
     status, details = mgr.upload(self.rse_settings, lfns, self.tmpdir)
     # Surface the FileReplicaAlreadyExists recorded for the second file.
     if details['user.%s:1_rse_remote_get.raw' % self.user]:
         raise details['user.%s:2_rse_remote_get.raw' % self.user]
     raise Exception('Return not as expected: %s, %s' % (status, details))
Example #12
0
 def test_put_mgr_FileReplicaAlreadyExists_multi(self):
     """(RSE/PROTOCOLS): Put multiple files to storage (FileReplicaAlreadyExists)"""
     owner = 'user.%s' % self.user
     replicas = [
         {'name': '1_rse_remote_get.raw', 'scope': owner, 'adler32': "bla-bla", 'filesize': 4711},
         {'name': '2_rse_remote_get.raw', 'scope': owner, 'adler32': "bla-bla", 'filesize': 4711},
     ]
     status, details = mgr.upload(self.rse_settings, replicas, self.tmpdir)
     if not details['user.%s:1_rse_remote_get.raw' % self.user]:
         raise Exception('Return not as expected: %s, %s' % (status, details))
     # Re-raise the per-file error so the caller can assert on its type.
     raise details['user.%s:2_rse_remote_get.raw' % self.user]
Example #13
0
 def test_put_mgr_ok_multi(self):
     """(RSE/PROTOCOLS): Put multiple files to storage (Success)"""
     scope = 'user.%s' % self.user
     first = '%s/1_rse_local_put.raw' % self.tmpdir
     second = '%s/2_rse_local_put.raw' % self.tmpdir
     status, details = mgr.upload(
         self.rse_settings,
         [{'name': '1_rse_local_put.raw', 'scope': scope,
           'adler32': adler32(first), 'filesize': os.stat(first).st_size},
          {'name': '2_rse_local_put.raw', 'scope': scope,
           'adler32': adler32(second), 'filesize': os.stat(second).st_size}],
         self.tmpdir)
     if not (status and details['user.%s:1_rse_local_put.raw' % self.user]
             and details['user.%s:2_rse_local_put.raw' % self.user]):
         raise Exception('Return not as expected: %s, %s' % (status, details))
Example #14
0
 def test_put_mgr_SourceNotFound_multi(self):
     """(RSE/PROTOCOLS): Put multiple files to storage (SourceNotFound)"""
     src = '%s/4_rse_local_put.raw' % self.tmpdir
     status, details = mgr.upload(
         self.rse_settings,
         [{'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user,
           'adler32': 'some_random_stuff', 'filesize': 4711},
          {'name': '4_rse_local_put.raw', 'scope': 'user.%s' % self.user,
           'adler32': adler32(src), 'filesize': os.stat(src).st_size}],
         self.tmpdir)
     # The existing file must have been uploaded; surface the error
     # recorded for the missing one.
     if details['user.%s:4_rse_local_put.raw' % self.user]:
         raise details['user.%s:not_existing_data.raw' % self.user]
     raise Exception('Return not as expected: %s, %s' % (status, details))
Example #15
0
def upload(files, scope, metadata, rse, account, source_dir, worker_number, total_workers, dataset_lifetime, did=None, set_metadata=False):
    """Physically upload `files` from `source_dir` to the RSE `rse`.

    Retries the bulk upload up to 3 times with exponential backoff,
    treating FileReplicaAlreadyExists as success. On failure the
    partially uploaded replicas are deleted from the storage.

    :param did: optional dataset DID as 'scope:name'.
    :returns: False on failure.
        NOTE(review): the registration tail of this function appears to be
        missing here — on success it falls through and returns None;
        confirm against the variant that ends with `return True`.
    """
    logging.debug('In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}
    client = Client()

    list_files = []  # catalogue entries (DID + guid), built for later registration
    lfns = []        # physical-upload descriptors for rsemgr
    prepend_str = 'Thread [%i/%i] : ' % (worker_number, total_workers)
    logging.debug(prepend_str + 'Looping over the files')
    for filename in files:
        fullpath = '%s/%s' % (source_dir, filename)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logging.info(prepend_str + 'File %s : Size %s , adler32 %s' % (fullpath, str(size), checksum))
        list_files.append({'scope': scope, 'name': filename, 'bytes': size, 'adler32': checksum, 'meta': {'guid': generate_uuid()}})
        lfns.append({'name': filename, 'scope': scope, 'filesize': size, 'adler32': checksum, 'filename': filename})

    # Physical upload
    logging.info(prepend_str + 'Uploading physically the files %s on %s' % (str(lfns), rse))
    rse_info = rsemgr.get_rse_info(rse)
    try:
        success_upload = True
        for cnt in range(0, 3):  # BUGFIX: xrange is Python 2 only
            global_status, ret = rsemgr.upload(rse_info, lfns=lfns, source_dir=source_dir)
            logging.info(prepend_str + 'Returned global status : %s, Returned : %s' % (str(global_status), str(ret)))
            if not global_status:
                for item in ret:
                    # a pre-existing replica is not treated as a failure
                    if (not isinstance(ret[item], FileReplicaAlreadyExists)) and ret[item] is not True:
                        sleep(exp(cnt))  # exponential backoff before the next attempt
                        success_upload = False
                        logging.error(prepend_str + 'Problem to upload file %s with error %s' % (item, str(ret[item])))
                        break
            else:
                break
        if not success_upload:
            logging.error(prepend_str + 'Upload operation to %s failed, removing leftovers' % (rse))
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception as error:  # BUGFIX: 'except Exception, error' is Python 2 syntax
        logging.error(prepend_str + '%s' % (str(error)))
        return False
Example #16
0
def upload(files, scope, metadata, rse, account, source_dir, worker_number, total_workers, dataset_lifetime, did=None):
    """Physically upload `files` from `source_dir` to the RSE `rse`.

    Retries the bulk upload up to 3 times with exponential backoff,
    treating FileReplicaAlreadyExists as success. On failure the
    partially uploaded replicas are deleted from the storage.

    :param did: optional dataset DID as 'scope:name'.
    :returns: False on failure.
        NOTE(review): the registration tail of this function appears to be
        missing here — on success it falls through and returns None;
        confirm against the variant that ends with `return True`.
    """
    logging.debug('In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}
    client = Client()

    list_files = []  # catalogue entries (DID + guid), built for later registration
    lfns = []        # physical-upload descriptors for rsemgr
    logging.debug('Thread [%i/%i] : Looping over the files' % (worker_number, total_workers))
    for filename in files:
        fullpath = '%s/%s' % (source_dir, filename)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logging.info('Thread [%i/%i] : File %s : Size %s , adler32 %s' % (worker_number, total_workers, fullpath, str(size), checksum))
        list_files.append({'scope': scope, 'name': filename, 'bytes': size, 'adler32': checksum, 'meta': {'guid': generate_uuid()}})
        lfns.append({'name': filename, 'scope': scope, 'filesize': size, 'adler32': checksum})

    # Physical upload
    logging.info('Thread [%i/%i] : Uploading physically the files %s on %s' % (worker_number, total_workers, str(lfns), rse))
    rse_info = rsemgr.get_rse_info(rse)
    try:
        success_upload = True
        for i in range(0, 3):  # BUGFIX: xrange is Python 2 only
            gs, ret = rsemgr.upload(rse_info, lfns=lfns, source_dir=source_dir)
            logging.info('Returned global status : %s, Returned : %s' % (str(gs), str(ret)))
            if not gs:
                for x in ret:
                    # a pre-existing replica is not treated as a failure
                    if (not isinstance(ret[x], FileReplicaAlreadyExists)) and ret[x] is not True:
                        sleep(exp(i))  # exponential backoff before the next attempt
                        success_upload = False
                        logging.error('Problem to upload file %s with error %s' % (x, str(ret[x])))
                        break
            else:
                break
        if not success_upload:
            logging.error('Thread [%i/%i] : Upload operation to %s failed, removing leftovers' % (worker_number, total_workers, rse))
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception as e:  # BUGFIX: Python 2 except syntax; also log instead of failing silently
        logging.error('Thread [%i/%i] : %s' % (worker_number, total_workers, str(e)))
        return False
 def test_get_mgr_SourceNotFound_single_pfn(self):
     """MOCK (RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)"""
     # Stage every remote test file so only the PFN lookup can fail.
     for name in MgrTestCases.files_remote:
         lfn = {'name': name, 'scope': 'user.%s' % self.user}
         mgr.upload(mgr.get_rse_info(self.rse_id), [lfn])
     self.mtc.test_get_mgr_SourceNotFound_single_pfn()
Example #18
0
 def test_put_mgr_ok_single(self):
     """(RSE/PROTOCOLS): Put a single file to storage (Success)"""
     path = '%s/3_rse_local_put.raw' % self.tmpdir
     lfn = {'name': '3_rse_local_put.raw', 'scope': 'user.%s' % self.user,
            'adler32': adler32(path), 'filesize': os.stat(path).st_size}
     mgr.upload(self.rse_settings, lfn, self.tmpdir)
Example #19
0
 def test_put_mgr_FileReplicaAlreadyExists_single(self):
     """(RSE/PROTOCOLS): Put a single file to storage (FileReplicaAlreadyExists)"""
     # '1_rse_remote_get.raw' is expected to already be present on the RSE.
     lfn = {'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user,
            'adler32': 'bla-bla', 'filesize': 4711}
     mgr.upload(self.rse_settings, lfn, self.tmpdir)
Example #20
0
    def upload(self, items, summary_file_path=None):
        """

        :param items: List of dictionaries. Each dictionary describing a file to upload. Keys:
            path             - path of the file that will be uploaded
            rse              - rse name (e.g. 'CERN-PROD_DATADISK') where to upload the file
            did_scope        - Optional: custom did scope (Default: user.<account>)
            did_name         - Optional: custom did name (Default: name of the file)
            dataset_scope    - Optional: custom dataset scope
            dataset_name     - Optional: custom dataset name
            force_scheme     - Optional: force a specific scheme (if PFN upload this will be overwritten) (Default: None)
            pfn              - Optional: use a given PFN (this sets no_register to True)
            no_register      - Optional: if True, the file will not be registered in the rucio catalogue
            lifetime         - Optional: the lifetime of the file after it was uploaded
            transfer_timeout - Optional: time after the upload will be aborted
            guid             - Optional: guid of the file
        :param summary_file_path: Optional: a path where a summary in form of a json file will be stored

        :returns: 0 on success

        :raises InputValidationError: if any input arguments are in a wrong format
        :raises RSEBlacklisted: if a given RSE is not available for writing
        :raises NoFilesUploaded: if no files were successfully uploaded
        :raises NotAllFilesUploaded: if not all files were successfully uploaded
        """
        logger = self.logger

        self.trace['uuid'] = generate_uuid()

        # check given sources, resolve dirs into files, and collect meta infos
        files = self._collect_and_validate_file_info(items)

        # check if RSE of every file is available for writing
        # and cache rse settings
        registered_dataset_dids = set()
        registered_file_dids = set()
        for file in files:
            rse = file['rse']
            if not self.rses.get(rse):
                rse_settings = self.rses.setdefault(rse,
                                                    rsemgr.get_rse_info(rse))
                if rse_settings['availability_write'] != 1:
                    raise RSEBlacklisted(
                        '%s is blacklisted for writing. No actions have been taken'
                        % rse)

            dataset_scope = file.get('dataset_scope')
            dataset_name = file.get('dataset_name')
            if dataset_scope and dataset_name:
                dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
                file['dataset_did_str'] = dataset_did_str
                registered_dataset_dids.add(dataset_did_str)

            registered_file_dids.add('%s:%s' %
                                     (file['did_scope'], file['did_name']))

        wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
        if len(wrong_dids):
            raise InputValidationError(
                'DIDs used to address both files and datasets: %s' %
                str(wrong_dids))

        # clear this set again to ensure that we only try to register datasets once
        registered_dataset_dids = set()
        num_succeeded = 0
        # BUGFIX: the summary must accumulate across all files; it was
        # previously re-initialized inside the loop, so the written JSON
        # contained only the last uploaded file.
        summary = []
        for file in files:
            basename = file['basename']
            logger.info('Preparing upload for file %s' % basename)

            no_register = file.get('no_register')
            pfn = file.get('pfn')
            force_scheme = file.get('force_scheme')

            # BUGFIX: bind rse to THIS file before filling the trace; the old
            # code read the stale value left over from the validation loop.
            rse = file['rse']
            rse_settings = self.rses[rse]

            self.trace['scope'] = file['did_scope']
            self.trace['datasetScope'] = file.get('dataset_scope', '')
            self.trace['dataset'] = file.get('dataset_name', '')
            self.trace['remoteSite'] = rse
            self.trace['filesize'] = file['bytes']

            file_did = {'scope': file['did_scope'], 'name': file['did_name']}
            dataset_did_str = file.get('dataset_did_str')

            if not no_register:
                self._register_file(file, registered_dataset_dids)

            # if file already exists on RSE we're done
            if rsemgr.exists(rse_settings, file_did):
                logger.info('File already exists on RSE. Skipping upload')
                continue

            # try every write protocol in order until one succeeds
            protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings,
                                                     operation='write',
                                                     scheme=force_scheme)
            protocols.reverse()
            success = False
            while not success and len(protocols):
                protocol = protocols.pop()
                cur_scheme = protocol['scheme']
                logger.info('Trying upload with %s to %s' % (cur_scheme, rse))
                lfn = {}
                lfn['filename'] = basename
                lfn['scope'] = file['did_scope']
                lfn['name'] = file['did_name']
                lfn['adler32'] = file['adler32']
                lfn['filesize'] = file['bytes']

                self.trace['protocol'] = cur_scheme
                self.trace['transferStart'] = time.time()
                try:
                    state = rsemgr.upload(
                        rse_settings=rse_settings,
                        lfns=lfn,
                        source_dir=file['dirname'],
                        force_scheme=cur_scheme,
                        force_pfn=pfn,
                        transfer_timeout=file.get('transfer_timeout'))
                    success = True
                    file['upload_result'] = state
                except (ServiceUnavailable,
                        ResourceTemporaryUnavailable) as error:
                    logger.warning('Upload attempt failed')
                    logger.debug('Exception: %s' % str(error))

            if success:
                num_succeeded += 1
                self.trace['transferEnd'] = time.time()
                self.trace['clientState'] = 'DONE'
                file['state'] = 'A'
                logger.info('Successfully uploaded file %s' % basename)
                send_trace(self.trace, self.client.host,
                           self.client.user_agent)

                if summary_file_path:
                    summary.append(copy.deepcopy(file))

                # add file to dataset if needed
                if dataset_did_str and not no_register:
                    try:
                        self.client.attach_dids(file['dataset_scope'],
                                                file['dataset_name'],
                                                [file_did])
                    except Exception as error:
                        logger.warning('Failed to attach file to the dataset')
                        logger.debug(error)
                if not no_register:
                    replica_for_api = self._convert_file_for_api(file)
                    if not self.client.update_replicas_states(
                            rse, files=[replica_for_api]):
                        logger.warning('Failed to update replica state')
            else:
                logger.error('Failed to upload file %s' % basename)

        if summary_file_path:
            final_summary = {}
            for file in summary:
                file_scope = file['did_scope']
                file_name = file['did_name']
                file_did_str = '%s:%s' % (file_scope, file_name)
                final_summary[file_did_str] = {
                    'scope': file['scope'],
                    'name': file['name'],
                    'bytes': file['bytes'],
                    'rse': file['rse'],
                    'pfn': file['upload_result']['pfn'],
                    'guid': file['meta']['guid'],
                    'adler32': file['adler32'],
                    'md5': file['md5']
                }
            # BUGFIX: json.dump emits str, so the file must be opened in
            # text mode ('wb' raises TypeError on Python 3).
            with open(summary_file_path, 'w') as summary_file:
                json.dump(final_summary,
                          summary_file,
                          sort_keys=True,
                          indent=1)

        if num_succeeded == 0:
            raise NoFilesUploaded()
        elif num_succeeded != len(files):
            raise NotAllFilesUploaded()
        return 0
Example #21
0
def upload(files,
           scope,
           metadata,
           rse,
           account,
           source_dir,
           dataset_lifetime,
           did=None,
           set_metadata=False,
           logger=logging.log):
    """Physically upload local files to an RSE, then register them in Rucio.

    :param files: iterable of file names located under source_dir; names
        containing '/' are flattened to find the physical file on disk.
    :param scope: scope the files are registered under.
    :param metadata: dataset metadata, only applied when set_metadata is True.
    :param rse: name of the RSE to upload to.
    :param account: account owning the replication rule of the dataset.
    :param source_dir: local directory holding the files.
    :param dataset_lifetime: lifetime applied to the created dataset.
    :param did: optional dataset DID as 'scope:name'; when given the files
        are attached to that dataset, otherwise plain replicas plus a
        replication rule are created.
    :param set_metadata: when False, metadata is ignored.
    :param logger: logging callable with the signature of logging.log.
    :returns: True on success, False on any failure (leftover replicas are
        removed from storage before returning False).
    """
    logger(logging.DEBUG, 'In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}
    client = Client()

    list_files = []  # catalogue entries (DID + guid) for registration
    lfns = []  # physical-upload descriptors for rsemgr
    for filename in files:
        # flatten '/' out of the name to locate the file on disk
        physical_fname = filename
        if physical_fname.find('/') > -1:
            physical_fname = "".join(filename.split('/'))
        fullpath = '%s/%s' % (source_dir, physical_fname)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logger(logging.INFO, 'File %s : Size %s , adler32 %s', fullpath,
               str(size), checksum)
        list_files.append({
            'scope': scope,
            'name': filename,
            'bytes': size,
            'adler32': checksum,
            'meta': {
                'guid': generate_uuid()
            }
        })
        lfns.append({
            'name': filename,
            'scope': scope,
            'filesize': size,
            'adler32': checksum,
            'filename': physical_fname
        })

    # Physical upload
    logger(logging.INFO, 'Uploading physically the files %s on %s', str(lfns),
           rse)
    rse_info = rsemgr.get_rse_info(rse, vo=client.vo)
    try:
        success_upload = True
        # up to 3 attempts with exponential backoff between failures
        for cnt in range(0, 3):
            rows = rsemgr.upload(rse_info,
                                 lfns=lfns,
                                 source_dir=source_dir,
                                 logger=logger)
            # temporary hack
            # NOTE(review): assumes rsemgr.upload returns a mapping keyed by
            # 'success' and 1 — confirm against the rsemgr API
            global_status, ret = rows['success'], rows[1]
            logger(logging.INFO, 'Returned global status : %s, Returned : %s',
                   str(global_status), str(ret))
            if not global_status:
                for item in ret:
                    # a pre-existing replica is not treated as a failure
                    if (not isinstance(ret[item], FileReplicaAlreadyExists)
                        ) and ret[item] is not True:
                        sleep(exp(cnt))
                        success_upload = False
                        logger(logging.ERROR,
                               'Problem to upload file %s with error %s', item,
                               str(ret[item]))
                        break
            else:
                break
        if not success_upload:
            logger(logging.ERROR,
                   'Upload operation to %s failed, removing leftovers', rse)
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception as error:
        logger(logging.DEBUG, "Exception", exc_info=True)
        logger(logging.ERROR, '%s', str(error))
        return False
    logger(logging.INFO, 'Files successfully copied on %s', rse)

    # Registering DIDs and replicas in Rucio
    logger(logging.INFO, 'Registering DIDs and replicas in Rucio')
    meta = metadata
    if not set_metadata:
        meta = None
    if dsn:
        # create the dataset (with rule + lifetime) and attach the files
        try:
            client.add_dataset(scope=dsn['scope'],
                               name=dsn['name'],
                               rules=[{
                                   'account': account,
                                   'copies': 1,
                                   'rse_expression': rse,
                                   'grouping': 'DATASET',
                                   'activity': 'Functional Test'
                               }],
                               meta=meta,
                               lifetime=dataset_lifetime)
            client.add_files_to_dataset(scope=dsn['scope'],
                                        name=dsn['name'],
                                        files=list_files,
                                        rse=rse)
            logger(logging.INFO, 'Upload operation for %s:%s done',
                   dsn['scope'], dsn['name'])
        except Exception as error:
            # registration failed: clean the physical copies up again
            logger(logging.DEBUG, "Exception", exc_info=True)
            logger(logging.ERROR, 'Failed to upload %s', str(list_files))
            logger(logging.ERROR, '%s', str(error))
            logger(logging.ERROR, 'removing files from the Storage')
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    else:
        # no dataset: register bare replicas and pin them with a rule
        logger(logging.WARNING, 'No dsn is specified')
        try:
            client.add_replicas(files=list_files, rse=rse)
            client.add_replication_rule(list_files,
                                        copies=1,
                                        rse_expression=rse,
                                        activity='Functional Test')
            logger(logging.INFO, 'Upload operation for %s done',
                   str(list_files))
        except Exception as error:
            # registration failed: clean the physical copies up again
            logger(logging.DEBUG, "Exception", exc_info=True)
            logger(logging.ERROR, 'Failed to upload %s', str(list_files))
            logger(logging.ERROR, '%s', str(error))
            logger(logging.ERROR, 'Removing files from the Storage')
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    return True
 def test_delete_mgr_ok_single(self):
     """MOCK (RSE/PROTOCOLS): Delete a single file from storage (Success)"""
     # Stage every remote test file before running the delete scenario.
     for name in MgrTestCases.files_remote:
         mgr.upload(mgr.get_rse_info(self.rse_id),
                    [{'name': name, 'scope': 'user.%s' % self.user}])
     self.mtc.test_delete_mgr_ok_single()
Example #23
0
def upload(files, scope, metadata, rse, account, source_dir, worker_number, total_workers, dataset_lifetime, did=None, set_metadata=False):
    """Physically upload a list of local files to an RSE and register them in Rucio.

    :param files: iterable of file names (relative to source_dir) to upload.
    :param scope: scope under which the files are registered.
    :param metadata: metadata dict for the created dataset (used only when set_metadata is True).
    :param rse: destination RSE name.
    :param account: account that owns the replication rule of the created dataset.
    :param source_dir: local directory containing the files.
    :param worker_number: index of this worker (used for the log prefix only).
    :param total_workers: total number of workers (used for the log prefix only).
    :param dataset_lifetime: lifetime (seconds) of the created dataset.
    :param did: optional 'scope:name' string; when given, files are attached to this dataset.
    :param set_metadata: when True, attach `metadata` to the created dataset.
    :returns: True on success, False on any failure (leftover replicas are removed from storage).
    """
    logging.debug('In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}
    client = Client()

    list_files = []
    lfns = []
    prepend_str = 'Thread [%i/%i] : ' % (worker_number, total_workers)
    logging.debug(prepend_str + 'Looping over the files')
    for filename in files:
        fullpath = '%s/%s' % (source_dir, filename)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logging.info(prepend_str + 'File %s : Size %s , adler32 %s' % (fullpath, str(size), checksum))
        list_files.append({'scope': scope, 'name': filename, 'bytes': size, 'adler32': checksum, 'meta': {'guid': generate_uuid()}})
        lfns.append({'name': filename, 'scope': scope, 'filesize': size, 'adler32': checksum, 'filename': filename})

    # Physical upload with up to 3 attempts and an exponentially growing backoff.
    logging.info(prepend_str + 'Uploading physically the files %s on %s' % (str(lfns), rse))
    rse_info = rsemgr.get_rse_info(rse)
    try:
        success_upload = True
        for cnt in range(0, 3):
            # BUGFIX: reset the flag at the start of every attempt. Previously a
            # failed first attempt left success_upload == False even when a later
            # retry succeeded, so the successfully uploaded files were deleted
            # and the whole upload was reported as failed.
            success_upload = True
            rows = rsemgr.upload(rse_info, lfns=lfns, source_dir=source_dir)
            # temporary hack
            global_status, ret = rows['success'], rows[1]
            logging.info(prepend_str + 'Returned global status : %s, Returned : %s' % (str(global_status), str(ret)))
            if not global_status:
                for item in ret:
                    # A pre-existing replica is not an error; any other failure
                    # triggers a backoff and another attempt.
                    if (not isinstance(ret[item], FileReplicaAlreadyExists)) and ret[item] is not True:
                        sleep(exp(cnt))
                        success_upload = False
                        logging.error(prepend_str + 'Problem to upload file %s with error %s' % (item, str(ret[item])))
                        break
            else:
                break
        if not success_upload:
            logging.error(prepend_str + 'Upload operation to %s failed, removing leftovers' % (rse))
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception as error:
        logging.debug(traceback.format_exc())
        logging.error(prepend_str + '%s' % (str(error)))
        return False
    logging.info(prepend_str + 'Files successfully copied on %s' % (rse))

    # Registering DIDs and replicas in Rucio
    logging.info(prepend_str + 'Registering DIDs and replicas in Rucio')
    meta = metadata
    if not set_metadata:
        meta = None
    if dsn:
        try:
            client.add_dataset(scope=dsn['scope'], name=dsn['name'], rules=[{'account': account, 'copies': 1, 'rse_expression': rse, 'grouping': 'DATASET', 'activity': 'Functional Test'}], meta=meta, lifetime=dataset_lifetime)
            client.add_files_to_dataset(scope=dsn['scope'], name=dsn['name'], files=list_files, rse=rse)
            logging.info(prepend_str + 'Upload operation for %s:%s done' % (dsn['scope'], dsn['name']))
        except Exception as error:
            logging.debug(traceback.format_exc())
            logging.error(prepend_str + 'Failed to upload %(files)s' % locals())
            logging.error(prepend_str + '%s' % (str(error)))
            logging.error(prepend_str + 'Removing files from the Storage')
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    else:
        # Without a dataset DID, register bare replicas and pin them with a rule.
        logging.warning(prepend_str + 'No dsn is specified')
        try:
            client.add_replicas(files=list_files, rse=rse)
            client.add_replication_rule(list_files, copies=1, rse_expression=rse, activity='Functional Test')
            logging.info(prepend_str + 'Upload operation for %s done' % (str(list_files)))
        except Exception as error:
            logging.debug(traceback.format_exc())
            logging.error(prepend_str + 'Failed to upload %(files)s' % locals())
            logging.error(prepend_str + '%s' % (str(error)))
            logging.error(prepend_str + 'Removing files from the Storage')
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    return True
Exemple #24
0
    def upload(self, sources_with_settings, summary_file_path=None):
        """
        List of dictionaries of file descriptions. None means optional
        [{'path': 'file1',
          'rse': 'rse_name1',
          'did_scope': None,
          'did_name': None,
          'dataset_name': None,
          'dataset_scope': None,
          'scheme': None,
          'pfn': None,
          'no_register': None,
          'lifetime': None },

         {'path': 'file2',
          'rse': 'rse_name2',
          'did_scope': None,
          'did_name': None,
          'dataset_name': None,
          'dataset_scope': None,
          'scheme': None,
          'pfn': None,
          'no_register': None,
          'lifetime': None }]

          raises InputValidationError
          raises RSEBlacklisted
        """
        logger = self.logger

        self.trace['uuid'] = generate_uuid()

        # check given sources, resolve dirs into files, and collect meta infos
        files = self.collect_and_validate_file_info(sources_with_settings)

        # check if RSE of every file is available for writing
        # and cache rse settings
        registered_dataset_dids = set()
        registered_file_dids = set()
        for file in files:
            rse = file['rse']
            if not self.rses.get(rse):
                rse_settings = self.rses.setdefault(rse, rsemgr.get_rse_info(rse))
                if rse_settings['availability_write'] != 1:
                    raise RSEBlacklisted('%s is blacklisted for writing. No actions have been taken' % rse)

            dataset_scope = file.get('dataset_scope')
            dataset_name = file.get('dataset_name')
            if dataset_scope and dataset_name:
                dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
                file['dataset_did_str'] = dataset_did_str
                registered_dataset_dids.add(dataset_did_str)

            registered_file_dids.add('%s:%s' % (file['did_scope'], file['did_name']))

        wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
        if len(wrong_dids):
            raise InputValidationError('DIDs used to address both files and datasets: %s' % str(wrong_dids))

        # clear this set again to ensure that we only try to register datasets once
        registered_dataset_dids = set()
        # BUGFIX: collect successfully uploaded files here, initialised ONCE
        # before the loop. It used to be re-created inside the per-file upload
        # branch, which discarded earlier entries and left `summary` undefined
        # (NameError) when no file needed uploading but a summary was requested.
        summary = []
        for file in files:
            basename = file['basename']
            logger.info('Preparing upload for file %s' % basename)

            no_register = file.get('no_register')
            pfn = file.get('pfn')
            scheme = file.get('scheme')

            # BUGFIX: resolve the destination RSE before filling the trace.
            # The trace used to read `rse` left over from a previous loop,
            # recording a stale remote site for this file.
            rse = file['rse']
            rse_settings = self.rses[rse]

            self.trace['scope'] = file['did_scope']
            self.trace['datasetScope'] = file.get('dataset_scope', '')
            self.trace['dataset'] = file.get('dataset_name', '')
            self.trace['remoteSite'] = rse
            self.trace['filesize'] = file['bytes']

            file_scope = file['did_scope']
            file_name = file['did_name']
            file_did = {'scope': file_scope, 'name': file_name}
            file_did_str = '%s:%s' % (file_scope, file_name)
            dataset_did_str = file.get('dataset_did_str')

            # register a dataset if we need to
            if dataset_did_str and dataset_did_str not in registered_dataset_dids and not no_register:
                registered_dataset_dids.add(dataset_did_str)
                try:
                    self.client.add_dataset(scope=file['dataset_scope'],
                                            name=file['dataset_name'],
                                            rules=[{'account': self.account,
                                                    'copies': 1,
                                                    'rse_expression': rse,
                                                    'grouping': 'DATASET',
                                                    'lifetime': file['lifetime']}])
                    logger.info('Dataset %s successfully created' % dataset_did_str)
                except DataIdentifierAlreadyExists:
                    # TODO: Need to check the rules thing!!
                    logger.info("Dataset %s already exists" % dataset_did_str)

            replica_for_api = self.convert_file_for_api(file)
            try:
                # if the remote checksum is different this did must not be used
                meta = self.client.get_metadata(file_scope, file_name)
                logger.info('Comparing checksums of %s and %s' % (basename, file_did_str))
                if meta['adler32'] != file['adler32']:
                    logger.error('Local checksum %s does not match remote checksum %s' % (file['adler32'], meta['adler32']))
                    raise DataIdentifierAlreadyExists

                # add file to rse if it is not registered yet
                replicastate = list(self.client.list_replicas([file_did], all_states=True))
                if rse not in replicastate[0]['rses'] and not no_register:
                    logger.info('Adding replica at %s in Rucio catalog' % rse)
                    self.client.add_replicas(rse=file['rse'], files=[replica_for_api])
            except DataIdentifierNotFound:
                if not no_register:
                    logger.info('Adding replica at %s in Rucio catalog' % rse)
                    self.client.add_replicas(rse=file['rse'], files=[replica_for_api])
                    if not dataset_did_str:
                        # only need to add rules for files if no dataset is given
                        logger.info('Adding replication rule at %s' % rse)
                        self.client.add_replication_rule([file_did], copies=1, rse_expression=rse, lifetime=file['lifetime'])

            # if file already exists on RSE we're done
            if not rsemgr.exists(rse_settings, file_did):
                protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme)
                protocols.reverse()
                success = False
                while not success and len(protocols):
                    protocol = protocols.pop()
                    logger.info('Trying upload to %s with protocol %s' % (rse, protocol['scheme']))
                    lfn = {}
                    lfn['filename'] = file['basename']
                    lfn['scope'] = file['did_scope']
                    lfn['name'] = file['did_name']
                    lfn['adler32'] = file['adler32']
                    lfn['filesize'] = file['bytes']

                    self.trace['protocol'] = protocol['scheme']
                    self.trace['transferStart'] = time.time()
                    try:
                        state = rsemgr.upload(rse_settings=rse_settings,
                                              lfns=lfn,
                                              source_dir=file['dirname'],
                                              force_scheme=protocol['scheme'],
                                              force_pfn=pfn)
                        success = True
                        file['upload_result'] = state
                    except (ServiceUnavailable, ResourceTemporaryUnavailable) as error:
                        logger.warning('Upload attempt failed')
                        logger.debug('Exception: %s' % str(error))

                if success:
                    self.trace['transferEnd'] = time.time()
                    self.trace['clientState'] = 'DONE'
                    file['state'] = 'A'
                    logger.info('File %s successfully uploaded' % basename)
                    send_trace(self.trace, self.client.host, self.user_agent, logger=logger)
                    if summary_file_path:
                        summary.append(copy.deepcopy(file))
                else:
                    logger.error('Failed to upload file %s' % basename)
                    # TODO trace?
                    continue  # skip attach_did and update_states for this file
            else:
                logger.info('File already exists on RSE. Skipped upload')

            if not no_register:
                # add file to dataset if needed
                if dataset_did_str:
                    try:
                        logger.info('Attaching file to dataset %s' % dataset_did_str)
                        self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did])
                    except Exception as error:
                        logger.warning('Failed to attach file to the dataset')
                        logger.warning(error)

                logger.info('Setting replica state to available')
                replica_for_api = self.convert_file_for_api(file)
                self.client.update_replicas_states(rse, files=[replica_for_api])

        if summary_file_path:
            final_summary = {}
            for file in summary:
                file_scope = file['did_scope']
                file_name = file['did_name']
                file_did_str = '%s:%s' % (file_scope, file_name)
                # NOTE(review): reads file['scope']/file['name'] here although the
                # rest of this method uses 'did_scope'/'did_name' — confirm these
                # keys are set by collect_and_validate_file_info before relying on this.
                final_summary[file_did_str] = {'scope': file['scope'],
                                               'name': file['name'],
                                               'bytes': file['bytes'],
                                               'rse': file['rse'],
                                               'pfn': file['upload_result']['pfn'],
                                               'guid': file['meta']['guid'],
                                               'adler32': file['adler32'],
                                               'md5': file['md5']}
            # BUGFIX: json.dump writes str, so the file must be opened in text
            # mode ('w'); 'wb' raises TypeError on Python 3.
            with open(summary_file_path, 'w') as summary_file:
                json.dump(final_summary, summary_file, sort_keys=True, indent=1)
Exemple #25
0
    def upload(self, items, summary_file_path=None, traces_copy_out=None):
        """
        :param items: List of dictionaries. Each dictionary describing a file to upload. Keys:
            path                  - path of the file that will be uploaded
            rse                   - rse name (e.g. 'CERN-PROD_DATADISK') where to upload the file
            did_scope             - Optional: custom did scope (Default: user.<account>)
            did_name              - Optional: custom did name (Default: name of the file)
            dataset_scope         - Optional: custom dataset scope
            dataset_name          - Optional: custom dataset name
            force_scheme          - Optional: force a specific scheme (if PFN upload this will be overwritten) (Default: None)
            pfn                   - Optional: use a given PFN (this sets no_register to True, and no_register becomes mandatory)
            no_register           - Optional: if True, the file will not be registered in the rucio catalogue
            register_after_upload - Optional: if True, the file will be registered after successful upload
            lifetime              - Optional: the lifetime of the file after it was uploaded
            transfer_timeout      - Optional: time after the upload will be aborted
            guid                  - Optional: guid of the file
        :param summary_file_path: Optional: a path where a summary in form of a json file will be stored
        :param traces_copy_out: reference to an external list, where the traces should be uploaded

        :returns: 0 on success

        :raises InputValidationError: if any input arguments are in a wrong format
        :raises RSEBlacklisted: if a given RSE is not available for writing
        :raises NoFilesUploaded: if no files were successfully uploaded
        :raises NotAllFilesUploaded: if not all files were successfully uploaded
        """
        logger = self.logger

        self.trace['uuid'] = generate_uuid()

        # check given sources, resolve dirs into files, and collect meta infos
        files = self._collect_and_validate_file_info(items)

        # check if RSE of every file is available for writing
        # and cache rse settings
        registered_dataset_dids = set()
        registered_file_dids = set()
        for file in files:
            rse = file['rse']
            if not self.rses.get(rse):
                rse_settings = self.rses.setdefault(rse,
                                                    rsemgr.get_rse_info(rse))
                if rse_settings['availability_write'] != 1:
                    raise RSEBlacklisted(
                        '%s is blacklisted for writing. No actions have been taken'
                        % rse)

            dataset_scope = file.get('dataset_scope')
            dataset_name = file.get('dataset_name')
            if dataset_scope and dataset_name:
                dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
                file['dataset_did_str'] = dataset_did_str
                registered_dataset_dids.add(dataset_did_str)

            registered_file_dids.add('%s:%s' %
                                     (file['did_scope'], file['did_name']))
        wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
        if len(wrong_dids):
            raise InputValidationError(
                'DIDs used to address both files and datasets: %s' %
                str(wrong_dids))

        # clear this set again to ensure that we only try to register datasets once
        registered_dataset_dids = set()
        num_succeeded = 0
        summary = []
        for file in files:
            basename = file['basename']
            logger.info('Preparing upload for file %s' % basename)

            no_register = file.get('no_register')
            register_after_upload = file.get(
                'register_after_upload') and not no_register
            pfn = file.get('pfn')
            force_scheme = file.get('force_scheme')
            delete_existing = False

            trace = copy.deepcopy(self.trace)
            # appending trace to list reference, if the reference exists
            if traces_copy_out is not None:
                traces_copy_out.append(trace)

            # BUGFIX: resolve the destination RSE before filling the trace.
            # The trace used to read `rse` left over from a previous loop,
            # recording a stale remote site for this file.
            rse = file['rse']
            rse_settings = self.rses[rse]

            trace['scope'] = file['did_scope']
            trace['datasetScope'] = file.get('dataset_scope', '')
            trace['dataset'] = file.get('dataset_name', '')
            trace['remoteSite'] = rse
            trace['filesize'] = file['bytes']

            file_did = {'scope': file['did_scope'], 'name': file['did_name']}
            dataset_did_str = file.get('dataset_did_str')
            rse_sign_service = rse_settings.get('sign_url', None)
            is_deterministic = rse_settings.get('deterministic', True)
            if not is_deterministic and not pfn:
                logger.error(
                    'PFN has to be defined for NON-DETERMINISTIC RSE.')
                continue
            if pfn and is_deterministic:
                logger.warning(
                    'Upload with given pfn implies that no_register is True, except non-deterministic RSEs'
                )
                no_register = True

            # resolving local area networks
            domain = 'wan'
            rse_attributes = {}
            try:
                rse_attributes = self.client.list_rse_attributes(rse)
            # BUGFIX: catch Exception instead of a bare except, which would
            # also swallow KeyboardInterrupt/SystemExit.
            except Exception:
                logger.warning('Attributes of the RSE: %s not available.' %
                               rse)
            if (self.client_location and 'lan' in rse_settings['domain']
                    and 'site' in rse_attributes):
                if self.client_location['site'] == rse_attributes['site']:
                    domain = 'lan'

            if not no_register and not register_after_upload:
                self._register_file(file, registered_dataset_dids)
            # if register_after_upload, file should be overwritten if it is not registered
            # otherwise if file already exists on RSE we're done
            if register_after_upload:
                if rsemgr.exists(rse_settings,
                                 pfn if pfn else file_did,
                                 domain=domain,
                                 auth_token=self.auth_token,
                                 logger=logger):
                    try:
                        self.client.get_did(file['did_scope'],
                                            file['did_name'])
                        logger.info(
                            'File already registered. Skipping upload.')
                        trace['stateReason'] = 'File already exists'
                        continue
                    except DataIdentifierNotFound:
                        logger.info(
                            'File already exists on RSE. Previous left overs will be overwritten.'
                        )
                        delete_existing = True
            elif not is_deterministic and not no_register:
                if rsemgr.exists(rse_settings,
                                 pfn,
                                 domain=domain,
                                 auth_token=self.auth_token):
                    logger.info(
                        'File already exists on RSE with given pfn. Skipping upload. Existing replica has to be removed first.'
                    )
                    trace['stateReason'] = 'File already exists'
                    continue
                elif rsemgr.exists(rse_settings,
                                   file_did,
                                   domain=domain,
                                   auth_token=self.auth_token):
                    logger.info(
                        'File already exists on RSE with different pfn. Skipping upload.'
                    )
                    trace['stateReason'] = 'File already exists'
                    continue
            else:
                if rsemgr.exists(rse_settings,
                                 pfn if pfn else file_did,
                                 domain=domain,
                                 auth_token=self.auth_token):
                    logger.info('File already exists on RSE. Skipping upload')
                    trace['stateReason'] = 'File already exists'
                    continue

            # protocol handling and upload
            protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings,
                                                     operation='write',
                                                     scheme=force_scheme,
                                                     domain=domain)
            protocols.reverse()
            success = False
            state_reason = ''
            while not success and len(protocols):
                protocol = protocols.pop()
                cur_scheme = protocol['scheme']
                logger.info('Trying upload with %s to %s' % (cur_scheme, rse))
                lfn = {}
                lfn['filename'] = basename
                lfn['scope'] = file['did_scope']
                lfn['name'] = file['did_name']

                for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
                    if checksum_name in file:
                        lfn[checksum_name] = file[checksum_name]

                lfn['filesize'] = file['bytes']

                sign_service = None
                if cur_scheme == 'https':
                    sign_service = rse_sign_service

                trace['protocol'] = cur_scheme
                trace['transferStart'] = time.time()
                try:
                    state = rsemgr.upload(
                        rse_settings=rse_settings,
                        lfns=lfn,
                        domain=domain,
                        source_dir=file['dirname'],
                        force_scheme=cur_scheme,
                        force_pfn=pfn,
                        transfer_timeout=file.get('transfer_timeout'),
                        delete_existing=delete_existing,
                        sign_service=sign_service,
                        auth_token=self.auth_token,
                        logger=logger)
                    success = state['success']
                    file['upload_result'] = state
                except (ServiceUnavailable,
                        ResourceTemporaryUnavailable) as error:
                    logger.warning('Upload attempt failed')
                    logger.debug('Exception: %s' % str(error))
                    state_reason = str(error)

            if success:
                num_succeeded += 1
                trace['transferEnd'] = time.time()
                trace['clientState'] = 'DONE'
                file['state'] = 'A'
                logger.info('Successfully uploaded file %s' % basename)
                self._send_trace(trace)

                if summary_file_path:
                    summary.append(copy.deepcopy(file))

                if not no_register:
                    if register_after_upload:
                        self._register_file(file, registered_dataset_dids)
                    replica_for_api = self._convert_file_for_api(file)
                    if not self.client.update_replicas_states(
                            rse, files=[replica_for_api]):
                        logger.warning('Failed to update replica state')

                # add file to dataset if needed
                if dataset_did_str and not no_register:
                    try:
                        self.client.attach_dids(file['dataset_scope'],
                                                file['dataset_name'],
                                                [file_did])
                    except Exception as error:
                        logger.warning('Failed to attach file to the dataset')
                        logger.debug(error)
            else:
                trace['clientState'] = 'FAILED'
                trace['stateReason'] = state_reason
                self._send_trace(trace)
                logger.error('Failed to upload file %s' % basename)

        if summary_file_path:
            final_summary = {}
            for file in summary:
                file_scope = file['did_scope']
                file_name = file['did_name']
                file_did_str = '%s:%s' % (file_scope, file_name)
                final_summary[file_did_str] = {
                    'scope': file_scope,
                    'name': file_name,
                    'bytes': file['bytes'],
                    'rse': file['rse'],
                    'pfn': file['upload_result'].get('pfn', ''),
                    'guid': file['meta']['guid']
                }

                for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
                    if checksum_name in file:
                        final_summary[file_did_str][checksum_name] = file[
                            checksum_name]

            # BUGFIX: json.dump writes str, so the file must be opened in text
            # mode ('w'); 'wb' raises TypeError on Python 3.
            with open(summary_file_path, 'w') as summary_file:
                json.dump(final_summary,
                          summary_file,
                          sort_keys=True,
                          indent=1)

        if num_succeeded == 0:
            raise NoFilesUploaded()
        elif num_succeeded != len(files):
            raise NotAllFilesUploaded()
        return 0
Exemple #26
0
 def test_put_mgr_SourceNotFound_single(self):
     """(RSE/PROTOCOLS): Put a single file to storage (SourceNotFound)"""
     # The named file does not exist under tmpdir (third positional argument
     # is the source directory), so mgr.upload is expected to raise
     # SourceNotFound — presumably asserted by the surrounding test harness.
     mgr.upload(self.rse_settings, {'name': 'not_existing_data2.raw', 'scope': 'user.%s' % self.user, 'adler32': 'random_stuff', 'filesize': 0}, self.tmpdir)