def _download(self, obj, local_path, **options):
        if os.path.isdir(local_path):
            file = os.path.join(local_path, irods_basename(obj))
        else:
            file = local_path

        # Check for force flag if file exists
        if os.path.exists(file) and kw.FORCE_FLAG_KW not in options:
            raise ex.OVERWRITE_WITHOUT_FORCE_FLAG

        with open(file, 'wb') as f, self.open(obj, 'r', **options) as o:
            for chunk in chunks(o, self.READ_BUFFER_SIZE):
                f.write(chunk)
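
A minimal usage sketch of the public download path that wraps this helper, assuming an authenticated iRODSSession named sess (the object and local paths are hypothetical):

import irods.keywords as kw

# Overwriting an existing local file requires the force flag; otherwise
# OVERWRITE_WITHOUT_FORCE_FLAG is raised, as in the check above.
options = {kw.FORCE_FLAG_KW: ''}
sess.data_objects.get('/tempZone/home/rods/data.txt', '/tmp/data.txt',
                      **options)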
Example 2
    def put(self, file, irods_path, **options):
        if irods_path.endswith('/'):
            obj = irods_path + os.path.basename(file)
        else:
            obj = irods_path

        # Set operation type to trigger acPostProcForPut
        if kw.OPR_TYPE_KW not in options:
            options[kw.OPR_TYPE_KW] = 1  # PUT_OPR

        with open(file, 'rb') as f, self.open(obj, 'w', **options) as o:
            for chunk in chunks(f, self.WRITE_BUFFER_SIZE):
                o.write(chunk)

        if kw.ALL_KW in options:
            options[kw.UPDATE_REPL_KW] = ''
            self.replicate(obj, **options)
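
A usage sketch of the trailing-slash behavior, assuming an authenticated session sess (paths are examples only):

# With a trailing slash, the local basename is appended on the iRODS side.
sess.data_objects.put('/tmp/report.csv', '/tempZone/home/rods/')
# -> creates /tempZone/home/rods/report.csv

# Without one, the path names the data object directly.
sess.data_objects.put('/tmp/report.csv', '/tempZone/home/rods/renamed.csv')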
Example 4
    def local_file_cksum(self, filename, algorithm=None):
        def get_digest(h):
            if h.name == 'sha256':
                return 'sha2:' + base64.b64encode(h.digest()).decode()

            return h.hexdigest()

        if algorithm is None:
            algorithm = 'md5'
            if self.session.pool.account.default_hash_scheme == 'SHA256':
                algorithm = 'sha256'

        scheme = hashlib.new(algorithm)
        with open(filename, 'rb') as f:
            for chunk in chunks(f, self.BUFFER_SIZE):
                scheme.update(chunk)
                yield len(chunk), ''

        yield 0, get_digest(scheme)
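
The method is a generator: each intermediate yield reports the size of the chunk just hashed (useful for progress reporting), and the final yield carries the digest. A sketch of driving it, where mgr stands for a hypothetical instance of the owning class:

done = 0
digest = None
for nbytes, d in mgr.local_file_cksum('/tmp/data.txt'):
    if d:                 # final yield: (0, digest)
        digest = d
    else:                 # progress yield: (chunk length, '')
        done += nbytes
print(done, digest)       # e.g. 1048576 sha2:...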
Example 5
    def test_open_file_with_options(self):
        '''
        Similar to checksum test above,
        except that we use an optional keyword on open
        instead of a PEP.
        '''

        # skip if server is 4.1.4 or older
        if self.sess.server_version <= (4, 1, 4):
            self.skipTest('Not supported')

        # test data
        collection = self.coll_path
        filename = 'test_open_file_with_options.txt'
        file_path = '/tmp/{filename}'.format(**locals())
        obj_path = '{collection}/{filename}'.format(**locals())
        contents = u"blah blah " * 10000
        checksum = base64.b64encode(
            hashlib.sha256(contents.encode('utf-8')).digest()).decode()

        objs = self.sess.data_objects

        # make test file
        with open(file_path, 'w') as f:
            f.write(contents)

        # options for open/close
        options = {kw.REG_CHKSUM_KW: ''}

        # write contents of file to object
        with open(file_path, 'rb') as f, objs.open(obj_path, 'w',
                                                   **options) as o:
            for chunk in chunks(f):
                o.write(chunk)

        # update object and verify checksum
        obj = self.sess.data_objects.get(obj_path)
        self.assertEqual(obj.checksum, "sha2:{checksum}".format(**locals()))

        # cleanup
        obj.unlink(force=True)
        os.unlink(file_path)
Example 6

    def put(self,
            local_path,
            irods_path,
            return_data_object=False,
            num_threads=DEFAULT_NUMBER_OF_THREADS,
            **options):

        if self.sess.collections.exists(irods_path):
            obj = iRODSCollection.normalize_path(irods_path,
                                                 os.path.basename(local_path))
        else:
            obj = irods_path

        with open(local_path, 'rb') as f:
            sizelist = []
            if self.should_parallelize_transfer(num_threads,
                                                f,
                                                measured_obj_size=sizelist):
                o = deferred_call(self.open, (obj, 'w'), options)
                f.close()
                if not self.parallel_put(
                        local_path, (obj, o),
                        total_bytes=sizelist[0],
                        num_threads=num_threads,
                        target_resource_name=options.get(kw.RESC_NAME_KW, '')
                        or options.get(kw.DEST_RESC_NAME_KW, ''),
                        open_options=options):
                    raise RuntimeError("parallel put failed")
            else:
                with self.open(obj, 'w', **options) as o:
                    # Set operation type to trigger acPostProcForPut
                    if kw.OPR_TYPE_KW not in options:
                        options[kw.OPR_TYPE_KW] = 1  # PUT_OPR
                    for chunk in chunks(f, self.WRITE_BUFFER_SIZE):
                        o.write(chunk)
        if kw.ALL_KW in options:
            options[kw.UPDATE_REPL_KW] = ''
            self.replicate(obj, **options)

        if return_data_object:
            return self.get(obj)
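
A usage sketch of this parallel variant, assuming an authenticated session sess; whether the multi-threaded branch is taken is decided internally by should_parallelize_transfer():

obj = sess.data_objects.put('/tmp/big.bin',
                            '/tempZone/home/rods/big.bin',
                            num_threads=4,
                            return_data_object=True)
print(obj.path, obj.size)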
Example 8
        def _put(file, obj, **options):
            # adapted from https://github.com/irods/python-irodsclient
            # data_object_manager.py#L60

            # Set operation type to trigger acPostProcForPut
            if kw.OPR_TYPE_KW not in options:
                options[kw.OPR_TYPE_KW] = 1  # PUT_OPR

            with open(file, 'rb') as f:
                closed = False
                o = self.dom.open(obj, 'w', **options)

                try:
                    for chunk in chunks(f, self.BUFFER_SIZE):
                        o.write(chunk)
                        yield len(chunk)
                except GeneratorExit:
                    # generator was interrupted

                    closed = True
                    try:
                        o.close()
                    except irods.exception.USER_CHKSUM_MISMATCH:
                        # a checksum mismatch is expected here since the
                        # transfer was interrupted, so ignore it
                        pass
                    return

                finally:
                    # close the data object unless it was already closed above
                    if not closed:
                        o.close()

            if kw.ALL_KW in options:
                options[kw.UPDATE_REPL_KW] = ''
                self.dom.replicate(obj, **options)
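
_put above is defined as a local helper; the sketch below pretends it is reachable as a method on a hypothetical uploader object, purely to show how a consumer would drive the generator for progress reporting. Because the GeneratorExit handler closes the data object, a consumer may safely break out early:

sent = 0
for nbytes in uploader._put('/tmp/data.txt',
                            '/tempZone/home/rods/data.txt'):
    sent += nbytes
    print('uploaded %d bytes' % sent)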
Example 9
    def _download(self, obj, local_path, num_threads, **options):

        if os.path.isdir(local_path):
            local_file = os.path.join(local_path, irods_basename(obj))
        else:
            local_file = local_path

        # Check for force flag if local_file exists
        if os.path.exists(local_file) and kw.FORCE_FLAG_KW not in options:
            raise ex.OVERWRITE_WITHOUT_FORCE_FLAG

        with open(local_file, 'wb') as f, self.open(obj, 'r', **options) as o:

            if self.should_parallelize_transfer(num_threads, o):
                f.close()
                if not self.parallel_get(
                    (obj, o),
                        local_path,
                        num_threads=num_threads,
                        target_resource_name=options.get(kw.RESC_NAME_KW, '')):
                    raise RuntimeError("parallel get failed")
            else:
                for chunk in chunks(o, self.READ_BUFFER_SIZE):
                    f.write(chunk)
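
A sketch of the corresponding public call, assuming an authenticated session sess; num_threads above 1 enables the parallel branch for sufficiently large objects:

import irods.keywords as kw

sess.data_objects.get('/tempZone/home/rods/big.bin', '/tmp/big.bin',
                      num_threads=4, **{kw.FORCE_FLAG_KW: ''})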
Example 10
    def put(self,
            local_path,
            irods_path,
            return_data_object=False,
            num_threads=DEFAULT_NUMBER_OF_THREADS,
            **options):

        if self.sess.collections.exists(irods_path):
            obj = iRODSCollection.normalize_path(irods_path,
                                                 os.path.basename(local_path))
        else:
            obj = irods_path

        with open(local_path, 'rb') as f:
            sizelist = []
            if self.should_parallelize_transfer(num_threads,
                                                f,
                                                measured_obj_size=sizelist):
                o = deferred_call(self.open, (obj, 'w'), options)
                f.close()
                if not self.parallel_put(
                        local_path, (obj, o),
                        total_bytes=sizelist[0],
                        num_threads=num_threads,
                        target_resource_name=options.get(kw.RESC_NAME_KW, '')
                        or options.get(kw.DEST_RESC_NAME_KW, ''),
                        open_options=options):
                    raise RuntimeError("parallel put failed")
            else:
                with self.open(obj, 'w', **options) as o:
                    # Set operation type to trigger acPostProcForPut
                    if kw.OPR_TYPE_KW not in options:
                        options[kw.OPR_TYPE_KW] = 1  # PUT_OPR
                    for chunk in chunks(f, self.WRITE_BUFFER_SIZE):
                        o.write(chunk)

        if kw.ALL_KW in options:
            repl_options = options.copy()
            repl_options[kw.UPDATE_REPL_KW] = ''
            # Leaving REG_CHKSUM_KW set would raise the error: "Requested to
            # register checksum without verifying, but source replica has a
            # checksum. This can result in multiple replicas being marked
            # good with different checksums, which is an inconsistency."
            # pop() rather than del avoids a KeyError when the caller never
            # set REG_CHKSUM_KW in the first place.
            repl_options.pop(kw.REG_CHKSUM_KW, None)
            self.replicate(obj, **repl_options)

        if return_data_object:
            return self.get(obj)
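
A sketch of the option combination this branch handles, assuming an authenticated session sess: REG_CHKSUM_KW asks the server to register a checksum at close, and ALL_KW triggers the replicate() call with the checksum keyword stripped as described in the comment above:

import irods.keywords as kw

options = {kw.REG_CHKSUM_KW: '', kw.ALL_KW: ''}
sess.data_objects.put('/tmp/data.txt',
                      '/tempZone/home/rods/data.txt', **options)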
Example 11
    def sha256_checksum(self, filename, block_size=65536):
        sha256 = hashlib.sha256()
        with open(filename, 'rb') as f:
            for chunk in chunks(f, block_size):
                sha256.update(chunk)
        return sha256.hexdigest()
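
Note that this helper returns a hex digest, while iRODS stores SHA-256 checksums as 'sha2:' plus the base64-encoded raw digest (see Example 4). A conversion sketch, with helper as a hypothetical instance of the class above:

import base64
import binascii

hex_digest = helper.sha256_checksum('/tmp/data.txt')
irods_style = 'sha2:' + base64.b64encode(
    binascii.unhexlify(hex_digest)).decode()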