def _download(self, obj, local_path, **options):
    """Stream the data object *obj* into a local file.

    *local_path* may be an existing directory, in which case the file is
    created inside it under the object's own name.  Raises
    OVERWRITE_WITHOUT_FORCE_FLAG when the target file already exists and
    no force keyword was supplied in *options*.
    """
    # Directory target: keep the object's basename inside it.
    target = (os.path.join(local_path, irods_basename(obj))
              if os.path.isdir(local_path) else local_path)

    # Refuse to clobber an existing file unless the caller forced it.
    if os.path.exists(target) and kw.FORCE_FLAG_KW not in options:
        raise ex.OVERWRITE_WITHOUT_FORCE_FLAG

    with open(target, 'wb') as sink, self.open(obj, 'r', **options) as src:
        for block in chunks(src, self.READ_BUFFER_SIZE):
            sink.write(block)
def put(self, file, irods_path, **options):
    """Upload local *file* to *irods_path*.

    A trailing '/' marks *irods_path* as a collection; the object is then
    stored under the file's basename.  When kw.ALL_KW is present the new
    object is replicated after the upload.
    """
    if irods_path.endswith('/'):
        obj = irods_path + os.path.basename(file)
    else:
        obj = irods_path

    # Set operation type to trigger acPostProcForPut
    if kw.OPR_TYPE_KW not in options:
        options[kw.OPR_TYPE_KW] = 1  # PUT_OPR

    with open(file, 'rb') as f, self.open(obj, 'w', **options) as o:
        # BUG FIX: WRITE_BUFFER_SIZE was referenced as a bare name, which is a
        # NameError at runtime; it is an attribute of this manager (the sibling
        # implementation in this file already uses self.WRITE_BUFFER_SIZE).
        for chunk in chunks(f, self.WRITE_BUFFER_SIZE):
            o.write(chunk)

    if kw.ALL_KW in options:
        options[kw.UPDATE_REPL_KW] = ''
        self.replicate(obj, **options)
def put(self, file, irods_path, **options):
    """Upload the local *file*, creating (or overwriting) a data object.

    A trailing '/' on *irods_path* means "store inside this collection
    under the file's basename".  If kw.ALL_KW is set, the object is
    replicated once the upload finishes.
    """
    # Trailing slash -> treat irods_path as a collection, append basename.
    obj = (irods_path + os.path.basename(file)
           if irods_path.endswith('/') else irods_path)

    # Operation type triggers the server-side acPostProcForPut rule.
    options.setdefault(kw.OPR_TYPE_KW, 1)  # PUT_OPR

    with open(file, 'rb') as src, self.open(obj, 'w', **options) as dest:
        for block in chunks(src, self.WRITE_BUFFER_SIZE):
            dest.write(block)

    if kw.ALL_KW in options:
        options[kw.UPDATE_REPL_KW] = ''
        self.replicate(obj, **options)
def local_file_cksum(self, filename, algorithm=None):
    """Hash a local file, reporting progress as a generator.

    Yields ``(bytes_read, '')`` for every chunk consumed, then a final
    ``(0, digest_string)``.  SHA-256 digests are rendered in iRODS form
    (``'sha2:' + base64``); any other algorithm yields its hex digest.
    When *algorithm* is None it defaults to MD5 unless the session
    account's default hash scheme is SHA256.
    """
    if algorithm is None:
        # Honour the account's preferred scheme; otherwise fall back to MD5.
        algorithm = ('sha256'
                     if self.session.pool.account.default_hash_scheme == 'SHA256'
                     else 'md5')

    hasher = hashlib.new(algorithm)
    with open(filename, 'rb') as stream:
        for piece in chunks(stream, self.BUFFER_SIZE):
            hasher.update(piece)
            yield len(piece), ''

    if hasher.name == 'sha256':
        final = 'sha2:' + base64.b64encode(hasher.digest()).decode()
    else:
        final = hasher.hexdigest()
    yield 0, final
def test_open_file_with_options(self):
    '''
    Similar to checksum test above, except that we use
    an optional keyword on open instead of a PEP.
    '''
    # skip if server is 4.1.4 or older
    if self.sess.server_version <= (4, 1, 4):
        self.skipTest('Not supported')

    # test data
    collection = self.coll_path
    filename = 'test_open_file_with_options.txt'
    # BUG FIX: both path templates contained a garbled literal instead of the
    # '{filename}' placeholder, so `filename` was never interpolated and every
    # run used the same broken path.  Restore the placeholders.
    file_path = '/tmp/{filename}'.format(**locals())
    obj_path = '{collection}/{filename}'.format(**locals())
    contents = u"blah blah " * 10000
    checksum = base64.b64encode(
        hashlib.sha256(contents.encode('utf-8')).digest()).decode()

    objs = self.sess.data_objects

    # make test file
    with open(file_path, 'w') as f:
        f.write(contents)

    # options for open/close
    options = {kw.REG_CHKSUM_KW: ''}

    # write contents of file to object
    with open(file_path, 'rb') as f, objs.open(obj_path, 'w', **options) as o:
        for chunk in chunks(f):
            o.write(chunk)

    # update object and verify checksum
    obj = self.sess.data_objects.get(obj_path)
    self.assertEqual(obj.checksum, "sha2:{checksum}".format(**locals()))

    # cleanup
    obj.unlink(force=True)
    os.unlink(file_path)
def put(self, local_path, irods_path, return_data_object=False,
        num_threads=DEFAULT_NUMBER_OF_THREADS, **options):
    """Upload *local_path* to *irods_path*, in parallel when beneficial.

    If *irods_path* is an existing collection the object is stored inside
    it under the file's basename.  Returns the resulting data object when
    *return_data_object* is True, otherwise None.
    """
    if self.sess.collections.exists(irods_path):
        obj = iRODSCollection.normalize_path(irods_path, os.path.basename(local_path))
    else:
        obj = irods_path

    with open(local_path, 'rb') as f:
        sizelist = []
        if self.should_parallelize_transfer(num_threads, f, measured_obj_size=sizelist):
            # Defer the object open so each worker can open its own handle.
            o = deferred_call(self.open, (obj, 'w'), options)
            f.close()
            if not self.parallel_put(
                    local_path, (obj, o),
                    total_bytes=sizelist[0],
                    num_threads=num_threads,
                    target_resource_name=options.get(kw.RESC_NAME_KW, '') or
                                         options.get(kw.DEST_RESC_NAME_KW, ''),
                    open_options=options):
                raise RuntimeError("parallel put failed")
        else:
            with self.open(obj, 'w', **options) as o:
                # Set operation type to trigger acPostProcForPut.  Mutating the
                # shared options dict after open is deliberate: the open handle
                # keeps a reference and applies them at close time.
                if kw.OPR_TYPE_KW not in options:
                    options[kw.OPR_TYPE_KW] = 1  # PUT_OPR
                for chunk in chunks(f, self.WRITE_BUFFER_SIZE):
                    o.write(chunk)

    if kw.ALL_KW in options:
        # FIX: replicate with a *copy* of the options and drop REG_CHKSUM_KW.
        # Leaving REG_CHKSUM_KW set raises "Requested to register checksum
        # without verifying, but source replica has a checksum", which could
        # mark replicas good with differing checksums.  pop() with a default
        # tolerates the keyword being absent.
        repl_options = options.copy()
        repl_options[kw.UPDATE_REPL_KW] = ''
        repl_options.pop(kw.REG_CHKSUM_KW, None)
        self.replicate(obj, **repl_options)

    if return_data_object:
        return self.get(obj)
def test_open_file_with_options(self):
    '''
    Similar to checksum test above, except that we use
    an optional keyword on open instead of a PEP.
    '''
    # skip if server is 4.1.4 or older
    if self.sess.server_version <= (4, 1, 4):
        self.skipTest('Not supported')

    # test data
    collection = self.coll_path
    filename = 'test_open_file_with_options.txt'
    # BUG FIX: both path templates contained a garbled literal instead of the
    # '{filename}' placeholder, so `filename` was never interpolated and every
    # run used the same broken path.  Restore the placeholders.
    file_path = '/tmp/{filename}'.format(**locals())
    obj_path = '{collection}/{filename}'.format(**locals())
    contents = u"blah blah " * 10000
    checksum = base64.b64encode(hashlib.sha256(contents.encode('utf-8')).digest()).decode()

    objs = self.sess.data_objects

    # make test file
    with open(file_path, 'w') as f:
        f.write(contents)

    # options for open/close
    options = {kw.REG_CHKSUM_KW: ''}

    # write contents of file to object
    with open(file_path, 'rb') as f, objs.open(obj_path, 'w', **options) as o:
        for chunk in chunks(f):
            o.write(chunk)

    # update object and verify checksum
    obj = self.sess.data_objects.get(obj_path)
    self.assertEqual(obj.checksum, "sha2:{checksum}".format(**locals()))

    # cleanup
    obj.unlink(force=True)
    os.unlink(file_path)
def _put(file, obj, **options):
    # Upload local *file* to data object *obj*, yielding the number of bytes
    # written after each chunk so the caller can display progress.
    # Supports early termination: closing the generator (GeneratorExit)
    # aborts the upload cleanly.
    # NOTE(review): uses `self` without declaring it — presumably defined in a
    # scope where `self` (with a `.dom` data-object manager) is available;
    # confirm against the enclosing definition.
    # adapted from https://github.com/irods/python-irodsclient
    # data_object_manager.py#L60
    # Set operation type to trigger acPostProcForPut
    if kw.OPR_TYPE_KW not in options:
        options[kw.OPR_TYPE_KW] = 1  # PUT_OPR
    with open(file, 'rb') as f:
        # `closed` tracks whether the object handle was already closed in the
        # GeneratorExit path, so the finally block does not close it twice.
        closed = False
        o = self.dom.open(obj, 'w', **options)
        try:
            for chunk in chunks(f, self.BUFFER_SIZE):
                o.write(chunk)
                # Progress callback point: report bytes written so far.
                yield len(chunk)
        except GeneratorExit:
            # generator was interrupted
            closed = True
            try:
                o.close()
            except irods.exception.USER_CHKSUM_MISMATCH:
                # it's normal that cksum fails since we were interrupted
                # -> ignore
                pass
            # Abort: skip replication below.  The finally clause still runs,
            # but `closed` is True so the handle is not closed again.
            return
        finally:
            # close data object except already done
            if not closed:
                o.close()
    if kw.ALL_KW in options:
        options[kw.UPDATE_REPL_KW] = ''
        self.dom.replicate(obj, **options)
def _download(self, obj, local_path, num_threads, **options):
    # Download data object *obj* to *local_path* (file or existing directory),
    # using a parallel multi-connection transfer when
    # should_parallelize_transfer() judges it worthwhile.
    if os.path.isdir(local_path):
        # Directory target: store under the object's own name inside it.
        local_file = os.path.join(local_path, irods_basename(obj))
    else:
        local_file = local_path
    # Check for force flag if local_file exists
    if os.path.exists(local_file) and kw.FORCE_FLAG_KW not in options:
        raise ex.OVERWRITE_WITHOUT_FORCE_FLAG
    with open(local_file, 'wb') as f, self.open(obj, 'r', **options) as o:
        if self.should_parallelize_transfer(num_threads, o):
            # The parallel path writes through its own handles; release the
            # (empty) local file immediately.  The `with` block's later
            # implicit close of an already-closed file is a harmless no-op.
            f.close()
            # NOTE(review): passes local_path (possibly a directory), not the
            # resolved local_file — presumably parallel_get re-resolves the
            # target path itself; confirm against parallel_get's contract.
            if not self.parallel_get(
                    (obj, o), local_path, num_threads=num_threads,
                    target_resource_name=options.get(kw.RESC_NAME_KW, '')):
                raise RuntimeError("parallel get failed")
        else:
            # Serial path: stream the object into the local file chunkwise.
            for chunk in chunks(o, self.READ_BUFFER_SIZE):
                f.write(chunk)
def put(self, local_path, irods_path, return_data_object=False,
        num_threads=DEFAULT_NUMBER_OF_THREADS, **options):
    """Upload *local_path* to *irods_path*, in parallel when beneficial.

    If *irods_path* is an existing collection the object is stored inside
    it under the file's basename.  Returns the resulting data object when
    *return_data_object* is True, otherwise None.
    """
    if self.sess.collections.exists(irods_path):
        obj = iRODSCollection.normalize_path(irods_path, os.path.basename(local_path))
    else:
        obj = irods_path

    with open(local_path, 'rb') as f:
        sizelist = []
        if self.should_parallelize_transfer(num_threads, f, measured_obj_size=sizelist):
            # Defer the object open so each worker can open its own handle.
            o = deferred_call(self.open, (obj, 'w'), options)
            f.close()
            if not self.parallel_put(
                    local_path, (obj, o),
                    total_bytes=sizelist[0],
                    num_threads=num_threads,
                    target_resource_name=options.get(kw.RESC_NAME_KW, '') or
                                         options.get(kw.DEST_RESC_NAME_KW, ''),
                    open_options=options):
                raise RuntimeError("parallel put failed")
        else:
            with self.open(obj, 'w', **options) as o:
                # Set operation type to trigger acPostProcForPut.  Mutating the
                # shared options dict after open is deliberate: the open handle
                # keeps a reference and applies them at close time.
                if kw.OPR_TYPE_KW not in options:
                    options[kw.OPR_TYPE_KW] = 1  # PUT_OPR
                for chunk in chunks(f, self.WRITE_BUFFER_SIZE):
                    o.write(chunk)

    if kw.ALL_KW in options:
        repl_options = options.copy()
        repl_options[kw.UPDATE_REPL_KW] = ''
        # Leaving REG_CHKSUM_KW set would raise the error:
        # Requested to register checksum without verifying, but source replica has a checksum. This can result
        # in multiple replicas being marked good with different checksums, which is an inconsistency.
        # BUG FIX: `del repl_options[kw.REG_CHKSUM_KW]` raised KeyError whenever
        # the caller supplied ALL_KW without REG_CHKSUM_KW; pop() with a default
        # removes it only if present.
        repl_options.pop(kw.REG_CHKSUM_KW, None)
        self.replicate(obj, **repl_options)

    if return_data_object:
        return self.get(obj)
def sha256_checksum(self, filename, block_size=65536):
    """Return the hex-encoded SHA-256 digest of the file at *filename*.

    The file is consumed in *block_size*-byte pieces so arbitrarily large
    files can be hashed without loading them fully into memory.
    """
    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        for piece in chunks(stream, block_size):
            digest.update(piece)
    return digest.hexdigest()