def reset_files_extension(self, storage_name, prefix):
    bucket = self.__s3.get_bucket(storage_name)
    for key in bucket.list(prefix=prefix):
        if key.name.endswith('-done'):
            new_key_name = key.name.replace('-done', '')
            bucket.copy_key(new_key_name=new_key_name,
                            src_bucket_name=storage_name,
                            src_key_name=key.name)
            bucket.delete_key(key.name)

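# A minimal usage sketch for reset_files_extension, assuming the method lives
# on a class whose __s3 attribute is a boto.connect_s3() connection; the
# object, bucket, and prefix names below are hypothetical:
#
#     importer.reset_files_extension('ingest-bucket', 'incoming/')
#
# Every 'incoming/*-done' key is copied back to its original name and the
# '-done' copy is deleted, undoing the processed-mark left by a reader.
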
def test_write_s3_check_consistency_wrong_items_count(self):
    # given
    items_to_write = self.get_batch()
    options = self.get_writer_config()
    options['options']['check_consistency'] = True

    # when:
    try:
        writer = S3Writer(options, ExportMeta(options))
        writer.write_batch(items_to_write)
        writer.flush()
    finally:
        writer.close()

    # tamper with the uploaded key: re-upload the same content but with a
    # wrong 'total' metadata value, so the record count can no longer match
    bucket = self.s3_conn.get_bucket('fake_bucket')
    key = bucket.get_key('tests/0.jl.gz')
    content = key.get_contents_as_string()
    bucket.delete_key('tests/0.jl.gz')
    new_key = bucket.new_key('tests/0.jl.gz')
    new_key.update_metadata({'total': 999})
    new_key.set_contents_from_string(content)

    # then:
    with self.assertRaisesRegexp(InconsistentWriteState,
                                 'Unexpected number of records'):
        writer.finish_writing()

import logging

import boto
import boto.s3.bucket


def s3_delete(bucket_name, s3_filename):
    conn = boto.connect_s3()
    bucket = boto.s3.bucket.Bucket(conn, bucket_name)
    try:
        bucket.delete_key(s3_filename)
    except Exception:
        logging.warning('Failed to delete ' + s3_filename)

import gzip


def __get_file_contents_list_from_bucket(bucket, prefix, bucket_name):
    json_files_list = []
    for key in bucket.list(prefix=prefix):
        # skip "directory" placeholders and keys already marked as processed
        if key.name.endswith('/') or key.name.endswith('-done'):
            continue
        try:
            # mark the key as processed: copy to '<name>-done', drop the original
            new_key_name = "{}-done".format(key.name)
            bucket.copy_key(new_key_name=new_key_name,
                            src_bucket_name=bucket_name,
                            src_key_name=key.name)
            bucket.delete_key(key.name)
            # download the renamed key and collect its decompressed contents
            new_key = bucket.get_key(new_key_name)
            new_key.get_contents_to_filename(filename="tmp.json.gz")
            f = gzip.open('tmp.json.gz', 'rb')
            json_files_list.append(f.read())
            f.close()
        except Exception as ex:
            # ex.message is Python 2 only; format the exception itself instead
            Logger.log("warning", "{} FAILED: {}".format(key.name, ex))
    return json_files_list

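# A hedged usage sketch for __get_file_contents_list_from_bucket, assuming a
# boto connection and a bucket holding gzipped JSON under 'incoming/'; the
# bucket name is made up:
#
#     conn = boto.connect_s3()
#     bucket = conn.get_bucket('ingest-bucket')
#     payloads = __get_file_contents_list_from_bucket(bucket, 'incoming/',
#                                                     'ingest-bucket')
#
# Each element of payloads is the decompressed bytes of one key; the source
# keys are left renamed with a '-done' suffix as a processed marker, which is
# exactly what reset_files_extension above undoes.
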
def files_delete(self, files_saved, dry_run=False):
    """Workhorse for deletion.

    `files_saved`
        `dict`; format: files_saved[size] = (target_filename, bucket_name)
    `dry_run`
        default `False`; should we just pretend to delete?
    """
    # setup the s3 connection
    s3_buckets = self.s3_buckets
    # convert to a list, because we delete the items from the dict
    for size in list(files_saved.keys()):
        # grab the stash
        (target_filename, bucket_name) = files_saved[size]
        # active bucket
        bucket = s3_buckets[bucket_name]
        # delete it
        log.debug("going to delete %s from %s" % (target_filename, bucket_name))
        if not dry_run:
            bucket.delete_key(target_filename)
        # external logging
        if self._saverLogger:
            self._saverLogger.log_delete(
                bucket_name=bucket_name,
                key=target_filename,
            )
        # internal cleanup
        del files_saved[size]
    return files_saved

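# A hedged usage sketch for files_delete, assuming a hypothetical Saver class
# that exposes s3_buckets, _saverLogger, and this method; all names are made up:
#
#     saver = Saver(...)
#     remaining = saver.files_delete(
#         {'thumb': ('img/123-thumb.jpg', 'media-bucket')},
#         dry_run=True,            # log what would happen, delete nothing
#     )
#     assert remaining == {}       # entries are popped even on a dry run
#
# Note that the method mutates and returns the same dict: `del files_saved[size]`
# runs whether or not dry_run is set.
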
def test_write_s3_check_consistency_key_not_present(self):
    # given
    items_to_write = self.get_batch()
    options = self.get_writer_config()
    options['options']['check_consistency'] = True

    # when:
    try:
        writer = S3Writer(options, ExportMeta(options))
        writer.write_batch(items_to_write)
        writer.flush()
    finally:
        writer.close()

    bucket = self.s3_conn.get_bucket('fake_bucket')
    bucket.delete_key('tests/0.jl.gz')

    # then:
    with self.assertRaisesRegexp(InconsistentWriteState, 'not found in bucket'):
        writer.finish_writing()

import os

from boto.s3.key import Key


def delete_from_s3(bucket_name, file_name, prefix=None, access_key=None,
                   secret_key=None, dry_run=False):
    valid = True
    result_string = ""
    s3_conn = _get_s3_connection(access_key, secret_key)
    if s3_conn.lookup(bucket_name):
        bucket = s3_conn.get_bucket(bucket_name)
        k = Key(bucket)
        if prefix:
            k.key = os.path.join(prefix, file_name)
        else:
            k.key = file_name
        if dry_run:
            result_string += "Skipping actual delete from S3 due to dry run.\n"
        else:
            # boto's Bucket.delete_key expects a key name, not a Key object
            bucket.delete_key(k.key)
            result_string += ("Deleted file " + file_name +
                              " from S3 bucket " + bucket_name + "\n")
    else:
        result_string += "Cannot find S3 bucket with name " + bucket_name + "\n"
        valid = False
    return {"valid": valid, "result_string": result_string}

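# A hedged usage sketch for delete_from_s3, assuming _get_s3_connection wraps
# boto.connect_s3 with the given credentials; bucket and file names are made up:
#
#     outcome = delete_from_s3('my-bucket', 'report.csv', prefix='daily',
#                              dry_run=True)
#     if not outcome['valid']:
#         print(outcome['result_string'])   # e.g. the bucket was not found
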