def clear_bucket(endpoint, bucket_name):
    """Delete every key in *bucket_name*, polling until the bucket is empty.

    Args:
        endpoint: Redis-like client handle passed through to the h* helpers.
        bucket_name: name of the bucket (key namespace) to empty.

    Returns:
        True once ``hlist_keys`` reports no remaining keys (returns None).
    """
    objects = hlist_keys(endpoint, bucket_name)
    while objects is not None:
        # Bug fix: the original guarded the delete with ``len(objects) > 1``,
        # so a bucket holding exactly one key was never cleared and this
        # polling loop spun forever. Delete whenever any key is present.
        if len(objects) >= 1:
            print("delete files {} in bucket {}".format(objects, bucket_name))
            hdelete_keys(endpoint, bucket_name, objects)
        objects = hlist_keys(endpoint, bucket_name)
    return True
def handler(event, context):
    """AWS Lambda entry point: smoke-test listing and deleting keys in the
    ``tmp-updates`` bucket on the ElastiCache Redis endpoint.

    Args:
        event: Lambda invocation payload (unused).
        context: Lambda runtime context (unused).
    """
    # Bug fix: ``hdelete_keys`` is used below but was never imported here.
    from archived.elasticache import hlist_keys, hdelete_keys
    # NOTE(review): hard-coded test endpoint — presumably a throwaway
    # dev cluster; confirm before reuse.
    location = "test.fifamc.ng.0001.euc1.cache.amazonaws.com"
    endpoint = redis.Redis(host=location, port=6379, db=0)
    keys = hlist_keys(endpoint, "tmp-updates")
    print(keys)
    # Bug fix: the original passed the undefined name ``he`` (NameError);
    # the listed keys were clearly intended.
    hdelete_keys(endpoint, "tmp-updates", keys)
    print(hlist_keys(endpoint, "tmp-updates"))
def merge_w_b_layers(endpoint, bucket_name, num_workers, prefix):
    """Accumulate per-worker tensor lists (weights/biases grads or model)
    posted to *bucket_name* until one file per worker has been consumed.

    Each file is a pickled list of ndarrays; lists are summed element-wise
    and, when *prefix* is ``'w_'``, averaged over *num_workers*.

    Returns:
        List of merged ndarrays (summed, or averaged for the ``'w_'`` prefix).
    """
    merged_value = []
    files_seen = 0
    # Busy-poll the bucket until every worker's file has been merged.
    while files_seen < num_workers:
        keys = hlist_keys(endpoint, bucket_name)
        if keys is None:
            continue
        for raw_key in keys:
            file_key = bytes.decode(raw_key)
            data = pickle.loads(hget_object(endpoint, bucket_name, file_key))
            # First file seen: allocate matching zero tensors to sum into.
            if files_seen == 0:
                merged_value = [np.zeros(t.shape, dtype=t.dtype) for t in data]
            for i, tensor in enumerate(data):
                merged_value[i] = merged_value[i] + tensor
            files_seen += 1
            # Consume the file so it is not merged twice.
            hdelete_keys(endpoint, bucket_name, [file_key])
    # average weights
    if prefix == 'w_':
        merged_value = [value / float(num_workers) for value in merged_value]
    return merged_value
def merge_w_b_grads(endpoint, bucket_name, num_workers, dtype, w_shape, b_shape,
                    w_grad_prefix="w_grad_", b_grad_prefix="b_grad_"):
    """Sum the weight and bias gradients posted by all workers, then average.

    Polls *bucket_name* until at least *num_workers* weight files and
    *num_workers* bias files have been consumed; files are routed by their
    key prefix, reshaped, accumulated, and deleted after merging.

    Args:
        endpoint: Redis-like client handle passed through to the h* helpers.
        bucket_name: bucket holding the raw gradient byte blobs.
        num_workers: number of worker files of each kind to wait for.
        dtype: numpy dtype of the serialized gradients.
        w_shape, b_shape: shapes to reshape weight/bias buffers into.
        w_grad_prefix, b_grad_prefix: key prefixes identifying each kind.

    Returns:
        Tuple ``(w_grad_sum / num_workers, b_grad_sum / num_workers)``.
    """
    num_w_files = 0
    num_b_files = 0
    w_grad_sum = np.zeros(w_shape, dtype=dtype)
    b_grad_sum = np.zeros(b_shape, dtype=dtype)
    while num_w_files < num_workers or num_b_files < num_workers:
        objects = hlist_keys(endpoint, bucket_name)
        while objects is not None:
            for obj in objects:
                file_key = bytes.decode(obj)
                print("the name of the file being processed = {}".format(
                    file_key))
                # Bug fix: np.fromstring is deprecated and was removed for
                # binary input in NumPy 2.0; np.frombuffer is the supported
                # equivalent (read-only view is fine — we only read it).
                bytes_data = np.frombuffer(
                    hget_object(endpoint, bucket_name, file_key), dtype)
                if file_key.startswith(w_grad_prefix):
                    w_grad_sum = w_grad_sum + bytes_data.reshape(w_shape)
                    num_w_files = num_w_files + 1
                elif file_key.startswith(b_grad_prefix):
                    b_grad_sum = b_grad_sum + bytes_data.reshape(b_shape)
                    num_b_files = num_b_files + 1
                # Consume the file so it is not merged twice.
                hdelete_keys(endpoint, bucket_name, [file_key])
            objects = hlist_keys(endpoint, bucket_name)
    return w_grad_sum / num_workers, b_grad_sum / num_workers
def delete_expired_w_b_grads(endpoint, bucket_name, cur_epoch, cur_batch,
                             w_prefix="w_grad_", b_prefix="b_grad_"):
    """Delete gradient files older than the current (epoch, batch) step.

    A key is expected to end in ``..._<epoch>_<batch>``; any weight or bias
    gradient whose step is strictly before ``(cur_epoch, cur_batch)`` is
    removed from *bucket_name*.
    """
    keys = hlist_keys(endpoint, bucket_name)
    if keys is None:
        return
    for raw_key in keys:
        file_key = bytes.decode(raw_key)
        if not file_key.startswith((w_prefix, b_prefix)):
            continue
        parts = file_key.split("_")
        key_epoch = int(parts[-2])
        key_batch = int(parts[-1])
        # Lexicographic tuple comparison == "epoch earlier, or same epoch
        # and batch earlier".
        if (key_epoch, key_batch) < (cur_epoch, cur_batch):
            print("delete object {} in bucket {}".format(
                file_key, bucket_name))
            hdelete_keys(endpoint, bucket_name, [file_key])
def compute_average_centroids(endpoint, avg_cent_bucket, worker_cent_bucket,
                              num_workers, shape, epoch, dt):
    """Wait for every worker's centroids, average them, and publish the result.

    Each worker posts a flat buffer of dtype *dt*: the centroid matrix
    (reshaped to *shape*) followed by a trailing scalar error. Once
    *num_workers* files are present, the centroids and errors are averaged,
    the worker bucket is cleared, and ``avg-<epoch>`` is written to
    *avg_cent_bucket*.

    Returns:
        1 on success.
    """
    num_files = 0
    centroids_vec_list = []
    error_list = []
    # Busy-poll until a complete set of worker files is visible at once;
    # counters are reset each round so a partial listing is retried.
    while num_files < num_workers:
        num_files = 0
        centroids_vec_list = []
        error_list = []
        keys = hlist_keys(endpoint, worker_cent_bucket)
        if keys is None:
            print(f"no object in the {worker_cent_bucket}")
            continue
        for raw_key in keys:
            file_key = bytes.decode(raw_key)
            payload = np.frombuffer(
                hget_object(endpoint, worker_cent_bucket, file_key), dtype=dt)
            # All but the last element form the centroid matrix; the last
            # element is the worker's scalar error.
            centroids_vec_list.append(payload[0:-1].reshape(shape))
            error_list.append(payload[-1])
            num_files = num_files + 1
    print("All workers are ready.")
    avg = avg_centroids(centroids_vec_list)
    avg_error = np.mean(np.array(error_list))
    clear_bucket(endpoint, worker_cent_bucket)
    print(
        f"Write averaged centroids {avg} for {epoch}-th epoch to bucket {avg_cent_bucket}"
    )
    print(f"Average error: {avg_error}")
    # Serialize as one flat buffer: flattened centroids + trailing error.
    res = np.append(avg.reshape(-1), avg_error)
    hset_object(endpoint, avg_cent_bucket, f"avg-{epoch}", res.tobytes())
    return 1