def merge_w_b(bucket_name, num_workers, dtype, w_shape, b_shape,
              w_prefix="tmp_w_", b_prefix="tmp_b_"):
    """Poll the bucket until every worker's weight and bias files arrive,
    sum them, and return the element-wise average of each."""
    num_w_files = 0
    num_b_files = 0
    w_files = []
    b_files = []
    w_sum = np.zeros(w_shape, dtype=dtype)
    b_sum = np.zeros(b_shape, dtype=dtype)
    while num_w_files < num_workers or num_b_files < num_workers:
        objects = list_bucket_objects(bucket_name)
        if objects is not None:
            for obj in objects:
                file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                if file_key.startswith(w_prefix):
                    data = get_object(bucket_name, file_key).read()
                    bytes_data = np.frombuffer(data, dtype=dtype)
                    w_files.append(file_key)
                    w_grad = bytes_data.reshape(w_shape)
                    w_sum = w_sum + w_grad
                    num_w_files = num_w_files + 1
                    # keep the delete inside the 'if' so only w_/b_ files are removed
                    delete_object(bucket_name, file_key)
                elif file_key.startswith(b_prefix):
                    data = get_object(bucket_name, file_key).read()
                    bytes_data = np.frombuffer(data, dtype=dtype)
                    b_files.append(file_key)
                    b_grad = bytes_data.reshape(b_shape)
                    b_sum = b_sum + b_grad
                    num_b_files = num_b_files + 1
                    # keep the delete inside the 'if' so only w_/b_ files are removed
                    delete_object(bucket_name, file_key)
    return w_sum / float(num_workers), b_sum / float(num_workers)
def merge_all_workers(bucket_name, num_workers, prefix):
    """Poll the bucket until every worker's pickled list of arrays arrives,
    sum the lists element-wise, and return the per-worker average."""
    num_files = 0
    merged_value = []
    while num_files < num_workers:
        objects = list_bucket_objects(bucket_name)
        if objects is not None:
            for obj in objects:
                file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                data_bytes = get_object(bucket_name, file_key).read()
                data = pickle.loads(data_bytes)
                for i in range(len(data)):
                    if num_files == 0:
                        merged_value.append(np.zeros(data[i].shape, dtype=data[i].dtype))
                    merged_value[i] = merged_value[i] + data[i]
                num_files = num_files + 1
                delete_object(bucket_name, file_key)
    # average the accumulated values
    merged_value = [value / float(num_workers) for value in merged_value]
    return merged_value
def compute_average_centroids(avg_cent_bucket, worker_cent_bucket, num_workers, shape, epoch, dt):
    """Wait until every worker has uploaded its centroids (with the trailing error
    value), average them, clear the worker bucket, and publish the result."""
    num_files = 0
    centroids_vec_list = []
    error_list = []
    while num_files < num_workers:
        num_files = 0
        centroids_vec_list = []
        error_list = []
        objects = list_bucket_objects(worker_cent_bucket)
        if objects is not None:
            for obj in objects:
                file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                data = get_object(worker_cent_bucket, file_key).read()
                cent_with_error = np.frombuffer(data, dtype=dt)
                # the last value is the worker's error; the rest are the centroids
                cent = cent_with_error[0:-1].reshape(shape)
                error = cent_with_error[-1]
                centroids_vec_list.append(cent)
                error_list.append(error)
                num_files = num_files + 1
        else:
            print('No objects in {}'.format(worker_cent_bucket))

    avg = avg_centroids(centroids_vec_list)
    avg_error = np.mean(np.array(error_list))
    clear_bucket(worker_cent_bucket)
    print(f"Average error for {epoch}-th epoch: {avg_error}")
    res = avg.reshape(-1)
    res = np.append(res, avg_error).astype(dt)
    put_object(avg_cent_bucket, f"avg-{epoch}", res.tobytes())
    return 1
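# --- Data-layout note (not part of the original source): each worker's upload and
# the published "avg-{epoch}" object are a single flat array of dtype `dt` laid out
# as [centroids.flatten() ..., error]; the last element carries the (averaged) error
# and the rest reshape back to `shape`. A hypothetical reader would look like:
#
#   buf = np.frombuffer(get_object(avg_cent_bucket, f"avg-{epoch}").read(), dtype=dt)
#   centroids, avg_error = buf[:-1].reshape(shape), buf[-1]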
def reduce_batch(vector, tmp_bucket, merged_bucket, num_workers, worker_index, postfix):
    """All-reduce a 1-d numpy array through S3: every worker uploads its vector,
    worker 0 aggregates and publishes the merged vector, the others wait for it."""
    vec_shape = vector.shape
    vec_dtype = vector.dtype
    merged_vec = np.zeros(vec_shape, dtype=vec_dtype)

    postfix_splits = postfix.split("_")
    curr_epoch = int(postfix_splits[0])
    curr_batch = int(postfix_splits[1])

    # put object to s3, format of key: workerID_epoch_batch
    key = "{}_{}".format(worker_index, postfix)
    put_object(tmp_bucket, key, vector.tobytes())

    # the first worker reads and aggregates all workers' vectors
    if worker_index == 0:
        num_files = 0
        while num_files < num_workers:
            objects = list_bucket_objects(tmp_bucket)
            if objects is not None:
                delete_list = []
                for obj in objects:
                    file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                    key_splits = file_key.split("_")
                    key_epoch = key_splits[1]
                    key_batch = key_splits[2]
                    if key_epoch == str(curr_epoch) and key_batch == str(curr_batch):
                        data = get_object(tmp_bucket, file_key).read()
                        bytes_data = np.frombuffer(data, dtype=vec_dtype)
                        tmp_vec = bytes_data.reshape(vec_shape)
                        merged_vec += tmp_vec
                        num_files += 1
                        delete_list.append(file_key)
                delete_objects(tmp_bucket, delete_list)
        # write the merged data back to s3
        merged_file_name = 'merged_' + postfix
        put_object(merged_bucket, merged_file_name, merged_vec.tobytes())
        delete_expired_merged_batch(merged_bucket, curr_epoch, curr_batch)
    else:
        merged_file_name = 'merged_' + postfix
        merged_data = get_object_or_wait(merged_bucket, merged_file_name, 0.1).read()
        merged_vec = np.frombuffer(merged_data, dtype=vec_dtype).reshape(vec_shape)

    return merged_vec
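# --- Usage sketch (not part of the original source): a minimal example of how a
# worker might call reduce_batch on its flattened gradients. The bucket names and
# the grad_vector variable below are hypothetical placeholders.
#
#   grad_vector = np.concatenate((w_grad.flatten(), b_grad.flatten()))
#   postfix = "{}_{}".format(epoch, batch_index)          # "epoch_batch"
#   merged = reduce_batch(grad_vector, "tmp-bucket", "merged-bucket",
#                         num_workers, worker_index, postfix)
#   averaged = merged / float(num_workers)                # caller averages the sum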
def merge_w_b_grads(bucket_name, num_workers, dtype, w_shape, b_shape,
                    w_grad_prefix="w_grad_", b_grad_prefix="b_grad"):
    """Poll the bucket until every worker's weight and bias gradients arrive,
    sum them, and return the averaged weight and bias gradients."""
    num_w_files = 0
    num_b_files = 0
    w_grad_sum = np.zeros(w_shape, dtype=dtype)
    b_grad_sum = np.zeros(b_shape, dtype=dtype)
    while num_w_files < num_workers or num_b_files < num_workers:
        objects = list_bucket_objects(bucket_name)
        if objects is not None:
            for obj in objects:
                file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                data = get_object(bucket_name, file_key).read()
                bytes_data = np.frombuffer(data, dtype=dtype)
                if file_key.startswith(w_grad_prefix):
                    w_grad = bytes_data.reshape(w_shape)
                    print("merge the {}-th weight grad {} in bucket {} = {}"
                          .format(num_w_files, file_key, bucket_name, w_grad))
                    w_grad_sum = w_grad_sum + w_grad
                    num_w_files = num_w_files + 1
                elif file_key.startswith(b_grad_prefix):
                    b_grad = bytes_data.reshape(b_shape)
                    print("merge the {}-th bias grad {} in bucket {} = {}"
                          .format(num_b_files, file_key, bucket_name, b_grad))
                    b_grad_sum = b_grad_sum + b_grad
                    num_b_files = num_b_files + 1
                delete_object(bucket_name, file_key)
    return w_grad_sum / float(num_workers), b_grad_sum / float(num_workers)
def merge_np_bytes(bucket_name, num_workers, dtype, shape):
    """Poll the bucket until every worker's array arrives and return the
    element-wise sum (the caller is responsible for averaging if needed)."""
    num_files = 0
    sum_arr = np.zeros(shape, dtype=dtype)
    while num_files < num_workers:
        objects = list_bucket_objects(bucket_name)
        if objects is not None:
            for obj in objects:
                file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                print('file in bucket {} = {}'.format(bucket_name, file_key))
                data = get_object(bucket_name, file_key).read()
                tmp_arr = np.frombuffer(data, dtype=dtype).reshape(shape)
                print("the {}-th numpy array".format(num_files))
                print(tmp_arr)
                sum_arr = sum_arr + tmp_arr
                num_files = num_files + 1
                delete_object(bucket_name, file_key)
        else:
            # Didn't get any keys
            print('No objects in {}'.format(bucket_name))
    return sum_arr
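# --- Usage sketch (not part of the original source): the intended protocol for the
# S3 merge helpers above, assuming put_object is the same helper used elsewhere in
# this module. "grad-bucket" and the prefixes below are placeholders.
#
#   # every worker uploads its local gradients under a per-worker key
#   put_object("grad-bucket", "w_grad_" + str(worker_index), w_grad.tobytes())
#   put_object("grad-bucket", "b_grad_" + str(worker_index), b_grad.tobytes())
#   # one worker (e.g. rank 0) blocks until all files arrive and averages them
#   if worker_index == 0:
#       w_avg, b_avg = merge_w_b_grads("grad-bucket", num_workers, w_grad.dtype,
#                                      w_grad.shape, b_grad.shape,
#                                      "w_grad_", "b_grad_")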
def handler(event, context): start_time = time.time() bucket = event['bucket_name'] worker_index = event['rank'] num_workers = event['num_workers'] key = event['file'].split(",") num_classes = event['num_classes'] num_features = event['num_features'] pos_tag = event['pos_tag'] num_epochs = event['num_epochs'] learning_rate = event['learning_rate'] batch_size = event['batch_size'] host = event['host'] port = event['port'] print('bucket = {}'.format(bucket)) print('number of workers = {}'.format(num_workers)) print('worker index = {}'.format(worker_index)) print("file = {}".format(key)) print('number of workers = {}'.format(num_workers)) print('worker index = {}'.format(worker_index)) print('num epochs = {}'.format(num_epochs)) print('num classes = {}'.format(num_classes)) print('num features = {}'.format(num_features)) print('positive tag = {}'.format(pos_tag)) print('learning rate = {}'.format(learning_rate)) print("batch_size = {}".format(batch_size)) print("host = {}".format(host)) print("port = {}".format(port)) # Set thrift connection # Make socket transport = TSocket.TSocket(host, port) # Buffering is critical. Raw sockets are very slow transport = TTransport.TBufferedTransport(transport) # Wrap in a protocol protocol = TBinaryProtocol.TBinaryProtocol(transport) # Create a client to use the protocol encoder t_client = ParameterServer.Client(protocol) # Connect! transport.open() # test thrift connection ps_client.ping(t_client) print("create and ping thrift server >>> HOST = {}, PORT = {}".format( host, port)) # read file from s3 file = get_object(bucket, key[0]).read().decode('utf-8').split("\n") dataset = DenseLibsvmDataset(file, num_features, pos_tag) if len(key) > 1: for more_key in key[1:]: file = get_object(bucket, more_key).read().decode('utf-8').split("\n") dataset.add_more(file) print("read data cost {} s".format(time.time() - start_time)) parse_start = time.time() total_count = dataset.__len__() pos_count = 0 for i in range(total_count): if dataset.__getitem__(i)[1] == 1: pos_count += 1 print("{} positive observations out of {}".format(pos_count, total_count)) print("parse data cost {} s".format(time.time() - parse_start)) preprocess_start = time.time() # Creating data indices for training and validation splits: dataset_size = len(dataset) indices = list(range(dataset_size)) split = int(np.floor(VALIDATION_RATIO * dataset_size)) if SHUFFLE_DATASET: np.random.seed(RANDOM_SEED) np.random.shuffle(indices) train_indices, val_indices = indices[split:], indices[:split] # Creating PT data samplers and loaders: train_sampler = SubsetRandomSampler(train_indices) valid_sampler = SubsetRandomSampler(val_indices) train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler) validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler) print("preprocess data cost {} s, dataset size = {}".format( time.time() - preprocess_start, dataset_size)) model = SVM(NUM_FEATURES, NUM_CLASSES) # Loss and Optimizer # Softmax is internally computed. # Set parameters to be updated. 
criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE) # register model model_name = "w.b" weight_shape = model.linear.weight.data.numpy().shape weight_length = weight_shape[0] * weight_shape[1] bias_shape = model.linear.bias.data.numpy().shape bias_length = bias_shape[0] model_length = weight_length + bias_length ps_client.register_model(t_client, worker_index, model_name, model_length, num_workers) ps_client.exist_model(t_client, model_name) print("register and check model >>> name = {}, length = {}".format( model_name, model_length)) # Training the Model train_start = time.time() iter_counter = 0 for epoch in range(NUM_EPOCHS): epoch_start = time.time() for batch_index, (items, labels) in enumerate(train_loader): print("------worker {} epoch {} batch {}------".format( worker_index, epoch, batch_index)) batch_start = time.time() # pull latest model ps_client.can_pull(t_client, model_name, iter_counter, worker_index) latest_model = ps_client.pull_model(t_client, model_name, iter_counter, worker_index) model.linear.weight = Parameter( torch.from_numpy( np.asarray(latest_model[:weight_length], dtype=np.double).reshape(weight_shape))) model.linear.bias = Parameter( torch.from_numpy( np.asarray(latest_model[weight_length:], dtype=np.double).reshape(bias_shape[0]))) items = Variable(items.view(-1, NUM_FEATURES)) labels = Variable(labels) # Forward + Backward + Optimize optimizer.zero_grad() outputs = model(items.double()) loss = criterion(outputs, labels) loss.backward() # flatten and concat gradients of weight and bias w_b_grad = np.concatenate( (model.linear.weight.grad.data.numpy().flatten(), model.linear.bias.grad.data.numpy().flatten())) cal_time = time.time() - batch_start # push gradient to PS sync_start = time.time() ps_client.can_push(t_client, model_name, iter_counter, worker_index) ps_client.push_grad(t_client, model_name, w_b_grad, LEARNING_RATE, iter_counter, worker_index) ps_client.can_pull(t_client, model_name, iter_counter + 1, worker_index) # sync all workers sync_time = time.time() - sync_start print( 'Epoch: [%d/%d], Step: [%d/%d] >>> Time: %.4f, Loss: %.4f, epoch cost %.4f, ' 'batch cost %.4f s: cal cost %.4f s and communication cost %.4f s' % (epoch + 1, NUM_EPOCHS, batch_index + 1, len(train_indices) / BATCH_SIZE, time.time() - train_start, loss.data, time.time() - epoch_start, time.time() - batch_start, cal_time, sync_time)) iter_counter += 1 # Test the Model correct = 0 total = 0 test_loss = 0 for items, labels in validation_loader: items = Variable(items.view(-1, NUM_FEATURES)) labels = Variable(labels) outputs = model(items) test_loss += criterion(outputs, labels).data _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum() print( 'Time = %.4f, accuracy of the model on the %d test samples: %d %%, loss = %f' % (time.time() - train_start, len(val_indices), 100 * correct / total, test_loss)) end_time = time.time() print("Elapsed time = {} s".format(end_time - start_time))
def handler(event, context): start_time = time.time() bucket = event['bucket_name'] worker_index = event['rank'] num_workers = event['num_workers'] key = event['file'].split(",") tmp_bucket = event['tmp_bucket'] merged_bucket = event['merged_bucket'] num_classes = event['num_classes'] num_features = event['num_features'] pos_tag = event['pos_tag'] num_epochs = event['num_epochs'] learning_rate = event['learning_rate'] batch_size = event['batch_size'] print('bucket = {}'.format(bucket)) print("file = {}".format(key)) print('number of workers = {}'.format(num_workers)) print('worker index = {}'.format(worker_index)) print('tmp bucket = {}'.format(tmp_bucket)) print('merge bucket = {}'.format(merged_bucket)) print('num epochs = {}'.format(num_epochs)) print('num classes = {}'.format(num_classes)) print('num features = {}'.format(num_features)) print('positive tag = {}'.format(pos_tag)) print('learning rate = {}'.format(learning_rate)) print("batch_size = {}".format(batch_size)) # read file from s3 file = get_object(bucket, key[0]).read().decode('utf-8').split("\n") dataset = DenseLibsvmDataset(file, num_features, pos_tag) if len(key) > 1: for more_key in key[1:]: file = get_object(bucket, more_key).read().decode('utf-8').split("\n") dataset.add_more(file) print("read data cost {} s".format(time.time() - start_time)) parse_start = time.time() total_count = dataset.__len__() pos_count = 0 for i in range(total_count): if dataset.__getitem__(i)[1] == 1: pos_count += 1 print("{} positive observations out of {}".format(pos_count, total_count)) print("parse data cost {} s".format(time.time() - parse_start)) preprocess_start = time.time() # Creating data indices for training and validation splits: dataset_size = len(dataset) indices = list(range(dataset_size)) split = int(np.floor(validation_ratio * dataset_size)) if shuffle_dataset: np.random.seed(random_seed) np.random.shuffle(indices) train_indices, val_indices = indices[split:], indices[:split] # Creating PT data samplers and loaders: train_sampler = SubsetRandomSampler(train_indices) valid_sampler = SubsetRandomSampler(val_indices) train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler) validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler) print("preprocess data cost {} s, dataset size = {}" .format(time.time() - preprocess_start, dataset_size)) model = SVM(num_features, num_classes) # Loss and Optimizer # Softmax is internally computed. # Set parameters to be updated. 
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # Training the Model train_start = time.time() for epoch in range(num_epochs): epoch_start = time.time() epoch_loss = 0 for batch_index, (items, labels) in enumerate(train_loader): batch_start = time.time() items = Variable(items.view(-1, num_features)) labels = Variable(labels).float() # Forward + Backward + Optimize optimizer.zero_grad() outputs = model(items) loss = torch.mean(torch.clamp(1 - outputs.t() * labels, min=0)) # hinge loss loss += 0.01 * torch.mean(model.linear.weight ** 2) / 2.0 # l2 penalty epoch_loss += loss loss.backward() optimizer.step() w = model.linear.weight.data.numpy() w_shape = w.shape b = model.linear.bias.data.numpy() b_shape = b.shape w_and_b = np.concatenate((w.flatten(), b.flatten())) cal_time = time.time() - epoch_start sync_start = time.time() postfix = "{}".format(epoch) u_w_b_merge = reduce_epoch(w_and_b, tmp_bucket, merged_bucket, num_workers, worker_index, postfix) w_mean = u_w_b_merge[: w_shape[0] * w_shape[1]].reshape(w_shape) / float(num_workers) b_mean = u_w_b_merge[w_shape[0] * w_shape[1]:].reshape(b_shape[0]) / float(num_workers) model.linear.weight.data = torch.from_numpy(w_mean) model.linear.bias.data = torch.from_numpy(b_mean) sync_time = time.time() - sync_start # Test the Model test_start = time.time() correct = 0 total = 0 test_loss = 0 for items, labels in validation_loader: items = Variable(items.view(-1, num_features)) labels = Variable(labels) outputs = model(items) test_loss = torch.mean(torch.clamp(1 - outputs.t() * labels.float(), min=0)) # hinge loss _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum() test_time = time.time() - test_start print('Epoch: [%d/%d] has %d batches, Time: %.4f, Loss: %.4f, ' 'epoch cost %.4f: computation cost %.4f s communication cost %.4f s test cost %.4f s, ' 'accuracy of the model on the %d test samples: %d %%, loss = %f' % (epoch + 1, num_epochs, batch_index, time.time() - train_start, epoch_loss.data, time.time() - epoch_start, cal_time, sync_time, test_time, len(val_indices), 100 * correct / total, test_loss / total)) if worker_index == 0: delete_expired_merged_epoch(merged_bucket, epoch) # Test the Model correct = 0 total = 0 for items, labels in validation_loader: items = Variable(items.view(-1, num_features)) labels = Variable(labels) outputs = model(items) test_loss = torch.mean(torch.clamp(1 - outputs.t() * labels.float(), min=0)) # hinge loss _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum() print('Accuracy of the model on the %d test samples: %d %%' % (len(val_indices), 100 * correct / total)) if worker_index == 0: clear_bucket(merged_bucket) clear_bucket(tmp_bucket) end_time = time.time() print("Elapsed time = {} s".format(end_time - start_time))
def scatter_reduce(vector, tmp_bucket, merged_bucket, num_workers, myrank, postfix):
    """Scatter-reduce a 1-d numpy array through S3: each worker aggregates one
    chunk of the vector, publishes it, then reads the other aggregated chunks
    and reconstructs the full vector."""
    num_all_values = vector.size
    num_values_per_worker = num_all_values // num_workers
    residue = num_all_values % num_workers
    curr_epoch = postfix.split("_")[0]
    curr_batch = postfix.split("_")[1]

    my_offset = (num_values_per_worker * myrank) + min(residue, myrank)
    my_length = num_values_per_worker + (1 if myrank < residue else 0)
    my_chunk = vector[my_offset:my_offset + my_length]

    # write partitioned vector to the shared memory, except the chunk charged by myself
    for i in range(num_workers):
        if i != myrank:
            offset = (num_values_per_worker * i) + min(residue, i)
            length = num_values_per_worker + (1 if i < residue else 0)
            # indicating the chunk number and which worker it comes from
            key = "{}_{}".format(i, myrank)
            # format of key in tmp-bucket: chunkID_workerID_epoch_batch
            put_object(tmp_bucket, key + '_' + postfix, vector[offset:offset + length].tobytes())

    # read and aggregate the corresponding chunk
    num_files = 0
    while num_files < num_workers - 1:
        objects = list_bucket_objects(tmp_bucket)
        if objects is not None:
            for obj in objects:
                file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                key_splits = file_key.split("_")
                # if it's the chunk I am responsible for and it comes from the current step
                # format of key in tmp-bucket: chunkID_workerID_epoch_batch
                if key_splits[0] == str(myrank) and key_splits[2] == curr_epoch \
                        and key_splits[3] == curr_batch:
                    data = get_object(tmp_bucket, file_key).read()
                    bytes_data = np.frombuffer(data, dtype=vector.dtype)
                    my_chunk = my_chunk + bytes_data
                    num_files += 1
                    delete_object(tmp_bucket, file_key)

    # write the aggregated chunk back
    # key format in merged_bucket: chunkID_epoch_batch
    put_object(merged_bucket, str(myrank) + '_' + postfix, my_chunk.tobytes())

    # read other aggregated chunks
    merged_value = {myrank: my_chunk}
    num_merged_files = 0
    already_read = []
    while num_merged_files < num_workers - 1:
        objects = list_bucket_objects(merged_bucket)
        if objects is not None:
            for obj in objects:
                file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                key_splits = file_key.split("_")
                # key format in merged_bucket: chunkID_epoch_batch
                if key_splits[0] != str(myrank) and key_splits[1] == curr_epoch \
                        and key_splits[2] == curr_batch and file_key not in already_read:
                    data = get_object(merged_bucket, file_key).read()
                    bytes_data = np.frombuffer(data, dtype=vector.dtype)
                    merged_value[int(key_splits[0])] = bytes_data
                    already_read.append(file_key)
                    num_merged_files += 1

    # reconstruct the whole vector from the chunks, in rank order
    result = merged_value[0]
    for k in range(1, num_workers):
        result = np.concatenate((result, merged_value[k]))
    return result
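# --- Partitioning note (not part of the original source): scatter_reduce splits the
# vector into num_workers chunks, giving the first `residue` ranks one extra element.
# For example, with 10 values and 4 workers:
#
#   rank 0: offset 0, length 3      rank 1: offset 3, length 3
#   rank 2: offset 6, length 2      rank 3: offset 8, length 2
#
# so every element is owned by exactly one rank and the concatenation at the end
# reproduces the original ordering.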
def handler(event, context): start_time = time.time() bucket = event['bucket'] key = event['name'] num_features = event['num_features'] num_classes = event['num_classes'] redis_location = event['elasticache'] endpoint = redis_init(redis_location) print('bucket = {}'.format(bucket)) print('key = {}'.format(key)) key_splits = key.split("_") num_worker = event['num_files'] worker_index = event['worker_index'] batch_size = 100000 batch_size = int(np.ceil(batch_size / num_worker)) torch.manual_seed(random_seed) # read file(dataset) from s3 file = get_object(bucket, key).read().decode('utf-8').split("\n") print("read data cost {} s".format(time.time() - start_time)) parse_start = time.time() dataset = DenseDatasetWithLines(file, num_features) preprocess_start = time.time() print("libsvm operation cost {}s".format(parse_start - preprocess_start)) # Creating data indices for training and validation splits: dataset_size = len(dataset) print("dataset size = {}".format(dataset_size)) indices = list(range(dataset_size)) split = int(np.floor(validation_ratio * dataset_size)) if shuffle_dataset: np.random.seed(random_seed) np.random.shuffle(indices) train_indices, val_indices = indices[split:], indices[:split] # Creating PT data samplers and loaders: train_sampler = SubsetRandomSampler(train_indices) valid_sampler = SubsetRandomSampler(val_indices) train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler) validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler) print("preprocess data cost {} s".format(time.time() - preprocess_start)) model = LogisticRegression(num_features, num_classes) # Loss and Optimizer # Softmax is internally computed. # Set parameters to be updated. criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) train_loss = [] test_loss = [] test_acc = [] epoch_time = 0 epoch_start = time.time() # Training the Model for epoch in range(num_epochs): tmp_train = 0 for batch_index, (items, labels) in enumerate(train_loader): print("------worker {} epoch {} batch {}------".format( worker_index, epoch, batch_index)) items = Variable(items.view(-1, num_features)) labels = Variable(labels) # Forward + Backward + Optimize optimizer.zero_grad() outputs = model(items) loss = criterion(outputs, labels) loss.backward() w_grad = model.linear.weight.grad.data.numpy() b_grad = model.linear.bias.grad.data.numpy() #synchronization starts from that every worker writes their gradients of this batch and epoch sync_start = time.time() hset_object(endpoint, grad_bucket, w_grad_prefix + str(worker_index), w_grad.tobytes()) hset_object(endpoint, grad_bucket, b_grad_prefix + str(worker_index), b_grad.tobytes()) tmp_write_local_epoch_time = time.time() - sync_start print("write local gradient cost = {}".format( tmp_write_local_epoch_time)) #merge gradients among files file_postfix = "{}_{}".format(epoch, batch_index) if worker_index == 0: w_grad_merge, b_grad_merge = \ merge_w_b_grads(endpoint, grad_bucket, num_worker, w_grad.dtype, w_grad.shape, b_grad.shape, w_grad_prefix, b_grad_prefix) put_merged_w_b_grads(endpoint, model_bucket, w_grad_merge, b_grad_merge, file_postfix, w_grad_prefix, b_grad_prefix) hset_object(endpoint, model_bucket, "epoch", epoch) hset_object(endpoint, model_bucket, "index", batch_index) else: w_grad_merge, b_grad_merge = get_merged_w_b_grads( endpoint, model_bucket, file_postfix, w_grad.dtype, w_grad.shape, b_grad.shape, w_grad_prefix, b_grad_prefix) 
            model.linear.weight.grad = Variable(torch.from_numpy(w_grad_merge))
            model.linear.bias.grad = Variable(torch.from_numpy(b_grad_merge))

            tmp_sync_time = time.time() - sync_start
            print("synchronization cost {} s".format(tmp_sync_time))

            optimizer.step()
            tmp_train = tmp_train + loss.item()

        train_loss.append(tmp_train / (batch_index + 1))
        epoch_time += time.time() - epoch_start

        # Test the Model
        correct = 0
        total = 0
        tmp_test = 0
        count = 0
        for items, labels in validation_loader:
            items = Variable(items.view(-1, num_features))
            outputs = model(items)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()
            loss = criterion(outputs, labels)
            tmp_test = tmp_test + loss.item()
            count += 1
        print('Accuracy of the model on the %d test samples: %d %%'
              % (len(val_indices), 100 * correct / total))
        test_loss.append(tmp_test / count)
        test_acc.append(100 * correct / total)
        epoch_start = time.time()

    # pickle the loss record; bytes() would fail on a nested list of floats
    loss_record = [test_loss, test_acc, train_loss, epoch_time]
    put_object("grad-average-loss", "grad-loss{}".format(worker_index),
               pickle.dumps(loss_record))
def reduce_scatter_batch_multi_bucket(vector, tmp_bucket_prefix, merged_bucket_prefix,
                                      num_buckets, num_workers, myrank, postfix):
    """Scatter-reduce a 1-d numpy array through multiple S3 buckets: chunk i is
    aggregated by worker i, and chunks are spread over num_buckets buckets
    (chunk i lives in bucket i % num_buckets) to reduce per-bucket contention."""
    num_all_values = vector.size
    num_values_per_worker = num_all_values // num_workers
    residue = num_all_values % num_workers
    curr_epoch = postfix.split("_")[0]
    curr_batch = postfix.split("_")[1]

    my_offset = (num_values_per_worker * myrank) + min(residue, myrank)
    my_length = num_values_per_worker + (1 if myrank < residue else 0)
    my_chunk = vector[my_offset:my_offset + my_length]

    # write partitioned vector to the shared memory, except the chunk charged by myself
    for i in range(num_workers):
        if i != myrank:
            offset = (num_values_per_worker * i) + min(residue, i)
            length = num_values_per_worker + (1 if i < residue else 0)
            # indicating the chunk number and which worker it comes from
            key = "{}_{}".format(i, myrank)
            tmp_bucket_ind = i % num_buckets
            tmp_bucket = "{}-{}".format(tmp_bucket_prefix, tmp_bucket_ind)
            # format of key in tmp-bucket: chunkID_workerID_epoch_batch
            put_object(tmp_bucket, key + '_' + postfix, vector[offset:offset + length].tobytes())

    # read and aggregate the corresponding chunk
    num_files = 0
    tmp_bucket_ind = myrank % num_buckets
    tmp_bucket = "{}-{}".format(tmp_bucket_prefix, tmp_bucket_ind)
    print("worker [{}] read and aggregate the corresponding chunks in bucket {}"
          .format(myrank, tmp_bucket))
    while num_files < num_workers - 1:
        objects = list_bucket_objects(tmp_bucket)
        if objects is not None:
            for obj in objects:
                file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                key_splits = file_key.split("_")
                # if it's the chunk I am responsible for and it comes from the current step
                # format of key in tmp-bucket: chunkID_workerID_epoch_batch
                if key_splits[0] == str(myrank) and key_splits[2] == curr_epoch \
                        and key_splits[3] == curr_batch:
                    print("get obj = {}".format(file_key))
                    data = get_object(tmp_bucket, file_key).read()
                    bytes_data = np.frombuffer(data, dtype=vector.dtype)
                    my_chunk = my_chunk + bytes_data
                    num_files += 1
                    delete_object(tmp_bucket, file_key)

    merged_bucket_ind = myrank % num_buckets
    my_merged_bucket = "{}-{}".format(merged_bucket_prefix, merged_bucket_ind)
    # write the aggregated chunk back
    # key format in merged_bucket: chunkID_epoch_batch
    put_object(my_merged_bucket, str(myrank) + '_' + postfix, my_chunk.tobytes())

    # read other aggregated chunks
    merged_value = {myrank: my_chunk}

    # number of chunks expected in each merged bucket
    # (chunk i is written to bucket i % num_buckets)
    bucket_num_objs = []
    if num_workers % num_buckets == 0:
        bucket_num_objs = [num_workers // num_buckets for _ in range(num_buckets)]
    else:
        for i in range(num_workers % num_buckets):
            bucket_num_objs.append(num_workers // num_buckets + 1)
        for i in range(num_workers % num_buckets, num_buckets):
            bucket_num_objs.append(num_workers // num_buckets)
    # do not count the chunk I am responsible for
    bucket_num_objs[myrank % num_buckets] -= 1
    print("bucket num objs = {}".format(bucket_num_objs))

    num_merged_files = 0
    already_read = []
    bucket_num_merged = [0 for _ in range(num_buckets)]
    while num_merged_files < num_workers - 1:
        for i in range(num_buckets):
            if bucket_num_merged[i] < bucket_num_objs[i]:
                merged_bucket = "{}-{}".format(merged_bucket_prefix, i)
                objects = list_bucket_objects(merged_bucket)
                if objects is not None:
                    for obj in objects:
                        file_key = urllib.parse.unquote_plus(obj["Key"], encoding='utf-8')
                        key_splits = file_key.split("_")
                        # key format in merged_bucket: chunkID_epoch_batch
                        if key_splits[0] != str(myrank) and key_splits[1] == curr_epoch \
                                and key_splits[2] == curr_batch and file_key not in already_read:
                            print("merge obj = {}".format(file_key))
                            data = get_object(merged_bucket, file_key).read()
                            bytes_data = np.frombuffer(data, dtype=vector.dtype)
                            merged_value[int(key_splits[0])] = bytes_data
                            already_read.append(file_key)
                            bucket_num_merged[i] += 1
                            num_merged_files += 1

    # reconstruct the whole vector from the chunks, in rank order
    result = merged_value[0]
    for k in range(1, num_workers):
        result = np.concatenate((result, merged_value[k]))
    return result
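# --- Usage sketch (not part of the original source): the multi-bucket variant assumes
# the buckets "<tmp_bucket_prefix>-0" ... "<tmp_bucket_prefix>-(num_buckets-1)" and the
# corresponding "<merged_bucket_prefix>-*" buckets already exist. A hypothetical call:
#
#   merged = reduce_scatter_batch_multi_bucket(
#       grad_vector, "tmp-grads", "merged-grads",
#       num_buckets=4, num_workers=num_workers,
#       myrank=worker_index, postfix="{}_{}".format(epoch, batch_index))
#   averaged = merged / float(num_workers)
#
# Spreading chunks over several buckets only reduces LIST/GET contention; the key
# naming scheme (chunkID_workerID_epoch_batch) is unchanged.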
def handler(event, context):
    startTs = time.time()
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'], encoding='utf-8')
    print('bucket = {}'.format(bucket))
    print('key = {}'.format(key))

    key_splits = key.split("_")
    worker_index = int(key_splits[0])
    num_worker = int(key_splits[1])
    sync_meta = SyncMeta(worker_index, num_worker)
    print("synchronization meta {}".format(sync_meta.__str__()))

    # read file from s3
    file = get_object(bucket, key).read().decode('utf-8').split("\n")
    print("read data cost {} s".format(time.time() - startTs))

    parse_start = time.time()
    dataset = DenseDatasetWithLines(file, num_features)
    print("parse data cost {} s".format(time.time() - parse_start))

    preprocess_start = time.time()
    # Creating data indices for training and validation splits:
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_ratio * dataset_size))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]

    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)

    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset,
                                                    batch_size=batch_size,
                                                    sampler=valid_sampler)
    print("preprocess data cost {} s".format(time.time() - preprocess_start))

    model = LogisticRegression(num_features, num_classes)

    # Loss and Optimizer
    # Softmax is internally computed.
    # Set parameters to be updated.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Training the Model
    for epoch in range(num_epochs):
        for batch_index, (items, labels) in enumerate(train_loader):
            print("------worker {} epoch {} batch {}------".format(worker_index, epoch, batch_index))
            batch_start = time.time()
            items = Variable(items.view(-1, num_features))
            labels = Variable(labels)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = model(items)
            loss = criterion(outputs, labels)
            loss.backward()
            print("forward and backward cost {} s".format(time.time() - batch_start))

            w_grad = model.linear.weight.grad.data.numpy()
            b_grad = model.linear.bias.grad.data.numpy()
            print("w_grad before merge = {}".format(w_grad[0][0:5]))
            print("b_grad before merge = {}".format(b_grad))

            # every worker uploads its local gradients to the gradient bucket
            sync_start = time.time()
            put_object(grad_bucket, w_grad_prefix + str(worker_index), w_grad.tobytes())
            put_object(grad_bucket, b_grad_prefix + str(worker_index), b_grad.tobytes())

            file_postfix = "{}_{}".format(epoch, batch_index)
            if worker_index == 0:
                # worker 0 merges all gradients and publishes the averaged result
                w_grad_merge, b_grad_merge = \
                    merge_w_b_grads(grad_bucket, num_worker, w_grad.dtype,
                                    w_grad.shape, b_grad.shape,
                                    w_grad_prefix, b_grad_prefix)
                put_merged_w_b_grad(model_bucket, w_grad_merge, b_grad_merge,
                                    file_postfix, w_grad_prefix, b_grad_prefix)
                delete_expired_w_b(model_bucket, epoch, batch_index,
                                   w_grad_prefix, b_grad_prefix)
                model.linear.weight.grad = Variable(torch.from_numpy(w_grad_merge))
                model.linear.bias.grad = Variable(torch.from_numpy(b_grad_merge))
            else:
                # other workers wait for the merged gradients
                w_grad_merge, b_grad_merge = \
                    get_merged_w_b_grad(model_bucket, file_postfix, w_grad.dtype,
                                        w_grad.shape, b_grad.shape,
                                        w_grad_prefix, b_grad_prefix)
                model.linear.weight.grad = Variable(torch.from_numpy(w_grad_merge))
                model.linear.bias.grad = Variable(torch.from_numpy(b_grad_merge))

            print("w_grad after merge = {}".format(model.linear.weight.grad.data.numpy()[0][:5]))
            print("b_grad after merge = {}".format(model.linear.bias.grad.data.numpy()))
            print("synchronization cost {} s".format(time.time() - sync_start))

            optimizer.step()
            print("batch cost {} s".format(time.time() - batch_start))

            if (batch_index + 1) % 10 == 0:
                print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f'
                      % (epoch + 1, num_epochs, batch_index + 1,
                         len(train_indices) / batch_size, loss.data))

    if worker_index == 0:
        clear_bucket(model_bucket)
        clear_bucket(grad_bucket)

    # Test the Model
    correct = 0
    total = 0
    for items, labels in validation_loader:
        items = Variable(items.view(-1, num_features))
        outputs = model(items)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()

    print('Accuracy of the model on the %d test samples: %d %%'
          % (len(val_indices), 100 * correct / total))

    endTs = time.time()
    print("elapsed time = {} s".format(endTs - startTs))
def handler(event, context): start_time = time.time() bucket = event['bucket_name'] worker_index = event['rank'] num_workers = event['num_workers'] key = event['file'] tmp_bucket = event['tmp_bucket'] merged_bucket = event['merged_bucket'] num_epochs = event['num_epochs'] learning_rate = event['learning_rate'] batch_size = event['batch_size'] print('bucket = {}'.format(bucket)) print("file = {}".format(key)) print('tmp bucket = {}'.format(tmp_bucket)) print('merged bucket = {}'.format(merged_bucket)) print('number of workers = {}'.format(num_workers)) print('worker index = {}'.format(worker_index)) print('num epochs = {}'.format(num_epochs)) print('learning rate = {}'.format(learning_rate)) print("batch size = {}".format(batch_size)) # read file from s3 file = get_object(bucket, key).read().decode('utf-8').split("\n") print("read data cost {} s".format(time.time() - start_time)) parse_start = time.time() dataset = DenseDatasetWithLines(file, num_features) print("parse data cost {} s".format(time.time() - parse_start)) preprocess_start = time.time() # Creating data indices for training and validation splits: dataset_size = len(dataset) indices = list(range(dataset_size)) split = int(np.floor(validation_ratio * dataset_size)) if shuffle_dataset: np.random.seed(random_seed) np.random.shuffle(indices) train_indices, val_indices = indices[split:], indices[:split] # Creating PT data samplers and loaders: train_sampler = SubsetRandomSampler(train_indices) valid_sampler = SubsetRandomSampler(val_indices) train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler) validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler) print("preprocess data cost {} s, dataset size = {}".format( time.time() - preprocess_start, dataset_size)) model = LogisticRegression(num_features, num_classes) # Loss and Optimizer # Softmax is internally computed. # Set parameters to be updated. 
criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # Training the Model train_start = time.time() for epoch in range(num_epochs): epoch_start = time.time() epoch_loss = 0 for batch_index, (items, labels) in enumerate(train_loader): # print("------worker {} epoch {} batch {}------".format(worker_index, epoch, batch_index)) batch_start = time.time() items = Variable(items.view(-1, num_features)) labels = Variable(labels) # Forward + Backward + Optimize optimizer.zero_grad() outputs = model(items) loss = criterion(outputs, labels) epoch_loss += loss.data loss.backward() optimizer.step() # Test the Model test_start = time.time() correct = 0 total = 0 test_loss = 0 for items, labels in validation_loader: items = Variable(items.view(-1, num_features)) labels = Variable(labels) outputs = model(items) test_loss += criterion(outputs, labels).data _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum() test_time = time.time() - test_start print( 'Epoch: [%d/%d], Step: [%d/%d], Time: %.4f, Loss: %.4f, epoch cost %.4f, ' 'batch cost %.4f s: test cost %.4f s, ' 'accuracy of the model on the %d test samples: %d %%, loss = %f' % (epoch + 1, num_epochs, batch_index + 1, len(train_indices) / batch_size, time.time() - train_start, epoch_loss.data, time.time() - epoch_start, time.time() - batch_start, test_time, len(val_indices), 100 * correct / total, test_loss / total)) w = model.linear.weight.data.numpy() w_shape = w.shape b = model.linear.bias.data.numpy() b_shape = b.shape w_and_b = np.concatenate((w.flatten(), b.flatten())) cal_time = time.time() - epoch_start print("Epoch {} calculation cost = {} s".format(epoch, cal_time)) sync_start = time.time() postfix = "{}".format(epoch) u_w_b_merge = reduce_epoch(w_and_b, tmp_bucket, merged_bucket, num_workers, worker_index, postfix) w_mean = u_w_b_merge[:w_shape[0] * w_shape[1]].reshape(w_shape) / float(num_workers) b_mean = u_w_b_merge[w_shape[0] * w_shape[1]:].reshape( b_shape[0]) / float(num_workers) model.linear.weight.data = torch.from_numpy(w_mean) model.linear.bias.data = torch.from_numpy(b_mean) sync_time = time.time() - sync_start print("Epoch {} synchronization cost {} s".format(epoch, sync_time)) if worker_index == 0: delete_expired_merged_epoch(merged_bucket, epoch) # # # #file_postfix = "{}_{}".format(epoch, worker_index) # if epoch < num_epochs - 1: # if worker_index == 0: # w_merge, b_merge = merge_w_b(model_bucket, num_workers, w.dtype, # w.shape, b.shape, tmp_w_prefix, tmp_b_prefix) # put_merged_w_b(model_bucket, w_merge, b_merge, # str(epoch), w_prefix, b_prefix) # delete_expired_w_b_by_epoch(model_bucket, epoch, tmp_w_prefix, tmp_b_prefix) # model.linear.weight.data = torch.from_numpy(w_merge) # model.linear.bias.data = torch.from_numpy(b_merge) # else: # w_merge, b_merge = get_merged_w_b(model_bucket, str(epoch), w.dtype, # w.shape, b.shape, w_prefix, b_prefix) # model.linear.weight.data = torch.from_numpy(w_merge) # model.linear.bias.data = torch.from_numpy(b_merge) #print("weight after sync = {}".format(model.linear.weight.data.numpy()[0][:5])) #print("bias after sync = {}".format(model.linear.bias.data.numpy())) # print("epoch {} synchronization cost {} s".format(epoch, time.time() - sync_start)) # Test the Model correct = 0 total = 0 for items, labels in validation_loader: items = Variable(items.view(-1, num_features)) # items = Variable(items) outputs = model(items) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) 
        correct += (predicted == labels).sum()

    print('Accuracy of the model on the %d test samples: %d %%'
          % (len(val_indices), 100 * correct / total))

    if worker_index == 0:
        clear_bucket(merged_bucket)
        clear_bucket(tmp_bucket)

    end_time = time.time()
    print("Elapsed time = {} s".format(end_time - start_time))
def handler(event, context): start_time = time.time() bucket = event['bucket_name'] worker_index = event['rank'] num_workers = event['num_workers'] key = event['file'] merged_bucket = event['merged_bucket'] num_epochs = event['num_epochs'] learning_rate = event['learning_rate'] batch_size = event['batch_size'] elasti_location = event['elasticache'] endpoint = memcached_init(elasti_location) print('bucket = {}'.format(bucket)) print("file = {}".format(key)) print('merged bucket = {}'.format(merged_bucket)) print('number of workers = {}'.format(num_workers)) print('worker index = {}'.format(worker_index)) print('num epochs = {}'.format(num_epochs)) print('learning rate = {}'.format(learning_rate)) print("batch size = {}".format(batch_size)) # read file from s3 file = get_object(bucket, key).read().decode('utf-8').split("\n") print("read data cost {} s".format(time.time() - start_time)) parse_start = time.time() dataset = DenseDatasetWithLines(file, num_features) print("parse data cost {} s".format(time.time() - parse_start)) preprocess_start = time.time() # Creating data indices for training and validation splits: dataset_size = len(dataset) indices = list(range(dataset_size)) split = int(np.floor(validation_ratio * dataset_size)) if shuffle_dataset: np.random.seed(random_seed) np.random.shuffle(indices) train_indices, val_indices = indices[split:], indices[:split] # Creating PT data samplers and loaders: train_sampler = SubsetRandomSampler(train_indices) valid_sampler = SubsetRandomSampler(val_indices) train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler) validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler) print("preprocess data cost {} s, dataset size = {}".format( time.time() - preprocess_start, dataset_size)) model = SVM(num_features, num_classes) # Loss and Optimizer # Softmax is internally computed. # Set parameters to be updated. 
criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) train_loss = [] test_loss = [] test_acc = [] epoch_time = 0 # Training the Model epoch_start = time.time() for epoch in range(num_epochs): tmp_train = 0 for batch_index, (items, labels) in enumerate(train_loader): print("------worker {} epoch {} batch {}------".format( worker_index, epoch, batch_index)) batch_start = time.time() items = Variable(items.view(-1, num_features)) labels = Variable(labels) # Forward + Backward + Optimize optimizer.zero_grad() outputs = model(items) loss = criterion(outputs, labels) loss.backward() optimizer.step() if (batch_index + 1) % 1 == 0: print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' % (epoch + 1, num_epochs, batch_index + 1, len(train_indices) / batch_size, loss.data)) tmp_train = tmp_train + loss.item() train_loss.append(tmp_train / (batch_index + 1)) # sync model w_model = model.linear.weight.data.numpy() b_model = model.linear.bias.data.numpy() epoch_time = time.time() - epoch_start + epoch_time # synchronization starts from that every worker writes their model after this epoch sync_start = time.time() hset_object(endpoint, merged_bucket, w_prefix + str(worker_index), w_model.tobytes()) hset_object(endpoint, merged_bucket, b_prefix + str(worker_index), b_model.tobytes()) tmp_write_local_epoch_time = time.time() - sync_start print("write local model cost = {}".format(tmp_write_local_epoch_time)) # merge gradients among files file_postfix = "{}".format(epoch) if worker_index == 0: merge_start = time.time() w_model_merge, b_model_merge = merge_w_b_grads( endpoint, merged_bucket, num_workers, w_model.dtype, w_model.shape, b_model.shape, w_prefix, b_prefix) put_merged_w_b_grads(endpoint, merged_bucket, w_model_merge, b_model_merge, file_postfix, w_prefix, b_prefix) else: w_model_merge, b_model_merge = get_merged_w_b_grads( endpoint, merged_bucket, file_postfix, w_model.dtype, w_model.shape, b_model.shape, w_prefix, b_prefix) model.linear.weight.data = Variable(torch.from_numpy(w_model_merge)) model.linear.bias.data = Variable(torch.from_numpy(b_model_merge)) tmp_sync_time = time.time() - sync_start print("synchronization cost {} s".format(tmp_sync_time)) # Test the Model correct = 0 total = 0 count = 0 tmp_test = 0 for items, labels in validation_loader: items = Variable(items.view(-1, num_features)) outputs = model(items) loss = criterion(outputs, labels) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum() tmp_test = tmp_test + loss.item() count = count + 1 # print('Accuracy of the model on the %d test samples: %d %%' % (len(val_indices), 100 * correct / total)) test_acc.append(100 * correct / total) test_loss.append(tmp_test / count) epoch_start = time.time() end_time = time.time() print("elapsed time = {} s".format(end_time - start_time)) loss_record = [test_loss, test_acc, train_loss, epoch_time] put_object("model-average-loss", "average_loss{}".format(worker_index), pickle.dumps(loss_record))
def handler(event, context): start_time = time.time() bucket = event['bucket'] key = event['name'] num_features = event['num_features'] num_classes = event['num_classes'] print('bucket = {}'.format(bucket)) print('key = {}'.format(key)) key_splits = key.split("_") worker_index = int(key_splits[0]) num_worker = int(key_splits[1]) # read file(dataset) from s3 file = get_object(bucket, key).read().decode('utf-8').split("\n") print("read data cost {} s".format(time.time() - start_time)) parse_start = time.time() dataset = DenseDatasetWithLines(file, num_features) preprocess_start = time.time() print("libsvm operation cost {}s".format(parse_start - preprocess_start)) # Creating data indices for training and validation splits: dataset_size = len(dataset) print("dataset size = {}".format(dataset_size)) indices = list(range(dataset_size)) split = int(np.floor(validation_ratio * dataset_size)) if shuffle_dataset: np.random.seed(random_seed) np.random.shuffle(indices) train_indices, val_indices = indices[split:], indices[:split] # Creating PT data samplers and loaders: train_sampler = SubsetRandomSampler(train_indices) valid_sampler = SubsetRandomSampler(val_indices) train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler) validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler) print("preprocess data cost {} s".format(time.time() - preprocess_start)) model = LogisticRegression(num_features, num_classes) # Loss and Optimizer # Softmax is internally computed. # Set parameters to be updated. criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # Training the Model for epoch in range(num_epochs): for batch_index, (items, labels) in enumerate(train_loader): print("------worker {} epoch {} batch {}------".format( worker_index, epoch, batch_index)) batch_start = time.time() items = Variable(items.view(-1, num_features)) labels = Variable(labels) # Forward + Backward + Optimize optimizer.zero_grad() outputs = model(items) loss = criterion(outputs, labels) loss.backward() print("forward and backward cost {} s".format(time.time() - batch_start)) w_grad = model.linear.weight.grad.data.numpy() b_grad = model.linear.bias.grad.data.numpy() print("w_grad before merge = {}".format(w_grad[0][0:5])) print("b_grad before merge = {}".format(b_grad)) #synchronization starts from that every worker writes their gradients of this batch and epoch sync_start = time.time() hset_object(endpoint, grad_bucket, w_grad_prefix + str(worker_index), w_grad.tobytes()) hset_object(endpoint, grad_bucket, b_grad_prefix + str(worker_index), b_grad.tobytes()) #merge gradients among files merge_start = time.time() file_postfix = "{}_{}".format(epoch, batch_index) if worker_index == 0: merge_start = time.time() w_grad_merge, b_grad_merge = \ merge_w_b_grads(endpoint, grad_bucket, num_worker, w_grad.dtype, w_grad.shape, b_grad.shape, w_grad_prefix, b_grad_prefix) print("model average time = {}".format(time.time() - merge_start)) #possible rewrite the file before being accessed. wait until anyone finishes accessing. 
put_merged_w_b_grads(endpoint, model_bucket, w_grad_merge, b_grad_merge, w_grad_prefix, b_grad_prefix) hset_object(endpoint, model_bucket, "epoch", epoch) hset_object(endpoint, model_bucket, "index", batch_index) #delete_expired_w_b(endpoint, # model_bucket, epoch, batch_index, w_grad_prefix, b_grad_prefix) model.linear.weight.grad = Variable( torch.from_numpy(w_grad_merge)) model.linear.bias.grad = Variable( torch.from_numpy(b_grad_merge)) else: # wait for flag to access while hget_object(endpoint, model_bucket, "epoch") != None: if int(hget_object(endpoint, model_bucket, "epoch")) == epoch \ and int(hget_object(endpoint, model_bucket, "index")) == batch_index: break time.sleep(0.01) w_grad_merge, b_grad_merge = get_merged_w_b_grads( endpoint, model_bucket, w_grad.dtype, w_grad.shape, b_grad.shape, w_grad_prefix, b_grad_prefix) hcounter(endpoint, model_bucket, "counter") #flag it if it's accessed. print("number of access at this time = {}".format( int(hget_object(endpoint, model_bucket, "counter")))) model.linear.weight.grad = Variable( torch.from_numpy(w_grad_merge)) model.linear.bias.grad = Variable( torch.from_numpy(b_grad_merge)) print("w_grad after merge = {}".format( model.linear.weight.grad.data.numpy()[0][:5])) print("b_grad after merge = {}".format( model.linear.bias.grad.data.numpy())) print("synchronization cost {} s".format(time.time() - sync_start)) optimizer.step() print("batch cost {} s".format(time.time() - batch_start)) if (batch_index + 1) % 10 == 0: print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' % (epoch + 1, num_epochs, batch_index + 1, len(train_indices) / batch_size, loss.data)) """ if worker_index == 0: while sync_counter(endpoint, bucket, num_workers): time.sleep(0.001) clear_bucket(endpoint, model_bucket) clear_bucket(endpoint, grad_bucket) """ # Test the Model correct = 0 total = 0 for items, labels in validation_loader: items = Variable(items.view(-1, num_features)) outputs = model(items) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum() print('Accuracy of the model on the %d test samples: %d %%' % (len(val_indices), 100 * correct / total)) end_time = time.time() print("elapsed time = {} s".format(end_time - start_time))
def handler(event, context):
    avg_error = np.iinfo(np.int16).max

    num_features = event['num_features']
    num_clusters = event['num_clusters']
    worker_cent_bucket = event["worker_cent_bucket"]
    avg_cent_bucket = event["avg_cent_bucket"]
    num_epochs = event["num_epochs"]
    threshold = event["threshold"]
    dataset_type = event["dataset_type"]
    elastic_location = event["elasticache"]
    elastic_endpoint = memcached_init(elastic_location)
    print(elastic_endpoint)

    # Reading data from S3
    bucket_name = event['bucket_name']
    key = urllib.parse.unquote_plus(event['key'], encoding='utf-8')
    logger.info(f"Reading training data from bucket = {bucket_name}, key = {key}")
    key_splits = key.split("_")
    num_worker = int(key_splits[-1])
    worker_index = int(key_splits[0])

    event_start = time.time()
    file = get_object(bucket_name, key).read().decode('utf-8').split("\n")
    s3_end = time.time()
    logger.info(f"Getting object from s3 takes {s3_end - event_start}s")

    if dataset_type == "dense":
        # dataset is stored as numpy array
        dataset = DenseDatasetWithLines(file, num_features).ins_np
        dt = dataset.dtype
        centroid_shape = (num_clusters, dataset.shape[1])
    else:
        # dataset is sparse, stored as sparse tensor
        dataset = SparseDatasetWithLines(file, num_features)
        first_entry = dataset.ins_list[0].to_dense().numpy()
        dt = first_entry.dtype
        centroid_shape = (num_clusters, first_entry.shape[1])
    parse_end = time.time()
    logger.info(f"Parsing dataset takes {parse_end - s3_end}s")
    logger.info(f"worker index: {worker_index}, dataset: {dataset_type}, dtype: {dt}. "
                f"Centroids shape: {centroid_shape}. num_features: {num_features}")

    if worker_index == 0:
        # worker 0 publishes the initial centroids
        if dataset_type == "dense":
            centroids = dataset[0:num_clusters].reshape(-1)
            hset_object(elastic_endpoint, avg_cent_bucket, "initial", centroids.tobytes())
            centroids = centroids.reshape(centroid_shape)
        else:
            centroids = store_centroid_as_numpy(dataset.ins_list[0:num_clusters], num_clusters)
            hset_object(elastic_endpoint, avg_cent_bucket, "initial", centroids.tobytes())
    else:
        # other workers wait for the initial centroids
        cent = hget_object_or_wait(elastic_endpoint, avg_cent_bucket, "initial", 0.00001)
        centroids = process_centroid(cent, num_clusters, dt)
        if centroid_shape != centroids.shape:
            logger.error("The shape of centroids does not match.")
        logger.info(f"Waiting for initial centroids takes {time.time() - parse_end} s")

    training_start = time.time()
    sync_time = 0
    for epoch in range(num_epochs):
        logger.info(f"{worker_index}-th worker in {epoch}-th epoch")
        epoch_start = time.time()
        if epoch != 0:
            last_epoch = epoch - 1
            cent_with_error = hget_object_or_wait(elastic_endpoint, avg_cent_bucket,
                                                  f"avg-{last_epoch}", 0.00001)
            wait_end = time.time()
            if worker_index != 0:
                logger.info(f"Wait for centroid for {epoch}-th epoch. Takes {wait_end - epoch_start}")
                sync_time += wait_end - epoch_start
            avg_error, centroids = process_centroid(cent_with_error, num_clusters, dt, True)

        if avg_error >= threshold:
            print("get new centroids")
            res = get_new_centroids(dataset, dataset_type, centroids, epoch,
                                    num_features, num_clusters)

            sync_start = time.time()
            success = hset_object(elastic_endpoint, worker_cent_bucket,
                                  f"{worker_index}_{epoch}", res.tobytes())

            if worker_index == 0 and success:
                compute_average_centroids(elastic_endpoint, avg_cent_bucket, worker_cent_bucket,
                                          num_worker, centroid_shape, epoch, dt)
                logger.info(f"Waiting for all workers takes {time.time() - sync_start} s")
                if epoch != 0:
                    sync_time += time.time() - sync_start
        else:
            print("sync time = {}".format(sync_time))
            logger.info(f"{worker_index}-th worker finished training. "
                        f"Error = {avg_error}, centroids = {centroids}")
            logger.info(f"Whole process time : {time.time() - training_start}")
            return

    print("sync time = {}".format(sync_time))
    # tobytes() replaces the deprecated tostring()
    put_object("kmeans-time", "time_{}".format(worker_index), np.asarray(sync_time).tobytes())
def handler(event, context):
    start_time = time.time()
    bucket = event['bucket_name']
    worker_index = event['rank']
    num_workers = event['num_workers']
    key = event['file']
    merged_bucket = event['merged_bucket']
    num_classes = event['num_classes']
    num_features = event['num_features']
    pos_tag = event['pos_tag']
    num_epochs = event['num_epochs']
    learning_rate = event['learning_rate']
    batch_size = event['batch_size']
    elasti_location = event['elasticache']
    endpoint = memcached_init(elasti_location)

    print('bucket = {}'.format(bucket))
    print("file = {}".format(key))
    print('merged bucket = {}'.format(merged_bucket))
    print('number of workers = {}'.format(num_workers))
    print('worker index = {}'.format(worker_index))
    print('num epochs = {}'.format(num_epochs))
    print('learning rate = {}'.format(learning_rate))
    print("batch size = {}".format(batch_size))

    # read file from s3
    file = get_object(bucket, key).read().decode('utf-8').split("\n")
    print("read data cost {} s".format(time.time() - start_time))

    parse_start = time.time()
    dataset = DenseLibsvmDataset(file, num_features, pos_tag)
    total_count = len(dataset)
    pos_count = 0
    for i in range(total_count):
        if dataset[i][1] == 1:
            pos_count += 1
    print("{} positive observations out of {}".format(pos_count, total_count))
    print("parse data cost {} s".format(time.time() - parse_start))

    preprocess_start = time.time()
    # create data indices for training and validation splits
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_ratio * dataset_size))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]

    # create PyTorch data samplers and loaders
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                    sampler=valid_sampler)
    print("preprocess data cost {} s".format(time.time() - preprocess_start))

    model = SVM(num_features, num_classes)

    # loss and optimizer; softmax is computed internally by CrossEntropyLoss
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # train the model
    train_start = time.time()
    for epoch in range(num_epochs):
        epoch_start = time.time()
        epoch_loss = 0
        cal_time = 0
        sync_time = 0
        for batch_index, (items, labels) in enumerate(train_loader):
            batch_start = time.time()
            items = Variable(items.view(-1, num_features))
            labels = Variable(labels)

            # forward + backward
            optimizer.zero_grad()
            outputs = model(items)
            loss = criterion(outputs, labels)
            epoch_loss += loss.data
            loss.backward()

            # flatten weight and bias gradients into one vector for the reduce
            w_grad = model.linear.weight.grad.data.numpy()
            w_grad_shape = w_grad.shape
            b_grad = model.linear.bias.grad.data.numpy()
            b_grad_shape = b_grad.shape
            w_b_grad = np.concatenate((w_grad.flatten(), b_grad.flatten()))
            cal_time += time.time() - batch_start

            # synchronize gradients across workers, then average
            sync_start = time.time()
            postfix = "{}_{}".format(epoch, batch_index)
            w_b_grad_merge = reduce_batch(endpoint, w_b_grad, merged_bucket,
                                          num_workers, worker_index, postfix)
            w_grad_merge = w_b_grad_merge[:w_grad_shape[0] * w_grad_shape[1]] \
                .reshape(w_grad_shape) / float(num_workers)
            b_grad_merge = w_b_grad_merge[w_grad_shape[0] * w_grad_shape[1]:] \
                .reshape(b_grad_shape[0]) / float(num_workers)
            model.linear.weight.grad = Variable(torch.from_numpy(w_grad_merge))
            model.linear.bias.grad = Variable(torch.from_numpy(b_grad_merge))
            sync_time += time.time() - sync_start

            optimizer.step()

        # test the model
        test_start = time.time()
        correct = 0
        total = 0
        test_loss = 0
        for items, labels in validation_loader:
            items = Variable(items.view(-1, num_features))
            labels = Variable(labels)
            outputs = model(items)
            test_loss += criterion(outputs, labels).data
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()
        test_time = time.time() - test_start

        print('Epoch %d has %d batches, time = %.4f, epoch cost %.4f s: '
              'computation cost %.4f s communication cost %.4f s, '
              'train loss = %.4f, test cost %.4f s, '
              'accuracy of the model on the %d test samples: %d %%, loss = %f'
              % (epoch, batch_index, time.time() - train_start,
                 time.time() - epoch_start, cal_time, sync_time, epoch_loss,
                 test_time, len(val_indices), 100 * correct / total,
                 test_loss / total))

    if worker_index == 0:
        clear_bucket(endpoint)

    end_time = time.time()
    print("Elapsed time = {} s".format(end_time - start_time))
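# Quick sanity check (illustrative only) for the flatten/merge/unflatten round
# trip used above: concatenating w and b and then slicing at w's size recovers
# both gradients exactly, so the merged vector can be split the same way on
# every worker.
import numpy as np

w = np.arange(6, dtype=np.float32).reshape(2, 3)  # stand-in for the weight grad
b = np.array([7.0, 8.0], dtype=np.float32)        # stand-in for the bias grad
flat = np.concatenate((w.flatten(), b.flatten()))
w2 = flat[:w.size].reshape(w.shape)
b2 = flat[w.size:].reshape(b.shape[0])
assert (w2 == w).all() and (b2 == b).all()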
def handler(event, context):
    try:
        start_time = time.time()
        bucket_name = event['bucket_name']
        worker_index = event['rank']
        num_workers = event['num_workers']
        key = event['file']
        merged_bucket = event['merged_bucket']
        num_features = event['num_features']
        learning_rate = event["learning_rate"]
        batch_size = event["batch_size"]
        num_epochs = event["num_epochs"]
        validation_ratio = event["validation_ratio"]
        elasti_location = event['elasticache']
        endpoint = memcached_init(elasti_location)

        # read training data from S3
        print(f"Reading training data from bucket = {bucket_name}, key = {key}")
        file = get_object(bucket_name, key).read().decode('utf-8').split("\n")
        print("read data cost {} s".format(time.time() - start_time))

        parse_start = time.time()
        dataset = SparseDatasetWithLines(file, num_features)
        print("parse data cost {} s".format(time.time() - parse_start))

        preprocess_start = time.time()
        dataset_size = len(dataset)
        indices = list(range(dataset_size))
        split = int(np.floor(validation_ratio * dataset_size))
        if shuffle_dataset:
            np.random.seed(random_seed)
            np.random.shuffle(indices)
        train_indices, val_indices = indices[split:], indices[:split]
        train_set = [dataset[i] for i in train_indices]
        val_set = [dataset[i] for i in val_indices]
        print("preprocess data cost {} s".format(time.time() - preprocess_start))

        svm = SparseSVM(train_set, val_set, num_features, num_epochs,
                        learning_rate, batch_size)

        # train the model
        train_start = time.time()
        for epoch in range(num_epochs):
            epoch_start = time.time()
            num_batches = math.floor(len(train_set) / batch_size)
            print("worker {} epoch {}".format(worker_index, epoch))
            for batch_idx in range(num_batches):
                batch_ins, batch_label = svm.next_batch(batch_idx)
                acc = svm.one_epoch(batch_idx, epoch)
                if (batch_idx + 1) % 10 == 0:
                    print("Epoch: {}/{}, Step: {}/{}, train acc: {}"
                          .format(epoch + 1, num_epochs, batch_idx + 1,
                                  num_batches, acc))
            cal_time = time.time() - epoch_start

            # synchronize the weight vector across workers once per epoch
            sync_start = time.time()
            np_w = svm.weights.numpy().flatten()
            postfix = str(epoch)
            w_merge = reduce_epoch(endpoint, np_w, merged_bucket,
                                   num_workers, worker_index, postfix)
            svm.weights = torch.from_numpy(w_merge).reshape(num_features, 1)
            sync_time = time.time() - sync_start

            test_start = time.time()
            val_acc = svm.evaluate()
            test_time = time.time() - test_start

            print('Epoch: [%d/%d], Step: [%d/%d], Time: %.4f, epoch cost %.4f, '
                  'cal cost %.4f s, sync cost %.4f s, test cost %.4f s, '
                  'test accuracy: %s %%'
                  % (epoch + 1, num_epochs, batch_idx + 1, num_batches,
                     time.time() - train_start, time.time() - epoch_start,
                     cal_time, sync_time, test_time, val_acc))

        if worker_index == 0:
            clear_bucket(endpoint)
        print("elapsed time = {} s".format(time.time() - start_time))
    except Exception as e:
        print("Error {}".format(e))
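# --- Illustrative invocation (not part of the original module) ---
# A sketch of the event payload the sparse-SVM handler above expects; every
# value below is an assumption chosen for local testing, not a value from the
# original deployment.
sample_event = {
    'bucket_name': "svm-training-data",  # hypothetical S3 bucket
    'rank': 0,
    'num_workers': 2,
    'file': "part-0",                    # hypothetical object key
    'merged_bucket': "svm-merged",       # hypothetical cache key space
    'num_features': 30,
    'learning_rate': 0.01,
    'batch_size': 100,
    'num_epochs': 5,
    'validation_ratio': 0.2,
    'elasticache': "127.0.0.1:11211",    # hypothetical memcached endpoint
}
# handler(sample_event, None)  # uncomment to run locally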
def handler(event, context):
    start_time = time.time()
    bucket = event['bucket_name']
    worker_index = event['rank']
    num_workers = event['num_workers']
    key = event['file']
    merged_bucket = event['merged_bucket']
    num_epochs = event['num_epochs']
    num_admm_epochs = event['num_admm_epochs']
    learning_rate = event['learning_rate']
    lam = event['lambda']
    rho = event['rho']
    batch_size = event['batch_size']
    elasti_location = event['elasticache']
    endpoint = memcached_init(elasti_location)

    print('bucket = {}'.format(bucket))
    print("file = {}".format(key))
    print('number of workers = {}'.format(num_workers))
    print('worker index = {}'.format(worker_index))
    print('merge bucket = {}'.format(merged_bucket))
    print('num epochs = {}'.format(num_epochs))
    print('num admm epochs = {}'.format(num_admm_epochs))
    print('learning rate = {}'.format(learning_rate))
    print("lambda = {}".format(lam))
    print("rho = {}".format(rho))
    print("batch_size = {}".format(batch_size))

    # read file from s3
    file = get_object(bucket, key).read().decode('utf-8').split("\n")
    print("read data cost {} s".format(time.time() - start_time))

    parse_start = time.time()
    dataset = DenseDatasetWithLines(file, num_features)
    print("parse data cost {} s".format(time.time() - parse_start))

    preprocess_start = time.time()
    # create data indices for training and validation splits
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_ratio * dataset_size))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]

    # create PyTorch data samplers and loaders
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                    sampler=valid_sampler)
    print("preprocess data cost {} s, dataset size = {}".format(
        time.time() - preprocess_start, dataset_size))

    model = LogisticRegression(num_features, num_classes).double()
    print("size of w = {}".format(model.linear.weight.data.size()))
    z, u = initialize_z_and_u(model.linear.weight.data.size())
    print("size of z = {}".format(z.shape))
    print("size of u = {}".format(u.shape))

    # loss and optimizer; softmax is computed internally by CrossEntropyLoss
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # train the model
    train_start = time.time()
    for admm_epoch in range(num_admm_epochs):
        print("ADMM Epoch >>> {}".format(admm_epoch))
        for epoch in range(num_epochs):
            epoch_start = time.time()
            epoch_loss = 0
            for batch_index, (items, labels) in enumerate(train_loader):
                batch_start = time.time()
                items = Variable(items.view(-1, num_features))
                labels = Variable(labels)

                # forward + backward + optimize
                optimizer.zero_grad()
                outputs = model(items.double())
                classify_loss = criterion(outputs, labels)
                epoch_loss += classify_loss.data

                # augmented Lagrangian term on the weights: rho/2 * ||w + u - z||
                u_z = torch.from_numpy(u).double() - torch.from_numpy(z).double()
                loss = classify_loss
                for name, param in model.named_parameters():
                    if name.split('.')[-1] == "weight":
                        loss += rho / 2.0 * torch.norm(param + u_z, p=2)

                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()

            # test the model
            test_start = time.time()
            correct = 0
            total = 0
            test_loss = 0
            for items, labels in validation_loader:
                items = Variable(items.view(-1, num_features))
                labels = Variable(labels)
                outputs = model(items.double())
                test_loss += criterion(outputs, labels).data
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            test_time = time.time() - test_start

            print('Epoch: [%d/%d], Step: [%d/%d], Time: %.4f, Loss: %.4f, '
                  'epoch cost %.4f, batch cost %.4f s: test cost %.4f s, '
                  'accuracy of the model on the %d test samples: %d %%, loss = %f'
                  % (epoch + 1, num_epochs, batch_index + 1,
                     len(train_indices) / batch_size, time.time() - train_start,
                     epoch_loss.data, time.time() - epoch_start,
                     time.time() - batch_start, test_time, len(val_indices),
                     100 * correct / total, test_loss / total))

        # flatten u, w, and b into one vector for the reduce
        w = model.linear.weight.data.numpy()
        w_shape = w.shape
        b = model.linear.bias.data.numpy()
        b_shape = b.shape
        u_shape = u.shape
        w_and_b = np.concatenate((w.flatten(), b.flatten()))
        u_w_b = np.concatenate((u.flatten(), w_and_b.flatten()))
        cal_time = time.time() - epoch_start
        print("Epoch {} calculation cost = {} s".format(epoch, cal_time))

        # synchronize u, w, and b across workers, then average
        sync_start = time.time()
        postfix = str(admm_epoch)
        u_w_b_merge = reduce_epoch(endpoint, u_w_b, merged_bucket,
                                   num_workers, worker_index, postfix)
        u_mean = u_w_b_merge[:u_shape[0] * u_shape[1]] \
            .reshape(u_shape) / float(num_workers)
        w_mean = u_w_b_merge[u_shape[0] * u_shape[1]:
                             u_shape[0] * u_shape[1] + w_shape[0] * w_shape[1]] \
            .reshape(w_shape) / float(num_workers)
        b_mean = u_w_b_merge[u_shape[0] * u_shape[1] + w_shape[0] * w_shape[1]:] \
            .reshape(b_shape[0]) / float(num_workers)
        # only the bias is replaced by the average; the weight keeps its local value
        model.linear.bias.data = torch.from_numpy(b_mean)
        sync_time = time.time() - sync_start
        print("Epoch {} synchronization cost {} s".format(epoch, sync_time))

        # ADMM updates: z = N*rho / (2*lam + N*rho) * (w_mean + u_mean),
        # then the dual variable u moves by the new residual
        z = update_z(w_mean, u_mean, rho, num_workers, lam)
        u = u + model.linear.weight.data.numpy() - z

    # test the final model
    correct = 0
    total = 0
    test_loss = 0
    for items, labels in validation_loader:
        items = Variable(items.view(-1, num_features))
        labels = Variable(labels)
        outputs = model(items.double())
        test_loss += criterion(outputs, labels).data
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    print('Epoch: %d, time = %.4f, accuracy of the model on the %d test samples: '
          '%d %%, loss = %f'
          % (epoch, time.time() - train_start, len(val_indices),
             100 * correct / total, test_loss / total))

    if worker_index == 0:
        clear_bucket(endpoint)
    end_time = time.time()
    print("Elapsed time = {} s".format(end_time - start_time))
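# For reference, a sketch of the z-update consistent with the closed form noted
# in the comment above (z = N*rho / (2*lam + N*rho) * (w_mean + u_mean)). The
# update_z actually imported by this module may differ in details.
def update_z_sketch(w_mean, u_mean, rho, num_workers, lam):
    # Minimizing lam*||z||^2 + num_workers*rho/2 * ||w_mean + u_mean - z||^2
    # over z yields this scaled average of the primal and dual means.
    scale = num_workers * rho / (2.0 * lam + num_workers * rho)
    return scale * (w_mean + u_mean)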
def handler(event, context):
    start_time = time.time()
    bucket = event['bucket_name']
    worker_index = event['rank']
    num_workers = event['num_workers']
    key = event['file'].split(",")
    merged_bucket = event['merged_bucket']
    num_classes = event['num_classes']
    num_features = event['num_features']
    pos_tag = event['pos_tag']
    num_epochs = event['num_epochs']
    num_admm_epochs = event['num_admm_epochs']
    learning_rate = event['learning_rate']
    batch_size = event['batch_size']
    lam = event['lambda']
    rho = event['rho']
    elasti_location = event['elasticache']
    endpoint = memcached_init(elasti_location)

    print('bucket = {}'.format(bucket))
    print("file = {}".format(key))
    print('number of workers = {}'.format(num_workers))
    print('worker index = {}'.format(worker_index))
    print('merge bucket = {}'.format(merged_bucket))
    print('num epochs = {}'.format(num_epochs))
    print('num admm epochs = {}'.format(num_admm_epochs))
    print('num classes = {}'.format(num_classes))
    print('num features = {}'.format(num_features))
    print('positive tag = {}'.format(pos_tag))
    print('learning rate = {}'.format(learning_rate))
    print("batch_size = {}".format(batch_size))
    print("lambda = {}".format(lam))
    print("rho = {}".format(rho))

    # read file(s) from s3; a worker may train on several objects
    file = get_object(bucket, key[0]).read().decode('utf-8').split("\n")
    dataset = DenseLibsvmDataset(file, num_features, pos_tag)
    if len(key) > 1:
        for more_key in key[1:]:
            file = get_object(bucket, more_key).read().decode('utf-8').split("\n")
            dataset.add_more(file)
    print("read data cost {} s".format(time.time() - start_time))

    parse_start = time.time()
    total_count = len(dataset)
    pos_count = 0
    for i in range(total_count):
        if dataset[i][1] == 1:
            pos_count += 1
    print("{} positive observations out of {}".format(pos_count, total_count))
    print("parse data cost {} s".format(time.time() - parse_start))

    preprocess_start = time.time()
    # create data indices for training and validation splits
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_ratio * dataset_size))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]

    # create PyTorch data samplers and loaders
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                    sampler=valid_sampler)
    print("preprocess data cost {} s, dataset size = {}".format(
        time.time() - preprocess_start, dataset_size))

    model = SVM(num_features, num_classes).float()
    print("size of w = {}".format(model.linear.weight.data.size()))
    z, u = initialize_z_and_u(model.linear.weight.data.size())
    print("size of z = {}".format(z.shape))
    print("size of u = {}".format(u.shape))

    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # train the model
    train_start = time.time()
    for admm_epoch in range(num_admm_epochs):
        admm_epoch_start = time.time()
        for epoch in range(num_epochs):
            epoch_start = time.time()
            epoch_loss = 0
            for batch_index, (items, labels) in enumerate(train_loader):
                items = Variable(items.view(-1, num_features))
                labels = Variable(labels)

                # forward + backward + optimize
                optimizer.zero_grad()
                outputs = model(items)
                # hinge loss
                classify_loss = torch.mean(
                    torch.clamp(1 - outputs.t() * labels.float(), min=0))
                epoch_loss += classify_loss

                # augmented Lagrangian term on the weights: rho/2 * ||w + u - z||
                u_z = torch.from_numpy(u).float() - torch.from_numpy(z).float()
                loss = classify_loss
                for name, param in model.named_parameters():
                    if name.split('.')[-1] == "weight":
                        loss += rho / 2.0 * torch.norm(param + u_z, p=2)

                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()

            # test the model
            test_start = time.time()
            correct = 0
            total = 0
            test_loss = 0
            for items, labels in validation_loader:
                items = Variable(items.view(-1, num_features))
                labels = Variable(labels)
                outputs = model(items)
                test_loss += torch.mean(
                    torch.clamp(1 - outputs.t() * labels.float(), min=0))
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            test_time = time.time() - test_start

            print('ADMM Epoch: [%d/%d], Epoch: [%d/%d], Batch [%d], '
                  'Time: %.4f, Loss: %.4f, epoch cost %.4f, test cost %.4f s, '
                  'accuracy of the model on the %d test samples: %d %%, loss = %f'
                  % (admm_epoch, num_admm_epochs, epoch, num_epochs, batch_index,
                     time.time() - train_start, epoch_loss.data,
                     time.time() - epoch_start, test_time, len(val_indices),
                     100 * correct / total, test_loss / total))

        # flatten u, w, and b into one vector for the reduce
        w = model.linear.weight.data.numpy()
        w_shape = w.shape
        b = model.linear.bias.data.numpy()
        b_shape = b.shape
        u_shape = u.shape
        w_and_b = np.concatenate((w.flatten(), b.flatten()))
        u_w_b = np.concatenate((u.flatten(), w_and_b.flatten()))
        cal_time = time.time() - epoch_start

        # synchronize u, w, and b across workers, then average
        sync_start = time.time()
        postfix = "{}".format(admm_epoch)
        u_w_b_merge = reduce_epoch(endpoint, u_w_b, merged_bucket,
                                   num_workers, worker_index, postfix)
        u_mean = u_w_b_merge[:u_shape[0] * u_shape[1]] \
            .reshape(u_shape) / float(num_workers)
        w_mean = u_w_b_merge[u_shape[0] * u_shape[1]:
                             u_shape[0] * u_shape[1] + w_shape[0] * w_shape[1]] \
            .reshape(w_shape) / float(num_workers)
        b_mean = u_w_b_merge[u_shape[0] * u_shape[1] + w_shape[0] * w_shape[1]:] \
            .reshape(b_shape[0]) / float(num_workers)
        # only the bias is replaced by the average; the weight keeps its local value
        model.linear.bias.data = torch.from_numpy(b_mean).float()
        sync_time = time.time() - sync_start

        # ADMM updates: z = N*rho / (2*lam + N*rho) * (w_mean + u_mean),
        # then the dual variable u moves by the new residual
        z = update_z(w_mean, u_mean, rho, num_workers, lam)
        u = u + model.linear.weight.data.numpy() - z

        # test the model after the ADMM round
        test_start = time.time()
        correct = 0
        total = 0
        test_loss = 0
        for items, labels in validation_loader:
            items = Variable(items.view(-1, num_features))
            labels = Variable(labels)
            outputs = model(items)
            test_loss += torch.mean(
                torch.clamp(1 - outputs.t() * labels.float(), min=0))
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()
        test_time = time.time() - test_start

        print('ADMM Epoch: [%d/%d], Time: %.4f, Loss: %.4f, '
              'ADMM epoch cost %.4f: computation cost %.4f s '
              'communication cost %.4f s test cost %.4f s, '
              'accuracy of the model on the %d test samples: %d %%, loss = %f'
              % (admm_epoch, num_admm_epochs, time.time() - train_start,
                 epoch_loss.data, time.time() - admm_epoch_start, cal_time,
                 sync_time, test_time, len(val_indices),
                 100 * correct / total, test_loss / total))

    # test the final model
    correct = 0
    total = 0
    test_loss = 0
    for items, labels in validation_loader:
        items = Variable(items.view(-1, num_features))
        labels = Variable(labels)
        outputs = model(items)
        test_loss += torch.mean(
            torch.clamp(1 - outputs.t() * labels.float(), min=0))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    print('Time = %.4f, accuracy of the model on the %d test samples: %d %%, loss = %f'
          % (time.time() - train_start, len(val_indices),
             100 * correct / total, test_loss / total))

    if worker_index == 0:
        clear_bucket(endpoint)
    end_time = time.time()
    print("Elapsed time = {} s".format(end_time - start_time))
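# Tiny check (illustrative only) of the hinge-loss expression used above:
# torch.clamp(1 - outputs.t() * labels, min=0) is zero for confidently correct
# predictions and grows linearly with margin violations.
import torch

outputs = torch.tensor([[2.0], [0.5], [-1.0]])  # raw scores, one per sample
labels = torch.tensor([1.0, 1.0, 1.0])
hinge = torch.mean(torch.clamp(1 - outputs.t() * labels, min=0))
print(hinge)  # tensor(0.8333): (0 + 0.5 + 2.0) / 3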
def handler(event, context):
    start_time = time.time()
    bucket = event['bucket']
    key = event['name']
    num_features = event['num_features']
    num_classes = event['num_classes']
    elasti_location = event['elasticache']
    endpoint = memcached_init(elasti_location)
    print('bucket = {}'.format(bucket))
    print('key = {}'.format(key))

    key_splits = key.split("_")
    worker_index = int(key_splits[0])
    num_worker = event['num_files']
    batch_size = 100000
    batch_size = int(np.ceil(batch_size / num_worker))
    torch.manual_seed(random_seed)

    # read dataset from s3
    file = get_object(bucket, key).read().decode('utf-8').split("\n")
    print("read data cost {} s".format(time.time() - start_time))

    parse_start = time.time()
    dataset = DenseDatasetWithLines(file, num_features)
    preprocess_start = time.time()
    # note: parsing time is end minus start, not the reverse
    print("libsvm operation cost {} s".format(preprocess_start - parse_start))

    # create data indices for training and validation splits
    dataset_size = len(dataset)
    print("dataset size = {}".format(dataset_size))
    indices = list(range(dataset_size))
    split = int(np.floor(validation_ratio * dataset_size))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]

    # create PyTorch data samplers and loaders
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                    sampler=valid_sampler)
    print("preprocess data cost {} s".format(time.time() - preprocess_start))

    model = LogisticRegression(num_features, num_classes)

    # loss and optimizer; softmax is computed internally by CrossEntropyLoss
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    train_loss = []
    test_loss = []
    test_acc = []
    total_time = 0

    # train the model
    epoch_start = time.time()
    for epoch in range(num_epochs):
        tmp_train = 0
        for batch_index, (items, labels) in enumerate(train_loader):
            print("------worker {} epoch {} batch {}------".format(
                worker_index, epoch, batch_index))
            items = Variable(items.view(-1, num_features))
            labels = Variable(labels)

            # forward + backward + optimize
            optimizer.zero_grad()
            outputs = model(items)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            w = model.linear.weight.data.numpy()
            b = model.linear.bias.data.numpy()

            # asynchronous model exchange: each worker pushes its model to the
            # cache and pulls whichever model another worker wrote last
            # (frombuffer replaces the deprecated fromstring)
            if batch_index == 0 and epoch == 0:
                # bootstrap: publish the initial model before reading
                hset_object(endpoint, model_bucket, w_prefix, w.tobytes())
                hset_object(endpoint, model_bucket, b_prefix, b.tobytes())
                time.sleep(0.0001)
                w_new = np.frombuffer(hget_object(endpoint, model_bucket, w_prefix),
                                      dtype=w.dtype).reshape(w.shape)
                b_new = np.frombuffer(hget_object(endpoint, model_bucket, b_prefix),
                                      dtype=b.dtype).reshape(b.shape)
            else:
                w_new = np.frombuffer(hget_object(endpoint, model_bucket, w_prefix),
                                      dtype=w.dtype).reshape(w.shape)
                b_new = np.frombuffer(hget_object(endpoint, model_bucket, b_prefix),
                                      dtype=b.dtype).reshape(b.shape)
                hset_object(endpoint, model_bucket, w_prefix, w.tobytes())
                hset_object(endpoint, model_bucket, b_prefix, b.tobytes())
            model.linear.weight.data = torch.from_numpy(w_new)
            model.linear.bias.data = torch.from_numpy(b_new)

            # report train loss for every mini batch
            print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f'
                  % (epoch + 1, num_epochs, batch_index + 1,
                     len(train_indices) / batch_size, loss.data))
            tmp_train += loss.item()

        total_time += time.time() - epoch_start
        train_loss.append(tmp_train)
        tmp_test, tmp_acc = test(model, validation_loader, criterion)
        test_loss.append(tmp_test)
        test_acc.append(tmp_acc)
        epoch_start = time.time()

    print("total time = {}".format(total_time))
    end_time = time.time()
    print("elapsed time = {} s".format(end_time - start_time))

    loss_record = [test_loss, test_acc, train_loss, total_time]
    put_object("async-model-loss", "async-loss{}".format(worker_index),
               pickle.dumps(loss_record))
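# Round-trip sketch (illustrative only) of the tobytes()/frombuffer() model
# exchange used above, with a plain dict standing in for the memcached store.
import numpy as np

store = {}
w = np.random.rand(3, 4)
store["w_"] = w.tobytes()  # stands in for hset_object
w_new = np.frombuffer(store["w_"], dtype=w.dtype).reshape(w.shape)  # hget_object
assert (w_new == w).all()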
def handler(event, context):
    try:
        start_time = time.time()
        bucket_name = event['bucket_name']
        worker_index = event['rank']
        num_workers = event['num_workers']
        key = event['file']
        merged_bucket = event['merged_bucket']
        num_features = event['num_features']
        learning_rate = event["learning_rate"]
        batch_size = event["batch_size"]
        num_epochs = event["num_epochs"]
        validation_ratio = event["validation_ratio"]
        elasti_location = event['elasticache']
        endpoint = memcached_init(elasti_location)

        # read file from s3
        file = get_object(bucket_name, key).read().decode('utf-8').split("\n")
        print("read data cost {} s".format(time.time() - start_time))

        parse_start = time.time()
        dataset = SparseDatasetWithLines(file, num_features)
        print("parse data cost {} s".format(time.time() - parse_start))

        preprocess_start = time.time()
        dataset_size = len(dataset)
        indices = list(range(dataset_size))
        split = int(np.floor(validation_ratio * dataset_size))
        if shuffle_dataset:
            np.random.seed(random_seed)
            np.random.shuffle(indices)
        train_indices, val_indices = indices[split:], indices[:split]
        train_set = [dataset[i] for i in train_indices]
        val_set = [dataset[i] for i in val_indices]
        print("preprocess data cost {} s".format(time.time() - preprocess_start))

        lr = LogisticRegression(train_set, val_set, num_features, num_epochs,
                                learning_rate, batch_size)

        # train the model
        train_start = time.time()
        for epoch in range(num_epochs):
            epoch_start = time.time()
            num_batches = math.floor(len(train_set) / batch_size)
            print(f"worker {worker_index} epoch {epoch}")
            for batch_idx in range(num_batches):
                batch_start = time.time()
                batch_ins, batch_label = lr.next_batch(batch_idx)
                batch_grad = torch.zeros(lr.n_input, 1, requires_grad=False)
                batch_bias = 0.0  # plain float; np.float is deprecated
                train_loss = Loss()
                train_acc = Accuracy()

                # accumulate gradients over the mini batch
                for i in range(len(batch_ins)):
                    z = lr.forward(batch_ins[i])
                    h = lr.sigmoid(z)
                    loss = lr.loss(h, batch_label[i])
                    train_loss.update(loss, 1)
                    train_acc.update(h, batch_label[i])
                    g = lr.backward(batch_ins[i], h.item(), batch_label[i])
                    batch_grad.add_(g)
                    batch_bias += np.sum(h.item() - batch_label[i])
                batch_grad = batch_grad.div(len(batch_ins))
                batch_bias = batch_bias / len(batch_ins)
                batch_grad.mul_(-1.0 * learning_rate)
                lr.grad.add_(batch_grad)
                lr.bias = lr.bias - batch_bias * learning_rate

                # synchronize gradient and bias across workers, then average
                sync_start = time.time()
                np_grad = lr.grad.numpy().flatten()
                # wrap the bias in a 1-element array so it can be concatenated
                np_bias = np.array([lr.bias], dtype=np_grad.dtype)
                w_and_b = np.concatenate((np_grad, np_bias))
                postfix = "{}_{}".format(epoch, batch_idx)
                w_b_merge = reduce_batch(endpoint, w_and_b, merged_bucket,
                                         num_workers, worker_index, postfix)
                # reduce_batch returns a numpy array; convert back to a torch
                # tensor so the in-place ops above keep working next batch
                merged = w_b_merge[:-1].reshape(num_features, 1) / float(num_workers)
                lr.grad = torch.from_numpy(merged)
                lr.bias = float(w_b_merge[-1]) / float(num_workers)
                sync_time = time.time() - sync_start
                print("synchronization cost {} s, batch takes {} s".format(
                    sync_time, time.time() - batch_start))

                if (batch_idx + 1) % 10 == 0:
                    print("Epoch: {}/{}, Step: {}/{}, Loss: {}".format(
                        epoch + 1, num_epochs, batch_idx + 1, num_batches,
                        train_loss))

            cal_time = time.time() - epoch_start
            test_start = time.time()
            val_loss, val_acc = lr.evaluate()
            test_time = time.time() - test_start

            print('Epoch: [%d/%d], Step: [%d/%d], Time: %.4f, Loss: %s, '
                  'Accuracy: %s, epoch cost %.4f, cal cost %.4f s, '
                  'sync cost %.4f s, test cost %.4f s, '
                  'test accuracy: %s %%, test loss: %s'
                  % (epoch + 1, num_epochs, batch_idx + 1, num_batches,
                     time.time() - train_start, train_loss, train_acc,
                     time.time() - epoch_start, cal_time, sync_time, test_time,
                     val_acc, val_loss))

        if worker_index == 0:
            clear_bucket(endpoint)
        print("elapsed time = {} s".format(time.time() - start_time))
    except Exception as e:
        print("Error {}".format(e))
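# A numpy sketch of one mini-batch gradient computation, assuming the
# lr.forward/lr.backward helpers above implement standard logistic regression
# (sigmoid activation, cross-entropy loss, gradient x * (h - y)); the module's
# own helpers may differ in details.
import numpy as np

def lr_batch_gradient(X, y, w, b):
    # X: (n, d) features, y: (n,) labels in {0, 1}, w: (d,) weights, b: bias
    h = 1.0 / (1.0 + np.exp(-(X @ w + b)))  # sigmoid of the linear scores
    grad_w = X.T @ (h - y) / len(y)         # average weight gradient
    grad_b = np.mean(h - y)                 # average bias gradient
    return grad_w, grad_b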