def main():
    # Note that msms.py has many other imports
    import msms
    import torch

    print("Reading data")
    # Initialize a dataset
    example_set = msms.Dataset("data", "metadata/lookup.npy", "metadata/ms_param.csv")

    print("Initializing dataloader")
    # Initialize a dataloader with the dataset
    # - Note that you currently can't add more `num_workers`, maybe due to fighting over GPU resources?
    example_loader = torch.utils.data.DataLoader(example_set,
                                                 batch_size=2,
                                                 shuffle=True,
                                                 num_workers=0)

    # Set the network parameters
    # Max pooling size for all convolutional layers. Our low `num_indivs` generally makes this
    # a bad idea, but you can make it work with few convolutional layers.
    pool_size = 1
    # The number of channels per convolution layer (think colour channels in an image);
    # input channels assumed to be 1
    channels = [4, 6, 8]
    # The size of the kernel at each convolution step. Length of this list should match length of `channels`
    kernels = [5, 4, 3]
    # The nodes in each hidden fully connected layer. The last should be the number of labels.
    # The length of this list is independent of the other lists.
    nodes = [500, 100, 5]

    print("Creating neural network")
    # Create the network with the given parameters and send it to the GPU
    example_net = msms.Net(example_set.num_indivs, example_set.num_sites,
                           pool_size, channels, kernels, nodes).cuda()

    print("Training neural network")
    # Try running the network
    for snp, pos, label in example_loader:
        # The data loader reads in each file as though it were a training example.
        # As each file is actually a collection of training examples (a chunk), we must reshape with `view()`.
        # We also need to add a dimension to snp (with unsqueeze) to indicate there is only one colour channel.
        snp = snp.view(-1, example_set.num_indivs, example_set.num_sites).unsqueeze(1)
        pos = pos.view(-1, example_set.num_sites)
        label = label.view(-1)

        # Perform one forward pass
        out = example_net(snp, pos)

        # Print output and shape
        print(out)
        print(out.shape)
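Since the `view()`/`unsqueeze()` step above is easy to get wrong, here is a minimal standalone sketch of the same reshape using dummy tensors; the chunk size and data dimensions below are made-up numbers for illustration only (the real values come from the dataset metadata).

import torch

# Hypothetical sizes for illustration: 2 files per batch, each holding a chunk
# of 10 simulated examples of 40 individuals x 400 sites.
batch_size, chunk, num_indivs, num_sites = 2, 10, 40, 400
snp = torch.zeros(batch_size, chunk, num_indivs, num_sites)

# Flatten the (file, chunk) dimensions into a single example dimension, then
# add a singleton colour-channel dimension for the convolutional layers.
snp = snp.view(-1, num_indivs, num_sites).unsqueeze(1)
print(snp.shape)  # torch.Size([20, 1, 40, 400])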
import time

import msms
import torch
import torch.nn.functional as F


def main():
    # Start timing
    tic = time.perf_counter()

    # Initialize dataset
    ds = msms.Dataset("../pipeline/data", "../pipeline/metadata/lookup.npy",
                      "../pipeline/metadata/ms_param.csv")

    # Initialize data loader
    dataloader = torch.utils.data.DataLoader(
        ds,
        batch_size=20,
        shuffle=False,  # NO SHUFFLE
        num_workers=0)

    # Set network parameters
    pool_size = 1
    channels = [4, 6, 8]
    kernels = [5, 4, 3]
    nodes = [500, 100, 5]

    # Create model
    net = msms.Net(ds.num_indivs, ds.num_sites, pool_size, channels, kernels,
                   nodes).cuda()

    # Define the optimizer
    optimizer = torch.optim.SGD(net.parameters(), 1e-4)

    total_step = len(dataloader)

    # Extract info from dataloader and run network
    for epoch in range(2):
        count = 0
        for snp, pos, label in dataloader:
            count += 1

            # Reshape each chunk of training examples
            snp = snp.view(-1, ds.num_indivs, ds.num_sites).unsqueeze(1)
            pos = pos.view(-1, ds.num_sites)
            print(f"label shape before reshape: {label.shape}")
            label = label.view(-1)
            label_ohe = F.one_hot(label)
            label_ohe = label_ohe.to(torch.float32)

            # Perform one forward pass
            out = net(snp, pos)
            out = out.to(torch.float32)
            print(f"net output shape: {out.shape}")
            print(f"label tensor shape: {label.shape}")
            loss = F.mse_loss(out, label_ohe).cuda()

            # Perform backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if count % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch + 1, 2, count, total_step, loss.item()))

    toc = time.perf_counter()
    print(f"Total time run: {(toc-tic)/60:0.4f} minutes")
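As an aside, the one-hot encoding plus `F.mse_loss` above can be swapped for `torch.nn.CrossEntropyLoss`, which the later training scripts use: it takes the raw network outputs and integer class labels directly, so no one-hot encoding or float conversion is needed. A tiny standalone sketch with dummy tensors:

import torch

criterion = torch.nn.CrossEntropyLoss()

# Dummy logits for a batch of 4 examples over 5 classes, plus integer labels
# (made-up values purely for illustration).
out = torch.randn(4, 5)
label = torch.tensor([0, 2, 4, 1])

loss = criterion(out, label)
print(loss.item())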
import time

import numpy as np

import msms
import torch
import torch.distributed as dist
import torch.nn as nn


def train(gpu, args):
    # Start timing
    tic = time.perf_counter()

    # Calculate GPU rank
    rank = args.nr * args.gpus + gpu
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=args.world_size,
                            rank=rank)
    torch.cuda.set_device(gpu)

    # Decide on train / dev split
    lookup = np.load(args.metadata, allow_pickle=True).item()
    num_dev = int(min(np.floor(lookup['num_files'] * args.test_prop), 2**5))
    shuffled_ids = np.random.choice(range(lookup['num_files']),
                                    lookup['num_files'],
                                    replace=False)
    train_ids = shuffled_ids[:-num_dev]
    dev_ids = shuffled_ids[-num_dev:]

    # Initialize datasets
    train_set = msms.Dataset(args.data, args.metadata, args.params, train_ids)
    dev_set = msms.Dataset(args.data, args.metadata, args.params, dev_ids)

    # Create a training sampler for DDP
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_set, num_replicas=args.world_size, rank=rank)

    # Create a dev sampler for DDP
    dev_sampler = torch.utils.data.distributed.DistributedSampler(
        dev_set, num_replicas=args.world_size, rank=rank)

    # Initialize data loaders
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=False,  # NO SHUFFLE: the DDP sampler handles shuffling
        num_workers=0,
        sampler=train_sampler)
    dev_loader = torch.utils.data.DataLoader(
        dev_set,
        batch_size=args.batch_size,
        shuffle=False,  # NO SHUFFLE
        num_workers=0,
        sampler=dev_sampler)

    # Set network parameters
    try:
        channels = list(map(int, args.channels.split(',')))
        kernels = list(map(int, args.kernels.split(',')))
        pools = list(map(int, args.pools.split(',')))
        nodes = list(map(int, args.hiddennodes.split(',')))
    except ValueError:
        raise Exception(
            "One of the network hyperparameters (channels, kernels, pools, or "
            "hidden nodes) could not be interpreted as a comma-delimited "
            "string of integers.")

    # Create model and wrap in DDP
    net = msms.Net(train_set.num_indivs, train_set.num_sites, channels,
                   kernels, pools, nodes, train_set.num_labels).cuda()
    net = nn.parallel.DistributedDataParallel(net, device_ids=[gpu])

    # Define criterion function
    criterion = torch.nn.CrossEntropyLoss()

    # Initialize overall loss accumulator
    losses = []
    report_every = args.report_every
    epoch_times = []

    # Extract info from dataloader and run network
    for epoch in range(args.epochs):
        epoch_start = time.time()

        # Re-create the optimizer each epoch to implement learning rate decay
        lr = args.lr_0 * (args.lr_r**epoch)
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=lr,
                                     weight_decay=args.l2_lambda)

        # Init running loss accumulators
        running_loss = 0.
        running_correct = 0.
        for i, (snp, pos, label) in enumerate(train_loader):
            # Reshape each chunk of training examples
            snp = snp.view(-1, train_set.num_indivs,
                           train_set.num_sites).unsqueeze(1)
            pos = pos.view(-1, train_set.num_sites)
            label = label.view(-1)

            # Perform one forward pass
            out = net(snp, pos)
            loss = criterion(out, label)

            # Perform backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Calculate number correct (hardmax) for this minibatch
            # and add to the running total
            temp = torch.argmax(out, 1)
            running_correct += (temp == label).float().sum()

            # Accumulate and periodically report the running loss
            running_loss += loss.item()
            if i % report_every == (report_every - 1) and (
                    len(train_set.filelist) / args.batch_size) - i > report_every:
                # Print running loss and accuracy. Format is [epoch, minibatch]
                print(
                    f'[{epoch}, {i + 1}] '
                    f'Loss: {running_loss / report_every} '
                    f'Acc: {running_correct / (train_set.num_sims * args.batch_size * report_every) * 100}'
                )
                losses.append(running_loss)

                # Reset accumulators
                running_loss = 0.
                running_correct = 0.
        epoch_end = time.time()
        epoch_times.append(epoch_end - epoch_start)
        print(f"Single epoch training time: {epoch_end - epoch_start}")

    avg_epoch_time = np.mean(np.array(epoch_times))
    print(f"Iteration time: {avg_epoch_time / 8}")
    print("Done Training")

    # Check testing accuracy
    running_correct = 0.
    with torch.no_grad():
        for snp, pos, label in dev_loader:
            # Reshape!
            snp = snp.view(-1, train_set.num_indivs,
                           train_set.num_sites).unsqueeze(1)
            pos = pos.view(-1, train_set.num_sites)
            label = label.view(-1)

            # Predict and count number of correct labels
            out = torch.argmax(net(snp, pos), 1)
            running_correct += (out == label).float().sum()

    toc = time.perf_counter()
    if gpu == 0:
        print(f"Total time run: {(toc-tic)/60:0.4f} minutes")
        # print(f'Dev Accuracy: {running_correct / (dev_set.num_sims * num_dev) * 100}')

    # Save recorded losses for plotting
    np.save("training_loss.npy", losses)
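The entry point that spawns `train(gpu, args)` is not shown here, so the sketch below is only a guess at what it could look like: the argument names and defaults are assumptions inferred from the attributes that `train()` reads off `args`, while the environment variables and `mp.spawn` call follow the standard PyTorch DistributedDataParallel recipe (one process per local GPU, all processes rendezvousing at the rank-0 address).

# Hypothetical launcher for train(gpu, args); argument names and defaults are
# assumptions, not the repository's actual interface.
import argparse
import os

import torch.multiprocessing as mp


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--nodes', type=int, default=1)   # number of nodes
    parser.add_argument('--gpus', type=int, default=1)    # GPUs per node
    parser.add_argument('--nr', type=int, default=0)      # rank of this node
    parser.add_argument('--data', default='data')
    parser.add_argument('--metadata', default='metadata/lookup.npy')
    parser.add_argument('--params', default='metadata/ms_param.csv')
    parser.add_argument('--test-prop', type=float, default=0.1)
    parser.add_argument('--batch-size', type=int, default=20)
    parser.add_argument('--channels', default='4,6,8')
    parser.add_argument('--kernels', default='5,4,3')
    parser.add_argument('--pools', default='1,1,1')
    parser.add_argument('--hiddennodes', default='500,100')
    parser.add_argument('--epochs', type=int, default=2)
    parser.add_argument('--lr-0', type=float, default=5e-5)
    parser.add_argument('--lr-r', type=float, default=0.8)
    parser.add_argument('--l2-lambda', type=float, default=0.5)
    parser.add_argument('--report-every', type=int, default=100)
    args = parser.parse_args()

    # One training process per GPU across all nodes
    args.world_size = args.gpus * args.nodes

    # Address and port of the rank-0 process, used by init_method='env://'
    os.environ.setdefault('MASTER_ADDR', 'localhost')
    os.environ.setdefault('MASTER_PORT', '8888')

    # Spawn one copy of train() per local GPU; each process receives its
    # local GPU index as the first argument.
    mp.spawn(train, nprocs=args.gpus, args=(args,))


if __name__ == '__main__':
    main()

With a launcher along these lines, a single-node, two-GPU run would look something like `python train.py --gpus 2 --epochs 10`.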
lr_0 = 0.00005  # Initial learning rate
lr_r = 0.8  # Learning rate decay rate
l2_lambd = 0.5  # Weight decay rate (L2 regularization)
criterion = torch.nn.CrossEntropyLoss()

# Initialize datasets
train_set = msms.Dataset("data", "metadata/lookup.npy", "metadata/ms_param.csv", train_ids)
dev_set = msms.Dataset("data", "metadata/lookup.npy", "metadata/ms_param.csv", dev_ids)

# Initialize dataloaders
# - Note that you currently can't add more `num_workers`, maybe due to fighting over GPU resources?
train_loader = torch.utils.data.DataLoader(train_set,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=0)
dev_loader = torch.utils.data.DataLoader(dev_set,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         num_workers=0)

# Create the network with the given parameters and send it to the GPU
net = msms.Net(train_set.num_indivs, train_set.num_sites, channels, kernels,
               pools, nodes, num_labels).cuda()

# Init overall loss accumulator
losses = []

# Try running the network
for epoch in range(epochs):
    # Define optimizer with hyperparameters for current epoch
    # Done per epoch to implement learning rate decay
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=lr_0 * (lr_r**epoch),
                                 weight_decay=l2_lambd)

    # Init running loss accumulator
    running_loss = 0.
    running_correct = 0.
    for i, (snp, pos, label) in enumerate(train_loader):