def test_tensor_dataset(test_case):
    num_inputs = 2
    num_examples = 1000
    true_w = [2, -3.4]
    true_b = 4.2
    net = LinearNet(num_inputs)
    flow.nn.init.normal_(net.linear.weight, mean=0, std=0.01)
    flow.nn.init.constant_(net.linear.bias, val=0)
    loss = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.03)

    # Synthesize a linear regression dataset with small Gaussian noise.
    features = flow.tensor(
        np.random.normal(0, 1, (num_examples, num_inputs)), dtype=flow.float
    )
    labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
    labels += flow.tensor(
        np.random.normal(0, 0.01, size=labels.size()), dtype=flow.float
    )

    batch_size = 10
    dataset = flow.utils.data.TensorDataset(features, labels)
    data_iter = flow.utils.data.DataLoader(
        dataset, batch_size, shuffle=True, num_workers=0
    )

    num_epochs = 10
    for epoch in range(1, num_epochs + 1):
        for X, y in data_iter:
            output = net(X)
            l = loss(output, y).sum()
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        if epoch == num_epochs:
            test_case.assertLess(l.numpy(), 0.00025)
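# `LinearNet` is defined elsewhere in the suite; the test above relies only on
# it exposing a `linear` submodule. A minimal sketch consistent with that usage
# (the single-output nn.Linear is an assumption, not the repo's definition):
import oneflow.nn as nn


class LinearNet(nn.Module):
    def __init__(self, num_inputs):
        super().__init__()
        # One fully connected layer mapping the input features to a scalar.
        self.linear = nn.Linear(num_inputs, 1)

    def forward(self, x):
        return self.linear(x)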
def main(args):
    random.seed(args.seed)
    dataset_path = "./data/names"
    n_categories = processDataset(dataset_path)

    n_hidden = 128
    rnn = RNN(n_letters, n_hidden, n_categories)
    criterion = nn.NLLLoss()
    rnn.to("cuda")
    criterion.to("cuda")
    of_sgd = optim.SGD(rnn.parameters(), lr=learning_rate)

    # Keep track of losses for plotting
    current_loss = 0
    all_losses = []
    start = time.time()
    samples = 0.0
    correct_guess = 0.0
    for iter in range(1, n_iters + 1):
        category, line, category_tensor, line_tensor = randomTrainingExample()
        output, loss = train(category_tensor, line_tensor, rnn, criterion, of_sgd)
        current_loss += loss

        # Print iter number, loss, name and guess
        if iter % print_every == 0:
            start, time_str = timeSince(start)
            guess, guess_i = categoryFromOutput(output)
            correct = "✓" if guess == category else "✗ (%s)" % category
            if correct == "✓":
                correct_guess += 1
            samples += 1
            print(
                "iter: %d / %f%%, time_for_every_%d_iter: %s, loss: %.4f, predict: %s / %s, correct? %s, acc: %f"
                % (
                    iter,
                    float(iter) / n_iters * 100,
                    print_every,
                    time_str,
                    loss,
                    line,
                    guess,
                    correct,
                    correct_guess / samples,
                )
            )

        # Add current loss avg to list of losses
        if iter % plot_every == 0:
            all_losses.append(current_loss / plot_every)
            current_loss = 0

    with open("all_losses.txt", "w") as writer:
        for o in all_losses:
            writer.write("%f\n" % o)
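# The `train` helper invoked above is not shown here. Following the classic
# char-RNN classification tutorial this example mirrors, one plausible sketch
# (the `initHidden` method and per-character loop are assumptions) is:
def train(category_tensor, line_tensor, rnn, criterion, of_sgd):
    hidden = rnn.initHidden().to("cuda")
    of_sgd.zero_grad()
    # Feed the name one character at a time, threading the hidden state.
    for i in range(line_tensor.size(0)):
        output, hidden = rnn(line_tensor[i].to("cuda"), hidden)
    loss = criterion(output, category_tensor.to("cuda"))
    loss.backward()
    of_sgd.step()
    return output, loss.item()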
def _test(test_case):
    if os.getenv("ONEFLOW_TEST_CPU_ONLY"):
        device = flow.device("cpu")
    else:
        device = flow.device("cuda")
    net = Net()
    net.to(device)

    optimizer = optim.SGD(net.parameters(), lr=0.002, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    criterion.to(device)

    transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )

    train_epoch = 1
    batch_size = 4
    num_workers = 0
    data_dir = os.path.join(
        os.getenv("ONEFLOW_TEST_CACHE_DIR", "./data-test"), "cifar10"
    )
    train_iter, test_iter = load_data_cifar10(
        batch_size=batch_size,
        data_dir=data_dir,
        download=True,
        transform=transform,
        source_url="https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/cifar/cifar-10-python.tar.gz",
        num_workers=num_workers,
    )

    final_loss = 0
    for epoch in range(1, train_epoch + 1):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(train_iter, 1):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(dtype=flow.float32, device=device)
            labels = labels.to(dtype=flow.int64, device=device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 200 == 0:  # print every 200 mini-batches
                final_loss = running_loss / 200
                print("epoch: %d step: %5d loss: %.3f " % (epoch, i, final_loss))
                running_loss = 0.0
                break
    print("final loss : ", final_loss)
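# `Net` and `load_data_cifar10` live elsewhere in the test utilities. Purely as
# an illustration, a small ConvNet in the spirit of the classic CIFAR-10
# tutorial (all layer sizes below are assumptions, not the repo's definition):
import oneflow as flow
import oneflow.nn as nn
import oneflow.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = flow.flatten(x, 1)  # flatten all dims except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)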
# net = SimpleDLA()
net = net.to(device)
net.train()
if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = flow.load('./checkpoint/ckpt.pth')
    net.load_state_dict(checkpoint['net'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
scheduler = flow.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)


class ResNet18TrainGraph(flow.nn.Graph):
    def __init__(self):
        super().__init__()
        self.model = net
        self.loss_fn = criterion
        self.add_optimizer(optimizer, lr_sch=scheduler)

    def build(self, x, y):
        y_pred = self.model(x)
        loss = self.loss_fn(y_pred, y)
        loss.backward()
        return loss  # return the loss so callers of the compiled graph can log it
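# Hedged usage sketch: an nn.Graph is compiled on first call and then invoked
# like a function. `train_loader` below is an assumed DataLoader yielding
# (image, label) batches; it is not part of the snippet above.
#
# graph = ResNet18TrainGraph()
# for x, y in train_loader:
#     loss = graph(x.to(device), y.to(device))
#     print(loss.numpy())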
def trainIters(
    encoder,
    decoder,
    n_iters,
    pairs,
    input_lang,
    output_lang,
    print_every=1000,
    plot_every=100,
    learning_rate=0.01,
):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [
        tensorsFromPair(random.choice(pairs), input_lang, output_lang)
        for _ in range(n_iters)
    ]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]
        loss = train(
            input_tensor,
            target_tensor,
            encoder,
            decoder,
            encoder_optimizer,
            decoder_optimizer,
            criterion,
        )
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print(
                "%s (%d %d%%) %.4f"
                % (
                    timeSince(start, iter / n_iters),
                    iter,
                    iter / n_iters * 100,
                    print_loss_avg,
                )
            )

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)
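# `timeSince(start, progress)` above is assumed to match the helper from the
# classic seq2seq tutorial, reporting elapsed and estimated remaining time:
import math
import time


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return "%dm %ds" % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / percent  # estimated total time at the current rate
    rs = es - s  # estimated time remaining
    return "%s (- %s)" % (asMinutes(s), asMinutes(rs))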
def main(args):
    transform = vision.transforms.Compose(
        [
            vision.transforms.ToTensor(),
            vision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )
    trainset = vision.datasets.CIFAR10(
        root=args.data_root, train=True, download=True, transform=transform
    )
    trainloader = flow.utils.data.DataLoader(
        trainset, batch_size=args.train_batch_size, shuffle=True, num_workers=1
    )
    testset = vision.datasets.CIFAR10(
        root=args.data_root, train=False, download=True, transform=transform
    )
    testloader = flow.utils.data.DataLoader(
        testset, batch_size=args.val_batch_size, shuffle=False, num_workers=1
    )
    classes = (
        "plane", "car", "bird", "cat", "deer",
        "dog", "frog", "horse", "ship", "truck",
    )
    device = flow.device("cuda")

    expert_network = MLP(input_size=3072, output_size=10, hidden_size=256)
    net = MoE(expert_network, 3072, 10, num_experts=10, noisy_gating=True, k=4)
    net.to(device)

    optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=args.mom)
    criterion = nn.CrossEntropyLoss()
    criterion.to(device)

    for epoch in range(args.epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            inputs = inputs.view(inputs.shape[0], -1)
            outputs, aux_loss = net(inputs)
            loss = criterion(outputs, labels)
            # the MoE gate's load-balancing auxiliary loss is added to the
            # task loss before backprop
            total_loss = loss + aux_loss
            total_loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 100 == 99:  # print every 100 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0

    print("Finished Training")

    correct = 0
    total = 0
    with flow.no_grad():  # was torch.no_grad(); everything else here uses flow
        for i, data in enumerate(testloader, 0):
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs, _ = net(images.view(images.shape[0], -1))
            _, predicted = flow.max(outputs.data, 1)  # was torch.max
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(
        "Accuracy of the network on the 10000 test images: %d %%"
        % (100 * correct / total)
    )
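# `MLP` and `MoE` come from the mixture-of-experts module this example drives.
# A minimal expert consistent with MLP(input_size=3072, output_size=10,
# hidden_size=256) might look like the following sketch; the real definition,
# including any output activation, lives alongside the MoE code:
class MLP(nn.Module):
    def __init__(self, input_size, output_size, hidden_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Two-layer perceptron producing per-class scores for each input row.
        return self.fc2(self.relu(self.fc1(x)))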