# Shared imports for the snippets in this section; base, custom and fmnist
# are repo-local modules (training utilities, model definitions, data loading).
import os
from collections import OrderedDict

import torch
from torch import nn
from torch.nn import init

import base
import custom
import fmnist


def main():
    batch_size = 128
    lr, eps = .0001, 1e-3
    # lr, eps = .0001, 1e-1
    root = '../../Datasets'
    # Resize Fashion-MNIST images to 224x224, the input size VGG expects.
    train_iter, test_iter = fmnist.load_data(batch_size, 224, root)
    net = custom.VGG11(4)
    optimizer = torch.optim.Adam(net.parameters(), lr)
    ckpt_path = '../../checkpoint/vgg.pt'
    ckpt = None
    # Resume training from a saved checkpoint when one exists.
    if os.path.exists(ckpt_path):
        ckpt = torch.load(ckpt_path)
        net.load_state_dict(ckpt['net'])
        optimizer.load_state_dict(ckpt['optimizer'])
    loss = nn.CrossEntropyLoss()
    base.train(net, train_iter, test_iter, loss, eps=eps, optimizer=optimizer,
               checkpoint_path=ckpt_path, checkpoint=ckpt)
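
# base.train is not shown here, but the resume logic above implies it saves a
# dict with 'net' and 'optimizer' state_dicts. A minimal sketch of how such a
# checkpoint could be written (the helper name and save cadence are
# assumptions, not the repo's confirmed behavior):
def save_checkpoint(net, optimizer, ckpt_path):
    # Persist both model and optimizer state so training can resume exactly.
    torch.save({'net': net.state_dict(),
                'optimizer': optimizer.state_dict()},
               ckpt_path)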

def train(num_inputs, num_hiddens, num_outputs, train_iter, test_iter, eps, drop_prob):
    # epoch 25, loss 0.272, train acc 0.898, test acc 0.860, 467.3 examples/sec
    # (result with eps = 1e-3, learning rate = 0.5)
    cnt = len(num_hiddens) + 1  # total number of linear layers
    odict = OrderedDict()
    odict['flatten'] = custom.FlattenLayer()
    odict['linear_0'] = nn.Linear(num_inputs, num_hiddens[0])
    for i in range(1, cnt):
        odict['relu_%d' % i] = nn.ReLU()
        odict['dropout_%d' % i] = nn.Dropout(drop_prob[i - 1])
        if i == cnt - 1:
            # Final layer maps the last hidden width to the output classes.
            odict['linear_%d' % i] = nn.Linear(num_hiddens[i - 1], num_outputs)
        else:
            odict['linear_%d' % i] = nn.Linear(num_hiddens[i - 1], num_hiddens[i])
    net = nn.Sequential(odict)
    for params in net.parameters():
        init.normal_(params, mean=0, std=.01)
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=.5)
    base.train(net, train_iter, test_iter, loss, eps=eps, num_epochs=50,
               optimizer=optimizer)
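
# Hypothetical call for 28x28 Fashion-MNIST inputs (784 features, 10 classes);
# the hidden widths and dropout probabilities are illustrative, not values
# taken from the repo. drop_prob needs one entry per hidden layer:
# train_iter, test_iter = fmnist.load_data(256, root='../../Datasets')
# train(784, [256, 256], 10, train_iter, test_iter, 1e-3, [0.2, 0.5])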

def main():
    batch_size = 256
    lr, eps = .001, 1e-3
    # lr, eps = .001, 1e-1
    root = '../../Datasets'
    train_iter, test_iter = fmnist.load_data(batch_size, root=root)
    net = custom.LeNet()
    optimizer = torch.optim.Adam(net.parameters(), lr)
    loss = nn.CrossEntropyLoss()
    base.train(net, train_iter, test_iter, loss, eps=eps, num_epochs=50,
               optimizer=optimizer)
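
# custom.LeNet is defined elsewhere in the repo. For reference, a minimal
# LeNet-style network for 1x28x28 Fashion-MNIST inputs might look like the
# sketch below; the layer sizes follow the classic LeNet-5, and the repo's
# version may differ:
class LeNetSketch(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5, padding=2), nn.Sigmoid(),  # 6 x 28 x 28
            nn.MaxPool2d(2, 2),                           # 6 x 14 x 14
            nn.Conv2d(6, 16, 5), nn.Sigmoid(),            # 16 x 10 x 10
            nn.MaxPool2d(2, 2))                           # 16 x 5 x 5
        self.fc = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
            nn.Linear(120, 84), nn.Sigmoid(),
            nn.Linear(84, 10))

    def forward(self, x):
        feature = self.conv(x)
        return self.fc(feature.view(x.shape[0], -1))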

def train(num_inputs, num_outputs, train_iter, test_iter, eps):
    # epoch 22, loss 0.417, train acc 0.858, test acc 0.840, 2905.9 examples/sec
    # (result with eps = 1e-3, learning rate = 0.1)
    net = nn.Sequential(
        OrderedDict([
            ('flatten', custom.FlattenLayer()),
            ('linear', nn.Linear(num_inputs, num_outputs))
        ])
    )
    init.normal_(net.linear.weight, mean=0, std=.01)
    init.constant_(net.linear.bias, val=0)
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=.1)
    base.train(net, train_iter, test_iter, loss, eps=eps, num_epochs=50,
               optimizer=optimizer)
    return net
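
# custom.FlattenLayer is used in several snippets above but not defined here.
# A minimal sketch, assuming the conventional implementation (flatten every
# dimension after the batch dimension):
class FlattenLayer(nn.Module):
    def forward(self, x):
        # (N, C, H, W) or (N, d) -> (N, -1)
        return x.view(x.shape[0], -1)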

def train(num_inputs, num_hiddens, num_outputs, train_iter, test_iter, eps):
    # epoch 46, loss 0.155, train acc 0.943, test acc 0.883, 546.2 examples/sec
    # (result with eps = 1e-3, learning rate = 0.5)
    net = nn.Sequential(custom.FlattenLayer(),
                        nn.Linear(num_inputs, num_hiddens),
                        nn.ReLU(),
                        nn.Linear(num_hiddens, num_outputs))
    for params in net.parameters():
        init.normal_(params, mean=0, std=.01)
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=.5)
    base.train(net, train_iter, test_iter, loss, eps=eps, num_epochs=50,
               optimizer=optimizer)
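
# eps is forwarded to base.train (not shown). The recorded results above stop
# well before num_epochs=50 (epochs 22, 25, 46), which suggests eps acts as a
# convergence tolerance for early stopping (an inference, not confirmed by the
# source). Hypothetical call for 784-dimensional inputs and 10 classes, with
# an illustrative hidden width:
# train(784, 256, 10, train_iter, test_iter, 1e-3)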