def main():
    args = get_arguments()
    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    face_dataset = ImageData(root_dir=args.input_dir,
                             transform=transforms.Compose([PreprocessData(args.scale_size, args.crop_size)]))
    dataloader = DataLoader(face_dataset, batch_size=args.batch_size, shuffle=True)

    ########### setup network ##############
    net = ConvNet(3, args.K).to(device)

    #----------Weight Initialization---------------
    def init_weights(m):
        if type(m) == nn.Conv2d:
            nn.init.normal_(m.weight, 0, 0.02)

    net.apply(init_weights)
    #---------------------------------------------

    summary(net, (3, 128, 128))
    # for name, param in net.named_parameters():
    #     if param.requires_grad:
    #         print(name, param.data.size(), type(param))

    optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate,
                                 weight_decay=args.weight_decay)
    # #########################################
    print()
    train(dataloader, net, optimizer, args)
def main():
    X_tr, y_tr, X_te, y_te = load_data()
    X_tr, y_tr = X_tr[:1024], y_tr[:1024]
    X_te, y_te = X_te[:128], y_te[:128]

    if args.model == 'cnn':
        model = ConvNet()
        model_save_path = config.CNN_MODEL_PATH
    else:
        model = CapsuleNet()
        model_save_path = config.CAPSULE_MODEL_PATH
    model.to(device)

    optimizer = Adam(model.parameters())

    train_loss = []
    train_accuracy = []
    best_acc = 0.0
    for epoch in range(10):
        print(("Epoch %d " + "-" * 70) % (epoch + 1))
        loss = train(model, optimizer, X_tr, y_tr)
        train_loss.append(loss)
        acc = test(model, X_tr, y_tr, "Train")
        train_accuracy.append(acc)
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), model_save_path)

    with open('result/' + args.model + '_train.p', 'wb') as f:
        pickle.dump((train_loss, train_accuracy), f)
def main(opt):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    train_set, val_set, test_set = load_dataset(opt)
    train_loader = data_utils.DataLoader(train_set, batch_size=opt.batch_size,
                                         shuffle=True, num_workers=4)
    val_loader = data_utils.DataLoader(val_set, batch_size=opt.batch_size,
                                       shuffle=True, num_workers=4)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=opt.test_batch_size,
                                              shuffle=True, num_workers=4)

    model = ConvNet().to(device)
    # optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # optimizer = optim.SGD(model.parameters(), lr=1e-2)
    optimizer = optim.Adam(model.parameters(), lr=opt.lr)

    train_loss_list, train_acc_list = [], []
    val_loss_list, val_acc_list = [], []
    for epoch in range(1, opt.epochs + 1):
        loss, acc = train(model, device, train_loader, optimizer, epoch, opt)
        train_loss_list.append(loss)
        train_acc_list.append(acc)
        # print(train_loss_list[-1], train_acc_list[-1])
        if epoch % opt.val_intervals == 0:
            loss, acc = test(model, device, val_loader, 'Validation')
            val_loss_list.append(loss)
            val_acc_list.append(acc)
            # print(val_loss_list[-1], val_acc_list[-1])

    test(model, device, test_loader, 'Test')

    plt.figure(1)
    plt.plot(train_loss_list)
    plt.plot(val_loss_list)
    plt.ylabel("loss")
    plt.xlabel("epochs")
    plt.legend(['Train', 'Validation'])
    plt.title('Loss Plot')
    plt.savefig("loss.png")
    plt.show()

    plt.figure(2)
    plt.plot(train_acc_list)
    plt.plot(val_acc_list)
    plt.ylabel("accuracy")
    plt.xlabel("epochs")
    plt.legend(['Train', 'Validation'])
    plt.title('Accuracy Plot')
    plt.savefig("accuracy.png")
    plt.show()
def normal_train(args, loader_train, loader_test, dtype):
    model = ConvNet()
    model = model.type(dtype)
    model.train()
    loss_f = nn.CrossEntropyLoss()

    SCHEDULE_EPOCHS = [15]
    learning_rate = 0.01
    for num_epochs in SCHEDULE_EPOCHS:
        print('\nTraining %d epochs with learning rate %.4f' % (num_epochs, learning_rate))
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        for epoch in range(num_epochs):
            print('\nTraining epoch %d / %d ...\n' % (epoch + 1, num_epochs))
            # print(model.training)
            for i, (X_, y_) in enumerate(loader_train):
                X = Variable(X_.type(dtype), requires_grad=False)
                y = Variable(y_.type(dtype), requires_grad=False).long()
                preds = model(X)
                loss = loss_f(preds, y)
                if (i + 1) % args.print_every == 0:
                    print('Batch %d done, loss = %.7f' % (i + 1, loss.item()))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            print('Batch %d done, loss = %.7f' % (i + 1, loss.item()))
            test(model, loader_test, dtype)
        learning_rate *= 0.1
    return model
def unrolled(args, loader_train, loader_test, dtype):
    model = ConvNet()
    model = model.type(dtype)
    model.train()

    SCHEDULE_EPOCHS = [50, 50]
    learning_rate = 5e-4
    for num_epochs in SCHEDULE_EPOCHS:
        print('\nTraining %d epochs with learning rate %.7f' % (num_epochs, learning_rate))
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        for epoch in range(num_epochs):
            print('\nTraining epoch %d / %d ...\n' % (epoch + 1, num_epochs))
            # print(model.training)
            for i, (X_, y_) in enumerate(loader_train):
                X = Variable(X_.type(dtype), requires_grad=False)
                y = Variable(y_.type(dtype), requires_grad=False)
                loss = cw_train_unrolled(model, X, y, dtype)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                if (i + 1) % args.print_every == 0:
                    print('Batch %d done, loss = %.7f' % (i + 1, loss.item()))
                    test(model, loader_test, dtype)
            print('Batch %d done, loss = %.7f' % (i + 1, loss.item()))
        learning_rate *= 0.1
    return model
class Training:
    def __init__(self, epoch, learningRate, batchSize, imageSize, L2Rate, trainPath):
        super(Training, self).__init__()
        self.epoch = epoch
        self.learningRate = learningRate
        self.batchSize = batchSize
        self.imageSize = imageSize
        self.L2Rate = L2Rate
        self.trainPath = trainPath
        self.data_size = calculate_data_size(self.trainPath)
        self.num_batches = self.data_size // batchSize
        self.data_loader = run_loader('train', trainPath, batchSize, imageSize, shuffle=True)
        self.model = ConvNet(10)
        self.train()

    def train(self):
        self.model.train()
        crossentropy = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learningRate,
                                     weight_decay=self.L2Rate)
        for epoch in range(self.epoch):
            epoch_loss = 0
            epoch_acc = 0
            for X, y in tqdm(self.data_loader):
                optimizer.zero_grad()
                out = self.model(X)
                loss = crossentropy(out, y)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()  # converts the loss tensor to a Python float
                predictions = torch.argmax(out, 1)
                epoch_acc += torch.sum(predictions == y).item()
            epoch_loss = epoch_loss / self.num_batches
            epoch_acc = epoch_acc / self.data_size
            print(f"Epoch {epoch}:", "ACC:", epoch_acc, "LOSS:", epoch_loss)
            torch.save(self.model.state_dict(), f"Trained/Model_{epoch}.model")
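# Usage (a minimal sketch; the argument values below are hypothetical, not from
# the original script). Constructing the object immediately runs training,
# since __init__ ends with a call to self.train() -- a side-effecting design
# worth being aware of when importing this class elsewhere.
if __name__ == "__main__":
    Training(epoch=10, learningRate=1e-3, batchSize=32,
             imageSize=64, L2Rate=1e-4, trainPath="data/train")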
def main():
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Using device: ", device)

    model = ConvNet().to(device)
    try:
        model.load_state_dict(torch.load(FILE))
        print("Finished loading model.")
        model.eval()
    except IOError:
        print("Failed to load model. Model might not exist.")
        return

    print("Print Network Parameters:")
    for param in model.parameters():
        print(param)
    print("Print model state dict: ", model.state_dict())

    with torch.no_grad():
        print("Perform inference/testing here...")
with open('char_dict', 'rb') as f:
    class_dict = pickle.load(f)
num_classes = len(class_dict)

# load the data
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])
dataset = HWDB(path=data_path, transform=transform)
print("Training set size:", dataset.train_size)
print("Test set size:", dataset.test_size)
trainloader, testloader = dataset.get_loader(batch_size)

net = ConvNet(num_classes)
if torch.cuda.is_available():
    net = net.cuda()
net.load_state_dict(torch.load('checkpoints/handwriting_iter_009.pth'))
print('Network structure:\n')
# summary(net, input_size=(3, 64, 64), device='cuda')

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=lr)
writer = SummaryWriter(log_path)
for epoch in range(10, epochs):
    train(epoch, net, criterion, optimizer, trainloader, writer=writer)
    valid(epoch, net, testloader, writer=writer)
    print("Epoch %d finished, saving model..." % epoch)
    torch.save(net.state_dict(), save_path + 'handwriting_iter_%03d.pth' % epoch)
                   sent_num_layers=args.sent_num_layers,
                   output_dim=output_dim,
                   output_attn=args.output_attn)
if args.net_type == 'transformer':
    model = TransformerNet(vocab_size=input_dim,
                           embedding_dim=args.embed_dim,
                           num_heads=args.num_heads,
                           ff_dim=args.ff_dim,
                           num_enc_layers=args.num_enc_layers,
                           output_dim=output_dim,
                           dropout=args.dropout,
                           pad_idx=pad_idx,
                           embed_trainable=args.embed_trainable)

n_pars = sum(p.numel() for p in model.parameters())
print(model)
print("Number of parameters: {}".format(n_pars))

#%% Load pre-trained embedding
pretrained_embeddings = helper.TEXT.vocab.vectors
if args.net_type == 'han':
    model.word_attn.embedding.weight.data.copy_(pretrained_embeddings)
    model.word_attn.embedding.weight.data[unk_idx] = torch.zeros(
        args.embed_dim)  # Zero the initial weights for <unk> tokens
    model.word_attn.embedding.weight.data[pad_idx] = torch.zeros(
        args.embed_dim)  # Zero the initial weights for <pad> tokens
else:
    model.embedding.weight.data.copy_(pretrained_embeddings)
    model.embedding.weight.data[unk_idx] = torch.zeros(
        args.embed_dim)  # Zero the initial weights for <unk> tokens
parser.add_argument('-n', '--n_epochs', type=int, default=10)
parser.add_argument('-o', '--optimizer', default='optim.Adam(parameters)')
parser.add_argument('-sl', '--schedule_lr', action='store_true')
args = parser.parse_args()

# Create the CUDA device if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Create the ConvNet
from model import ConvNet
net = ConvNet()

# Parse the optimizer: evaluate the CLI string with the network's parameters
# and torch.optim exposed as `parameters` and `optim`
optimizer = eval(args.optimizer, {
    'parameters': net.parameters(),
    'optim': torch.optim
})

# Using data augmentation
from data import load
train_data = load.get_dogs_and_cats(batch_size=args.batch_size,
                                    random_crop=(128, 128),
                                    random_horizontal_flip=True,
                                    normalize=not args.no_normalization)
valid_data = load.get_dogs_and_cats('valid',
                                    resize=(128, 128),
                                    batch_size=args.batch_size,
                                    normalize=not args.no_normalization)

# Train
train(net,
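# How the --optimizer string is meant to be used from the command line (a
# hypothetical invocation; the script name is an assumption). Any expression
# over the two injected names works, e.g.:
#
#   python train.py --optimizer "optim.SGD(parameters, lr=0.01, momentum=0.9)"
#
# Passing a restricted globals dict to eval() keeps the expression from seeing
# other module state, but eval still executes arbitrary code, so this pattern
# is only safe with trusted input.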
# build model
if args.arc == 'ConvNet':
    model = ConvNet()
elif args.arc == 'FcNet':
    model = FcNet()
else:
    model = ConvNet()
if args.cuda:
    print('Using CUDA with {0} GPUs'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model).cuda()

# define optimizer
if args.optimizer.lower() == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
elif args.optimizer.lower() == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
else:
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

best_valid_loss = np.inf
iteration = 0
epoch = 1

# train with early stopping
while (epoch < args.epochs + 1) and (iteration < args.patience):
train_dataset = InstantNoodles(train_root)
test_dataset = InstantNoodles(test_root)
# train_root = r'./birds/train'
# test_root = r'./birds/test'
# train_dataset = Birds(train_root)
# test_dataset = Birds(test_root)
train_data_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_data_loader = DataLoader(test_dataset, batch_size=4, shuffle=True)

# load model
net = ConvNet(12).to(device)

# choose loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=lr)

# statistics
stats = {}

# train
print('Start training...')
stats['loss_train'] = []
for epoch in range(max_epoch):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_data_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        # clear gradients
        optimizer.zero_grad()
        # forward
        outputs = net(inputs)
import torch
from torch import nn
import numpy as np
from kaggle_environments import make
from model import SimpleNet, ResNet, ConvNet
from mcts import mcts
from agents import netAgent, processObservation
from epoch_training import selfplay, net_update
from evaluation import evaluate

model = ConvNet(42, 7, 64)
defaultModel = ConvNet(42, 7, 64)
log = open("log.txt", 'w')
# defaultModel.load_state_dict(torch.load('parameters_simple128.pth'))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(1000):
    agent = netAgent(model, return_probs=True)
    against = netAgent(defaultModel, incorrect_moves=False)
    training_data = selfplay(agent, against, num=10)
    net_update(model, training_data, optimizer)

    agent = netAgent(model, incorrect_moves=False, best_move=False)
    against = netAgent(defaultModel, incorrect_moves=False, best_move=False)
    result = evaluate(agent, against, 1000)
    log.write("Epoch " + str(epoch) + " Result: " + str(result) + "\n")
    print("Test result: ", result)
    if result > 0.65:
        torch.save(model.state_dict(), "parameters_simple128.pth")
                                   shuffle=True,
                                   num_workers=2)
test_dataloader = data.DataLoader(test_dataset,
                                  config.batch_size,
                                  shuffle=False,
                                  num_workers=2)
print(f"{datetime.now().ctime()} - Finish Loading Dataset")

print(f"{datetime.now().ctime()} - Start Creating Net, Criterion, Optimizer and Scheduler...")
conv_net = ConvNet(config.input_channel, 2)
lr_model = LogisticRegression(config.cifar10_input_size)
conv_criterion = nn.CrossEntropyLoss()
lr_criterion = nn.BCEWithLogitsLoss()
conv_optimizer = optim.SGD(conv_net.parameters(),
                           config.lr,
                           momentum=config.momentum,
                           weight_decay=config.weight_decay)
lr_optimizer = optim.SGD(lr_model.parameters(),
                         config.lr,
                         momentum=config.momentum,
                         weight_decay=config.weight_decay)
conv_scheduler = optim.lr_scheduler.CosineAnnealingLR(conv_optimizer,
                                                      len(train_dataloader) * config.epochs,
                                                      eta_min=config.eta_min)
lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(lr_optimizer,
                                                    len(train_dataloader) * config.epochs,
                                                    eta_min=config.eta_min)
# Torch DataLoader
train_dataset = MillionBlocksDataset(labels_arr=train_labels_arr, images=train_data)
test_dataset = MillionBlocksDataset(labels_arr=test_labels_arr, images=test_data)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=True)

# ConvNet model
model = ConvNet(num_classes).to(device)

# Loss and optimizer; the targets passed below are class indices obtained via
# torch.max, so CrossEntropyLoss (rather than BCELoss, which expects float
# probabilities) is the matching criterion
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, im_batch in enumerate(train_loader):
        images = im_batch['image']
        images = images.to(device)
        labels = im_batch['arrangement']
        labels = labels.reshape(-1, num_classes)
        labels = labels.long().to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, torch.max(labels, 1)[1])
model = ConvNet(embedding_dim=args.embed_dim,
                n_filters=args.num_filters,
                filter_sizes=sizes,
                output_dim=2,
                dropout=args.dropout)
if args.net_type == 'attn':
    model = AttnNet(embedding_dim=args.embed_dim,
                    rnn_hidden_dim=args.rnn_hidden_dim,
                    rnn_num_layers=args.rnn_num_layers,
                    output_dim=2,
                    bidirection=args.bidirection,
                    rnn_cell_type=args.rnn_cell_type,
                    dropout=args.dropout)

n_pars = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(model)
print("Number of trainable parameters: {}".format(n_pars))

#%% Define the optimizer, criterion and metrics
optimizer = optim.Adam(model.parameters())
metrics_fn = metrics

# Weight balancing
if args.weight_balance and torch.cuda.device_count() == 0:
    criterion = nn.CrossEntropyLoss(
        weight=torch.FloatTensor(train_set.cls_weight()))
elif args.weight_balance and torch.cuda.device_count() > 0:
    criterion = nn.CrossEntropyLoss(
        weight=torch.FloatTensor(train_set.cls_weight()).cuda())
else:
def main(args):
    best_acc1 = 0
    os.makedirs('checkpoints', exist_ok=True)
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device: {}'.format(args.device))

    # create model
    model = ConvNet(cfg.NUM_CLASSES).to(args.device)
    # model.apply(weights_init_normal)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=args.device)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_dataset = ImageFolder(cfg.TRAIN_PATH)
    val_dataset = ImageFolder(cfg.VAL_PATH)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    logger = Logger('./logs')

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        adjust_learning_rate(optimizer, epoch, args)
        train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        val_loss, val_acc = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = val_acc > best_acc1
        best_acc1 = max(val_acc, best_acc1)

        # log
        info = {
            'train_loss': float(train_loss),
            'train_acc': float(train_acc),
            'val_loss': float(val_loss),
            'val_acc': float(val_acc)
        }
        for tag, value in info.items():
            logger.scalar_summary(tag, value, epoch)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
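# save_checkpoint is not defined in this snippet. A sketch of the usual helper
# (as in the PyTorch ImageNet example this script resembles); the actual
# implementation and file names in this project may differ:
import shutil

def save_checkpoint(state, is_best, filename='checkpoints/checkpoint.pth.tar'):
    torch.save(state, filename)  # always persist the latest state
    if is_best:
        # keep a separate copy of the best-performing checkpoint so far
        shutil.copyfile(filename, 'checkpoints/model_best.pth.tar')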
""" Train Dataset """ train_dataloader = DataLoader(cnn_dataset, shuffle=True, num_workers=0, batch_size=config.train_batch_size) if os.path.exists(args.savefile): print("Loading Existing Model") net = torch.load(args.savefile) else: print("Creating New Model") net = ConvNet(num_classes=len(labels)).cuda() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(net.parameters(), lr = args.learnrate) counter = [] loss_history = [] iteration_number= 0 total_step = len(train_dataloader) for epoch in range(config.train_number_epochs): for i, (image_file_names, images, labels) in enumerate(train_dataloader): images = images.cuda() labels = labels.cuda() # Forward pass outputs = net(images) loss = criterion(outputs, labels)
class Learner:
    def __init__(self, args, q_batch):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.q_batch = q_batch
        self.learn_step_counter = 0
        self.gamma = args.gamma
        self.batch_size = args.batch_size
        self.env = gym.make(args.env)
        self.n_act = self.env.action_space.n
        self.n_state = self.env.observation_space.shape[0]
        self.n_atom = args.atom
        self.v_min = args.v_min
        self.v_max = args.v_max
        self.dz = (self.v_max - self.v_min) / (self.n_atom - 1)
        self.z = [self.v_min + i * self.dz for i in range(self.n_atom)]
        self.z_space = torch.FloatTensor(self.z).to(self.device)
        self.net = ConvNet(self.n_state, self.n_act, self.n_atom).to(self.device)
        self.target_net = ConvNet(self.n_state, self.n_act, self.n_atom).to(self.device)
        self.optimizer = optim.Adam(self.net.parameters(), lr=args.lr)

    def learn(self):
        while True:
            self.learn_step_counter += 1
            # target parameter update
            if self.learn_step_counter % 10 == 0:
                self.update_target()

            states, actions, rewards, next_states, dones = self.q_batch.get(block=True)
            states = torch.FloatTensor(states).to(self.device)
            actions = torch.LongTensor(actions).to(self.device)
            next_states = torch.FloatTensor(next_states).to(self.device)
            dones = [int(i) for i in dones]

            # action value distribution prediction: (m, N_ACTIONS, N_ATOM)
            curr_q = self.net(states)
            # keep only the distribution of the action actually taken
            curr_q = torch.stack([curr_q[i].index_select(0, actions[i])
                                  for i in range(self.batch_size)]).squeeze(1)

            # get next state value
            next_q = self.net(next_states).detach()  # (m, N_ACTIONS, N_ATOM)
            next_q = torch.sum(next_q * self.z_space.view(1, 1, -1), dim=2)  # (m, N_ACTIONS)
            next_action = next_q.argmax(dim=1)  # (m)

            # target_q
            target_q = self.target_net(next_states).detach().cpu().numpy()
            target_q = [target_q[i, action, :] for i, action in enumerate(next_action)]
            target_q = np.array(target_q)  # (m, N_ATOM)

            m_prob = np.zeros((self.batch_size, self.n_atom))  # (m, N_ATOM)
            # the target assignment below is intentionally left unvectorized
            for i in range(self.batch_size):
                for j in range(self.n_atom):
                    Tz = np.fmin(self.v_max,
                                 np.fmax(self.v_min,
                                         rewards[i] + (1 - dones[i]) * self.gamma * (self.v_min + j * self.dz)))
                    bj = (Tz - self.v_min) / self.dz
                    lj = np.floor(bj).astype(int)  # m_l
                    uj = np.ceil(bj).astype(int)   # m_u
                    # distribute the probability mass of atom j onto the two
                    # neighbouring atoms, weighted by distance
                    m_prob[i, lj] += (dones[i] + (1 - dones[i]) * target_q[i][j]) * (uj - bj)
                    m_prob[i, uj] += (dones[i] + (1 - dones[i]) * target_q[i][j]) * (bj - lj)

            m_prob = m_prob / m_prob.sum(axis=1, keepdims=True)
            m_prob = torch.FloatTensor(m_prob).to(self.device)
            # print(curr_q)

            # cross-entropy between the projected target distribution and the
            # predicted distribution (the epsilon guards against log(0))
            loss = -torch.mean(torch.sum(m_prob * torch.log(curr_q + 1e-20), dim=1))
            if self.learn_step_counter % 100 == 0:
                print('loss:', loss.item())

            # backprop loss
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

    def update_target(self):
        self.target_net.load_state_dict(self.net.state_dict())
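# A vectorized version of the categorical (C51) target projection above -- a
# sketch under the same shapes and names, not part of the original code. It
# replaces the double loop over batch and atoms with numpy broadcasting and a
# scatter-add, and reproduces the original's behaviour (including the edge
# case where bj is integral and both weights are zero, which the final
# normalization absorbs). `rewards` and `dones` are assumed to be 1-D arrays
# of length batch_size, `target_q` of shape (batch_size, n_atom).
import numpy as np

def project_distribution(rewards, dones, target_q, z, v_min, v_max, dz, gamma):
    rewards = np.asarray(rewards, dtype=np.float64).reshape(-1, 1)   # (m, 1)
    dones = np.asarray(dones, dtype=np.float64).reshape(-1, 1)       # (m, 1)
    z = np.asarray(z, dtype=np.float64)                              # (N_ATOM,)
    Tz = np.clip(rewards + (1.0 - dones) * gamma * z, v_min, v_max)  # (m, N_ATOM)
    bj = (Tz - v_min) / dz
    lj = np.floor(bj).astype(int)
    uj = np.ceil(bj).astype(int)
    mass = dones + (1.0 - dones) * target_q                          # (m, N_ATOM)
    m_prob = np.zeros_like(target_q, dtype=np.float64)
    rows = np.arange(target_q.shape[0]).reshape(-1, 1)               # (m, 1)
    # np.add.at performs unbuffered scatter-add, required because several
    # source atoms can project onto the same destination bin
    np.add.at(m_prob, (rows, lj), mass * (uj - bj))
    np.add.at(m_prob, (rows, uj), mass * (bj - lj))
    return m_prob / m_prob.sum(axis=1, keepdims=True)

# Inside learn() this would replace the double loop with:
#   m_prob = project_distribution(rewards, dones, target_q, self.z,
#                                 self.v_min, self.v_max, self.dz, self.gamma)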
class Learner:
    def __init__(self, args, q_batch):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.q_batch = q_batch
        self.update_count = 0
        self.gamma = args.gamma
        self.batch_size = args.batch_size
        self.env_eval = gym.make(args.env)
        self.n_act = self.env_eval.action_space.n
        self.n_state = self.env_eval.observation_space.shape[0]
        self.n_quant = args.quant
        self.target_net_update_freq = args.target_net_update_freq
        self.net = ConvNet(self.n_state, self.n_act, self.n_quant).to(self.device)
        self.target_net = ConvNet(self.n_state, self.n_act, self.n_quant).to(self.device)
        self.optimizer = optim.Adam(self.net.parameters(), lr=args.lr)

    def learn(self):
        while True:
            self.update_count += 1
            if self.update_count % 10 == 0:
                rewards = self.evaluation()
                rewards_mu = np.array([np.sum(np.array(l_i), 0) for l_i in rewards]).mean()
                print('update cnt %d Eval Reward %.2f' % (self.update_count, rewards_mu))

            # target parameter update
            if self.update_count % self.target_net_update_freq == 0:
                self.update_target()

            states, actions, rewards, next_states, dones = self.q_batch.get(block=True)
            states = torch.FloatTensor(states).to(self.device)
            actions = torch.LongTensor(actions).to(self.device)
            next_states = torch.FloatTensor(next_states).to(self.device)
            dones = np.array([int(i) for i in dones])

            # action value distribution prediction: [BATCH, N_QUANT, N_ACTIONS]
            curr_q, tau = self.net(states)
            # keep only the quantiles of the action actually taken:
            # [BATCH, N_QUANT, 1]
            curr_q = torch.stack([curr_q[i].index_select(1, actions[i])
                                  for i in range(self.batch_size)])
            # [BATCH, N_QUANT, N_QUANT]
            curr_q = curr_q.repeat(1, 1, self.n_quant)

            # get next state value: [BATCH, N_QUANT, N_ACTIONS]
            next_q, _ = self.net(next_states)
            next_action = next_q.sum(dim=1).argmax(dim=1)

            # target_q
            with torch.no_grad():
                # [BATCH, N_QUANT, N_ACT]
                target_q, _ = self.target_net(next_states)
                target_q = target_q.detach().cpu().numpy()
                # [BATCH, N_QUANT, 1]
                target_q = np.array([target_q[i, :, action]
                                     for i, action in enumerate(next_action)])
                target_q = rewards.reshape(-1, 1) + self.gamma * target_q * (1 - dones.reshape(-1, 1))
                target_q = torch.FloatTensor(target_q).to(self.device).unsqueeze(2)
                # [BATCH, N_QUANT, N_QUANT]
                target_q = target_q.repeat(1, 1, self.n_quant)
                target_q = target_q.permute(0, 2, 1)

            # loss = F.smooth_l1_loss(curr_q, target_q.detach(), reduction='none')
            # (BATCH, N_QUANT, N_QUANT)
            tau = tau.repeat(1, 1, self.n_quant)
            diff = target_q - curr_q
            loss = self.huber(diff)
            I_delta = (diff < 0).double()
            loss *= torch.abs(tau - I_delta)  # quantile huber loss
            loss = torch.mean(torch.sum(torch.mean(loss, dim=2), dim=1))

            # backprop loss
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

    def huber(self, x):
        cond = (x.abs() < 1.0).float().detach()
        return 0.5 * x.pow(2) * cond + (x.abs() - 0.5) * (1.0 - cond)

    def update_target(self):
        self.target_net.load_state_dict(self.net.state_dict())

    def evaluation(self):
        rewards = []
        for _ in range(10):
            rewards_i = []
            state = self.env_eval.reset()
            action = self.action(state)
            state, reward, done, _ = self.env_eval.step(action)
            rewards_i.append(reward)
            while not done:
                action = self.action(state)
                state, reward, done, _ = self.env_eval.step(action)
                rewards_i.append(reward)
            rewards.append(rewards_i)
        return rewards

    def action(self, state):
        state = torch.FloatTensor(state).to(self.device).unsqueeze(0)
        action_value, _ = self.net(state)
        # if self.update_count > 3000:
        #     dist_action = action_value[0].detach().cpu().numpy()
        #     sns.distplot(dist_action[:, 0], bins=10, color='red')
        #     sns.distplot(dist_action[:, 1], bins=10, color='blue')
        #     plt.show()
        action_value = action_value[0].sum(dim=0)
        action = torch.argmax(action_value).detach().cpu().item()
        return action
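# The elementwise weighting in learn() implements the quantile Huber loss from
# QR-DQN: rho_tau^kappa(u) = |tau - 1{u < 0}| * L_kappa(u), here with
# kappa = 1. A standalone sketch of the same computation (the function name
# and signature are ours, not from the original):
import torch

def quantile_huber_loss(curr_q, target_q, tau, kappa=1.0):
    """curr_q, target_q, tau: [batch, n_quant, n_quant] pairwise tensors."""
    u = target_q - curr_q                                # pairwise TD errors
    huber = torch.where(u.abs() <= kappa,
                        0.5 * u.pow(2),
                        kappa * (u.abs() - 0.5 * kappa))
    weight = (tau - (u.detach() < 0).float()).abs()      # |tau - 1{u<0}|
    # mean over target quantiles, sum over predicted quantiles, mean over batch
    # -- the same reduction as the original learn() loop
    return (weight * huber).mean(dim=2).sum(dim=1).mean()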
def train(pre_trained=None):
    # create folder to save models and loss graphs
    reference = hp['net_type'] + str(time.strftime("_%Y%m%d_%H%M%S"))
    checkpoints_folder = hp["output_dir"] + '/checkpoints/' + reference
    os.makedirs(checkpoints_folder, exist_ok=True)

    # save hyper-parameter settings
    pickle_file_location = checkpoints_folder + "/hp.pkl"
    pickle_file = open(pickle_file_location, "wb")
    pickle.dump(hp, pickle_file)
    pickle_file.close()

    # create data iterators
    train_data_set = DataGenerator(hp)
    iterator = DataLoader(dataset=train_data_set,
                          batch_size=hp['batch_size'],
                          num_workers=hp['num_workers'],
                          pin_memory=True,
                          shuffle=False,
                          drop_last=True)
    val_set = ValidationDataGenerator(hp)
    val_set_iterator = DataLoader(dataset=val_set,
                                  batch_size=50,
                                  num_workers=hp['num_workers'],
                                  pin_memory=True,
                                  shuffle=False,
                                  drop_last=True)

    # create model and loss
    model = ConvNet().to(device)
    loss = CrossEntropyLoss().to(device)

    # optimizer
    optimizer = torch.optim.Adam(params=model.parameters(), lr=hp['learning_rate'])

    start_epoch = 0
    # load pre-trained model
    if pre_trained is not None:
        ckpt = torch.load(pre_trained)
        model.load_state_dict(ckpt['net'])
        optimizer.load_state_dict(ckpt['opt'])
        start_epoch = ckpt['epoch'] + 1

    # init loss arrays
    classification_loss = np.zeros(hp['num_epochs'])
    train_accuracy = np.zeros(hp['num_epochs'])
    val_accuracy = np.zeros(hp['num_epochs'])

    # training loop
    for epoch in range(start_epoch, hp['num_epochs']):
        c_loss = 0
        acc = 0
        for i, (img, label) in enumerate(iterator):
            img = img.to(device, dtype=torch.float)
            label = label.to(device, dtype=torch.float)

            optimizer.zero_grad()
            logits = model(img)
            l = loss(logits, label.long())
            l.backward()
            optimizer.step()
            c_loss += l.item()

            # calc accuracy
            logits = logits.detach().cpu().numpy()
            label = label.detach().cpu().numpy()
            acc += utils.classification_accuracy(logits, label)
            print("epoch = {}, Training_sample={}, classification loss ={}".format(
                epoch, i, l.item()))

        # average loss per epoch
        classification_loss[epoch] = c_loss / (i + 1)
        # average accuracy per epoch
        train_accuracy[epoch] = acc / (i + 1)
        print("epoch = {}, average classification loss ={}".format(
            epoch, classification_loss[epoch]))
        print("epoch = {}, Training accuracy ={}".format(
            epoch, train_accuracy[epoch]))

        with torch.no_grad():
            val_acc = 0
            for i, (img, label) in enumerate(val_set_iterator):
                img = img.to(device, dtype=torch.float)
                label = label.to(device, dtype=torch.float)
                logits = model(img)
                # calc accuracy
                logits = logits.detach().cpu().numpy()
                label = label.detach().cpu().numpy()
                val_acc += utils.classification_accuracy(logits, label)
            val_accuracy[epoch] = val_acc / (i + 1)
            print("epoch = {}, Validation set accuracy ={}".format(
                epoch, val_accuracy[epoch]))

        # plot accuracy curves and save model
        plt.plot(range(1, len(train_accuracy) + 1), train_accuracy, 'b-',
                 label="Train Accuracy")
        plt.plot(range(1, len(val_accuracy) + 1), val_accuracy, 'r-',
                 label="Validation Accuracy")
        plt.xlabel("epochs")
        plt.ylabel("accuracy")
        plt.legend(loc='best')
        plt.savefig(checkpoints_folder + "/accuracy.jpeg", bbox_inches="tight")
        plt.clf()

        net_save = {
            'net': model.state_dict(),
            'opt': optimizer.state_dict(),
            'epoch': epoch
        }
        torch.save(net_save,
                   checkpoints_folder + "/convnet_ethiopian_mnist_epoch{}.pth".format(epoch))
    total = 0
    net.eval()
    with torch.no_grad():
        for (images, labels) in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total


net = ConvNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
mt = MetricTracker()

for epoch in range(EPOCHS):
    net.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
if __name__ == "__main__": # Specifiy data folder path and model type(fully/conv) folder, model_type = sys.argv[1], sys.argv[2] # Get data loaders of training set and validation set train_loader, val_loader = get_dataloader(folder, batch_size=32) # Specify the type of model if model_type == 'conv': model = ConvNet() elif model_type == 'fully': model = Fully() # Set the type of gradient optimizer and the model it update optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # Choose loss function criterion = nn.CrossEntropyLoss() # Check if GPU is available, otherwise CPU is used use_cuda = torch.cuda.is_available() if use_cuda: model.cuda() # Four list to plot learning curve train_loss = [] train_acc = [] validation_loss = [] validation_acc = []
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--train-dir', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
parser.add_argument('--test-dir', type=str, default=os.environ['SM_CHANNEL_TEST'])
parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])
parser.add_argument('--num-cpus', type=int, default=os.environ['SM_NUM_CPUS'])
args = parser.parse_args()

model_path = os.path.join(args.model_dir, 'model.pth')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device {}.".format(device))

torch.manual_seed(args.seed)

# Load the training data.
data_loaders = {'train': _get_train_data_loader(args.batch_size, args.train_dir, args.num_cpus),
                'val': _get_test_data_loader(args.batch_size, args.test_dir, args.num_cpus)}

# Build the model.
model = ConvNet(args.hidden_dim, args.output_dim).to(device)
model = torch.nn.DataParallel(model)  # recommended by the SageMaker Python SDK devs
optimizer = optim.Adam([param for param in model.parameters() if param.requires_grad],
                       lr=args.lr)
criterion = torch.nn.CrossEntropyLoss()

# Train the model.
train(model, data_loaders, args.epochs, optimizer, criterion, device)

# Save the model and its parameters.
save_model_params(args)
save_model(model, model_path)
writer = SummaryWriter('logs/nogaussion_batch_{}_lr_{}_obj_{}_nobj_{}_loc_{}'.format(
    batch_size, learing_rate, obj_scale, nobj_scale, loc_scale))

model = ConvNet()
if torch.cuda.is_available():
    model = model.cuda()
    summary(model, input_size=(3, 640, 640), device='cuda')
else:
    summary(model, input_size=(3, 640, 640), device='cpu')
# model.load_state_dict(torch.load('no_gassuion_epoch35.pth'))

criterion = MultiBranchLoss(input_size=(640, 640),
                            writer=writer,
                            obj_scale=obj_scale,
                            nobj_scale=nobj_scale,
                            loc_scale=loc_scale)
optimizer = Adam(model.parameters(), lr=learing_rate)

batchs_loss = 0
for epoch in range(epochs):
    model.train()
    dataset = WIDERFaceDetection(WIDERFace_ROOT,
                                 transform=SSDAugmentation(640, (127.5, 127.5, 127.5)))
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                            collate_fn=collate_fn)
    for i, (images, labels) in enumerate(dataloader):
        batch_num = epoch * len(dataloader) + i + 1
        optimizer.zero_grad()
        if torch.cuda.is_available():
            images = images.cuda()
        outputs = model(images)
        loss = criterion(outputs, labels, batch_num)
        batchs_loss += loss.item()
        loss.backward()
test_set = TimitDataset('./data', labels, stepsize, freq_bins, frame_step,
                        frame_size, traintest='TEST')
testloader = get_batch_data(test_set, batch_size)

# torch.cuda.device(0) returns a context manager, not a device object
device = torch.device('cuda:0')

capsnet = CapsuleNet(num_classes=nr_classes)
capsnet.cuda()
capsnet_optimizer = optim.Adam(capsnet.parameters())

convnet = ConvNet(num_classes=nr_classes)
convnet.cuda()
convnet_loss = torch.nn.MSELoss()
convnet_optimizer = optim.Adam(convnet.parameters())


def train_model(model, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        model.train()
        running_loss = 0.0
        running_accuracy = 0.0
        for idx, (inputs, labels) in enumerate(trainloader, 0):
            inputs = Variable(inputs).cuda()
            labels = Variable(labels).cuda()
# make sure that all the addon examples have true labels
assert all([x[1] == 1 for x in train_data_addon])

# stack the addon onto the original training data and shuffle again
train_data = np.concatenate((train_data, train_data_addon), axis=0)
train_data_size = len(train_data)
shuffle_idx = np.random.permutation(train_data_size)
train_data = train_data[shuffle_idx]

# init model
model = ConvNet()
model = model.cuda()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5 * 2, weight_decay=1e-2)

# train loop: use k-fold validation
k_fold = 10
fold_size = int(train_data_size // k_fold)
for i in range(k_fold):
    # split data into train/val
    val_data_curr_fold = train_data[i * fold_size:(i + 1) * fold_size]
    train_data_curr_fold_head = train_data[:i * fold_size]
    train_data_curr_fold_tail = train_data[(i + 1) * fold_size:]
    train_data_curr_fold = np.concatenate(
        (train_data_curr_fold_head, train_data_curr_fold_tail))
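# The manual slicing above matches sklearn's KFold split up to how the
# remainder samples (beyond k_fold * fold_size) are assigned -- KFold spreads
# them across folds, while the manual version always keeps them in the train
# split. A sketch, assuming sklearn is available in this project:
from sklearn.model_selection import KFold

kf = KFold(n_splits=10, shuffle=False)  # data was already shuffled above
for train_idx, val_idx in kf.split(train_data):
    train_data_curr_fold = train_data[train_idx]
    val_data_curr_fold = train_data[val_idx]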
criterion = nn.L1Loss()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = ConvNet()
net.to(device)
# the optimizer must be created after the network whose parameters it optimizes
optimizer = optim.Adam(net.parameters(), lr=1e-4)

for epoch in range(lastepoch, 4001):
    if os.path.isdir("result/%04d" % epoch):
        continue
    cnt = 0
    if epoch > 2000:
        learning_rate = 1e-5
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)
        net.to(device)

    num_ids = len(train_ids)
    running_loss = 0.0
    for ind in np.random.permutation(num_ids):
        # get the path from image id
        train_id = train_ids[ind]
        in_files = glob.glob(input_dir + '%05d_00*.ARW' % train_id)
        # np.random.random_integers is deprecated; randint draws the same range
        in_path = in_files[np.random.randint(len(in_files))]
        in_fn = os.path.basename(in_path)

        gt_files = glob.glob(gt_dir + '%05d_00*.ARW' % train_id)
        gt_path = gt_files[0]
        gt_fn = os.path.basename(gt_path)
        in_exposure = float(in_fn[9:-5])
        path=os.path.join("audio", "validation"),
        sample_rate=config["sample_rate"],
        n_mels=config["n_mels"],
        n_fft=config["n_fft"],
        win_length=config["win_length"],
        hop_length=config["hop_length"],
    ),
    batch_size=config["batch_size"],
    shuffle=True,
    pin_memory=True)

# Initialize model, loss function, optimizer, and lr scheduler
model = ConvNet(base=4)
model.to(device)
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Initialize wandb
wandb.init(project="torch", config=config)
wandb.watch(model, log="all")

# Start training
for epoch in range(1, config["n_epochs"] + 1):
    print(f"Epoch {epoch}/{config['n_epochs']}")
    start_time = time.time()
    train_loss = 0
    train_acc = 0
    val_loss = 0
train_losses = []
val_accs = []
val_losses = []
best_acc = 0

if trainMode:
    # set pseudo-random generator seeds to make multiple runs comparable
    torch.manual_seed(1)
    if haveCuda:
        torch.cuda.manual_seed(1)

    net = ConvNet(4)
    if haveCuda:
        net = net.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                          nesterov=True, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, numEpoch, eta_min=1e-2)

    for epoch in range(numEpoch):
        loss, acc = train(epoch, train_loader)
        train_accs.append(acc)
        train_losses.append(loss)

        loss, acc = val(epoch, test_loader)
        val_accs.append(acc)
        val_losses.append(loss)

        scheduler.step()

        if acc > best_acc: