start_idx = batch_idx * args.tr_batch_size
end_idx = min((batch_idx + 1) * args.tr_batch_size, nr_trn_num)
X = X_trn[start_idx:end_idx]
Y = Y_trn[start_idx:end_idx]
# data = Variable(torch.from_numpy(X).long()).cuda()
data = Variable(torch.from_numpy(X).long())
batch_labels, batch_target = transformLabels(Y)
# batch_target = Variable(torch.from_numpy(batch_target).float()).cuda()
batch_target = Variable(torch.from_numpy(batch_target).float())
Y = Variable(torch.from_numpy(Y).float())

optimizer.zero_grad()
poses, activations = capsule_net(data, Y)
loss = BCE_loss(activations, Y)
loss.backward()
optimizer.step()
torch.cuda.empty_cache()

done = time.time()
elapsed = done - start
# print("Labels: {}, batch_labels: {}, batch_target: {}, predictions: {} ".format(Y, batch_labels, batch_target, activations.squeeze(2)))
print("labels: {}, predictions: {} ".format(Y, activations.squeeze(2)))
print("\rIteration: {}/{} ({:.1f}%) Loss: {:.5f} {:.5f}".format(
    (iteration + 1), nr_batches, (iteration + 1) * 100 / nr_batches,
    loss.item(), elapsed), end="")

total_loss = total_loss + loss.item()
losses.append(total_loss / float(nr_batches))
start_idx = batch_idx * args.tr_batch_size
end_idx = min((batch_idx + 1) * args.tr_batch_size, nr_trn_num)
X = X_trn[start_idx:end_idx]
Y = Y_trn_o[start_idx:end_idx]
# data = Variable(torch.from_numpy(X).long()).cuda()
data = Variable(torch.from_numpy(X).long())
batch_labels, batch_target = transformLabels(Y)
# batch_target = Variable(torch.from_numpy(batch_target).float()).cuda()
batch_target = Variable(torch.from_numpy(batch_target).float())
Y = Variable(torch.from_numpy(Y).float())

optimizer.zero_grad()
poses, activations = capsule_net(data, Y)
loss = BCE_loss(activations, Y)
loss.backward()
optimizer.step()
torch.cuda.empty_cache()

done = time.time()
elapsed = done - start
# print("Labels: {}, batch_labels: {}, batch_target: {}, predictions: {} ".format(Y, batch_labels, batch_target, activations.squeeze(2)))
print("labels: {}, predictions: {} ".format(Y, activations.squeeze(2)))
print("\rIteration: {}/{} ({:.1f}%) Loss: {:.5f} {:.5f}".format(
    (iteration + 1), nr_batches, (iteration + 1) * 100 / nr_batches,
    loss.item(), elapsed), end="")

total_loss = total_loss + loss.item()
losses.append(total_loss / float(nr_batches))
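# The two variants above differ only in whether labels are sliced from
# Y_trn or Y_trn_o, and they toggle CPU/GPU by commenting .cuda() calls
# in and out (Variable is also a pre-0.4 PyTorch idiom; plain tensors
# suffice in current releases). A device-agnostic alternative is
# sketched below; to_device_batch and its names are illustrative, not
# part of the original code.

import torch

# choose the GPU when available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def to_device_batch(X, Y, device):
    """Turn numpy minibatch arrays into tensors on the target device."""
    data = torch.from_numpy(X).long().to(device)
    target = torch.from_numpy(Y).float().to(device)
    return data, target

# With this helper the commented-out .cuda() lines disappear: the same
# loop body runs unchanged on either device.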
capsule_net.train()
# visit the batches in a fresh random order each epoch
for iteration, batch_idx in enumerate(np.random.permutation(range(nr_batches))):
    start = time.time()
    start_idx = batch_idx * args.tr_batch_size
    end_idx = min((batch_idx + 1) * args.tr_batch_size, nr_trn_num)
    X = X_trn[start_idx:end_idx]
    Y = Y_trn_o[start_idx:end_idx]
    data = Variable(torch.from_numpy(X).long()).cuda()
    batch_labels, batch_target = transformLabels(Y)
    batch_target = Variable(torch.from_numpy(batch_target).float()).cuda()

    optimizer.zero_grad()
    poses, activations = capsule_net(data, batch_labels)
    loss = BCE_loss(activations, batch_target)
    loss.backward()
    optimizer.step()
    torch.cuda.empty_cache()

    done = time.time()
    elapsed = done - start
    print("\rIteration: {}/{} ({:.1f}%) Loss: {:.5f} {:.5f}".format(
        iteration, nr_batches, iteration * 100 / nr_batches,
        loss.item(), elapsed), end="")
torch.cuda.empty_cache()
if (epoch + 1) > 20:
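# Two caveats on the loop above, both generic PyTorch behaviour rather
# than anything specific to this code: torch.cuda.empty_cache() only
# releases cached allocator blocks (it cannot free tensors that are
# still referenced, and calling it every iteration costs time), and
# CUDA kernels run asynchronously, so time.time() deltas can
# under-report GPU work. A sketch of a more faithful per-iteration
# timer:

import time
import torch

start = time.time()
# ... forward / backward / optimizer.step() ...
if torch.cuda.is_available():
    torch.cuda.synchronize()  # wait for queued GPU kernels to finish
elapsed = time.time() - start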
X = X_trn[start_idx:end_idx]
Y = Y_trn_o[start_idx:end_idx]
# number of micro-batch steps needed to cover this batch
# (note: the ceil must wrap the whole division, not just len(X))
batch_steps = int(np.ceil(len(X) / (float(args.tr_batch_size) / float(args.gradient_accumulation_steps))))
batch_loss = 0
for i in range(batch_steps):
    step_size = int(float(args.tr_batch_size) // float(args.gradient_accumulation_steps))
    step_X = X[i * step_size: (i + 1) * step_size]
    step_Y = Y[i * step_size: (i + 1) * step_size]
    step_X = Variable(torch.from_numpy(step_X).long()).cuda()
    step_labels, step_target = transformLabels(step_Y, Y)
    step_target = Variable(torch.from_numpy(step_target).float()).cuda()
    poses, activations = capsule_net(step_X, step_labels)
    step_loss = BCE_loss(activations, step_target)
    # scale so the accumulated gradient matches a full-batch update
    step_loss = step_loss / args.gradient_accumulation_steps
    step_loss.backward()
    batch_loss += step_loss.item()
optimizer.step()
optimizer.zero_grad()

done = time.time()
elapsed = done - start
print("\rIteration: {}/{} ({:.1f}%) Loss: {:.5f} {:.5f}".format(
    iteration, nr_batches, iteration * 100 / nr_batches,
    batch_loss, elapsed), end="")
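# The last variant is standard gradient accumulation: each batch is cut
# into args.gradient_accumulation_steps micro-batches, each micro-batch
# loss is divided by the step count before backward() so the summed
# gradients match a single full-batch average, and the optimizer steps
# once per batch. A self-contained sketch of the same pattern with a
# placeholder model (the capsule network and transformLabels are
# omitted; every name below is illustrative):

import torch
import torch.nn as nn

accum_steps = 4
model = nn.Linear(16, 1)                  # placeholder model
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

X = torch.randn(32, 16)                   # one "large" batch
Y = torch.randint(0, 2, (32, 1)).float()
micro = X.shape[0] // accum_steps

optimizer.zero_grad()
for i in range(accum_steps):
    xb = X[i * micro:(i + 1) * micro]
    yb = Y[i * micro:(i + 1) * micro]
    loss = criterion(model(xb), yb) / accum_steps  # scale so grads average
    loss.backward()                                # gradients accumulate in .grad
optimizer.step()                                   # one update per large batch

# Dividing each micro-batch loss by the step count also keeps the summed
# batch_loss comparable to the per-batch loss printed by the
# non-accumulated variants.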