# Import the libraries we need for this lab
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

torch.manual_seed(2)

# Logistic Function ----------------------------------
# Create a tensor ranging from -10 to 10:
z = torch.arange(-10, 10, 0.1).view(-1, 1)

# When you use nn.Sequential, you can create a sigmoid object:
sig = nn.Sigmoid()

# Apply the element-wise sigmoid function with the object:
yhat = sig(z)

# Plot the result
plt.plot(z.numpy(), yhat.numpy())
plt.xlabel('z')
plt.ylabel('yhat')

# For custom modules, call the sigmoid from torch (torch.nn.functional in older
# versions), which applies the element-wise sigmoid, and plot the results:
yhat = torch.sigmoid(z)
plt.plot(z.numpy(), yhat.numpy())
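# Sanity check (a minimal sketch): nn.Sigmoid, torch.sigmoid and F.sigmoid all
# compute the same element-wise logistic function, so the two plots above overlap.
z = torch.arange(-10, 10, 0.1).view(-1, 1)
assert torch.allclose(nn.Sigmoid()(z), torch.sigmoid(z))
assert torch.allclose(torch.sigmoid(z), F.sigmoid(z))  # F.sigmoid is deprecated in recent PyTorch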
print('Creating model')
NUM_CLASSES = args.number_of_classes
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, NUM_CLASSES)

if torch.cuda.device_count() > 1 and args.use_cuda:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model_ft = nn.DataParallel(model_ft)

model_ft = model_ft.to(device)
act = nn.Sigmoid().to(device)
criterion = nn.BCELoss().to(device)

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), args.learning_rate, args.momentum)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, args.step_size, args.gamma)

dataloaders = {'train': train_loader, 'val': val_loader, 'test': test_loader}
dataset_sizes = {
    'train': len(train_dataset),
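# nn.BCELoss expects probabilities, which is why the sigmoid `act` is created
# alongside it. A minimal sketch of one forward/loss computation under that
# pairing; `inputs` and `labels` are hypothetical batch tensors:
outputs = model_ft(inputs)              # raw logits, shape [N, NUM_CLASSES]
loss = criterion(act(outputs), labels)  # labels must be floats in [0, 1]
# nn.BCEWithLogitsLoss would fuse the sigmoid and the BCE in one,
# numerically more stable, module.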
def validate(val_loader, model, criterion, scheduler, source_resl, target_resl):
    global valid_minib_counter
    global logger

    # scheduler.batch_step()
    batch_time = AverageMeter()
    losses = AverageMeter()
    f1_scores = AverageMeter()
    map_scores_wt = AverageMeter()
    map_scores_wt_seed = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # sigmoid for f1 calculation and illustrations
    m = nn.Sigmoid()

    end = time.time()
    for i, (input, target, or_resl, target_resl, img_sample) in enumerate(val_loader):
        # permute to pytorch format
        input = input.permute(0, 3, 1, 2).contiguous().float().cuda(non_blocking=True)
        # take only mask and boundary at first
        target = target[:, :, :, 0:args.channels].permute(0, 3, 1, 2).contiguous().float().cuda(non_blocking=True)

        # Variable and volatile are deprecated; plain tensors under no_grad suffice
        input_var = input
        target_var = target

        # compute output
        with torch.no_grad():
            output = model(input_var)
            loss = criterion(output, target_var)

        # go over all of the predictions
        # apply the transformation to each mask
        # calculate score for each of the images
        averaged_maps_wt = []
        averaged_maps_wt_seed = []
        y_preds_wt = []
        y_preds_wt_seed = []
        energy_levels = []

        for j, pred_output in enumerate(output):
            or_w = or_resl[0][j]
            or_h = or_resl[1][j]

            # I keep only the latest preset
            pred_mask = m(pred_output[0, :, :]).data.cpu().numpy()
            pred_mask1 = m(pred_output[1, :, :]).data.cpu().numpy()
            pred_mask2 = m(pred_output[2, :, :]).data.cpu().numpy()
            pred_mask3 = m(pred_output[3, :, :]).data.cpu().numpy()
            pred_mask0 = m(pred_output[4, :, :]).data.cpu().numpy()
            pred_border = m(pred_output[5, :, :]).data.cpu().numpy()
            # pred_distance = m(pred_output[5, :, :]).data.cpu().numpy()
            pred_vector0 = pred_output[6, :, :].data.cpu().numpy()
            pred_vector1 = pred_output[7, :, :].data.cpu().numpy()

            pred_mask = cv2.resize(pred_mask, (or_h, or_w), interpolation=cv2.INTER_LINEAR)
            pred_mask1 = cv2.resize(pred_mask1, (or_h, or_w), interpolation=cv2.INTER_LINEAR)
            pred_mask2 = cv2.resize(pred_mask2, (or_h, or_w), interpolation=cv2.INTER_LINEAR)
            pred_mask3 = cv2.resize(pred_mask3, (or_h, or_w), interpolation=cv2.INTER_LINEAR)
            pred_mask0 = cv2.resize(pred_mask0, (or_h, or_w), interpolation=cv2.INTER_LINEAR)
            # pred_distance = cv2.resize(pred_distance, (or_h, or_w), interpolation=cv2.INTER_LINEAR)
            pred_border = cv2.resize(pred_border, (or_h, or_w), interpolation=cv2.INTER_LINEAR)
            pred_vector0 = cv2.resize(pred_vector0, (or_h, or_w), interpolation=cv2.INTER_LINEAR)
            pred_vector1 = cv2.resize(pred_vector1, (or_h, or_w), interpolation=cv2.INTER_LINEAR)

            # predict average energy by summing all the masks up
            pred_energy = (pred_mask + pred_mask1 + pred_mask2 + pred_mask3 + pred_mask0) / 5 * 255
            pred_mask_255 = np.copy(pred_mask) * 255

            # read the original masks for metric evaluation
            mask_glob = glob.glob('../data/stage1_train/{}/masks/*.png'.format(img_sample[j]))
            gt_masks = imread_collection(mask_glob).concatenate()

            # simple wt
            y_pred_wt = wt_baseline(pred_mask_255, args.ths)
            # wt with seeds
            y_pred_wt_seed = wt_seeds(pred_mask_255, pred_energy, args.ths)

            map_wt = calculate_ap(y_pred_wt, gt_masks)
            map_wt_seed = calculate_ap(y_pred_wt_seed, gt_masks)

            averaged_maps_wt.append(map_wt[1])
            averaged_maps_wt_seed.append(map_wt_seed[1])

            # apply colormap for easier tracking
            y_pred_wt = cv2.applyColorMap((y_pred_wt / y_pred_wt.max() * 255).astype('uint8'), cv2.COLORMAP_JET)
            y_pred_wt_seed = cv2.applyColorMap((y_pred_wt_seed / y_pred_wt_seed.max() * 255).astype('uint8'), cv2.COLORMAP_JET)

            y_preds_wt.append(y_pred_wt)
            y_preds_wt_seed.append(y_pred_wt_seed)
            energy_levels.append(pred_energy)

            # print('MAP for sample {} is {}'.format(img_sample[j], m_ap))

        y_preds_wt = np.asarray(y_preds_wt)
        y_preds_wt_seed = np.asarray(y_preds_wt_seed)
        energy_levels = np.asarray(energy_levels)

        averaged_maps_wt = np.asarray(averaged_maps_wt).mean()
        averaged_maps_wt_seed = np.asarray(averaged_maps_wt_seed).mean()

        #============ TensorBoard logging ============#
        if args.tensorboard_images:
            if i == 0:
                if args.channels == 5:
                    info = {
                        'images': to_np(input[:2, :, :, :]),
                        'gt_mask': to_np(target[:2, 0, :, :]),
                        'gt_mask1': to_np(target[:2, 1, :, :]),
                        'gt_mask2': to_np(target[:2, 2, :, :]),
                        'gt_mask3': to_np(target[:2, 3, :, :]),
                        'gt_mask0': to_np(target[:2, 4, :, :]),
                        'pred_mask': to_np(m(output.data[:2, 0, :, :])),
                        'pred_mask1': to_np(m(output.data[:2, 1, :, :])),
                        'pred_mask2': to_np(m(output.data[:2, 2, :, :])),
                        'pred_mask3': to_np(m(output.data[:2, 3, :, :])),
                        'pred_mask0': to_np(m(output.data[:2, 4, :, :])),
                        'pred_energy': energy_levels[:2, :, :],
                        'pred_wt': y_preds_wt[:2, :, :],
                        'pred_wt_seed': y_preds_wt_seed[:2, :, :, :],
                    }
                    for tag, images in info.items():
                        logger.image_summary(tag, images, valid_minib_counter)
                elif args.channels == 6:
                    info = {
                        'images': to_np(input[:2, :, :, :]),
                        'gt_mask': to_np(target[:2, 0, :, :]),
                        'gt_mask1': to_np(target[:2, 1, :, :]),
                        'gt_mask2': to_np(target[:2, 2, :, :]),
                        'gt_mask3': to_np(target[:2, 3, :, :]),
                        'gt_mask0': to_np(target[:2, 4, :, :]),
                        'gt_mask_distance': to_np(target[:2, 5, :, :]),
                        'pred_mask': to_np(m(output.data[:2, 0, :, :])),
                        'pred_mask1': to_np(m(output.data[:2, 1, :, :])),
                        'pred_mask2': to_np(m(output.data[:2, 2, :, :])),
                        'pred_mask3': to_np(m(output.data[:2, 3, :, :])),
                        'pred_mask0': to_np(m(output.data[:2, 4, :, :])),
                        'pred_distance': to_np(m(output.data[:2, 5, :, :])),
                        'pred_energy': energy_levels[:2, :, :],
                        'pred_wt': y_preds_wt[:2, :, :],
                        'pred_wt_seed': y_preds_wt_seed[:2, :, :, :],
                    }
                    for tag, images in info.items():
                        logger.image_summary(tag, images, valid_minib_counter)
                elif args.channels == 7:
                    info = {
                        'images': to_np(input[:2, :, :, :]),
                        'gt_mask': to_np(target[:2, 0, :, :]),
                        'gt_mask1': to_np(target[:2, 1, :, :]),
                        'gt_mask2': to_np(target[:2, 2, :, :]),
                        'gt_mask3': to_np(target[:2, 3, :, :]),
                        'gt_mask0': to_np(target[:2, 4, :, :]),
                        'gt_mask_distance': to_np(target[:2, 5, :, :]),
                        'gt_border': to_np(target[:2, 6, :, :]),
                        'pred_mask': to_np(m(output.data[:2, 0, :, :])),
                        'pred_mask1': to_np(m(output.data[:2, 1, :, :])),
                        'pred_mask2': to_np(m(output.data[:2, 2, :, :])),
                        'pred_mask3': to_np(m(output.data[:2, 3, :, :])),
                        'pred_mask0': to_np(m(output.data[:2, 4, :, :])),
                        'pred_distance': to_np(m(output.data[:2, 5, :, :])),
                        'pred_border': to_np(m(output.data[:2, 6, :, :])),
                        'pred_energy': energy_levels[:2, :, :],
                        'pred_wt': y_preds_wt[:2, :, :],
                        'pred_wt_seed': y_preds_wt_seed[:2, :, :, :],
                    }
                    for tag, images in info.items():
                        logger.image_summary(tag, images, valid_minib_counter)
                elif args.channels == 8:
                    info = {
                        'images': to_np(input[:2, :, :, :]),
                        'gt_mask': to_np(target[:2, 0, :, :]),
                        'gt_mask1': to_np(target[:2, 1, :, :]),
                        'gt_mask2': to_np(target[:2, 2, :, :]),
                        'gt_mask3': to_np(target[:2, 3, :, :]),
                        'gt_mask0': to_np(target[:2, 4, :, :]),
                        'gt_border': to_np(target[:2, 5, :, :]),
                        # simple hack - just sum the vectors
                        'gt_vectors': to_np(target[:2, 6, :, :] + target[:2, 7, :, :]),
                        'pred_mask': to_np(m(output.data[:2, 0, :, :])),
                        'pred_mask1': to_np(m(output.data[:2, 1, :, :])),
                        'pred_mask2': to_np(m(output.data[:2, 2, :, :])),
                        'pred_mask3': to_np(m(output.data[:2, 3, :, :])),
                        'pred_mask0': to_np(m(output.data[:2, 4, :, :])),
                        'pred_border': to_np(m(output.data[:2, 5, :, :])),
                        'pred_vectors': to_np(output.data[:2, 6, :, :] + output.data[:2, 7, :, :]),
                        'pred_energy': energy_levels[:2, :, :],
                        'pred_wt': y_preds_wt[:2, :, :],
                        'pred_wt_seed': y_preds_wt_seed[:2, :, :, :],
                    }
                    for tag, images in info.items():
                        logger.image_summary(tag, images, valid_minib_counter)

        # calculate f1 scores only on inner cell masks
        # weird pytorch numerical issue when converting to float
        target_f1 = (target_var.data[:, 0:1, :, :] > args.ths) * 1
        f1_scores_batch = batch_f1_score(output=m(output.data[:, 0:1, :, :]),
                                         target=target_f1,
                                         threshold=args.ths)

        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))
        f1_scores.update(f1_scores_batch, input.size(0))
        map_scores_wt.update(averaged_maps_wt, input.size(0))
        map_scores_wt_seed.update(averaged_maps_wt_seed, input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        #============ TensorBoard logging ============#
        # Log the scalar values
        if args.tensorboard:
            info = {
                'valid_loss': losses.val,
                'f1_score_val': f1_scores.val,
                'map_wt': averaged_maps_wt,
                'map_wt_seed': averaged_maps_wt_seed,
            }
            for tag, value in info.items():
                logger.scalar_summary(tag, value, valid_minib_counter)

        valid_minib_counter += 1

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'F1 {f1_scores.val:.4f} ({f1_scores.avg:.4f})\t'
                  'MAP1 {map_scores_wt.val:.4f} ({map_scores_wt.avg:.4f})\t'
                  'MAP2 {map_scores_wt_seed.val:.4f} ({map_scores_wt_seed.avg:.4f})\t'.format(
                      i, len(val_loader), batch_time=batch_time, loss=losses,
                      f1_scores=f1_scores, map_scores_wt=map_scores_wt,
                      map_scores_wt_seed=map_scores_wt_seed))

    print(' * Avg Val Loss {loss.avg:.4f}'.format(loss=losses))
    print(' * Avg F1 Score {f1_scores.avg:.4f}'.format(f1_scores=f1_scores))
    print(' * Avg MAP1 Score {map_scores_wt.avg:.4f}'.format(map_scores_wt=map_scores_wt))
    print(' * Avg MAP2 Score {map_scores_wt_seed.avg:.4f}'.format(map_scores_wt_seed=map_scores_wt_seed))

    return losses.avg, f1_scores.avg, map_scores_wt.avg, map_scores_wt_seed.avg
def __init__(self):
    super(_Gate, self).__init__()
    self.one = torch.tensor([1.], requires_grad=False, device='cuda:0')
    self.fc = nn.Linear(1, 1, bias=False)
    self.fc.weight.data.fill_(0.)
    self.sig = nn.Sigmoid()
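# A minimal sketch of a matching forward pass (hypothetical, not part of the
# original snippet): feeding the constant 1.0 through the zero-initialized
# linear layer gives sigmoid(0) = 0.5 at the start of training, and the single
# weight then learns to open or close the gate.
def forward(self):
    return self.sig(self.fc(self.one))  # scalar gate value in (0, 1)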
def __init__(self, num_classes=10):
    super(UNet, self).__init__()
    self.layer1 = nn.Sequential(
        nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(8),
        nn.ReLU())
    self.layer2 = nn.Sequential(
        nn.Conv2d(8, 8, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(8),
        nn.ReLU())
    self.layer3 = nn.Sequential(nn.MaxPool2d(kernel_size=2))
    self.layer4 = nn.Sequential(
        nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(16),
        nn.ReLU())
    self.layer5 = nn.Sequential(
        nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(16),
        nn.ReLU())
    self.layer6 = nn.Sequential(nn.MaxPool2d(kernel_size=2))
    self.layer7 = nn.Sequential(
        nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU())
    self.layer8 = nn.Sequential(
        nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU())
    self.layer9 = nn.Sequential(nn.MaxPool2d(kernel_size=2))
    self.layer10 = nn.Sequential(
        nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU())
    self.layer11 = nn.Sequential(
        nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU())
    self.layer12 = nn.Sequential(nn.MaxPool2d(kernel_size=2))
    self.layer13 = nn.Sequential(
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU())
    self.layer14 = nn.Sequential(
        nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU())
    self.layer15 = nn.Sequential(
        nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2, padding=0),
        nn.BatchNorm2d(64),
        nn.ReLU())
    self.layer16 = nn.Sequential(
        nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU())
    self.layer17 = nn.Sequential(
        nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU())
    self.layer18 = nn.Sequential(
        nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2, padding=0),
        nn.BatchNorm2d(32),
        nn.ReLU())
    self.layer19 = nn.Sequential(
        nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU())
    self.layer20 = nn.Sequential(
        nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU())
    self.layer21 = nn.Sequential(
        nn.ConvTranspose2d(32, 16, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(16),
        nn.ReLU())
    self.layer22 = nn.Sequential(
        nn.Conv2d(48, 16, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(16),
        nn.ReLU())
    self.layer23 = nn.Sequential(
        nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(16),
        nn.ReLU())
    self.layer24 = nn.Sequential(
        nn.ConvTranspose2d(16, 8, kernel_size=4, stride=4, padding=0),
        nn.BatchNorm2d(8),
        nn.ReLU())
    self.layer25 = nn.Sequential(
        nn.Conv2d(16, 8, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(8),
        nn.ReLU())
    self.layer26 = nn.Sequential(
        nn.Conv2d(8, 8, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(8),
        nn.ReLU())
    self.layer27 = nn.Sequential(
        nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(1),
        nn.Sigmoid())
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
             groups=1, width_per_group=64, replace_stride_with_dilation=None,
             norm_layer=None, end2end=True):
    super(AFMResNet, self).__init__()
    if norm_layer is None:
        norm_layer = nn.BatchNorm2d
    self._norm_layer = norm_layer

    self.inplanes = 64
    self.dilation = 1
    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(
                             replace_stride_with_dilation))
    self.groups = groups
    self.base_width = width_per_group
    self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                   dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                   dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                   dilate=replace_stride_with_dilation[2])
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

    """ weight """
    self.fc_a = nn.Linear(512 * block.expansion, int(384 * block.expansion / 4))
    self.fc_b = nn.Linear(512 * block.expansion, int(384 * block.expansion / 4))
    self.fc_weight = nn.Linear(int(384 * block.expansion / 4), 1)
    self.fc_weight_sigmoid = nn.Sigmoid()

    """ classifier """
    self.fc_classifier = nn.Linear(512 * block.expansion, num_classes)

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Zero-initialize the last BN in each residual branch, so that the residual
    # branch starts with zeros, and each residual block behaves like an identity.
    # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                nn.init.constant_(m.bn3.weight, 0)
            elif isinstance(m, BasicBlock):
                nn.init.constant_(m.bn2.weight, 0)
def __init__(self, word_to_ix, hidden_dim=128, num_heads=2, dim_feedforward=2048,
             dim_k=96, dim_v=96, dim_q=96, max_length=43):
    '''
    :param word_to_ix: dictionary mapping words to unique indices
    :param hidden_dim: the dimensionality of the output embeddings that go into the final layer
    :param num_heads: the number of Transformer heads to use
    :param dim_feedforward: the dimension of the feedforward network model
    :param dim_k: the dimensionality of the key vectors
    :param dim_q: the dimensionality of the query vectors
    :param dim_v: the dimensionality of the value vectors
    '''
    super(ClassificationTransformer, self).__init__()
    assert hidden_dim % num_heads == 0

    self.num_heads = num_heads
    self.word_embedding_dim = hidden_dim
    self.hidden_dim = hidden_dim
    self.dim_feedforward = dim_feedforward
    self.max_length = max_length
    self.vocab_size = len(word_to_ix)

    self.dim_k = dim_k
    self.dim_v = dim_v
    self.dim_q = dim_q

    seed_torch(0)

    ##############################################################################
    # Deliverable 1: Initialize what you need for the embedding lookup (1 line). #
    # Hint: you will need to use the max_length parameter above.                 #
    ##############################################################################
    self.token_embed = nn.Embedding(self.vocab_size, self.hidden_dim)
    self.position_embed = nn.Embedding(self.max_length, self.hidden_dim)
    ##############################################################################
    #                             END OF YOUR CODE                               #
    ##############################################################################

    ##############################################################################
    # Deliverable 2: Initializations for multi-head self-attention.              #
    # You don't need to do anything here. Do not modify this code.               #
    ##############################################################################
    # Head #1
    self.k1 = nn.Linear(self.hidden_dim, self.dim_k)
    self.v1 = nn.Linear(self.hidden_dim, self.dim_v)
    self.q1 = nn.Linear(self.hidden_dim, self.dim_q)

    # Head #2
    self.k2 = nn.Linear(self.hidden_dim, self.dim_k)
    self.v2 = nn.Linear(self.hidden_dim, self.dim_v)
    self.q2 = nn.Linear(self.hidden_dim, self.dim_q)

    self.softmax = nn.Softmax(dim=2)
    self.attention_head_projection = nn.Linear(self.dim_v * self.num_heads, self.hidden_dim)
    self.norm_mh = nn.LayerNorm(self.hidden_dim)

    ##############################################################################
    # Deliverable 3: Initialize what you need for the feed-forward layer.        #
    # Don't forget the layer normalization.                                      #
    ##############################################################################
    self.fc1 = nn.Linear(self.hidden_dim, self.dim_feedforward)
    self.relu1 = nn.ReLU()
    self.fc2 = nn.Linear(self.dim_feedforward, self.hidden_dim)
    ##############################################################################
    #                             END OF YOUR CODE                               #
    ##############################################################################

    ##############################################################################
    # Deliverable 4: Initialize what you need for the final layer (1-2 lines).   #
    ##############################################################################
    self.finalfc = nn.Linear(self.hidden_dim, 1)
    self.sigmoid = nn.Sigmoid()
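# A minimal sketch of how the two embedding tables are typically combined in
# the embedding step (hypothetical helper, not one of the graded deliverables);
# inputs has shape [N, T] with T <= max_length:
def embed(self, inputs):
    positions = torch.arange(inputs.shape[1], device=inputs.device)   # [T]
    return self.token_embed(inputs) + self.position_embed(positions)  # [N, T, hidden_dim]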
def make_activation_net(token: Token) -> nn.Module:
    return nn.Sequential(
        nn.Linear(D_agent_state, 1),
        nn.Sigmoid(),
    )
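# Usage sketch (`token` and D_agent_state come from the surrounding module;
# the batch below is hypothetical): the net maps agent-state vectors to a
# single activation in (0, 1).
net = make_activation_net(token)
states = torch.randn(4, D_agent_state)  # batch of 4 agent states
activations = net(states)               # shape [4, 1], values in (0, 1)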
def __init__(self, dec_hidden_size):
    super(Filter, self).__init__()
    self.arfa = nn.Linear(dec_hidden_size, 1, bias=True)
    self.sigmoid = nn.Sigmoid()
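# A plausible forward for this module (hypothetical, inferred from the layers):
# each decoder hidden state is squashed to a scalar gate in (0, 1).
def forward(self, dec_hidden):
    return self.sigmoid(self.arfa(dec_hidden))  # [..., 1]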
def __init__(self):
    super(Loss, self).__init__()
    self.classify_loss = nn.BCELoss()
    self.sigmoid = nn.Sigmoid()
    self.regress_loss = nn.SmoothL1Loss()
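# A minimal sketch of how these pieces commonly combine (hypothetical forward;
# the original only defines the submodules): the sigmoid turns classification
# logits into probabilities for BCELoss, while SmoothL1Loss handles regression.
def forward(self, cls_logits, cls_targets, reg_preds, reg_targets):
    cls_loss = self.classify_loss(self.sigmoid(cls_logits), cls_targets)
    reg_loss = self.regress_loss(reg_preds, reg_targets)
    return cls_loss + reg_loss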
def __init__(self, in_channels, out_channels):
    super(ConvBlock_mix2, self).__init__()

    # self.conv1 = nn.Conv2d(in_channels=in_channels,
    #                        out_channels=out_channels,
    #                        kernel_size=(3, 3), stride=(1, 1),
    #                        padding=(1, 1), bias=False)
    # self.conv2 = nn.Conv2d(in_channels=out_channels,
    #                        out_channels=out_channels,
    #                        kernel_size=(3, 3), stride=(1, 1),
    #                        padding=(1, 1), bias=False)
    self.conv1 = Conv2dSame(in_channels=in_channels,
                            out_channels=out_channels,
                            kernel_size=3, bias=False)
    self.conv2 = Conv2dSame(in_channels=out_channels,
                            out_channels=out_channels,
                            kernel_size=3, bias=False)
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.bn2 = nn.BatchNorm2d(out_channels)

    if out_channels == 64:
        # self.globalAvgPool = nn.AvgPool2d((100, 40), stride=1)
        self.globalAvgPool2 = nn.AvgPool2d((100, 64), stride=1)
        self.globalAvgPool3 = nn.AvgPool2d((64, 40), stride=1)
        self.fc1_2 = nn.Linear(in_features=40, out_features=40)
        self.fc2_2 = nn.Linear(in_features=40, out_features=40)
    elif out_channels == 128:
        # self.globalAvgPool = nn.AvgPool2d((50, 20), stride=1)
        self.globalAvgPool2 = nn.AvgPool2d((50, 128), stride=1)
        self.globalAvgPool3 = nn.AvgPool2d((128, 20), stride=1)
        self.fc1_2 = nn.Linear(in_features=20, out_features=20)
        self.fc2_2 = nn.Linear(in_features=20, out_features=20)
    elif out_channels == 256:
        # self.globalAvgPool = nn.AvgPool2d((25, 10), stride=1)
        self.globalAvgPool2 = nn.AvgPool2d((25, 256), stride=1)
        self.globalAvgPool3 = nn.AvgPool2d((256, 10), stride=1)
        self.fc1_2 = nn.Linear(in_features=10, out_features=10)
        self.fc2_2 = nn.Linear(in_features=10, out_features=10)
    elif out_channels == 512:
        # self.globalAvgPool = nn.AvgPool2d((12, 5), stride=1)
        self.globalAvgPool2 = nn.AvgPool2d((12, 512), stride=1)
        self.globalAvgPool3 = nn.AvgPool2d((512, 5), stride=1)
        self.fc1_2 = nn.Linear(in_features=5, out_features=5)
        self.fc2_2 = nn.Linear(in_features=5, out_features=5)

    # self.fc1 = nn.Linear(in_features=out_channels, out_features=round(out_channels / 16))
    # self.fc2 = nn.Linear(in_features=round(out_channels / 16), out_features=out_channels)
    self.lstm = nn.LSTM(input_size=1, hidden_size=1, num_layers=1,
                        batch_first=True, bidirectional=False)
    self.sigmoid = nn.Sigmoid()
    self.sigmoid2 = nn.Sigmoid()
    self.downsample = conv1x1(in_channels, out_channels)
    self.bn = nn.BatchNorm2d(out_channels)
    self.init_weights()
def __init__(self, latent_dim, seq_len, hidden_dim, n_layers, rnn='LSTM',
             n_feats=3, dropout=0., return_norm=True, latent_mode='repeat'):
    super(VAE_LSTM, self).__init__()
    self.latent_dim = latent_dim
    self.return_norm = return_norm
    self.seq_len = seq_len
    self.latent_mode = latent_mode

    self.latent_convt1 = nn.Sequential(
        nn.ConvTranspose1d(latent_dim, latent_dim, kernel_size=seq_len, dilation=1),
        nn.ReLU())
    self.latent_linear = nn.Sequential(
        nn.Linear(latent_dim, latent_dim * seq_len),
        nn.ReLU())

    ## batch_first --> [batch, seq, feature]
    if rnn == 'LSTM':
        self.enc_lstm = nn.LSTM(n_feats, hidden_dim, n_layers, batch_first=True,
                                dropout=dropout, bidirectional=False)
        self.dec_lstm = nn.LSTM(latent_dim, hidden_dim, n_layers, batch_first=True,
                                dropout=dropout, bidirectional=False)
    elif rnn == 'GRU':
        self.enc_lstm = nn.GRU(n_feats, hidden_dim, n_layers, batch_first=True,
                               dropout=dropout, bidirectional=False)
        self.dec_lstm = nn.GRU(latent_dim, hidden_dim, n_layers, batch_first=True,
                               dropout=dropout, bidirectional=False)

    self.enc_linear1 = nn.Linear(seq_len * hidden_dim, latent_dim)
    self.enc_linear2 = nn.Linear(seq_len * hidden_dim, latent_dim)
    self.dec_linear = nn.Linear(hidden_dim, n_feats)
    self.init_weights()
    self.tanh = nn.Tanh()
    self.relu = nn.ReLU()
    self.sigmoid = nn.Sigmoid()
def skip(num_input_channels=2, num_output_channels=3,
         num_channels_down=[16, 32, 64, 128, 128],
         num_channels_up=[16, 32, 64, 128, 128],
         num_channels_skip=[4, 4, 4, 4, 4],
         filter_size_down=3, filter_size_up=3, filter_skip_size=1,
         need_sigmoid=True, need_bias=True,
         pad='zero', upsample_mode='nearest', downsample_mode='stride',
         act_fun='LeakyReLU', need1x1_up=True):
    """Assembles encoder-decoder with skip connections.

    Arguments:
        act_fun: Either string 'LeakyReLU|Swish|ELU|none' or module (e.g. nn.ReLU)
        pad (string): zero|reflection (default: 'zero')
        upsample_mode (string): 'nearest|bilinear' (default: 'nearest')
        downsample_mode (string): 'stride|avg|max|lanczos2' (default: 'stride')
    """
    assert len(num_channels_down) \
        == len(num_channels_up) == len(num_channels_skip)

    n_scales = len(num_channels_down)

    if not (isinstance(upsample_mode, list) or isinstance(upsample_mode, tuple)):
        upsample_mode = [upsample_mode] * n_scales

    if not (isinstance(downsample_mode, list) or isinstance(downsample_mode, tuple)):
        downsample_mode = [downsample_mode] * n_scales

    if not (isinstance(filter_size_down, list) or isinstance(filter_size_down, tuple)):
        filter_size_down = [filter_size_down] * n_scales

    if not (isinstance(filter_size_up, list) or isinstance(filter_size_up, tuple)):
        filter_size_up = [filter_size_up] * n_scales

    last_scale = n_scales - 1

    # cur_depth = None

    model = nn.Sequential()
    model_tmp = model

    input_depth = num_input_channels
    for i in range(len(num_channels_down)):
        deeper = nn.Sequential()
        skip = nn.Sequential()

        if num_channels_skip[i] != 0:
            model_tmp.add(Concat(1, skip, deeper))
        else:
            model_tmp.add(deeper)

        model_tmp.add(
            bn(num_channels_skip[i] +
               (num_channels_up[i + 1] if i < last_scale else num_channels_down[i])))

        if num_channels_skip[i] != 0:
            skip.add(conv(input_depth, num_channels_skip[i], filter_skip_size,
                          bias=need_bias, pad=pad))
            skip.add(bn(num_channels_skip[i]))
            skip.add(act(act_fun))

        # skip.add(Concat(2, GenNoise(nums_noise[i]), skip_part))

        deeper.add(conv(input_depth, num_channels_down[i], filter_size_down[i], 2,
                        bias=need_bias, pad=pad, downsample_mode=downsample_mode[i]))
        deeper.add(bn(num_channels_down[i]))
        deeper.add(act(act_fun))

        deeper.add(conv(num_channels_down[i], num_channels_down[i], filter_size_down[i],
                        bias=need_bias, pad=pad))
        deeper.add(bn(num_channels_down[i]))
        deeper.add(act(act_fun))

        deeper_main = nn.Sequential()

        if i == len(num_channels_down) - 1:
            # The deepest
            k = num_channels_down[i]
        else:
            deeper.add(deeper_main)
            k = num_channels_up[i + 1]

        deeper.add(nn.Upsample(scale_factor=2, mode=upsample_mode[i],
                               align_corners=False))

        model_tmp.add(conv(num_channels_skip[i] + k, num_channels_up[i],
                           filter_size_up[i], 1, bias=need_bias, pad=pad))
        model_tmp.add(bn(num_channels_up[i]))
        model_tmp.add(act(act_fun))

        if need1x1_up:
            model_tmp.add(conv(num_channels_up[i], num_channels_up[i], 1,
                               bias=need_bias, pad=pad))
            model_tmp.add(bn(num_channels_up[i]))
            model_tmp.add(act(act_fun))

        input_depth = num_channels_down[i]
        model_tmp = deeper_main

    model.add(conv(num_channels_up[0], num_output_channels, 1,
                   bias=need_bias, pad=pad))
    if need_sigmoid:
        model.add(nn.Sigmoid())

    return model
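# Usage sketch (assumes the repo's conv/bn/act helpers and the Sequential.add
# shim used above are in scope): with five scales the input's spatial size
# must be divisible by 2**5 = 32.
net = skip(num_input_channels=2, num_output_channels=3)
x = torch.randn(1, 2, 64, 64)
y = net(x)  # shape [1, 3, 64, 64]; values in (0, 1) because need_sigmoid=True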
def forward(self, q_ids=None, char_ids=None, word_ids=None, token_type_ids=None,
            subject_ids=None, subject_labels=None, object_labels=None,
            eval_file=None, is_eval=False):
    mask = char_ids != 0
    seq_mask = char_ids.eq(0)

    char_emb = self.char_emb(char_ids)
    word_emb = self.word_convert_char(self.word_emb(word_ids))
    # word_emb = self.word_emb(word_ids)
    emb = char_emb + word_emb
    # emb = char_emb
    # subject_encoder = sent_encoder + self.token_entity_emb(token_type_id)
    sent_encoder = self.first_sentence_encoder(emb, seq_mask)

    if not is_eval:
        # subject_encoder = self.token_entity_emb(token_type_ids)
        # context_encoder = bert_encoder + subject_encoder
        sub_start_encoder = batch_gather(sent_encoder, subject_ids[:, 0])
        sub_end_encoder = batch_gather(sent_encoder, subject_ids[:, 1])
        subject = torch.cat([sub_start_encoder, sub_end_encoder], 1)
        context_encoder = self.LayerNorm(sent_encoder, subject)
        context_encoder = self.transformer_encoder(
            context_encoder.transpose(1, 0),
            src_key_padding_mask=seq_mask).transpose(0, 1)

        sub_preds = self.subject_dense(sent_encoder)
        po_preds = self.po_dense(context_encoder).reshape(
            char_ids.size(0), -1, self.classes_num, 2)

        subject_loss = self.loss_fct(sub_preds, subject_labels)
        subject_loss = subject_loss.mean(2)
        subject_loss = torch.sum(subject_loss * mask.float()) / torch.sum(mask.float())

        po_loss = self.loss_fct(po_preds, object_labels)
        po_loss = torch.sum(po_loss.mean(3), 2)
        po_loss = torch.sum(po_loss * mask.float()) / torch.sum(mask.float())

        loss = subject_loss + po_loss

        return loss
    else:
        subject_preds = nn.Sigmoid()(self.subject_dense(sent_encoder))
        answer_list = list()
        for qid, sub_pred in zip(q_ids.cpu().numpy(), subject_preds.cpu().numpy()):
            context = eval_file[qid].context
            start = np.where(sub_pred[:, 0] > 0.5)[0]
            end = np.where(sub_pred[:, 1] > 0.4)[0]
            subjects = []
            for i in start:
                j = end[end >= i]
                if i >= len(context):
                    continue
                if len(j) > 0:
                    j = j[0]
                    if j >= len(context):
                        continue
                    subjects.append((i, j))
            answer_list.append(subjects)

        qid_ids, sent_encoders, pass_ids, subject_ids, token_type_ids = [], [], [], [], []
        for i, subjects in enumerate(answer_list):
            if subjects:
                qid = q_ids[i].unsqueeze(0).expand(len(subjects))
                pass_tensor = char_ids[i, :].unsqueeze(0).expand(len(subjects), char_ids.size(1))
                new_sent_encoder = sent_encoder[i, :, :].unsqueeze(0).expand(
                    len(subjects), sent_encoder.size(1), sent_encoder.size(2))

                token_type_id = torch.zeros((len(subjects), char_ids.size(1)), dtype=torch.long)
                for index, (start, end) in enumerate(subjects):
                    token_type_id[index, start:end + 1] = 1

                qid_ids.append(qid)
                pass_ids.append(pass_tensor)
                subject_ids.append(torch.tensor(subjects, dtype=torch.long))
                sent_encoders.append(new_sent_encoder)
                token_type_ids.append(token_type_id)

        if len(qid_ids) == 0:
            # print('len(qid_list)==0:')
            qid_tensor = torch.tensor([-1, -1], dtype=torch.long).to(sent_encoder.device)
            return qid_tensor, qid_tensor, qid_tensor

        # print('len(qid_list)!=========================0:')
        qids = torch.cat(qid_ids).to(sent_encoder.device)
        pass_ids = torch.cat(pass_ids).to(sent_encoder.device)
        sent_encoders = torch.cat(sent_encoders).to(sent_encoder.device)
        # token_type_ids = torch.cat(token_type_ids).to(bert_encoder.device)
        subject_ids = torch.cat(subject_ids).to(sent_encoder.device)

        flag = False
        split_heads = 1024

        sent_encoders_ = torch.split(sent_encoders, split_heads, dim=0)
        pass_ids_ = torch.split(pass_ids, split_heads, dim=0)
        # token_type_ids_ = torch.split(token_type_ids, split_heads, dim=0)
        subject_encoder_ = torch.split(subject_ids, split_heads, dim=0)

        # print('len(qid_list)!=========================1:')
        po_preds = list()
        for i in range(len(subject_encoder_)):
            sent_encoders = sent_encoders_[i]
            # token_type_ids = token_type_ids_[i]
            pass_ids = pass_ids_[i]
            subject_encoder = subject_encoder_[i]

            if sent_encoders.size(0) == 1:
                flag = True
                # print('flag = True**********')
                sent_encoders = sent_encoders.expand(2, sent_encoders.size(1), sent_encoders.size(2))
                subject_encoder = subject_encoder.expand(2, subject_encoder.size(1))
                pass_ids = pass_ids.expand(2, pass_ids.size(1))

            # print('len(qid_list)!=========================2:')
            sub_start_encoder = batch_gather(sent_encoders, subject_encoder[:, 0])
            sub_end_encoder = batch_gather(sent_encoders, subject_encoder[:, 1])
            subject = torch.cat([sub_start_encoder, sub_end_encoder], 1)
            context_encoder = self.LayerNorm(sent_encoders, subject)
            context_encoder = self.transformer_encoder(
                context_encoder.transpose(1, 0),
                src_key_padding_mask=pass_ids.eq(0)).transpose(0, 1)
            # print('len(qid_list)!=========================3')
            # context_encoder = self.LayerNorm(context_encoder)

            po_pred = self.po_dense(context_encoder).reshape(
                subject_encoder.size(0), -1, self.classes_num, 2)

            if flag:
                po_pred = po_pred[1, :, :, :].unsqueeze(0)

            po_preds.append(po_pred)

        po_tensor = torch.cat(po_preds).to(qids.device)
        po_tensor = nn.Sigmoid()(po_tensor)
        return qids, subject_ids, po_tensor
conv = nn.Conv2d(3, 5, kernel_size=5, stride=2, padding=1)
save_data_and_model("convolution", input, conv)

input = Variable(torch.randn(1, 3, 10, 10))
deconv = nn.ConvTranspose2d(3, 5, kernel_size=5, stride=2, padding=1)
save_data_and_model("deconvolution", input, deconv)

input = Variable(torch.randn(2, 3))
linear = nn.Linear(3, 4, bias=True)
linear.eval()
save_data_and_model("linear", input, linear)

input = Variable(torch.randn(2, 3, 12, 18))
maxpooling_sigmoid = nn.Sequential(
    nn.MaxPool2d(kernel_size=4, stride=2, padding=(1, 2), dilation=1),
    nn.Sigmoid()
)
save_data_and_model("maxpooling_sigmoid", input, maxpooling_sigmoid)

input = Variable(torch.randn(1, 3, 10, 20))
conv2 = nn.Sequential(
    nn.Conv2d(3, 6, kernel_size=(5, 3), stride=1, padding=1),
    nn.Conv2d(6, 4, kernel_size=5, stride=2, padding=(0, 2))
)
save_data_and_model("two_convolution", input, conv2)

input = Variable(torch.randn(1, 3, 10, 20))
deconv2 = nn.Sequential(
    nn.ConvTranspose2d(3, 6, kernel_size=(5, 3), stride=1, padding=1),
    nn.ConvTranspose2d(6, 4, kernel_size=5, stride=2, padding=(0, 2))
)
def __init__(self):
    super(D, self).__init__()
    self.main = nn.Sequential(nn.Conv2d(1024, 1, 1), nn.Sigmoid())
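# Usage sketch (hypothetical input): the 1x1 convolution plus sigmoid turns a
# [N, 1024, H, W] feature map into a [N, 1, H, W] map of per-location
# probabilities, as in patch-style GAN discriminators.
d = D()
features = torch.randn(2, 1024, 4, 4)
probs = d.main(features)  # shape [2, 1, 4, 4], values in (0, 1)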
def main():
    # Setup Logging
    log_dir = "{}/models/{}/".format(args.dump_location, args.exp_name)
    dump_dir = "{}/dump/{}/".format(args.dump_location, args.exp_name)

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.exists("{}/images/".format(dump_dir)):
        os.makedirs("{}/images/".format(dump_dir))

    logging.basicConfig(filename=log_dir + 'train.log', level=logging.INFO)
    print("Dumping at {}".format(log_dir))
    print(args)
    logging.info(args)

    # Logging and loss variables
    num_scenes = args.num_processes
    num_episodes = int(args.num_episodes)
    device = args.device = torch.device("cuda:0" if args.cuda else "cpu")
    policy_loss = 0

    best_cost = 100000
    costs = deque(maxlen=1000)
    exp_costs = deque(maxlen=1000)
    pose_costs = deque(maxlen=1000)

    g_masks = torch.ones(num_scenes).float().to(device)
    l_masks = torch.zeros(num_scenes).float().to(device)

    best_local_loss = np.inf
    best_g_reward = -np.inf

    if args.eval:
        traj_lengths = args.max_episode_length // args.num_local_steps
        explored_area_log = np.zeros((num_scenes, num_episodes, traj_lengths))
        explored_ratio_log = np.zeros((num_scenes, num_episodes, traj_lengths))

    g_episode_rewards = deque(maxlen=1000)

    l_action_losses = deque(maxlen=1000)

    g_value_losses = deque(maxlen=1000)
    g_action_losses = deque(maxlen=1000)
    g_dist_entropies = deque(maxlen=1000)

    per_step_g_rewards = deque(maxlen=1000)

    g_process_rewards = np.zeros((num_scenes))

    # Starting environments
    torch.set_num_threads(1)
    envs = make_vec_envs(args)
    obs, infos = envs.reset()

    # Initialize map variables
    ### Full map consists of 4 channels containing the following:
    ### 1. Obstacle Map
    ### 2. Explored Area
    ### 3. Current Agent Location
    ### 4. Past Agent Locations

    torch.set_grad_enabled(False)

    # Calculating full and local map sizes
    map_size = args.map_size_cm // args.map_resolution
    full_w, full_h = map_size, map_size
    local_w, local_h = int(full_w / args.global_downscaling), \
        int(full_h / args.global_downscaling)

    # Initializing full and local map
    full_map = torch.zeros(num_scenes, 4, full_w, full_h).float().to(device)
    local_map = torch.zeros(num_scenes, 4, local_w, local_h).float().to(device)

    # Initial full and local pose
    full_pose = torch.zeros(num_scenes, 3).float().to(device)
    local_pose = torch.zeros(num_scenes, 3).float().to(device)

    # Origin of local map
    origins = np.zeros((num_scenes, 3))

    # Local Map Boundaries
    lmb = np.zeros((num_scenes, 4)).astype(int)

    ### Planner pose inputs has 7 dimensions
    ### 1-3 store continuous global agent location
    ### 4-7 store local map boundaries
    planner_pose_inputs = np.zeros((num_scenes, 7))

    def init_map_and_pose():
        full_map.fill_(0.)
        full_pose.fill_(0.)
        full_pose[:, :2] = args.map_size_cm / 100.0 / 2.0

        locs = full_pose.cpu().numpy()
        planner_pose_inputs[:, :3] = locs
        for e in range(num_scenes):
            r, c = locs[e, 1], locs[e, 0]
            loc_r, loc_c = [
                int(r * 100.0 / args.map_resolution),
                int(c * 100.0 / args.map_resolution)
            ]

            full_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0

            lmb[e] = get_local_map_boundaries(
                (loc_r, loc_c), (local_w, local_h), (full_w, full_h))

            planner_pose_inputs[e, 3:] = lmb[e]
            origins[e] = [
                lmb[e][2] * args.map_resolution / 100.0,
                lmb[e][0] * args.map_resolution / 100.0, 0.
            ]

        for e in range(num_scenes):
            local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]]
            local_pose[e] = full_pose[e] - \
                torch.from_numpy(origins[e]).to(device).float()

    init_map_and_pose()

    # Global policy observation space
    g_observation_space = gym.spaces.Box(0, 1, (8, local_w, local_h), dtype='uint8')

    # Global policy action space
    g_action_space = gym.spaces.Box(low=0.0, high=1.0, shape=(2,), dtype=np.float32)

    # Local policy observation space
    l_observation_space = gym.spaces.Box(
        0, 255, (3, args.frame_width, args.frame_width), dtype='uint8')

    # Local and Global policy recurrent layer sizes
    l_hidden_size = args.local_hidden_size
    g_hidden_size = args.global_hidden_size

    # slam
    nslam_module = Neural_SLAM_Module(args).to(device)
    slam_optimizer = get_optimizer(nslam_module.parameters(), args.slam_optimizer)

    # Global policy
    g_policy = RL_Policy(g_observation_space.shape, g_action_space,
                         base_kwargs={
                             'recurrent': args.use_recurrent_global,
                             'hidden_size': g_hidden_size,
                             'downscaling': args.global_downscaling
                         }).to(device)
    g_agent = algo.PPO(g_policy, args.clip_param, args.ppo_epoch,
                       args.num_mini_batch, args.value_loss_coef,
                       args.entropy_coef, lr=args.global_lr, eps=args.eps,
                       max_grad_norm=args.max_grad_norm)

    # Local policy
    l_policy = Local_IL_Policy(
        l_observation_space.shape, envs.action_space.n,
        recurrent=args.use_recurrent_local,
        hidden_size=l_hidden_size,
        deterministic=args.use_deterministic_local).to(device)
    local_optimizer = get_optimizer(l_policy.parameters(), args.local_optimizer)

    # Storage
    g_rollouts = GlobalRolloutStorage(args.num_global_steps, num_scenes,
                                      g_observation_space.shape,
                                      g_action_space, g_policy.rec_state_size,
                                      1).to(device)

    slam_memory = FIFOMemory(args.slam_memory_size)

    # Loading model
    if args.load_slam != "0":
        print("Loading slam {}".format(args.load_slam))
        state_dict = torch.load(args.load_slam,
                                map_location=lambda storage, loc: storage)
        nslam_module.load_state_dict(state_dict)
        if not args.train_slam:
            nslam_module.eval()

    if args.load_global != "0":
        print("Loading global {}".format(args.load_global))
        state_dict = torch.load(args.load_global,
                                map_location=lambda storage, loc: storage)
        g_policy.load_state_dict(state_dict)
        if not args.train_global:
            g_policy.eval()

    if args.load_local != "0":
        print("Loading local {}".format(args.load_local))
        state_dict = torch.load(args.load_local,
                                map_location=lambda storage, loc: storage)
        l_policy.load_state_dict(state_dict)
        if not args.train_local:
            l_policy.eval()

    # Predict map from frame 1:
    poses = torch.from_numpy(
        np.asarray([
            infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes)
        ])).float().to(device)

    _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \
        nslam_module(obs, obs, poses,
                     local_map[:, 0, :, :],
                     local_map[:, 1, :, :],
                     local_pose)

    # Compute Global policy input
    locs = local_pose.cpu().numpy()
    global_input = torch.zeros(num_scenes, 8, local_w, local_h)
    global_orientation = torch.zeros(num_scenes, 1).long()

    for e in range(num_scenes):
        r, c = locs[e, 1], locs[e, 0]
        loc_r, loc_c = [
            int(r * 100.0 / args.map_resolution),
            int(c * 100.0 / args.map_resolution)
        ]

        local_map[e, 2:, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.
        global_orientation[e] = int((locs[e, 2] + 180.0) / 5.)

    global_input[:, 0:4, :, :] = local_map.detach()
    global_input[:, 4:, :, :] = nn.MaxPool2d(args.global_downscaling)(full_map)

    g_rollouts.obs[0].copy_(global_input)
    g_rollouts.extras[0].copy_(global_orientation)

    # Run Global Policy (global_goals = Long-Term Goal)
    g_value, g_action, g_action_log_prob, g_rec_states = \
        g_policy.act(
            g_rollouts.obs[0],
            g_rollouts.rec_states[0],
            g_rollouts.masks[0],
            extras=g_rollouts.extras[0],
            deterministic=False
        )

    cpu_actions = nn.Sigmoid()(g_action).cpu().numpy()
    global_goals = [[int(action[0] * local_w), int(action[1] * local_h)]
                    for action in cpu_actions]

    # Compute planner inputs
    planner_inputs = [{} for e in range(num_scenes)]
    for e, p_input in enumerate(planner_inputs):
        p_input['goal'] = global_goals[e]
        p_input['map_pred'] = global_input[e, 0, :, :].detach().cpu().numpy()
        p_input['exp_pred'] = global_input[e, 1, :, :].detach().cpu().numpy()
        p_input['pose_pred'] = planner_pose_inputs[e]

    # Output stores local goals as well as the ground-truth action
    output = envs.get_short_term_goal(planner_inputs)

    last_obs = obs.detach()
    local_rec_states = torch.zeros(num_scenes, l_hidden_size).to(device)
    start = time.time()

    total_num_steps = -1
    g_reward = 0

    torch.set_grad_enabled(False)

    for ep_num in range(num_episodes):
        for step in range(args.max_episode_length):
            total_num_steps += 1

            g_step = (step // args.num_local_steps) % args.num_global_steps
            eval_g_step = step // args.num_local_steps + 1
            l_step = step % args.num_local_steps

            # ------------------------------------------------------------------
            # Local Policy
            del last_obs
            last_obs = obs.detach()
            local_masks = l_masks
            local_goals = output[:, :-1].to(device).long()

            if args.train_local:
                torch.set_grad_enabled(True)

            action, action_prob, local_rec_states = l_policy(
                obs,
                local_rec_states,
                local_masks,
                extras=local_goals,
            )

            if args.train_local:
                action_target = output[:, -1].long().to(device)
                policy_loss += nn.CrossEntropyLoss()(action_prob, action_target)
                torch.set_grad_enabled(False)
            l_action = action.cpu()
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Env step
            obs, rew, done, infos = envs.step(l_action)

            l_masks = torch.FloatTensor([0 if x else 1 for x in done]).to(device)
            g_masks *= l_masks
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Reinitialize variables when episode ends
            if step == args.max_episode_length - 1:  # Last episode step
                init_map_and_pose()
                del last_obs
                last_obs = obs.detach()
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Neural SLAM Module
            if args.train_slam:
                # Add frames to memory
                for env_idx in range(num_scenes):
                    env_obs = obs[env_idx].to("cpu")
                    env_poses = torch.from_numpy(
                        np.asarray(
                            infos[env_idx]['sensor_pose'])).float().to("cpu")
                    env_gt_fp_projs = torch.from_numpy(
                        np.asarray(infos[env_idx]['fp_proj'])).unsqueeze(
                            0).float().to("cpu")
                    env_gt_fp_explored = torch.from_numpy(
                        np.asarray(infos[env_idx]['fp_explored'])).unsqueeze(
                            0).float().to("cpu")
                    env_gt_pose_err = torch.from_numpy(
                        np.asarray(
                            infos[env_idx]['pose_err'])).float().to("cpu")
                    slam_memory.push(
                        (last_obs[env_idx].cpu(), env_obs, env_poses),
                        (env_gt_fp_projs, env_gt_fp_explored, env_gt_pose_err))

            poses = torch.from_numpy(
                np.asarray([
                    infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes)
                ])).float().to(device)

            _, _, local_map[:, 0, :, :], local_map[:, 1, :, :], _, local_pose = \
                nslam_module(last_obs, obs, poses,
                             local_map[:, 0, :, :],
                             local_map[:, 1, :, :],
                             local_pose, build_maps=True)

            locs = local_pose.cpu().numpy()
            planner_pose_inputs[:, :3] = locs + origins
            local_map[:, 2, :, :].fill_(0.)  # Resetting current location channel
            for e in range(num_scenes):
                r, c = locs[e, 1], locs[e, 0]
                loc_r, loc_c = [
                    int(r * 100.0 / args.map_resolution),
                    int(c * 100.0 / args.map_resolution)
                ]

                local_map[e, 2:, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1.
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Global Policy
            if l_step == args.num_local_steps - 1:
                # For every global step, update the full and local maps
                for e in range(num_scenes):
                    full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \
                        local_map[e]
                    full_pose[e] = local_pose[e] + \
                        torch.from_numpy(origins[e]).to(device).float()

                    locs = full_pose[e].cpu().numpy()
                    r, c = locs[1], locs[0]
                    loc_r, loc_c = [
                        int(r * 100.0 / args.map_resolution),
                        int(c * 100.0 / args.map_resolution)
                    ]

                    lmb[e] = get_local_map_boundaries(
                        (loc_r, loc_c), (local_w, local_h), (full_w, full_h))

                    planner_pose_inputs[e, 3:] = lmb[e]
                    origins[e] = [
                        lmb[e][2] * args.map_resolution / 100.0,
                        lmb[e][0] * args.map_resolution / 100.0, 0.
                    ]

                    local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]]
                    local_pose[e] = full_pose[e] - \
                        torch.from_numpy(origins[e]).to(device).float()

                locs = local_pose.cpu().numpy()
                for e in range(num_scenes):
                    global_orientation[e] = int((locs[e, 2] + 180.0) / 5.)
                global_input[:, 0:4, :, :] = local_map
                global_input[:, 4:, :, :] = \
                    nn.MaxPool2d(args.global_downscaling)(full_map)

                if False:
                    for i in range(4):
                        ax[i].clear()
                        ax[i].set_yticks([])
                        ax[i].set_xticks([])
                        ax[i].set_yticklabels([])
                        ax[i].set_xticklabels([])
                        ax[i].imshow(global_input.cpu().numpy()[0, 4 + i])
                    plt.gcf().canvas.flush_events()
                    # plt.pause(0.1)
                    fig.canvas.start_event_loop(0.001)
                    plt.gcf().canvas.flush_events()

                # Get exploration reward and metrics
                g_reward = torch.from_numpy(
                    np.asarray([
                        infos[env_idx]['exp_reward'] for env_idx in range(num_scenes)
                    ])).float().to(device)

                if args.eval:
                    g_reward = g_reward * 50.0  # Convert reward to area in m2

                g_process_rewards += g_reward.cpu().numpy()
                g_total_rewards = g_process_rewards * \
                    (1 - g_masks.cpu().numpy())
                g_process_rewards *= g_masks.cpu().numpy()
                per_step_g_rewards.append(np.mean(g_reward.cpu().numpy()))

                if np.sum(g_total_rewards) != 0:
                    for tr in g_total_rewards:
                        g_episode_rewards.append(tr) if tr != 0 else None

                if args.eval:
                    exp_ratio = torch.from_numpy(
                        np.asarray([
                            infos[env_idx]['exp_ratio'] for env_idx in range(num_scenes)
                        ])).float()

                    for e in range(num_scenes):
                        explored_area_log[e, ep_num, eval_g_step - 1] = \
                            explored_area_log[e, ep_num, eval_g_step - 2] + \
                            g_reward[e].cpu().numpy()
                        explored_ratio_log[e, ep_num, eval_g_step - 1] = \
                            explored_ratio_log[e, ep_num, eval_g_step - 2] + \
                            exp_ratio[e].cpu().numpy()

                # Add samples to global policy storage
                g_rollouts.insert(
                    global_input, g_rec_states,
                    g_action, g_action_log_prob, g_value,
                    g_reward, g_masks, global_orientation
                )

                # Sample long-term goal from global policy
                g_value, g_action, g_action_log_prob, g_rec_states = \
                    g_policy.act(
                        g_rollouts.obs[g_step + 1],
                        g_rollouts.rec_states[g_step + 1],
                        g_rollouts.masks[g_step + 1],
                        extras=g_rollouts.extras[g_step + 1],
                        deterministic=False
                    )
                cpu_actions = nn.Sigmoid()(g_action).cpu().numpy()
                global_goals = [[int(action[0] * local_w), int(action[1] * local_h)]
                                for action in cpu_actions]

                g_reward = 0
                g_masks = torch.ones(num_scenes).float().to(device)
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Get short term goal
            planner_inputs = [{} for e in range(num_scenes)]
            for e, p_input in enumerate(planner_inputs):
                p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy()
                p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy()
                p_input['pose_pred'] = planner_pose_inputs[e]
                p_input['goal'] = global_goals[e]

            output = envs.get_short_term_goal(planner_inputs)
            # ------------------------------------------------------------------

            ### TRAINING
            torch.set_grad_enabled(True)
            # ------------------------------------------------------------------
            # Train Neural SLAM Module
            if args.train_slam and len(slam_memory) > args.slam_batch_size:
                for _ in range(args.slam_iterations):
                    inputs, outputs = slam_memory.sample(args.slam_batch_size)
                    b_obs_last, b_obs, b_poses = inputs
                    gt_fp_projs, gt_fp_explored, gt_pose_err = outputs

                    b_obs = b_obs.to(device)
                    b_obs_last = b_obs_last.to(device)
                    b_poses = b_poses.to(device)

                    gt_fp_projs = gt_fp_projs.to(device)
                    gt_fp_explored = gt_fp_explored.to(device)
                    gt_pose_err = gt_pose_err.to(device)

                    b_proj_pred, b_fp_exp_pred, _, _, b_pose_err_pred, _ = \
                        nslam_module(b_obs_last, b_obs, b_poses,
                                     None, None, None,
                                     build_maps=False)

                    loss = 0
                    if args.proj_loss_coeff > 0:
                        proj_loss = F.binary_cross_entropy(b_proj_pred, gt_fp_projs)
                        costs.append(proj_loss.item())
                        loss += args.proj_loss_coeff * proj_loss

                    if args.exp_loss_coeff > 0:
                        exp_loss = F.binary_cross_entropy(b_fp_exp_pred, gt_fp_explored)
                        exp_costs.append(exp_loss.item())
                        loss += args.exp_loss_coeff * exp_loss

                    if args.pose_loss_coeff > 0:
                        pose_loss = torch.nn.MSELoss()(b_pose_err_pred, gt_pose_err)
                        pose_costs.append(args.pose_loss_coeff * pose_loss.item())
                        loss += args.pose_loss_coeff * pose_loss

                    if args.train_slam:
                        slam_optimizer.zero_grad()
                        loss.backward()
                        slam_optimizer.step()

                    del b_obs_last, b_obs, b_poses
                    del gt_fp_projs, gt_fp_explored, gt_pose_err
                    del b_proj_pred, b_fp_exp_pred, b_pose_err_pred
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Train Local Policy
            if (l_step + 1) % args.local_policy_update_freq == 0 \
                    and args.train_local:
                local_optimizer.zero_grad()
                policy_loss.backward()
                local_optimizer.step()
                l_action_losses.append(policy_loss.item())
                policy_loss = 0
                local_rec_states = local_rec_states.detach_()
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Train Global Policy
            if g_step % args.num_global_steps == args.num_global_steps - 1 \
                    and l_step == args.num_local_steps - 1:
                if args.train_global:
                    g_next_value = g_policy.get_value(
                        g_rollouts.obs[-1],
                        g_rollouts.rec_states[-1],
                        g_rollouts.masks[-1],
                        extras=g_rollouts.extras[-1]).detach()

                    g_rollouts.compute_returns(g_next_value, args.use_gae,
                                               args.gamma, args.tau)
                    g_value_loss, g_action_loss, g_dist_entropy = \
                        g_agent.update(g_rollouts)
                    g_value_losses.append(g_value_loss)
                    g_action_losses.append(g_action_loss)
                    g_dist_entropies.append(g_dist_entropy)
                g_rollouts.after_update()
            # ------------------------------------------------------------------

            # Finish Training
            torch.set_grad_enabled(False)
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Logging
            if total_num_steps % args.log_interval == 0:
                end = time.time()
                time_elapsed = time.gmtime(end - start)
                log = " ".join([
                    "Time: {0:0=2d}d".format(time_elapsed.tm_mday - 1),
                    "{},".format(time.strftime("%Hh %Mm %Ss", time_elapsed)),
                    "num timesteps {},".format(total_num_steps * num_scenes),
                    "FPS {},".format(int(total_num_steps * num_scenes / (end - start)))
                ])

                log += "\n\tRewards:"
                if len(g_episode_rewards) > 0:
                    log += " ".join([
                        " Global step mean/med rew:",
                        "{:.4f}/{:.4f},".format(np.mean(per_step_g_rewards),
                                                np.median(per_step_g_rewards)),
                        " Global eps mean/med/min/max eps rew:",
                        "{:.3f}/{:.3f}/{:.3f}/{:.3f},".format(
                            np.mean(g_episode_rewards),
                            np.median(g_episode_rewards),
                            np.min(g_episode_rewards),
                            np.max(g_episode_rewards))
                    ])

                log += "\n\tLosses:"
                if args.train_local and len(l_action_losses) > 0:
                    log += " ".join([
                        " Local Loss:",
                        "{:.3f},".format(np.mean(l_action_losses))
                    ])

                if args.train_global and len(g_value_losses) > 0:
                    log += " ".join([
                        " Global Loss value/action/dist:",
                        "{:.3f}/{:.3f}/{:.3f},".format(
                            np.mean(g_value_losses),
                            np.mean(g_action_losses),
                            np.mean(g_dist_entropies))
                    ])

                if args.train_slam and len(costs) > 0:
                    log += " ".join([
                        " SLAM Loss proj/exp/pose:"
                        "{:.4f}/{:.4f}/{:.4f}".format(
                            np.mean(costs), np.mean(exp_costs),
                            np.mean(pose_costs))
                    ])

                print(log)
                logging.info(log)
            # ------------------------------------------------------------------

            # ------------------------------------------------------------------
            # Save best models
            if (total_num_steps * num_scenes) % args.save_interval < num_scenes:
                # Save Neural SLAM Model
                if len(costs) >= 1000 and np.mean(costs) < best_cost \
                        and not args.eval:
                    best_cost = np.mean(costs)
                    torch.save(nslam_module.state_dict(),
                               os.path.join(log_dir, "model_best.slam"))

                # Save Local Policy Model
                if len(l_action_losses) >= 100 and \
                        (np.mean(l_action_losses) <= best_local_loss) \
                        and not args.eval:
                    torch.save(l_policy.state_dict(),
                               os.path.join(log_dir, "model_best.local"))
                    best_local_loss = np.mean(l_action_losses)

                # Save Global Policy Model
                if len(g_episode_rewards) >= 100 and \
                        (np.mean(g_episode_rewards) >= best_g_reward) \
                        and not args.eval:
                    torch.save(g_policy.state_dict(),
                               os.path.join(log_dir, "model_best.global"))
                    best_g_reward = np.mean(g_episode_rewards)

            # Save periodic models
            if (total_num_steps * num_scenes) % args.save_periodic < num_scenes:
                step = total_num_steps * num_scenes
                if args.train_slam:
                    torch.save(nslam_module.state_dict(),
                               os.path.join(dump_dir, "periodic_{}.slam".format(step)))
                if args.train_local:
                    torch.save(l_policy.state_dict(),
                               os.path.join(dump_dir, "periodic_{}.local".format(step)))
                if args.train_global:
                    torch.save(g_policy.state_dict(),
                               os.path.join(dump_dir, "periodic_{}.global".format(step)))
            # ------------------------------------------------------------------

    # Print and save model performance numbers during evaluation
    if args.eval:
        logfile = open("{}/explored_area.txt".format(dump_dir), "w+")
        for e in range(num_scenes):
            for i in range(explored_area_log[e].shape[0]):
                logfile.write(str(explored_area_log[e, i]) + "\n")
                logfile.flush()
        logfile.close()

        logfile = open("{}/explored_ratio.txt".format(dump_dir), "w+")
        for e in range(num_scenes):
            for i in range(explored_ratio_log[e].shape[0]):
                logfile.write(str(explored_ratio_log[e, i]) + "\n")
                logfile.flush()
        logfile.close()

        log = "Final Exp Area: \n"
        for i in range(explored_area_log.shape[2]):
            log += "{:.5f}, ".format(np.mean(explored_area_log[:, :, i]))
        log += "\nFinal Exp Ratio: \n"
        for i in range(explored_ratio_log.shape[2]):
            log += "{:.5f}, ".format(np.mean(explored_ratio_log[:, :, i]))

        print(log)
        logging.info(log)
def build_activation(act_func, inplace=True, upscale_factor=2):
    if act_func == 'relu':
        return nn.ReLU(inplace=inplace)
    elif act_func == 'relu6':
        return nn.ReLU6(inplace=inplace)
    elif act_func == 'tanh':
        return nn.Tanh()
    elif act_func == 'sigmoid':
        return nn.Sigmoid()
    elif act_func == 'h_swish':
        return Hswish(inplace=inplace)
    elif act_func == 'h_sigmoid':
        return Hsigmoid(inplace=inplace)
    elif act_func == 'prelu':
        return nn.PReLU()  # nn.PReLU has no inplace argument
    elif act_func == 'lrelu':
        return nn.LeakyReLU(0.1, inplace=inplace)
    elif act_func == 'pixelshuffle':
        return build_pixelshuffle(upscale_factor=2)
    elif act_func == 'pixelshuffle+relu':
        return nn.Sequential(
            build_pixelshuffle(upscale_factor=upscale_factor),
            nn.ReLU(inplace=inplace)
        )
    elif act_func == 'pixelshuffle+relu6':
        return nn.Sequential(
            build_pixelshuffle(upscale_factor=upscale_factor),
            nn.ReLU6(inplace=inplace)
        )
    elif act_func == 'pixelshuffle+prelu':
        return nn.Sequential(
            build_pixelshuffle(upscale_factor=upscale_factor),
            nn.PReLU()
        )
    elif act_func == 'pixelshuffle+lrelu':
        return nn.Sequential(
            build_pixelshuffle(upscale_factor=upscale_factor),
            nn.LeakyReLU(0.1, inplace=inplace)
        )
    elif act_func == 'pixelunshuffle':
        return build_pixelunshuffle(downscale_factor=2)
    elif act_func == 'pixelunshuffle+relu':
        return nn.Sequential(
            build_pixelunshuffle(downscale_factor=upscale_factor),
            nn.ReLU(inplace=inplace)
        )
    elif act_func == 'pixelunshuffle+relu6':
        return nn.Sequential(
            build_pixelunshuffle(downscale_factor=upscale_factor),
            nn.ReLU6(inplace=inplace)
        )
    elif act_func == 'pixelunshuffle+prelu':
        return nn.Sequential(
            build_pixelunshuffle(downscale_factor=upscale_factor),
            nn.PReLU()
        )
    elif act_func == 'pixelunshuffle+lrelu':
        return nn.Sequential(
            build_pixelunshuffle(downscale_factor=upscale_factor),
            nn.LeakyReLU(0.1, inplace=inplace)
        )
    elif act_func is None:
        return None
    else:
        raise ValueError('do not support: %s' % act_func)
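# Usage sketch: the factory returns a ready-to-use activation module by name
# (the custom names assume Hswish/Hsigmoid and the pixelshuffle builders are
# defined in this module).
act = build_activation('sigmoid')   # nn.Sigmoid()
x = torch.randn(2, 3)
y = act(x)                          # element-wise, values in (0, 1)
act = build_activation('lrelu')     # nn.LeakyReLU(0.1, inplace=True)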
        nn.Linear(10, 1),
        nn.ReLU()).to(DEVICE)
elif args.cp:
    env = environment.CPEnvironment(DEVICE)
    policy = DiscretePolicy(nn.Sequential(
        nn.Linear(4, 5),
        nn.ReLU(),
        nn.Linear(5, 2),
        nn.Softmax(dim=-1))).to(DEVICE)
    value = nn.Sequential(
        nn.Linear(4, 5),
        nn.Sigmoid(),
        nn.Linear(5, 1),
        nn.ReLU()
    ).to(DEVICE)
elif args.ip:
    env = environment.IPEnvironment(DEVICE)
    mean = nn.Sequential(nn.Linear(3, 5), nn.ReLU(), nn.Linear(5, 1))
    std = nn.Sequential(nn.Linear(3, 5), nn.ReLU(), nn.Linear(5, 1))
    policy = ContinuousPolicy(mean, std).to(DEVICE)
    value = nn.Sequential(
        nn.Linear(3, 5),
def __init__(self, output_size):
    super(YoloClassifier, self).__init__()
    self.linear = nn.Linear(in_features=4 * 4 * 1024, out_features=output_size)
    self.softmax = nn.Softmax(dim=1)  # explicit dim; nn.Softmax() without one is deprecated
    self.sigmoid = nn.Sigmoid()
def __init__(self):
    super(FinalLayer, self).__init__()
    self.fc = nn.Linear(2048, 12)
    self.sigmoid = nn.Sigmoid()
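# A plausible forward (hypothetical, inferred from the layers): a 2048-d
# feature vector is mapped to 12 independent probabilities, i.e. a multi-label
# head rather than a softmax classifier.
def forward(self, x):
    return self.sigmoid(self.fc(x))  # shape [..., 12], each entry in (0, 1)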
def __init__(self, nfeat, nhid, nout, dropout):
    super(GCNLink, self).__init__()
    self.GCN = GCN(nfeat, nhid, nout, dropout)
    self.distmult = nn.Parameter(torch.rand(nout))
    self.sigmoid = nn.Sigmoid()
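# A minimal sketch of DistMult link scoring with these parameters (hypothetical
# method; assumes self.GCN returns node embeddings of size nout): the score of
# an edge (u, v) is sigmoid(sum_k z_u[k] * d[k] * z_v[k]).
def score_links(self, x, adj, edge_index):
    z = self.GCN(x, adj)                                    # [num_nodes, nout]
    src, dst = edge_index                                   # node index tensors
    scores = (z[src] * self.distmult * z[dst]).sum(dim=-1)  # DistMult bilinear score
    return self.sigmoid(scores)                             # edge probabilities in (0, 1)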
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

softmax = nn.Softmax()
sigmoid = nn.Sigmoid()


def batch_matmul_bias(seq, weight, bias, nonlinearity=''):
    s = None
    bias_dim = bias.size()
    for i in range(seq.size(0)):
        _s = torch.mm(seq[i], weight)
        _s_bias = _s + bias.expand(bias_dim[0], _s.size()[0]).transpose(0, 1)
        if nonlinearity == 'tanh':
            _s_bias = torch.tanh(_s_bias)
        _s_bias = _s_bias.unsqueeze(0)
        if s is None:
            s = _s_bias
        else:
            s = torch.cat((s, _s_bias), 0)
    return s.squeeze()


def batch_matmul(seq, weight, nonlinearity=''):
    s = None
    for i in range(seq.size(0)):
        _s = torch.mm(seq[i], weight)
def __init__(self, feedback_bits):
    super(Decoder, self).__init__()
    self.feedback_bits = feedback_bits
    # self.B (the quantization bit width) is assumed to be set on the class elsewhere
    self.dequantize = DequantizationLayer(self.B)
    self.conv2nums = 2
    self.conv3nums = 3
    self.conv4nums = 5
    self.conv5nums = 3

    self.multiConvs2 = nn.ModuleList()
    self.multiConvs3 = nn.ModuleList()
    self.multiConvs4 = nn.ModuleList()
    self.multiConvs5 = nn.ModuleList()

    self.fc = nn.Linear(int(feedback_bits / self.B), 1024)
    self.out_cov = conv3x3(2, 2)
    self.sig = nn.Sigmoid()

    # index 0 of each ModuleList changes the channel width
    self.multiConvs2.append(nn.Sequential(
        conv3x3(2, 64), nn.BatchNorm2d(64), nn.ReLU(),
        conv3x3(64, 256), nn.BatchNorm2d(256), nn.ReLU()))
    self.multiConvs3.append(nn.Sequential(
        conv3x3(256, 512), nn.BatchNorm2d(512), nn.ReLU(),
        conv3x3(512, 512), nn.BatchNorm2d(512), nn.ReLU()))
    self.multiConvs4.append(nn.Sequential(
        conv3x3(512, 1024), nn.BatchNorm2d(1024), nn.ReLU(),
        conv3x3(1024, 1024), nn.BatchNorm2d(1024), nn.ReLU()))
    self.multiConvs5.append(nn.Sequential(
        conv3x3(1024, 128), nn.BatchNorm2d(128), nn.ReLU(),
        conv3x3(128, 32), nn.BatchNorm2d(32), nn.ReLU(),
        conv3x3(32, 2), nn.BatchNorm2d(2), nn.ReLU()))

    # the remaining blocks keep the channel width fixed
    for _ in range(self.conv2nums):
        self.multiConvs2.append(nn.Sequential(
            conv3x3(256, 64), nn.BatchNorm2d(64), nn.ReLU(),
            conv3x3(64, 64), nn.BatchNorm2d(64), nn.ReLU(),
            conv3x3(64, 256), nn.BatchNorm2d(256), nn.ReLU()))
    for _ in range(self.conv3nums):
        self.multiConvs3.append(nn.Sequential(
            conv3x3(512, 128), nn.BatchNorm2d(128), nn.ReLU(),
            conv3x3(128, 128), nn.BatchNorm2d(128), nn.ReLU(),
            conv3x3(128, 512), nn.BatchNorm2d(512), nn.ReLU()))
    for _ in range(self.conv4nums):
        self.multiConvs4.append(nn.Sequential(
            conv3x3(1024, 256), nn.BatchNorm2d(256), nn.ReLU(),
            conv3x3(256, 256), nn.BatchNorm2d(256), nn.ReLU(),
            conv3x3(256, 1024), nn.BatchNorm2d(1024), nn.ReLU()))
    for _ in range(self.conv5nums):
        self.multiConvs5.append(nn.Sequential(
            conv3x3(2, 32), nn.BatchNorm2d(32), nn.ReLU(),
            conv3x3(32, 32), nn.BatchNorm2d(32), nn.ReLU(),
            conv3x3(32, 2), nn.BatchNorm2d(2), nn.ReLU()))
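# The ModuleList layout (a channel-expanding block at index 0, then blocks
# that return to the same width) suggests a forward built around residual
# refinement. The helper below is a hedged sketch of that loop only; the
# Decoder's actual forward, including how the fc output is reshaped to a
# 2-channel map, is not shown in this excerpt:
import torch.nn as nn

def decoder_stage(convs, out):
    out = convs[0](out)          # index 0 changes the channel width
    for block in convs[1:]:      # remaining blocks are same-width residuals
        out = out + block(out)
    return out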
complete_dataset = TurnPredictionDataset(feature_dict_list, annotations_dir,
                                         complete_path, sequence_length,
                                         prediction_length, 'test',
                                         data_select=data_set_select)
complete_dataloader = DataLoader(complete_dataset, batch_size=1, shuffle=False,
                                 num_workers=0,  # previously shuffle = shuffle
                                 drop_last=False, pin_memory=p_memory)
feature_size_dict = complete_dataset.get_feature_size_dict()
print('time taken to load data: ' + str(t.time() - t1))
complete_file_list = list(pd.read_csv(complete_path, header=None, dtype=str)[0])

lstm = torch.load('lstm_models/ling_50ms.p')
ffnn = torch.load('smol_from_big.p')
s = nn.Sigmoid()

def find_trps():
    losses_test = list()
    results_dict = dict()
    losses_dict = dict()
    batch_sizes = list()
    trp_dict = dict()
    distance_dict = dict()
    losses_mse, losses_l1 = [], []
    lstm.eval()

    # setup results_dict
    results_lengths = complete_dataset.get_results_lengths()
    for file_name in complete_file_list:
        # for g_f in ['g','f']:
        for g_f in ['g', 'f']:
def __init__(self, z_dim, device=None):
    super(DSVAELHR, self).__init__()
    self.z_dim = z_dim
    if device is None:
        self.cuda = False
        self.device = None
    else:
        self.device = device
        self.cuda = True

    # ENCODER RESIDUAL
    self.e1 = nn.Conv2d(3, 64, 4, stride=2, padding=1, bias=True, padding_mode='zeros')      # [b, 64, 32, 32]
    weights_init(self.e1)
    self.instance_norm_e1 = nn.InstanceNorm2d(num_features=64, affine=False)
    self.e2 = nn.Conv2d(64, 128, 4, stride=2, padding=1, bias=True, padding_mode='zeros')     # [b, 128, 16, 16]
    weights_init(self.e2)
    self.instance_norm_e2 = nn.InstanceNorm2d(num_features=128, affine=False)
    self.e3 = nn.Conv2d(128, 256, 4, stride=2, padding=1, bias=True, padding_mode='zeros')    # [b, 256, 8, 8]
    weights_init(self.e3)
    self.instance_norm_e3 = nn.InstanceNorm2d(num_features=256, affine=False)
    self.e4 = nn.Conv2d(256, 512, 4, stride=2, padding=1, bias=True, padding_mode='zeros')    # [b, 512, 4, 4]
    weights_init(self.e4)
    self.instance_norm_e4 = nn.InstanceNorm2d(num_features=512, affine=False)
    self.fc1 = nn.Linear(512 * 4 * 4, 256)
    weights_init(self.fc1)
    self.fc_mean = nn.Linear(256, z_dim)
    weights_init(self.fc_mean)
    self.fc_var = nn.Linear(256, z_dim)
    weights_init(self.fc_var)

    # DECODER
    self.d1 = nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1, bias=True, padding_mode='zeros')    # [b, 64, 32, 32]
    weights_init(self.d1)
    self.d2 = nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1, bias=True, padding_mode='zeros')  # [b, 128, 16, 16]
    weights_init(self.d2)
    self.mu2 = nn.Linear(self.z_dim, 128 * 16 * 16)
    self.sig2 = nn.Linear(self.z_dim, 128 * 16 * 16)
    self.instance_norm_d2 = nn.InstanceNorm2d(num_features=128, affine=False)
    self.d3 = nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1, bias=True, padding_mode='zeros') # [b, 256, 8, 8]
    weights_init(self.d3)
    self.mu3 = nn.Linear(self.z_dim, 256 * 8 * 8)
    self.sig3 = nn.Linear(self.z_dim, 256 * 8 * 8)
    self.instance_norm_d3 = nn.InstanceNorm2d(num_features=256, affine=False)
    self.d4 = nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1, bias=True, padding_mode='zeros') # [b, 512, 4, 4]
    weights_init(self.d4)
    self.mu4 = nn.Linear(self.z_dim, 512 * 4 * 4)
    self.sig4 = nn.Linear(self.z_dim, 512 * 4 * 4)
    self.instance_norm_d4 = nn.InstanceNorm2d(num_features=512, affine=False)
    self.fc2 = nn.Linear(512 * 4 * 4, 512 * 4 * 4)
    weights_init(self.fc2)
    self.d5 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1)  # [b, 256, 8, 8]
    weights_init(self.d5)
    self.mu5 = nn.Linear(self.z_dim, 256 * 8 * 8)
    self.sig5 = nn.Linear(self.z_dim, 256 * 8 * 8)
    self.instance_norm_d5 = nn.InstanceNorm2d(num_features=256, affine=False)
    self.d6 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1)  # [b, 128, 16, 16]
    weights_init(self.d6)
    self.mu6 = nn.Linear(self.z_dim, 128 * 16 * 16)
    self.sig6 = nn.Linear(self.z_dim, 128 * 16 * 16)
    self.instance_norm_d6 = nn.InstanceNorm2d(num_features=128, affine=False)
    self.d7 = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1)   # [b, 64, 32, 32]
    weights_init(self.d7)
    self.mu7 = nn.Linear(self.z_dim, 64 * 32 * 32)
    self.sig7 = nn.Linear(self.z_dim, 64 * 32 * 32)
    self.instance_norm_d7 = nn.InstanceNorm2d(num_features=64, affine=False)
    self.d8 = nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1)    # [b, 32, 64, 64]
    weights_init(self.d8)
    self.mu8 = nn.Linear(self.z_dim, 32 * 64 * 64)
    self.sig8 = nn.Linear(self.z_dim, 32 * 64 * 64)
    self.instance_norm_d8 = nn.InstanceNorm2d(num_features=32, affine=False)
    self.d9 = nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1)     # [b, 3, 128, 128]
    weights_init(self.d9)
    self.leakyrelu = nn.LeakyReLU(0.2)
    self.relu = nn.ReLU()
    self.sigmoid = nn.Sigmoid()
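# Each decoder stage owns a mu_i/sig_i pair of linears from z alongside an
# InstanceNorm2d, which suggests adaptive instance normalization: the latent
# rescales and shifts the normalized feature map. The helper below is a hedged
# sketch of one such stage; the combination itself is an assumption, since the
# model's forward is not included in this excerpt:
import torch

def adain_stage(h, z, norm, mu_fc, sig_fc):
    b, c, hh, ww = h.shape
    scale = sig_fc(z).view(b, c, hh, ww)   # per-position scale predicted from z
    shift = mu_fc(z).view(b, c, hh, ww)    # per-position shift predicted from z
    return norm(h) * scale + shift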
def train(train_loader, model, criterion, optimizer, epoch, scheduler,
          source_resl, target_resl):
    global train_minib_counter
    global logger

    # scheduler.batch_step()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    f1_scores = AverageMeter()

    # switch to train mode
    model.train()

    # sigmoid for f1 calculation and illustrations
    m = nn.Sigmoid()

    end = time.time()
    for i, (input, target, or_resl, target_resl, img_sample) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # permute to pytorch format; non_blocking replaces async, which is a
        # reserved keyword on Python 3.7+
        input = input.permute(0, 3, 1, 2).contiguous().float().cuda(non_blocking=True)
        # take only mask and boundary at first
        target = target[:, :, :, 0:args.channels].permute(0, 3, 1, 2).contiguous().float().cuda(non_blocking=True)

        # Variable is a no-op wrapper in PyTorch >= 0.4, kept for compatibility
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # calculate f1 scores only on cell masks
        # weird pytorch numerical issue when converting to float
        target_f1 = (target_var.data[:, 0:1, :, :] > args.ths) * 1
        f1_scores_batch = batch_f1_score(output=m(output.data[:, 0:1, :, :]),
                                         target=target_f1,
                                         threshold=args.ths)

        # measure accuracy and record loss; loss.item() replaces the
        # pre-0.4 loss.data[0] idiom
        losses.update(loss.item(), input.size(0))
        f1_scores.update(f1_scores_batch, input.size(0))

        # log the current lr
        current_lr = optimizer.state_dict()['param_groups'][0]['lr']

        # ============ TensorBoard logging ============ #
        # Log the scalar values
        if args.tensorboard:
            info = {
                'train_loss': losses.val,
                'f1_score_train': f1_scores.val,
                'train_lr': current_lr,
            }
            for tag, value in info.items():
                logger.scalar_summary(tag, value, train_minib_counter)

        train_minib_counter += 1

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'F1 {f1_scores.val:.4f} ({f1_scores.avg:.4f})\t'.format(
                      epoch, i, len(train_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, f1_scores=f1_scores))

    print(' * Avg Train Loss {loss.avg:.4f}'.format(loss=losses))
    print(' * Avg F1 Score {f1_scores.avg:.4f}'.format(f1_scores=f1_scores))

    return losses.avg, f1_scores.avg
def __init__(self):
    super(Classifier, self).__init__()
    self.FC = torch.nn.Sequential(nn.Linear(Z_in, 1),
                                  nn.Dropout(rate),
                                  nn.Sigmoid())
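# Z_in and rate are module-level constants defined outside this excerpt; a
# hedged usage sketch with stand-in values. Note the dropout sits between the
# logit and the sigmoid, so .eval() must be called at inference time to get
# deterministic probabilities:
import torch
import torch.nn as nn

Z_in, rate = 64, 0.3                 # hypothetical values
clf = nn.Sequential(nn.Linear(Z_in, 1), nn.Dropout(rate), nn.Sigmoid())
clf.eval()                           # disable dropout for scoring
print(clf(torch.randn(2, Z_in)))     # probabilities in (0, 1)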
def predict(predict_loader, model, model1, model2, model3):
    global logger
    global pred_minib_counter

    m = nn.Sigmoid()
    model.eval()
    model1.eval()
    model2.eval()
    model3.eval()

    temp_df = pd.DataFrame(columns=['ImageId', 'EncodedPixels'])

    with tqdm.tqdm(total=len(predict_loader)) as pbar:
        for i, (input, target, or_resl, target_resl, img_ids) in enumerate(predict_loader):
            # reshape to PyTorch format; non_blocking replaces the async
            # keyword, which is reserved on Python 3.7+
            input = input.permute(0, 3, 1, 2).contiguous().float().cuda(non_blocking=True)
            # volatile is a no-op in PyTorch >= 0.4; wrap inference in
            # torch.no_grad() instead
            input_var = torch.autograd.Variable(input, volatile=True)

            # compute output
            output = model(input_var)
            output1 = model1(input_var)
            output2 = model2(input_var)
            output3 = model3(input_var)

            for k, (pred_mask, pred_mask1, pred_mask2, pred_mask3) in enumerate(zip(output, output1, output2, output3)):
                or_w = or_resl[0][k]
                or_h = or_resl[1][k]
                print(or_w, or_h)

                mask_predictions = []
                energy_predictions = []

                # note: with the single-model loop below, only fold 0 is
                # populated; the fold1-3 tensorboard entries further down
                # assume the four-model loop is re-enabled
                # for pred_msk in [pred_mask, pred_mask1, pred_mask2, pred_mask3]:
                for pred_msk in [pred_mask]:
                    _, __ = calculate_energy(pred_msk, or_h, or_w)
                    mask_predictions.append(_)
                    energy_predictions.append(__)

                avg_mask = np.asarray(mask_predictions).mean(axis=0)
                avg_energy = np.asarray(energy_predictions).mean(axis=0)

                imsave('../examples/mask_{}.png'.format(img_ids[k]), avg_mask.astype('uint8'))
                imsave('../examples/energy_{}.png'.format(img_ids[k]), avg_energy.astype('uint8'))

                labels = wt_seeds(avg_mask, avg_energy, args.ths)
                labels_seed = cv2.applyColorMap((labels / labels.max() * 255).astype('uint8'), cv2.COLORMAP_JET)
                imsave('../examples/labels_{}.png'.format(img_ids[k]), labels_seed)

                if args.tensorboard_images:
                    info = {
                        'images': to_np(input),
                        'labels_wt': np.expand_dims(labels_seed, axis=0),
                        'pred_mask_fold0': np.expand_dims(mask_predictions[0], axis=0),
                        'pred_mask_fold1': np.expand_dims(mask_predictions[1], axis=0),
                        'pred_mask_fold2': np.expand_dims(mask_predictions[2], axis=0),
                        'pred_mask_fold3': np.expand_dims(mask_predictions[3], axis=0),
                        'pred_energy_fold0': np.expand_dims(energy_predictions[0], axis=0),
                        'pred_energy_fold1': np.expand_dims(energy_predictions[1], axis=0),
                        'pred_energy_fold2': np.expand_dims(energy_predictions[2], axis=0),
                        'pred_energy_fold3': np.expand_dims(energy_predictions[3], axis=0),
                    }
                    for tag, images in info.items():
                        logger.image_summary(tag, images, pred_minib_counter)

                pred_minib_counter += 1

                wt_areas = []
                for j, label in enumerate(np.unique(labels)):
                    if j == 0:
                        # pass the background
                        pass
                    else:
                        wt_areas.append((labels == label) * 1)

                for wt_area in wt_areas:
                    append_df = pd.DataFrame(columns=['ImageId', 'EncodedPixels'])
                    append_df['ImageId'] = [img_ids[k]]
                    append_df['EncodedPixels'] = [' '.join(map(str, rle_encoding(wt_area)))]
                    temp_df = temp_df.append(append_df)

            pbar.update(1)

    return temp_df
def forward(self, x, y):
    x = torch.cat((self.linear_x(x), self.linear_y(y)), dim=1)
    # pass the training flag so dropout is disabled in eval mode
    x = F.dropout(self.linear_1(x), p=0.5, training=self.training)
    # torch.sigmoid avoids constructing a new nn.Sigmoid module on every call
    return torch.sigmoid(self.linear_2(x))
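# The forward above references linear_x, linear_y, linear_1 and linear_2 that
# are defined outside this excerpt; below is a self-contained sketch with
# hypothetical layer sizes showing how the two-branch fusion fits together:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FusionSketch(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_x = nn.Linear(32, 16)   # branch for x (sizes are assumptions)
        self.linear_y = nn.Linear(32, 16)   # branch for y
        self.linear_1 = nn.Linear(32, 8)    # fused 16+16 features
        self.linear_2 = nn.Linear(8, 1)

    def forward(self, x, y):
        h = torch.cat((self.linear_x(x), self.linear_y(y)), dim=1)
        h = F.dropout(self.linear_1(h), p=0.5, training=self.training)
        return torch.sigmoid(self.linear_2(h))

print(FusionSketch()(torch.randn(2, 32), torch.randn(2, 32)).shape)  # [2, 1]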