def __init__(self):
    """Build the classifier: 784 inputs -> 30 hidden units (ReLU) -> 10 class scores -> softmax."""
    super(CeliaNet, self).__init__()
    self.clf = Sequential(
        nn.Linear(784, 30),
        nn.ReLU(),
        nn.Linear(30, 10),
        # Normalize over the class axis explicitly. Omitting `dim` is deprecated:
        # PyTorch then guesses the axis from the input rank and emits a warning.
        # The last axis is the 10-way output of the preceding Linear layer.
        nn.Softmax(dim=-1),
    )
def gradient(y_true, y_predicted):
    """Placeholder for a hand-written loss gradient; currently does nothing."""
    pass


def to_categorical(x, n_col=None):
    """One-hot encode the integer labels in ``x``.

    :param x: 1-D integer array of class indices.
    :param n_col: number of columns in the result; when falsy it is derived
        as ``max(x) + 1``.
    :return: float array of shape ``(len(x), n_col)`` with a single 1 per row.
    """
    num_rows = x.shape[0]
    width = n_col if n_col else np.amax(x) + 1
    encoded = np.zeros((num_rows, width))
    # Fancy indexing sets exactly one entry per row: (row i, column x[i]).
    encoded[np.arange(num_rows), x] = 1
    return encoded


if __name__ == '__main__':
    from deep_learning.activations import LogSoftmax

    torch.manual_seed(10)

    # Reference computation with PyTorch's built-in cross entropy.
    softmax = nn.Softmax(dim=1)
    loss = nn.CrossEntropyLoss()
    input = torch.rand(3, 5, requires_grad=True)
    target = torch.empty(3, dtype=torch.long).random_(5)
    output = loss(input, target)
    output.backward()

    # Mine
    y_pred = input.detach().numpy()
    y_true = target.numpy()
    print(to_categorical(y_true, 5))
    print(y_true)
import torch
from torch import nn

# Region features laid out as (N, k, d).
region_feats = torch.rand(5, 3, 2)

# Grouped 1-D convolution: 3 input channels, 3 * 3 output channels, kernel
# size 2, one group per input channel.
convKK = nn.Conv1d(3, 3 * 3, 2, groups=3)

# The convolution yields torch.Size([5, 9, 1]); view it as (5, 3, 3).
convd = convKK(region_feats).view(5, 3, 3)

# dim=-1 means the softmax is computed along the innermost dimension.
activation = nn.Softmax(dim=-1)
multiplier = activation(convd)  # torch.Size([5, 3, 3])

print(convd)
print(convd.squeeze(1))
def __init__(self, num_class, num_segments, modality,
             base_model='resnet101', new_length=None,
             consensus_type='avg', before_softmax=True,
             dropout=0.8, img_feature_dim=256,
             crop_num=1, partial_bn=True, print_spec=True, pretrain='imagenet',
             is_shift=False, shift_div=8, shift_place='blockres', fc_lr5=False,
             temporal_pool=False, non_local=False, tin=False):
    """Configure the TSN wrapper and build its backbone, head and consensus module.

    Most arguments are stored unchanged on ``self`` for use by the helper
    methods called below. Arguments with behaviour visible in this method:

    :param num_class: forwarded to ``self._prepare_tsn``.
    :param modality: ``'Flow'`` and ``'RGBDiff'`` trigger conversion of the
        base model; when ``new_length`` is None, ``'RGB'`` gives a length of 1
        and any other modality gives 5.
    :param base_model: backbone name, forwarded to ``self._prepare_base_model``.
    :param new_length: explicit override for ``self.new_length``.
    :param consensus_type: passed to ``ConsensusModule``; must be ``'avg'``
        when ``before_softmax`` is False.
    :param before_softmax: when False a softmax module is attached.
    :param partial_bn: when True, ``self.partialBN(True)`` is called.
    :param print_spec: when True, print a configuration summary.
    :raises ValueError: if ``before_softmax`` is False and ``consensus_type``
        is not ``'avg'``.
    """
    super(TSN, self).__init__()
    self.modality = modality
    self.num_segments = num_segments
    self.reshape = True
    self.before_softmax = before_softmax
    self.dropout = dropout
    self.crop_num = crop_num
    self.consensus_type = consensus_type
    self.img_feature_dim = img_feature_dim  # the dimension of the CNN feature to represent each frame
    self.pretrain = pretrain

    self.is_shift = is_shift
    self.shift_div = shift_div
    self.shift_place = shift_place
    self.tin = tin
    self.base_model_name = base_model
    self.fc_lr5 = fc_lr5
    self.temporal_pool = temporal_pool
    self.non_local = non_local

    if not before_softmax and consensus_type != 'avg':
        raise ValueError("Only avg consensus can be used after Softmax")

    if new_length is None:
        self.new_length = 1 if modality == "RGB" else 5
    else:
        self.new_length = new_length

    if print_spec:
        print((""" Initializing TSN with base model: {}. TSN Configurations: input_modality: {} num_segments: {} new_length: {} consensus_module: {} dropout_ratio: {} img_feature_dim: {} """.format(
            base_model, self.modality, self.num_segments, self.new_length,
            consensus_type, self.dropout, self.img_feature_dim)))

    self._prepare_base_model(base_model)

    # The return value was never used here; only the call's side effects matter.
    self._prepare_tsn(num_class)

    if self.modality == 'Flow':
        print("Converting the ImageNet model to a flow init model")
        self.base_model = self._construct_flow_model(self.base_model)
        print("Done. Flow model ready...")
    elif self.modality == 'RGBDiff':
        print("Converting the ImageNet model to RGB+Diff init model")
        self.base_model = self._construct_diff_model(self.base_model)
        print("Done. RGBDiff model ready.")

    self.consensus = ConsensusModule(consensus_type)

    if not self.before_softmax:
        # Pin the axis instead of relying on the deprecated implicit-dim
        # heuristic. dim=1 is what that heuristic selects for 2-D input.
        # NOTE(review): assumes the scores fed to this module are
        # (samples, classes) — confirm against forward().
        self.softmax = nn.Softmax(dim=1)

    self._enable_pbn = partial_bn
    if partial_bn:
        self.partialBN(True)
def __init__(self, key_dim, value_dim, device):
    """Record the key/value sizes and target device, and create the softmax.

    :param key_dim: stored as ``self.key_dim``.
    :param value_dim: stored as ``self.value_dim``.
    :param device: stored as ``self.device``.
    """
    super(Attention, self).__init__()
    # Softmax over the last axis of whatever tensor it is applied to.
    self.softmax = nn.Softmax(dim=-1)
    self.key_dim, self.value_dim = key_dim, value_dim
    self.device = device
def download_scores(val_loader, model, log_now, process_name, args):
    """Score every validation sample with ``model`` and persist the results.

    For each batch the model output is turned into per-class scores
    (L1-normalised when ``process_name == 'partnet'``, softmax otherwise),
    top-1/top-5 precision is accumulated, and one score vector plus label is
    collected per sample. A summary line is appended to ``log.txt`` in
    ``log_now`` and the collected scores/labels are saved to
    ``<log_now>/<process_name>.pth.tar``.

    :param val_loader: iterable yielding ``(input, target, target_loss)``.
    :param model: network to evaluate; switched to eval mode.
    :param log_now: existing directory for the log and the saved scores.
    :param process_name: tag used in the log line and the output file name.
    :param args: unused here; kept for interface compatibility.
    :return: ``(probabilities, labels)`` lists, one entry per sample.
    :raises ValueError: if ``log_now`` is not an existing directory.
    """
    if not os.path.isdir(log_now):
        raise ValueError('the log dir request is not exist')
    file_to_save_or_load = log_now + '/' + process_name + '.pth.tar'
    probabilities = []
    labels = []
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    end = time.time()
    # Class scores are normalised along dim 1, matching the partnet branch
    # below; an explicit dim avoids the deprecated implicit-dim behaviour.
    softmax = nn.Softmax(dim=1)
    for i, (batch, target, _target_loss) in enumerate(val_loader):
        # `async` became a reserved word in Python 3.7; `non_blocking` is the
        # replacement keyword for the same asynchronous-copy request.
        target = target.cuda(non_blocking=True)

        # compute output (no autograd graph is needed for scoring)
        with torch.no_grad():
            output = model(batch)
            if process_name == 'partnet':
                output = torch.nn.functional.normalize(output, p=1, dim=1)
            else:
                output = softmax(output)

        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        for j in range(batch.size(0)):
            # maybe here need to sub tensor to save memory.
            probabilities.append(output.data[j].cpu().clone())
            labels.append(target[j])
        top1.update(prec1[0], batch.size(0))
        top5.update(prec5[0], batch.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        print('Test: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                  1, i, len(val_loader), batch_time=batch_time,
                  top1=top1, top5=top5))

    # The context manager closes the log even if a write fails.
    with open(os.path.join(log_now, 'log.txt'), 'a') as log:
        log.write("\n")
        log.write(process_name)
        log.write(" Val:epoch: %d, Top1 acc: %3f, Top5 acc: %3f" %
                  (1, top1.avg, top5.avg))

    torch.save({
        'scores': probabilities,
        'labels': labels
    }, file_to_save_or_load)
    return probabilities, labels
def __init__(
    self,
    input_size: int,
    time_length: int,
    hidden_size: int,
    batch_size: int,
    spatial_attn_dropout11: float,
    spatial_attn_dropout12: float,
    spatial_attn_dropout2: float,
    gru_lstm: bool = True,
    num_layers: int = 1,
    parallel: bool = False,
):
    """Create the dropout, recurrent and attention-scoring layers.

    input_size: number of underlying factors
    time_length: added to the input width of the attention linear layers
    hidden_size: dimension of the hidden states
    batch_size: stored as ``self.batch_size``
    spatial_attn_dropout11 / 12 / 2: probabilities for the three dropout layers
    gru_lstm: True builds LSTM layers, False builds GRU layers
    num_layers: number of stacked layers in each recurrent module
    parallel: when True a third recurrent layer and a third attention
        linear layer are created
    """
    super().__init__()

    self.input_size = input_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.gru_lstm = gru_lstm
    self.num_layers = num_layers
    self.parallel = parallel

    self.spatial_attn_dropout11 = nn.Dropout(spatial_attn_dropout11)
    self.spatial_attn_dropout12 = nn.Dropout(spatial_attn_dropout12)
    self.spatial_attn_dropout2 = nn.Dropout(spatial_attn_dropout2)

    # Softmax fix
    self.softmax = nn.Softmax(dim=1)

    # Recurrent layers are registered as lstm_layer1..N or gru_layer1..N, in
    # that order, with N = 3 in parallel mode and 2 otherwise.
    if gru_lstm:
        rnn_cls, prefix = nn.LSTM, 'lstm'
    else:
        rnn_cls, prefix = nn.GRU, 'gru'
    rnn_count = 3 if self.parallel else 2
    for idx in range(1, rnn_count + 1):
        layer = rnn_cls(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        setattr(self, '{}_layer{}'.format(prefix, idx), layer)

    # Every attention scorer maps (2 * hidden_size + k * time_length) -> 1.
    state_width = self.hidden_size * 2
    self.attn_linear1 = nn.Linear(
        in_features=state_width + time_length, out_features=1
    )
    if self.parallel:
        self.attn_linear2 = nn.Linear(
            in_features=state_width + 3 * time_length, out_features=1
        )
        self.attn_linear3 = nn.Linear(
            in_features=state_width + 2 * time_length, out_features=1
        )
    else:
        self.attn_linear2 = nn.Linear(
            in_features=state_width + 2 * time_length, out_features=1
        )
def __init__(self, channel_size):
    """Create a channel-preserving 1x1 convolution and a last-axis softmax.

    :param channel_size: number of input and output channels of the convolution.
    """
    super(CNN_attention, self).__init__()
    self.attention = nn.Conv2d(
        in_channels=channel_size,
        out_channels=channel_size,
        kernel_size=1,
    )
    self.softmax = nn.Softmax(dim=-1)
    # Must run after the layers above have been created.
    self._initialize_weights()
def _build_optimizer(parameters, args):
    """Return the Adam or SGD optimizer selected by ``args.optimizer`` over the trainable ``parameters``."""
    trainable = filter(lambda x: x.requires_grad, parameters)
    # Compare by value: `is 'Adam'` tests object identity, which is not
    # guaranteed for equal strings (e.g. values parsed from the command line).
    if args.optimizer == 'Adam':
        print('Adam optimizer: conv')
        return torch.optim.Adam(
            trainable,
            lr=args.lr_Adam,
            betas=(0.9, 0.999),
            weight_decay=10 ** args.wd,
        )
    print('SGD optimizer: conv')
    return torch.optim.SGD(
        trainable,
        lr=args.lr_SGD,
        momentum=args.momentum,
        weight_decay=10 ** args.wd,
    )


def main(args):
    """Run supervised pretraining of the backbone plus a category head.

    Seeds the RNGs, builds the model named by ``args.arch``, replaces its
    cluster/category layers, creates one optimizer for the body and one for
    the category head, optionally resumes from ``args.resume``, and then
    trains from ``args.start_epoch`` up to ``args.pretrain_epoch``. After
    each epoch it writes the latest checkpoint, the category-layer weights
    and the loss history, plus a numbered checkpoint every
    ``args.checkpoints`` epochs.

    :param args: parsed options; the fields read here are ``seed``, ``exp``,
        ``verbose``, ``arch``, ``nmb_cluster``, ``nmb_category``,
        ``optimizer``, ``lr_Adam``, ``lr_SGD``, ``momentum``, ``wd``,
        ``batch``, ``workers``, ``resume``, ``start_epoch``,
        ``pretrain_epoch``, ``semi_ratio`` and ``checkpoints``.
    """
    # fix random seeds
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
    print(device)
    criterion = nn.CrossEntropyLoss()
    # All artefacts live three directories above args.exp.
    out_root = os.path.join(args.exp, '../../..')
    cluster_log = Logger(os.path.join(out_root, 'clusters.pickle'))

    # CNN
    if args.verbose:
        print('Architecture: {}'.format(args.arch))

    ##########################################
    # Model definition
    ##########################################
    model = models.__dict__[args.arch](bn=True, num_cluster=args.nmb_cluster, num_category=args.nmb_category)
    fd = int(model.cluster_layer[0].weight.size()[1])  # due to transpose, fd is input dim of W (in dim, out dim)
    model.cluster_layer = None
    model.category_layer = None
    model.features = torch.nn.DataParallel(model.features)
    model = model.double()
    model.to(device)
    cudnn.benchmark = True

    # Built while category_layer is None, so this optimizer covers only the
    # remaining (body) parameters.
    optimizer_body = _build_optimizer(model.parameters(), args)

    ##########################################
    # category_layer
    ##########################################
    # NOTE(review): this head ends in Softmax while `criterion` is
    # CrossEntropyLoss, which applies log-softmax itself — confirm this
    # double normalisation is intended.
    model.category_layer = nn.Sequential(
        nn.Linear(fd, args.nmb_category),
        nn.Softmax(dim=1),
    )
    model.category_layer[0].weight.data.normal_(0, 0.01)
    model.category_layer[0].bias.data.zero_()
    model.category_layer = model.category_layer.double()
    model.category_layer.to(device)

    optimizer_category = _build_optimizer(model.category_layer.parameters(), args)

    ########################################
    # Create echogram sampling index
    ########################################
    print('Sample echograms.')
    dataset_cp, dataset_semi = sampling_echograms_full(args)
    dataloader_semi = torch.utils.data.DataLoader(dataset_semi,
                                                  shuffle=False,
                                                  batch_size=args.batch,
                                                  num_workers=args.workers,
                                                  drop_last=False,
                                                  pin_memory=True)

    dataset_test = sampling_echograms_test(args)
    dataloader_test = torch.utils.data.DataLoader(dataset_test,
                                                  shuffle=False,
                                                  batch_size=args.batch,
                                                  num_workers=args.workers,
                                                  drop_last=False,
                                                  pin_memory=True)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # remove top located layer parameters from checkpoint
            copy_checkpoint_state_dict = checkpoint['state_dict'].copy()
            for key in list(copy_checkpoint_state_dict):
                if 'cluster_layer' in key:
                    del copy_checkpoint_state_dict[key]
            checkpoint['state_dict'] = copy_checkpoint_state_dict
            model.load_state_dict(checkpoint['state_dict'])
            optimizer_body.load_state_dict(checkpoint['optimizer_body'])
            optimizer_category.load_state_dict(checkpoint['optimizer_category'])
            category_save = os.path.join(out_root, 'category_layer.pth.tar')
            if os.path.isfile(category_save):
                category_layer_param = torch.load(category_save)
                model.category_layer.load_state_dict(category_layer_param)
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # creating checkpoint repo
    exp_check = os.path.join(out_root, 'checkpoints')
    if not os.path.isdir(exp_check):
        os.makedirs(exp_check)

    ############################
    # PRETRAIN
    ############################
    if args.start_epoch < args.pretrain_epoch:
        history_path = os.path.join(out_root, 'pretrain_loss_collect.pickle')
        if os.path.isfile(history_path):
            with open(history_path, "rb") as f:
                pretrain_loss_collect = pickle.load(f)
        else:
            # [epochs, train loss, test loss, train accuracy, test accuracy]
            pretrain_loss_collect = [[], [], [], [], []]
        print('Start pretraining with %d percent of the dataset from epoch %d/(%d)'
              % (int(args.semi_ratio * 100), args.start_epoch, args.pretrain_epoch))
        model.cluster_layer = None

        for epoch in range(args.start_epoch, args.pretrain_epoch):
            with torch.autograd.set_detect_anomaly(True):
                pre_loss, pre_accuracy = supervised_train(loader=dataloader_semi,
                                                          model=model,
                                                          crit=criterion,
                                                          opt_body=optimizer_body,
                                                          opt_category=optimizer_category,
                                                          epoch=epoch, device=device, args=args)
            test_loss, test_accuracy = test(dataloader_test, model, criterion, device, args)

            # print log
            if args.verbose:
                print('###### Epoch [{0}] ###### \n'
                      'PRETRAIN tr_loss: {1:.3f} \n'
                      'TEST loss: {2:.3f} \n'
                      'PRETRAIN tr_accu: {3:.3f} \n'
                      'TEST accu: {4:.3f} \n'.format(epoch, pre_loss, test_loss, pre_accuracy, test_accuracy))
            pretrain_loss_collect[0].append(epoch)
            pretrain_loss_collect[1].append(pre_loss)
            pretrain_loss_collect[2].append(test_loss)
            pretrain_loss_collect[3].append(pre_accuracy)
            pretrain_loss_collect[4].append(test_accuracy)

            # Rolling "latest" checkpoint, overwritten every epoch.
            torch.save({'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'optimizer_body': optimizer_body.state_dict(),
                        'optimizer_category': optimizer_category.state_dict(),
                        },
                       os.path.join(out_root, 'checkpoint.pth.tar'))
            torch.save(model.category_layer.state_dict(),
                       os.path.join(out_root, 'category_layer.pth.tar'))

            with open(history_path, "wb") as f:
                pickle.dump(pretrain_loss_collect, f)

            # Numbered checkpoint every args.checkpoints epochs.
            if (epoch + 1) % args.checkpoints == 0:
                path = os.path.join(
                    out_root,
                    'checkpoints',
                    'checkpoint_' + str(epoch) + '.pth.tar',
                )
                if args.verbose:
                    print('Save checkpoint at: {0}'.format(path))
                torch.save({'epoch': epoch + 1,
                            'arch': args.arch,
                            'state_dict': model.state_dict(),
                            'optimizer_body': optimizer_body.state_dict(),
                            'optimizer_category': optimizer_category.state_dict(),
                            }, path)
from torch.backends import cudnn
from torch2trt import torch2trt
from torchvision.transforms import transforms as T

from myresnet import resnet50

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Inference-only script: autograd is disabled globally.
torch.set_grad_enabled(False)
cudnn.benchmark = True

# Resize, convert to a tensor and normalise with fixed per-channel statistics.
transform = T.Compose([
    T.Resize(size=(256, 256)),
    T.ToTensor(),
    T.Normalize([0.56687369, 0.44000871, 0.39886727], [0.2415682, 0.2131414, 0.19494878])
])

# Explicit class axis; omitting `dim` is deprecated and emits a warning.
soft_max = nn.Softmax(dim=1)
masked_dic = {0: "unmasked", 1: "masked"}


def main(image, weight):
    """Load the two-class ResNet-50 and prepare one image for inference.

    :param image: path to the image file, read with OpenCV.
    :param weight: path to a saved ``state_dict`` for the model.
    :raises FileNotFoundError: if OpenCV cannot read ``image``.
    """
    model = resnet50(num_classes=2)
    # load_state_dict() returns a report of missing/unexpected keys, not the
    # module, so its result must not replace `model`. map_location lets a
    # GPU-saved checkpoint load on the CPU fallback device.
    model.load_state_dict(torch.load(weight, map_location=device))
    model.eval()
    model = model.to(device)

    im = cv2.imread(image)
    if im is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('could not read image: {}'.format(image))
    # OpenCV loads BGR; convert to RGB exactly once. The original reversed
    # the channels by slicing and then converted again, and the conversion
    # call named a function (`cv2.cvrt`) that does not exist.
    im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    # `transform` ends in ToTensor/Normalize, so the result is already a
    # tensor; only the batch axis has to be added.
    im = transform(im).unsqueeze(0)
    im = im.to(device)
    tic = time.time()
def train(model, train_loader, val_loader, optimizer, num_epochs, path_to_save_best_weights):
    """Train and validate a segmentation model, logging to TensorBoard.

    Each epoch runs a training phase and a validation phase. The loss is
    NLLLoss on log-softmax logits; a mean IoU is computed from the first
    sample of every batch. Every 10th batch an image strip (prediction,
    input, mask) is written to TensorBoard. The weights with the lowest
    validation loss are kept and saved once training finishes.

    :param model: network to train; called as ``model(imgs)``.
    :param train_loader: iterable of ``(imgs, masks)`` training batches.
    :param val_loader: iterable of ``(imgs, masks)`` validation batches.
    :param optimizer: optimizer stepping the model parameters.
    :param num_epochs: number of epochs to run.
    :param path_to_save_best_weights: directory for the best-weights file.
    """
    import copy  # local import so this block does not depend on file-level imports

    model.train()
    log_softmax = nn.LogSoftmax(dim=1)  # Use for NLLLoss()
    softmax = nn.Softmax(dim=1)

    criterion_nlloss = nn.NLLLoss()
    metrics_evaluator = PerformanceMetricsEvaluator()

    to_tensor = transforms.ToTensor()

    writer = SummaryWriter('runs/unet')
    since = time.time()

    # state_dict() returns references to the live parameter tensors, so the
    # snapshot has to be deep-copied or later optimizer steps overwrite it.
    best_model_weights = copy.deepcopy(model.state_dict())
    best_IoU = 0.0
    best_val_loss = 1000000000

    curr_val_loss = 0.0
    curr_training_loss = 0.0
    curr_training_IoU = 0.0
    curr_val_IoU = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
                data_loader = train_loader
            else:
                model.eval()
                data_loader = val_loader

            running_loss = 0.0
            running_IoU = 0

            # Iterate over data.
            ind = 0
            for imgs, masks in tqdm(data_loader):
                imgs = imgs.to(device)
                masks = masks.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Track gradients only while training; validation needs no graph.
                with torch.set_grad_enabled(phase == 'train'):
                    # forward
                    logits = model(imgs)
                    log_softmax_logits = log_softmax(logits)
                    loss = criterion_nlloss(log_softmax_logits, masks)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # ---------------- Tensorboard Logging ---------------- #
                # Per-pixel predicted class index. torch.argmax keeps the
                # result a tensor, which the .repeat/.float calls below need.
                predicted_classes = torch.argmax(
                    softmax(logits).detach().cpu(), dim=1)

                if ind % 10 == 0:
                    if phase == 'val':
                        img_name = 'ValidationEpoch: {}'.format(str(epoch))
                    else:
                        img_name = 'TrainingEpoch: {}'.format(str(epoch))

                    rgb_prediction = predicted_classes.repeat(3, 1, 1).float()
                    rgb_prediction = np.moveaxis(rgb_prediction.numpy(), 0, -1)
                    converted_img = img_to_visible(rgb_prediction)
                    converted_img = torch.unsqueeze(to_tensor(converted_img), 0)

                    masks_changed = masks.detach().cpu()
                    masks_changed = masks_changed.repeat(3, 1, 1).float()
                    masks_changed = np.moveaxis(masks_changed.numpy(), 0, -1)
                    masks_changed = img_to_visible(masks_changed)
                    masks_changed = torch.unsqueeze(to_tensor(masks_changed), 0)

                    # Prediction, input and mask concatenated along the last axis.
                    third_tensor = torch.cat(
                        (converted_img, imgs.detach().cpu(), masks_changed), -1)
                    writer.add_image(img_name, third_tensor, epoch)

                # statistics
                running_loss += loss.detach().item()
                running_IoU += metrics_evaluator.mean_IU(
                    predicted_classes.numpy()[0], masks.cpu().numpy()[0])
                ind += 1

            epoch_loss = running_loss / len(data_loader)
            epoch_IoU = running_IoU / len(data_loader)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_IoU))

            # deep copy the model
            if phase == 'val' and epoch_loss < best_val_loss:  # TODO add IoU
                best_val_loss = epoch_loss
                best_IoU = epoch_IoU
                best_model_weights = copy.deepcopy(model.state_dict())

            if phase == 'val':
                curr_val_loss = epoch_loss
                curr_val_IoU = epoch_IoU
            else:
                curr_training_loss = epoch_loss
                curr_training_IoU = epoch_IoU

        writer.add_scalars('TrainValIoU', {
            'trainIoU': curr_training_IoU,
            'validationIoU': curr_val_IoU
        }, epoch)
        writer.add_scalars('TrainValLoss', {
            'trainLoss': curr_training_loss,
            'validationLoss': curr_val_loss
        }, epoch)

    # Saving best model
    # NOTE(review): '{:2f}' is a minimum width of 2, not two decimals
    # ('{:.2f}'); left unchanged because it determines the output file name.
    torch.save(
        best_model_weights,
        os.path.join(path_to_save_best_weights,
                     'unet{:2f}.pth'.format(best_val_loss)))

    # Show the timing and final statistics
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Loss: {:4f}'.format(best_val_loss))  # TODO add IoU
def __init__(self, hidden_dim=10, parts=2):
    """Create a 1x1 convolution with ``parts + 1`` output maps and a dim-1 softmax.

    :param hidden_dim: number of input channels of the convolution.
    :param parts: the convolution outputs ``parts + 1`` channels.
    """
    super(Decomp_att, self).__init__()
    num_maps = parts + 1
    self.conv_fh = nn.Conv2d(
        in_channels=hidden_dim,
        out_channels=num_maps,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=True,
    )
    self.softmax = nn.Softmax(dim=1)
def __init__(self,
             first_arg=None,
             second_arg=None,
             output=None,
             instruction=None,
             initial_registers=None,
             ir=None,
             stop_threshold=0.9,
             multiplier=5,
             correctness_weight=.2,
             halting_weight=.2,
             confidence_weight=.2,
             efficiency_weight=.4,
             diversity_weight=0,
             optimize=False,
             mix_probabilities=False,
             t_max=75):
    """
    Store the constants of the controller and the matrices that define a program.

    :param first_arg: Matrix with the 1st register argument for each timestep stored in the columns (RxM)
    :param second_arg: Matrix with the 2nd register argument for each timestep stored in the columns (RxM)
    :param output: Matrix with the output register for each timestep stored in the columns (RxM)
    :param instruction: Matrix with the instruction for each timestep stored in the columns (NxM)
    :param initial_registers: Matrix where each row is a distribution over the value in one register (RxM)
    :param ir: Initial instruction-register vector; when None, a length-M vector with a 1 at index 0 is used
    :param stop_threshold: The stop probability threshold at which the controller should stop running
    :param multiplier: Factor every program matrix is scaled by before it is softmaxed
    :param correctness_weight: Weight given to the correctness component of the loss function
    :param halting_weight: Weight given to the halting component of the loss function
    :param confidence_weight: Weight given to the confidence component of the loss function
    :param efficiency_weight: Weight given to the efficiency component of the loss function
    :param diversity_weight: Stored as ``self.diversity_weight``
    :param optimize: When True the scaled matrices become trainable parameters; otherwise plain tensors
    :param mix_probabilities: Stored as ``self.mix_probabilities``
    :param t_max: Maximum number of iterations of the program
    """
    super(Controller, self).__init__()

    # Dimension constants, taken from the register matrix.
    R, M = initial_registers.size()
    self.M = M
    self.R = R
    self.times = []

    # Loss weights (called alpha, beta, gamma and delta in the ANC paper).
    self.correctness_weight = correctness_weight
    self.halting_weight = halting_weight
    self.confidence_weight = confidence_weight
    self.efficiency_weight = efficiency_weight
    self.diversity_weight = diversity_weight

    # Run-time settings.
    self.t_max = t_max
    self.stop_threshold = stop_threshold
    self.multiplier = multiplier
    self.mix_probabilities = mix_probabilities
    self.optimize = optimize

    # Default instruction register: all mass on position 0.
    if ir is None:
        IR = torch.zeros(M)
        IR[0] = 1
    else:
        IR = ir

    # (attribute name, source matrix) pairs, in registration order.
    program = (
        ('first_arg', first_arg),
        ('second_arg', second_arg),
        ('output', output),
        ('instruction', instruction),
        ('registers', initial_registers),
    )
    if optimize:
        # These are the things that are going to be optimized.
        for attr, matrix in program:
            setattr(self, attr, nn.Parameter(multiplier * matrix))
        self.IR = nn.Parameter(multiplier * IR)
    else:
        for attr, matrix in program:
            setattr(self, attr, multiplier * matrix)
        self.register_buffer('IR', multiplier * IR)

    self.register_buffer('initial_stop_probability', torch.zeros(1))

    # Machine initialization
    self.machine = Machine(M, R)
    self.softmax = nn.Softmax(0)
def forward_train(self, input, output):
    """
    Run the controller on an initial memory matrix and return the training loss.

    :param input: The state of memory at the beginning of the program.
    :param output: A tuple (output_memory, output_mask):
        output_memory: The desired state of memory at the end of the program.
        output_mask: The parts of the output memory that are relevant.
    :return: The value of ``self.total_loss()`` after the run.
    """
    target_memory = output[0]
    target_mask = output[1]

    self.memory = Variable(input)
    self.output_memory = Variable(target_memory)
    self.output_mask = Variable(target_mask)
    self.stop_probability = Variable(self.initial_stop_probability)

    # Work on fresh copies so nothing carries over from a previous run, and
    # turn both registers and IR into probability distributions.
    registers = nn.Softmax(1)(self.registers)
    IR = self.softmax(self.IR)

    if self.mix_probabilities:
        # Softmax the program matrices once, up front.
        mixed_first = self.softmax(self.first_arg)
        mixed_second = self.softmax(self.second_arg)
        mixed_output = self.softmax(self.output)
        mixed_instruction = self.softmax(self.instruction)

    # loss initialization
    self.confidence = 0
    self.efficiency = 1
    self.halting = 0
    self.correctness = 0
    self.diversity = 0

    # Execute one timestep at a time until the program halts or times out.
    t = 0
    while t < self.t_max and float(
            self.stop_probability) < self.stop_threshold:
        if self.mix_probabilities:
            a, b, o, e = (
                torch.matmul(matrix, IR)
                for matrix in (mixed_first, mixed_second,
                               mixed_output, mixed_instruction)
            )
        else:
            a, b, o, e = (
                self.softmax(torch.matmul(matrix, IR))
                for matrix in (self.first_arg, self.second_arg,
                               self.output, self.instruction)
            )

        # Update memory, registers, and IR after machine operation
        self.old_stop_probability = self.stop_probability
        self.memory, registers, IR, new_stop_prob = self.machine(
            e, a, b, o, self.memory, registers, IR)
        remaining = 1 - self.stop_probability
        self.stop_probability = self.stop_probability + (
            new_stop_prob * remaining)
        self.timestep_loss(t)
        t += 1

    self.final_loss(t)
    self.times.append(t)
    return self.total_loss()
def sample_cluster(self, dist, centroid):
    """Pick, for each entry of ``dist``, the centroid with the highest score.

    :param dist: scores over clusters along the last axis.
    :param centroid: tensor indexed as ``centroid[0, cluster_index]``.
    :return: the selected centroid rows with a new axis inserted at position 1.
    """
    probs = nn.Softmax(dim=-1)(dist)
    # topk with k=1 is an argmax that keeps the reduced axis.
    best = torch.topk(probs, k=1).indices
    best = best.squeeze(1).squeeze(1)
    chosen = centroid[0, list(best)]
    return chosen.unsqueeze(1)
txt_COVID='./dataset/image/classfication/Data-split/COVID/testCT_COVID.txt', txt_NonCOVID='./dataset/image/classfication/Data-split/NonCOVID/testCT_NonCOVID.txt', transform=val_transformer) # Batch Data Loader train_loader = DataLoader(trainset, batch_size=args.batch_size, drop_last=False, shuffle=True) val_loader = DataLoader(valset, batch_size=args.batch_size, drop_last=False, shuffle=False) test_loader = DataLoader(testset, batch_size=args.batch_size, drop_last=False, shuffle=False) # PreTrain Model: Densenet169 model = models.densenet169(pretrained=True).to(device) pretrained_net = torch.load('./model/pretrain/classification/Self-Trans.pt', map_location=device) model.load_state_dict(pretrained_net) # Change output Num of Class model.classifier = nn.Sequential(nn.Linear(1664, 2), nn.Softmax(dim=1)).to(device) # Model Train & Validation early_stopping = Utils.EarlyStopping(patience=10, delta=0) valid_losses = [] best_loss = np.Inf # train bs = args.batch_size votenum = 1 warnings.filterwarnings('ignore') optimizer = optim.Adam(model.parameters(), lr=1e-5) scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
def sample_cluster_multinomial(self, dist, centroid):
    """Sample one centroid per entry of ``dist`` from its softmax distribution.

    :param dist: scores over clusters along the last axis; only index 0 of
        the second axis is used for sampling.
    :param centroid: tensor indexed as ``centroid[0, cluster_index]``.
    :return: the sampled centroid rows with a new axis inserted at position 1.
    """
    # Compute the probabilities once. The original also built a Multinomial
    # distribution object that was never sampled from; it has been removed.
    probs = nn.Softmax(dim=-1)(dist)
    # One categorical draw per row of the (rows, clusters) probability slice.
    indices = torch.multinomial(probs[:, 0], 1)
    return centroid[0, list(indices)].unsqueeze(1)
def forward(self, Q, K, V, attn_mask):
    """Scaled dot-product attention with masking.

    :param Q: query tensor.
    :param K: key tensor; its last two axes are swapped before the product.
    :param V: value tensor.
    :param attn_mask: positions where the mask is set are filled with -1e9
        before the softmax.
    :return: ``(context, attn)`` — the attention-weighted values and the
        attention weights.
    """
    scale = np.sqrt(d_k)
    # logits : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]
    logits = torch.matmul(Q, K.transpose(-1, -2)) / scale
    # Fill masked positions with a large negative value so they receive
    # (numerically) zero weight after the softmax.
    logits.masked_fill_(attn_mask, -1e9)
    normalize = nn.Softmax(dim=-1)
    attn = normalize(logits)
    context = torch.matmul(attn, V)
    return context, attn
def __init__(self, d_model, attn_dropout=0.1):
    """Set up the scaling constant, dropout and softmax.

    :param d_model: ``self.temper`` is set to its square root.
    :param attn_dropout: probability for the dropout layer.
    """
    super(ScaledDotProductAttention, self).__init__()
    scaling = np.power(d_model, 0.5)
    self.temper = scaling
    self.dropout = nn.Dropout(p=attn_dropout)
    self.softmax = nn.Softmax(dim=2)
def test(cfg, output_dir='', output_dir_merge='', output_dir_save=''):
    """Run instance-segmentation inference and write the masks to HDF5.

    For every test shape the segmentation model proposes per-centroid point
    masks. Low-purity proposals are discarded, then pairs of masks are merged
    greedily: the pair with the highest ``softmax(purity * policy)`` score is
    picked, a verification head decides whether to merge it, and the pool and
    score matrices are updated. Once no overlapping pair is left, one more
    stage considers every remaining pair that has not been rejected. Finally
    contained masks are removed, uncovered points get a label from their
    nearest labelled neighbours, and the result is stored in ``out_mask``.

    Args:
        cfg: experiment config; ``cfg.TEST.BATCH_SIZE`` must be 1.
        output_dir: checkpoint directory of the segmentation model.
        output_dir_merge: checkpoint directory of the merge network.
        output_dir_save: directory receiving ``test-XX.h5`` files and the
            ``test_fig`` sub-directories.

    Returns:
        None. Results are written through ``save_h5``.

    Raises:
        AssertionError: if the test batch size is not 1.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source; spots where the original indentation could
    not be recovered with certainty carry their own NOTE(review) comment.
    """
    logger = logging.getLogger('shaper.test')

    # build model
    model, loss_fn, _, val_metric = build_model(cfg)
    model = nn.DataParallel(model).cuda()
    model_merge = nn.DataParallel(PointNetCls(in_channels=3, out_channels=128)).cuda()

    # build checkpointer
    checkpointer = Checkpointer(model, save_dir=output_dir, logger=logger)
    checkpointer_merge = Checkpointer(model_merge, save_dir=output_dir_merge, logger=logger)

    if cfg.TEST.WEIGHT:
        # load weight if specified
        weight_path = cfg.TEST.WEIGHT.replace('@', output_dir)
        checkpointer.load(weight_path, resume=False)
    else:
        # load last checkpoint
        checkpointer.load(None, resume=True)
        # NOTE(review): assumed to belong to this branch -- confirm that the
        # merge network should not be restored when TEST.WEIGHT is given.
        checkpointer_merge.load(None, resume=True)
        #checkpointer_refine.load(None, resume=True)

    # build data loader
    test_dataloader = build_dataloader(cfg, mode='test')

    assert cfg.TEST.BATCH_SIZE == 1, '{} != 1'.format(cfg.TEST.BATCH_SIZE)
    save_fig_dir = osp.join(output_dir_save, 'test_fig')
    os.makedirs(save_fig_dir, exist_ok=True)
    save_fig_dir_size = osp.join(save_fig_dir, 'size')
    os.makedirs(save_fig_dir_size, exist_ok=True)
    save_fig_dir_gt = osp.join(save_fig_dir, 'gt')
    os.makedirs(save_fig_dir_gt, exist_ok=True)

    # ---------------------------------------------------------------------------- #
    # Test
    # ---------------------------------------------------------------------------- #
    model.eval()
    model_merge.eval()
    loss_fn.eval()
    # Every input to this softmax is 1-D, so dim=0 is what the deprecated
    # implicit-dim rule selected; spelling it out removes the warning.
    softmax = nn.Softmax(dim=0)
    set_random_seed(cfg.RNG_SEED)

    NUM_POINT = 10000
    n_shape = len(test_dataloader)
    NUM_INS = 200
    # np.bool_ instead of the np.bool alias, which NumPy 1.24 removed.
    out_mask = np.zeros((n_shape, NUM_INS, NUM_POINT), dtype=np.bool_)
    out_valid = np.zeros((n_shape, NUM_INS), dtype=np.bool_)
    out_conf = np.ones((n_shape, NUM_INS), dtype=np.float32)

    meters = MetricLogger(delimiter=' ')
    meters.bind(val_metric)

    with torch.no_grad():
        start_time = time.time()
        for iteration, data_batch in enumerate(test_dataloader):
            print("batch: ", iteration)
            iter_start_time = time.time()

            data_batch = {
                k: v.cuda(non_blocking=True)
                for k, v in data_batch.items()
            }

            preds = model(data_batch)
            loss_dict = loss_fn(preds, data_batch)
            meters.update(**loss_dict)
            val_metric.update_dict(preds, data_batch)

            #extraction box features
            batch_size, _, num_centroids, num_neighbours = data_batch['neighbour_xyz'].shape
            num_points = data_batch['points'].shape[-1]

            #batch_size, num_centroid, num_neighbor
            _, p = torch.max(preds['ins_logit'], 1)
            # Scatter the per-neighbour prediction back onto the full point
            # set: one row per (batch, centroid), one column per point.
            box_index_expand = torch.zeros((batch_size * num_centroids, num_points)).cuda()
            box_index_expand = box_index_expand.scatter_(
                dim=1,
                index=data_batch['neighbour_index'].reshape([-1, num_neighbours]),
                src=p.reshape([-1, num_neighbours]).float())

            minimum_box_pc_num = 16
            minimum_overlap_pc_num = 16  #1/16 * num_neighbour
            gtmin_mask = (torch.sum(box_index_expand, dim=-1) > minimum_box_pc_num)

            #remove purity < 0.8
            purity_pred = torch.zeros([0]).type(torch.LongTensor).cuda()
            purity_pred_float = torch.zeros([0]).type(torch.FloatTensor).cuda()

            for i in range(batch_size):
                cur_xyz_pool, xyz_mean = mask_to_xyz(
                    data_batch['points'][i],
                    box_index_expand.view(batch_size, num_centroids, num_points)[i],
                    sample_num=512)
                cur_xyz_pool -= xyz_mean
                cur_xyz_pool /= (cur_xyz_pool + 1e-6).norm(dim=1).max(
                    dim=-1)[0].unsqueeze(-1).unsqueeze(-1)

                logits_purity = model_merge(cur_xyz_pool, 'purity')
                p = (logits_purity > 0.8).long().squeeze()
                purity_pred = torch.cat([purity_pred, p])
                purity_pred_float = torch.cat([purity_pred_float, logits_purity.squeeze()])

            p_thresh = 0.8
            purity_pred = purity_pred_float > p_thresh
            #in case remove too much
            # The target is capped at the number of boxes: with fewer than 48
            # boxes the uncapped loop could never terminate.
            min_keep = min(48, purity_pred_float.shape[0])
            while (torch.sum(purity_pred) < min_keep):
                p_thresh = p_thresh - 0.01
                purity_pred = purity_pred_float > p_thresh
            valid_mask = gtmin_mask.long() * purity_pred.long()
            box_index_expand = torch.index_select(
                box_index_expand, dim=0, index=valid_mask.nonzero().squeeze())

            # Prefix sums give the slice of surviving boxes of each sample.
            box_num = torch.sum(valid_mask.reshape(batch_size, num_centroids), 1)
            cumsum_box_num = torch.cumsum(box_num, dim=0)
            cumsum_box_num = torch.cat([
                torch.from_numpy(np.array(0)).cuda().unsqueeze(0),
                cumsum_box_num
            ], dim=0)

            pc_all = data_batch['points']
            for i in range(pc_all.shape[0]):
                bs = 1
                pc = pc_all[i].clone()
                cur_mask_pool = box_index_expand[cumsum_box_num[i]:cumsum_box_num[i + 1]].clone()
                cover_ratio = torch.unique(cur_mask_pool.nonzero()[:, 1]).shape[0] / num_points
                #print(iteration, cover_ratio)
                cur_xyz_pool, xyz_mean = mask_to_xyz(pc, cur_mask_pool)
                subpart_pool = cur_xyz_pool.clone()
                subpart_mask_pool = cur_mask_pool.clone()
                init_pool_size = cur_xyz_pool.shape[0]
                meters.update(cover_ratio=cover_ratio, init_pool_size=init_pool_size)
                negative_num = 0
                positive_num = 0

                #remove I
                # inter_matrix[a, b] = number of points shared by masks a, b.
                inter_matrix = torch.matmul(cur_mask_pool, cur_mask_pool.transpose(0, 1))
                inter_matrix_full = inter_matrix.clone() > minimum_overlap_pc_num
                inter_matrix[torch.eye(inter_matrix.shape[0]).byte()] = 0
                pair_idx = (inter_matrix.triu() > minimum_overlap_pc_num).nonzero()
                # Pairs the verification head has rejected; never retried.
                zero_pair = torch.ones([0, 2]).long()
                purity_matrix = torch.zeros(inter_matrix.shape).cuda()
                policy_matrix = torch.zeros(inter_matrix.shape).cuda()

                #calculate initial policy score matrix
                purity_pool, policy_pool = _score_pairs(model_merge, cur_xyz_pool, pair_idx)
                purity_matrix[pair_idx[:, 0], pair_idx[:, 1]] = purity_pool
                policy_matrix[pair_idx[:, 0], pair_idx[:, 1]] = policy_pool
                score_matrix = torch.zeros(purity_matrix.shape).cuda()
                score_matrix[pair_idx[:, 0], pair_idx[:, 1]] = softmax(purity_pool * policy_pool)
                meters.update(initial_pair_num=pair_idx.shape[0])
                iteration_num = 0
                remote_flag = False

                #info
                policy_list = []
                purity_list = []
                pred_label_list = []
                size_list = []
                relative_size_list = []

                while (pair_idx.shape[0] > 0) or (not remote_flag):
                    if pair_idx.shape[0] == 0:
                        # No overlapping candidates left: switch to the final
                        # stage. The constant 20 exceeds the overlap threshold
                        # of 16, so every pair not rejected before qualifies.
                        remote_flag = True
                        inter_matrix = 20 * torch.ones([
                            cur_mask_pool.shape[0], cur_mask_pool.shape[0]
                        ]).cuda()
                        inter_matrix[zero_pair[:, 0], zero_pair[:, 1]] = 0
                        inter_matrix[torch.eye(inter_matrix.shape[0]).byte()] = 0
                        pair_idx = (inter_matrix.triu() > minimum_overlap_pc_num).nonzero()
                        if pair_idx.shape[0] == 0:
                            break
                        purity_matrix = torch.zeros(inter_matrix.shape).cuda()
                        policy_matrix = torch.zeros(inter_matrix.shape).cuda()
                        purity_pool, policy_pool = _score_pairs(
                            model_merge, cur_xyz_pool, pair_idx)
                        purity_matrix[pair_idx[:, 0], pair_idx[:, 1]] = purity_pool
                        policy_matrix[pair_idx[:, 0], pair_idx[:, 1]] = policy_pool
                        score_matrix = torch.zeros(purity_matrix.shape).cuda()
                        score_matrix[pair_idx[:, 0], pair_idx[:, 1]] = softmax(
                            purity_pool * policy_pool)
                    iteration_num += 1

                    #everytime select the pair with highest score
                    score_arr = score_matrix[pair_idx[:, 0], pair_idx[:, 1]]
                    highest_score, rank_idx = torch.topk(score_arr, 1, largest=True, sorted=False)
                    perm_idx = rank_idx
                    assert highest_score == score_matrix[pair_idx[rank_idx, 0],
                                                         pair_idx[rank_idx, 1]]

                    sub_part_idx = torch.index_select(pair_idx, dim=0, index=perm_idx[:bs])
                    purity_score = purity_matrix[sub_part_idx[:, 0], sub_part_idx[:, 1]]
                    policy_score = policy_matrix[sub_part_idx[:, 0], sub_part_idx[:, 1]]

                    #info
                    policy_list.append(policy_score.cpu().data.numpy()[0])
                    purity_list.append(purity_score.cpu().data.numpy()[0])

                    part_xyz1 = torch.index_select(cur_xyz_pool, dim=0, index=sub_part_idx[:, 0])
                    part_xyz2 = torch.index_select(cur_xyz_pool, dim=0, index=sub_part_idx[:, 1])
                    part_xyz = torch.cat([part_xyz1, part_xyz2], -1)
                    # Here each cloud is centred and scaled on its own
                    # (unlike the scoring helper, which shares one norm).
                    part_xyz -= torch.mean(part_xyz, -1).unsqueeze(-1)
                    part_xyz1 -= torch.mean(part_xyz1, -1).unsqueeze(-1)
                    part_xyz2 -= torch.mean(part_xyz2, -1).unsqueeze(-1)
                    part_xyz1 /= part_xyz1.norm(dim=1).max(dim=-1)[0].unsqueeze(-1).unsqueeze(-1)
                    part_xyz2 /= part_xyz2.norm(dim=1).max(dim=-1)[0].unsqueeze(-1).unsqueeze(-1)
                    part_xyz /= part_xyz.norm(dim=1).max(dim=-1)[0].unsqueeze(-1).unsqueeze(-1)
                    part_mask11 = torch.index_select(cur_mask_pool, dim=0, index=sub_part_idx[:, 0])
                    part_mask22 = torch.index_select(cur_mask_pool, dim=0, index=sub_part_idx[:, 1])

                    # Context = union of all masks overlapping either part.
                    context_idx1 = torch.index_select(
                        inter_matrix_full, dim=0, index=sub_part_idx[:, 0])
                    context_idx2 = torch.index_select(
                        inter_matrix_full, dim=0, index=sub_part_idx[:, 1])
                    context_mask1 = (torch.matmul(context_idx1.float(), cur_mask_pool) > 0).float()
                    context_mask2 = (torch.matmul(context_idx2.float(), cur_mask_pool) > 0).float()
                    context_mask = ((context_mask1 + context_mask2) > 0).float()
                    context_xyz, xyz_mean = mask_to_xyz(pc, context_mask, sample_num=2048)
                    context_xyz = context_xyz - xyz_mean
                    context_xyz /= context_xyz.norm(dim=1).max(
                        dim=-1)[0].unsqueeze(-1).unsqueeze(-1)

                    # The context-aware head is used only for the three listed
                    # categories and only while the pool has under 32 parts.
                    use_context = (
                        cfg.DATASET.PartNetInsSeg.TEST.shape in [
                            'Chair', 'Lamp', 'StorageFurniture'
                        ] and cur_xyz_pool.shape[0] < 32)
                    logits1 = model_merge(part_xyz1, 'backbone')
                    logits2 = model_merge(part_xyz2, 'backbone')
                    # Broadcast each part feature over the points of its part.
                    pair_feat = torch.cat([
                        logits1.unsqueeze(-1).expand(-1, -1, part_xyz1.shape[-1]),
                        logits2.unsqueeze(-1).expand(-1, -1, part_xyz2.shape[-1])
                    ], dim=-1)
                    if use_context:
                        context_logits = model_merge(context_xyz, 'backbone2')
                        merge_logits = model_merge(
                            torch.cat([
                                part_xyz, pair_feat,
                                context_logits.unsqueeze(-1).expand(
                                    -1, -1, part_xyz.shape[-1])
                            ], dim=1), 'head2')
                    else:
                        merge_logits = model_merge(
                            torch.cat([part_xyz, pair_feat], dim=1), 'head')

                    _, p = torch.max(merge_logits, 1)
                    # NOTE(review): the original first chose between the gated
                    # label and the raw prediction ``p`` (for the final stage)
                    # and then unconditionally overwrote the result with the
                    # gated label. Only the effective assignment is kept;
                    # confirm whether the final stage was meant to use ``p``.
                    siamese_label = p * ((purity_score > p_thresh).long())
                    negative_num += torch.sum(siamese_label == 0)
                    positive_num += torch.sum(siamese_label == 1)
                    pred_label_list.append(siamese_label.cpu().data.numpy())

                    #info
                    new_part_mask = 1 - (1 - part_mask11) * (1 - part_mask22)
                    size_list.append(torch.sum(new_part_mask).cpu().data.numpy())
                    size1 = torch.sum(part_mask11).cpu().data.numpy()
                    size2 = torch.sum(part_mask22).cpu().data.numpy()
                    relative_size_list.append(size1 / size2 + size2 / size1)

                    #update info
                    merge_idx1 = torch.index_select(
                        sub_part_idx[:, 0], dim=0, index=siamese_label.nonzero().squeeze())
                    merge_idx2 = torch.index_select(
                        sub_part_idx[:, 1], dim=0, index=siamese_label.nonzero().squeeze())
                    merge_idx = torch.unique(torch.cat([merge_idx1, merge_idx2], dim=0))
                    nonmerge_idx1 = torch.index_select(
                        sub_part_idx[:, 0], dim=0, index=(1 - siamese_label).nonzero().squeeze())
                    nonmerge_idx2 = torch.index_select(
                        sub_part_idx[:, 1], dim=0, index=(1 - siamese_label).nonzero().squeeze())
                    part_mask1 = torch.index_select(cur_mask_pool, dim=0, index=merge_idx1)
                    part_mask2 = torch.index_select(cur_mask_pool, dim=0, index=merge_idx2)
                    # Union of the two masks.
                    new_part_mask = 1 - (1 - part_mask1) * (1 - part_mask2)

                    # equal_matrix[a, b] == 0 iff new masks a and b coincide;
                    # only the first of each duplicate group is kept.
                    equal_matrix = torch.matmul(
                        new_part_mask, 1 - new_part_mask.transpose(0, 1)) + torch.matmul(
                            1 - new_part_mask, new_part_mask.transpose(0, 1))
                    equal_matrix[torch.eye(equal_matrix.shape[0]).byte()] = 1
                    fid = (equal_matrix == 0).nonzero()
                    if fid.shape[0] > 0:
                        flag = torch.ones(merge_idx1.shape[0])
                        for k in range(flag.shape[0]):
                            if flag[k] != 0:
                                flag[fid[:, 1][fid[:, 0] == k]] = 0
                        new_part_mask = torch.index_select(
                            new_part_mask, dim=0, index=flag.nonzero().squeeze().cuda())

                    new_part_xyz, xyz_mean = mask_to_xyz(pc, new_part_mask)

                    #update purity and score, policy score matrix
                    if new_part_mask.shape[0] > 0:
                        overlap_idx = (torch.matmul(
                            cur_mask_pool, new_part_mask.transpose(0, 1)
                        ) > minimum_overlap_pc_num).nonzero().squeeze()
                        # Scores of the new part against the existing pool;
                        # they stay zero where there is no overlap.
                        new_purity_col = torch.zeros([purity_matrix.shape[0]]).cuda()
                        new_policy_col = torch.zeros([policy_matrix.shape[0]]).cuda()
                        if overlap_idx.shape[0] > 0:
                            if len(overlap_idx.shape) == 1:
                                overlap_idx = overlap_idx.unsqueeze(0)
                            part_xyz1 = torch.index_select(
                                cur_xyz_pool, dim=0, index=overlap_idx[:, 0])
                            part_xyz2 = tile(new_part_xyz, 0, overlap_idx.shape[0])
                            part_xyz = torch.cat([part_xyz1, part_xyz2], -1)
                            part_xyz -= torch.mean(part_xyz, -1).unsqueeze(-1)
                            part_norm = part_xyz.norm(dim=1).max(
                                dim=-1)[0].unsqueeze(-1).unsqueeze(-1)
                            part_xyz /= part_norm
                            overlap_purity_scores = model_merge(part_xyz, 'purity').squeeze()

                            part_xyz11 = part_xyz1 - torch.mean(part_xyz1, -1).unsqueeze(-1)
                            part_xyz22 = part_xyz2 - torch.mean(part_xyz2, -1).unsqueeze(-1)
                            part_xyz11 /= part_norm
                            part_xyz22 /= part_norm
                            logits11 = model_merge(part_xyz11, 'policy')
                            logits22 = model_merge(part_xyz22, 'policy')
                            overlap_policy_scores = model_merge(
                                torch.cat([logits11, logits22], dim=-1),
                                'policy_head').squeeze()

                            new_purity_col[overlap_idx[:, 0]] = overlap_purity_scores
                            new_policy_col[overlap_idx[:, 0]] = overlap_policy_scores
                        # Grow both matrices by one column and one zero row
                        # so they keep matching the enlarged pool.
                        purity_matrix = torch.cat(
                            [purity_matrix, new_purity_col.unsqueeze(1)], dim=1)
                        policy_matrix = torch.cat(
                            [policy_matrix, new_policy_col.unsqueeze(1)], dim=1)
                        purity_matrix = torch.cat([
                            purity_matrix,
                            torch.zeros(purity_matrix.shape[1]).cuda().unsqueeze(0)
                        ])
                        policy_matrix = torch.cat([
                            policy_matrix,
                            torch.zeros(policy_matrix.shape[1]).cuda().unsqueeze(0)
                        ])

                    cur_mask_pool = torch.cat([cur_mask_pool, new_part_mask], dim=0)
                    subpart_mask_pool = torch.cat([subpart_mask_pool, new_part_mask], dim=0)
                    cur_xyz_pool = torch.cat([cur_xyz_pool, new_part_xyz], dim=0)
                    subpart_pool = torch.cat([subpart_pool, new_part_xyz], dim=0)
                    cur_pool_size = cur_mask_pool.shape[0]
                    # Drop the two parts that were merged from the live pool.
                    new_mask = torch.ones([cur_pool_size])
                    new_mask[merge_idx] = 0
                    new_idx = new_mask.nonzero().squeeze().cuda()
                    cur_xyz_pool = torch.index_select(cur_xyz_pool, dim=0, index=new_idx)
                    cur_mask_pool = torch.index_select(cur_mask_pool, dim=0, index=new_idx)
                    inter_matrix = torch.matmul(cur_mask_pool, cur_mask_pool.transpose(0, 1))
                    inter_matrix_full = inter_matrix.clone() > minimum_overlap_pc_num
                    if remote_flag:
                        inter_matrix = 20 * torch.ones([
                            cur_mask_pool.shape[0], cur_mask_pool.shape[0]
                        ]).cuda()

                    #update zero_matrix
                    zero_matrix = torch.zeros([cur_pool_size, cur_pool_size])
                    zero_matrix[zero_pair[:, 0], zero_pair[:, 1]] = 1
                    zero_matrix[nonmerge_idx1, nonmerge_idx2] = 1
                    zero_matrix[nonmerge_idx2, nonmerge_idx1] = 1
                    zero_matrix = torch.index_select(zero_matrix, dim=0, index=new_idx.cpu())
                    zero_matrix = torch.index_select(zero_matrix, dim=1, index=new_idx.cpu())
                    zero_pair = zero_matrix.nonzero()
                    inter_matrix[zero_pair[:, 0], zero_pair[:, 1]] = 0
                    inter_matrix[torch.eye(inter_matrix.shape[0]).byte()] = 0
                    pair_idx = (inter_matrix.triu() > minimum_overlap_pc_num).nonzero()

                    purity_matrix = torch.index_select(purity_matrix, dim=0, index=new_idx)
                    purity_matrix = torch.index_select(purity_matrix, dim=1, index=new_idx)
                    policy_matrix = torch.index_select(policy_matrix, dim=0, index=new_idx)
                    policy_matrix = torch.index_select(policy_matrix, dim=1, index=new_idx)
                    score_matrix = torch.zeros(purity_matrix.shape).cuda()
                    score_idx = pair_idx
                    score_matrix[score_idx[:, 0], score_idx[:, 1]] = softmax(
                        purity_matrix[score_idx[:, 0], score_idx[:, 1]] *
                        policy_matrix[score_idx[:, 0], score_idx[:, 1]])

                final_pool_size = subpart_pool.shape[0]
                meters.update(final_pool_size=final_pool_size,
                              negative_num=negative_num,
                              positive_num=positive_num)
                meters.update(iteration_num=iteration_num)
                meters.update(iteration_time=time.time() - iter_start_time)

            # NOTE(review): the post-processing below is placed after the
            # per-sample loop; with the asserted batch size of 1 this is
            # equivalent to running it inside the loop.
            # t1[a, b] counts points of mask a outside mask b; a zero entry
            # means a is contained in b, and such rows are dropped.
            t1 = torch.matmul(cur_mask_pool, 1 - cur_mask_pool.transpose(0, 1))
            t1[torch.eye(t1.shape[0]).byte()] = 1
            t1_id = (t1 == 0).nonzero()
            final_idx = torch.ones(t1.shape[0])
            final_idx[t1_id[:, 0]] = 0
            cur_mask_pool = torch.index_select(
                cur_mask_pool, dim=0, index=final_idx.nonzero().squeeze().cuda())

            pred_ins_label = torch.zeros(num_points).cuda()
            for k in range(cur_mask_pool.shape[0]):
                pred_ins_label[cur_mask_pool[k].byte()] = k + 1
            valid_idx = torch.sum(cur_mask_pool, 0) > 0
            # ``== 0`` instead of ``1 - valid_idx``: subtracting from a bool
            # tensor raises on current PyTorch, while the comparison yields a
            # usable mask on both old (uint8) and new (bool) versions.
            invalid_idx = valid_idx == 0
            if torch.sum(invalid_idx) != 0:
                valid_points = pc[:, valid_idx]
                invalid_points = pc[:, invalid_idx]
                #perform knn to cover all points
                knn_index, _ = _F.knn_distance(invalid_points.unsqueeze(0),
                                               valid_points.unsqueeze(0), 5, False)
                # Majority label among the 5 nearest labelled points.
                invalid_pred, _ = pred_ins_label[valid_idx][knn_index.squeeze()].mode()
                pred_ins_label[invalid_idx] = invalid_pred
            cur_mask_pool_new = torch.zeros([0, num_points]).cuda()
            for k in range(cur_mask_pool.shape[0]):
                if torch.sum(pred_ins_label == (k + 1)) != 0:
                    cur_mask_pool_new = torch.cat([
                        cur_mask_pool_new,
                        ((pred_ins_label == (k + 1)).float()).unsqueeze(0)
                    ], dim=0)
            out_mask[iteration, :cur_mask_pool_new.shape[0]] = copy.deepcopy(
                cur_mask_pool_new.cpu().data.numpy().astype(np.bool_))
            # NOTE(review): this sums over ALL masks, so every instance of the
            # shape receives the same validity flag -- confirm whether a
            # per-instance sum (axis=1) was intended.
            out_valid[iteration, :cur_mask_pool_new.shape[0]] = np.sum(
                cur_mask_pool_new.cpu().data.numpy()) > 10

    test_time = time.time() - start_time
    logger.info('Test {} test time: {:.2f}s'.format(meters.summary_str, test_time))
    # Results are written in chunks of 1024 shapes per file.
    for i in range(int(out_mask.shape[0] / 1024) + 1):
        save_h5(os.path.join(output_dir_save, 'test-%02d.h5' % (i)),
                out_mask[i * 1024:(i + 1) * 1024],
                out_valid[i * 1024:(i + 1) * 1024],
                out_conf[i * 1024:(i + 1) * 1024])


def _score_pairs(model_merge, xyz_pool, pair_idx, chunk_size=64):
    """Evaluate the purity and policy heads on every candidate pair.

    Args:
        model_merge: merge network, called as ``model_merge(x, head_name)``.
        xyz_pool: point clouds of the current parts, indexed along axis 0.
        pair_idx: (num_pairs, 2) indices into ``xyz_pool``.
        chunk_size: number of pairs sent through the network per call.

    Returns:
        Tuple ``(purity_pool, policy_pool)`` of 1-D CUDA tensors holding one
        score per row of ``pair_idx``.
    """
    idx = torch.arange(pair_idx.shape[0]).cuda()
    purity_pool = torch.zeros([0]).float().cuda()
    policy_pool = torch.zeros([0]).float().cuda()
    for k in range(int(np.ceil(idx.shape[0] / chunk_size))):
        sub_part_idx = torch.index_select(
            pair_idx, dim=0, index=idx[k * chunk_size:(k + 1) * chunk_size])
        part_xyz1 = torch.index_select(xyz_pool, dim=0, index=sub_part_idx[:, 0])
        part_xyz2 = torch.index_select(xyz_pool, dim=0, index=sub_part_idx[:, 1])
        # Purity is judged on the centred, normalised union of both parts.
        part_xyz = torch.cat([part_xyz1, part_xyz2], -1)
        part_xyz -= torch.mean(part_xyz, -1).unsqueeze(-1)
        part_norm = part_xyz.norm(dim=1).max(dim=-1)[0].unsqueeze(-1).unsqueeze(-1)
        part_xyz /= part_norm
        logits_purity = model_merge(part_xyz, 'purity').squeeze()
        if len(logits_purity.shape) == 0:
            # squeeze() removed the only axis of a single-pair chunk
            logits_purity = logits_purity.unsqueeze(0)
        purity_pool = torch.cat([purity_pool, logits_purity], dim=0)

        # Policy looks at each part separately, centred on its own mean but
        # scaled by the norm of the union.
        part_xyz11 = part_xyz1 - torch.mean(part_xyz1, -1).unsqueeze(-1)
        part_xyz22 = part_xyz2 - torch.mean(part_xyz2, -1).unsqueeze(-1)
        part_xyz11 /= part_norm
        part_xyz22 /= part_norm
        logits11 = model_merge(part_xyz11, 'policy')
        logits22 = model_merge(part_xyz22, 'policy')
        policy_scores = model_merge(
            torch.cat([logits11, logits22], dim=-1), 'policy_head').squeeze()
        if len(policy_scores.shape) == 0:
            policy_scores = policy_scores.unsqueeze(0)
        policy_pool = torch.cat([policy_pool, policy_scores], dim=0)
    return purity_pool, policy_pool
def main():
    """Predict target-domain labels with a three-source ensemble.

    Loads a feature extractor and one classifier per source domain from a
    single checkpoint, combines the three classifier outputs with the stored
    (or uniform) weights, and writes ``<target>_pred.csv`` with one
    ``image_name,label`` row per test image.
    """
    root = 'data/'
    source1 = 'real'
    source2 = 'infograph'
    source3 = 'quickdraw'
    target = 'sketch'
    adaptive_weight = True

    if not target == 'real':
        dataset_t = DA(dir=root, name=target, img_size=(224, 224), train=False)
    else:
        dataset_t = test_dataset(dir='data/test', img_size=(224, 224))
    dataloader_t = DataLoader(dataset_t, batch_size=64, shuffle=False, num_workers=8)

    path = 'checkpoints/infograph-0-deming.pth'  #you may change the path 'checkpoints/sketch-30.pth'

    feature_extractor = models.feature_extractor()
    classifier_1 = models.class_classifier()
    classifier_2 = models.class_classifier()
    classifier_3 = models.class_classifier()

    state = torch.load(path)
    print(len(state))
    print(state.keys())
    print()

    feature_extractor.load_state_dict(state['feature_extractor'])
    classifier_1.load_state_dict(state['{}_classifier'.format(source1)])
    classifier_2.load_state_dict(state['{}_classifier'.format(source2)])
    classifier_3.load_state_dict(state['{}_classifier'.format(source3)])
    if adaptive_weight:
        # per-source ensemble weights stored alongside the model
        w1_mean = state['{}_weight'.format(source1)]
        w2_mean = state['{}_weight'.format(source2)]
        w3_mean = state['{}_weight'.format(source3)]
    else:
        w1_mean = 1 / 3
        w2_mean = 1 / 3
        w3_mean = 1 / 3

    if torch.cuda.is_available():
        feature_extractor = feature_extractor.cuda()
        classifier_1 = classifier_1.cuda()
        classifier_2 = classifier_2.cuda()
        classifier_3 = classifier_3.cuda()

    feature_extractor.eval()
    classifier_1.eval()
    classifier_2.eval()
    classifier_3.eval()

    m = nn.Softmax(1)
    # The context manager closes the CSV even if inference raises half-way;
    # the previous open()/close() pair leaked the handle in that case.
    with open('{}_pred.csv'.format(target), 'w') as ans:
        ans.write('image_name,label\n')
        with torch.no_grad():
            for idx, (img, name) in enumerate(dataloader_t):
                if torch.cuda.is_available():
                    img = img.cuda()
                ft_t = feature_extractor(img)

                pred1 = classifier_1(ft_t)
                pred2 = classifier_2(ft_t)
                pred3 = classifier_3(ft_t)

                pred = (pred1 * w1_mean + pred2 * w2_mean + pred3 * w3_mean)
                pred = m(pred)
                _, pred = torch.max(pred, dim=1)

                print('\r Predicting... Progress: %.1f %%' %
                      (100 * (idx + 1) / len(dataloader_t)), end='')
                for i in range(len(name)):
                    # .item() writes the plain integer class id rather than
                    # relying on how a tensor element happens to format.
                    ans.write('{},{}\n'.format(os.path.join('test/', name[i]),
                                               pred[i].item()))
# Register the branch name as an input variable of the dataloader.
# NOTE(review): ``branch`` is defined outside this excerpt -- presumably a
# loop over the tree's branches; confirm.
dataloader.AddVariable(branch.GetName())

# One signal tree and three background trees, each under its own class name.
dataloader.AddTree(signal, 'Signal')
dataloader.AddTree(background0, 'Background_0')
dataloader.AddTree(background1, 'Background_1')
dataloader.AddTree(background2, 'Background_2')
dataloader.PrepareTrainingAndTestTree(
    TCut(''), 'SplitMode=Random:NormMode=NumEvents:!V')

# Generate model
# Define model
# 4 inputs -> 32 hidden units -> 4 outputs, normalised with a softmax.
model = nn.Sequential()
model.add_module('linear_1', nn.Linear(in_features=4, out_features=32))
model.add_module('relu', nn.ReLU())
model.add_module('linear_2', nn.Linear(in_features=32, out_features=4))
model.add_module('softmax', nn.Softmax(dim=1))

# Set loss and optimizer
# NOTE(review): nn.CrossEntropyLoss applies log-softmax itself; if it is fed
# the output of the 'softmax' layer above the scores are normalised twice --
# confirm this is intended.
loss = nn.CrossEntropyLoss()
# The optimizer CLASS is stored here; train() instantiates it.
optimizer = torch.optim.SGD

# Define train function
def train(model, train_loader, val_loader, num_epochs, batch_size, optimizer, criterion, save_best, scheduler):
    # ``optimizer`` is a class/factory; the learning rate is fixed at 0.01.
    trainer = optimizer(model.parameters(), lr=0.01)
    # ``scheduler`` is expected to be a 2-tuple.
    schedule, schedulerSteps = scheduler
    best_val = None
    for epoch in range(num_epochs):
        # Training Loop
def __init__(self, input_dim, img_dim, hidden_dim, key_dim, value_dim, num_label=2, num_head=2, num_layer=1, mode='one-shot', device=torch.device("cpu")):
    """Build the embedding, attention and prediction layers of MemexQA.

    Args:
        input_dim: size of the text inputs fed to the question, answer and
            sentence projections.
        img_dim: size of the image features fed to ``img_emb``.
        hidden_dim: size of the embedded image features.
        key_dim: per-head key/query size.
        value_dim: per-head value size.
        num_label: number of outputs of ``answer_proj`` in the one-shot modes.
        num_head: number of attention heads.
        num_layer: number of self-attention layers; layers after the first
            get their own per-head projections.
        mode: 'one-shot', 'select-one' or 'att-concat-one-shot'.
        device: device handed to ``SelfAttention`` and ``Attention``.

    Raises:
        NotImplementedError: if ``mode`` is not one of the three above.
    """
    super(MemexQA, self).__init__()
    self.device = device
    self.num_label = num_label
    self.num_head = num_head
    self.num_layer = num_layer
    self.mode = mode

    # img_emb
    # self.img_emb = nn.Linear(img_dim, hidden_dim)
    # Two-layer MLP: img_dim -> 2 * hidden_dim -> hidden_dim.
    self.img_emb = nn.Sequential(nn.Linear(img_dim, hidden_dim * 2), nn.ReLU(), nn.Linear(hidden_dim * 2, hidden_dim))
    self.input_layer_norm = nn.LayerNorm(hidden_dim)

    # que_emb
    self.que_emb = nn.Linear(input_dim, key_dim * self.num_head)

    # ans_emb
    # The answer embedding is key-sized in 'att-concat-one-shot' mode and
    # value-sized otherwise.
    if self.mode == 'att-concat-one-shot':
        self.ans_emb = nn.Linear(input_dim, key_dim * self.num_head)
    else:
        self.ans_emb = nn.Linear(input_dim, value_dim * self.num_head)

    # self attention
    # One query/key/value projection per head, for sentences (input_dim) and
    # for images (hidden_dim); registered as attributes numbered from 1.
    for i in range(self.num_head):
        setattr(self, 'self_sentence_query' + str(i + 1), nn.Linear(input_dim, key_dim))
        setattr(self, 'self_sentence_key' + str(i + 1), nn.Linear(input_dim, key_dim))
        setattr(self, 'self_sentence_value' + str(i + 1), nn.Linear(input_dim, value_dim))
        setattr(self, 'self_image_query' + str(i + 1), nn.Linear(hidden_dim, key_dim))
        setattr(self, 'self_image_key' + str(i + 1), nn.Linear(hidden_dim, key_dim))
        setattr(self, 'self_image_value' + str(i + 1), nn.Linear(hidden_dim, value_dim))
    self.self_attention = SelfAttention(key_dim, value_dim, self.device)
    self.layer_norm = nn.LayerNorm(value_dim * self.num_head)

    # More layers
    # Layers beyond the first take the concatenated head outputs
    # (num_head * value_dim) as input; attributes are named
    # self_{query,key,value}_<layer>_<head>, both numbered from 1.
    dim = self.num_head * value_dim
    for nl in range(self.num_layer - 1):
        for i in range(self.num_head):
            setattr(self, 'self_query_{}_{}'.format(nl + 1, i + 1), nn.Linear(dim, key_dim))
            setattr(self, 'self_key_{}_{}'.format(nl + 1, i + 1), nn.Linear(dim, key_dim))
            setattr(self, 'self_value_{}_{}'.format(nl + 1, i + 1), nn.Linear(dim, value_dim))

    # question attention
    self.key_proj = nn.Linear(value_dim * self.num_head, key_dim * self.num_head)
    self.value_proj = nn.Linear(value_dim * self.num_head, value_dim * self.num_head)
    self.attention = Attention(key_dim * self.num_head, value_dim * self.num_head, self.device)

    # Prediction
    # The input width of ``answer_proj`` depends on the mode; 'select-one'
    # produces a single score, the other modes ``num_label`` outputs.
    if self.mode == 'one-shot':
        self.answer_proj = nn.Linear(
            value_dim * self.num_head * 3 + key_dim * self.num_head,
            self.num_label)
    elif self.mode == 'select-one':  # NOT GREAT
        self.answer_proj = nn.Linear(
            value_dim * self.num_head * 3 + key_dim * self.num_head, 1)
    elif self.mode == 'att-concat-one-shot':
        self.answer_proj = nn.Linear(
            value_dim * self.num_head * 3 + key_dim * self.num_head * 2,
            self.num_label)
    else:
        raise NotImplementedError("Not implemented!")

    # criterion
    # self.criterion = nn.CrossEntropyLoss()
    # Substring test: 'att-concat-one-shot' also takes the first branch.
    # NOTE(review): the first argument (2 / 4) is presumably the number of
    # classes and is hard-coded rather than taken from num_label -- confirm.
    if 'one-shot' in self.mode:
        self.criterion = LabelSmoothingLoss(2, 0.1)
    elif 'select-one' in self.mode:
        self.criterion = LabelSmoothingLoss(4, 0.1)
    self.softmax = nn.Softmax(dim=-1)

    # Additional Techniques
    self.dropout = nn.Dropout(p=0.1)
def __init__(self,device, pretrained_encoder_isTrue, pre_trained_encoder_variables, shapes, nonlinearities, input_size,window,hidden_units,output_classes=10):
    """Assemble encoder -> delta layer -> BLSTM -> softmax classifier.

    Args:
        device: device handed to the delta layer.
        pretrained_encoder_isTrue: when equal to True the encoder is built
            from the supplied weights and biases.
        pre_trained_encoder_variables: ``(weights, biases)`` pair; only
            unpacked when a pre-trained encoder is requested.
        shapes: encoder layer sizes; the last entry sets the BLSTM input
            width (times 3).
        nonlinearities: encoder non-linearities.
        input_size: encoder input size.
        window: window handed to the delta layer.
        hidden_units: BLSTM hidden size per direction.
        output_classes: number of classifier outputs.
    """
    super(deltanet_majority_vote, self).__init__()
    print("pretrained_encoder",pretrained_encoder_isTrue)

    self.device = device
    self.window = window
    self.input_size = input_size
    self.hidden_units = hidden_units
    self.output_classes = output_classes

    #'fc1', 'fc2', 'fc3', 'bottleneck'
    # Both encoder variants record the same architecture description.
    self.shapes = shapes
    self.nonlinearities = nonlinearities
    if pretrained_encoder_isTrue == True:
        weights, biases = pre_trained_encoder_variables
        encoder, _ = pretrained_custom_encoder(input_size, shapes, nonlinearities, weights, biases)
    else:
        encoder = custom_encoder(input_size, shapes, nonlinearities)
    self.layer_encoder = encoder

    self.layer_delta = delta_layer(self.device, self.window)

    # only blstm implemented
    self.layer_blstm = nn.LSTM(
        input_size=shapes[-1] * 3,
        hidden_size=self.hidden_units,
        num_layers=1,
        batch_first=True,
        bidirectional=True,
    )

    # hidden_units*2 because of the two directions; the softmax runs over
    # the last dimension
    classifier = nn.Linear(self.hidden_units * 2, self.output_classes)
    self.layer_out = nn.Sequential(classifier, nn.Softmax(dim=-1))
# Environment wrapped in a Monitor that records into "mon<ENV_NAME>";
# force=True is passed to the Monitor.
env = gym.make(ENV_NAME)
env = gym.wrappers.Monitor(env, directory="mon" + ENV_NAME, force=True)

HIDDEN_LAYER_SIZE = 200
PERCENTILE = 70
SHOW_SOME = True
DEVICE = torch.device(type="cuda")

# Policy network: observation -> two hidden ReLU layers -> one raw score per
# action. The softmax is kept separate (``sm``) rather than inside the net.
net = nn.Sequential(
    nn.Linear(env.observation_space.shape[0], HIDDEN_LAYER_SIZE),
    nn.ReLU(),
    nn.Linear(HIDDEN_LAYER_SIZE,HIDDEN_LAYER_SIZE),
    nn.ReLU(),
    nn.Linear(HIDDEN_LAYER_SIZE, env.action_space.n)
).to(DEVICE)
sm = nn.Softmax(dim=1)
optimizer = optim.Adagrad(net.parameters())
loss = nn.CrossEntropyLoss()
writer = SummaryWriter(comment=("-" + ENV_NAME))

# Lightweight records for an episode and for a single step of it.
Episode = namedtuple('Episode', field_names=['reward', 'steps'])
EpisodeStep = namedtuple('EpisodeStep', field_names=['observation', 'action'])

def filter_batch(batch, percentile):
    # Reward of every episode in the batch.
    rewards = list(map(lambda s: s.reward, batch))
    # Reward value at the requested percentile, and the batch mean.
    reward_bound = np.percentile(rewards, percentile)
    reward_mean = float(np.mean(rewards))
    train_obs = []
def __init__(self, device, pretrained_stream1_model_isTrue,
             pretrained_stream1_model1, shapes1, nonlinearities1, input_size1,
             pretrained_stream1_model2, shapes2, nonlinearities2, input_size2,
             pretrained_stream1_model3, shapes3, nonlinearities3, input_size3,
             pretrained_stream1_model4, shapes4, nonlinearities4, input_size4,
             pretrained_stream1_model5, shapes5, nonlinearities5, input_size5,
             window, hidden_units, output_classes=10):
    """Build five first-stage streams plus the aggregating BLSTM and head.

    Args:
        device: device stored on the module and handed to new streams.
        pretrained_stream1_model_isTrue: when equal to True the five
            ``pretrained_stream1_model*`` objects are used as the streams;
            otherwise fresh ``cutoff_deltanet_majority_vote`` streams are
            built from the matching shapes / nonlinearities / input size.
        pretrained_stream1_model1..5: ready-made stream modules.
        shapes1..5, nonlinearities1..5, input_size1..5: per-stream encoder
            description.
        window: delta window handed to new streams.
        hidden_units: LSTM hidden size per direction.
        output_classes: number of outputs of the final classifier.
    """
    super(adenet_5stream, self).__init__()
    self.device = device
    self.window = window

    stream_specs = (
        (pretrained_stream1_model1, shapes1, nonlinearities1, input_size1),
        (pretrained_stream1_model2, shapes2, nonlinearities2, input_size2),
        (pretrained_stream1_model3, shapes3, nonlinearities3, input_size3),
        (pretrained_stream1_model4, shapes4, nonlinearities4, input_size4),
        (pretrained_stream1_model5, shapes5, nonlinearities5, input_size5),
    )
    for number, spec in enumerate(stream_specs, start=1):
        setattr(self, 'input_size{}'.format(number), spec[3])

    self.hidden_units = hidden_units
    self.output_classes = output_classes

    #'fc1', 'fc2', 'fc3', 'bottleneck'
    use_pretrained = pretrained_stream1_model_isTrue == True
    for number, (ready_made, shapes, nonlins, in_size) in enumerate(stream_specs, start=1):
        if use_pretrained:
            stream = ready_made
        else:
            # fresh streams are always built with 10 output classes
            stream = cutoff_deltanet_majority_vote(
                device, False, None, shapes, nonlins, in_size, window,
                hidden_units, 10)
        setattr(self, 'stream1_model_{}'.format(number), stream)

    # only blstm implemented
    self.layer_blstm_agg = nn.LSTM(
        input_size=self.hidden_units * 10,
        hidden_size=self.hidden_units,
        num_layers=1,
        batch_first=True,
        bidirectional=True,
    )

    # hidden_units*2 because of the two directions; the softmax runs over
    # the last dimension
    classifier = nn.Linear(self.hidden_units * 2, self.output_classes)
    self.layer_out = nn.Sequential(classifier, nn.Softmax(dim=-1))
def __init__(self, adj_matrix, upper_half_node=None, lower_half_node=None, in_dim=256, hidden_dim=10, cls_p=7, cls_h=3, cls_f=2):
    """Build the feature transforms, the GNN and the classifiers.

    Args:
        adj_matrix: adjacency matrix handed to ``GNN``.
        upper_half_node: node ids of the upper half; defaults to
            ``[1, 2, 3, 4]``.
        lower_half_node: node ids of the lower half; defaults to ``[5, 6]``.
        in_dim: channel count of the incoming feature maps.
        hidden_dim: channel count per node.
        cls_p: number of part classes.
        cls_h: number of half-body classes.
        cls_f: number of full-body classes.
    """
    super(GNN_infer, self).__init__()
    # Fresh lists per instance: the former list defaults were created once
    # at definition time and shared by every instance (and by GNN).
    if upper_half_node is None:
        upper_half_node = [1, 2, 3, 4]
    if lower_half_node is None:
        lower_half_node = [5, 6]

    self.cls_p = cls_p
    self.cls_h = cls_h
    self.cls_f = cls_f
    self.in_dim = in_dim
    self.hidden_dim = hidden_dim

    def conv_bn_relu(in_channels, out_channels):
        # 1x1 convolution without bias -> batch norm -> ReLU
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0,
                      stride=1, bias=False),
            BatchNorm2d(out_channels), nn.ReLU(inplace=False))

    # feature transform
    # Each level gets hidden_dim channels per class, one class excluded.
    self.p_conv = conv_bn_relu(in_dim, hidden_dim * (cls_p - 1))
    self.h_conv = conv_bn_relu(in_dim, hidden_dim * (cls_h - 1))
    self.f_conv = conv_bn_relu(in_dim, hidden_dim * (cls_f - 1))
    self.bg_conv = conv_bn_relu(3 * in_dim, hidden_dim)
    # self.bg_conv_new = nn.Sequential(
    #     nn.Conv2d((cls_p + cls_h + cls_f - 2) * hidden_dim, hidden_dim, kernel_size=1, padding=0, stride=1,
    #               bias=False),
    #     BatchNorm2d(hidden_dim), nn.ReLU(inplace=False))

    # gnn infer
    self.gnn = GNN(adj_matrix, upper_half_node, lower_half_node, self.in_dim, self.hidden_dim, self.cls_p, self.cls_h, self.cls_f)

    # node supervision
    # multi-label classifier
    node_num = cls_p + cls_h + cls_f - 2
    # Grouped 1x1 conv: every node's hidden_dim channels map to one output.
    self.node_cls_final = nn.Conv2d(hidden_dim * node_num, node_num,
                                    kernel_size=1, padding=0, stride=1,
                                    bias=True, groups=node_num)

    self.final_cls = nn.Conv2d(node_num * hidden_dim + in_dim, cls_p,
                               kernel_size=1, padding=0, stride=1, bias=True)
    self.softmax = nn.Softmax(dim=1)
def eval_model(model, dataloader, exp_const, step):
    """Evaluate class-averaged seen / unseen accuracy and their harmonic mean.

    For every batch the class probabilities are computed twice-restricted:
    once with all non-held-out columns zeroed (prediction among held-out
    classes only) and once with all held-out columns zeroed (prediction among
    the remaining classes only). Per-class hit counts are accumulated for both
    and averaged over the held-out and the remaining labels respectively.

    Args:
        model: Object exposing ``net``, ``embed2class``, ``img_mean`` and
            ``img_std``.
        dataloader: Iterable of batches with keys ``'img'`` and ``'label'``;
            its ``dataset`` must provide ``labels``, ``held_out_idx``,
            ``held_out_labels`` and ``normalize``.
        exp_const: Experiment constants; ``feedforward == True`` selects the
            logits returned by ``model.net`` instead of the embedding
            classifier.
        step: Value copied into the result under ``'Step'``.

    Returns:
        ``(eval_results, unseen_correct_per_class)`` where ``eval_results``
        holds ``'Seen Acc'``, ``'Unseen Acc'``, ``'HM Acc'`` (percentages
        rounded to 4 decimals) and ``'Step'``.

    Raises:
        ZeroDivisionError: If a label has no samples, or if there are no
            held-out labels or no remaining labels.
    """
    # Set mode
    model.net.eval()
    model.embed2class.eval()

    dataset = dataloader.dataset
    labels = dataset.labels

    img_mean = Variable(torch.cuda.FloatTensor(model.img_mean))
    img_std = Variable(torch.cuda.FloatTensor(model.img_std))
    softmax = nn.Softmax(dim=1)

    unseen_correct_per_class = {l: 0 for l in labels}
    seen_correct_per_class = {l: 0 for l in labels}
    sample_per_class = {l: 0 for l in labels}

    # Boolean column mask (True = held-out class); built lazily because the
    # number of classes is only known once the first logits are available.
    held_out_mask = None

    for data in tqdm(dataloader):
        # Forward pass
        imgs = Variable(data['img'].cuda().float() / 255)
        imgs = dataset.normalize(imgs, img_mean, img_std)
        imgs = imgs.permute(0, 3, 1, 2)
        if exp_const.feedforward == True:
            logits, feats = model.net(imgs)
        else:
            _, feats = model.net(imgs)
            class_weights = model.embed2class()
            logits = model.embed2class.classify(feats, class_weights)

        gt_labels = data['label']
        prob = softmax(logits).data.cpu().numpy()

        if held_out_mask is None or held_out_mask.shape[0] != prob.shape[1]:
            held_out_mask = np.array(
                [i in dataset.held_out_idx for i in range(prob.shape[1])],
                dtype=bool)

        # "zero_seen": seen columns are zeroed, so the argmax picks a held-out class.
        prob_zero_seen = np.copy(prob)
        prob_zero_seen[:, ~held_out_mask] = 0
        # "zero_unseen": held-out columns are zeroed, so the argmax picks a seen class.
        prob_zero_unseen = np.copy(prob)
        prob_zero_unseen[:, held_out_mask] = 0

        argmax_zero_seen = np.argmax(prob_zero_seen, 1)
        argmax_zero_unseen = np.argmax(prob_zero_unseen, 1)

        for gt_label, unseen_idx, seen_idx in zip(
                gt_labels, argmax_zero_seen, argmax_zero_unseen):
            sample_per_class[gt_label] += 1
            if gt_label == labels[unseen_idx]:
                unseen_correct_per_class[gt_label] += 1
            if gt_label == labels[seen_idx]:
                seen_correct_per_class[gt_label] += 1

    seen_acc = 0
    unseen_acc = 0
    num_seen_classes = 0
    num_unseen_classes = 0
    for l in labels:
        if l in dataset.held_out_labels:
            unseen_acc += unseen_correct_per_class[l] / sample_per_class[l]
            num_unseen_classes += 1
        else:
            seen_acc += seen_correct_per_class[l] / sample_per_class[l]
            num_seen_classes += 1

    seen_acc = round(seen_acc * 100 / num_seen_classes, 4)
    unseen_acc = round(unseen_acc * 100 / num_unseen_classes, 4)

    # The harmonic mean is undefined when both accuracies are 0 (e.g. an
    # untrained model); report 0 instead of raising ZeroDivisionError.
    acc_sum = seen_acc + unseen_acc
    if acc_sum > 0:
        hm_acc = round(2 * seen_acc * unseen_acc / acc_sum, 4)
    else:
        hm_acc = 0.0

    eval_results = {
        'Seen Acc': seen_acc,
        'Unseen Acc': unseen_acc,
        'HM Acc': hm_acc,
        'Step': step,
    }

    return eval_results, unseen_correct_per_class
def __init__(self, inplanes, planes, downsample=False, use_gn=False, lr_mult=None, use_out=True, out_bn=True,
             mask_type='common', use_key_mask=True, use_query_mask=False, mask_pos='after', whiten_type=None,
             temperature=None, use_softmax=True):
    """Create the query/key/value projections and optional mask branches.

    Args:
        inplanes: Channel count of the input (and of the block output).
        planes: Channel count of the query and key projections; ``scale`` is
            set to ``sqrt(planes)``.
        downsample: When true, a 2x2 / stride-2 max pool is stored as
            ``self.downsample``; otherwise ``self.downsample`` is ``None``.
        use_gn: Not used by this constructor (only referenced by
            commented-out code).
        lr_mult: Forwarded to ``self.reset_lr_mult``.
        use_out: When true, values are projected to ``planes`` channels and
            mapped back by ``conv_out``; otherwise ``conv_value`` keeps
            ``inplanes`` channels and ``conv_out`` is ``None``.
        out_bn: When true, ``self.out_bn`` is a ``BatchNorm2d(inplanes)``,
            else ``None``.
        mask_type: One of ``'softmax'``, ``'sigmoid'``, ``'common'``.
        use_key_mask: Adds a 1-channel 1x1 conv ``conv_key_mask`` when true.
        use_query_mask: Adds a 1-channel 1x1 conv ``conv_query_mask`` when true.
        mask_pos: ``'before'`` or ``'after'``.
        whiten_type: Stored as ``self.whiten_type``. ``None`` (the default)
            means ``[None]``.
        temperature: Stored as ``self.temperature``.
        use_softmax: Stored as ``self.use_softmax``.

    Raises:
        AssertionError: If ``mask_type`` or ``mask_pos`` is not one of the
            accepted values.
    """
    assert mask_type in ['softmax', 'sigmoid', 'common']
    assert mask_pos in ['before', 'after']

    # A list literal as default would be one object shared by every instance
    # (it is stored on self below), so build a fresh list per call instead.
    if whiten_type is None:
        whiten_type = [None]

    conv_nd = nn.Conv2d
    if downsample:
        max_pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
    else:
        max_pool = None

    super(MaskNonLocal2d, self).__init__()

    self.conv_query = conv_nd(inplanes, planes, kernel_size=1)
    self.conv_key = conv_nd(inplanes, planes, kernel_size=1)
    if use_query_mask == True:
        self.conv_query_mask = conv_nd(inplanes, 1, kernel_size=1)
    if use_key_mask == True:
        self.conv_key_mask = conv_nd(inplanes, 1, kernel_size=1)
    if mask_type == 'sigmoid':
        self.sigmoid_key = nn.Sigmoid()
        self.sigmoid_query = nn.Sigmoid()

    if use_out:
        self.conv_value = conv_nd(inplanes, planes, kernel_size=1)
        self.conv_out = conv_nd(planes, inplanes, kernel_size=1, bias=False)
    else:
        self.conv_value = conv_nd(inplanes, inplanes, kernel_size=1, bias=False)
        self.conv_out = None

    if out_bn:
        self.out_bn = nn.BatchNorm2d(inplanes)
    else:
        self.out_bn = None

    self.softmax = nn.Softmax(dim=2)
    self.downsample = max_pool
    # self.norm = nn.GroupNorm(num_groups=32, num_channels=inplanes) if use_gn else InPlaceABNSync(num_features=inplanes)
    # Learnable scalar, initialised to zero.
    self.gamma = nn.Parameter(torch.zeros(1))
    self.scale = math.sqrt(planes)

    # The reset helpers run before the flags below are stored; this order is
    # kept exactly as in the original constructor.
    self.reset_parameters()
    self.reset_lr_mult(lr_mult)

    self.use_key_mask = use_key_mask
    self.use_query_mask = use_query_mask
    self.mask_type = mask_type
    self.mask_pos = mask_pos
    self.temperature = temperature
    self.whiten_type = whiten_type
    self.use_softmax = use_softmax
def __init__(self, config, corpus_target, embReader):
    """Set up hyper-parameters, encoders, structured attention and the output layer.

    Args:
        config: Experiment configuration. It is modified in place:
            ``freeze_step`` is set to 5000 when absent, and ``rnn_bidir`` is
            forced to ``True`` (it is ``False`` only while the sentence
            encoder is being constructed).
        corpus_target: Supplies ``vocab``, ``rev_vocab``, ``pad_id``,
            ``num_special_vocab`` and ``output_bias``.
        embReader: Forwarded to ``Encoder_Main`` and ``Encoder_RNN``.
    """
    super().__init__(config)

    ####
    # init parameters

    # Values taken from the configuration object.
    # Note: corpus_target is read from config here, not from the argument.
    self.corpus_target = config.corpus_target
    self.max_num_sents = config.max_num_sents  # document length, in terms of the number of sentences
    self.max_len_sent = config.max_len_sent  # sentence length, in terms of words
    self.max_len_doc = config.max_len_doc  # document length, in terms of words
    self.batch_size = config.batch_size
    self.embed_size = config.embed_size
    self.dropout_rate = config.dropout
    self.path_pretrained_emb = config.path_pretrained_emb
    self.output_size = config.output_size  # the number of final output class
    self.pad_level = config.pad_level
    self.use_gpu = config.use_gpu
    self.num_layers = 1

    # Values taken from the corpus.
    self.vocab = corpus_target.vocab  # word2id
    self.rev_vocab = corpus_target.rev_vocab  # id2word
    self.vocab_size = len(self.vocab)
    self.pad_id = corpus_target.pad_id
    self.num_special_vocab = corpus_target.num_special_vocab

    if not hasattr(config, "freeze_step"):
        config.freeze_step = 5000

    config.rnn_bidir = True  ## fix bi-dir to follow original paper of NAACL19
    self.sem_dim_size = 2 * config.sem_dim_size if config.rnn_bidir else config.sem_dim_size

    self.rnn_cell_size = config.rnn_cell_size

    self.pooling_sent = config.pooling_sent  # max or avg
    self.pooling_doc = config.pooling_doc  # max or avg

    ####
    self.encoder_base = Encoder_Main(config, embReader)

    # The sentence-level encoder is built with a unidirectional setting;
    # the flag is switched back right after construction.
    config.rnn_bidir = False
    sent_rnn_width = self.rnn_cell_size * 2
    self.encoder_sent = Encoder_RNN(config, embReader, sent_rnn_width, self.rnn_cell_size * 2)
    config.rnn_bidir = True

    self.structure_att = StructuredAttention(config)

    #
    # These three sizes are computed but not used by the layers below.
    fc_in_size = self.encoder_base.encoder_out_size
    linear_1_out = fc_in_size // 2
    linear_2_out = linear_1_out // 2

    self.linear_out = nn.Linear(self.sem_dim_size, self.output_size)
    if corpus_target.output_bias is not None:  # bias
        # Initialise the bias with log(p) - log(1 - p) of the supplied values.
        mean_column = np.expand_dims(corpus_target.output_bias, axis=1)
        logit_bias = (np.log(mean_column) - np.log(1 - mean_column))
        self.linear_out.bias.data = torch.from_numpy(logit_bias).type(torch.FloatTensor)
    # nn.init.xavier_uniform_(self.linear_out.weight)
    nn.init.xavier_normal_(self.linear_out.weight)

    # Activation and regularisation layers.
    # self.selu = nn.SELU()
    self.elu = nn.ELU()
    self.leak_relu = nn.LeakyReLU()
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()
    self.dropout_layer = nn.Dropout(self.dropout_rate)
    self.softmax = nn.Softmax(dim=1)