def __init__(self, config):
    """Build the generator: a dense projection of the (optionally
    class-conditioned) noise vector followed by three upsampling residual
    blocks and a final conv + tanh output layer.

    Args:
        config: experiment namespace providing nz (noise dim), ngpu, ngf
            (base feature width), nc (output channels), imageSize,
            num_classes, conditional/conditioning flags, and the
            G_conv / G_activation / G_linear layer selectors.
    """
    super(Generator, self).__init__()
    self.z_dim = config.nz
    self.ngpu = config.ngpu
    ngf = self.ngf = config.ngf
    self.config = config
    # Conditional variants consume the class label concatenated to the noise.
    if config.conditional and (config.conditioning == 'concat' or config.conditioning == 'acgan'):
        inp_dim = self.z_dim + config.num_classes
    else:
        inp_dim = self.z_dim
    conv_layer = cond_conv_layers[config.G_conv]
    # NOTE(review): normalization is hard-coded to 'BN' here while
    # ResBlockGenerator reads config.G_normalization — confirm intentional.
    norm_layer = cond_norm_layers['BN']
    activation_layer = cond_activation_layers[config.G_activation]
    lin_layer = cond_linear_layers[config.G_linear]
    # Three stride-2 blocks upsample by 8x, so start at imageSize / 8.
    self.init_size = int(config.imageSize / (2**3))
    self.dense = lin_layer(inp_dim, self.init_size * self.init_size * ngf)
    self.network = nn.Sequential(
        ResBlockGenerator(config, ngf, ngf, stride=2),
        ResBlockGenerator(config, ngf, ngf, stride=2),
        ResBlockGenerator(config, ngf, ngf, stride=2),
        norm_layer(ngf, config.num_classes),
        activation_layer(True),
        conv_layer(ngf, config.nc, 3, stride=1, padding=1),
        CondTanh())
    utils.weights_init_xavier(self.network)
def __init__(self, input_dimensionality, num_of_topics, num_of_classes, hid_size, device="cpu"):
    """Two-layer MLP classifier over topic-proportion vectors.

    Args:
        input_dimensionality: vocabulary/feature size (stored for reference;
            the network itself consumes `num_of_topics`-dim inputs).
        num_of_topics: dimensionality of the topic-proportion input.
        num_of_classes: number of output classes.
        hid_size: hidden-layer width.
        device: device string the layers are placed on.
    """
    super(SentimentClassifier, self).__init__()
    self.input_dimensionality = input_dimensionality
    self.num_of_topics = num_of_topics
    self.num_of_classes = num_of_classes
    self.hidden_size = hid_size
    self.frozen = False  # freeze flag, presumably toggled by the trainer
    self.dr = nn.Dropout(0.2)
    self.fc1 = nn.Linear(self.num_of_topics, self.hidden_size).to(device)
    self.fc2 = nn.Linear(self.hidden_size, self.num_of_classes).to(device)
    # FIX: the batch-norm layer was the only module not moved to `device`;
    # on a CUDA device that leaves its running stats/affine params on CPU
    # and crashes at forward time. Place it with the rest of the network.
    self.fc_bn = nn.BatchNorm1d(self.hidden_size).to(device)
    self.device = device
    weights_init_xavier(self.fc1)
    weights_init_xavier(self.fc2)
def __init__(self, config, in_channels, out_channels, stride=1):
    """Residual discriminator block: pre-activation 3x3 conv pair with an
    optional 2x average-pool downsample on both the main and shortcut paths.
    """
    super(ResBlockDiscriminator, self).__init__()
    conv_layer = conv_layers[config.D_conv]
    activation_layer = activation_layers[config.D_activation]

    # Main path: two pre-activated 3x3 convolutions; downsampling blocks
    # (stride != 1) end with an average pool.
    main_layers = [
        activation_layer(True),
        conv_layer(in_channels, out_channels, 3, 1, padding=1),
        activation_layer(True),
        conv_layer(out_channels, out_channels, 3, 1, padding=1),
    ]
    if stride != 1:
        main_layers.append(nn.AvgPool2d(2, stride=stride, padding=0))
    self.model = nn.Sequential(*main_layers)
    utils.weights_init_xavier(self.model)

    # Shortcut path: identity when there is no downsampling, otherwise a
    # 1x1 projection followed by the matching average pool.
    self.bypass = nn.Sequential()
    if stride != 1:
        self.bypass = nn.Sequential(
            conv_layer(in_channels, out_channels, 1, 1, padding=0),
            nn.AvgPool2d(2, stride=stride, padding=0))
        utils.weights_init_xavier(self.bypass)
def __init__(self, config, in_channels, out_channels, stride=1):
    """First discriminator block: conv comes before any activation so the
    raw image is not pushed through a nonlinearity, then activation, a
    second conv, and a 2x average-pool downsample.
    """
    super(FirstResBlockDiscriminator, self).__init__()
    conv_layer = conv_layers[config.D_conv]
    activation_layer = activation_layers[config.D_activation]

    # Main path — note: no leading activation on the raw image.
    main_path = [
        conv_layer(in_channels, out_channels, 3, 1, padding=1),
        activation_layer(True),
        conv_layer(out_channels, out_channels, 3, 1, padding=1),
        nn.AvgPool2d(2),
    ]
    # Shortcut path: pool first, then a 1x1 channel projection.
    shortcut_path = [
        nn.AvgPool2d(2),
        conv_layer(in_channels, out_channels, 1, 1, padding=0),
    ]
    self.model = nn.Sequential(*main_path)
    self.bypass = nn.Sequential(*shortcut_path)
    utils.weights_init_xavier(self.model)
    utils.weights_init_xavier(self.bypass)
def __init__(self, config, in_channels, out_channels, stride=1):
    """Residual generator block: conditional norm + activation, 2x
    upsample, then two 3x3 convolutions; the shortcut upsamples to match
    when stride != 1.
    """
    super(ResBlockGenerator, self).__init__()
    conv_layer = cond_conv_layers[config.G_conv]
    norm_layer = cond_norm_layers[config.G_normalization]
    activation_layer = cond_activation_layers[config.G_activation]

    stages = [
        norm_layer(in_channels, config.num_classes),
        activation_layer(True),
        CondUpsample(scale_factor=2),
        conv_layer(in_channels, out_channels, 3, 1, padding=1),
        norm_layer(out_channels, config.num_classes),
        activation_layer(True),
        conv_layer(out_channels, out_channels, 3, 1, padding=1),
    ]
    self.model = nn.Sequential(*stages)
    utils.weights_init_xavier(self.model)

    # Shortcut: plain 2x upsample when the main path upsamples,
    # identity otherwise.
    self.bypass = CondUpsample(scale_factor=2) if stride != 1 else nn.Sequential()
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training entry point.

    Builds the model (optionally wrapped for distributed / DataParallel
    execution), the optimizer and LR scheduler, optionally resumes from a
    checkpoint, constructs episodic mini-ImageNet loaders, then runs the
    train/validate loop, appending metrics to text files and saving the
    best checkpoint.

    Args:
        gpu: GPU index assigned to this worker, or None for CPU/DataParallel.
        ngpus_per_node: GPUs on this node; used to derive the global rank
            and to split batch size / workers across processes.
        args: parsed command-line namespace (arch, lr, distributed flags, ...).

    NOTE(review): `results_dir`, `save_checkpoint`, `train`, `validate`,
    `ConvNet`, `Identity`, `MiniImageNet` and `EpisodicBatchSampler` are
    not defined in this function — presumably module-level globals/imports.
    """
    global best_acc1  # best validation acc@1, shared across resumes/saves
    args.gpu = gpu
    cudnn.benchmark = True  # autotune conv kernels for fixed-size inputs
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # Create model
    if args.arch == 'default_convnet':
        model = ConvNet()
    else:
        if args.pretrained:
            print("=> using pre-trained model '{}'".format(args.arch))
            model = models.__dict__[args.arch](pretrained=True)
        else:
            print("=> creating model '{}'".format(args.arch))
            model = models.__dict__[args.arch]()
        # Replace the classification head: either project to out_dim
        # features (Xavier-initialized) or strip it entirely.
        if args.out_dim is not None:
            lin = nn.Linear(model.fc.in_features, args.out_dim)
            weights_init_xavier(lin)
            model.fc = lin
        else:
            model.fc = Identity()
    print('Number of parameters: ',
          sum([p.numel() for p in model.parameters()]))
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    # Define optimizer
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), args.lr)
    else:
        raise ValueError('Optimizer should be "sgd" or "adam"')
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=args.step_size,
                                                   gamma=args.gamma)
    # Optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # NOTE(review): torch.load without map_location — checkpoints
            # saved on GPU load to their original device; verify acceptable.
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    # Data loading code: episodic samplers yield n_way x (support + query)
    # class-balanced batches for few-shot training/evaluation.
    train_dataset = MiniImageNet('train', args.splits_path)
    train_sampler = EpisodicBatchSampler(train_dataset.labels,
                                         args.n_episodes_train,
                                         args.n_way_train,
                                         args.n_support + args.n_query_train)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_sampler=train_sampler,
                              num_workers=args.workers,
                              pin_memory=True)
    val_dataset = MiniImageNet('val', args.splits_path)
    val_sampler = EpisodicBatchSampler(val_dataset.labels,
                                       args.n_episodes_val,
                                       args.n_way_val,
                                       args.n_support + args.n_query_val)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_sampler=val_sampler,
                            num_workers=args.workers,
                            pin_memory=True)
    if args.evaluate:
        validate(val_loader, model, args)
        return
    for epoch in range(args.start_epoch, args.epochs):
        # NOTE(review): scheduler stepped before the epoch's optimizer
        # updates — pre-1.1.0 PyTorch ordering; modern PyTorch warns and
        # expects step() after training. Confirm before changing.
        lr_scheduler.step()
        if args.distributed:
            train_sampler.set_epoch(epoch)
        # Train for one epoch
        loss_t, acc_t = train(train_loader, model, optimizer, epoch, args)
        # Evaluate on validation set
        loss_val, acc1 = validate(val_loader, model, args)
        # Append each metric to its own running log file.
        dict_metrics = {
            'loss_training': loss_t,
            'loss_validation': loss_val,
            'acc_training': acc_t,
            'acc_validation': acc1
        }
        for key in dict_metrics:
            with open(os.path.join(results_dir, key + '.txt'), "a+") as myfile:
                myfile.write(str(dict_metrics[key]) + '\n')
        # Remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        # Only rank-0 per node saves in multiprocessing-distributed mode.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            print('Saving model...')
            # Wrapped models (DataParallel/DDP without a fixed gpu) store
            # weights under .module; unwrap so checkpoints stay portable.
            if args.gpu is None:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.module.state_dict(),
                        'best_acc1': best_acc1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best, results_dir)
            else:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_acc1': best_acc1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best, results_dir)
def __init__(self, exp, input_dimensionality, num_of_classes, vae_hidd_size,
             num_of_OpinionTopics, num_of_PlotTopics, encoder_layers=1,
             generator_layers=4, beta_s=1.0, beta_a=1.0,
             encoder_dropout=False, dropout_prob=0.0,
             generator_shortcut=False, generator_transform=None,
             interaction="dot_prod", plug_Plots=False, device="cpu"):
    """Adversarial VAE combining plot-topic and opinion-topic encoders with
    a sentiment classifier and a plot discriminator, plus a shared decoder
    whose input depends on the chosen `interaction` mode.

    Args:
        exp: experiment object; `exp.args` carries hyper-parameters
            (de_sparsity, classifier hidden sizes, ...).
        input_dimensionality: vocabulary size of the bag-of-words input.
        num_of_classes: number of sentiment classes.
        vae_hidd_size: hidden size of the VAE encoders.
        num_of_OpinionTopics / num_of_PlotTopics: topic counts per facet.
        encoder_layers / generator_layers: depth of the VAE sub-networks.
        beta_s / beta_a: KL/loss weights for sentiment and aspect terms.
        encoder_dropout, dropout_prob, generator_shortcut,
        generator_transform: VAE options (see NOTE below — several are
            overridden when the sub-models are built).
        interaction: decoder input mode — "dot_prod", "concat",
            "onlySent" or "onlyNeutral".
        plug_Plots: flag stored for use elsewhere in the model.
        device: device string all tensors/modules are placed on.
    """
    super(AdversarialVaeModel, self).__init__()
    # Args includes all the meta information about the experiment
    self.exp = exp
    self.args = exp.args
    self.beta_a = beta_a
    self.beta_s = beta_s
    self.input_dimensionality = input_dimensionality
    self.num_of_OpinionTopics = num_of_OpinionTopics
    self.num_of_PlotTopics = num_of_PlotTopics
    # Prior mean and variance (fixed, not learned): zero-mean Gaussian with
    # variance 0.995 for both topic facets; log-variance precomputed.
    self.priors = dict()
    self.priors["prior_mean_Plot"] = torch.Tensor(1, self.num_of_PlotTopics).fill_(0).to(device)
    self.priors["prior_variance_Plot"] = 0.995
    self.priors["prior_var_Plot"] = torch.Tensor(1, self.num_of_PlotTopics).fill_(self.priors["prior_variance_Plot"]).to(device)
    self.priors["prior_logvar_Plot"] = self.priors["prior_var_Plot"].log()
    self.priors["prior_mean_Opinion"] = torch.Tensor(1, self.num_of_OpinionTopics).fill_(0).to(device)
    self.priors["prior_variance_Opinion"] = 0.995
    self.priors["prior_var_Opinion"] = torch.Tensor(1, self.num_of_OpinionTopics).fill_(self.priors["prior_variance_Opinion"]).to(device)
    # NOTE(review): the extra .to(device) below is redundant (the tensor is
    # already on `device`) but harmless.
    self.priors["prior_logvar_Opinion"] = self.priors["prior_var_Opinion"].to(device).log()
    # Flags
    self.interaction = interaction
    self.plug_Plots = plug_Plots
    self.topicType = "both"
    self.wordEmb = None
    self.alsoAspectLoss = True
    self.alsoSentLoss = True
    # Training Device
    self.device = device
    # - Init VAE components -
    # NOTE(review): encoder_dropout, generator_shortcut and
    # generator_transform ctor arguments are NOT forwarded — the sub-models
    # hard-code encoder_dropout=True, generator_shortcut=False,
    # generator_transform='softmax'. Confirm this override is intentional.
    self.aspect_vae_model = VaeAvitmModel(input_dimensionality,
                                          d_e=vae_hidd_size,
                                          d_t=num_of_PlotTopics,
                                          encoder_layers=encoder_layers,
                                          generator_layers=generator_layers,
                                          without_decoder=True,
                                          encoder_dropout=True,
                                          dropout_rate=dropout_prob,
                                          sparsity=self.args.de_sparsity,
                                          generator_shortcut=False,
                                          generator_transform='softmax',
                                          device=device).to(device)
    self.sent_vae_model = VaeAvitmModel(input_dimensionality,
                                        d_e=vae_hidd_size,
                                        d_t=num_of_OpinionTopics,
                                        encoder_layers=encoder_layers,
                                        generator_layers=generator_layers,
                                        without_decoder=True,
                                        encoder_dropout=True,
                                        dropout_rate=dropout_prob,
                                        sparsity=self.args.de_sparsity,
                                        generator_shortcut=False,
                                        generator_transform='softmax',
                                        device=device).to(device)
    # Only the plot VAE keeps its own decoder (without_decoder=False).
    self.plot_vae_model = VaeAvitmModel(input_dimensionality,
                                        d_e=vae_hidd_size,
                                        d_t=num_of_PlotTopics,
                                        encoder_layers=encoder_layers,
                                        generator_layers=generator_layers,
                                        without_decoder=False,
                                        encoder_dropout=True,
                                        dropout_rate=dropout_prob,
                                        sparsity=self.args.de_sparsity,
                                        generator_shortcut=False,
                                        generator_transform='softmax',
                                        device=device).to(device)
    # - Sentiment classifier -
    self.num_of_classes = num_of_classes
    self.sent_class_model = SentimentClassifier(input_dimensionality,
                                                num_of_OpinionTopics,
                                                num_of_classes,
                                                hid_size=self.args.sent_classi_hid_size,
                                                device=device).to(device)
    # - Plot discriminator/classifier -
    # It is not an actual sentiment classifier, just reusing the same class.
    self.plot_discri_model = SentimentClassifier(input_dimensionality,
                                                 num_of_PlotTopics,
                                                 num_of_classes=2,
                                                 hid_size=self.args.plot_classi_hid_size,
                                                 device=device).to(device)
    # - Linear projection for possible asymmetric number of topics -
    if self.num_of_PlotTopics != self.num_of_OpinionTopics:
        self.plotScaling = nn.Linear(self.num_of_PlotTopics, self.num_of_OpinionTopics)
    # Dropout
    self.r_drop = nn.Dropout(dropout_prob)
    # - Decoder matrix -
    # Input width depends on how plot and opinion topic vectors interact:
    # outer product (dot_prod), concatenation, or a single facet.
    if self.interaction == "dot_prod":
        self.de = nn.Linear(self.num_of_PlotTopics*self.num_of_OpinionTopics, self.input_dimensionality)
    elif self.interaction == "concat":
        # NOTE(review): bare `num_of_OpinionTopics` (not self.) — same
        # value at this point, so behavior is unchanged.
        self.de = nn.Linear(self.num_of_PlotTopics + num_of_OpinionTopics, self.input_dimensionality)
    elif self.interaction == "onlySent":
        self.de = nn.Linear(self.num_of_OpinionTopics, self.input_dimensionality)
    elif self.interaction == "onlyNeutral":
        self.de = nn.Linear(self.num_of_PlotTopics, self.input_dimensionality)
    # Batch Norm.
    self.de_bn = nn.BatchNorm1d(self.input_dimensionality)
    # Orthogonal Reg.
    self.ortho_regul_flag = True
    # --- INIT ---
    # Decoder initialization (sparse init; projection gets Xavier).
    weights_init_sparse(self.de, sparsity=self.args.de_sparsity)
    if self.num_of_PlotTopics != self.num_of_OpinionTopics:
        weights_init_xavier(self.plotScaling)
def init_layers_xavier(self):
    """Apply Xavier initialization to every encoder and generator layer."""
    for layer in (
            self.en1_fc,
            self.en2_fc,
            self.mean_fc,
            self.logvar_fc,
            self.generator1,
            self.generator2,
            self.generator3,
            self.generator4,
    ):
        weights_init_xavier(layer)