def initialize(mean, std, lr):
    '''Build a generator/discriminator pair, initialize their weights from
    N(mean, std), and create an Adam optimizer for each.'''
    param_cuda = torch.cuda.is_available()
    G = gan.generator(128).cuda() if param_cuda else gan.generator(128)
    D = gan.discriminator(128).cuda() if param_cuda else gan.discriminator(128)
    G.weight_init(mean=mean, std=std)
    D.weight_init(mean=mean, std=std)
    G_opt = optim.Adam(G.parameters(), lr=lr, betas=(.5, .999))
    D_opt = optim.Adam(D.parameters(), lr=lr, betas=(.5, .999))
    return G, D, G_opt, D_opt
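# Hedged usage sketch for initialize(): mean=0.0, std=0.02, and lr=2e-4 are
# the common DCGAN defaults, assumed here for illustration; they are not
# taken from this code.
G, D, G_opt, D_opt = initialize(mean=0.0, std=0.02, lr=2e-4)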
def dis_pre_train_step():
    discriminator.train()
    lab_batch = next(labeled_train_loader)
    lab_token_seqs = lab_batch.content[0]
    lab_seq_lengths = np.array([len(seq) for seq in lab_token_seqs])
    labels = lab_batch.label
    lab_token_seqs = torch.from_numpy(np.transpose(lab_token_seqs.numpy()))
    labels = torch.from_numpy(np.transpose(labels.numpy()))
    num_lab_sample = lab_token_seqs.shape[1]
    lab_hidden = discriminator.init_hidden(num_lab_sample)
    lab_output = discriminator(lab_token_seqs, lab_hidden, lab_seq_lengths)
    lab_element_loss = criterion(lab_output, labels)
    lab_loss = torch.mean(lab_element_loss)
    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable
    # weights of the model). This is because by default, gradients are
    # accumulated in buffers (i.e., not overwritten) whenever .backward()
    # is called.
    dis_optimizer.zero_grad()
    lab_loss.backward()
    # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
    torch.nn.utils.clip_grad_norm_(discriminator.parameters(), args.clip)
    dis_optimizer.step()
    return lab_loss
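# Hedged sketch of a pre-training driver for dis_pre_train_step(); the step
# count and logging interval are illustrative assumptions, not taken from
# the original training script.
for step in range(1000):
    lab_loss = dis_pre_train_step()
    if step % 100 == 0:
        print('pre-train step {}, labeled loss {:.4f}'.format(step, lab_loss.item()))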
def build_and_train():
    '''Build the LSGAN discriminator, generator, and adversarial models on
    MNIST, then run the shared GAN train routine.'''
    (x_train, _), (x_test, _) = mnist.load_data()
    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255
    input_shape = [image_size, image_size, 1]

    latent_size = 100
    batch_size = 64
    train_steps = 40000
    lr = 2e-4
    decay = 6e-8
    optimizer = RMSprop(lr=lr, decay=decay)

    # LSGAN uses MSE loss with a linear (no activation) discriminator output.
    inputs = Input(shape=input_shape)
    discriminator = gan.discriminator(inputs, image_size, activation=None)
    discriminator.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
    discriminator.summary()

    inputs = Input(shape=(latent_size, ))
    generator = gan.generator(inputs, image_size)
    generator.summary()

    # Freeze the discriminator's weights during adversarial training.
    discriminator.trainable = False
    adversarial = Model(inputs, discriminator(generator(inputs)))
    adversarial.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
    adversarial.summary()

    models = (generator, discriminator, adversarial)
    model_name = 'lsgan_mnist'
    params = (batch_size, latent_size, train_steps, model_name)
    gan.train(models, x_train, params)
def evaluate(test=False):
    # Turn on evaluation mode, which disables dropout.
    correct = 0
    total = 0
    discriminator.eval()
    current_loader = valid_loader
    current_length = valid_length
    if test:
        current_loader = test_loader
        current_length = test_length
    with torch.no_grad():
        for i_batch in range(current_length):
            sample_batched = next(current_loader)
            token_seqs = sample_batched.content[0]
            seq_lengths = np.array([len(seq) for seq in token_seqs])
            labels = sample_batched.label
            token_seqs = torch.from_numpy(np.transpose(
                token_seqs.numpy())).cuda(env_settings.CUDA_DEVICE)
            labels = torch.from_numpy(np.transpose(labels.numpy())).cuda(
                env_settings.CUDA_DEVICE)
            hidden = discriminator.init_hidden(token_seqs.shape[1])
            output = discriminator(token_seqs, hidden, seq_lengths)
            _, predict_class = torch.max(output, 1)
            total += labels.size(0)
            correct += (predict_class == labels).sum().item()
            for i_metric in range(list(predict_class.size())[0]):
                metrics_handler.metricsHandler.update(
                    (predict_class.data)[i_metric].item(),
                    (labels.data)[i_metric].item())
    test_acc = 100 * correct / total
    split_name = 'test' if test else 'validation'
    print('Accuracy of the classifier on the {} data is: {:5.4f}'.format(
        split_name, test_acc))
    if test:
        output_handler.outputFileHandler.write(f'Test Acc: {test_acc:.2f}%\n')
        output_handler.outputFileHandler.write(
            f'Test recall: {metrics_handler.metricsHandler.getRecall():.3f}%\n')
        output_handler.outputFileHandler.write(
            f'Test precision: {metrics_handler.metricsHandler.getPrecision():.3f}%\n')
    else:
        output_handler.outputFileHandler.write(f'Valid Acc: {test_acc:.2f}%\n')
        output_handler.outputFileHandler.write(
            f'Valid recall: {metrics_handler.metricsHandler.getRecall():.3f}%\n')
        output_handler.outputFileHandler.write(
            f'Valid precision: {metrics_handler.metricsHandler.getPrecision():.3f}%\n')
    return correct / total
def build_and_train_models():
    '''Build the ACGAN discriminator, generator, and adversarial models on
    MNIST, then run the ACGAN train routine.'''
    (x_train, y_train), (_, _) = mnist.load_data()
    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255
    num_labels = len(np.unique(y_train))
    y_train = to_categorical(y_train)

    model_name = "acgan_mnist"
    latent_size = 100
    batch_size = 64
    train_steps = 40000
    lr = 2e-4
    decay = 6e-8
    input_shape = (image_size, image_size, 1)
    label_shape = (num_labels, )

    inputs = Input(shape=input_shape, name='discriminator_input')
    discriminator = gan.discriminator(inputs, num_labels=num_labels)
    optimizer = RMSprop(lr=lr, decay=decay)
    # Two loss functions: 1) probability the image is real,
    # 2) class label of the image.
    loss = ['binary_crossentropy', 'categorical_crossentropy']
    discriminator.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    discriminator.summary()

    input_shape = (latent_size, )
    inputs = Input(shape=input_shape, name='z_input')
    labels = Input(shape=label_shape, name='labels')
    generator = gan.generator(inputs, image_size, labels=labels)
    generator.summary()

    optimizer = RMSprop(lr=lr * 0.5, decay=decay * 0.5)
    # Freeze the discriminator's weights during adversarial training.
    discriminator.trainable = False
    adversarial = Model([inputs, labels],
                        discriminator(generator([inputs, labels])),
                        name=model_name)
    adversarial.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    adversarial.summary()

    models = (generator, discriminator, adversarial)
    data = (x_train, y_train)
    params = (batch_size, latent_size, train_steps, num_labels, model_name)
    train(models, data, params)
def train_discriminator(mixed_inputs, mixed_labels, discriminator, loss_fun,
                        batch_size):
    '''Train the discriminator with one batch of mixed real and fake data.'''
    # The discriminator and generator have separate optimizers.
    discriminator_optimizer = torch.optim.Adam(discriminator.parameters(), lr=0.001)
    mixed_inputs, mixed_labels = Variable(mixed_inputs), Variable(mixed_labels)
    discriminator_optimizer.zero_grad()
    outputs = discriminator(mixed_inputs)
    discriminator_loss = loss_fun(outputs, mixed_labels)
    accuracy = calculate_accuracy(outputs, mixed_labels, batch_size)
    discriminator_loss.backward()
    discriminator_optimizer.step()
    return accuracy.item()
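# Hedged sketch of assembling one mixed batch for train_discriminator():
# real images flattened and labeled 1, generator fakes labeled 0. The
# dataloader output format, generator.input_size, and calculate_accuracy
# are assumptions about the surrounding code.
real_batch = next(iter(training_dataloader)).view(batch_size, -1)
noise = torch.randn(batch_size, generator.input_size)
fake_batch = generator(noise).detach()  # detach: do not update the generator here
mixed_inputs = torch.cat([real_batch, fake_batch])
mixed_labels = torch.cat([torch.ones(batch_size, 1), torch.zeros(batch_size, 1)])
dis_acc = train_discriminator(mixed_inputs, mixed_labels, discriminator,
                              torch.nn.BCELoss(), 2 * batch_size)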
def build_and_train_models():
    '''Build the WGAN critic (discriminator), generator, and adversarial
    models on MNIST, then run the WGAN train routine.'''
    (x_train, _), (_, _) = mnist.load_data()
    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255

    model_name = "wgan_mnist"
    latent_size = 100
    # WGAN-specific hyperparameters: train the critic n_critic times per
    # generator step and clip its weights to [-clip_value, clip_value].
    n_critic = 5
    clip_value = 0.01
    batch_size = 64
    lr = 5e-5
    train_steps = 20000
    input_shape = (image_size, image_size, 1)

    inputs = Input(shape=input_shape, name='discriminator_input')
    discriminator = gan.discriminator(inputs, activation='linear')
    optimizer = RMSprop(lr=lr)
    discriminator.compile(loss=wasserstein_loss,
                          optimizer=optimizer,
                          metrics=['accuracy'])
    discriminator.summary()

    input_shape = (latent_size, )
    inputs = Input(shape=input_shape, name='z_input')
    generator = gan.generator(inputs, image_size)
    generator.summary()

    discriminator.trainable = False
    adversarial = Model(inputs,
                        discriminator(generator(inputs)),
                        name=model_name)
    adversarial.compile(loss=wasserstein_loss,
                        optimizer=optimizer,
                        metrics=['accuracy'])
    adversarial.summary()

    models = (generator, discriminator, adversarial)
    params = (batch_size, latent_size, n_critic,
              clip_value, train_steps, model_name)
    train(models, x_train, params)
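# wasserstein_loss is referenced above but not defined in this snippet. A
# common Keras formulation (an assumption, not necessarily the definition
# used here) labels real samples +1 and fakes -1 and minimizes the mean
# product, which approximates the Wasserstein critic objective:
from keras import backend as K

def wasserstein_loss(y_true, y_pred):
    # with y_true in {+1, -1}, minimizing E[y_true * y_pred] pushes the
    # critic scores of real and fake samples apart
    return K.mean(y_true * y_pred)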
def build_and_train(latent_size=100):
    '''Build the InfoGAN discriminator, generator, and adversarial models on
    MNIST, then run the InfoGAN train routine.'''
    batch_size = 64
    model_name = 'infogan_mnist'
    train_steps = 40000
    lr = 2e-4
    decay = 6e-8

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255
    input_shape = [image_size, image_size, 1]
    num_labels = len(np.unique(y_train))
    y_train = to_categorical(y_train)
    # Two continuous codes, each a scalar.
    codes_shape = (1,)

    inputs = Input(shape=input_shape)
    discriminator = gan.discriminator(
        inputs, image_size, num_labels=num_labels, num_codes=2)
    # Four outputs: real/fake probability, class label, and the two codes.
    loss = ['binary_crossentropy', 'categorical_crossentropy', mi_loss, mi_loss]
    optimizer = RMSprop(lr=lr, decay=decay)
    loss_weights = [1.0, 1.0, 0.5, 0.5]
    discriminator.compile(loss=loss,
                          loss_weights=loss_weights,
                          optimizer=optimizer,
                          metrics=['accuracy'])
    discriminator.summary()

    inputs = Input(shape=(latent_size,))
    labels = Input(shape=(num_labels,))
    codes1 = Input(shape=codes_shape)
    codes2 = Input(shape=codes_shape)
    generator = gan.generator(
        inputs, image_size, labels=labels, codes=[codes1, codes2])
    generator.summary()

    discriminator.trainable = False
    optimizer = RMSprop(lr=lr * 0.5, decay=decay * 0.5)
    inputs = [inputs, labels, codes1, codes2]
    adversarial = Model(inputs, discriminator(generator(inputs)))
    adversarial.compile(loss=loss,
                        loss_weights=loss_weights,
                        optimizer=optimizer,
                        metrics=['accuracy'])
    adversarial.summary()

    models = (generator, discriminator, adversarial)
    data = (x_train, y_train)
    params = (batch_size, latent_size, train_steps, num_labels, model_name)
    train(models, data, params)
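# mi_loss is referenced above but not defined in this snippet. A common
# InfoGAN-style formulation (an assumption about the intended definition)
# maximizes a lower bound on mutual information by minimizing the negative
# log-likelihood of the code c under the auxiliary posterior Q(c|x):
from keras import backend as K

def mi_loss(c, q_of_c_given_x):
    # -E[sum_c c * log Q(c|x)]; the code entropy H(c) is constant and dropped
    return K.mean(-K.sum(K.log(q_of_c_given_x + K.epsilon()) * c, axis=1))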
def build_and_train_models():
    (x_train, _), (_, _) = mnist.load_data()
    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255

    model_name = "lsgan_mnist"
    latent_size = 100
    input_shape = (image_size, image_size, 1)
    batch_size = 64
    lr = 2e-4
    decay = 6e-8
    train_steps = 20000

    inputs = Input(shape=input_shape, name='discriminator_input')
    discriminator = gan.discriminator(inputs, activation=None)
    optimizer = RMSprop(lr=lr, decay=decay)
    discriminator.compile(loss='mse',
                          optimizer=optimizer,
                          metrics=['accuracy'])
    discriminator.summary()

    input_shape = (latent_size, )
    inputs = Input(shape=input_shape, name='z_input')
    generator = gan.generator(inputs, image_size)
    generator.summary()

    optimizer = RMSprop(lr=lr * 0.5, decay=decay * 0.5)
    discriminator.trainable = False
    adversarial = Model(inputs,
                        discriminator(generator(inputs)),
                        name=model_name)
    adversarial.compile(loss='mse',
                        optimizer=optimizer,
                        metrics=['accuracy'])
    adversarial.summary()

    models = (generator, discriminator, adversarial)
    params = (batch_size, latent_size, train_steps, model_name)
    gan.train(models, x_train, params)
def train_generator(batch_size, generator, discriminator, loss_fun):
    '''Train the generator on one batch using the discriminator.

    Generate a batch of fake pictures, classify them with the discriminator,
    and calculate loss based on how sure the discriminator was that the fake
    data was real.
    '''
    # we want the discriminator to think the fake data is real
    target_fake_labels = torch.ones(batch_size, 1)
    # discriminator and generator have separate optimizers
    generator_optimizer = torch.optim.Adam(generator.parameters(), lr=0.001)
    # generator's input
    random_seed = torch.randn(batch_size, generator.input_size)
    generator_optimizer.zero_grad()
    # generate a batch of fake data
    fake_data = generator(random_seed)
    # see what the discriminator thinks it is
    predictions = discriminator(fake_data)
    # we want the discriminator to think they're real
    generator_loss = loss_fun(predictions, target_fake_labels)
    generator_loss.backward()
    generator_optimizer.step()
    return generator_loss.item()
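# Hedged sketch of one adversarial round built from the two helpers above;
# the BCE criterion and the batch assembly (mixed_inputs/mixed_labels as
# sketched after train_discriminator) are illustrative assumptions about
# the surrounding training loop.
loss_fun = torch.nn.BCELoss()
dis_acc = train_discriminator(mixed_inputs, mixed_labels, discriminator,
                              loss_fun, 2 * batch_size)
gen_loss = train_generator(batch_size, generator, discriminator, loss_fun)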
def build_and_train_models():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255
    x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
    x_test = x_test.astype('float32') / 255
    num_labels = len(np.unique(y_train))
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    model_name = "stackedgan_mnist"
    batch_size = 64
    train_steps = 1000
    lr = 2e-4
    decay = 6e-8
    input_shape = (image_size, image_size, 1)
    label_shape = (num_labels, )
    z_dim = 50
    z_shape = (z_dim, )
    feature1_dim = 256
    feature1_shape = (feature1_dim, )

    inputs = Input(shape=input_shape, name='discriminator0_input')
    dis0 = gan.discriminator(inputs, num_codes=z_dim)
    optimizer = RMSprop(lr=lr, decay=decay)
    loss = ['binary_crossentropy', 'mse']
    loss_weights = [1.0, 10.0]
    dis0.compile(loss=loss,
                 loss_weights=loss_weights,
                 optimizer=optimizer,
                 metrics=['accuracy'])
    dis0.summary()

    input_shape = (feature1_dim, )
    inputs = Input(shape=input_shape, name='discriminator1_input')
    dis1 = build_discriminator(inputs, z_dim=z_dim)
    loss = ['binary_crossentropy', 'mse']
    loss_weights = [1.0, 1.0]
    dis1.compile(loss=loss,
                 loss_weights=loss_weights,
                 optimizer=optimizer,
                 metrics=['accuracy'])
    dis1.summary()

    feature1 = Input(shape=feature1_shape, name='feature1_input')
    labels = Input(shape=label_shape, name='labels')
    z1 = Input(shape=z_shape, name="z1_input")
    z0 = Input(shape=z_shape, name="z0_input")
    latent_codes = (labels, z0, z1, feature1)
    gen0, gen1 = build_generator(latent_codes, image_size)
    gen0.summary()
    gen1.summary()

    input_shape = (image_size, image_size, 1)
    inputs = Input(shape=input_shape, name='encoder_input')
    enc0, enc1 = build_encoder((inputs, feature1), num_labels)
    enc0.summary()
    enc1.summary()
    encoder = Model(inputs, enc1(enc0(inputs)))
    encoder.summary()

    data = (x_train, y_train), (x_test, y_test)
    train_encoder(encoder, data, model_name=model_name)

    optimizer = RMSprop(lr=lr * 0.5, decay=decay * 0.5)
    enc0.trainable = False
    dis0.trainable = False
    gen0_inputs = [feature1, z0]
    gen0_outputs = gen0(gen0_inputs)
    adv0_outputs = dis0(gen0_outputs) + [enc0(gen0_outputs)]
    adv0 = Model(gen0_inputs, adv0_outputs, name="adv0")
    loss = ['binary_crossentropy', 'mse', 'mse']
    loss_weights = [1.0, 10.0, 1.0]
    adv0.compile(loss=loss,
                 loss_weights=loss_weights,
                 optimizer=optimizer,
                 metrics=['accuracy'])
    adv0.summary()

    enc1.trainable = False
    dis1.trainable = False
    gen1_inputs = [labels, z1]
    gen1_outputs = gen1(gen1_inputs)
    adv1_outputs = dis1(gen1_outputs) + [enc1(gen1_outputs)]
    adv1 = Model(gen1_inputs, adv1_outputs, name="adv1")
    loss_weights = [1.0, 1.0, 1.0]
    loss = ['binary_crossentropy', 'mse', 'categorical_crossentropy']
    adv1.compile(loss=loss,
                 loss_weights=loss_weights,
                 optimizer=optimizer,
                 metrics=['accuracy'])
    adv1.summary()

    models = (enc0, enc1, gen0, gen1, dis0, dis1, adv0, adv1)
    params = (batch_size, train_steps, num_labels, z_dim, model_name)
    train(models, data, params)
def adv_train_step(judge_only=True):
    discriminator.train()
    judger.train()
    # {token_seqs, next_token_seqs, importance_seqs, labels, seq_lengths, pad_length}
    # Sample m labeled instances from DL
    lab_batch = next(labeled_train_loader)
    lab_token_seqs = lab_batch.content[0]
    lab_seq_lengths = np.array([len(seq) for seq in lab_token_seqs])
    labels = lab_batch.label
    lab_token_seqs = torch.from_numpy(np.transpose(lab_token_seqs.numpy()))
    labels = torch.from_numpy(np.transpose(labels.numpy()))
    num_lab_sample = lab_token_seqs.shape[1]

    # Sample m unlabeled instances from DU and predict their corresponding label
    unl_batch = next(unlabeled_train_loader)
    unl_token_seqs = unl_batch.content[0]
    unl_seq_lengths = [len(seq) for seq in unl_token_seqs]
    unl_token_seqs = torch.from_numpy(np.transpose(unl_token_seqs.numpy()))
    num_unl_sample = unl_token_seqs.shape[1]
    unl_hidden = discriminator.init_hidden(num_unl_sample)
    unl_output = discriminator(unl_token_seqs, unl_hidden, unl_seq_lengths)
    _, fake_labels = torch.max(unl_output, 1)

    if judge_only:
        k = 1
    else:
        k = 3
    for _k in range(k):
        # Update the judge model
        #######################################################################
        lab_judge_hidden = judger.init_hidden(num_lab_sample)
        one_hot_label = one_hot_embedding(labels, args.nclass)  # one-hot encode
        lab_judge_prob = judger(lab_token_seqs, lab_judge_hidden,
                                lab_seq_lengths, one_hot_label)
        lab_labeled = torch.ones(num_lab_sample)
        unl_judge_hidden = judger.init_hidden(num_unl_sample)
        one_hot_unl = one_hot_embedding(fake_labels, args.nclass)  # one-hot encode
        unl_judge_prob = judger(unl_token_seqs, unl_judge_hidden,
                                unl_seq_lengths, one_hot_unl)
        unl_labeled = torch.zeros(num_unl_sample)
        if_labeled = torch.cat((lab_labeled, unl_labeled))
        all_judge_prob = torch.cat((lab_judge_prob, unl_judge_prob))
        all_judge_prob = all_judge_prob.view(-1)
        judge_loss = criterion_judge(all_judge_prob, if_labeled)
        judge_optimizer.zero_grad()
        judge_loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(judger.parameters(), args.clip)
        judge_optimizer.step()

        unl_loss_value = 0.0
        lab_loss_value = 0.0
        fake_labels = repackage_hidden(fake_labels)
        unl_judge_prob = repackage_hidden(unl_judge_prob)
        if not judge_only:
            # Update the predictor
            ###################################################################
            lab_hidden = discriminator.init_hidden(num_lab_sample)
            lab_output = discriminator(lab_token_seqs, lab_hidden, lab_seq_lengths)
            lab_element_loss = criterion(lab_output, labels)
            lab_loss = torch.mean(lab_element_loss)
            # calculate loss for unlabeled instances
            unl_hidden = discriminator.init_hidden(num_unl_sample)
            unl_output = discriminator(unl_token_seqs, unl_hidden, unl_seq_lengths)
            unl_element_loss = criterion(unl_output, fake_labels)
            unl_loss = unl_element_loss.dot(unl_judge_prob.view(-1)) / num_unl_sample
            # do not include this in version 1
            if _k < int(k / 2):
                lab_unl_loss = lab_loss + unl_loss
            else:
                lab_unl_loss = unl_loss
            dis_optimizer.zero_grad()
            lab_unl_loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(discriminator.parameters(), args.clip)
            dis_optimizer.step()
            unl_loss_value = unl_loss.item()
            lab_loss_value = lab_loss.item()
    return judge_loss, unl_loss_value, lab_loss_value
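# Hedged sketch of a driver for adv_train_step(): warm the judge up alone,
# then update judge and predictor jointly. The step counts are illustrative
# assumptions, not taken from the original training script.
judge_warmup_steps = 500
total_steps = 5000
for step in range(total_steps):
    judge_loss, unl_loss, lab_loss = adv_train_step(
        judge_only=(step < judge_warmup_steps))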
def discriminator_fn(img, reuse):
    logits = gan.discriminator(img, reuse)
    return logits
def build_and_train_models():
    """Load the dataset, build ACGAN discriminator, generator, and
    adversarial models. Call the ACGAN train routine.
    """
    # Parameters
    param = {
        "Max_A_Size": 10,
        "Max_B_Size": 10,
        "Dynamic_Size": False,
        'Metod': 'tSNE',
        "ValidRatio": 0.1,
        "seed": 180,
        "dir": "dataset/AAGM/",
        "Mode": "CNN2",  # Mode: CNN_Nature, CNN2
        "LoadFromJson": False,
        "mutual_info": True,  # Mean or MI
        "hyper_opt_evals": 20,
        "epoch": 150,
        "No_0_MI": False,  # True -> remove features with zero MI
        "autoencoder": False,
        "cut": None
    }

    images = {}
    if param['mutual_info']:
        method = 'MI'
    else:
        method = 'Mean'
    f_myfile = open(
        param["dir"] + 'train_' + str(param['Max_A_Size']) + 'x' +
        str(param['Max_B_Size']) + '_' + method + '.pickle', 'rb')
    images["Xtrain"] = pickle.load(f_myfile)
    f_myfile.close()
    f_myfile = open(param["dir"] + 'YTrain.pickle', 'rb')
    images["Classification"] = pickle.load(f_myfile)
    f_myfile.close()
    (x_train, y_train) = np.asarray(images["Xtrain"]), np.asarray(
        images["Classification"])
    print(type(x_train))

    # reshape data for CNN as (28, 28, 1) and normalize
    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255

    # train labels
    num_labels = len(np.unique(y_train))
    y_train = to_categorical(y_train)

    model_name = "acgan_aagm"
    # network parameters
    latent_size = 100
    batch_size = 64
    train_steps = 40000
    lr = 2e-4
    decay = 6e-8
    input_shape = (image_size, image_size, 1)
    label_shape = (num_labels, )

    # build discriminator model
    inputs = Input(shape=input_shape, name='discriminator_input')
    # call discriminator builder with 2 outputs: pred source and labels
    discriminator = gan.discriminator(inputs, num_labels=num_labels)
    # [1] uses Adam, but the discriminator easily converges with RMSprop
    optimizer = RMSprop(lr=lr, decay=decay)
    # 2 loss functions: 1) probability the image is real,
    # 2) class label of the image
    loss = ['binary_crossentropy', 'categorical_crossentropy']
    discriminator.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    discriminator.summary()

    # build generator model
    input_shape = (latent_size, )
    inputs = Input(shape=input_shape, name='z_input')
    labels = Input(shape=label_shape, name='labels')
    # call generator builder with input labels
    generator = gan.generator(inputs, image_size, labels=labels)
    generator.summary()

    # build adversarial model = generator + discriminator
    optimizer = RMSprop(lr=lr * 0.5, decay=decay * 0.5)
    # freeze the weights of the discriminator during adversarial training
    discriminator.trainable = False
    adversarial = Model([inputs, labels],
                        discriminator(generator([inputs, labels])),
                        name=model_name)
    # same 2 loss functions: 1) probability the image is real,
    # 2) class label of the image
    adversarial.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    # adversarial.summary()

    # train discriminator and adversarial networks
    models = (generator, discriminator, adversarial)
    data = (x_train, y_train)
    params = (batch_size, latent_size,
              train_steps, num_labels, model_name)
    print(num_labels)
    print(x_train.shape)
    print('here')
    print(image_size)
    train(models, data, params)
    # (tail of train_gan(gen, dis), which returns the dataset it trained on)
    if len(training_dataloader) == 0:
        print('Where\'s your data you dweeb')
        exit()
    dis_acc_av = float(dis_acc) / len(training_dataloader)
    gen_loss_av = float(gen_loss) / len(training_dataloader)
    print('Average discriminator accuracy {} and generator loss {}'.format(
        dis_acc_av, gen_loss_av))
    return training_dataset


def test_generator(gen, real_data_shape):
    '''Plot a 4x4 grid of images sampled from the trained generator.'''
    fig = plt.figure(figsize=(8, 8))
    cols, rows = 4, 4
    for i in range(1, cols * rows + 1):
        # get images from the trained generator
        random_seed = torch.randn(1, gen.input_size)
        image = gen(random_seed)
        # reshape back to the original image form
        image = image.view(real_data_shape[1], real_data_shape[2])
        image = image.detach().numpy()
        fig.add_subplot(rows, cols, i)
        plt.imshow(image)
    plt.show()


gen = generator()
dis = discriminator()
real_image_dataset = train_gan(gen, dis)
# generate example images
test_generator(gen, real_image_dataset.real_data_shape)
    # (tail of the training DataLoader definition)
    image_label_transform=img_label_transform),
    batch_size=2, shuffle=True, pin_memory=True)

valloader = data.DataLoader(VOCDataSet("./", split='val',
                                       img_transform=val_transform,
                                       label_transform=target_transform,
                                       image_label_transform=img_label_transform),
                            batch_size=1, shuffle=False, pin_memory=True)

schedule = Scheduler(lr=1e-4, total_epoches=4000)

D = torch.nn.DataParallel(discriminator(n_filters=32)).cuda()
G = torch.nn.DataParallel(generator(n_filters=32)).cuda()

# constants for WGAN-style backward() calls; moneg scales the GAN term for G
gan_loss_percent = 0.03
one = torch.FloatTensor([1])
mone = one * -1
moneg = one * -1 * gan_loss_percent
one = one.cuda()
mone = mone.cuda()
moneg = moneg.cuda()

loss_func = BCE_Loss()
optimizer_D = Adam(D.parameters(), lr=1e-4, betas=(0.5, 0.9), eps=10e-8)
optimizer_G = Adam(G.parameters(), lr=1e-4, betas=(0.5, 0.9), eps=10e-8)
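# Hedged sketch of how the one/mone/moneg constants above are typically
# used: WGAN-style backward() calls with a constant gradient, where moneg
# scales the adversarial term for the generator. The batch tensors
# (real_maps, images) and the loop body are assumptions about the
# surrounding training code, not taken from it.
optimizer_D.zero_grad()
d_real = D(real_maps).mean()
d_real.backward(mone)   # push scores on real label maps up
d_fake = D(G(images).detach()).mean()
d_fake.backward(one)    # push scores on generated maps down
optimizer_D.step()

optimizer_G.zero_grad()
g_fake = D(G(images)).mean()
g_fake.backward(moneg)  # scaled-down adversarial signal for the generator
optimizer_G.step()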