def main():
    parser = argparse.ArgumentParser(description="AAE")
    parser.add_argument("--num_epochs", type=int, default=100)
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--device", type=str, default="cuda")
    parser.add_argument("--data_root", type=str, default="./data")
    parser.add_argument("--data_name", type=str, default="mnist")
    parser.add_argument("--distribution", type=str, default="gaussian")
    parser.add_argument("--image_size", type=int, default=32)
    parser.add_argument("--image_channels", type=int, default=1)
    parser.add_argument("--latent_dim", type=int, default=2)
    parser.add_argument("--num_classes", type=int, default=10)
    opt = parser.parse_args()

    os.makedirs("./outputs/encode", exist_ok=True)
    os.makedirs("./outputs/decode", exist_ok=True)
    os.makedirs("./weights", exist_ok=True)

    encoder = Encoder(opt.image_size, opt.image_channels, opt.latent_dim).to(opt.device)
    decoder = Decoder(opt.image_size, opt.image_channels, opt.latent_dim).to(opt.device)
    discriminator = Discriminator(opt.latent_dim, opt.num_classes, True).to(opt.device)

    for epoch in range(opt.num_epochs):
        reconstruct_loss, e_loss, d_loss = train(encoder, decoder, discriminator, opt)
        print("reconstruct loss: {:.4f} encoder loss: {:.4f} discriminator loss: {:.4f}".format(
            reconstruct_loss, e_loss, d_loss))
        eval_encoder("./outputs/encode/{}.jpg".format(epoch), encoder, opt)
        eval_decoder("./outputs/decode/{}.jpg".format(epoch), decoder, opt)
        torch.save(encoder.state_dict(), "./weights/encoder.pth")
        torch.save(decoder.state_dict(), "./weights/decoder.pth")
        torch.save(discriminator.state_dict(), "./weights/discriminator.pth")

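# Example invocation for the AAE training entry point above (assumption: the
# file name train_aae.py is hypothetical; any of the flags can be overridden):
#
#   python train_aae.py --num_epochs 50 --latent_dim 8 --device cpu
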
def __init__(self, x, training=True):
    self.categories = 10
    self.cont_dim = 1
    self.regression_dim = 10 + 2 + 2
    # Sample the priors: uniform noise z, a one-hot categorical code c1, and
    # two continuous codes c2, c3 (normalized to the tf.random.* / tf.math.log
    # API so the calls are consistent).
    self.prior_z = tf.random.uniform([args.batch_size, 62], minval=-1.0, maxval=1.0)
    self.prior_c1 = tf.one_hot(
        tf.squeeze(
            tf.random.categorical(
                tf.math.log(tf.ones([1, self.categories]) * 0.1), args.batch_size)),
        self.categories)
    self.prior_c2 = tf.random.uniform([args.batch_size, self.cont_dim], minval=-1.0, maxval=1.0)
    self.prior_c3 = tf.random.uniform([args.batch_size, self.cont_dim], minval=-1.0, maxval=1.0)
    cat_prior = tf.concat(
        [self.prior_z, self.prior_c1, self.prior_c2, self.prior_c3], axis=-1)

    self.discriminator = Discriminator()
    self.decoder = Decoder(62 + 10 + 2)
    self.train_disc_op, self.train_dec_op = self.build_train_op(
        cat_prior, x, training)
    self.build_inference()

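# A quick NumPy sketch of what the categorical prior above produces
# (assumption: illustrative only; logits of log(0.1) for every class are a
# uniform distribution over the 10 categories):
#
#   import numpy as np
#   idx = np.random.randint(0, 10, size=batch_size)   # uniform class indices
#   prior_c1 = np.eye(10)[idx]                        # one-hot, shape (batch_size, 10)
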
def __init__(self, opt):
    self.opt = opt
    self.netCE = ContentEncoder(inD=3, contentDim=opt.contentDim, is_norm=opt.normalize).to(device)
    self.netPE = PoseEncoder(inD=3, poseDim=opt.poseDim, is_norm=opt.normalize).to(device)
    self.netDE = Decoder(inD=opt.contentDim + opt.poseDim).to(device)
    self.netSD = SceneDiscriminator(poseDim=opt.poseDim).to(device)
    # my work
    self.netRD = RecDiscriminator(inD=6).to(device)

    self.netCE.weight_init(mean=0, std=0.02)
    self.netPE.weight_init(mean=0, std=0.02)
    self.netDE.weight_init(mean=0, std=0.02)
    self.netRD.weight_init(mean=0, std=0.02)

    self.optimCE = torch.optim.Adam(self.netCE.parameters(), lr=opt.learningRate, betas=(opt.beta1, 0.999))
    self.optimPE = torch.optim.Adam(self.netPE.parameters(), lr=opt.learningRate, betas=(opt.beta1, 0.999))
    self.optimDE = torch.optim.Adam(self.netDE.parameters(), lr=opt.learningRate, betas=(opt.beta1, 0.999))
    self.optimSD = torch.optim.Adam(self.netSD.parameters(), lr=opt.learningRate, betas=(opt.beta1, 0.999))
    self.optimRD = torch.optim.Adam(self.netRD.parameters(), lr=opt.learningRate, betas=(opt.beta1, 0.999))

    self.trainDataloader = DataLoader(
        scenePairs_dataset(opt.dataRoot, opt.epochSize, opt.maxStep),
        opt.batchSize, num_workers=4)
    self.valDataloader = DataLoader(
        scenePairs_dataset(opt.dataRoot, 10, opt.maxStep),
        opt.batchSize, num_workers=4)
    self.plotDateloader = DataLoader(
        plot_dataset(opt.dataVal, 10, 20, delta=4, random_seed=1), 20)

    self.rec_criterion = nn.MSELoss()
    self.sim_criterion = nn.MSELoss()
    self.bce_criterion = nn.BCELoss()

def init_training(opt):
    # Initialize losses
    losses = {
        'adversarial': torch.nn.BCELoss(),
        'pixelwise': torch.nn.L1Loss(),
        'action': torch.nn.NLLLoss()
    }

    img_shape = (1, opt.img_size, opt.img_size)

    # Initialize models
    encoder = Encoder(img_shape, opt.latent_dim)
    decoder = Decoder(img_shape, opt.latent_dim)
    discriminator = Discriminator(opt.latent_dim)
    model = {'enc': encoder, 'dec': decoder, 'discr': discriminator}
    if opt.domain == 'source':
        pol = Policy(opt.latent_dim, ac_size=3)
        model['pol'] = pol
    if opt.use_dynamics:
        decoder_next = Decoder(img_shape, opt.latent_dim)
        model['dec_next'] = decoder_next
        if opt.domain == 'source':
            dyn = Dynamics(opt.latent_dim, ac_size=3, ac_embed_size=10)
            model['dyn'] = dyn

    # Move to GPU
    if opt.cuda:
        for loss in losses.values():
            loss.cuda()
        for network in model.values():
            network.cuda()

    # Optimizers: the generator optimizer covers every network except the discriminator
    G_params = []
    for name, network in model.items():
        G_params += [network.parameters()] if name != 'discr' else []
    G_params = itertools.chain(*G_params)
    optimizer_G = torch.optim.Adam(G_params, lr=opt.lr, betas=(opt.b1, opt.b2))
    optimizer_D = torch.optim.Adam(model['discr'].parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

    # Metrics
    metrics_dict = {
        'adv_losses': [],
        'pix_losses': [],
        'ac_losses': [],
        'g_losses': [],
        'd_losses': [],
        'rf_z_sep_accs': [],
        'pol_accs': []
    }
    if opt.use_dynamics:
        metrics_dict['pix_next_losses'] = []

    return model, losses, optimizer_G, optimizer_D, metrics_dict

def inference(from_file_path, args):
    with tf.Graph().as_default(), tf.Session() as sess:
        alpha = args[0]

        encoder = Encoder()
        decoder = Decoder()

        content_input = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='content_input')
        style_input = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='style_input')

        # switch RGB to BGR
        content = tf.reverse(content_input, axis=[-1])
        style = tf.reverse(style_input, axis=[-1])

        # preprocess image
        content = encoder.preprocess(content)
        style = encoder.preprocess(style)

        # encode image
        # we should initialize global variables before restoring the model
        enc_c_net = encoder.encode(content, 'content/')
        enc_s_net = encoder.encode(style, 'style/')

        # pass the encoded images to AdaIN
        target_features = AdaIN(enc_c_net.outputs, enc_s_net.outputs, alpha=alpha)

        # decode target features back to image
        dec_net = decoder.decode(target_features, prefix="decoder/")
        generated_img = dec_net.outputs

        # deprocess image
        generated_img = encoder.deprocess(generated_img)

        # switch BGR back to RGB
        generated_img = tf.reverse(generated_img, axis=[-1])

        # clip to 0..255
        generated_img = tf.clip_by_value(generated_img, 0.0, 255.0)

        sess.run(tf.global_variables_initializer())
        encoder.restore_model(sess, ENCODER_PATH, enc_c_net)
        encoder.restore_model(sess, ENCODER_PATH, enc_s_net)
        decoder.restore_model(sess, DECODER_PATH, dec_net)

        model_args = (sess, generated_img, content_input, style_input)
        if from_file_path:
            run_from_file_paths(model_args, args)
        else:
            return run_from_layers(model_args, args)

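# For reference, a minimal sketch of the AdaIN operation used above
# (assumption: the standard formulation from Huang & Belongie 2017, not
# necessarily the exact helper imported here). It renormalizes the content
# features to the channel-wise mean/std of the style features, then blends
# by alpha:
#
#   def adain(content_feat, style_feat, alpha=1.0, eps=1e-5):
#       c_mean, c_var = tf.nn.moments(content_feat, axes=[1, 2], keep_dims=True)
#       s_mean, s_var = tf.nn.moments(style_feat, axes=[1, 2], keep_dims=True)
#       normalized = (content_feat - c_mean) / tf.sqrt(c_var + eps)
#       stylized = normalized * tf.sqrt(s_var + eps) + s_mean
#       return alpha * stylized + (1.0 - alpha) * content_feat
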
def build_network(self):
    print('[info] Build the network architecture')
    self.encoder = Encoder(z_dim=self.opt.latent_dim)
    if self.opt.dataset == 'SMPL':
        num_verts = 6890
    elif self.opt.dataset == 'all_animals':
        num_verts = 3889
    else:
        # previously num_verts was left unbound for unknown datasets
        raise ValueError('Unknown dataset: {}'.format(self.opt.dataset))
    self.decoder = Decoder(num_verts=num_verts, z_dim=self.opt.latent_dim)
    self.discriminator = Discriminator(input_dim=self.opt.latent_dim)
    self.encoder.cuda()
    self.decoder.cuda()
    self.discriminator.cuda()

def initEncoderDecoder(self):
    if self.opt.dataset == 'SMPL':
        num_verts = 6890
    elif self.opt.dataset == 'all_animals':
        num_verts = 3889
    else:
        # previously num_verts was left unbound for unknown datasets
        raise ValueError('Unknown dataset: {}'.format(self.opt.dataset))
    encoder = Encoder()
    decoder = Decoder(num_verts=num_verts)
    encoder.load_state_dict(torch.load(self.encoder_weights))
    decoder.load_state_dict(torch.load(self.decoder_weights))
    self.encoder = encoder.eval()
    self.decoder = decoder.eval()

def init_session_handler(self, alpha=1.0):
    # NOTE: `alpha` was previously an unbound name; it is now taken as a
    # parameter (1.0 = full style transfer).
    self.sess = tf.Session()

    encoder = Encoder()
    decoder = Decoder()

    self.content_input = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='content_input')
    self.style_input = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='style_input')

    # switch RGB to BGR
    content = tf.reverse(self.content_input, axis=[-1])
    style = tf.reverse(self.style_input, axis=[-1])

    # preprocess image
    content = encoder.preprocess(content)
    style = encoder.preprocess(style)

    # encode image
    # we should initialize global variables before restoring the model
    enc_c_net = encoder.encode(content, 'content/')
    enc_s_net = encoder.encode(style, 'style/')

    # pass the encoded images to AdaIN
    target_features = transfer_util.AdaIN(enc_c_net.outputs, enc_s_net.outputs, alpha=alpha)

    # decode target features back to image
    dec_net = decoder.decode(target_features, prefix="decoder/")
    self.generated_img = dec_net.outputs

    # deprocess image
    self.generated_img = encoder.deprocess(self.generated_img)

    # switch BGR back to RGB
    self.generated_img = tf.reverse(self.generated_img, axis=[-1])

    # clip to 0..255
    self.generated_img = tf.clip_by_value(self.generated_img, 0.0, 255.0)

    self.sess.run(tf.global_variables_initializer())
    encoder.restore_model(self.sess, self.encode_path, enc_c_net)
    encoder.restore_model(self.sess, self.encode_path, enc_s_net)
    decoder.restore_model(self.sess, self.decode_path, dec_net)

def build_model(vocab_dicts):
    logger.info(' * maximum batch size. %d' % opt.batch_size)
    logger.info('Building model...')

    encoder = Encoder(opt, vocab_dicts['src'])
    decoder = Decoder(opt, vocab_dicts['tgt'])
    if opt.share_embedding:
        decoder.word_lut = encoder.word_lut
    decIniter = DecInit(opt)
    model = NMTModel(encoder, decoder, decIniter)

    if opt.pointer_gen:
        generator = Generator(opt, vocab_dicts)  # TODO: consider adding dropout
    else:
        generator = nn.Sequential(
            nn.Linear(opt.dec_rnn_size // opt.maxout_pool_size, vocab_dicts['tgt'].size()),
            # nn.Linear(opt.word_vec_size, dicts['tgt'].size()),  # transformer
            nn.LogSoftmax(dim=-1)
        )

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()
    model.generator = generator

    logger.info("model.encoder.word_lut: {}".format(id(model.encoder.word_lut)))
    logger.info("model.decoder.word_lut: {}".format(id(model.decoder.word_lut)))
    logger.info("embedding share: {}".format(model.encoder.word_lut is model.decoder.word_lut))
    logger.info(repr(model))

    param_count = sum([param.view(-1).size()[0] for param in model.parameters()])
    logger.info('total number of parameters: %d\n\n' % param_count)

    init_params(model)
    optim = build_optim(model)

    # # Resume from checkpoint
    # logger.info(opt.checkpoint_file)
    # if opt.checkpoint_file is not None:
    #     logger.info("load {}".format(opt.checkpoint_file))
    #     checkpoint = torch.load(opt.checkpoint_file)
    #     for k, v in checkpoint['generator'].items():
    #         checkpoint['model']["generator." + k] = v
    #     model.load_state_dict(checkpoint['model'])
    #     optim = checkpoint['optim']
    #     opt.start_epoch += checkpoint['epoch']

    return model, optim

def init_fit(self, X1_train, X2_train, y_train, X1_val, X2_val, y_val, args):
    self.train_loader = get_dataloader(X1_train, X2_train, y_train, args.batch_size)
    self.test_loader = get_dataloader(X1_val, X2_val, y_val, args.batch_size)
    self.predictor = Decoder(
        latent_size=X1_train.shape[1],
        layer_sizes=[X2_train.shape[1]],
        activation=args.activation,
        batch_norm=args.batch_norm,
        dropout=args.dropout,
        mlp_type=self.mlp_type,
        conditional=args.conditional,
        num_labels=10 if args.conditional else 0).to(self.device)
    self.optimizer = torch.optim.Adam(self.predictor.parameters(), lr=args.learning_rate)
    self.scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.8)

def __init__(self, z_dim=32, h_dim=128, filter_num=64, channel_num=3, lr=1e-3, cuda=False):
    # Are we cuda'ing it
    self.cuda = cuda

    # Encoder, decoder, discriminator
    self.encoder = self.cudafy_(
        Encoder(z_dim, h_dim=h_dim, filter_num=filter_num, channel_num=channel_num))
    self.encoder.apply(weight_init)

    self.decoder = self.cudafy_(
        Decoder(z_dim, filter_num=filter_num, channel_num=channel_num))
    self.decoder.apply(weight_init)

    self.discrim = self.cudafy_(Discriminator(z_dim))
    self.discrim.apply(weight_init)

    # Optimizers
    generator_params = list(self.encoder.parameters()) + \
        list(self.decoder.parameters())
    self.optim_enc = optim.Adam(self.encoder.parameters(), lr=lr)
    self.optim_dec = optim.Adam(self.decoder.parameters(), lr=lr)
    self.optim_dis = optim.Adam(self.discrim.parameters(), lr=lr)
    self.optim_gen = optim.Adam(generator_params, lr=lr)

    self.start_epoch = 0

def captioning(image_path):
    loader = data_loader(
        features_shape=2048,
        attention_features_shape=64,
        batch_size=256,
        buffer_size=1000,
        top_k=5000
    )

    # load model and checkpoint
    embedding_matrix = np.load("./content/drive/My Drive/datasets/embeddingmatrix.npy")
    encoder = Encoder(200)
    decoder = Decoder(embedding_dim=200, vocab_size=loader.top_k + 1, units=512,
                      embedding_matrix=embedding_matrix)
    optimizer = tf.keras.optimizers.Adam()

    checkpoint_path = "./content/drive/My Drive/datasets/modelcheckpoint/embedding"
    ckpt = tf.train.Checkpoint(encoder=encoder, decoder=decoder, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=3)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)

    # inference time
    result, _ = evaluate(
        encoder,
        decoder,
        loader.tokenizer,
        loader.max_length,
        loader.attention_features_shape,
        image_path
    )
    result = " ".join(result)
    return result

def evaluate(path):
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
    vocab_size = len(tokenizer.word_index) + 1

    encoder = Encoder(config.embedding_dim)
    decoder = Decoder(config.units, config.embedding_dim, vocab_size)

    checkpoint_dir = './checkpoints'
    checkpoint = tf.train.Checkpoint(encoder=encoder, decoder=decoder)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    image = load_image(path)
    encoder_outputs = encoder(tf.expand_dims(image, 0))
    dec_state = tf.zeros((1, config.units))
    dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)

    result = beam_search(config.beam_width, decoder, dec_input, dec_state,
                         encoder_outputs, tokenizer.word_index['<end>'], vocab_size)
    result = tokenizer.sequences_to_texts([result])
    print(result)

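# A simpler greedy alternative to beam_search above, for quick sanity checks
# (assumptions: decoder(dec_input, dec_state, encoder_outputs) returns
# (predictions, state, attention_weights) as in the TF image-captioning
# tutorial, and config exposes a max_length; neither is confirmed here):
#
#   result = []
#   for _ in range(config.max_length):
#       predictions, dec_state, _ = decoder(dec_input, dec_state, encoder_outputs)
#       predicted_id = int(tf.argmax(predictions[0]))
#       if predicted_id == tokenizer.word_index['<end>']:
#           break
#       result.append(predicted_id)
#       dec_input = tf.expand_dims([predicted_id], 0)
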
def main(test_data_file, checkpoint_dir, training_info_file, beam_width,
         sample_content, cpd_model_file, print_utt):
    training_info = helpers.load_from_pickle(training_info_file)

    encoder = Encoder(
        len(training_info['mr_word2idx']) + 1,
        training_info['embedding_dim'],
        training_info['units'])
    decoder = Decoder(
        len(training_info['ref_word2idx']) + 1,
        training_info['embedding_dim'],
        training_info['units'] * 2,
        training=False)
    optimizer = tf.keras.optimizers.Adam()

    checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)
    print('Restoring checkpoint from', checkpoint_dir)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    # get test data
    test_data = load_text_data(test_data_file, 2000)

    if print_utt:
        print_generations(test_data, encoder, decoder, training_info,
                          beam_width, sample_content, cpd_model_file)

    bleu_mean, bleu_var = calculate_mean_bleu_score(test_data, encoder, decoder,
                                                    training_info, beam_width,
                                                    sample_content, cpd_model_file)
    print(bleu_mean, bleu_var)

def get_eval_model(load, sep, resize):
    e1 = E1(sep, int(resize / 64))
    e2 = E2(sep, int(resize / 64))
    decoder = Decoder(int(resize / 64))

    if torch.cuda.is_available():
        e1 = e1.cuda()
        e2 = e2.cuda()
        decoder = decoder.cuda()

    _iter = load_model_for_eval(load, e1, e2, decoder)

    e1 = e1.eval()
    e2 = e2.eval()
    decoder = decoder.eval()

    return e1, e2, decoder

def main():
    train_iterator, valid_iterator, test_iterator, params = prepare_data()
    (INPUT_DIM, OUTPUT_DIM, ENC_EMB_DIM, DEC_EMB_DIM, ENC_HID_DIM,
     DEC_HID_DIM, ENC_DROPOUT, DEC_DROPOUT) = params
    # INPUT_DIM = len(SRC.vocab), 7855
    # OUTPUT_DIM = len(TRG.vocab), 5893
    # ENC_EMB_DIM = 256
    # DEC_EMB_DIM = 256
    # ENC_HID_DIM = 512
    # DEC_HID_DIM = 512
    # ENC_DROPOUT = 0.5
    # DEC_DROPOUT = 0.5

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)
    model = Seq2Seq(enc, dec, device).to(device)
    model.apply(init_weights)
    print(f'The model has {count_parameters(model):,} trainable parameters')

    for i, batch in enumerate(train_iterator):
        print(f'ITER: {i}')
        example = batch
        print("Input Length:", example.src.shape, "[src_len, batch_size]")
        output = model.forward(example.src, example.trg)
        print(output.shape)
        print('')
        if i > 3:
            break

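# Expected shapes through the loop above (assumption: the standard time-major
# attention seq2seq layout; not verified against this repo's Seq2Seq class):
#
#   example.src : [src_len, batch_size]                token indices
#   example.trg : [trg_len, batch_size]                token indices
#   output      : [trg_len, batch_size, OUTPUT_DIM]    per-step vocabulary scores
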
def evaluate(text):
    with open('input_tokenizer.pickle', 'rb') as handle:
        input_tokenizer = pickle.load(handle)
    with open('output_tokenizer.pickle', 'rb') as handle:
        output_tokenizer = pickle.load(handle)
    input_vocab_size = len(input_tokenizer.word_index) + 1
    output_vocab_size = len(output_tokenizer.word_index) + 1

    text = preprocess_text(text)
    seq = input_tokenizer.texts_to_sequences([text])
    inputs = tf.keras.preprocessing.sequence.pad_sequences(seq, truncating='post', padding='post')
    inputs = tf.convert_to_tensor(inputs)

    encoder = Encoder(input_vocab_size, constants.embedding_dim, constants.units, constants.BATCH_SIZE)
    decoder = Decoder(output_vocab_size, constants.embedding_dim, constants.units, constants.BATCH_SIZE)

    checkpoint_dir = './checkpoints'
    checkpoint = tf.train.Checkpoint(encoder=encoder, decoder=decoder)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    enc_outputs, enc_hidden = encoder(inputs)
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([output_tokenizer.word_index['<start>']], 0)

    result = beam_search(constants.beam_width, decoder, dec_input, dec_hidden,
                         enc_outputs, output_tokenizer.word_index['<end>'],
                         output_vocab_size)
    result = output_tokenizer.sequences_to_texts([result])
    print(result[0])

def __init__(self, **kwargs):
    super().__init__()
    self.save_hyperparameters()
    self.encoder = Encoder()
    self.decoder = Decoder()
    self.loss_func = nn.MSELoss()

def load():
    encoder_net = Encoder(vocab_enc, 150, 200, 1, 0.3).to("cpu")
    decoder_net = Decoder(vocab_dec, 150, 200, vocab_dec, 1, 0.3).to("cpu")
    # load_state_dict (not state_dict) is needed to actually load the saved weights
    encoder_net.load_state_dict(
        torch.load("/home/aradhya/Desktop/hacks/model_for_faq_encoder.pt"))
    decoder_net.load_state_dict(
        torch.load("/home/aradhya/Desktop/hacks/model_for_faq_decoder.pt"))
    return encoder_net, decoder_net

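# Hedged usage sketch (assumption: the checkpoints exist at the paths above and
# vocab_enc / vocab_dec are in scope; eval mode is the usual next step for
# inference):
#
#   encoder_net, decoder_net = load()
#   encoder_net.eval()
#   decoder_net.eval()
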
def __init__(self):
    super(VAE, self).__init__()
    self.E, self.D = Encoder(), Decoder()
    self.memory = []
    self.memory_num = 0
    # self.E and self.D are registered as submodules, so self.parameters()
    # covers both networks.
    self.opt = torch.optim.Adam(self.parameters(), lr=VAE_LEARNING_RATE)

def __init__(self, config: Config):
    super().__init__()
    self.encoder = Encoder()
    self.decoder = Decoder()
    self.loss_func = nn.MSELoss()
    self.config: Config = config

def __init__(self, input_size, latent_size, rate, discrim_weight=1,
             gen_weight=1, decode_weight=1):
    '''
    Constructor for an AAE.

    Keyword Args:
        input_size - dimensionality of the flattened input (the default
            image size is 28x28)
    '''
    super().__init__()

    # hyperparameters
    self.rate = rate
    self.input_size = input_size
    self.latent_size = latent_size

    # separate networks in the model
    self.EncoderGenerator = EncoderGenerator(self.input_size, self.latent_size,
                                             self.rate, gen_weight, 0.0001)
    self.Decoder = Decoder(self.input_size, self.latent_size, self.rate,
                           decode_weight, 0.0001)
    self.Discriminator = Discrminator(self.latent_size, self.rate,
                                      discrim_weight, 0.01)
    # for the multiclass case
    self.Discriminator_Multiclass = Discriminator_Multiclass(
        self.latent_size, self.rate, discrim_weight, 0.01)

    # creating the autoencoder for adversarial learning
    AE_input = Input(shape=(self.input_size,), name="AE_input")
    latent_data, reconstructed_input = self.call(AE_input)
    self.Autoencoder = Model(AE_input, reconstructed_input)

    # compiling models
    self.Autoencoder.compile(loss="mse", optimizer=self.EncoderGenerator.optimizer)
    # self.save_model(self.EncoderGenerator.layer_1, "saved_models/aae_computationmodel")

    self.threshold = None

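# Hedged sketch of how the `threshold` field above is typically used with an
# AAE for anomaly detection (assumption: this use is not confirmed by the
# snippet; samples whose reconstruction error exceeds the threshold are
# flagged):
#
#   recon = model.Autoencoder.predict(x)
#   errors = np.mean(np.square(x - recon), axis=1)   # per-sample MSE
#   model.threshold = np.percentile(errors, 95)      # fit on normal data
#   is_anomaly = errors > model.threshold
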
def get_models(args):
    if args.atten_mode in ["seq", "both"]:
        encoder_output_size = args.d_hidden * 2 + args.d_rel_embed
    elif args.atten_mode in ["BiLSTM", "BiGRU"]:
        encoder_output_size = args.d_hidden
    else:
        encoder_output_size = args.d_hidden + args.d_rel_embed

    if args.atten_mode == "BiLSTM":
        logging.info("using BiLSTM")
        encoder = BiLSTMEncoder(wsize=len(word_vocab),
                                word_dim=args.d_word_embed,
                                rsize=len(rel_vocab),
                                rel_dim=args.d_rel_embed,
                                ssize=len(ent_vocab),
                                ent_dim=args.d_rel_embed,
                                output_size=encoder_output_size,
                                config=args,
                                device=device,
                                mode=args.atten_mode)
    elif args.atten_mode == "BiGRU":
        logging.info("using BiGRU")
        encoder = BiGRUEncoder(wsize=len(word_vocab),
                               word_dim=args.d_word_embed,
                               rsize=len(rel_vocab),
                               rel_dim=args.d_rel_embed,
                               ssize=len(ent_vocab),
                               ent_dim=args.d_rel_embed,
                               output_size=encoder_output_size,
                               config=args,
                               device=device,
                               mode=args.atten_mode)
    elif args.atten_mode == "arsmcnn":
        args.n_cells = args.num_layers
        if args.birnn:
            args.n_cells *= 2
        rel_div_vocab = torch.load("../../data/vocab/vocab.rel_div.pt")
        encoder = RelationRanking(word_vocab, rel_div_vocab, args)
    else:
        encoder = AttenEncoder(wsize=len(word_vocab),
                               word_dim=args.d_word_embed,
                               rsize=len(rel_vocab),
                               rel_dim=args.d_rel_embed,
                               ssize=len(ent_vocab),
                               ent_dim=args.d_rel_embed,
                               output_size=encoder_output_size,
                               config=args,
                               device=device,
                               mode=args.atten_mode)

    decoder = Decoder(input_size=args.d_rel_embed,
                      hidden_size=encoder_output_size,
                      output_size=len(rel_vocab))
    return encoder, decoder

def create_model_decoder(arg, devices_list, eval=False):
    from models import Decoder

    resume_dataset = arg.eval_dataset_decoder if eval else arg.dataset
    resume_split = arg.eval_split_decoder if eval else arg.split
    resume_epoch = arg.eval_epoch_decoder if eval else arg.resume_epoch

    decoder = Decoder()
    if resume_epoch > 0:
        load_path = (arg.resume_folder + 'decoder_' + resume_dataset + '_' +
                     resume_split + '_' + str(resume_epoch) + '.pth')
        print('Loading decoder from ' + load_path)
        decoder = load_weights(decoder, load_path, devices_list[0])
    else:
        init_weights(decoder)

    if arg.cuda:
        decoder = decoder.cuda(device=devices_list[0])

    return decoder

def test(args):
    # ---------- load model_real_cartoon ---------- #
    rc_e1 = E1(args.sep, int(args.resize / 64))
    rc_e2 = E2(args.sep, int(args.resize / 64))
    rc_decoder = Decoder(int(args.resize / 64))
    if torch.cuda.is_available():
        rc_e1 = rc_e1.cuda()
        rc_e2 = rc_e2.cuda()
        rc_decoder = rc_decoder.cuda()
    if args.load_rc != '':
        save_file = os.path.join(args.load_rc)
        load_model_for_eval(save_file, rc_e1, rc_e2, rc_decoder)
    rc_e1 = rc_e1.eval()
    rc_e2 = rc_e2.eval()
    rc_decoder = rc_decoder.eval()

    # ---------- load model_cartoon ---------- #
    c_e1 = E1(args.sep, int(args.resize / 64))
    c_e2 = E2(args.sep, int(args.resize / 64))
    c_decoder = Decoder(int(args.resize / 64))
    if torch.cuda.is_available():
        c_e1 = c_e1.cuda()
        c_e2 = c_e2.cuda()
        c_decoder = c_decoder.cuda()
    if args.load_c != '':
        save_file = os.path.join(args.load_c)
        load_model_for_eval(save_file, c_e1, c_e2, c_decoder)
    c_e1 = c_e1.eval()
    c_e2 = c_e2.eval()
    c_decoder = c_decoder.eval()

    # -------------- running -------------- #
    if not os.path.exists(args.out) and args.out != "":
        os.mkdir(args.out)

    # trans(args, rc_e1, rc_e2, rc_decoder, c_e1, c_e2, c_decoder)
    test_domA_cluster, test_domB_cluster = my_get_test_imgs(args)
    for idx, (test_domA, test_domB) in enumerate(
            list(zip(test_domA_cluster, test_domB_cluster))):
        trans(args, idx, test_domA, test_domB,
              rc_e1, rc_e2, rc_decoder, c_e1, c_e2, c_decoder)

def init_model(args):
    vocab = torch.load(args.vocab)
    cnn_encoder = CNNEncoder(len(vocab.src), args.embed_size)
    encoder = Encoder(cnn_encoder.out_size, args.hidden_size)
    decoder = Decoder(args.embed_size, args.hidden_size, len(vocab.tgt))
    model = Seq2Seq(cnn_encoder, encoder, decoder, args, vocab)
    model.load_state_dict(torch.load(args.load_model_path))
    model.eval()
    return vocab, model

def __init__(self, x_dim, y_dim, r_dim, z_dim, h_dim):
    super(NeuralProcess, self).__init__()
    self.x_dim = x_dim
    self.y_dim = y_dim
    self.r_dim = r_dim
    self.z_dim = z_dim
    self.h_dim = h_dim

    # Initialize networks
    self.xy_to_r = Encoder(x_dim, y_dim, h_dim, r_dim)
    self.r_to_mu_sigma = MuSigmaEncoder(r_dim, z_dim)
    self.xz_to_y = Decoder(x_dim, z_dim, h_dim, y_dim)

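# Hedged sketch of the usual Neural Process forward pass wired from the three
# networks above (assumption: illustrative only, not this class's actual
# forward; per-context-point encodings r_i are aggregated by a mean):
#
#   r_i = self.xy_to_r(x_context, y_context)       # (batch, num_context, r_dim)
#   r = r_i.mean(dim=1)                            # permutation-invariant aggregate
#   mu, sigma = self.r_to_mu_sigma(r)              # parameters of q(z | context)
#   z = mu + sigma * torch.randn_like(sigma)       # reparameterized sample
#   y_pred = self.xz_to_y(x_target, z)             # decode predictions at targets
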
def main(args):
    if args.data == 'MNIST':
        data_path = '/home/szchen/Datasets/'
        input_dim = 28 * 28
        transform = transforms.Compose([transforms.ToTensor()])
        mnist = torchvision.datasets.MNIST(data_path, download=False,
                                           transform=transform, train=True)
        dataloader = torch.utils.data.DataLoader(mnist, batch_size=args.batch_size,
                                                 shuffle=True)

    encoder = Encoder(input_dim=input_dim, args=args)
    decoder = Decoder(output_dim=input_dim, args=args)
    model = VAE(encoder=encoder, decoder=decoder, args=args).cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    total_loss = []
    for epoch in tqdm(range(args.max_epoch)):
        epoch_loss = []
        for input_data, label in dataloader:
            input_data = Variable(input_data.view(-1, input_dim)).cuda()
            predict_, z_mean, z_log_var = model(input_data)
            optimizer.zero_grad()
            loss = cal_loss(predict_, input_data, z_mean, z_log_var, args)
            epoch_loss.append(loss.cpu().data)
            loss.backward()
            optimizer.step()
        total_loss.append(np.mean(epoch_loss))

        if args.save_fig is not None and (epoch + 1) % args.save_fig == 0:
            test_image = model.inference(16)
            test_image = test_image.view(-1, 28, 28).detach().cpu().numpy()
            utils.save_image(test_image, 'Epoch:{}.png'.format(epoch))

    if args.save_paras:
        if not os.path.exists('./param'):
            os.mkdir('./param')
        torch.save(model.state_dict(), './param/parameters.pt')

    utils.draw_loss_curve(total_loss)

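# Hedged sketch of what `cal_loss` likely computes (assumption: the standard
# VAE objective of reconstruction error plus a KL term against a unit
# Gaussian; the actual helper may weight or reduce the terms differently):

import torch
import torch.nn.functional as F

def vae_loss_sketch(predict_, input_data, z_mean, z_log_var):
    # Bernoulli reconstruction term, summed over pixels, averaged over the batch
    recon = F.binary_cross_entropy(predict_, input_data, reduction='sum') / input_data.size(0)
    # Analytic KL( N(mu, sigma^2) || N(0, I) )
    kl = -0.5 * torch.sum(1 + z_log_var - z_mean.pow(2) - z_log_var.exp()) / input_data.size(0)
    return recon + kl
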
def get_models():
    from models import Encoder, Decoder
    from model_config import Config

    print('Initializing configuration...')
    config = Config()

    print('Initializing models...')
    encoder = Encoder(encoder_name=config.ENCODER_NAME, show_feature_dims=True)
    decoder = Decoder(encoder_dim=encoder.encoder_dim,
                      decoder_dim=config.decoder_dim,
                      attention_dim=config.attention_dim,
                      action_dim=config.action_dim,
                      num_loc=encoder.num_loc,
                      y_keys_info=config.y_keys_info,
                      num_layers=config.num_layers,
                      dropout_prob=config.dropout_prob)
    encoder.cuda()
    decoder.cuda()

    params_list = os.listdir(config.params_dir)
    states = load_lastest_states(config.params_dir, params_list)
    encoder.load_state_dict(states['encoder'])
    decoder.load_state_dict(states['decoder'])

    return encoder, decoder, config.init_y

def init_encoders(self):
    """
    Override to add your own encoders
    """
    encoder_q = Encoder(input_dim=self.hparams.input_dim,
                        hidden_dim=self.hparams.hidden_dim,
                        bidirectional=self.hparams.bidirectional,
                        embedding=self.hparams.input_embedding,
                        cell=self.hparams.cell,
                        num_layers=self.hparams.num_layers)
    encoder_k = Encoder(input_dim=self.hparams.input_dim,
                        hidden_dim=self.hparams.hidden_dim,
                        bidirectional=self.hparams.bidirectional,
                        embedding=self.hparams.input_embedding,
                        cell=self.hparams.cell,
                        num_layers=self.hparams.num_layers)
    decoder_q = Decoder(input_dim=self.hparams.hidden_dim,
                        hidden_dim=self.hparams.hidden_dim,
                        output_dim=self.hparams.input_dim,
                        bidirectional=self.hparams.bidirectional,
                        cell=self.hparams.cell,
                        num_layers=self.hparams.num_layers)
    decoder_k = Decoder(input_dim=self.hparams.hidden_dim,
                        hidden_dim=self.hparams.hidden_dim,
                        output_dim=self.hparams.input_dim,
                        bidirectional=self.hparams.bidirectional,
                        cell=self.hparams.cell,
                        num_layers=self.hparams.num_layers)

    # Xavier-initialize the query networks. (The original mixed encoder_q with
    # decoder_k, which left decoder_q uninitialized; that looks like a typo.)
    for param in list(encoder_q.parameters()) + list(decoder_q.parameters()):
        if param.dim() == 2:
            nn.init.xavier_uniform_(param)

    return encoder_q, encoder_k, decoder_q, decoder_k

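# Hedged sketch of the MoCo-style momentum update that usually pairs with a
# query/key encoder split like the one above (assumption: not shown in this
# snippet; the key networks track an exponential moving average of the query
# networks rather than receiving gradients):

import torch

@torch.no_grad()
def momentum_update(net_q, net_k, m=0.999):
    # key params <- m * key params + (1 - m) * query params
    for p_q, p_k in zip(net_q.parameters(), net_k.parameters()):
        p_k.data.mul_(m).add_(p_q.data, alpha=1 - m)
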