Beispiel #1
0
# Load the LTMG regulation matrix and convert it to a torch tensor whose
# dtype matches the requested model precision.
# (from_numpy implies readLTMGnonsparse returns a numpy ndarray.)
regulationMatrix = readLTMGnonsparse(args.LTMGDir, ltmgFile)
regulationMatrix = torch.from_numpy(regulationMatrix)
if args.precisionModel == 'Double':
    regulationMatrix = regulationMatrix.type(torch.DoubleTensor)
elif args.precisionModel == 'Float':
    regulationMatrix = regulationMatrix.type(torch.FloatTensor)

# Original
# Build the autoencoder sized by the feature dimension of scData
# (presumably cells x genes -- TODO confirm).
if args.model == 'VAE':
    # model = VAE(dim=scData.features.shape[1]).to(device)
    model = VAE2d(dim=scData.features.shape[1]).to(device)
elif args.model == 'AE':
    model = AE(dim=scData.features.shape[1]).to(device)
# NOTE(review): if args.model is neither 'VAE' nor 'AE', `model` is never
# bound and the lines below raise NameError -- confirm argparse choices.
if args.precisionModel == 'Double':
    model = model.double()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Benchmark
# Ground-truth cell labels: first column of the benchmark CSV
# (row index taken from column 0 of the file).
bench_pd = pd.read_csv(args.benchmark, index_col=0)
# t1=pd.read_csv('/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv',index_col=0)
bench_celltype = bench_pd.iloc[:, 0].to_numpy()

# whether to output debuginfo in running time and memory consumption


def debuginfoStr(info):
    """Print elapsed wall-clock time plus *info*, then read peak RSS.

    Only active when args.debuginfo is set.
    NOTE(review): this definition is cut off by the example boundary --
    `mem` is assigned but never used in the visible lines.
    """
    if args.debuginfo:
        print('---' +
              str(datetime.timedelta(seconds=int(time.time() - start_time))) +
              '---' + info)
        mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
Beispiel #2
0
# Free cached GPU memory and force deterministic cuDNN behaviour
# (benchmark off + deterministic on trades speed for reproducibility).
torch.cuda.empty_cache()
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Training hyper-parameters
BATCH = 64
LR = 2e-5
EPOCH = 50

if __name__ == "__main__":
    # argv[1]: path to a .npy file holding the raw training images
    train_x = np.load(sys.argv[1])
    train_x = preprocess(train_x)
    train_dataset = ImageDataset(train_x)

    model = AE().cuda()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=BATCH,
                                  shuffle=True)

    # Plain reconstruction-loss (MSE) training loop.
    for epoch in range(EPOCH):
        model.train()
        for data in train_dataloader:
            x = data.cuda()

            latents, reconst_x = model(x)
            loss = criterion(reconst_x, x)

            optimizer.zero_grad()
            loss.backward()
            # NOTE(review): snippet truncated here by the example boundary;
            # the matching optimizer.step() is not visible.
Beispiel #3
0
from model import AE
import sys

# argv[1]: training data (.npy); argv[2]: destination path for the model
npy_path = sys.argv[1]
model_path = sys.argv[2]

trainX = np.load(npy_path)

# preprocess returns two variants; only the first is used for training here
trainX_preprocessed, trainX_preprocessed2 = preprocess(trainX)
img_dataset = Image_Dataset2(trainX_preprocessed)

# Fix all RNG seeds for reproducibility
same_seeds(0)

model = AE().cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, weight_decay=1e-5)
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-5, momentum=0.9)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min') # adjust lr

model.train()
n_epoch = 300


# Prepare dataloader, model, loss criterion and optimizer
img_dataloader = DataLoader(img_dataset, batch_size=64, shuffle=True)


# Main training loop
for epoch in range(n_epoch):
    for data in img_dataloader:
        img = data
        # NOTE(review): loop body truncated by the example boundary.
Beispiel #4
0
# Checkpoint directory
save_dir = './save'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

#Log directory
log_dir = './log'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# NOTE(review): opened in 'w' mode and never closed in the visible lines.
log_file = open(os.path.join(log_dir, 'loss.txt'), 'w')

#Define model's training parameters
lr = 0.0005
num_epochs = 5
ae = AE()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(ae.parameters(), lr=lr, weight_decay=1e-5)


#Validation Loss
def eval_loss():
    """Return the average reconstruction (MSE) loss over the loader.

    Returns None when the loader yields no batches.
    NOTE(review): despite the 'Validation Loss' label this iterates
    train_loader -- confirm whether a separate validation loader exists.
    """
    val_loss = []
    # Evaluation only: skip autograd bookkeeping.
    with torch.no_grad():
        for batch_id, (x, label) in enumerate(train_loader):
            enc, dec = ae(x)
            val_loss.append(criterion(dec, x).item())
    if not val_loss:
        return None
    # BUG FIX: the original returned from inside the loop after the first
    # batch, so only one batch was ever averaged; average all batches.
    return sum(val_loss) / len(val_loss)


#Train the model
# NOTE(review): the loop body is truncated by the example boundary.
for epoch in range(num_epochs):
Beispiel #5
0
class BiAAE(object):
    def __init__(self, params):
        """Set up the two autoencoders, their discriminators and losses.

        params: experiment configuration namespace (exp_id, src/tgt
        languages, discriminator layer sizes, ...).
        """
        self.params = params
        # Output tree: <exp_id>/<src>-<tgt>/<norm_embeddings>[/best]
        self.tune_dir = "{}/{}-{}/{}".format(params.exp_id, params.src_lang,
                                             params.tgt_lang,
                                             params.norm_embeddings)
        self.tune_best_dir = "{}/best".format(self.tune_dir)

        self.X_AE = AE(params)
        self.Y_AE = AE(params)
        self.D_X = Discriminator(input_size=params.d_input_size,
                                 hidden_size=params.d_hidden_size,
                                 output_size=params.d_output_size)
        self.D_Y = Discriminator(input_size=params.d_input_size,
                                 hidden_size=params.d_hidden_size,
                                 output_size=params.d_output_size)

        self.nets = [self.X_AE, self.Y_AE, self.D_X, self.D_Y]
        # BCE for real/fake discrimination; cosine similarity feeds the
        # reconstruction and cross-lingual losses in train().
        self.loss_fn = torch.nn.BCELoss()
        self.loss_fn2 = torch.nn.CosineSimilarity(dim=1, eps=1e-6)

    def weights_init(self, m):  # 正交初始化
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.orthogonal(m.weight)
            if m.bias is not None:
                torch.nn.init.constant(m.bias, 0.01)

    def weights_init2(self, m):  # xavier_normal 初始化
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_normal(m.weight)
            if m.bias is not None:
                torch.nn.init.constant(m.bias, 0.01)

    def weights_init3(self, m):  # 单位阵初始化
        if isinstance(m, torch.nn.Linear):
            m.weight.data.copy_(
                torch.diag(torch.ones(self.params.g_input_size)))

    def freeze(self, m):
        for p in m.parameters():
            p.requires_grad = False

    def defreeze(self, m):
        for p in m.parameters():
            p.requires_grad = True

    def init_state(self, seed=-1):
        """Move nets and losses to the GPU (if available) and init weights.

        seed: unused in the visible code -- NOTE(review): confirm whether
        RNG seeding was meant to happen here.
        """
        if torch.cuda.is_available():
            # Move the network and the optimizer to the GPU
            for net in self.nets:
                net.cuda()
            self.loss_fn = self.loss_fn.cuda()
            self.loss_fn2 = self.loss_fn2.cuda()

        print('Init3 the model...')
        self.X_AE.apply(self.weights_init)  # G init scheme can be swapped here
        self.Y_AE.apply(self.weights_init)  # G init scheme can be swapped here

        self.D_X.apply(self.weights_init2)
        #print(self.D_X.map1.weight)
        self.D_Y.apply(self.weights_init2)

    def train(self, src_dico, tgt_dico, src_emb, tgt_emb, seed):
        """Adversarially train the X/Y autoencoders against D_X/D_Y.

        src_dico / tgt_dico: dictionaries where only index 1 (word2id)
        is used here.
        src_emb / tgt_emb: source/target embedding matrices.
        seed: run identifier, embedded in checkpoint and plot filenames.

        Every print_every epochs the translation accuracy is evaluated and
        the best checkpoint (by the dist_mean_cosine size criterion) is
        saved under tune_best_dir. Loss/accuracy plots are written at the
        end; on Ctrl-C an interrupt checkpoint is saved instead.
        """
        # Load data
        if not os.path.exists(self.params.data_dir):
            print("Data path doesn't exists: %s" % self.params.data_dir)
        if not os.path.exists(self.tune_dir):
            os.makedirs(self.tune_dir)
        if not os.path.exists(self.tune_best_dir):
            os.makedirs(self.tune_best_dir)

        src_word2id = src_dico[1]
        tgt_word2id = tgt_dico[1]
        en = src_emb
        it = tgt_emb

        #eval = Evaluator(self.params, en,it, torch.cuda.is_available())

        # One SGD optimizer over both generators (autoencoders) and one
        # over both discriminators.
        AE_optimizer = optim.SGD(filter(
            lambda p: p.requires_grad,
            list(self.X_AE.parameters()) + list(self.Y_AE.parameters())),
                                 lr=self.params.g_learning_rate)
        D_optimizer = optim.SGD(list(self.D_X.parameters()) +
                                list(self.D_Y.parameters()),
                                lr=self.params.d_learning_rate)

        # Per-epoch statistics collected for the plots written below.
        D_A_acc_epochs = []
        D_B_acc_epochs = []
        D_A_loss_epochs = []
        D_B_loss_epochs = []
        d_loss_epochs = []
        G_AB_loss_epochs = []
        G_BA_loss_epochs = []
        G_AB_recon_epochs = []
        G_BA_recon_epochs = []
        g_loss_epochs = []
        L_Z_loss_epoches = []

        acc_epochs = []

        criterion_epochs = []
        best_valid_metric = -100

        try:
            for epoch in range(self.params.num_epochs):
                D_A_losses = []
                D_B_losses = []
                G_AB_losses = []
                G_AB_recon = []
                G_BA_losses = []
                G_adv_losses = []
                G_BA_recon = []
                L_Z_losses = []
                d_losses = []
                g_losses = []
                hit_A = 0
                hit_B = 0
                total = 0
                start_time = timer()
                # lowest_loss = 1e5
                # Smoothed discriminator targets: first half of the batch is
                # real (1 - smoothing), second half is fake (smoothing).
                label_D = to_variable(
                    torch.FloatTensor(2 * self.params.mini_batch_size).zero_())
                label_D[:self.params.
                        mini_batch_size] = 1 - self.params.smoothing
                label_D[self.params.mini_batch_size:] = self.params.smoothing

                # Generator targets: all "real" (1 - smoothing).
                label_G = to_variable(
                    torch.FloatTensor(self.params.mini_batch_size).zero_())
                label_G = label_G + 1 - self.params.smoothing

                for mini_batch in range(
                        0, self.params.iters_in_epoch //
                        self.params.mini_batch_size):
                    for d_index in range(self.params.d_steps):
                        D_optimizer.zero_grad()  # Reset the gradients
                        self.D_X.train()
                        self.D_Y.train()

                        view_X, view_Y = self.get_batch_data_fast(en, it)

                        # Discriminator X
                        Y_Z = self.Y_AE.encode(view_Y).detach()
                        fake_X = self.X_AE.decode(Y_Z).detach()
                        input = torch.cat([view_X, fake_X], 0)

                        pred_A = self.D_X(input)
                        D_A_loss = self.loss_fn(pred_A, label_D)

                        # Discriminator Y
                        X_Z = self.X_AE.encode(view_X).detach()
                        fake_Y = self.Y_AE.decode(X_Z).detach()

                        input = torch.cat([view_Y, fake_Y], 0)
                        pred_B = self.D_Y(input)
                        D_B_loss = self.loss_fn(pred_B, label_D)

                        D_loss = D_A_loss + self.params.gate * D_B_loss

                        D_loss.backward(
                        )  # compute/store gradients, but don't change params
                        d_losses.append(to_numpy(D_loss.data))
                        D_A_losses.append(to_numpy(D_A_loss.data))
                        D_B_losses.append(to_numpy(D_B_loss.data))

                        # Discriminator accuracy: real >= 0.5, fake < 0.5.
                        discriminator_decision_A = to_numpy(pred_A.data)
                        hit_A += np.sum(
                            discriminator_decision_A[:self.params.
                                                     mini_batch_size] >= 0.5)
                        hit_A += np.sum(
                            discriminator_decision_A[self.params.
                                                     mini_batch_size:] < 0.5)

                        discriminator_decision_B = to_numpy(pred_B.data)
                        hit_B += np.sum(
                            discriminator_decision_B[:self.params.
                                                     mini_batch_size] >= 0.5)
                        hit_B += np.sum(
                            discriminator_decision_B[self.params.
                                                     mini_batch_size:] < 0.5)

                        D_optimizer.step(
                        )  # Only optimizes D's parameters; changes based on stored gradients from backward()

                        # Clip weights
                        #_clip(self.D_X, self.params.clip_value)
                        #_clip(self.D_Y, self.params.clip_value)

                        sys.stdout.write(
                            "[%d/%d] :: Discriminator Loss: %.3f \r" %
                            (mini_batch, self.params.iters_in_epoch //
                             self.params.mini_batch_size,
                             np.asscalar(np.mean(d_losses))))
                        sys.stdout.flush()

                    total += 2 * self.params.mini_batch_size * self.params.d_steps

                    for g_index in range(self.params.g_steps):
                        # 2. Train G on D's response (but DO NOT train D on these labels)
                        AE_optimizer.zero_grad()
                        self.D_X.eval()
                        self.D_Y.eval()
                        view_X, view_Y = self.get_batch_data_fast(en, it)

                        # Generator X_AE
                        ## adversarial loss
                        X_Z = self.X_AE.encode(view_X)
                        X_recon = self.X_AE.decode(X_Z)
                        Y_fake = self.Y_AE.decode(X_Z)
                        pred_Y = self.D_Y(Y_fake)
                        L_adv_X = self.loss_fn(pred_Y, label_G)

                        # reconstruction loss: 1 - mean cosine similarity
                        L_recon_X = 1.0 - torch.mean(
                            self.loss_fn2(view_X, X_recon))

                        # Generator Y_AE
                        # adversarial loss
                        Y_Z = self.Y_AE.encode(view_Y)
                        Y_recon = self.Y_AE.decode(Y_Z)
                        X_fake = self.X_AE.decode(Y_Z)
                        pred_X = self.D_X(X_fake)
                        L_adv_Y = self.loss_fn(pred_X, label_G)

                        ### autoAE Loss
                        L_recon_Y = 1.0 - torch.mean(
                            self.loss_fn2(view_Y, Y_recon))

                        # cross-lingual Loss
                        L_Z = 1.0 - torch.mean(self.loss_fn2(X_Z, Y_Z))

                        # Weighted sum of adversarial, monolingual
                        # reconstruction, and cross-lingual alignment losses.
                        G_loss = self.params.adv_weight * (self.params.gate*L_adv_X + L_adv_Y) + \
                                self.params.mono_weight * (L_recon_X+L_recon_Y) + \
                                self.params.cross_weight * L_Z

                        G_loss.backward()

                        g_losses.append(to_numpy(G_loss.data))
                        G_AB_losses.append(to_numpy(L_adv_X.data))
                        G_BA_losses.append(to_numpy(L_adv_Y.data))
                        G_adv_losses.append(
                            to_numpy(L_adv_Y.data + L_adv_X.data))
                        G_AB_recon.append(to_numpy(L_recon_X.data))
                        G_BA_recon.append(to_numpy(L_recon_Y.data))
                        L_Z_losses.append(to_numpy(L_Z.data))

                        AE_optimizer.step()  # Only optimizes G's parameters

                        sys.stdout.write(
                            "[%d/%d] ::                                     Generator Loss: %.3f \r"
                            % (mini_batch, self.params.iters_in_epoch //
                               self.params.mini_batch_size,
                               np.asscalar(np.mean(g_losses))))
                        sys.stdout.flush()
                '''for each epoch'''

                # NOTE(review): np.asscalar was deprecated and removed in
                # NumPy >= 1.23 -- switch to .item() when upgrading.
                D_A_acc_epochs.append(hit_A / total)
                D_B_acc_epochs.append(hit_B / total)
                G_AB_loss_epochs.append(np.asscalar(np.mean(G_AB_losses)))
                G_BA_loss_epochs.append(np.asscalar(np.mean(G_BA_losses)))
                D_A_loss_epochs.append(np.asscalar(np.mean(D_A_losses)))
                D_B_loss_epochs.append(np.asscalar(np.mean(D_B_losses)))
                G_AB_recon_epochs.append(np.asscalar(np.mean(G_AB_recon)))
                G_BA_recon_epochs.append(np.asscalar(np.mean(G_BA_recon)))
                L_Z_loss_epoches.append(np.asscalar(np.mean(L_Z_losses)))
                d_loss_epochs.append(np.asscalar(np.mean(d_losses)))
                g_loss_epochs.append(np.asscalar(np.mean(g_losses)))

                print(
                    "Epoch {} : Discriminator Loss: {:.3f}, Discriminator Accuracy: {:.3f}, Generator Loss: {:.3f}, Time elapsed {:.2f} mins"
                    .format(epoch, np.asscalar(np.mean(d_losses)),
                            0.5 * (hit_A + hit_B) / total,
                            np.asscalar(np.mean(g_losses)),
                            (timer() - start_time) / 60))

                if (epoch + 1) % self.params.print_every == 0:
                    # No need for discriminator weights

                    # Encode the full embedding matrices for evaluation.
                    X_Z = self.X_AE.encode(Variable(en)).data
                    Y_Z = self.Y_AE.encode(Variable(it)).data

                    mstart_time = timer()
                    for method in [self.params.eval_method]:
                        results = get_word_translation_accuracy(
                            self.params.src_lang,
                            src_word2id,
                            X_Z,
                            self.params.tgt_lang,
                            tgt_word2id,
                            Y_Z,
                            method=method,
                            dico_eval=self.params.eval_file)
                        acc1 = results[0][1]

                    print('{} takes {:.2f}s'.format(method,
                                                    timer() - mstart_time))
                    print('Method:{} score:{:.4f}'.format(method, acc1))

                    # Model selection: keep the checkpoint with the largest
                    # criterion (the size value from dist_mean_cosine).
                    csls, size = dist_mean_cosine(self.params, X_Z, Y_Z)
                    criterion = size
                    if criterion > best_valid_metric:
                        print("New criterion value: {}".format(criterion))
                        best_valid_metric = criterion
                        # Touch an empty marker file recording the epoch/acc.
                        fp = open(
                            self.tune_best_dir +
                            "/seed_{}_dico_{}_gate_{}_epoch_{}_acc_{:.3f}.tmp".
                            format(seed, self.params.dico_build,
                                   self.params.gate, epoch, acc1), 'w')
                        fp.close()
                        torch.save(
                            self.X_AE.state_dict(), self.tune_best_dir +
                            '/seed_{}_dico_{}_gate_{}_best_X.t7'.format(
                                seed, self.params.dico_build,
                                self.params.gate))
                        torch.save(
                            self.Y_AE.state_dict(), self.tune_best_dir +
                            '/seed_{}_dico_{}_gate_{}_best_Y.t7'.format(
                                seed, self.params.dico_build,
                                self.params.gate))
                        torch.save(
                            self.D_X.state_dict(), self.tune_best_dir +
                            '/seed_{}_dico_{}_gate_{}_best_Dx.t7'.format(
                                seed, self.params.dico_build,
                                self.params.gate))
                        # NOTE(review): the double underscore in the Dy
                        # filename below looks unintentional.
                        torch.save(
                            self.D_Y.state_dict(), self.tune_best_dir +
                            '/seed_{}_dico_{}_gate_{}__best_Dy.t7'.format(
                                seed, self.params.dico_build,
                                self.params.gate))

                    # Saving generator weights
                    fp = open(
                        self.tune_dir +
                        "/seed_{}_gate_{}_epoch_{}_acc_{:.3f}.tmp".format(
                            seed, self.params.gate, epoch, acc1), 'w')
                    fp.close()

                    acc_epochs.append(acc1)
                    criterion_epochs.append(criterion)

            # Record the best epoch by criterion in a marker file name.
            criterion_fb, epoch_fb = max([
                (score, index) for index, score in enumerate(criterion_epochs)
            ])
            fp = open(
                self.tune_best_dir +
                "/seed_{}_dico_{}_gate_{}_epoch_{}_Acc_{:.3f}_{:.4f}.cslsfb".
                format(seed, self.params.gate, self.params.dico_build,
                       epoch_fb, acc_epochs[epoch_fb], criterion_fb), 'w')
            fp.close()

            # Save the plot for discriminator accuracy and generator loss
            fig = plt.figure()
            plt.plot(range(0, len(D_A_acc_epochs)),
                     D_A_acc_epochs,
                     color='b',
                     label='D_A')
            plt.plot(range(0, len(D_B_acc_epochs)),
                     D_B_acc_epochs,
                     color='r',
                     label='D_B')
            plt.ylabel('D_accuracy')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_D_acc.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(D_A_loss_epochs)),
                     D_A_loss_epochs,
                     color='b',
                     label='D_A')
            plt.plot(range(0, len(D_B_loss_epochs)),
                     D_B_loss_epochs,
                     color='r',
                     label='D_B')
            plt.ylabel('D_losses')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_D_loss.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(G_AB_loss_epochs)),
                     G_AB_loss_epochs,
                     color='b',
                     label='G_AB')
            plt.plot(range(0, len(G_BA_loss_epochs)),
                     G_BA_loss_epochs,
                     color='r',
                     label='G_BA')
            plt.ylabel('G_losses')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_G_loss.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(G_AB_recon_epochs)),
                     G_AB_recon_epochs,
                     color='b',
                     label='G_AB')
            plt.plot(range(0, len(G_BA_recon_epochs)),
                     G_BA_recon_epochs,
                     color='r',
                     label='G_BA')
            plt.ylabel('G_recon_loss')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_G_Recon.png'.format(seed))

            # fig = plt.figure()
            # plt.plot(range(0, len(L_Z_loss_epoches)), L_Z_loss_epoches, color='b', label='L_Z')
            # plt.ylabel('L_Z_loss')
            # plt.xlabel('epochs')
            # plt.legend()
            # fig.savefig(tune_dir + '/seed_{}_L_Z.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(acc_epochs)),
                     acc_epochs,
                     color='b',
                     label='trans_acc1')
            plt.ylabel('trans_acc')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_trans_acc.png'.format(seed))
            '''
            fig = plt.figure()
            plt.plot(range(0, len(csls_epochs)), csls_epochs, color='b', label='csls')
            plt.ylabel('csls')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_csls.png'.format(seed))
            '''
            fig = plt.figure()
            plt.plot(range(0, len(g_loss_epochs)),
                     g_loss_epochs,
                     color='b',
                     label='G_loss')
            plt.ylabel('g_loss')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_g_loss.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(d_loss_epochs)),
                     d_loss_epochs,
                     color='b',
                     label='csls')
            plt.ylabel('D_loss')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_d_loss.png'.format(seed))
            plt.close('all')

        except KeyboardInterrupt:
            print("Interrupted.. saving model !!!")
            torch.save(self.X_AE.state_dict(),
                       self.tune_dir + '/X_AE_model_interrupt.t7')
            torch.save(self.Y_AE.state_dict(),
                       self.tune_dir + '/Y_AE_model_interrupt.t7')
            torch.save(self.D_X.state_dict(),
                       self.tune_dir + '/D_X_model_interrupt.t7')
            torch.save(self.D_Y.state_dict(),
                       self.tune_dir + '/D_y_model_interrupt.t7')
            exit()

        return

    def get_batch_data_fast(self, emb_en, emb_it):
        """Sample one random mini-batch from each embedding matrix.

        Row indices are drawn uniformly (with replacement) from the first
        most_frequent_sampling_size rows of each matrix.
        NOTE(review): the unconditional .cuda() on the index tensors
        assumes a GPU is present -- confirm CPU runs are unsupported.
        """
        params = self.params
        random_en_indices = torch.LongTensor(params.mini_batch_size).random_(
            params.most_frequent_sampling_size)
        random_it_indices = torch.LongTensor(params.mini_batch_size).random_(
            params.most_frequent_sampling_size)
        en_batch = to_variable(emb_en)[random_en_indices.cuda()]
        it_batch = to_variable(emb_it)[random_it_indices.cuda()]

        return en_batch, it_batch
def main(argv):
    """Train and/or evaluate the denoising autoencoder, per CLI flags.

    argv: raw command-line arguments, decoded by arguments_parsing() into
    the TRAIN/TEST switches and the data/model hyper-parameters below.
    """
    TRAIN, NOISE_TYPES, IMAGE_SIZE, FRAME_SIZE, OVERLAY_SIZE, LATENT_CLEAN_SIZE, BATCH_SIZE, EPOCHS, TEST = arguments_parsing(argv)

    # One checkpoint per noise-type combination (was computed twice in the
    # original; hoisted so train and test agree by construction).
    path_to_model = './model' + '_{}'.format('_'.join([str(elem) for elem in NOISE_TYPES])) + '.pth'

    if TRAIN:
        print('model training with parameters:\n'+
              'noise types = {}\n'.format(NOISE_TYPES)+
              'image size = {}\n'.format(IMAGE_SIZE)+
              'frame size = {}\n'.format(FRAME_SIZE)+
              'overlay size = {}\n'.format(OVERLAY_SIZE)+
              'latent clean size = {}\n'.format(LATENT_CLEAN_SIZE)+
              'batch size = {}\n'.format(BATCH_SIZE)+
              'number of epochs = {}\n'.format(EPOCHS))

        # dataset table creating
        make_dataset_table(PATH_TO_DATA, NOISE_TYPES, PATH_TO_DATASET_TABLE)
        train_test_split(PATH_TO_DATASET_TABLE, test_size=0.2)

        # dataset and dataloader creating
        torch.manual_seed(0)
        # Three augmented views of the training data: horizontal flip,
        # vertical flip, and brightness/contrast jitter.
        augmentations = [Compose([RandomHorizontalFlip(p=1.0), ToTensor()]),
                         Compose([RandomVerticalFlip(p=1.0), ToTensor()]),
                         Compose([ColorJitter(brightness=(0.9, 2.0), contrast=(0.9, 2.0)), ToTensor()])]

        # FIX: the original seeded the fold with a plain Python list and
        # wrapped a new ConcatDataset on every iteration; build all datasets
        # first and concatenate exactly once.
        datasets = [DenoisingDataset(dataset=pd.read_csv(PATH_TO_DATASET_TABLE),
                                     image_size=IMAGE_SIZE,
                                     frame_size=FRAME_SIZE,
                                     overlay_size=OVERLAY_SIZE,
                                     phase='train',
                                     transform=transform)
                    for transform in augmentations]
        train_dataset = ConcatDataset(datasets)

        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True, # can be set to True only for train loader
                                  num_workers=0)

        # model training
        model = AE(1, LATENT_CLEAN_SIZE)
        loss = SSIMLoss()
        latent_loss = MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-3)
        model = train_model(model, train_loader,
                            loss, latent_loss,
                            optimizer,
                            epochs=EPOCHS,
                            device=DEVICE)

        # model saving (whole pickled module, not just a state_dict)
        torch.save(model, path_to_model)

    if TEST:
        # model loading
        # NOTE(review): torch.load of a full module requires the same class
        # definitions on the import path; consider state_dict checkpoints.
        print('{} testing...\n'.format(os.path.basename(path_to_model)))
        model = torch.load(path_to_model)

        dataset = pd.read_csv(PATH_TO_DATASET_TABLE)
        test_dataset = dataset[dataset['phase'] == 'test']

        # model testing and results saving
        loss = SSIMLoss()
        latent_loss = MSELoss()
        print('{} evaluation on test images'.format(os.path.basename(path_to_model)))
        test_evaluation(model, test_dataset,
                        loss, latent_loss,
                        device=DEVICE)
        print()

        path_to_results = PATH_TO_RESULTS + '_{}'.format('_'.join([str(elem) for elem in NOISE_TYPES]))
        if not os.path.exists(path_to_results):
            os.makedirs(path_to_results)
        print('{} running and results saving'.format(os.path.basename(path_to_model)))
        test_model(model, test_dataset, path_to_results)

    print('process completed: OK')
Beispiel #7
0
            # Fragment (function header truncated by the example boundary):
            # tail of an evaluation loop over images flattened to 28 * 28
            # (presumably MNIST-style inputs -- TODO confirm).
            data = data.to(device).view(-1, 28 * 28)
            label = label.to(device).view(-1, 28 * 28)
            _, recons_x = model(data)
            loss = criterion(recons_x, label)
            test_loss += loss.item()
            total += label.size(0)

    # NOTE(review): test_loss accumulates per-batch mean losses but is
    # divided by the sample count -- confirm the intended scaling.
    avg_loss = test_loss / total

    print('===> Test Average loss: {:.7f}\n'.format(avg_loss))

    return avg_loss


# Pre-train the autoencoder on the saved tensor dataset.
ae_model = AE()
ae_model.to(device)
optimizer = optim.Adam(ae_model.parameters(), lr=0.001, betas=(0.9, 0.999))
criterion = nn.MSELoss()
batch_size = 256

train_loader, test_loader = get_customDataLoader(
    './data_for_ae/data_for_autoencoder.pth', batch_size=batch_size)
trainer(ae_model,
        train_loader,
        test_loader,
        optimizer,
        criterion,
        save_path='./pretrained_models/autoencoder_pretrained_1.pth')
#load_model(ae_model, './autoencoder_pretrained.pth')
#test(ae_model, test_loader)
Beispiel #8
0
class CycleBWE(object):
    """Cycle-consistent adversarial trainer for bilingual word embeddings.

    Holds two auto-encoders (X_AE maps the source space toward the target
    space, Y_AE the reverse — see how they are used in ``train``) and two
    discriminators (D_X, D_Y), plus the adversarial (BCE) and cycle
    (cosine-similarity) losses.
    """

    def __init__(self, params):
        # params: experiment configuration object (languages, layer sizes,
        # learning rates, paths, ...). Kept as-is and read throughout.
        self.params = params
        # Per-experiment output directory: <exp_id>/<src>-<tgt>/<norm_embeddings>
        self.tune_dir = "{}/{}-{}/{}".format(params.exp_id, params.src_lang,
                                             params.tgt_lang,
                                             params.norm_embeddings)
        self.tune_best_dir = "{}/best".format(self.tune_dir)
        self.tune_export_dir = "{}/export".format(self.tune_dir)
        # Select the bilingual evaluation dictionaries for both directions.
        if self.params.eval_file == 'wiki':
            self.eval_file = '../data/bilingual_dicts/{}-{}.5000-6500.txt'.format(
                self.params.src_lang, self.params.tgt_lang)
            self.eval_file2 = '../data/bilingual_dicts/{}-{}.5000-6500.txt'.format(
                self.params.tgt_lang, self.params.src_lang)
        elif self.params.eval_file == 'wacky':
            self.eval_file = '../data/bilingual_dicts/{}-{}.test.txt'.format(
                self.params.src_lang, self.params.tgt_lang)
            self.eval_file2 = '../data/bilingual_dicts/{}-{}.test.txt'.format(
                self.params.tgt_lang, self.params.src_lang)
        else:
            # NOTE(review): invalid value only warns; self.eval_file stays
            # unset and would raise AttributeError later in train/export.
            print('Invalid eval file!')
        # self.seed = random.randint(0, 1000)
        # self.seed = 41
        # self.initialize_exp(self.seed)

        self.X_AE = AE(params)
        self.Y_AE = AE(params)
        self.D_X = Discriminator(input_size=params.d_input_size,
                                 hidden_size=params.d_hidden_size,
                                 output_size=params.d_output_size)
        self.D_Y = Discriminator(input_size=params.d_input_size,
                                 hidden_size=params.d_hidden_size,
                                 output_size=params.d_output_size)

        # All trainable networks, used by init_state() for GPU placement.
        self.nets = [self.X_AE, self.Y_AE, self.D_X, self.D_Y]
        # Adversarial loss and cycle-consistency similarity.
        self.loss_fn = torch.nn.BCELoss()
        self.loss_fn2 = torch.nn.CosineSimilarity(dim=1, eps=1e-6)

    def weights_init(self, m):
        """Orthogonal initialisation for Linear layers (bias -> 0.01).

        Intended to be passed to ``Module.apply``; non-Linear modules are
        left untouched.
        """
        if isinstance(m, torch.nn.Linear):
            # Fix: torch.nn.init.orthogonal / .constant were deprecated and
            # have been removed from modern PyTorch; the trailing-underscore
            # in-place variants are the supported equivalents.
            torch.nn.init.orthogonal_(m.weight)
            if m.bias is not None:
                torch.nn.init.constant_(m.bias, 0.01)

    def weights_init2(self, m):
        """Xavier-normal initialisation for Linear layers (bias -> 0.01)."""
        if not isinstance(m, torch.nn.Linear):
            return
        torch.nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            torch.nn.init.constant_(m.bias, 0.01)

    def weights_init3(self, m):  # 单位阵初始化
        if isinstance(m, torch.nn.Linear):
            m.weight.data.copy_(
                torch.diag(torch.ones(self.params.g_input_size)))

    def init_state(self, state=1):
        if torch.cuda.is_available():
            # Move the network and the optimizer to the GPU
            for net in self.nets:
                net.cuda()
            self.loss_fn = self.loss_fn.cuda()
            self.loss_fn2 = self.loss_fn2.cuda()

        if self.params.init == 'eye':
            self.X_AE.apply(self.weights_init3)  # 可更改G初始化方式
            self.Y_AE.apply(self.weights_init3)  # 可更改G初始化方式

        elif self.params.init == 'orth':
            self.X_AE.apply(self.weights_init)  # 可更改G初始化方式
            self.Y_AE.apply(self.weights_init)
        else:
            print('Invalid init func!')

        #self.D_X.apply(self.weights_init2)
        #self.D_Y.apply(self.weights_init2)

    def orthogonalize(self, W):
        params = self.params
        W.copy_((1 + params.beta) * W -
                params.beta * W.mm(W.transpose(0, 1).mm(W)))

    def train(self, src_dico, tgt_dico, src_emb, tgt_emb, seed):
        """Adversarially train the two auto-encoders against the two
        discriminators with an added cycle-consistency loss.

        Args:
            src_dico / tgt_dico: dictionary structures; index 1 is the
                word->id mapping (index 0 appears to be id->word — used so
                in export(); confirm against the loader).
            src_emb / tgt_emb: source / target embedding matrices.
            seed: random seed, used only to name checkpoint/marker files.

        Returns:
            The trained source-side auto-encoder ``self.X_AE``.

        Side effects: creates tune/best/export directories, writes
        "log_src_tgt.txt", periodically evaluates word translation accuracy
        and saves the best checkpoints (by mean forward/backward CSLS).

        Fixes vs. original: ``np.asscalar`` (removed in NumPy >= 1.23) is
        replaced by ``float(...)``; the local ``eval`` was renamed
        ``evaluator`` to stop shadowing the builtin.
        """
        params = self.params
        # Load data
        if not os.path.exists(params.data_dir):
            print("Data path doesn't exists: %s" % params.data_dir)
        if not os.path.exists(self.tune_dir):
            os.makedirs(self.tune_dir)
        if not os.path.exists(self.tune_best_dir):
            os.makedirs(self.tune_best_dir)
        if not os.path.exists(self.tune_export_dir):
            os.makedirs(self.tune_export_dir)

        src_word2id = src_dico[1]
        tgt_word2id = tgt_dico[1]

        en = src_emb
        it = tgt_emb

        params = _get_eval_params(params)
        self.params = params
        evaluator = Evaluator(params, en, it, torch.cuda.is_available())

        # for seed_index in range(params.num_random_seeds):

        # Single SGD optimizer over both auto-encoders (generators).
        AE_optimizer = optim.SGD(filter(
            lambda p: p.requires_grad,
            list(self.X_AE.parameters()) + list(self.Y_AE.parameters())),
                                 lr=params.g_learning_rate)
        # AE_optimizer = optim.SGD(G_params, lr=0.1, momentum=0.9)
        # AE_optimizer = optim.Adam(G_params, lr=params.g_learning_rate, betas=(0.9, 0.9))
        # AE_optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, list(self.X_AE.parameters()) + list(self.Y_AE.parameters())),lr=params.g_learning_rate,alpha=0.9)
        D_optimizer = optim.SGD(list(self.D_X.parameters()) +
                                list(self.D_Y.parameters()),
                                lr=params.d_learning_rate)
        # D_optimizer = optim.Adam(D_params, lr=params.d_learning_rate, betas=(0.5, 0.9))
        # D_optimizer = optim.RMSprop(list(self.D_X.parameters()) + list(self.D_Y.parameters()), lr=params.d_learning_rate , alpha=0.9)

        # D_X=nn.DataParallel(D_X)
        # D_Y=nn.DataParallel(D_Y)
        # true_dict = get_true_dict(params.data_dir)
        # Per-epoch histories (kept for the optional plotting block below).
        D_A_acc_epochs = []
        D_B_acc_epochs = []
        D_A_loss_epochs = []
        D_B_loss_epochs = []
        G_AB_loss_epochs = []
        G_BA_loss_epochs = []
        G_AB_recon_epochs = []
        G_BA_recon_epochs = []
        L_Z_loss_epoches = []

        acc1_epochs = []
        acc2_epochs = []

        csls_epochs = []
        f_csls_epochs = []
        b_csls_epochs = []
        best_valid_metric = -100

        # logs for plotting later
        log_file = open(
            "log_src_tgt.txt",
            "w")  # Being overwritten in every loop, not really required
        log_file.write("epoch, dis_loss, dis_acc, g_loss\n")

        try:
            for epoch in range(self.params.num_epochs):
                D_A_losses = []
                D_B_losses = []
                G_AB_losses = []
                G_AB_recon = []
                G_BA_losses = []
                G_adv_losses = []
                G_BA_recon = []
                L_Z_losses = []
                d_losses = []
                g_losses = []
                hit_A = 0
                hit_B = 0
                total = 0
                start_time = timer()
                # lowest_loss = 1e5
                # Smoothed discriminator targets: first half "real",
                # second half "fake".
                label_D = to_variable(
                    torch.FloatTensor(2 * params.mini_batch_size).zero_())
                label_D[:params.mini_batch_size] = 1 - params.smoothing
                label_D[params.mini_batch_size:] = params.smoothing

                # Generator target: fool the discriminator into "real".
                label_G = to_variable(
                    torch.FloatTensor(params.mini_batch_size).zero_())
                label_G = label_G + 1 - params.smoothing

                for mini_batch in range(
                        0, params.iters_in_epoch // params.mini_batch_size):
                    # ---- discriminator update(s) ----
                    for d_index in range(params.d_steps):
                        D_optimizer.zero_grad()  # Reset the gradients
                        self.D_X.train()
                        self.D_Y.train()

                        view_X, view_Y = self.get_batch_data_fast_new(en, it)
                        # Discriminator X: real X vs. X generated from Y
                        # (detached so only D gets gradients here).
                        fake_X = self.Y_AE.encode(view_Y).detach()
                        input = torch.cat([view_X, fake_X], 0)

                        pred_A = self.D_X(input)
                        D_A_loss = self.loss_fn(pred_A, label_D)

                        # Discriminator Y: real Y vs. Y generated from X.
                        fake_Y = self.X_AE.encode(view_X).detach()
                        input = torch.cat([view_Y, fake_Y], 0)
                        pred_B = self.D_Y(input)
                        D_B_loss = self.loss_fn(pred_B, label_D)

                        # params.gate weights the second discriminator.
                        D_loss = (1.0) * D_A_loss + params.gate * D_B_loss

                        D_loss.backward(
                        )  # compute/store gradients, but don't change params
                        d_losses.append(to_numpy(D_loss.data))
                        D_A_losses.append(to_numpy(D_A_loss.data))
                        D_B_losses.append(to_numpy(D_B_loss.data))

                        # Count correct real/fake decisions for accuracy.
                        discriminator_decision_A = to_numpy(pred_A.data)
                        hit_A += np.sum(
                            discriminator_decision_A[:params.mini_batch_size]
                            >= 0.5)
                        hit_A += np.sum(
                            discriminator_decision_A[params.mini_batch_size:] <
                            0.5)

                        discriminator_decision_B = to_numpy(pred_B.data)
                        hit_B += np.sum(
                            discriminator_decision_B[:params.mini_batch_size]
                            >= 0.5)
                        hit_B += np.sum(
                            discriminator_decision_B[params.mini_batch_size:] <
                            0.5)

                        D_optimizer.step(
                        )  # Only optimizes D's parameters; changes based on stored gradients from backward()

                        # Clip weights (WGAN-style weight clipping).
                        _clip(self.D_X, params.clip_value)
                        _clip(self.D_Y, params.clip_value)

                        sys.stdout.write(
                            "[%d/%d] :: Discriminator Loss: %.3f \r" %
                            (mini_batch,
                             params.iters_in_epoch // params.mini_batch_size,
                             float(np.mean(d_losses))))
                        sys.stdout.flush()

                    total += 2 * params.mini_batch_size * params.d_steps

                    # ---- generator (auto-encoder) update(s) ----
                    for g_index in range(params.g_steps):
                        # 2. Train G on D's response (but DO NOT train D on these labels)
                        AE_optimizer.zero_grad()
                        self.D_X.eval()
                        self.D_Y.eval()
                        view_X, view_Y = self.get_batch_data_fast_new(en, it)

                        # Generator X_AE: adversarial loss against D_Y.
                        Y_fake = self.X_AE.encode(view_X)
                        pred_Y = self.D_Y(Y_fake)
                        L_adv_X = self.loss_fn(pred_Y, label_G)

                        # Cycle X -> Y -> X, penalise cosine dissimilarity.
                        X_Cycle = self.Y_AE.encode(Y_fake)
                        L_Cycle_X = 1.0 - torch.mean(
                            self.loss_fn2(view_X, X_Cycle))

                        # Generator Y_AE: adversarial loss against D_X.
                        X_fake = self.Y_AE.encode(view_Y)
                        pred_X = self.D_X(X_fake)
                        L_adv_Y = self.loss_fn(pred_X, label_G)

                        # Cycle Y -> X -> Y.
                        Y_Cycle = self.X_AE.encode(X_fake)
                        L_Cycle_Y = 1.0 - torch.mean(
                            self.loss_fn2(view_Y, Y_Cycle))

                        # Weighted sum of adversarial and cycle terms.
                        G_loss = params.adv_weight * ( params.gate * L_adv_X + (1.0) * L_adv_Y) + \
                                 params.cycle_weight * (L_Cycle_X+L_Cycle_Y)

                        G_loss.backward()

                        g_losses.append(to_numpy(G_loss.data))
                        G_AB_losses.append(to_numpy(L_adv_X.data))
                        G_BA_losses.append(to_numpy(L_adv_Y.data))
                        G_adv_losses.append(to_numpy(L_adv_Y.data))
                        G_AB_recon.append(to_numpy(L_Cycle_X.data))
                        G_BA_recon.append(to_numpy(L_Cycle_Y.data))

                        AE_optimizer.step()  # Only optimizes G's parameters
                        # Keep the mapping matrices close to orthogonal.
                        self.orthogonalize(self.X_AE.map1.weight.data)
                        self.orthogonalize(self.Y_AE.map1.weight.data)

                        sys.stdout.write(
                            "[%d/%d] ::                                     Generator Loss: %.3f \r"
                            % (mini_batch,
                               params.iters_in_epoch // params.mini_batch_size,
                               float(np.mean(g_losses))))
                        sys.stdout.flush()
                '''for each epoch'''
                # np.asscalar was removed in NumPy 1.23; float() is the
                # documented replacement for extracting a Python scalar.
                D_A_acc_epochs.append(hit_A / total)
                D_B_acc_epochs.append(hit_B / total)
                G_AB_loss_epochs.append(float(np.mean(G_AB_losses)))
                G_BA_loss_epochs.append(float(np.mean(G_BA_losses)))
                D_A_loss_epochs.append(float(np.mean(D_A_losses)))
                D_B_loss_epochs.append(float(np.mean(D_B_losses)))
                G_AB_recon_epochs.append(float(np.mean(G_AB_recon)))
                G_BA_recon_epochs.append(float(np.mean(G_BA_recon)))
                # L_Z_loss_epoches.append(np.asscalar(np.mean(L_Z_losses)))

                print(
                    "Epoch {} : Discriminator Loss: {:.3f}, Discriminator Accuracy: {:.3f}, Generator Loss: {:.3f}, Time elapsed {:.2f} mins"
                    .format(epoch, float(np.mean(d_losses)),
                            0.5 * (hit_A + hit_B) / total,
                            float(np.mean(g_losses)),
                            (timer() - start_time) / 60))

                # lr decay
                # g_optim_state = AE_optimizer.state_dict()
                # old_lr = g_optim_state['param_groups'][0]['lr']
                # g_optim_state['param_groups'][0]['lr'] = max(old_lr * params.lr_decay, params.lr_min)
                # AE_optimizer.load_state_dict(g_optim_state)
                # print("Changing the learning rate: {} -> {}".format(old_lr, g_optim_state['param_groups'][0]['lr']))
                # d_optim_state = D_optimizer.state_dict()
                # d_optim_state['param_groups'][0]['lr'] = max(
                #     d_optim_state['param_groups'][0]['lr'] * params.lr_decay, params.lr_min)
                # D_optimizer.load_state_dict(d_optim_state)

                # ---- periodic evaluation and checkpointing ----
                if (epoch + 1) % params.print_every == 0:
                    # Map both spaces through the generators (and back, for
                    # the commented-out cycle validation below).
                    Vec_xy = self.X_AE.encode(Variable(en))
                    Vec_xyx = self.Y_AE.encode(Vec_xy)
                    Vec_yx = self.Y_AE.encode(Variable(it))
                    Vec_yxy = self.X_AE.encode(Vec_yx)

                    mstart_time = timer()

                    # for method in ['csls_knn_10']:
                    for method in [params.eval_method]:
                        results = get_word_translation_accuracy(
                            params.src_lang,
                            src_word2id,
                            Vec_xy.data,
                            params.tgt_lang,
                            tgt_word2id,
                            it,
                            method=method,
                            dico_eval=self.eval_file,
                            device=params.cuda_device)
                        acc1 = results[0][1]
                        results = get_word_translation_accuracy(
                            params.tgt_lang,
                            tgt_word2id,
                            Vec_yx.data,
                            params.src_lang,
                            src_word2id,
                            en,
                            method=method,
                            dico_eval=self.eval_file2,
                            device=params.cuda_device)
                        acc2 = results[0][1]
                        print('{} takes {:.2f}s'.format(
                            method,
                            timer() - mstart_time))
                        print('Method:{} test_score:{:.4f}-{:.4f}'.format(
                            method, acc1, acc2))
                    '''
                    # for method in ['csls_knn_10']:
                    for method in [params.eval_method]:
                        results = get_word_translation_accuracy(
                            params.src_lang, src_word2id, Vec_xyx.data,
                            params.src_lang, src_word2id, en,
                            method=method,
                            dico_eval='/data/dictionaries/{}-{}.wacky.dict'.format(params.src_lang,params.src_lang),
                            device=params.cuda_device
                        )
                        acc11 = results[0][1]
                    # for method in ['csls_knn_10']:
                    for method in [params.eval_method]:
                        results = get_word_translation_accuracy(
                            params.tgt_lang, tgt_word2id, Vec_yxy.data,
                            params.tgt_lang, tgt_word2id, it,
                            method=method,
                            dico_eval='/data/dictionaries/{}-{}.wacky.dict'.format(params.tgt_lang,params.tgt_lang),
                            device=params.cuda_device
                        )
                        acc22 = results[0][1]
                    print('Valid:{} score:{:.4f}-{:.4f}'.format(method, acc11, acc22))
                    avg_valid = (acc11+acc22)/2.0
                    # valid_x = torch.mean(self.loss_fn2(en, Vec_xyx.data))
                    # valid_y = torch.mean(self.loss_fn2(it, Vec_yxy.data))
                    # avg_valid = (valid_x+valid_y)/2.0
                    '''
                    # Unsupervised model selection criterion: mean of
                    # forward and backward mean-cosine (CSLS proxy).
                    f_csls = evaluator.dist_mean_cosine(Vec_xy.data, it)
                    b_csls = evaluator.dist_mean_cosine(Vec_yx.data, en)
                    csls = (f_csls + b_csls) / 2.0
                    # csls = eval.calc_unsupervised_criterion(X_Z)
                    if csls > best_valid_metric:
                        print("New csls value: {}".format(csls))
                        best_valid_metric = csls
                        # Marker file records epoch/accuracy of the best model.
                        fp = open(
                            self.tune_dir +
                            "/best/seed_{}_dico_{}_epoch_{}_acc_{:.3f}-{:.3f}.tmp"
                            .format(seed, params.dico_build, epoch, acc1,
                                    acc2), 'w')
                        fp.close()
                        torch.save(
                            self.X_AE.state_dict(), self.tune_dir +
                            '/best/seed_{}_dico_{}_best_X.t7'.format(
                                seed, params.dico_build))
                        torch.save(
                            self.Y_AE.state_dict(), self.tune_dir +
                            '/best/seed_{}_dico_{}_best_Y.t7'.format(
                                seed, params.dico_build))
                        torch.save(
                            self.D_X.state_dict(), self.tune_dir +
                            '/best/seed_{}_dico_{}_best_Dx.t7'.format(
                                seed, params.dico_build))
                        torch.save(
                            self.D_Y.state_dict(), self.tune_dir +
                            '/best/seed_{}_dico_{}_best_Dy.t7'.format(
                                seed, params.dico_build))
                    # Per-evaluation marker file with current scores.
                    fp = open(
                        self.tune_dir +
                        "/seed_{}_epoch_{}_acc_{:.3f}-{:.3f}_valid_{:.4f}.tmp".
                        format(seed, epoch, acc1, acc2, csls), 'w')
                    fp.close()
                    acc1_epochs.append(acc1)
                    acc2_epochs.append(acc2)
                    csls_epochs.append(csls)
                    f_csls_epochs.append(f_csls)
                    b_csls_epochs.append(b_csls)

            # ---- end of training: record the best epochs by each criterion ----
            csls_fb, epoch_fb = max([
                (score, index) for index, score in enumerate(csls_epochs)
            ])
            fp = open(
                self.tune_dir +
                "/best/seed_{}_epoch_{}_{:.3f}_{:.3f}_{:.3f}.cslsfb".format(
                    seed, epoch_fb, acc1_epochs[epoch_fb],
                    acc2_epochs[epoch_fb], csls_fb), 'w')
            fp.close()
            csls_f, epoch_f = max([
                (score, index) for index, score in enumerate(f_csls_epochs)
            ])
            fp = open(
                self.tune_dir +
                "/best/seed_{}_epoch_{}_{:.3f}_{:.3f}_{:.3f}.cslsf".format(
                    seed, epoch_f, acc1_epochs[epoch_f], acc2_epochs[epoch_f],
                    csls_f), 'w')
            fp.close()
            csls_b, epoch_b = max([
                (score, index) for index, score in enumerate(b_csls_epochs)
            ])
            fp = open(
                self.tune_dir +
                "/best/seed_{}_epoch_{}_{:.3f}_{:.3f}_{:.3f}.cslsb".format(
                    seed, epoch_b, acc1_epochs[epoch_b], acc2_epochs[epoch_b],
                    csls_b), 'w')
            fp.close()
            '''

            # Save the plot for discriminator accuracy and generator loss
            fig = plt.figure()
            plt.plot(range(0, len(D_A_acc_epochs)), D_A_acc_epochs, color='b', label='D_A')
            plt.plot(range(0, len(D_B_acc_epochs)), D_B_acc_epochs, color='r', label='D_B')
            plt.ylabel('D_accuracy')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_D_acc.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(D_A_loss_epochs)), D_A_loss_epochs, color='b', label='D_A')
            plt.plot(range(0, len(D_B_loss_epochs)), D_B_loss_epochs, color='r', label='D_B')
            plt.ylabel('D_losses')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_D_loss.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(G_AB_loss_epochs)), G_AB_loss_epochs, color='b', label='G_AB')
            plt.plot(range(0, len(G_BA_loss_epochs)), G_BA_loss_epochs, color='r', label='G_BA')
            plt.ylabel('G_losses')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_G_loss.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(G_AB_recon_epochs)), G_AB_recon_epochs, color='b', label='G_AB')
            plt.plot(range(0, len(G_BA_recon_epochs)), G_BA_recon_epochs, color='r', label='G_BA')
            plt.ylabel('G_Cycle_loss')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_G_Cycle.png'.format(seed))

            # fig = plt.figure()
            # plt.plot(range(0, len(L_Z_loss_epoches)), L_Z_loss_epoches, color='b', label='L_Z')
            # plt.ylabel('L_Z_loss')
            # plt.xlabel('epochs')
            # plt.legend()
            # fig.savefig(tune_dir + '/seed_{}_stage_{}_L_Z.png'.format(seed,stage))

            fig = plt.figure()
            plt.plot(range(0, len(acc1_epochs)), acc1_epochs, color='b', label='trans_acc1')
            plt.plot(range(0, len(acc2_epochs)), acc2_epochs, color='r', label='trans_acc2')
            plt.ylabel('trans_acc')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_trans_acc.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(csls_epochs)), csls_epochs, color='b', label='csls')
            plt.plot(range(0, len(f_csls_epochs)), f_csls_epochs, color='r', label='csls_f')
            plt.plot(range(0, len(b_csls_epochs)), b_csls_epochs, color='g', label='csls_b')
            plt.ylabel('csls')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_csls.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(g_losses)), g_losses, color='b', label='G_loss')
            plt.ylabel('g_loss')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_g_loss.png'.format(seed))

            fig = plt.figure()
            plt.plot(range(0, len(d_losses)), d_losses, color='b', label='csls')
            plt.ylabel('D_loss')
            plt.xlabel('epochs')
            plt.legend()
            fig.savefig(self.tune_dir + '/seed_{}_d_loss.png'.format(seed))
            plt.close('all')
            '''

        except KeyboardInterrupt:
            print("Interrupted.. saving model !!!")
            torch.save(self.X_AE.state_dict(), 'g_model_interrupt.t7')
            torch.save(self.D_X.state_dict(), 'd_model_interrupt.t7')
            log_file.close()
            exit()

        log_file.close()
        return self.X_AE

    def get_batch_data_fast_new(self, emb_en, emb_it):
        """Sample one random mini-batch from each embedding matrix.

        Row indices are drawn uniformly from the first
        ``params.most_frequent_sampling_size`` rows of each matrix; returns
        the pair ``(en_batch, it_batch)`` wrapped by ``to_variable``.
        """
        params = self.params
        random_en_indices = torch.LongTensor(params.mini_batch_size).random_(
            params.most_frequent_sampling_size)
        random_it_indices = torch.LongTensor(params.mini_batch_size).random_(
            params.most_frequent_sampling_size)
        # Fix: the original called .cuda() unconditionally, which crashes on
        # CPU-only machines; move the indices to the GPU only when one exists.
        if torch.cuda.is_available():
            random_en_indices = random_en_indices.cuda()
            random_it_indices = random_it_indices.cuda()
        en_batch = to_variable(emb_en)[random_en_indices]
        it_batch = to_variable(emb_it)[random_it_indices]
        return en_batch, it_batch

    def export(self,
               src_dico,
               tgt_dico,
               emb_en,
               emb_it,
               seed,
               export_emb=False):
        """Load the best checkpoints, evaluate them, and export induced
        bilingual dictionaries (and optionally the mapped embeddings).

        Writes <tune_dir>/export/{src-tgt,tgt-src}.{dict,intersect,union}
        files; with export_emb=True also dumps the mapped embedding matrices
        via loader.export_embeddings.
        """
        params = _get_eval_params(self.params)
        # NOTE(review): local name 'eval' shadows the builtin.
        eval = Evaluator(params, emb_en, emb_it, torch.cuda.is_available())
        # Export adversarial dictionaries
        # NOTE(review): .cuda() assumes a GPU is present — crashes on CPU-only.
        optim_X_AE = AE(params).cuda()
        optim_Y_AE = AE(params).cuda()
        print('Loading pre-trained models...')
        optim_X_AE.load_state_dict(
            torch.load(self.tune_dir +
                       '/best/seed_{}_dico_{}_best_X.t7'.format(
                           seed, params.dico_build)))
        optim_Y_AE.load_state_dict(
            torch.load(self.tune_dir +
                       '/best/seed_{}_dico_{}_best_Y.t7'.format(
                           seed, params.dico_build)))
        # Map both spaces through the trained encoders.
        X_Z = optim_X_AE.encode(Variable(emb_en)).data
        Y_Z = optim_Y_AE.encode(Variable(emb_it)).data

        mstart_time = timer()
        # Report translation accuracy in both directions for both retrieval
        # methods (nearest-neighbour and CSLS).
        for method in ['nn', 'csls_knn_10']:
            results = get_word_translation_accuracy(params.src_lang,
                                                    src_dico[1],
                                                    X_Z,
                                                    params.tgt_lang,
                                                    tgt_dico[1],
                                                    emb_it,
                                                    method=method,
                                                    dico_eval=self.eval_file,
                                                    device=params.cuda_device)
            acc1 = results[0][1]
            results = get_word_translation_accuracy(params.tgt_lang,
                                                    tgt_dico[1],
                                                    Y_Z,
                                                    params.src_lang,
                                                    src_dico[1],
                                                    emb_en,
                                                    method=method,
                                                    dico_eval=self.eval_file2,
                                                    device=params.cuda_device)
            acc2 = results[0][1]

            # csls = 0
            print('{} takes {:.2f}s'.format(method, timer() - mstart_time))
            print('Method:{} score:{:.4f}-{:.4f}'.format(method, acc1, acc2))

        # Unsupervised criterion: mean of forward/backward mean-cosine.
        f_csls = eval.dist_mean_cosine(X_Z, emb_it)
        b_csls = eval.dist_mean_cosine(Y_Z, emb_en)
        csls = (f_csls + b_csls) / 2.0
        print("Seed:{},ACC:{:.4f}-{:.4f},CSLS_FB:{:.6f}".format(
            seed, acc1, acc2, csls))
        #'''
        print('Building dictionaries...')
        params.dico_build = "S2T&T2S"
        params.dico_method = "csls_knn_10"
        # L2-normalise all matrices before dictionary induction.
        # NOTE(review): emb_en / emb_it are rebound to normalised copies here
        # and used below — order of these statements matters.
        X_Z = X_Z / X_Z.norm(2, 1, keepdim=True).expand_as(X_Z)
        emb_it = emb_it / emb_it.norm(2, 1, keepdim=True).expand_as(emb_it)
        f_dico_induce = build_dictionary(X_Z, emb_it, params)
        f_dico_induce = f_dico_induce.cpu().numpy()
        Y_Z = Y_Z / Y_Z.norm(2, 1, keepdim=True).expand_as(Y_Z)
        emb_en = emb_en / emb_en.norm(2, 1, keepdim=True).expand_as(emb_en)
        b_dico_induce = build_dictionary(Y_Z, emb_en, params)
        b_dico_induce = b_dico_induce.cpu().numpy()

        # Both sets are oriented (src_id, tgt_id) so they can be compared.
        f_dico_set = set([(a, b) for a, b in f_dico_induce])
        b_dico_set = set([(b, a) for a, b in b_dico_induce])

        intersect = list(f_dico_set & b_dico_set)
        union = list(f_dico_set | b_dico_set)

        # Forward dictionary: src word -> tgt word.
        with io.open(
                self.tune_dir +
                '/export/{}-{}.dict'.format(params.src_lang, params.tgt_lang),
                'w',
                encoding='utf-8',
                newline='\n') as f:
            for item in f_dico_induce:
                f.write('{} {}\n'.format(src_dico[0][item[0]],
                                         tgt_dico[0][item[1]]))

        # Backward dictionary: tgt word -> src word.
        with io.open(
                self.tune_dir +
                '/export/{}-{}.dict'.format(params.tgt_lang, params.src_lang),
                'w',
                encoding='utf-8',
                newline='\n') as f:
            for item in b_dico_induce:
                f.write('{} {}\n'.format(tgt_dico[0][item[0]],
                                         src_dico[0][item[1]]))

        # Intersection of both directions, written src->tgt ...
        with io.open(self.tune_dir + '/export/{}-{}.intersect'.format(
                params.src_lang, params.tgt_lang),
                     'w',
                     encoding='utf-8',
                     newline='\n') as f:
            for item in intersect:
                f.write('{} {}\n'.format(src_dico[0][item[0]],
                                         tgt_dico[0][item[1]]))

        # ... and tgt->src (indices swapped because items are (src, tgt)).
        with io.open(self.tune_dir + '/export/{}-{}.intersect'.format(
                params.tgt_lang, params.src_lang),
                     'w',
                     encoding='utf-8',
                     newline='\n') as f:
            for item in intersect:
                f.write('{} {}\n'.format(tgt_dico[0][item[1]],
                                         src_dico[0][item[0]]))

        # Union of both directions, src->tgt and tgt->src.
        with io.open(
                self.tune_dir +
                '/export/{}-{}.union'.format(params.src_lang, params.tgt_lang),
                'w',
                encoding='utf-8',
                newline='\n') as f:
            for item in union:
                f.write('{} {}\n'.format(src_dico[0][item[0]],
                                         tgt_dico[0][item[1]]))

        with io.open(
                self.tune_dir +
                '/export/{}-{}.union'.format(params.tgt_lang, params.src_lang),
                'w',
                encoding='utf-8',
                newline='\n') as f:
            for item in union:
                f.write('{} {}\n'.format(tgt_dico[0][item[1]],
                                         src_dico[0][item[0]]))

        if export_emb:
            # Dump the mapped and the (normalised) original embeddings as text.
            print('Exporting {}-{}.{}'.format(params.src_lang, params.tgt_lang,
                                              params.src_lang))
            loader.export_embeddings(
                src_dico[0],
                X_Z,
                path=self.tune_dir + '/export/{}-{}.{}'.format(
                    params.src_lang, params.tgt_lang, params.src_lang),
                eformat='txt')
            print('Exporting {}-{}.{}'.format(params.src_lang, params.tgt_lang,
                                              params.tgt_lang))
            loader.export_embeddings(
                tgt_dico[0],
                emb_it,
                path=self.tune_dir + '/export/{}-{}.{}'.format(
                    params.src_lang, params.tgt_lang, params.tgt_lang),
                eformat='txt')
            print('Exporting {}-{}.{}'.format(params.tgt_lang, params.src_lang,
                                              params.tgt_lang))
            loader.export_embeddings(
                tgt_dico[0],
                Y_Z,
                path=self.tune_dir + '/export/{}-{}.{}'.format(
                    params.tgt_lang, params.src_lang, params.tgt_lang),
                eformat='txt')
            print('Exporting {}-{}.{}'.format(params.tgt_lang, params.src_lang,
                                              params.src_lang))
            loader.export_embeddings(
                src_dico[0],
                emb_en,
                path=self.tune_dir + '/export/{}-{}.{}'.format(
                    params.tgt_lang, params.src_lang, params.src_lang),
                eformat='txt')
Beispiel #9
0
def train(*,
          folder=None,
          dataset='mnist',
          patch_size=8,
          resume=False,
          log_interval=1,
          device='cpu',
          objective='vae',
          batch_size=64,
          nz=100,
          lr=0.001,
          num_workers=1,
          nb_filters=64,
          nb_draw_layers=1):
    """Train an auto-encoder (`AE`) on patches of an image dataset.

    All arguments are keyword-only.

    Args:
        folder: output directory for checkpoints and sample grids;
            defaults to ``results/<dataset>/<patch_size>x<patch_size>``.
        dataset: dataset name passed to ``load_dataset``.
        patch_size: side length of square patches; ``None`` trains on
            whole images.
        resume: if True, load ``<folder>/net.th`` instead of building a
            fresh model.
        log_interval: print the loss every this many iterations.
        device: torch device string, e.g. ``'cpu'`` or ``'cuda'``.
        objective: objective name forwarded to the ``AE`` constructor.
        batch_size: minibatch size.
        nz: latent dimensionality.
        lr: Adam learning rate.
        num_workers: DataLoader worker processes.
        nb_draw_layers: accepted for interface compatibility; currently
            unused by this function.

    Side effects: writes ``real_samples.png``, ``rec_samples.png``,
    ``fake_samples.png`` and ``net.th`` into ``folder`` every 100
    iterations.
    """
    if folder is None:
        folder = f'results/{dataset}/{patch_size}x{patch_size}'
    # exist_ok replaces the old try/except-pass, which also hid real
    # failures such as permission errors.
    os.makedirs(folder, exist_ok=True)
    act = 'sigmoid'
    nb_epochs = 3000
    dataset = load_dataset(dataset, split='train')
    if patch_size is not None:
        patch_size = int(patch_size)
        dataset = PatchDataset(dataset, patch_size)
    # Infer the channel count from the first example.
    x0, _ = dataset[0]
    nc = x0.size(0)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )
    if resume:
        net = torch.load('{}/net.th'.format(folder))
    else:
        net = AE(
            latent_size=nz,
            nc=nc,
            w=patch_size,
            ndf=nb_filters,
            act=act,
            objective=objective,
        )
    opt = optim.Adam(net.parameters(), lr=lr)
    net = net.to(device)
    niter = 0
    for epoch in range(nb_epochs):
        # The loop index from the original enumerate() was unused;
        # iterate the loader directly.
        for X, _ in dataloader:
            net.zero_grad()
            X = X.to(device)
            Xrec, mu, logvar = net(X)
            rec, kld = net.loss_function(X, Xrec, mu, logvar)
            loss = rec + kld
            loss.backward()
            opt.step()
            if niter % log_interval == 0:
                print(
                    f'Epoch: {epoch:05d}/{nb_epochs:05d} iter: {niter:05d} loss: {loss.item():.2f} rec: {rec.item():.2f} kld:{kld.item():.2f}'
                )
            if niter % 100 == 0:
                Xsamples = net.sample(nb_examples=100)
                # Rescale from [-1, 1] to [0, 1] only for tanh output;
                # with the hard-coded sigmoid activation these are no-ops.
                X = 0.5 * (X + 1) if act == 'tanh' else X
                Xrecs = 0.5 * (Xrec + 1) if act == 'tanh' else Xrec
                Xsamples = 0.5 * (Xsamples + 1) if act == 'tanh' else Xsamples
                X = X.detach().to('cpu').numpy()
                Xrecs = Xrecs.detach().to('cpu').numpy()
                Xsamples = Xsamples.detach().to('cpu').numpy()
                imsave(f'{folder}/real_samples.png', grid_of_images_default(X))
                imsave(f'{folder}/rec_samples.png',
                       grid_of_images_default(Xrecs))
                imsave(f'{folder}/fake_samples.png',
                       grid_of_images_default(Xsamples))
                torch.save(net, '{}/net.th'.format(folder))
            niter += 1
Beispiel #10
0
# Flat training script. Relies on names defined earlier in the file:
# `d` (grid side length), `uncertainty_data` (a d*d-by-2 tensor — TODO
# confirm shape against the producing code), and the `AE` model class.
# NOTE(review): output depends on the order of RNG calls below
# (random.randint, torch.randperm), so statements must not be reordered.

# Random horizontal offset for the hand-picked training indices.
z = random.randint(1, 10)
ind_x = list(range(z + 80, z + 90, 1))
# ind_y = [int((a-50)*(a-50)*0.2 + a + 1) for a in ind_x]
ind_y = [int(-1 * a + 101)
         for a in ind_x]  # Change these things to make shape of training data
# Flatten (x, y) grid coordinates into linear indices (row-major).
index = [a + b * d for (a, b) in zip(ind_x, ind_y)]
print(ind_x, ind_y, index)
# index =  torch.randperm(d*d)[:20] --> If you want to select random location for training data
# NOTE(review): this overwrites the hand-picked `index` above, making the
# list comprehension dead code — presumably an experiment toggle; keep or
# comment out one of the two assignments deliberately.
index = torch.randperm(d * d)[:16]

# Select the training subset from the flattened grid.
uncertainty_data = uncertainty_data.reshape((d * d, 2))
train_data = uncertainty_data[index, :]

### Model and optimizer initialization
model = AE()
# NOTE(review): `optim` shadows the torch.optim module name if it was
# imported earlier; renaming would be safer but may break later code.
optim = torch.optim.Adam(model.parameters())
criterion = torch.nn.MSELoss()

### Training
# Autoencoder reconstruction training: shuffle each epoch, minibatches of 4.
for i in range(
        1500):  # change 1000 to 0 if you want to see not-trained version.
    td = train_data[torch.randperm(train_data.size(0))]
    for batch in td.split(4, dim=0):
        out = model(batch)  # train_data
        loss = criterion(out, batch)  # train_data

        optim.zero_grad()
        loss.backward()
        optim.step()
    # Periodic progress report (prints the last minibatch's loss).
    if i % 500 == 0:
        print(i, loss.item())