Example #1
    def build_model(self,
                    lr,
                    Epoch,
                    loss='WGANLoss',
                    optim='Adam',
                    betas=(0.5, 0.999)):

        self.Epoch = Epoch
        #### Build Model ####
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            self.Generator = Generator().cuda(self.device)
            self.Discriminator = Discriminator().cuda(self.device)
        else:
            self.device = torch.device('cpu')
            self.Generator = Generator()
            self.Discriminator = Discriminator()

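        # Resume from previously saved checkpoints if they exist. torch.load
        # here deserializes entire nn.Module objects, so the Generator and
        # Discriminator class definitions must still be importable.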
        if 'Generator.pkl' in os.listdir():
            self.Generator = torch.load('Generator.pkl', map_location=self.device)
        if 'Discriminator.pkl' in os.listdir():
            self.Discriminator = torch.load('Discriminator.pkl', map_location=self.device)

        #### Build Loss Function ####
        self.criterion = utils.Loss(loss=loss)

        #### Build Optimizers ####
        self.optimG = utils.optim(self.Generator,
                                  lr=lr,
                                  optim=optim,
                                  betas=betas)
        self.optimD = utils.optim(self.Discriminator,
                                  lr=lr,
                                  optim=optim,
                                  betas=betas)
Example #2
    def build_model(self, lr, EPOCH):
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            self.VAE = Varational_AutoEncoder().cuda(device=self.device)

        else:
            self.device = torch.device('cpu')
            self.VAE = Varational_AutoEncoder()

        # Resume from a previously saved checkpoint *before* building the
        # optimizer, so the optimizer updates the restored parameters.
        if 'VAE.pkl' in os.listdir():
            self.VAE = torch.load('VAE.pkl', map_location=self.device)

        self.optim = utils.optim(self.VAE, lr=lr)
        self.EPOCH = EPOCH
Example #3
 def train(self):
     embedded = self.creat_model()
     lr = tf.placeholder(dtype=tf.float32, name="learning_rate")  # learning rate
     global_step = tf.Variable(0, name='global_step', trainable=False)
     w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
     b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))
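     # w and b are the learnable scale and offset of the GE2E similarity
     # (sim = w * cos + b), initialized to 10 and -5 as in the GE2E paper.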
     sim_matrix = similarity(embedded, w, b)
     loss = loss_cal(sim_matrix, type=config.loss)
     trainable_vars = tf.trainable_variables()  # get variable list
     optimizer = optim(lr)  # get optimizer (type is determined by configuration)
     grads, vars = zip(*optimizer.compute_gradients(loss))  # compute gradients of variables with respect to loss
     grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
     grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # smaller gradient scale for w, b
     train_op = optimizer.apply_gradients(zip(grads_rescale, vars),
                                          global_step=global_step)  # gradient update operation
     # check variables memory
     variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars]))
     print("total variables :", variable_count)
     tf.summary.scalar("loss", loss)
     merged = tf.summary.merge_all()
     saver = tf.train.Saver()
     with tf.Session() as sess:
         tf.global_variables_initializer().run()
         os.makedirs(os.path.join(config.model_path, "Check_Point"), exist_ok=True)  # make folder to save model
         os.makedirs(os.path.join(config.model_path, "logs"), exist_ok=True)  # make folder to save log
         writer = tf.summary.FileWriter(os.path.join(config.model_path, "logs"), sess.graph)
         lr_factor = 1  # lr decay factor (halved every 1,000 iterations below)
         loss_acc = 0  # accumulated loss (for a running average of the loss)
         for iter in range(config.iteration):
             # run forward and backward propagation and update parameters
             _, loss_cur, summary = sess.run([train_op, loss, merged],
                                             feed_dict={self.fingerprint_input: random_batch(),
                                                        lr: config.lr * lr_factor})
             loss_acc += loss_cur  # accumulated loss for each 100 iteration
             if iter % 10 == 0:
                 writer.add_summary(summary, iter)  # write at tensorboard
             if (iter + 1) % 100 == 0:
                 print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                 loss_acc = 0  # reset accumulated loss
             if (iter + 1) % 1000 == 0:
                 lr_factor /= 2  # lr decay
                 print("learning rate is decayed! current lr : ", config.lr * lr_factor)
             if (iter + 1) % 1000 == 0:
                 saver.save(sess, os.path.join(config.model_path, "./Check_Point/model.ckpt"),
                            global_step=iter // 1000)
                 print("model is saved!")
Example #4
    def train(self, Identity=True):
        #### Check build_model ####
        try:
            getattr(self, 'GeneratorA')
            getattr(self, 'GeneratorB')

            getattr(self, 'DiscriminatorA')
            getattr(self, 'DiscriminatorB')

            getattr(self, 'criterion')
            getattr(self, 'L1Loss')

        except AttributeError:
            assert False, 'build_model() must be called before train()'
        
        #### Check load_dataset ####
        try:
            getattr(self, 'dataloaderA')
            getattr(self, 'dataloaderB')

        except AttributeError:
            assert False, 'load_dataset() must be called before train()'
        
        ImagePoolA = []
        ImagePoolB = []

        # Build the optimizers once, outside the training loops, so optimizer
        # state (e.g. Adam's running moments) persists across iterations.
        self.optimG_A = utils.optim(self.GeneratorA, lr=self.lr, optim=self.optim, betas=self.betas)
        self.optimG_B = utils.optim(self.GeneratorB, lr=self.lr, optim=self.optim, betas=self.betas)
        self.optimD_A = utils.optim(self.DiscriminatorA, lr=self.lr, optim=self.optim, betas=self.betas)
        self.optimD_B = utils.optim(self.DiscriminatorB, lr=self.lr, optim=self.optim, betas=self.betas)

        for epoch in range(self.EPOCH):
            if epoch < self.step:
                continue
            if epoch >= 100:
                # Linearly decay the learning rate over the last 100 epochs
                # and apply it to every optimizer.
                lr = 0.0002 - 0.0002 * (epoch - 100) / 100
                for optimizer in (self.optimG_A, self.optimG_B,
                                  self.optimD_A, self.optimD_B):
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr

            for i, data in enumerate(zip(self.dataloaderA, self.dataloaderB)):
                real_A = data[0][0].to(self.device)
                real_B = data[1][0].to(self.device)
                
                fake_A = self.GeneratorA(real_B)
                fake_B = self.GeneratorB(real_A)
                
                recon_B = self.GeneratorB(fake_A)
                recon_A = self.GeneratorA(fake_B)

                DA_real_A = self.DiscriminatorA(real_A)
                DB_real_B = self.DiscriminatorB(real_B)
                
                DA_fake_A = self.DiscriminatorA(fake_A)
                DB_fake_B = self.DiscriminatorB(fake_B)
                
                
                ### train Generator ###
                self.optimG_A.zero_grad()
                self.optimG_B.zero_grad()
                
                lossG_A = self.criterion(DA_fake_A, torch.ones(DA_fake_A.shape).to(self.device))
                lossG_B = self.criterion(DB_fake_B, torch.ones(DB_fake_B.shape).to(self.device))
                
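                # Cycle-consistency loss: reconstructions G_A(G_B(real_A)) and
                # G_B(G_A(real_B)) should match the originals, weighted by
                # cycleLambda.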
                cycleLoss= (self.L1Loss(recon_A, real_A) + self.L1Loss(recon_B, real_B)) * self.cycleLambda
                
                LossG = lossG_A + lossG_B + cycleLoss
                if Identity:
                    LossG += 0.5 * self.cycleLambda * (self.L1Loss(fake_B, real_A) + self.L1Loss(fake_A, real_B))
                LossG.backward(retain_graph = True)
                
                self.optimG_A.step()
                self.optimG_B.step()

                
                ### train Discriminator ###
                
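                # ImagePool is assumed to keep a history buffer of previously
                # generated images (the CycleGAN image-pool trick) and to return
                # a mix of old and new fakes for training the discriminators.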
                fake_A, p_A = ImagePool(ImagePoolA, fake_A, device = self.device)
                fake_B, p_B = ImagePool(ImagePoolB, fake_B, device = self.device)
            
                DA_real_A = self.DiscriminatorA(real_A)
                DB_real_B = self.DiscriminatorB(real_B)
                self.optimD_A.zero_grad()
                self.optimD_B.zero_grad()
                
                
                
                lossA_real = self.criterion(DA_real_A, torch.ones(DA_real_A.shape).to(self.device))
                lossA_fake = self.criterion(DA_fake_A, torch.zeros(DA_fake_A.shape).to(self.device))
                lossB_real = self.criterion(DB_real_B, torch.ones(DB_real_B.shape).to(self.device))
                lossB_fake = self.criterion(DB_fake_B, torch.zeros(DB_fake_B.shape).to(self.device))
                
                (lossA_real + lossA_fake).backward(retain_graph = True)
                (lossB_real + lossB_fake).backward(retain_graph = True)
                
                self.optimD_A.step()
                self.optimD_B.step()    

                utils.PresentationExperience(epoch, i, 100,
                                             G=LossG.item(),
                                             D_A=(lossA_real + lossA_fake).item(),
                                             D_B=(lossB_real + lossB_fake).item())
                
                if i % 100 == 99:
                    torch.save(self.DiscriminatorA, 'DiscriminatorA.pkl')
                    torch.save(self.DiscriminatorB, 'DiscriminatorB.pkl')
                    torch.save(self.GeneratorA, 'GeneratorA.pkl')
                    torch.save(self.GeneratorB, 'GeneratorB.pkl')
Example #5
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(
        shape=[None, config.N * config.M,
               40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=batch, dtype=tf.float32,
            time_major=True)  # for TI-VS must use dynamic rnn
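        # With time_major=True, outputs has shape [time, batch, proj], so
        # outputs[-1] is the LSTM output at the final time step.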
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(
        lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(
        loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]
                     ] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(
        zip(grads_rescale,
            vars), global_step=global_step)  # gradient update operation

    # check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    # training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        if os.path.exists(path):
            print("Restore from {}".format(
                os.path.join(path, "Check_Point/model.ckpt-2")))
            saver.restore(sess, os.path.join(path, "Check_Point/model.ckpt-2")
                          )  # restore variables from selected ckpt file
        else:
            os.makedirs(os.path.join(path, "Check_Point"),
                        exist_ok=True)  # make folder to save model
            os.makedirs(os.path.join(path, "logs"),
                        exist_ok=True)  # make folder to save log

        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor (halved every 3,000 iterations below)
        loss_acc = 0  # accumulated loss (for a running average of the loss)

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={
                                                batch: random_batch(),
                                                lr: config.lr * lr_factor
                                            })

            loss_acc += loss_cur  # accumulated loss for each 100 iteration

            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write at tensorboard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
            if (iter + 1) % 3000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ",
                      config.lr * lr_factor)
            if (iter + 1) % 2500 == 0:
                saver.save(sess,
                           os.path.join(path, "./Check_Point/model.ckpt"),
                           global_step=iter // 2500)
                print("model is saved!")
Example #6
    def train(self):
        #### Check build_model ####
        try:
            getattr(self, 'Generator')

            getattr(self, 'Discriminator')

            getattr(self, 'criterion')
            getattr(self, 'L1Loss')

        except AttributeError:
            assert False, 'build_model() must be called before train()'

        #### Check load_dataset ####
        try:
            getattr(self, 'dataloader')

        except AttributeError:
            assert False, 'load_dataset() must be called before train()'

        # Build the optimizers once, outside the training loops, so optimizer
        # state (e.g. Adam's running moments) persists across iterations.
        self.optimG = utils.optim(self.Generator,
                                  lr=self.lr,
                                  optim=self.optim,
                                  betas=self.betas)
        self.optimD = utils.optim(self.Discriminator,
                                  lr=self.lr,
                                  optim=self.optim,
                                  betas=self.betas)

        for epoch in range(self.EPOCH):
            if epoch < self.step:
                continue
            if epoch >= 100:
                # Linearly decay the learning rate over the last 100 epochs
                # and apply it to both optimizers.
                lr = 0.0002 - 0.0002 * (epoch - 100) / 100
                for optimizer in (self.optimG, self.optimD):
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr

            for i, data in enumerate(self.dataloader):
                real = data[0].to(self.device)  # data is assumed to be an (images, labels) batch

                fake = self.Generator(real)

                D_real = self.Discriminator(real)

                D_fake = self.Discriminator(fake)

                ### train Generator ###
                self.optimG.zero_grad()

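                # Generator loss: push D(fake) toward 1, i.e. make the
                # discriminator label generated samples as real.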
                LossG = self.criterion(
                    D_fake,
                    torch.ones(D_fake.shape).to(self.device))
                LossG.backward(retain_graph=True)

                self.optimG.step()

                ### train Discriminator ###

                self.optimD.zero_grad()

                loss_real = self.criterion(
                    D_real,
                    torch.ones(D_real.shape).to(self.device))
                loss_fake = self.criterion(
                    D_fake,
                    torch.zeros(D_fake.shape).to(self.device))

                (loss_real + loss_fake).backward(retain_graph=True)

                self.optimD.step()

                utils.PresentationExperience(epoch,
                                             i,
                                             100,
                                             G=LossG.item(),
                                             D=(loss_real + loss_fake).item())

                if i % 100 == 99:
                    torch.save(self.Discriminator, 'Discriminator.pkl')
                    torch.save(self.Generator, 'Generator.pkl')
Example #7
def train(path):
    tf.reset_default_graph()    # reset graph

    # draw graph
    batch = tf.placeholder(shape=[None, config.N * config.M, 40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj) for i in range(config.num_layer)]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)    # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=batch, dtype=tf.float32, time_major=True)   # for TI-VS must use dynamic rnn
        embedded = outputs[-1]                            # the last output is the embedded d-vector
        embedded = normalize(embedded)                    # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()               # get variable list
    optimizer = optim(lr)                                    # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))    # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)       # l2 norm clipping by 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]   # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars), global_step=global_step)   # gradient update operation
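    # Following the GE2E training recipe, the gradients of w and b (the first
    # two trainable variables, created before the LSTM) are scaled down by a
    # factor of 0.01 relative to the other parameters.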

    # check variables memory
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    # training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        if os.path.exists(path):
            print("Restore from {}".format(os.path.join(path, "Check_Point/model.ckpt-2")))
            saver.restore(sess, os.path.join(path, "Check_Point/model.ckpt-2"))  # restore variables from selected ckpt file
        else:
            os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True)  # make folder to save model
            os.makedirs(os.path.join(path, "logs"), exist_ok=True)          # make folder to save log

        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1   # lr decay factor ( 1/2 per 10000 iteration)
        loss_acc = 0    # accumulated loss ( for running average of loss)

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                  feed_dict={batch: random_batch(), lr: config.lr*lr_factor})

            loss_acc += loss_cur    # accumulated loss for each 100 iteration

            if iter % 10 == 0:
                writer.add_summary(summary, iter)   # write at tensorboard
            if (iter+1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter+1),loss_acc/100))
                loss_acc = 0                        # reset accumulated loss
            if (iter+1) % 10000 == 0:
                lr_factor /= 2                      # lr decay
                print("learning rate is decayed! current lr : ", config.lr*lr_factor)
            if (iter+1) % 10000 == 0:
                saver.save(sess, os.path.join(path, "./Check_Point/model.ckpt"), global_step=iter//10000)
                print("model is saved!")
Example #8
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(
        shape=[None, config.N * config.M,
               40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=batch, dtype=tf.float32,
            time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(
        lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(
        loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]
                     ] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(
        zip(grads_rescale,
            vars), global_step=global_step)  # gradient update operation

    # check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    # training session
    # with tf.Session() as sess:
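    # allow_growth=True lets TensorFlow allocate GPU memory on demand instead
    # of reserving the whole device up front.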
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        tf.global_variables_initializer().run()
        os.makedirs(os.path.join(path, "Check_Point"),
                    exist_ok=True)  # make folder to save model
        os.makedirs(os.path.join(path, "logs"),
                    exist_ok=True)  # make folder to save log
        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor ( 1/2 per 10000 iteration)
        loss_acc = 0  # accumulated loss ( for running average of loss)

        train_times = []  # per-iteration timing records (start_end_elapsed strings)
        total_times = 0  # total training time accumulated over all iterations
        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            # record the start time of this training iteration
            # (time.clock() was removed in Python 3.8; time.perf_counter() is the replacement)
            begin_time = time.perf_counter()
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={
                                                batch: random_batch(),
                                                lr: config.lr * lr_factor
                                            })
            # record the end time of this iteration and accumulate timing stats
            end_time = time.perf_counter()
            total_times += end_time - begin_time
            train_times.append(
                str(begin_time) + '_' + str(end_time) + '_' +
                str(end_time - begin_time))

            loss_acc += loss_cur  # accumulated loss for each 100 iteration

            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write at tensorboard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
                print("iter: {}, elapsed: {}s".format(iter, str(end_time - begin_time)))
            if (iter + 1) % 10000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ",
                      config.lr * lr_factor)
            if (iter + 1) % 10000 == 0:
                saver.save(sess,
                           os.path.join(path, "./Check_Point/model.ckpt"),
                           global_step=iter // 10000)
                print("model is saved!")
        # save the final model after the last iteration
        saver.save(sess,
                   os.path.join(path, "./Check_Point/model.ckpt"),
                   global_step=iter)
        print("model is saved!")

        # write the per-iteration timing statistics to a text file
        with open('GE2E_epoch_speakers{}_batch_speakers{}_utts_per_speaker{}_iter{}_iteration_times.txt'.format(
                config.spk_num, config.N, config.M, config.iteration),
                  mode='w',
                  encoding='utf-8') as wf:
            wf.write(
                "speakers per epoch: {}; speakers per batch: {}; utterances per speaker: {}; "
                "total iterations: {}; average time per training iteration: {}\n".format(
                    config.spk_num, config.N, config.M, config.iteration,
                    total_times / config.iteration))
            wf.write("start time_end time_elapsed\n")
            for line in train_times:
                wf.write(line + '\n')
Example #9
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(
        shape=[None, config.N * config.M,
               40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=batch, dtype=tf.float32,
            time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(
        lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(
        loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]
                     ] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(
        zip(grads_rescale,
            vars), global_step=global_step)  # gradient update operation

    # check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    iter = 0

    # training session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        if config.restore:

            # Restore saved model if the user requested it, default = True
            try:
                ckpt = tf.train.latest_checkpoint(
                    checkpoint_dir=os.path.join(path, "Check_Point"))

                #                if (checkpoint_state and checkpoint_state.model_checkpoint_path):
                #                    print('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
                #saver = tf.train.import_meta_graph(os.path.join(path,"Check_Point/model.cpkt.meta"))

                #ckpt = tf.train.load_checkpoint(os.path.join(path,"Check_Point/model"))
                saver.restore(sess, ckpt)

#                else:
#                    print('No model to load at {}'.format(save_dir))

#                    saver.save(sess, checkpoint_path, global_step=global_step)

            except:
                print('Cannot restore checkpoint exception')

        #if loaded == 0:
        #    raise AssertionError("ckpt file does not exist! Check config.model_num or config.model_path.")

        #print("train file path : ", config.test_path)

        else:

            os.makedirs(os.path.join(path, "Check_Point"),
                        exist_ok=True)  # make folder to save model
            os.makedirs(os.path.join(path, "logs"),
                        exist_ok=True)  # make folder to save log

        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor ( 1/2 per 10000 iteration)
        loss_acc = 0  # accumulated loss ( for running average of loss)
        iter = 0
        training_data_size = len(os.listdir(config.train_path))
        print("train_size: ", training_data_size)
        prev_iter = -1

        while iter < config.iteration:
            prev_iter = iter

            # run forward and backward propagation and update parameters
            iter, _, loss_cur, summary = sess.run(
                [global_step, train_op, loss, merged],
                feed_dict={
                    batch: random_batch(),
                    lr: config.lr * lr_factor
                })

            loss_acc += loss_cur  # accumulated loss for each 100 iteration

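            # If global_step jumped by more than 1, training was just restored
            # from a checkpoint: recompute the epoch counter and decayed lr_factor.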
            if (iter - prev_iter > 1):
                epoch = config.N * (iter + 1) // training_data_size
                #lr_factor = lr_factor / (2**(epoch//100))
                lr_factor = lr_factor / (2**(iter // 10000))
                print("restored epoch:", epoch)
                print("restored learning rate:", lr_factor * config.lr)

            #if iter % 1000 == 0:
            #    writer.add_summary(summary, iter)   # write at tensorboard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss

            #if config.N * (iter+1) % training_data_size == 0:
            #    epoch = epoch + 1
            #    print("epoch: ", epoch)

            if (iter + 1) % 10000 == 0:
                lr_factor /= 2
                print("learning rate is decayed! current lr : ",
                      config.lr * lr_factor)

            #if ((config.N * (iter+1)) / training_data_size)%100  == 0:
            #    lr_factor = lr_factor / 2
            #    print("learning factor: " , lr_factor)
            #    print("learning rate is decayed! current lr : ", config.lr*lr_factor)

            if (iter + 1) % 5000 == 0:
                saver.save(sess,
                           os.path.join(path, "Check_Point/model.ckpt"),
                           global_step=iter)
                writer.add_summary(summary, iter)  # write at tensorboard
                print("model is saved!")
Example #10
def train(path, args):
    tf.reset_default_graph()  # reset graph
    timestamp = time_string() if args.time_string is None else args.time_string

    # draw graph
    feeder = Feeder(args.train_filename, args, hparams)

    output_classes = max(
        [int(f) for f in feeder.total_emt]) + 1 if args.model_type in [
            'emt', 'accent'
        ] else max([int(f) for f in feeder.total_spk]) + 1

    batch = tf.placeholder(
        shape=[args.N * args.M, None, config.n_mels],
        dtype=tf.float32)  # input batch (time x batch x n_mel)
    labels = tf.placeholder(shape=[args.N * args.M], dtype=tf.int32)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedded = triple_lstm(batch)
    print("Training {} Discriminator Model".format(args.model_type))
    encoder = ReferenceEncoder(
        filters=hparams.reference_filters,
        kernel_size=(3, 3),
        strides=(2, 2),
        is_training=True,
        scope='Tacotron_model/inference/pretrained_ref_enc_{}'.format(
            args.model_type),
        depth=hparams.reference_depth)  # [N, 128])
    embedded = encoder(batch)
    embedded = normalize(embedded)

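    # The normalized reference-encoder embeddings are either classified with a
    # softmax head (discriminator mode) or scored with the GE2E similarity below.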
    if args.discriminator:
        logit = tf.layers.dense(
            embedded,
            output_classes,
            name='Tacotron_model/inference/pretrained_ref_enc_{}_dense'.format(
                args.model_type))
        labels_one_hot = tf.one_hot(tf.to_int32(labels), output_classes)
        # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit,labels=labels_one_hot))
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logit,
                                                    labels=labels_one_hot))
        acc, acc_op = tf.metrics.accuracy(labels=tf.argmax(labels_one_hot, 1),
                                          predictions=tf.argmax(logit, 1))
        val_acc, val_acc_op = tf.metrics.accuracy(
            labels=tf.argmax(labels_one_hot, 1),
            predictions=tf.argmax(logit, 1))
    else:
        # loss
        sim_matrix = similarity(embedded,
                                w,
                                b,
                                args.N,
                                args.M,
                                P=hparams.reference_depth)
        print("similarity matrix size: ", sim_matrix.shape)
        loss = loss_cal(sim_matrix, args.N, args.M, type=config.loss)
        val_acc_op = tf.constant(1.)
        acc_op = tf.constant(1.)  # placeholder so the sess.run below also works without the discriminator head

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(
        lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(
        loss))  # compute gradients of variables with respect to loss

    if args.discriminator:
        grads_rescale = grads
    else:
        grads_clip, _ = tf.clip_by_global_norm(grads,
                                               3.0)  # l2 norm clipping by 3
        grads_rescale = [0.01 * grad for grad in grads_clip[:2]
                         ] + grads_clip[2:]  # smaller gradient scale for w, b

    train_op = optimizer.apply_gradients(
        zip(grads_rescale,
            vars), global_step=global_step)  # gradient update operation

    # check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver(max_to_keep=20)
    loss_window = ValueWindow(100)
    acc_window = ValueWindow(100)
    val_loss_window = ValueWindow(5)
    val_acc_window = ValueWindow(5)

    # training session
    with tf.Session() as sess:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        checkpoint_folder = os.path.join(path, "checkpoints", timestamp)
        logs_folder = os.path.join(path, "logs", timestamp)
        os.makedirs(checkpoint_folder,
                    exist_ok=True)  # make folder to save model
        os.makedirs(logs_folder, exist_ok=True)  # make folder to save log
        model_name = '{}_disc_model.ckpt'.format(args.model_type)
        checkpoint_path = os.path.join(checkpoint_folder, model_name)

        if args.restore:
            checkpoint_state = tf.train.get_checkpoint_state(checkpoint_folder)
            if (checkpoint_state and checkpoint_state.model_checkpoint_path):
                print('Loading checkpoint {}'.format(
                    checkpoint_state.model_checkpoint_path))
                saver.restore(sess, checkpoint_state.model_checkpoint_path)
            else:
                print('No model to load at {}'.format(checkpoint_folder))
                saver.save(sess, checkpoint_path, global_step=global_step)
        else:
            print('Starting new training!')
            saver.save(sess, checkpoint_path, global_step=global_step)

        writer = tf.summary.FileWriter(logs_folder, sess.graph)
        lr_factor = 1  # lr decay factor (step-based schedule below depends on model_type)

        iterations = 30000 if args.model_type == 'emt' else config.iteration
        for iter in range(iterations):
            if args.discriminator:
                batch_iter, _, labels_iter = feeder.random_batch_disc()
            else:
                batch_iter, _, labels_iter = feeder.random_batch()
            # run forward and backward propagation and update parameters
            step, _, loss_cur, summary, acc_cur = sess.run(
                [global_step, train_op, loss, merged, acc_op],
                feed_dict={
                    batch: batch_iter,
                    labels: labels_iter,
                    lr: config.lr * lr_factor
                })

            loss_window.append(loss_cur)
            acc_window.append(acc_cur)

            if step % 10 == 0:
                writer.add_summary(summary, step)  # write at tensorboard
            if (step + 1) % 20 == 0:
                val_loss_cur_batch = 0
                val_acc_cur_batch = 0
                for val_iter in range(VAL_ITERS):
                    if args.discriminator:
                        batch_iter, _, labels_iter = feeder.random_batch_disc(
                            TEST=True)
                    else:
                        batch_iter, _, labels_iter = feeder.random_batch(
                            TEST=True)
                    # run forward propagation only (no parameter update) on a validation batch
                    val_loss_cur, val_acc_cur = sess.run([loss, val_acc_op],
                                                         feed_dict={
                                                             batch: batch_iter,
                                                             labels:
                                                             labels_iter
                                                         })
                    val_loss_cur_batch += val_loss_cur
                    val_acc_cur_batch += val_acc_cur
                val_loss_cur_batch /= VAL_ITERS
                val_acc_cur_batch /= VAL_ITERS
                val_loss_window.append(val_loss_cur_batch)
                val_acc_window.append(val_acc_cur_batch)

                message = "(iter : %d) loss: %.4f" % (
                    (step + 1), loss_window.average)
                if args.discriminator:
                    message += ', acc: {:.2f}%'.format(acc_window.average)
                message += ", val_loss: %.4f" % (val_loss_window.average)
                if args.discriminator:
                    message += ', val_acc: {:.2f}%'.format(
                        val_acc_window.average)
                print(message)

            lr_changed = False
            if args.model_type == 'emt':
                if step > 6000:
                    lr_changed = True if lr_factor != .01 else False
                    lr_factor = .01
                elif step > 4000:
                    lr_changed = True if lr_factor != .1 else False
                    lr_factor = .1
                if lr_changed:
                    print("learning rate is decayed! current lr : ",
                          config.lr * lr_factor)
            elif args.model_type == 'spk':
                if step > 300:  #4000:
                    lr_changed = True if lr_factor != .01 else False
                    lr_factor = .01
                elif step > 180:  #2500:
                    lr_changed = True if lr_factor != .1 else False
                    lr_factor = .1
                if lr_changed:
                    print("learning rate is decayed! current lr : ",
                          config.lr * lr_factor)
            if step % config.save_checkpoint_iters == 0:
                saver.save(sess, checkpoint_path, global_step=global_step)
Example #11
def train(path):
    tf.reset_default_graph()  # reset graph

    # Draw train graph
    train_batch = tf.placeholder(
        shape=[None, config.N * config.M,
               40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate

    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # Embedding LSTM (3-layer default)
    with tf.variable_scope("lstm", reuse=None):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        print(config.num_layer)
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=train_batch, dtype=tf.float32,
            time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # Define loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # Optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(
        lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(
        loss))  # compute gradients of variables with respect to loss
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # l2 norm clipping by 3
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]
                     ] + grads_clip[2:]  # smaller gradient scale for w, b
    train_op = optimizer.apply_gradients(
        zip(grads_rescale,
            vars), global_step=global_step)  # gradient update operation

    # Check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # TensorBoard vars declaration
    lr_summ = tf.summary.scalar(name='My_LR', tensor=lr)
    loss_summary = tf.summary.scalar("loss_ORIG", loss)
    w_summary = tf.summary.histogram('My_Weights', w)
    b_summary = tf.summary.histogram('My_Bias', b)
    merged = tf.summary.merge_all()  # merge all TB vars into one
    saver = tf.train.Saver(
        max_to_keep=40
    )  # create a saver, max_to_keep=40 w/ every 2500 steps = around 100000

    # Training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        os.makedirs(os.path.join(path, "Check_Point"),
                    exist_ok=True)  # make folder to save model
        os.makedirs(os.path.join(path, "logs"),
                    exist_ok=True)  # make folder to save TensorBoard logs
        os.makedirs(
            "./Plots/",
            exist_ok=True)  # make folder to save all plots and .txt logs
        os.makedirs("./Plots/" + path[11:],
                    exist_ok=True)  # makes the subdirs for individual plots
        log_path = "./Plots/" + path[11:] + "/" + path[
            11:] + ".txt"  # declares .txt log files naming convention

        # Block of code to make folders of runs for TensorBoard visualization
        logspath = os.path.join(path, "logs")
        previous_runs = os.listdir(logspath)  # existing run_* folders
        if len(previous_runs) == 0:
            run_number = 1
        else:
            run_number = max(
                [int(s.split('run_')[1]) for s in previous_runs]) + 1
        curr_logdir = 'run_%02d' % run_number
        writer = tf.summary.FileWriter(
            os.path.join(logspath, curr_logdir),
            sess.graph)  # Define writer for TensorBoard
        # END of Block

        # epoch = 0      # not used
        lr_factor = 1  # LR decay factor (halved every 5,000 iterations below)
        loss_acc = 0  # accumulated loss (for calculating average of loss)

        # declares lists for figure creation
        EER_list = []  # collects the EER results every 100 steps for plotting
        train_loss_list = [
        ]  # collects the training loss results every 100 steps for plotting
        # LR_decay_list = []  # not used

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={
                                                train_batch: random_batch(),
                                                lr: config.lr * lr_factor
                                            })

            loss_acc += loss_cur  # accumulated loss for each 100 iteration

            # write train_loss to TensorBoard
            if iter % 10 == 0:
                writer.add_summary(summary, iter)
            # perform validation
            if (iter + 1) % 100 == 0:
                # print("(iter : %d) loss: %.4f" % ((iter+1),loss_acc/100))
                # print("==============VALIDATION START!============")

                # Draw validation graph
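                # Note: building these ops inside the training loop adds new
                # nodes to the default graph on every validation pass; the LSTM
                # variables are shared via reuse=tf.AUTO_REUSE, but the graph
                # itself keeps growing.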
                enroll = tf.placeholder(
                    shape=[None, config.N * config.M, 40], dtype=tf.float32
                )  # enrollment batch (time x batch x n_mel)
                valid = tf.placeholder(
                    shape=[None, config.N * config.M, 40], dtype=tf.float32
                )  # validation batch (time x batch x n_mel)
                val_batch = tf.concat([enroll, valid], axis=1)

                # Embedding LSTM (3-layer default)
                with tf.variable_scope("lstm", reuse=tf.AUTO_REUSE):
                    lstm_cells = [
                        tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                                num_proj=config.proj)
                        for i in range(config.num_layer)
                    ]
                    lstm = tf.contrib.rnn.MultiRNNCell(
                        lstm_cells)  # make lstm op and variables
                    outputs, _ = tf.nn.dynamic_rnn(
                        cell=lstm,
                        inputs=val_batch,
                        dtype=tf.float32,
                        time_major=True)  # for TI-VS must use dynamic rnn
                    embedded = outputs[
                        -1]  # the last output is the embedded d-vector
                    embedded = normalize(embedded)  # normalize
                # print("embedded size: ", embedded.shape)

                # enrollment embedded vectors (speaker model)
                enroll_embed = normalize(
                    tf.reduce_mean(tf.reshape(embedded[:config.N *
                                                       config.M, :],
                                              shape=[config.N, config.M, -1]),
                                   axis=1))
                # validation embedded vectors
                valid_embed = embedded[config.N * config.M:, :]
                similarity_matrix = similarity(embedded=valid_embed,
                                               w=1.,
                                               b=0.,
                                               center=enroll_embed)

                # print("test file path : ", config.test_path)

                # Return similarity matrix (SM) after enrollment and validation
                time1 = time.time()  # for check inference time
                S = sess.run(similarity_matrix,
                             feed_dict={
                                 enroll:
                                 random_batch(shuffle=False,
                                              forceValidation=True),
                                 valid:
                                 random_batch(shuffle=False,
                                              utter_start=config.M,
                                              forceValidation=True)
                             })
                S = S.reshape([config.N, config.M, -1])
                time2 = time.time()

                np.set_printoptions(precision=4)
                # print("inference time for %d utterences : %0.2fs" % (2 * config.M * config.N, time2 - time1))
                # print(S)  # print similarity matrix

                # calculating EER
                diff = 1
                EER = 0
                EER_thres = 0
                EER_FAR = 0
                EER_FRR = 0

                # through thresholds calculate false acceptance ratio (FAR) and false reject ratio (FRR)
                for thres in [0.01 * i + 0.5 for i in range(50)]:
                    S_thres = S > thres

                    # False acceptance ratio = false acceptance / mismatched population (enroll speaker != validation speaker)
                    FAR = sum([
                        np.sum(S_thres[i]) - np.sum(S_thres[i, :, i])
                        for i in range(config.N)
                    ]) / (config.N - 1) / config.M / config.N

                    # False reject ratio = false reject / matched population (enroll speaker = validation speaker)
                    FRR = sum([
                        config.M - np.sum(S_thres[i][:, i])
                        for i in range(config.N)
                    ]) / config.M / config.N

                    # Save threshold when FAR = FRR (=EER)
                    if diff > abs(FAR - FRR):
                        diff = abs(FAR - FRR)
                        EER = (FAR + FRR) / 2
                        EER_thres = thres
                        EER_FAR = FAR
                        EER_FRR = FRR

                print(
                    "\n(iter : %d) loss: %.4f || EER : %0.4f (thres:%0.4f, FAR:%0.4f, FRR:%0.4f) || inference time for %d utterences: %0.2fs"
                    % ((iter + 1), loss_acc / 100, EER, EER_thres, EER_FAR,
                       EER_FRR, 2 * config.M * config.N, time2 - time1))
                EER_list.append(EER)
                # print("==============VALIDATION END!==============")
                train_loss_list.append(loss_acc / 100)

                # save figures
                if (iter + 1) % 500 == 0:
                    plt.ioff()
                    fig_EER = plt.figure()
                    iter_list = [(i + 1) * 100 for i in range(len(EER_list))]
                    plt.plot(iter_list, EER_list, label="EER")
                    plt.xlabel("Steps")
                    plt.ylabel("EER")
                    plt.title("Equal error rate progress")
                    plt.grid(True)
                    plot_path = "./Plots/" + path[11:] + "/" + path[
                        11:] + ".png"
                    print("Saving plot as: %s" % plot_path)
                    plt.savefig(plot_path)
                    plt.close(fig_EER)

                    plt.ioff()
                    fig_LOSS = plt.figure()
                    iter_list = [(i + 1) * 100 for i in range(len(EER_list))]
                    plt.plot(iter_list,
                             train_loss_list,
                             color="orange",
                             label="train_loss")
                    plt.xlabel("Steps")
                    plt.ylabel("Training loss")
                    plt.title("Training progress")
                    plt.grid(True)
                    plot_path = "./Plots/" + path[11:] + "/" + path[
                        11:] + "_LOSS.png"
                    print("Saving plot as: %s" % plot_path)
                    plt.savefig(plot_path)
                    plt.close(fig_LOSS)

                # Every 100 iterations, save a log of training progress
                with open(log_path, "a") as file:
                    file.write(
                        str(iter + 1) + "," + str(loss_acc / 100) + "," +
                        str(EER) + "," + str(EER_thres) + "," + str(EER_FAR) +
                        "," + str(EER_FRR) + "\n")

                loss_acc = 0  # reset accumulated loss
            # decay learning rate
            if (iter + 1) % 5000 == 0:
                lr_factor /= 2  # lr decay
                print("Learning Rate (LR) decayed! Current LR: ",
                      config.lr * lr_factor)
            # save model checkpoint
            if (iter + 1) % 5000 == 0:
                saver.save(sess,
                           os.path.join(path, "./Check_Point/model.ckpt"),
                           global_step=iter // 5000)  # checkpoint index = iter // 5000
                print("Model checkpoint saved!")