Example #1
    def __init__(self, Batch, BatchAP, MiniBatchSize, chkpt_dir):
        # Note: the BatchAP argument is unused; anchor-positive pairs
        # are rebuilt from Batch below.
        self.MiniBatchSize = MiniBatchSize
        self.Batch = Batch
        ap_pairs = []
        # Build every anchor-positive pair for each speaker.
        for spk in self.Batch['speaker_id'].unique():
            files = list(
                self.Batch[self.Batch['speaker_id'] == spk]['filename'])
            for anchor, positive in itertools.combinations(files, 2):
                ap_pairs.append([anchor, positive, spk])
        self.BatchAP = pd.DataFrame(
            ap_pairs, columns=['anchor', 'positive', 'speaker_id'])
        self.Cores = cpu_count() // 2  # integer division, so Cores stays an int
        self.Partitions = min(self.Cores, MiniBatchSize * 3)
        self.Speakers = self.Batch['speaker_id'].unique().tolist()
        self.APlen = np.inf

        embeddings = model(x, 1)  # `x` is a module-level input placeholder defined elsewhere
        tf.get_variable_scope().reuse_variables()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            if os.path.exists(chkpt_dir + '/checkpoint'):
                saver.restore(sess, chkpt_dir + '/model.ckpt')
                print('Restoring checkpoint!')
            else:
                print('Checkpoint file not found!')
            xs = self.Batch['filename'].apply(load_filterbanks)
            filenames = self.Batch['filename'].tolist()
            embds = {}
            for _x in range(len(xs)):  # range() needs a length, not a Series
                embds[filenames[_x]] = sess.run(embeddings,
                                                feed_dict={x: xs.iloc[_x]})
        self.embds = embds
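
Side note: itertools.combinations(files, 2) yields each unordered pair exactly once, so a speaker with n files contributes n * (n - 1) / 2 anchor-positive rows. A quick standalone check (the filenames are hypothetical):

import itertools

files = ['a.wav', 'b.wav', 'c.wav']
pairs = list(itertools.combinations(files, 2))
print(pairs)       # [('a.wav', 'b.wav'), ('a.wav', 'c.wav'), ('b.wav', 'c.wav')]
print(len(pairs))  # 3 == 3 * 2 // 2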
Example #2
File: trainer.py Project: zLi90/deepriver
    def train(self):
        # iteration number
        global_step = tf.Variable(0,
                                  dtype=tf.int32,
                                  trainable=False,
                                  name='global_step')

        # training graph
        iterator = self._data_layer()
        input_layer, labels, _ = iterator.get_next()
        logits = model(input_layer, self.num_labels, training=True)
        total_loss = self._loss_functions(logits, labels)
        optimizer = self._optimizer(total_loss, global_step)

        # summary ops and placeholders
        summ_op, mean_loss = self._summaries(logits)

        # don't allocate entire gpu memory
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())

            writer = tf.summary.FileWriter(self.checkpoint_path, sess.graph)

            saver = tf.train.Saver(max_to_keep=None)  # keep all checkpoints
            ckpt = tf.train.get_checkpoint_state(self.checkpoint_path)

            # resume training if a checkpoint exists
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            initial_step = global_step.eval()

            # train the model
            streaming_loss = 0
            for i in range(initial_step, self.num_iter + 1):
                _, loss_batch = sess.run([optimizer, total_loss])

                # log training statistics
                streaming_loss += loss_batch
                if i % self.log_iter == self.log_iter - 1:
                    streaming_loss /= self.log_iter
                    print(i + 1, streaming_loss)
                    summary = sess.run(summ_op,
                                       feed_dict={mean_loss: streaming_loss})
                    writer.add_summary(summary, global_step=i)
                    streaming_loss = 0

                # save model
                if i % self.save_iter == self.save_iter - 1:
                    saver.save(sess,
                               os.path.join(self.checkpoint_path,
                                            'checkpoint'),
                               global_step=global_step)
                    print("Model saved!")

            writer.close()
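
Example #2 calls a _summaries helper that is not shown here. A minimal sketch of what it might look like, assuming it wires the streaming loss into a scalar summary through a placeholder (the signature and body are assumptions, not the project's actual code):

    def _summaries(self, logits):
        # `logits` is accepted to match the call site but unused in this sketch.
        # Placeholder fed with the loss averaged over the last log_iter steps.
        mean_loss = tf.placeholder(tf.float32, shape=(), name='mean_loss')
        tf.summary.scalar('mean_loss', mean_loss)
        summ_op = tf.summary.merge_all()
        return summ_op, mean_loss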
Example #3
    def __init__(self, checkpoint_path='./checkpoints/'):
        self.checkpoint_path = checkpoint_path
        self.num_labels = 3

        self.input_layer = tf.placeholder(tf.float32,
                                          shape=[None, None],
                                          name='input')
        logits = model(tf.expand_dims(self.input_layer, axis=0),
                       num_labels=self.num_labels,
                       training=False)
        self.preds = tf.squeeze(tf.nn.softmax(logits))
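
The class above only builds the inference graph; restoring weights and running self.preds still has to happen somewhere. A minimal sketch of a predict method under the same checkpoint conventions as the trainer (this method is an illustration, not part of the source):

    def predict(self, features):
        # features: a [time, freq] array for a single utterance.
        saver = tf.train.Saver()
        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(self.checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            return sess.run(self.preds,
                            feed_dict={self.input_layer: features})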
Example #4
def train():
    BatchLoader = MultiTaskBatchManager(Data=train_df,
                                        batch_size=batch_size,
                                        Ntasks=5,
                                        Nepochs=num_epoch)
    print('epoch size: %s' % (len(train_df) // batch_size))
    with T_graph.as_default():
        tower_grads = []
        global_step = tf.Variable(0, name='global_step', trainable=False)
        x = tf.placeholder('float32', shape=[batch_size, None])
        y = tf.placeholder('int32', shape=[batch_size])
        optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
        available_gpus = get_available_gpus()
        num_clones = len(available_gpus)
        print('Number of clones = %d' % num_clones)
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_clones):
                with tf.device(available_gpus[i]):
                    # Distribute data among all clones equally.
                    step = batch_size // num_clones
                    # Network outputs.
                    prediction = model(x[i * step:(i + 1) * step], step,
                                       total_speakers)
                    label_onehot = tf.one_hot(y[i * step:(i + 1) * step],
                                              depth=total_speakers + 1)
                    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                        logits=prediction, labels=label_onehot)
                    with tf.name_scope('loss'):
                        loss = tf.reduce_mean(cross_entropy)
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    # Calculate the gradients for the batch of data on this tower.
                    grads = optimizer.compute_gradients(loss)
                    tower_grads.append(grads)
        grads = average_gradients(tower_grads)
        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = optimizer.apply_gradients(grads,
                                                      global_step=global_step)
        # Track the moving averages of all trainable variables.
        MOVING_AVERAGE_DECAY = 0.9999
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        summaries.add(tf.summary.scalar('loss', loss))
        summary_op = tf.summary.merge(list(summaries))

        with tf.Session(graph=T_graph,
                        config=tf.ConfigProto(
                            allow_soft_placement=True,
                            log_device_placement=False)) as sess:
            saver = tf.train.Saver()
            summary_writer = tf.summary.FileWriter(chkpt_dir, graph=T_graph)
            sess.run(tf.global_variables_initializer())
            tf.train.start_queue_runners(sess=sess)
            if os.path.exists(chkpt_dir + '/checkpoint'):
                print('Restoring checkpoint!')
                saver.restore(sess, chkpt_file)
            elif not os.path.exists(chkpt_dir):
                os.mkdir(chkpt_dir)
            print('Training started!')
            isrunning = True
            stepcount = 0
            steploss = 0
            epoch_loss = 0
            nep = 0
            while isrunning:
                stepcount += 1
                batch_x, batch_y, flag, isrunning = BatchLoader.next_batch()
                if isrunning:
                    # Run train_op rather than apply_gradient_op so the
                    # moving-average update is applied as well.
                    _, c, summary, g = sess.run(
                        [train_op, loss, summary_op, global_step],
                        feed_dict={
                            x: batch_x,
                            y: batch_y
                        })
                    summary_writer.add_summary(summary, global_step=g)
                    steploss += c
                    epoch_loss += c
                    if stepcount % 100 == 0:
                        save_path = saver.save(sess, chkpt_file)
                        print('step_loss: %s' % steploss)
                        logging.info('step_loss: %s' % steploss)
                        steploss = 0
                    # One epoch corresponds to len(train_df) // batch_size steps.
                    if stepcount % (len(train_df) // batch_size) == 0:
                        logging.info('Epoch %d loss: %f' % (nep, epoch_loss))
                        print('Epoch %d loss: %f' % (nep, epoch_loss))
                        epoch_loss = 0
                        nep += 1
            logging.info('Last batch %d loss: %f' % (nep, epoch_loss))
            print('Last batch %d loss: %f' % (nep, epoch_loss))
            BatchLoader.close()
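
Examples #4 and #5 both rely on two helpers that are not shown: get_available_gpus and average_gradients. Minimal sketches of both, following the widely used TensorFlow multi-GPU tutorial pattern (the project's actual implementations may differ):

from tensorflow.python.client import device_lib

def get_available_gpus():
    # Names of all visible GPU devices, e.g. ['/device:GPU:0', ...].
    return [d.name for d in device_lib.list_local_devices()
            if d.device_type == 'GPU']

def average_gradients(tower_grads):
    # tower_grads: one list of (gradient, variable) pairs per tower.
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Average this variable's gradient across all towers.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        # The variable itself is shared, so take it from the first tower.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads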
Example #5
def train():
    BatchLoader = MultiTaskBatchManager(Data=train_df,
                                        batch_size=batch_size,
                                        Ntasks=5,
                                        Nepochs=num_epoch)
    with T_graph.as_default():
        tower_grads = []
        global_step = tf.Variable(0, name='global_step', trainable=False)
        x = tf.placeholder('float32')  # fed a stack of mini-batches, one slab per GPU clone
        alpha = tf.placeholder('float32')
        optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
        available_gpus = get_available_gpus()
        num_clones = len(available_gpus)
        print('Number of clones = %d' % num_clones)
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_clones):
                with tf.device(available_gpus[i]):
                    # Network outputs.
                    prediction = model(x[i], batch_size * 3, total_speakers)
                    prediction = tf.nn.l2_normalize(prediction,
                                                    1,
                                                    1e-10,
                                                    name='embeddings')
                    with tf.name_scope('loss'):
                        loss = triplet_loss(prediction, alpha)
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    # Calculate the gradients for the batch of data on this tower.
                    grads = optimizer.compute_gradients(loss)
                    tower_grads.append(grads)
        grads = average_gradients(tower_grads)
        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = optimizer.apply_gradients(grads,
                                                      global_step=global_step)
        # Track the moving averages of all trainable variables.
        MOVING_AVERAGE_DECAY = 0.9999
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        summaries.add(tf.summary.scalar('loss', loss))
        summary_op = tf.summary.merge(list(summaries))

        with tf.Session(graph=T_graph,
                        config=tf.ConfigProto(
                            allow_soft_placement=True,
                            log_device_placement=False)) as sess:
            saver = tf.train.Saver()
            summary_writer = tf.summary.FileWriter(chkpt_dir, graph=T_graph)
            sess.run(tf.global_variables_initializer())
            tf.train.start_queue_runners(sess=sess)
            if os.path.exists(chkpt_dir + '/checkpoint'):
                print('Restoring checkpoint!')
                saver.restore(sess, chkpt_file)
            elif not os.path.exists(chkpt_dir):
                os.mkdir(chkpt_dir)
            print('Training started!')
            isrunning = True
            stepcount = 0
            steploss = 0
            while isrunning:
                stepcount += 1
                # Collect one mini-batch per GPU clone.
                batch_xs = []
                for _ in range(num_clones):
                    batch_x, batch_y, flag, isrunning = BatchLoader.next_batch()
                    batch_xs.append(batch_x)
                    if not isrunning:
                        break
                if not isrunning:
                    break
                # Run train_op rather than apply_gradient_op so the
                # moving-average update is applied as well.
                _, c, summary, g = sess.run(
                    [train_op, loss, summary_op, global_step],
                    feed_dict={
                        x: batch_xs,
                        alpha: 0.1
                    })
                summary_writer.add_summary(summary, global_step=g)
                steploss += c
                if stepcount % 100 == 0:
                    save_path = saver.save(sess, chkpt_file)
                    print('step_loss: %s' % steploss)
                    steploss = 0
            BatchLoader.close()
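
Example #5 feeds batches of size batch_size * 3 into triplet_loss(prediction, alpha), which suggests the batch is laid out as concatenated anchor, positive, and negative segments. A minimal sketch under that assumption (both the layout and the function body are guesses, not the project's code):

def triplet_loss(embeddings, alpha):
    # Assumes embeddings are stacked as [anchors; positives; negatives].
    anchor, positive, negative = tf.split(embeddings, 3, axis=0)
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    # Standard margin-based triplet objective.
    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + alpha, 0.0))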