def TestModel(self,
                  sess,
                  writer,
                  datapath='',
                  str_dataset='eval',
                  data_count=32,
                  out_report=False,
                  show_ratio=True,
                  step=0):
        '''
        Evaluate the model on the given dataset split and log the metrics.
        '''
        data = DataSpeech(datapath or self.datapath, str_dataset)  # fall back to the trainer's datapath
        num_data = sum(data.DataNum)  # total number of samples in this split
        # Use the whole split when data_count is non-positive or exceeds the split size.
        if data_count <= 0 or data_count > num_data:
            data_count = num_data

        try:
            ran_num = random.randint(0, num_data - 1)  # random starting index
            overall_p = 0
            overall_n = 0
            overall_tp = 0
            overall_tn = 0
            accuracy = 0
            sensitivity = 0
            specificity = 0
            score = 0

            nowtime = time.strftime('%Y%m%d_%H%M%S',
                                    time.localtime(time.time()))
            txt_obj = None
            if out_report:
                txt_obj = open('Test_Report_' + str_dataset + '_' + nowtime +
                               '.txt',
                               'w',
                               encoding='UTF-8')  # open the report file for writing

            start = time.time()
            cm_pre = []
            cm_lab = []
            label_map = {0: 'normal', 1: 'bowel sounds'}  # renamed from `map`, which shadowed the built-in
            for i in tqdm(range(data_count)):
                data_input, data_labels = data.GetData(
                    (ran_num + i) % num_data,
                    mode='non-repetitive')  # take consecutive samples starting from the random index

                predictions = []
                if len(data_input) <= AUDIO_LENGTH:
                    # Zero-pad the clip up to the fixed input length the network expects.
                    data_in = np.zeros(
                        (1, AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, 1),
                        dtype=np.float32)  # np.float is a deprecated alias for float
                    data_in[0, 0:len(data_input)] = data_input
                    data_pre = self.model.predict_on_batch(data_in)
                    predictions = np.argmax(data_pre[0], axis=0)
                else:
                    raise ValueError('audio clip longer than AUDIO_LENGTH; cannot pad')

                cm_pre.append(label_map[predictions])
                cm_lab.append(label_map[data_labels[0]])

                tp, fp, tn, fn = Comapare2(predictions,
                                           data_labels[0])  # per-sample confusion counts
                overall_p += tp + fn
                overall_n += tn + fp
                overall_tp += tp
                overall_tn += tn

                txt = ''
                if out_report:
                    txt += str(i) + '\n'
                    txt += 'True:\t' + str(data_labels) + '\n'
                    txt += 'Pred:\t' + str(data_pre) + '\n'
                    txt += '\n'
                    txt_obj.write(txt)

            if overall_p != 0:
                sensitivity = overall_tp / overall_p * 100
                sensitivity = round(sensitivity, 2)
            else:
                sensitivity = 'None'
            if overall_n != 0:
                specificity = overall_tn / overall_n * 100
                specificity = round(specificity, 2)
            else:
                specificity = 'None'
            if sensitivity != 'None' and specificity != 'None':
                score = (sensitivity + specificity) / 2
                score = round(score, 2)
            else:
                score = 'None'
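            # sensitivity = TP / (TP + FN), specificity = TN / (TN + FP); the score
            # above is their mean, i.e. the balanced accuracy of the classifier.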
            accuracy = (overall_tp + overall_tn) / (overall_p +
                                                    overall_n) * 100
            accuracy = round(accuracy, 2)
            end = time.time()
            dtime = round(end - start, 2)
            strg = '*[Test result] clip classification on {0}: sensitivity {1}%, specificity {2}%, score {3}, accuracy {4}%, elapsed {5}s.'.format(
                str_dataset, sensitivity, specificity, score, accuracy, dtime)
            tqdm.write(strg)

            assert (len(cm_lab) == len(cm_pre))
            img_cm = plot_confusion_matrix(cm_lab,
                                           cm_pre,
                                           list(label_map.values()),
                                           tensor_name='MyFigure/cm',
                                           normalize=False)
            writer.add_summary(img_cm, global_step=step)
            summary = tf.Summary()
            # Log only numeric values; a 'None' placeholder would break simple_value.
            for tag, value in (('sensitivity', sensitivity),
                               ('specificity', specificity),
                               ('score', score),
                               ('accuracy', accuracy)):
                if value != 'None':
                    summary.value.add(tag=str_dataset + '/' + tag,
                                      simple_value=value)
            writer.add_summary(summary, global_step=step)

            if out_report:
                # Reuse the formatted summary; concatenating floats to str raises TypeError.
                txt_obj.write(strg)
                txt_obj.close()

            metrics = {
                'data_set': str_dataset,
                'sensitivity': sensitivity,
                'specificity': specificity,
                'score': score,
                'accuracy': accuracy
            }

            return metrics

        except StopIteration:
            print('[Error] Model test failed; please check the data format.')
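
    # NOTE (assumption): Comapare2(prediction, label) is a project-local helper that
    # is not shown in this snippet. From its usage above it is expected to return
    # one-hot per-sample confusion counts for the binary task, e.g.
    #   tp = int(p == 1 and y == 1), fp = int(p == 1 and y == 0),
    #   tn = int(p == 0 and y == 0), fn = int(p == 0 and y == 1)
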
    def TrainModel(self,
                   datapath,
                   epoch=2,
                   batch_size=32,
                   load_weights=False,
                   filename='model_set/speech_model25'):
        assert (batch_size % CLASS_NUM == 0)
        data = DataSpeech(datapath, 'train')
        num_data = sum(data.DataNum)  # total number of training samples

        os.system('pkill tensorboard')
        os.system('rm -rf ./checkpoints/files_summary/* ')
        train_writter = tf.summary.FileWriter(
            os.path.join(os.getcwd(), 'checkpoints', 'files_summary'))
        os.system(
            'tensorboard --logdir=/home/zhaok14/example/PycharmProjects/setsail/individual_spp/checkpoints/files_summary/ &'
        )
        print('\n')
        print(90 * '*')
        print(90 * '*')

        # Each batch is class-balanced (batch_size // CLASS_NUM samples per class),
        # so the iteration count per epoch is limited by the smallest class.
        iterations_per_epoch = min(
            data.DataNum) // (batch_size // CLASS_NUM) + 1
        print('trainer info:')
        print('training data size: %d' % num_data)
        print('training epochs: ', epoch)
        print('minibatch size: %d' % batch_size)
        print('iterations per epoch: %d' % iterations_per_epoch)

        with k.get_session() as sess:
            train_writter.add_graph(sess.graph)
            # Initialize variables before any weight loading; running the initializer
            # after load_model would overwrite restored weights with random values.
            sess.run(tf.global_variables_initializer())
            if load_weights:
                try:
                    # `modelpath` was commented out in the original; restored here so
                    # load_model has a defined path to read from.
                    modelpath = os.path.join(os.getcwd(), 'network&&weights',
                                             'spectrogram', 'inception',
                                             'spec_inception.h5')
                    self.model = load_model(modelpath,
                                            custom_objects={
                                                'focal_loss': focal_loss,
                                                'focal_loss_fixed':
                                                focal_loss()
                                            })
                    print('Successfully loaded the model.')
                except Exception:
                    print('Loading weights failed. Training from scratch.')

            best_score = 0
            for i in range(0, epoch):
                iteration = 0
                yielddatas = data.data_genetator(batch_size, epoch)
                pbar = tqdm(yielddatas)
                for inputs, labels in pbar:  # `inputs` avoids shadowing the built-in input()
                    loss = self.model.train_on_batch(inputs[0], labels)
                    train_summary = tf.Summary()
                    train_summary.value.add(tag='loss', simple_value=loss)
                    train_writter.add_summary(
                        train_summary, iteration + i * iterations_per_epoch)
                    pr = 'epoch: %d/%d, iteration: %d/%d, loss: %s' % (
                        i, epoch, iteration, iterations_per_epoch, loss)
                    pbar.set_description(pr)
                    if iteration == iterations_per_epoch:
                        break
                    else:
                        iteration += 1
                pbar.close()
                if i % 1 == 0:  # evaluate after every epoch
                    self.TestModel(sess=sess,
                                   datapath=self.datapath,
                                   str_dataset='train',
                                   data_count=1000,
                                   out_report=False,
                                   writer=train_writter,
                                   step=i)
                    metrics = self.TestModel(sess=sess,
                                             datapath=self.datapath,
                                             str_dataset='eval',
                                             data_count=-1,
                                             out_report=False,
                                             writer=train_writter,
                                             step=i)
                    if (metrics['score'] != 'None'
                            and metrics['score'] >= best_score and i > 0):
                        self.metrics = metrics
                        self.metrics['epoch'] = i
                        best_score = metrics['score']
                        self.model.save(self.savpath)

        if hasattr(self, 'metrics') and 'epoch' in self.metrics:
            print('The best metrics took place in the epoch: ',
                  self.metrics['epoch'])
            print('Sensitivity: {}; Specificity: {}; Score: {}; Accuracy: {}'.format(
                self.metrics['sensitivity'], self.metrics['specificity'],
                self.metrics['score'], self.metrics['accuracy']))
        else:
            print('No epoch improved the score; best metrics were not recorded.')
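
    # Hedged usage sketch (the class name is an assumption; only the methods above
    # appear in this snippet):
    #   trainer = SomeSpeechModel(datapath)  # hypothetical constructor
    #   trainer.TrainModel(datapath, epoch=10, batch_size=32, load_weights=False)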

# ---- Example 3 (from the source page): an alternative TrainModel implementation ----
    def TrainModel(self, datapath, epoch=10, batch_size=32):
        assert (batch_size % CLASS_NUM == 0)
        data = DataSpeech(datapath, 'train')
        num_data = sum(data.DataNum)  # total number of training samples
        os.system('pkill tensorboard')
        os.system('rm -rf ./checkpoints/files_summary/* ')
        train_writter = tf.summary.FileWriter(os.path.join(os.getcwd(), 'checkpoints', 'files_summary'))
        os.system('tensorboard --logdir=/home/zhaok14/example/PycharmProjects/setsail/individual_spp/checkpoints/files_summary/ &')
        print('\n')
        print(90 * '*')
        print(90 * '*')

        # Batches are class-balanced, so iterations are limited by the smallest class.
        iterations_per_epoch = min(data.DataNum) // (batch_size // CLASS_NUM) + 1
        print('trainer info:')
        print('training data size: %d' % num_data)
        print('training epochs: ', epoch)
        print('minibatch size: %d' % batch_size)
        print('iterations per epoch: %d' % iterations_per_epoch)

        sess = k.get_session()
        train_writter.add_graph(sess.graph)
        sess.run(tf.global_variables_initializer())
        best_score = 0
        duration = 0
        for i in range(0, epoch):
            iteration = 0
            yielddatas = data.data_genetator(batch_size, epoch)
            pbar = tqdm(yielddatas)
            for inputs, labels in pbar:  # `inputs` avoids shadowing the built-in input()
                stime = time.time()
                loss = self.model.train_on_batch(inputs[0], labels)
                dtime = time.time() - stime
                duration = duration + dtime
                train_summary = tf.Summary()
                train_summary.value.add(tag='loss', simple_value=loss)
                train_writter.add_summary(train_summary, iteration + i * iterations_per_epoch)
                pr = 'epoch: %d/%d, iteration: %d/%d, loss: %s' % (i, epoch, iteration, iterations_per_epoch, loss)
                pbar.set_description(pr)
                if iteration == iterations_per_epoch:
                    break
                else:
                    iteration += 1
            pbar.close()
            if i % 1 == 0:  # evaluate after every epoch
                self.TestModel(sess=sess, datapath=datapath, str_dataset='train', data_count=1000, out_report=False, writer=train_writter, step=i)  # train-split metrics are only logged
                metrics = self.TestModel(sess=sess, datapath=datapath, str_dataset='eval', data_count=-1, out_report=False, writer=train_writter, step=i)
                if i > 0:
                    if metrics['score'] != 'None' and metrics['score'] >= best_score:
                        self.metrics = metrics
                        self.metrics['epoch'] = i
                        best_score = metrics['score']
                        self.savpath = []
                        self.savpath.append((self.baseSavPath[0] + '_epoch' + str(i) + '.h5'))
                        self.savpath.append((self.baseSavPath[1] + '_epoch' + str(i) + '.h5'))
                        self.model.save(self.savpath[0])
                        self.model.save_weights(self.savpath[1])
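                        # model.save() stores the full model (architecture + weights +
                        # optimizer state) in one HDF5 file; save_weights() stores only
                        # the weight tensors.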
        if hasattr(self, 'metrics') and 'epoch' in self.metrics:
            print('The best metrics (under the i > 0 restriction) took place in epoch: ', self.metrics['epoch'])
            print('Sensitivity: {}; Specificity: {}; Score: {}; Accuracy: {}'.format(self.metrics['sensitivity'], self.metrics['specificity'], self.metrics['score'], self.metrics['accuracy']))
            self.TestGenerability(weightspath=self.savpath[1])
        else:
            print('The restricted best metric was not found. Done!')
        print('Training duration: {}s'.format(round(duration,2)))
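
# ---- Example boundary (assumed): a TensorFlow-graph-based TrainModel variant ----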
    def TrainModel(self,
                   datapath,
                   epoch=2,
                   batch_size=32,
                   load_model=False,
                   filename='model_set/speech_model25'):
        '''
        Train the model.
        Args:
            datapath: path where the data are stored
            epoch: number of training epochs
            batch_size: minibatch size (must be even)
            load_model: whether to restore a previous checkpoint before training
            filename: default save-file name, without the file extension

        Note: the checkpoint-loading and txt-report paths are not fully exercised here.
        '''
        assert (batch_size % 2 == 0)
        data = DataSpeech(datapath, 'train')
        num_data = sum(data.DataNum)  # total number of training samples

        os.system('pkill tensorboard')
        os.system('rm -rf ./checkpoints/files_summary/* ')
        train_writter = tf.summary.FileWriter(
            os.path.join(os.getcwd(), 'checkpoints', 'files_summary'))
        os.system(
            'tensorboard --logdir=/home/zhaok14/example/PycharmProjects/setsail/individual_spp/checkpoints/files_summary/ &'
        )
        print('\n')
        print(90 * '*')
        print(90 * '*')

        # Batches are class-balanced, so iterations are limited by the smallest class.
        iterations_per_epoch = min(
            data.DataNum) // (batch_size // CLASS_NUM) + 1
        print('trainer info:')
        print('training data size: %d' % num_data)
        print('training epochs: ', epoch)
        print('minibatch size: %d' % batch_size)
        print('iterations per epoch: %d' % iterations_per_epoch)

        self.graph = tf.Graph()
        model_summary = self.CreateModel(graph=self.graph)
        with tf.Session(graph=self.graph) as sess:

            train_writter.add_graph(sess.graph)
            saver = tf.train.Saver(max_to_keep=1)
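            # tf.train.Saver writes a .index file and a .data shard per checkpoint,
            # plus a 'checkpoint' bookkeeping file; max_to_keep=1 keeps only the newest.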
            # Initialize variables before restoring; running the initializer after a
            # restore would overwrite the restored weights with random values.
            sess.run(tf.global_variables_initializer())
            if load_model:
                try:
                    # saver.save() below appends the global step to the filename, so
                    # restore from the latest checkpoint rather than a fixed name.
                    ckpt = tf.train.latest_checkpoint(
                        os.path.join(os.getcwd(), 'checkpoints', 'files_model'))
                    saver.restore(sess, ckpt)  # two files per checkpoint in the folder
                except Exception:
                    print('Loading weights failed. Training from scratch.')
            best_score = 0

            for i in range(0, epoch):
                iteration = 0
                yielddatas = data.data_genetator(batch_size, epoch)
                pbar = tqdm(yielddatas)
                for inputs, _ in pbar:  # `inputs` avoids shadowing the built-in input()
                    feed = {
                        self.input_data: inputs[0],
                        self.label: inputs[1],
                        self.is_train: True
                    }
                    _, loss, train_summary = sess.run(
                        [self.optimize, self.loss, model_summary],
                        feed_dict=feed)
                    train_writter.add_summary(
                        train_summary, iteration + i * iterations_per_epoch)
                    pr = 'epoch: %d/%d, iteration: %d/%d, loss: %s' % (
                        i, epoch, iteration, iterations_per_epoch, loss)
                    pbar.set_description(pr)
                    if iteration == iterations_per_epoch:
                        break
                    else:
                        iteration += 1
                pbar.close()
                if i % 1 == 0:  # evaluate after every epoch
                    self.TestModel(sess=sess,
                                   datapath=self.datapath,
                                   str_dataset='train',
                                   data_count=-1,
                                   out_report=False,
                                   writer=train_writter,
                                   step=i)
                    metrics = self.TestModel(sess=sess,
                                             datapath=self.datapath,
                                             str_dataset='eval',
                                             data_count=-1,
                                             out_report=False,
                                             writer=train_writter,
                                             step=i)
                    if (metrics['score'] != 'None'
                            and metrics['score'] > best_score and i > 0):
                        self.metrics = metrics
                        self.metrics['epoch'] = i
                        best_score = metrics['score']
                        saver.save(sess,
                                   os.path.join(
                                       os.getcwd(), 'checkpoints',
                                       'files_model',
                                       'speech-f' + str(0) + '.module'),
                                   global_step=i)

        if hasattr(self, 'metrics') and 'epoch' in self.metrics:
            print('The best metrics took place in the epoch: ',
                  self.metrics['epoch'])
            print('Sensitivity: {}; Specificity: {}; Score: {}; Accuracy: {}'.format(
                self.metrics['sensitivity'], self.metrics['specificity'],
                self.metrics['score'], self.metrics['accuracy']))
        else:
            print('No epoch improved the score; best metrics were not recorded.')
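
# ---- Example boundary (assumed): HOG feature visualization for one spectrogram ----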
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
from skimage.feature import hog
from skimage import exposure
import numpy as np

from release.readdata_bowel import DataSpeech
datapath = '/home/zhaok14/example/PycharmProjects/setsail/individual_spp/bowelsounds/perfect'
traindata = DataSpeech(datapath, 'train')

oimage, _ = traindata.GetData(10, mode='non-repetitive')
image = np.squeeze(oimage)

fd, hog_image = hog(image,
                    orientations=8,
                    pixels_per_cell=(16, 16),
                    cells_per_block=(1, 1),
                    visualise=True)
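# NOTE: newer scikit-image releases renamed this parameter to `visualize`;
# `visualise` works only on older versions.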

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True)

ax1.axis('off')
ax1.imshow(image, cmap=plt.cm.gray)
ax1.set_title('Input image')

# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

ax2.axis('off')
ax2.imshow(hog_image_rescaled, cmap=plt.cm.gray)
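# Complete the figure (assumption: the title and plt.show() mirror the standard
# skimage HOG example; they appear to have been cut off in the source).
ax2.set_title('Histogram of Oriented Gradients')
plt.show()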