Code Example #1
File: train.py  Project: 975150313/3DUNET-1
def create_data_yaml(path):
    if (output_path / 't22seg_train.yaml').exists():
        return
    paired_data = list()
    path = Path(path) / 't2'
    for t2_file in path.iterdir():
        seg_file = str(t2_file).replace('t2', 'seg')
        t2_image = nii_utils.nii_reader(str(t2_file))
        seg_image = nii_utils.nii_reader(str(seg_file))
        if t2_image.shape == seg_image.shape:  # keep only pairs whose volumes match in shape
            paired_data.append({'t2': str(t2_file), 'seg': str(seg_file)})
    shuffle(paired_data)
    yaml_utils.write(str(output_path / 't22seg_train.yaml'),
                     paired_data[:8 * len(paired_data) // 10])  # train 80%
    yaml_utils.write(str(output_path / 't22seg_test.yaml'),
                     paired_data[8 * len(paired_data) // 10:])  # test 20%
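
All of the examples on this page write their train/test splits through a project-local `yaml_utils` helper that is not shown. A minimal sketch of what its `write`/`read` functions might look like, assuming PyYAML (the project's actual implementation may differ):

from pathlib import Path

import yaml


def write(path, data):
    # Create the parent directory if needed and dump the data as YAML.
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open('w', encoding='utf-8') as file:
        yaml.dump(data, file, allow_unicode=True)


def read(path):
    # Load a YAML file back into Python objects.
    with Path(path).open('r', encoding='utf-8') as file:
        return yaml.safe_load(file)
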
Code Example #2
File: model.py  Project: kuoluo1995/word2vec
    def train(self):
        optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(self.loss)
        # We must initialize all variables before we use them.
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
        writer = tf.summary.FileWriter(
            '../tensorboard_logs/{}/{}/{}'.format(self.dataset_name, self.name,
                                                  self.tag), self.sess.graph)

        for epoch in range(self.num_epoch):
            batch_inputs, batch_labels = next(self.train_data_generator)
            _, loss_val, summary = self.sess.run(
                [optimizer, self.loss, self.scalar_summary],
                feed_dict={
                    self.inputs: batch_inputs,
                    self.labels: batch_labels
                })
            writer.add_summary(summary, epoch)
            if epoch % self.valid_freq == 0:
                sim = self.sess.run(self.similarity)  # similarity of the validation words to the whole vocabulary
                print('>> Validation at step {}:'.format(epoch))
                for i in range(self.valid_size):
                    valid_word = self.reverse_dictionary[
                        self.valid_examples[i]]
                    top_k = 8  # number of nearest neighbors
                    nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                    log_str = 'Nearest to ' + valid_word + ': '
                    for k in range(top_k):
                        close_word = self.reverse_dictionary[nearest[k]]
                        log_str += str(k + 1) + ':' + close_word + ' '
                    print(log_str)
                # save a training checkpoint
                self.save(self.checkpoint_dir / 'train', self.saver, epoch)
        final_embeddings = self.sess.run(self.normalized_embeddings)
        # Save the final checkpoint and export the learned embeddings.
        self.save(self.checkpoint_dir / 'train', self.saver, self.num_epoch)
        yaml_utils.write(self.checkpoint_dir / 'final_embeddings.yaml',
                         final_embeddings)
        # Create a configuration for visualizing embeddings with the labels in TensorBoard.
        config = projector.ProjectorConfig()
        embedding_conf = config.embeddings.add()
        embedding_conf.tensor_name = self.embeddings.name
        embedding_conf.metadata_path = self.dataset_info['dictionary']
        projector.visualize_embeddings(writer, config)
        writer.close()
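
The validation loop above relies on `self.similarity` and `self.normalized_embeddings`, which are built elsewhere in the class. A sketch of how they are typically defined inside the graph-building method of TF1-style word2vec code (an assumption about this project, following the standard tutorial pattern):

import tensorflow as tf

# Cosine similarity between a fixed set of validation words and every word in the vocabulary.
norm = tf.sqrt(tf.reduce_sum(tf.square(self.embeddings), 1, keepdims=True))
self.normalized_embeddings = self.embeddings / norm
valid_dataset = tf.constant(self.valid_examples, dtype=tf.int32)
valid_embeddings = tf.nn.embedding_lookup(self.normalized_embeddings, valid_dataset)
self.similarity = tf.matmul(valid_embeddings, self.normalized_embeddings, transpose_b=True)
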
Code Example #3
def build_dataset(words):
    print('Counting word frequencies')
    count = [('UNK', -1)]
    count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
    print('Building the dictionary')
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    print('Converting the raw data into an index sequence using the dictionary')
    data = list()
    unk_count = 0
    for word in words:
        index = dictionary.get(word, 0)  # dictionary['UNK']
        if index == 0:
            unk_count += 1
        data.append(index)
    count[0] = ('UNK', unk_count)
    print('Building the reverse-lookup dictionary')
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    output_path = output_dir / dataset_name
    print('Exporting files')
    yaml_utils.write(output_path / 'data.yaml', data)
    yaml_utils.write(output_path / 'reverse_dictionary.yaml', reverse_dictionary)
    with (output_path / 'dictionary.tsv').open(mode='w', encoding='UTF-8') as file:
        for i in range(vocabulary_size):
            file.write(reverse_dictionary[i] + '\n')
    info_dict = {'vocabulary_size': vocabulary_size, 'data': str(output_path / 'data.yaml'),
                 'dictionary': str(output_path / 'dictionary.tsv'),
                 'reverse_dictionary': str(output_path / 'reverse_dictionary.yaml')}
    yaml_utils.write(output_path / 'info.yaml', info_dict)
    print('Export finished')
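
The `data` index sequence produced by `build_dataset` is what a skip-gram batch generator (the `train_data_generator` consumed in Code Example #2) would iterate over. A simplified sketch of such a generator, not the project's exact code:

import numpy as np


def skipgram_batches(data, batch_size=128, skip_window=1):
    """Endlessly yield (inputs, labels) skip-gram batches from a list of word ids."""
    inputs, labels = [], []
    while True:
        for center in range(skip_window, len(data) - skip_window):
            for offset in range(-skip_window, skip_window + 1):
                if offset == 0:
                    continue  # skip the centre word itself
                inputs.append(data[center])
                labels.append(data[center + offset])
                if len(inputs) == batch_size:
                    yield (np.array(inputs, dtype=np.int32),
                           np.array(labels, dtype=np.int32).reshape(-1, 1))
                    inputs, labels = [], []
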
Code Example #4
a = 'T1'
b = 'STIR'

source = 'E:/Datasets/Neurofibromatosis/source'  # alternative: /home/yf/datas/NF/
output = 'E:/Datasets/Neurofibromatosis'  # alternative: /home/yf/datas/NF/
o_A = listdir(source + '/' + a)
o_B = listdir(source + '/' + b)

A = list()
B = list()

# Pair the T1 and STIR files by position and keep only pairs with the same slice count.
for i in range(len(o_A)):
    image_A = nii_utils.nii_reader(source + '/' + a + '/' + o_A[i])
    image_B = nii_utils.nii_reader(source + '/' + b + '/' + o_B[i])
    if image_A.shape[2] == image_B.shape[2]:
        A.append(source + '/' + a + '/' + o_A[i])
        B.append(source + '/' + b + '/' + o_B[i])

train_A, test_A, train_B, test_B = train_test_split(A,
                                                    B,
                                                    test_size=0.2,
                                                    random_state=10)

fold = dict()
fold['A'] = train_A
fold['B'] = train_B
yaml_utils.write(output + '/' + a + '2' + b + '_train.yaml', fold)
fold['A'] = test_A
fold['B'] = test_B
yaml_utils.write(output + '/' + a + '2' + b + '_test.yaml', fold)
Code Example #5
            name = obj.find('name').text
            # skip classes we don't care about and objects marked 'difficult'
            if name not in classes or int(difficult) == 1:
                continue
            item = dict()  # save object params
            item['class_id'] = classes[name]
            bndbox = obj.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)
            item['bndbox'] = {
                'xmin': xmin,
                'ymin': ymin,
                'xmax': xmax,
                'ymax': ymax
            }
            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255),
                          thickness=2)
            data['objects'].append(item)
        if len(data['objects']) < 1:
            continue
        # cv2.imshow('image', img)  # preview the annotated image
        # cv2.waitKey(1)
        dataset.append(data)
output_dir.mkdir(parents=True, exist_ok=True)
np.random.shuffle(dataset)
train_steps = int(len(dataset) * train_scale)
yaml_utils.write(output_dir / 'train_dataset.yaml', dataset[:train_steps])
yaml_utils.write(output_dir / 'eval_dataset.yaml', dataset[train_steps:])
yaml_utils.write(output_dir / 'classes.yaml', classes)
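
This fragment starts in the middle of the annotation loop, so `obj`, `difficult`, `img`, `data`, and `dataset` come from code that is not shown. A sketch of the surrounding Pascal-VOC-style parsing loop it assumes (directory paths and the `classes` mapping are hypothetical):

import xml.etree.ElementTree as ET
from pathlib import Path

import cv2

annotations_dir = Path('VOCdevkit/VOC2007/Annotations')  # hypothetical paths
images_dir = Path('VOCdevkit/VOC2007/JPEGImages')
dataset = list()
for xml_file in annotations_dir.glob('*.xml'):
    root = ET.parse(str(xml_file)).getroot()
    image_path = images_dir / root.find('filename').text
    img = cv2.imread(str(image_path))
    data = {'image_path': str(image_path), 'objects': list()}
    for obj in root.findall('object'):
        difficult = obj.find('difficult').text
        # ... the per-object parsing shown above continues here
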
Code Example #6
from pathlib import Path
from utils import nii_utils, yaml_utils

output_path = Path('E:/Datasets/BraTS_2018')


def nii_gz2nii(path):
    detail = dict()
    for file in Path(path).iterdir():
        item_name = Path(path).stem
        class_fold = (file.stem.replace(item_name, '')
                      .replace('.nii', '').replace('_', ''))
        output = output_path / class_fold / (item_name + '.nii')
        output.parent.mkdir(parents=True, exist_ok=True)
        image = nii_utils.nii_reader(str(file))
        header = nii_utils.nii_header_reader(str(file))
        nii_utils.nii_writer(str(output), header, image)
        detail[class_fold] = image.shape
    return detail


if __name__ == '__main__':
    source_data = 'E:/SourceDatasets/LGG'
    _dict = dict()
    for item in Path(source_data).iterdir():
        _dict[item.stem] = nii_gz2nii(str(item))
        print('\r>>' + item.stem, end='')
    yaml_utils.write(str(output_path) + '/detail.yaml', _dict)
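
`nii_utils` is another project-local helper used throughout these examples. One plausible implementation of its reader/writer functions, assuming SimpleITK (the real module may use nibabel or handle headers differently):

import SimpleITK as sitk


def nii_reader(path):
    # Voxel array of a NIfTI file (SimpleITK returns it in z, y, x order).
    return sitk.GetArrayFromImage(sitk.ReadImage(path))


def nii_header_reader(path):
    # Geometry information needed to write a matching output volume.
    image = sitk.ReadImage(path)
    return {'spacing': image.GetSpacing(),
            'origin': image.GetOrigin(),
            'direction': image.GetDirection()}


def nii_writer(path, header, array):
    # Rebuild an image from the array, copy the geometry, and save it.
    image = sitk.GetImageFromArray(array)
    image.SetSpacing(header['spacing'])
    image.SetOrigin(header['origin'])
    image.SetDirection(header['direction'])
    sitk.WriteImage(image, path)
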
Code Example #7
            except:  # malformed sequence tag; record the file and skip it
                error['sequence name error'].append(tag_dict['path'])

    if A in dict_.keys() and B in dict_.keys():
        for key, value in dict_.items():
            print(output_path + '/' + key + '/' + value['patient_id'] + '.nii')
            Path(output_path + '/' + key + '/').mkdir(parents=True,
                                                      exist_ok=True)
            dcm_utils.write(
                dcm_utils.read(value['path']),
                output_path + '/' + key + '/' + value['patient_id'] + '.nii')
        return dict_
    else:
        one_error = list()
        for key, value in dict_.items():
            one_error.append(key + ':' + value['path'])
        error['match datas error'].append(one_error)
        return None


if __name__ == '__main__':
    source_data = 'E:/SourceDatasets/Neurofibromatosis/NF'
    detail = list()
    for dataset in Path(source_data).iterdir():
        dataset = dataset / 'DICOM'
        result = dcm2nii(dataset)
        if result is not None:
            detail.append(result)
    yaml_utils.write(output_path + '/info.yaml', detail)
    yaml_utils.write(output_path + '/error.yaml', error)
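
`dcm_utils.read`/`dcm_utils.write` convert a DICOM series into a single NIfTI volume. A hedged sketch of that conversion using SimpleITK's series reader (an assumption; the project's helper may differ):

import SimpleITK as sitk


def read(dicom_dir):
    # Load all slices of the DICOM series found in the directory as one volume.
    reader = sitk.ImageSeriesReader()
    reader.SetFileNames(reader.GetGDCMSeriesFileNames(str(dicom_dir)))
    return reader.Execute()


def write(image, nii_path):
    # Save the volume as a .nii file.
    sitk.WriteImage(image, str(nii_path))
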