Example #1
train_loader = torch.utils.data.DataLoader(train_set,
                                           batch_size=batch_size,
                                           sampler=sampler,
                                           shuffle=False,
                                           num_workers=4,
                                           drop_last=True)
val_loader = torch.utils.data.DataLoader(val_set,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=4,
                                         drop_last=True)

resnet101 = torchvision.models.resnet101(pretrained=True)
resnet101.fc = nn.Linear(2048, 18)

if continue_train:
    print('Loading network')
    load_model(resnet101, checkpoint_dir, 5, was_parallel=was_parallel)

resnet101.cuda(cuda_id)

Loss_function = nn.CrossEntropyLoss()
if parallel:
    resnet101 = torch.nn.DataParallel(resnet101, device_ids=cuda_id_list)

total_val_img = np.zeros(18)
total_correct_val_img = np.zeros(18)

#------------------------evaluation-----------------------

print('evaluating now')
resnet101.eval()
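
# The example is cut off after eval(). A minimal sketch of the per-class
# evaluation loop that the counters above suggest, assuming val_loader yields
# (inputs, labels) batches with integer labels in [0, 18); this loop is an
# illustration, not the original code.
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.cuda(cuda_id)
        outputs = resnet101(inputs)
        preds = outputs.argmax(dim=1).cpu().numpy()
        for label, pred in zip(labels.numpy(), preds):
            total_val_img[label] += 1
            total_correct_val_img[label] += int(pred == label)

per_class_acc = total_correct_val_img / np.maximum(total_val_img, 1)
print('per-class accuracy:', per_class_acc)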
Example #2

# if using Zhang's code
data_transforms = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dataloader = torch.utils.data.DataLoader(testData(transforms=data_transforms),
                                         batch_size=4,
                                         shuffle=False,
                                         num_workers=4)
net = torchvision.models.resnet152(pretrained=True)
net.fc = nn.Linear(2048, 18)
load_model(net, checkpoint_dir, 'latest', was_parallel=was_parallel)

if use_gpu:
    net = net.cuda(gpu_id)

net.eval()
id_list = []
id_no = 1
pred_list = []

for inputs in dataloader:
    # Variable is a no-op wrapper in modern PyTorch; moving the batch to the
    # GPU is all that is needed here
    if use_gpu:
        inputs = inputs.cuda(gpu_id)
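    # The excerpt stops here. A plausible completion, assuming the goal is to
    # record a sequential id and a top-1 class per test image (id_list, id_no,
    # and pred_list are initialized above); this is an illustration, not the
    # original code.
    with torch.no_grad():
        outputs = net(inputs)
    for pred in outputs.argmax(dim=1).cpu().tolist():
        id_list.append(id_no)
        pred_list.append(pred)
        id_no += 1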
Example #3
def main():
    p_dict = dict()  # all parameters
    p_dict['args'] = args

    ### load data
    word_index_dict = json.load(open(args.word_index_json))
    args.words = { v:k for k,v in word_index_dict.items() }
    p_dict['word_index_dict'] = word_index_dict
    num_classes = args.num_classes
    p_dict['num_classes'] = num_classes
    image_label_dict = json.load(open(args.image_label_json))
    p_dict['image_label_dict'] = image_label_dict
    # split the dataset
    test_filelist = sorted(glob(os.path.join(args.data_dir,'test/plots','*.png')))
    trainval_filelist = sorted(glob(os.path.join(args.data_dir,'train/plots','*.png')))
    # train with two input sizes:
    # train_filelist1: images with aspect ratio < 8:1, padded to 64*512 inputs
    # train_filelist2: images with aspect ratio > 8:1, padded and cropped to 64*1024 inputs
    '''
    train_filelist1, train_filelist2 = [],[]
    # blacklist: these images have problematic labels
    black_list = set(json.load(open(args.black_json))['black_list'])
    image_hw_ratio_dict = json.load(open(args.image_hw_ratio_json))
    for f in trainval_filelist:
        image = f.split('/')[-1]
        if image in black_list:
            continue
        r = image_hw_ratio_dict[image]
        if r == 0:
            train_filelist1.append(f)
        else:
            train_filelist2.append(f)
    val_filelist = train_filelist1[-2048:]
    train_filelist = train_filelist1[:-2048] 
    '''
    # train_filelist1 = sorted(trainval_filelist)
    # val_filelist = train_filelist1[-2048:]
    # train_filelist = train_filelist1[:-2048] 
    val_filelist = test_filelist
    train_filelist = trainval_filelist
    # generated_list = glob(os.path.join(args.data_dir.replace('dataset', 'generated_images'), '*_image.png'))
    # n_test = 4096
    # pretrain_filelist = generated_list[:-n_test]
    # preval_filelist = generated_list[-n_test:]
    # train_filelist2 = train_filelist2
    image_size = args.input_shape
    test_dataset = dataloader.DataSet(
                test_filelist, 
                image_label_dict,
                num_classes, 
                # transform=train_transform, 
                args=args,
                image_size=image_size,
                phase='test')
    test_loader = DataLoader(
                dataset=test_dataset, 
                batch_size=1, 
                shuffle=False, 
                num_workers=args.workers, 
                pin_memory=True)
    train_dataset  = dataloader.DataSet(
                train_filelist, 
                image_label_dict, 
                num_classes, 
                image_size=image_size,
                args=args,
                phase='train')
    train_loader = DataLoader(
                dataset=train_dataset, 
                batch_size=args.batch_size,
                shuffle=True, 
                num_workers=args.workers, 
                pin_memory=True)
    val_dataset  = dataloader.DataSet(
                val_filelist, 
                image_label_dict, 
                num_classes, 
                image_size=image_size,
                args=args,
                phase='val')
    val_loader = DataLoader(
                dataset=val_dataset, 
                batch_size=args.batch_size,
                shuffle=False, 
                num_workers=args.workers, 
                pin_memory=True)
    '''
    pretrain_dataset = dataloader.DataSet(
                pretrain_filelist, 
                image_label_dict,
                num_classes, 
                image_size=image_size,
                word_index_dict = word_index_dict,
                args=args,
                font_range=[8,32],
                margin=10,
                rotate_range=[-10., 10. ],
                phase='pretrain')
    pretrain_loader = DataLoader(
                dataset=pretrain_dataset, 
                batch_size=args.batch_size, 
                shuffle=True, 
                num_workers=args.workers, 
                pin_memory=True)
    preval_dataset = dataloader.DataSet(
                preval_filelist, 
                image_label_dict,
                num_classes, 
                image_size=image_size,
                word_index_dict = word_index_dict,
                args=args,
                font_range=[8,32],
                margin=10,
                rotate_range=[-10., 10. ],
                phase='pretrain')
    preval_loader = DataLoader(
                dataset=preval_dataset, 
                batch_size=args.batch_size, 
                shuffle=False, 
                num_workers=args.workers, 
                pin_memory=True)
    '''

    p_dict['train_loader'] = train_loader
    p_dict['val_loader'] = val_loader
    p_dict['test_loader'] = test_loader
    # p_dict['pretrain_loader'] = pretrain_loader


    # p_dict['train_loader'] = pretrain_loader
    # p_dict['val_loader'] = preval_loader
    # p_dict['test_loader'] = preval_loader




    ### define the model
    cudnn.benchmark = True
    framework.build_model(p_dict)
    parameters = []
    model_dict = p_dict['model_dict']
    for model in model_dict.values():
        for p in model.parameters():
            parameters.append(p)
    optimizer = torch.optim.Adam(parameters, lr=args.lr)
    p_dict['optimizer'] = optimizer
    # model = torch.nn.DataParallel(model).cuda()
    # loss = Loss().cuda()
    start_epoch = 0
    # args.epoch = start_epoch
    # print ('best_f1score' + str(best_f1score))

    p_dict['epoch'] = 0
    p_dict['best_metric'] = 0


    ### load the pretrained model and parameters
    if os.path.exists(args.resume):
        function.load_model(p_dict, args.resume)


    ### train and test the model
    if args.phase == 'test':
        # test: output the text detection results
        test(p_dict)
    elif args.phase == 'train':

        best_f1score = 0
        eval_mode = 'eval'
        best_macc = 0
        p_dict['best_metric'] = 0
        for epoch in range(p_dict['epoch'] + 1, args.epochs):
            p_dict['epoch'] = epoch
            if best_f1score > 0.9:
                args.lr = 0.0001
                args.hard_mining = 1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
            train_eval(p_dict, 'train')
            train_eval(p_dict, 'val')
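
# function.load_model(p_dict, path) is used here and in the later examples,
# but its definition is not shown. A minimal sketch of what such a helper
# might look like for this example, assuming the checkpoint stores one
# state_dict per entry of p_dict['model_dict'] plus epoch/best_metric; all
# checkpoint key names are assumptions for illustration.
def load_model(p_dict, path):
    checkpoint = torch.load(path, map_location='cpu')
    for name, model in p_dict['model_dict'].items():
        model.load_state_dict(checkpoint['state_dicts'][name])
    p_dict['epoch'] = checkpoint.get('epoch', 0)
    p_dict['best_metric'] = checkpoint.get('best_metric', 0)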
Example #4
def main():
    p_dict = dict()  # All the parameters
    p_dict['args'] = args
    args.split_nn = args.split_num + args.split_nor * 3
    args.vocab_size = args.split_nn * 145 + 1
    print('vocab_size', args.vocab_size)

    ### load data
    print('read data ...')
    patient_time_record_dict = py_op.myreadjson(
        os.path.join(args.result_dir, 'patient_time_record_dict.json'))
    patient_master_dict = py_op.myreadjson(
        os.path.join(args.result_dir, 'patient_master_dict.json'))
    patient_label_dict = py_op.myreadjson(
        os.path.join(args.result_dir, 'patient_label_dict.json'))

    patient_train = list(
        json.load(open(os.path.join(args.file_dir, args.task, 'train.json'))))
    patient_valid = list(
        json.load(open(os.path.join(args.file_dir, args.task, 'val.json'))))

    if len(patient_train) > len(patient_label_dict):
        # list() is needed so the keys can be sliced under Python 3
        patients = list(patient_label_dict.keys())
        n = int(0.8 * len(patients))
        patient_train = patients[:n]
        patient_valid = patients[n:]

    print('data loading ...')
    train_dataset = dataloader.DataSet(patient_train,
                                       patient_time_record_dict,
                                       patient_label_dict,
                                       patient_master_dict,
                                       args=args,
                                       phase='train')
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True)
    val_dataset = dataloader.DataSet(patient_valid,
                                     patient_time_record_dict,
                                     patient_label_dict,
                                     patient_master_dict,
                                     args=args,
                                     phase='val')
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=8,
                            pin_memory=True)

    p_dict['train_loader'] = train_loader
    p_dict['val_loader'] = val_loader

    cudnn.benchmark = True
    net = lstm.LSTM(args)
    if args.gpu:
        net = net.cuda()
        p_dict['loss'] = loss.Loss().cuda()
    else:
        p_dict['loss'] = loss.Loss()

    parameters = []
    for p in net.parameters():
        parameters.append(p)
    optimizer = torch.optim.Adam(parameters, lr=args.lr)
    p_dict['optimizer'] = optimizer
    p_dict['model'] = net
    start_epoch = 0
    # args.epoch = start_epoch
    # print ('best_f1score' + str(best_f1score))

    p_dict['epoch'] = 0
    p_dict['best_metric'] = [0, 0]

    ### resume pretrained model
    if os.path.exists(args.resume):
        print('resume from model ' + args.resume)
        function.load_model(p_dict, args.resume)
        print('best_metric', p_dict['best_metric'])
        # return

    if args.phase == 'train':

        best_f1score = 0
        for epoch in range(p_dict['epoch'] + 1, args.epochs):
            p_dict['epoch'] = epoch
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
            train_eval(p_dict, 'train')
            train_eval(p_dict, 'val')
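
# The 80/20 fallback split above iterates dict keys, whose order is not
# stable across runs or Python versions. A deterministic variant with a
# fixed seed (a sketch, not part of the original):
import random

patients = sorted(patient_label_dict.keys())
random.Random(0).shuffle(patients)  # fixed seed for a reproducible split
n = int(0.8 * len(patients))
patient_train, patient_valid = patients[:n], patients[n:]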
Example #5
def main():
    args.n_ehr = len(
        json.load(
            open(os.path.join(args.files_dir, 'demo_index_dict.json'),
                 'r'))) + 10
    args.name_list = json.load(
        open(os.path.join(args.files_dir, 'feature_list.json'), 'r'))[1:]
    args.input_size = len(args.name_list)
    files = sorted(glob(os.path.join(args.data_dir, 'resample_data/*.csv')))
    data_splits = json.load(
        open(os.path.join(args.files_dir, 'splits.json'), 'r'))
    train_files = [
        f for idx in [0, 1, 2, 3, 4, 5, 6] for f in data_splits[idx]
    ]
    valid_files = [f for idx in [7] for f in data_splits[idx]]
    test_files = [f for idx in [8, 9] for f in data_splits[idx]]
    if args.phase == 'test':
        train_phase, valid_phase, test_phase, train_shuffle = 'test', 'test', 'test', False
    else:
        train_phase, valid_phase, test_phase, train_shuffle = 'train', 'valid', 'test', True
    train_dataset = data_loader.DataBowl(args, train_files, phase=train_phase)
    valid_dataset = data_loader.DataBowl(args, valid_files, phase=valid_phase)
    test_dataset = data_loader.DataBowl(args, test_files, phase=test_phase)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=train_shuffle,
                              num_workers=args.workers,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.workers,
                              pin_memory=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.workers,
                             pin_memory=True)

    args.vocab_size = args.input_size + 2

    if args.use_unstructure:
        args.unstructure_size = len(
            py_op.myreadjson(os.path.join(args.files_dir,
                                          'vocab_list.json'))) + 10

    # net = icnn.CNN(args)
    # net = cnn.CNN(args)
    net = lstm.LSTM(args)
    # net = torch.nn.DataParallel(net)
    # loss = myloss.Loss(0)
    loss = myloss.MultiClassLoss(0)

    net = _cuda(net, 0)
    loss = _cuda(loss, 0)

    best_metric = [0, 0]
    start_epoch = 0

    if args.resume:
        p_dict = {'model': net}
        function.load_model(p_dict, args.resume)
        best_metric = p_dict['best_metric']
        start_epoch = p_dict['epoch'] + 1

    parameters_all = []
    for p in net.parameters():
        parameters_all.append(p)

    optimizer = torch.optim.Adam(parameters_all, args.lr)

    if args.phase == 'train':
        for epoch in range(start_epoch, args.epochs):
            print('start epoch:', epoch)
            t0 = time.time()
            train_eval(train_loader, net, loss, epoch, optimizer, best_metric)
            t1 = time.time()
            print('Running time:', t1 - t0)
            best_metric = train_eval(valid_loader,
                                     net,
                                     loss,
                                     epoch,
                                     optimizer,
                                     best_metric,
                                     phase='valid')
        print('best metric', best_metric)

    elif args.phase == 'test':
        train_eval(test_loader, net, loss, 0, optimizer, best_metric, 'test')
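
# _cuda is a small project helper that is not shown in these excerpts. A
# plausible minimal version, assuming it moves a module or tensor to the
# given GPU when CUDA is available and is a no-op otherwise:
def _cuda(x, device_id=0):
    if torch.cuda.is_available():
        return x.cuda(device_id)
    return x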
Example #6
def main():
    p_dict = dict()  # All the parameters
    p_dict['args'] = args
    args.split_nn = 3 * 5
    args.vocab_size = args.split_nn * 145 + 2
    print('vocab_size', args.vocab_size)

    ### load data
    print('read data ...')
    if args.task == 'mortality':

        patient_time_record_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'patient_time_record_dict.json'))
        patient_master_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'patient_master_dict.json'))
        patient_label_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'patient_label_dict.json'))

        if os.path.exists(os.path.join(args.result_dir, 'train.json')):
            patient_train = list(
                json.load(open(os.path.join(args.result_dir, 'train.json'))))
            patient_valid = list(
                json.load(open(os.path.join(args.result_dir, 'valid.json'))))
            patient_test = list(
                json.load(open(os.path.join(args.result_dir, 'test.json'))))
        else:
            patients = sorted(
                set(patient_label_dict.keys()) & set(patient_time_record_dict)
                & set(patient_master_dict))
            print(len(patient_master_dict), len(patient_label_dict),
                  len(patient_time_record_dict))
            print('There are {:d} patients.'.format(len(patients)))
            n_train = int(0.7 * len(patients))
            n_valid = int(0.2 * len(patients))
            patient_train = patients[:n_train]
            patient_valid = patients[n_train:n_train + n_valid]
            patient_test = patients[n_train + n_valid:]

        # patient_train is defined in both branches above; `patients` is only
        # defined in the else branch
        args.master_size = len(patient_master_dict[patient_train[0]])
    elif args.task == 'sepsis':
        patient_time_record_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'sepsis_time_record_dict.json'))
        patient_master_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'patient_master_dict.json'))
        patient_label_dict = py_op.myreadjson(
            os.path.join(args.result_dir, 'sepsis_label_dict.json'))
        sepsis_split = py_op.myreadjson(
            os.path.join(args.result_dir, 'sepsis_split.json'))
        print(sepsis_split.keys())
        sepsis_split = sepsis_split[str(-args.last_time)]

        patient_train = sepsis_split['train']
        patient_valid = sepsis_split['valid']
        print('train: {:d}'.format(len(patient_train)))
        print('valid: {:d}'.format(len(patient_valid)))

    print('data loading ...')
    train_dataset = dataloader.DataSet(patient_train,
                                       patient_time_record_dict,
                                       patient_label_dict,
                                       patient_master_dict,
                                       args=args,
                                       phase='train')
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True)
    val_dataset = dataloader.DataSet(patient_valid,
                                     patient_time_record_dict,
                                     patient_label_dict,
                                     patient_master_dict,
                                     args=args,
                                     phase='val')
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,  # evaluation data need not be shuffled
                            num_workers=8,
                            pin_memory=True)
    test_dataset = dataloader.DataSet(patient_test,
                                      patient_time_record_dict,
                                      patient_label_dict,
                                      patient_master_dict,
                                      args=args,
                                      phase='val')
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,  # evaluation data need not be shuffled
                             num_workers=8,
                             pin_memory=True)

    p_dict['train_loader'] = train_loader
    if args.phase == 'train':
        p_dict['val_loader'] = val_loader
    else:
        p_dict['val_loader'] = test_loader

    cudnn.benchmark = True
    net = attention.Attention(args)
    if args.gpu:
        net = net.cuda()
        p_dict['loss'] = loss.Loss().cuda()
    else:
        p_dict['loss'] = loss.Loss()

    parameters = []
    for p in net.parameters():
        parameters.append(p)
    optimizer = torch.optim.Adam(parameters, lr=args.lr)
    p_dict['optimizer'] = optimizer
    p_dict['model'] = net
    start_epoch = 0
    # args.epoch = start_epoch
    # print ('best_f1score' + str(best_f1score))

    p_dict['epoch'] = 0
    p_dict['best_metric'] = [0, 0]

    ### resume pretrained model
    if os.path.exists(args.resume):
        print('resume from model ' + args.resume)
        function.load_model(p_dict, args.resume)
        print('best_metric', p_dict['best_metric'])

    if args.phase == 'train':

        best_f1score = 0
        for epoch in range(p_dict['epoch'] + 1, args.epochs):
            p_dict['epoch'] = epoch
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
            train_eval(p_dict, 'train')
            train_eval(p_dict, 'val')
        log_info = '# task : {:s}; model: {:s} ; last_time: {:d} ; auc: {:3.4f} \n'.format(
            args.task, args.model, args.last_time, p_dict['best_metric'][0])
        with open('../result/log.txt', 'a') as f:
            f.write(log_info)
    else:
        train_eval(p_dict, 'test')
Example #7
weight = torch.DoubleTensor(weight)
sampler = torch.utils.data.sampler.WeightedRandomSampler(weight, len(weight))
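
# The per-sample `weight` vector consumed above must be built beforehand; a
# common recipe is inverse class frequency. A sketch under the assumption
# that the integer class labels of train_set are available (the `labels`
# argument below is hypothetical, not something shown in the example):
def make_balancing_weights(labels, num_classes=18):
    # one weight per sample: 1 / count(class of that sample)
    labels = np.asarray(labels)
    class_counts = np.bincount(labels, minlength=num_classes)
    return (1.0 / np.maximum(class_counts, 1))[labels]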


train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, sampler=sampler, shuffle=False, num_workers=4, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=4, drop_last=True)


resnet101 = torchvision.models.resnet152(pretrained=True)  # note: despite the variable name, this loads ResNet-152
resnet101.fc = nn.Linear(2048, 18)



if continue_train:
    print('Loading network')
    load_model(resnet101, checkpoint_dir, 'latest', was_parallel=was_parallel)
    loss_list = list(np.loadtxt(os.path.join(checkpoint_dir, 'loss_history_train.txt')))
    acc_list = list(np.loadtxt(os.path.join(checkpoint_dir, 'acc_history_train.txt')))
    loss_list_val = list(np.loadtxt(os.path.join(checkpoint_dir, 'loss_history_val.txt')))
    acc_list_val = list(np.loadtxt(os.path.join(checkpoint_dir, 'acc_history_val.txt')))
    lowest_loss = min(loss_list_val)
    start_epoch = len(loss_list)
else:
    start_epoch = 0
    loss_list = []
    loss_list_val = []
    acc_list = []
    acc_list_val = []
    lowest_loss = 9999

resnet101.cuda(cuda_id)
Example #8
def main():

    assert args.dataset in ['DACMI', 'MIMIC']
    if args.dataset == 'MIMIC':
        args.n_ehr = len(py_op.myreadjson(os.path.join(args.data_dir, args.dataset, 'ehr_list.json')))
    args.name_list = py_op.myreadjson(os.path.join(args.file_dir, args.dataset+'_feature_list.json'))[1:]
    args.output_size = len(args.name_list)
    files = sorted(glob(os.path.join(args.data_dir, args.dataset, 'train_with_missing/*.csv')))
    data_splits = py_op.myreadjson(os.path.join(args.file_dir, args.dataset + '_splits.json'))
    train_files = [f for idx in [0, 1, 2, 3, 4, 5, 6] for f in data_splits[idx]]
    valid_files = [f for idx in [7] for f in data_splits[idx]]
    test_files = [f for idx in [8, 9] for f in data_splits[idx]]
    if args.phase == 'test':
        train_phase, valid_phase, test_phase, train_shuffle = 'test', 'test', 'test', False
    else:
        train_phase, valid_phase, test_phase, train_shuffle = 'train', 'valid', 'test', True
    train_dataset = data_loader.DataBowl(args, train_files, phase=train_phase)
    valid_dataset = data_loader.DataBowl(args, valid_files, phase=valid_phase)
    test_dataset = data_loader.DataBowl(args, test_files, phase=test_phase)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=train_shuffle, num_workers=args.workers, pin_memory=True)
    valid_loader = DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True)
    args.vocab_size = (args.output_size + 2) * (1 + args.split_num) + 5

    if args.model == 'tame':
        net = tame.AutoEncoder(args)
    loss = myloss.MSELoss(args)

    net = _cuda(net, 0)
    loss = _cuda(loss, 0)

    best_metric = [0, 0]
    start_epoch = 0

    if args.resume:
        p_dict = {'model': net}
        function.load_model(p_dict, args.resume)
        best_metric = p_dict['best_metric']
        start_epoch = p_dict['epoch'] + 1

    parameters_all = []
    for p in net.parameters():
        parameters_all.append(p)

    optimizer = torch.optim.Adam(parameters_all, args.lr)

    if args.phase == 'train':
        for epoch in range(start_epoch, args.epochs):
            print('start epoch:', epoch)
            train_eval(train_loader, net, loss, epoch, optimizer, best_metric)
            best_metric = train_eval(valid_loader, net, loss, epoch, optimizer, best_metric, phase='valid')
        print('best metric', best_metric)

    elif args.phase == 'test':
        folder = os.path.join(args.result_dir, args.dataset, 'imputation_result')
        # recreate the output folder; shutil/os calls are more portable than
        # shelling out with os.system (requires `import shutil`)
        shutil.rmtree(folder, ignore_errors=True)
        os.makedirs(folder, exist_ok=True)

        train_eval(train_loader, net, loss, 0, optimizer, best_metric, 'test')
        train_eval(valid_loader, net, loss, 0, optimizer, best_metric, 'test')
        train_eval(test_loader, net, loss, 0, optimizer, best_metric, 'test')
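
# py_op.myreadjson is used throughout these examples without being shown; it
# is presumably a thin JSON-loading convenience. A minimal stand-in (an
# assumption, not the project's actual helper):
import json

def myreadjson(path):
    with open(path) as f:
        return json.load(f)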