def main(args):
    if not os.path.isdir(args.checkpoint):
        try:
            os.makedirs(args.checkpoint)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    # init log
    log = open(os.path.join(args.checkpoint, 'log.txt'), 'w')
    log.write('Epoch\tLR\tLoss\tAcc\t\n')
    log.flush()

    model = MyModel(classes=train_cfg.num_class)
    # model.build(input_shape=(None, train_cfg.data_shape[0], train_cfg.data_shape[1], 3))
    # model._set_inputs((None, train_cfg.data_shape[0], train_cfg.data_shape[1], 3))
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-4)
    criterion = tf.keras.losses.SparseCategoricalCrossentropy()

    train_data, train_len = MyDataset(train_cfg, train_mode=0).get_dataset()
    val_data, val_len = MyDataset(val_cfg, train_mode=1).get_dataset()

    for epoch in range(args.start_epoch, train_cfg.epochs):
        lr = adjust_learning_rate(optimizer, epoch)
        print('epoch {}, lr={}'.format(epoch, lr))
        print('begin the {}th epoch'.format(epoch))

        train_loss = train(train_data, model, criterion, optimizer)
        print('avg_train_loss:{:.5f}'.format(train_loss / train_len))
        acc = eval(val_data, model)
        print('accuracy:{:.5f}'.format(acc))

        # log the average training loss over the training set and the validation accuracy
        log.write('{}\t{:.5f}\t{:.5f}\t{:.5f}\t\n'.format(epoch, lr, train_loss / train_len, acc))
        log.flush()

        filename = 'epoch' + str(epoch + 1) + 'checkpoint.h5'
        filepath = os.path.join(args.checkpoint, filename)
        # model.save(filepath=filepath, overwrite=False, include_optimizer=True, save_format='tf')
        model.save_weights(filepath=filepath, overwrite=False, save_format='h5')

    log.close()
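# adjust_learning_rate() is called above but not shown; a minimal sketch for the
# tf.keras Adam optimizer is given below. The step-decay schedule (halving the
# 5e-4 base rate every 10 epochs) is an assumption, not the author's actual schedule.
import tensorflow as tf

def adjust_learning_rate(optimizer, epoch, base_lr=5e-4, decay=0.5, step=10):
    """Apply a simple step decay and push the new value into the optimizer."""
    lr = base_lr * (decay ** (epoch // step))
    tf.keras.backend.set_value(optimizer.learning_rate, lr)
    return lr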
def main(args):
    model = MyModel(classes=test_cfg.num_class)
    test_data, test_len = MyDataset(test_cfg, train_mode=2).get_dataset()
    predict_np = []

    checkpoint_file = os.path.join(test_cfg.checkpoint_path, args.checkpoint + '.h5')
    model.load_weights(checkpoint_file)
    # args.checkpoint[5] assumes a name like 'epochNcheckpoint' with a single-digit epoch
    print('successfully loaded checkpoint:{} (epoch {})'.format(checkpoint_file, args.checkpoint[5]))

    print('testing.........')
    for i, (inputs, targets) in tqdm(enumerate(test_data)):
        out = model(inputs)
        out = out.numpy()
        prediction = numpy.argmax(out, axis=1)
        predict_np.append(prediction)

    csv_path = os.path.join(test_cfg.img_folder, '..', test_cfg.csv_name)
    data = pd.read_csv(csv_path)
    predict_np = numpy.concatenate(predict_np)
    data['labels'] = pd.DataFrame(predict_np)
    data.to_csv(csv_path, index=False)
    print('successfully wrote the prediction results!')
def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = networks(num_class=test_cfg.num_class, pretrained=False).to(device)
    test_loader = torch.utils.data.DataLoader(MyDataset(test_cfg, train_mode=2),
                                              batch_size=test_cfg.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)
    predict_np = []

    checkpoint_file = os.path.join(test_cfg.checkpoint_path, args.checkpoint + '.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print('successfully loaded checkpoint:{} (epoch {})'.format(checkpoint_file, checkpoint['epoch']))

    model.eval()
    print('testing.........')
    for i, inputs in tqdm(enumerate(test_loader)):
        inputs = inputs.to(device)
        with torch.no_grad():
            out = model(inputs)
        _, prediction = torch.max(out.data, 1)
        predict_np.append(prediction.cpu().numpy())

    csv_path = os.path.join(test_cfg.img_folder, '..', test_cfg.csv_name)
    data = pd.read_csv(csv_path)
    predict_np = numpy.concatenate(predict_np)
    data['labels'] = pd.DataFrame(predict_np)
    data.to_csv(csv_path, index=False)
    print('successfully wrote the prediction results!')
def train():
    xml_dir = args.base_path + "data/label"
    img_dir = args.base_path + "data/img"
    test_dir = args.base_path + "data/test"
    save_path = args.base_path + "result"
    dataset_class = ['Item']
    colors = ((0, 0, 0), (255, 0, 0))
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    dataset = MyDataset(img_dir, xml_dir, dataset_class)
    train_dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                                   shuffle=True, collate_fn=collate_fn)

    model = FasterRCNN(num_classes=2).to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=args.lr, momentum=0.9, weight_decay=0.0005)

    for epoch in range(args.num_epochs):
        model.train()
        train_loss = 0
        for i, batch in enumerate(train_dataloader):
            images, targets, image_ids = batch
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # in training mode the detection model returns a dict of losses
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            train_loss += losses.item() * len(images)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            print(i, train_loss)
            # if i >= 0:
            #     break  # debug leftover: stops after the first batch
        print(f"epoch {epoch+1} loss: {train_loss / len(train_dataloader.dataset)}")

    torch.save(model, 'model.pt')
    test(model, dataset_class, colors, test_dir, save_path, device)
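# collate_fn is passed to the DataLoader above but not defined in this snippet; the
# sketch below is the usual detection-style collate (keep images and targets as lists
# instead of stacking), assuming MyDataset returns (image, target_dict, image_id) tuples.
def collate_fn(batch):
    """Group a list of (image, target, image_id) samples into three tuples."""
    return tuple(zip(*batch))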
def test(cfg):
    # transform
    transform_test_list = [
        transforms.Resize(size=cfg.INPUT.SIZE_TEST, interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]

    # prepare dataset
    test_dataset = MyDataset(root=root, transform=transforms.Compose(transform_test_list), type='test')
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True, num_workers=8, pin_memory=False)
    num_classes = cfg.MODEL.HEADS.NUM_CLASSES

    # prepare model
    def load_network(network):
        save_path = cfg.LOAD_FROM
        checkpoint = torch.load(save_path)
        if 'model' in checkpoint:
            network.load_state_dict(checkpoint['model'])
        else:
            network.load_state_dict(checkpoint)
        return network

    model = build_model(cfg, num_classes)
    model = load_network(model)
    model = model.cuda()
    model.train(False)

    # for data in tqdm(test_loader):
    for data in test_loader:
        inputs, labels = data
        # print(inputs.shape)
        inputs = inputs.cuda()
        with torch.no_grad():
            out = model(inputs)
        # print(logits)
        score, preds = torch.max(out['pred_class_logits'], 1)
        # print(score, preds)
        if preds.int() == 0:
            cat = "No"
        elif preds.int() == 1:
            cat = "Yes"
        print(preds.cpu().numpy().item(), labels.cpu().numpy().item())
    return
# training config
epochs = 500
batch_size = 16  # mini-batch size
sample_size = 128  # number of samples used to train D & G in one epoch
learning_rate = {'pretrain': 1e-5, 'g': 1e-3, 'd': 1e-3}
k_step = 1  # number of discriminator updates per epoch
sample_rate = 50  # save results every sample_rate epochs
dropout = 0
pretrain_epochs = 30  # 0 disables pretraining
teacher_forcing_rate = 0.7
save_path = '/home/wu/projects/emo-gan/chkpt/rnngan'

# load data
train_data_path = "/home/wu/mounts/Emo-gesture/train_set.pkl"
with open(train_data_path, 'rb') as f:
    data = pickle.load(f)
dataset = MyDataset(data, max_len=max_len, num_joints=3, dim=dim)

gan = GAN(latent_code_size, hidden_size, generator_output_size,
          discriminator_output_size, num_layers, bidirectional, relu_slope,
          dropout, max_len=300)
gan.fit(dataset, epochs, batch_size, sample_size, learning_rate, k_step,
        sample_rate, pretrain_epochs, teacher_forcing_rate, save_path)
batch_input = batch_input.cuda() target = target.cuda() output, _ = clf(batch_input) pred = torch.max(output, 1)[1] correct += (pred == target).sum() acc = correct.item() / len(valid_loader.dataset) print("Acc {:.3f}".format(acc)) return acc if __name__ == '__main__': # Dataloader train_data = MyDataset('./dataset/metadata/train.csv', 'train', transform=transform) valid_data = MyDataset('./dataset/metadata/dev.csv', 'dev', transform=test_transform) train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=8) valid_loader = DataLoader(valid_data, batch_size=32, shuffle=False, num_workers=8) # Model clf = classifier().cuda()
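# `transform` and `test_transform` are used in the fragment above but not defined
# there; a plausible minimal sketch is below. The image size, augmentations and
# normalisation stats are assumptions, not the author's actual pipeline.
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])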
from dataloader import MyDataset checkpoint = '/raid/user-data/lscheucher/tmp/my_instance_segmentation/checkpoints/350my_instance_segmentation_SGD.pth' parent_dir = checkpoint.rsplit('/', maxsplit=1)[0] eval_folder = 'eval_' + checkpoint.rsplit('/', maxsplit=1)[1].split('.')[0] eval_dir = os.path.join(parent_dir, eval_folder) if not os.path.isdir(eval_dir): os.mkdir(eval_dir) image_dir = '/raid/group-data/uc150429/AID_DATA_201905/batch-123/original_image/' segvecs_dir = '/raid/group-data/uc150429/AID_DATA_201905/batch-123/center_vectors/' classes_dir = '/raid/group-data/uc150429/AID_DATA_201905/batch-123/pixelwise_annotation_xml' dataset_eval = MyDataset(phase='val', image_dir=image_dir, classes_dir=classes_dir, segvecs_dir=segvecs_dir) sampler = None eval_loader = torch.utils.data.DataLoader(dataset_eval, batch_size=3, shuffle=(sampler is None), num_workers=2, pin_memory=True, sampler=sampler) model = smp.Unet('resnet34', classes=4) model.load_state_dict(torch.load(checkpoint)) model.eval() # Loop over evaluation images
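# The evaluation loop itself is not shown above; a minimal sketch follows. It assumes
# each batch from MyDataset puts the image tensor first, that the 4-class Unet output
# is reduced with an argmax over the channel dimension, and the output filenames are
# illustrative only.
import numpy as np

model = model.cuda()
for batch_idx, batch in enumerate(eval_loader):
    images = batch[0].float().cuda()
    with torch.no_grad():
        logits = model(images)                     # (B, 4, H, W) class scores
        pred = logits.argmax(dim=1).cpu().numpy()  # (B, H, W) class indices
    np.save(os.path.join(eval_dir, f'pred_batch_{batch_idx}.npy'), pred)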
data_split = (train, dev, test) np.random.seed(42) img_list = np.random.permutation(int(sum(data_split))) # img_list = range(0, int(sum(data_split)+1), 1) # print(img_list) # sys.exit() model = Mobile_UNet() if args.training: train_dataset = MyDataset('train', root, img_list, data_split, transform=img_transform) train_loader_args = dict(batch_size=batch_size, shuffle=True, num_workers=8) train_loader = data.DataLoader(train_dataset, **train_loader_args) # print(train_dataset.__len__()) dev_dataset = MyDataset('dev', root, img_list, data_split, transform=test_transform) dev_loader_args = dict(batch_size=batch_size, shuffle=True,
def main_worker(gpu, n_gpus_per_node, args): global best_meteor global scores_record # distribute init print(f"Use GPU: {gpu} for training" ) if args.verbose and gpu is not None else False distributed = args.world_size > 1 args.gpu = gpu if distributed: args.rank = args.rank * n_gpus_per_node + gpu dist.init_process_group('nccl', init_method=args.dist_url, world_size=args.world_size, rank=args.rank) # dataset print('loading training dataset') if args.verbose else False train_dataset = MyDataset(args.train_dataset, args.text_feature, args.audio_feature, args.video_feature, None, args.min_freq, args.modality, max_len=args.seq_len, context_len=args.context_len, context=args.context, on_memory=args.on_memory) print('loading validation dataset') if args.verbose else False test_dataset = MyDataset(args.test_dataset, args.text_feature, args.audio_feature, args.video_feature, train_dataset, args.min_freq, args.modality, max_len=args.seq_len, context_len=args.context_len, context=args.context, on_memory=args.on_memory) # model print('loading model') if args.verbose else False if args.model == 'base': target_model = SimpleEncoderDecoderCat elif args.model == 'uni_tricoder': target_model = UnifiedTriEncoderTransformer else: raise ValueError(f'Unknown model : {args.model}') dim_feature = args.dim_audio if args.modality == 'a' else args.dim_video model = target_model(len(train_dataset.caption_vocab), dim_feature, args.dim_model, args.dim_ff, args.head, args.n_layer, args.dropout, args.modality, n_src_vocab=len(train_dataset.text_vocab), args=args) torch.cuda.set_device(gpu) model.cuda(gpu) total_params = sum(p.numel() for p in model.parameters()) trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) print(f"total parameters : {total_params}") if args.verbose else False print(f"trainable parameters : {trainable_params}" ) if args.verbose else False if distributed: args.batch_size = args.batch_size // n_gpus_per_node args.n_worker = (args.n_worker + n_gpus_per_node - 1) // n_gpus_per_node model = torch.nn.parallel.DistributedDataParallel( model, device_ids=[args.gpu], find_unused_parameters=True) torch.backends.cudnn.benchmark = True # dataloader if distributed: train_sampler = torch.utils.data.distributed.DistributedSampler( train_dataset) else: train_sampler = None train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.n_worker, sampler=train_sampler, shuffle=train_sampler is None) test_loader = DataLoader( test_dataset, batch_size=args.batch_size, num_workers=args.n_worker, ) # scheduler print('loading scheduler') if args.verbose else False scheduler = Scheduler(model, train_dataset.pad_idx, args) # epoch runner print('loading epoch runner') if args.verbose else False trainer = EpochRunner(model, train_loader, test_loader, scheduler, args) min_loss = float('inf') # run epoch for i in range(args.epoch): if train_sampler: train_sampler.set_epoch(i) loss = trainer.train(i) scores_record['epoch'].append(i) scores_record['train_loss'].append(loss) if i < args.warm_up: scores_record['eval_loss'].append(0) scores_record['bleu4'].append(0) scores_record['meteor'].append(0) scores_record['rouge_l'].append(0) scores_record['cider'].append(0) continue scores = trainer.eval(i, min_loss) min_loss = max(min_loss, scores['eval_loss']) if scores: best_meteor = max(best_meteor, scores['meteor']) is_best = best_meteor == scores['meteor'] if args.save_model and (i % args.log_freq == 0 or is_best): save_checkpoint( { 'epoch': i, 'state_dict': model.state_dict(), 
'scores': scores, 'optimizer': scheduler.optimizer.state_dict(), }, is_best, i, args.log_path) print( '**************************************************************' ) print(f'epoch({i}): scores {scores}') if args.verbose else False print( '**************************************************************' ) for each in scores: scores_record[each].append(scores[each]) if scores['bleu4'] != 0: record_path = os.path.join(args.log_path, 'score_record' + str(i) + '.csv') pd.DataFrame(scores_record).to_csv(record_path) print(f'best_meteor : {best_meteor}')
transforms.Resize(100), transforms.CenterCrop(100), transforms.ToTensor(), normalize, ]) # Create train, validation, and test data set #TRAIN DATA s=np.arange(39209) # total number of training data is 39209 s_train, s_val = train_test_split(s, test_size=0.35, random_state=1) #s_train, s_test = train_test_split(s_train, test_size=0.25, random_state=1) ds_train = MyDataset(data_path,'Train',train_transform, s_train) ds_val = MyDataset(data_path,'Train',train_transform, s_val) # TEST DATA s=np.arange(12630) #total number of test data is 12630 #np.random.seed(43) #np.random.shuffle(s) ds_test = MyDataset(data_path,'Test',train_transform, s) print(ds_train.names.shape) print(ds_val.names.shape) print(ds_test.names.shape) train_loader = torch.utils.data.DataLoader(dataset=ds_train,
label_ls_test = text_data.get_label_ls('test') word2idx = vocab.get_word2idx() word2idx = int_dict_increment_by(vocab.get_word2idx(),2) word2idx["<pad>"] = 0 word2idx["<unk>"] = 1 idx2word = dict_kv2vk(word2idx) print("dataloaded...time taken: " + str(time.time() - start)) train_x = text_2_int_list(text_ls_train, word2idx, args["max_doc_len"]) valid_x = text_2_int_list(text_ls_val, word2idx, args["max_doc_len"]) test_x = text_2_int_list(text_ls_test, word2idx, args["max_doc_len"]) # datasets train = MyDataset(train_x, label_ls_train) valid = MyDataset(valid_x, label_ls_val) test = MyDataset(test_x, label_ls_test) # hyper-parameters task = config["preprocess"]["dataset"] embed_dim = args["embed_dim"] nhead = args["nhead"] nhid = args["nhid"] nlayers = args["nlayers"] vocab_size = len(word2idx) bs = args["batch_size"] nclass = len(label2idx) lr = args["lr"] grad_clip = args["grad_clip"] print_iter = args["print_iter"]
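# int_dict_increment_by, dict_kv2vk and text_2_int_list are used above but not defined
# in this snippet; minimal sketches consistent with that usage follow (whitespace
# tokenisation is an assumption).
def int_dict_increment_by(d, n):
    """Shift every index in a word->index dict by n (here to reserve 0/1 for <pad>/<unk>)."""
    return {k: v + n for k, v in d.items()}

def dict_kv2vk(d):
    """Invert a dict, mapping values back to keys."""
    return {v: k for k, v in d.items()}

def text_2_int_list(texts, word2idx, max_doc_len):
    """Convert each text to a fixed-length list of indices, padding with 0 and mapping OOV words to 1."""
    out = []
    for text in texts:
        ids = [word2idx.get(w, 1) for w in text.split()][:max_doc_len]
        ids += [0] * (max_doc_len - len(ids))
        out.append(ids)
    return out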
from dataloader import MyDataset def count_acc(logits, label): pred = torch.argmax(logits, dim=1) return (pred == label).type(torch.cuda.FloatTensor).mean().item() if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--max_epoch', type=int, default=50) parser.add_argument('--batch_size', type=int, default=256) parser.add_argument('--lr', type=float, default=0.01) parser.add_argument('--step_size', type=int, default=5) parser.add_argument('--gamma', type=float, default=0.5) args = parser.parse_args() trainset = MyDataset('train') train_loader = DataLoader(dataset=trainset, num_workers=4, batch_size=args.batch_size, shuffle=True, drop_last=True, pin_memory=True) valset = MyDataset('val') val_loader = DataLoader(dataset=valset, num_workers=4, batch_size=args.batch_size, pin_memory=True) testset = MyDataset('test') test_loader = DataLoader(dataset=testset, num_workers=4, batch_size=args.batch_size, pin_memory=True) model = ConvNet() optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma) model = model.cuda() best_acc = 0.0 for epoch in range(args.max_epoch):
def run_train():
    learning_rate = 1e-3
    folder_path = "D:/Dataset/VOC_Dataset/"
    train_dataset = MyDataset(folder_path=folder_path + 'VOC2012_trainval', train_category='train')
    val_dataset = MyDataset(folder_path=folder_path + 'VOC2012_trainval', train_category='val')

    batch_size = 8
    train_data_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    val_data_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    my_model = model.YOLO_v2().cuda()
    my_model.train()
    optimizer = optim.Adam(my_model.parameters(), lr=1e-4, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

    training_epoch = 135
    grid_size = 13
    criterion = losses.DetectionLoss().cuda()

    train_loss = []
    val_loss = []
    sum_correct, sum = 0, 0

    for epoch in range(training_epoch):
        x_line = [i + 1 for i in range(epoch + 1)]
        epoch_loss = []

        print('------training------')
        for it, data in enumerate(train_data_loader):
            x = data[0].cuda()
            y = data[1]
            path = data[2][0]

            y_pred = my_model(x)
            gt = list(y)
            loss, obj_loss, no_obj_loss, conf_loss = criterion(y_pred, y)
            epoch_loss.append(loss.item())

            if it % 100 == 0:  # and it > 0:
                print('step [{0:} / {1}] \t loss : {2:3.4f} \t obj_loss : {3:3.4f} \t no_obj_loss : {4:3.4f} \t conf_loss : {5:3.4f}'
                      .format(it, len(train_data_loader), loss, obj_loss, no_obj_loss, conf_loss))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # decay the learning rate once per epoch
        scheduler.step()
        train_loss.append(np.mean(epoch_loss))
        epoch_loss = []

        print('------validation------')
        for it, data in enumerate(val_data_loader):
            x = data[0].cuda()
            img = x.clone()
            y = data[1]
            path = data[2]

            with torch.no_grad():
                y_pred = my_model(x)
                loss, obj_loss, no_obj_loss, conf_loss = criterion(y_pred, y)
            epoch_loss.append(loss.item())

            if it % 300 == 0 and it > 0:
                print('step [{0:} / {1}] \t loss : {2:3.4f} \t obj_loss : {3:3.4f} \t no_obj_loss : {4:3.4f} \t conf_loss : {5:3.4f}'
                      .format(it, len(val_data_loader), loss, obj_loss, no_obj_loss, conf_loss))
            # pred, pred_img = NonMaxSupression(y_pred, path, grid_size)
            # print(pred.shape, pred_img.shape)
            # cv.imshow("pred", pred_img)
            # cv.waitKey(0)

        val_loss.append(np.mean(epoch_loss))
        print('epoch [{0:} / {1}] \t loss : {2:3.4f} \t obj_loss : {3:3.4f} \t no_obj_loss : {4:3.4f} \t conf_loss : {5:3.4f}'
              .format(epoch, training_epoch, loss, obj_loss, no_obj_loss, conf_loss))

        plt.plot(x_line, train_loss, 'r-', label='train')
        plt.plot(x_line, val_loss, 'b-', label='val')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('YOLO_v2')
        # plt.show()
        plt.savefig('YOLO_v2_loss.png', dpi=300)
        torch.save(my_model.state_dict(), 'Weights/YOLO_v2_{}.pt'.format(epoch + 1))
def main(args=None):
    train_transform = transforms.Compose([
        transforms.Resize(96),
        transforms.RandomHorizontalFlip(p=0.5),
        # transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    test_transform = transforms.Compose([
        transforms.Resize(96),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    dataset_train = MyDataset(label_path=args.train_txt, transform=train_transform)
    dataset_val = MyDataset(label_path=args.val_txt, transform=test_transform)
    dataloader_train = DataLoader(dataset=dataset_train, batch_size=args.batch_size, shuffle=True)
    dataloader_val = DataLoader(dataset=dataset_val, batch_size=args.batch_size, shuffle=False)
    batch_num = len(dataset_train) // args.batch_size

    model = mobilenetv3()
    print('network:')
    print(model)

    save_path = './checkpoint'
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    use_cuda = torch.cuda.is_available() and len(args.gpus) > 0
    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        torch.cuda.set_device(args.gpus[0])
        torch.cuda.manual_seed(args.seed)
        # model = torch.nn.DataParallel(model, device_ids=args.gpus)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.0005)
    milestones = [50, 80, 120, 150]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1, last_epoch=-1)

    writer = SummaryWriter(log_dir='./summary')
    for epoch in range(args.start_epoch, args.epochs + 1):
        train_loss = train(dataloader_train, model, criterion, optimizer, epoch, scheduler, batch_num)
        test_loss = test(dataloader_val, model, criterion)
        scheduler.step()

        model_name = 'mask_detection'
        save_name = '{}/{}_{}.pth.tar'.format(save_path, model_name, epoch)
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            },
            filename=save_name)
        writer.add_scalars('scalar/loss', {
            'train_loss': train_loss,
            'test_loss': test_loss
        }, epoch)

    writer.export_scalars_to_json('./summary/' + 'pretrain' + 'all_scalars.json')
    writer.close()
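# save_checkpoint() is not defined in this snippet; with a state dict and a target
# filename it is most likely a thin wrapper around torch.save, sketched below.
def save_checkpoint(state, filename='checkpoint.pth.tar'):
    """Persist a training-state dict (epoch, model and optimizer state) to disk."""
    torch.save(state, filename)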
train_instances = prepare_instance(dicts, args.data_path, args, args.MAX_LENGTH) print("train_instances {}".format(len(train_instances))) if args.version != 'mimic2': dev_instances = prepare_instance( dicts, args.data_path.replace('train', 'dev'), args, args.MAX_LENGTH) print("dev_instances {}".format(len(dev_instances))) else: dev_instances = None test_instances = prepare_instance(dicts, args.data_path.replace('train', 'test'), args, args.MAX_LENGTH) print("test_instances {}".format(len(test_instances))) train_loader = DataLoader(MyDataset(train_instances), args.batch_size, shuffle=True, collate_fn=my_collate) if args.version != 'mimic2': dev_loader = DataLoader(MyDataset(dev_instances), 1, shuffle=False, collate_fn=my_collate) else: dev_loader = None test_loader = DataLoader(MyDataset(test_instances), 1, shuffle=False, collate_fn=my_collate)
def main():
    # training set
    data_set = MyDataset(200, dir_path=train_data_dir)
    data_set_val = MyDatasetEval(200, dir_path=train_data_dir)
    # dataloader = torch.utils.data.DataLoader(data_set, batch_size=16, shuffle=True)

    label_list = []
    for i in range(len(data_set)):
        label_list.append(data_set[i][1])

    total_loss = 0
    total_size = 0
    # classification_report per cross-validation fold
    report1 = ""
    report2 = ""
    report3 = ""
    conf1 = ""
    conf2 = ""
    conf3 = ""
    pre_score = 0
    re_score = 0
    f_score = 0

    # cross-validation
    kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)

    # loss function
    criterion = nn.CrossEntropyLoss()
    # criterion = FocalLoss(alpha=0.25, gamma=2)

    for fold_idx, idx in enumerate(kf.split(data_set, label_list)):
        # build the model
        # net = models.compare(pretrained=True)
        net = models.resnet34(pretrained=True)
        # print(net)

        # fully connected layer for vgg16
        # num_ftrs = net.classifier[6].in_features
        # net.classifier[6] = nn.Linear(num_ftrs, 2)

        # fully connected layer for resnet
        num_ftrs = net.fc.in_features
        net.fc = nn.Linear(num_ftrs, 2)
        # print(net)
        net = net.to(device)
        print("Network setup complete: starting training in train mode")

        # load pretrained model weights
        # net.load_state_dict(torch.load(weight_path), strict=False)

        # optimizer
        optimizer = optim.SGD(net.parameters(), lr=0.001)

        train_idx, valid_idx = idx
        train_loader = torch.utils.data.DataLoader(Subset(data_set, train_idx), batch_size=16, shuffle=True)
        valid_loader = torch.utils.data.DataLoader(Subset(data_set_val, valid_idx), batch_size=1, shuffle=False)

        # start in train mode
        net.train()
        # show a summary of the model
        summary(net, input_size=(3, 200, 200))

        train_loss_value = []  # list keeping the training loss for plotting
        train_acc_value = []   # list keeping the training accuracy for plotting

        # train on the same data for the given number of epochs
        for epoch in range(10):
            print("epoch =", epoch + 1)
            # loss accumulated in this epoch
            running_loss = 0.0
            for batch_idx, data in enumerate(train_loader):  # data holds the image and its label
                # unpack the data
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)

                # reset the gradients from the previous step
                optimizer.zero_grad()
                # forward pass
                outputs = net(inputs)
                # compare predictions with the ground-truth labels and compute the loss
                loss = criterion(outputs, labels)
                running_loss += loss.item()
                total_loss += loss.item()
                total_size += inputs.size(0)
                # update the network parameters based on the loss
                loss.backward()
                optimizer.step()

                # if batch_idx % 1 == 0:
                #     now = datetime.datetime.now()
                #     print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tAverage loss: {:.6f}'.format(
                #         now, epoch, batch_idx * len(inputs), len(dataloader.dataset),
                #         100. * batch_idx / len(dataloader), total_loss / total_size))
                # train_loss_value.append(running_loss * 50 / len(dataloader.dataset))  # keep the training loss for plotting
                # print("running_loss=", running_loss * 50 / len(dataloader.dataset))
                if batch_idx % 1 == 0:
                    now = datetime.datetime.now()
                    print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tAverage loss: {:.6f}'.format(
                        now, epoch + 1, batch_idx * len(inputs), len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), total_loss / total_size))

            train_loss_value.append(running_loss / len(train_loader.dataset))  # keep the training loss for plotting
            print("train_loss=", running_loss / len(train_loader))

        # switch to evaluation mode
        net.eval()
        print("Switching the network to eval mode")

        # test
        with torch.no_grad():
            pred = []
            Y = []
            target_name = ["correct 0", "broken 1"]
            for batch_idx, data in enumerate(valid_loader):
                input_val, label_val, image_name = data
                input_val = input_val.to(device)
                label_val = label_val.to(device)
                image_name = image_name[0].replace("'", "").replace(",", "")
                output_val = net(input_val)
                pred += [int(l.argmax()) for l in output_val]
                Y += [int(l) for l in label_val]

                # save misclassified images for inspection
                if Y[-1] != pred[-1]:
                    if Y[-1] == 1:
                        mis_image = cv2.imread(os.path.join(train_data_dir, "broken/{filename}".format(filename=image_name)))
                        # print(os.path.join(train2, "{filename}".format(filename=image_name)))
                        cv2.imwrite(os.path.join(img_save_dir, "broken/{filename}".format(filename=image_name)), mis_image)
                    else:
                        mis_image = cv2.imread(os.path.join(train_data_dir, "correct/{filename}".format(filename=image_name)))
                        # print(os.path.join(train2, "{filename}".format(filename=image_name)))
                        cv2.imwrite(os.path.join(img_save_dir, "correct/{filename}".format(filename=image_name)), mis_image)
                # save_image stores the network output
                # save_image(output_val, os.path.join(img_save_dir, "{filename}".format(filename=image_name)), nrow=1)

            pre_score += precision_score(Y, pred)
            re_score += recall_score(Y, pred)
            f_score += f1_score(Y, pred)
            print(classification_report(Y, pred, target_names=target_name))
            print(confusion_matrix(Y, pred))

            if fold_idx == 0:
                report1 = classification_report(Y, pred, target_names=target_name)
                conf1 = confusion_matrix(Y, pred)
                # save the model
                model_path = 'model_cross1.pth'
                torch.save(net.state_dict(), model_path)
            elif fold_idx == 1:
                report2 = classification_report(Y, pred, target_names=target_name)
                conf2 = confusion_matrix(Y, pred)
                # save the model
                model_path = 'model_cross2.pth'
                torch.save(net.state_dict(), model_path)
            else:
                report3 = classification_report(Y, pred, target_names=target_name)
                conf3 = confusion_matrix(Y, pred)
                # save the model
                model_path = 'model_cross3.pth'
                torch.save(net.state_dict(), model_path)

    conf_exp = "TN FP\nFN TP"
    print("==== cross 1 results ====")
    print(report1)
    print(conf1, "\n", conf_exp)
    print("==== cross 2 results ====")
    print(report2)
    print(conf2, "\n", conf_exp)
    print("==== cross 3 results ====")
    print(report3)
    print(conf3, "\n", conf_exp)
    print("broken_precision=", pre_score / 3)
    print("broken_recall=", re_score / 3)
    print("broken_f1=", f_score / 3)
model = smp.Unet('resnet34', classes=params.numclasses+2, encoder_weights='imagenet') model = model.to(device) #output = model.forward(inputs) #print("output-shape:", output.shape) preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder_name = 'resnet34', pretrained='imagenet') """dataloader""" image_dir = '/raid/group-data/uc150429/AID_DATA_201905/batch-123/original_image/' segvecs_dir = '/raid/group-data/uc150429/AID_DATA_201905/batch-123/center_vectors/' classes_dir = '/raid/group-data/uc150429/AID_DATA_201905/batch-123/pixelwise_annotation_xml' dataset_train = MyDataset(phase='train', image_dir = image_dir, classes_dir = classes_dir, segvecs_dir = segvecs_dir) dataset_val = MyDataset(phase='val', image_dir = image_dir, classes_dir = classes_dir, segvecs_dir = segvecs_dir) #print(x.shape) train_sampler = None train_loader = torch.utils.data.DataLoader( dataset_train, batch_size=params.batch_size, shuffle=(train_sampler is None), num_workers=20, pin_memory=True, sampler=train_sampler) val_loader = torch.utils.data.DataLoader( dataset_val, batch_size=params.batch_size, shuffle=False,
from model import classifier, classifier_sep import argparse if __name__ == '__main__': # Command parser parser = argparse.ArgumentParser() parser.add_argument("-p", '--checkpoint', type=str, required=True, help="model checkpoint") args = parser.parse_args() # Dataloader test_data = MyDataset('./dataset/metadata/test.csv', 'test', transform=test_transform) test_loader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=8) clf = classifier() clf.load_state_dict(torch.load(args.checkpoint)) clf = clf.cuda() clf.eval() f = open('test.csv', 'w') preds = [] for i, (batch_input, target) in enumerate(test_loader):
def main(args):
    if not os.path.isdir(args.checkpoint):
        try:
            os.makedirs(args.checkpoint)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = networks(num_class=train_cfg.num_class, pretrained=True).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-5)

    train_loader = torch.utils.data.DataLoader(MyDataset(train_cfg, train_mode=0),
                                               batch_size=train_cfg.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    eval_loader = torch.utils.data.DataLoader(MyDataset(val_cfg, train_mode=1),
                                              batch_size=val_cfg.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # init log
    log = open(os.path.join(args.checkpoint, 'log.txt'), 'w')
    log.write('Epoch\tLR\tAvg Train Loss\tAcc\t\n')
    log.flush()

    # rough size estimate assuming 4 bytes per float32 parameter
    print('Total params: %.2fMB' % (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    for epoch in range(args.start_epoch, train_cfg.epochs):
        lr = adjust_learning_rate(optimizer, epoch)
        print('epoch {}, lr={}'.format(epoch, lr))
        print('begin the {}th epoch'.format(epoch))

        train_loss = train(train_loader, model, criterion, optimizer)
        print('avg_train_loss:{:.5f}'.format(train_loss / len(train_loader)))
        acc = eval(eval_loader, model)
        print('accuracy:{:.5f}'.format(acc))

        log.write('{}\t{:.5f}\t{:.5f}\t{:.5f}\t\n'.format(epoch, lr, train_loss / len(train_loader), acc))
        log.flush()

        filename = 'epoch' + str(epoch + 1) + 'checkpoint.pth.tar'
        filepath = os.path.join(args.checkpoint, filename)
        torch.save(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, filepath)

    log.close()
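# adjust_learning_rate() is used above but not shown; a minimal sketch is given below.
# The step-decay schedule (divide the 5e-4 base rate by 10 every 20 epochs) is an
# assumption, not the author's actual schedule.
def adjust_learning_rate(optimizer, epoch, base_lr=5e-4, step=20):
    """Set a step-decayed learning rate on every parameter group and return it."""
    lr = base_lr * (0.1 ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr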
def main(world_size,
         epochs,
         rank,
         batch_size=200,
         backend='nccl',
         data_path='/dataset',
         lr=1e-3,
         momentum=0.01,
         weight_decay=1e-5,
         no_cuda=False,
         seed=35,
         aggregation_method='naive',
         load_model=False,
         load_path='/data'):
    '''Main Function'''
    # multi-GPU training is used here and still needs improvement
    use_cuda = not no_cuda and torch.cuda.is_available()
    torch.manual_seed(seed)
    timeline = time.strftime('%m%d%Y_%H:%M', time.localtime())
    device = torch.device("cuda" if use_cuda else "cpu")

    ratio = 0.8551957853612336
    weight = torch.FloatTensor([ratio, 1 - ratio]).to(device)
    Loss = NN.BCELoss(weight=weight)
    start_epoch = 1

    result_dir = 'logs' + '/' + timeline + '/' + 'results' + '/' + '{:s}' + '/'
    model_dir = 'logs' + '/' + timeline + '/' + 'models' + '/' + '{:s}' + '/'
    param_dir = 'logs' + '/' + timeline + '/'
    csvname = '{:s}_log.csv'
    modelname = 'model_{:d}.pth'
    paramname = 'param.csv'
    param = {
        'world_size': world_size,
        'batch_size': batch_size,
        'backend': backend,
        'lr': lr,
        'momentum': momentum,
        'weight_decay': weight_decay,
        'seed': seed,
        'aggregation': aggregation_method
    }

    if rank == 0:
        name = 'master'
        if not os.path.exists(param_dir):
            os.makedirs(param_dir)
        with open(param_dir + paramname, 'a', newline='') as p:
            fieldnames = param.keys()
            writer = csv.DictWriter(p, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerow(param)
            p.close()

        if aggregation_method == 'distillation':
            DistillationData = MyDataset(root=data_path, train=True, data_root='distillation.csv')
            distillation_dataloader = DataLoader(dataset=DistillationData, batch_size=batch_size,
                                                 shuffle=True, drop_last=True)

        model_set = []
        for worker_id in range(world_size):
            model_set.append(CNN_Model_withDropout().to(device))
        opt_set = []
        for worker_id in range(world_size):
            opt_set.append(
                optim.SGD(model_set[worker_id].parameters(), lr=lr,
                          momentum=momentum, weight_decay=weight_decay))

        if load_model:
            if aggregation_method == 'distillation':
                raise ValueError('Unexpected model')
            checkpoint = torch.load(load_path)
            for worker_id in range(world_size):
                model_set[worker_id].load_state_dict(checkpoint['model_state_dict'])
                opt_set[worker_id].load_state_dict(checkpoint['opt_state_dict'])
            start_epoch = checkpoint['epoch']

        model = SF.Master(model=model_set[0], backend=backend, rank=rank,
                          world_size=world_size, learning_rate=lr, device=device,
                          aggregation_method=aggregation_method)

        for epoch in range(start_epoch, epochs + 1):
            model.train()
            model.step(model_buffer=model_set)
            model.update(model_set[1:])
            for worker_id in range(world_size):
                adjust_learning_rate(opt_set[worker_id], epoch, lr)
            if aggregation_method == 'distillation':
                distillation(NN_set=model_set[1:], opt_set=opt_set[1:],
                             dataset=distillation_dataloader, world_size=world_size,
                             epoch=epoch, device=device)
                # best_idx = choose_best(NN_set=model_set[1:], name=name, dataset=dataloader, world_size=world_size,
                #                        epoch=epoch, Loss=Loss, time=timeline)
                # best_state_dict = model_set[best_idx+1].state_dict()
                # model_set[0].load_state_dict(best_state_dict)
                # all of the models have to be sent back here
    else:
        name = 'worker' + str(rank)
        DataSet_train = MyDataset(root=data_path, train=True, data_root='{}.csv'.format(name))
        dataloader_train = DataLoader(dataset=DataSet_train, batch_size=batch_size,
                                      shuffle=True, drop_last=True)
        DataSet_test = MyDataset(root=data_path, train=True, data_root='{}.csv'.format('test'))
        dataloader_test = DataLoader(dataset=DataSet_test, batch_size=batch_size,
                                     shuffle=True, drop_last=True)

        model_set = []
        for worker_id in range(world_size):
            model_set.append(CNN_Model_withDropout().to(device))
        train_model = model_set[0]
        optimizer = optim.SGD(train_model.parameters(), lr=lr,
                              momentum=momentum, weight_decay=weight_decay)

        if load_model:
            checkpoint = torch.load(load_path)
            for worker_id in range(world_size):
                model_set[worker_id].load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['opt_state_dict'])
            start_epoch = checkpoint['epoch']
            train_model = model_set[0]

        backup_model = CNN_Model_withDropout().to(device)
        model = SF.Servent(model=train_model, backend=backend, rank=rank,
                           world_size=world_size, device=device,
                           aggregation_method=aggregation_method)

        for epoch in range(start_epoch, epochs + 1):
            model.train()
            model.step(model_buffer=model_set, rank=rank)
            best_state_dict = train_model.state_dict()
            backup_model.load_state_dict(best_state_dict)
            adjust_learning_rate(optimizer, epoch, lr)
            train(dataloader=dataloader_train, model=train_model, optimizer=optimizer,
                  Loss=Loss, epoch=epoch, time=timeline,
                  result_dir=result_dir.format(name), model_dir=model_dir.format(name),
                  device=device, csvname=csvname.format(name), modelname=modelname)
            model.update(backup_model)
            test(dataloader=dataloader_test, model=train_model, epoch=epoch, Loss=Loss,
                 time=timeline, result_dir=result_dir.format(name),
                 model_dir=model_dir.format(name), csvname=csvname.format(name),
                 modelname=modelname, device=device)
with open('./configs/train_params.yaml') as config_file: config = yaml.load(config_file, Loader=yaml.FullLoader) config = Dict(config) device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') print('Device: ', device) model = MyModel() model.to(device) train_df = pd.read_csv('../data/train_dataset.csv') test_df = pd.read_csv('../data/test_dataset.csv') train_dataset = MyDataset(train_df, image_folder_path='../data/images', augmentations=get_training_augmentation()) test_dataset = MyDataset(test_df, image_folder_path='../data/images', augmentations=get_validation_augmentation()) train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size) test_dataloader = DataLoader(test_dataset, batch_size=1) optimizer = Adam(model.parameters(), lr=config.learning_rate) loss_function = CrossEntropyLoss() for epoch in range(config.epochs):
package_name = 'net_lstm'
model_name = 'LSTMClassifaierAndDenoise'
model_arg = 39
# batch_size = 10000
batch_size = 1
optimazier = 'Adam'
lr = 0.001

root_path = '/media/ophir/DATA1/Ophir/DeepLearning/project/data_for_lstm'
# mfcc_path = os.path.join(root_path, 'TRAIN', 'MFCC')
# stft_path = os.path.join(root_path, 'TRAIN', 'STFT')
# map_path = os.path.join(root_path, 'TRAIN', 'MAP')
main_path_train = os.path.join(root_path, 'TRAIN')
# main_path_train = '/media/ophir/DATA1/Ophir/DeepLearning/project/data_in_batchs'
train_data = MyDataset(main_path_train)
trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=False, num_workers=0)

# valid_mfcc_path = os.path.join(root_path, 'TEST', 'MFCC')
# valid_stft_path = os.path.join(root_path, 'TEST', 'STFT')
# valid_map_path = os.path.join(root_path, 'TEST', 'MAP')
main_path_val = os.path.join(root_path, 'TEST')
valid_data = MyDataset(main_path_val)
validloader = DataLoader(valid_data, batch_size=batch_size, shuffle=False, num_workers=0)
def train(args): print('Start') if torch.cuda.is_available(): device = 'cuda' torch.set_default_tensor_type('torch.cuda.FloatTensor') else: device = 'cpu' train_epoch = args.train_epoch lr = args.lr beta1 = args.beta1 beta2 = args.beta2 batch_size = args.batch_size noise_var = args.noise_var h_dim = args.h_dim images_path = glob.glob(args.data_dir+'/face_images/*/*.png') random.shuffle(images_path) split_num = int(len(images_path)*0.8) train_path = images_path[:split_num] test_path = images_path[split_num:] result_path = images_path[-15:] train_dataset = MyDataset(train_path) train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True) test_dataset = MyDataset(test_path) test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True) result_dataset = MyDataset(result_path) result_dataloader = torch.utils.data.DataLoader(result_dataset, batch_size=result_dataset.__len__(), shuffle=False) result_images = next(iter(result_dataloader)) model = AutoEncoder(h_dim=h_dim).to(device) criterion = nn.MSELoss() optimizer = torch.optim.Adam(model.parameters(), lr, (beta1, beta2)) out_path = args.model_dir train_loss_list = [] test_loss_list = [] for epoch in range(train_epoch): model.to(device) loss_train = 0 for x in train_dataloader: noised_x = add_noise(x, noise_var) recon_x = model(noised_x) loss = criterion(recon_x, x) optimizer.zero_grad() loss.backward() optimizer.step() loss_train += loss.item() loss_train /= train_dataloader.__len__() train_loss_list.append(loss_train) if epoch % 1 == 0: with torch.no_grad(): model.eval() loss_test = 0 for x_test in test_dataloader: recon_x_test = model(x_test) loss_test += criterion(recon_x_test, x_test).item() loss_test /= test_dataloader.__len__() test_loss_list.append(loss_test) np.save(os.path.join(out_path, 'train_loss.npy'), np.array(train_loss_list)) np.save(os.path.join(out_path, 'test_loss.npy'), np.array(test_loss_list)) model.train()
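# add_noise() is called in the denoising loop above but not defined in this snippet;
# a minimal sketch for additive Gaussian noise is given below (clamping to [0, 1]
# assumes the images are normalised to that range).
def add_noise(x, noise_var):
    """Corrupt a batch of images with zero-mean Gaussian noise of variance noise_var."""
    noise = torch.randn_like(x) * (noise_var ** 0.5)
    return torch.clamp(x + noise, 0.0, 1.0)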
def train(cfg): # logger logger = logging.getLogger(name="merlin.baseline.train") logger.info("training...") # transform transform_train_list = [ # transforms.RandomResizedCrop(size=128, scale=(0.75,1.0), ratio=(0.75,1.3333), interpolation=3), #Image.BICUBIC) transforms.Resize(size=cfg.INPUT.SIZE_TRAIN, interpolation=1), transforms.Pad(32), transforms.RandomCrop(cfg.INPUT.SIZE_TRAIN), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ] transform_val_list = [ transforms.Resize(size=cfg.INPUT.SIZE_TEST, interpolation=3), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ] # prepare dataset train_dataset = MyDataset(root=cfg.DATA.ROOT, transform=transforms.Compose(transform_train_list), type='train') val_dataset = MyDataset(root=cfg.DATA.ROOT, transform=transforms.Compose(transform_val_list), type='val') train_loader = DataLoader(train_dataset, batch_size=cfg.SOLVER.BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=False) val_loader = DataLoader(val_dataset, batch_size=cfg.SOLVER.BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=False) num_classes = cfg.MODEL.HEADS.NUM_CLASSES # prepare model model = build_model(cfg, num_classes) model = model.cuda() model = nn.DataParallel(model) # prepare solver optimizer = make_optimizer(cfg, model) scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) start_epoch = 0 # Train and val since = time.time() for epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCHS): model.train(True) logger.info("Epoch {}/{}".format(epoch, cfg.SOLVER.MAX_EPOCHS - 1)) logger.info('-' * 10) running_loss = 0.0 # Iterate over data it = 0 running_acc = 0 for data in train_loader: it += 1 # get the inputs inputs, labels = data now_batch_size, c, h, w = inputs.shape if now_batch_size < cfg.SOLVER.BATCH_SIZE: # skip the last batch continue # wrap them in Variable inputs = Variable(inputs.cuda().detach()) labels = Variable(labels.cuda().detach()) # zero the parameter gradients optimizer.zero_grad() # forward out = model(inputs) loss_dict = get_loss(cfg, outs=out, label=labels) loss = sum(loss_dict.values()) loss.backward() optimizer.step() scheduler.step() # statistics with torch.no_grad(): _, preds = torch.max(out['pred_class_logits'], 1) running_loss += loss running_acc += torch.sum(preds == labels.data).float().item() / cfg.SOLVER.BATCH_SIZE if it % 50 == 0: logger.info( 'epoch {}, iter {}, loss: {:.3f}, acc: {:.3f}, lr: {:.5f}'.format( epoch, it, running_loss / it, running_acc / it, optimizer.param_groups[0]['lr'])) epoch_loss = running_loss / it epoch_acc = running_acc / it logger.info('epoch {} loss: {:.4f} Acc: {:.4f}'.format(epoch, epoch_loss, epoch_acc)) # save checkpoint if epoch % cfg.SOLVER.CHECKPOINT_PERIOD == 0: checkpoint = {'epoch': epoch + 1, 'model': model.module.state_dict() if (len(cfg.MODEL.DEVICE_ID) - 2) > 1 else model.state_dict(), 'optimizer': optimizer.state_dict() } save_checkpoint(checkpoint, epoch, cfg) # evaluate if epoch % cfg.SOLVER.EVAL_PERIOD == 0: logger.info('evaluate...') model.train(False) total = 0.0 correct = 0.0 for data in val_loader: inputs, labels = data inputs = Variable(inputs.cuda().detach()) labels = Variable(labels.cuda().detach()) with torch.no_grad(): out = model(inputs) _, preds = torch.max(out['pred_class_logits'], 1) c = (preds == labels).squeeze() total += c.size(0) correct += c.float().sum().item() acc = correct / total 
logger.info('eval acc:{:.4f}'.format(acc)) time_elapsed = time.time() - since logger.info('Training complete in {:.0f}m {:.0f}s\n'.format( time_elapsed // 60, time_elapsed % 60)) return model
import torch import glob from torch.utils.data import DataLoader from torchvision import transforms from dataloader import MyDataset from fmeasure import calculateF1Measure composed = transforms.Compose([transforms.Grayscale(1), transforms.ToTensor()]) test_dataset = MyDataset('/path/*', '/path/*', transform=composed) test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=8) filenames = sorted(glob.glob('/path/*.png')) filenames = [x.split('/')[-1] for x in filenames] with torch.no_grad(): sum_val_loss_g1 = 0 sum_val_false_ratio_g1 = 0 sum_val_detect_ratio_g1 = 0 sum_val_F1_g1 = 0 g1_time = 0 sum_val_loss_g2 = 0 sum_val_false_ratio_g2 = 0 sum_val_detect_ratio_g2 = 0 sum_val_F1_g2 = 0
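# calculateF1Measure() comes from the local fmeasure module and its exact signature is
# not shown here; the sketch below is a plausible stand-in that thresholds the predicted
# map and computes a pixel-wise F1 against the ground truth. Argument order and the
# default threshold are assumptions.
import numpy as np

def calculateF1Measure(output_image, gt_image, thre=0.5):
    """Pixel-wise F1 score between a thresholded prediction map and a binary ground-truth map."""
    pred = (output_image > thre).astype(np.float64)
    gt = (gt_image > thre).astype(np.float64)
    eps = 1e-6
    recall = np.sum(pred * gt) / (np.sum(gt) + eps)
    precision = np.sum(pred * gt) / (np.sum(pred) + eps)
    return 2 * precision * recall / (precision + recall + eps)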