def main():
    """Train YOLOv1 on Pascal VOC, printing train mAP each epoch and
    plotting test predictions after epoch 99."""
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss()

    train_dataset = VOCDataset("data/train.csv", transform=transform,
                               img_dir=IMG_DIR, label_dir=LABEL_DIR)
    test_dataset = VOCDataset("data/test.csv", transform=transform,
                              img_dir=IMG_DIR, label_dir=LABEL_DIR)
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              num_workers=1, pin_memory=PIN_MEMORY,
                              shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                             num_workers=1, pin_memory=PIN_MEMORY,
                             shuffle=True, drop_last=True)

    for epoch in range(EPOCHS):
        pred_boxes, target_boxes = get_bboxes(train_loader, model,
                                              iou_threshold=0.5, threshold=0.4)
        mAP = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5)
        print(f"Train mAP:{mAP}")
        train_fn(train_loader, model, optimizer, loss_fn)

        if epoch > 99:
            for x, y in test_loader:
                x = x.to(DEVICE)
                # BUG FIX: the forward pass was previously re-run for every
                # idx in the plotting loop; run it once per batch and index
                # into the result instead.
                bboxes = cellboxes_to_boxes(model(x))
                for idx in range(16):
                    nms_boxes = non_max_suppression(bboxes[idx],
                                                    iou_threshold=0.5,
                                                    threshold=0.4)
                    plot_image(x[idx].permute(1, 2, 0).to("cpu"), nms_boxes)


if __name__ == "__main__":
    main()
def _setup_dataloaders(root_dir, return_dataset=False):
    """Build train/val dataloaders; optionally also return the raw datasets."""
    # Both splits run through the exact same preprocessing pipeline.
    shared_transforms = [
        aug.NormalizeBboxes(cfg.grid_size),
        aug.Bboxes2Matrices(cfg.grid_size, cfg.num_classes),
        aug.Resize(cfg.target_size),
        aug.Normalize(cfg.mean, cfg.std, 1. / 255),
        aug.ToTensor()
    ]

    ds_train = VOCDataset(root_dir, image_set="train")
    ds_val = VOCDataset(root_dir, image_set="val")
    dl_train = get_dataloader(ds_train, shared_transforms, cfg.batch_size,
                              num_workers=4)
    dl_val = get_dataloader(ds_val, shared_transforms, cfg.batch_size)

    if return_dataset:
        return dl_train, dl_val, ds_train, ds_val
    return dl_train, dl_val
def main():
    """Train YOLOv1 on the 100-example VOC subset, printing train mAP per epoch."""
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)

    train_dataset = VOCDataset("data/100examples.csv", transform=transform,
                               img_dir=IMG_DIR, label_dir=LABEL_DIR)
    test_dataset = VOCDataset("data/test.csv", transform=transform,
                              img_dir=IMG_DIR, label_dir=LABEL_DIR)

    # Both loaders share identical settings.
    loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                         pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)
    train_loader = DataLoader(dataset=train_dataset, **loader_kwargs)
    test_loader = DataLoader(dataset=test_dataset, **loader_kwargs)

    for epoch in range(EPOCHS):
        pred_boxes, target_boxes = get_bboxes(train_loader, model,
                                              iou_threshold=0.5, threshold=0.4)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes,
                                               iou_threshold=0.5,
                                               box_format="midpoint")
        print(f"Train mAP in {epoch}: {mean_avg_prec}")
        train_fn(train_loader, model, optimizer, loss_fn)
def get_dataloader(params, transform):
    """Create train/val DataLoaders.

    Args:
        params: tuple (BATCH_SIZE, NUM_WORKERS, PIN_MEMORY, DATA_DIR,
            CSV_TRAIN, CSV_VAL) where CSV_TRAIN / CSV_VAL may each be a
            single csv filename or a list of filenames to concatenate.
        transform: dict with 'train' and 'val' transform pipelines.

    Returns:
        dict with 'train' and 'val' DataLoaders.
    """
    BATCH_SIZE, NUM_WORKERS, PIN_MEMORY, DATA_DIR, CSV_TRAIN, CSV_VAL = params
    print('***:', type(CSV_TRAIN), type(CSV_VAL))
    img_dir = '{}/images'.format(DATA_DIR)
    label_dir = '{}/labels'.format(DATA_DIR)

    def _load_annotations(spec):
        # Accept either one csv filename or a list of filenames.
        if isinstance(spec, str):
            return pd.read_csv('{}/{}'.format(DATA_DIR, spec))
        frames = [pd.read_csv('{}/{}'.format(DATA_DIR, name)) for name in spec]
        if not frames:
            return pd.DataFrame(columns=['image', 'annotation'])
        # BUG FIX: DataFrame.append was deprecated and removed in pandas 2.x;
        # pd.concat is the supported replacement and is also O(total) instead
        # of re-copying the accumulator on every iteration.
        return pd.concat(frames, ignore_index=True)

    csv_t = _load_annotations(CSV_TRAIN)
    csv_v = _load_annotations(CSV_VAL)

    data_train = VOCDataset(dataset_csv=csv_t, img_dir=img_dir,
                            label_dir=label_dir, transform=transform['train'])
    data_val = VOCDataset(dataset_csv=csv_v, img_dir=img_dir,
                          label_dir=label_dir, transform=transform['val'])
    return {
        'train': DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True,
                            num_workers=NUM_WORKERS, drop_last=True,
                            pin_memory=PIN_MEMORY),
        'val': DataLoader(data_val, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, drop_last=True,
                          pin_memory=PIN_MEMORY)
    }
def main(model_name=None):
    """Run a validation pass for a saved checkpoint and pickle raw predictions.

    Args:
        model_name: basename of the checkpoint; '<model_name>.pt' is loaded.

    Raises:
        ValueError: if model_name is None.
    """
    if model_name is None:
        # BUG FIX: the default None previously crashed later with a TypeError
        # on `model_name + '.pt'`; fail fast with a clear message instead.
        raise ValueError("model_name is required to locate '<model_name>.pt'")

    tr = transforms.Compose([
        transforms.RandomResizedCrop(300),
        transforms.ToTensor(),
        transforms.Normalize([0.4589, 0.4355, 0.4032],
                             [0.2239, 0.2186, 0.2206])
    ])
    val_set = VOCDataset(directory, 'val', transforms=tr)
    val_loader = DataLoader(val_set, batch_size=batch_size,
                            collate_fn=collate_wrapper, shuffle=False,
                            num_workers=16)

    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 20)
    model.load_state_dict(torch.load(model_name + '.pt'))
    model.to(device)

    # Weight each class inversely to its frequency so rare classes are not
    # drowned out by frequent ones.
    classwise_frequencies = np.array(list(val_set.classes_count.values()))
    minimum_frequency = np.min(classwise_frequencies)
    loss_weights = torch.Tensor(minimum_frequency / classwise_frequencies).to(device)
    loss_function = nn.BCEWithLogitsLoss(weight=loss_weights)

    val_loss, predictions, targets = validate(model, device, val_loader,
                                              loss_function)

    print("Saving raw predictions for validation pass...")
    # `with` closes the file; the explicit close() in the original was redundant.
    with open("{}_validation.pkl".format(model_name), 'wb') as f:
        pred_targets = torch.cat((predictions.unsqueeze(0),
                                  targets.unsqueeze(0)))
        pickle.dump(pred_targets, f)
def main():
    """Train an SSD detector on Pascal VOC with cosine-annealed Adam."""
    batch_size = 64
    n_anchors = 4

    dataset = VOCDataset(TRAIN_JSON, TRAIN_JPEG, device=DEVICE)
    loader = VOCDataLoader(dataset, batch_size=batch_size, num_workers=0)

    n_classes = len(dataset.id2cat)
    cycle_len = math.ceil(len(dataset) / batch_size)

    model = SSD(n_classes=n_classes, bias=-3.)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = CosineAnnealingLR(optimizer, t_max=cycle_len)
    loop = Loop(model, optimizer, scheduler, device=DEVICE)

    # Fixed anchor grid and per-cell sizes, kept out of the autograd graph.
    anchors = t(make_grid(n_anchors), requires_grad=False).float().to(DEVICE)
    grid_sizes = t([1 / n_anchors], requires_grad=False).unsqueeze(1).to(DEVICE)

    bce_loss = BinaryCrossEntropyLoss(n_classes)

    def loss_fn(x, y):
        return ssd_loss(x, y, anchors, grid_sizes, bce_loss, n_classes)

    loop.run(
        train_data=loader,
        epochs=100,
        loss_fn=loss_fn,
        callbacks=[Logger()]
    )
def main(args):
    """Dispatch to inference and/or training on the VOC dataset."""
    dataset = VOCDataset('data')
    if args.infer:
        infer(dataset)
    if args.train:
        train(dataset)
def evaluate():
    """Evaluate a saved SSD300 checkpoint on a VOC split and print
    per-class AP plus the overall mAP."""
    checkpoint = torch.load(os.path.join(args.model_root, args.model_name),
                            map_location=device)
    model = SSD300(n_classes=len(label_map), device=device).to(device)
    model.load_state_dict(checkpoint['model'])

    transform = Transform(size=(300, 300), train=False)
    test_dataset = VOCDataset(root=args.data_root, image_set=args.image_set,
                              transform=transform, keep_difficult=True)
    test_loader = DataLoader(dataset=test_dataset, collate_fn=collate_fn,
                             batch_size=args.batch_size,
                             num_workers=args.num_workers,
                             shuffle=False, pin_memory=True)

    detected_bboxes, detected_labels, detected_scores = [], [], []
    true_bboxes, true_labels, true_difficulties = [], [], []

    model.eval()
    with torch.no_grad():
        for images, bboxes, labels, difficulties in tqdm(test_loader,
                                                         desc='Evaluate the model'):
            images = images.to(device)
            bboxes = [b.to(device) for b in bboxes]
            labels = [l.to(device) for l in labels]
            difficulties = [d.to(device) for d in difficulties]

            predicted_bboxes, predicted_scores = model(images)
            _bboxes, _labels, _scores = model.detect_objects(
                predicted_bboxes, predicted_scores,
                min_score=0.01, max_overlap=0.45, top_k=200)

            detected_bboxes.extend(_bboxes)
            detected_labels.extend(_labels)
            detected_scores.extend(_scores)
            true_bboxes.extend(bboxes)
            true_labels.extend(labels)
            true_difficulties.extend(difficulties)

    all_ap, mean_ap = calculate_mAP(detected_bboxes, detected_labels,
                                    detected_scores, true_bboxes, true_labels,
                                    true_difficulties, device=device)
    PrettyPrinter().pprint(all_ap)
    print('Mean Average Precision (mAP): %.4f' % mean_ap)
def run():
    """Evaluate a saved Pascal VOC classifier on the validation split using
    five-crop test-time augmentation, saving raw outputs to disk."""
    # BUG FIX: the parser previously described itself as a "PyTorch MNIST
    # Example" although this script evaluates on Pascal VOC.
    parser = argparse.ArgumentParser(description='Pascal VOC evaluation')
    parser.add_argument('--batch-size', type=int, default=32, metavar='N',
                        help='input batch size for training (default: 32)')
    # BUG FIX: help text claimed "(default: 1000)" but the default is 32.
    parser.add_argument('--test-batch-size', type=int, default=32, metavar='N',
                        help='input batch size for testing (default: 32)')
    parser.add_argument('--epochs', type=int, default=15, metavar='N',
                        help='number of epochs to train (default: 15)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--mode', type=str, default='A', metavar='M',
                        help='Mode of model')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    root = './'
    model_path = './results/pascalvoc_A.pt'

    # Five-crop TTA: each image becomes a stack of 5 normalized 224x224 crops.
    test_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.FiveCrop(224),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])(crop)
             for crop in crops])),
    ])

    # Get dataset and input into Dataloader
    test_loader = torch.utils.data.DataLoader(
        VOCDataset(root, 'val', transform=test_transform),
        batch_size=args.test_batch_size, shuffle=False)

    test_loss_function = F.binary_cross_entropy_with_logits

    # Define Model
    model = load_model(model_path)
    model = model.to(device)

    val_loss, val_acc, output = test(args, model, device, test_loader,
                                     test_loss_function)
    torch.save(output, 'val_set_results.pt')
def main():
    """Train YOLOv1 on VOC images; no evaluation or checkpointing yet."""
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        pass  # checkpoint restore not implemented yet

    # Both splits use the same 448x448 resize + tensor conversion.
    train_tf = transforms.Compose(
        [transforms.Resize(size=(448, 448)), transforms.ToTensor()])
    test_tf = transforms.Compose(
        [transforms.Resize(size=(448, 448)), transforms.ToTensor()])

    train_dataset = VOCDataset(csv_file='', img_root=IMG_DIR, S=7, B=2, C=20,
                               transform=train_tf)
    test_dataset = VOCDataset(csv_file='', img_root=IMG_DIR, transform=test_tf)

    loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                         pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)
    train_loader = DataLoader(dataset=train_dataset, **loader_kwargs)
    test_loader = DataLoader(dataset=test_dataset, **loader_kwargs)

    for _ in range(EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn)
def get_dataloader(params):
    """Create train/val DataLoaders from a params tuple.

    params: (BATCH_SIZE, NUM_WORKERS, PIN_MEMORY, DATA_DIR, CSV_TRAIN, CSV_VAL)
    """
    BATCH_SIZE, NUM_WORKERS, PIN_MEMORY, DATA_DIR, CSV_TRAIN, CSV_VAL = params

    img_dir = f'{DATA_DIR}/images'
    label_dir = f'{DATA_DIR}/labels'
    csv_paths = {
        'train': f'{DATA_DIR}/{CSV_TRAIN}',
        'val': f'{DATA_DIR}/{CSV_VAL}',
    }

    # Identical resize-to-448 + tensor pipeline for both splits.
    resize_tf = transforms.Compose([
        transforms.Resize((448, 448)),
        transforms.ToTensor(),
    ])

    datasets = {
        split: VOCDataset(dataset_csv=path, img_dir=img_dir,
                          label_dir=label_dir, transform=resize_tf)
        for split, path in csv_paths.items()
    }
    return {
        split: DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, drop_last=True,
                          pin_memory=PIN_MEMORY)
        for split, ds in datasets.items()
    }
def main():
    """Overfit YOLOv1 on an 8-example VOC subset, printing train mAP each epoch."""
    model = YOLOv1(split_size=7, num_boxes=2, num_classes=20).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=wd)
    loss_fn = YoloLoss()

    if load_model:
        load_checkpoint(torch.load(load_model_file), model, optimizer)

    train_dataset = VOCDataset("data/8examples.csv", transform=transform,
                               img_dir=img_dir, label_dir=label_dir)
    test_dataset = VOCDataset("data/test.csv", transform=transform,
                              img_dir=img_dir, label_dir=label_dir)

    # Train keeps partial batches (drop_last=False); test drops them.
    train_loader = DataLoader(dataset=train_dataset, batch_size=bs,
                              num_workers=num_workers, pin_memory=pin_mem,
                              shuffle=True, drop_last=False)
    test_loader = DataLoader(dataset=test_dataset, batch_size=bs,
                             num_workers=num_workers, pin_memory=pin_mem,
                             shuffle=True, drop_last=True)

    for _ in range(epochs):
        pred_boxes, target_boxes = get_bboxes(train_loader, model,
                                              iou_threshold=0.5, threshold=0.4)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes,
                                               iou_threshold=0.5,
                                               box_format="midpoint")
        print(f"Train mAP: {mean_avg_prec}")
        train_fn(train_loader, model, optimizer, loss_fn)
def train():
    """End-to-end training of FCN-8s on Pascal VOC 2012 from VGG16 weights."""
    ROOT = './'
    VGG16_WEIGHT_PATH = './vgg/vgg16_weights.npz'
    DATASET_PATH = os.path.join(ROOT, 'VOC2012/')
    CHECKPOINT_DIR = os.path.join(DATASET_PATH, 'saved_model')
    IMAGE_SHAPE = (512, 512)
    N_CLASSES = 21
    N_EPOCHS = 100
    BATCH_SIZE = 1
    LEARNING_RATE = 1e-5
    DECAY_RATE = 0.95
    DECAY_EPOCH = 10
    DROPOUT_RATE = 0.5

    print('Starting end-to-end training FCN-8s')

    # BUG FIX: use the compat.v1 namespace consistently. tf.ConfigProto /
    # tf.GPUOptions do not exist at the top level in TF 2.x, while
    # InteractiveSession was already taken from tf.compat.v1. The previous
    # stray `session.as_default()` call discarded its context manager (a
    # no-op -- InteractiveSession already installs itself as default).
    session_config = tf.compat.v1.ConfigProto(
        gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
    tf.compat.v1.InteractiveSession(config=session_config)

    # ------------- Load VOC from TFRecord ---------------
    dataset = VOCDataset()
    dataset_train = dataset.load_dataset(DATASET_PATH, BATCH_SIZE,
                                         is_training=True)
    dataset_val = dataset.load_dataset(DATASET_PATH, BATCH_SIZE,
                                       is_training=False)

    # ------------- Build fcn model ------------
    fcn = FCN(IMAGE_SHAPE, N_CLASSES, VGG16_WEIGHT_PATH)
    fcn.build_from_vgg()

    learning_rate_fn = learning_rate_with_exp_decay(
        BATCH_SIZE, dataset.n_images['train'], DECAY_EPOCH, DECAY_RATE,
        LEARNING_RATE)
    compile_model(fcn, learning_rate_fn)
    fit_model(fcn, N_EPOCHS, BATCH_SIZE, dataset_train, dataset_val,
              CHECKPOINT_DIR, DROPOUT_RATE)
def main(args):
    """Crop every object in the dataset and save it into per-category folders."""
    dataset = VOCDataset(args.images_dir, args.labels_dir)
    for image in dataset:
        for detected_object in image.objects:
            save_object(detected_object, args.action_types, args.save_dir,
                        must_include_all_actions=False, save_negatives=True)
def get_detection_dataset(data_type, subset, root):
    """Return the detection dataset matching `data_type`.

    Args:
        data_type: 'voc', 'clipart', or one of the BAM media classes.
        subset: split name passed straight to the dataset constructor.
        root: dataset root directory.

    Raises:
        NotImplementedError: for an unrecognized data_type.
    """
    if data_type in bam_media_classes:
        dataset = BAMDataset(root, subset)
    elif data_type == 'clipart':
        dataset = ClipArtDataset(root, subset)
    elif data_type == 'voc':
        dataset = VOCDataset(root, subset)
    else:
        raise NotImplementedError
    # isinstance() is the idiomatic, subclass-aware form of the previous
    # issubclass(type(dataset), ...) check.
    assert isinstance(dataset, chainer.dataset.DatasetMixin)
    return dataset
def test():
    """Visually check VOCDataset's encode/decode round trip on one sample."""
    import numpy as np
    import argparse
    import matplotlib.pyplot as plt
    from visual import draw_rect
    from dataset import VOCDataset

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase', default='train',
                        help='载入哪一部分的数据,默认是train,还可以是valid、test')
    parser.add_argument('-c', '--channle', default=4, type=int,
                        help='可视化preds的哪个维度,默认是4,即第一个B的confidence')
    parser.add_argument('-f', '--func', default='encode',
                        choices=['encode', 'decode'],
                        help='测试的方法,默认是encode,也可以是decode')
    args = parser.parse_args()

    dataset = VOCDataset('G:/dataset/VOC2012/VOCdevkit/VOC2012/',
                         phase=args.phase, drop_diff=False,
                         return_tensor=True, out='all')
    # Only the first sample is inspected (the loop breaks immediately).
    for img, labels, locs, preds in dataset:
        if args.func == 'encode':
            # Show the annotated image next to the selected prediction channel.
            img = draw_rect(img, locs, labels=labels)
            _, axes = plt.subplots(ncols=2, figsize=(10, 5))
            axes[0].imshow(np.asarray(img))
            axes[1].imshow(preds[..., args.channle])
            plt.show()
        else:
            # Decode back from the encoded prediction and compare with the
            # ground-truth labels/locations printed alongside.
            res_c, res_s, res_l = dataset.y_encoder.decode(preds, img.size)
            print(res_c)
            print(res_s)
            print(res_l)
            print(labels)
            print(locs)
            img = draw_rect(img, res_l)
            _, axes = plt.subplots(ncols=2, figsize=(10, 5))
            axes[0].imshow(np.asarray(img))
            axes[1].imshow(preds[..., args.channle])
            plt.show()
        break
def test():
    """Visualize the full augmentation pipeline on VOC samples."""
    import numpy as np
    import argparse
    import matplotlib.pyplot as plt
    from torchvision import transforms
    from visual import draw_rect
    from dataset import VOCDataset

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase', default='train',
                        help='载入哪一部分的数据,默认是train,还可以是valid、test')
    args = parser.parse_args()

    # NOTE: slightly different from the reference implementation on GitHub,
    # where each transform is independently skipped with probability 0.5;
    # here the three color transforms are applied together or not at all.
    color_transfers = transforms.RandomApply([
        transforms.ColorJitter(
            brightness=(0.5, 1.5), saturation=(0.5, 1.5), hue=(-0.2, 0.2))
    ], 0.5)
    img_transfers = OnlyImage([RandomBlur(), color_transfers])
    all_transfers = Compose([
        RandomHorizontalFlip(),
        RandomResize(),
        img_transfers,
        RandomShift(),
        RandomCrop()
    ])

    dataset = VOCDataset('G:/dataset/VOC2012/VOCdevkit/VOC2012/',
                         phase=args.phase, drop_diff=False,
                         return_tensor=True, transfers=all_transfers)
    for img, labels, locs in dataset:
        annotated = draw_rect(img, locs, labels=labels)
        plt.imshow(np.asarray(annotated))
        plt.show()
def train():
    """Train YOLOv2 on VOC2012, logging losses to TensorBoard, saving sample
    detections every 10 steps, and a checkpoint per epoch."""
    net.train()

    # define optimizer
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.decay)

    # create data batch generator
    training_set = VOCDataset("D:/dataset/VOC/VOCdevkit/", "2012", "train",
                              image_size=net.IMAGE_W)
    dataloader = DataLoader(training_set, shuffle=True,
                            batch_size=net.BATCH_SIZE)
    N_ITERS_PER_EPOCH = len(dataloader)

    writer = SummaryWriter()
    if torch.cuda.is_available():
        writer.add_graph(net.cpu(), torch.rand(4, 3, 416, 416))
    else:
        writer.add_graph(net, torch.rand(4, 3, 416, 416))

    for epoch in range(args.epoch):
        for step, (images, labels) in enumerate(dataloader):
            # Skip the final partial batch.
            if images.shape[0] != net.BATCH_SIZE:
                continue
            print("")
            print("========== Epoch: {}, step: {}/{} ==========".format(
                epoch, step, N_ITERS_PER_EPOCH))
            time_start = time.time()

            if torch.cuda.is_available():
                image = Variable(images.cuda(), requires_grad=True)
            else:
                image = Variable(images, requires_grad=True)

            optimizer.zero_grad()
            # BUG FIX: the forward pass previously ran on the raw `images`
            # batch, so the device-moved `image` variable was never used and
            # training stayed on CPU tensors even when CUDA was available.
            # Calling net(...) (instead of net.forward(...)) also runs hooks.
            output = net(image)
            loss_xy, loss_wh, loss_conf, loss_cls = net.loss(output, labels)
            loss_coord = loss_xy + loss_wh
            total_loss = loss_coord + loss_conf + loss_cls
            total_loss.backward()
            optimizer.step()

            total_loss, loss_xy, loss_wh, loss_conf, loss_cls = [
                l.item()
                for l in [total_loss, loss_xy, loss_wh, loss_conf, loss_cls]
            ]

            ### logs to tensorboard
            global_step = epoch * N_ITERS_PER_EPOCH + step
            writer.add_scalar('Train/Total_loss', total_loss, global_step)
            writer.add_scalar('Train/Coordination_xy_loss', loss_xy, global_step)
            writer.add_scalar('Train/Coordination_wh_loss', loss_wh, global_step)
            writer.add_scalar('Train/Confidence_loss', loss_conf, global_step)
            writer.add_scalar('Train/Class_loss', loss_cls, global_step)

            ### log to console
            print('- Train step time: {} seconds'.format(time.time() - time_start))
            print('- Train/Coordination_xy_loss: ', loss_xy)
            print('- Train/Coordination_wh_loss: ', loss_wh)
            print('- Train/Confidence_loss: ', loss_conf)
            print('- Train/Class_loss: ', loss_cls)
            print('- Train/Total_loss: ', total_loss)

            if step % 10 == 0:
                boxes = get_detection_result(output, net.ANCHORS, net.CLASS,
                                             conf_thres=0.5, nms_thres=0.4)
                # draw ground-truth (green) and detected (red) boxes on a sample
                im = images[0].data.numpy().astype('uint8')
                im = im.transpose(1, 2, 0)
                im = im.copy()
                color_red = (0, 0, 255)
                color_green = (0, 255, 0)
                im = draw_boxes(im, labels[0], net.LABELS, color=color_green)
                im = draw_boxes(im, boxes[0], net.LABELS, color=color_red)
                file_path = os.path.join(
                    args.output,
                    "result_epoch_{}_iter_{}.jpg".format(epoch, step))
                cv2.imwrite(file_path, im)

        ### save model once per epoch (the filename only varies by epoch, so
        ### re-saving inside the step loop would be redundant)
        model_path = os.path.join(args.model_dir,
                                  "yolov2_epoch_{}.weights".format(epoch))
        torch.save(net.state_dict(), model_path)
        print("Saved model: ", model_path)

    writer.close()
# Example: build a VOC2012 training DataLoader with a center-crop + tensor
# transform pipeline and a custom collate function.
from torchvision import transforms
from torch.utils.data import DataLoader
from dataset import VOCDataset, collate_wrapper

# Root folder of the extracted VOC2012 dataset.
directory = 'VOC2012'
tr = transforms.Compose([transforms.CenterCrop(224), transforms.ToTensor()])
# multi_instance=True -- presumably keeps repeated labels per image; confirm
# against VOCDataset's implementation.
train = VOCDataset(directory, 'train', transforms=tr, multi_instance=True)
train_loader = DataLoader(train, batch_size=16, collate_fn=collate_wrapper,
                          shuffle=True, num_workers=4)
"""
How to enumerate across the DataLoader:

for _, batch in enumerate(train_loader):
    batch_of_image_tensors = batch.image
    batch of label_lists = batch.labels
"""
def main(mode, num_epochs, num_workers, lr, sc, model_name=None):
    """Train a ResNet-34 multi-label classifier on VOC.

    Args:
        mode: 'BCE' (frequency-weighted BCE loss) or 'NB' (Naive-Bayes loss).
        num_epochs: epochs to run in this session.
        num_workers: DataLoader worker count.
        lr: SGD learning rate.
        sc: scaling constant for the NB loss.
        model_name: optional checkpoint basename to resume from
            ('<model_name>.pt' plus saved loss histories).
    """
    tr = transforms.Compose([
        transforms.RandomResizedCrop(300),
        transforms.ToTensor(),
        transforms.Normalize([0.4589, 0.4355, 0.4032],
                             [0.2239, 0.2186, 0.2206])])
    augs = transforms.Compose([
        transforms.RandomResizedCrop(300),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize([0.4589, 0.4355, 0.4032],
                             [0.2239, 0.2186, 0.2206])])

    # Get the NB matrix from the dataset, counting multiple instances of labels.
    nb_dataset = VOCDataset(directory, 'train', transforms=tr,
                            multi_instance=True)
    nb = NaiveBayes(nb_dataset, 1)
    mat = nb.get_nb_matrix()
    print_nb_matrix(nb_dataset, mat)
    mat = torch.Tensor(mat).to(device)

    # Define the training dataset, removing multiple instances for the
    # training problem.
    train_set = VOCDataset(directory, 'train', transforms=augs,
                           multi_instance=False)
    train_loader = DataLoader(train_set, batch_size=batch_size,
                              collate_fn=collate_wrapper, shuffle=True,
                              num_workers=num_workers)
    val_set = VOCDataset(directory, 'val', transforms=tr)
    val_loader = DataLoader(val_set, batch_size=batch_size,
                            collate_fn=collate_wrapper, shuffle=True,
                            num_workers=num_workers)

    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 20)

    # BUG FIX: compare against None with `is`, not `==`.
    if model_name is None:
        train_losses = []
        val_losses = []
        curr_epoch = 0
    else:
        model.load_state_dict(torch.load(model_name + '.pt'))
        print('Loading history')
        train_losses = np.load(
            'train_history_{}_{}.npy'.format(mode, model_name)).tolist()
        val_losses = np.load(
            'val_history_{}_{}.npy'.format(mode, model_name)).tolist()
        curr_epoch = int(model_name.split('_')[-2])
    model.to(device)

    print('Starting optimizer with LR={}'.format(lr))
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    # ====================================== #
    # Use either:                            #
    # loss_function = nn.BCEWithLogitsLoss() #
    # loss_function = MultiLabelNBLoss(mat)  #
    # ====================================== #
    if mode == 'BCE':
        # Inverse-frequency class weights: the rarest class gets weight 1.0.
        classwise_frequencies = np.array(list(train_set.classes_count.values()))
        minimum_frequency = np.min(classwise_frequencies)
        loss_weights = minimum_frequency / classwise_frequencies
        loss_weights = torch.Tensor(loss_weights).to(device)
        loss_function = nn.BCEWithLogitsLoss(weight=loss_weights)
    elif mode == 'NB':
        loss_function = MultiLabelNBLoss(mat, scaling_c=sc)

    try:
        for epoch in range(1, num_epochs + 1):
            train_loss = train(model, device, train_loader, optimizer,
                               curr_epoch + 1, loss_function)
            val_loss, predictions, targets = validate(model, device,
                                                      val_loader, loss_function)
            print("Saving raw predictions for epoch {}...".format(curr_epoch + 1))
            with open("pred_{}_{}.pkl".format(mode, curr_epoch + 1), 'wb') as f:
                pred_targets = torch.cat((predictions.unsqueeze(0),
                                          targets.unsqueeze(0)))
                pickle.dump(pred_targets, f)
            if (len(val_losses) > 0) and (val_loss < min(val_losses)):
                torch.save(model.state_dict(),
                           "lr{}_sc{}_model_{}_{}_{:.4f}.pt".format(
                               lr, sc, mode, curr_epoch + 1, val_loss))
                print("Saving model (epoch {}) with lowest validation loss: {}"
                      .format(epoch, val_loss))
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            # Rolling checkpoint for the KeyboardInterrupt path below.
            torch.save(model.state_dict(), 'temp_model.pt')
            curr_epoch += 1
        model_save_name = "stop_lr{}_sc{}_model_{}_{}_{:.4f}.pt".format(
            lr, sc, mode, curr_epoch, val_losses[-1])
        torch.save(model.state_dict(), model_save_name)
    except KeyboardInterrupt:
        # On Ctrl-C, roll back to the last completed epoch's weights.
        model.load_state_dict(torch.load('temp_model.pt'))
        model_save_name = "pause_lr{}_sc{}_model_{}_{}_{:.4f}.pt".format(
            lr, sc, mode, curr_epoch, val_losses[-1])
        torch.save(model.state_dict(), model_save_name)
        print("Saving model (epoch {}) with current validation loss: {}".format(
            curr_epoch, val_losses[-1]))

    train_history = np.array(train_losses)
    val_history = np.array(val_losses)
    print('Saving history')
    # NOTE(review): [5:-3] strips the 'stop_' prefix and '.pt' suffix, but the
    # 'pause_' prefix is six characters, so resumed names keep a leading '_'
    # -- confirm whether that asymmetry is intended.
    np.save("train_history_{}_{}".format(mode, model_save_name[5:-3]),
            train_history)
    np.save("val_history_{}_{}".format(mode, model_save_name[5:-3]),
            val_history)
def main():
    """Train YOLOv1 on a custom face dataset, reporting train mAP each epoch."""
    model = Yolov1(split_size=S, num_boxes=B, num_classes=C).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss(S=S, B=B, C=C)

    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)

    train_dataset = VOCDataset(
        training_path='/home/mt/Desktop/For_github/computer_vision_projects/face_recognition/data',
        S=3, C=2, transform=transform)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              num_workers=NUM_WORKERS,
                              pin_memory=PIN_MEMORY,
                              shuffle=True,
                              drop_last=True)

    for _ in range(EPOCHS):
        pred_boxes, target_boxes = get_bboxes(train_loader, model,
                                              iou_threshold=0.5, threshold=0.4)
        epoch_map = mean_average_precision(pred_boxes, target_boxes,
                                           iou_threshold=0.5,
                                           box_format="midpoint")
        print(f"Train mAP: {epoch_map}")
        train_fn(train_loader, model, optimizer, loss_fn)
from fcn import FCN # Killing optional CPU driver warnings os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' tf.logging.set_verbosity(tf.logging.ERROR) if __name__ == '__main__': image_shape = (512, 512) n_classes = 21 vgg16_weights_path = './vgg/vgg16_weights.npz' model = FCN(image_shape, n_classes, vgg16_weights_path) model.build_from_vgg() root_path = './' dataset_path = os.path.join(root_path, 'VOC2012/') dataset = VOCDataset(augmentation_params=None) dataset_val = dataset.load_dataset(dataset_path, batch_size=8, is_training=False) iterator = tf.data.Iterator.from_structure(dataset_val.output_types, dataset_val.output_shapes) next_batch = iterator.get_next() val_init_op = iterator.make_initializer(dataset_val) session_config = tf.ConfigProto(gpu_options=tf.GPUOptions( allow_growth=True)) with tf.Session(config=session_config) as session: session.run(tf.global_variables_initializer()) session.run(val_init_op)
def main():
    """Train YOLOv1 on a small VOC subset; checkpoint once train mAP exceeds 0.9."""
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)

    # Alternatives: test.csv, 8examples.csv, 100examples.csv
    train_dataset = VOCDataset("data/8examples.csv", transform=transform,
                               img_dir=IMG_DIR, label_dir=LABEL_DIR)
    test_dataset = VOCDataset("data/test.csv", transform=transform,
                              img_dir=IMG_DIR, label_dir=LABEL_DIR)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY,
                              shuffle=True, drop_last=False)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                             num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY,
                             shuffle=True, drop_last=False)

    for epoch in range(EPOCHS):
        pred_boxes, target_boxes = get_bboxes(train_loader, model,
                                              iou_threshold=0.5,
                                              threshold=0.4, device=DEVICE)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes,
                                               iou_threshold=0.5,
                                               box_format="midpoint")

        if mean_avg_prec > 0.9:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            save_checkpoint(checkpoint, filename=LOAD_MODEL_FILE)
            import time
            time.sleep(10)

        print(f"Train mAP: {mean_avg_prec}")
        train_fn(train_loader, model, optimizer, loss_fn)
def test2(target_dir='augmented2'):
    """Visual sanity check for augmentation transforms.

    Draws ground-truth bounding boxes on samples under no transform, random
    translation, and random scaling, saving each annotated image under
    `target_dir`.
    """
    data_dir = '/home/alex/datasets/PascalVOC'
    img_dir = '{}/images'.format(data_dir)
    label_dir = '{}/labels'.format(data_dir)
    csv_dir = '{}/8examples.csv'.format(data_dir)
    # csv_dir = '{}/1example.csv'.format(data_dir)

    def _draw_labels(img, labels):
        # Convert each normalized (class, cx, cy, w, h) row to pixel corners
        # and draw the rectangle onto the image. Extracted from three
        # previously duplicated loops.
        w, h = img.shape[1:]
        for j in range(labels.shape[0]):
            x1 = int((labels[j, 1] - labels[j, 3] / 2) * w)
            x2 = int((labels[j, 1] + labels[j, 3] / 2) * w)
            y1 = int((labels[j, 2] - labels[j, 4] / 2) * h)
            y2 = int((labels[j, 2] + labels[j, 4] / 2) * h)
            img = rectangle(img, x1, x2, y1, y2)
        return img

    # No augmentation: draw the raw ground-truth boxes.
    data = VOCDataset(dataset_csv=csv_dir, img_dir=img_dir,
                      label_dir=label_dir, transform=Compose([]), test=True)
    for img_id, (img, labels) in enumerate(data):
        img = _draw_labels(img, labels)
        save_image(img, '{}/{}_bb.jpeg'.format(target_dir, img_id))

    # Random translation, five rounds of samples.
    data = VOCDataset(dataset_csv=csv_dir, img_dir=img_dir,
                      label_dir=label_dir, transform=Compose([RTranslation()]),
                      test=True)
    for epoch in range(5):
        for img_id, (img, labels) in enumerate(data):
            img = _draw_labels(img, labels)
            save_image(img, '{}/{}_transl{}.jpeg'.format(target_dir, img_id,
                                                         epoch))

    # Random scaling, five rounds of samples.
    data = VOCDataset(dataset_csv=csv_dir, img_dir=img_dir,
                      label_dir=label_dir, transform=Compose([RScaling()]),
                      test=True)
    for epoch in range(5):
        for img_id, (img, labels) in enumerate(data):
            img = _draw_labels(img, labels)
            save_image(img, '{}/{}_scale{}.jpeg'.format(target_dir, img_id,
                                                        epoch))
# Plot the training-set class distribution (left axis) alongside the derived
# inverse-frequency class weights (right axis).
rc('mathtext', default='regular')
params = {
    'legend.fontsize': 'x-large',
    'figure.figsize': (10, 10),
    'axes.labelsize': 'x-large',
    'axes.titlesize': 'x-large',
    'xtick.labelsize': 'x-large',
    'ytick.labelsize': 'x-large'
}
pylab.rcParams.update(params)

# Shared x-axis (classes); a twin y-axis carries the weights scale.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(25, 18))
ax2 = ax.twinx()

from dataset import VOCDataset
dataset = VOCDataset('VOC2012', 'train')
keys = list(dataset.classes_count.keys())
values = np.array(list(dataset.classes_count.values()))
minv = np.min(values)
maxv = np.max(values)
# Inverse-frequency weights: the rarest class gets weight 1.0.
newvalues = (minv / values)

ax.tick_params(labelsize=20)
ax2.tick_params(labelsize=20)
ax2.set_ylim(top=maxv)
plt.setp(ax.get_xticklabels(), **{"rotation": 45, "ha": "right"})
ax.set_title("Training set distribution", fontsize=30)
ax.set_xlabel("Classes", fontsize=30)
ax.set_ylabel("Count", fontsize=30)
ax2.set_ylabel("Weights", fontsize=30)
from dataset import VOCDataset, collate_wrapper
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Compute the per-channel mean and std of the training images by summing
# per-image channel statistics batch by batch.
tr = transforms.Compose([transforms.RandomResizedCrop(300),
                         transforms.ToTensor()])
dataset = VOCDataset('VOC2012', 'train', transforms=tr)
loader = DataLoader(dataset, batch_size=48, collate_fn=collate_wrapper,
                    shuffle=False, num_workers=16)

mean = 0.
std = 0.
nb_samples = 0.
for batch in loader:
    imgs = batch.image
    # Flatten spatial dims: (N, C, H*W) so stats are taken per channel.
    flat = imgs.view(imgs.size(0), imgs.size(1), -1)
    mean += flat.mean(2).sum(0)
    std += flat.std(2).sum(0)
    nb_samples += imgs.size(0)

mean /= nb_samples
std /= nb_samples
print(mean, std)
def run():
    """Train the Pascal VOC 2012 multi-label classifier from the command line.

    Parses hyper-parameters, trains for --epochs epochs, keeps the weights
    with the best validation loss, and saves both the best weights and the
    per-epoch metrics under ./results/.
    """
    import os

    parser = argparse.ArgumentParser(description='Pascal VOC 2012 Classifier')
    parser.add_argument('--batch-size', type=int, default=32, metavar='N',
                        help='input batch size for training (default: 32)')
    parser.add_argument('--test-batch-size', type=int, default=32, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=15, metavar='N',
                        help='number of epochs to train (default: 15)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--mode', type=str, default='A', metavar='M',
                        help='Mode of model')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    root = './'
    train_transform, test_transform = initialise_transforms()

    # Get dataset and input into Dataloader
    train_loader = torch.utils.data.DataLoader(
        VOCDataset(root, 'train', transform=train_transform),
        batch_size=args.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        VOCDataset(root, 'val', transform=test_transform),
        batch_size=args.test_batch_size, shuffle=True)

    # Define loss functions (the model outputs logits, so no sigmoid here)
    train_loss_function = nn.BCEWithLogitsLoss()
    test_loss_function = F.binary_cross_entropy_with_logits

    # Define model
    model, params = load_model()
    model = model.to(device)

    # Define optimizer
    optimizer = torch.optim.Adam(params, lr=args.lr)

    best_loss = -1
    best_param = model.state_dict()  # fallback so --epochs 0 can't NameError
    train_loss_epoch = []
    val_loss_epoch = []
    val_acc_epoch = []
    for epoch in range(1, (args.epochs + 1)):
        train_loss = train(args, model, device, train_loader, optimizer,
                           epoch, train_loss_function)
        val_loss, val_acc = test(args, model, device, test_loader,
                                 test_loss_function)
        train_loss_epoch.append(train_loss.item())
        val_acc_epoch.append(val_acc)
        val_loss_epoch.append(val_loss)
        # best_loss starts negative so the first epoch always becomes "best"
        if best_loss < 0 or val_loss < best_loss:
            best_loss = val_loss
            best_param = model.state_dict()
            print("FOUND BETTER MODEL, SAVING WEIGHTS...\n")

    results = {
        "train_loss": train_loss_epoch,
        "val_loss": val_loss_epoch,
        "val_acc": val_acc_epoch
    }

    print('Saving model...')
    save_dir = './results'
    # BUG FIX: the original concatenated save_dir + filename without a path
    # separator, writing e.g. './resultspascalvoc_A.pt'. Join properly and
    # make sure the directory exists.
    os.makedirs(save_dir, exist_ok=True)
    model_file = 'pascalvoc_' + args.mode + '.pt'
    torch.save(best_param, os.path.join(save_dir, model_file))
    print('Model saved as : {}\n'.format(model_file))

    print('Saving results...')
    results_file = 'pascalvoc_' + args.mode + '_results' + '.pt'
    torch.save(results, os.path.join(save_dir, results_file))
    print('Results saved as : {}'.format(results_file))
from torch.utils.data import DataLoader from torchvision import transforms import torch from dataset import VOCDataset # 计算图像各个通道的均值和方差,以便后续作正则化 valid_dataset = VOCDataset(train=False, transform=transforms.ToTensor(), label_transform=transforms.ToTensor()) valid_loader = DataLoader(dataset=valid_dataset, batch_size=1, shuffle=True, num_workers=8) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if __name__ == "__main__": mean_0 = 0 mean_1 = 0 mean_2 = 0 std_0 = 0 std_1 = 0 std_2 = 0 for i, (img, label) in enumerate(valid_loader): img.to(device) mean_0 += img[0][0].mean() mean_1 += img[0][1].mean() mean_2 += img[0][2].mean()
def train():
    """Train SSD300 on Pascal VOC.

    Builds the model, an SGD optimizer with a doubled learning rate for bias
    parameters, optionally resumes from a checkpoint, then runs the training
    loop with step learning-rate decay at fixed epochs, saving a checkpoint
    every epoch and a named copy every `args.save_freq` epochs.
    """
    set_seed(seed=10)
    os.makedirs(args.save_root, exist_ok=True)
    # create model, optimizer and criterion
    model = SSD300(n_classes=len(label_map), device=device)
    # split parameters so biases can get twice the base learning rate
    biases = []
    not_biases = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            if name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)
    model = model.to(device)
    optimizer = torch.optim.SGD(params=[{
        'params': biases,
        'lr': 2 * args.lr
    }, {
        'params': not_biases
    }],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.resume is None:
        start_epoch = 0
    else:
        # resume: restore model/optimizer state and continue after the saved epoch
        checkpoint = torch.load(args.resume, map_location=device)
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print(f'Training will start at epoch {start_epoch}.')
    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy,
                             device=device,
                             alpha=args.alpha)
    criterion = criterion.to(device)
    '''
    scheduler = StepLR(optimizer=optimizer,
                       step_size=20,
                       gamma=0.5,
                       last_epoch=start_epoch - 1,
                       verbose=True)
    '''
    # load data
    transform = Transform(size=(300, 300), train=True)
    train_dataset = VOCDataset(root=args.data_root,
                               image_set=args.image_set,
                               transform=transform,
                               keep_difficult=True)
    train_loader = DataLoader(dataset=train_dataset,
                              collate_fn=collate_fn,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers,
                              shuffle=True,
                              pin_memory=True)
    losses = AverageMeter()
    for epoch in range(start_epoch, args.num_epochs):
        # decay learning rate at particular epochs
        if epoch in [120, 140, 160]:
            adjust_learning_rate(optimizer, 0.1)
        # train model
        model.train()
        losses.reset()
        bar = tqdm(train_loader, desc='Train the model')
        for i, (images, bboxes, labels, _) in enumerate(bar):
            images = images.to(device)
            # bboxes/labels are per-image lists of tensors, moved one by one
            bboxes = [b.to(device) for b in bboxes]
            labels = [l.to(device) for l in labels]
            predicted_bboxes, predicted_scores = model(
                images)  # (N, 8732, 4), (N, 8732, num_classes)
            loss = criterion(predicted_bboxes, predicted_scores, bboxes,
                             labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # running average weighted by batch size
            losses.update(loss.item(), images.size(0))
            if i % args.print_freq == args.print_freq - 1:
                bar.write(f'Average Loss: {losses.avg:.4f}')
        bar.write(f'Epoch: [{epoch + 1}|{args.num_epochs}] '
                  f'Average Loss: {losses.avg:.4f}')
        # adjust learning rate
        # scheduler.step()
        # save model (every epoch overwrites the rolling checkpoint; a named
        # copy is kept every args.save_freq epochs)
        state_dict = {
            'epoch': epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_path = os.path.join(args.save_root, 'ssd300.pth')
        torch.save(state_dict, save_path)
        if epoch % args.save_freq == args.save_freq - 1:
            shutil.copyfile(
                save_path,
                os.path.join(args.save_root, f'ssd300_epochs_{epoch + 1}.pth'))
def run():
    """Run the inference demo for the Pascal VOC 2012 classifier.

    --demo_mode single: classify one image (--image_path) and display it.
    --demo_mode gui: score the whole validation set, masking predictions
    with the ground truth, and print the per-example scores.

    Raises:
        Exception: if --demo_mode is neither 'single' nor 'gui'.
    """
    parser = argparse.ArgumentParser(description='Pascal VOC 2012 Classifier')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--test-batch-size', type=int, default=32, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--mode', type=str, default='A', metavar='M',
                        help='Mode of model')
    parser.add_argument('--demo_mode', type=str, default='single', metavar='M',
                        help='Mode of demo')
    parser.add_argument('--image_path', type=str, default='./test.jpg',
                        metavar='M', help='Mode of demo')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    # Get transform
    _, test_transform = initialise_transforms()

    # Initialise model and load the trained weights
    model, params = load_model()
    model = model.to(device)
    model.eval()
    model_name = 'pascalvoc_' + args.mode + '.pt'
    print('Loading model...')
    model.load_state_dict(torch.load(model_name))

    if args.demo_mode == 'single':
        # Convert jpg to tensor
        image = Image.open(args.image_path).convert('RGB')
        image_tensor = test_transform(image).unsqueeze(0).to(device)

        # Get model prediction (torch.sigmoid: F.sigmoid is deprecated)
        pred = torch.sigmoid(model(image_tensor))
        display_prediction(pred, image)
    elif args.demo_mode == 'gui':
        class_to_index = utils.class_to_index()
        # 2-part transform to preserve image after first_transform
        # NOTE(review): these transforms are built but unused below —
        # presumably for a fuller GUI flow; confirm before deleting.
        first_transform = transforms.Compose([transforms.Resize(224)])
        second_transform = transforms.Compose([
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        # BUG FIX: `root` was referenced here but never defined in this
        # function (NameError at runtime). Define the dataset root as in
        # the training script.
        root = './'

        # Validation set
        test_loader = torch.utils.data.DataLoader(
            VOCDataset(root, 'val', transform=test_transform),
            batch_size=args.test_batch_size, shuffle=True)

        # Get predictions on validation set
        model.eval()
        all_predictions = []
        start = time.time()
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = torch.sigmoid(model(data))
                target = target.float()
                # Precision for each class in each example
                for i in range(output.shape[0]):
                    # Ground truth as mask for predictions
                    scores = target[i] * output[i]
                    all_predictions.append(scores)
        end = time.time()
        print("Time lapsed: {:.2f}s".format((end - start)))
        print(all_predictions)
    else:
        raise Exception("Please enter demo_mode as 'single' or 'gui'")