from image_generator import ImageGenerator
from models import SSD300
from utils.prior_box_creator import PriorBoxCreator
from utils.prior_box_assigner import PriorBoxAssigner
from utils.box_transformer import BoxTransformer
from utils.XML_parser import XMLParser
from utils.utils import split_data
from utils.utils import read_image, resize_image
import numpy as np
import matplotlib.pyplot as plt

image_shape = (300, 300, 3)
model = SSD300(image_shape)
box_creator = PriorBoxCreator(model)
prior_boxes = box_creator.create_boxes()

# pick a single prior box from one layer scale and draw it on a sample image
layer_scale, box_arg = 0, 780
box_coordinates = prior_boxes[layer_scale][box_arg, :, :]
image_path = '../images/'
image_key = '007040.jpg'
box_creator.draw_boxes(image_path + image_key, box_coordinates)

# assign prior boxes to the VOC2007 ground-truth annotations
data_path = '../datasets/VOCdevkit/VOC2007/'
ground_truths = XMLParser(data_path + 'Annotations/').get_data()
prior_box_manager = PriorBoxAssigner(prior_boxes, ground_truths)
assigned_boxes = prior_box_manager.assign_boxes()
prior_box_manager.draw_assigned_boxes(image_path, image_shape[0:2], image_key)

batch_size = 7
train_keys, validation_keys = split_data(assigned_boxes, training_ratio=.8)
assigned_image_generator = ImageGenerator(assigned_boxes, batch_size,
import torch
import torchvision
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.autograd import Variable

from utils.transforms import resize
from datasets import ListDataset
from models import SSD300, SSDBoxCoder
from PIL import Image, ImageDraw
import os
import sys

print('Loading model..')
net = SSD300(num_classes=2)
# print(net)
model = torch.load('./checkpoint/train_synthtext_detection_2.pth')
net.load_state_dict(model['net'])
net.eval()

print('Loading image..')
img = Image.open('/usr/local/share/data/SynthText/160/stage_40_64.jpg')
ow = oh = 300
img = img.resize((ow, oh))

print('Predicting..')
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
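# The snippet above stops after defining the transform. A minimal hedged
# continuation, assuming the torchcv-style SSDBoxCoder.decode() API
# (loc/cls predictions in, boxes/labels/scores out); this is a sketch,
# not the repository's confirmed prediction code:
x = transform(img)
with torch.no_grad():
    loc_preds, cls_preds = net(x.unsqueeze(0))
box_coder = SSDBoxCoder(net)
boxes, labels, scores = box_coder.decode(
    loc_preds.squeeze(), F.softmax(cls_preds.squeeze(), dim=1))

# draw the decoded boxes back onto the resized image
draw = ImageDraw.Draw(img)
for box in boxes:
    draw.rectangle(box.tolist(), outline='red')
img.show()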
from utils.visualizations import plot_kernels
import matplotlib.pyplot as plt
# from models.ssd_utils import construct_SSD
from models import SSD300

"""
weights_path = '../trained_models/SSD300_weights.hdf5'
model = SSD300(weights_path=weights_path)
model.summary()
kernel_weights = model.get_layer('conv2d_1').get_weights()[0]
plot_kernels(kernel_weights[:, :, 0:1, :])
plot_kernels(kernel_weights[:, :, 1:2, :])
plot_kernels(kernel_weights[:, :, 2:3, :])
plt.show()
"""

# plot the first convolution kernels of the VGG16 base, one input channel at a time
weights_path = '../trained_models/VGG16_weights.hdf5'
model = SSD300()
model.load_weights(weights_path, by_name=True)
kernel_weights = model.get_layer('conv2d_1').get_weights()[0]
plot_kernels(kernel_weights[:, :, 0:1, :])
plot_kernels(kernel_weights[:, :, 1:2, :])
plot_kernels(kernel_weights[:, :, 2:3, :])
plt.show()
            # scale the normalized box back to pixel coordinates
            xmin = int(round(top_xmin[i] * img.shape[1]))
            ymin = int(round(top_ymin[i] * img.shape[0]))
            xmax = int(round(top_xmax[i] * img.shape[1]))
            ymax = int(round(top_ymax[i] * img.shape[0]))
            score = top_conf[i]
            label = int(top_label_indices[i])
            # label_name = voc_classes[label - 1]
            # label_name = self.class_names[label - 1]
            label_name = self.class_names[label]
            display_txt = '{:0.2f}, {}'.format(score, label_name)
            # plt.Rectangle takes ((x, y), width, height)
            coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1
            color = colors[label]
            currentAxis.add_patch(plt.Rectangle(*coords, fill=False,
                                                edgecolor=color, linewidth=2))
            currentAxis.text(xmin, ymin, display_txt,
                             bbox={'facecolor': color, 'alpha': 0.5})
        plt.show()


if __name__ == "__main__":
    # weights_path = '../trained_models/ssd300_weights.34-1.54.hdf5'
    weights_path = '../trained_models/ssd300_weights.17-1.51.hdf5'
    # weights_path = '../trained_models/SSD300_COCO_weights.08-0.35.hdf5'
    model = SSD300(num_classes=21)
    model.load_weights(weights_path)
    tester = Tester(model)
    tester.test_prior_boxes()
    tester.test_XML_parser()
    tester.test_image_loading()
    tester.test_model()
from datasets import DataManager
from datasets import get_class_names
from preprocessing import get_image_size
from models import SSD300
from metrics import compute_average_precision
from metrics import compute_precision_and_recall
from utils.boxes import create_prior_boxes
from utils.boxes import calculate_intersection_over_union
from utils.boxes import denormalize_boxes
from utils.inference import infer_from_path

dataset_name = 'VOC2007'
image_prefix = '../datasets/VOCdevkit/VOC2007/JPEGImages/'
weights_path = '../trained_models/SSD300_weights.hdf5'
model = SSD300(weights_path=weights_path)
prior_boxes = create_prior_boxes()
input_shape = model.input_shape[1:3]

# evaluation thresholds
class_threshold = .1
iou_nms_threshold = .45
iou_threshold = .5
num_classes = 21
with_difficult_objects = False
split = 'test'

# drop the background class; average precision is computed per object class
class_names = get_class_names(dataset_name)
class_names = class_names[1:]
average_precisions = []
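# Not part of the original script: a self-contained illustration of the
# area-under-the-curve average precision that functions like
# compute_average_precision() typically implement (the repo's exact
# implementation is not shown here, so this is an assumption):
import numpy as np

def average_precision_sketch(precision, recall):
    # append sentinel values, then make precision monotonically decreasing
    precision = np.concatenate([[0.], precision, [0.]])
    recall = np.concatenate([[0.], recall, [1.]])
    for i in range(len(precision) - 2, -1, -1):
        precision[i] = max(precision[i], precision[i + 1])
    # sum the area under the step-wise precision-recall curve
    indices = np.where(recall[1:] != recall[:-1])[0]
    return np.sum((recall[indices + 1] - recall[indices]) * precision[indices + 1])

# e.g. precision [1., 1., .66] at recall [.33, .66, .66] gives AP = 0.66
print(average_precision_sketch(np.array([1., 1., .66]), np.array([.33, .66, .66])))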
        plt.show()

    def denormalize_box(self, box_coordinates, image_shape):
        x_min = box_coordinates[:, 0]
        y_min = box_coordinates[:, 1]
        x_max = box_coordinates[:, 2]
        y_max = box_coordinates[:, 3]
        original_image_width, original_image_height = image_shape
        x_min = x_min * original_image_width
        y_min = y_min * original_image_height
        x_max = x_max * original_image_width
        y_max = y_max * original_image_height
        return [x_min, y_min, x_max, y_max]


if __name__ == '__main__':
    import sys
    sys.path.insert(0, '../')
    from models import SSD300
    from prior_box_creator import PriorBoxCreator
    from XML_parser import XMLParser

    model = SSD300((300, 300, 3))
    box_creator = PriorBoxCreator(model)
    prior_boxes = box_creator.create_boxes()
    data_path = '../../datasets/VOCdevkit/VOC2007/'
    ground_truths = XMLParser(data_path + 'Annotations/').get_data()
    prior_box_manager = PriorBoxAssigner(prior_boxes, ground_truths)
    assigned_boxes = prior_box_manager.assign_boxes()
    prior_box_manager.draw_assigned_boxes(data_path + 'JPEGImages/')
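# Worked example for denormalize_box (hypothetical values, not from the
# original file): on a (300, 300) image, normalized coordinates scale back
# linearly, so denormalize_box(np.array([[.1, .2, .5, .6]]), (300, 300))
# returns [array([30.]), array([60.]), array([150.]), array([180.])].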
num_classes = len(class_names)
val_data_manager = CSVDataManager(val_dataset, val_split, class_names, False)
val_data = val_data_manager.load_data()

# generator
prior_boxes = to_point_form(create_prior_boxes())
train_sequencer = SequenceManager(train_data, 'train', prior_boxes,
                                  batch_size, box_scale_factors, num_classes)
val_sequencer = SequenceManager(val_data, 'val', prior_boxes,
                                batch_size, box_scale_factors, num_classes)

# model
multibox_loss = MultiboxLoss(num_classes, negative_positive_ratio, batch_size)
model = SSD300(image_shape, num_classes, weights_path)
model.compile(optimizer, loss=multibox_loss.compute_loss)

# callbacks
if not os.path.exists(model_path):
    os.makedirs(model_path)
checkpoint = ModelCheckpoint(save_path, verbose=1, period=1)
log = CSVLogger(model_path + 'SSD_scratch.log')
learning_rate_manager = LearningRateManager(learning_rate, decay, step_epochs)
learning_rate_schedule = LearningRateScheduler(learning_rate_manager.schedule)
tbCallBack = TensorBoard(log_dir='./Graph/', histogram_freq=0,
                         batch_size=batch_size, write_graph=True)
callbacks = [checkpoint, log, learning_rate_schedule, tbCallBack]

# model fit
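# The fit call is not in this snippet. A hedged sketch of the usual shape
# when training from Keras Sequence objects like the sequencers above;
# num_epochs and the exact argument names are assumptions:
model.fit_generator(train_sequencer,
                    epochs=num_epochs,
                    validation_data=val_sequencer,
                    callbacks=callbacks)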
def main():
    """
    Training.
    """
    global start_epoch, label_map, epoch, checkpoint, decay_lr_at

    # Initialize model or load checkpoint
    if checkpoint is None:
        start_epoch = 0
        model = SSD300(n_classes=n_classes)
        # Initialize the optimizer, with twice the default learning rate for
        # biases, as in the original Caffe repo
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if param.requires_grad:
                if param_name.endswith('.bias'):
                    biases.append(param)
                else:
                    not_biases.append(param)
        optimizer = torch.optim.SGD(params=[{'params': biases, 'lr': 2 * lr},
                                            {'params': not_biases}],
                                    lr=lr, momentum=momentum,
                                    weight_decay=weight_decay)
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to default device
    model = model.to(device)
    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy).to(device)

    # Custom dataloaders
    train_dataset = PascalVOCDataset(data_folder, split='train',
                                     keep_difficult=keep_difficult)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        collate_fn=train_dataset.collate_fn, num_workers=workers,
        pin_memory=True)  # note that we're passing the collate function here

    # Calculate total number of epochs to train and the epochs to decay the
    # learning rate at (i.e. convert iterations to epochs): divide iterations
    # by the number of iterations per epoch.
    # The paper trains for 120,000 iterations with a batch size of 32,
    # decaying after 80,000 and 100,000 iterations.
    epochs = iterations // (len(train_dataset) // 32)
    decay_lr_at = [it // (len(train_dataset) // 32) for it in decay_lr_at]

    # Epochs
    for epoch in range(start_epoch, epochs):
        # Decay learning rate at particular epochs
        if epoch in decay_lr_at:
            adjust_learning_rate(optimizer, decay_lr_to)

        # One epoch's training
        train(train_loader=train_loader, model=model, criterion=criterion,
              optimizer=optimizer, epoch=epoch)

        # Save checkpoint
        save_checkpoint(epoch, model, optimizer)
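# A worked instance of the iteration-to-epoch conversion above, assuming the
# usual VOC2007+2012 trainval set of 16,551 images (an assumption; the actual
# dataset size depends on data_folder): one epoch is 16551 // 32 = 517
# iterations, so 120000 // 517 = 232 epochs, with learning-rate decays at
# 80000 // 517 = 154 and 100000 // 517 = 193.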
dataset_manager = DataManager(datasets, splits, class_names, difficult_boxes)
train_data = dataset_manager.load_data()
val_data = test_data = DataManager('VOC2007', 'test').load_data()
class_names = dataset_manager.class_names
num_classes = len(class_names)

# generator
prior_boxes = to_point_form(create_prior_boxes())
generator = ImageGenerator(train_data, val_data, prior_boxes, batch_size,
                           box_scale_factors, num_classes)

# model
multibox_loss = MultiboxLoss(num_classes, negative_positive_ratio, batch_size)
model = SSD300(image_shape, num_classes, weights_path,
               frozen_layers, randomize_top)
model.compile(optimizer, loss=multibox_loss.compute_loss)

# callbacks
if not os.path.exists(model_path):
    os.makedirs(model_path)
checkpoint = ModelCheckpoint(save_path, verbose=1, period=1)
log = CSVLogger(model_path + 'SSD_scratch.log')
learning_rate_manager = LearningRateManager(gamma_decay, learning_rate)
learning_rate_schedule = LearningRateScheduler(learning_rate_manager.schedule)
plateau = ReduceLROnPlateau('val_loss', factor=0.9, patience=1, verbose=1)
early_stop = EarlyStopping('val_loss', min_delta=1e-4, patience=14, verbose=1)
callbacks = [checkpoint, log, learning_rate_schedule]

# model fit
scheduled_epochs = [155, 195, 235]
negative_positive_ratio = 3
base_weights_path = '../trained_models/VGG16_weights.hdf5'

# data
class_names = get_class_names('VOC2007')
val_dataset, val_split = 'VOC2007', 'test'
train_datasets, train_splits = ['VOC2007', 'VOC2012'], ['trainval', 'trainval']
train_data_manager = DataManager(train_datasets, train_splits, class_names)
train_data = train_data_manager.load_data()
num_classes = len(class_names)
val_data_manager = DataManager(val_dataset, val_split, class_names)
val_data = val_data_manager.load_data()

# model
model = SSD300(num_classes=num_classes)
model.load_weights(base_weights_path, by_name=True)
prior_boxes = to_point_form(create_prior_boxes())
multibox_loss = MultiboxLoss(num_classes, negative_positive_ratio, alpha_loss)
optimizer = SGD(learning_rate, momentum, weight_decay)
model.compile(optimizer, loss=multibox_loss.compute_loss)
data_generator = DataGenerator(train_data, prior_boxes, batch_size,
                               num_classes, val_data)

# callbacks
model_path = '../trained_models/' + model_name + '/'
save_path = model_path + 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'
if not os.path.exists(model_path):
    os.makedirs(model_path)
log = CSVLogger(model_path + model_name + '.log')
checkpoint = ModelCheckpoint(save_path, verbose=1, save_weights_only=True)
dataset_manager = DataManager(['VOC2007', 'VOC2012'], ['trainval', 'trainval'])
train_data = dataset_manager.load_data()
val_data = test_data = DataManager('VOC2007', 'test').load_data()
class_names = dataset_manager.class_names
num_classes = len(class_names)
prior_boxes = create_prior_boxes()
generator = ImageGenerator(train_data, val_data, prior_boxes, batch_size)

# fine-tune from pre-trained SSD300 weights with the early VGG blocks frozen
weights_path = '../trained_models/SSD300_weights.hdf5'
frozen_layers = ['input_1', 'conv1_1', 'conv1_2', 'pool1',
                 'conv2_1', 'conv2_2', 'pool2',
                 'conv3_1', 'conv3_2', 'conv3_3', 'pool3']
model = SSD300(image_shape, num_classes, weights_path, frozen_layers, True)
multibox_loss = MultiboxLoss(num_classes, neg_pos_ratio=2.0).compute_loss
model.compile(Adam(lr=3e-4), loss=multibox_loss)

# callbacks
model_path = '../trained_models/SSD_scratch/'
if not os.path.exists(model_path):
    os.makedirs(model_path)
save_path = model_path + 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'
checkpoint = ModelCheckpoint(save_path, verbose=1, save_best_only=True)
plateau = ReduceLROnPlateau(factor=0.5, patience=10, verbose=1)
early_stop = EarlyStopping(min_delta=1e-3, patience=25, verbose=1)
log = CSVLogger(model_path + 'SSD_scratch.log')
from models import SSD300
from utils.prior_box_creator_paper import PriorBoxCreator
from utils.prior_box_manager import PriorBoxManager
from utils.XML_parser import XMLParser
from utils.utils import split_data
from utils.utils import scheduler
from keras.optimizers import Adam
from utils.training import MultiboxLoss  # assumed module path; not in the original imports

# constants
batch_size = 7
num_epochs = 15
num_classes = 21
root_prefix = '../datasets/VOCdevkit/VOC2007/'
ground_data_prefix = root_prefix + 'Annotations/'
image_prefix = root_prefix + 'JPEGImages/'
image_shape = (224, 224, 3)

model = SSD300(image_shape, num_classes)
model.load_weights('../trained_models/weights_SSD300.hdf5', by_name=True)

# freeze the early VGG blocks and fine-tune only the remaining layers
freeze = ['input_1', 'conv1_1', 'conv1_2', 'pool1',
          'conv2_1', 'conv2_2', 'pool2',
          'conv3_1', 'conv3_2', 'conv3_3', 'pool3']
for layer in model.layers:
    if layer.name in freeze:
        layer.trainable = False

multibox_loss = MultiboxLoss(num_classes, neg_pos_ratio=2.0).compute_loss
model.compile(optimizer=Adam(lr=3e-4), loss=multibox_loss, metrics=['acc'])
box_creator = PriorBoxCreator(model)
import torch
import numpy as np

from models import SSD300

# convert PyTorch SSD300 VGG weights to the Keras layer layout
weights_path = '../trained_models/ssd_300_VOC0712.pth'
vgg_weights = torch.load(weights_path)
biases = []
weights = []
for name, torch_weights in vgg_weights.items():
    print(name)
    if 'bias' in name:
        biases.append(torch_weights.numpy())
    if 'weight' in name:
        print(torch_weights.numpy().shape)
        # PyTorch conv weights are (out, in, h, w); Keras expects (h, w, in, out)
        conv_weights = torch_weights.numpy()
        conv_weights = np.rollaxis(conv_weights, 0, 4)
        conv_weights = np.rollaxis(conv_weights, 0, 3)
        weights.append(conv_weights)
vgg_weights = list(zip(weights, biases))

base_model = SSD300(return_base=True)
pytorch_layer_arg = 0
for layer in base_model.layers[1:]:
    conv_weights = vgg_weights[pytorch_layer_arg][0]
    bias_weights = vgg_weights[pytorch_layer_arg][1]
    if ('conv2d' in layer.name) or ('branch' in layer.name):
        print(layer.name)
        print('pre-trained weights:', conv_weights.shape)
        print('model weights:', layer.get_weights()[0].shape)
        layer.set_weights([conv_weights, bias_weights])
        pytorch_layer_arg = pytorch_layer_arg + 1
base_model.save_weights('../trained_models/SSD300_weights.hdf5')
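# Standalone sanity check of the axis shuffle above (illustrative, not in the
# original script): a PyTorch (out=64, in=3, h=5, w=7) kernel should land in
# the Keras (h, w, in, out) layout.
dummy = np.zeros((64, 3, 5, 7))       # (out, in, h, w)
dummy = np.rollaxis(dummy, 0, 4)      # -> (in, h, w, out)
dummy = np.rollaxis(dummy, 0, 3)      # -> (h, w, in, out)
assert dummy.shape == (5, 7, 3, 64)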
def evaluate_detections(box_list, output_dir, dataset):
    write_voc_results_file(box_list, dataset)
    do_python_eval(output_dir)


if __name__ == '__main__':
    # load net
    num_classes = len(VOC_CLASSES) + 1  # +1 for the background class
    """
    net = build_ssd('test', 300, num_classes)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    """
    weights_path = '../trained_models/SSD300_weights.hdf5'
    net = SSD300(weights_path=weights_path)
    print('Finished loading model!')

    # load data
    dataset = VOCDetection(args.voc_root, [('2007', set_type)],
                           BaseTransform(300, dataset_mean),
                           AnnotationTransform())

    # evaluation
    cuda = False
    test_net(args.save_folder, net, cuda, dataset,
             BaseTransform(size=300, mean=dataset_mean),
             args.top_k, 300, thresh=args.confidence_threshold)