def __getitem__(self, idx):
    """Return the ``[image, mask]`` pair for sample ``idx``.

    The image comes from ``self.images`` when ``self.use_M`` is set,
    otherwise from ``self.datainfo``. The ``self.mask_num`` mask channels
    of sample ``idx`` live in the same storage at offsets
    ``idx + self.data_num * (1 + k)`` for ``k = 0 .. mask_num - 1``.

    When ``self.aug`` is set, roughly 90% of the samples are augmented:
    image and masks are rotated by the same random angle and the image is
    additionally colour-jittered (and, if ``self.binary`` is set, first
    passed through ``perturbator.comb_binary_rec``). Otherwise image and
    masks are used as stored.

    The mask stack is raised to ``self.pow_n`` and scaled by its maximum.

    Returns:
        list: ``[image, mask]`` where ``mask`` is a float tensor of shape
        ``(mask_num, image.shape[1], image.shape[2])``.
    """
    if self.use_M:
        image = self.images[idx]
    else:
        image, _ = self.datainfo.__getitem__(idx)

    # rv is kept on the instance for backward compatibility; rv == 1 can
    # never satisfy the < 0.9 test, so it disables augmentation.
    self.rv = random.random() if self.aug else 1
    augment = self.rv < 0.9

    if augment:
        # One angle shared by image and masks so they stay aligned.
        angle = random.randrange(-15, 15)
        self.data_trans = mytransforms.Compose([
            mytransforms.ToPILImage(),
            mytransforms.Rotation(angle),
            mytransforms.ColorJitter(brightness=random.random(),
                                     contrast=random.random(),
                                     saturation=random.random(),
                                     hue=random.random() / 2),
            mytransforms.ToTensor(),
        ])
        self.mask_trans = mytransforms.Compose([
            mytransforms.ToPILImage(),
            mytransforms.Rotation(angle),
            mytransforms.ToTensor(),
        ])
        if self.binary:
            image = perturbator.comb_binary_rec(image, [self.H, self.W])
        image = self.data_trans(image)

    mask = torch.empty(self.mask_num, image.shape[1], image.shape[2],
                       dtype=torch.float)
    for k in range(self.mask_num):
        mask_idx = idx + self.data_num * (1 + k)
        if self.use_M:
            raw = self.images[mask_idx]
        else:
            raw, _ = self.datainfo.__getitem__(mask_idx)
        mask[k] = self.mask_trans(raw) if augment else raw

    mask = torch.pow(mask, self.pow_n)
    # Scale by the peak value, but never divide by zero: an all-zero mask
    # stack would otherwise turn into NaNs and poison the loss.
    if mask.numel() > 0:
        peak = mask.max()
        if peak != 0:
            mask = mask / peak
    return [image, mask]
def get_prediction(model, img_path, cat_names, threshold):
    """Run ``model`` on one image and keep the confident detections.

    Parameters:
        model: detection model called as ``model([img])``; its first output
            must be a dict with ``'scores'``, ``'labels'`` and ``'boxes'``
            and optionally ``'masks'`` or ``'keypoints'``.
        img_path: path of the input image.
        cat_names: lookup from label id to category name.
        threshold: minimum score a detection needs to be kept.

    Method:
        - the image is opened from ``img_path`` and converted to a tensor
        - the tensor is moved to the GPU when the module-level ``use_cuda``
          flag is set, then passed through the model
        - soft masks are binarised at 0.5 (keypoints are returned as is)
        - every output is cut after the last detection whose score exceeds
          ``threshold``

    Returns:
        tuple: ``(masks, pred_boxes, pred_class, pred_id)``. ``masks`` is
        ``None`` when the model outputs neither masks nor keypoints. All
        four are empty when no detection exceeds ``threshold``.
    """
    # The context manager closes the file handle; ToTensor reads the pixel
    # data before the block exits.
    with Image.open(img_path) as pil_img:
        img = T.Compose([T.ToTensor()])(pil_img)
    if use_cuda:
        img = img.cuda()

    pred = model([img])[0]

    # Position of the last score above the threshold. Searching by position
    # (not by value) keeps tied scores and handles "nothing above threshold".
    scores = list(pred['scores'].detach().cpu().numpy())
    above = [pos for pos, score in enumerate(scores) if score > threshold]
    keep = above[-1] + 1 if above else 0

    masks = None
    if 'masks' in pred:
        # Squeeze only axis 1 so a single detection keeps its own leading
        # axis (assumes torchvision's (N, 1, H, W) layout - TODO confirm).
        masks = (pred['masks'] > 0.5).squeeze(1).detach().cpu().numpy()
    elif 'keypoints' in pred:
        # No squeeze: a bare squeeze() would drop the detection axis when
        # there is exactly one detection.
        masks = pred['keypoints'].detach().cpu().numpy()

    label_ids = list(pred['labels'].cpu().numpy())
    pred_class = [cat_names[i] for i in label_ids]
    pred_id = list(label_ids)
    pred_boxes = [[(box[0], box[1]), (box[2], box[3])]
                  for box in list(pred['boxes'].detach().cpu().numpy())]

    if masks is not None:
        masks = masks[:keep]
    return masks, pred_boxes[:keep], pred_class[:keep], pred_id[:keep]
def train_RCNN(model, path2data, path2json, weight_path=None):
    """Train ``model`` on a COCO-style dataset and save a checkpoint.

    Parameters:
        model: the detection model to train.
        path2data: image root passed to ``coco_utils.get_coco``.
        path2json: annotation file passed to ``coco_utils.get_coco``.
        weight_path: optional checkpoint to resume from. It must contain
            ``'model_state_dict'``, ``'optimizer_state_dict'`` and
            ``'epoch'``.

    The number of epochs and the output path are read from the module-level
    names ``num_epochs`` and ``save_weights_to``. Nothing is returned; the
    checkpoint is written once, after the last epoch.
    """
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # get coco style dataset
    full_dataset = coco_utils.get_coco(path2data, path2json, T.ToTensor())

    # Split into train and test. Both subsets must index the FULL dataset:
    # building the test subset from the training subset would either reuse
    # a training sample or point past its end.
    indices = torch.randperm(len(full_dataset)).tolist()
    dataset = torch.utils.data.Subset(full_dataset, indices[:-1])
    dataset_test = torch.utils.data.Subset(full_dataset, indices[-1:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=True, collate_fn=myutils.collate_fn)
    # NOTE: only needed once the evaluation step below is re-enabled.
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False,
        collate_fn=myutils.collate_fn)

    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs")
        # DataParallel splits each batch along dim 0 across the GPUs,
        # e.g. [30, ...] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)

    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                                weight_decay=0.0005)
    # and a learning rate scheduler
    # NOTE: the scheduler is created but never stepped (see below), so the
    # learning rate stays constant.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3,
                                                   gamma=0.1)

    # resume from a checkpoint when one was given
    start_epoch = 0
    if weight_path is not None:
        # NOTE(review): torch.load unpickles the file - only load
        # checkpoints from a trusted source.
        checkpoint = torch.load(weight_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']

    for epoch in range(num_epochs):
        # train for one epoch, printing every 100 iterations
        train_one_epoch(model, optimizer, data_loader, device,
                        epoch + start_epoch, print_freq=100)
        # update the learning rate
        # lr_scheduler.step()
        # evaluate on the test dataset
        # Find a way around the broken pytorch nograd keypoint evaluation
        # evaluate(model, data_loader_test, device=device)

    # save weights when done
    torch.save(
        {
            'epoch': num_epochs + start_epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, save_weights_to)
def __init__(self, input_root, target_root='', counts_file='', coords_file='',
             processing_file='', train=True, patch_size=(256, 256),
             patch_stride=128, prescale=0.0, generate_target=True,
             target_type='density', per_image_norm=False, num_logits=0,
             transform=None, target_transform=None):
    """Index the input images and attach per-image metadata.

    Parameters:
        input_root: folder searched for ``.jpg`` inputs.
        target_root: optional folder searched for ``.npz`` targets.
        counts_file: optional CSV with per-category counts.
        coords_file: optional CSV with ``x_coord``, ``y_coord``,
            ``category`` and ``filename`` columns.
        processing_file: optional CSV with ``filename``, ``xmin``, ``ymin``,
            ``xmax``, ``ymax``, ``width`` and ``height`` columns. When the
            file exists, ``find_inputs`` is not asked to extract extra
            info.
        train: training mode; requires targets and enables colour jitter in
            the default transform.
        patch_size, patch_stride: stored for patch computation.
        prescale: scale factor; ``1.0`` is treated like ``0.0`` (disabled).
        generate_target: stored flag (generate on the fly instead of
            loading).
        target_type: must be one of ``TARGET_TYPES``.
        per_image_norm: use ``NormalizeImg`` instead of normalising with
            the fixed dataset mean/std in the default transform.
        num_logits: when non-zero in training mode, ``target_type`` must be
            ``'countception'``.
        transform: input transform; a default pipeline is built when None.
        target_transform: stored as given.

    Raises:
        RuntimeError: no inputs found, or ``target_root`` exists but holds
            no targets.
        AssertionError: invalid ``target_type`` / ``num_logits``
            combination, or training without targets.
    """
    extract_extra = not os.path.exists(processing_file)
    input_ids, input_infos = find_inputs(input_root, types=['.jpg'],
                                         extract_extra=extract_extra)
    if len(input_ids) == 0:
        raise RuntimeError("Found 0 images in : " + input_root)
    self.input_index = input_ids
    # One independent list per input; ``[[]] * n`` would alias a single
    # list across all entries.
    self.patch_index = [[] for _ in range(len(input_ids))]
    self.patch_count = 0
    self.patch_size = patch_size
    self.patch_stride = patch_stride
    self.prescale = prescale if prescale != 1.0 else 0.0
    assert target_type in TARGET_TYPES
    self.target_type = target_type
    self.num_logits = num_logits
    if train and num_logits:
        assert target_type == 'countception'
    self.generate_target = generate_target  # generate on the fly instead of loading

    self.data_by_id = dict()
    for index, (k, v) in enumerate(zip(input_ids, input_infos)):
        # Patch info can only be computed here when the size is known.
        if 'width' in v:
            if self.prescale:
                v = self._apply_prescale(v, self.prescale)
            patch_info = self._calc_patch_info(v)
            num_patches = patch_info['num']
            self.patch_index[index] = list(range(num_patches))
            self.patch_count += num_patches
            v['patches'] = patch_info
        v['index'] = index
        self.data_by_id[k] = v

    self.has_targets = False
    if os.path.exists(target_root):
        targets = find_targets(target_root, input_ids, types=['.npz'])
        if len(targets):
            for k, v in targets.items():
                self.data_by_id[k]['target'] = v
            self.has_targets = True
        else:
            raise RuntimeError("Found 0 targets in : " + target_root)
    if train:
        assert self.has_targets
    self.train = train

    if counts_file:
        counts_df = pd.read_csv(counts_file).rename(columns=CATEGORY_MAP)
        # Keyword ``axis``: the positional form was removed in pandas 2.0.
        counts_df = counts_df.drop(['train_id'], axis=1)
        # Keys are the frame's row index (train_id was dropped above).
        for k, v in counts_df.to_dict(orient='index').items():
            if k in self.data_by_id:
                d = self.data_by_id[k]
                d['counts_by_cat'] = v
                d['count'] = sum(v.values())

    if processing_file:
        process_df = pd.read_csv(processing_file, index_col=False)
        cols = ['xmin', 'ymin', 'xmax', 'ymax', 'width', 'height']
        process_df[cols] = process_df[cols].astype(int)
        process_df['train_id'] = process_df.filename.map(
            lambda x: int(os.path.splitext(x)[0]))
        process_df.set_index(['train_id'], inplace=True)
        for k, v in process_df[cols].to_dict(orient='index').items():
            if k in self.data_by_id:
                d = self.data_by_id[k]
                if self.prescale:
                    v = self._apply_prescale(v, self.prescale)
                patch_info = self._calc_patch_info(v)
                num_patches = patch_info['num']
                self.patch_index[d['index']] = list(range(num_patches))
                self.patch_count += num_patches
                v['patches'] = patch_info
                d.update(v)

    if coords_file:
        coords_df = pd.read_csv(coords_file, index_col=False)
        coords_df.x_coord = coords_df.x_coord.astype('int')
        coords_df.y_coord = coords_df.y_coord.astype('int')
        coords_df.category = coords_df.category.replace(CATEGORY_MAP)
        # Scalar key (not a one-element list) so group keys are plain
        # filenames on every pandas version.
        groupby_file = coords_df.groupby('filename')
        for file in groupby_file.indices:
            coords = groupby_file.get_group(file)
            # ``.values`` replaces ``as_matrix()``, removed in pandas 1.0.
            coords = coords[['x_coord', 'y_coord', 'category']].values
            coords = coords[coords[:, 0].argsort()]  # sort rows by x
            fid = int(os.path.splitext(file)[0])
            if fid in self.data_by_id:
                d = self.data_by_id[fid]
                if self.prescale:
                    coords[:, :2] = np.rint(coords[:, :2] * self.prescale)
                # 'xmin'/'ymin' are only set by the processing_file step.
                xy_offset = np.array([d['xmin'], d['ymin']])
                coords[:, :2] = coords[:, :2] + xy_offset
                d['coords'] = coords

    self.dataset_mean = [0.43632373, 0.46022959, 0.4618598]
    self.dataset_std = [0.17749958, 0.16631233, 0.16272708]
    if transform is None:
        tfs = []
        if per_image_norm:
            tfs.append(mytransforms.NormalizeImg())
        tfs.append(mytransforms.ToTensor())
        if self.train:
            tfs.append(mytransforms.ColorJitter())
        if not per_image_norm:
            tfs.append(
                transforms.Normalize(self.dataset_mean, self.dataset_std))
        transform = transforms.Compose(tfs)
    # Always store the transform; a caller-supplied one used to be dropped.
    self.transform = transform
    self.target_transform = target_transform
    self.ttime = utils.AverageMeter()
import torch.utils.data as data
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from Libs.pytorch_utils.engine import train_one_epoch, evaluate
from Libs.pytorch_utils import utils
from torchvision.models.detection.rpn import AnchorGenerator
from create_detection_dataset import porpoise_dataset

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
IMG_RESIZE = 800
BATCH_SIZE = 8
NUM_WORKERS = 0
DATA_PATH = "porpoise_detection_data"
TRAIN_SPLIT = 0.1

# Training pipeline: resize, random flips, colour and noise perturbation,
# then normalisation.
TRANSFORM_TRAIN = T.Compose(
    [
        T.ToTensor(),
        T.Resize(IMG_RESIZE),
        T.RandomVerticalFlip(0.5),
        T.RandomHorizontalFlip(0.5),
        T.RandomColor(0.4, 0.2, 0.3, 0.1),
        T.AddRandomNoise(0.02, 0.5),
        # T.ShowImg(),  (debug step, disabled)
        T.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Validation pipeline: the same resize and normalisation, no random steps.
TRANSFORM_VAL = T.Compose(
    [
        T.ToTensor(),
        T.Resize(IMG_RESIZE),
        T.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
def __init__(
        self,
        input_root,
        target_file='',
        multi_label=True,
        train=True,
        train_fold=False,
        fold=0,
        img_type='.png',
        img_size=(512, 512),
        test_aug=0,
        transform=None):
    """Collect input paths, targets and the input transform.

    Parameters:
        input_root: folder searched for images of ``img_type``.
        target_file: optional CSV with ``Id``, ``fold`` and one column per
            label from ``get_labels()``. Without it the dataset is
            unlabelled and ``train`` must be False.
        multi_label: keep the target rows as float vectors; when False
            they are reduced with ``argmax`` to one class index per row.
        train: training mode (enables colour jitter, disables test
            augmentation).
        train_fold: like ``train``, selects the rows whose fold differs
            from ``fold``.
        fold: fold id that is held out (training) or selected (otherwise).
        img_type: image extension; only ``'.png'`` is accepted.
        img_size: stored as given.
        test_aug: passed to ``get_test_aug`` when not training.
        transform: input transform; a default pipeline is built when None.

    Raises:
        RuntimeError: no images found under ``input_root``.
        AssertionError: unsupported ``img_type``, or ``train`` without a
            ``target_file``.
    """
    assert img_type in ['.png']
    inputs = find_inputs(input_root, types=[img_type])
    if len(inputs) == 0:
        raise RuntimeError("Found 0 images in : " + input_root)

    if target_file:
        target_df = pd.read_csv(target_file)
        if train or train_fold:
            target_df = target_df[target_df['fold'] != fold]
        else:
            target_df = target_df[target_df['fold'] == fold]
        # Keyword ``axis`` (positional form removed in pandas 2.0); not
        # in-place, because target_df is a filtered slice at this point.
        target_df = target_df.drop(['fold'], axis=1)

        input_dict = dict(inputs)
        # Keep only rows whose image exists; copy so that adding a column
        # below does not write into a slice of the original frame.
        target_df = target_df[
            target_df.Id.map(lambda x: x in input_dict)].copy()
        target_df['filename'] = target_df.Id.map(lambda x: input_dict[x])
        self.inputs = target_df['Id'].apply(
            lambda x: os.path.join(input_root, x)).tolist()

        labels = get_labels()
        # ``df[cols].values`` replaces ``as_matrix(columns=cols)``, which
        # was removed in pandas 1.0.
        self.target_array = target_df[list(labels)].values.astype(
            np.float32)
        if not multi_label:
            self.target_array = np.argmax(self.target_array, axis=1)
        self.target_array = torch.from_numpy(self.target_array)
    else:
        assert not train
        inputs = sorted(inputs, key=lambda x: natural_key(x[0]))
        self.target_array = None
        self.inputs = [x[1] for x in inputs]

    self.train = train
    if img_type == '.jpg':
        self.dataset_mean = [0.31535792, 0.34446435, 0.30275137]
        self.dataset_std = [0.05338271, 0.04247036, 0.03543708]
    else:
        # For png
        self.dataset_mean = [0.0804419, 0.05262986, 0.05474701, 0.08270896]
        self.dataset_std = [0.13000701, 0.08796628, 0.1386317, 0.12718021]

    self.img_size = img_size
    self.img_type = img_type
    if not train:
        self.test_aug = get_test_aug(test_aug)
    else:
        self.test_aug = []

    if transform is None:
        # Both image types share one pipeline; only the jitter strength
        # differs.
        jitter = 0.01 if img_type == '.jpg' else 0.2
        tfs = [mytransforms.ToTensor()]
        if self.train:
            tfs.append(mytransforms.ColorJitter(
                brightness=jitter, contrast=jitter, saturation=jitter))
        tfs.append(
            transforms.Normalize(self.dataset_mean, self.dataset_std))
        transform = transforms.Compose(tfs)
    # Always store the transform; a caller-supplied one used to be dropped.
    self.transform = transform
def __init__(
        self,
        input_root,
        target_file='',
        tags_type='all',
        multi_label=True,
        train=True,
        train_fold=False,
        fold=0,
        img_type='.jpg',
        img_size=(256, 256),
        test_aug=0,
        transform=None):
    """Collect input paths, tag targets and the input transform.

    Parameters:
        input_root: folder searched for images of ``img_type``.
        target_file: optional CSV with ``image_name``, ``fold`` and one
            column per tag from ``get_tags(tags_type)``. Without it the
            dataset is unlabelled and ``train`` must be False.
        tags_type: passed to ``get_tags`` to choose the target columns.
        multi_label: keep the target rows as float vectors; when False
            they are reduced with ``argmax`` to one class index per row.
        train: training mode (enables colour jitter, disables test
            augmentation).
        train_fold: like ``train``, selects the rows whose fold differs
            from ``fold``.
        fold: fold id that is held out (training) or selected (otherwise).
        img_type: ``'.jpg'`` or ``'.tif'``; selects the normalisation
            statistics and the jitter strength.
        img_size: stored as given.
        test_aug: passed to ``get_test_aug`` when not training.
        transform: input transform; a default pipeline is built when None.

    Raises:
        RuntimeError: no images found under ``input_root``.
        AssertionError: unsupported ``img_type``, or ``train`` without a
            ``target_file``.
    """
    assert img_type in ['.jpg', '.tif']
    inputs = find_inputs(input_root, types=[img_type])
    if len(inputs) == 0:
        raise RuntimeError("Found 0 images in : " + input_root)

    if target_file:
        target_df = pd.read_csv(target_file)
        if train or train_fold:
            target_df = target_df[target_df['fold'] != fold]
        else:
            target_df = target_df[target_df['fold'] == fold]
        # Keyword ``axis`` (positional form removed in pandas 2.0); not
        # in-place, because target_df is a filtered slice at this point.
        target_df = target_df.drop(['fold'], axis=1)

        input_dict = dict(inputs)
        print(len(input_dict), len(target_df.index))
        # Keep only rows whose image exists; copy so that adding a column
        # below does not write into a slice of the original frame.
        target_df = target_df[
            target_df.image_name.map(lambda x: x in input_dict)].copy()
        target_df['filename'] = target_df.image_name.map(
            lambda x: input_dict[x])
        self.inputs = target_df['filename'].tolist()

        tags = get_tags(tags_type)
        # ``df[cols].values`` replaces ``as_matrix(columns=cols)``, which
        # was removed in pandas 1.0.
        self.target_array = target_df[list(tags)].values.astype(np.float32)
        if not multi_label:
            self.target_array = np.argmax(self.target_array, axis=1)
        self.target_array = torch.from_numpy(self.target_array)
    else:
        assert not train
        inputs = sorted(inputs, key=lambda x: natural_key(x[0]))
        self.target_array = None
        self.inputs = [x[1] for x in inputs]

    self.tags_type = tags_type
    self.train = train
    if img_type == '.jpg':
        self.dataset_mean = [0.31535792, 0.34446435, 0.30275137]
        self.dataset_std = [0.05338271, 0.04247036, 0.03543708]
    else:
        # For IR,R,G
        self.dataset_mean = [6398.84897763/2**16, 4988.75696302/2**16, 4270.74552695/2**16]  # NRG
        self.dataset_std = [858.46477922/2**16, 399.06597519/2**16, 408.51461036/2**16]  # NRG

    self.img_size = img_size
    self.img_type = img_type
    if not train:
        self.test_aug = get_test_aug(test_aug)
    else:
        self.test_aug = []

    if transform is None:
        # Both image types share one pipeline; only the jitter strength
        # differs.
        jitter = 0.01 if img_type == '.jpg' else 0.2
        tfs = [mytransforms.ToTensor()]
        if self.train:
            tfs.append(mytransforms.ColorJitter(
                brightness=jitter, contrast=jitter, saturation=jitter))
        tfs.append(
            transforms.Normalize(self.dataset_mean, self.dataset_std))
        transform = transforms.Compose(tfs)
    # Always store the transform; a caller-supplied one used to be dropped.
    self.transform = transform