def make_detections(predictions, classes, thresholds, scale=1, network_id=None):
    """Convert raw predictions into detection records plus summary statistics.

    Args:
        predictions: iterable of prediction records providing ``label``,
            ``bbox`` and ``confidence``, and optionally ``match``.
        classes: class configs, indexable by ``p.label`` and iterable; each
            entry provides an ``id``.
        thresholds: forwarded unchanged to ``get_counts``.
        scale: divisor applied to every box before it is encoded.
        network_id: identifier stored in the stats. When ``None`` the value
            ``(env.run, env.epoch)`` is used instead.

    Returns:
        struct with ``instances`` (list of detections) and ``stats``.
    """

    def detection(p):
        object_class = classes[p.label]
        return struct(
            shape=encode_shape(p.bbox.cpu() / scale, object_class),
            label=object_class.id,
            confidence=p.confidence.item(),
            match=int(p.match) if 'match' in p else None,
        )

    def score(ds):
        # Sum of squared confidences over exactly the detections passed in,
        # so per-class scores only reflect that class's detections.
        confidences = torch.FloatTensor([d.confidence for d in ds])
        return (confidences ** 2).sum().item()

    detections = [detection(p) for p in predictions]
    counts, class_counts = get_counts(detections, classes, thresholds)

    class_score = {
        c.id: score([d for d in detections if d.label == c.id])
        for c in classes
    }

    # Honour an explicitly supplied identifier; otherwise fall back to the
    # run/epoch pair taken from the surrounding environment.
    if network_id is None:
        network_id = (env.run, env.epoch)

    stats = struct(
        score=score(detections),
        class_score=class_score,
        counts=counts,
        class_counts=class_counts,
        network_id=network_id,
    )
    return struct(instances=detections, stats=stats)
def encode(target, anchor_boxes, params):
    """Build per-anchor classification and location targets for one image.

    Args:
        target: record with ``bbox`` and ``label`` tensors.
        anchor_boxes: anchor tensor; its first dimension is the anchor count.
        params: record providing ``top_anchors``, ``match_thresholds`` and
            ``location_loss``.

    Returns:
        struct with ``location`` and ``classification`` entries.
    """
    num_anchors = anchor_boxes.size(0)
    num_targets = target.bbox.size(0)

    # Without any target boxes every anchor gets an all-zero encoding.
    if num_targets == 0:
        zero_location = target.bbox.new_zeros(num_anchors, 4)
        zero_classes = target.bbox.new_zeros(num_anchors, dtype=torch.long)
        return struct(location=zero_location, classification=zero_classes)

    overlaps = box.iou_matrix(box.point_form(anchor_boxes), target.bbox)

    if params.top_anchors > 0:
        # Double the overlap of the top-k entries along dim 0 so they win
        # the maximum taken below.
        best_ious, best_inds = overlaps.topk(params.top_anchors, dim=0)
        overlaps = overlaps.scatter(0, best_inds, best_ious * 2)

    anchor_ious, anchor_targets = overlaps.max(1)
    class_target = encode_classes(
        target.label,
        anchor_ious,
        anchor_targets,
        match_thresholds=params.match_thresholds,
    )

    location = target.bbox[anchor_targets]
    if params.location_loss == "l1":
        # The l1 loss regresses boxes encoded relative to their anchors.
        location = encode_boxes(location, anchor_boxes)

    return struct(location=location, classification=class_target)
def count(image):
    """Collect annotation counts and box sizes for a single image.

    Returns:
        struct with ``n`` (annotation count), ``categories`` (``n`` under the
        image's own category and 0 under the others), ``box_areas``,
        ``box_lengths`` (longest side per box) and ``image_size``.
    """
    annotations = image_annotations(image)
    n = len(annotations)

    category_names = ('test', 'validate', 'train', 'new', 'discard')
    categories = struct(**{
        name: (n if image.category == name else 0)
        for name in category_names
    })

    def extent(ann):
        # Boxes are stored as (x1, y1, x2, y2).
        x1, y1, x2, y2 = ann.box
        return x2 - x1, y2 - y1

    box_areas = []
    box_lengths = []
    for ann in annotations:
        width, height = extent(ann)
        box_areas.append(width * height)
        box_lengths.append(max(width, height))

    return struct(
        n=n,
        categories=categories,
        box_areas=box_areas,
        box_lengths=box_lengths,
        image_size=image.image_size,
    )
def train_update(n, total):
    """Update the learning rate and report training progress.

    Args:
        n: current position within the epoch.
        total: total number of positions; ``n / total`` is passed to the
            learning-rate schedule.
    """
    fraction = n / total
    adjust_learning_rate(schedule_lr(fraction, env.epoch, args), env.optimizer)

    report = struct(
        activity=struct(tag='train', epoch=env.epoch),
        progress=(n, total),
    )
    send_command('progress', report)
    poll_command()
def decode_dataset(data):
    """Decode a raw dataset into classes, images sorted by start, and config.

    Returns:
        struct with ``classes`` (one struct per config class, with an integer
        ``id``), ``images`` (decoded images with ``None`` results filtered
        out, ordered by their ``start``) and ``config``.
    """
    data = to_structs(data)
    config = data.config

    classes = []
    for key, class_config in config.classes.items():
        classes.append(struct(id=int(key), **class_config))

    def start_of(image):
        return image.start

    decoded = [decode_image(entry, config) for entry in data.images]
    images = filter_none(decoded)
    images.sort(key=start_of)

    return struct(classes=classes, images=images, config=config)
def default_parameters(parameters):
    """Build a struct of default values from a parameter specification.

    For a parameter of type ``'choice'`` the default is a struct holding the
    chosen option name and, recursively, the defaults of that option's own
    parameters. Every other parameter maps straight to its ``default``.
    """

    def default_for(param):
        if param.type != 'choice':
            return param.default
        selected = param.options[param.default]
        return struct(choice=param.default,
                      parameters=default_parameters(selected))

    return struct(**{
        name: default_for(param) for name, param in parameters.items()
    })
def decode_obj(obj):
    """Convert a tagged annotation shape into a labelled bounding box.

    Returns:
        struct with ``label`` and ``box`` (``[x1, y1, x2, y2]``) for ``'box'``
        and ``'circle'`` shapes, or ``None`` for any other tag.
    """
    tag, shape = split_tagged(obj.shape)

    if tag == 'box':
        corners = [*shape.lower, *shape.upper]
        return struct(label=obj.label, box=corners)

    if tag == 'circle':
        # A circle is represented by the square that bounds it.
        (x, y), r = shape.centre, shape.radius
        return struct(label=obj.label, box=[x - r, y - r, x + r, y + r])

    # Unsupported annotation types are ignored for now.
    return None
def read_log(file):
    """Read a JSON-lines log and group its entries by step.

    Each line is parsed as JSON and must provide ``step``, ``tag``, ``value``
    and ``time``.

    Args:
        file: path of the log file to read.

    Returns:
        struct with ``tags`` (keys view of every tag seen, in first-seen
        order) and ``steps`` (mapping of step to a ``Struct`` of
        ``tag -> struct(value, time)``).
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(file, mode="r") as log_file:
        entries = [to_structs(json.loads(line)) for line in log_file]

    steps = {}
    tags = {}
    for entry in entries:
        step = steps.setdefault(entry.step, {})
        step[entry.tag] = struct(value=entry.value, time=entry.time)
        tags[entry.tag] = True  # dict used as an insertion-ordered set

    return struct(tags=tags.keys(),
                  steps={i: Struct(step) for i, step in steps.items()})
def decode_action(action):
    """Translate a tagged history action into a flat action struct.

    Args:
        action: record with a ``tag`` and, for most tags, ``contents``.

    Returns:
        struct whose ``action`` field names the operation, plus ``value``,
        ``ids``, ``t`` or ``class_id`` depending on the operation.

    Raises:
        AssertionError: if the action tag or edit tag is not recognised.
            Raised explicitly so the check survives ``python -O``, where a
            plain ``assert`` would be stripped and ``None`` returned.
    """
    tag = action.tag
    if tag == 'undo':
        return struct(action='undo')
    if tag == 'redo':
        return struct(action='redo')
    if tag == 'threshold':
        return struct(action='threshold', value=action.contents)
    if tag == 'close':
        return struct(action='submit')
    if tag != 'edit':
        raise AssertionError("unknown action type: " + action.tag)

    edit = action.contents
    if edit.tag == 'confirm_detection':
        return struct(action='confirm', ids=list(edit.contents.keys()))
    if edit.tag == 'transform_parts':
        transform, ids = edit.contents
        # The transform is unpacked to keep the two-component shape check,
        # but its parts are unused: the result always reports 'translate'.
        _s, _t = transform
        return struct(action='transform', t='translate', ids=list(ids.keys()))
    if edit.tag == 'add':
        return struct(action='add')
    if edit.tag == 'delete_parts':
        return struct(action='delete', ids=list(edit.contents.keys()))
    if edit.tag == 'clear_all':
        return struct(action='delete')
    if edit.tag == 'set_class':
        class_id, ids = edit.contents
        return struct(action='set_class', ids=ids, class_id=class_id)

    raise AssertionError("unknown edit type: " + edit.tag)
def extract_session(session, config):
    """Summarise one annotation session as a timed list of actions.

    A ``confirm`` action is dropped when the next action is a ``transform``
    or ``delete`` on the same ids. Each action's ``duration`` is capped at
    30, while ``real_duration`` keeps the uncapped gap.

    Args:
        session: record with ``time``, ``open``, ``history`` (pairs of date
            string and raw action) and ``threshold``.
        config: dataset config, used to build the class mapping.

    Returns:
        struct with ``start``, ``detections``, ``actions``, ``duration``
        (sum of capped durations), ``real_duration`` (time of the last
        action, or 0 for an empty history), ``type`` and ``threshold``.
    """
    start = date.parse(session.time)

    if session.open.tag == "new":
        detections = annotate.decode_detections(
            session.open.contents.instances, annotate.class_mapping(config))
    else:
        detections = empty_detections

    actions = []
    for datestr, raw_action in session.history:
        t = date.parse(datestr)
        action = decode_action(raw_action)

        prev = actions[-1] if actions else None
        # A 'delete' produced by clearing everything carries no ids, so check
        # for the field before comparing instead of failing on the lookup.
        supersedes_confirm = (
            prev
            and action.action in ['transform', 'delete']
            and prev.action == 'confirm'
            and 'ids' in action
            and prev.ids == action.ids
        )
        if supersedes_confirm:
            actions.pop()

        elapsed = (t - start).total_seconds()
        # Measured from the last action kept, after any pop above.
        previous_elapsed = actions[-1].time if actions else 0
        gap = elapsed - previous_elapsed
        actions.append(action._extend(
            time=elapsed, duration=min(30, gap), real_duration=gap))

    duration = sum(pluck('duration', actions))
    # An empty history yields zero elapsed time rather than an IndexError.
    end = actions[-1].time if actions else 0

    return struct(start=start, detections=detections, actions=actions,
                  duration=duration, real_duration=end,
                  type=session.open.tag, threshold=session.threshold)
def history_summary(history):
    """Aggregate per-image summaries of an annotation history.

    Args:
        history: passed to ``image_summaries``.

    Returns:
        A pair ``(summaries, totals)``: the per-image summaries transposed
        into a struct of columns, and a struct of aggregate statistics.
        ``annotation_breaks`` counts actions whose ``real_duration`` exceeds
        60 (presumably seconds, given ``total_minutes`` divides by 60 --
        confirm). The returned ``actions_count`` is ``totals.actions_count``;
        the locally counted actions only feed ``total_actions``.

    Raises:
        ZeroDivisionError: if ``totals.duration`` or ``totals.instances`` is
            a plain zero, because the rate fields divide by them.
    """
    summaries = image_summaries(history)
    totals = sum_list(summaries)
    summaries = transpose_structs(summaries)

    per_action = [
        entry._subset('action', 'duration', 'real_duration')
        for entry in totals.actions
    ]
    actions = transpose_structs(per_action)

    actions_count = count_struct(actions.action, action_types)
    total_actions = sum(actions_count.values(), 0)

    long_pauses = [
        action.real_duration
        for action in totals.actions
        if action.real_duration > 60
    ]

    return summaries, struct(
        action_durations=stats(actions.duration),
        action_real_durations=stats(actions.real_duration),
        annotation_breaks=len(long_pauses),
        image_durations=stats(summaries.duration),
        n_actions=stats(summaries.n_actions),
        instances_image=stats(summaries.instances),
        correction_count=totals.correction_count,
        actions_count=totals.actions_count,
        total_minutes=totals.duration / 60,
        total_actions=total_actions,
        actions_minute=60 * total_actions / totals.duration,
        instances_minute=60 * totals.instances / totals.duration,
        actions_annotation=total_actions / totals.instances,
    )
def detection(p):
    """Turn one prediction into a detection struct.

    Reads ``classes`` and ``scale`` from the enclosing scope. The box is
    moved to the CPU and divided by ``scale`` before being encoded as a
    shape; ``match`` is ``None`` when the prediction has no ``match`` entry.
    """
    object_class = classes[p.label]
    shape = encode_shape(p.bbox.cpu() / scale, object_class)
    confidence = p.confidence.item()

    if 'match' in p:
        match = int(p.match)
    else:
        match = None

    return struct(shape=shape, label=object_class.id,
                  confidence=confidence, match=match)
def loss(self, input_size, target, encoding, prediction):
    """Compute classification and location losses for a batch.

    The incoming ``encoding`` argument is not used: targets are re-encoded
    here against the anchors for ``input_size``. Inside this body the name
    ``loss`` resolves to the module-level ``loss`` object, not this method.

    Returns:
        struct with ``classification`` (class loss divided by
        ``self.params.balance``) and ``location`` (0 when the configured
        location loss is neither ``"l1"`` nor ``"giou"``).
    """
    classification, location = prediction
    anchor_boxes = self.anchors(input_size)

    per_image = [anchor.encode(t, anchor_boxes, self.params) for t in target]
    encoding = stack_tables(per_image)

    class_loss = loss.class_loss(encoding.classification, classification,
                                 class_weights=self.class_weights)

    kind = self.params.location_loss
    if kind == "l1":
        loc_loss = loss.l1(encoding.location, location,
                           encoding.classification)
    elif kind == "giou":
        # giou compares decoded boxes, so decode the predicted offsets
        # against the anchors expanded to the prediction's size.
        expanded_anchors = anchor_boxes.unsqueeze(0).expand(location.size())
        bbox = anchor.decode(location, expanded_anchors)
        loc_loss = loss.giou(encoding.location, bbox,
                             encoding.classification)
    else:
        loc_loss = 0

    return struct(classification=class_loss / self.params.balance,
                  location=loc_loss)
def create(args, dataset_args):
    """Construct the RetinaNet model and its matching encoder.

    Args:
        args: model options (backbone, pyramid depth, anchor and loss
            settings).
        dataset_args: provides ``classes``; each class may carry a
            ``'weighting'`` entry, defaulting to 0.25.

    Returns:
        Tuple ``(model, encoder)``.
    """
    num_classes = len(dataset_args.classes)
    num_boxes, box_sizes = anchor_sizes(
        args.first, args.depth,
        anchor_scale=args.anchor_scale, square=args.square)

    pyramid_options = dict(
        backbone_name=args.backbone,
        features=args.features,
        first=args.first,
        depth=args.depth,
        decode_blocks=args.decode_blocks,
    )
    pyramid = feature_pyramid(**pyramid_options)
    model = RetinaNet(pyramid, num_boxes=num_boxes,
                      num_classes=num_classes, shared=args.shared)

    assert args.location_loss in ["l1", "giou"]

    params = struct(
        crop_boxes=args.crop_boxes,
        match_thresholds=(args.neg_match, args.pos_match),
        top_anchors=args.top_anchors,
        location_loss=args.location_loss,
        balance=args.balance,
    )

    class_weights = []
    for class_config in dataset_args.classes:
        class_weights.append(class_config.get('weighting', 0.25))

    encoder = Encoder(args.first, box_sizes,
                      class_weights=class_weights, params=params)
    return model, encoder
def decode_dataset(data):
    """Decode a raw dataset into its config and a ``DetectionDataset``.

    Images are decoded with ``decode_image`` and keyed by their
    ``image_file``; classes get an integer ``id`` taken from the config key.

    Returns:
        Tuple ``(config, dataset)``.
    """
    data = to_structs(data)
    config = data.config

    classes = []
    for key, class_config in config.classes.items():
        classes.append(struct(id=int(key), **class_config))

    images = {}
    for entry in data.images:
        filename = entry.image_file
        images[filename] = decode_image(entry, config)

    return config, DetectionDataset(classes=classes, images=images)
def add_noise(self, noise=0, offset=0):
    """Perturb every image's target boxes and report the mean IoU.

    All boxes have their centre shifted by ``offset * size``. Images whose
    category is ``'train'`` additionally receive random jitter on centre and
    size, scaled by ``noise``.

    Args:
        noise: scale of the random jitter applied to training images.
        offset: fixed shift applied to every box centre, as a fraction of
            the box size.

    Returns:
        ``self``, with ``self.images`` replaced by the perturbed images.
    """
    totals = struct(iou=0, n=0)

    def add_image_noise(image):
        nonlocal totals
        n = image.target._size
        centre, size = box.split(box.extents_form(image.target.bbox))
        centre.add_(offset * size)

        is_training = image.category == 'train'
        if is_training:
            centre.add_(torch.randn(n, 2) * noise * size)
            size.mul_(torch.randn(n, 2) * noise + 1)

        noisy = box.point_form(torch.cat([centre, size], 1))

        if is_training:
            # Only training images contribute to the reported mean IoU.
            totals += struct(
                iou=box.iou_matrix_matched(noisy, image.target.bbox).sum(),
                n=n)

        return image._extend(target=image.target._extend(bbox=noisy))

    self.images = {
        k: add_image_noise(image) for k, image in self.images.items()
    }

    # With no training boxes the mean is undefined; report NaN rather than
    # raising a division-by-zero error.
    mean_iou = totals.iou / totals.n if totals.n > 0 else float('nan')
    print("added noise, mean iou = ", mean_iou)
    return self
def encode_shape(box, class_config):
    """Encode an ``[x1, y1, x2, y2]`` box as the shape the class expects.

    Args:
        box: four-element box; the first two entries are the lower corner
            and the last two the upper corner.
        class_config: record whose ``shape`` is ``'circle'`` or ``'box'``.

    Returns:
        A tagged ``'circle'`` shape (centre at the box midpoint, radius equal
        to a quarter of width plus height) or a tagged ``'box'`` shape.

    Raises:
        AssertionError: if ``class_config.shape`` is not supported. Raised
            explicitly so the check is not stripped under ``python -O``.
    """
    lower, upper = box[:2], box[2:]

    if class_config.shape == 'circle':
        centre = ((lower + upper) * 0.5).tolist()
        # Mean of half-width and half-height.
        radius = (upper - lower).sum().item() / 4
        return tagged('circle', struct(centre=centre, radius=radius))

    if class_config.shape == 'box':
        return tagged('box',
                      struct(lower=lower.tolist(), upper=upper.tolist()))

    raise AssertionError("unsupported shape config: " + class_config.shape)
def encode_target(target, heatmap_size, num_classes, params):
    """Render target boxes into heatmap, box-target and box-weight maps.

    Args:
        target: record with ``bbox`` and ``label`` tensors.
        heatmap_size: ``(w, h)`` pair giving the heatmap size.
        num_classes: number of heatmap channels; every label must be below it.
        params: provides ``alpha``, forwarded to ``clipped_gaussian``.

    Returns:
        struct with ``heatmap`` (permuted to ``(h, w, num_classes)``),
        ``box_target`` (``(h, w, 4)``) and ``box_weight`` (``(h, w)``).
    """
    m = target.bbox.size(0)  # NOTE(review): not used anywhere below
    w, h = heatmap_size
    # sort by area, largest boxes first (and least priority)
    areas = box.area(target.bbox)
    areas, boxes_ind = torch.sort(areas, descending=True)
    # All maps are created with new_zeros on `areas`, so they share its dtype
    # and device.
    heatmap = areas.new_zeros(num_classes, h, w)
    box_weight = areas.new_zeros(h, w)
    box_target = areas.new_zeros(h, w, 4)
    for (label, target_box) in zip(target.label[boxes_ind], target.bbox[boxes_ind]):
        assert label < num_classes
        extents = box.extents(target_box)
        # Dot product of the size vector with itself (sum of squared sides),
        # despite being named `area`.
        area = extents.size.dot(extents.size)
        for gaussian, slices in clipped_gaussian(heatmap_size, extents, params.alpha):
            gaussian = gaussian.type_as(heatmap)
            local_heatmap = heatmap[label][slices]
            # Element-wise maximum written back through `out`; this assumes
            # `local_heatmap` is a view into `heatmap` -- TODO confirm that
            # `slices` is plain (non-advanced) indexing.
            torch.max(gaussian, local_heatmap, out=local_heatmap)
            loc_weight = gaussian * (area.log() / gaussian.sum())
            # Overwrite only the cells where this box has a larger weight
            # than whatever was written before.
            mask = loc_weight > box_weight[slices]
            # Chained indexing: the assignments reach the underlying tensors
            # only if `[slices]` yields a view (same assumption as above).
            box_target[slices][mask] = target_box
            box_weight[slices][mask] = loc_weight[mask]
    return struct(heatmap=heatmap.permute(1, 2, 0), box_target=box_target, box_weight=box_weight)
def compute(points):
    """Bin (confidence, iou, label) points into a normalised 2D histogram.

    Bin edges come from ``ranges`` in the enclosing scope; the label of each
    point is ignored. Counts are divided by the number of points.

    Returns:
        struct with ``high_imprecise`` (bin ``[1, 0]``), ``low_precise``
        (bin ``[0, 1]``) and ``high_precise`` (bin ``[1, 1]``), where the
        first index is the confidence bin and the second the iou bin.
    """
    confidences, ious, _labels = zip(*points)
    counts, _conf_edges, _iou_edges = np.histogram2d(
        np.array(confidences), np.array(ious), bins=ranges)
    hist = counts / len(points)

    return struct(
        high_imprecise=hist[1, 0],
        low_precise=hist[0, 1],
        high_precise=hist[1, 1],
    )
def evaluate_vis(model, encoder, data, nms_params, classes, args, debug_key=None, iou=0.5):
    """Evaluate the model on one image and gather data for visualisation.

    Args:
        model: network passed to ``evaluate.evaluate_image``.
        encoder: used for evaluation and to produce debug output.
        data: record with ``image``, ``target``, ``file``, ``id`` and
            ``image_size``.
        nms_params: forwarded to ``evaluate.evaluate_image``.
        classes: forwarded to ``encoder.debug``.
        args: not used in this function.
        debug_key: when truthy, selects one entry of the encoder's debug
            output; otherwise ``debug`` is ``None``.
        iou: matching threshold passed to ``match_boxes``.

    Returns:
        struct with the image, its metadata, matches, the original target,
        detections, image statistics, mAP and the selected debug entry.
    """
    # NOTE(review): the whole body is placed under no_grad here; the original
    # extent of the `with` block should be confirmed.
    with torch.no_grad():
        # `device` is not a parameter; it is read from the enclosing scope.
        result = evaluate.evaluate_image(model, data.image, encoder, device=device, nms_params=nms_params)
        # Prints the shape of the evaluation result on every call.
        print(shape(result))
        # Move the target tensors to the device the detections live on.
        target = data.target._map(Tensor.to, result.detections._device)
        matches = match_boxes(result.detections, target, threshold=iou)
        scores = mAP_matches(matches, target.label.size(0))
        debug = encoder.debug(data.image, target, result.prediction, classes)
        # The returned `target` is the original `data.target`, not the copy
        # moved to the detections' device.
        return struct(image=data.image, file=data.file, id=data.id, image_size=data.image_size, matches=matches, target=data.target, detections=result.detections, stats=image_stats(data.image), mAP=scores.mAP, debug=debug[debug_key] if debug_key else None)
def image_stats(batch):
    """Compute per-channel mean and standard deviation of an image tensor.

    Values are divided by 255 before the statistics are taken. The input is
    never modified.

    Args:
        batch: 3-dimensional tensor whose last dimension has size 3.

    Returns:
        struct with ``mean`` and ``std``, each a CPU tensor of 3 values.
    """
    assert (batch.dim() == 3 and batch.size(2) == 3)

    # `.float()` returns the same tensor when the input is already float32,
    # so an in-place division would rescale the caller's data. Divide out of
    # place instead.
    scaled = batch.float() / 255
    # reshape also copes with non-contiguous inputs, where view would fail.
    flat = scaled.reshape(-1, 3)
    return struct(mean=flat.mean(0).cpu(), std=flat.std(0).cpu())
def f(d):
    """Attach the encoder's target encoding to a data record.

    Uses ``encoder`` from the enclosing scope. ``lengths`` is the number of
    target labels.
    """
    image, target = d.image, d.target
    encoding = encoder.encode(image, target)
    return struct(
        image=image,
        encoding=encoding,
        target=target,
        lengths=len(target.label),
        id=d.id,
    )
def read_training(logfile):
    """Load validation AP and training loss curves from a training log.

    Returns:
        struct with ``epoch``, ``AP`` and ``loss`` as numpy arrays, plus
        ``best_AP`` as returned by ``best_epoch('AP')`` applied to the log.
    """
    log = read_log(logfile)

    validation = get_entry(log, "validate")
    epochs, precisions = extract_key(validation, 'AP')

    training_loss = get_entry(log, "train/loss")
    _, losses = extract_key(training_loss, 'total')

    best = best_epoch('AP')(log)
    return struct(
        epoch=np.array(epochs),
        best_AP=best,
        AP=np.array(precisions),
        loss=np.array(losses),
    )
def iou_box(box1, box2):
    """Return intersection-over-union of two boxes with lower/upper corners.

    The intersection is the area of the box spanned by the element-wise
    maximum of the lower corners and minimum of the upper corners.
    """
    lower = np.maximum(box1.lower, box2.lower)
    upper = np.minimum(box1.upper, box2.upper)

    intersection = area(struct(lower=lower, upper=upper))
    union = area(box1) + area(box2) - intersection
    return intersection / union
def add_noise(box, offset=0, noise=0):
    """Perturb a centre/size box and measure its overlap with the original.

    Args:
        box: record with ``centre`` and ``size``.
        offset: fixed shift of the centre, as a fraction of the box size.
        noise: standard deviation of the random jitter on centre (relative
            to the size) and of the multiplicative jitter on size.

    Returns:
        Tuple ``(noisy, iou)``: the perturbed box and its IoU with ``box``.
    """
    # Draw order is kept (centre first, then size) so a seeded generator
    # produces the same boxes.
    jitter = np.random.normal(0, noise, 2)
    centre = box.centre + offset * box.size + jitter * box.size

    scale_factor = np.random.normal(1, noise, 2)
    size = box.size * scale_factor

    noisy = struct(centre=centre, size=size)
    overlap = iou_box(to_points(noisy), to_points(box))
    return noisy, overlap
def detection(p):
    """Turn one prediction into a detection struct with an encoded box.

    Reads ``classes`` from the enclosing scope and passes the class ``name``
    to ``encode_shape``. Only the second element of the encoded result is
    kept; ``match`` is ``None`` when the prediction has no ``match`` entry.
    """
    class_name = classes[p.label].name
    _tag, box = encode_shape(p.bbox.cpu(), class_name)

    confidence = p.confidence.item()
    if 'match' in p:
        match = p.match.item()
    else:
        match = None

    return struct(box=box, label=p.label, confidence=confidence, match=match)
def load_state(model, info, strict=True):
    """Load saved weights into ``model`` and return them with metadata.

    Args:
        model: module to load the state into (modified in place).
        info: record with ``state``, ``score`` and ``epoch``, and optionally
            ``thresholds``.
        strict: when truthy, load with ``load_state_dict(strict=True)``;
            otherwise use ``load_state_partial``.

    Returns:
        struct with ``model``, ``thresholds`` (``None`` when absent from
        ``info``), ``score`` and ``epoch``.
    """
    if not strict:
        load_state_partial(model, info.state)
    else:
        model.load_state_dict(info.state, strict=True)

    if 'thresholds' in info:
        thresholds = info.thresholds
    else:
        thresholds = None

    return struct(model=model, thresholds=thresholds,
                  score=info.score, epoch=info.epoch)
def plot_scales(figure_path):
    """Plot validation AP per scale/crop setting and print relative tables.

    For each dataset one chart is saved to
    ``<figure_path>/crops_scales/<dataset>.pdf`` with one curve per
    (scale, crop) pair. Two tables averaged over datasets are then printed:
    AP relative to the best setting, and time relative to the slowest.

    Args:
        figure_path: directory under which the figures are written.
    """
    datasets = ["apples", "penguins", "scallops", "seals"]
    scales = [1,2,4,8]
    crops = [512, 768, 1024]
    colors = plt.get_cmap("tab10")
    # Line style encodes the crop size; colour (below) encodes the scale.
    styles = {512: ':', 1024:'-', 768:'--'}
    rows = []
    for dataset in datasets:
        fig, ax = make_chart()
        for s, scale in enumerate(scales):
            for crop in crops:
                # `log_path` is read from the enclosing scope.
                logfile = path.join(log_path, 'scales', str(scale), str(crop), dataset, 'log.json')
                log = read_log(logfile)
                epoch, AP = extract_key(get_entry(log, "validate"), 'AP')
                time = training_time(log)
                # Only the first 40 entries are plotted.
                epoch = epoch[:40]
                AP = AP[:40]
                # The table AP is the mean from index 8 on, skipping the first
                # entries; time is the last value divided by the entry count.
                rows.append(struct(dataset=dataset, scale=1/scale, crop=crop, AP=np.array(AP[8:]).mean(), time=time[-1] / len(time)))
                plt.plot(epoch, AP, color=colors(s), linestyle=styles[crop], label= str(1/scale * 100) + ":" + str(crop) )
        # NOTE(review): the axis setup and save are assumed to run once per
        # dataset, after all its curves are drawn -- confirm the nesting.
        plt.xlabel("training epoch")
        plt.ylabel("average precision ($AP_{COCO}$)")
        plt.xlim(xmin=0)
        plt.ylim(ymin=0)
        plt.legend()
        fig.savefig(path.join(figure_path, "crops_scales", dataset + ".pdf"), bbox_inches='tight')
    df = pd.DataFrame(rows)
    aps = []
    times = []
    for dataset in datasets:
        d = df.loc[df['dataset'] == dataset]
        ap = d.pivot(columns='scale', index='crop', values='AP')
        time = d.pivot(columns='scale', index='crop', values='time')
        # Normalise within the dataset: AP by its best cell, time as the
        # slowest cell divided by each cell.
        aps.append(ap/ap.max().max())
        times.append(time.max().max()/time)
    # Element-wise mean of the per-dataset tables.
    print(sum(aps) / len(aps))
    print(sum(times) / len(times))
def make_statistics(data, encoder, loss, prediction):
    """Summarise one batch's loss and size.

    ``encoder`` and ``prediction`` are accepted but not used.

    Returns:
        struct with ``error`` (sum of all loss values), ``loss`` (each loss
        mapped through ``Tensor.item``), ``size`` (first dimension of the
        image tensor) and ``instances`` (sum of ``data.lengths``).
    """
    total_error = sum(loss.values())
    loss_values = loss._map(Tensor.item)
    batch_size = data.image.size(0)
    num_instances = data.lengths.sum().item()

    return struct(error=total_error, loss=loss_values,
                  size=batch_size, instances=num_instances)
def initialise(config, dataset, args):
    """Set up the training environment and return it as a single struct.

    Every local variable of this function, including the three parameters,
    becomes a field of the returned struct via ``struct(**locals())``.
    Renaming, adding or removing a local therefore changes the result.

    Args:
        config: provides ``root``, the data root directory.
        dataset: provides ``classes``.
        args: command-line style options (logging, model, optimiser, debug).

    Returns:
        struct holding all locals defined below.
    """
    data_root = config.root
    # Logs go next to the data unless a log directory is given.
    log_root = args.log_dir or data_root
    model_args = struct(dataset=struct(classes=dataset.classes, input_channels=3), model=args.model, version=2)
    run = 0
    debug = struct(predictions=args.debug_predictions or args.debug_all, boxes=args.debug_boxes or args.debug_all)
    output_path, log = logger.make_experiment(log_root, args.run_name, load=not args.no_load, dry_run=args.dry_run)
    model_path = os.path.join(output_path, "model.pth")
    model, encoder = models.create(model_args.model, model_args.dataset)
    set_bn_momentum(model, args.bn_momentum)
    best, current, resumed = checkpoint.load_checkpoint( model_path, model, model_args, args)
    # Continue from the epoch after the one stored in the checkpoint.
    model, epoch = current.model, current.epoch + 1
    pause_time = args.pause_epochs
    # NOTE(review): both branches yield an empty list, so the condition has
    # no effect -- confirm the intended value for resumed runs.
    running_average = [] if epoch >= args.average_start else []
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    # Alternative optimiser, currently disabled:
    # optimizer = optim.Adam(parameters, lr=args.lr, weight_decay=args.weight_decay)
    device = torch.cuda.current_device()
    tests = args.tests.split(",")
    return struct(**locals())