def read_image_and_gt(img_files, gt_files, config):
    '''
    Load, transform, and augment a batch of images and return them together with the label tensor
    :param img_files: list of image file paths for a batch
    :param gt_files: list of ground truth file paths for a batch
    :param config: config dict containing various hyperparameters

    :return: images and annotations in network input format
    '''

    labels = []
    bboxes = []
    deltas = []
    aidxs = []

    #loads annotations from a ground truth file
    def load_annotation(gt_file):

        with open(gt_file, 'r') as f:
            lines = f.readlines()

        annotations = []

        #each line is an annotation bounding box
        for line in lines:
            obj = line.strip().split(' ')
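            # NOTE: judging from the indices used below, each line is assumed to follow
            # the KITTI label layout: class name in field 0 and the 2D bounding box
            # (xmin, ymin, xmax, ymax) in fields 4-7.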

            #get class; if the class is not listed, skip this annotation
            try:
                cls = config.CLASS_TO_IDX[obj[0].lower().strip()]
                # print(cls)

                #get coordinates
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])

                #check for valid bounding boxes
                assert xmin >= 0.0 and xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}' \
                        .format(xmin, xmax, gt_file)
                assert ymin >= 0.0 and ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}' \
                        .format(ymin, ymax, gt_file)

                #transform to center point + width and height representation
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
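                # bbox_transform_inv is assumed to convert corner coordinates to a
                # center/size representation, roughly:
                #   cx = (xmin + xmax) / 2,  cy = (ymin + ymax) / 2
                #   w  = xmax - xmin,        h  = ymax - ymin
                # (the exact +/-1 convention is defined by bbox_transform_inv itself)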

                annotations.append([x, y, w, h, cls])

            except (KeyError, IndexError, ValueError, AssertionError):
                #skip malformed lines and classes that are not in CLASS_TO_IDX
                print(obj)
                continue
        return annotations

    #init tensor of images
    imgs = np.zeros((config.BATCH_SIZE, config.IMAGE_HEIGHT,
                     config.IMAGE_WIDTH, config.N_CHANNELS))

    img_idx = 0

    #iterate files
    for img_name, gt_name in zip(img_files, gt_files):

        #open image and convert to float
        img = cv2.imread(img_name).astype(np.float32, copy=False)

        #store original height and width before resizing
        orig_h, orig_w, _ = [float(v) for v in img.shape]

        # scale image to the network input size
        img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT))

        #standardize image (zero mean, unit variance)
        img = (img - np.mean(img)) / np.std(img)

        #print(orig_h, orig_w)
        # load annotations
        annotations = load_annotation(gt_name)

        #split in classes and boxes
        labels_per_file = [a[4] for a in annotations]

        bboxes_per_file = np.array([a[0:4] for a in annotations])

        #dynamic Data Augmentation
        img = np.asarray(img)
        bboxes_per_file = [
            fix_bbox(*img.shape[:2], bbox) for bbox in bboxes_per_file
        ]
        #print(bboxes_per_file)
        annotations = {
            'image': img,
            'bboxes': bboxes_per_file,
            'labels': labels_per_file
        }
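        # _aug(config) is assumed to build an albumentations-style transform that
        # accepts image/bboxes/labels keyword arguments and returns the augmented
        # versions under the same keys.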
        augmented = _aug(config)(**annotations)
        img = augmented['image']
        bboxes_per_file = np.array(augmented['bboxes'])
        labels_per_file = augmented['labels']

        #and store
        imgs[img_idx] = img

        img_idx += 1

        # scale annotation
        x_scale = config.IMAGE_WIDTH / orig_w
        y_scale = config.IMAGE_HEIGHT / orig_h

        #scale boxes
        bboxes_per_file[:, 0::2] = bboxes_per_file[:, 0::2] * x_scale
        bboxes_per_file[:, 1::2] = bboxes_per_file[:, 1::2] * y_scale

        bboxes.append(bboxes_per_file)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()

        #iterate all bounding boxes for a file
        for i in range(len(bboxes_per_file)):

            #compute overlaps of bounding boxes and anchor boxes
            overlaps = batch_iou(config.ANCHOR_BOX, bboxes_per_file[i])
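            # batch_iou is assumed to return, for every anchor in ANCHOR_BOX, its
            # intersection-over-union with the single box bboxes_per_file[i]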

            #anchor box index; the out-of-range value marks "no anchor assigned yet"
            aidx = len(config.ANCHOR_BOX)

            #iterate anchors sorted by overlap, largest first
            for ov_idx in np.argsort(overlaps)[::-1]:
                #stop once the overlaps are no longer positive
                if overlaps[ov_idx] <= 0:
                    break
                #assign the first anchor that is still free and stop
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    break

            # if no free anchor box with positive overlap exists, choose the free
            # anchor box with the smallest squared distance
            if aidx == len(config.ANCHOR_BOX):
                dist = np.sum(np.square(bboxes_per_file[i] -
                                        config.ANCHOR_BOX),
                              axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            #compute deltas for regression
            box_cx, box_cy, box_w, box_h = bboxes_per_file[i]
            delta = [0] * 4
            delta[0] = (box_cx - config.ANCHOR_BOX[aidx][0]
                        ) / config.ANCHOR_BOX[aidx][2]
            delta[1] = (box_cy - config.ANCHOR_BOX[aidx][1]
                        ) / config.ANCHOR_BOX[aidx][3]
            delta[2] = np.log(box_w / config.ANCHOR_BOX[aidx][2])
            delta[3] = np.log(box_h / config.ANCHOR_BOX[aidx][3])
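            # i.e. the standard anchor-based encoding (ANCHOR_BOX rows are [cx, cy, w, h]):
            #   delta = ((box_cx - a_cx) / a_w, (box_cy - a_cy) / a_h,
            #            log(box_w / a_w),      log(box_h / a_h))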

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        deltas.append(delta_per_image)
        aidxs.append(aidx_per_image)
        labels.append(labels_per_file)

    #transform the batch annotations into a dense form we can feed into the model
    label_indices, bbox_indices, box_delta_values, mask_indices, box_values = \
        [], [], [], [], []

    aidx_set = set()

    #iterate batch
    for i in range(len(labels)):
        #and annotations
        for j in range(len(labels[i])):
            if (i, aidxs[i][j]) not in aidx_set:
                aidx_set.add((i, aidxs[i][j]))
                label_indices.append([i, aidxs[i][j], labels[i][j]])
                mask_indices.append([i, aidxs[i][j]])
                bbox_indices.extend([[i, aidxs[i][j], k] for k in range(4)])
                box_delta_values.extend(deltas[i][j])
                box_values.extend(bboxes[i][j])

    #transform them into matrices
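    # sparse_to_dense is assumed to scatter the given values into a zero-initialized
    # array of the given shape at the listed index tuples.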
    input_mask = np.reshape(
        sparse_to_dense(mask_indices, [config.BATCH_SIZE, config.ANCHORS],
                        [1.0] * len(mask_indices)),
        [config.BATCH_SIZE, config.ANCHORS, 1])

    box_delta_input = sparse_to_dense(bbox_indices,
                                      [config.BATCH_SIZE, config.ANCHORS, 4],
                                      box_delta_values)

    box_input = sparse_to_dense(bbox_indices,
                                [config.BATCH_SIZE, config.ANCHORS, 4],
                                box_values)

    labels = sparse_to_dense(
        label_indices, [config.BATCH_SIZE, config.ANCHORS, config.CLASSES],
        [1.0] * len(label_indices))

    #concatenate outputs
    Y = np.concatenate((input_mask, box_input, box_delta_input, labels),
                       axis=-1).astype(np.float32)
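    # Y has shape [BATCH_SIZE, ANCHORS, 1 + 4 + 4 + CLASSES]:
    # [anchor mask | gt box (cx, cy, w, h) | regression deltas | one-hot class]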

    return imgs, Y
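
# A minimal usage sketch (hypothetical caller; img_batch_files, gt_batch_files and a
# loaded squeezeDet config object are assumed to exist):
#
#   imgs, Y = read_image_and_gt(img_batch_files, gt_batch_files, config)
#   model.train_on_batch(imgs, Y)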


def read_image_and_gt_with_original(img_files, gt_files, config):
    '''
    Like read_image_and_gt, but additionally return the images that are only resized (not normalized)
    :param img_files: list of image file paths for a batch
    :param gt_files: list of ground truth file paths for a batch
    :param config: config dict containing various hyperparameters

    :return: images, annotations, and the resized-only images
    '''

    labels = []
    bboxes = []
    deltas = []
    aidxs = []

    #loads annotations from a ground truth file
    def load_annotation(gt_file):

        with open(gt_file, 'r') as f:
            lines = f.readlines()

        annotations = []

        #each line is an annotation bounding box
        for line in lines:
            obj = line.strip().split(' ')

            #get class; if the class is not listed, skip this annotation
            try:
                cls = config.CLASS_TO_IDX[obj[0].lower().strip()]
                # print cls

                #get coordinates
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])

                #check for valid bounding boxes
                assert xmin >= 0.0 and xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}' \
                        .format(xmin, xmax, gt_file)
                assert ymin >= 0.0 and ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}' \
                        .format(ymin, ymax, gt_file)

                #transform to center point + width and height representation
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])

                annotations.append([x, y, w, h, cls])
            except (KeyError, IndexError, ValueError, AssertionError):
                #skip malformed lines and classes that are not in CLASS_TO_IDX
                continue
        return annotations

    imgs = np.zeros((config.BATCH_SIZE, config.IMAGE_HEIGHT,
                     config.IMAGE_WIDTH, config.N_CHANNELS))
    imgs_only_resized = np.zeros((config.BATCH_SIZE, config.IMAGE_HEIGHT,
                                  config.IMAGE_WIDTH, config.N_CHANNELS))

    img_idx = 0

    #iterate files
    for img_name, gt_name in zip(img_files, gt_files):

        #open img
        img = cv2.imread(img_name).astype(np.float32, copy=False)

        #store original height and width before resizing
        orig_h, orig_w, _ = [float(v) for v in img.shape]

        # scale image
        img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT))

        imgs_only_resized[img_idx] = img

        #standardize image (zero mean, unit variance)
        img = (img - np.mean(img)) / np.std(img)

        #print(orig_h, orig_w)
        # load annotations
        annotations = load_annotation(gt_name)

        #split in classes and boxes
        labels_per_file = [a[4] for a in annotations]

        bboxes_per_file = np.array([a[0:4] for a in annotations])

        #TODO enable dynamic Data Augmentation
        """

        if config.DATA_AUGMENTATION:
            assert mc.DRIFT_X >= 0 and mc.DRIFT_Y >= 0, \
                'mc.DRIFT_X and mc.DRIFT_Y must be >= 0'

            if mc.DRIFT_X > 0 or mc.DRIFT_Y > 0:
                # Ensures that the gt bounding box is not cut out of the image
                max_drift_x = min(gt_bbox[:, 0] - gt_bbox[:, 2] / 2.0 + 1)
                max_drift_y = min(gt_bbox[:, 1] - gt_bbox[:, 3] / 2.0 + 1)
                assert max_drift_x >= 0 and max_drift_y >= 0, 'bbox out of image'

                dy = np.random.randint(-mc.DRIFT_Y, min(mc.DRIFT_Y + 1, max_drift_y))
                dx = np.random.randint(-mc.DRIFT_X, min(mc.DRIFT_X + 1, max_drift_x))

                # shift bbox
                gt_bbox[:, 0] = gt_bbox[:, 0] - dx
                gt_bbox[:, 1] = gt_bbox[:, 1] - dy

                # distort image
                orig_h -= dy
                orig_w -= dx
                orig_x, dist_x = max(dx, 0), max(-dx, 0)
                orig_y, dist_y = max(dy, 0), max(-dy, 0)

                distorted_im = np.zeros(
                    (int(orig_h), int(orig_w), 3)).astype(np.float32)
                distorted_im[dist_y:, dist_x:, :] = im[orig_y:, orig_x:, :]
                im = distorted_im

            # Flip image with 50% probability
            if np.random.randint(2) > 0.5:
                im = im[:, ::-1, :]
                gt_bbox[:, 0] = orig_w - 1 - gt_bbox[:, 0]


        """

        #and store
        imgs[img_idx] = np.asarray(img)
        img_idx += 1

        # scale annotation
        x_scale = config.IMAGE_WIDTH / orig_w
        y_scale = config.IMAGE_HEIGHT / orig_h

        #scale boxes
        bboxes_per_file[:, 0::2] = bboxes_per_file[:, 0::2] * x_scale
        bboxes_per_file[:, 1::2] = bboxes_per_file[:, 1::2] * y_scale

        bboxes.append(bboxes_per_file)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()

        #iterate all bounding boxes for a file
        for i in range(len(bboxes_per_file)):

            #compute overlaps of bounding boxes and anchor boxes
            overlaps = batch_iou(config.ANCHOR_BOX, bboxes_per_file[i])

            #anchor box index; the out-of-range value marks "no anchor assigned yet"
            aidx = len(config.ANCHOR_BOX)

            #iterate anchors sorted by overlap, largest first
            for ov_idx in np.argsort(overlaps)[::-1]:
                #stop once the overlaps are no longer positive
                if overlaps[ov_idx] <= 0:
                    break
                #assign the first anchor that is still free and stop
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    break

            # if no free anchor box with positive overlap exists, choose the free
            # anchor box with the smallest squared distance
            if aidx == len(config.ANCHOR_BOX):
                dist = np.sum(np.square(bboxes_per_file[i] -
                                        config.ANCHOR_BOX),
                              axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            #compute deltas for regression
            box_cx, box_cy, box_w, box_h = bboxes_per_file[i]
            delta = [0] * 4
            delta[0] = (box_cx - config.ANCHOR_BOX[aidx][0]
                        ) / config.ANCHOR_BOX[aidx][2]
            delta[1] = (box_cy - config.ANCHOR_BOX[aidx][1]
                        ) / config.ANCHOR_BOX[aidx][3]
            delta[2] = np.log(box_w / config.ANCHOR_BOX[aidx][2])
            delta[3] = np.log(box_h / config.ANCHOR_BOX[aidx][3])

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        deltas.append(delta_per_image)
        aidxs.append(aidx_per_image)
        labels.append(labels_per_file)

    #print(labels)

    #transform the batch annotations into a dense form we can feed into the model
    label_indices, bbox_indices, box_delta_values, mask_indices, box_values = \
        [], [], [], [], []

    aidx_set = set()

    #iterate batch
    for i in range(len(labels)):
        #and annotations
        for j in range(len(labels[i])):
            if (i, aidxs[i][j]) not in aidx_set:
                aidx_set.add((i, aidxs[i][j]))
                label_indices.append([i, aidxs[i][j], labels[i][j]])
                mask_indices.append([i, aidxs[i][j]])
                bbox_indices.extend([[i, aidxs[i][j], k] for k in range(4)])
                box_delta_values.extend(deltas[i][j])
                box_values.extend(bboxes[i][j])

    #transform them into matrices
    input_mask = np.reshape(
        sparse_to_dense(mask_indices, [config.BATCH_SIZE, config.ANCHORS],
                        [1.0] * len(mask_indices)),
        [config.BATCH_SIZE, config.ANCHORS, 1])

    box_delta_input = sparse_to_dense(bbox_indices,
                                      [config.BATCH_SIZE, config.ANCHORS, 4],
                                      box_delta_values)

    box_input = sparse_to_dense(bbox_indices,
                                [config.BATCH_SIZE, config.ANCHORS, 4],
                                box_values)

    labels = sparse_to_dense(
        label_indices, [config.BATCH_SIZE, config.ANCHORS, config.CLASSES],
        [1.0] * len(label_indices))

    #concatenate outputs
    Y = np.concatenate((input_mask, box_input, box_delta_input, labels),
                       axis=-1).astype(np.float32)

    return imgs, Y, imgs_only_resized
def compute_statistics_for_thresholding(all_boxes, all_classes, all_scores,
                                        all_gts, config):
    """Compute tps, fps, fns, and other stuff for computing APs
    
    
    Arguments:
        all_boxes {[type]} -- list of predicted boxes
        all_classes {[type]} -- list of predicted classes
        all_scores {[type]} --list of predicted scores  
        all_gts {[type]} -- list of all y_trues
        config {[type]} -- squeezedet config
    
    Returns:
        [type] -- boxes_per_img , boxes_per_gt, np.stack(all_tps), np.stack(all_fps), np.stack(all_fns), is_gt, all_score_thresholds
    """

    boxes_per_img = []
    boxes_per_gt = []

    all_tps = []
    all_fps = []

    all_fns = []
    all_score_thresholds = [[] for c in range(config.CLASSES)]
    is_gt = [[] for c in range(config.CLASSES)]
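    # is_gt and all_score_thresholds collect, per class, whether an entry came from a
    # ground truth box and the score of the matched detection (0 for missed gts);
    # downstream code presumably sweeps these scores to build precision/recall curves.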

    #print(all_score_thresholds)

    #here we compute the false positives, false negatives and true positives of the network predictions
    #we cannot do everything in a numpy array as each image has a different number of filtered detections

    #iterate all batches
    for i in range(len(all_boxes)):

        batch_gt = all_gts[i]

        batch_classes = all_classes[i]

        batch_scores = all_scores[i]

        #batch_gt has shape batch_size * anchors * (1 + 4 + 4 + CLASSES):
        #gt boxes sit in channels 1:5, the one-hot labels start at channel 9
        box_input = batch_gt[:, :, 1:5]
        labels = batch_gt[:, :, 9:]

        #print(labels.shape)

        #iterate images per batch for image level analysis
        for j in range(len(all_boxes[i])):

            # add number of detections
            boxes_per_img.append(len(all_boxes[i][j]))

            #get index of non zero boxes
            non_zero_idx = np.sum(box_input[j][:], axis=-1) > 0

            #get non zero gt boxes
            nonzero_gts = np.reshape(box_input[j][non_zero_idx], [-1, 4])

            # add number of gt boxes
            boxes_per_gt.append(len(nonzero_gts))

            #get labels
            labels_per_image = labels[j]

            #get class indices of the non zero (one-hot) labels
            nonzero_labels = [
                cls_idx
                for row in labels_per_image[non_zero_idx, :].astype(int)
                for cls_idx, flag in enumerate(row) if flag == 1
            ]

            #for every class count the true positives, false positives and false negatives
            tp_per_image = np.zeros(config.CLASSES)
            fp_per_image = np.zeros(config.CLASSES)
            fn_per_image = np.zeros(config.CLASSES)

            #print(batch_classes[j])

            #use this to check if predicted box has already been assigned to a different gt
            assigned_idx = np.zeros_like(batch_classes[j])

            # for every gt per image compute overlaps with detections
            for k in range(len(nonzero_gts)):

                try:
                    #get overlap between gt box and all predictions
                    ious = utils.batch_iou(np.stack(all_boxes[i][j]),
                                           nonzero_gts[k])

                    #use this to check for biggest score
                    current_score = -1
                    #index of best detection
                    current_idx = -1

                    #iterate all the ious
                    for iou_index, iou in enumerate(ious):

                        # check if the iou is above the threshold, the classes match,
                        # the detection has not been assigned before, and its score beats
                        # the current best; if so, mark it as the current best detection
                        if iou > config.IOU_THRESHOLD \
                        and batch_classes[j][iou_index] == nonzero_labels[k] \
                        and not assigned_idx[iou_index]\
                        and batch_scores[j][iou_index] > current_score:

                            #update current score
                            current_score = batch_scores[j][iou_index]
                            #update idx of best
                            current_idx = iou_index

                    #if nothing was assigned to this box add a false negative
                    if current_score < 0:
                        fn_per_image[nonzero_labels[k]] += 1

                        #for mAP calc set this to a gt
                        is_gt[nonzero_labels[k]].append(1)
                        #append 0 as the score, as we did not detect it
                        all_score_thresholds[nonzero_labels[k]].append(0)
                    else:
                        #otherwise add a true positive for the corresponding class
                        tp_per_image[nonzero_labels[k]] += 1
                        # set to ignore assigned box
                        assigned_idx[current_idx] = 1
                        #append it as a gt
                        is_gt[nonzero_labels[k]].append(1)
                        #save threshold
                        all_score_thresholds[nonzero_labels[k]].append(
                            current_score)

                except (ValueError, IndexError):

                    #np.stack fails when there are no detections for this image,
                    #so count this ground truth box as a false negative
                    fn_per_image[nonzero_labels[k]] += 1

            #count false positives, that is, predicted boxes that were not assigned to any gt
            for index, ai in enumerate(assigned_idx):
                #if box has not been assigned

                if ai == 0:

                    #add a false positive to the corresponding class
                    fp_per_image[batch_classes[j][index]] += 1
                    #add this as a non gt
                    is_gt[batch_classes[j][index]].append(0)
                    #append the predicted score to the predicted class
                    all_score_thresholds[batch_classes[j][index]].append(
                        batch_scores[j][index])

            all_tps.append(tp_per_image)
            all_fns.append(fn_per_image)
            all_fps.append(fp_per_image)

    return boxes_per_img, boxes_per_gt, np.stack(all_tps), np.stack(
        all_fps), np.stack(all_fns), is_gt, all_score_thresholds
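
# A minimal sketch of how these statistics are typically consumed downstream
# (hypothetical code, not part of this module):
#
#   _, _, tps, fps, fns, is_gt, thresholds = compute_statistics_for_thresholding(
#       all_boxes, all_classes, all_scores, all_gts, config)
#   tp, fp, fn = tps.sum(axis=0), fps.sum(axis=0), fns.sum(axis=0)
#   precision = tp / np.maximum(tp + fp, 1e-9)   # per-class precision
#   recall    = tp / np.maximum(tp + fn, 1e-9)   # per-class recall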