Example #1
def main():
    cam = cv2.VideoCapture(0)

    densepose = DensePoseWrapper()
    sanitizer = Sanitizer()
    sanitizer.loadModel("./models/Sanitizer.pth")
    tracker = Tracker()
    uvMapper = UVMapper()
    descriptionExtractor = DescriptionExtractor()

    while True:
        # Get image from webcam
        return_value, image = cam.read()
        assert return_value, "Failed to read from web camera"

        # White balance the image to get better color features
        image = white_balance(image)
        debugImage = image.copy()

        # Send image to DensePose
        people = densepose.extract(image)
        debugImage = densepose.renderDebug(debugImage, people)

        # Refine DensePose output to get actual people
        people = sanitizer.extract(people)
        debugImage = sanitizer.renderDebug(debugImage, people, alpha=0.2)

        # Track the people (which modifies the person objects in place)
        tracker.extract(people, True)
        debugImage = tracker.renderDebug(debugImage, people)

        # Extract UV map for each person
        peopleMaps = uvMapper.extract(people, image)
        peopleTextures = uvMapper.getPeopleTexture(peopleMaps)
        for i in range(len(peopleTextures)):
            cv2.imshow("UV image " + str(i), peopleTextures[i])

        # Classify what the person is wearing
        clothes = descriptionExtractor.extract(peopleMaps)

        # Show image
        print("Show image")
        cv2.imshow("debug image", debugImage)

        # Quit on escape
        if cv2.waitKey(1) == 27:
            break

        print("")

    cv2.destroyAllWindows()
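
The white_balance helper used in this example is not shown. As a rough, hypothetical sketch (assuming a simple gray-world balance on a BGR uint8 frame, not the project's actual implementation):

import numpy as np


def white_balance(image):
    # Gray-world white balance: scale each BGR channel so its mean matches
    # the overall mean intensity of the frame (uint8 input assumed).
    scaled = image.astype(np.float32)
    channel_means = scaled.reshape(-1, 3).mean(axis=0)
    scaled *= channel_means.mean() / channel_means
    return np.clip(scaled, 0, 255).astype(np.uint8)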
Example #2
    def _init_training(self, learningRate, dataset, useDatabase):
        # Dataset is COCO
        print("Initiating training of Sanitizer MaskGenerator")
        print("Loading COCO")
        from pycocotools.coco import COCO
        from os import path

        annFile = topDir + '/annotations/instances_{}.json'.format(dataset)
        self.cocoPath = topDir + '/data/{}'.format(dataset)

        self.coco = COCO(annFile)
        self.personCatID = self.coco.getCatIds(catNms=['person'])[0]
        self.cocoImageIds = self.coco.getImgIds(catIds=self.personCatID)

        def is_not_crowd(imgId):
            annIds = self.coco.getAnnIds(imgIds=imgId,
                                         catIds=self.personCatID,
                                         iscrowd=False)
            annotation = self.coco.loadAnns(annIds)[0]
            return not annotation["iscrowd"]

        self.cocoImageIds = list(filter(is_not_crowd, self.cocoImageIds))
        self.cocoOnDisk = path.exists(self.cocoPath)

        print("Coco dataset size: {}".format(len(self.cocoImageIds)))
        print("Coco images found on disk:", self.cocoOnDisk)

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        # Init loss function and optimizer
        self.optimizer = torch.optim.Adam(self.maskGenerator.parameters(),
                                          lr=learningRate,
                                          amsgrad=True)
        self.loss_function = torch.nn.BCELoss()

        # Init DensePose extractor
        self.denseposeExtractor = DensePoseWrapper()
    def _initTraining(self, learningRate, dataset, useDatabase):
        # Dataset is DeepFashion2
        print("Initiating training of DescriptionExtractor")
        print("Loading DeepFashion2")
        from torchvision import transforms
        from torchvision.datasets import CocoDetection

        self.annFile = topDir + '/annotations/deepfashion2_{}.json'.format(
            dataset)
        self.cocoImgPath = topDir + '/data/DeepFashion2/{}'.format(dataset)

        self.useDatabase = useDatabase
        self.dataset = CocoDetection(
            self.cocoImgPath,
            self.annFile,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: x.permute(1, 2, 0)),
                transforms.Lambda(lambda x: (x * 255).byte().numpy()),
                transforms.Lambda(lambda x: x[:, :, ::-1])
            ]))

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")
        self.uvMapper = UVMapper()

        # PyTorch things
        self.optimizer = torch.optim.Adam(self.classifier.parameters(),
                                          lr=learningRate,
                                          amsgrad=True)
        self.lossFunction = torch.nn.BCEWithLogitsLoss()
    def _initTraining(self, learningRate, datasetName, useLMDB):
        self.datasetName = datasetName

        from DenseSense.algorithms.DensePoseWrapper import DensePoseWrapper
        from DenseSense.algorithms.Sanitizer import Sanitizer

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")

        if datasetName in ActionClassifier.COCO_Datasets:
            print("Loading COCO dataset: " + datasetName)
            from pycocotools.coco import COCO
            from os import path

            annFile = topDir + '/annotations/instances_{}.json'.format(
                datasetName)
            self.cocoPath = topDir + '/data/{}'.format(datasetName)

            self.coco = COCO(annFile)
            personCatID = self.coco.getCatIds(catNms=['person'])[0]
            self.dataset = self.coco.getImgIds(catIds=personCatID)

        elif datasetName in ActionClassifier.AVA_Datasets:
            print("Loading AVA dataset: " + datasetName)
            import csv
            from collections import defaultdict
            from DenseSense.utils.YoutubeLoader import YoutubeLoader

            annFile = topDir + "/annotations/{}.csv".format(
                datasetName.replace("_predictive", ""))
            self.dataset = defaultdict(lambda: defaultdict(defaultdict))
            with open(annFile, 'r') as csvFile:
                reader = csv.reader(csvFile)
                for row in reader:
                    video, t, x1, y1, x2, y2, action, person = row
                    actions = {action}
                    if person in self.dataset[video][t]:
                        actions = actions.union(
                            self.dataset[video][t][person]["actions"])
                    self.dataset[video][t][person] = {
                        "bbox": (x1, y1, x2, y2),
                        "actions": actions
                    }

            ordered_data = []
            for key, video in self.dataset.items():
                ordered_data.append((key, []))
                for t, annotation in video.items():
                    ordered_data[-1][1].append((int(t), annotation))
                ordered_data[-1][1].sort(key=lambda x: x[0])

            self.dataset = ordered_data

            self.youtubeLoader = YoutubeLoader(verbose=False)
            for key, video in self.dataset:
                self.youtubeLoader.queue_video(key, video[0][0], video[-1][0])

            self.current_video_index = 0
            self.current_video_frame_index = 0

            self.tracker = Tracker()
        else:
            raise Exception("Unknown dataset")

        self.useLMDB = useLMDB
        if useLMDB:
            self.lmdb = LMDBHelper("a", max_size=1028 * 1028 * 1028 * 32)
            self.lmdb.verbose = False

        self.optimizer = torch.optim.Adam(self._AE_model.parameters(),
                                          lr=learningRate)
        self.loss_function = torch.nn.BCELoss()
class ActionClassifier(DenseSense.algorithms.Algorithm.Algorithm):
    actions = {
        4: "dance",
        11: "sit",
        14: "walk",
        69: "hand wave",
        12: "idle",  # stand
        17: "idle",  # carry/hold (an object)
        36: "idle",  # lift/pick up
        37: "idle",  # listen
        47: "idle",  # put down
    }

    COCO_Datasets = ["val2014", "train2014", "val2017", "train2017"]
    AVA_Datasets = [
        "ava_val", "ava_train", "ava_val_predictive", "ava_train_predictive"
    ]

    def __init__(self):
        print("Initiating ActionClassifier")
        super().__init__()

        self._modelPath = None
        self._AE_model = AutoEncoder()
        self._training = False

    def loadModel(self,
                  modelPath):  # TODO: load multiple models, refactor name
        self._modelPath = modelPath
        print("Loading ActionClassifier file from: " + self._modelPath)
        self._AE_model.load_state_dict(
            torch.load(self._modelPath, map_location=device))
        self._AE_model.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
            return
        self._modelPath = modelPath
        print("Saving ActionClassifier model to: " + self._modelPath)
        torch.save(self._AE_model.state_dict(), self._modelPath)

    def extract_ae(self, people, delta_time=None):
        S = _tensorify_people(people)

        if S.shape[0] == 0:
            return

        # Run prediction
        with torch.no_grad():
            embeddings = self._AE_model.encode(S, delta_time)

        # Add prediction to people
        for i, embedding in enumerate(embeddings):
            people[i].pose_vector = embedding.detach().cpu().numpy()

    def _initTraining(self, learningRate, datasetName, useLMDB):
        self.datasetName = datasetName

        from DenseSense.algorithms.DensePoseWrapper import DensePoseWrapper
        from DenseSense.algorithms.Sanitizer import Sanitizer

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")

        if datasetName in ActionClassifier.COCO_Datasets:
            print("Loading COCO dataset: " + datasetName)
            from pycocotools.coco import COCO
            from os import path

            annFile = topDir + '/annotations/instances_{}.json'.format(
                datasetName)
            self.cocoPath = topDir + '/data/{}'.format(datasetName)

            self.coco = COCO(annFile)
            personCatID = self.coco.getCatIds(catNms=['person'])[0]
            self.dataset = self.coco.getImgIds(catIds=personCatID)

        elif datasetName in ActionClassifier.AVA_Datasets:
            print("Loading AVA dataset: " + datasetName)
            import csv
            from collections import defaultdict
            from DenseSense.utils.YoutubeLoader import YoutubeLoader

            annFile = topDir + "/annotations/{}.csv".format(
                datasetName.replace("_predictive", ""))
            self.dataset = defaultdict(lambda: defaultdict(defaultdict))
            with open(annFile, 'r') as csvFile:
                reader = csv.reader(csvFile)
                for row in reader:
                    video, t, x1, y1, x2, y2, action, person = row
                    actions = {action}
                    if person in self.dataset[video][t]:
                        actions = actions.union(
                            self.dataset[video][t][person]["actions"])
                    self.dataset[video][t][person] = {
                        "bbox": (x1, y1, x2, y2),
                        "actions": actions
                    }

            ordered_data = []
            for key, video in self.dataset.items():
                ordered_data.append((key, []))
                for t, annotation in video.items():
                    ordered_data[-1][1].append((int(t), annotation))
                ordered_data[-1][1].sort(key=lambda x: x[0])

            self.dataset = ordered_data

            self.youtubeLoader = YoutubeLoader(verbose=False)
            for key, video in self.dataset:
                self.youtubeLoader.queue_video(key, video[0][0], video[-1][0])

            self.current_video_index = 0
            self.current_video_frame_index = 0

            self.tracker = Tracker()
        else:
            raise Exception("Unknown dataset")

        self.useLMDB = useLMDB
        if useLMDB:
            self.lmdb = LMDBHelper("a", max_size=1028 * 1028 * 1028 * 32)
            self.lmdb.verbose = False

        self.optimizer = torch.optim.Adam(self._AE_model.parameters(),
                                          lr=learningRate)
        self.loss_function = torch.nn.BCELoss()

    def _load(self, index=None):  # Load next if index is None
        if self.datasetName in ActionClassifier.COCO_Datasets:
            people = None
            # Load image from disk and process
            cocoImage = self.coco.loadImgs(self.dataset[index])[0]
            if self.useLMDB:
                people = self.lmdb.get("DensePoseWrapper_Sanitized_Coco",
                                       str(cocoImage["id"]))

            if people is None:
                image = cv2.imread(self.cocoPath + "/" +
                                   cocoImage["file_name"])
                if image is None:
                    raise Exception("Could not find image: " + str(index))

                people = self.denseposeExtractor.extract(image)
                people = self.sanitizer.extract(people)
                if self.useLMDB:
                    self.lmdb.save("DensePoseWrapper_Sanitized_Coco",
                                   str(cocoImage["id"]), people)
            return people, cocoImage

        elif self.datasetName in ActionClassifier.AVA_Datasets:
            data = None
            image = None
            people, frame_time, is_last = None, None, False
            key = self.dataset[self.current_video_index][0]

            if self.useLMDB:
                data = self.lmdb.get(
                    "DensePoseWrapper_Sanitized_AVA",
                    str(key) + "_" + str(self.current_video_frame_index))

            if data is None:
                image, frame_time, is_last = self.youtubeLoader.get(
                    self.current_video_index, self.current_video_frame_index)
                if image is None:
                    people = []
                    frame_time = 0
                else:
                    people = self.denseposeExtractor.extract(image)
                    people = self.sanitizer.extract(people)

                if self.useLMDB:  # Save processed data
                    self.lmdb.save(
                        "DensePoseWrapper_Sanitized_AVA",
                        str(key) + "_" + str(self.current_video_frame_index),
                        (people, frame_time, is_last))

            else:
                people, frame_time, is_last = data

            timestamp = np.round(frame_time)
            ava_annotation = None

            sameTimestamp = [
                v[1] for v in self.dataset[self.current_video_index][1]
                if v[0] == timestamp
            ]
            if len(sameTimestamp) == 1:
                ava_annotation = sameTimestamp[0]

            # To show the whole dataset as it's being downloaded
            if image is not None and True:
                if ava_annotation is not None:
                    for k, p in ava_annotation.items():
                        bbox = np.array([
                            float(p["bbox"][0]),
                            float(p["bbox"][1]),
                            float(p["bbox"][2]),
                            float(p["bbox"][3])
                        ])
                        p1 = bbox[:2] * np.array(
                            [image.shape[1], image.shape[0]], dtype=np.float64)
                        p2 = bbox[2:] * np.array(
                            [image.shape[1], image.shape[0]], dtype=np.float64)
                        image = cv2.rectangle(image,
                                              tuple(p1.astype(np.int32)),
                                              tuple(p2.astype(np.int32)),
                                              (20, 20, 200), 1)
                cv2.imshow("frame", image)
                cv2.waitKey(1)

            # Change increment video and frame
            if is_last:
                self.current_video_frame_index = 0
                self.current_video_index += 1
                if len(self.dataset) == self.current_video_index:
                    self.current_video_index = 0
            else:
                self.current_video_frame_index += 1

            return people, frame_time, is_last, ava_annotation

    def trainAutoEncoder(self,
                         epochs=100,
                         learningRate=0.005,
                         dataset="Coco",
                         useLMDB=True,
                         printUpdateEvery=40,
                         visualize=0,
                         tensorboard=False):
        self._training = True
        self._initTraining(learningRate, dataset, useLMDB)

        # Tensorboard setup
        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter

            if type(tensorboard) == str:
                writer = SummaryWriter(topDir + "/data/tensorboard/" +
                                       tensorboard)
            else:
                writer = SummaryWriter(topDir + "/data/tensorboard/")
            tensorboard = True

        # Start the training process
        total_iterations = len(self.dataset)
        visualize_counter = 0
        open_windows = set()

        if self.datasetName in ActionClassifier.COCO_Datasets:
            print("Starting COCO dataset training")
            for epoch in range(epochs):
                epochLoss = np.float64(0)
                for i in range(total_iterations):
                    people, annotation = self._load(i)
                    S = _tensorify_people(people)

                    if S.shape[0] == 0:
                        continue

                    # Run prediction
                    embedding = self._AE_model.encode(S)
                    out = self._AE_model.decode(embedding)

                    # Optimize
                    lossSize = self.loss_function(out, S)

                    lossSize.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()
                    lossSize = lossSize.cpu().item()

                    # Give feedback of training process
                    epochLoss += lossSize / total_iterations
                    visualize_counter += 1
                    if (i - 1) % printUpdateEvery == 0:
                        print("Iteration {} / {}, epoch {} / {}".format(
                            i, total_iterations, epoch, epochs))
                        print("Loss size: {}\n".format(lossSize /
                                                       printUpdateEvery))

                    if visualize != 0 and visualize <= visualize_counter:
                        visualize_counter = 0
                        new_open_windows = set()
                        for index, _ in enumerate(S):
                            inpS = (S[index, 0].detach()).cpu().to(
                                torch.float).numpy()
                            outS = (out[index, 0].detach()).cpu().to(
                                torch.float32).numpy()
                            emb = embedding.detach().cpu().numpy()
                            debug_image = self._get_ae_from_embedding(
                                index, inpS, emb, outS, None)
                            cv2.imshow("person " + str(index), debug_image)
                            new_open_windows.add("person " + str(index))
                            break  # Only show one person

                        for window in open_windows.difference(
                                new_open_windows):
                            cv2.destroyWindow(window)
                        open_windows = new_open_windows
                        cv2.waitKey(1)

                    if tensorboard:
                        absI = i + epoch * total_iterations
                        writer.add_scalar("Loss size", lossSize, absI)

                print("Finished epoch {} / {}. Loss size:".format(
                    epoch, epochs, epochLoss))
                self.saveModel(self._modelPath)

        elif self.datasetName in ActionClassifier.AVA_Datasets:
            # Unfortunately, we need to run through the whole AVA dataset once to determine its size in frames
            print("Going through ava dataset once to determine the size")
            total_iterations = 0
            for video_i in range(len(self.dataset)):
                is_last = False
                while not is_last:
                    people, frame_time, is_last, annotation = self._load(
                    )  # Load next
                    total_iterations += 1
                    if (total_iterations - 1) % 500 == 0:
                        print("Frame/iteration {} (video {} / {})".format(
                            total_iterations, video_i, len(self.dataset)))
            print("Total number of iterations are {}".format(total_iterations))

            print("Starting AVA dataset training")
            last_frame_time = None
            last_people = []
            S_next = None
            current_video = 0
            was_last = False
            for epoch in range(epochs):
                epochLoss = np.float64(0)
                for i in range(total_iterations):
                    people, frame_time, is_last, annotation = self._load(
                    )  # Load next
                    current_video += is_last

                    if "predictive" in self.datasetName:
                        # Track the next frame
                        self.tracker.extract(people, time_now=frame_time)
                        if is_last:  # If new video next
                            self.tracker = Tracker()
                            last_frame_time = None

                        # Only save the people who exist in all frames
                        old_ids = list(map(lambda p: p.id, last_people))
                        new_ids = list(map(lambda p: p.id, people))

                        old_people = list(
                            filter(lambda p: p.id in new_ids,
                                   last_people.copy()))
                        new_people = list(
                            filter(lambda p: p.id in old_ids, people.copy()))

                        # Filter old Ss
                        S = _tensorify_people(old_people, True)
                        S_next = _tensorify_people(new_people, False)

                        last_people = people
                    else:
                        frame_time = last_frame_time
                        S = _tensorify_people(people)

                    if S.shape[0] == 0:
                        continue

                    delta_time = 0
                    if last_frame_time is not None and was_last is False:
                        delta_time = frame_time - last_frame_time
                    last_frame_time = frame_time

                    # Run prediction
                    embedding = self._AE_model.encode(S, delta_time)
                    out = self._AE_model.decode(embedding)

                    # Optimize
                    if "predictive" in self.datasetName:
                        lossSize = self.loss_function(out, S_next)
                    else:
                        lossSize = self.loss_function(out, S)

                    lossSize.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()
                    lossSize = lossSize.cpu().item()

                    # Give feedback of training process
                    epochLoss += lossSize / total_iterations
                    visualize_counter += 1
                    was_last = is_last
                    if (i - 1) % printUpdateEvery == 0:
                        print("Iteration {} / {} (video {}/{}), epoch {} / {}".
                              format(i, total_iterations, current_video,
                                     len(self.dataset), epoch, epochs))
                        print("Loss size: {}\n".format(lossSize /
                                                       printUpdateEvery))

                    if visualize != 0 and visualize <= visualize_counter:
                        visualize_counter = 0
                        new_open_windows = set()
                        for index, _ in enumerate(S):
                            inpS = (S[index, 0].detach()).cpu().to(
                                torch.float).numpy()
                            outS = (out[index, 0].detach()).cpu().to(
                                torch.float32).numpy()
                            emb = embedding.detach().cpu().numpy()
                            debug_image = self._get_ae_from_embedding(
                                index, inpS, emb, outS, S_next)
                            cv2.imshow("person " + str(index), debug_image)
                            new_open_windows.add("person " + str(index))
                            break  # Only show one person

                        for window in open_windows.difference(
                                new_open_windows):
                            cv2.destroyWindow(window)
                        open_windows = new_open_windows
                        cv2.waitKey(1)

                    if tensorboard:
                        absI = i + epoch * total_iterations
                        writer.add_scalar("Loss size", lossSize, absI)

                print("Finished epoch {} / {}. Loss size:".format(
                    epoch, epochs, epochLoss))
                self.saveModel(self._modelPath)

    def _get_ae_from_embedding(self, index, S, embedding, out, S_next):
        S = (S * 255).astype(np.uint8)
        out = (out * 255).astype(np.uint8)
        emb = ((embedding[index] * 0.5 + 1.0) * 255).astype(np.uint8)
        emb = np.expand_dims(emb, axis=0)
        emb = np.repeat(emb, repeats=14, axis=0).T
        emb = np.repeat(emb, repeats=10, axis=0)
        emb = np.vstack((emb, np.zeros((56 - 5 * 10, 14), dtype=np.uint8)))
        comparison = np.hstack((S, emb, out))
        if S_next is not None:
            Sn = (S_next[index, 0].detach() * 255).cpu().to(
                torch.uint8).numpy()
            Sn = np.hstack((np.zeros((56, 56 + 14)), Sn))
            comparison = np.vstack((comparison, Sn)).astype(np.uint8)

        return cv2.applyColorMap(comparison, cv2.COLORMAP_JET)

    def get_ae_debug(self, people):
        combined = []
        for index, person in enumerate(people):
            embedding = torch.from_numpy(person.pose_vector).to(device)
            embedding = embedding.reshape((1, embedding.shape[0]))
            out = self._AE_model.decode(embedding)
            out = out[0, 0].detach().cpu().to(torch.float32).numpy()
            emb = embedding.detach().cpu().numpy()
            image = self._get_ae_from_embedding(0, person.S, emb, out, None)
            combined.append(image)
        return combined
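
The module-level helper _tensorify_people referenced throughout ActionClassifier is not included in this snippet. Judging from how it is called, it stacks each person's 56x56 S mask into a float tensor on the active device; a minimal sketch under those assumptions (the second argument and the exact tensor layout are guesses) could be:

import numpy as np
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _tensorify_people(people, filtered=True):
    # Stack each person's 56x56 body-part mask (person.S) into an
    # (N, 1, 56, 56) float tensor on the active device. The meaning of the
    # second argument and the exact shapes are assumptions.
    if len(people) == 0:
        return torch.empty((0, 1, 56, 56), device=device)
    masks = np.stack([np.asarray(p.S, dtype=np.float32) for p in people])
    return torch.from_numpy(masks).unsqueeze(1).to(device)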
Example #6
def main():
    densepose = DensePoseWrapper()
    sanitizer = Sanitizer()
    sanitizer.load_model("./models/Sanitizer.pth")
    tracker = Tracker()
    uvMapper = UVMapper()
    descriptionExtractor = DescriptionExtractor()
    descriptionExtractor.loadModel("./models/DescriptionExtractor.pth")
    actionClassifier = ActionClassifier()
    actionClassifier.loadModel("./models/ActionClassifier_AutoEncoder.pth")

    cam = cv2.VideoCapture(0)
    frameIndex = 0
    frame_time = time.time()
    oldOpenWindows = set()

    while True:
        # Get image from webcam
        return_value, image = cam.read()
        assert return_value, "Failed to read from web camera"
        delta_time = time.time() - frame_time
        frame_time = time.time()

        # White balance the image to get better color features
        image = white_balance(image)
        debugImage = image.copy()

        # Send image to DensePose
        people = densepose.extract(image)
        debugImage = densepose.renderDebug(debugImage, people)

        print("DensePose people:", len(people))

        # Refine DensePose output to get actual people
        people = sanitizer.extract(people)
        debugImage = sanitizer.renderDebug(debugImage, people, alpha=0.2)
        print("Sanitizer people", len(people))

        # Track the people (which modifies the person objects in place)
        tracker.extract(people, True)
        debugImage = tracker.renderDebug(debugImage, people)
        print("Tracker people", len(people))

        # Extract UV map for each person
        peopleMaps = uvMapper.extract(people, image)
        peopleTextures = uvMapper.getPeopleTexture(peopleMaps)

        # Classify what the person is wearing
        clothes = descriptionExtractor.extract(peopleMaps)
        clothingImages = descriptionExtractor.getLabelImage()

        # Get pose embedding
        actionClassifier.extract_ae(people, delta_time)
        debugACAE = actionClassifier.get_ae_debug(people)

        # Per person window management
        newOpenWindows = set()
        for i, person in enumerate(people):
            # Show UV map and label
            S_ROI = (person.I * (255 / 25)).astype(np.uint8)
            S_ROI = cv2.applyColorMap(S_ROI, cv2.COLORMAP_PARULA)
            S_ROI = cv2.resize(S_ROI, (160, 160))
            personWindow = cv2.resize(peopleTextures[i],
                                      (int(5 / 3 * 160), 160))
            coloredSlice = np.zeros((160, 3, 3), dtype=np.uint8)
            coloredSlice[:, :] = person.color
            personWindow = np.hstack(
                (coloredSlice, S_ROI, personWindow, clothingImages[i]))

            # View window
            windowName = "UV image " + str(person.id)
            newOpenWindows.add(windowName)
            cv2.imshow(windowName, personWindow)
            cv2.resizeWindow(windowName, 600, 600)

            # ... and a window for ac ae
            windowName = "ActionClassifier_AutoEncoder image " + str(person.id)
            newOpenWindows.add(windowName)
            cv2.imshow(
                windowName,
                cv2.resize(
                    debugACAE[i],
                    (debugACAE[i].shape[1] * 3, debugACAE[i].shape[0] * 3)))

        for oldWindow in oldOpenWindows:
            if oldWindow not in newOpenWindows:
                cv2.destroyWindow(oldWindow)
        oldOpenWindows = newOpenWindows

        # Show image
        print("Show frame:", frameIndex, "\n")
        cv2.imshow("debug image", debugImage)
        frameIndex += 1

        # Quit on escape
        if cv2.waitKey(1) == 27:
            break

    cv2.destroyAllWindows()
class DescriptionExtractor(DenseSense.algorithms.Algorithm.Algorithm):
    iteration = 0

    availableLabels = {
        0: "none",
        1: "short sleeve top",
        2: "long sleeve top",
        3: "short sleeve outwear",
        4: "long sleeve outwear",
        5: "vest",
        6: "sling",
        7: "shorts",
        8: "trousers",
        9: "skirt",
        10: "short sleeve dress",
        11: "long sleeve dress",
        12: "dress vest",
        13: "sling dress"
    }

    #  0 : none
    #  1 : trousers
    #  2 : R hand
    #  3 : L hand
    #  4 : R foot
    #  5 : L foot
    #  6 : R thigh
    #  7 : L thigh
    #  8 : R calf
    #  9 : L calf
    # 10 : L upper arm
    # 11 : R upper arm
    # 12 : L lower arm
    # 13 : R lower arm
    # 14 : head

    labelColorCheck = {
        0: [],
        1: [1, 10, 11],
        2: [1, 10, 11, 12, 13],
        3: [1, 10, 11],
        4: [1, 10, 11, 12, 13],
        5: [1, 10, 11],
        6: [1, 10, 11],
        7: [6, 7],
        8: [6, 7, 8, 9],
        9: [6, 7],
        10: [1, 10, 11],
        11: [1, 10, 11, 12, 13],
        12: [1, 10, 11],
        13: [1, 10, 11]
    }

    colors = [((255, 255, 255), "white"), ((210, 209, 218), "white"),
              ((145, 164, 164), "white"), ((169, 144, 135), "white"),
              ((197, 175, 177), "white"), ((117, 126, 115), "white"),
              ((124, 126, 129), "white"), ((0, 0, 0), "black"),
              ((10, 10, 10), "black"), ((1, 6, 9), "black"),
              ((5, 10, 6), "black"), ((18, 15, 11), "black"),
              ((18, 22, 9), "black"), ((16, 16, 14), "black"),
              ((153, 153, 0), "yellow"), ((144, 115, 99), "pink"),
              ((207, 185, 174), "pink"), ((206, 191, 131), "pink"),
              ((208, 179, 54), "pink"), ((202, 19, 43), "red"),
              ((206, 28, 50), "red"), ((82, 30, 26), "red"),
              ((156, 47, 35), "orange"), ((126, 78, 47), "wine red"),
              ((74, 72, 77), "green"), ((31, 38, 38), "green"),
              ((40, 52, 79), "green"), ((100, 82, 116), "green"),
              ((8, 17, 55), "green"), ((29, 31, 37), "dark green"),
              ((46, 46, 36), "blue"), ((29, 78, 60), "blue"),
              ((74, 97, 85), "blue"), ((60, 68, 67), "blue"),
              ((181, 195, 232), "neon blue"), ((40, 148, 184), "bright blue"),
              ((210, 40, 69), "orange"), ((66, 61, 52), "gray"),
              ((154, 120, 147), "gray"), ((124, 100, 86), "gray"),
              ((46, 55, 46), "gray"), ((119, 117, 122), "gray"),
              ((88, 62, 62), "brown"), ((60, 29, 17), "brown"),
              ((153, 50, 204), "purple"), ((77, 69, 30), "purple"),
              ((153, 91, 14), "violet"), ((207, 185, 151), "beige")]

    colorsHSV = None

    class Network(nn.Module):
        def __init__(self, labels):  # FIXME: make this work!
            super(DescriptionExtractor.Network, self).__init__()
            self.layer1 = nn.Sequential(  # Fixme: 3x15 in channels
                nn.Conv2d(in_channels=3 * 15,
                          out_channels=15,
                          kernel_size=3,
                          stride=1,
                          padding=1), nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))
            self.layer2 = nn.Sequential(
                nn.Conv2d(in_channels=15,
                          out_channels=10,
                          kernel_size=3,
                          stride=1,
                          padding=1), nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))

            self.fc1 = nn.Linear(360, 180)
            self.relu1 = nn.ReLU(inplace=False)
            self.fc2 = nn.Linear(180, labels)

        def forward(self, x):
            batchSize = x.shape[0]
            x = x.view(batchSize, 15 * 3, 32, 32)
            x = self.layer1(x)
            x = self.layer2(x)
            x = x.view(batchSize, -1)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.fc2(x)
            return x

    def __init__(self, model=None, db=None):
        print("Initiating DescriptionExtractor")
        super().__init__()

        self.classifier = DescriptionExtractor.Network(
            len(self.availableLabels))
        self.modelPath = None
        self._training = False
        self.predictions = []
        self.peopleLabels = []

        # Precompute HSV values of the reference colors for nearest-color lookup
        self.colorsHSV = []
        for c in self.colors:
            RGBobj = sRGBColor(c[0][0], c[0][1], c[0][2])
            self.colorsHSV.append(convert_color(RGBobj, HSVColor))

    def loadModel(self, modelPath):
        self.modelPath = modelPath
        print("Loading DescriptionExtractor file from: " + self.modelPath)
        self.classifier.load_state_dict(
            torch.load(self.modelPath, map_location=device))
        self.classifier.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
            return
        self.modelPath = modelPath
        print("Saving DescriptionExtractor model to: " + self.modelPath)
        torch.save(self.classifier.state_dict(), self.modelPath)

    def _initTraining(self, learningRate, dataset, useDatabase):
        # Dataset is DeepFashion2
        print("Initiating training of DescriptionExtractor")
        print("Loading DeepFashion2")
        from torchvision import transforms
        from torchvision.datasets import CocoDetection

        self.annFile = topDir + '/annotations/deepfashion2_{}.json'.format(
            dataset)
        self.cocoImgPath = topDir + '/data/DeepFashion2/{}'.format(dataset)

        self.useDatabase = useDatabase
        self.dataset = CocoDetection(
            self.cocoImgPath,
            self.annFile,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: x.permute(1, 2, 0)),
                transforms.Lambda(lambda x: (x * 255).byte().numpy()),
                transforms.Lambda(lambda x: x[:, :, ::-1])
            ]))

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")
        self.uvMapper = UVMapper()

        # PyTorch things
        self.optimizer = torch.optim.Adam(self.classifier.parameters(),
                                          lr=learningRate,
                                          amsgrad=True)
        self.lossFunction = torch.nn.BCEWithLogitsLoss()

    def extract(self, peopleMaps):
        if len(peopleMaps) == 0:
            return []
        self.peopleLabels = []
        determineColorThreshold = 0.3  # FIXME: tune

        # Do label classification
        peopleMapsDevice = torch.Tensor(peopleMaps).to(device)
        self.predictions = self.classifier.forward(peopleMapsDevice)
        self.predictions = self.predictions.sigmoid()
        self.predictions = self.predictions.detach().cpu().numpy()

        # Compile predictions into nice dictionary
        for personIndex, prediction in enumerate(self.predictions):
            labels = {}
            # Several labels share the same body areas when determining color,
            # so cache each area's average color once it has been computed
            averages = np.full((peopleMaps.shape[1], 3), -1, dtype=np.int64)
            for i, value in enumerate(prediction):
                if i == 0:  # 0 is None, and not trained on anyways
                    continue
                label = self.availableLabels[i]

                info = {"activation": value}
                if determineColorThreshold < value:
                    # If certainty is above threshold, take the time to calculate the average color
                    averageOfAreas = np.zeros(3, dtype=np.int64)
                    relevantAreas = torch.as_tensor(
                        self.labelColorCheck[i], dtype=torch.int64).to(device)
                    nonBlackAreas = 0
                    for areaIndex in relevantAreas:
                        if (averages[areaIndex] == -1).all():
                            # Calculate average
                            relevantPixels = peopleMapsDevice[personIndex,
                                                              areaIndex, :, :]
                            relevantPixels = relevantPixels[
                                torch.sum(relevantPixels, axis=2) != 0]
                            if relevantPixels.shape[0] == 0:
                                # All black
                                averages[areaIndex] = np.zeros(3)
                                continue
                            average = relevantPixels.mean(
                                axis=0).cpu().numpy().astype(np.uint8)
                            averages[areaIndex] = average

                        nonBlackAreas += 1
                        averageOfAreas += averages[areaIndex]

                    averageOfAreas = (averageOfAreas /
                                      float(nonBlackAreas)).astype(np.uint8)
                    info.update(self._findColorName(averageOfAreas))

                labels[label] = info

            self.peopleLabels.append(labels)

        return self.peopleLabels

    def train(self,
              epochs=100,
              learningRate=0.005,
              dataset="Coco",
              useDatabase=True,
              printUpdateEvery=40,
              visualize=False,
              tensorboard=False):
        self._training = True
        self._initTraining(learningRate, dataset, useDatabase)

        # Deal with tensorboard
        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter

            if type(tensorboard) == str:
                writer = SummaryWriter(topDir + "/data/tensorboard/" +
                                       tensorboard)
            else:
                writer = SummaryWriter(topDir + "/data/tensorboard/")
            tensorboard = True

        def findBestROI(ROIs, label):
            bestMatch = 0
            bestIndex = -1
            for i, ROI in enumerate(ROIs):
                lbox = np.array(label["bbox"])
                larea = lbox[2:] - lbox[:2]
                larea = larea[0] * larea[1]
                rbox = ROI.bounds
                rarea = rbox[2:] - rbox[:2]
                rarea = rarea[0] * rarea[1]

                SI = np.maximum(0, np.minimum(lbox[2], rbox[2]) - np.maximum(lbox[0], rbox[0])) * \
                     np.maximum(0, np.minimum(lbox[3], rbox[3]) - np.maximum(lbox[1], rbox[1]))
                SU = larea + rarea - SI
                if SU == 0:
                    continue
                overlap = SI / SU
                if bestMatch < overlap:
                    bestMatch = overlap
                    bestIndex = i
            return bestIndex

        Iterations = len(self.dataset)

        print("Starting training")
        for epoch in range(epochs):
            epochLoss = np.float64(0)
            for i in range(Iterations):
                ROIs, peopleTextures, labels = self._load(i)

                # Figure out what ROI belongs to what label
                groundtruth = np.zeros((len(ROIs), 14), dtype=np.float32)
                for label in labels:
                    mostMatching = findBestROI(ROIs, label)
                    if mostMatching != -1:
                        groundtruth[mostMatching][label["category_id"]] = 1

                # Most items in this dataset are skipped because either no people were found or none overlap with the ground truth
                if len(ROIs) == 0 or not np.any(groundtruth != 0):
                    continue

                groundtruth = torch.from_numpy(groundtruth).to(device)

                # Apply noise to peopleTextures
                noise = np.random.randn(*peopleTextures.shape) * 5
                peopleTextures = peopleTextures.astype(
                    np.int32) + noise.astype(np.int32)
                peopleTextures = np.clip(peopleTextures, 0, 255)
                peopleTextures = peopleTextures.astype(np.uint8)

                peopleTextures = torch.Tensor(peopleTextures).to(device)
                predictions = self.classifier.forward(peopleTextures)
                print(groundtruth)
                print(predictions)
                print("\n")

                lossSize = self.lossFunction(predictions, groundtruth)
                lossSize.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                lossSize = lossSize.cpu().item()

                epochLoss += lossSize / Iterations
                if (i - 1) % printUpdateEvery == 0:
                    print("Iteration {} / {}, epoch {} / {}".format(
                        i, Iterations, epoch, epochs))
                    print("Loss size: {}\n".format(lossSize /
                                                   printUpdateEvery))

                if tensorboard:
                    absI = i + epoch * Iterations
                    writer.add_scalar("Loss size", lossSize, absI)

            print("Finished epoch {} / {}. Loss size:".format(
                epoch, epochs, epochLoss))
            self.saveModel(self.modelPath)

        self._training = False

    def getLabelImage(self):
        images = []
        for personLabel in self.peopleLabels:
            # Sort labels by score
            labels = sorted(list(personLabel.items()),
                            key=lambda x: x[1]["activation"],
                            reverse=True)

            # Create image
            image = np.zeros((160, 210, 3))
            for i, label in enumerate(labels):
                name, classification = label
                text = "{0:4d}%   {1}".format(
                    int(classification["activation"] * 100), name)
                color = (255, 255, 255)
                if classification[
                        "activation"] < 0.75:  # FIXME: magic number, tune
                    color = (128, 128, 128)
                image = cv2.putText(image, text, (0, 12 + 12 * i),
                                    cv2.FONT_HERSHEY_DUPLEX, .3, color, 1,
                                    cv2.LINE_AA)

                # Add color
                if "bestMatch" in classification:
                    colorText = classification["bestMatch"][1]
                    colorTextColor = classification["color"]
                    colorTextColor = (int(colorTextColor[0]),
                                      int(colorTextColor[1]),
                                      int(colorTextColor[2]))
                    image = cv2.putText(image, colorText, (150, 12 + 12 * i),
                                        cv2.FONT_HERSHEY_DUPLEX, .3,
                                        colorTextColor, 1, cv2.LINE_AA)

            images.append(image.astype(np.uint8))

        return images

    def _load(self, index):
        cocoImage = self.dataset[index]
        ROIs = None
        if self.useDatabase:
            ROIs = self.lmdb.get("DensePoseWrapper_Sanitized_deepfashion2",
                                 str(cocoImage["id"]))
        if ROIs is None:
            ROIs = self.denseposeExtractor.extract(cocoImage[0])
            ROIs = self.sanitizer.extract(ROIs)
            if self.useDatabase:
                self.lmdb.save("DensePoseWrapper_Sanitized_deepfashion2",
                               str(cocoImage["id"]), ROIs)

        peopleTextures = None
        if self.useDatabase:
            peopleTextures = self.lmdb.get("UVMapper_deepfashion2", str(index))
        if peopleTextures is None:
            peopleTextures = self.uvMapper.extract(ROIs, cocoImage[0])
            if self.useDatabase:
                self.lmdb.save("UVMapper_deepfashion2", str(index),
                               peopleTextures)

        return ROIs, peopleTextures, cocoImage[1]

    def _findColorName(self, color):
        b = color[0]
        g = color[1]
        r = color[2]

        # This prints the color colored in the terminal
        colorRepr = '\033[{};2;{};{};{}m'.format(38, r, g, b) \
                    + "rgb("+str(r)+", "+str(g)+", "+str(b)+")"+'\033[0m'

        # Get nearest color name
        HSVobj = convert_color(sRGBColor(r, g, b), HSVColor)

        nearestIndex = -1
        diffMin = 100000
        for i in range(len(self.colorsHSV)):
            colEntry = self.colorsHSV[i]

            d = HSVobj.hsv_h - colEntry.hsv_h
            dh = min(abs(d), 360 - abs(d)) / 180.0
            ds = abs(HSVobj.hsv_s - colEntry.hsv_s)
            dv = abs(HSVobj.hsv_v - colEntry.hsv_v) / 255.0
            diff = np.sqrt(dh * dh + ds * ds + dv * dv)
            if diff < diffMin:
                diffMin = diff
                nearestIndex = i

        return {
            "color": tuple(color),
            "colorDistance": diffMin,
            "coloredStr": colorRepr,
            "bestMatch": self.colors[nearestIndex]
        }
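
For reference, each dictionary returned by extract above maps a clothing label to its activation and, for sufficiently confident detections, the color fields added by _findColorName. A self-contained illustration with hypothetical values:

# Hypothetical per-person result from DescriptionExtractor.extract;
# the structure is inferred from the code above, the values are made up.
person_labels = {
    "short sleeve top": {
        "activation": 0.91,
        "color": (43, 19, 202),                # average BGR of the relevant areas
        "colorDistance": 0.08,
        "coloredStr": "\033[38;2;202;19;43mrgb(202, 19, 43)\033[0m",
        "bestMatch": ((202, 19, 43), "red"),   # nearest reference color
    },
    "trousers": {"activation": 0.21},          # below the color threshold
}

for name, info in sorted(person_labels.items(),
                         key=lambda kv: kv[1]["activation"],
                         reverse=True):
    colorName = info["bestMatch"][1] if "bestMatch" in info else ""
    print("{:<18s} {:>4.0%}  {}".format(name, info["activation"], colorName))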
class DescriptionExtractor(DenseSense.algorithms.Algorithm.Algorithm):
    iteration = 0

    availableLabels = {
        0: "none",
        1: "short sleeve top",
        2: "long sleeve top",
        3: "short sleeve outwear",
        4: "long sleeve outwear",
        5: "vest",
        6: "sling",
        7: "shorts",
        8: "trousers",
        9: "skirt",
        10: "short sleeve dress",
        11: "long sleeve dress",
        12: "dress vest",
        13: "sling dress"
    }

    # FIXME: change because now using S
    labelBodyparts = {  # https://github.com/facebookresearch/DensePose/issues/64#issuecomment-405608749 PRAISE
        "boots": [5, 6],
        "footwear": [5, 6],
        "outer": [1, 2, 15, 17, 16, 18, 19, 21, 20, 22],
        "dress": [1, 2],
        "sunglasses": [],
        "pants": [7, 9, 8, 10, 11, 13, 12, 14],
        "top": [1, 2],
        "shorts": [7, 9, 8, 10],
        "skirt": [1, 2],
        "headwear": [23, 24],
        "scarfAndTie": []
    }

    colors = [  # TODO: use color model file
        ((255, 255, 255), "white"), ((210, 209, 218), "white"),
        ((145, 164, 164), "white"), ((169, 144, 135), "white"),
        ((197, 175, 177), "white"), ((117, 126, 115), "white"),
        ((124, 126, 129), "white"), ((0, 0, 0), "black"),
        ((10, 10, 10), "black"), ((1, 6, 9), "black"), ((5, 10, 6), "black"),
        ((18, 15, 11), "black"), ((18, 22, 9), "black"),
        ((16, 16, 14), "black"), ((153, 153, 0), "yellow"),
        ((144, 115, 99), "pink"), ((207, 185, 174), "pink"),
        ((206, 191, 131), "pink"), ((208, 179, 54), "pink"),
        ((202, 19, 43), "red"), ((206, 28, 50), "red"), ((82, 30, 26), "red"),
        ((156, 47, 35), "orange"), ((126, 78, 47), "wine red"),
        ((74, 72, 77), "green"), ((31, 38, 38), "green"),
        ((40, 52, 79), "green"), ((100, 82, 116), "green"),
        ((8, 17, 55), "green"), ((29, 31, 37), "dark green"),
        ((46, 46, 36), "blue"), ((29, 78, 60), "blue"), ((74, 97, 85), "blue"),
        ((60, 68, 67), "blue"), ((181, 195, 232), "neon blue"),
        ((40, 148, 184), "bright blue"), ((210, 40, 69), "orange"),
        ((66, 61, 52), "gray"), ((154, 120, 147), "gray"),
        ((124, 100, 86), "gray"), ((46, 55, 46), "gray"),
        ((119, 117, 122), "gray"), ((88, 62, 62), "brown"),
        ((60, 29, 17), "brown"), ((153, 50, 204), "purple"),
        ((77, 69, 30), "purple"), ((153, 91, 14), "violet"),
        ((207, 185, 151), "beige")
    ]

    colorsHSV = None

    class Network(nn.Module):
        def __init__(self, labels):  # FIXME: make this work!
            super(DescriptionExtractor.Network, self).__init__()
            self.layer1 = nn.Sequential(  # Fixme: 3x15 in channels
                nn.Conv2d(in_channels=3 * 15,
                          out_channels=15,
                          kernel_size=3,
                          stride=1,
                          padding=1), nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))
            self.layer2 = nn.Sequential(
                nn.Conv2d(in_channels=15,
                          out_channels=10,
                          kernel_size=3,
                          stride=1,
                          padding=1), nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))

            self.fc1 = nn.Linear(360, 180)
            self.relu1 = nn.ReLU(inplace=False)
            self.fc2 = nn.Linear(180, labels)
            self.softmax = nn.Softmax()

        def forward(self, x):
            batchSize = x.shape[0]
            x = x.view(batchSize, 15 * 3, 32, 32)
            x = self.layer1(x)
            x = self.layer2(x)
            x = x.view(batchSize, -1)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.fc2(x)
            #x = self.softmax(x)
            return x

    def __init__(self, model=None, db=None):
        print("Initiating DescriptionExtractor")
        super().__init__()

        self.classifier = DescriptionExtractor.Network(
            len(self.availableLabels))
        self.modelPath = None
        self._training = False

        # Precompute HSV values of the reference colors for nearest-color lookup
        self.colorsHSV = []
        for c in self.colors:
            RGBobj = sRGBColor(c[0][0], c[0][1], c[0][2])
            self.colorsHSV.append(convert_color(RGBobj, HSVColor))

    def loadModel(self, modelPath):
        self.modelPath = modelPath
        print("Loading DescriptionExtractor file from: " + self.modelPath)
        self.classifier.load_state_dict(
            torch.load(self.modelPath, map_location=device))
        self.classifier.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
            return
        self.modelPath = modelPath
        print("Saving DescriptionExtractor model to: " + self.modelPath)
        torch.save(self.classifier.state_dict(), self.modelPath)

    def _initTraining(self, learningRate, dataset, useDatabase):
        # Dataset is DeepFashion2
        print("Initiating training of DescriptionExtractor")
        print("Loading DeepFashion2")
        from torchvision import transforms
        from torchvision.datasets import CocoDetection

        self.annFile = './annotations/deepfashion2_{}.json'.format(dataset)
        self.cocoImgPath = './data/DeepFashion2/{}'.format(dataset)

        self.useDatabase = useDatabase
        self.dataset = CocoDetection(
            self.cocoImgPath,
            self.annFile,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: x.permute(1, 2, 0)),
                transforms.Lambda(lambda x: (x * 255).byte().numpy()),
                transforms.Lambda(lambda x: x[:, :, ::-1])
            ]))

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.loadModel("./models/Sanitizer.pth")
        self.uvMapper = UVMapper()

        # PyTorch things
        self.optimizer = torch.optim.Adam(self.classifier.parameters(),
                                          lr=learningRate,
                                          amsgrad=True)
        self.lossFunction = torch.nn.BCEWithLogitsLoss()

    def extract(self, peopleMaps):
        labelsPeople = []
        # Do label classification
        for personTexture in peopleMaps:
            # Run the classification on it
            pyTorchTexture = torch.from_numpy(
                np.array([np.moveaxis(personTexture / 255.0, -1, 0)])).float()

            pyTorchTexture = pyTorchTexture.to(device)  # FIXME: Do in model
            labelVector = self.classifier(pyTorchTexture)[0]

            # Store the data
            labelVectorHost = labelVector.detach().cpu().numpy()
            labels = {}
            for j in range(len(labelVector)):
                label = list(self.availableLabels.values())[j]
                d = (self.onActivation - self.noActivation) / 2
                val = (labelVectorHost[j] - d) / d + 0.5

                info = {"activation": min(max(val, 0.0), 1.0)}
                if 0.7 < val:
                    color = self._findColorName(personTexture,
                                                self.labelBodyparts[label])
                    if color != 0:
                        info.update(color)
                        # print(color["color"]+"  "+color["coloredStr"])
                labels[label] = info

            labelsPeople.append(labels)
        return labelsPeople

    def train(self,
              epochs=100,
              learningRate=0.005,
              dataset="Coco",
              useDatabase=True,
              printUpdateEvery=40,
              visualize=False,
              tensorboard=False):
        self._training = True
        self._initTraining(learningRate, dataset, useDatabase)

        # Deal with tensorboard
        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter

            if type(tensorboard) == str:
                writer = SummaryWriter("./data/tensorboard/" + tensorboard)
            else:
                writer = SummaryWriter("./data/tensorboard/")
            tensorboard = True

        def findBestROI(ROIs, label):
            bestMatch = 0
            bestIndex = -1
            for i, ROI in enumerate(ROIs):
                lbox = np.array(label["bbox"])
                larea = lbox[2:] - lbox[:2]
                larea = larea[0] * larea[1]
                rbox = ROI.bounds
                rarea = rbox[2:] - rbox[:2]
                rarea = rarea[0] * rarea[1]

                SI = np.maximum(0, np.minimum(lbox[2], rbox[2]) - np.maximum(lbox[0], rbox[0])) * \
                     np.maximum(0, np.minimum(lbox[3], rbox[3]) - np.maximum(lbox[1], rbox[1]))
                SU = larea + rarea - SI
                if SU == 0:
                    continue
                overlap = SI / SU
                if bestMatch < overlap:
                    bestMatch = overlap
                    bestIndex = i
            return bestIndex

        Iterations = len(self.dataset)

        print("Starting training")
        for epoch in range(epochs):
            epochLoss = np.float64(0)
            for i in range(Iterations):
                ROIs, peopleTextures, labels = self._load(i)

                # Figure out what ROI belongs to what label
                groundtruth = np.zeros((len(ROIs), 14), dtype=np.float32)
                for label in labels:
                    mostMatching = findBestROI(ROIs, label)
                    if mostMatching != -1:
                        groundtruth[mostMatching][label["category_id"]] = 1

                # Most items in this dataset are skipped because no people were
                # detected or none of them overlap the ground truth
                if len(ROIs) == 0 or not np.any(groundtruth != 0):
                    continue

                groundtruth = torch.from_numpy(groundtruth).to(device)

                # Apply noise to peopleTextures
                noise = np.random.randn(*peopleTextures.shape) * 5
                peopleTextures = peopleTextures.astype(
                    np.int32) + noise.astype(np.int32)
                peopleTextures = np.clip(peopleTextures, 0, 255)
                peopleTextures = peopleTextures.astype(np.uint8)

                peopleTextures = torch.Tensor(peopleTextures).to(device)
                predictions = self.classifier.forward(peopleTextures)
                print(groundtruth)
                print(predictions)
                print("\n")

                lossSize = self.lossFunction(predictions, groundtruth)
                lossSize.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                lossSize = lossSize.cpu().item()

                epochLoss += lossSize / Iterations
                if (i - 1) % printUpdateEvery == 0:
                    print("Iteration {} / {}, epoch {} / {}".format(
                        i, Iterations, epoch, epochs))
                    print("Loss size: {}\n".format(lossSize /
                                                   printUpdateEvery))

                if tensorboard:
                    absI = i + epoch * Iterations
                    writer.add_scalar("Loss size", lossSize, absI)

                # Show visualization
                if visualize:
                    pass  # TODO
                    """
                    image = self.renderDebug(image)
                    plt.ion()
                    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                    plt.draw()
                    plt.pause(4)
                    """

            print("Finished epoch {} / {}. Loss size:".format(
                epoch, epochs, epochLoss))
            self.saveModel(self.modelPath)

        self._training = False

    def _load(self, index):
        cocoImage = self.dataset[index]
        ROIs = None
        if self.useDatabase:
            ROIs = self.lmdb.get(DensePoseWrapper, "deepfashion2" + str(index))
        if ROIs is None:
            ROIs = self.denseposeExtractor.extract(cocoImage[0])
            ROIs = self.sanitizer.extract(ROIs)
            if self.useDatabase:
                self.lmdb.save(DensePoseWrapper, "deepfashion2" + str(index),
                               ROIs)

        peopleTextures = None
        if self.useDatabase:
            peopleTextures = self.lmdb.get(UVMapper,
                                           "deepfashion2" + str(index))
        if peopleTextures is None:
            peopleTextures = self.uvMapper.extract(ROIs, cocoImage[0])
            if self.useDatabase:
                self.lmdb.save(UVMapper, "deepfashion2" + str(index),
                               peopleTextures)

        return ROIs, peopleTextures, cocoImage[1]

    def _findColorName(self, personMap, areas):
        areaS = int(personMap.shape[1] / 5)
        Rs, Gs, Bs = [], [], []

        # Pick out colors
        for i in areas:
            xMin = int((i % 5) * areaS)
            yMin = int(np.floor(i / 5) * areaS)
            for j in range(20):
                x = np.random.randint(xMin, xMin + areaS)
                y = np.random.randint(yMin, yMin + areaS)
                b = personMap[x, y, 0]
                g = personMap[x, y, 1]
                r = personMap[x, y, 2]

                if r != 0 or b != 0 or g != 0:
                    Rs.append(r)
                    Gs.append(g)
                    Bs.append(b)

        if len(Rs) + len(Gs) + len(Bs) < 3:
            return 0

        # Find mean color
        r = np.mean(np.array(Rs)).astype(np.uint8)
        g = np.mean(np.array(Gs)).astype(np.uint8)
        b = np.mean(np.array(Bs)).astype(np.uint8)

        # This prints the color colored in the terminal
        RESET = '\033[0m'

        def get_color_escape(r, g, b, background=False):
            return '\033[{};2;{};{};{}m'.format(48 if background else 38, r, g,
                                                b)

        colorRepr = get_color_escape(
            r, g,
            b) + "rgb(" + str(r) + ", " + str(g) + ", " + str(b) + ")" + RESET

        # Get nearest color name
        HSVobj = convert_color(sRGBColor(r, g, b), HSVColor)

        nearestIndex = -1
        diffMin = 100000
        for i in range(len(self.colorsHSV)):
            colEntry = self.colorsHSV[i]

            d = HSVobj.hsv_h - colEntry.hsv_h
            dh = min(abs(d), 360 - abs(d)) / 180.0
            ds = abs(HSVobj.hsv_s - colEntry.hsv_s)
            dv = abs(HSVobj.hsv_v - colEntry.hsv_v) / 255.0
            diff = np.sqrt(dh * dh + ds * ds + dv * dv)
            if diff < diffMin:
                diffMin = diff
                nearestIndex = i

        return {
            "color": self.colors[nearestIndex][1],
            "colorDistance": diffMin,
            "coloredStr": colorRepr
        }
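

# --- Illustrative sketch (not part of the original code above) ---
# The nearest-colour lookup in _findColorName measures distance in HSV space
# with a wrap-around hue term. A minimal standalone version of that metric,
# assuming a small hypothetical colour table (hue in degrees, saturation in
# [0, 1], value in [0, 255]):
import numpy as np


def hsv_distance(a, b):
    # a and b are (hue_deg, saturation, value) tuples
    d = a[0] - b[0]
    dh = min(abs(d), 360 - abs(d)) / 180.0  # hue wraps around the circle
    ds = abs(a[1] - b[1])
    dv = abs(a[2] - b[2]) / 255.0
    return np.sqrt(dh * dh + ds * ds + dv * dv)


def nearest_color_name(hsv, table):
    # table: list of (name, (h, s, v)) entries -- hypothetical example data
    return min(table, key=lambda entry: hsv_distance(hsv, entry[1]))[0]


if __name__ == "__main__":
    palette = [("red", (0, 1.0, 255)),
               ("green", (120, 1.0, 255)),
               ("blue", (240, 1.0, 255))]
    # Hue 350 is closest to red once the wrap-around is taken into account
    print(nearest_color_name((350, 0.9, 200), palette))  # -> red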
Example #9
0
class Sanitizer(DenseSense.algorithms.Algorithm.Algorithm):
    def __init__(self):
        super().__init__()

        # Create the mask generator model; weights are loaded separately via load_model
        self.maskGenerator = AutoEncoder()
        self.modelPath = None

        self._training = False
        self._trainingInitiated = False
        self._ROI_masks = torch.Tensor()
        self._ROI_bounds = np.array([])
        self._overlappingROIs = np.array([])
        self._overlappingROIsValues = np.array([])

    def load_model(self, modelPath):
        self.modelPath = modelPath
        print("Loading Sanitizer MaskGenerator file from: " + self.modelPath)
        self.maskGenerator.load_state_dict(
            torch.load(self.modelPath, map_location=device))
        self.maskGenerator.to(device)

    def save_model(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
            return
        self.modelPath = modelPath
        print("Saving Sanitizer MaskGenerator model to: " + self.modelPath)
        torch.save(self.maskGenerator.state_dict(), self.modelPath)

    def _init_training(self, learningRate, dataset, useDatabase):
        # Dataset is COCO
        print("Initiating training of Sanitizer MaskGenerator")
        print("Loading COCO")
        from pycocotools.coco import COCO
        from os import path

        annFile = topDir + '/annotations/instances_{}.json'.format(dataset)
        self.cocoPath = topDir + '/data/{}'.format(dataset)

        self.coco = COCO(annFile)
        self.personCatID = self.coco.getCatIds(catNms=['person'])[0]
        self.cocoImageIds = self.coco.getImgIds(catIds=self.personCatID)

        def is_not_crowd(imgId):
            annIds = self.coco.getAnnIds(imgIds=imgId,
                                         catIds=self.personCatID,
                                         iscrowd=False)
            annotation = self.coco.loadAnns(annIds)[0]
            return not annotation["iscrowd"]

        self.cocoImageIds = list(filter(is_not_crowd, self.cocoImageIds))
        self.cocoOnDisk = path.exists(self.cocoPath)

        print("Coco dataset size: {}".format(len(self.cocoImageIds)))
        print("Coco images found on disk:", self.cocoOnDisk)

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        # Init loss function and optimizer
        self.optimizer = torch.optim.Adam(self.maskGenerator.parameters(),
                                          lr=learningRate,
                                          amsgrad=True)
        self.loss_function = torch.nn.BCELoss()

        # Init DensePose extractor
        self.denseposeExtractor = DensePoseWrapper()

    def extract(self, people):
        # Generate masks for all ROIs (people) using neural network model
        with torch.no_grad():
            self._ROI_masks, Ss = self._generate_masks(people)

        self._ROI_bounds = np.zeros((len(people), 4), dtype=np.int32)
        for i in range(len(people)):
            self._ROI_bounds[i] = np.array(people[i].bounds, dtype=np.int32)

        if len(self._ROI_masks) == 0:
            return people

        # Multiply masks with the segmentation mask from DensePose
        masked = self._ROI_masks * Ss

        # Find overlapping ROIs
        overlaps, overlapLow, overlapHigh = self._overlapping_matrix(
            self._ROI_bounds.astype(np.int32),
            self._ROI_bounds.astype(np.int32))
        overlaps[np.triu_indices(overlaps.shape[0])] = False
        overlapsInds = np.array(list(zip(*np.where(overlaps))))
        overlapsCorr = np.full_like(overlaps, 0, dtype=float)

        # Find correlations between overlapping ROIs
        if overlapsInds.shape[0] != 0:
            for a, b in overlapsInds:  # For every overlap
                # Extract part that overlaps from mask and make sizes match to smallest dim
                xCoords = np.array([overlapLow[0][a, b], overlapHigh[0][a, b]])
                yCoords = np.array([overlapLow[1][a, b], overlapHigh[1][a, b]])
                aMask = self._get_transformed_roi(masked[a, 0],
                                                  self._ROI_bounds[a], xCoords,
                                                  yCoords)
                bMask = self._get_transformed_roi(masked[b, 0],
                                                  self._ROI_bounds[b], xCoords,
                                                  yCoords)
                aArea = aMask.shape[0] * aMask.shape[1]
                bArea = bMask.shape[0] * bMask.shape[1]

                # Scale down the biggest one
                if aArea < bArea:
                    bMask = bMask.unsqueeze(0)
                    bMask = F.adaptive_avg_pool2d(bMask, aMask.shape)[0]
                else:
                    aMask = aMask.unsqueeze(0)
                    aMask = F.adaptive_avg_pool2d(aMask, bMask.shape)[0]

                # Calculate correlation
                aMean = aMask.mean()
                bMean = bMask.mean()
                correlation = torch.sum(
                    (aMask - aMean) *
                    (bMask - bMean)) / (aMask.shape[0] * aMask.shape[1] - 1)
                overlapsCorr[a, b] = correlation

        # Find best disjoint sets of overlapping ROIs
        threshold = 0.06  # Must be above 0

        goodCorrelations = np.argwhere(threshold < overlapsCorr)
        sortedCorrelations = overlapsCorr[goodCorrelations[:, 0],
                                          goodCorrelations[:, 1]].argsort()
        goodCorrelations = goodCorrelations[sortedCorrelations]
        overlapsCorr += overlapsCorr.T
        coupled = {}
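        # coupled maps each ROI index to a shared [set of merged indices,
        # accumulated potential] entry, so coupled ROIs point at the same list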

        def get_bi_potential(a, diff):
            potential = 0
            for bOther in np.argwhere(overlapsCorr[diff] != 0):
                bOther = bOther[0]
                if bOther in coupled[a][0]:
                    potential += overlapsCorr[a, bOther]
            return potential

        for a, b in goodCorrelations:
            aIn = a in coupled
            bIn = b in coupled
            if aIn:
                if bIn:
                    potential = overlapsCorr[a, b]
                    for diff in coupled[b][0]:
                        potential += get_bi_potential(a, diff)
                    if 0 < potential:
                        coupled[a][0].update(coupled[b][0])
                        for diff in coupled[b][0]:
                            coupled[diff] = coupled[a]
                            coupled[a][1] += potential
                else:
                    potential = overlapsCorr[a, b] + get_bi_potential(a, b)
                    if 0 < potential:
                        coupled[a][0].add(b)
                        coupled[a][1] += potential
                        coupled[b] = coupled[a]
            elif bIn:
                potential = overlapsCorr[b, a] + get_bi_potential(b, a)
                if 0 < potential:
                    coupled[b][0].add(a)
                    coupled[b][1] += potential
                    coupled[a] = coupled[b]
            else:
                n = [{a, b}, overlapsCorr[a, b]]
                coupled[a] = n
                coupled[b] = n

        # Merge coupled ROIs into single people and drop mostly inactive ones
        ActiveThreshold = 0.1  # FIXME: magic number
        newPeople = []
        skip = set()
        for i, person in enumerate(people):
            if i not in skip:
                if i in coupled:
                    # Merge all coupled into one person
                    instances = list(coupled[i][0])
                    for j in instances:
                        skip.add(j)
                    instances = list(map(lambda i: people[i], instances))
                    instances[0].merge(instances[1:])
                    newPeople.append(instances[0])
                else:
                    # Lonely ROIs are kept alive if at least 10 % of the mask is active
                    active = torch.mean(masked[i])
                    if ActiveThreshold < active:
                        newPeople.append(person)

        # Generate a mask again for the whole person, allowing for a holistic judgement
        self._generate_masks(newPeople)

        # TODO: find the edges of the person and crop

        return newPeople

    def train(self,
              epochs=100,
              learning_rate=0.005,
              dataset="Coco",
              use_database=True,
              print_update_every=40,
              visualize=0,
              tensorboard=False):

        self._training = True
        if not self._trainingInitiated:
            self._init_training(learning_rate, dataset, use_database)

        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter

            if type(tensorboard) == str:
                writer = SummaryWriter(topDir + "/data/tensorboard/" +
                                       tensorboard)
            else:
                writer = SummaryWriter(topDir + "/data/tensorboard/")
            tensorboard = True

        total_iterations = len(self.cocoImageIds)
        visualize_counter = 0

        meanPixels = []

        print("Starting training")

        for epoch in range(epochs):
            epoch_loss = np.float64(0)
            for i in range(total_iterations):
                # Load instance of COCO dataset
                cocoImage, image = self._get_coco_image(i)
                if image is None:  # FIXME
                    print("Image is None??? Skipping.", i)
                    print(cocoImage)
                    continue

                # Get annotation
                annIds = self.coco.getAnnIds(imgIds=cocoImage["id"],
                                             catIds=self.personCatID,
                                             iscrowd=False)
                annotation = self.coco.loadAnns(annIds)

                # Get DensePose data from DB or Extractor
                generated = False
                ROIs = None
                if use_database:
                    ROIs = self.lmdb.get("DensePoseWrapper_Coco",
                                         str(cocoImage["id"]))
                if ROIs is None:
                    ROIs = self.denseposeExtractor.extract(image)
                    generated = True
                if use_database and generated:
                    self.lmdb.save("DensePoseWrapper_Coco",
                                   str(cocoImage["id"]), ROIs)

                # Run prediction
                self._ROI_masks, Ss = self._generate_masks(ROIs)

                # Store bounds
                self._ROI_bounds = np.zeros((len(ROIs), 4), dtype=np.int32)
                for j in range(len(ROIs)):
                    self._ROI_bounds[j] = np.array(ROIs[j].bounds,
                                                   dtype=np.int32)

                if len(self._ROI_masks) == 0:
                    continue

                if tensorboard:
                    means = [
                        torch.mean(ROI).detach().cpu().numpy()
                        for ROI in self._ROI_masks
                    ]
                    meanPixels.append(sum(means) / len(means))

                # Draw each person in annotation to separate mask from polygon vertices
                segs = []
                seg_bounds = []
                for person in annotation:
                    mask = np.zeros(image.shape[0:2], dtype=np.uint8)
                    for s in person["segmentation"]:
                        s = np.reshape(np.array(s, dtype=np.int32), (-1, 2))
                        cv2.fillPoly(mask, [s], 1)
                    segs.append(mask)
                    bbox = person["bbox"]
                    seg_bounds.append(
                        np.array([
                            bbox[0], bbox[1], bbox[0] + bbox[2],
                            bbox[1] + bbox[3]
                        ]))

                seg_bounds = np.array(seg_bounds, dtype=np.int32)

                # Find overlaps between bboxes of segs and ROIs
                overlaps, overlapLow, overlapHigh = self._overlapping_matrix(
                    seg_bounds.astype(np.int32),
                    self._ROI_bounds.astype(np.int32))

                overlapsInds = np.array(list(zip(*np.where(overlaps))))
                if overlapsInds.shape[0] == 0:
                    continue

                # For every overlap, measure how much the predicted ROI mask
                # agrees with the COCO person mask
                contentAverage = {}
                for a, b in overlapsInds:  # For every overlap
                    xCoords = np.array(
                        [overlapLow[0][a, b], overlapHigh[0][a, b]])
                    yCoords = np.array(
                        [overlapLow[1][a, b], overlapHigh[1][a, b]])
                    ROI_mask = self._get_transformed_roi(
                        self._ROI_masks[a, 0], self._ROI_bounds[a], xCoords,
                        yCoords)

                    # Segmentation overlap area
                    segOverlap = segs[b][yCoords[0]:yCoords[1],
                                         xCoords[0]:xCoords[1]]

                    # Transform segmentation
                    segOverlap = cv2.resize(
                        segOverlap, (ROI_mask.shape[1], ROI_mask.shape[0]),
                        interpolation=cv2.INTER_AREA)

                    # Calculate sum of product of the ROI mask and segment overlap
                    segOverlap = torch.from_numpy(segOverlap).float().to(
                        device)
                    avgVariable = torch.sum(ROI_mask * segOverlap)

                    # Store this sum
                    if str(a) not in contentAverage:
                        contentAverage[str(a)] = []

                    contentAverage[str(a)].append(
                        (avgVariable, segOverlap, ROI_mask))

                self._overlappingROIs = np.unique(overlapsInds[:, 0])

                # Choose which segment each ROI should be compared with
                losses = []
                for j in range(len(
                        self._overlappingROIs)):  # For every ROI with overlap
                    a = self._overlappingROIs[j]

                    AL = list(contentAverage[str(a)])
                    AV = np.array([float(x[0].cpu()) for x in AL])

                    ind = AV.argmax()
                    lossSize = self.loss_function(AL[ind][2], AL[ind][1])
                    lossSize.backward(retain_graph=True)

                    losses.append(lossSize.detach().cpu().float())

                self.optimizer.step()
                self.optimizer.zero_grad()

                lossSize = sum(losses) / len(losses)

                epoch_loss += lossSize / total_iterations
                visualize_counter += 1
                if (i - 1) % print_update_every == 0:
                    print("Iteration {} / {}, epoch {} / {}".format(
                        i, total_iterations, epoch, epochs))
                    print("Loss size: {}\n".format(lossSize /
                                                   print_update_every))
                    if tensorboard:
                        absI = i + epoch * total_iterations
                        writer.add_scalar("Loss size", lossSize, absI)
                        writer.add_histogram("Mean ROI pixel value",
                                             np.array(meanPixels), absI)
                        meanPixels = []

                # Show visualization
                if visualize != 0 and visualize <= visualize_counter:
                    visualize_counter = 0
                    image = self.renderDebug(image, None, annotated_segs=segs)
                    cv2.imshow("Sanitizer training", image)
                    for j, m in enumerate(self._ROI_masks):
                        if j > 6:
                            break
                        cv2.imshow("Mask " + str(j),
                                   (m[0] * 255).cpu().detach().to(
                                       torch.uint8).numpy())

                    if len(self._ROI_masks) >= 2:
                        cv2.imshow("Mask diff",
                                   (torch.abs(self._ROI_masks[0][0] -
                                              self._ROI_masks[1][0]) *
                                    255).cpu().detach().to(
                                        torch.uint8).numpy())
                    cv2.waitKey(1)

            print("Finished epoch {} / {}. Loss size:".format(
                epoch, epochs, epoch_loss))
            if tensorboard:
                writer.add_scalar("epoch loss size", epoch_loss,
                                  total_iterations * epoch)
            self.save_model(self.modelPath)

        self._training = False

    def _generate_masks(self, ROIs):
        Ss = self._tensorify_ROIs(ROIs)
        masks = []
        if len(ROIs) != 0:
            masks = self.maskGenerator.forward(Ss)

        for i in range(len(ROIs)):
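            # Attach a rounded binary mask to each ROI so later stages can use it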
            ROIs[i].A = torch.round(masks[i, 0]).detach().cpu().numpy()

        return masks, Ss

    def _get_coco_image(self, index):
        if self.cocoOnDisk:
            # Load image from disk
            cocoImage = self.coco.loadImgs(self.cocoImageIds[index])[0]
            image = cv2.imread(self.cocoPath + "/" + cocoImage["file_name"])
            return cocoImage, image
        else:
            raise FileNotFoundError("COCO image cant be found on disk")

    @staticmethod
    def _tensorify_ROIs(ROIs):
        S = torch.Tensor(len(ROIs), 1, 56, 56)
        for j in range(len(ROIs)):
            person = ROIs[j]
            S[j][0] = torch.from_numpy(person.S)

        S = S.to(device)
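        # Rescale nonzero DensePose body-part indices into a fixed (0, 1) range
        # so the network sees normalized input instead of raw class labels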
        S[0 < S] = S[0 < S] / 15.0 * 0.8 + 0.2
        return S

    @staticmethod
    def _overlapping_matrix(a, b):
        xo_high = np.minimum(a[:, 2], b[:, None, 2])
        xo_low = np.maximum(a[:, 0], b[:, None, 0])
        xo = xo_high - xo_low

        yo_high = np.minimum(a[:, 3], b[:, None, 3])
        yo_low = np.maximum(a[:, 1], b[:, None, 1])
        yo = yo_high - yo_low

        overlappingMask = np.logical_and((0 < xo), (0 < yo))
        return overlappingMask, (xo_low, yo_low), (xo_low + xo, yo_low + yo)

    @staticmethod
    def _get_transformed_roi(ROI, bounds, x_coords, y_coords):
        # ROI transformed overlap area
        ROI_xCoords = (x_coords - bounds[0]) / (bounds[2] - bounds[0])
        ROI_xCoords = (ROI_xCoords * 56).astype(np.int32)
        ROI_xCoords[1] += ROI_xCoords[0] == ROI_xCoords[1]
        ROI_yCoords = (y_coords - bounds[1]) / (bounds[3] - bounds[1])
        ROI_yCoords = (ROI_yCoords * 56).astype(np.int32)
        ROI_yCoords[1] += ROI_yCoords[0] == ROI_yCoords[1]

        ROI_mask = ROI[ROI_yCoords[0]:ROI_yCoords[1],
                       ROI_xCoords[0]:ROI_xCoords[1]]

        return ROI_mask

    def renderDebug(self, image, people, annotated_segs=None, alpha=0.65):
        # Normalize ROIs from (0, 1) to (0, 255)
        ROIsMaskNorm = self._ROI_masks * 255

        # Render masks on image
        for i in range(len(self._ROI_masks)):
            mask = ROIsMaskNorm[i, 0].cpu().detach().to(torch.uint8).numpy()
            bnds = self._ROI_bounds[i]

            # Change colors of mask
            if 0 < alpha:
                mask = cv2.applyColorMap(mask, cv2.COLORMAP_SUMMER)
            else:
                alpha = -alpha
                mask = cv2.applyColorMap(mask, cv2.COLORMAP_PINK)

            # Resize mask to bounds
            dims = (bnds[2] - bnds[0], bnds[3] - bnds[1])
            mask = cv2.resize(mask, dims, interpolation=cv2.INTER_AREA)

            # Overlay image
            overlap = image[bnds[1]:bnds[3], bnds[0]:bnds[2]]
            mask = mask * alpha + overlap * (1.0 - alpha)
            image[bnds[1]:bnds[3], bnds[0]:bnds[2]] = mask

        if people is not None:
            for person in people:
                bnds = person.bounds
                image = cv2.rectangle(image, (bnds[0], bnds[1]),
                                      (bnds[2], bnds[3]), (60, 20, 20), 1)

        # Render annotated segmentations
        if annotated_segs is not None:
            for seg in annotated_segs:
                seg = seg * 60
                image = image.astype(np.int32)
                image[:, :, 0] += seg
                image[:, :, 1] += seg // 3
                image[:, :, 2] += seg // 3
                image = image.clip(0, 255).astype(np.uint8)

        return image
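

# --- Illustrative sketch (not part of the original class above) ---
# _overlapping_matrix broadcasts two arrays of (x1, y1, x2, y2) boxes against
# each other and returns a boolean overlap mask plus, for every pair, the low
# and high corners of the intersection rectangle. A minimal standalone version
# with a couple of hypothetical boxes:
import numpy as np


def overlapping_matrix(a, b):
    xo_high = np.minimum(a[:, 2], b[:, None, 2])
    xo_low = np.maximum(a[:, 0], b[:, None, 0])
    yo_high = np.minimum(a[:, 3], b[:, None, 3])
    yo_low = np.maximum(a[:, 1], b[:, None, 1])
    overlapping = np.logical_and(0 < xo_high - xo_low, 0 < yo_high - yo_low)
    return overlapping, (xo_low, yo_low), (xo_high, yo_high)


if __name__ == "__main__":
    boxes_a = np.array([[0, 0, 10, 10], [20, 20, 30, 30]], dtype=np.int32)
    boxes_b = np.array([[5, 5, 15, 15]], dtype=np.int32)
    overlaps, low, high = overlapping_matrix(boxes_a, boxes_b)
    print(overlaps)                      # [[ True False]]
    print(low[0][0, 0], high[0][0, 0])   # x-range of the intersection: 5 10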
Example #10
0
class Sanitizer(DenseSense.algorithms.Algorithm.Algorithm):

    # UNet, inspired by https://github.com/usuyama/pytorch-unet/
    # But with a fully connected layer in the middle
    class MaskGenerator(nn.Module):
        def __init__(self):
            super(Sanitizer.MaskGenerator, self).__init__()

            self.dconv1 = nn.Sequential(
                nn.Conv2d(1, 8, 3, padding=2),
                nn.LeakyReLU(inplace=True),
            )

            self.dconv2 = nn.Sequential(
                nn.Conv2d(8, 4, 3, padding=1),
                nn.LeakyReLU(inplace=True),
            )

            self.dconv3 = nn.Sequential(
                nn.Conv2d(3, 1, 3, padding=1),
                nn.LeakyReLU(inplace=True),
            )

            self.fcImg = nn.Linear(14*14*2+2, 14*14)

            self.maxpool = nn.MaxPool2d(2)
            self.upsample1 = nn.Upsample(size=(29, 29), mode="bilinear")
            self.upsample2 = nn.Upsample(size=(56, 56), mode="bilinear")

            self.sigmoid = nn.Sigmoid()
            self.leakyReLU = nn.LeakyReLU()

        def forward(self, people):
            if len(people) == 0:
                return np.array([]), torch.Tensor([]).to(device)

            # Send data to device
            S = torch.Tensor(len(people), 1, 56, 56)
            b = torch.Tensor(len(people), 2)
            for i in range(len(people)):
                person = people[i]
                S[i][0] = torch.from_numpy(person.S)
                bnds = person.bounds
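                # Two scalar bbox features (a compressed area term and the
                # aspect ratio) that get concatenated with the mask features
                # before the fully connected layer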
                area = np.power(np.sqrt((bnds[2] - bnds[0]) * (bnds[3] - bnds[1])), 0.2)
                if bnds[3] == bnds[1]:
                    aspect = 0
                else:
                    aspect = (bnds[2] - bnds[0]) / (bnds[3] - bnds[1])
                b[i] = torch.Tensor([area, aspect])

            S = S.to(device)
            b = b.to(device)
            batchSize = S.shape[0]

            # Normalize input
            x = S.clone()
            x[0 < x] = x[0 < x] / 15.0 * 0.2 + 0.8

            # Convolutions
            x = self.dconv1(x)  # 1 -> 8, 56x56 -> 58x58
            x = self.maxpool(x)     # 58x58 -> 29x29
            conv = self.dconv2(x)  # 8 -> 4
            x = self.maxpool(conv[:, :2])     # 29x29 -> 14x14

            # Fully connected layer
            x = x.view(batchSize, 14*14*2)
            x = torch.cat([x, b], dim=1)  # Image and bbox info
            x = self.fcImg(x)
            x = self.leakyReLU(x)
            x = x.view(batchSize, 1, 14, 14)

            # Merge fully connected with past convolution calculation
            x = self.upsample1(x)  # 14x14 -> 29x29
            x = torch.cat([x, conv[:, 2:]], dim=1)
            x = self.dconv3(x)     # 3 -> 1
            x = self.sigmoid(x)
            x = self.upsample2(x)  # 29x29 -> 56x56

            return x, S

    def __init__(self):
        super().__init__()

        # Create the mask generator model; weights are loaded separately via loadModel
        self.maskGenerator = Sanitizer.MaskGenerator()
        self.modelPath = None

        self._training = False
        self._trainingInitiated = False
        self._ROI_masks = torch.Tensor()
        self._ROIs = torch.Tensor()
        self._ROI_bounds = np.array([])
        self._overlappingROIs = np.array([])
        self._overlappingROIsValues = np.array([])

    def loadModel(self, modelPath):
        self.modelPath = modelPath
        print("Loading Sanitizer MaskGenerator file from: " + self.modelPath)
        self.maskGenerator.load_state_dict(torch.load(self.modelPath, map_location=device))
        self.maskGenerator.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
            return
        self.modelPath = modelPath
        print("Saving Sanitizer MaskGenerator model to: "+self.modelPath)
        torch.save(self.maskGenerator.state_dict(), self.modelPath)

    def _initTraining(self, learningRate, dataset, useDatabase):
        # Dataset is COCO
        print("Initiating training of Sanitizer MaskGenerator")
        print("Loading COCO")
        from pycocotools.coco import COCO
        from os import path

        # TODO: support other data sets than Coco
        annFile = './annotations/instances_{}.json'.format(dataset)
        self.cocoPath = './data/{}'.format(dataset)

        self.coco = COCO(annFile)
        self.personCatID = self.coco.getCatIds(catNms=['person'])[0]
        self.cocoImageIds = self.coco.getImgIds(catIds=self.personCatID)

        def isNotCrowd(imgId):
            annIds = self.coco.getAnnIds(imgIds=imgId, catIds=self.personCatID, iscrowd=False)
            annotation = self.coco.loadAnns(annIds)[0]
            return not annotation["iscrowd"]

        self.cocoImageIds = list(filter(isNotCrowd, self.cocoImageIds))
        self.cocoOnDisk = path.exists(self.cocoPath)

        print("Coco dataset size: {}".format(len(self.cocoImageIds)))
        print("Coco images found on disk:", self.cocoOnDisk)

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        # Init loss function and optimizer
        self.optimizer = torch.optim.Adam(self.maskGenerator.parameters(), lr=learningRate, amsgrad=True)
        self.lossFunction = torch.nn.MSELoss()

        # Init DensePose extractor
        self.denseposeExtractor = DensePoseWrapper()

    def extract(self, people):
        # Generate masks for all ROIs (people) using neural network model
        with torch.no_grad():
            self._generateMasks(people)

            if len(self._ROI_masks) == 0:
                return people

            # Multiply masks with the segmentation mask from DensePose
            masked = self._ROI_masks*self._ROIs

            # Find overlapping ROIs
            overlaps, overlapLow, overlapHigh = self._overlappingMatrix(
                self._ROI_bounds.astype(np.int32),
                self._ROI_bounds.astype(np.int32)
            )
            overlaps[np.triu_indices(overlaps.shape[0])] = False
            overlapsInds = np.array(list(zip(*np.where(overlaps))))
            overlapsCorr = np.full_like(overlaps, 0, dtype=float)

            if overlapsInds.shape[0] != 0:
                for a, b in overlapsInds:  # For every overlap
                    # Extract part that overlaps from mask and make sizes match to smallest dim
                    xCoords = np.array([overlapLow[0][a, b], overlapHigh[0][a, b]])
                    yCoords = np.array([overlapLow[1][a, b], overlapHigh[1][a, b]])
                    aMask = self._getTransformedROI(masked[a, 0], self._ROI_bounds[a], xCoords, yCoords)
                    bMask = self._getTransformedROI(masked[b, 0], self._ROI_bounds[b], xCoords, yCoords)
                    aArea = aMask.shape[0]*aMask.shape[1]
                    bArea = bMask.shape[0]*bMask.shape[1]
                    if aArea < bArea:
                        bMask = bMask.unsqueeze(0)
                        bMask = F.adaptive_avg_pool2d(bMask, aMask.shape)[0]
                    else:
                        aMask = aMask.unsqueeze(0)
                        aMask = F.adaptive_avg_pool2d(aMask, bMask.shape)[0]

                    # Calculate correlation
                    aMean = aMask.mean()
                    bMean = bMask.mean()
                    correlation = torch.sum((aMask-aMean)*(bMask-bMean))/(aMask.shape[0]*aMask.shape[1]-1)
                    overlapsCorr[a, b] = correlation

            # Find best disjoint sets of overlapping ROIs
            threshold = 0.06  # Must be above 0

            goodCorrelations = np.argwhere(threshold < overlapsCorr)
            sortedCorrelations = overlapsCorr[goodCorrelations[:, 0], goodCorrelations[:, 1]].argsort()
            goodCorrelations = goodCorrelations[sortedCorrelations]
            overlapsCorr += overlapsCorr.T
            coupled = {}

            def getBiPotential(a, diff):
                potential = 0
                for bOther in np.argwhere(overlapsCorr[diff] != 0):
                    bOther = bOther[0]
                    if bOther in coupled[a][0]:
                        potential += overlapsCorr[a, bOther]
                return potential

            for a, b in goodCorrelations:
                aIn = a in coupled
                bIn = b in coupled
                if aIn:
                    if bIn:
                        potential = overlapsCorr[a, b]
                        for diff in coupled[b][0]:
                            potential += getBiPotential(a, diff)
                        if 0 < potential:
                            coupled[a][0].update(coupled[b][0])
                            for diff in coupled[b][0]:
                                coupled[diff] = coupled[a]
                                coupled[a][1] += potential
                    else:
                        potential = overlapsCorr[a, b] + getBiPotential(a, b)
                        if 0 < potential:
                            coupled[a][0].add(b)
                            coupled[a][1] += potential
                            coupled[b] = coupled[a]
                elif bIn:
                    potential = overlapsCorr[b, a] + getBiPotential(b, a)
                    if 0 < potential:
                        coupled[b][0].add(a)
                        coupled[b][1] += potential
                        coupled[a] = coupled[b]
                else:
                    n = [{a, b}, overlapsCorr[a, b]]
                    coupled[a] = n
                    coupled[b] = n

            newPeople = []

            # Merge each group of coupled ROIs into a single person
            while len(coupled) != 0:
                instance = next(iter(coupled))
                instances = list(coupled[instance][0])
                for i in instances:
                    del coupled[i]
                instances = list(map(lambda i: people[i], instances))
                instances[0].merge(instances[1:])
                newPeople.append(instances[0])

            return newPeople

    def train(self, epochs=100, learningRate=0.005, dataset="Coco",
              useDatabase=True, printUpdateEvery=40,
              visualize=False, tensorboard=False):

        self._training = True
        if not self._trainingInitiated:
            self._initTraining(learningRate, dataset, useDatabase)

        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter

            if type(tensorboard) == str:
                writer = SummaryWriter("./data/tensorboard/"+tensorboard)
            else:
                writer = SummaryWriter("./data/tensorboard/")
            tensorboard = True

            # dummy_input = torch.Tensor(5, 1, 56, 56)
            # writer.add_graph(self.maskGenerator, dummy_input)
            # writer.close()

        Iterations = len(self.cocoImageIds)

        meanPixels = []

        print("Starting training")

        for epoch in range(epochs):
            epochLoss = np.float64(0)
            interestingImage = None
            interestingMeasure = -100000
            for i in range(Iterations):

                # Load instance of COCO dataset
                cocoImage, image = self._getCocoImage(i)
                if image is None:  # FIXME
                    print("Image is None??? Skipping.", i)
                    print(cocoImage)
                    continue

                # Get annotation
                annIds = self.coco.getAnnIds(imgIds=cocoImage["id"], catIds=self.personCatID, iscrowd=False)
                annotation = self.coco.loadAnns(annIds)

                # Draw each person in annotation to separate mask
                segs = []
                seg_bounds = []
                for person in annotation:
                    mask = np.zeros(image.shape[0:2], dtype=np.uint8)
                    for s in person["segmentation"]:
                        s = np.reshape(np.array(s, dtype=np.int32), (-1, 2))
                        cv2.fillPoly(mask, [s], 1)
                    segs.append(mask)
                    bbox = person["bbox"]
                    seg_bounds.append(np.array([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]))

                seg_bounds = np.array(seg_bounds, dtype=np.int32)

                # Get DensePose data from DB or Extractor
                generated = False
                ROIs = None
                if useDatabase:
                    ROIs = self.lmdb.get(DensePoseWrapper, "coco" + str(cocoImage["id"]))
                if ROIs is None:
                    ROIs = self.denseposeExtractor.extract(image)
                    generated = True
                if useDatabase and generated:
                    self.lmdb.save(DensePoseWrapper, "coco" + str(cocoImage["id"]), ROIs)

                # Run prediction
                self._generateMasks(ROIs)

                if len(self._ROI_masks) == 0:
                    continue

                if tensorboard:
                    means = [torch.mean(ROI).detach().cpu().numpy() for ROI in self._ROI_masks]
                    meanPixels.append(sum(means)/len(means))

                # Find overlaps between bboxes of segs and ROIs
                overlaps, overlapLow, overlapHigh = self._overlappingMatrix(
                    seg_bounds.astype(np.int32),
                    self._ROI_bounds.astype(np.int32)
                )

                overlapsInds = np.array(list(zip(*np.where(overlaps))))
                if overlapsInds.shape[0] == 0:
                    continue

                # For every overlap, measure how much the predicted ROI mask
                # agrees with the COCO person mask
                contentAverage = {}
                for a, b in overlapsInds:  # For every overlap
                    xCoords = np.array([overlapLow[0][a, b], overlapHigh[0][a, b]])
                    yCoords = np.array([overlapLow[1][a, b], overlapHigh[1][a, b]])
                    ROI_mask = self._getTransformedROI(self._ROI_masks[a, 0], self._ROI_bounds[a], xCoords, yCoords)

                    # Segmentation overlap area
                    segOverlap = segs[b][yCoords[0]:yCoords[1], xCoords[0]:xCoords[1]]

                    # Transform segmentation
                    segOverlap = cv2.resize(segOverlap, (ROI_mask.shape[1], ROI_mask.shape[0]),
                                            interpolation=cv2.INTER_AREA)

                    # Calculate sum of product of the ROI mask and segment overlap
                    segOverlap = torch.from_numpy(segOverlap).float().to(device)
                    avgVariable = torch.sum(ROI_mask * segOverlap)

                    # Store this sum
                    if str(a) not in contentAverage:
                        contentAverage[str(a)] = []

                    contentAverage[str(a)].append((avgVariable, segOverlap, ROI_mask))

                self._overlappingROIs = np.unique(overlapsInds[:, 0])

                # Choose which segment each ROI should be compared with
                losses = []
                for j in range(len(self._overlappingROIs)):  # For every ROI with overlap
                    a = self._overlappingROIs[j]

                    AL = list(contentAverage[str(a)])
                    AV = np.array([float(x[0].cpu()) for x in AL])

                    ind = AV.argmax()
                    lossSize = self.lossFunction(AL[ind][2], AL[ind][1])
                    losses.append(lossSize)

                # Modify weights
                losses = torch.stack(losses)
                lossSize = torch.sum(losses)
                lossSize.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                lossSize = lossSize.cpu().item()

                epochLoss += lossSize/Iterations
                if (i-1) % printUpdateEvery == 0:
                    print("Iteration {} / {}, epoch {} / {}".format(i, Iterations, epoch, epochs))
                    print("Loss size: {}\n".format(lossSize / printUpdateEvery))
                    if tensorboard:
                        absI = i + epoch * Iterations
                        writer.add_scalar("Loss size", lossSize, absI)
                        writer.add_histogram("Mean ROI pixel value", np.array(meanPixels), absI)
                        meanPixels = []

                if tensorboard:
                    interestingness = np.random.random()  # just choose a random one
                    if interestingMeasure < interestingness:
                        interestingImage = self.renderDebug(image.copy())
                        interestingMeasure = interestingness

                # Show visualization
                if visualize:
                    image = self.renderDebug(image)
                    plt.ion()
                    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                    plt.draw()
                    plt.pause(4)

            print("Finished epoch {} / {}. Loss size:".format(epoch, epochs, epochLoss))
            if tensorboard:
                writer.add_scalar("epoch loss size", epochLoss, Iterations*epoch)
                if interestingImage is not None:
                    interestingImage = cv2.cvtColor(interestingImage, cv2.COLOR_BGR2RGB)
                    interestingImage = torch.from_numpy(interestingImage).permute(2, 0, 1)
                    writer.add_image("interesting image", interestingImage, Iterations*epoch)
            self.saveModel(self.modelPath)

        self._training = False

    def _generateMasks(self, ROIs):
        self._ROI_masks, self._ROIs = self.maskGenerator.forward(ROIs)
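        # Binarize the DensePose part segmentation so it can be used as a mask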
        self._ROIs[self._ROIs != 0] = 1
        self._ROI_bounds = np.zeros((len(ROIs), 4), dtype=np.int32)
        for i in range(len(ROIs)):
            self._ROI_bounds[i] = np.array(ROIs[i].bounds, dtype=np.int32)
            ROIs[i].A = torch.round(self._ROI_masks[i, 0]).detach().cpu().numpy()

    def _getCocoImage(self, index):
        if self.cocoOnDisk:
            # Load image from disk
            cocoImage = self.coco.loadImgs(self.cocoImageIds[index])[0]
            image = cv2.imread(self.cocoPath + "/" + cocoImage["file_name"])
            return cocoImage, image
        else:
            raise FileNotFoundError("COCO image cant be found on disk")

    @staticmethod
    def _overlappingMatrix(a, b):
        xo_high = np.minimum(a[:, 2], b[:, None, 2])
        xo_low = np.maximum(a[:, 0], b[:, None, 0])
        xo = xo_high - xo_low

        yo_high = np.minimum(a[:, 3], b[:, None, 3])
        yo_low = np.maximum(a[:, 1], b[:, None, 1])
        yo = yo_high - yo_low

        overlappingMask = np.logical_and((0 < xo), (0 < yo))
        return overlappingMask, (xo_low, yo_low), (xo_low + xo, yo_low + yo)

    @staticmethod
    def _getTransformedROI(ROI, bounds, xCoords, yCoords):
        # ROI transformed overlap area
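        # Map the overlap rectangle from image coordinates into the 56x56 mask
        # grid; the "+=" lines bump the upper bound so the slice is never empty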
        ROI_xCoords = (xCoords - bounds[0]) / (bounds[2] - bounds[0])
        ROI_xCoords = (ROI_xCoords * 56).astype(np.int32)
        ROI_xCoords[1] += ROI_xCoords[0] == ROI_xCoords[1]
        ROI_yCoords = (yCoords - bounds[1]) / (bounds[3] - bounds[1])
        ROI_yCoords = (ROI_yCoords * 56).astype(np.int32)
        ROI_yCoords[1] += ROI_yCoords[0] == ROI_yCoords[1]

        ROI_mask = ROI[ROI_yCoords[0]:ROI_yCoords[1], ROI_xCoords[0]:ROI_xCoords[1]]

        return ROI_mask

    def renderDebug(self, image, alpha=0.55):
        # Normalize ROIs from (0, 1) to (0, 255)
        ROIsMaskNorm = self._ROI_masks * 255

        # Render masks on image
        for i in range(len(self._ROI_masks)):
            mask = ROIsMaskNorm[i, 0].cpu().detach().to(torch.uint8).numpy()
            bnds = self._ROI_bounds[i]

            # Change colors of mask
            if 0 < alpha:
                mask = cv2.applyColorMap(mask, cv2.COLORMAP_SUMMER)
            else:
                alpha = -alpha
                mask = cv2.applyColorMap(mask, cv2.COLORMAP_PINK)

            # TODO: render contours instead?

            # Resize mask to bounds
            dims = (bnds[2] - bnds[0], bnds[3] - bnds[1])
            mask = cv2.resize(mask, dims, interpolation=cv2.INTER_AREA)

            # Overlay image
            overlap = image[bnds[1]:bnds[3], bnds[0]:bnds[2]]
            mask = mask * alpha + overlap * (1.0 - alpha)
            image[bnds[1]:bnds[3], bnds[0]:bnds[2]] = mask

        return image
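

# --- Illustrative sketch (not part of the original class above) ---
# The merging step in Sanitizer.extract scores each pair of overlapping masks
# with a (scaled) covariance: a clearly positive value means the two masks
# light up the same pixels, so the ROIs probably belong to the same person.
# A minimal standalone version of that score on two hypothetical 4x4 masks:
import torch


def mask_correlation(a_mask, b_mask):
    a_mean = a_mask.mean()
    b_mean = b_mask.mean()
    n = a_mask.shape[0] * a_mask.shape[1]
    return torch.sum((a_mask - a_mean) * (b_mask - b_mean)) / (n - 1)


if __name__ == "__main__":
    a = torch.zeros(4, 4)
    a[1:3, 1:3] = 1.0          # small blob in the middle
    b = a.clone()              # identical mask -> strong agreement
    c = torch.zeros(4, 4)
    c[0, 0] = 1.0              # disjoint blob -> no agreement
    print(mask_correlation(a, b).item())  # 0.2, well above the 0.06 threshold
    print(mask_correlation(a, c).item())  # about -0.017, would not be merged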