Code example #1
    def _initTraining(self, learningRate, dataset, useDatabase):
        # Dataset is DeepFashion2
        print("Initiating training of DescriptionExtractor")
        print("Loading DeepFashion2")
        from torchvision import transforms
        from torchvision.datasets import CocoDetection

        self.annFile = topDir + '/annotations/deepfashion2_{}.json'.format(
            dataset)
        self.cocoImgPath = topDir + '/data/DeepFashion2/{}'.format(dataset)

        self.useDatabase = useDatabase
        self.dataset = CocoDetection(
            self.cocoImgPath,
            self.annFile,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: x.permute(1, 2, 0)),
                transforms.Lambda(lambda x: (x * 255).byte().numpy()),
                transforms.Lambda(lambda x: x[:, :, ::-1])
            ]))

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")
        self.uvMapper = UVMapper()

        # PyTorch things
        self.optimizer = torch.optim.Adam(self.classifier.parameters(),
                                          lr=learningRate,
                                          amsgrad=True)
        self.lossFunction = torch.nn.BCEWithLogitsLoss()
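
The transform chain above is what converts torchvision's RGB float tensor back into the BGR uint8 array that OpenCV (and hence DensePose) expects. A minimal standalone sketch of the same pipeline on a dummy image (the 8x8 size and random pixels are just for illustration):

import numpy as np
from PIL import Image
from torchvision import transforms

to_bgr_uint8 = transforms.Compose([
    transforms.ToTensor(),                                  # HWC uint8 RGB -> CHW float in [0, 1]
    transforms.Lambda(lambda x: x.permute(1, 2, 0)),        # CHW -> HWC
    transforms.Lambda(lambda x: (x * 255).byte().numpy()),  # float tensor -> uint8 NumPy array
    transforms.Lambda(lambda x: x[:, :, ::-1]),             # RGB -> BGR channel order
])

dummy = Image.fromarray(np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8))
bgr = to_bgr_uint8(dummy)
assert bgr.shape == (8, 8, 3) and bgr.dtype == np.uint8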
Code example #2
File: webcam.py Project: fiv21/DenseSense
def main():
    cam = cv2.VideoCapture(0)

    densepose = DensePoseWrapper()
    sanitizer = Sanitizer()
    sanitizer.loadModel("./models/Sanitizer.pth")
    tracker = Tracker()
    uvMapper = UVMapper()
    descriptionExtractor = DescriptionExtractor()

    while True:
        # Get image from webcam
        return_value, image = cam.read()
        assert return_value, "Failed to read from web camera"

        # White balance the image to get better color features
        image = white_balance(image)
        debugImage = image.copy()

        # Send image to DensePose
        people = densepose.extract(image)
        debugImage = densepose.renderDebug(debugImage, people)

        # Refine DensePose output to get actual people
        people = sanitizer.extract(people)
        debugImage = sanitizer.renderDebug(debugImage, people, alpha=0.2)

        # Track the people (which modifies the people objects in place)
        tracker.extract(people, True)
        debugImage = tracker.renderDebug(debugImage, people)

        # Extract UV map for each person
        peopleMaps = uvMapper.extract(people, image)
        peopleTextures = uvMapper.getPeopleTexture(peopleMaps)
        for i in range(len(peopleTextures)):
            cv2.imshow("UV image " + str(i), peopleTextures[i])

        # Classify what the person is wearing
        clothes = descriptionExtractor.extract(peopleMaps)

        # Show image
        print("Show image")
        cv2.imshow("debug image", debugImage)

        # Quit on escape
        if cv2.waitKey(1) == 27:
            break

        print("")

    cv2.destroyAllWindows()
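
white_balance() is called here but not defined in the excerpt. A plausible gray-world implementation is sketched below; this is an assumption about what the helper does, not the project's actual code:

import numpy as np

def white_balance(image):
    # Gray-world assumption: scale each BGR channel so its mean
    # matches the overall mean brightness (hypothetical helper).
    scaled = image.astype(np.float32)
    channel_means = scaled.reshape(-1, 3).mean(axis=0)
    scaled *= channel_means.mean() / np.maximum(channel_means, 1e-6)
    return np.clip(scaled, 0, 255).astype(np.uint8)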
Code example #3
    def _initTraining(self, learningRate, datasetName, useLMDB):
        self.datasetName = datasetName

        from DenseSense.algorithms.DensePoseWrapper import DensePoseWrapper
        from DenseSense.algorithms.Sanitizer import Sanitizer

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")

        if datasetName in ActionClassifier.COCO_Datasets:
            print("Loading COCO dataset: " + datasetName)
            from pycocotools.coco import COCO
            from os import path

            annFile = topDir + '/annotations/instances_{}.json'.format(
                datasetName)
            self.cocoPath = topDir + '/data/{}'.format(datasetName)

            self.coco = COCO(annFile)
            personCatID = self.coco.getCatIds(catNms=['person'])[0]
            self.dataset = self.coco.getImgIds(catIds=personCatID)

        elif datasetName in ActionClassifier.AVA_Datasets:
            print("Loading AVA dataset: " + datasetName)
            import csv
            from collections import defaultdict
            from DenseSense.utils.YoutubeLoader import YoutubeLoader

            annFile = topDir + "/annotations/{}.csv".format(
                datasetName.replace("_predictive", ""))
            self.dataset = defaultdict(lambda: defaultdict(defaultdict))
            with open(annFile, 'r') as csvFile:
                reader = csv.reader(csvFile)
                for row in reader:
                    video, t, x1, y1, x2, y2, action, person = row
                    actions = {action}
                    if person in self.dataset[video][t]:
                        actions = actions.union(
                            self.dataset[video][t][person]["actions"])
                    self.dataset[video][t][person] = {
                        "bbox": (x1, y1, x2, y2),
                        "actions": actions
                    }

            ordered_data = []
            for key, video in self.dataset.items():
                ordered_data.append((key, []))
                for t, annotation in video.items():
                    ordered_data[-1][1].append((int(t), annotation))
                ordered_data[-1][1].sort(key=lambda x: x[0])

            self.dataset = ordered_data

            self.youtubeLoader = YoutubeLoader(verbose=False)
            for key, video in self.dataset:
                self.youtubeLoader.queue_video(key, video[0][0], video[-1][0])

            self.current_video_index = 0
            self.current_video_frame_index = 0

            self.tracker = Tracker()
        else:
            raise Exception("Unknown dataset")

        self.useLMDB = useLMDB
        if useLMDB:
            self.lmdb = LMDBHelper("a", max_size=1028 * 1028 * 1028 * 32)
            self.lmdb.verbose = False

        self.optimizer = torch.optim.Adam(self._AE_model.parameters(),
                                          lr=learningRate)
        self.loss_function = torch.nn.BCELoss()
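
The nested defaultdict in the AVA branch merges several CSV rows describing the same (video, timestamp, person) into one annotation whose action set grows row by row. A self-contained illustration with made-up rows:

from collections import defaultdict

# Two made-up AVA-style rows: same video/time/person, different action IDs
rows = [
    ("vid0", "902", "0.1", "0.2", "0.5", "0.9", "12", "p1"),
    ("vid0", "902", "0.1", "0.2", "0.5", "0.9", "17", "p1"),
]

dataset = defaultdict(lambda: defaultdict(defaultdict))
for video, t, x1, y1, x2, y2, action, person in rows:
    actions = {action}
    if person in dataset[video][t]:
        actions = actions.union(dataset[video][t][person]["actions"])
    dataset[video][t][person] = {"bbox": (x1, y1, x2, y2), "actions": actions}

print(dataset["vid0"]["902"]["p1"]["actions"])  # {'12', '17'}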
Code example #4
class ActionClassifier(DenseSense.algorithms.Algorithm.Algorithm):
    actions = {
        4: "dance",
        11: "sit",
        14: "walk",
        69: "hand wave",
        12: "idle",  # stand
        17: "idle",  # carry/hold (an object)
        36: "idle",  # lift/pick up
        37: "idle",  # listen
        47: "idle",  # put down
    }

    COCO_Datasets = ["val2014", "train2014", "val2017", "train2017"]
    AVA_Datasets = [
        "ava_val", "ava_train", "ava_val_predictive", "ava_train_predictive"
    ]

    def __init__(self):
        print("Initiating ActionClassifier")
        super().__init__()

        self._modelPath = None
        self._AE_model = AutoEncoder()
        self._training = False

    def loadModel(self, modelPath):  # TODO: load multiple models, refactor name
        self._modelPath = modelPath
        print("Loading ActionClassifier file from: " + self._modelPath)
        self._AE_model.load_state_dict(
            torch.load(self._modelPath, map_location=device))
        self._AE_model.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
        self._modelPath = modelPath
        print("Saving ActionClassifier model to: " + self._modelPath)
        torch.save(self._AE_model.state_dict(), self._modelPath)

    def extract_ae(self, people, delta_time=None):
        S = _tensorify_people(people)

        if S.shape[0] == 0:
            return

        # Run prediction
        with torch.no_grad():
            embeddings = self._AE_model.encode(S, delta_time)

        # Add prediction to people
        for i, embedding in enumerate(embeddings):
            people[i].pose_vector = embedding.detach().cpu().numpy()

    def _initTraining(self, learningRate, datasetName, useLMDB):
        self.datasetName = datasetName

        from DenseSense.algorithms.DensePoseWrapper import DensePoseWrapper
        from DenseSense.algorithms.Sanitizer import Sanitizer

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")

        if datasetName in ActionClassifier.COCO_Datasets:
            print("Loading COCO dataset: " + datasetName)
            from pycocotools.coco import COCO
            from os import path

            annFile = topDir + '/annotations/instances_{}.json'.format(
                datasetName)
            self.cocoPath = topDir + '/data/{}'.format(datasetName)

            self.coco = COCO(annFile)
            personCatID = self.coco.getCatIds(catNms=['person'])[0]
            self.dataset = self.coco.getImgIds(catIds=personCatID)

        elif datasetName in ActionClassifier.AVA_Datasets:
            print("Loading AVA dataset: " + datasetName)
            import csv
            from collections import defaultdict
            from DenseSense.utils.YoutubeLoader import YoutubeLoader

            annFile = topDir + "/annotations/{}.csv".format(
                datasetName.replace("_predictive", ""))
            self.dataset = defaultdict(lambda: defaultdict(defaultdict))
            with open(annFile, 'r') as csvFile:
                reader = csv.reader(csvFile)
                for row in reader:
                    video, t, x1, y1, x2, y2, action, person = row
                    actions = {action}
                    if person in self.dataset[video][t]:
                        actions = actions.union(
                            self.dataset[video][t][person]["actions"])
                    self.dataset[video][t][person] = {
                        "bbox": (x1, y1, x2, y2),
                        "actions": actions
                    }

            ordered_data = []
            for key, video in self.dataset.items():
                ordered_data.append((key, []))
                for t, annotation in video.items():
                    ordered_data[-1][1].append((int(t), annotation))
                ordered_data[-1][1].sort(key=lambda x: x[0])

            self.dataset = ordered_data

            self.youtubeLoader = YoutubeLoader(verbose=False)
            for key, video in self.dataset:
                self.youtubeLoader.queue_video(key, video[0][0], video[-1][0])

            self.current_video_index = 0
            self.current_video_frame_index = 0

            self.tracker = Tracker()
        else:
            raise Exception("Unknown dataset")

        self.useLMDB = useLMDB
        if useLMDB:
            self.lmdb = LMDBHelper("a", max_size=1028 * 1028 * 1028 * 32)
            self.lmdb.verbose = False

        self.optimizer = torch.optim.Adam(self._AE_model.parameters(),
                                          lr=learningRate)
        self.loss_function = torch.nn.BCELoss()

    def _load(self, index=None):  # Load next if index is None
        if self.datasetName in ActionClassifier.COCO_Datasets:
            people = None
            # Load image from disk and process
            cocoImage = self.coco.loadImgs(self.dataset[index])[0]
            if self.useLMDB:
                people = self.lmdb.get("DensePoseWrapper_Sanitized_Coco",
                                       str(cocoImage["id"]))

            if people is None:
                image = cv2.imread(self.cocoPath + "/" +
                                   cocoImage["file_name"])
                if image is None:
                    raise Exception("Could not find image: " + str(index))

                people = self.denseposeExtractor.extract(image)
                people = self.sanitizer.extract(people)
                if self.useLMDB:
                    self.lmdb.save("DensePoseWrapper_Sanitized_Coco",
                                   str(cocoImage["id"]), people)
            return people, cocoImage

        elif self.datasetName in ActionClassifier.AVA_Datasets:
            data = None
            image = None
            people, frame_time, is_last = None, None, False
            key = self.dataset[self.current_video_index][0]

            if self.useLMDB:
                data = self.lmdb.get(
                    "DensePoseWrapper_Sanitized_AVA",
                    str(key) + "_" + str(self.current_video_frame_index))

            if data is None:
                image, frame_time, is_last = self.youtubeLoader.get(
                    self.current_video_index, self.current_video_frame_index)
                if image is None:
                    people = []
                    frame_time = 0
                else:
                    people = self.denseposeExtractor.extract(image)
                    people = self.sanitizer.extract(people)

                if self.useLMDB:  # Save processed data
                    self.lmdb.save(
                        "DensePoseWrapper_Sanitized_AVA",
                        str(key) + "_" + str(self.current_video_frame_index),
                        (people, frame_time, is_last))

            else:
                people, frame_time, is_last = data

            timestamp = np.round(frame_time)
            ava_annotation = None

            sameTimestamp = [
                v[1] for v in self.dataset[self.current_video_index][1]
                if v[0] == timestamp
            ]
            if len(sameTimestamp) == 1:
                ava_annotation = sameTimestamp[0]

            # To show the whole dataset as it's being downloaded
            if image is not None:
                if ava_annotation is not None:
                    for k, p in ava_annotation.items():
                        bbox = np.array([
                            float(p["bbox"][0]),
                            float(p["bbox"][1]),
                            float(p["bbox"][2]),
                            float(p["bbox"][3])
                        ])
                        p1 = bbox[:2] * np.array(
                            [image.shape[1], image.shape[0]], dtype=np.float64)
                        p2 = bbox[2:] * np.array(
                            [image.shape[1], image.shape[0]], dtype=np.float64)
                        image = cv2.rectangle(image,
                                              tuple(p1.astype(np.int32)),
                                              tuple(p2.astype(np.int32)),
                                              (20, 20, 200), 1)
                cv2.imshow("frame", image)
                cv2.waitKey(1)

            # Advance to the next frame, or to the next video if this frame was the last
            if is_last:
                self.current_video_frame_index = 0
                self.current_video_index += 1
                if len(self.dataset) == self.current_video_index:
                    self.current_video_index = 0
            else:
                self.current_video_frame_index += 1

            return people, frame_time, is_last, ava_annotation

    def trainAutoEncoder(self,
                         epochs=100,
                         learningRate=0.005,
                         dataset="Coco",
                         useLMDB=True,
                         printUpdateEvery=40,
                         visualize=0,
                         tensorboard=False):
        self._training = True
        self._initTraining(learningRate, dataset, useLMDB)

        # Tensorboard setup
        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter

            if type(tensorboard) == str:
                writer = SummaryWriter(topDir + "/data/tensorboard/" +
                                       tensorboard)
            else:
                writer = SummaryWriter(topDir + "/data/tensorboard/")
            tensorboard = True

        # Start the training process
        total_iterations = len(self.dataset)
        visualize_counter = 0
        open_windows = set()

        if self.datasetName in ActionClassifier.COCO_Datasets:
            print("Starting COCO dataset training")
            for epoch in range(epochs):
                epochLoss = np.float64(0)
                for i in range(total_iterations):
                    people, annotation = self._load(i)
                    S = _tensorify_people(people)

                    if S.shape[0] == 0:
                        continue

                    # Run prediction
                    embedding = self._AE_model.encode(S)
                    out = self._AE_model.decode(embedding)

                    # Optimize
                    lossSize = self.loss_function(out, S)

                    lossSize.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()
                    lossSize = lossSize.cpu().item()

                    # Give feedback of training process
                    epochLoss += lossSize / total_iterations
                    visualize_counter += 1
                    if (i - 1) % printUpdateEvery == 0:
                        print("Iteration {} / {}, epoch {} / {}".format(
                            i, total_iterations, epoch, epochs))
                        print("Loss size: {}\n".format(lossSize /
                                                       printUpdateEvery))

                    if visualize != 0 and visualize <= visualize_counter:
                        visualize_counter = 0
                        new_open_windows = set()
                        for index, _ in enumerate(S):
                            inpS = (S[index, 0].detach()).cpu().to(
                                torch.float).numpy()
                            outS = (out[index, 0].detach()).cpu().to(
                                torch.float32).numpy()
                            emb = embedding.detach().cpu().numpy()
                            debug_image = self._get_ae_from_embedding(
                                index, inpS, emb, outS, None)
                            cv2.imshow("person " + str(index), debug_image)
                            new_open_windows.add("person " + str(index))
                            break  # Only show one person

                        for window in open_windows.difference(
                                new_open_windows):
                            cv2.destroyWindow(window)
                        open_windows = new_open_windows
                        cv2.waitKey(1)

                    if tensorboard:
                        absI = i + epoch * total_iterations
                        writer.add_scalar("Loss size", lossSize, absI)

                print("Finished epoch {} / {}. Loss size:".format(
                    epoch, epochs, epochLoss))
                self.saveModel(self._modelPath)

        elif self.datasetName in ActionClassifier.AVA_Datasets:
            # Unfortunately, the whole AVA dataset has to be traversed once to determine its size in frames
            print("Going through the AVA dataset once to determine its size")
            total_iterations = 0
            for video_i in range(len(self.dataset)):
                is_last = False
                while not is_last:
                    people, frame_time, is_last, annotation = self._load()  # Load next
                    total_iterations += 1
                    if (total_iterations - 1) % 500 == 0:
                        print("Frame/iteration {} (video {} / {})".format(
                            total_iterations, video_i, len(self.dataset)))
            print("Total number of iterations are {}".format(total_iterations))

            print("Starting AVA dataset training")
            last_frame_time = None
            last_people = []
            S_next = None
            current_video = 0
            was_last = False
            for epoch in range(epochs):
                epochLoss = np.float64(0)
                for i in range(total_iterations):
                    people, frame_time, is_last, annotation = self._load()  # Load next
                    current_video += is_last

                    if "predictive" in self.datasetName:
                        # Track the next frame
                        self.tracker.extract(people, time_now=frame_time)
                        if is_last:  # If new video next
                            self.tracker = Tracker()
                            last_frame_time = None

                        # Only save the people who exist in all frames
                        old_ids = list(map(lambda p: p.id, last_people))
                        new_ids = list(map(lambda p: p.id, people))

                        old_people = list(
                            filter(lambda p: p.id in new_ids,
                                   last_people.copy()))
                        new_people = list(
                            filter(lambda p: p.id in old_ids, people.copy()))

                        # Filter old Ss
                        S = _tensorify_people(old_people, True)
                        S_next = _tensorify_people(new_people, False)

                        last_people = people
                    else:
                        frame_time = last_frame_time
                        S = _tensorify_people(people)

                    if S.shape[0] == 0:
                        continue

                    delta_time = 0
                    if last_frame_time is not None and was_last is False:
                        delta_time = frame_time - last_frame_time
                    last_frame_time = frame_time

                    # Run prediction
                    embedding = self._AE_model.encode(S, delta_time)
                    out = self._AE_model.decode(embedding)

                    # Optimize
                    if "predictive" in self.datasetName:
                        lossSize = self.loss_function(out, S_next)
                    else:
                        lossSize = self.loss_function(out, S)

                    lossSize.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()
                    lossSize = lossSize.cpu().item()

                    # Give feedback of training process
                    epochLoss += lossSize / total_iterations
                    visualize_counter += 1
                    was_last = is_last
                    if (i - 1) % printUpdateEvery == 0:
                        print("Iteration {} / {} (video {}/{}), epoch {} / {}".
                              format(i, total_iterations, current_video,
                                     len(self.dataset), epoch, epochs))
                        print("Loss size: {}\n".format(lossSize /
                                                       printUpdateEvery))

                    if visualize != 0 and visualize <= visualize_counter:
                        visualize_counter = 0
                        new_open_windows = set()
                        for index, _ in enumerate(S):
                            inpS = (S[index, 0].detach()).cpu().to(
                                torch.float).numpy()
                            outS = (out[index, 0].detach()).cpu().to(
                                torch.float32).numpy()
                            emb = embedding.detach().cpu().numpy()
                            debug_image = self._get_ae_from_embedding(
                                index, inpS, emb, outS, S_next)
                            cv2.imshow("person " + str(index), debug_image)
                            new_open_windows.add("person " + str(index))
                            break  # Only show one person

                        for window in open_windows.difference(
                                new_open_windows):
                            cv2.destroyWindow(window)
                        open_windows = new_open_windows
                        cv2.waitKey(1)

                    if tensorboard:
                        absI = i + epoch * total_iterations
                        writer.add_scalar("Loss size", lossSize, absI)

                print("Finished epoch {} / {}. Loss size:".format(
                    epoch, epochs, epochLoss))
                self.saveModel(self._modelPath)

    def _get_ae_from_embedding(self, index, S, embedding, out, S_next):
        S = (S * 255).astype(np.uint8)
        out = (out * 255).astype(np.uint8)
        emb = ((embedding[index] * 0.5 + 1.0) * 255).astype(np.uint8)
        emb = np.expand_dims(emb, axis=0)
        emb = np.repeat(emb, repeats=14, axis=0).T
        emb = np.repeat(emb, repeats=10, axis=0)
        emb = np.vstack((emb, np.zeros((56 - 5 * 10, 14), dtype=np.uint8)))
        comparison = np.hstack((S, emb, out))
        if S_next is not None:
            Sn = (S_next[index, 0].detach() * 255).cpu().to(
                torch.uint8).numpy()
            Sn = np.hstack((np.zeros((56, 56 + 14)), Sn))
            comparison = np.vstack((comparison, Sn)).astype(np.uint8)

        return cv2.applyColorMap(comparison, cv2.COLORMAP_JET)

    def get_ae_debug(self, people):
        combined = []
        for index, person in enumerate(people):
            embedding = torch.from_numpy(person.pose_vector).to(device)
            embedding = embedding.reshape((1, embedding.shape[0]))
            out = self._AE_model.decode(embedding)
            out = out[0, 0].detach().cpu().to(torch.float32).numpy()
            emb = embedding.detach().cpu().numpy()
            image = self._get_ae_from_embedding(0, person.S, emb, out, None)
            combined.append(image)
        return combined
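
For inference only, the class boils down to a few calls. A minimal sketch, assuming the model file exists and that `people` comes out of the DensePose + Sanitizer pipeline shown in the webcam examples:

# Hypothetical usage; the model path and the upstream pipeline objects are assumptions.
classifier = ActionClassifier()
classifier.loadModel(topDir + "/models/ActionClassifier_AutoEncoder.pth")

people = sanitizer.extract(densepose.extract(image))
classifier.extract_ae(people, delta_time=1 / 30.0)  # attaches a pose_vector to each person
for person in people:
    print(person.pose_vector)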
Code example #5
File: webcam.py Project: Axelwickm/DenseSense
def main():
    densepose = DensePoseWrapper()
    sanitizer = Sanitizer()
    sanitizer.load_model("./models/Sanitizer.pth")
    tracker = Tracker()
    uvMapper = UVMapper()
    descriptionExtractor = DescriptionExtractor()
    descriptionExtractor.loadModel("./models/DescriptionExtractor.pth")
    actionClassifier = ActionClassifier()
    actionClassifier.loadModel("./models/ActionClassifier_AutoEncoder.pth")

    cam = cv2.VideoCapture(0)
    frameIndex = 0
    frame_time = time.time()
    oldOpenWindows = set()

    while True:
        # Get image from webcam
        return_value, image = cam.read()
        assert return_value, "Failed to read from web camera"
        delta_time = time.time() - frame_time
        frame_time = time.time()

        # White balance the image to get better color features
        image = white_balance(image)
        debugImage = image.copy()

        # Send image to DensePose
        people = densepose.extract(image)
        debugImage = densepose.renderDebug(debugImage, people)

        print("DensePose people:", len(people))

        # Refine DensePose output to get actual people
        people = sanitizer.extract(people)
        debugImage = sanitizer.renderDebug(debugImage, people, alpha=0.2)
        print("Sanitizer people", len(people))

        # Track the people (which modifies the people objects in place)
        tracker.extract(people, True)
        debugImage = tracker.renderDebug(debugImage, people)
        print("Tracker people", len(people))

        # Extract UV map for each person
        peopleMaps = uvMapper.extract(people, image)
        peopleTextures = uvMapper.getPeopleTexture(peopleMaps)

        # Classify what the person is wearing
        clothes = descriptionExtractor.extract(peopleMaps)
        clothingImages = descriptionExtractor.getLabelImage()

        # Get pose embedding
        actionClassifier.extract_ae(people, delta_time)
        debugACAE = actionClassifier.get_ae_debug(people)

        # Per person window management
        newOpenWindows = set()
        for i, person in enumerate(people):
            # Show UV map and label
            S_ROI = (person.I * (255 / 25)).astype(np.uint8)
            S_ROI = cv2.applyColorMap(S_ROI, cv2.COLORMAP_PARULA)
            S_ROI = cv2.resize(S_ROI, (160, 160))
            personWindow = cv2.resize(peopleTextures[i],
                                      (int(5 / 3 * 160), 160))
            coloredSlice = np.zeros((160, 3, 3), dtype=np.uint8)
            coloredSlice[:, :] = person.color
            personWindow = np.hstack(
                (coloredSlice, S_ROI, personWindow, clothingImages[i]))

            # View window
            windowName = "UV image " + str(person.id)
            newOpenWindows.add(windowName)
            cv2.imshow(windowName, personWindow)
            cv2.resizeWindow(windowName, 600, 600)

            # ... and a window for ac ae
            windowName = "ActionClassifier_AutoEncoder image " + str(person.id)
            newOpenWindows.add(windowName)
            cv2.imshow(
                windowName,
                cv2.resize(
                    debugACAE[i],
                    (debugACAE[i].shape[1] * 3, debugACAE[i].shape[0] * 3)))

        for oldWindow in oldOpenWindows:
            if oldWindow not in newOpenWindows:
                cv2.destroyWindow(oldWindow)
        oldOpenWindows = newOpenWindows

        # Show image
        print("Show frame:", frameIndex, "\n")
        cv2.imshow("debug image", debugImage)
        frameIndex += 1

        # Quit on escape
        if cv2.waitKey(1) == 27:
            break

    cv2.destroyAllWindows()
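
The per-person window bookkeeping in this loop (diffing the set of window names between frames so stale windows get closed) is a useful OpenCV pattern in its own right. A stripped-down sketch with a fake, varying person count:

import cv2
import numpy as np

oldOpenWindows = set()
for frame in range(4):
    newOpenWindows = set()
    for i in range(frame % 2 + 1):  # pretend the number of people varies per frame
        name = "UV image " + str(i)
        cv2.imshow(name, np.zeros((64, 64), dtype=np.uint8))
        newOpenWindows.add(name)
    for old in oldOpenWindows - newOpenWindows:
        cv2.destroyWindow(old)  # close windows of people who left the frame
    oldOpenWindows = newOpenWindows
    cv2.waitKey(100)
cv2.destroyAllWindows()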
Code example #6
class DescriptionExtractor(DenseSense.algorithms.Algorithm.Algorithm):
    iteration = 0

    availableLabels = {
        0: "none",
        1: "short sleeve top",
        2: "long sleeve top",
        3: "short sleeve outwear",
        4: "long sleeve outwear",
        5: "vest",
        6: "sling",
        7: "shorts",
        8: "trousers",
        9: "skirt",
        10: "short sleeve dress",
        11: "long sleeve dress",
        12: "dress vest",
        13: "sling dress"
    }

    #  0 : none
    #  1 : trousers
    #  2 : R hand
    #  3 : L hand
    #  4 : R foot
    #  5 : L foot
    #  6 : R thigh
    #  7 : L thigh
    #  8 : R calf
    #  9 : L calf
    # 10 : L upper arm
    # 11 : R upper arm
    # 12 : L lower arm
    # 13 : R lower arm
    # 14 : head

    labelColorCheck = {
        0: [],
        1: [1, 10, 11],
        2: [1, 10, 11, 12, 13],
        3: [1, 10, 11],
        4: [1, 10, 11, 12, 13],
        5: [1, 10, 11],
        6: [1, 10, 11],
        7: [6, 7],
        8: [6, 7, 8, 9],
        9: [6, 7],
        10: [1, 10, 11],
        11: [1, 10, 11, 12, 13],
        12: [1, 10, 11],
        13: [1, 10, 11]
    }

    colors = [((255, 255, 255), "white"), ((210, 209, 218), "white"),
              ((145, 164, 164), "white"), ((169, 144, 135), "white"),
              ((197, 175, 177), "white"), ((117, 126, 115), "white"),
              ((124, 126, 129), "white"), ((0, 0, 0), "black"),
              ((10, 10, 10), "black"), ((1, 6, 9), "black"),
              ((5, 10, 6), "black"), ((18, 15, 11), "black"),
              ((18, 22, 9), "black"), ((16, 16, 14), "black"),
              ((153, 153, 0), "yellow"), ((144, 115, 99), "pink"),
              ((207, 185, 174), "pink"), ((206, 191, 131), "pink"),
              ((208, 179, 54), "pink"), ((202, 19, 43), "red"),
              ((206, 28, 50), "red"), ((82, 30, 26), "red"),
              ((156, 47, 35), "orange"), ((126, 78, 47), "wine red"),
              ((74, 72, 77), "green"), ((31, 38, 38), "green"),
              ((40, 52, 79), "green"), ((100, 82, 116), "green"),
              ((8, 17, 55), "green"), ((29, 31, 37), "dark green"),
              ((46, 46, 36), "blue"), ((29, 78, 60), "blue"),
              ((74, 97, 85), "blue"), ((60, 68, 67), "blue"),
              ((181, 195, 232), "neon blue"), ((40, 148, 184), "bright blue"),
              ((210, 40, 69), "orange"), ((66, 61, 52), "gray"),
              ((154, 120, 147), "gray"), ((124, 100, 86), "gray"),
              ((46, 55, 46), "gray"), ((119, 117, 122), "gray"),
              ((88, 62, 62), "brown"), ((60, 29, 17), "brown"),
              ((153, 50, 204), "purple"), ((77, 69, 30), "purple"),
              ((153, 91, 14), "violet"), ((207, 185, 151), "beige")]

    colorsHSV = None

    class Network(nn.Module):
        def __init__(self, labels):  # FIXME: make this work!
            super(DescriptionExtractor.Network, self).__init__()
            self.layer1 = nn.Sequential(  # Fixme: 3x15 in channels
                nn.Conv2d(in_channels=3 * 15,
                          out_channels=15,
                          kernel_size=3,
                          stride=1,
                          padding=1), nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))
            self.layer2 = nn.Sequential(
                nn.Conv2d(in_channels=15,
                          out_channels=10,
                          kernel_size=3,
                          stride=1,
                          padding=1), nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))

            self.fc1 = nn.Linear(360, 180)
            self.relu1 = nn.ReLU(inplace=False)
            self.fc2 = nn.Linear(180, labels)

        def forward(self, x):
            batchSize = x.shape[0]
            x = x.view(batchSize, 15 * 3, 32, 32)
            x = self.layer1(x)
            x = self.layer2(x)
            x = x.view(batchSize, -1)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.fc2(x)
            return x

    def __init__(self, model=None, db=None):
        print("Initiating DescriptionExtractor")
        super().__init__()

        self.classifier = DescriptionExtractor.Network(
            len(self.availableLabels))
        self.modelPath = None
        self._training = False
        self.predictions = []
        self.peopleLabels = []

        # Init color lookup table (HSV values for nearest-color search)
        self.colorsHSV = []
        for c in self.colors:
            RGBobj = sRGBColor(c[0][0], c[0][1], c[0][2])
            self.colorsHSV.append(convert_color(RGBobj, HSVColor))

    def loadModel(self, modelPath):
        self.modelPath = modelPath
        print("Loading DescriptionExtractor file from: " + self.modelPath)
        self.classifier.load_state_dict(
            torch.load(self.modelPath, map_location=device))
        self.classifier.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
        self.modelPath = modelPath
        print("Saving DescriptionExtractor model to: " + self.modelPath)
        torch.save(self.classifier.state_dict(), self.modelPath)

    def _initTraining(self, learningRate, dataset, useDatabase):
        # Dataset is DeepFashion2
        print("Initiating training of DescriptionExtractor")
        print("Loading DeepFashion2")
        from torchvision import transforms
        from torchvision.datasets import CocoDetection

        self.annFile = topDir + '/annotations/deepfashion2_{}.json'.format(
            dataset)
        self.cocoImgPath = topDir + '/data/DeepFashion2/{}'.format(dataset)

        self.useDatabase = useDatabase
        self.dataset = CocoDetection(
            self.cocoImgPath,
            self.annFile,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: x.permute(1, 2, 0)),
                transforms.Lambda(lambda x: (x * 255).byte().numpy()),
                transforms.Lambda(lambda x: x[:, :, ::-1])
            ]))

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")
        self.uvMapper = UVMapper()

        # PyTorch things
        self.optimizer = torch.optim.Adam(self.classifier.parameters(),
                                          lr=learningRate,
                                          amsgrad=True)
        self.lossFunction = torch.nn.BCEWithLogitsLoss()

    def extract(self, peopleMaps):
        if len(peopleMaps) == 0:
            return []
        self.peopleLabels = []
        determineColorThreshold = 0.3  # FIXME: tune

        # Do label classification
        peopleMapsDevice = torch.Tensor(peopleMaps).to(device)
        self.predictions = self.classifier.forward(peopleMapsDevice)
        self.predictions = self.predictions.sigmoid()
        self.predictions = self.predictions.detach().cpu().numpy()

        # Compile predictions into nice dictionary
        for personIndex, prediction in enumerate(self.predictions):
            labels = {}
            # Several labels may use the same areas for determining color,
            # so keep a lookup table of per-area averages that have already been computed
            averages = np.full((peopleMaps.shape[1], 3), -1, dtype=np.int64)
            for i, value in enumerate(prediction):
                if i == 0:  # label 0 is "none" and is not trained on anyway
                    continue
                label = self.availableLabels[i]

                info = {"activation": value}
                if determineColorThreshold < value:
                    # If certainty is above threshold, take the time to calculate the average color
                    averageOfAreas = np.zeros(3, dtype=np.int64)
                    relevantAreas = torch.as_tensor(
                        self.labelColorCheck[i], dtype=torch.int64).to(device)
                    nonBlackAreas = 0
                    for areaIndex in relevantAreas:
                        if (averages[areaIndex] == -1).all():
                            # Calculate average
                            relevantPixels = peopleMapsDevice[personIndex,
                                                              areaIndex, :, :]
                            relevantPixels = relevantPixels[
                                torch.sum(relevantPixels, axis=2) != 0]
                            if relevantPixels.shape[0] == 0:
                                # All black
                                averages[areaIndex] = np.zeros(3)
                                continue
                            average = relevantPixels.mean(
                                axis=0).cpu().numpy().astype(np.uint8)
                            averages[areaIndex] = average

                        nonBlackAreas += 1
                        averageOfAreas += averages[areaIndex]

                    averageOfAreas = (averageOfAreas /
                                      float(max(nonBlackAreas, 1))).astype(np.uint8)
                    info.update(self._findColorName(averageOfAreas))

                labels[label] = info

            self.peopleLabels.append(labels)

        return self.peopleLabels

    def train(self,
              epochs=100,
              learningRate=0.005,
              dataset="Coco",
              useDatabase=True,
              printUpdateEvery=40,
              visualize=False,
              tensorboard=False):
        self._training = True
        self._initTraining(learningRate, dataset, useDatabase)

        # Deal with tensorboard
        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter

            if type(tensorboard) == str:
                writer = SummaryWriter(topDir + "/data/tensorboard/" +
                                       tensorboard)
            else:
                writer = SummaryWriter(topDir + "/data/tensorboard/")
            tensorboard = True

        def findBestROI(ROIs, label):
            bestMatch = 0
            bestIndex = -1
            for i, ROI in enumerate(ROIs):
                lbox = np.array(label["bbox"])
                larea = lbox[2:] - lbox[:2]
                larea = larea[0] * larea[1]
                rbox = ROI.bounds
                rarea = rbox[2:] - rbox[:2]
                rarea = rarea[0] * rarea[1]

                SI = np.maximum(0, np.minimum(lbox[2], rbox[2]) - np.maximum(lbox[0], rbox[0])) * \
                     np.maximum(0, np.minimum(lbox[3], rbox[3]) - np.maximum(lbox[1], rbox[1]))
                SU = larea + rarea - SI
                overlap = SI / SU if SU != 0 else 0.0
                if bestMatch < overlap:
                    bestMatch = overlap
                    bestIndex = i
            return bestIndex

        Iterations = len(self.dataset)

        print("Starting training")
        for epoch in range(epochs):
            epochLoss = np.float64(0)
            for i in range(Iterations):
                ROIs, peopleTextures, labels = self._load(i)

                # Figure out what ROI belongs to what label
                groundtruth = np.zeros((len(ROIs), 14), dtype=np.float32)
                for label in labels:
                    mostMatching = findBestROI(ROIs, label)
                    if mostMatching != -1:
                        groundtruth[mostMatching][label["category_id"]] = 1

                # Most items in this dataset are skipped because no people were found
                # or none overlap with the ground truth
                if len(ROIs) == 0 or not np.any(groundtruth != 0):
                    continue

                groundtruth = torch.from_numpy(groundtruth).to(device)

                # Apply noise to peopleTextures
                noise = np.random.randn(*peopleTextures.shape) * 5
                peopleTextures = peopleTextures.astype(
                    np.int32) + noise.astype(np.int32)
                peopleTextures = np.clip(peopleTextures, 0, 255)
                peopleTextures = peopleTextures.astype(np.uint8)

                peopleTextures = torch.Tensor(peopleTextures).to(device)
                predictions = self.classifier.forward(peopleTextures)
                print(groundtruth)
                print(predictions)
                print("\n")

                lossSize = self.lossFunction(predictions, groundtruth)
                lossSize.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                lossSize = lossSize.cpu().item()

                epochLoss += lossSize / Iterations
                if (i - 1) % printUpdateEvery == 0:
                    print("Iteration {} / {}, epoch {} / {}".format(
                        i, Iterations, epoch, epochs))
                    print("Loss size: {}\n".format(lossSize /
                                                   printUpdateEvery))

                if tensorboard:
                    absI = i + epoch * Iterations
                    writer.add_scalar("Loss size", lossSize, absI)

            print("Finished epoch {} / {}. Loss size:".format(
                epoch, epochs, epochLoss))
            self.saveModel(self.modelPath)

        self._training = False

    def getLabelImage(self):
        images = []
        for personLabel in self.peopleLabels:
            # Sort labels by score
            labels = sorted(list(personLabel.items()),
                            key=lambda x: x[1]["activation"],
                            reverse=True)

            # Create image
            image = np.zeros((160, 210, 3))
            for i, label in enumerate(labels):
                name, classification = label
                text = "{0:4d}%   {1}".format(
                    int(classification["activation"] * 100), name)
                color = (255, 255, 255)
                if classification["activation"] < 0.75:  # FIXME: magic number, tune
                    color = (128, 128, 128)
                image = cv2.putText(image, text, (0, 12 + 12 * i),
                                    cv2.FONT_HERSHEY_DUPLEX, .3, color, 1,
                                    cv2.LINE_AA)

                # Add color
                if "bestMatch" in classification:
                    colorText = classification["bestMatch"][1]
                    colorTextColor = classification["color"]
                    colorTextColor = (int(colorTextColor[0]),
                                      int(colorTextColor[1]),
                                      int(colorTextColor[2]))
                    image = cv2.putText(image, colorText, (150, 12 + 12 * i),
                                        cv2.FONT_HERSHEY_DUPLEX, .3,
                                        colorTextColor, 1, cv2.LINE_AA)

            images.append(image.astype(np.uint8))

        return images

    def _load(self, index):
        cocoImage = self.dataset[index]
        ROIs = None
        if self.useDatabase:
            ROIs = self.lmdb.get("DensePoseWrapper_Sanitized_deepfashion2",
                                 str(cocoImage["id"]))
        if ROIs is None:
            ROIs = self.denseposeExtractor.extract(cocoImage[0])
            ROIs = self.sanitizer.extract(ROIs)
            if self.useDatabase:
                self.lmdb.save("DensePoseWrapper_Sanitized_deepfashion2",
                               str(cocoImage["id"]), ROIs)

        peopleTextures = None
        if self.useDatabase:
            peopleTextures = self.lmdb.get("UVMapper_deepfashion2", str(index))
        if peopleTextures is None:
            peopleTextures = self.uvMapper.extract(ROIs, cocoImage[0])
            if self.useDatabase:
                self.lmdb.save("UVMapper_deepfashion2", str(index),
                               peopleTextures)

        return ROIs, peopleTextures, cocoImage[1]

    def _findColorName(self, color):
        b = color[0]
        g = color[1]
        r = color[2]

        # Build an ANSI escape string that prints the color, colored, in the terminal
        colorRepr = '\033[{};2;{};{};{}m'.format(38, r, g, b) \
                    + "rgb("+str(r)+", "+str(g)+", "+str(b)+")"+'\033[0m'

        # Get nearest color name
        HSVobj = convert_color(sRGBColor(r, g, b), HSVColor)

        nearestIndex = -1
        diffMin = 100000
        for i in range(len(self.colorsHSV)):
            colEntry = self.colorsHSV[i]

            d = HSVobj.hsv_h - colEntry.hsv_h
            dh = min(abs(d), 360 - abs(d)) / 180.0
            ds = abs(HSVobj.hsv_s - colEntry.hsv_s)
            dv = abs(HSVobj.hsv_v - colEntry.hsv_v) / 255.0
            diff = np.sqrt(dh * dh + ds * ds + dv * dv)
            if diff < diffMin:
                diffMin = diff
                nearestIndex = i

        return {
            "color": tuple(color),
            "colorDistance": diffMin,
            "coloredStr": colorRepr,
            "bestMatch": self.colors[nearestIndex]
        }
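
_findColorName compares colors in HSV, treating hue as a circular quantity so that, for example, 350 deg and 10 deg are 20 deg apart rather than 340. The same distance metric in isolation (the sample values are illustrative):

import numpy as np

def hsv_distance(h1, s1, v1, h2, s2, v2):
    d = h1 - h2
    dh = min(abs(d), 360 - abs(d)) / 180.0  # wrap hue around the color circle
    ds = abs(s1 - s2)
    dv = abs(v1 - v2) / 255.0
    return np.sqrt(dh * dh + ds * ds + dv * dv)

print(hsv_distance(350, 0.5, 200, 10, 0.5, 200))   # small: hues 20 deg apart
print(hsv_distance(350, 0.5, 200, 170, 0.5, 200))  # large: hues opposite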
Code example #7
class DescriptionExtractor(DenseSense.algorithms.Algorithm.Algorithm):
    iteration = 0

    availableLabels = {
        0: "none",
        1: "short sleeve top",
        2: "long sleeve top",
        3: "short sleeve outwear",
        4: "long sleeve outwear",
        5: "vest",
        6: "sling",
        7: "shorts",
        8: "trousers",
        9: "skirt",
        10: "short sleeve dress",
        11: "long sleeve dress",
        12: "dress vest",
        13: "sling dress"
    }

    # FIXME: change because now using S
    labelBodyparts = {  # https://github.com/facebookresearch/DensePose/issues/64#issuecomment-405608749 PRAISE
        "boots": [5, 6],
        "footwear": [5, 6],
        "outer": [1, 2, 15, 17, 16, 18, 19, 21, 20, 22],
        "dress": [1, 2],
        "sunglasses": [],
        "pants": [7, 9, 8, 10, 11, 13, 12, 14],
        "top": [1, 2],
        "shorts": [7, 9, 8, 10],
        "skirt": [1, 2],
        "headwear": [23, 24],
        "scarfAndTie": []
    }

    colors = [  # TODO: use color model file
        ((255, 255, 255), "white"), ((210, 209, 218), "white"),
        ((145, 164, 164), "white"), ((169, 144, 135), "white"),
        ((197, 175, 177), "white"), ((117, 126, 115), "white"),
        ((124, 126, 129), "white"), ((0, 0, 0), "black"),
        ((10, 10, 10), "black"), ((1, 6, 9), "black"), ((5, 10, 6), "black"),
        ((18, 15, 11), "black"), ((18, 22, 9), "black"),
        ((16, 16, 14), "black"), ((153, 153, 0), "yellow"),
        ((144, 115, 99), "pink"), ((207, 185, 174), "pink"),
        ((206, 191, 131), "pink"), ((208, 179, 54), "pink"),
        ((202, 19, 43), "red"), ((206, 28, 50), "red"), ((82, 30, 26), "red"),
        ((156, 47, 35), "orange"), ((126, 78, 47), "wine red"),
        ((74, 72, 77), "green"), ((31, 38, 38), "green"),
        ((40, 52, 79), "green"), ((100, 82, 116), "green"),
        ((8, 17, 55), "green"), ((29, 31, 37), "dark green"),
        ((46, 46, 36), "blue"), ((29, 78, 60), "blue"), ((74, 97, 85), "blue"),
        ((60, 68, 67), "blue"), ((181, 195, 232), "neon blue"),
        ((40, 148, 184), "bright blue"), ((210, 40, 69), "orange"),
        ((66, 61, 52), "gray"), ((154, 120, 147), "gray"),
        ((124, 100, 86), "gray"), ((46, 55, 46), "gray"),
        ((119, 117, 122), "gray"), ((88, 62, 62), "brown"),
        ((60, 29, 17), "brown"), ((153, 50, 204), "purple"),
        ((77, 69, 30), "purple"), ((153, 91, 14), "violet"),
        ((207, 185, 151), "beige")
    ]

    colorsHSV = None

    class Network(nn.Module):
        def __init__(self, labels):  # FIXME: make this work!
            super(DescriptionExtractor.Network, self).__init__()
            self.layer1 = nn.Sequential(  # Fixme: 3x15 in channels
                nn.Conv2d(in_channels=3 * 15,
                          out_channels=15,
                          kernel_size=3,
                          stride=1,
                          padding=1), nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))
            self.layer2 = nn.Sequential(
                nn.Conv2d(in_channels=15,
                          out_channels=10,
                          kernel_size=3,
                          stride=1,
                          padding=1), nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))

            self.fc1 = nn.Linear(360, 180)
            self.relu1 = nn.ReLU(inplace=False)
            self.fc2 = nn.Linear(180, labels)
            self.softmax = nn.Softmax()

        def forward(self, x):
            batchSize = x.shape[0]
            x = x.view(batchSize, 15 * 3, 32, 32)
            x = self.layer1(x)
            x = self.layer2(x)
            x = x.view(batchSize, -1)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.fc2(x)
            #x = self.softmax(x)
            return x

    def __init__(self, model=None, db=None):
        print("Initiating DescriptionExtractor")
        super().__init__()

        self.classifier = DescriptionExtractor.Network(
            len(self.availableLabels))
        self.modelPath = None
        self._training = False

        # Init color lookup table (HSV values for nearest-color search)
        self.colorsHSV = []
        for c in self.colors:
            RGBobj = sRGBColor(c[0][0], c[0][1], c[0][2])
            self.colorsHSV.append(convert_color(RGBobj, HSVColor))

    def loadModel(self, modelPath):
        self.modelPath = modelPath
        print("Loading DescriptionExtractor file from: " + self.modelPath)
        self.classifier.load_state_dict(
            torch.load(self.modelPath, map_location=device))
        self.classifier.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
        self.modelPath = modelPath
        print("Saving DescriptionExtractor model to: " + self.modelPath)
        torch.save(self.classifier.state_dict(), self.modelPath)

    def _initTraining(self, learningRate, dataset, useDatabase):
        # Dataset is DeepFashion2
        print("Initiating training of DescriptionExtractor")
        print("Loading DeepFashion2")
        from torchvision import transforms
        from torchvision.datasets import CocoDetection

        self.annFile = './annotations/deepfashion2_{}.json'.format(dataset)
        self.cocoImgPath = './data/DeepFashion2/{}'.format(dataset)

        self.useDatabase = useDatabase
        self.dataset = CocoDetection(
            self.cocoImgPath,
            self.annFile,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: x.permute(1, 2, 0)),
                transforms.Lambda(lambda x: (x * 255).byte().numpy()),
                transforms.Lambda(lambda x: x[:, :, ::-1])
            ]))

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.loadModel("./models/Sanitizer.pth")
        self.uvMapper = UVMapper()

        # PyTorch things
        self.optimizer = torch.optim.Adam(self.classifier.parameters(),
                                          lr=learningRate,
                                          amsgrad=True)
        self.lossFunction = torch.nn.BCEWithLogitsLoss()

    def extract(self, peopleMaps):
        labelsPeople = []
        # Do label classification
        for personMap in peopleMaps:
            # Run the classification on it
            pyTorchTexture = torch.from_numpy(
                np.array([np.moveaxis(personMap / 255.0, -1, 0)])).float()

            pyTorchTexture = pyTorchTexture.to(device)  # FIXME: Do in model
            labelVector = self.classifier(pyTorchTexture)[0]

            # Store the data
            labelVectorHost = labelVector.detach().cpu().numpy()
            labels = {}
            for j in range(len(labelVector)):
                label = list(self.availableLabels.values())[j]
                d = (self.onActivation - self.noActivation) / 2
                val = (labelVectorHost[j] - d) / d + 0.5

                info = {"activation": min(max(val, 0.0), 1.0)}
                if 0.7 < val:
                    color = self._findColorName(personMap,
                                                self.labelBodyparts[label])
                    if color != 0:
                        info.update(color)
                        # print(color["color"]+"  "+color["coloredStr"])
                labels[label] = info

            labelsPeople.append(labels)
        return labelsPeople

    def train(self,
              epochs=100,
              learningRate=0.005,
              dataset="Coco",
              useDatabase=True,
              printUpdateEvery=40,
              visualize=False,
              tensorboard=False):
        self._training = True
        self._initTraining(learningRate, dataset, useDatabase)

        # Deal with tensorboard
        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter

            if type(tensorboard) == str:
                writer = SummaryWriter("./data/tensorboard/" + tensorboard)
            else:
                writer = SummaryWriter("./data/tensorboard/")
            tensorboard = True

        def findBestROI(ROIs, label):
            bestMatch = 0
            bestIndex = -1
            for i, ROI in enumerate(ROIs):
                lbox = np.array(label["bbox"])
                larea = lbox[2:] - lbox[:2]
                larea = larea[0] * larea[1]
                rbox = ROI.bounds
                rarea = rbox[2:] - rbox[:2]
                rarea = rarea[0] * rarea[1]

                SI = np.maximum(0, np.minimum(lbox[2], rbox[2]) - np.maximum(lbox[0], rbox[0])) * \
                     np.maximum(0, np.minimum(lbox[3], rbox[3]) - np.maximum(lbox[1], rbox[1]))
                SU = larea + rarea - SI
                overlap = SI / SU if SU != 0 else 0.0
                if bestMatch < overlap:
                    bestMatch = overlap
                    bestIndex = i
            return bestIndex

        Iterations = len(self.dataset)

        print("Starting training")
        for epoch in range(epochs):
            epochLoss = np.float64(0)
            for i in range(Iterations):
                ROIs, peopleTextures, labels = self._load(i)

                # Figure out what ROI belongs to what label
                groundtruth = np.zeros((len(ROIs), 14), dtype=np.float32)
                for label in labels:
                    mostMatching = findBestROI(ROIs, label)
                    if mostMatching != -1:
                        groundtruth[mostMatching][label["category_id"]] = 1

                # Most items in this dataset are skipped because no people were found
                # or none overlap with the ground truth
                if len(ROIs) == 0 or not np.any(groundtruth != 0):
                    continue

                groundtruth = torch.from_numpy(groundtruth).to(device)

                # Apply noise to peopleTextures
                noise = np.random.randn(*peopleTextures.shape) * 5
                peopleTextures = peopleTextures.astype(
                    np.int32) + noise.astype(np.int32)
                peopleTextures = np.clip(peopleTextures, 0, 255)
                peopleTextures = peopleTextures.astype(np.uint8)

                peopleTextures = torch.Tensor(peopleTextures).to(device)
                predictions = self.classifier.forward(peopleTextures)

                lossSize = self.lossFunction(predictions, groundtruth)
                lossSize.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                lossSize = lossSize.cpu().item()

                epochLoss += lossSize / Iterations
                if (i - 1) % printUpdateEvery == 0:
                    print("Iteration {} / {}, epoch {} / {}".format(
                        i, Iterations, epoch, epochs))
                    print("Loss size: {}\n".format(lossSize /
                                                   printUpdateEvery))

                if tensorboard:
                    absI = i + epoch * Iterations
                    writer.add_scalar("Loss size", lossSize, absI)

                # Show visualization
                if visualize:
                    pass  # TODO
                    """
                    image = self.renderDebug(image)
                    plt.ion()
                    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                    plt.draw()
                    plt.pause(4)
                    """

            print("Finished epoch {} / {}. Loss size:".format(
                epoch, epochs, epochLoss))
            self.saveModel(self.modelPath)

        self._training = False

    def _load(self, index):
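        # Load one sample; DensePose ROIs and UV textures are cached in LMDB
        # (when enabled) so repeated epochs don't recompute them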
        cocoImage = self.dataset[index]
        ROIs = None
        if self.useDatabase:
            ROIs = self.lmdb.get(DensePoseWrapper, "deepfashion2" + str(index))
        if ROIs is None:
            ROIs = self.denseposeExtractor.extract(cocoImage[0])
            ROIs = self.sanitizer.extract(ROIs)
            if self.useDatabase:
                self.lmdb.save(DensePoseWrapper, "deepfashion2" + str(index),
                               ROIs)

        peopleTextures = None
        if self.useDatabase:
            peopleTextures = self.lmdb.get(UVMapper,
                                           "deepfashion2" + str(index))
        if peopleTextures is None:
            peopleTextures = self.uvMapper.extract(ROIs, cocoImage[0])
            if self.useDatabase:
                self.lmdb.save(UVMapper, "deepfashion2" + str(index),
                               peopleTextures)

        return ROIs, peopleTextures, cocoImage[1]

    def _findColorName(self, personMap, areas):
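        # personMap is assumed to be a UV texture atlas laid out as a 5-column
        # grid of body-part patches; `areas` lists the grid cells to sample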
        areaS = int(personMap.shape[1] / 5)
        Rs, Gs, Bs = [], [], []

        # Pick out colors
        for i in areas:
            xMin = int((i % 5) * areaS)
            yMin = int(np.floor(i / 5) * areaS)
            for _ in range(20):
                x = np.random.randint(xMin, xMin + areaS)
                y = np.random.randint(yMin, yMin + areaS)
                b = personMap[x, y, 0]
                g = personMap[x, y, 1]
                r = personMap[x, y, 2]

                if r != 0 or b != 0 or g != 0:
                    Rs.append(r)
                    Gs.append(g)
                    Bs.append(b)

        if len(Rs) == 0:  # no non-background pixels were sampled
            return 0

        # Find mean color
        r = np.mean(np.array(Rs)).astype(np.uint8)
        g = np.mean(np.array(Gs)).astype(np.uint8)
        b = np.mean(np.array(Bs)).astype(np.uint8)

        # Build an ANSI true-color escape so the sampled color itself can be shown in the terminal
        RESET = '\033[0m'

        def get_color_escape(r, g, b, background=False):
            return '\033[{};2;{};{};{}m'.format(48 if background else 38, r, g,
                                                b)

        colorRepr = (get_color_escape(r, g, b)
                     + "rgb({}, {}, {})".format(r, g, b) + RESET)

        # Get nearest color name
        HSVobj = convert_color(sRGBColor(r, g, b), HSVColor)

        nearestIndex = -1
        diffMin = float("inf")
        for i in range(len(self.colorsHSV)):
            colEntry = self.colorsHSV[i]

            d = HSVobj.hsv_h - colEntry.hsv_h
            dh = min(abs(d), 360 - abs(d)) / 180.0
            ds = abs(HSVobj.hsv_s - colEntry.hsv_s)
            dv = abs(HSVobj.hsv_v - colEntry.hsv_v) / 255.0
            diff = np.sqrt(dh * dh + ds * ds + dv * dv)
            if diff < diffMin:
                diffMin = diff
                nearestIndex = i

        return {
            "color": self.colors[nearestIndex][1],
            "colorDistance": diffMin,
            "coloredStr": colorRepr
        }
Code example #8
File: train.py  Project: Axelwickm/DenseSense
def main():
    # Print args
    args = parser.parse_args()
    for arg in vars(args):
        print("\t", arg, getattr(args, arg))
    print("\n")

    # Determine model path
    modelPath = args.model
    if os.path.isdir(modelPath):
        modelPath = os.path.join(modelPath, args.algorithm + ".pth")
    alreadyExists = os.path.exists(modelPath)

    # Determine tensorboard path
    try:
        tb = 0 < int(args.tensorboard)
    except ValueError:
        tb = args.tensorboard

        # Potentially delete old tensorboard
        if os.path.isdir("./data/tensorboard/" + tb):
            print("Deleting old tensorboard: " + tb)
            shutil.rmtree("./data/tensorboard/" + tb)

    if args.algorithm == "DescriptionExtractor":
        from DenseSense.algorithms.DescriptionExtractor import DescriptionExtractor
        descriptionExtractor = DescriptionExtractor()
        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            descriptionExtractor.loadModel(modelPath)
        descriptionExtractor.saveModel(modelPath)

        dataset = "val"
        if args.dataset is not None:
            dataset = args.dataset

        descriptionExtractor.train(epochs=args.epochs,
                                   dataset=dataset,
                                   learningRate=args.learningRate,
                                   useDatabase=args.lmdb,
                                   printUpdateEvery=args.print,
                                   visualize=args.visualize,
                                   tensorboard=tb)

    elif args.algorithm == "Sanitizer":
        from DenseSense.algorithms.Sanitizer import Sanitizer
        sanitizer = Sanitizer()
        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            sanitizer.load_model(modelPath)
        sanitizer.save_model(modelPath)

        dataset = "val2017"
        if args.dataset is not None:
            dataset = args.dataset

        sanitizer.train(epochs=args.epochs,
                        dataset=dataset,
                        learning_rate=args.learningRate,
                        use_database=args.lmdb,
                        print_update_every=args.print,
                        visualize=args.visualize,
                        tensorboard=tb)

    elif args.algorithm == "ActionClassifier":
        from DenseSense.algorithms.ActionClassifier import ActionClassifier
        ac = ActionClassifier()
        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            ac.loadModel(modelPath)
        ac.saveModel(modelPath)

        dataset = "val2017"
        if args.dataset is not None:
            dataset = args.dataset

        ac.trainAutoEncoder(epochs=args.epochs,
                            dataset=dataset,
                            learningRate=args.learningRate,
                            useLMDB=args.lmdb,
                            printUpdateEvery=args.print,
                            visualize=args.visualize,
                            tensorboard=tb)
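
# A hypothetical invocation (the argparse setup lives elsewhere in this file;
# flag names below are assumed from the attributes read above):
#   python train.py --algorithm DescriptionExtractor --model ./models \
#       --epochs 100 --learningRate 0.005 --lmdb 1 --tensorboard my_run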
Code example #9
File: train.py  Project: fiv21/DenseSense
def main():
    args = parser.parse_args()
    for arg in vars(args):
        print("\t", arg, getattr(args, arg))
    print("\n")

    modelPath = args.model
    if os.path.isdir(modelPath):
        modelPath = os.path.join(modelPath, args.algorithm + ".pth")
    alreadyExists = os.path.exists(modelPath)

    if args.algorithm == "DescriptionExtractor":
        from DenseSense.algorithms.DescriptionExtractor import DescriptionExtractor
        descriptionExtractor = DescriptionExtractor()
        # FIXME: should be put in a function
        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            descriptionExtractor.loadModel(modelPath)
        descriptionExtractor.saveModel(modelPath)

        dataset = "val"
        if args.dataset is not None:
            dataset = args.dataset

        try:
            tb = 0 < int(args.tensorboard)
        except ValueError:
            tb = args.tensorboard

        descriptionExtractor.train(epochs=args.epochs,
                                   dataset=dataset,
                                   learningRate=args.learningRate,
                                   useDatabase=args.lmdb,
                                   printUpdateEvery=args.print,
                                   visualize=args.visualize,
                                   tensorboard=tb)

    elif args.algorithm == "Sanitizer":
        from DenseSense.algorithms.Sanitizer import Sanitizer
        sanitizer = Sanitizer()
        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            sanitizer.loadModel(modelPath)
        sanitizer.saveModel(modelPath)

        dataset = "val2017"
        if args.dataset is not None:
            dataset = args.dataset

        try:
            tb = 0 < int(args.tensorboard)
        except ValueError:
            tb = args.tensorboard

        sanitizer.train(epochs=args.epochs,
                        dataset=dataset,
                        learningRate=args.learningRate,
                        useDatabase=args.lmdb,
                        printUpdateEvery=args.print,
                        visualize=args.visualize,
                        tensorboard=tb)