def main():
    cam = cv2.VideoCapture(0)

    densepose = DensePoseWrapper()
    sanitizer = Sanitizer()
    sanitizer.loadModel("./models/Sanitizer.pth")
    tracker = Tracker()
    uvMapper = UVMapper()
    descriptionExtractor = DescriptionExtractor()

    while True:
        # Get image from webcam
        return_value, image = cam.read()
        assert return_value, "Failed to read from web camera"

        # White balance the image to get better color features
        image = white_balance(image)
        debugImage = image.copy()

        # Send image to DensePose
        people = densepose.extract(image)
        debugImage = densepose.renderDebug(debugImage, people)

        # Refine DensePose output to get actual people
        people = sanitizer.extract(people)
        debugImage = sanitizer.renderDebug(debugImage, people, alpha=0.2)

        # Track the people (which modifies the people variables)
        tracker.extract(people, True)
        debugImage = tracker.renderDebug(debugImage, people)

        # Extract UV map for each person
        peopleMaps = uvMapper.extract(people, image)
        peopleTextures = uvMapper.getPeopleTexture(peopleMaps)
        for i in range(len(peopleTextures)):
            cv2.imshow("UV image " + str(i), peopleTextures[i])

        # Classify what each person is wearing
        clothes = descriptionExtractor.extract(peopleMaps)

        # Show image
        print("Show image")
        cv2.imshow("debug image", debugImage)

        # Quit on escape
        if cv2.waitKey(1) == 27:
            break
        print("")

    cv2.destroyAllWindows()
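# `white_balance` is called above but not defined in this file. A minimal
# gray-world sketch of what such a helper could look like (an assumption,
# not necessarily the project's actual implementation):
import numpy as np


def white_balance(image):
    # Scale each BGR channel so its mean matches the global mean (gray-world)
    result = image.astype(np.float32)
    means = result.reshape(-1, 3).mean(axis=0)
    result *= means.mean() / np.maximum(means, 1e-6)
    return np.clip(result, 0, 255).astype(np.uint8)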
class ActionClassifier(DenseSense.algorithms.Algorithm.Algorithm):
    actions = {
        4: "dance",
        11: "sit",
        14: "walk",
        69: "hand wave",
        12: "idle",  # stand
        17: "idle",  # carry/hold (an object)
        36: "idle",  # lift/pick up
        37: "idle",  # listen
        47: "idle",  # put down
    }

    COCO_Datasets = ["val2014", "train2014", "val2017", "train2017"]
    AVA_Datasets = ["ava_val", "ava_train",
                    "ava_val_predictive", "ava_train_predictive"]

    def __init__(self):
        print("Initiating ActionClassifier")
        super().__init__()
        self._modelPath = None
        self._AE_model = AutoEncoder()
        self._training = False

    def loadModel(self, modelPath):  # TODO: load multiple models, refactor name
        self._modelPath = modelPath
        print("Loading ActionClassifier file from: " + self._modelPath)
        self._AE_model.load_state_dict(torch.load(self._modelPath, map_location=device))
        self._AE_model.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
            return
        self._modelPath = modelPath
        print("Saving ActionClassifier model to: " + self._modelPath)
        torch.save(self._AE_model.state_dict(), self._modelPath)

    def extract_ae(self, people, delta_time=None):
        S = _tensorify_people(people)
        if S.shape[0] == 0:
            return

        # Run prediction
        with torch.no_grad():
            embeddings = self._AE_model.encode(S, delta_time)

        # Add prediction to people
        for i, embedding in enumerate(embeddings):
            people[i].pose_vector = embedding.detach().cpu().numpy()

    def _initTraining(self, learningRate, datasetName, useLMDB):
        self.datasetName = datasetName
        from DenseSense.algorithms.DensePoseWrapper import DensePoseWrapper
        from DenseSense.algorithms.Sanitizer import Sanitizer
        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")

        if datasetName in ActionClassifier.COCO_Datasets:
            print("Loading COCO dataset: " + datasetName)
            from pycocotools.coco import COCO
            annFile = topDir + '/annotations/instances_{}.json'.format(datasetName)
            self.cocoPath = topDir + '/data/{}'.format(datasetName)
            self.coco = COCO(annFile)
            personCatID = self.coco.getCatIds(catNms=['person'])[0]
            self.dataset = self.coco.getImgIds(catIds=personCatID)
        elif datasetName in ActionClassifier.AVA_Datasets:
            print("Loading AVA dataset: " + datasetName)
            import csv
            from collections import defaultdict
            from DenseSense.utils.YoutubeLoader import YoutubeLoader

            annFile = topDir + "/annotations/{}.csv".format(datasetName.replace("_predictive", ""))
            self.dataset = defaultdict(lambda: defaultdict(defaultdict))
            with open(annFile, 'r') as csvFile:
                reader = csv.reader(csvFile)
                for row in reader:
                    video, t, x1, y1, x2, y2, action, person = row
                    actions = {action}
                    if person in self.dataset[video][t]:
                        actions = actions.union(self.dataset[video][t][person]["actions"])
                    self.dataset[video][t][person] = {
                        "bbox": (x1, y1, x2, y2),
                        "actions": actions
                    }

            # Flatten into a list of videos, each with time-sorted annotations
            ordered_data = []
            for key, video in self.dataset.items():
                ordered_data.append((key, []))
                for t, annotation in video.items():
                    ordered_data[-1][1].append((int(t), annotation))
                ordered_data[-1][1].sort(key=lambda x: x[0])
            self.dataset = ordered_data

            self.youtubeLoader = YoutubeLoader(verbose=False)
            for key, video in self.dataset:
                self.youtubeLoader.queue_video(key, video[0][0], video[-1][0])

            self.current_video_index = 0
            self.current_video_frame_index = 0

            self.tracker = Tracker()
        else:
            raise Exception("Unknown dataset")

        self.useLMDB = useLMDB
        if useLMDB:
            self.lmdb = LMDBHelper("a", max_size=1024 * 1024 * 1024 * 32)  # 32 GiB
            self.lmdb.verbose = False

        self.optimizer = torch.optim.Adam(self._AE_model.parameters(), lr=learningRate)
        self.loss_function = torch.nn.BCELoss()

    def _load(self, index=None):  # Load next if index is None
        if self.datasetName in ActionClassifier.COCO_Datasets:
            people = None
            # Load image from disk and process
            cocoImage = self.coco.loadImgs(self.dataset[index])[0]
            if self.useLMDB:
                people = self.lmdb.get("DensePoseWrapper_Sanitized_Coco", str(cocoImage["id"]))

            if people is None:
                image = cv2.imread(self.cocoPath + "/" + cocoImage["file_name"])
                if image is None:
                    raise Exception("Could not find image: " + str(index))

                people = self.denseposeExtractor.extract(image)
                people = self.sanitizer.extract(people)
                if self.useLMDB:
                    self.lmdb.save("DensePoseWrapper_Sanitized_Coco", str(cocoImage["id"]), people)
            return people, cocoImage

        elif self.datasetName in ActionClassifier.AVA_Datasets:
            data = None
            image = None
            people, frame_time, is_last = None, None, False
            key = self.dataset[self.current_video_index][0]
            if self.useLMDB:
                data = self.lmdb.get("DensePoseWrapper_Sanitized_AVA",
                                     str(key) + "_" + str(self.current_video_frame_index))

            if data is None:
                image, frame_time, is_last = self.youtubeLoader.get(
                    self.current_video_index, self.current_video_frame_index)
                if image is None:
                    people = []
                    frame_time = 0
                else:
                    people = self.denseposeExtractor.extract(image)
                    people = self.sanitizer.extract(people)
                if self.useLMDB:
                    # Save processed data
                    self.lmdb.save("DensePoseWrapper_Sanitized_AVA",
                                   str(key) + "_" + str(self.current_video_frame_index),
                                   (people, frame_time, is_last))
            else:
                people, frame_time, is_last = data

            timestamp = np.round(frame_time)
            ava_annotation = None
            sameTimestamp = [v[1] for v in self.dataset[self.current_video_index][1]
                             if v[0] == timestamp]
            if len(sameTimestamp) == 1:
                ava_annotation = sameTimestamp[0]

            # To show the whole dataset as it's being downloaded
            if image is not None:
                if ava_annotation is not None:
                    for k, p in ava_annotation.items():
                        bbox = np.array([float(p["bbox"][0]), float(p["bbox"][1]),
                                         float(p["bbox"][2]), float(p["bbox"][3])])
                        p1 = bbox[:2] * np.array([image.shape[1], image.shape[0]], dtype=np.float64)
                        p2 = bbox[2:] * np.array([image.shape[1], image.shape[0]], dtype=np.float64)
                        image = cv2.rectangle(image, tuple(p1.astype(np.int32)),
                                              tuple(p2.astype(np.int32)), (20, 20, 200), 1)
                cv2.imshow("frame", image)
                cv2.waitKey(1)

            # Advance video and frame indices
            if is_last:
                self.current_video_frame_index = 0
                self.current_video_index += 1
                if len(self.dataset) == self.current_video_index:
                    self.current_video_index = 0
            else:
                self.current_video_frame_index += 1

            return people, frame_time, is_last, ava_annotation

    def trainAutoEncoder(self, epochs=100, learningRate=0.005, dataset="Coco",
                         useLMDB=True, printUpdateEvery=40,
                         visualize=0, tensorboard=False):
        self._training = True
        self._initTraining(learningRate, dataset, useLMDB)

        # Tensorboard setup
        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter
            if type(tensorboard) == str:
                writer = SummaryWriter(topDir + "/data/tensorboard/" + tensorboard)
            else:
                writer = SummaryWriter(topDir + "/data/tensorboard/")
            tensorboard = True

        # Start the training process
        total_iterations = len(self.dataset)
        visualize_counter = 0
        open_windows = set()

        if self.datasetName in ActionClassifier.COCO_Datasets:
            print("Starting COCO dataset training")
            for epoch in range(epochs):
                epochLoss = np.float64(0)
                for i in range(total_iterations):
                    people, annotation = self._load(i)
                    S = _tensorify_people(people)
                    if S.shape[0] == 0:
                        continue

                    # Run prediction
                    embedding = self._AE_model.encode(S)
                    out = self._AE_model.decode(embedding)

                    # Optimize
                    lossSize = self.loss_function(out, S)
                    lossSize.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()
                    lossSize = lossSize.cpu().item()

                    # Give feedback of training process
                    epochLoss += lossSize / total_iterations
                    visualize_counter += 1
                    if (i - 1) % printUpdateEvery == 0:
                        print("Iteration {} / {}, epoch {} / {}".format(
                            i, total_iterations, epoch, epochs))
                        print("Loss size: {}\n".format(lossSize))

                    if visualize != 0 and visualize <= visualize_counter:
                        visualize_counter = 0
                        new_open_windows = set()
                        for index, _ in enumerate(S):
                            inpS = (S[index, 0].detach()).cpu().to(torch.float).numpy()
                            outS = (out[index, 0].detach()).cpu().to(torch.float32).numpy()
                            emb = embedding.detach().cpu().numpy()
                            debug_image = self._get_ae_from_embedding(index, inpS, emb, outS, None)
                            cv2.imshow("person " + str(index), debug_image)
                            new_open_windows.add("person " + str(index))
                            break  # Only show one person
                        for window in open_windows.difference(new_open_windows):
                            cv2.destroyWindow(window)
                        open_windows = new_open_windows
                        cv2.waitKey(1)

                    if tensorboard:
                        absI = i + epoch * total_iterations
                        writer.add_scalar("Loss size", lossSize, absI)

                print("Finished epoch {} / {}. Loss size: {}".format(epoch, epochs, epochLoss))
                self.saveModel(self._modelPath)

        elif self.datasetName in ActionClassifier.AVA_Datasets:
            # Unfortunately, needs to run through the whole AVA dataset to
            # determine the size in frames
            print("Going through ava dataset once to determine the size")
            total_iterations = 0
            for video_i in range(len(self.dataset)):
                is_last = False
                while not is_last:
                    people, frame_time, is_last, annotation = self._load()  # Load next
                    total_iterations += 1
                    if (total_iterations - 1) % 500 == 0:
                        print("Frame/iteration {} (video {} / {})".format(
                            total_iterations, video_i, len(self.dataset)))

            print("Total number of iterations are {}".format(total_iterations))

            print("Starting AVA dataset training")
            last_frame_time = None
            last_people = []
            S_next = None
            current_video = 0
            was_last = False
            for epoch in range(epochs):
                epochLoss = np.float64(0)
                for i in range(total_iterations):
                    people, frame_time, is_last, annotation = self._load()  # Load next
                    current_video += is_last

                    if "predictive" in self.datasetName:
                        # Track the next frame
                        self.tracker.extract(people, time_now=frame_time)
                        if is_last:  # If new video next
                            self.tracker = Tracker()
                            last_frame_time = None

                        # Only save the people who exist in all frames
                        old_ids = list(map(lambda p: p.id, last_people))
                        new_ids = list(map(lambda p: p.id, people))
                        old_people = list(filter(lambda p: p.id in new_ids, last_people.copy()))
                        new_people = list(filter(lambda p: p.id in old_ids, people.copy()))

                        # Filter old Ss
                        S = _tensorify_people(old_people, True)
                        S_next = _tensorify_people(new_people, False)
                        last_people = people
                    else:
                        frame_time = last_frame_time
                        S = _tensorify_people(people)

                    if S.shape[0] == 0:
                        continue

                    delta_time = 0
                    if last_frame_time is not None and was_last is False:
                        delta_time = frame_time - last_frame_time
                    last_frame_time = frame_time

                    # Run prediction
                    embedding = self._AE_model.encode(S, delta_time)
                    out = self._AE_model.decode(embedding)

                    # Optimize
                    if "predictive" in self.datasetName:
                        lossSize = self.loss_function(out, S_next)
                    else:
                        lossSize = self.loss_function(out, S)

                    lossSize.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()
                    lossSize = lossSize.cpu().item()

                    # Give feedback of training process
                    epochLoss += lossSize / total_iterations
                    visualize_counter += 1
                    was_last = is_last
                    if (i - 1) % printUpdateEvery == 0:
                        print("Iteration {} / {} (video {}/{}), epoch {} / {}".format(
                            i, total_iterations, current_video, len(self.dataset), epoch, epochs))
                        print("Loss size: {}\n".format(lossSize))

                    if visualize != 0 and visualize <= visualize_counter:
                        visualize_counter = 0
                        new_open_windows = set()
                        for index, _ in enumerate(S):
                            inpS = (S[index, 0].detach()).cpu().to(torch.float).numpy()
                            outS = (out[index, 0].detach()).cpu().to(torch.float32).numpy()
                            emb = embedding.detach().cpu().numpy()
                            debug_image = self._get_ae_from_embedding(index, inpS, emb, outS, S_next)
                            cv2.imshow("person " + str(index), debug_image)
                            new_open_windows.add("person " + str(index))
                            break  # Only show one person
                        for window in open_windows.difference(new_open_windows):
                            cv2.destroyWindow(window)
                        open_windows = new_open_windows
                        cv2.waitKey(1)

                    if tensorboard:
                        absI = i + epoch * total_iterations
                        writer.add_scalar("Loss size", lossSize, absI)

                print("Finished epoch {} / {}. Loss size: {}".format(epoch, epochs, epochLoss))
                self.saveModel(self._modelPath)

    def _get_ae_from_embedding(self, index, S, embedding, out, S_next):
        S = (S * 255).astype(np.uint8)
        out = (out * 255).astype(np.uint8)
        emb = ((embedding[index] * 0.5 + 1.0) * 255).astype(np.uint8)
        emb = np.expand_dims(emb, axis=0)
        emb = np.repeat(emb, repeats=14, axis=0).T
        emb = np.repeat(emb, repeats=10, axis=0)
        emb = np.vstack((emb, np.zeros((56 - 5 * 10, 14), dtype=np.uint8)))
        comparison = np.hstack((S, emb, out))
        if S_next is not None:
            Sn = (S_next[index, 0].detach() * 255).cpu().to(torch.uint8).numpy()
            Sn = np.hstack((np.zeros((56, 56 + 14)), Sn))
            comparison = np.vstack((comparison, Sn)).astype(np.uint8)
        return cv2.applyColorMap(comparison, cv2.COLORMAP_JET)

    def get_ae_debug(self, people):
        combined = []
        for index, person in enumerate(people):
            embedding = torch.from_numpy(person.pose_vector).to(device)
            embedding = embedding.reshape((1, embedding.shape[0]))
            out = self._AE_model.decode(embedding)
            out = out[0, 0].detach().cpu().to(torch.float32).numpy()
            emb = embedding.detach().cpu().numpy()
            image = self._get_ae_from_embedding(0, person.S, emb, out, None)
            combined.append(image)
        return combined
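# `_tensorify_people` is used throughout ActionClassifier but not defined in
# this file. A minimal sketch under the assumption that each person carries a
# 56x56 body-part map in `person.S` and that the second argument requests a
# detached (no-grad) tensor; both are assumptions, the real helper may differ:
def _tensorify_people(people, detach=False):
    S = torch.zeros((len(people), 1, 56, 56), dtype=torch.float32, device=device)
    for i, person in enumerate(people):
        S[i, 0] = torch.from_numpy(person.S.astype(np.float32) / 255.0)
    return S.detach() if detach else S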
def main():
    densepose = DensePoseWrapper()
    sanitizer = Sanitizer()
    sanitizer.load_model("./models/Sanitizer.pth")
    tracker = Tracker()
    uvMapper = UVMapper()
    descriptionExtractor = DescriptionExtractor()
    descriptionExtractor.loadModel("./models/DescriptionExtractor.pth")
    actionClassifier = ActionClassifier()
    actionClassifier.loadModel("./models/ActionClassifier_AutoEncoder.pth")

    cam = cv2.VideoCapture(0)
    frameIndex = 0
    frame_time = time.time()
    oldOpenWindows = set()
    while True:
        # Get image from webcam
        return_value, image = cam.read()
        assert return_value, "Failed to read from web camera"
        delta_time = time.time() - frame_time
        frame_time = time.time()

        # White balance the image to get better color features
        image = white_balance(image)
        debugImage = image.copy()

        # Send image to DensePose
        people = densepose.extract(image)
        debugImage = densepose.renderDebug(debugImage, people)
        print("DensePose people:", len(people))

        # Refine DensePose output to get actual people
        people = sanitizer.extract(people)
        debugImage = sanitizer.renderDebug(debugImage, people, alpha=0.2)
        print("Sanitizer people", len(people))

        # Track the people (which modifies the people variables)
        tracker.extract(people, True)
        debugImage = tracker.renderDebug(debugImage, people)
        print("Tracker people", len(people))

        # Extract UV map for each person
        peopleMaps = uvMapper.extract(people, image)
        peopleTextures = uvMapper.getPeopleTexture(peopleMaps)

        # Classify what the person is wearing
        clothes = descriptionExtractor.extract(peopleMaps)
        clothingImages = descriptionExtractor.getLabelImage()

        # Get pose embedding
        actionClassifier.extract_ae(people, delta_time)
        debugACAE = actionClassifier.get_ae_debug(people)

        # Per person window management
        newOpenWindows = set()
        for i, person in enumerate(people):
            # Show UV map and label
            S_ROI = (person.I * (255 / 25)).astype(np.uint8)
            S_ROI = cv2.applyColorMap(S_ROI, cv2.COLORMAP_PARULA)
            S_ROI = cv2.resize(S_ROI, (160, 160))
            personWindow = cv2.resize(peopleTextures[i], (int(5 / 3 * 160), 160))
            coloredSlice = np.zeros((160, 3, 3), dtype=np.uint8)
            coloredSlice[:, :] = person.color
            personWindow = np.hstack((coloredSlice, S_ROI, personWindow, clothingImages[i]))

            # View window
            windowName = "UV image " + str(person.id)
            newOpenWindows.add(windowName)
            cv2.imshow(windowName, personWindow)
            cv2.resizeWindow(windowName, 600, 600)

            # ... and a window for the ActionClassifier auto-encoder
            windowName = "ActionClassifier_AutoEncoder image " + str(person.id)
            newOpenWindows.add(windowName)
            cv2.imshow(windowName,
                       cv2.resize(debugACAE[i],
                                  (debugACAE[i].shape[1] * 3, debugACAE[i].shape[0] * 3)))

        for oldWindow in oldOpenWindows:
            if oldWindow not in newOpenWindows:
                cv2.destroyWindow(oldWindow)
        oldOpenWindows = newOpenWindows

        # Show image
        print("Show frame:", frameIndex, "\n")
        cv2.imshow("debug image", debugImage)
        frameIndex += 1

        # Quit on escape
        if cv2.waitKey(1) == 27:
            break

    cv2.destroyAllWindows()
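# Standard entry-point guard so the demo only runs when executed directly:
if __name__ == "__main__":
    main()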
class DescriptionExtractor(DenseSense.algorithms.Algorithm.Algorithm):
    iteration = 0

    availableLabels = {
        0: "none",
        1: "short sleeve top",
        2: "long sleeve top",
        3: "short sleeve outwear",
        4: "long sleeve outwear",
        5: "vest",
        6: "sling",
        7: "shorts",
        8: "trousers",
        9: "skirt",
        10: "short sleeve dress",
        11: "long sleeve dress",
        12: "dress vest",
        13: "sling dress"
    }

    # Body-part indices in the S map:
    # 0: none, 1: torso, 2: R hand, 3: L hand, 4: R foot, 5: L foot,
    # 6: R thigh, 7: L thigh, 8: R calf, 9: L calf, 10: L upper arm,
    # 11: R upper arm, 12: L lower arm, 13: R lower arm, 14: head

    # Which body-part areas to sample when determining each label's color
    labelColorCheck = {
        0: [],
        1: [1, 10, 11],
        2: [1, 10, 11, 12, 13],
        3: [1, 10, 11],
        4: [1, 10, 11, 12, 13],
        5: [1, 10, 11],
        6: [1, 10, 11],
        7: [6, 7],
        8: [6, 7, 8, 9],
        9: [6, 7],
        10: [1, 10, 11],
        11: [1, 10, 11, 12, 13],
        12: [1, 10, 11],
        13: [1, 10, 11]
    }

    colors = [
        ((255, 255, 255), "white"), ((210, 209, 218), "white"),
        ((145, 164, 164), "white"), ((169, 144, 135), "white"),
        ((197, 175, 177), "white"), ((117, 126, 115), "white"),
        ((124, 126, 129), "white"),
        ((0, 0, 0), "black"), ((10, 10, 10), "black"), ((1, 6, 9), "black"),
        ((5, 10, 6), "black"), ((18, 15, 11), "black"), ((18, 22, 9), "black"),
        ((16, 16, 14), "black"),
        ((153, 153, 0), "yellow"),
        ((144, 115, 99), "pink"), ((207, 185, 174), "pink"),
        ((206, 191, 131), "pink"), ((208, 179, 54), "pink"),
        ((202, 19, 43), "red"), ((206, 28, 50), "red"), ((82, 30, 26), "red"),
        ((156, 47, 35), "orange"), ((126, 78, 47), "wine red"),
        ((74, 72, 77), "green"), ((31, 38, 38), "green"),
        ((40, 52, 79), "green"), ((100, 82, 116), "green"),
        ((8, 17, 55), "green"), ((29, 31, 37), "dark green"),
        ((46, 46, 36), "blue"), ((29, 78, 60), "blue"), ((74, 97, 85), "blue"),
        ((60, 68, 67), "blue"),
        ((181, 195, 232), "neon blue"), ((40, 148, 184), "bright blue"),
        ((210, 40, 69), "orange"),
        ((66, 61, 52), "gray"), ((154, 120, 147), "gray"),
        ((124, 100, 86), "gray"), ((46, 55, 46), "gray"),
        ((119, 117, 122), "gray"),
        ((88, 62, 62), "brown"), ((60, 29, 17), "brown"),
        ((153, 50, 204), "purple"), ((77, 69, 30), "purple"),
        ((153, 91, 14), "violet"),
        ((207, 185, 151), "beige")
    ]

    colorsHSV = None

    class Network(nn.Module):
        def __init__(self, labels):  # FIXME: make this work!
            super(DescriptionExtractor.Network, self).__init__()
            self.layer1 = nn.Sequential(
                # 15 body-part textures x 3 color channels = 45 input channels
                nn.Conv2d(in_channels=3 * 15, out_channels=15,
                          kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))
            self.layer2 = nn.Sequential(
                nn.Conv2d(in_channels=15, out_channels=10,
                          kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))
            self.fc1 = nn.Linear(360, 180)
            self.relu1 = nn.ReLU(inplace=False)
            self.fc2 = nn.Linear(180, labels)

        def forward(self, x):
            batchSize = x.shape[0]
            x = x.view(batchSize, 15 * 3, 32, 32)
            x = self.layer1(x)
            x = self.layer2(x)
            x = x.view(batchSize, -1)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.fc2(x)
            return x

    def __init__(self, model=None, db=None):
        print("Initiating DescriptionExtractor")
        super().__init__()
        self.classifier = DescriptionExtractor.Network(len(self.availableLabels))
        self.modelPath = None
        self._training = False
        self.predictions = []
        self.peopleLabels = []

        # Precompute the HSV representation of each reference color
        self.colorsHSV = []
        for c in self.colors:
            RGBobj = sRGBColor(c[0][0], c[0][1], c[0][2])
            self.colorsHSV.append(convert_color(RGBobj, HSVColor))

    def loadModel(self, modelPath):
        self.modelPath = modelPath
        print("Loading DescriptionExtractor file from: " + self.modelPath)
        self.classifier.load_state_dict(torch.load(self.modelPath, map_location=device))
        self.classifier.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
            return
        self.modelPath = modelPath
        print("Saving DescriptionExtractor model to: " + self.modelPath)
        torch.save(self.classifier.state_dict(), self.modelPath)

    def _initTraining(self, learningRate, dataset, useDatabase):
        # Dataset is DeepFashion2
        print("Initiating training of DescriptionExtractor")
        print("Loading DeepFashion2")
        from torchvision import transforms
        from torchvision.datasets import CocoDetection

        self.annFile = topDir + '/annotations/deepfashion2_{}.json'.format(dataset)
        self.cocoImgPath = topDir + '/data/DeepFashion2/{}'.format(dataset)
        self.useDatabase = useDatabase
        self.dataset = CocoDetection(
            self.cocoImgPath, self.annFile,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: x.permute(1, 2, 0)),
                transforms.Lambda(lambda x: (x * 255).byte().numpy()),
                transforms.Lambda(lambda x: x[:, :, ::-1])  # RGB -> BGR
            ]))

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.load_model(topDir + "/models/Sanitizer.pth")
        self.uvMapper = UVMapper()

        # PyTorch things
        self.optimizer = torch.optim.Adam(self.classifier.parameters(),
                                          lr=learningRate, amsgrad=True)
        self.lossFunction = torch.nn.BCEWithLogitsLoss()

    def extract(self, peopleMaps):
        if len(peopleMaps) == 0:
            return []

        self.peopleLabels = []
        determineColorThreshold = 0.3  # FIXME: tune

        # Do label classification
        peopleMapsDevice = torch.Tensor(peopleMaps).to(device)
        self.predictions = self.classifier.forward(peopleMapsDevice)
        self.predictions = self.predictions.sigmoid()
        self.predictions = self.predictions.detach().cpu().numpy()

        # Compile predictions into nice dictionary
        for personIndex, prediction in enumerate(self.predictions):
            labels = {}
            # Some labels use the same areas for determining color, so cache
            # each area's average color once it has been computed
            averages = np.full((peopleMaps.shape[1], 3), -1, dtype=np.int64)
            for i, value in enumerate(prediction):
                if i == 0:
                    # 0 is "none", and not trained on anyways
                    continue
                label = self.availableLabels[i]
                info = {"activation": value}

                if determineColorThreshold < value:
                    # If certainty is above threshold, take the time to
                    # calculate the average color
                    averageOfAreas = np.zeros(3, dtype=np.int64)
                    nonBlackAreas = 0
                    for areaIndex in self.labelColorCheck[i]:
                        if (averages[areaIndex] == -1).all():
                            # Calculate average color of this area
                            relevantPixels = peopleMapsDevice[personIndex, areaIndex, :, :]
                            relevantPixels = relevantPixels[torch.sum(relevantPixels, axis=2) != 0]
                            if relevantPixels.shape[0] == 0:  # All black
                                averages[areaIndex] = np.zeros(3)
                            else:
                                averages[areaIndex] = relevantPixels.mean(
                                    axis=0).cpu().numpy().astype(np.uint8)
                        if (averages[areaIndex] != 0).any():
                            nonBlackAreas += 1
                            averageOfAreas += averages[areaIndex]
                    if nonBlackAreas != 0:
                        averageOfAreas = (averageOfAreas / float(nonBlackAreas)).astype(np.uint8)
                        info.update(self._findColorName(averageOfAreas))

                labels[label] = info
            self.peopleLabels.append(labels)

        return self.peopleLabels

    def train(self, epochs=100, learningRate=0.005, dataset="Coco",
              useDatabase=True, printUpdateEvery=40,
              visualize=False, tensorboard=False):
        self._training = True
        self._initTraining(learningRate, dataset, useDatabase)

        # Deal with tensorboard
        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter
            if type(tensorboard) == str:
                writer = SummaryWriter(topDir + "/data/tensorboard/" + tensorboard)
            else:
                writer = SummaryWriter(topDir + "/data/tensorboard/")
            tensorboard = True

        def findBestROI(ROIs, label):
            # Return the index of the ROI with the highest IoU against the
            # label's bounding box, or -1 if none overlaps
            bestMatch = 0
            bestIndex = -1
            for i, ROI in enumerate(ROIs):
                lbox = np.array(label["bbox"])
                larea = lbox[2:] - lbox[:2]
                larea = larea[0] * larea[1]
                rbox = ROI.bounds
                rarea = rbox[2:] - rbox[:2]
                rarea = rarea[0] * rarea[1]
                SI = np.maximum(0, np.minimum(lbox[2], rbox[2]) - np.maximum(lbox[0], rbox[0])) * \
                     np.maximum(0, np.minimum(lbox[3], rbox[3]) - np.maximum(lbox[1], rbox[1]))
                SU = larea + rarea - SI
                overlap = SI / SU
                if bestMatch < overlap and SU != 0:
                    bestMatch = overlap
                    bestIndex = i
            return bestIndex

        Iterations = len(self.dataset)

        print("Starting training")
        for epoch in range(epochs):
            epochLoss = np.float64(0)
            for i in range(Iterations):
                ROIs, peopleTextures, labels = self._load(i)

                # Figure out what ROI belongs to what label
                groundtruth = np.zeros((len(ROIs), 14), dtype=np.float32)
                for label in labels:
                    mostMatching = findBestROI(ROIs, label)
                    if mostMatching != -1:
                        groundtruth[mostMatching][label["category_id"]] = 1

                # Most items in this dataset will be bypassed because no
                # people were found or none overlap with the ground truth
                if len(ROIs) == 0 or not np.any(groundtruth != 0):
                    continue

                groundtruth = torch.from_numpy(groundtruth).to(device)

                # Apply noise to peopleTextures
                noise = np.random.randn(*peopleTextures.shape) * 5
                peopleTextures = peopleTextures.astype(np.int32) + noise.astype(np.int32)
                peopleTextures = np.clip(peopleTextures, 0, 255)
                peopleTextures = peopleTextures.astype(np.uint8)

                peopleTextures = torch.Tensor(peopleTextures).to(device)
                predictions = self.classifier.forward(peopleTextures)

                lossSize = self.lossFunction(predictions, groundtruth)
                lossSize.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                lossSize = lossSize.cpu().item()

                epochLoss += lossSize / Iterations
                if (i - 1) % printUpdateEvery == 0:
                    print("Iteration {} / {}, epoch {} / {}".format(i, Iterations, epoch, epochs))
                    print("Loss size: {}\n".format(lossSize))

                if tensorboard:
                    absI = i + epoch * Iterations
                    writer.add_scalar("Loss size", lossSize, absI)

            print("Finished epoch {} / {}. Loss size: {}".format(epoch, epochs, epochLoss))
            self.saveModel(self.modelPath)

        self._training = False

    def getLabelImage(self):
        images = []
        for personLabel in self.peopleLabels:
            # Sort labels by score
            labels = sorted(list(personLabel.items()),
                            key=lambda x: x[1]["activation"], reverse=True)

            # Create image
            image = np.zeros((160, 210, 3))
            for i, label in enumerate(labels):
                name, classification = label
                text = "{0:4d}% {1}".format(int(classification["activation"] * 100), name)
                color = (255, 255, 255)
                if classification["activation"] < 0.75:  # FIXME: magic number, tune
                    color = (128, 128, 128)
                image = cv2.putText(image, text, (0, 12 + 12 * i),
                                    cv2.FONT_HERSHEY_DUPLEX, .3, color, 1, cv2.LINE_AA)

                # Add color
                if "bestMatch" in classification:
                    colorText = classification["bestMatch"][1]
                    colorTextColor = classification["color"]
                    colorTextColor = (int(colorTextColor[0]),
                                      int(colorTextColor[1]),
                                      int(colorTextColor[2]))
                    image = cv2.putText(image, colorText, (150, 12 + 12 * i),
                                        cv2.FONT_HERSHEY_DUPLEX, .3,
                                        colorTextColor, 1, cv2.LINE_AA)

            images.append(image.astype(np.uint8))
        return images

    def _load(self, index):
        cocoImage = self.dataset[index]  # (image, annotations) tuple
        ROIs = None
        if self.useDatabase:
            ROIs = self.lmdb.get("DensePoseWrapper_Sanitized_deepfashion2", str(index))
        if ROIs is None:
            ROIs = self.denseposeExtractor.extract(cocoImage[0])
            ROIs = self.sanitizer.extract(ROIs)
            if self.useDatabase:
                self.lmdb.save("DensePoseWrapper_Sanitized_deepfashion2", str(index), ROIs)

        peopleTextures = None
        if self.useDatabase:
            peopleTextures = self.lmdb.get("UVMapper_deepfashion2", str(index))
        if peopleTextures is None:
            peopleTextures = self.uvMapper.extract(ROIs, cocoImage[0])
            if self.useDatabase:
                self.lmdb.save("UVMapper_deepfashion2", str(index), peopleTextures)

        return ROIs, peopleTextures, cocoImage[1]

    def _findColorName(self, color):
        b = color[0]
        g = color[1]
        r = color[2]

        # This prints the color colored in the terminal (truecolor escape)
        colorRepr = '\033[{};2;{};{};{}m'.format(38, r, g, b) \
            + "rgb(" + str(r) + ", " + str(g) + ", " + str(b) + ")" + '\033[0m'

        # Get nearest color name
        HSVobj = convert_color(sRGBColor(r, g, b), HSVColor)
        nearestIndex = -1
        diffMin = 100000
        for i in range(len(self.colorsHSV)):
            colEntry = self.colorsHSV[i]
            d = HSVobj.hsv_h - colEntry.hsv_h
            dh = min(abs(d), 360 - abs(d)) / 180.0
            ds = abs(HSVobj.hsv_s - colEntry.hsv_s)
            dv = abs(HSVobj.hsv_v - colEntry.hsv_v) / 255.0
            diff = np.sqrt(dh * dh + ds * ds + dv * dv)
            if diff < diffMin:
                diffMin = diff
                nearestIndex = i

        return {
            "color": tuple(color),
            "colorDistance": diffMin,
            "coloredStr": colorRepr,
            "bestMatch": self.colors[nearestIndex]
        }
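# A minimal usage sketch for DescriptionExtractor inference. The input shape
# (one person, 15 body-part textures of 32x32 BGR pixels) is inferred from
# Network.forward; the model path is hypothetical:
#
#   extractor = DescriptionExtractor()
#   extractor.loadModel("./models/DescriptionExtractor.pth")
#   peopleMaps = np.zeros((1, 15, 32, 32, 3), dtype=np.uint8)
#   labels = extractor.extract(peopleMaps)
#   print(labels[0]["trousers"]["activation"])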
class DescriptionExtractor(DenseSense.algorithms.Algorithm.Algorithm):
    iteration = 0

    availableLabels = {
        0: "none",
        1: "short sleeve top",
        2: "long sleeve top",
        3: "short sleeve outwear",
        4: "long sleeve outwear",
        5: "vest",
        6: "sling",
        7: "shorts",
        8: "trousers",
        9: "skirt",
        10: "short sleeve dress",
        11: "long sleeve dress",
        12: "dress vest",
        13: "sling dress"
    }

    # FIXME: change because now using S
    labelBodyparts = {  # https://github.com/facebookresearch/DensePose/issues/64#issuecomment-405608749 PRAISE
        "boots": [5, 6],
        "footwear": [5, 6],
        "outer": [1, 2, 15, 17, 16, 18, 19, 21, 20, 22],
        "dress": [1, 2],
        "sunglasses": [],
        "pants": [7, 9, 8, 10, 11, 13, 12, 14],
        "top": [1, 2],
        "shorts": [7, 9, 8, 10],
        "skirt": [1, 2],
        "headwear": [23, 24],
        "scarfAndTie": []
    }

    colors = [  # TODO: use color model file
        ((255, 255, 255), "white"), ((210, 209, 218), "white"),
        ((145, 164, 164), "white"), ((169, 144, 135), "white"),
        ((197, 175, 177), "white"), ((117, 126, 115), "white"),
        ((124, 126, 129), "white"),
        ((0, 0, 0), "black"), ((10, 10, 10), "black"), ((1, 6, 9), "black"),
        ((5, 10, 6), "black"), ((18, 15, 11), "black"), ((18, 22, 9), "black"),
        ((16, 16, 14), "black"),
        ((153, 153, 0), "yellow"),
        ((144, 115, 99), "pink"), ((207, 185, 174), "pink"),
        ((206, 191, 131), "pink"), ((208, 179, 54), "pink"),
        ((202, 19, 43), "red"), ((206, 28, 50), "red"), ((82, 30, 26), "red"),
        ((156, 47, 35), "orange"), ((126, 78, 47), "wine red"),
        ((74, 72, 77), "green"), ((31, 38, 38), "green"),
        ((40, 52, 79), "green"), ((100, 82, 116), "green"),
        ((8, 17, 55), "green"), ((29, 31, 37), "dark green"),
        ((46, 46, 36), "blue"), ((29, 78, 60), "blue"), ((74, 97, 85), "blue"),
        ((60, 68, 67), "blue"),
        ((181, 195, 232), "neon blue"), ((40, 148, 184), "bright blue"),
        ((210, 40, 69), "orange"),
        ((66, 61, 52), "gray"), ((154, 120, 147), "gray"),
        ((124, 100, 86), "gray"), ((46, 55, 46), "gray"),
        ((119, 117, 122), "gray"),
        ((88, 62, 62), "brown"), ((60, 29, 17), "brown"),
        ((153, 50, 204), "purple"), ((77, 69, 30), "purple"),
        ((153, 91, 14), "violet"),
        ((207, 185, 151), "beige")
    ]

    colorsHSV = None

    class Network(nn.Module):
        def __init__(self, labels):  # FIXME: make this work!
            super(DescriptionExtractor.Network, self).__init__()
            self.layer1 = nn.Sequential(
                # 15 body-part textures x 3 color channels = 45 input channels
                nn.Conv2d(in_channels=3 * 15, out_channels=15,
                          kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))
            self.layer2 = nn.Sequential(
                nn.Conv2d(in_channels=15, out_channels=10,
                          kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=4, stride=2, padding=0))
            self.fc1 = nn.Linear(360, 180)
            self.relu1 = nn.ReLU(inplace=False)
            self.fc2 = nn.Linear(180, labels)
            self.softmax = nn.Softmax(dim=1)

        def forward(self, x):
            batchSize = x.shape[0]
            x = x.view(batchSize, 15 * 3, 32, 32)
            x = self.layer1(x)
            x = self.layer2(x)
            x = x.view(batchSize, -1)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.fc2(x)
            # x = self.softmax(x)  # logits are consumed by BCEWithLogitsLoss
            return x

    def __init__(self, model=None, db=None):
        print("Initiating DescriptionExtractor")
        super().__init__()
        self.classifier = DescriptionExtractor.Network(len(self.availableLabels))
        self.modelPath = None
        self._training = False

        # Precompute the HSV representation of each reference color
        self.colorsHSV = []
        for c in self.colors:
            RGBobj = sRGBColor(c[0][0], c[0][1], c[0][2])
            self.colorsHSV.append(convert_color(RGBobj, HSVColor))

    def loadModel(self, modelPath):
        self.modelPath = modelPath
        print("Loading DescriptionExtractor file from: " + self.modelPath)
        self.classifier.load_state_dict(torch.load(self.modelPath, map_location=device))
        self.classifier.to(device)

    def saveModel(self, modelPath):
        if modelPath is None:
            print("Don't know where to save model")
            return
        self.modelPath = modelPath
        print("Saving DescriptionExtractor model to: " + self.modelPath)
        torch.save(self.classifier.state_dict(), self.modelPath)

    def _initTraining(self, learningRate, dataset, useDatabase):
        # Dataset is DeepFashion2
        print("Initiating training of DescriptionExtractor")
        print("Loading DeepFashion2")
        from torchvision import transforms
        from torchvision.datasets import CocoDetection

        self.annFile = './annotations/deepfashion2_{}.json'.format(dataset)
        self.cocoImgPath = './data/DeepFashion2/{}'.format(dataset)
        self.useDatabase = useDatabase
        self.dataset = CocoDetection(
            self.cocoImgPath, self.annFile,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: x.permute(1, 2, 0)),
                transforms.Lambda(lambda x: (x * 255).byte().numpy()),
                transforms.Lambda(lambda x: x[:, :, ::-1])  # RGB -> BGR
            ]))

        # Init LMDB_helper
        if useDatabase:
            self.lmdb = LMDBHelper("a")
            self.lmdb.verbose = False

        self.denseposeExtractor = DensePoseWrapper()
        self.sanitizer = Sanitizer()
        self.sanitizer.loadModel("./models/Sanitizer.pth")
        self.uvMapper = UVMapper()

        # PyTorch things
        self.optimizer = torch.optim.Adam(self.classifier.parameters(),
                                          lr=learningRate, amsgrad=True)
        self.lossFunction = torch.nn.BCEWithLogitsLoss()

    def extract(self, peopleMaps):
        labelsPeople = []
        # Do label classification
        for personTexture in peopleMaps:
            # Run the classification on it
            pyTorchTexture = torch.from_numpy(
                np.array([np.moveaxis(personTexture / 255.0, -1, 0)])).float()
            pyTorchTexture = pyTorchTexture.to(device)
            labelVector = self.classifier(pyTorchTexture)[0]  # FIXME: do in model

            # Store the data; map raw logits to [0, 1] with a sigmoid (the
            # original calibration constants onActivation/noActivation were
            # never defined anywhere)
            labelVectorHost = torch.sigmoid(labelVector).detach().cpu().numpy()
            labels = {}
            for j, label in enumerate(self.availableLabels.values()):
                val = float(labelVectorHost[j])
                info = {"activation": val}

                if 0.7 < val:
                    color = self._findColorName(personTexture, self.labelBodyparts[label])
                    if color != 0:
                        info.update(color)

                labels[label] = info

            labelsPeople.append(labels)
        return labelsPeople

    def train(self, epochs=100, learningRate=0.005, dataset="Coco",
              useDatabase=True, printUpdateEvery=40,
              visualize=False, tensorboard=False):
        self._training = True
        self._initTraining(learningRate, dataset, useDatabase)

        # Deal with tensorboard
        if tensorboard or type(tensorboard) == str:
            from torch.utils.tensorboard import SummaryWriter
            if type(tensorboard) == str:
                writer = SummaryWriter("./data/tensorboard/" + tensorboard)
            else:
                writer = SummaryWriter("./data/tensorboard/")
            tensorboard = True

        def findBestROI(ROIs, label):
            # Return the index of the ROI with the highest IoU against the
            # label's bounding box, or -1 if none overlaps
            bestMatch = 0
            bestIndex = -1
            for i, ROI in enumerate(ROIs):
                lbox = np.array(label["bbox"])
                larea = lbox[2:] - lbox[:2]
                larea = larea[0] * larea[1]
                rbox = ROI.bounds
                rarea = rbox[2:] - rbox[:2]
                rarea = rarea[0] * rarea[1]
                SI = np.maximum(0, np.minimum(lbox[2], rbox[2]) - np.maximum(lbox[0], rbox[0])) * \
                     np.maximum(0, np.minimum(lbox[3], rbox[3]) - np.maximum(lbox[1], rbox[1]))
                SU = larea + rarea - SI
                overlap = SI / SU
                if bestMatch < overlap and SU != 0:
                    bestMatch = overlap
                    bestIndex = i
            return bestIndex

        Iterations = len(self.dataset)

        print("Starting training")
        for epoch in range(epochs):
            epochLoss = np.float64(0)
            for i in range(Iterations):
                ROIs, peopleTextures, labels = self._load(i)

                # Figure out what ROI belongs to what label
                groundtruth = np.zeros((len(ROIs), 14), dtype=np.float32)
                for label in labels:
                    mostMatching = findBestROI(ROIs, label)
                    if mostMatching != -1:
                        groundtruth[mostMatching][label["category_id"]] = 1

                # Most items in this dataset will be bypassed because no
                # people were found or none overlap with the ground truth
                if len(ROIs) == 0 or not np.any(groundtruth != 0):
                    continue

                groundtruth = torch.from_numpy(groundtruth).to(device)

                # Apply noise to peopleTextures
                noise = np.random.randn(*peopleTextures.shape) * 5
                peopleTextures = peopleTextures.astype(np.int32) + noise.astype(np.int32)
                peopleTextures = np.clip(peopleTextures, 0, 255)
                peopleTextures = peopleTextures.astype(np.uint8)

                peopleTextures = torch.Tensor(peopleTextures).to(device)
                predictions = self.classifier.forward(peopleTextures)

                lossSize = self.lossFunction(predictions, groundtruth)
                lossSize.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                lossSize = lossSize.cpu().item()

                epochLoss += lossSize / Iterations
                if (i - 1) % printUpdateEvery == 0:
                    print("Iteration {} / {}, epoch {} / {}".format(i, Iterations, epoch, epochs))
                    print("Loss size: {}\n".format(lossSize))

                if tensorboard:
                    absI = i + epoch * Iterations
                    writer.add_scalar("Loss size", lossSize, absI)

                # Show visualization
                if visualize:
                    pass  # TODO
                    """
                    image = self.renderDebug(image)
                    plt.ion()
                    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                    plt.draw()
                    plt.pause(4)
                    """

            print("Finished epoch {} / {}. Loss size: {}".format(epoch, epochs, epochLoss))
            self.saveModel(self.modelPath)

        self._training = False

    def _load(self, index):
        cocoImage = self.dataset[index]
        ROIs = None
        if self.useDatabase:
            ROIs = self.lmdb.get(DensePoseWrapper, "deepfashion2" + str(index))
        if ROIs is None:
            ROIs = self.denseposeExtractor.extract(cocoImage[0])
            ROIs = self.sanitizer.extract(ROIs)
            if self.useDatabase:
                self.lmdb.save(DensePoseWrapper, "deepfashion2" + str(index), ROIs)

        peopleTextures = None
        if self.useDatabase:
            peopleTextures = self.lmdb.get(UVMapper, "deepfashion2" + str(index))
        if peopleTextures is None:
            peopleTextures = self.uvMapper.extract(ROIs, cocoImage[0])
            if self.useDatabase:
                self.lmdb.save(UVMapper, "deepfashion2" + str(index), peopleTextures)

        return ROIs, peopleTextures, cocoImage[1]

    def _findColorName(self, personMap, areas):
        areaS = int(personMap.shape[1] / 5)
        Rs, Gs, Bs = [], [], []

        # Sample pixel colors from each relevant area
        for i in areas:
            xMin = int((i % 5) * areaS)
            yMin = int(np.floor(i / 5) * areaS)
            for j in range(20):
                x = np.random.randint(xMin, xMin + areaS)
                y = np.random.randint(yMin, yMin + areaS)

                b = personMap[x, y, 0]
                g = personMap[x, y, 1]
                r = personMap[x, y, 2]

                if r != 0 or b != 0 or g != 0:
                    Rs.append(r)
                    Gs.append(g)
                    Bs.append(b)

        if len(Rs) + len(Gs) + len(Bs) < 3:
            return 0

        # Find mean color
        r = np.mean(np.array(Rs)).astype(np.uint8)
        g = np.mean(np.array(Gs)).astype(np.uint8)
        b = np.mean(np.array(Bs)).astype(np.uint8)

        # This prints the color colored in the terminal
        RESET = '\033[0m'

        def get_color_escape(r, g, b, background=False):
            return '\033[{};2;{};{};{}m'.format(48 if background else 38, r, g, b)

        colorRepr = get_color_escape(r, g, b) \
            + "rgb(" + str(r) + ", " + str(g) + ", " + str(b) + ")" + RESET

        # Get nearest color name
        HSVobj = convert_color(sRGBColor(r, g, b), HSVColor)
        nearestIndex = -1
        diffMin = 100000
        for i in range(len(self.colorsHSV)):
            colEntry = self.colorsHSV[i]
            d = HSVobj.hsv_h - colEntry.hsv_h
            dh = min(abs(d), 360 - abs(d)) / 180.0
            ds = abs(HSVobj.hsv_s - colEntry.hsv_s)
            dv = abs(HSVobj.hsv_v - colEntry.hsv_v) / 255.0
            diff = np.sqrt(dh * dh + ds * ds + dv * dv)
            if diff < diffMin:
                diffMin = diff
                nearestIndex = i

        return {
            "color": self.colors[nearestIndex][1],
            "colorDistance": diffMin,
            "coloredStr": colorRepr
        }
def main():
    # Print args
    args = parser.parse_args()
    for arg in vars(args):
        print("\t", arg, getattr(args, arg))
    print("\n")

    # Determine model path
    modelPath = args.model
    if os.path.isdir(modelPath):
        modelPath = os.path.join(modelPath, args.algorithm + ".pth")
    alreadyExists = os.path.exists(modelPath)

    # Determine tensorboard path
    try:
        tb = int(args.tensorboard)
        tb = True if 0 < tb else False
    except ValueError:
        tb = args.tensorboard

    # Potentially delete old tensorboard
    if type(tb) == str and os.path.isdir("./data/tensorboard/" + tb):
        print("Deleting old tensorboard: " + tb)
        shutil.rmtree("./data/tensorboard/" + tb)

    if args.algorithm == "DescriptionExtractor":
        from DenseSense.algorithms.DescriptionExtractor import DescriptionExtractor
        descriptionExtractor = DescriptionExtractor()
        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            descriptionExtractor.loadModel(modelPath)
        descriptionExtractor.saveModel(modelPath)

        dataset = "val"
        if args.dataset is not None:
            dataset = args.dataset

        descriptionExtractor.train(epochs=args.epochs, dataset=dataset,
                                   learningRate=args.learningRate,
                                   useDatabase=args.lmdb,
                                   printUpdateEvery=args.print,
                                   visualize=args.visualize,
                                   tensorboard=tb)

    elif args.algorithm == "Sanitizer":
        from DenseSense.algorithms.Sanitizer import Sanitizer
        sanitizer = Sanitizer()
        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            sanitizer.load_model(modelPath)
        sanitizer.save_model(modelPath)

        dataset = "val2017"
        if args.dataset is not None:
            dataset = args.dataset

        sanitizer.train(epochs=args.epochs, dataset=dataset,
                        learning_rate=args.learningRate,
                        use_database=args.lmdb,
                        print_update_every=args.print,
                        visualize=args.visualize,
                        tensorboard=tb)

    elif args.algorithm == "ActionClassifier":
        from DenseSense.algorithms.ActionClassifier import ActionClassifier
        ac = ActionClassifier()
        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            ac.loadModel(modelPath)
        ac.saveModel(modelPath)

        dataset = "val2017"
        if args.dataset is not None:
            dataset = args.dataset

        ac.trainAutoEncoder(epochs=args.epochs, dataset=dataset,
                            learningRate=args.learningRate,
                            useLMDB=args.lmdb,
                            printUpdateEvery=args.print,
                            visualize=args.visualize,
                            tensorboard=tb)
def main():
    args = parser.parse_args()
    for arg in vars(args):
        print("\t", arg, getattr(args, arg))
    print("\n")

    modelPath = args.model
    if os.path.isdir(modelPath):
        modelPath = os.path.join(modelPath, args.algorithm + ".pth")
    alreadyExists = os.path.exists(modelPath)

    if args.algorithm == "DescriptionExtractor":
        from DenseSense.algorithms.DescriptionExtractor import DescriptionExtractor
        descriptionExtractor = DescriptionExtractor()

        # FIXME: should be put in a function
        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            descriptionExtractor.loadModel(modelPath)
        descriptionExtractor.saveModel(modelPath)

        dataset = "val"
        if args.dataset is not None:
            dataset = args.dataset

        try:
            tb = int(args.tensorboard)
            tb = True if 0 < tb else False
        except ValueError:
            tb = args.tensorboard

        descriptionExtractor.train(epochs=args.epochs, dataset=dataset,
                                   learningRate=args.learningRate,
                                   useDatabase=args.lmdb,
                                   printUpdateEvery=args.print,
                                   visualize=args.visualize,
                                   tensorboard=tb)

    elif args.algorithm == "Sanitizer":
        from DenseSense.algorithms.Sanitizer import Sanitizer
        sanitizer = Sanitizer()

        if alreadyExists and not args.override:
            print("Will keep working on existing model")
            sanitizer.loadModel(modelPath)
        sanitizer.saveModel(modelPath)

        dataset = "val2017"
        if args.dataset is not None:
            dataset = args.dataset

        try:
            tb = int(args.tensorboard)
            tb = True if 0 < tb else False
        except ValueError:
            tb = args.tensorboard

        sanitizer.train(epochs=args.epochs, dataset=dataset,
                        learningRate=args.learningRate,
                        useDatabase=args.lmdb,
                        printUpdateEvery=args.print,
                        visualize=args.visualize,
                        tensorboard=tb)
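# `parser` is referenced in both versions of main() above but not defined in
# this section. A plausible definition inferred from the arguments the code
# accesses (flag names, defaults, and types are assumptions):
import argparse

parser = argparse.ArgumentParser(description="Train a DenseSense algorithm")
parser.add_argument("algorithm",
                    choices=["DescriptionExtractor", "Sanitizer", "ActionClassifier"])
parser.add_argument("--model", default="./models",
                    help="Model file, or directory to place it in")
parser.add_argument("--dataset", default=None)
parser.add_argument("--epochs", type=int, default=100)
parser.add_argument("--learningRate", type=float, default=0.005)
parser.add_argument("--lmdb", action="store_true",
                    help="Cache intermediate results in LMDB")
parser.add_argument("--print", type=int, default=40,
                    help="Print a progress update every N iterations")
parser.add_argument("--visualize", type=int, default=0)
parser.add_argument("--tensorboard", default="0",
                    help="0/1 or a tensorboard run name")
parser.add_argument("--override", action="store_true",
                    help="Start from scratch even if a model file exists")

if __name__ == "__main__":
    main()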