def prepare_data_loaders(self):
    """
    Build the train/validation datasets and their data loaders.

    Our goal is to construct an embedding space, so the "test set"
    doubles as the validation data.
    """
    # Shared preprocessing pipeline for both splits.
    transform = Compose([
        Resize(cfg.sizes),
        ToTensor(),
        Normalize(cfg.mean, cfg.std),
    ])
    self.train_ds = DeepFashionDataset(
        cfg.root_dir, 'train', transform=transform)
    self.val_ds = DeepFashionDataset(
        cfg.root_dir, 'val', transform=transform)

    # ----------------------------
    # Construct the data loaders
    # ----------------------------
    common_kwargs = dict(
        pin_memory=True,
        batch_size=self.hparams.batch_size,
        num_workers=os.cpu_count(),
    )
    # Only the training loader shuffles.
    self.train_loader = DataLoader(self.train_ds, shuffle=True, **common_kwargs)
    self.val_loader = DataLoader(self.val_ds, shuffle=False, **common_kwargs)
def test_ds_len():
    """Each of the three dataset splits must contain at least one sample."""
    for split in ('train', 'val', 'test'):
        ds = DeepFashionDataset(deep_fashion_root_dir, split)
        assert len(ds) > 0
def test_siamesize_train_ds_epoch():
    """Exercise pair sampling for several epochs on a train-mode dataset."""
    n_epochs = 10
    base_ds = DeepFashionDataset(root_dir, 'val')
    # make it trainable
    base_ds.train = True
    siamese = Siamesize(base_ds)
    for _ in range(n_epochs):
        # Drawing a partner for every anchor must not raise.
        for anchor in range(len(siamese)):
            siamese._get_idx2_and_target(anchor)
def test_subset_from_ds():
    """A random Subset of the train set keeps the requested length and
    yields valid samples.

    Fix: removed the `trans` Compose that was built but never used
    (dead local — the dataset is constructed without a transform).
    """
    train_ds = DeepFashionDataset(deep_fashion_root_dir, 'train')
    n_samples = 20
    # Sample distinct indices so the subset has exactly n_samples entries.
    select_idx = np.random.choice(len(train_ds), n_samples, replace=False)
    sub_ds = Subset(train_ds, select_idx)
    assert len(sub_ds) == n_samples
    assert_dataset(sub_ds)
def prepare_data_loaders(self):
    """
    Build the siamese train/validation datasets and their data loaders.

    Our goal is to construct an embedding space, so the "test set"
    doubles as the validation data.

    Side effects: sets self.train_ds, self.val_ds,
    self.siamese_train_loader, self.siamese_val_loader and
    self.train_loader_len.
    """
    # Shared preprocessing pipeline for both splits.
    trans = Compose([
        Resize(cfg.sizes),
        ToTensor(),
        Normalize(cfg.mean, cfg.std),
    ])
    self.train_ds = DeepFashionDataset(cfg.root_dir, 'train', transform=trans)
    self.val_ds = DeepFashionDataset(cfg.root_dir, 'val', transform=trans)

    # ---------------------------------------------------
    # Returns pairs of images and target same/different
    # ---------------------------------------------------
    siamese_train_ds = Siamesize(self.train_ds)
    # BUG FIX: the validation loader previously wrapped the *train* pairs
    # (and the val Siamesize was commented out). Build a proper siamese
    # dataset over the validation split so validation measures held-out data.
    siamese_val_ds = Siamesize(self.val_ds)

    if self._debug:
        # Shrink every dataset so a debug run finishes quickly.
        self.train_ds = Subset(self.train_ds, range(5000))
        self.val_ds = Subset(self.val_ds, range(1000))
        siamese_train_ds = Subset(siamese_train_ds, range(5000))
        siamese_val_ds = Subset(siamese_val_ds, range(1000))

    # ----------------------------
    # Construct data loaders
    # ----------------------------
    loader_kwargs = {
        'pin_memory': True,
        'batch_size': self.hparams.batch_size,
        'num_workers': os.cpu_count(),
    }
    self.siamese_train_loader = DataLoader(siamese_train_ds, **loader_kwargs)
    self.siamese_val_loader = DataLoader(siamese_val_ds, **loader_kwargs)

    # Set the length for tqdm progress reporting
    self.train_loader_len = len(self.siamese_train_loader)
def datasets(self):
    """Lazily build and cache the dict of plain and siamese datasets.

    Returns a dict with keys "train"/"val"/"test" (plain datasets) and
    "siam_train"/"siam_val"/"siam_test" (pair-sampling wrappers). When
    self._debug is set, every dataset is randomly subsampled.
    """
    if self._datasets is None:
        trans = Compose(
            [Resize(cfg.sizes), ToTensor(), Normalize(cfg.mean, cfg.std)])
        # Base datasets for the three splits.
        train_ds = DeepFashionDataset(cfg.root_dir, 'train', transform=trans)
        val_ds = DeepFashionDataset(cfg.root_dir, 'val', transform=trans)
        test_ds = DeepFashionDataset(cfg.root_dir, 'test', transform=trans)
        # Pair-sampling wrappers for the siamese objective.
        siam_train_ds = Siamesize(train_ds)
        siam_val_ds = Siamesize(val_ds)
        siam_test_ds = Siamesize(test_ds)
        # Subset if needed
        if self._debug:
            train_samples = np.random.choice(
                len(train_ds), 1000, replace=False)
            val_samples = np.random.choice(len(val_ds), 100, replace=False)
            # BUG FIX: test indices were previously reused from val_samples,
            # which are drawn against len(val_ds) and could index out of
            # range (or bias sampling) on the test split. Draw them from
            # the test split itself.
            test_samples = np.random.choice(len(test_ds), 100, replace=False)
            # Subset the datasets
            train_ds = Subset(train_ds, train_samples)
            val_ds = Subset(val_ds, val_samples)
            test_ds = Subset(test_ds, test_samples)
            siam_train_ds = Subset(siam_train_ds, train_samples)
            siam_val_ds = Subset(siam_val_ds, val_samples)
            siam_test_ds = Subset(siam_test_ds, test_samples)
        # -------------------------------------------------------
        # pack them up
        self._datasets = {
            "train": train_ds,
            "val": val_ds,
            "test": test_ds,
            "siam_train": siam_train_ds,
            "siam_val": siam_val_ds,
            "siam_test": siam_test_ds,
        }
    return self._datasets
def test_siamesize_train_ds():
    """One full epoch: sampled similar/dissimilar targets should be balanced."""
    base_ds = DeepFashionDataset(root_dir, 'train')
    siamese = Siamesize(base_ds)
    labels = []
    for anchor in range(len(siamese)):
        _, target = siamese._get_idx2_and_target(anchor)
        labels.append(target)
    # Empirical probability of drawing a "similar" pair should hover near 0.5.
    assert 0.48 <= np.mean(labels) <= 0.52
def test_siamesize_train_ds_basic():
    """Basic sanity checks on the pair sampler for the first 5000 anchors."""
    base_ds = DeepFashionDataset(root_dir, 'train')
    siamese = Siamesize(base_ds)
    labels = []
    for anchor in range(5000):
        partner, target = siamese._get_idx2_and_target(anchor)
        # Target is binary and an anchor never pairs with itself.
        assert target in [0, 1]
        assert anchor != partner
        labels.append(target)
    # Empirically the similar/dissimilar split should be near 50/50.
    assert 0.4 <= np.mean(labels) <= 0.6
def test_siamesize_test_ds():
    """Test-mode pair sampling is balanced and repeats identically.

    Fix: `pairs` is now a set — the second-round membership check was a
    list scan, turning the loop into accidental O(n^2).
    """
    dataset = DeepFashionDataset(root_dir, 'test')
    siamese_ds = Siamesize(dataset)
    assert not siamese_ds.train
    # first round: record every sampled pair
    pairs = set()
    targets = list()
    for idx1 in range(len(siamese_ds)):
        idx2, is_similar = siamese_ds._get_idx2_and_target(idx1)
        assert is_similar in [0, 1]
        assert idx1 != idx2
        # store the pairs and targets
        pairs.add((idx1, idx2))
        targets.append(is_similar)
    # Empirical probability of "similar" should hover near 0.5.
    prob = np.mean(targets)
    assert 0.48 <= prob <= 0.52
    # second round: every pair must be reproduced exactly
    for idx1 in range(len(siamese_ds)):
        idx2, is_similar = siamese_ds._get_idx2_and_target(idx1)
        assert (idx1, idx2) in pairs
# Dataset from utils.datasets import DeepFashionDataset from torchvision.transforms import Compose from torchvision.transforms import Resize from torchvision.transforms import ToTensor from torchvision.transforms import Normalize from config.deep_fashion import DeepFashionConfig as cfg from torch.utils.data import DataLoader from utils.datasets import Siamesize trans = Compose([ Resize(cfg.sizes), ToTensor(), Normalize(cfg.mean, cfg.std), ]) # dataset train_ds = DeepFashionDataset(cfg.root_dir, 'train', transform=trans) val_ds = DeepFashionDataset(cfg.root_dir, 'val', transform=trans) siamese_train_ds = Siamesize(train_ds) # loader loader_kwargs = { 'pin_memory': True, 'batch_size': 100, 'num_workers': 4, } s_train_loader = DataLoader(siamese_train_ds, **loader_kwargs) train_loader = DataLoader(val_ds, **loader_kwargs) val_loader = DataLoader(val_ds, **loader_kwargs) # Optim import torch.optim as optim optimizer = optim.Adam(clsf_net.parameters(), lr=1e-4)
import matplotlib.pyplot as plt
# Headless backend: figures are saved to disk, never shown interactively.
plt.switch_backend('Agg')

# take the input args
import sys
exp_folder = sys.argv[1]
print("Experiment result folder:", exp_folder)

# Models: load the embedding network weights saved after epoch 20.
emb_net = ResidualEmbNetwork()
emb_net.load_state_dict(torch.load(join(exp_folder, "_emb_net_20.pth")))

# Dataset
trans = Compose([Resize(cfg.sizes), ToTensor(), Normalize(cfg.mean, cfg.std)])
# train_ds = DeepFashionDataset(cfg.root_dir, 'train', transform=trans)
val_ds = DeepFashionDataset(cfg.root_dir, 'val', transform=trans)
test_ds = DeepFashionDataset(cfg.root_dir, 'test', transform=trans)

# Extract embedding vectors
load_kwargs = {'batch_size': 128, 'num_workers': os.cpu_count()}
test_embs, _ = extract_embeddings(emb_net, DataLoader(test_ds, **load_kwargs))
val_embs, _ = extract_embeddings(emb_net, DataLoader(val_ds, **load_kwargs))

# search tree building: index the val embeddings for nearest-neighbour lookup
search_tree = AnnoyIndex(emb_net.emb_dim, metric='euclidean')
for i, vec in enumerate(val_embs):
    search_tree.add_item(i, vec.cpu().numpy())
# 100 trees — more trees trade index size/build time for recall.
search_tree.build(100)
def preprocess(ds_type):
    """Precompute image tensors for one split and pickle them to disk.

    Parameters
    ----------
    ds_type : str
        Split name passed to DeepFashionDataset, e.g. 'train'/'val'/'test'.

    Side effects: writes ``img_<ds_type>.pkl`` under ``Config.root_dir``.
    """
    ds = DeepFashionDataset(Config.root_dir, ds_type, transform=Config.trans)
    img_dict = get_img_path_to_tensor_dict(ds)
    # BUG FIX: previously saved the undefined name `test_img_dict`, which
    # raised NameError at runtime; save the dict built above instead.
    torch.save(img_dict,
               path_join(Config.root_dir, 'img_{}.pkl'.format(ds_type)))
from torchvision.transforms import ToTensor
from torchvision.transforms import Compose
from torchvision.transforms import Resize
from torchvision.transforms import Normalize
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm

if __name__ == "__main__":
    # Compute running statistics of the train split (used to derive the
    # normalization constants elsewhere in the project).
    deep_fashion_root_dir = "./deepfashion_data"
    trans = Compose([
        Resize((224, 224)),
        ToTensor(),
        # Normalize([0.7464, 0.7155, 0.7043], [0.2606, 0.2716, 0.2744]),
        # For check against
    ])
    train_ds = DeepFashionDataset(deep_fashion_root_dir, 'train',
                                  transform=trans)
    loader = DataLoader(train_ds, batch_size=200, num_workers=2)
    scalar = StandardScaler()
    # Stream batches through the scaler so the full set never sits in memory.
    for imgs, _ in tqdm(loader):
        scalar.partial_fit(imgs)
    print("--------------------")
    # NOTE(review): sklearn's StandardScaler exposes `mean_`/`var_`/`scale_`,
    # not `_mean`/`_var`/`_std` — presumably this is a project-local scaler
    # (neither StandardScaler nor DeepFashionDataset is imported in this
    # chunk); confirm against the actual import.
    print(scalar._mean)
    print(scalar._var)
    print(scalar._std)
    print("--------------------")
def test_get_img_from_ds():
    """Fetching one sample yields a known category and a non-blank image."""
    ds = DeepFashionDataset(deep_fashion_root_dir, 'train')
    img, cat = ds.get_img_cat(1)
    assert cat in catagories
    # An all-zero array would mean a completely black (empty) image.
    assert np.any(np.array(img))
def test_get_item_from_ds():
    """The raw train dataset satisfies the generic dataset contract."""
    ds = DeepFashionDataset(deep_fashion_root_dir, 'train')
    assert_dataset(ds)