def __init__(self, root, segs_root, split='train'):
    """Load the question pickle for *split* and build image transforms.

    Args:
        root: Root directory of the source images (stored for later use).
        segs_root: Directory with per-split segmentation outputs; the
            '<split>_mat' and '<split>_apps_single' subdirectories are
            derived from it.
        split: Split name ('train', 'val', ...); augmentation is enabled
            only for 'train'.
    """
    # Fix: removed a stray no-op expression statement (`self.data`) that
    # was left in the original body, plus dead commented-out transforms.
    with open('data/' + split + '.pkl', 'rb') as f:
        self.data = pickle.load(f)
    self.root_dir = root
    # e.g. '<segs_root>/train_mat' — per-image .mat files (masks, coords)
    self.mat_dir = segs_root + '/' + split + '_mat'
    # e.g. '<segs_root>/train_apps_single' — per-object appearance crops
    self.apps_dir = segs_root + '/' + split + '_apps_single'
    # Full-image pipeline with pad+random-crop jitter, tensor output.
    self.transform0 = transforms.Compose([
        Scale([128, 128]),
        transforms.Pad(4),
        transforms.RandomCrop([128, 128]),
        transforms.ToTensor(),
    ])
    # Same spatial jitter but kept as a PIL image so a random rotation
    # can be applied before tensor conversion (see transform2).
    self.transform1 = transforms.Compose([
        Scale([128, 128]),
        transforms.Pad(4),
        transforms.RandomCrop([128, 128]),
    ])
    # Final PIL -> tensor step, applied after any PIL-space augmentation.
    self.transform2 = transforms.Compose([
        transforms.ToTensor(),
    ])
    self.if_aug = (split == 'train')
    # Appearance crops are converted to tensors without resizing here.
    self.transform_app = transforms.Compose([
        transforms.ToTensor(),
    ])
    self.split = split
def __init__(self, root, split='train'):
    """Load the smallNORB pickle for *split* and build eval/train transforms."""
    pkl_path = os.path.join(root, split + '.pkl')
    with open(pkl_path, 'rb') as handle:
        self.data = pickle.load(handle)
    self.root_dir = root
    self.img_dir = os.path.join(root, 'smallnorb_' + split)
    # Deterministic eval-time pipeline: resize to 48x48, center-crop to 32x32.
    eval_steps = [
        Scale([48, 48]),
        transforms.CenterCrop([32, 32]),
        transforms.ToTensor()
    ]
    # Train-time pipeline adds padding, a small rotation and color jitter.
    aug_steps = [
        Scale([48, 48]),
        transforms.Pad(1),
        transforms.CenterCrop([32, 32]),
        transforms.RandomRotation(5),
        transforms.ColorJitter(brightness=0.1, contrast=0.1),
        transforms.ToTensor()
    ]
    self.transform = transforms.Compose(eval_steps)
    self.transform_aug = transforms.Compose(aug_steps)
    self.if_aug = (split == 'train')
    self.split = split
def _init_transform(self):
    """Select the transform that matches the configured rotation/scale flags.

    Returns a Rotate, Scale, or combined ScaleRotate transform depending on
    which of ``self.thetas`` / ``self.scales`` are truthy; falls back to a
    plain tensor conversion when neither is set.
    """
    has_rotation = bool(self.thetas)
    has_scaling = bool(self.scales)
    if has_rotation and has_scaling:
        return ScaleRotate(self.scale_range_1, self.scale_range_2,
                           self.theta_range_1, self.theta_range_2)
    if has_rotation:
        return Rotate(self.theta_range_1, self.theta_range_2)
    if has_scaling:
        return Scale(self.scale_range_1, self.scale_range_2)
    return transforms.ToTensor()
def __init__(self, g_conv_dim, z_size, d_conv_dim):
    """Wire up the GAN pair plus its loss, scaler and sample buffer."""
    super(Net, self).__init__()
    self.z_size = z_size
    # Adversarial sub-networks.
    self.generator = Generator(z_size, g_conv_dim)
    self.discriminator = Discriminator(d_conv_dim)
    # Logit-space BCE: the discriminator outputs raw scores, not probabilities.
    self.criterion = nn.BCEWithLogitsLoss()
    self.scale = Scale()
    self.samples = []  # generated samples collected during training
def test_augment(img, mask=None, model='scale'):
    """Test-time preprocessing: resize (or center-pad) to 128, then normalize.

    ``model`` selects the spatial step ('scale' vs anything else = pad);
    the mask argument is intentionally ignored at test time.
    """
    if model == 'scale':
        spatial = Scale(size=128)
    else:
        spatial = Centerpad(size=(128, 128))
    normalize = ImageOnly(
        Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)))
    pipeline = DualCompose([spatial, normalize])
    return pipeline(img, mask=None)
def train_augment(img, mask, prob=0.5):
    """Training augmentation applied jointly to image and mask.

    Flip/shift-scale jitter, one randomly-chosen photometric tweak, a
    median blur, resize to ``pad`` and ImageNet normalization.
    """
    # At most one of these photometric tweaks fires (overall prob 0.5).
    photometric = OneOf([
        ImageOnly(Brightness_shift(limit=0.1)),
        ImageOnly(do_Gamma(limit=0.08)),
        ImageOnly(Brightness_multiply(limit=0.08)),
    ], prob=0.5)
    steps = [
        HorizontalFlip(prob=0.5),
        ShiftScale(limit=4, prob=0.5),
        photometric,
        ImageOnly(Median_blur(ksize=3, prob=.15)),
        Scale(size=pad),  # NOTE(review): `pad` comes from module scope — confirm
        ImageOnly(Normalize(mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225))),
    ]
    return DualCompose(steps)(img, mask)
def __init__(self, root=""):
    """Index a detection dataset rooted at *root* and set up its transform."""
    self.detected_file = os.path.join(root, self.detected_file_name)
    self.annotations = get_classes(os.path.join(root, self.label_file_name))
    self._root_dir = root
    # Parallel per-item lists, populated by load_label_file().
    self.item_dict = {
        ItemEnum.IMAGE_PATH: [],
        ItemEnum.BOX_COORDS: [],
        ItemEnum.LABEL: [],
    }
    self.load_label_file()
    self.transform = Compose([Scale(0.1), Rotate()])
def view_detection_dataset(loader, annotations):
    """Visualize every sample in *loader*: draw each box with its class name
    and display the image until a button press.

    Args:
        loader: Iterable yielding (image_tensor, boxes, labels) samples.
        annotations: Class table consumed by ``get_class_name_from_id``.
    """
    # Fix: removed a leftover debug `print(bbox.shape)` that ran once per
    # bounding box inside the drawing loop.
    transform = Compose([
        Scale(0.2),
        Rotate()
    ])
    for img, bbox, label in tqdm(loader):
        # CHW tensor -> HWC numpy image for OpenCV / matplotlib.
        img = img.permute(1, 2, 0).cpu().numpy()
        bbox = bbox.cpu().numpy()
        label = label.cpu().numpy()
        img, bbox = transform(img, bbox)
        for i in range(bbox.shape[0]):
            img = cv2.rectangle(img, (int(bbox[i, 0]), int(bbox[i, 1])),
                                (int(bbox[i, 2]), int(bbox[i, 3])),
                                (255, 0, 0), 2)
            cv2.putText(img, get_class_name_from_id(annotations, label[i]),
                        (int(bbox[i, 0]), int(bbox[i, 1] - 20)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (100, 255, 100), 2)
        plt.figure(dpi=120)
        plt.imshow(img, cmap="jet")
        plt.waitforbuttonpress(0)
        plt.close()
# NOTE(review): this chunk starts mid-method (the `def __getitem__` header is
# above the visible range) and `collate_data` is truncated below — indentation
# reconstructed; confirm against the full file.
    # Image id parsed from the filename suffix, e.g. 'xxx_000123.png' -> 123.
    id = int(imgfile.rsplit('_', 1)[1][:-4])
    img = torch.from_numpy(self.img[id])
    # Constraint masks are only available for the training split.
    if self.split == "train":
        c_mask = torch.from_numpy(self.constraints[index])
    else:
        c_mask = None
    return img, question, len(question), answer, family, c_mask

def __len__(self):
    # One entry per (image, question, answer, family) record.
    return len(self.data)

# Shared 224x224 pipeline: resize, pad + random-crop jitter, tensor, [-1, 1] norm.
transform = transforms.Compose([
    Scale([224, 224]),
    transforms.Pad(4),
    transforms.RandomCrop([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])
])

def collate_data(batch):
    # Pads variable-length questions into one int64 matrix and sorts the
    # batch by question length (descending). Body continues past this chunk.
    images, lengths, answers, families, c_masks = [], [], [], [], []
    batch_size = len(batch)
    max_len = max(map(lambda x: len(x[1]), batch))
    questions = np.zeros((batch_size, max_len), dtype=np.int64)
    sort_by_len = sorted(batch, key=lambda x: len(x[1]), reverse=True)
def train(*, data_folder='data', nb=None, lr=1e-3, model_name='VAE_CPPN',
          batch_size=32, epochs=1000, input_dim=1, depth=8, max_len=500,
          log_interval=50, latent_size=10, ensemble_dim=1, cuda=False):
    """Train a VAE on audio clips and periodically dump reconstructions.

    Every ``log_interval`` iterations the loss is printed, real/fake clips
    are written to ``out/*.wav`` and a comparison plot to ``out.png``.
    NOTE(review): indentation of the logging block was reconstructed from a
    whitespace-mangled source — confirm against the original file.
    """
    # Resolve the model class by name from the project's `model` module.
    mod = getattr(model, model_name)
    vae = mod(
        latent_size=latent_size,
        output_dim=max_len,
        ensemble_dim=ensemble_dim,
        depth=depth,
    )
    if cuda:
        vae = vae.cuda()
    optimizer = optim.Adam(
        vae.parameters(),
        lr=lr,
    )
    vae.train()
    epoch_start = 1
    # Pad/trim each clip to max_len samples, then scale to float.
    transform = Compose([
        PadTrim(max_len=max_len),
        Scale(),
    ])
    if nb:
        nb = int(nb)
    dataset = Dataset(data_folder, transform=transform, nb=nb)
    print(len(dataset))
    dataloader = DataLoader(dataset, batch_size=batch_size)
    nb_iter = 0
    for epoch in range(epoch_start, epochs + 1):
        for batch_index, data in enumerate(dataloader):
            x = data
            x = x.cuda() if cuda else x
            vae.zero_grad()
            xrec, mu, logvar = vae(x)
            loss = vae.loss_function(x, xrec, mu, logvar)
            loss.backward()
            optimizer.step()
            if nb_iter % log_interval == 0:
                print(f'niter: {nb_iter:05d} loss: {loss.item():.4f}')
                x = x.detach().cpu().numpy()
                xrec = xrec.detach().cpu().numpy()
                # First three clips, transposed for plotting.
                signal = x[0:3, 0].T
                fake_signal = xrec[0:3, 0].T
                for i in range(len(xrec)):
                    s = xrec[i, 0]
                    wavfile.write(f'out/fake_{i:03d}.wav', 16000, s)
                for i in range(len(x)):
                    s = x[i, 0]
                    wavfile.write(f'out/real_{i:03d}.wav', 16000, s)
                fig = plt.figure(figsize=(50, 10))
                plt.plot(signal, color='blue', label='true')
                plt.plot(fake_signal, color='orange', label='fake')
                #plt.legend()
                plt.savefig('out.png')
                plt.close(fig)
            nb_iter += 1
import os
import sys
import json
import pickle as pkl
import constraints
from typing import List, Type, Dict
from transforms import Scale
import torch
import h5py
from PIL import Image
from tqdm import tqdm
from torchvision import transforms

# 224x224 resize followed by tensor conversion, applied to every image.
transform = transforms.Compose([Scale([224, 224]), transforms.ToTensor()])

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def parse_scene_into_dict(scene_file_path: str, split: str = "train") -> Dict[str, Dict]:
    """Map per-image filename -> scene record from a CLEVR scenes JSON.

    NOTE(review): body is truncated in this chunk — continues past the
    visible range.
    """
    scene_dict = {}
    with open(scene_file_path) as fin:
        scene_data = json.load(fin)
    for scene in scene_data["scenes"]:
        # Reconstruct the image filename, e.g. 'CLEVR_train_000000.png'.
        _img_fname = "CLEVR_{}_{}.png".format(
            split,
def train(*, data_folder='data', nb=None, lr=1e-04, weight_decay=1e-04,
          beta1=0.5, beta2=.999, batch_size=32, epochs=1000, input_dim=1,
          max_len=500, log_interval=50, cppn=True, cuda=False):
    """Adversarially train a generator (optionally CPPN-style) on audio clips.

    Uses weight clipping on the critic (WGAN-style) and periodically dumps
    real/fake clips to ``out/*.wav`` plus a comparison plot to ``out.png``.
    NOTE(review): nesting of the discriminator/generator update steps was
    reconstructed from a whitespace-mangled source — confirm against the
    original file.
    """
    # define the optimizers.
    if cppn:
        # CPPN emits one sample per (time, latent) coordinate.
        output_dim = 1
    else:
        output_dim = max_len
    generator = Generator(input_dim=input_dim, output_dim=output_dim)
    discriminator = Discriminator(input_dim=1, output_dim=1, input_size=max_len)
    if cuda:
        discriminator = discriminator.cuda()
        generator = generator.cuda()
    generator_optimizer = optim.Adam(generator.parameters(), lr=lr,
                                     betas=(beta1, beta2),
                                     weight_decay=weight_decay)
    discriminator_optimizer = optim.Adam(discriminator.parameters(), lr=lr,
                                         betas=(beta1, beta2),
                                         weight_decay=weight_decay)
    # prepare the model and statistics.
    generator.train()
    discriminator.train()
    epoch_start = 1
    transform = Compose([
        PadTrim(max_len=max_len),
        Scale(),
    ])
    if nb:
        nb = int(nb)
    dataset = Dataset(data_folder, transform=transform, nb=nb)
    print(len(dataset))
    dataloader = DataLoader(dataset, batch_size=batch_size)
    nb_iter = 0
    for epoch in range(epoch_start, epochs + 1):
        for batch_index, data in enumerate(dataloader):
            # Clamp critic weights (WGAN-style Lipschitz constraint).
            for p in discriminator.parameters():
                p.data.clamp_(-0.1, 0.1)
            x = data
            x = x.cuda() if cuda else x
            discriminator.zero_grad()
            dreal = discriminator(x).mean()
            # Latent part of the input; first channel is reserved for time.
            l = torch.randn(batch_size, generator.input_dim - 1)
            t = torch.linspace(-1, 1, discriminator.input_size)
            if cppn:
                # Build (batch, time, input_dim) coordinates: time in channel
                # 0, the same latent vector broadcast over every time step.
                z = torch.zeros(batch_size, discriminator.input_size, generator.input_dim)
                if cuda:
                    z = z.cuda()
                z[:, :, 1:] = l.view(l.size(0), 1, l.size(1)).expand(l.size(0), z.size(1), l.size(1))
                z[:, :, 0] = t
                z = z.contiguous()
                # Flatten so the generator maps each coordinate independently.
                z_ = z.view(z.size(0) * z.size(1), -1)
                z_ = z_.contiguous()
                xfake = generator(z_)
                xfake = xfake.view(z.size(0), 1, z.size(1))
            else:
                z = torch.randn(batch_size, generator.input_dim)
                if cuda:
                    z = z.cuda()
                xfake = generator(z)
                xfake = xfake.view(xfake.size(0), 1, xfake.size(1))
            # Critic update on every other iteration only.
            if nb_iter % 2 == 0:
                dfake = discriminator(xfake).mean()
                discr_loss = dfake - dreal
                discr_loss.backward(retain_graph=True)
                discriminator_optimizer.step()
            generator.zero_grad()
            dfake = discriminator(xfake).mean()
            gen_loss = -dfake
            gen_loss.backward()
            generator_optimizer.step()
            if nb_iter % log_interval == 0:
                print(
                    f'niter: {nb_iter:05d} gen_loss: {gen_loss.item():.4f} discr_loss: {discr_loss.item():.4f}'
                )
                x = x.detach().cpu().numpy()
                xfake = xfake.detach().cpu().numpy()
                # First three clips, transposed for plotting.
                signal = x[0:3, 0].T
                fake_signal = xfake[0:3, 0].T
                for i in range(len(xfake)):
                    s = xfake[i, 0]
                    wavfile.write(f'out/fake_{i:03d}.wav', 16000, s)
                for i in range(len(x)):
                    s = x[i, 0]
                    wavfile.write(f'out/real_{i:03d}.wav', 16000, s)
                fig = plt.figure(figsize=(50, 10))
                plt.plot(signal, color='blue', label='true')
                plt.plot(fake_signal, color='orange', label='fake')
                #plt.legend()
                plt.savefig('out.png')
                plt.close(fig)
            nb_iter += 1
def __getitem__(self, index):
    """Return padded per-object crops/masks plus the question record.

    Loads the per-image .mat (masks, coordinates, layer count) and the
    per-object 128x128 appearance crops, resizes crops to 32x32 (with
    pad/crop/rotate augmentation on the train split), and zero-pads all
    per-object tensors to MX_N slots.

    Returns:
        (apps [MX_N,3,32,32], masks [MX_N,32,32], num_layers, question,
         question length, answer, family)
    """
    imgfile, question, answer, family = self.data[index]
    # Strip the '.png' extension to derive the per-image directory/mat names.
    dir_path = os.path.join(self.apps_dir, imgfile[0:len(imgfile) - 4])
    mat_path = os.path.join(self.mat_dir, imgfile[0:len(imgfile) - 4] + '.mat')
    mat = sio.loadmat(mat_path)
    num_layers = mat['num_layers'] - 1  # no background
    # Layer 0 of the stored masks is the background — skip it when copying.
    masks = np.zeros((MX_N, 32, 32))
    masks_part = mat['masks']
    masks[:int(num_layers)] = masks_part[1:]
    # Per-object appearance crops, loaded at 128x128 RGB.
    apps = np.zeros((MX_N, 128, 128, 3), dtype=np.uint8)
    for l in range(1, int(num_layers) + 1):
        app_path = os.path.join(
            dir_path, imgfile[0:len(imgfile) - 4] + '_' + str(l) + '.png')
        app_img = imread(app_path)
        app_img = imresize(app_img, [128, 128])
        apps[l - 1] = app_img[:, :, 0:3]  # drop alpha channel if present
    # Object coordinates, again skipping the background slot.
    coors = np.zeros((MX_N, 2))
    coors_part = mat['coors']
    coors[:int(num_layers)] = coors_part[1:int(num_layers) + 1]
    if self.if_aug:
        # Train split: resize, pad+random-crop, then a small random rotation
        # in PIL space before tensor conversion.
        apps_tensor = []
        for l in range(int(num_layers)):
            transform_tmp = transforms.Compose([
                transforms.ToPILImage(),
                Scale([32, 32]),
                transforms.Pad(1),
                transforms.RandomCrop([32, 32]),
            ])
            apps_tmp = transform_tmp(apps[l])
            angle = random.random() * 2.8648 * 2 - 2.8648  # -0.05-0.05
            apps_tmp = apps_tmp.rotate(angle, resample=Image.BILINEAR)
            apps_tmp = self.transform2(apps_tmp)
            apps_tensor.append(apps_tmp)
        # NOTE(review): torch.stack on an empty list would raise if
        # num_layers == 0 — presumably every scene has >= 1 object; confirm.
        apps_tensor = torch.stack(apps_tensor)
    else:
        # Eval splits: deterministic resize + tensor conversion only.
        apps_tensor = []
        for l in range(int(num_layers)):
            transform_tmp = transforms.Compose([
                transforms.ToPILImage(),
                Scale([32, 32]),
                transforms.ToTensor(),
            ])
            apps_tmp = transform_tmp(apps[l])
            apps_tensor.append(apps_tmp)
        apps_tensor = torch.stack(apps_tensor)
    # Zero-pad the per-object tensors out to a fixed MX_N slots.
    apps_tensor_pad = torch.FloatTensor(np.zeros((MX_N, 3, 32, 32)))
    apps_tensor_pad[:int(num_layers)].copy_(apps_tensor)
    # NOTE(review): `coors` is computed and converted but never returned —
    # looks like dead code or a dropped return value; verify against callers.
    coors = torch.FloatTensor(coors)
    masks_tensor_pad = torch.FloatTensor(masks)
    return apps_tensor_pad, masks_tensor_pad, int(
        num_layers), question, len(question), answer, family
def valid_augment(img, mask):
    """Validation preprocessing: resize to ``pad``, then ImageNet-normalize."""
    resize = Scale(size=pad)
    normalize = ImageOnly(Normalize(mean=(0.485, 0.456, 0.406),
                                    std=(0.229, 0.224, 0.225)))
    return DualCompose([resize, normalize])(img, mask)
class Dataset:
    """Wav-file dataset: each item is a (1, n_samples) tensor, optionally
    passed through ``transform``."""

    def __init__(self, folder, transform=None, nb=None):
        self.folder = folder
        # One entry per child of `folder` (class labels implied by layout).
        self.classes = os.listdir(folder)
        pattern = os.path.join(folder, '**', '*.wav')
        found = glob(pattern)
        # Optionally cap the number of files taken.
        self.filenames = found[0:nb] if nb else found
        self.transform = transform

    def __getitem__(self, idx):
        # Copy so the tensor owns writable memory; sample rate is unused.
        _, raw = wavfile.read(self.filenames[idx])
        clip = torch.from_numpy(raw.copy()).view(1, -1)
        return self.transform(clip) if self.transform else clip

    def __len__(self):
        return len(self.filenames)


if __name__ == '__main__':
    pipeline = Compose([
        Scale(),
        PadTrim(max_len=16000),
    ])
    dataset = Dataset('data', transform=pipeline)
    print(dataset[0].size())
    dataloader = DataLoader(dataset, batch_size=32)