def __getitem__(self, idx):
    """Return a clip of interval+1 frames and its label (or label path).

    Frames advance from start_frame, or run backward from
    start_frame + 2*interval when self.reverse is set.
    """
    file_name, start_frame, back_address, label_path = self.file_list[idx]
    imgs = []
    idxes = []
    for step in range(self.interval + 1):
        if self.reverse:
            frame_num = start_frame - step + 2 * self.interval  # backward
        else:
            frame_num = start_frame + step  # forward
        frame_path = os.path.join(file_name + "{:06d}".format(frame_num) + back_address)
        frame = self.img_transform(default_loader(frame_path))
        imgs.append(frame)
        idxes.append(frame_num)
    if self.mode in ('test', 'video'):
        # No ground truth available in these modes; hand back the label path instead.
        return imgs, label_path
    label_file = os.path.join(self.data_path + "gtFine", self.mode,
                              label_path.split("_")[0], label_path)
    label = default_loader(label_file)
    # Class ids live in the first channel of the label image.
    label = self.encode_segmap(torch.tensor(np.array(label)[:, :, 0]))
    if self.bi_direction:
        return imgs, label, idxes
    return imgs, label
def __getitem__(self, idx):
    """Return a clip of interval+1 normalized CHW frame tensors plus label or label path."""
    file_name, start_frame, back_address, label_path = self.file_list[idx]
    # Per-channel statistics (presumably the dataset mean/std — values kept as-is).
    mean = np.array([0.41189489566336, 0.4251328133025, 0.4326707089857])
    std = np.array([0.27413549931506, 0.28506257482912, 0.28284674400252])
    imgs = []
    idxes = []
    for step in range(self.interval + 1):
        if self.reverse:
            frame_num = start_frame - step + 2 * self.interval  # backward
        else:
            frame_num = start_frame + step  # forward
        frame_path = os.path.join(
            self.data_path + self.train_prefix + file_name.split("_")[0],
            file_name + "{:05d}".format(frame_num) + back_address)
        frame = np.array(default_loader(frame_path)).astype(np.float32)
        frame = frame / 255.0
        frame = frame - mean
        frame = frame / std
        # HWC -> CHW for PyTorch consumption.
        frame = np.ascontiguousarray(frame, dtype=np.float32).transpose(2, 0, 1)
        imgs.append(torch.tensor(frame))
        idxes.append(frame_num)
    if self.mode == 'test':
        return imgs, label_path
    label = default_loader(os.path.join(self.data_path + self.eval_prefix, label_path))
    # Class ids live in the first channel of the label image.
    label = self.encode_segmap(torch.tensor(np.array(label)[:, :, 0]))
    if self.bi_direction:
        return imgs, label, idxes
    return imgs, label
def __getitem__(self, idx):
    """Return the (image, heatmap) pair at idx, each run through its own transform."""
    img_path, heatmap_path = self.dataset[idx][0], self.dataset[idx][1]
    image = self.img_transform(default_loader(img_path))
    heatmap = self.heatmap_transform(default_loader(heatmap_path))
    return (image, heatmap)
def __getitem__(self, idx):
    """Load one (image, labels) pair; images live under train/ or test/ of dst_path."""
    file_name = self.imgs[idx]
    subdir = '/train/' if self.is_train else '/test/'
    image = self.transform(default_loader(self.dst_path + subdir + file_name))
    targets = torch.Tensor(self.lbls[idx])
    return [image, targets]
def classify(self, image: ImageType, text: str, image_tensor=None, zero_image=False, zero_text=False):
    """Classifies a given image and text in it into Hateful/Non-Hateful.

    Image can be a url or a local path or you can directly pass a
    PIL.Image.Image object. Text needs to be a sentence containing all
    text in the image.

    Args:
        image (ImageType): Image to be classified
        text (str): Text in the image
        image_tensor: pre-processed image tensor; when given, `image` is ignored
            and the raw score tensor is returned instead of a label dict
        zero_image: zero out the image features when classifying
        zero_text: zero out the text features when classifying

    Returns:
        {"label": 0, "confidence": 0.56} — or the softmax score tensor when
        image_tensor was supplied.
    """
    sample = Sample()
    # Fix: `image_tensor != None` performs an elementwise comparison on torch
    # tensors; identity check is the correct (and Pythonic) test.
    if image_tensor is not None:
        sample.image = image_tensor
    else:
        if isinstance(image, str):
            if image.startswith("http"):
                # Download remote images to a temp file before loading.
                temp_file = tempfile.NamedTemporaryFile()
                download(image, *os.path.split(temp_file.name), disable_tqdm=True)
                image = tv_helpers.default_loader(temp_file.name)
                temp_file.close()
            else:
                image = tv_helpers.default_loader(image)
        image = self.processor_dict["image_processor"](image)
        sample.image = image
    text = self.processor_dict["text_processor"]({"text": text})
    sample.text = text["text"]
    if "input_ids" in text:
        sample.update(text)
    sample_list = SampleList([sample])
    # Run the model on whichever device it currently lives on.
    device = next(self.model.parameters()).device
    sample_list = sample_list.to(device)
    output = self.model(sample_list, zero_image=zero_image, zero_text=zero_text)
    scores = nn.functional.softmax(output["scores"], dim=1)
    if image_tensor is not None:
        return scores
    confidence, label = torch.max(scores, dim=1)
    return {"label": label.item(), "confidence": confidence.item()}
def __getitem__(self, index):
    """Return a siamese image pair with a same-identity target and auxiliary labels.

    With probability 1/2 the partner image shares label1 (siamese_target == 1),
    otherwise it is drawn from a different label. The vf_labels* values compare
    the first four and last four characters of the two samples' class_name
    strings — presumably two independent attributes encoded in the name
    (TODO confirm against the dataset's naming scheme).
    """
    siamese_target = np.random.randint(0, 2)
    img1, label1 = self.data[index], self.labels[index].item()
    # flag1, softlabel1 = self.flag[index], self.soft_label[index]
    if siamese_target == 1:
        # Positive pair: same label, but force a different index.
        siamese_index = index
        while siamese_index == index:
            siamese_index = np.random.choice(self.label_to_indices[label1])
    else:
        # Negative pair: sample from any other label.
        siamese_label = np.random.choice(
            list(self.labels_set - set([label1])))
        siamese_index = np.random.choice(
            self.label_to_indices[siamese_label])
    img2, label2 = self.data[siamese_index], self.labels[
        siamese_index].item()
    # flag2, softlabel2 = self.flag[siamese_index], self.soft_label[siamese_index]
    img1 = default_loader(img1)
    img2 = default_loader(img2)
    if self.transform is not None:
        img1 = self.transform(img1)
        img2 = self.transform(img2)
    # return (img1, img2), siamese_target, (int(label1), int(label2)), (flag1, flag2), (softlabel1, softlabel2)
    # Cross-attribute agreement labels: 1 when the compared name parts match.
    if self.class_name[index][:4] == self.class_name[siamese_index][:4] \
            and self.class_name[index][-4:] == self.class_name[siamese_index][-4:]:
        # Both prefix and suffix match.
        vf_labels11_12 = 1
        vf_labels11_21 = 1
        vf_labels22_12 = 1
        vf_labels22_21 = 1
        vf_labels12_21 = 1
    elif self.class_name[index][:4] == self.class_name[siamese_index][:4] \
            and self.class_name[index][-4:] != self.class_name[siamese_index][-4:]:
        # Prefix matches, suffix differs.
        vf_labels11_12 = 0
        vf_labels11_21 = 1
        vf_labels22_12 = 1
        vf_labels22_21 = 0
        vf_labels12_21 = 0
    elif self.class_name[index][:4] != self.class_name[siamese_index][:4] \
            and self.class_name[index][-4:] == self.class_name[siamese_index][-4:]:
        # Suffix matches, prefix differs.
        vf_labels11_12 = 1
        vf_labels11_21 = 0
        vf_labels22_12 = 0
        vf_labels22_21 = 1
        vf_labels12_21 = 0
    else:
        # Nothing matches.
        vf_labels11_12 = 0
        vf_labels11_21 = 0
        vf_labels22_12 = 0
        vf_labels22_21 = 0
        vf_labels12_21 = 0
    return (img1, img2), \
        (siamese_target, vf_labels11_12, vf_labels11_21, vf_labels22_12, vf_labels22_21, vf_labels12_21), \
        (int(label1), int(label2))
def __init__(self):
    """Build Market-1501 train/test/query datasets and loaders.

    Depending on opt.mode ('vis' or 'compare'), also preloads query/compare
    images through the test transform.
    """
    train_transform = transforms.Compose([
        transforms.Resize((384, 128), interpolation=3),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
        RandomErasing(probability=0.5, mean=[0.0, 0.0, 0.0])
    ])
    test_transform = transforms.Compose([
        transforms.Resize((384, 128), interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    self.trainset = Market1501(train_transform, 'train', opt.data_path)
    self.testset = Market1501(test_transform, 'test', opt.data_path)
    self.queryset = Market1501(test_transform, 'query', opt.data_path)
    # Identity-balanced sampling: batchid identities x batchimage images each.
    self.train_loader = dataloader.DataLoader(
        self.trainset,
        sampler=RandomSampler(self.trainset,
                              batch_id=opt.batchid,
                              batch_image=opt.batchimage),
        batch_size=opt.batchid * opt.batchimage,
        num_workers=8,
        pin_memory=True)
    self.test_loader = dataloader.DataLoader(self.testset,
                                             batch_size=opt.batchtest,
                                             num_workers=8,
                                             pin_memory=True)
    self.query_loader = dataloader.DataLoader(self.queryset,
                                              batch_size=opt.batchtest,
                                              num_workers=8,
                                              pin_memory=True)
    if opt.mode == 'vis':
        self.query_image = test_transform(
            default_loader(
                os.path.join(self.queryset.data_path, opt.query_image)))
    if opt.mode == 'compare':
        self.compare_img_a = test_transform(
            default_loader(opt.compare_img_a))
        self.compare_img_b = test_transform(
            default_loader(opt.compare_img_b))
        self.query_image = test_transform(
            default_loader("cache/query.jpg"))
        # NOTE(review): compare_img_b is loaded and transformed a second time
        # here, overwriting the identical value above — looks redundant;
        # confirm before removing.
        self.compare_img_b = test_transform(
            default_loader(opt.compare_img_b))
def __getitem__(self, idx):
    """Return (rgb image, img image, frame index parsed from the name, name stem)."""
    name = self.rgb_img_seq[idx]
    img_image = default_loader(os.path.join(self.img_dir, name))
    rgb_image = default_loader(os.path.join(self.rgb_dir, name))
    if self.transform:
        img_image = self.transform(img_image)
        rgb_image = self.transform(rgb_image)
    # Frame index is the last '_'-separated token, minus the 4-char extension.
    frame_id = int(name.split('_')[-1][:-4])
    stem = str(name[:-4])
    return rgb_image, img_image, frame_id, stem
def __getitem__(self, idx):
    """Return ([img1, img2], same-identity flag) for the idx-th pair."""
    first, second = self.pairs[idx][0], self.pairs[idx][1]
    if self.preloaded:
        # Pairs index into the in-memory image cache.
        img1, img2 = self.images[first], self.images[second]
    else:
        # Pairs hold file paths; load lazily.
        img1, img2 = default_loader(first), default_loader(second)
    if self.transform:
        img1 = self.transform(img1)
        img2 = self.transform(img2)
    return [img1, img2], self.issame[idx]
def __getitem__(self, index):
    """Load an (image, mask) pair by index; both must share the same size."""
    img = default_loader(os.path.join(self.dir_images, self.img_names[index]))
    mask = default_loader(os.path.join(self.dir_masks, self.mask_names[index]))
    assert img.size == mask.size
    if self.transform:
        # Joint transform keeps image and mask spatially aligned.
        img, mask = self.apply_transform(img, mask)
    return img, mask
def get_raw_image(self, index, bbox=False):
    """Load the raw frame for self.frames[index]; optionally also return object crops.

    When bbox is True, object bounding boxes (stored at original video
    resolution) are rescaled to the loaded image size and returned as a list
    of PIL crops alongside the frame. Boxes thinner/shorter than 10 px after
    scaling are skipped.
    """
    vid_id, act_id, frame_id = self.frames[index]
    participant_id = vid_id.split('_')[0]
    img_path = os.path.join(self.img_root, participant_id, vid_id,
                            f'frame_{frame_id:010d}.jpg')
    # To use high resolution images, the videos need to be downloaded first
    img = default_loader(
        img_path)  # this loads a smaller version of the image
    if bbox:
        img_bboxes = []
        objects = self.objects[vid_id][frame_id]
        orig_w, orig_h = self.video_info[vid_id]['res']
        img_w, img_h = img.size
        for obj in objects:
            # Boxes are (top, left, height, width) in original-resolution pixels.
            for t, l, h, w in obj['bbox']:
                h_scale = img_h / orig_h
                w_scale = img_w / orig_w
                t *= h_scale
                h *= h_scale
                l *= w_scale
                w *= w_scale
                if h < 10 or w < 10:
                    continue  # too thin or narrow? do not add bbox
                # PIL crop expects (left, upper, right, lower).
                bbox = [int(l), int(t), int(l + w), int(t + h)]
                img_bboxes.append(img.crop(bbox))
        return img, img_bboxes
    return img
def __getitem__(self, idx):
    """Return two channel-shuffled views of one image plus their domain-offset labels."""
    img = default_loader(self.samples[idx])
    label = self.img_label[idx]
    # All six permutations of the RGB channels; each permutation is a "domain".
    channel_orders = [[0, 1, 2], [0, 2, 1], [1, 0, 2],
                      [1, 2, 0], [2, 0, 1], [2, 1, 0]]
    # First view keeps the original channel order 90% of the time.
    if np.random.random() < 0.9:
        index1 = 0
    else:
        index1 = np.random.randint(self.domain_num)
    # Second view: any domain different from the first.
    index2 = np.random.randint(self.domain_num)
    while index2 == index1:
        index2 = np.random.randint(self.domain_num)
    channels = img.split()

    def remix(order):
        # Reassemble an RGB image with the channels permuted by `order`.
        return Image.merge('RGB', (channels[order[0]],
                                   channels[order[1]],
                                   channels[order[2]]))

    img1 = remix(channel_orders[index1])
    img2 = remix(channel_orders[index2])
    if self.transform is not None:
        img1 = self.transform(img1)
    if self.transform is not None:
        img2 = self.transform(img2)
    # Labels are offset by domain so each (domain, class) pair is distinct.
    label1 = self.class_num * index1 + label
    label2 = self.class_num * index2 + label
    # Randomly swap the two views to produce data with more diversity.
    if np.random.randint(2) == 0:
        return img1, img2, label1, label2
    return img2, img1, label2, label1
def img_loader(args):
    """Walk args.datadir, load every .jpg/.png and group them by parent folder.

    Returns:
        defaultdict(list): maps each class folder name to a list of
        [1xCxHxW image tensor, 1-element label tensor] pairs, where the label
        is the integer prefix (before the first '_') of the file name.
    """
    test_transform = transforms.Compose([
        Resize((args.height, args.width)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    dataloaderX = defaultdict(list)
    data_path = args.datadir
    # Collect all image paths under the data directory.
    imgs_path = []
    for root, _dirs, files in os.walk(data_path):
        for fname in files:
            # Tuple membership instead of a chained `or` of splitext calls.
            if os.path.splitext(fname)[-1] in ('.jpg', '.png'):
                imgs_path.append(os.path.join(root, fname))
    # Load + transform each image, adding a leading batch dimension.
    imgs = [test_transform(default_loader(p)).unsqueeze_(0) for p in imgs_path]
    # Group by class folder: {class1: [img, label], class2: [img, label], ...}
    for num, path in enumerate(imgs_path):
        dataloaderX[path.split('/')[-2]].append(
            [imgs[num],
             torch.tensor([int(path.split('/')[-1].split('_')[0])])])
    print('[INFO]Total {} pairs of img...'.format(len(dataloaderX)))
    return dataloaderX
def __getimgs_bylabel__(self, label, img_num):
    """Sample img_num images of the given label as one stacked batch.

    Samples without replacement while enough indices exist; when the label has
    fewer than img_num images, the pool is exhausted once and the remainder is
    drawn with replacement.

    Returns:
        (img, label): a (img_num, C, H, W) tensor and a LongTensor of labels.
    """
    pool = self.label_to_indices[label]
    if len(pool) >= img_num:
        index = np.random.choice(pool, size=img_num, replace=False)
    else:
        index1 = np.random.choice(pool, size=len(pool), replace=False)
        index2 = np.random.choice(pool,
                                  size=img_num - len(pool),
                                  replace=True)
        index = np.concatenate((index1, index2))
    # Collect per-sample tensors and concatenate once at the end
    # (the original rebuilt the batch with torch.cat on every iteration).
    img_parts = []
    label_parts = []
    for i in range(img_num):
        label_temp = self.labels[index[i]]
        if not isinstance(label_temp, (tuple, list)):
            label_temp = (label_temp, )
        label_parts.append(torch.LongTensor(label_temp))
        img_temp = default_loader(self.data[index[i]])
        if self.transform is not None:
            img_temp = self.transform(img_temp)
        img_parts.append(img_temp.unsqueeze(0))
    img = torch.cat(img_parts, 0)
    label = torch.cat(label_parts, 0)
    return img, label
def __getitem__(self, index: Tuple[Tuple[int, int], int]):
    """
    Args:
        index: ((img_idx, align_idx), audio_idx) — video sample index,
            face-alignment bbox index, and the index to draw audio from.

    Returns:
        tuple: (sample, target) where target is class_index of the target
        class. When audio is sampled, sample is (vid, aud); in
        MANIPULATION_METHOD_DIFFERENT_VIDEO mode, target becomes
        (target, in_sync_flag).
    """
    (img_idx, align_idx), audio_idx = index
    path, target = self._samples[img_idx]
    vid = default_loader(f"{self.root}/{path}")
    if self.should_align_faces:
        relative_bb = self.relative_bbs[align_idx]
        vid = self.align_face(vid, relative_bb)
    if self.transform is not None:
        vid = self.transform(vid)
    if self.target_transform is not None:
        target = self.target_transform(target)
    if self.should_sample_audio:
        # NOTE(review): in the "different video" modes the audio path is taken
        # from img_idx (the same video) — looks inverted relative to the mode
        # names; confirm intended behavior.
        if self.audio_mode == AudioMode.FAKE_NOISE_DIFFERENT_VIDEO or (
                self.audio_mode == AudioMode.MANIPULATION_METHOD_DIFFERENT_VIDEO
                and target == 4
                # MANIPULATION_METHOD_DIFFERENT_VIDEO means we select different audio for manipulation vidoes
        ):
            aud_path, _ = self._samples[img_idx]
        else:
            aud_path, _ = self._samples[audio_idx]
        aud = self.audio_file_list(aud_path, stacked=True)
        aud: np.ndarray
        # this adds gaussian noise to audio input if it's supposed to be fake input
        if (self.audio_mode == AudioMode.FAKE_NOISE_DIFFERENT_VIDEO
                and audio_idx != img_idx) or (
                    self.audio_mode == AudioMode.FAKE_NOISE and target != 4):
            aud += np.random.normal(0, 1, aud.shape).astype(aud.dtype)
        sample = vid, aud
        # we have to do this because noisynets use the audio label for classification
        if self.audio_mode == AudioMode.MANIPULATION_METHOD_DIFFERENT_VIDEO:
            if target == 4:
                audio_idx = img_idx
            else:
                audio_idx = (
                    -1
                )  # this is nessecary for the case of wanting exact audio,
            # but using audio targets for training not class targets
            # this indicates if the audio and the images are in sync
            target = (target, int(audio_idx == img_idx))
    else:
        sample = vid
    return sample, target
def __getitem__(self, i):
    """Load and transform the i-th image; target is a constant 0 (unsupervised)."""
    image = default_loader(os.path.join(self.data_path, self.file_list[i]))
    image = self.transform(image)
    return image, 0  # 0: dummy target for unsupervised data
def encode_proc(model_path, model_config_path, img_root_path, img_key_path_list, img_size, device, output_path):
    """Encode images into VQ-VAE code indices and write "key\\tcodes" lines.

    Loads a VQVAE from model_path/model_config_path, reads image keys from
    each file in img_key_path_list, resolves them under img_root_path, and
    writes one tab-separated line per successfully loaded image to
    output_path. Unreadable images are skipped.
    """
    # Use context managers so file handles are always released
    # (the original leaked the config and output handles on error).
    with open(model_config_path) as config_file:
        model_config_json = config_file.read()
    print("ModelConfig:", model_config_json, file=sys.stderr, flush=True)
    model_config = VqvaeConfig.from_json(model_config_json)
    model = VQVAE(model_config).to(device)
    if device.type == "cuda":
        torch.cuda.set_device(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    transforms = build_transform(img_size)
    linecnt = 0
    with open(output_path, "w") as output_fp:
        for key_path in img_key_path_list:
            with open(key_path) as key_file:
                for line in key_file:
                    linecnt += 1
                    if linecnt % 100000 == 0:
                        print("{} {} done".format(key_path, linecnt),
                              file=sys.stderr, flush=True)
                    img_key = line.strip()
                    img_path = get_key_path(img_root_path, img_key)
                    try:
                        img = default_loader(img_path)
                    except Exception:  # narrowed from bare except: (kept best-effort skip)
                        continue
                    img = transforms(img)[None].to(device)
                    # model(...)[2] holds the code indices; flatten to one row.
                    id_t = model(img)[2].detach().cpu().flatten(1)
                    print("{}\t{}".format(img_key, ",".join(
                        (str(x) for x in id_t[0].tolist()))),
                          file=output_fp,
                          flush=True)
def __getitem__(self, index):
    """Return (transformed image, dummy 0.0 label) for standard-dataset compatibility."""
    image = default_loader(self.paths[index])
    if self.transform is not None:
        image = self.transform(image)
    # Bogus label keeps the (image, target) tuple shape expected downstream.
    return image, torch.tensor([0.])
def __getitem__(self, index):
    """Return (image, label, image path) for the sample at index."""
    img_name = self.imgs[index]
    label = self.labels[index]
    img = default_loader(img_name)
    if self.transform is not None:
        img = self.transform(img)
    return img, label, img_name
def loader(self, path, to_gray=False):
    """Load an image from path, optionally converting it to grayscale."""
    image = default_loader(path)
    return F.to_grayscale(image) if to_gray else image
def __iter__(self):
    """Yield transformed images (optionally with their keys) for this worker's shard.

    The key-file list is split into contiguous chunks across DataLoader
    workers; each worker shuffles its own chunk and streams its images.
    Unreadable images are skipped.
    """
    worker_info = torch.utils.data.get_worker_info()
    if worker_info is not None:
        num_workers = worker_info.num_workers
        worker_id = worker_info.id
    else:
        # Single-process loading.
        num_workers = 1
        worker_id = 0
    pic_size = int(math.ceil(len(self.img_keys_file_list) / num_workers))
    file_list = self.img_keys_file_list[pic_size * worker_id:pic_size *
                                        worker_id + pic_size]
    self.rand.shuffle(file_list)
    for f in file_list:
        # Use a context manager so the key file is closed per iteration
        # (the original left every handle open).
        with open(f) as key_file:
            for line in key_file:
                img_key = line.strip()
                img_path = get_key_path(self.img_root_path, img_key)
                try:
                    img = default_loader(img_path)
                except Exception:  # narrowed from bare except: (kept best-effort skip)
                    continue
                if self.with_key:
                    yield img_key, self.transform(img)
                else:
                    yield self.transform(img)
def __getitem__(self, item):
    """Return the item-th image, optionally preceded by its path / followed by its label.

    Adversarial samples (truthy label) are stored as .npz arrays under the
    'img' key; clean samples are ordinary image files. The result collapses
    to just the image when neither path nor label is requested.
    """
    path = self.paths[item]
    label = self.labels[item]
    if label:
        # adversarial sample: numpy archive
        image = torch.from_numpy(np.load(path)['img'])
        if self.adv_transform:
            image = self.adv_transform(image)
    else:
        image = default_loader(path)
        if self.orig_transform:
            image = self.orig_transform(image)
    # Assemble the return value in (path, image, label) order.
    parts = []
    if self.return_paths:
        parts.append(path)
    parts.append(image)
    if self.return_label:
        parts.append(label)
    return parts[0] if len(parts) == 1 else parts
def compare(self, query_image_path, input_image_path):
    """Score the input image against the cached query features (self.feature_a).

    Note: query_image_path is currently unused — the query feature is assumed
    to be precomputed in self.feature_a.
    """
    print('extract features, this may take a few time')
    input_image = self.test_transform(default_loader(input_image_path))
    feature_b = extract_feature(
        self.model, tqdm([(torch.unsqueeze(input_image, 0), 1)]))
    # Dot-product similarity of every query feature against the input feature.
    feature_b = feature_b.view(-1, 1)
    score = torch.mm(self.feature_a, feature_b)
    score = score.squeeze(1).cpu()
    return score.numpy()
def infer(model, image_list, isFlip=True):
    """Extract L2-normalized 2048-d features for a list of image paths.

    When isFlip is set, features of the original and horizontally flipped
    batches are summed before normalization.
    """
    device = torch.device('cpu' if args.cpu else 'cuda')
    test_transform = transforms.Compose([
        transforms.Resize((args.height, args.width), interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    batch = torch.stack(
        [test_transform(default_loader(p)) for p in image_list], dim=0)
    features = torch.FloatTensor(batch.size(0), 2048).zero_()
    passes = 2 if isFlip else 1
    for i in range(passes):
        if i == 1:
            batch = fliphor(batch)
        outputs = model(batch.to(device))
        features = features + outputs[0].data.cpu()
    # L2-normalize each row.
    norm = torch.norm(features, p=2, dim=1, keepdim=True)
    return features.div(norm.expand_as(features))
def spectrogram_loader(filepath: str):
    """Load a spectrogram from an image file, or from a .npy array otherwise."""
    if has_file_allowed_extension(filepath, IMG_EXTENSIONS):
        return np.array(default_loader(filepath))
    return np.load(filepath)
def get_res_feature(frame_dir, feature_dir):
    """Extract ResNet pool5 features per video and store them as HDF5 files.

    For every subdirectory of frame_dir, runs each .jpg frame through ResNet,
    collects the flattened pool5 features, and writes them to
    feature_dir/<video>.h5 under the 'pool5' dataset.
    """
    # build resnet class
    resnet = ResNet(224)
    for sub_frame_dir in sorted(frame_dir.glob('*/')):
        # Collect per-frame features and stack once — the original rebuilt the
        # whole array with np.vstack on every frame (quadratic copies).
        frame_features = []
        for frame_filename in sorted(sub_frame_dir.glob(
                '*.jpg')):  # for each frame image in dir_path
            frame = default_loader(str(frame_filename))
            print(frame_filename)
            # extract ResNet feature
            res_conv5, res_pool5 = resnet(frame)
            # gpu variable -> cpu variable -> tensor -> numpy array -> 1D array
            frame_features.append(res_pool5.cpu().data.numpy().flatten())
        # Preserve the original shapes: empty -> (0,), one frame -> 1-D,
        # several frames -> (n, d).
        if not frame_features:
            video_feature = np.array([])
        elif len(frame_features) == 1:
            video_feature = frame_features[0]
        else:
            video_feature = np.vstack(frame_features)
        print(video_feature.shape)
        os.makedirs(str(feature_dir), exist_ok=True)
        h5_filename = str(feature_dir) + '/' + (
            str(sub_frame_dir).split('/'))[-1] + '.h5'
        # Context manager guarantees the HDF5 file is closed even on error.
        with h5py.File(h5_filename, 'w') as h5file:
            h5file.create_dataset('pool5', data=video_feature)
def __getitem__(self, idx):
    """Return {'img', 'label', 'flag'} for one sample.

    Samples with img_flag == 1 live under the generated-data folder
    'gen_0000'; others are addressed as '<first 4 chars>/<rest>' of the
    sample name.
    """
    sample_name = self.samples[idx]  # folder_files
    if self.img_flag[idx] == 1:
        foldername = 'gen_0000'
        filename = sample_name[9:]
    else:
        foldername = sample_name[:4]
        filename = sample_name[5:]
    img = default_loader(self.image_dir + '/' + foldername + '/' + filename)
    # Training split uses the augmenting transform; everything else uses 'val'.
    split = 'train' if self.train_val == 'train_new' else 'val'
    return {
        'img': data_transforms[split](img),
        'label': self.img_label[idx],
        'flag': self.img_flag[idx]  # flag == 1 marks generated data
    }
def iterate(self):
    """Yield (filename, transformed image) for every regular file in image_dir."""
    file_names = [
        name for name in listdir(self.image_dir)
        if isfile(join(self.image_dir, name))
    ]
    for name in file_names:
        image = default_loader(join(self.image_dir, name))
        yield name, self.img_transform(image)
def __getitem__(self, index):
    """Pick one of five side views of car `index`; return (image, car id, side)."""
    side_paths = glob.glob(self.files_A[index] + '*.*')
    # Random side in [0, 4]; wrapped in np.array to match downstream typing.
    car_side = np.array(random.randint(0, 4))
    car_id = find_car_id(self.files_A[index])
    image = self.transform(
        blank_extention(default_loader(side_paths[car_side])))
    return image, car_id, car_side
def __getitem__(self, index):
    """Return (frame features or stacked frame tensors, video name) for one video.

    Preprocessed mode reads cached features; otherwise up to 256 .jpg frames
    are loaded and transformed from the video's directory.
    """
    if self.preprocessed:
        video_data = self.video_features[self.video_list[index]]
        # NOTE(review): unconditional .cuda() and debug print — confirm these
        # are intended in a Dataset __getitem__.
        video_data = torch.Tensor(video_data).cuda()
        print(video_data.size())
        return video_data, self.video_list[index]
        # imgs = []
        # for image_name in video_group.keys():
        #     image_data = video_group[image_name]
        #     imgs.append(torch.Tensor(image_data))
        # return torch.stack(imgs), self.video_list[index]
        # image_path = self.video_list[index]
        # print("image", image_path)
        # with h5py.File(image_path, 'r') as f:
        #     if self.with_name:
        #         return torch.Tensor(np.array(f['pool5'])), image_path.name[:-5]
        #     else:
        #         return torch.Tensor(np.array(f['pool5']))
    else:
        images = []
        print("here")  # debug output
        count = 0
        # Cap at 256 frames per video.
        for img_path in Path(self.video_list[index]).glob('*.jpg'):
            img = default_loader(img_path)
            img_tensor = self.transform(img)
            images.append(img_tensor)
            count += 1
            if count == 256:
                break
        print(images[0].size())  # debug; raises IndexError if no frames found
        # img_path here is the last loop value; name[4:] strips a 4-char
        # prefix from the parent directory name — TODO confirm naming scheme.
        return torch.stack(images), img_path.parent.name[4:]
def read_image_for_pytorch(image_file_name):
    """Read an image file into a channels-first (C, H, W) numpy array."""
    img = default_loader(image_file_name)
    # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
    nchannel = 3 if img.mode == 'YCbCr' else len(img.mode)
    # Flatten pixel data, then reshape to (height, width, channels).
    pixels = np.array(img.getdata()).reshape(img.size[1], img.size[0], nchannel)
    # Permute HWC -> CHW and make the result contiguous.
    return np.transpose(pixels, (2, 0, 1)).copy()