def _ivector_processing(data, labels): n_samples = [len(i) for i in list(data.values())[0]] # repeat the labels if labels is not None: new_labels = [] for n, lab in zip(n_samples, labels): new_labels += [lab] * n labels = np.asarray(new_labels) return [torch.from_numpy(np.vstack(dat)) for dat in data.values()], labels
def _get_text_dim(self, data): for v in data.values(): for vv in v: x_text = torch.Tensor(vv["text_embedding_{}".format( self.text)]) if len(x_text.size()): return x_text.shape
def __init__(self, data, set_type, config):
    """Initialise the dataset from a file -> label mapping and a config dict.

    Args:
        data: Mapping whose keys become ``self.audio_files`` and whose
            values become ``self.audio_labels``.
        set_type: Stored unchanged on ``self.set_type``.
        config: Settings dict. Its ``"bg_noise_files"`` entry is
            overwritten in place with only the paths ending in ``"wav"``;
            the remaining keys read here are required.
    """
    super().__init__()
    self.set_type = set_type
    self.audio_files = list(data.keys())
    self.audio_labels = list(data.values())

    # Only wav files are usable as background noise; the filtered list is
    # written back so the caller's config reflects what was loaded.
    wav_paths = [
        path for path in config.get("bg_noise_files", [])
        if path.endswith("wav")
    ]
    config["bg_noise_files"] = wav_paths
    noise_clips = []
    for wav_path in config["bg_noise_files"]:
        # librosa returns (samples, sample_rate); only the samples are kept
        noise_clips.append(librosa.core.load(wav_path, sr=16000)[0])
    self.bg_noise_audio = noise_clips

    self.unknown_prob = config["unknown_prob"]
    self.silence_prob = config["silence_prob"]
    self.noise_prob = config["noise_prob"]
    self.input_length = config["input_length"]
    self.timeshift_ms = config["timeshift_ms"]

    cache_size = config["cache_size"]
    self._audio_cache = SimpleCache(cache_size)
    self._file_cache = SimpleCache(config["cache_size"])

    # Silence count is a fraction of the samples whose label is not 1
    # (presumably 1 marks the "unknown" class -- confirm with the caller).
    n_unk = sum(1 for label in self.audio_labels if label == 1)
    n_other = len(self.audio_labels) - n_unk
    self.n_silence = int(self.silence_prob * n_other)

    self.audio_processor = AudioPreprocessor(
        n_mels=config["n_mels"],
        n_dct_filters=config["n_dct_filters"],
        hop_ms=10,
    )
    self.audio_preprocess_type = config["audio_preprocess_type"]
def __init__( self, root, split='train', num_node=16, downsample=0.03, self_augment=False, augment_noise=0.005, augment_axis=1, augment_rotation=1.0, augment_translation=0.001, config=None, ): self.root = root self.split = split self.num_node = num_node self.downsample = downsample self.self_augment = self_augment self.augment_noise = augment_noise self.augment_axis = augment_axis self.augment_rotation = augment_rotation self.augment_translation = augment_translation self.config = config assert self_augment == False # containers self.ids = [] self.points = [] self.src_to_tgt = {} # load data pts_filename = join( self.root, f'3DMatch_{split}_{self.downsample:.3f}_points.pkl') keypts_filename = join( self.root, f'3DMatch_{split}_{self.downsample:.3f}_keypts.pkl') if exists(pts_filename) and exists(keypts_filename): with open(pts_filename, 'rb') as file: data = pickle.load(file) self.points = [*data.values()] self.ids_list = [*data.keys()] with open(keypts_filename, 'rb') as file: self.correspondences = pickle.load(file) print(f"Load PKL file from {pts_filename}") else: print("PKL file not found.") return for idpair in self.correspondences.keys(): src = idpair.split("@")[0] tgt = idpair.split("@")[1] # add (key -> value) src -> tgt if src not in self.src_to_tgt.keys(): self.src_to_tgt[src] = [tgt] else: self.src_to_tgt[src] += [tgt]
def create_dict(path):
    """Build a ``class_name -> key`` lookup from a class-index JSON file.

    Args:
        path: Path to the ImageNet JSON file. The file must hold an object
            whose values are objects with a ``"class_name"`` field.

    Returns:
        A dict mapping every entry's ``class_name`` to the key it was
        stored under. When two entries share a class name the later one
        wins.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(path, encoding="utf-8") as file:
        data = json.load(file)
    return {entry['class_name']: key for key, entry in data.items()}
def _apply_to_data(data, func, unpack_dict=False): """Apply a function to data, trying to unpack different data types. """ apply_ = partial(_apply_to_data, func=func, unpack_dict=unpack_dict) if isinstance(data, dict): if unpack_dict: return [apply_(v) for v in data.values()] return {k: apply_(v) for k, v in data.items()} elif isinstance(data, (list, tuple)): try: # e.g.list/tuple of arrays return [apply_(x) for x in data] except TypeError: return func(data) return func(data)
def __init__(self, data, uk_data, set_type, config):
    """Initialise the dataset, separating label-1 files from the rest.

    Args:
        data: Mapping of audio file to integer label. Files labelled 1 go
            to ``self.uk_audio_files``; all others end up in
            ``self.audio_files`` / ``self.audio_labels``.
        uk_data: Mapping stored on ``self.uk_data``; its values are only
            summarised on stdout here.
        set_type: Stored unchanged on ``self.set_type``.
        config: Settings dict. Its ``"bg_noise_files"`` entry is
            overwritten in place with only the paths ending in ``"wav"``.

    Prints several diagnostic summaries to stdout while running.
    """
    super().__init__()
    self.uk_data = uk_data
    uk_values = list(self.uk_data.values())
    print(set_type, set(uk_values))
    c = Counter(uk_values)
    print(c)

    self.set_type = set_type
    self.audio_files = list(data.keys())
    self.audio_labels = list(data.values())
    print(np.bincount(np.array(self.audio_labels)))

    wav_paths = [
        path for path in config.get("bg_noise_files", [])
        if path.endswith("wav")
    ]
    config["bg_noise_files"] = wav_paths
    noise_clips = []
    for wav_path in config["bg_noise_files"]:
        # librosa returns (samples, sample_rate); only the samples are kept
        noise_clips.append(librosa.core.load(wav_path, sr=16000)[0])
    self.bg_noise_audio = noise_clips

    self.unknown_prob = config["unknown_prob"]
    self.silence_prob = config["silence_prob"]
    self.noise_prob = config["noise_prob"]
    self.n_dct = config["n_dct_filters"]
    self.input_length = config["input_length"]
    self.timeshift_ms = config["timeshift_ms"]
    self.filters = librosa.filters.dct(config["n_dct_filters"],
                                       config["n_mels"])
    self.n_mels = config["n_mels"]
    self._audio_cache = SimpleCache(config["cache_size"])
    self._file_cache = SimpleCache(config["cache_size"])
    print(set_type, self.unknown_prob)

    # Files labelled 1 are kept apart (presumably the "unknown" class --
    # confirm with the caller); the sampling budgets below are fractions
    # of the remaining sample count.
    self.uk_audio_files = [name for name, label in data.items() if label == 1]
    n_regular = len(self.audio_labels) - len(self.uk_audio_files)
    self.n_unknown = int(self.unknown_prob * n_regular)
    #shuffle(self.uk_audio_files)
    self.uk_index = 0
    self.n_silence = int(self.silence_prob * n_regular)

    regular_items = [(name, label) for name, label in data.items() if label != 1]
    self.audio_files = [name for name, _ in regular_items]
    self.audio_labels = [label for _, label in regular_items]
def getData(root, mode):
    """Load image names and object-index labels for one split.

    Reads ``train.json`` (when ``mode == 'train'``) or ``test.json``
    (any other mode) plus ``objects.json`` from ``root`` and replaces
    every object name in the labels with its value from ``objects.json``.

    Args:
        root: Directory containing the JSON files.
        mode: ``'train'`` selects the training split; anything else
            selects the test split.

    Returns:
        For training: ``(img, label)`` where both are passed through
        ``np.squeeze``; ``img`` comes from the keys of ``train.json`` and
        ``label`` from its values. Otherwise: ``(None, label)`` where
        ``label`` is a list of lists of object indices, one per entry of
        ``test.json``.

    Raises:
        KeyError: If a label names an object missing from ``objects.json``.
    """
    split_file = 'train.json' if mode == 'train' else 'test.json'
    # Context managers close the files; the bare open() calls leaked them.
    with open(os.path.join(root, split_file)) as file:
        data = json.load(file)
    with open(os.path.join(root, 'objects.json')) as file:
        obj = json.load(file)

    if mode == 'train':
        img = list(data.keys())
        label = [[obj[name] for name in names] for names in data.values()]
        return np.squeeze(img), np.squeeze(label)

    return None, [[obj[name] for name in names] for names in data]
def load_data_iter(game=None,
                   data=None,
                   dataset='combined',
                   device=torch.device('cpu'),
                   load_type='memory',
                   batch_size=32,
                   sampler=None):
    """Build a ``DataLoader`` over one game's data.

    Args:
        game: Forwarded to ``load_hdf_data`` / ``HDF5TorchDataset``.
        data: Names of the data groups to read in ``'disk'`` mode.
            Defaults to ``['images', 'actions', 'fused_gazes']``. Ignored
            in ``'memory'`` mode, where everything returned by
            ``load_hdf_data`` is used.
        dataset: Dataset name forwarded to the loader.
        device: Device the in-memory tensors are moved to; also forwarded
            to ``HDF5TorchDataset``.
        load_type: ``'memory'`` loads everything up front into a
            ``TensorDataset``; ``'disk'`` wraps an ``HDF5TorchDataset``.
        batch_size: Batch size of the returned loader.
        sampler: Optional callable taking the dataset and returning a
            sampler. When omitted the loader shuffles instead.

    Returns:
        A ``torch.utils.data.DataLoader``.

    Raises:
        ValueError: If ``load_type`` is neither ``'memory'`` nor ``'disk'``.
    """
    # Previously an unknown load_type fell through and wrapped the
    # *string* ``dataset`` in a DataLoader; fail loudly instead.
    if load_type not in ('memory', 'disk'):
        raise ValueError(
            "load_type must be 'memory' or 'disk', got {!r}".format(load_type))

    # None sentinel avoids sharing one mutable default list across calls.
    if data is None:
        data = ['images', 'actions', 'fused_gazes']

    if load_type == 'memory':
        data = load_hdf_data(game=game, dataset=[dataset])
        # NOTE(review): relies on load_hdf_data returning exactly three
        # entries in this order -- confirm against its implementation.
        x, y_, x_g = data.values()
        x = torch.Tensor(x).squeeze().to(device=device)
        # only the last column of the squeezed targets is used as the label
        y = torch.LongTensor(y_).squeeze()[:, -1].to(device=device)
        x_g = torch.Tensor(x_g).squeeze().to(device=device)
        dataset = torch.utils.data.TensorDataset(x, y, x_g)
        dataset.labels = y_[0][:, -1]
    else:
        dataset = HDF5TorchDataset(game=game,
                                   data=data,
                                   dataset=dataset,
                                   device=device)

    if sampler is None:
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
    return torch.utils.data.DataLoader(dataset,
                                       batch_size=batch_size,
                                       sampler=sampler(dataset))
def get_iCLEVR_data(root_folder, mode):
    """Load image names and multi-hot object labels for one split.

    Reads ``train.json`` (when ``mode == 'train'``) or ``test.json``
    (any other mode) plus ``objects.json`` from ``root_folder``. Each
    list of object names is turned into a float vector of length
    ``len(objects)`` holding 1 at the index of every named object and 0
    elsewhere.

    Args:
        root_folder: Directory containing the JSON files.
        mode: ``'train'`` selects the training split; anything else
            selects the test split.

    Returns:
        For training: ``(img, label)``, both passed through
        ``np.squeeze``; ``img`` comes from the keys of ``train.json``.
        Otherwise: ``(None, label)`` where ``label`` is a list with one
        vector per entry of ``test.json``.

    Raises:
        KeyError: If a label names an object missing from ``objects.json``.
    """
    split_file = 'train.json' if mode == 'train' else 'test.json'
    # Context managers close the files; the bare open() calls leaked them.
    with open(os.path.join(root_folder, split_file)) as file:
        data = json.load(file)
    with open(os.path.join(root_folder, 'objects.json')) as file:
        obj = json.load(file)

    def multi_hot(names):
        # one slot per known object, set for every object in this sample
        vector = np.zeros(len(obj))
        vector[[obj[name] for name in names]] = 1
        return vector

    if mode == 'train':
        img = list(data.keys())
        label = [multi_hot(names) for names in data.values()]
        return np.squeeze(img), np.squeeze(label)

    return None, [multi_hot(names) for names in data]
def __init__(self, data, set_type, config):
    """Initialise the dataset, its sampling statistics and shared noise pools.

    Args:
        data: Mapping whose keys become ``self.audio_files`` and whose
            values become ``self.audio_labels``.
        set_type: Stored unchanged on ``self.set_type``.
        config: Settings dict. Its ``"bg_noise_files"`` and ``"bg_files"``
            entries are overwritten in place with only the paths ending in
            ``"npy"``.

    Besides per-instance attributes this fills the class-level lists
    ``SpeechDataset.bg_noise_audio`` and ``SpeechDataset.bg_audio`` when
    they are still empty.
    """
    super().__init__()
    self.i = 0
    self.without_word = 0.25
    self.SR = 16000
    self.PADD = 0.075
    self.LENGTH = 2
    self.BOUND = 0.15
    # Kaiser window of SR samples, lifted by one random offset drawn at
    # construction time and clipped to [0, 1.1].
    self.VOICE_WINDOW = np.clip(
        librosa.filters.get_window(
            ('kaiser', 1.5), self.SR) + np.abs(np.random.normal(0, 0.1)),
        0, 1.1)
    # Inverted Kaiser window over SR * (1 + 2 * PADD) samples, plus 0.05.
    self.BACKGROUND_WINDOW = 1 - \
        librosa.filters.get_window(('kaiser', 2.5), self.SR*(1+2*self.PADD))+0.05
    self.audio_files = list(data.keys())
    self.set_type = set_type
    self.audio_labels = list(data.values())
    # Keep only .npy paths; the filtered lists are written back into config.
    config["bg_noise_files"] = list(
        filter(lambda x: x.endswith("npy"), config.get("bg_noise_files", [])))
    config["bg_files"] = list(
        filter(lambda x: x.endswith("npy"), config.get("bg_files", [])))
    self.unknown_prob = config["unknown_prob"]
    self.silence_prob = config["silence_prob"]
    self.noise_prob = config["noise_prob"]
    self.input_length = config["input_length"]
    self.timeshift_ms = config["timeshift_ms"]
    self._audio_cache = SimpleCache(config["cache_size"])
    self._file_cache = SimpleCache(config["cache_size"])
    # Number of samples labelled 1 (presumably the "unknown" class -- confirm).
    n_unk = len(list(filter(lambda x: x == 1, self.audio_labels)))
    # Silence budget: a fraction of the samples whose label is not 1.
    self.n_silence = int(self.silence_prob * (len(self.audio_labels) - n_unk))
    self.audio_processor = AudioPreprocessor(
        n_mels=config["n_mels"],
        n_dct_filters=config["n_dct_filters"],
        hop_ms=10)
    self.audio_preprocess_type = config["audio_preprocess_type"]
    # All labels plus n_silence extra entries with label 0
    # (presumably 0 is the silence class -- confirm).
    total_lbl = list(data.values()) + [0] * self.n_silence
    uniq_labels = sorted(list(set(total_lbl)))
    # label value -> position in uniq_labels / labels_count
    uniq_idx = {k: v for v, k in enumerate(uniq_labels)}
    labels_count = [(np.array(total_lbl) == l).sum() for l in uniq_labels]
    # Per-label weight: count of the most frequent label / this label's count.
    self.labels_count = torch.tensor(max(labels_count) / labels_count)
    if len(labels_count) > 2:
        # Largest count among labels after the first two (in sorted order),
        # times the number of distinct labels.
        self.len = max(labels_count[2:]) * len(labels_count)
    else:
        self.len = 0
    # for weighted random sampler
    self.data_probs = [1 / labels_count[uniq_idx[l]] for l in total_lbl]
    # NOTE(review): this file arrived with its indentation collapsed; the
    # nesting of the zero-fill fallbacks and print calls inside the outer
    # `if` blocks below is reconstructed -- confirm against the original.
    if len(SpeechDataset.bg_noise_audio) == 0:
        SpeechDataset.bg_noise_audio = [
            np.load(file) for file in config["bg_noise_files"]
        ]
        if len(SpeechDataset.bg_noise_audio) == 0:
            # No noise files available: fall back to one all-zero clip.
            SpeechDataset.bg_noise_audio = [np.zeros(self.input_length)]
        print(
            f'# of background noise: {len(SpeechDataset.bg_noise_audio)}')
    if len(SpeechDataset.bg_audio) == 0:
        SpeechDataset.bg_audio = [
            np.load(file) for file in config["bg_files"]
        ]
        if len(SpeechDataset.bg_audio) == 0:
            # No background files: one all-zero clip of twice the input length.
            SpeechDataset.bg_audio = [np.zeros(self.input_length * 2)]
        print(f'# of background: {len(SpeechDataset.bg_audio)}')
def _flatten_processing(data, labels): X = [] for dat in data.values(): X.extend(dat) return X, labels
def _xvector_processing(data, labels): return [ torch.from_numpy(np.transpose(np.dstack(dat), (2, 0, 1))) for dat in data.values() ], labels
trained_state_dict = torch.load("{}/aeparams.pt".format(outpath)) trained_state_dict = { k: v for k, v in trained_state_dict.items() if k in state_dict } state_dict.update(trained_state_dict) ae.module.load_state_dict(state_dict, strict=False) # eval iternum = 0 itemnum = 0 starttime = time.time() with torch.no_grad(): for data in dataloader: b = next(iter(data.values())).size(0) # forward output = ae(iternum, [], **{k: x.to("cuda") for k, x in data.items()}, **profile.get_ae_args()) writer.batch(iternum, itemnum + torch.arange(b), **data, **output) endtime = time.time() ips = 1. / (endtime - starttime) print("{:4} / {:4} ({:.4f} iter/sec)".format( itemnum, len(dataset), ips), end="\n") starttime = time.time()