def _load_utterances(self):
    """Read this speaker's _sources.txt and build the utterance list.

    Each non-empty line of _sources.txt is "<frames_fname>,<wave_fpath>".
    Every pair becomes an Utterance, and a RandomCycler is built so that
    utterances can later be sampled in a random but evenly-covering order.
    """
    with self.root.joinpath("_sources.txt").open("r") as sources_file:
        # strip() removes the trailing newline that would otherwise stay
        # attached to the last field (the wave path); blank lines are skipped.
        sources = [l.strip().split(",") for l in sources_file if l.strip()]
    self.utterances = [
        Utterance(self.root.joinpath(f), w) for f, w in sources
    ]
    self.utterance_cycler = RandomCycler(self.utterances)
def _load_utterances(self):
    """Read this speaker's _sources.txt and build the utterance list.

    Each non-empty line of _sources.txt is "<frames_fname>,<wave_fpath>".
    Duplicate frames filenames are collapsed (last wave path wins) before
    Utterance objects are created and the RandomCycler is built.
    """
    with self.root.joinpath("_sources.txt").open("r") as sources_file:
        # strip() removes the trailing newline that would otherwise stay
        # attached to the last field (the wave path); blank lines are skipped.
        sources = [l.strip().split(",") for l in sources_file if l.strip()]
    # Deduplicate: keep exactly one wave path per frames file.
    sources = {frames_fname: wave_fpath for frames_fname, wave_fpath in sources}
    self.utterances = [Utterance(self.root.joinpath(f), w) for f, w in sources.items()]
    try:
        self.utterance_cycler = RandomCycler(self.utterances)
    except Exception:
        # Surface which speaker's source file caused the failure, then
        # re-raise with the original traceback intact (bare raise, not
        # raise(e), which would reset the raise site).
        print(self.root.joinpath("_sources.txt"))
        raise
def _load_utterances(self):
    """Read this speaker's _sources.txt and build the utterance list.

    Each non-empty line of _sources.txt is "<frames_fname>,<wave_fpath>".
    Duplicate frames filenames are collapsed (last wave path wins) before
    Utterance objects are created and the RandomCycler is built.
    """
    with self.root.joinpath("_sources.txt").open("r") as sources_file:
        # strip() removes the trailing newline that would otherwise stay
        # attached to the last field (the wave path); blank lines are skipped.
        sources = [l.strip().split(",") for l in sources_file if l.strip()]
    # Deduplicate: keep exactly one wave path per frames file.
    sources = {
        frames_fname: wave_fpath for frames_fname, wave_fpath in sources
    }
    self.utterances = [
        Utterance(self.root.joinpath(f), w) for f, w in sources.items()
    ]
    self.utterance_cycler = RandomCycler(self.utterances)
class Speaker:
    """A single speaker's directory of preprocessed utterances.

    Utterances are loaded lazily: nothing is read from disk until the
    first call to random_partial().
    """

    def __init__(self, root: Path):
        self.root = root
        self.name = root.name
        # Both are populated lazily by _load_utterances().
        self.utterances = None
        self.utterance_cycler = None

    def _load_utterances(self):
        """Read _sources.txt and build the utterance list and its cycler."""
        with self.root.joinpath("_sources.txt").open("r") as sources_file:
            # strip() removes the trailing newline that would otherwise stay
            # attached to the last field (the wave path); blank lines skipped.
            sources = [l.strip().split(",") for l in sources_file if l.strip()]
        # Deduplicate: keep exactly one wave path per frames file.
        sources = {frames_fname: wave_fpath for frames_fname, wave_fpath in sources}
        self.utterances = [Utterance(self.root.joinpath(f), w) for f, w in sources.items()]
        self.utterance_cycler = RandomCycler(self.utterances)

    def random_partial(self, count, n_frames):
        """
        Samples a batch of <count> unique partial utterances from the disk in a way that all
        utterances come up at least once every two cycles and in a random order every time.

        :param count: The number of partial utterances to sample from the set of utterances from
        that speaker. Utterances are guaranteed not to be repeated if <count> is not larger than
        the number of utterances available.
        :param n_frames: The number of frames in the partial utterance.
        :return: A list of tuples (utterance, frames, range) where utterance is an Utterance,
        frames are the frames of the partial utterances and range is the range of the partial
        utterance with regard to the complete utterance.
        """
        if self.utterances is None:
            self._load_utterances()

        utterances = self.utterance_cycler.sample(count)
        a = [(u,) + u.random_partial(n_frames) for u in utterances]
        return a
def __init__(self, datasets_root, dataset_len, train_frac):
    """
    :param datasets_root: directory containing all preprocessed speaker
    directories.
    :param dataset_len: the number of pseudo-speakers in the dataset. Speakers are sampled
    with replacement. Each time a speaker is returned, a random set of utterances and random
    segment from each utterance is selected.
    :param train_frac: the fraction of training set to use.
    """
    self.root = datasets_root
    speaker_dirs = [f for f in self.root.glob("*") if f.is_dir()]
    n_speakers = len(speaker_dirs)
    if n_speakers == 0:
        raise Exception(
            "No speakers found. Make sure you are pointing to the directory "
            "containing all preprocessed speaker directories.")

    n_speakers_to_use = int(train_frac * n_speakers)
    # Guard against a train_frac so small that int() truncates to zero —
    # an empty speaker list would otherwise fail obscurely in RandomCycler.
    if n_speakers_to_use == 0:
        raise Exception(
            "train_frac is too small: it selects 0 of the available speakers.")
    self.speakers = [
        Speaker(speaker_dir) for speaker_dir in speaker_dirs[:n_speakers_to_use]
    ]

    self.speaker_cycler = RandomCycler(self.speakers)
    self.dataset_len = dataset_len
    print("Training set - number of speakers is {} ({}% of total)".format(
        len(self.speakers), train_frac * 100))
def __init__(self, datasets_root: Path, user_folder: Path):
    """Collect every preprocessed speaker directory under datasets_root,
    append one extra Speaker built from user_folder, and wrap them all in
    a RandomCycler for sampling."""
    self.root = datasets_root
    speaker_dirs = [d for d in self.root.glob("*") if d.is_dir()]
    if not speaker_dirs:
        raise Exception("No speakers found. Make sure you are pointing to the directory "
                        "containing all preprocessed speaker directories.")
    speakers = [Speaker(d) for d in speaker_dirs]
    speakers.append(Speaker(user_folder))
    self.speakers = speakers
    self.speaker_cycler = RandomCycler(self.speakers)
def __init__(self, datasets_root: Path, img_per_cls: int, train: bool):
    """Collect every preprocessed image-class directory under
    datasets_root into Cls objects and wrap them in a RandomCycler."""
    # NOTE(review): img_per_cls is accepted but not used in this
    # constructor — presumably consumed elsewhere (e.g. when sampling);
    # confirm against callers.
    self.root = datasets_root
    cls_dirs = [d for d in self.root.glob("*") if d.is_dir()]
    if not cls_dirs:
        raise Exception(
            "No image class found. Make sure you are pointing to the directory "
            "containing all preprocessed image class directories.")
    self.classes = [Cls(d, train) for d in cls_dirs]
    self.cls_cycler = RandomCycler(self.classes)
def __init__(self, root: Path, train: bool):
    """Record this class directory and index its images.

    :param root: directory holding the images of one class.
    :param train: whether augmentation should run in training mode.
    """
    self.is_training = train
    self.root = root
    self.name = root.name
    # list() instead of a copying comprehension — same result, clearer intent.
    self.images = list(self.root.iterdir())
    self.image_cycler = RandomCycler(self.images)
class Cls:
    """One image class (one directory of images) with random sampling and
    on-the-fly augmentation for training/validation."""

    def __init__(self, root: Path, train: bool):
        # train selects the heavier augmentation branch in process_img().
        self.is_training = train
        self.root = root
        self.name = root.name
        # All files of this class; RandomCycler yields them in random order.
        self.images = [image for image in self.root.iterdir()]
        self.image_cycler = RandomCycler(self.images)

    def random_sample(self, count):
        """Sample <count> images, augment/resize them to 224x224 via
        process_img(), and scale pixel values into [0, 1]."""
        images = self.image_cycler.sample(count)
        return [self.process_img(cv2.imread(str(img)), 224)/255. for img in images]

    # Randomly add gaussian noise to img
    def random_gaussian_noise(self, img, mean=0, std=5.0):
        # NOTE(review): the result is a float array clipped to [0, 255],
        # not uint8 — downstream code (copyMakeBorder, /255.) tolerates this.
        noisy_img = img + np.random.normal(mean, std, img.shape)
        noisy_img_clipped = np.clip(noisy_img, 0, 255)
        return noisy_img_clipped

    # Randomly change the brightness of img
    def random_brightness(self, img, max_change=60):
        # Work in HSV so only the V (brightness) channel is shifted.
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
        # generate random brightness noise
        # NOTE(review): random.randint is inclusive on both ends, so this
        # draws from [-max_change, max_change+1]; the +1 looks like it
        # assumed an exclusive upper bound — confirm intent.
        value = random.randint(-max_change, max_change+1)
        # Clap the result to 0 - 255
        if value >= 0:
            # Brighten: pixels that would overflow are saturated to 255,
            # the rest are shifted (masked in-place uint8 arithmetic).
            lim = 255 - value
            v[v > lim] = 255
            v[v <= lim] += value
        else:
            # Darken: pixels that would underflow are floored to 0.
            value = int(-value)
            lim = 0 + value
            v[v < lim] = 0
            v[v >= lim] -= value
        final_hsv = cv2.merge((h, s, v))
        img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
        return img

    def process_img(self, img, size):
        """Resize img to size x size with aspect-preserving letterboxing,
        apply random augmentation, and return a channel-first (C, H, W)
        array."""
        height, width, channels = img.shape
        if width == height:
            new_img = cv2.resize(img, (size, size))
            top, bottom, left, right = 0, 0, 0, 0
        else:
            # Scale the longer side to `size`, then compute the padding
            # needed on the shorter side to reach a square.
            ratio = float(size)/max(height, width)
            new_height = int(height*ratio)
            new_width = int(width*ratio)
            new_img = cv2.resize(img, (new_width, new_height))
            delta_w = size - new_width
            delta_h = size - new_height
            top, bottom = delta_h//2, delta_h-(delta_h//2)
            left, right = delta_w//2, delta_w-(delta_w//2)
        if self.is_training:
            # Training augmentation: horizontal flip, brightness jitter,
            # gaussian noise — each applied independently at random.
            p_flip = 0.5
            if (random.uniform(0,1) > 1 - p_flip):
                new_img= cv2.flip(new_img, 1)
            p_brightness = 0.8
            if (random.uniform(0,1) > 1 - p_brightness):
                new_img = self.random_brightness(new_img)
            p_noise = 0.5
            if (random.uniform(0,1) > 1 - p_noise):
                new_img = self.random_gaussian_noise(new_img)
        # small variation applied to validation data
        else:
            # NOTE(review): validation uses p_brightness=0.75 and a smaller
            # max_change vs. 0.8 in training — confirm this is intentional.
            p_flip = 0.5
            if (random.uniform(0,1) > 1 - p_flip):
                new_img= cv2.flip(new_img, 1)
            p_brightness = 0.75
            if (random.uniform(0,1) > 1 - p_brightness):
                new_img = self.random_brightness(new_img, max_change=40)
        # Pad with black to reach exactly size x size.
        new_img = cv2.copyMakeBorder(new_img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0,0,0])
        # print(self.is_training, img.shape, new_img.shape)
        # cv2.imwrite("/home/ubuntu/google-landmark/landmark-retrieval/baseline/augmented/original.jpg", img)
        # cv2.imwrite("/home/ubuntu/google-landmark/landmark-retrieval/baseline/augmented/augmented.jpg", new_img)
        # exit(0)
        # HWC -> CHW for downstream (likely framework tensor) consumption.
        return np.transpose(new_img, (2, 0, 1))