Example #1
0
    def load_all_frames(self, pairs):
        """
        Load every frame appearing in the given token pairs.

        Returns a tuple:
            - token_feats: dict mapping a token (f, s, e) to its frames
            - frames: the frame dataset, a list of
              (f1, s1, e1, i1, f2, s2, e2, i2, same) where
                - f1, f2 are the files
                - s1, e1 and s2, e2 are the token start/end times
                - i1, i2 are frame positions within the token features
                - same is +1 if the two frames match, -1 otherwise

        :param pairs:
            list of pairs of the form (f1, s1, e1, f2, s2, e2, same)
        """
        pairs = group_pairs(pairs)
        token_feats = self.get_token_feats(pairs)
        frames = []

        # Positive examples: frames of "same" pairs matched along DTW paths.
        for f1, s1, e1, f2, s2, e2 in pairs['same']:
            if s1 > e1 or s2 > e2:
                continue
            feats_a = token_feats[f1, s1, e1]
            feats_b = token_feats[f2, s2, e2]
            try:
                align_a, align_b = get_dtw_alignment(feats_a, feats_b)
            except Exception:
                # best-effort: skip pairs that cannot be aligned
                continue

            frames.extend(
                (f1, s1, e1, ia, f2, s2, e2, ib, 1)
                for ia, ib in zip(align_a, align_b))
            self.statistics_training['SameType'] += 1

        # Negative examples: "diff" tokens paired frame-by-frame.
        for f1, s1, e1, f2, s2, e2 in pairs['diff']:
            if s1 > e1 or s2 > e2:
                continue
            feats_a = token_feats[f1, s1, e1]
            feats_b = token_feats[f2, s2, e2]

            num_frames = min(feats_a.shape[0], feats_b.shape[0])
            frames.extend(
                (f1, s1, e1, k, f2, s2, e2, k, -1)
                for k in range(num_frames))
            self.statistics_training['DiffType'] += 1

        np.random.shuffle(frames)
        return token_feats, frames
Example #2
0
    def batch_iterator(self, train_mode=True):
        """Iterate over minibatches of pre-sampled pairs for one epoch.

        Use this when the sampler already created the batches; with a
        sampler that didn't create batches, use the new_get_batches
        function instead.

        Yields batches of the form (X1, X2, y).
        """
        # make sure features are loaded
        self.load_data()

        mode = 'train' if train_mode else 'dev'
        pairs = self.pairs[mode]

        if self.shuffle_between_epochs:
            random.shuffle(pairs)

        # slice the pair list into consecutive minibatches
        step = self.batch_size
        batches = [pairs[start:start + step]
                   for start in range(0, len(pairs), step)]
        num_batches = len(batches)

        if self.num_max_minibatches < num_batches:
            # subsample without replacement down to the requested count
            selected_batches = np.random.choice(range(num_batches),
                                                self.num_max_minibatches,
                                                replace=False)
        else:
            print("Number of batches not sufficient," +
                  " iterating over all the batches")
            selected_batches = np.random.permutation(range(num_batches))

        volatile = not train_mode
        for batch_id in selected_batches:
            batch = self.load_frames_from_pairs(group_pairs(batches[batch_id]))

            # add Temporal coherence loss
            if self.tcl > 0:
                batch = self.add_tcl_to_batch(batch)

            inputs1, inputs2, targets = (torch.from_numpy(a) for a in batch)
            yield (Variable(inputs1, volatile=volatile),
                   Variable(inputs2, volatile=volatile),
                   Variable(targets, volatile=volatile))
Example #3
0
    def load_all_frames(self, pairs):
        """Load token features for every modality plus the frame dataset.

        The parent class builds the token features and the aligned frames
        for the first feature path; this then adds a token-features dict
        for each remaining modality.

        :param pairs:
            list of pairs of the form (f1, s1, e1, f2, s2, e2, same)
        """
        # first modality: the parent implementation also builds `frames`
        self.features = self.features_dict[self.features_path[0]]
        token_feats, frames = super(MultimodalDataLoader,
                                    self).load_all_frames(pairs)
        # one token-feats dict per modality, first one from the parent call
        token_feats_list = [token_feats]

        # remaining modalities: only token features are needed
        pairs = group_pairs(pairs)
        for path in self.features_path[1:]:
            self.features = self.features_dict[path]
            token_feats_list.append(self.get_token_feats(pairs))

        return token_feats_list, frames
Example #4
0
    def batch_iterator(self, train_mode=True):
        """Sample positive and negative pairs, then yield minibatches.

        Negative pairs are built by drawing random tokens and pairing
        them with a wrong ("diff") alignment.

        Yields batches of the form (X1, X2, y).
        """
        print("constructing batches")
        mode = 'train' if train_mode else 'test'
        iterations = self.iterations[mode]
        self.load_data()

        all_positive_pairs = self.pairs[mode]
        tokens = self.tokens[mode]

        num_pairs = iterations * self.batch_size
        num_positive_pairs = int(num_pairs * self.proportion_positive_pairs)

        # cap at the number of available positive pairs
        if num_positive_pairs > len(all_positive_pairs):
            print("Not enough positive pairs to sample this number of "
                  "iterations. There is only {}, but {} requested".format(
                      len(all_positive_pairs), num_positive_pairs))
            num_positive_pairs = len(all_positive_pairs)
        num_negative_pairs = num_pairs - num_positive_pairs

        positive_pairs = [
            pair + ['same']
            for pair in random.sample(all_positive_pairs, num_positive_pairs)
        ]
        # negative pairs: sample same-type tokens and align them wrongly
        tokens = random.choices(tokens, k=2 * num_negative_pairs)
        negative_pairs = [
            list(first) + list(second) + ["diff"]
            for first, second in zip(tokens[::2], tokens[1::2])
        ]

        pairs = positive_pairs + negative_pairs
        random.shuffle(pairs)
        print("done constructing batches for epoch")

        volatile = not train_mode
        for step in range(iterations):
            start = step * self.batch_size
            pairs_batch = pairs[start:start + self.batch_size]
            if not pairs_batch:
                break
            X1, X2, Y = self.load_frames_from_pairs(
                group_pairs(pairs_batch), frames=True)
            yield (Variable(torch.from_numpy(X1), volatile=volatile),
                   Variable(torch.from_numpy(X2), volatile=volatile),
                   Variable(torch.from_numpy(Y), volatile=volatile))
Example #5
0
    def batch_iterator(self, train_mode=True):
        """Build an iterator over next batches from folder for one epoch.

        Returns batches of the form (X1, X2, y_spk, y_phn).
        """
        # load features
        self.load_data()

        mode = 'train' if train_mode else 'dev'
        all_pairs = self.pairs[mode]

        # TODO : shuffle the pairs before creating batches
        # slice the pair list into consecutive minibatches
        step = self.batch_size
        batches = [all_pairs[start:start + step]
                   for start in range(0, len(all_pairs), step)]
        num_batches = len(batches)

        fid2spk = read_spkid_file(self.fid2spk_file)

        if self.num_max_minibatches < num_batches:
            selected_batches = np.random.choice(range(num_batches),
                                                self.num_max_minibatches,
                                                replace=False)
        else:
            print("Number of batches not sufficient," +
                  " iterating over all the batches")
            selected_batches = np.random.permutation(range(num_batches))

        for batch_id in selected_batches:
            grouped = group_pairs(batches[batch_id])
            tensors = map(torch.from_numpy,
                          self.load_frames_from_pairs(grouped,
                                                      fid2spk=fid2spk))
            X_batch1, X_batch2, y_spk_batch, y_phn_batch = map(
                Variable, tensors)
            yield X_batch1, X_batch2, y_spk_batch, y_phn_batch