コード例 #1
0
ファイル: pickle_io.py プロジェクト: mclaughlin6464/pdnn
    def load_next_partition(self, shared_xy):
        """Load the current pickle partition and push it into the shared variables.

        The file at ``self.pfile_path_list[self.cur_pfile_index]`` is only
        (re)read when nothing is cached yet or when the list holds more than
        one partition file; with a single, already cached file the shared
        variables are left untouched.

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``.

        Side effects:
            Updates ``self.feat_mat``, ``self.label_vec``, ``self.cur_frame_num``
            and ``self.cur_pfile_index``. Once the last file of the list has
            been handled, ``self.end_reading`` is set and the index wraps to 0.
        """
        # Debug trace; written so it prints the same text on Python 2 and 3.
        print('load_next_partition!')
        pfile_path = self.pfile_path_list[self.cur_pfile_index]
        if self.feat_mat is None or len(self.pfile_path_list) > 1:
            # NOTE(review): unpickling can execute arbitrary code; only feed
            # this reader partition files from a trusted source.
            fopen = smart_open(pfile_path, 'rb')
            try:
                self.feat_mat, self.label_vec = cPickle.load(fopen)
            finally:
                # Release the handle even when the pickle is corrupt.
                fopen.close()
            shared_x, shared_y = shared_xy

            # TODO no longer label_vec, is array
            self.feat_mat, self.label_vec = \
                preprocess_feature_and_label(self.feat_mat, self.label_vec, self.read_opts)
            if self.read_opts['random']:
                # Return value is ignored, so the helper presumably shuffles
                # both arrays in place -- TODO confirm.
                shuffle_feature_and_label(self.feat_mat, self.label_vec)

            shared_x.set_value(self.feat_mat, borrow=True)
            # TODO types wrong here? Maybe?
            shared_y.set_value(self.label_vec.astype(theano.config.floatX), borrow=True)

        self.cur_frame_num = len(self.feat_mat)
        # Debug trace: frame count, label count and feature matrix shape.
        print('%s %s %s' % (len(self.feat_mat), len(self.label_vec), self.feat_mat.shape))
        self.cur_pfile_index += 1

        if self.cur_pfile_index >= len(self.pfile_path_list):   # the end of one epoch
            self.end_reading = True
            self.cur_pfile_index = 0
コード例 #2
0
    def load_next_partition(self, shared_xy):
        """Load the current pickle partition and push it into the shared variables.

        The file at ``self.pfile_path_list[self.cur_pfile_index]`` is only
        (re)read when nothing is cached yet or when the list holds more than
        one partition file. The pickled payload may contain two items
        (features, labels) or three (the third one is discarded).

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``.

        Side effects:
            Updates ``self.feat_mat``, ``self.label_vec``, ``self.cur_frame_num``
            and ``self.cur_pfile_index``. Once the last file of the list has
            been handled, ``self.end_reading`` is set and the index wraps to 0.
        """
        pfile_path = self.pfile_path_list[self.cur_pfile_index]
        if self.feat_mat is None or len(self.pfile_path_list) > 1:
            # NOTE(review): unpickling can execute arbitrary code; only feed
            # this reader partition files from a trusted source.
            fopen = smart_open(pfile_path, 'rb')
            try:
                test = cPickle.load(fopen)
            finally:
                # Release the handle even when the pickle is corrupt.
                fopen.close()
            # NOTE(review): a payload of any other length is silently ignored,
            # so the previous feat_mat/label_vec (possibly None) would be
            # preprocessed again -- confirm whether this should raise instead.
            if len(test) == 2:
                self.feat_mat, self.label_vec = test
            elif len(test) == 3:
                self.feat_mat, self.label_vec, _ = test
            shared_x, shared_y = shared_xy

            self.feat_mat, self.label_vec = \
                preprocess_feature_and_label(self.feat_mat, self.label_vec, self.read_opts)
            if self.read_opts['random']:
                # Return value is ignored, so the helper presumably shuffles
                # both arrays in place -- TODO confirm.
                shuffle_feature_and_label(self.feat_mat, self.label_vec)

            shared_x.set_value(self.feat_mat, borrow=True)
            shared_y.set_value(self.label_vec, borrow=True)
        self.cur_frame_num = len(self.feat_mat)
        self.cur_pfile_index += 1

        if self.cur_pfile_index >= len(
                self.pfile_path_list):  # the end of one epoch
            self.end_reading = True
            self.cur_pfile_index = 0
コード例 #3
0
ファイル: pickle_io.py プロジェクト: plaffitte/script
    def load_next_partition(self, shared_xy):
        """Load the current pickle partition and push it into the shared variables.

        The file at ``self.pfile_path_list[self.cur_pfile_index]`` is only
        (re)read when nothing is cached yet or when the list holds more than
        one partition file. The pickled payload may contain two items
        (features, labels) or three (the third one is discarded).

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``.

        Side effects:
            Updates ``self.feat_mat``, ``self.label_vec``, ``self.cur_frame_num``
            and ``self.cur_pfile_index``. Once the last file of the list has
            been handled, ``self.end_reading`` is set and the index wraps to 0.
            Prints two debug traces.
        """
        pfile_path = self.pfile_path_list[self.cur_pfile_index]
        print("pfile_path:", pfile_path)
        if self.feat_mat is None or len(self.pfile_path_list) > 1:
            # NOTE(review): unpickling can execute arbitrary code; only feed
            # this reader partition files from a trusted source.
            fopen = smart_open(pfile_path, "rb")
            try:
                test = cPickle.load(fopen)
            finally:
                # Release the handle even when the pickle is corrupt.
                fopen.close()
            # NOTE(review): a payload of any other length is silently ignored,
            # so the previous feat_mat/label_vec (possibly None) would be
            # preprocessed again -- confirm whether this should raise instead.
            if len(test) == 2:
                self.feat_mat, self.label_vec = test
            elif len(test) == 3:
                self.feat_mat, self.label_vec, _ = test
            shared_x, shared_y = shared_xy

            self.feat_mat, self.label_vec = preprocess_feature_and_label(self.feat_mat, self.label_vec, self.read_opts)
            if self.read_opts["random"]:
                # Return value is ignored, so the helper presumably shuffles
                # both arrays in place -- TODO confirm.
                shuffle_feature_and_label(self.feat_mat, self.label_vec)

            shared_x.set_value(self.feat_mat, borrow=True)
            shared_y.set_value(self.label_vec.astype(theano.config.floatX), borrow=True)
        self.cur_frame_num = len(self.feat_mat)
        print("self.cur_frame_num is;", self.cur_frame_num)
        self.cur_pfile_index += 1

        if self.cur_pfile_index >= len(self.pfile_path_list):  # the end of one epoch
            self.end_reading = True
            self.cur_pfile_index = 0
コード例 #4
0
ファイル: kaldi_io.py プロジェクト: Brainstormers/pdnn
    def load_next_partition(self, shared_xy):
        """Fill ``self.feats``/``self.labels`` with up to ``max_frame_num`` frames.

        Frames left over from an utterance that did not fit into the previous
        partition (``self.feat_buffer``/``self.label_buffer``) are consumed
        first; further utterances are then pulled from ``self.read_next_utt()``
        until the partition is full or no utterance is left. The filled prefix
        is handed to the shared variables.

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``. ``shared_y`` is only written
                when ``self.ali_provided`` is true.

        Side effects:
            Updates the frame buffers, ``self.cur_frame_num`` and, when the
            reader runs out of utterances, ``self.end_reading``.
        """
        shared_x, shared_y = shared_xy

        if self.feat_buffer is None:
            read_frame_num = 0
        else:   # An utterance hasn't been completely consumed yet
            read_frame_num = min(self.max_frame_num, len(self.feat_buffer))
            self.feats[0:read_frame_num] = self.feat_buffer[0:read_frame_num]
            if self.ali_provided:
                self.labels[0:read_frame_num] = self.label_buffer[0:read_frame_num]
            if read_frame_num == len(self.feat_buffer):
                self.feat_buffer = None
                self.label_buffer = None
            else:
                self.feat_buffer = self.feat_buffer[read_frame_num:]
                if self.ali_provided:
                    self.label_buffer = self.label_buffer[read_frame_num:]

        while read_frame_num < self.max_frame_num:
            utt_id, utt_mat = self.read_next_utt()
            if utt_id == '':    # No more utterances available
                self.end_reading = True
                break
            # Skip utterances without an alignment (``in`` replaces the
            # Python 2-only ``dict.has_key``).
            if self.ali_provided and utt_id not in self.alignment:
                continue
            rows = len(utt_mat)

            if self.ali_provided:
                ali_utt = self.alignment[utt_id]
                # Skip utterances whose label count does not match the frame count.
                if len(ali_utt) != rows:
                    continue
            else:
                ali_utt = None

            utt_mat, ali_utt = preprocess_feature_and_label(utt_mat, ali_utt, self.read_opts)
            # Preprocessing may change the number of frames, so re-measure.
            rows = len(utt_mat)

            if read_frame_num + rows > self.max_frame_num:
                # Utterance won't fit in current partition, use some frames and keep the rest for the next partition
                rows = self.max_frame_num - read_frame_num
                self.feat_buffer = utt_mat[rows:]
                utt_mat = utt_mat[:rows]
                if self.ali_provided:
                    self.label_buffer = ali_utt[rows:]
                    ali_utt = ali_utt[:rows]

            self.feats[read_frame_num:(read_frame_num + rows)] = utt_mat
            if self.ali_provided:
                self.labels[read_frame_num:(read_frame_num + rows)] = ali_utt
            read_frame_num += rows

        if self.read_opts['random']:
            # NOTE(review): self.labels is sliced here even when no alignment
            # is provided -- confirm it is always subscriptable.
            shuffle_feature_and_label(self.feats[0:read_frame_num], self.labels[0:read_frame_num])

        shared_x.set_value(self.feats[0:read_frame_num], borrow=True)
        if self.ali_provided:
            shared_y.set_value(self.labels[0:read_frame_num], borrow=True)
        self.cur_frame_num = read_frame_num
コード例 #5
0
ファイル: pfile_io.py プロジェクト: bingo4508/pdnn
    def load_next_partition(self, shared_xy):
        """Read sentences until one partition is buffered, then publish it.

        Sentences are appended to ``self.feat_buffer``/``self.label_buffer``
        until at least ``self.frame_per_partition`` frames are available or the
        current pfile has no sentence left. The first ``frame_per_partition``
        frames become ``self.feat``/``self.label`` (cast to
        ``theano.config.floatX``) and are passed to the shared variables; the
        remainder stays buffered for the next call.

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``.

        Side effects:
            Updates ``self.dtype``, the buffers, ``self.sentence_index`` and
            ``self.cur_frame_num``. When the current pfile is exhausted it
            advances ``self.cur_pfile_index`` and either calls
            ``self.initialize_read()`` or, after the last file, sets
            ``self.end_reading`` and wraps the index to 0.
        """
        shared_x, shared_y = shared_xy

        # Record layout of one pfile frame: 'd' holds the features, 'l' the label.
        record_layout = {'names': ['d', 'l'],
                         'formats': [('>f', self.original_feat_dim), '>i'],
                         'offsets': [self.feat_start_column * 4, self.label_start_column * 4]}
        self.dtype = numpy.dtype(record_layout)

        while (len(self.feat_buffer) < self.frame_per_partition
               and self.sentence_index < self.num_sentences):
            sent_len = (self.sentence_offset[self.sentence_index + 1]
                        - self.sentence_offset[self.sentence_index])
            # NOTE(review): ``is file`` compares against the builtin type
            # object itself, so this branch looks unreachable for an ordinary
            # file handle -- confirm the intent before changing it.
            if self.file_read is file:  # Not a compressed file
                records = numpy.fromfile(self.file_read, self.dtype, sent_len)
            else:
                byte_count = 4 * sent_len * (self.label_start_column + self.num_labels)
                raw = self.file_read.read(byte_count)
                records = numpy.fromstring(raw, self.dtype, sent_len)
            sent_feats, sent_labels = preprocess_feature_and_label(
                numpy.asarray(records['d']), numpy.asarray(records['l']), self.read_opts)
            self.feat_buffer = numpy.concatenate((self.feat_buffer, sent_feats))
            self.label_buffer = numpy.concatenate((self.label_buffer, sent_labels))
            self.sentence_index += 1

        # Split the buffers into the partition to publish and the carry-over.
        self.feat = self.feat_buffer[:self.frame_per_partition].astype(theano.config.floatX)
        self.label = self.label_buffer[:self.frame_per_partition].astype(theano.config.floatX)
        self.feat_buffer = self.feat_buffer[self.frame_per_partition:]
        self.label_buffer = self.label_buffer[self.frame_per_partition:]

        self.cur_frame_num = len(self.feat)

        if self.read_opts['random']:
            # Return value is ignored, so the helper presumably shuffles in place.
            shuffle_feature_and_label(self.feat, self.label)

        shared_x.set_value(self.feat, borrow=True)
        shared_y.set_value(self.label, borrow=True)

        file_exhausted = (self.sentence_index >= self.num_sentences
                          and len(self.feat_buffer) == 0)
        if file_exhausted:
            # move on to the next pfile
            self.cur_pfile_index += 1
            if self.cur_pfile_index < len(self.pfile_path_list):
                self.initialize_read()
            else:
                self.end_reading = True
                self.cur_pfile_index = 0
コード例 #6
0
ファイル: ark_io.py プロジェクト: bingo4508/pdnn
    def load_next_partition(self, shared_xy):
        """Load the current partition file and push it into the shared variables.

        The file at ``self.pfile_path_list[self.cur_pfile_index]`` is only
        (re)read when nothing is cached yet or when the list holds more than
        one partition file; with a single, already cached file the shared
        variables are left untouched.

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``.

        Side effects:
            Updates ``self.feat_mat``, ``self.label_vec``, ``self.cur_frame_num``
            and ``self.cur_pfile_index``. Once the last file of the list has
            been handled, ``self.end_reading`` is set and the index wraps to 0.
        """
        pfile_path = self.pfile_path_list[self.cur_pfile_index]
        if self.feat_mat is None or len(self.pfile_path_list) > 1:
            fopen = smart_open(pfile_path, 'rb')
            try:
                self.feat_mat, self.label_vec = load(fopen)
            finally:
                # Release the handle even when parsing the file fails.
                fopen.close()
            shared_x, shared_y = shared_xy

            self.feat_mat, self.label_vec = \
                preprocess_feature_and_label(self.feat_mat, self.label_vec, self.read_opts)
            if self.read_opts['random']:
                # Return value is ignored, so the helper presumably shuffles
                # both arrays in place -- TODO confirm.
                shuffle_feature_and_label(self.feat_mat, self.label_vec)

            shared_x.set_value(self.feat_mat, borrow=True)
            shared_y.set_value(self.label_vec.astype(theano.config.floatX), borrow=True)

        self.cur_frame_num = len(self.feat_mat)
        self.cur_pfile_index += 1

        if self.cur_pfile_index >= len(self.pfile_path_list):   # the end of one epoch
            self.end_reading = True
            self.cur_pfile_index = 0
コード例 #7
0
ファイル: kaldi_io.py プロジェクト: bingo4508/pdnn
    def load_next_partition(self, shared_xy):
        """Fill ``self.feats``/``self.labels`` with whole utterances and publish them.

        Utterances are pulled from ``self.read_next_utt()`` until the next one
        would exceed ``self.max_frame_num`` frames or no utterance is left.
        An utterance that does not fit is not split: the scp file is rewound
        to ``self.scp_cur_pos`` so that it can be read again by the next call.

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``. ``shared_y`` is only written
                when ``self.ali_provided`` is true.

        Side effects:
            Updates ``self.feats``, ``self.labels``, ``self.cur_frame_num`` and,
            when the reader runs out of utterances, ``self.end_reading``.
        """
        shared_x, shared_y = shared_xy
        read_frame_num = 0
        while True:
            utt_id, utt_mat = self.read_next_utt()
            if utt_id == '':    # No more utterances available
                self.end_reading = True
                break
            # Skip utterances without an alignment (``in`` replaces the
            # Python 2-only ``dict.has_key``).
            if self.ali_provided and utt_id not in self.alignment:
                continue
            rows = utt_mat.shape[0]

            if self.ali_provided:
                ali_utt = self.alignment[utt_id]
                # Skip utterances whose label count does not match the frame count.
                if ali_utt.shape[0] != rows:
                    continue
            else:
                ali_utt = None

            utt_mat, ali_utt = preprocess_feature_and_label(utt_mat, ali_utt, self.read_opts)
            # Preprocessing may change the number of frames, so re-measure.
            rows = utt_mat.shape[0]

            if read_frame_num + rows > self.max_frame_num:
                # Presumably scp_cur_pos marks the start of this utterance's
                # scp entry (maintained outside this method) -- TODO confirm.
                # NOTE(review): an utterance longer than max_frame_num would
                # never fit and be retried forever -- verify upstream limits.
                self.scp_file_read.seek(self.scp_cur_pos)
                break

            self.feats[read_frame_num:(read_frame_num+rows)] = utt_mat
            if self.ali_provided:
                self.labels[read_frame_num:(read_frame_num+rows)] = ali_utt
            read_frame_num += rows

        if self.read_opts['random']:
            # NOTE(review): self.labels is sliced here even when no alignment
            # is provided -- confirm it is always subscriptable.
            shuffle_feature_and_label(self.feats[0:read_frame_num], self.labels[0:read_frame_num])

        shared_x.set_value(self.feats[0:read_frame_num], borrow=True)
        if self.ali_provided:
            shared_y.set_value(self.labels[0:read_frame_num], borrow=True)
        self.cur_frame_num = read_frame_num
コード例 #8
0
ファイル: bloscpack_io.py プロジェクト: josvr/pdnn
    def load_next_partition(self, shared_xy):
        """Load the current bloscpack partition and return its path.

        Features are unpacked from the partition file and labels from the
        sibling file with a ``.labels`` suffix. Nothing is re-read when a
        single partition file is already cached in ``self.feat_mat``.

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``.

        Returns:
            The path of the partition selected by ``self.cur_pfile_index`` at
            the time of the call.
        """
        current_path = self.pfile_path_list[self.cur_pfile_index]

        already_cached = self.feat_mat is not None and len(self.pfile_path_list) <= 1
        if not already_cached:
            self.feat_mat = bp.unpack_ndarray_file(current_path)
            self.label_vec = bp.unpack_ndarray_file(current_path + ".labels")
            shared_x, shared_y = shared_xy

            processed = preprocess_feature_and_label(
                self.feat_mat, self.label_vec, self.read_opts)
            self.feat_mat, self.label_vec = processed
            if self.read_opts['random']:
                # Return value is ignored, so the helper presumably shuffles in place.
                shuffle_feature_and_label(self.feat_mat, self.label_vec)

            shared_x.set_value(self.feat_mat, borrow=True)
            labels_as_float = self.label_vec.astype(theano.config.floatX)
            shared_y.set_value(labels_as_float, borrow=True)

        self.cur_frame_num = len(self.feat_mat)

        # Advance to the next file; wrapping around marks the end of one epoch.
        self.cur_pfile_index += 1
        if self.cur_pfile_index >= len(self.pfile_path_list):
            self.end_reading = True
            self.cur_pfile_index = 0
        return current_path
コード例 #9
0
ファイル: kaldi_io.py プロジェクト: ducle90/chai_share
    def load_next_partition(self, shared_xy):
        """Fill ``self.feats``/``self.labels`` with up to ``max_frame_num`` frames.

        Frames left over from an utterance that did not fit into the previous
        partition (``self.feat_buffer``/``self.label_buffer``) are consumed
        first; further utterances are then pulled from ``self.read_next_utt()``
        until the partition is full or no utterance is left. The filled prefix
        is handed to the shared variables.

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``. ``shared_y`` is only written
                when ``self.ali_provided`` is true.

        Side effects:
            Updates the frame buffers, ``self.cur_frame_num`` and, when the
            reader runs out of utterances, ``self.end_reading``.
        """
        shared_x, shared_y = shared_xy

        if self.feat_buffer is None:
            read_frame_num = 0
        else:  # An utterance hasn't been completely consumed yet
            read_frame_num = min(self.max_frame_num, len(self.feat_buffer))
            self.feats[0:read_frame_num] = self.feat_buffer[0:read_frame_num]
            if self.ali_provided:
                self.labels[0:read_frame_num] = self.label_buffer[
                    0:read_frame_num]
            if read_frame_num == len(self.feat_buffer):
                self.feat_buffer = None
                self.label_buffer = None
            else:
                self.feat_buffer = self.feat_buffer[read_frame_num:]
                if self.ali_provided:
                    self.label_buffer = self.label_buffer[read_frame_num:]

        while read_frame_num < self.max_frame_num:
            utt_id, utt_mat = self.read_next_utt()
            if utt_id == '':  # No more utterances available
                self.end_reading = True
                break
            # Skip utterances without an alignment (``in`` replaces the
            # Python 2-only ``dict.has_key``).
            if self.ali_provided and utt_id not in self.alignment:
                continue
            rows = len(utt_mat)

            if self.ali_provided:
                ali_utt = self.alignment[utt_id]
                # Skip utterances whose label count does not match the frame count.
                if len(ali_utt) != rows:
                    continue
            else:
                ali_utt = None

            utt_mat, ali_utt = preprocess_feature_and_label(
                utt_mat, ali_utt, self.read_opts)
            # Preprocessing may change the number of frames, so re-measure.
            rows = len(utt_mat)

            if read_frame_num + rows > self.max_frame_num:
                # Utterance won't fit in current partition, use some frames and keep the rest for the next partition
                rows = self.max_frame_num - read_frame_num
                self.feat_buffer = utt_mat[rows:]
                utt_mat = utt_mat[:rows]
                if self.ali_provided:
                    self.label_buffer = ali_utt[rows:]
                    ali_utt = ali_utt[:rows]

            self.feats[read_frame_num:(read_frame_num + rows)] = utt_mat
            if self.ali_provided:
                self.labels[read_frame_num:(read_frame_num + rows)] = ali_utt
            read_frame_num += rows

        if self.read_opts['random']:
            # NOTE(review): self.labels is sliced here even when no alignment
            # is provided -- confirm it is always subscriptable.
            shuffle_feature_and_label(self.feats[0:read_frame_num],
                                      self.labels[0:read_frame_num])

        shared_x.set_value(self.feats[0:read_frame_num], borrow=True)
        if self.ali_provided:
            shared_y.set_value(self.labels[0:read_frame_num], borrow=True)
        self.cur_frame_num = read_frame_num
コード例 #10
0
ファイル: pfile_io.py プロジェクト: yanweifu/pdnn
    def load_next_partition(self, shared_xy):
        """Fill ``self.feat``/``self.label`` with up to one partition of frames.

        Frames carried over from a sentence that did not fit into the previous
        partition are copied first; further sentences are then read from
        ``self.file_read`` until ``self.frame_per_partition`` frames are filled
        or the current pfile has no sentence left. A sentence that overflows
        the partition is split and its tail is kept in the carry-over buffers.

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``.

        Side effects:
            Updates ``self.dtype``, the carry-over buffers,
            ``self.sentence_index`` and ``self.cur_frame_num``. When the
            current pfile is exhausted it advances ``self.cur_pfile_index`` and
            either calls ``self.initialize_read()`` or, after the last file,
            sets ``self.end_reading`` and wraps the index to 0.
        """
        shared_x, shared_y = shared_xy

        # Record layout of one pfile frame: 'd' holds the features, 'l' the label.
        record_layout = {'names': ['d', 'l'],
                         'formats': [('>f', self.original_feat_dim), '>i'],
                         'offsets': [self.feat_start_column * 4, self.label_start_column * 4]}
        self.dtype = numpy.dtype(record_layout)

        # Start with whatever the previous call could not fit.
        if self.feat_buffer is None:
            filled = 0
        else:
            filled = min(self.frame_per_partition, len(self.feat_buffer))
            self.feat[0:filled] = self.feat_buffer[0:filled]
            self.label[0:filled] = self.label_buffer[0:filled]
            if filled != len(self.feat_buffer):
                self.feat_buffer = self.feat_buffer[filled:]
                self.label_buffer = self.label_buffer[filled:]
            else:
                self.feat_buffer = None
                self.label_buffer = None

        while filled < self.frame_per_partition and self.sentence_index < self.num_sentences:
            sent_len = (self.sentence_offset[self.sentence_index + 1]
                        - self.sentence_offset[self.sentence_index])
            # NOTE(review): ``is file`` compares against the builtin type
            # object itself, so this branch looks unreachable for an ordinary
            # file handle -- confirm the intent before changing it.
            if self.file_read is file:  # Not a compressed file
                records = numpy.fromfile(self.file_read, self.dtype, sent_len)
            else:
                byte_count = 4 * sent_len * (self.label_start_column + self.num_labels)
                raw = self.file_read.read(byte_count)
                records = numpy.fromstring(raw, self.dtype, sent_len)
            sent_feats, sent_labels = preprocess_feature_and_label(
                numpy.asarray(records['d']), numpy.asarray(records['l']), self.read_opts)
            take = len(sent_feats)

            room = self.frame_per_partition - filled
            if take > room:
                # Sentence overflows the partition: keep the tail for the next call.
                self.feat_buffer = sent_feats[room:]
                self.label_buffer = sent_labels[room:]
                sent_feats = sent_feats[:room]
                sent_labels = sent_labels[:room]
                take = room

            upto = filled + take
            self.feat[filled:upto] = sent_feats
            self.label[filled:upto] = sent_labels
            filled = upto
            self.sentence_index += 1

        if self.read_opts['random']:
            # Return value is ignored, so the helper presumably shuffles in place.
            shuffle_feature_and_label(self.feat[:filled], self.label[:filled])

        # The whole arrays are published; cur_frame_num records how many rows were filled.
        shared_x.set_value(self.feat, borrow=True)
        shared_y.set_value(self.label, borrow=True)
        self.cur_frame_num = filled

        file_exhausted = (self.sentence_index >= self.num_sentences
                          and self.feat_buffer is None)
        if file_exhausted:
            # move on to the next pfile
            self.cur_pfile_index += 1
            if self.cur_pfile_index < len(self.pfile_path_list):
                self.initialize_read()
            else:
                self.end_reading = True
                self.cur_pfile_index = 0
コード例 #11
0
ファイル: pfile_io.py プロジェクト: ducle90/chai_share
    def load_next_partition(self, shared_xy):
        """Fill ``self.feat``/``self.label`` with up to one partition of frames.

        Frames carried over from a sentence that did not fit into the previous
        partition are copied first; further sentences are then read from
        ``self.file_read`` until ``self.frame_per_partition`` frames are filled
        or the current pfile has no sentence left. A sentence that overflows
        the partition is split and its tail is kept in the carry-over buffers.

        Args:
            shared_xy: pair ``(shared_x, shared_y)``; both objects must provide
                ``set_value(value, borrow=...)``.

        Side effects:
            Updates ``self.dtype``, the carry-over buffers,
            ``self.sentence_index`` and ``self.cur_frame_num``. When the
            current pfile is exhausted it advances ``self.cur_pfile_index`` and
            either calls ``self.initialize_read()`` or, after the last file,
            sets ``self.end_reading`` and wraps the index to 0.
        """
        shared_x, shared_y = shared_xy

        # Record layout of one pfile frame: 'd' holds the features, 'l' the label.
        self.dtype = numpy.dtype({
            'names': ['d', 'l'],
            'formats': [('>f', self.original_feat_dim), '>i'],
            'offsets': [self.feat_start_column * 4,
                        self.label_start_column * 4],
        })

        used = 0
        if self.feat_buffer is not None:
            # An earlier sentence was only partly consumed; copy its tail first.
            used = min(self.frame_per_partition, len(self.feat_buffer))
            self.feat[0:used] = self.feat_buffer[0:used]
            self.label[0:used] = self.label_buffer[0:used]
            if used == len(self.feat_buffer):
                self.feat_buffer = None
                self.label_buffer = None
            else:
                self.feat_buffer = self.feat_buffer[used:]
                self.label_buffer = self.label_buffer[used:]

        while (used < self.frame_per_partition
               and self.sentence_index < self.num_sentences):
            frame_count = (self.sentence_offset[self.sentence_index + 1]
                           - self.sentence_offset[self.sentence_index])
            # NOTE(review): ``is file`` compares against the builtin type
            # object itself, so this branch looks unreachable for an ordinary
            # file handle -- confirm the intent before changing it.
            if self.file_read is file:  # Not a compressed file
                chunk = numpy.fromfile(self.file_read, self.dtype, frame_count)
            else:
                columns = self.label_start_column + self.num_labels
                payload = self.file_read.read(4 * frame_count * columns)
                chunk = numpy.fromstring(payload, self.dtype, frame_count)
            feats = numpy.asarray(chunk['d'])
            labels = numpy.asarray(chunk['l'])
            feats, labels = preprocess_feature_and_label(feats, labels, self.read_opts)
            frame_count = len(feats)

            if used + frame_count > self.frame_per_partition:
                # Sentence overflows the partition: keep the tail for the next call.
                frame_count = self.frame_per_partition - used
                self.feat_buffer = feats[frame_count:]
                self.label_buffer = labels[frame_count:]
                feats = feats[:frame_count]
                labels = labels[:frame_count]

            target = slice(used, used + frame_count)
            self.feat[target] = feats
            self.label[target] = labels
            used += frame_count
            self.sentence_index += 1

        if self.read_opts['random']:
            # Return value is ignored, so the helper presumably shuffles in place.
            shuffle_feature_and_label(self.feat[:used], self.label[:used])

        # The whole arrays are published; cur_frame_num records how many rows were filled.
        shared_x.set_value(self.feat, borrow=True)
        shared_y.set_value(self.label, borrow=True)
        self.cur_frame_num = used

        if self.sentence_index < self.num_sentences or self.feat_buffer is not None:
            # Current pfile still has data to serve.
            return

        # move on to the next pfile
        self.cur_pfile_index += 1
        if self.cur_pfile_index >= len(self.pfile_path_list):
            self.end_reading = True
            self.cur_pfile_index = 0
        else:
            self.initialize_read()