コード例 #1
0
    def train_truncated_model(self, train_x, train_y, batch_size,
                              num_of_epochs, shuffle_data):
        """Fit ``self.model`` on the full training set with a single ``fit`` call.

        Inputs and targets are each converted by
        ``data_utils.transform_data_to_3d_matrix`` using this object's
        ``seq_length`` and ``merge_size``.  When ``self.stateful`` is truthy
        the converted pair is additionally passed through
        ``data_utils.get_stateful_data`` together with ``batch_size``.

        Args:
            train_x: Training inputs, forwarded to the 3-D transform.
            train_y: Training targets, forwarded to the 3-D transform.
            batch_size: Batch size for ``fit`` (and for the stateful helper).
            num_of_epochs: Number of epochs passed to ``fit``.
            shuffle_data: Forwarded to the 3-D transform as ``shuffle_data``.
        """
        ### Method 1 ###
        # Inputs and targets must be converted with identical settings.
        reshape_options = {
            'seq_length': self.seq_length,
            'merge_size': self.merge_size,
            'shuffle_data': shuffle_data,
        }

        inputs_3d = data_utils.transform_data_to_3d_matrix(
            train_x, **reshape_options)
        print("Input shape: " + str(inputs_3d.shape))

        targets_3d = data_utils.transform_data_to_3d_matrix(
            train_y, **reshape_options)
        print("Output shape: " + str(targets_3d.shape))

        if self.stateful:
            inputs_3d, targets_3d = data_utils.get_stateful_data(
                inputs_3d, targets_3d, batch_size)

        # fit-side shuffling is switched off; any shuffling is left to the
        # transform above via ``shuffle_data``.
        self.model.fit(
            inputs_3d,
            targets_3d,
            batch_size=batch_size,
            epochs=num_of_epochs,
            shuffle=False,
            verbose=1,
        )
コード例 #2
0
    def train_bucket_model_without_padding(self, train_x, train_y, train_flen,
                                           batch_size, num_of_epochs,
                                           shuffle_data):
        """Train group by group, one group per key of ``framenum2utt``.

        ``train_flen['framenum2utt']`` maps a sequence length to the ids
        (keys of ``train_x`` / ``train_y``) trained at that length.  In each
        epoch every group is converted by
        ``data_utils.transform_data_to_3d_matrix`` with the group's key as
        ``max_length`` and fitted for exactly one epoch.

        Args:
            train_x: Mapping from id to input data.
            train_y: Mapping from id to target data.
            train_flen: Mapping that contains the ``'framenum2utt'`` table.
            batch_size: Batch size passed to ``fit``.
            num_of_epochs: Number of passes over all groups.
            shuffle_data: When truthy, the group order is shuffled once
                (before the first epoch) after reseeding the global
                ``random`` module with a fixed seed.
        """
        ### Method 3 ###
        length_to_ids = train_flen['framenum2utt']
        length_order = list(length_to_ids.keys())
        if shuffle_data:
            random.seed(271638)
            random.shuffle(length_order)

        total_files = len(train_x)
        for epoch in range(1, num_of_epochs + 1):
            print('Epoch: %d/%d ' % (epoch, num_of_epochs))
            done = 0
            for seq_len in length_order:
                group_ids = length_to_ids[seq_len]
                group_x = {utt: train_x[utt] for utt in group_ids}
                group_y = {utt: train_y[utt] for utt in group_ids}
                inputs_3d = data_utils.transform_data_to_3d_matrix(
                    group_x, max_length=seq_len)
                targets_3d = data_utils.transform_data_to_3d_matrix(
                    group_y, max_length=seq_len)
                self.model.fit(inputs_3d, targets_3d,
                               batch_size=batch_size, epochs=1, verbose=0)

                done += len(group_ids)
                data_utils.drawProgressBar(done, total_files)

            # Terminate the progress-bar line for this epoch.
            sys.stdout.write("\n")
コード例 #3
0
    def train_split_model(self, train_x, train_y, valid_x, valid_y, train_flen,
                          batch_size, num_of_epochs, shuffle_data):
        """Train with ``train_on_batch`` over consecutive slices of ids.

        The ids are the keys of ``train_flen['utt2framenum']``.  Each slice
        of at most ``batch_size`` ids is converted by
        ``data_utils.transform_data_to_3d_matrix`` using this object's
        ``seq_length`` and ``merge_size``.  After every epoch the value of
        ``self.get_validation_error(valid_x, valid_y)`` is printed.

        Args:
            train_x: Mapping from id to input data.
            train_y: Mapping from id to target data.
            valid_x: Validation inputs for ``get_validation_error``.
            valid_y: Validation targets for ``get_validation_error``.
            train_flen: Mapping that contains the ``'utt2framenum'`` table.
            batch_size: Number of ids taken per training step.
            num_of_epochs: Number of passes over the id list.
            shuffle_data: When truthy, the id order is shuffled once (before
                the first epoch) after reseeding the global ``random``
                module with a fixed seed.
        """
        ### Method 3 ###
        utt_ids = list(train_flen['utt2framenum'].keys())
        if shuffle_data:
            random.seed(271638)
            random.shuffle(utt_ids)

        total_files = len(utt_ids)
        for epoch in range(1, num_of_epochs + 1):
            print('Epoch: %d/%d ' % (epoch, num_of_epochs))
            cursor = 0
            while cursor < total_files:
                chunk_ids = utt_ids[cursor:cursor + batch_size]
                chunk_x = {utt: train_x[utt] for utt in chunk_ids}
                chunk_y = {utt: train_y[utt] for utt in chunk_ids}

                # Same conversion settings for inputs and targets.
                reshape_options = dict(seq_length=self.seq_length,
                                       merge_size=self.merge_size)
                inputs_3d = data_utils.transform_data_to_3d_matrix(
                    chunk_x, **reshape_options)
                targets_3d = data_utils.transform_data_to_3d_matrix(
                    chunk_y, **reshape_options)

                self.model.train_on_batch(inputs_3d, targets_3d)

                # The last slice may be shorter than batch_size.
                cursor += len(chunk_ids)
                data_utils.drawProgressBar(cursor, total_files)

            valid_error = self.get_validation_error(valid_x, valid_y)
            print(" Validation error: %.3f" % valid_error)
コード例 #4
0
    def train_split_model_keras_version(self, train_x, train_y, valid_x,
                                        valid_y, train_flen, batch_size,
                                        num_of_epochs, shuffle_data):
        """Fit ``self.model`` on the whole training set via ``fit``.

        This function is not used as of now.

        ``valid_x``, ``valid_y`` and ``train_flen`` are accepted but not read
        by this method.

        Args:
            train_x: Training inputs, forwarded to the 3-D transform.
            train_y: Training targets, forwarded to the 3-D transform.
            valid_x: Unused.
            valid_y: Unused.
            train_flen: Unused.
            batch_size: Batch size for ``fit`` (and for the stateful helper).
            num_of_epochs: Number of epochs passed to ``fit``.
            shuffle_data: Forwarded to the 3-D transform as ``shuffle_data``.
        """
        ### Method 3 ###
        def to_3d(data):
            # One conversion routine so inputs and targets share settings.
            return data_utils.transform_data_to_3d_matrix(
                data,
                seq_length=self.seq_length,
                merge_size=self.merge_size,
                shuffle_data=shuffle_data)

        inputs_3d = to_3d(train_x)
        print("Input shape: " + str(inputs_3d.shape))

        targets_3d = to_3d(train_y)
        print("Output shape: " + str(targets_3d.shape))

        if self.stateful:
            inputs_3d, targets_3d = data_utils.get_stateful_data(
                inputs_3d, targets_3d, batch_size)

        self.model.fit(inputs_3d, targets_3d,
                       batch_size=batch_size, epochs=num_of_epochs)
コード例 #5
0
ファイル: train.py プロジェクト: CSTR-Edinburgh/merlin
    def train_bucket_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
        """Train bucket by bucket, where a bucket is a range of sequence lengths.

        Bucket start values run from the smallest to the largest key of
        ``train_flen['framenum2utt']`` in steps of ``self.bucket_range``.  A
        bucket starting at ``lower`` selects the keys in
        ``[lower, lower + self.bucket_range)``; its ids are converted by
        ``data_utils.transform_data_to_3d_matrix`` with
        ``max_length=lower + self.bucket_range`` and fitted for one epoch.
        After every epoch the value of
        ``self.get_validation_error(valid_x, valid_y)`` is printed.

        Args:
            train_x: Mapping from id to input data.
            train_y: Mapping from id to target data.
            valid_x: Validation inputs for ``get_validation_error``.
            valid_y: Validation targets for ``get_validation_error``.
            train_flen: Mapping that contains the ``'framenum2utt'`` table.
            batch_size: Batch size passed to ``fit``.
            num_of_epochs: Number of passes over all buckets.
            shuffle_data: When truthy, the bucket order is shuffled once
                (before the first epoch) after reseeding the global
                ``random`` module with a fixed seed.
        """
        ### Method 2 ###
        length_to_ids = train_flen['framenum2utt']
        known_lengths = np.array(list(length_to_ids.keys()))
        bucket_starts = list(range(min(known_lengths), max(known_lengths)+1, self.bucket_range))
        if shuffle_data:
            random.seed(271638)
            random.shuffle(bucket_starts)

        total_files = len(train_x)
        for epoch in range(1, num_of_epochs+1):
            print('Epoch: %d/%d ' % (epoch, num_of_epochs))
            done = 0
            for lower in bucket_starts:
                upper = lower + self.bucket_range
                in_bucket = known_lengths[np.logical_and(known_lengths >= lower, known_lengths < upper)]
                if len(in_bucket) == 0:
                    # No sequence length falls into this bucket.
                    continue

                # Flatten the per-length id lists into one list for the bucket.
                bucket_ids = [utt for length in in_bucket for utt in length_to_ids[length]]
                bucket_x = {utt: train_x[utt] for utt in bucket_ids}
                bucket_y = {utt: train_y[utt] for utt in bucket_ids}
                inputs_3d = data_utils.transform_data_to_3d_matrix(bucket_x, max_length=upper)
                targets_3d = data_utils.transform_data_to_3d_matrix(bucket_y, max_length=upper)
                self.model.fit(inputs_3d, targets_3d, batch_size=batch_size, shuffle=False, epochs=1, verbose=0)

                done += len(bucket_ids)
                data_utils.drawProgressBar(done, total_files)

            valid_error = self.get_validation_error(valid_x, valid_y)
            print(" Validation error: %.3f" % valid_error)
コード例 #6
0
ファイル: train.py プロジェクト: CSTR-Edinburgh/merlin
    def train_bucket_model_without_padding(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
        """Train one group per key of ``train_flen['framenum2utt']``.

        This function is not used as of now.

        Each group's ids are converted by
        ``data_utils.transform_data_to_3d_matrix`` with the group's key as
        ``max_length`` and fitted for one epoch.  ``valid_x`` and ``valid_y``
        are accepted but not read by this method.

        Args:
            train_x: Mapping from id to input data.
            train_y: Mapping from id to target data.
            valid_x: Unused.
            valid_y: Unused.
            train_flen: Mapping that contains the ``'framenum2utt'`` table.
            batch_size: Batch size passed to ``fit``.
            num_of_epochs: Number of passes over all groups.
            shuffle_data: When truthy, the group order is shuffled once
                (before the first epoch) after reseeding the global
                ``random`` module with a fixed seed.
        """
        ### Method 4 ###
        def pick(store, names):
            # Restrict a full id -> data mapping to the given ids.
            return {name: store[name] for name in names}

        group_keys = list(train_flen['framenum2utt'].keys())
        if shuffle_data:
            random.seed(271638)
            random.shuffle(group_keys)

        total_files = len(train_x)
        for epoch_index in range(num_of_epochs):
            print('Epoch: %d/%d ' % (epoch_index + 1, num_of_epochs))
            files_seen = 0
            for group_key in group_keys:
                group_ids = train_flen['framenum2utt'][group_key]
                group_x = pick(train_x, group_ids)
                group_y = pick(train_y, group_ids)
                inputs_3d = data_utils.transform_data_to_3d_matrix(group_x, max_length=group_key)
                targets_3d = data_utils.transform_data_to_3d_matrix(group_y, max_length=group_key)
                self.model.fit(inputs_3d, targets_3d, batch_size=batch_size, epochs=1, verbose=0)

                files_seen += len(group_ids)
                data_utils.drawProgressBar(files_seen, total_files)

            # Terminate the progress-bar line for this epoch.
            sys.stdout.write("\n")
コード例 #7
0
    def train_bucket_model_with_padding(self, train_x, train_y, train_flen, batch_size, num_of_epochs, shuffle_data):
        """Train bucket by bucket, where a bucket is a range of sequence lengths.

        Bucket start values run from the smallest to the largest key of
        ``train_flen['framenum2utt']`` in steps of ``self.bucket_range``.  A
        bucket starting at ``lower`` selects the keys in the half-open range
        ``[lower, lower + self.bucket_range)``, so every key, including the
        smallest one, belongs to exactly one bucket.  The bucket's ids are
        converted by ``data_utils.transform_data_to_3d_matrix`` with
        ``max_length=lower + self.bucket_range`` and fitted for one epoch.

        Args:
            train_x: Mapping from id to input data.
            train_y: Mapping from id to target data.
            train_flen: Mapping that contains the ``'framenum2utt'`` table.
            batch_size: Batch size passed to ``fit``.
            num_of_epochs: Number of passes over all buckets.
            shuffle_data: When truthy, the bucket order is shuffled once
                (before the first epoch) after reseeding the global
                ``random`` module with a fixed seed.

        Raises:
            ValueError: If ``train_flen['framenum2utt']`` is empty (raised by
                ``min``).
        """
        ### Method 3 ###
        # list(...) is required on Python 3: dict.keys() is a view there and
        # np.array(view) would give a 0-d object array instead of the keys.
        train_fnum_list  = np.array(list(train_flen['framenum2utt'].keys()))
        # A real list is needed for the in-place shuffle; "+1" keeps the
        # largest key covered when it coincides with a bucket start.
        train_range_list = list(range(min(train_fnum_list), max(train_fnum_list)+1, self.bucket_range))
        if shuffle_data:
            random.seed(271638)
            random.shuffle(train_range_list)

        train_file_number = len(train_x)
        for epoch_num in range(num_of_epochs):
            print('Epoch: %d/%d ' %(epoch_num+1, num_of_epochs))
            file_num = 0
            for frame_num in train_range_list:
                min_seq_length = frame_num
                max_seq_length = frame_num+self.bucket_range
                # Lower bound is inclusive: with an exclusive lower bound the
                # ids of the smallest key were never selected for training.
                sub_train_list  = train_fnum_list[(train_fnum_list>=min_seq_length) & (train_fnum_list<max_seq_length)]
                if len(sub_train_list)==0:
                    continue
                # Flatten the per-key id lists into one list for the bucket.
                train_idx_list  = [filename for framenum in sub_train_list for filename in train_flen['framenum2utt'][framenum]]
                sub_train_x     = dict((filename, train_x[filename]) for filename in train_idx_list)
                sub_train_y     = dict((filename, train_y[filename]) for filename in train_idx_list)
                temp_train_x    = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=max_seq_length)
                temp_train_y    = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=max_seq_length)
                self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size, epochs=1, verbose=0)

                file_num += len(train_idx_list)
                data_utils.drawProgressBar(file_num, train_file_number)

            # Terminate the progress-bar line for this epoch.
            sys.stdout.write("\n")
コード例 #8
0
ファイル: train.py プロジェクト: ronanki/merlin
    def train_bucket_model_with_padding(self, train_x, train_y, train_flen, batch_size, num_of_epochs, shuffle_data):
        """Train on buckets of ids grouped by ranges of sequence length.

        The keys of ``train_flen['framenum2utt']`` are split into half-open
        ranges ``[start, start + self.bucket_range)`` whose starts go from
        the smallest key up to the largest key.  Every key therefore falls
        into exactly one bucket.  Each non-empty bucket is converted by
        ``data_utils.transform_data_to_3d_matrix`` with
        ``max_length=start + self.bucket_range`` and fitted for one epoch.

        Args:
            train_x: Mapping from id to input data.
            train_y: Mapping from id to target data.
            train_flen: Mapping that contains the ``'framenum2utt'`` table.
            batch_size: Batch size passed to ``fit``.
            num_of_epochs: Number of passes over all buckets.
            shuffle_data: When truthy, the bucket order is shuffled once
                (before the first epoch) after reseeding the global
                ``random`` module with a fixed seed.

        Raises:
            ValueError: If ``train_flen['framenum2utt']`` is empty (raised by
                ``min``).
        """
        ### Method 2 ###
        # Materialise the keys first: on Python 3 np.array(dict.keys()) wraps
        # the view object itself rather than building an array of the keys.
        train_fnum_list  = np.array(list(train_flen['framenum2utt'].keys()))
        # random.shuffle needs a mutable sequence, so build a list; the "+1"
        # keeps the largest key covered when it is itself a bucket start.
        train_range_list = list(range(min(train_fnum_list), max(train_fnum_list)+1, self.bucket_range))
        if shuffle_data:
            random.seed(271638)
            random.shuffle(train_range_list)

        train_file_number = len(train_x)
        for epoch_num in range(num_of_epochs):
            print('Epoch: %d/%d ' %(epoch_num+1, num_of_epochs))
            file_num = 0
            for frame_num in train_range_list:
                min_seq_length = frame_num
                max_seq_length = frame_num+self.bucket_range
                # Inclusive lower bound: an exclusive one silently skipped
                # all ids stored under the smallest key.
                in_bucket       = (train_fnum_list>=min_seq_length) & (train_fnum_list<max_seq_length)
                sub_train_list  = train_fnum_list[in_bucket]
                if len(sub_train_list)==0:
                    continue
                # Collect the ids of every key that falls into this bucket.
                train_idx_list  = []
                for framenum in sub_train_list:
                    train_idx_list.extend(train_flen['framenum2utt'][framenum])
                sub_train_x     = dict((filename, train_x[filename]) for filename in train_idx_list)
                sub_train_y     = dict((filename, train_y[filename]) for filename in train_idx_list)
                temp_train_x    = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=max_seq_length)
                temp_train_y    = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=max_seq_length)
                self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size, epochs=1, verbose=0)

                file_num += len(train_idx_list)
                data_utils.drawProgressBar(file_num, train_file_number)

            # Terminate the progress-bar line for this epoch.
            sys.stdout.write("\n")
コード例 #9
0
ファイル: train.py プロジェクト: cadia-lvl/merlin
    def train_bucket_model(self, train_x, train_y, valid_x, valid_y, train_flen):
        """Train on buckets of ids grouped by ranges of sequence length.

        Batch size, epoch count, shuffle flag and bucket width are read from
        ``self.batch_size``, ``self.num_of_epochs``, ``self.shuffle_data``
        and ``self.bucket_range``.  A bucket starting at ``lower`` selects
        the keys of ``train_flen['framenum2utt']`` in
        ``[lower, lower + self.bucket_range)``; its ids are converted by
        ``data_utils.transform_data_to_3d_matrix`` with
        ``max_length=lower + self.bucket_range`` and fitted for one epoch.
        After every epoch the value of
        ``self.get_validation_error(valid_x, valid_y)`` is printed.

        Args:
            train_x: Mapping from id to input data.
            train_y: Mapping from id to target data.
            valid_x: Validation inputs for ``get_validation_error``.
            valid_y: Validation targets for ``get_validation_error``.
            train_flen: Mapping that contains the ``'framenum2utt'`` table.
        """
        # TODO: use packaged params
        ### Method 2 ###
        known_lengths = np.array(list(train_flen['framenum2utt'].keys()))
        bucket_starts = list(range(min(known_lengths), max(known_lengths)+1, self.bucket_range))
        if self.shuffle_data:
            # Fixed seed: the shuffled bucket order is the same on every run.
            random.seed(271638)
            random.shuffle(bucket_starts)

        total_files = len(train_x)
        for epoch_index in range(self.num_of_epochs):
            print('Epoch: %d/%d ' % (epoch_index + 1, self.num_of_epochs))
            files_seen = 0
            for lower in bucket_starts:
                upper = lower + self.bucket_range
                selected = (known_lengths >= lower) & (known_lengths < upper)
                bucket_lengths = known_lengths[selected]
                if len(bucket_lengths) == 0:
                    # No sequence length falls into this bucket.
                    continue

                # Gather the ids stored under every length in the bucket.
                bucket_ids = []
                for length in bucket_lengths:
                    bucket_ids.extend(train_flen['framenum2utt'][length])

                bucket_x = {utt: train_x[utt] for utt in bucket_ids}
                bucket_y = {utt: train_y[utt] for utt in bucket_ids}
                inputs_3d = data_utils.transform_data_to_3d_matrix(bucket_x, max_length=upper)
                targets_3d = data_utils.transform_data_to_3d_matrix(bucket_y, max_length=upper)
                self.model.fit(inputs_3d, targets_3d, batch_size=self.batch_size, shuffle=False, epochs=1, verbose=0)

                files_seen += len(bucket_ids)
                data_utils.drawProgressBar(files_seen, total_files)

            valid_error = self.get_validation_error(valid_x, valid_y)
            print(" Validation error: %.3f" % valid_error)
コード例 #10
0
ファイル: train.py プロジェクト: cadia-lvl/merlin
    def train_padding_model(self, train_x, train_y, valid_x, valid_y, train_flen):
        """Train with ``train_on_batch`` over consecutive slices of ids.

        The ids are the keys of ``train_flen['utt2framenum']``.  Each slice
        of at most ``self.batch_size`` ids is converted by
        ``data_utils.transform_data_to_3d_matrix`` with ``max_length`` set to
        the largest ``utt2framenum`` value within that slice.  After every
        epoch the value of ``self.get_validation_error(valid_x, valid_y)``
        is printed.

        Args:
            train_x: Mapping from id to input data.
            train_y: Mapping from id to target data.
            valid_x: Validation inputs for ``get_validation_error``.
            valid_y: Validation targets for ``get_validation_error``.
            train_flen: Mapping that contains the ``'utt2framenum'`` table.
        """
        # TODO: use packaged params
        ### Method 1 ###
        frames_per_utt = train_flen['utt2framenum']
        utt_ids = list(frames_per_utt.keys())
        if self.shuffle_data:
            # Fixed seed: the shuffled id order is the same on every run.
            random.seed(271638)
            random.shuffle(utt_ids)

        total_files = len(utt_ids)
        for epoch_index in range(self.num_of_epochs):
            print('Epoch: %d/%d ' % (epoch_index + 1, self.num_of_epochs))
            cursor = 0
            while cursor < total_files:
                chunk_ids = utt_ids[cursor:cursor + self.batch_size]
                # The slice is converted using its own longest entry.
                longest = max([frames_per_utt[utt] for utt in chunk_ids])
                chunk_x = {utt: train_x[utt] for utt in chunk_ids}
                chunk_y = {utt: train_y[utt] for utt in chunk_ids}
                inputs_3d = data_utils.transform_data_to_3d_matrix(chunk_x, max_length=longest)
                targets_3d = data_utils.transform_data_to_3d_matrix(chunk_y, max_length=longest)
                self.model.train_on_batch(inputs_3d, targets_3d)

                # The last slice may be shorter than the batch size.
                cursor += len(chunk_ids)
                data_utils.drawProgressBar(cursor, total_files)

            valid_error = self.get_validation_error(valid_x, valid_y)
            print(" Validation error: %.3f" % valid_error)
コード例 #11
0
ファイル: train.py プロジェクト: ronanki/merlin
 def train_truncated_model(self, train_x, train_y, batch_size, num_of_epochs, shuffle_data):
     """Fit ``self.model`` on the full training set with a single ``fit`` call.

     Inputs and targets are each converted by
     ``data_utils.transform_data_to_3d_matrix`` using this object's
     ``seq_length`` and ``merge_size``.  When ``self.stateful`` is truthy the
     converted pair is additionally passed through
     ``data_utils.get_stateful_data`` together with ``batch_size``.

     Args:
         train_x: Training inputs, forwarded to the 3-D transform.
         train_y: Training targets, forwarded to the 3-D transform.
         batch_size: Batch size for ``fit`` (and for the stateful helper).
         num_of_epochs: Number of epochs passed to ``fit``.
         shuffle_data: Forwarded to the 3-D transform as ``shuffle_data``.
     """
     ### Method 1 ###
     temp_train_x = data_utils.transform_data_to_3d_matrix(train_x, seq_length=self.seq_length, merge_size=self.merge_size, shuffle_data=shuffle_data)
     print("Input shape: "+str(temp_train_x.shape))

     temp_train_y = data_utils.transform_data_to_3d_matrix(train_y, seq_length=self.seq_length, merge_size=self.merge_size, shuffle_data=shuffle_data)
     print("Output shape: "+str(temp_train_y.shape))

     if self.stateful:
         # Call the helper through the data_utils module, like the transform
         # above; the bare name is not defined in this method's visible scope.
         temp_train_x, temp_train_y = data_utils.get_stateful_data(temp_train_x, temp_train_y, batch_size)

     # fit-side shuffling is switched off; any shuffling is left to the
     # transform above via ``shuffle_data``.
     self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size, epochs=num_of_epochs, shuffle=False, verbose=1)
コード例 #12
0
ファイル: train.py プロジェクト: CSTR-Edinburgh/merlin
 def train_split_model_keras_version(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
     """Fit ``self.model`` on the whole training set via ``fit``.

     This function is not used as of now.

     ``valid_x``, ``valid_y`` and ``train_flen`` are accepted but not read by
     this method.

     Args:
         train_x: Training inputs, forwarded to the 3-D transform.
         train_y: Training targets, forwarded to the 3-D transform.
         valid_x: Unused.
         valid_y: Unused.
         train_flen: Unused.
         batch_size: Batch size for ``fit`` (and for the stateful helper).
         num_of_epochs: Number of epochs passed to ``fit``.
         shuffle_data: Forwarded to the 3-D transform as ``shuffle_data``.
     """
     ### Method 3 ###
     # Inputs and targets must be converted with identical settings.
     reshape_options = {
         'seq_length': self.seq_length,
         'merge_size': self.merge_size,
         'shuffle_data': shuffle_data,
     }

     inputs_3d = data_utils.transform_data_to_3d_matrix(train_x, **reshape_options)
     print("Input shape: " + str(inputs_3d.shape))

     targets_3d = data_utils.transform_data_to_3d_matrix(train_y, **reshape_options)
     print("Output shape: " + str(targets_3d.shape))

     if self.stateful:
         inputs_3d, targets_3d = data_utils.get_stateful_data(inputs_3d, targets_3d, batch_size)

     self.model.fit(inputs_3d, targets_3d, batch_size=batch_size, epochs=num_of_epochs)
コード例 #13
0
ファイル: train.py プロジェクト: CSTR-Edinburgh/merlin
    def train_split_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
        """Train with ``train_on_batch`` over consecutive slices of ids.

        The ids are the keys of ``train_flen['utt2framenum']``.  Each slice
        of at most ``batch_size`` ids is converted by
        ``data_utils.transform_data_to_3d_matrix`` using this object's
        ``seq_length`` and ``merge_size``.  After every epoch the value of
        ``self.get_validation_error(valid_x, valid_y)`` is printed.

        Args:
            train_x: Mapping from id to input data.
            train_y: Mapping from id to target data.
            valid_x: Validation inputs for ``get_validation_error``.
            valid_y: Validation targets for ``get_validation_error``.
            train_flen: Mapping that contains the ``'utt2framenum'`` table.
            batch_size: Number of ids taken per training step.
            num_of_epochs: Number of passes over the id list.
            shuffle_data: When truthy, the id order is shuffled once (before
                the first epoch) after reseeding the global ``random``
                module with a fixed seed.
        """
        ### Method 3 ###
        def pick(store, names):
            # Restrict a full id -> data mapping to the given ids.
            return {name: store[name] for name in names}

        def to_3d(subset):
            # Attributes are read at call time, once per conversion.
            return data_utils.transform_data_to_3d_matrix(subset, seq_length=self.seq_length, merge_size=self.merge_size)

        utt_ids = list(train_flen['utt2framenum'].keys())
        if shuffle_data:
            random.seed(271638)
            random.shuffle(utt_ids)

        total_files = len(utt_ids)
        for epoch_index in range(num_of_epochs):
            print('Epoch: %d/%d ' % (epoch_index + 1, num_of_epochs))
            files_seen = 0
            while files_seen < total_files:
                chunk_ids = utt_ids[files_seen:files_seen + batch_size]
                chunk_x = pick(train_x, chunk_ids)
                chunk_y = pick(train_y, chunk_ids)
                inputs_3d = to_3d(chunk_x)
                targets_3d = to_3d(chunk_y)

                self.model.train_on_batch(inputs_3d, targets_3d)

                # The last slice may be shorter than batch_size.
                files_seen += len(chunk_ids)
                data_utils.drawProgressBar(files_seen, total_files)

            valid_error = self.get_validation_error(valid_x, valid_y)
            print(" Validation error: %.3f" % valid_error)