Example 1
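Note: the snippet below is a method excerpted from a class, so it assumes that import sys and the Bidirectional_RNNLM_Weight class are available at module level, and it is written for Python 2 (print statements).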
    def backprop_adagrad_single_batch(self):
        print "Starting backprop using adagrad"
        adagrad_weight = Bidirectional_RNNLM_Weight()
        adagrad_weight.init_zero_weights(self.model.get_architecture())

        buffer_weight = Bidirectional_RNNLM_Weight()
        buffer_weight.init_zero_weights(self.model.get_architecture())

        # seed the AdaGrad accumulator with fudge_factor so the first
        # division by its square root is well defined
        fudge_factor = 1.0
        adagrad_weight = adagrad_weight + fudge_factor
        gradient = Bidirectional_RNNLM_Weight()
        gradient.init_zero_weights(self.model.get_architecture())
        if self.validation_feature_file_name is not None:
            cross_entropy, perplexity, num_correct, num_examples, loss = self.calculate_classification_statistics(
                self.validation_features, self.validation_labels,
                self.validation_fsl, self.model)
            print "cross-entropy before steepest descent is", cross_entropy
            print "perplexity is", perplexity
            if self.l2_regularization_const > 0.0:
                print "regularized loss is", loss
            print "number correctly classified is", num_correct, "of", num_examples

#        excluded_keys = {'bias':['0'], 'weights':[]}
#        frame_table = np.cumsum(self.feature_sequence_lens)
        for epoch_num in range(len(self.steepest_learning_rate)):
            print "At epoch", epoch_num + 1, "of", len(
                self.steepest_learning_rate
            ), "with learning rate", self.steepest_learning_rate[epoch_num]
            start_frame = 0
            end_frame = 0
            cross_entropy = 0.0
            num_examples = 0
            #            if hasattr(self, 'momentum_rate'):
            #                momentum_rate = self.momentum_rate[epoch_num]
            #                print "momentum is", momentum_rate
            #            else:
            #                momentum_rate = 0.0
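            # treat each sequence as its own mini-batch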
            for batch_index, feature_sequence_len in enumerate(
                    self.feature_sequence_lens):
                end_frame = start_frame + feature_sequence_len
                batch_features = self.features[:feature_sequence_len,
                                               batch_index]
                batch_labels = self.labels[start_frame:end_frame, 1]
                #                print ""
                #                print batch_index
                #                print batch_features
                #                print batch_labels
                cur_xent = self.calculate_gradient_single_batch(
                    batch_features,
                    batch_labels,
                    gradient,
                    return_cross_entropy=True,
                    check_gradient=False)
                #                print self.model.norm()
                #                print gradient.norm()
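                # fold the L2 penalty into the gradient before the AdaGrad bookkeeping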
                if self.l2_regularization_const > 0.0:
                    buffer_weight.assign_weights(self.model)
                    buffer_weight *= self.l2_regularization_const
                    gradient += buffer_weight
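                # AdaGrad: add the squared gradient to the accumulator, then
                # divide the gradient by the square root of the running sum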
                buffer_weight.assign_weights(gradient)
                #                print gradient.init_hiddens
                buffer_weight **= 2.0
                adagrad_weight += buffer_weight
                #                print adagrad_weight.init_hiddens
                buffer_weight.assign_weights(adagrad_weight)
                buffer_weight **= 0.5
                #                print buffer_weight.init_hiddens
                gradient /= buffer_weight
                #                print gradient.init_hiddens
                cross_entropy += cur_xent
                per_done = float(batch_index) / self.num_sequences * 100
                sys.stdout.write(
                    "\r                                                                \r"
                )  #clear line
                sys.stdout.write("\r%.1f%% done " %
                                 per_done), sys.stdout.flush()
                ppp = cross_entropy / end_frame
                sys.stdout.write("train X-ent: %f " % ppp), sys.stdout.flush()
                gradient *= -self.steepest_learning_rate[epoch_num]
                self.model += gradient  #/ batch_size
                #                if momentum_rate > 0.0:
                #                    prev_step *= momentum_rate
                #                    self.model += prev_step
                #                prev_step.assign_weights(gradient)
                #                prev_step *= -self.steepest_learning_rate[epoch_num]

                start_frame = end_frame

            if self.validation_feature_file_name is not None:
                cross_entropy, perplexity, num_correct, num_examples, loss = self.calculate_classification_statistics(
                    self.validation_features, self.validation_labels,
                    self.validation_fsl, self.model)
                print "cross-entropy at the end of the epoch is", cross_entropy
                print "perplexity is", perplexity
                if self.l2_regularization_const > 0.0:
                    print "regularized loss is", loss
                print "number correctly classified is", num_correct, "of", num_examples

            sys.stdout.write("\r100.0% done \r")
            sys.stdout.write(
                "\r                                                                \r"
            )  #clear line
            if self.save_each_epoch:
                self.model.write_weights(''.join(
                    [self.output_name, '_epoch_',
                     str(epoch_num + 1)]))
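
For reference, here is a minimal NumPy sketch of the per-batch update the loop above performs (hypothetical function and array names, not part of the original class; the real code applies the same element-wise operations to every weight matrix inside Bidirectional_RNNLM_Weight):

import numpy as np

def adagrad_step(weights, gradient, accumulator, learning_rate, l2_const=0.0):
    # optional L2 penalty, as in the l2_regularization_const branch above
    if l2_const > 0.0:
        gradient = gradient + l2_const * weights
    # accumulate squared gradients and scale each element by the root of the running sum
    accumulator += gradient ** 2
    scaled = gradient / np.sqrt(accumulator)
    # gradient descent step
    return weights - learning_rate * scaled

# note: in the method above the accumulator starts at fudge_factor (1.0) rather
# than 0.0, which is why no extra epsilon appears in the denominator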