Example #1
    def testing(self, fea2obj):
        config = self._config
        dsdir = config['dsdir']
        devfile = dsdir + '/dev.txt'
        testfile = dsdir + '/test.txt'
        networkfile = config['net']
        batch_size = 10000  # int(config['batchsize'])
        devMentions = load_ent_ds(devfile)
        tstMentions = load_ent_ds(testfile)
        logger.info('#dev: %d #test: %d', len(devMentions), len(tstMentions))
        
        main_loop = load(networkfile + '.best.pkl')
        logger.info('Model loaded. Building prediction function...')
        old_model = main_loop.model
        logger.info('Model inputs: %s', old_model.inputs)
        sources = [inp.name for inp in old_model.inputs]
#         fea2obj = build_input_objs(sources, config)
        t2idx = fea2obj['targets'].t2idx
        deterministic = str_to_bool(config['use_mean_pred']) if 'use_mean_pred' in config else True 
        kl_weight = shared_floatx(0.001, 'kl_weight')
        entropy_weight = shared_floatx(0.001, 'entropy_weight')

        cost, _, y_hat, _, _, _, _ = build_model_new(fea2obj, len(t2idx), self._config, kl_weight, entropy_weight, deterministic=deterministic, test=True)
        model = Model(cost)
        model.set_parameter_values(old_model.get_parameter_values())
        
        # collect the symbolic input variables for every feature except the targets
        theinputs = []
        for fe in fea2obj.keys():
            if 'targets' in fe:
                continue
            for inp in model.inputs:
                if inp.name == fe:
                    theinputs.append(inp)
                    
#         theinputs = [inp for inp in model.inputs if inp.name != 'targets']
        print "theinputs: ", theinputs
        predict = theano.function(theinputs, y_hat)
        
        test_stream, num_samples_test = get_comb_stream(fea2obj, 'test', batch_size, shuffle=False)
        dev_stream, num_samples_dev = get_comb_stream(fea2obj, 'dev', batch_size, shuffle=False)
        logger.info('sources: %s -- number of test/dev samples: %d/%d', test_stream.sources, num_samples_test, num_samples_dev)
        idx2type = {idx: t for t, idx in t2idx.iteritems()}
        
        logger.info('Starting to apply on dev inputs...')
        self.applypredict(theinputs, predict, dev_stream, devMentions, num_samples_dev, batch_size, os.path.join(config['exp_dir'], config['matrixdev']), idx2type)
        logger.info('...apply on dev data finished')
        
        logger.info('Starting to apply on test inputs...')
        self.applypredict(theinputs, predict, test_stream, tstMentions, num_samples_test, batch_size, os.path.join(config['exp_dir'], config['matrixtest']), idx2type)
        logger.info('...apply on test data finished')
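
A minimal sketch of how testing() might be driven. The Experiment wrapper class and the literal config values are assumptions for illustration; build_input_objs(sources, config) is the helper referenced in the commented-out line inside the method, and the config keys mirror the ones the method reads (dsdir, net, exp_dir, matrixdev, matrixtest, use_mean_pred).

    # Illustrative driver -- Experiment and the concrete config values are hypothetical.
    config = {
        'dsdir': 'data/entityset',    # directory expected to contain dev.txt and test.txt
        'net': 'models/run1/net',     # testing() loads '<net>.best.pkl'
        'exp_dir': 'output/run1',
        'matrixdev': 'dev.matrix',    # output file for dev predictions
        'matrixtest': 'test.matrix',  # output file for test predictions
        'use_mean_pred': 'True',      # parsed with str_to_bool()
    }
    exp = Experiment(config)          # assumed wrapper that stores config as self._config
    fea2obj = build_input_objs(['targets', 'mentions'], config)  # source names assumed
    exp.testing(fea2obj)              # writes one score matrix per split under exp_dir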
Example #2
    def training(self, fea2obj, batch_size, learning_rate=0.005, steprule='adagrad', wait_epochs=5, kl_weight_init=None, klw_ep=50, klw_inc_rate=0, num_epochs=None):
        networkfile = self._config['net']
        
        n_epochs = num_epochs or int(self._config['nepochs'])
        reg_weight = float(self._config['loss_weight'])
        reg_type = self._config['loss_reg']
        numtrain = int(self._config['num_train']) if 'num_train' in self._config else None
        train_stream, num_samples_train = get_comb_stream(fea2obj, 'train', batch_size, shuffle=True, num_examples=numtrain)
        dev_stream, num_samples_dev = get_comb_stream(fea2obj, 'dev', batch_size=None, shuffle=False)
        logger.info('sources: %s -- number of train/dev samples: %d/%d', train_stream.sources, num_samples_train, num_samples_dev)
        
        t2idx = fea2obj['targets'].t2idx
        # parentheses matter here: without them an explicit kl_weight_init would be
        # discarded whenever 'kld_weight' is missing from the config
        klw_init = kl_weight_init or (float(self._config['kld_weight']) if 'kld_weight' in self._config else 1)
        logger.info('kl_weight_init: %f', klw_init)
        kl_weight = shared_floatx(klw_init, 'kl_weight')
        entropy_weight = shared_floatx(1., 'entropy_weight')
        
        cost, p_at_1, _, KLD, logpy_xz, pat1_recog, misclassify_rate = build_model_new(fea2obj, len(t2idx), self._config, kl_weight, entropy_weight)
        
        cg = ComputationGraph(cost)
        
        weights = VariableFilter(roles=[WEIGHT])(cg.parameters)
        logger.info('Model weights are: %s', weights)
        if 'L2' in reg_type:
            cost += reg_weight * l2_norm(weights)
            logger.info('applying %s regularization with weight %f', reg_type, reg_weight)
          
        dropout = -0.1  # non-positive value keeps dropout disabled
        if dropout > 0:
            cg = apply_dropout(cg, weights, dropout)
            cost = cg.outputs[0]
        
        cost.name = 'cost'
        logger.info('Optimization algorithm: %s, learning rate: %f', steprule, learning_rate)
        if 'adagrad' in steprule:
            cnf_step_rule = AdaGrad(learning_rate)
        elif 'adadelta' in steprule:
            cnf_step_rule = AdaDelta(decay_rate=0.95)
        elif 'decay' in steprule:
            cnf_step_rule = RMSProp(learning_rate=learning_rate, decay_rate=0.90)
            cnf_step_rule = CompositeRule([cnf_step_rule, StepClipping(1)])
        elif 'momentum' in steprule:
            cnf_step_rule = Momentum(learning_rate=learning_rate, momentum=0.9)
        elif 'adam' in steprule:
            cnf_step_rule = Adam(learning_rate=learning_rate)
        else:
            # without a valid step rule, cnf_step_rule would be undefined below
            raise ValueError('Unknown steprule: %s' % steprule)
        
        algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=cnf_step_rule, on_unused_sources='warn')
        #algorithm.add_updates(updates)
        gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
        step_norm = aggregation.mean(algorithm.total_step_norm)
        monitored_vars = [cost, gradient_norm, step_norm, p_at_1, KLD, logpy_xz, kl_weight, pat1_recog]
        train_monitor = TrainingDataMonitoring(variables=monitored_vars, after_batch=True,
                                               before_first_epoch=True, prefix='tra')
        
        dev_monitor = DataStreamMonitoring(variables=[cost, p_at_1, KLD, logpy_xz, pat1_recog, misclassify_rate], after_epoch=True,
                                           before_first_epoch=True, data_stream=dev_stream, prefix="dev")
        
        extensions = [dev_monitor, train_monitor, Timing(),
                      TrackTheBest('dev_cost'),
                      FinishIfNoImprovementAfter('dev_cost_best_so_far', epochs=wait_epochs),
                      Printing(after_batch=False),  # ProgressBar() disabled
                      FinishAfter(after_n_epochs=n_epochs),
                      saveload.Load(networkfile + '.toload.pkl'),
                      ] + track_best('dev_cost', networkfile + '.best.pkl')
                      
        # extensions.append(SharedVariableModifier(kl_weight,
        #     lambda n, klw: numpy.cast[theano.config.floatX](klw_inc_rate + klw),
        #     after_epoch=False, every_n_epochs=klw_ep, after_batch=False))
        # extensions.append(SharedVariableModifier(entropy_weight,
        #     lambda n, crw: numpy.cast[theano.config.floatX](crw - klw_inc_rate),
        #     after_epoch=False, every_n_epochs=klw_ep, after_batch=False))
        
        logger.info('number of parameters in the model: %d', tensor.sum([p.size for p in cg.parameters]).eval())
        logger.info('Lookup table sizes: %s', [p.size.eval() for p in cg.parameters if 'lt' in p.name])
        
        main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                             model=Model(cost), extensions=extensions)
        main_loop.run()
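
A matching sketch for the training entry point; again the Experiment wrapper and the literal values are assumptions, while the keys mirror what training() reads above (net, nepochs, loss_weight, loss_reg, kld_weight, and the optional num_train).

    # Illustrative driver -- Experiment and the concrete config values are hypothetical.
    config = {
        'net': 'models/run1/net',   # checkpoints are written to '<net>.best.pkl'
        'nepochs': '50',            # fallback when num_epochs is not passed
        'loss_weight': '0.0001',    # regularization weight (reg_weight)
        'loss_reg': 'L2',           # enables the l2_norm penalty branch
        'kld_weight': '1.0',        # initial value of the shared kl_weight
    }
    exp = Experiment(config)
    fea2obj = build_input_objs(['targets', 'mentions'], config)  # source names assumed
    exp.training(fea2obj, batch_size=1000, learning_rate=0.005,
                 steprule='adagrad', wait_epochs=5)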