import logging
import os


def run_pipeline(config_files, out_fn):
    # In pipeline mode we have two config files, one per stage:
    # first syntactic ordering, then surface realization
    # (morphological inflection).

    # Stage 1: syntactic ordering
    syn_config_fn = config_files[0]
    syn_config_d = load_yaml_config(syn_config_fn)

    log_fn = '%s.prediction_log' % os.path.abspath(out_fn)
    logger = set_logging(logging.DEBUG, log_fn)
    fix_seed(syn_config_d['random_seed'])

    logger.info('PIPELINE MODE')
    logger.info('STAGE 1: syntactic ordering')
    syn_config_d['output_fn'] = out_fn
    syn_engine = Engine(syn_config_d, mode='predict')
    syn_engine.setup()
    depgraphs = syn_engine.run()

    # Stage 2: morphological inflection
    logger.info('STAGE 2: morphological inflection')
    morph_config_fn = config_files[1]
    morph_config_d = load_yaml_config(morph_config_fn)
    morph_config_d['output_fn'] = out_fn
    morph_engine = Engine(morph_config_d, mode='predict')
    morph_engine.setup()

    # Load the trained weights for the morphology model.
    morph_model_fname = morph_engine.config['model_fn']
    load_model(morph_engine.model_module, morph_model_fname)
    if use_cuda:
        morph_engine.model_module.cuda()

    assert isinstance(depgraphs, dict)
    logger.info('Predicting on Syn outputs ...')

    # Predict an inflected form for every node of every dependency graph.
    for data_split, digraphs in depgraphs.items():
        for dg in digraphs:  # the graphs can be examined later, if needed
            for node_id in dg.graph['node_order']:
                form = morph_engine.nlgen.predict_from_dgnode(
                    morph_engine.model_module,
                    morph_engine.data_module.vocab,
                    dg, node_id)
                dg.node[node_id]['PRED_FORM'] = form

    # Save the final outputs per data split and score the dev split.
    for data_split, dgs in depgraphs.items():
        fname = '%s.%s.final.txt' % (out_fn, data_split)
        logger.info('Saving Pipeline outputs (*%s*) to --> %s',
                    data_split, fname)
        targets, predicted_snts = save_final_predictions_from_dgs(dgs, fname)
        if data_split == 'dev':
            SynEvaluator.compute_final_scores(targets, predicted_snts)
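# A minimal sketch of how run_pipeline might be invoked from the command
# line. The flag names (--syn-config, --morph-config, --out) are
# hypothetical; only run_pipeline itself comes from the code above.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Two-stage NLG pipeline: syntactic ordering, then '
                    'morphological inflection.')
    parser.add_argument('--syn-config', required=True,
                        help='YAML config for the syntactic-ordering stage')
    parser.add_argument('--morph-config', required=True,
                        help='YAML config for the morphology stage')
    parser.add_argument('--out', required=True,
                        help='base filename for predictions and logs')
    args = parser.parse_args()

    # run_pipeline expects the two stage configs in order: syn, then morph.
    run_pipeline([args.syn_config, args.morph_config], args.out)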
def start_prediction(self):
    stage_name = self.config['stage']
    model_fname = self.config['model_fn']
    load_model(self.model_module, model_fname)

    output_fname_base = self.config.get('output_fn', None)
    if output_fname_base is None:
        output_fname_base = model_fname

    all_predictions = {}
    if use_cuda:
        self.model_module.cuda()

    if self.data_module.fnames.dev_fn is not None:
        predictions_fname_base = '%s.dev.%s' % (output_fname_base, stage_name)
        predictions_fname = '%s.predictions' % predictions_fname_base
        # References and error analyses share the same base name.
        reference_fname = '%s.reference' % predictions_fname_base
        errors_fname = '%s.errors' % predictions_fname_base
        dev_predictions = self.nlgen.predict_from_file(
            self.model_module,
            self.data_module.config['dev_data'],
            self.data_module.vocab)
        all_predictions['dev'] = dev_predictions
        # Save the dev predictions.
        self.nlgen.save_predictions(dev_predictions, predictions_fname)

    if self.data_module.fnames.test_fn is not None:
        predictions_fname_base = '%s.test.%s' % (output_fname_base, stage_name)
        predictions_fname = '%s.predictions' % predictions_fname_base
        test_predictions = self.nlgen.predict_from_file(
            self.model_module,
            self.data_module.config['test_data'],
            self.data_module.vocab)
        self.nlgen.save_predictions(test_predictions, predictions_fname)
        all_predictions['test'] = test_predictions

    return all_predictions
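# nlgen.save_predictions is not shown in this file; a minimal sketch of
# what such a writer could look like, assuming each prediction is a list
# of token strings (both the name and the assumption are hypothetical):
def save_predictions_sketch(predictions, fname):
    # One whitespace-joined prediction per line.
    with open(fname, 'w', encoding='utf-8') as fh:
        for tokens in predictions:
            fh.write(' '.join(tokens) + '\n')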
import importlib
import os


def run(config_dict):
    # Fetch all relevant module names.
    data_module = config_dict['data-module']
    model_module = config_dict['model-module']
    training_module = config_dict['training-module']
    evaluation_module = config_dict.get('evaluation-module', None)
    mode = config_dict['mode']

    # Import the components dynamically.
    DataClass = importlib.import_module(data_module).component
    ModelClass = importlib.import_module(model_module).component
    TrainingClass = importlib.import_module(training_module).component
    EvaluationClass = importlib.import_module(
        evaluation_module).component if evaluation_module else None

    model_dirname = make_model_dir(config_dict)
    logger = set_logger(config_dict['log_level'],
                        os.path.join(model_dirname, 'log.txt'))

    # Set up the data.
    data = DataClass(config_dict['data_params'])
    data.setup()

    # Set up the model.
    fix_seed(config_dict['random_seed'])  # fix random-number generators
    model = ModelClass(config_dict['model_params'])
    model.setup(data)  # some params are data-specific => pass data as arg

    if mode == 'train':
        training_params = config_dict['training_params']
        trainer = TrainingClass(training_params)
        trainer.training_start(model, data)
        save_config(config_dict, os.path.join(model_dirname, 'config.json'))

    elif mode == 'predict':
        assert evaluation_module is not None, \
            'No evaluation module -- check config file!'
        evaluator = EvaluationClass(config_dict)
        model_fname = config_dict['model_fn']
        load_model(model, model_fname)
        id2word = data.vocab.id2tok

        # Predict on the dev set.
        if 'dev' in data.fnames:
            logger.info('Predicting on dev data')
            predicted_ids, attention_weights = evaluator.evaluate_model(
                model, data.dev[0])
            data_lexicalizations = data.lexicalizations['dev']
            predicted_snts = evaluator.lexicalize_predictions(
                predicted_ids, data_lexicalizations, id2word)
            save_predictions_txt(predicted_snts,
                                 '%s.devset.predictions.txt' % model_fname)

        # Predict on the test set.
        if 'test' in data.fnames:
            logger.info('Predicting on test data')
            predicted_ids, attention_weights = evaluator.evaluate_model(
                model, data.test[0])
            data_lexicalizations = data.lexicalizations['test']
            predicted_snts = evaluator.lexicalize_predictions(
                predicted_ids, data_lexicalizations, id2word)
            save_predictions_txt(predicted_snts,
                                 '%s.testset.predictions.txt' % model_fname)

    else:
        logger.warning("Check the 'mode' field in the config file: %s" % mode)

    logger.info('DONE')
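# load_model is defined elsewhere in the project; a minimal PyTorch-style
# sketch of what it is assumed to do. The map_location choice, the
# assumption that a plain state_dict was saved, and the eval() call are
# all assumptions, not the project's actual implementation:
import torch


def load_model_sketch(model, model_fname):
    # Restore trained weights onto the already-constructed model.
    state_dict = torch.load(model_fname, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()  # prediction mode: disables dropout etc.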
def run(config_dict):
    # Fetch all relevant module names.
    data_module = config_dict['data-module']
    model_module = config_dict['model-module']
    training_module = config_dict['training-module']
    evaluation_module = config_dict.get('evaluation-module', None)
    mode = config_dict['mode']

    # Import the components dynamically.
    DataClass = importlib.import_module(data_module).component
    ModelClass = importlib.import_module(model_module).component
    TrainingClass = importlib.import_module(training_module).component
    EvaluationClass = importlib.import_module(
        evaluation_module).component if evaluation_module else None

    model_dirname = make_model_dir(config_dict)
    logger = set_logger(config_dict['log_level'],
                        os.path.join(model_dirname, 'log.txt'))

    # Set up the data.
    data = DataClass(config_dict['data_params'])
    data.setup()

    # Set up the model.
    fix_seed(config_dict['random_seed'])  # fix random-number generators
    model = ModelClass(config_dict['model_params'])
    model.setup(data)  # some params are data-specific => pass data as arg

    if mode == 'train':
        training_params = config_dict['training_params']
        trainer = TrainingClass(training_params)
        trainer.training_start(model, data)
        save_config(config_dict, os.path.join(model_dirname, 'config.json'))

    elif mode == 'predict':
        assert evaluation_module is not None, \
            'No evaluation module -- check config file!'
        evaluator = EvaluationClass(config_dict)
        model_fname = config_dict['model_fn']
        load_model(model, model_fname)
        model = model.to('cuda')
        id2word = data.vocab.id2tok

        beam_size = None  # set to e.g. 10 to enable beam search
        alpha = 0.3  # decoding hyperparameter; recorded in the file names

        # Predict on the dev set.
        if 'dev' in data.fnames:
            logger.info('Predicting on dev data')
            predicted_ids, fw_beam = evaluator.evaluate_model(
                model, data.dev[0], data.uni_mr['dev'],
                beam_size=beam_size, alpha=alpha)
            data_lexicalizations = data.lexicalizations['dev']
            predicted_snts = evaluator.lexicalize_predictions(
                predicted_ids, data_lexicalizations, id2word)
            save_predictions_txt(
                predicted_snts,
                '%s.devset.predictions.txt_incre_%.1f' % (model_fname, alpha))

        # Predict on the test set.
        if 'test' in data.fnames:
            logger.info('Predicting on test data')
            predicted_ids, attention_weights = evaluator.evaluate_model(
                model, data.test[0], data.uni_mr['test'],
                beam_size=beam_size, alpha=alpha)
            data_lexicalizations = data.lexicalizations['test']
            predicted_snts = evaluator.lexicalize_predictions(
                predicted_ids, data_lexicalizations, id2word)
            save_predictions_txt(
                predicted_snts,
                '%s.testset.predictions.txt_incre_%.1f' % (model_fname, alpha))

    else:
        logger.warning("Check the 'mode' field in the config file: %s" % mode)

    logger.info('DONE')
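# lexicalize_predictions maps predicted token ids back to words and
# re-inserts the delexicalized values; a minimal sketch under the
# assumption that placeholder tokens are keys into each instance's
# lexicalization dict (the name, signature, and placeholder format here
# are hypothetical, not the project's actual implementation):
def lexicalize_predictions_sketch(predicted_ids, lexicalizations, id2word):
    sentences = []
    for ids, lex in zip(predicted_ids, lexicalizations):
        # Map ids back to vocabulary tokens.
        tokens = [id2word[i] for i in ids]
        # Replace placeholder tokens with their surface forms, leaving
        # ordinary tokens untouched.
        tokens = [lex.get(tok, tok) for tok in tokens]
        sentences.append(' '.join(tokens))
    return sentences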