def generate(save_to, config_file_path):
    config, logger = config_utils.load_config(config_file_path)
    if "generation" not in config:
        logger.error("Generation parameters not in the config file!")
        sys.exit()

    config_global = config.get('global', {})
    linking_config = config['entity.linking']

    with open(config['generation']['questions']) as f:
        webquestions_questions = json.load(f)
    logger.info('Loaded training questions, size: {}'.format(
        len(webquestions_questions)))

    logger.info("Load entity linker")
    entitylinker = getattr(linker, linking_config['linker'])(
        logger=logger, **linking_config['linker.options'])

    len_webquestion = len(webquestions_questions)
    start_with = 0

    data_iterator = tqdm.tqdm(range(start_with, len_webquestion), ncols=100)
    for i in data_iterator:
        q_obj = webquestions_questions[i]
        q = q_obj.get('utterance', q_obj.get('question'))
        q_index = q_obj['questionid']

        sent = entitylinker.link_entities_in_raw_input(q, element_id=q_index)
        q_obj['entities'] = sent.entities

    logger.debug("Entity linking is finished.")
    with open(save_to, 'w') as out:
        json.dump(webquestions_questions, out, sort_keys=True, indent=4)
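# Hedged sketch (not part of the original script): the minimal config structure
# that generate() above reads. Only the key names are taken from the code; the
# linker class name and the file path are hypothetical placeholders. The linker
# is resolved via getattr(linker, ...), and 'linker.options' is unpacked as
# keyword arguments into its constructor.
_EXAMPLE_LINKING_CONFIG = {
    "generation": {
        "questions": "data/webquestions_train.json"  # placeholder path
    },
    "entity.linking": {
        "linker": "HeuristicsLinker",   # assumed class name inside the linker module
        "linker.options": {}            # forwarded as **kwargs to the linker
    }
}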
def process(config_file_path):
    config, logger = config_utils.load_config(config_file_path)

    with open(config['generation']['questions']) as f:
        questions_dataset = json.load(f)

    mapped_dataset = []
    for q in tqdm.tqdm(questions_dataset['Questions'], ascii=True, ncols=100):
        mq = {
            "utterance": q['RawQuestion'],
            "answers": [],
            "answers_str": [],
            "questionid": q['QuestionId']
        }
        for p in q['Parses']:
            mq['answers_str'].extend([
                a['EntityName'] if a['EntityName'] else a['AnswerArgument']
                for a in p['Answers']
            ])
            mq['answers'].extend([a['AnswerArgument'] for a in p['Answers']])
        mq['answers'] = [
            queries.map_f_id(a) if a.startswith('m') else a
            for a in mq['answers']
        ]
        for i, a in enumerate(mq['answers']):
            if not a:
                entities = endpoint_access.query_wikidata(
                    queries.query_get_entity_by_label(mq['answers_str'][i]))
                if len(entities) == 1:
                    mq['answers'][i] = entities[0][queries.ENTITY_VAR[1:]]
        mapped_dataset.append(mq)

    with open(config['generation']['save.silver.to'], "w") as out:
        json.dump(mapped_dataset, out, indent=4, sort_keys=True)

    print(f"Coverage: {sum(all(q['answers']) for q in mapped_dataset) / len(mapped_dataset)}")
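# Hedged sketch (illustrative only): the input record shape that process() expects,
# based on the keys it reads, and the mapped record it produces. The concrete
# values below are invented placeholders, not taken from the data set; the
# Freebase-to-Wikidata mapping is assumed to go through queries.map_f_id.
_EXAMPLE_INPUT_QUESTION = {
    "QuestionId": "WebQTrn-0",
    "RawQuestion": "what country is the grand bahama island in?",
    "Parses": [{
        "Answers": [{"EntityName": "Bahamas", "AnswerArgument": "m.0160w"}]
    }]
}
_EXAMPLE_MAPPED_QUESTION = {
    "questionid": "WebQTrn-0",
    "utterance": "what country is the grand bahama island in?",
    "answers_str": ["Bahamas"],
    "answers": ["Q778"]   # placeholder Wikidata id produced by queries.map_f_id
}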
def train(config_file_path, seed, gpuid, model_description, experiment_tag):
    config, logger = config_utils.load_config(config_file_path, seed, gpuid)
    if "training" not in config:
        print("Training parameters not in the config file!")
        sys.exit()

    results_logger = None
    if 'log.results' in config['training']:
        results_logger = logging.getLogger("results_logger")
        results_logger.setLevel(logging.INFO)
        fh = logging.FileHandler(filename=config['training']['log.results'])
        fh.setLevel(logging.INFO)
        results_logger.addHandler(fh)
        results_logger.info(str(config))

    # Load data
    if not isinstance(config['training']["path_to_dataset"], list):
        config['training']["path_to_dataset"] = [
            config['training']["path_to_dataset"]
        ]
    training_dataset = []
    for path_to_train in config['training']["path_to_dataset"]:
        with open(path_to_train) as f:
            training_dataset += json.load(f, object_hook=sentence_object_hook)
    logger.info(f"Train: {len(training_dataset)}")
    train_size_available = len(training_dataset)
    dataset_name = config['training']["path_to_dataset"][0].split("/")[-1].split(".")[0]

    if "path_to_validation" not in config['training']:
        config['training']["path_to_validation"] = config['training']["path_to_dataset"][-1]
        logger.info("No validation set, using part of the training data.")
    with open(config['training']["path_to_validation"]) as f:
        val_dataset = json.load(f, object_hook=sentence_object_hook)
    logger.info(f"Validation: {len(val_dataset)}")
    val_size_available = len(val_dataset)

    wordembeddings, word2idx = V.extend_embeddings_with_special_tokens(
        *_utils.load_word_embeddings(
            _utils.RESOURCES_FOLDER +
            "../../resources/embeddings/glove/glove.6B.100d.txt"))
    logger.info(f"Loaded word embeddings: {wordembeddings.shape}")

    model_type = config['training']["model_type"]
    logger.info(f"Model type: {model_type}")

    V.MAX_NEGATIVE_GRAPHS = 50
    training_dataset = [
        s for s in training_dataset
        if any(scores[2] > losses.MIN_TARGET_VALUE for g, scores in s.graphs)
    ]
    training_samples, training_targets = pack_data(training_dataset, word2idx, model_type)
    logger.info(f"Data encoded: {[m.shape for m in training_samples]}")

    V.MAX_NEGATIVE_GRAPHS = 100
    val_dataset = [
        s for s in val_dataset
        if any(scores[2] > losses.MIN_TARGET_VALUE for g, scores in s.graphs)
    ]
    print(f"Val F1 upper bound: {np.average([q.graphs[0].scores[2] for q in val_dataset])}")
    val_samples, val_targets = pack_data(val_dataset, word2idx, model_type)
    logger.info(f"Val data encoded: {[m.shape for m in val_samples]}")

    encoder = models.ConvWordsEncoder(hp_vocab_size=wordembeddings.shape[0],
                                      hp_word_emb_size=wordembeddings.shape[1],
                                      **config['model'])
    encoder.load_word_embeddings_from_numpy(wordembeddings)
    net = getattr(models, model_type)(encoder, **config['model'])

    def metrics(targets, predictions, validation=False):
        _, predicted_targets = torch.topk(predictions, 1, dim=-1)
        _, targets = torch.topk(targets, 1, dim=-1)
        predicted_targets = predicted_targets.squeeze(1)
        targets = targets.squeeze(1)
        cur_acc = torch.sum(predicted_targets == targets).float()
        cur_acc /= predicted_targets.size(0)
        cur_f1 = 0.0
        if validation:
            for i, q in enumerate(val_dataset):
                if i < predicted_targets.size(0):
                    idx = predicted_targets.data[i]
                    if abs(idx) < len(q.graphs):
                        cur_f1 += q.graphs[idx].scores[2]
            cur_f1 /= predicted_targets.size(0)
        return {
            'acc': cur_acc.data[0],
            'f1': cur_f1,
            'predictions': predicted_targets.data.unsqueeze(0)
        }

    # Save models into a model-specific directory
    if "save_to_dir" in config['training']:
        now = datetime.datetime.now()
        model_gated = net._gnn.hp_gated if model_type == "GNNModel" else False
        config['training']['save_to_dir'] = config['training']['save_to_dir'] + \
            f"{'g' if model_gated else ''}" \
            f"{model_type.lower()}s_{now.year}Q{now.month // 4 + 1}/"
        if not os.path.exists(config['training']['save_to_dir']):
            os.makedirs(config['training']['save_to_dir'])

    container = fackel.TorchContainer(
        torch_model=net,
        criterion=losses.VariableMarginLoss(),
        # criterion=nn.MultiMarginLoss(margin=0.5, size_average=False),
        metrics=metrics,
        optimizer_params={
            'weight_decay': 0.05,
            # 'lr': 0.01
        },
        optimizer="Adam",
        logger=logger,
        init_model_weights=True,
        description=model_description,
        **config['training'])

    if results_logger:
        results_logger.info("Model save to: {}".format(container._save_model_to))

    log_history = container.train(training_samples,
                                  training_targets,
                                  dev=val_samples,
                                  dev_targets=val_targets)

    for q in val_dataset:
        random.shuffle(q.graphs)
    if container._model_checkpoint:
        container.reload_from_saved()

    val_samples, val_targets = pack_data(val_dataset, word2idx, model_type)
    predictions = container.predict_batchwise(*val_samples)
    results = metrics(*container._torchify_data(True, val_targets),
                      predictions,
                      validation=True)
    _, predictions = torch.topk(predictions, 1, dim=-1)
    print(f"Acc: {results['acc']}, F1: {results['f1']}")
    print(f"Predictions head: {predictions.data[:10].view(1, -1)}")

    model_name = container._save_model_to.name
    model_gated = container._model._gnn.hp_gated if model_type == "GNNModel" else False

    # Append a summary of this training run to the shared results file
    if "add.results.to" in config['training']:
        print(f"Adding training results to {config['training']['add.results.to']}")
        with open(config['training']["add.results.to"], 'a+') as results_out:
            results_out.write(",".join([
                model_name, model_type,
                "Gated" if model_gated else "Simple",
                model_description,
                str(seed), dataset_name,
                f"{len(training_dataset)}/{train_size_available}",
                f"{len(val_dataset)}/{val_size_available}",
                str(len(log_history)),
                str(results['acc']),
                str(results['f1']),
                experiment_tag
            ]))
            results_out.write("\n")

    # Print out the model path for the evaluation script to pick up
    print(container._save_model_to)
def generate(path_to_model, config_file_path):
    config, logger = config_utils.load_config(config_file_path)
    if "evaluation" not in config:
        print("Evaluation parameters not in the config file!")
        sys.exit()

    with open(config['evaluation']['questions']) as f:
        webquestions_questions = json.load(f)

    entitylinker = None
    if 'entity.linking' in config:
        PATH_EL = "../../entity-linking/"
        sys.path.insert(0, PATH_EL)
        from entitylinking import core
        linking_config = config['entity.linking']
        logger.info("Load entity linker")
        entitylinker = getattr(core, linking_config['linker'])(
            logger=logger, **linking_config['linker.options'], pos_tags=True)

    _, word2idx = V.extend_embeddings_with_special_tokens(
        *_utils.load_word_embeddings(
            _utils.RESOURCES_FOLDER +
            "../../resources/embeddings/glove/glove.6B.100d.txt"))
    V.WORD_2_IDX = word2idx

    model_type = path_to_model.split("/")[-1].split("_")[0]
    logger.info(f"Model type: {model_type}")
    logger.info('Loading the model from: {}'.format(path_to_model))
    dummy_net = getattr(models, model_type)()
    container = fackel.TorchContainer(torch_model=dummy_net, logger=logger)
    container.load_from_file(path_to_model)

    graph_queries.FREQ_THRESHOLD = config['evaluation'].get("min.relation.freq", 500)

    logger.debug('Testing')
    global_answers = []
    avg_metrics = np.zeros(3)
    data_iterator = tqdm.tqdm(webquestions_questions, ncols=100, ascii=True)
    for i, q_obj in enumerate(data_iterator):
        q = q_obj.get('utterance', q_obj.get('question'))
        q_index = q_obj['questionid']

        if entitylinker:
            sent = entitylinker.link_entities_in_raw_input(q, element_id=q_index)
            if "max.num.entities" in config['evaluation']:
                sent.entities = sent.entities[:config['evaluation']["max.num.entities"]]
            sent = sentence.Sentence(input_text=sent.input_text,
                                     tagged=sent.tagged,
                                     entities=sent.entities)
        else:
            tagged = _utils.get_tagged_from_server(q, caseless=q.islower())
            sent = sentence.Sentence(input_text=q,
                                     tagged=tagged,
                                     entities=q_obj['entities'])

        chosen_graphs = staged_generation.generate_with_model(
            sent, container, beam_size=config['evaluation'].get("beam.size", 10))
        model_answers = []
        g = ({},)
        if chosen_graphs:
            j = 0
            while not model_answers and j < len(chosen_graphs):
                g = chosen_graphs[j]
                model_answers = graph_queries.get_graph_denotations(g.graph)
                j += 1

        gold_answers = webquestions_io.get_answers_from_question(q_obj)
        metrics = evaluation.retrieval_prec_rec_f1(gold_answers, model_answers)
        global_answers.append((q_index, list(metrics), model_answers, [
            (c_g.graph, float(c_g.scores[2])) for c_g in chosen_graphs[:10]
        ]))
        avg_metrics += metrics
        precision, recall, f1 = tuple(avg_metrics / (i + 1))
        data_iterator.set_postfix(prec=precision, rec=recall, f1=f1)

        if i > 0 and i % 100 == 0:
            with open(config['evaluation']["save.answers.to"], 'w') as answers_out:
                json.dump(global_answers,
                          answers_out,
                          sort_keys=True,
                          indent=4,
                          cls=sentence.SentenceEncoder)

    print("Average metrics: {}".format(avg_metrics / (len(webquestions_questions))))
    logger.debug('Testing is finished')
    with open(config['evaluation']["save.answers.to"], 'w') as answers_out:
        json.dump(global_answers,
                  answers_out,
                  sort_keys=True,
                  indent=4,
                  cls=sentence.SentenceEncoder)
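# Hedged sketch (illustrative only): the evaluation settings read by generate()
# above. Key names come from the code; the numeric defaults shown are the
# fallbacks passed to config['evaluation'].get(...); the paths and the cap on
# linked entities are placeholders.
_EXAMPLE_EVALUATION_CONFIG = {
    "evaluation": {
        "questions": "data/webquestions_test.json",
        "save.answers.to": "output/answers.json",
        "beam.size": 10,              # default if omitted
        "min.relation.freq": 500,     # default if omitted
        "max.num.entities": 3         # optional cap on linked entities
    }
    # an optional top-level 'entity.linking' section enables on-the-fly entity linking
}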
def train(config_file_path, seed, gpuid):
    config, logger = config_utils.load_config(config_file_path, seed, gpuid)
    if "training" not in config:
        print("Training parameters not in the config file!")
        sys.exit()

    results_logger = None
    if 'log.results' in config['training']:
        results_logger = logging.getLogger("results_logger")
        results_logger.setLevel(logging.INFO)
        fh = logging.FileHandler(filename=config['training']['log.results'])
        fh.setLevel(logging.INFO)
        results_logger.addHandler(fh)
        results_logger.info(str(config))

    # Load data
    if not isinstance(config['training']["path_to_dataset"], list):
        config['training']["path_to_dataset"] = [config['training']["path_to_dataset"]]
    training_dataset = []
    for path_to_train in config['training']["path_to_dataset"]:
        with open(path_to_train) as f:
            training_dataset += json.load(f, object_hook=sentence_object_hook)
    logger.info(f"Train: {len(training_dataset)}")

    if "path_to_validation" not in config['training']:
        config['training']["path_to_validation"] = config['training']["path_to_dataset"][-1]
        logger.info("No validation set, using part of the training data.")
    with open(config['training']["path_to_validation"]) as f:
        val_dataset = json.load(f, object_hook=sentence_object_hook)
    logger.info(f"Validation: {len(val_dataset)}")

    wordembeddings, word2idx = V.extend_embeddings_with_special_tokens(
        *_utils.load_word_embeddings(
            _utils.RESOURCES_FOLDER +
            "../../resources/embeddings/glove/glove.6B.100d.txt"))
    logger.info(f"Loaded word embeddings: {wordembeddings.shape}")

    model_type = config['training']["model_type"]
    logger.info(f"Model type: {model_type}")

    V.MAX_NEGATIVE_GRAPHS = 50
    training_dataset = [s for s in training_dataset
                        if any(scores[2] > 0.25 for g, scores in s.graphs)]
    training_samples, training_targets = pack_data(training_dataset, word2idx, model_type)
    logger.info(f"Data encoded: {[m.shape for m in training_samples]}")

    V.MAX_NEGATIVE_GRAPHS = 100
    val_dataset = [s for s in val_dataset
                   if any(scores[2] > 0.25 for g, scores in s.graphs)]
    print(f"Val F1 upper bound: {np.average([q.graphs[0].scores[2] for q in val_dataset])}")
    val_samples, val_targets = pack_data(val_dataset, word2idx, model_type)
    logger.info(f"Val data encoded: {[m.shape for m in val_samples]}")

    encoder = models.ConvWordsEncoder(
        hp_vocab_size=wordembeddings.shape[0],
        hp_word_emb_size=wordembeddings.shape[1],
        **config['model'])
    encoder.load_word_embeddings_from_numpy(wordembeddings)
    net = getattr(models, model_type)(encoder, **config['model'])

    def metrics(targets, predictions, validation=False):
        _, predicted_targets = torch.topk(predictions, 1, dim=-1)
        _, targets = torch.topk(targets, 1, dim=-1)
        predicted_targets = predicted_targets.squeeze(1)
        targets = targets.squeeze(1)
        cur_acc = torch.sum(predicted_targets == targets).float()
        cur_acc /= predicted_targets.size(0)
        cur_f1 = 0.0
        if validation:
            for i, q in enumerate(val_dataset):
                if i < predicted_targets.size(0):
                    idx = predicted_targets.data[i]
                    if abs(idx) < len(q.graphs):
                        cur_f1 += q.graphs[idx].scores[2]
            cur_f1 /= predicted_targets.size(0)
        return {'acc': cur_acc.data[0],
                'f1': cur_f1,
                'predictions': predicted_targets.data.unsqueeze(0)}

    container = fackel.TorchContainer(
        torch_model=net,
        criterion=losses.VariableMarginLoss(),
        # criterion=nn.MultiMarginLoss(margin=0.5, size_average=False),
        metrics=metrics,
        optimizer_params={
            'weight_decay': 0.05,
            # 'lr': 0.01
        },
        optimizer="Adam",
        logger=logger,
        init_model_weights=True,
        **config['training'])

    if results_logger:
        results_logger.info("Model save to: {}".format(container._save_model_to))

    log_history = container.train(training_samples,
                                  training_targets,
                                  dev=val_samples,
                                  dev_targets=val_targets)

    for q in val_dataset:
        random.shuffle(q.graphs)
    if container._model_checkpoint:
        container.reload_from_saved()

    val_samples, val_targets = pack_data(val_dataset, word2idx, model_type)
    predictions = container.predict_batchwise(*val_samples)
    results = metrics(*container._torchify_data(True, val_targets),
                      predictions,
                      validation=True)
    _, predictions = torch.topk(predictions, 1, dim=-1)
    print(f"Acc: {results['acc']}, F1: {results['f1']}")
    print(f"Predictions: {predictions.data[:10].view(1, -1)}")
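# Hedged usage sketch: train() above takes a config path plus a random seed and a
# GPU id, both forwarded to config_utils.load_config. In the repository it is
# presumably wired to a command-line entry point; a direct call might look like
# the commented example below (the path and values are placeholders).
#
#     train("configs/webqsp_train.yaml", seed=1, gpuid=0)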
def generate(path_to_model, config_file_path, seed, gpuid, experiment_tag):
    config, logger = config_utils.load_config(config_file_path, gpuid=gpuid, seed=seed)
    if "evaluation" not in config:
        print("Evaluation parameters not in the config file!")
        sys.exit()

    # Get the data set name and load the data set as specified in the config file
    dataset_name = config['evaluation']['questions'].split("/")[-1].split(".")[0]
    logger.info(f"Dataset: {dataset_name}")
    with open(config['evaluation']['questions']) as f:
        webquestions_questions = json.load(f)

    # Load the entity linker if specified, otherwise the entity annotations in the data set will be used
    entitylinker = None
    if 'entity.linking' in config:
        PATH_EL = "../../entity-linking/"
        sys.path.insert(0, PATH_EL)
        from entitylinking import core
        linking_config = config['entity.linking']
        logger.info("Load entity linker")
        entitylinker = getattr(core, linking_config['linker'])(
            logger=logger, **linking_config['linker.options'], pos_tags=True)

    # Load the GloVe word embeddings and embeddings for special tokens
    _, word2idx = V.extend_embeddings_with_special_tokens(
        *_utils.load_word_embeddings(
            _utils.RESOURCES_FOLDER +
            "../../resources/embeddings/glove/glove.6B.100d.txt"))
    # Set the global mapping from words to indices
    V.WORD_2_IDX = word2idx

    # Derive the model type and the full model name from the model file
    model_type = path_to_model.split("/")[-1].split("_")[0]
    model_name = path_to_model.split("/")[-1].replace(".pkl", "")
    logger.info(f"Model type: {model_type}")
    logger.info('Loading the model from: {}'.format(path_to_model))

    # Load the PyTorch model
    dummy_net = getattr(models, model_type)()
    container = fackel.TorchContainer(torch_model=dummy_net, logger=logger)
    container.load_from_file(path_to_model)
    model_gated = container._model._gnn.hp_gated if model_type == "GNNModel" else False

    # Load the Freebase entity set that was used to restrict the answer space in previous work, if specified.
    freebase_entity_set = set()
    if config['evaluation'].get('entities.list', False):
        print("Using the Freebase entity list")
        freebase_entity_set = _utils.load_blacklist(_utils.RESOURCES_FOLDER +
                                                    "freebase-entities.txt")

    # Compose a file name for the output file
    save_answer_to = config['evaluation']["save.answers.to"]
    if not save_answer_to.endswith(".json"):
        dir_name = config['evaluation']["save.answers.to"] + \
            f"{dataset_name}/{model_type.lower()}/"
        save_answer_to = dir_name + \
            f"{dataset_name}_predictions_{'g' if model_gated else ''}{model_name.lower()}.json"
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
    print(f"Save output to {save_answer_to}")

    # Init the variables to store the results
    logger.debug('Testing')
    graph_queries.FREQ_THRESHOLD = config['evaluation'].get("min.relation.freq", 500)
    global_answers = []
    avg_metrics = np.zeros(4)

    # Iterate over the questions in the dataset
    data_iterator = tqdm.tqdm(webquestions_questions, ncols=100, ascii=True)
    for i, q_obj in enumerate(data_iterator):
        q = q_obj.get('utterance', q_obj.get('question'))
        q_index = q_obj['questionid']

        if entitylinker:
            sent = entitylinker.link_entities_in_raw_input(q, element_id=q_index)
            if "max.num.entities" in config['evaluation']:
                sent.entities = sent.entities[:config['evaluation']["max.num.entities"]]
            sent = sentence.Sentence(input_text=sent.input_text,
                                     tagged=sent.tagged,
                                     entities=sent.entities)
        else:
            tagged = _utils.get_tagged_from_server(q, caseless=q.islower())
            sent = sentence.Sentence(input_text=q,
                                     tagged=tagged,
                                     entities=q_obj['entities'])

        chosen_graphs = staged_generation.generate_with_model(
            sent, container, beam_size=config['evaluation'].get("beam.size", 10))
        model_answers = []
        g = ({},)
        j = -1
        if chosen_graphs:
            j = 0
            valid_answer_set = False
            while not valid_answer_set and j < len(chosen_graphs):
                g = chosen_graphs[j]
                model_answers = graph_queries.get_graph_denotations(g.graph)
                if model_answers:
                    valid_answer_set = True
                    if freebase_entity_set:
                        labeled_answers = {
                            l.lower()
                            for _, labels in queries.get_labels_for_entities(model_answers).items()
                            for l in labels
                        }
                        valid_answer_set = len(labeled_answers & freebase_entity_set) > len(model_answers) - 1
                j += 1

        gold_answers = webquestions_io.get_answers_from_question(q_obj)
        metrics = evaluation.retrieval_prec_rec_f1(gold_answers, model_answers)
        global_answers.append((q_index, list(metrics), model_answers, [
            (c_g.graph, float(c_g.scores[2])) for c_g in chosen_graphs[:10]
        ]))
        avg_metrics += metrics + (j, )
        precision, recall, f1, g_j = tuple(avg_metrics / (i + 1))
        data_iterator.set_postfix(prec=precision, rec=recall, f1=f1, g_j=g_j)

        # Save intermediate results
        if i > 0 and i % 100 == 0:
            with open(save_answer_to, 'w') as answers_out:
                json.dump(global_answers,
                          answers_out,
                          sort_keys=True,
                          indent=4,
                          cls=sentence.SentenceEncoder)

    avg_metrics = avg_metrics / (len(webquestions_questions))
    print("Average metrics: {}".format(avg_metrics))

    # Fine-grained results, if there is a mapping from questions to the number of relations needed to find the correct answer
    results_by_hops = {}
    if "qid2hop" in config['evaluation']:
        with open(config['evaluation']['qid2hop']) as f:
            q_index2hop = json.load(f)
        print("Results by hop: ")
        hops_dist = Counter([q_index2hop[p[0]] for p in global_answers])
        results_by_hops = {
            i: np.zeros(3)
            for i in range(max(hops_dist.keys()) + 1)
        }
        for p in global_answers:
            metrics = tuple(p[1])
            results_by_hops[q_index2hop[p[0]]] += metrics
        for m in results_by_hops:
            if hops_dist[m] > 0:
                results_by_hops[m] = results_by_hops[m] / hops_dist[m]
        print(results_by_hops)

    # Add results to the results file
    if "add.results.to" in config['evaluation']:
        print(f"Adding results to {config['evaluation']['add.results.to']}")
        with open(config['evaluation']["add.results.to"], 'a+') as results_out:
            results_out.write(",".join([
                model_name, model_type,
                "Gated" if model_gated else "Simple",
                str(seed), dataset_name, "full",
                "EntityList" if freebase_entity_set else "NoEntityList"
            ] + [str(el) for el in avg_metrics[:3]]))
            results_out.write("\n")
            # Include fine-grained results if available
            if results_by_hops:
                for i in range(max(results_by_hops.keys()) + 1):
                    results_out.write(",".join([
                        model_name, model_type,
                        "Gated" if model_gated else "Simple",
                        container.description,
                        str(seed), dataset_name,
                        str(i),
                        "EntityList" if freebase_entity_set else "NoEntityList"
                    ] + [str(el) for el in results_by_hops[i]] + [experiment_tag]))
                    results_out.write("\n")

    # Save final model output
    with open(save_answer_to, 'w') as answers_out:
        json.dump(global_answers,
                  answers_out,
                  sort_keys=True,
                  indent=4,
                  cls=sentence.SentenceEncoder)
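# Hedged sketch (illustrative only): the column order of the summary row that the
# evaluation above appends to config['evaluation']['add.results.to']. The names
# are descriptive labels inferred from the join order, not an actual header in
# the repository; avg_metrics[:3] corresponds to precision, recall, and F1. The
# per-hop rows additionally carry the model description (container.description)
# and the experiment tag, and replace "full" with the hop count.
_RESULTS_ROW_COLUMNS = [
    "model_name", "model_type", "gated_or_simple", "seed", "dataset_name",
    "hops_or_full", "entity_list_flag", "precision", "recall", "f1"
]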