def _evaluate_split(test_model, ball, data_converter, data_path):
    """Run an Evaluator over every item of one data split and return its score.

    Args:
        test_model: model passed to ``Evaluator`` (built via
            ``ModelBuilder.build_f`` by the caller).
        ball: the ``Ball`` instance shared with training.
        data_converter: the ``DataConverter`` shared with training.
        data_path: path handed to ``WikilinksIterator`` for this split.

    Returns:
        Whatever ``Evaluator.evaluate_model()`` returns; callers log it
        with ``%.2f`` as an accuracy.
    """
    evaluator = Evaluator(test_model, ball, data_converter)
    data_iterator = WikilinksIterator(data_path)
    for item in data_iterator.jsons():
        evaluator.evaluate_on(item)
    return evaluator.evaluate_model()


def main(args):
    """Train a model from a JSON config, evaluating on dev after each epoch.

    The config file is read from ``args.config`` and a copy is written to
    ``<args.experiment_dir>/config.json``. After the last epoch the final
    weights are saved to ``<args.experiment_dir>/final-model.weights`` and
    evaluated on both the dev and the test split.

    Args:
        args: object exposing ``config`` (path of the JSON config file) and
            ``experiment_dir`` (directory receiving the config copy and the
            weight files).

    Config keys read here:
        ``files['base-data-dir']``, ``model``, and from ``training``:
        ``neg_samples``, ``batch_size``, ``neg_sample_from_cands``,
        ``epochs``.
    """
    # Context managers make sure both files are closed (and the copy is
    # flushed to disk) before the long-running training starts.
    with open(args.config, 'r') as config_file:
        config = json.load(config_file)

    # save config into experiment directory
    with open(os.path.join(args.experiment_dir, 'config.json'), 'w') as out:
        json.dump(config, out)
    logging.info("Config json: %s", config)

    ball = Ball(config)

    file_config = config['files']
    # NOTE: split names are appended by plain string concatenation below, so
    # this value is used as a prefix exactly as given in the config.
    base_data_dir = file_config['base-data-dir']

    model_config = config['model']
    train_config = config['training']

    model_builder = ModelBuilder(model_config,
                                 ball.get_word_vecs(),
                                 ball.get_character_vecs(),
                                 ball.get_feature_indxr(),
                                 ball.get_ent_cbow_vecs())

    logging.info("Model Summary:")
    model_builder.build_f().summary()

    logging.info("Building model...")
    model = model_builder.build_trainable_model()
    logging.info("Model Built!")

    logging.info("Building data ball...")
    data_converter = DataConverter(ball)
    logging.info("Data ball built!")

    trainer = Trainer(model, ball, data_converter,
                      neg_sample_k=train_config['neg_samples'],
                      batch_size=train_config['batch_size'],
                      neg_sample_from_cands=train_config['neg_sample_from_cands'])

    # Optimization Loop
    logging.info("--- Starting optimization loop ---")
    # The same temporary weights file is overwritten after every epoch.
    tmp_weights_path = os.path.join(args.experiment_dir, 'tmp-model.weights')
    for epoch in range(train_config['epochs']):
        logging.info("Starting epoch %d", epoch + 1)
        data_iterator = WikilinksIterator(base_data_dir + 'train')
        for item in data_iterator.jsons():
            trainer.train_on(item)
        trainer.epoch_done()
        logging.info("Finished training epoch %d", epoch + 1)

        # temp weight save for evaluation
        model.save_weights(tmp_weights_path)

        logging.info("Evaluating epoch %d", epoch + 1)
        test_model = model_builder.build_f(weights=tmp_weights_path)
        accuracy = _evaluate_split(test_model, ball, data_converter,
                                   base_data_dir + 'dev')
        logging.info("Model accuracy for epoch %d is %.2f", epoch + 1, accuracy)

    logging.info("Saving final model")
    final_weights_path = os.path.join(args.experiment_dir,
                                      'final-model.weights')
    model.save_weights(final_weights_path)

    # One model built from the final weights is reused for both splits.
    test_model = model_builder.build_f(weights=final_weights_path)

    logging.info("Starting final model dev evaluation")
    accuracy = _evaluate_split(test_model, ball, data_converter,
                               base_data_dir + 'dev')
    logging.info("Final model dev accuracy is %.2f", accuracy)

    logging.info("Starting final model test evaluation")
    accuracy = _evaluate_split(test_model, ball, data_converter,
                               base_data_dir + 'test')
    logging.info("Final model test accuracy is %.2f", accuracy)