# main() from meta_dev_zs.py: evaluate saved meta-training checkpoints directly
# on the meta-dev task's test set, without any fine-tuning.
def main(opt, device_id):
    opt = training_opt_postprocessing(opt, device_id)
    init_logger(opt.log_file)
    out_file = None
    best_test_score, best_ckpt = -10000, None
    dummy_parser = argparse.ArgumentParser(description='meta_dev_zs.py')
    opts.model_opts(dummy_parser)
    dummy_opt = dummy_parser.parse_known_args([])[0]

    # Walk over saved checkpoints: epoch 57, then every 57 * 5 epochs thereafter.
    for i in range(57, opt.meta_iterations * opt.inner_iterations + 57, 57 * 5):
        ckpt_path = '{}_epoch_{}.pt'.format(opt.save_model, i)
        logger.info('Loading checkpoint from %s' % ckpt_path)
        if out_file is None:
            out_file = codecs.open(opt.output, 'w+', 'utf-8')
        fields, model, model_opt = onmt.model_builder.load_test_model(
            opt, dummy_opt.__dict__, model_path=ckpt_path)

        scorer = onmt.translate.GNMTGlobalScorer(opt.alpha, opt.beta,
                                                 opt.coverage_penalty,
                                                 opt.length_penalty)
        kwargs = {k: getattr(opt, k)
                  for k in ["beam_size", "n_best", "max_length", "min_length",
                            "stepwise_penalty", "block_ngram_repeat",
                            "ignore_when_blocking", "dump_beam", "report_bleu",
                            "replace_unk", "gpu", "verbose", "fast", "mask_from"]}
        fields['graph'] = torchtext.data.Field(sequential=False)
        translator = Translator(model, fields, global_scorer=scorer,
                                out_file=out_file, report_score=False,
                                copy_attn=model_opt.copy_attn, logger=logger,
                                log_probs_out_file=None, **kwargs)

        # Translate the test set of the meta-dev task and save the result.
        all_scores, all_predictions = translator.translate(
            src_path='processed_data/meta-dev/' + opt.meta_dev_task + '/src-test.txt',
            tgt_path=None,
            src_dir=None,
            batch_size=opt.translate_batch_size,
            attn_debug=False)

        # Dump predictions as a chemprop-readable CSV.
        f = open('experiments/meta_dev/' + opt.meta_dev_task + '/test_predictions.csv',
                 'w', encoding='utf-8')
        f.write('smiles,property\n')
        for n_best_mols in all_predictions:
            for mol in n_best_mols:
                f.write(mol.replace(' ', '') + ',0\n')
        f.close()

        # Call chemprop to score the predicted molecules.
        test_path = '"' + 'experiments/meta_dev/' + opt.meta_dev_task + '/test_predictions.csv' + '"'
        checkpoint_path = '"' + 'scorer_ckpts/' + opt.meta_dev_task + '/model.pt' + '"'
        preds_path = '"' + 'experiments/meta_dev/' + opt.meta_dev_task + '/test_scores.csv' + '"'

        # If all molecules are invalid, chemprop produces no output file, so copy
        # the predictions into the score file first as a fallback.
        cmd = 'cp {} {}'.format(test_path, preds_path)
        result = os.popen(cmd)
        result.close()
        cmd = ('python chemprop/predict.py --test_path {} --checkpoint_path {} '
               '--preds_path {} --num_workers 0').format(test_path, checkpoint_path, preds_path)
        scorer_result = os.popen(cmd)
        # logger.info('{}'.format('\n'.join(scorer_result.readlines())))
        scorer_result.close()

        # Read the score file and compute test metrics.
        score = read_score_csv('experiments/meta_dev/' + opt.meta_dev_task + '/test_scores.csv')
        assert len(score) % opt.beam_size == 0

        # Report whether this checkpoint is the best so far on the test set.
        test_metrics = calculate_metrics(opt.meta_dev_task, 'dev', 'test', score)
        logger.info('test metrics: ' + str(test_metrics))
        test_score = test_metrics['success_rate']
        if test_score > best_test_score:
            best_ckpt = ckpt_path
            logger.info('New best test success rate: {:.4f} by {}'.format(test_score, ckpt_path))
            best_test_score = test_score
        else:
            logger.info('test success rate: {:.4f} by {}'.format(test_score, ckpt_path))

        del model_opt
        del fields
        del model
        gc.collect()
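# Note: both main() functions here invoke chemprop via os.popen and never check the
# exit status, so a failed scoring run silently falls back to the copied predictions
# file, whose property column is just the placeholder 0. The sketch below is one
# possible hardening, not what the scripts above and below actually do; it assumes
# only the chemprop flags already used in this file (--test_path, --checkpoint_path,
# --preds_path, --num_workers).
import subprocess


def run_chemprop_scoring(test_path, checkpoint_path, preds_path):
    """Hypothetical helper: score a predictions CSV with chemprop, failing loudly."""
    cmd = ['python', 'chemprop/predict.py',
           '--test_path', test_path,
           '--checkpoint_path', checkpoint_path,
           '--preds_path', preds_path,
           '--num_workers', '0']
    # check=True raises subprocess.CalledProcessError on a non-zero exit code,
    # instead of leaving the fallback copy in place unnoticed.
    subprocess.run(cmd, check=True)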
# main() from all_dev.py: fine-tune each saved checkpoint on the meta-dev task's
# train set, pick the fine-tuned model with the best dev success rate, then score
# that model on the task's test set.
def main(opt, device_id):
    opt = training_opt_postprocessing(opt, device_id)
    init_logger(opt.log_file)
    out_file = None
    best_test_score, best_ckpt = -10000, None
    dummy_parser = argparse.ArgumentParser(description='all_dev.py')
    opts.model_opts(dummy_parser)
    dummy_opt = dummy_parser.parse_known_args([])[0]

    # Evaluate every 10th training checkpoint (epochs 1, 11, 21, ...).
    for i in range(0, opt.train_epochs, 10):
        ckpt_path = '{}_epoch_{}.pt'.format(opt.save_model, i + 1)
        logger.info('Loading checkpoint from %s' % ckpt_path)
        checkpoint = torch.load(ckpt_path,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint['opt']
        fields = load_fields_from_vocab(checkpoint['vocab'], data_type="text")

        # Build the model from the checkpoint.
        model = build_model(model_opt, opt, fields, checkpoint)
        assert opt.train_from == ''  # do not load optimizer state
        optim = build_optim(model, opt, checkpoint)

        # Build the model saver; create the task directory under
        # experiments/all_dev if it does not exist yet.
        if not os.path.exists('experiments/all_dev'):
            os.mkdir('experiments/all_dev')
            os.mkdir('experiments/all_dev/' + opt.meta_dev_task)
        elif not os.path.exists('experiments/all_dev/' + opt.meta_dev_task):
            os.mkdir('experiments/all_dev/' + opt.meta_dev_task)
        model_saver = build_model_saver(
            model_opt, 'experiments/all_dev/' + opt.meta_dev_task + '/model',
            opt, model, fields, optim)
        trainer = build_trainer(opt, device_id, model, fields, optim, "text",
                                model_saver=model_saver)
        train_iter = list(
            build_dataset_iter(lazily_load_dataset("train", opt), fields, opt))

        # Fine-tune on the train set of the meta-dev task.
        trainer.train(train_iter, opt.inner_iterations)

        # Evaluate every fine-tuned checkpoint on the dev set of the meta-dev task.
        best_dev_score, best_model_path = -10000, None
        for model_path in os.listdir('experiments/all_dev/' + opt.meta_dev_task):
            if model_path.find('.pt') == -1:
                continue
            if out_file is None:
                out_file = codecs.open(opt.output, 'w+', 'utf-8')
            fields, model, model_opt = onmt.model_builder.load_test_model(
                opt, dummy_opt.__dict__,
                model_path='experiments/all_dev/' + opt.meta_dev_task + '/' + model_path)
            scorer = onmt.translate.GNMTGlobalScorer(opt.alpha, opt.beta,
                                                     opt.coverage_penalty,
                                                     opt.length_penalty)
            kwargs = {k: getattr(opt, k)
                      for k in ["beam_size", "n_best", "max_length", "min_length",
                                "stepwise_penalty", "block_ngram_repeat",
                                "ignore_when_blocking", "dump_beam", "report_bleu",
                                "replace_unk", "gpu", "verbose", "fast", "mask_from"]}
            fields['graph'] = torchtext.data.Field(sequential=False)
            translator = Translator(model, fields, global_scorer=scorer,
                                    out_file=out_file, report_score=False,
                                    copy_attn=model_opt.copy_attn, logger=logger,
                                    log_probs_out_file=None, **kwargs)

            # Translate the dev set and save the result.
            all_scores, all_predictions = translator.translate(
                src_path='processed_data/meta-dev/' + opt.meta_dev_task + '/src-dev.txt',
                tgt_path=None,
                src_dir=None,
                batch_size=opt.translate_batch_size,
                attn_debug=False)

            # Dump predictions as a chemprop-readable CSV.
            f = open('experiments/all_dev/' + opt.meta_dev_task + '/dev_predictions.csv',
                     'w', encoding='utf-8')
            f.write('smiles,property\n')
            for n_best_mols in all_predictions:
                for mol in n_best_mols:
                    f.write(mol.replace(' ', '') + ',0\n')
            f.close()

            # Call chemprop to score the predicted molecules.
            test_path = '"' + 'experiments/all_dev/' + opt.meta_dev_task + '/dev_predictions.csv' + '"'
            checkpoint_path = '"' + 'scorer_ckpts/' + opt.meta_dev_task + '/model.pt' + '"'
            preds_path = '"' + 'experiments/all_dev/' + opt.meta_dev_task + '/dev_scores.csv' + '"'

            # If all molecules are invalid, chemprop produces no output file, so copy
            # the predictions into the score file first as a fallback.
            cmd = 'cp {} {}'.format(test_path, preds_path)
            result = os.popen(cmd)
            result.close()
            cmd = ('python chemprop/predict.py --test_path {} --checkpoint_path {} '
                   '--preds_path {} --num_workers 0').format(test_path, checkpoint_path, preds_path)
            scorer_result = os.popen(cmd)
            scorer_result.close()

            # Read the score file and compute dev metrics.
            score = read_score_csv('experiments/all_dev/' + opt.meta_dev_task + '/dev_scores.csv')
            assert len(score) % opt.beam_size == 0
            # dev_scores = []
            # for i in range(0, len(score), opt.beam_size):
            #     dev_scores.append(sum([x[1] for x in score[i:i + opt.beam_size]]) / opt.beam_size)

            # Report the dev score and keep track of the best fine-tuned model.
            dev_metrics = calculate_metrics(opt.meta_dev_task, 'dev', 'dev', score)
            logger.info('dev metrics: ' + str(dev_metrics))
            dev_score = dev_metrics['success_rate']
            if dev_score > best_dev_score:
                logger.info('New best dev success rate: {:.4f} by {}'.format(
                    dev_score, model_path))
                best_model_path = model_path
                best_dev_score = dev_score
            else:
                logger.info('dev success rate: {:.4f} by {}'.format(
                    dev_score, model_path))

            del fields
            del model
            del model_opt
            del scorer
            del translator
            gc.collect()

        assert best_model_path is not None

        # Test the best fine-tuned model on the test set of the meta-dev task.
        if out_file is None:
            out_file = codecs.open(opt.output, 'w+', 'utf-8')
        fields, model, model_opt = onmt.model_builder.load_test_model(
            opt, dummy_opt.__dict__,
            model_path='experiments/all_dev/' + opt.meta_dev_task + '/' + best_model_path)
        scorer = onmt.translate.GNMTGlobalScorer(opt.alpha, opt.beta,
                                                 opt.coverage_penalty,
                                                 opt.length_penalty)
        kwargs = {k: getattr(opt, k)
                  for k in ["beam_size", "n_best", "max_length", "min_length",
                            "stepwise_penalty", "block_ngram_repeat",
                            "ignore_when_blocking", "dump_beam", "report_bleu",
                            "replace_unk", "gpu", "verbose", "fast", "mask_from"]}
        fields['graph'] = torchtext.data.Field(sequential=False)
        translator = Translator(model, fields, global_scorer=scorer,
                                out_file=out_file, report_score=False,
                                copy_attn=model_opt.copy_attn, logger=logger,
                                log_probs_out_file=None, **kwargs)

        # Translate the test set and save the result.
        all_scores, all_predictions = translator.translate(
            src_path='processed_data/meta-dev/' + opt.meta_dev_task + '/src-test.txt',
            tgt_path=None,
            src_dir=None,
            batch_size=opt.translate_batch_size,
            attn_debug=False)

        # Dump predictions as a chemprop-readable CSV.
        f = open('experiments/all_dev/' + opt.meta_dev_task + '/test_predictions.csv',
                 'w', encoding='utf-8')
        f.write('smiles,property\n')
        for n_best_mols in all_predictions:
            for mol in n_best_mols:
                f.write(mol.replace(' ', '') + ',0\n')
        f.close()

        # Call chemprop to score the predicted molecules.
        test_path = '"' + 'experiments/all_dev/' + opt.meta_dev_task + '/test_predictions.csv' + '"'
        checkpoint_path = '"' + 'scorer_ckpts/' + opt.meta_dev_task + '/model.pt' + '"'
        preds_path = '"' + 'experiments/all_dev/' + opt.meta_dev_task + '/test_scores.csv' + '"'

        # If all molecules are invalid, chemprop produces no output file, so copy
        # the predictions into the score file first as a fallback.
        cmd = 'cp {} {}'.format(test_path, preds_path)
        result = os.popen(cmd)
        result.close()
        cmd = ('python chemprop/predict.py --test_path {} --checkpoint_path {} '
               '--preds_path {} --num_workers 0').format(test_path, checkpoint_path, preds_path)
        scorer_result = os.popen(cmd)
        # logger.info('{}'.format('\n'.join(scorer_result.readlines())))
        scorer_result.close()

        # Read the score file and compute test metrics.
        score = read_score_csv('experiments/all_dev/' + opt.meta_dev_task + '/test_scores.csv')
        assert len(score) % opt.beam_size == 0
        # test_scores = []
        # for i in range(0, len(score), opt.beam_size):
        #     test_scores.append(sum([x[1] for x in score[i:i + opt.beam_size]]) / opt.beam_size)

        # Report whether this checkpoint is the best so far on the test set.
        test_metrics = calculate_metrics(opt.meta_dev_task, 'dev', 'test', score)
        logger.info('test metrics: ' + str(test_metrics))
        test_score = test_metrics['success_rate']
        if test_score > best_test_score:
            best_ckpt = ckpt_path
            logger.info('New best test success rate: {:.4f} by {}'.format(
                test_score, ckpt_path))
            best_test_score = test_score
        else:
            logger.info('test success rate: {:.4f} by {}'.format(
                test_score, ckpt_path))

        del model_opt
        del fields
        del checkpoint
        del model
        del optim
        del model_saver
        del trainer
        gc.collect()
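# read_score_csv and calculate_metrics are project helpers that are not shown in
# this excerpt. For orientation only, the sketch below shows one plausible shape
# for read_score_csv. It is an assumption, not the project's implementation: it
# returns one (smiles, score) pair per CSV row, which is consistent with the
# commented-out per-beam averaging code above (which reads x[1]) and with the
# assert len(score) % opt.beam_size == 0 checks.
import csv


def read_score_csv(path):
    """Hypothetical sketch: parse a chemprop score CSV into (smiles, score) pairs."""
    pairs = []
    with open(path, encoding='utf-8') as handle:
        for row in csv.DictReader(handle):
            smiles = row['smiles']
            # Take the first non-smiles column as the predicted property. Rows that
            # came from the 'cp' fallback still carry the placeholder value 0.
            value = next(v for k, v in row.items() if k != 'smiles')
            try:
                pairs.append((smiles, float(value)))
            except ValueError:
                pairs.append((smiles, 0.0))
    return pairs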