def _load_model(self):
    # Load models
    model_name = pull_model(self.args.model)
    self.config = SoloistConfig.from_pretrained(model_name)
    self._update_config_and_args(self.config, self.args)
    self.tokenizer = SoloistTokenizer.from_pretrained(model_name)
    model = SoloistModel.from_pretrained(model_name, config=self.config)
    if self.args.model == 'gpt2':
        self.tokenizer, model = add_custom_tokens(self.tokenizer, model)
    if self.args.fp16:
        self.scaler = torch.cuda.amp.GradScaler()
    self.tokenizer.pad_token = self.tokenizer.eos_token
    if self.is_master():
        wandb.watch(model, log_freq=max(1000, self.args.logging_steps))
    number_of_parameters = sum(x.numel() for x in model.parameters())
    self.logger.info(f'model loaded, number of parameters: {number_of_parameters}')
    self.model = model
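# The GradScaler created above only takes effect together with autocast in the
# training step. A minimal sketch of the usual PyTorch AMP pattern follows; it is
# not the project's actual training loop, and `optimizer`, `batch` and
# `compute_loss` are illustrative assumptions:
#
#     with torch.cuda.amp.autocast(enabled=self.args.fp16):
#         loss = compute_loss(self.model, batch)
#     if self.args.fp16:
#         self.scaler.scale(loss).backward()   # scale the loss to avoid fp16 underflow
#         self.scaler.step(optimizer)          # unscale gradients, then take the optimizer step
#         self.scaler.update()                 # adjust the scale factor for the next iteration
#     else:
#         loss.backward()
#         optimizer.step()
#     optimizer.zero_grad()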
if args.resume:
    import wandb

    # Resume run and fill metrics
    os.environ.pop('WANDB_NAME', None)
    wandb.init(resume=args.resume)
elif args.wandb:
    import wandb

    # It is an artifact
    # Start a new evaluate run
    wandb.init(job_type='evaluation')
else:
    wandb = None
dataset = load_dataset(args.dataset, use_goal=True)
dataset = wrap_dataset_with_cache(dataset)
if args.file is None or not os.path.exists(args.file):
    args.model = pull_model(args.model)
if args.file is not None:
    # Parse previously generated predictions from a file
    path = args.file
    if not os.path.exists(path):
        path = os.path.join(args.model, args.file)
    responses, beliefs, gold_responses, delex_responses, delex_gold_responses = \
        parse_predictions(dataset, path)
else:
    # Generate predictions with the conversational pipeline
    logger.info('generating responses')
    pipeline = transformers.pipeline('augpt-conversational', args.model,
                                     device=0 if torch.cuda.is_available() else -1)
    responses, beliefs, gold_responses, delex_responses, delex_gold_responses = \
        generate_predictions(pipeline, dataset, os.path.join(
            wandb.run.dir if wandb and wandb.run else '.', 'test-predictions.txt'))
logger.info('evaluation started')
evaluator = MultiWozEvaluator(dataset, is_multiwoz_eval=True, logger=logger)
success, matches, domain_results = evaluator.evaluate(beliefs, delex_responses, progressbar=True)
logger.info('evaluation finished')
            print(sample.belief, file=fout)
            print(sample.database, file=fout)
            if pipeline.lexicalizer:
                print(f'R:{sample.response}', file=fout)
            else:
                print('R:', file=fout)
            print(f'RD:{conversation.raw_response}', file=fout)
            raw_belief = belief_parser(belief)
            beliefs.append(raw_belief)
            responses.append(conversation.generated_responses[-1])
            delex_responses.append(conversation.raw_response)
    return responses, beliefs, gold_responses, delex_responses, delex_gold_responses


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='jkulhanek/augpt-mw-21')
    parser.add_argument('--file', default='predictions.txt')
    parser.add_argument('--dataset', default='multiwoz-2.1-test')
    args = parser.parse_args()
    setup_logging()
    logger = logging.getLogger()
    model_name = pull_model(args.model)
    pipeline = transformers.pipeline('soloist-conversational', model_name,
                                     device=0 if torch.cuda.is_available() else -1)

    # Generate
    from data import load_dataset
    dataset = load_dataset(args.dataset)
    generate_predictions(pipeline, dataset, args.file)
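# Example invocation (a sketch; the script filename `generate.py` is an assumption,
# the arguments come from the argparse definition above):
#
#     python generate.py --model jkulhanek/augpt-mw-21 \
#         --dataset multiwoz-2.1-test --file predictions.txt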
    name = run_name
    artifact = wandb.Artifact(f'{name}-model', 'model')
    for f in os.listdir(path):
        # Skip wandb bookkeeping files and directories, upload everything else
        if f.startswith('wandb-'): continue  # noqa: E701
        if f == 'output.log': continue  # noqa: E701
        if f == 'requirements.txt': continue  # noqa: E701
        if f.startswith('events.'): continue  # noqa: E701
        if os.path.isdir(os.path.join(path, f)): continue  # noqa: E701
        artifact.add_file(os.path.join(path, f), f)
    wandb.run.log_artifact(artifact, aliases=['latest', run_name])


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('run')
    parser.add_argument('--name', default=None, help='artifact name')
    args = parser.parse_args()
    run = args.run
    root = pull_model(run)
    setup_logging()
    logger = logging.getLogger()
    logger.info('publishing artifact')
    wandb.init(resume=run)
    publish_model(root, args.name)
    logger.info('model published')
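# Example invocation (a sketch; the script filename `publish.py` and the artifact
# name are assumptions, the arguments come from the argparse definition above):
#
#     python publish.py <wandb-run-id> --name augpt-mw-21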