def model_provider(pre_process=True, post_process=True):
    """Construct and return a GPT model instance.

    Args:
        pre_process: whether this rank owns the embedding (input) stage.
        post_process: whether this rank owns the output (head) stage.
    """
    print_rank_0('building GPT model ...')
    return GPTModel(
        num_tokentypes=0,
        parallel_output=True,
        pre_process=pre_process,
        post_process=post_process,
    )
def model_provider(pre_process=True, post_process=True):
    """Construct and return a GPT model configured for evaluation.

    The ``eval_metric`` in the enclosing scope decides whether the model
    keeps its output tensor-parallel (``'loss'``) or gathers it
    (``'accuracy'``). Any other metric is rejected.

    Args:
        pre_process: whether this rank owns the embedding (input) stage.
        post_process: whether this rank owns the output (head) stage.

    Raises:
        NotImplementedError: if ``eval_metric`` is neither 'loss' nor
            'accuracy'.
    """
    if eval_metric not in ('loss', 'accuracy'):
        raise NotImplementedError('output type for {} evaluation metric '
                                  'is not supported.'.format(eval_metric))
    # Loss evaluation works on the parallel (split) logits; accuracy
    # needs the gathered output.
    parallel_output = eval_metric == 'loss'
    print_rank_0('building GPT model ...')
    return GPTModel(
        num_tokentypes=0,
        parallel_output=parallel_output,
        pre_process=pre_process,
        post_process=post_process,
    )
def model_provider():
    """Build and return the GPT model for this rank.

    When pipeline model parallelism is enabled (world size > 1), the
    model class is chosen by this rank's position in the pipeline:
    first stage, last stage, or an intermediate stage. Otherwise the
    full ``GPTModel`` is built.

    Returns:
        The stage-appropriate GPT model instance.
    """
    print_rank_0('building GPT model ...')
    # NOTE(review): the original also called get_args() here but never
    # used the result; the dead local has been removed.
    if mpu.get_pipeline_model_parallel_world_size() > 1:
        # Determine model based on position of stage in pipeline.
        if mpu.is_pipeline_first_stage():
            model = GPTModelFirstStage(num_tokentypes=0)
        elif mpu.is_pipeline_last_stage():
            # NOTE(review): parallel_output=True presumably keeps logits
            # split across tensor-parallel ranks — confirm against the
            # loss computation in the caller.
            model = GPTModelLastStage(num_tokentypes=0,
                                      parallel_output=True)
        else:
            model = GPTModelIntermediateStage(num_tokentypes=0)
    else:
        model = GPTModel(num_tokentypes=0, parallel_output=True)
    return model