コード例 #1
0
ファイル: pretrain_gpt.py プロジェクト: smoorjani/Megatron-LM
def model_provider(pre_process=True, post_process=True):
    """Construct and return a GPT model.

    Args:
        pre_process: Passed through unchanged to ``GPTModel``.
        post_process: Passed through unchanged to ``GPTModel``.

    Returns:
        The newly constructed ``GPTModel`` instance.
    """
    print_rank_0('building GPT model ...')

    # No token types are used and the output is left parallel.
    return GPTModel(
        num_tokentypes=0,
        parallel_output=True,
        pre_process=pre_process,
        post_process=post_process,
    )
コード例 #2
0
    def model_provider(pre_process=True, post_process=True):
        """Construct a GPT model whose output mode matches ``eval_metric``.

        ``eval_metric`` is read from the enclosing scope (not visible in
        this block). ``'loss'`` builds the model with
        ``parallel_output=True``; ``'accuracy'`` builds it with
        ``parallel_output=False``.

        Args:
            pre_process: Passed through unchanged to ``GPTModel``.
            post_process: Passed through unchanged to ``GPTModel``.

        Returns:
            The newly constructed ``GPTModel`` instance.

        Raises:
            NotImplementedError: If ``eval_metric`` is neither ``'loss'``
                nor ``'accuracy'``.
        """
        wants_loss = eval_metric == 'loss'

        # Reject unknown metrics before anything is logged or built.
        if not (wants_loss or eval_metric == 'accuracy'):
            message = ('output type for {} evaluation metric '
                       'is not supported.'.format(eval_metric))
            raise NotImplementedError(message)

        print_rank_0('building GPT model ...')
        return GPTModel(
            num_tokentypes=0,
            parallel_output=wants_loss,
            pre_process=pre_process,
            post_process=post_process,
        )
コード例 #3
0
def model_provider():
    """Construct the GPT model variant for the current pipeline setup.

    When the pipeline model-parallel world size reported by ``mpu`` is
    greater than one, a stage-specific model class is chosen (first,
    last, or intermediate stage). Otherwise a full ``GPTModel`` is built.

    Returns:
        The newly constructed model instance.
    """
    print_rank_0('building GPT model ...')

    # The return value is not used in this function; the call is kept so
    # any effect of get_args() itself is preserved.
    # NOTE(review): confirm whether this call is still needed here.
    get_args()

    uses_pipeline = mpu.get_pipeline_model_parallel_world_size() > 1
    if not uses_pipeline:
        return GPTModel(num_tokentypes=0, parallel_output=True)

    # Pick the model class from this stage's position in the pipeline.
    if mpu.is_pipeline_first_stage():
        return GPTModelFirstStage(num_tokentypes=0)
    if mpu.is_pipeline_last_stage():
        return GPTModelLastStage(num_tokentypes=0, parallel_output=True)
    return GPTModelIntermediateStage(num_tokentypes=0)