Example #1
    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        # Make sure all arguments are present in older models.
        base_lm_architecture(args)

        # Older checkpoints stored the context length as max_target_positions;
        # map it onto the newer tokens_per_sample name.
        if hasattr(args, "max_target_positions") and not hasattr(
            args, "tokens_per_sample"
        ):
            args.tokens_per_sample = args.max_target_positions

        decoder = FConvDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            # decoder_layers and decoder_attention are stored as Python
            # literals in string form, so they are eval'd into real values.
            convolutions=eval(args.decoder_layers),
            out_embed_dim=args.decoder_embed_dim,
            attention=eval(args.decoder_attention),
            dropout=args.dropout,
            max_positions=args.tokens_per_sample,
            share_embed=False,
            positional_embeddings=False,
            # An adaptive softmax cutoff only makes sense when training with
            # the adaptive loss criterion.
            adaptive_softmax_cutoff=(
                utils.eval_str_list(args.adaptive_softmax_cutoff, type=int)
                if args.criterion == "adaptive_loss"
                else None
            ),
            adaptive_softmax_dropout=args.adaptive_softmax_dropout,
        )
        return FConvLanguageModel(decoder)
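
In fairseq this classmethod is normally reached through the training pipeline rather than called directly. A minimal sketch of that path, assuming the argparse-based fairseq API this snippet is written against:

from fairseq import options, tasks

parser = options.get_training_parser()
args = options.parse_args_and_arch(parser)  # e.g. --task language_modeling --arch fconv_lm
task = tasks.setup_task(args)               # loads the dictionaries
model = task.build_model(args)              # dispatches to build_model above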
Example #2
import torch
from fairseq import options
from fairseq.models.fconv import FConvDecoder


def load_word_model(pytorch_model_path, fairseq_dict, dataset_type):
    # Convolutional stack as (out_channels, kernel_width[, residual]) tuples,
    # written as a plain literal rather than eval'ing an equivalent string.
    layer = (
        [(512, 5)]
        + [(128, 1, 0), (128, 5, 0), (512, 1, 3)] * 3
        + [(512, 1, 0), (512, 5, 0), (1024, 1, 3)] * 3
        + [(1024, 1, 0), (1024, 5, 0), (2048, 1, 3)] * 6
        + [(1024, 1, 0), (1024, 5, 0), (4096, 1, 3)]
    )
    model_state = torch.load(pytorch_model_path)

    # Adaptive softmax cutoffs depend on the corpus vocabulary size.
    if dataset_type == "wsj":
        cutoff = "10000,50000,100000"
    elif dataset_type == "ls":
        cutoff = "10000,50000,200000"
    else:
        # An empty cutoff string would crash eval_str_list below, so fail fast.
        raise ValueError("unsupported dataset_type: {!r}".format(dataset_type))
    convLM = FConvDecoder(
        fairseq_dict,
        embed_dim=128,
        out_embed_dim=128,
        max_positions=1024,
        convolutions=layer,
        dropout=0.1,
        share_embed=False,
        attention=False,
        positional_embeddings=False,
        # eval_str_list turns the comma-separated string into a list of ints,
        # e.g. [10000, 50000, 100000], used as the adaptive softmax cutoffs.
        adaptive_softmax_cutoff=options.eval_str_list(cutoff, type=int),
        adaptive_softmax_dropout=0,
    ).cuda()

    # compute_new_state is defined elsewhere in the source file.
    convLM.load_state_dict(compute_new_state(model_state))
    # eval() switches the whole module tree, including the adaptive softmax
    # head, into inference mode.
    convLM.eval()
    return convLM
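
A hypothetical usage sketch (placeholder path; fairseq_dict must match the checkpoint's vocabulary). Because an adaptive softmax head is attached, the decoder's forward pass returns hidden features rather than logits, and log-probabilities come from the head itself:

lm = load_word_model("word_lm.pt", fairseq_dict, "wsj")
tokens = torch.cuda.LongTensor([[fairseq_dict.eos()]])   # (batch=1, len=1)
with torch.no_grad():
    features, _ = lm(tokens)  # FConvDecoder returns (features, attention)
    log_probs = lm.adaptive_softmax.get_log_prob(features, target=None)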
Example #3
import torch
from fairseq.models.fconv import FConvDecoder


def load_char_model_14B(pytorch_model_path, fairseq_dict, dataset_type):
    # Same convolutional stack as the word model; dataset_type is unused in
    # this loader. The spec is a plain literal rather than an eval'd string.
    layer = (
        [(512, 5)]
        + [(128, 1, 0), (128, 5, 0), (512, 1, 3)] * 3
        + [(512, 1, 0), (512, 5, 0), (1024, 1, 3)] * 3
        + [(1024, 1, 0), (1024, 5, 0), (2048, 1, 3)] * 6
        + [(1024, 1, 0), (1024, 5, 0), (4096, 1, 3)]
    )
    model_state = torch.load(pytorch_model_path)

    convLM_char = FConvDecoder(
        fairseq_dict,
        embed_dim=128,
        out_embed_dim=128,
        max_positions=1024,
        convolutions=layer,
        dropout=0.1,
        share_embed=False,
        attention=False,
        positional_embeddings=False,
        # No adaptive softmax: the decoder's final projection produces
        # full-vocabulary logits directly.
        adaptive_softmax_cutoff=None,
        adaptive_softmax_dropout=0,
    ).cuda()

    convLM_char.load_state_dict(compute_new_state(model_state))
    convLM_char.eval()
    return convLM_char
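
Usage mirrors the word model, but without an adaptive softmax the forward pass yields logits over the full character vocabulary (a hypothetical sketch with placeholder names):

char_lm = load_char_model_14B("char_lm.pt", fairseq_dict, "wsj")
tokens = torch.cuda.LongTensor([[fairseq_dict.eos()]])
with torch.no_grad():
    logits, _ = char_lm(tokens)                   # (1, 1, len(fairseq_dict))
    log_probs = torch.log_softmax(logits, dim=-1)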
Example #4
def create_partitioned_model(device, nsamples, padding_index, ntokens, dim,
                             noise_std, dropout, dictionary, max_positions,
                             nconv) -> NoEncoderFConvDecoderModel:
    # Assemble the encoder/decoder pair; both wrapper classes come from the
    # surrounding project rather than from fairseq itself.

    encoder = NoEncoderSmartPartitions(device=device,
                                       sample_size=nsamples,
                                       padding_index=padding_index,
                                       ntokens=ntokens,
                                       embed_dim=dim,
                                       noise_std=noise_std,
                                       dropout=dropout)

    # nconv identical convolutional blocks, each with dim channels and
    # kernel width 3.
    decoder = FConvDecoder(dictionary,
                           embed_dim=dim,
                           out_embed_dim=dim // 2,
                           max_positions=max_positions,
                           convolutions=((dim, 3),) * nconv)

    model = NoEncoderFConvDecoderModel(encoder, decoder).to(device)

    return model
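
A hypothetical invocation, assuming a fairseq-style Dictionary; every numeric value below is illustrative only:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = create_partitioned_model(device=device,
                                 nsamples=10,
                                 padding_index=dictionary.pad(),
                                 ntokens=len(dictionary),
                                 dim=256,
                                 noise_std=0.1,
                                 dropout=0.1,
                                 dictionary=dictionary,
                                 max_positions=1024,
                                 nconv=4)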