Example #1
def get_model(text_proc, args):
    sent_vocab = text_proc.vocab
    model = ActionPropDenseCap(d_model=args.d_model,
                               d_hidden=args.d_hidden,
                               n_layers=args.n_layers,
                               n_heads=args.n_heads,
                               vocab=sent_vocab,
                               in_emb_dropout=args.in_emb_dropout,
                               attn_dropout=args.attn_dropout,
                               vis_emb_dropout=args.vis_emb_dropout,
                               cap_dropout=args.cap_dropout,
                               nsamples=0,
                               kernel_list=args.kernel_list,
                               stride_factor=args.stride_factor,
                               learn_mask=args.learn_mask)

    # Initialize the networks and the criterion
    if len(args.start_from) > 0:
        print("Initializing weights from {}".format(args.start_from))
        model.load_state_dict(
            torch.load(args.start_from,
                       map_location=lambda storage, location: storage))

    # Ship the model to GPU, maybe
    if args.cuda:
        model.cuda()

    return model
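All three examples restore the checkpoint with torch.load(..., map_location=lambda storage, location: storage), which keeps every tensor on CPU during deserialization, so a checkpoint saved on a GPU machine can be loaded before the optional .cuda() call. Below is a minimal, self-contained sketch of that idiom; the Linear toy model and the file name are illustrative stand-ins, not part of the project above.

import torch
import torch.nn as nn

# Toy model standing in for ActionPropDenseCap.
model = nn.Linear(8, 4)

# Save a checkpoint; in the project above this would typically be written on a GPU machine.
torch.save(model.state_dict(), 'checkpoint.pth')

# map_location=lambda storage, location: storage returns each storage unchanged,
# i.e. every tensor is deserialized to CPU (equivalent to map_location='cpu').
state_dict = torch.load('checkpoint.pth',
                        map_location=lambda storage, location: storage)
model.load_state_dict(state_dict)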
Example #2
def get_model(text_proc, args):
    sent_vocab = text_proc.vocab
    model = ActionPropDenseCap(d_model=args.d_model,
                               d_hidden=args.d_hidden,
                               n_layers=args.n_layers,
                               n_heads=args.n_heads,
                               vocab=sent_vocab,
                               in_emb_dropout=args.in_emb_dropout,
                               attn_dropout=args.attn_dropout,
                               vis_emb_dropout=args.vis_emb_dropout,
                               cap_dropout=args.cap_dropout,
                               nsamples=args.train_sample,
                               kernel_list=args.kernel_list,
                               stride_factor=args.stride_factor,
                               learn_mask=args.mask_weight > 0)

    # Initialize the networks and the criterion
    if len(args.start_from) > 0:
        print("Initializing weights from {}".format(args.start_from))
        model.load_state_dict(
            torch.load(args.start_from,
                       map_location=lambda storage, location: storage))

    # Ship the model to GPU, maybe
    if args.cuda:
        if args.distributed:
            model.cuda()
            model = torch.nn.parallel.DistributedDataParallel(model)
        else:
            model = torch.nn.DataParallel(model).cuda()
        # elif torch.cuda.device_count() > 1:
        #     model = torch.nn.DataParallel(model).cuda()
        # else:
        #     model.cuda()
    return model
Example #3
def get_model(text_proc, args):
    sent_vocab = text_proc.vocab  # vocabulary (dictionary-like object)

    model = ActionPropDenseCap(
        d_model=args.d_model,
        d_hidden=args.d_hidden,
        n_layers=args.n_layers,
        n_heads=args.n_heads,
        vocab=sent_vocab,  # vocabulary object
        in_emb_dropout=args.in_emb_dropout,  # 0.1
        attn_dropout=args.attn_dropout,  # 0.2
        vis_emb_dropout=args.vis_emb_dropout,  # 0.1
        cap_dropout=args.cap_dropout,  # 0.2
        nsamples=args.train_sample,  # 20
        kernel_list=args.kernel_list,
        stride_factor=args.stride_factor,
        learn_mask=args.mask_weight > 0)

    # Initialize the networks and the criterion
    if len(args.start_from) > 0:
        print("Initializing weights from {}".format(args.start_from))
        model.load_state_dict(
            torch.load(args.start_from,
                       map_location=lambda storage, location: storage))

    # Ship the model to GPU, maybe
    if args.cuda:
        if args.distributed:
            model.cuda()
            """
            在多机多卡情况下分布式训练数据的读取也是一个问题,不同的卡读取到的数据应该是不同的。
            dataparallel(单机多卡)的做法是直接将batch切分到不同的卡,这种方法对于多机来说不可取,因为多
            机之间直接进行数据传输会严重影响效率。于是有了利用sampler确保dataloader只会load到
            整个数据集的一个特定子集的做法。DistributedSampler(多机多卡)就是做这件事的。它为每一个子进程
            划分出一部分数据集,以避免不同进程之间数据重复。
            
            """
            model = torch.nn.parallel.DistributedDataParallel(model)
        else:
            model = torch.nn.DataParallel(model).cuda()  # single machine, multi-GPU
        # elif torch.cuda.device_count() > 1:
        #     model = torch.nn.DataParallel(model).cuda()
        # else:
        #     model.cuda()
    return model
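As the docstring in Example #3 notes, DistributedDataParallel expects each process to load its own shard of the data, which is what torch.utils.data.distributed.DistributedSampler provides. The following is a minimal sketch of that pattern; the random dataset, the batch size, and the assumption that the process group is already initialized (e.g. via torchrun) are illustrative, not taken from the original project.

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

# Assumes torch.distributed.init_process_group(...) has already been called,
# e.g. when the script is launched with torchrun / torch.distributed.launch.
dataset = TensorDataset(torch.randn(1000, 16))

# Each process receives a disjoint slice of the dataset, so no sample is
# duplicated across processes within an epoch.
sampler = DistributedSampler(dataset)
loader = DataLoader(dataset, batch_size=32, sampler=sampler, shuffle=False)

for epoch in range(10):
    sampler.set_epoch(epoch)  # reshuffle the shards consistently each epoch
    for (batch,) in loader:
        pass  # forward/backward with the DistributedDataParallel-wrapped model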