# Imports needed by the examples below (assumption: the Baseline_* models and
# the SNLI data helpers live in the accompanying models.py and utils.py, as in
# the ARAE reference code).
import pickle as pkl
import random

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from models import Baseline_Embeddings, Baseline_LSTM
from utils import SNLIDataset, collate_snli

args = parser.parse_args()

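# The dataset reserves 4 vocabulary slots for special tokens (an assumption
# based on the usual <pad>/<sos>/<eos>/<oov> convention), hence vocab_size-4.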
corpus_train = SNLIDataset(train=True, vocab_size=args.vocab_size-4, path=args.data_path)
corpus_test = SNLIDataset(train=False, vocab_size=args.vocab_size-4, path=args.data_path)
trainloader = torch.utils.data.DataLoader(corpus_train, batch_size=args.batch_size,
                                          collate_fn=collate_snli, shuffle=True)
train_iter = iter(trainloader)
testloader = torch.utils.data.DataLoader(corpus_test, batch_size=args.batch_size,
                                         collate_fn=collate_snli, shuffle=False)

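# Seed Python, NumPy, and PyTorch RNGs so runs are reproducible.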
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

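# Choose the baseline classifier; the positional arguments are assumed to be
# the embedding size (100) and, for the LSTM, its hidden size (300).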
if args.model_type == "lstm":
    baseline_model = Baseline_LSTM(100, 300, maxlen=args.maxlen, gpu=args.cuda)
elif args.model_type == "emb":
    baseline_model = Baseline_Embeddings(100, vocab_size=args.vocab_size)

if args.cuda:
    baseline_model = baseline_model.cuda()
optimizer = optim.Adam(baseline_model.parameters(),
                       lr=args.lr,
                       betas=(args.beta1, 0.999))
# Only move the loss to the GPU when CUDA is actually requested; the original
# unconditional .cuda() crashes on CPU-only machines.
criterion = nn.CrossEntropyLoss()
if args.cuda:
    criterion = criterion.cuda()

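# Training loop; best_accuracy tracks the highest test accuracy seen so far.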
best_accuracy = 0
if args.train_mode:
    for epoch in range(args.epochs):
        niter = 0
        loss_total = 0
        while niter < len(trainloader):
            niter += 1

###############################################################################
# Example 2
###############################################################################
    if not args.convolution_enc:
        args.packed_rep = True

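    # batchify (helper assumed from the surrounding codebase) groups the corpus
    # into batches of up to maxlen tokens; packed_rep presumably requests
    # packed-sequence representations for the RNN encoder.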
    train_data = batchify(corpus.train, args.batch_size, args.maxlen,
                          packed_rep=args.packed_rep, shuffle=True)
    valid_data = batchify(corpus.test, args.batch_size, args.maxlen,
                          packed_rep=args.packed_rep, shuffle=False)

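    # Rebuild the SNLI test set on the training corpus's vocabulary so word
    # indices line up (reset_vocab reuses corpus.dictionary.word2idx).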
    corpus_test = SNLIDataset(train=False, vocab_size=args.vocab_size+4,
                              reset_vocab=corpus.dictionary.word2idx)
    testloader = torch.utils.data.DataLoader(corpus_test, batch_size=10,
                                             collate_fn=collate_snli, shuffle=False)
    test_data = iter(testloader)        # different format from train_data and valid_data

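    # Pretrained target classifiers: a bag-of-embeddings baseline and an LSTM
    # baseline, each restored from a checkpoint along with its vocabulary.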
    classifier1 = Baseline_Embeddings(100, vocab_size=args.vocab_size+4)
    classifier1.load_state_dict(torch.load(args.classifier_path + "/baseline/model_emb.pt"))
    vocab_classifier1 = pkl.load(open(args.classifier_path + "/vocab.pkl", 'rb'))

    classifier2 = Baseline_LSTM(100, 300, maxlen=10, gpu=args.cuda)
    classifier2.load_state_dict(torch.load(args.classifier_path + "/baseline/model_lstm.pt"))
    vocab_classifier2 = pkl.load(open(args.classifier_path + "/vocab.pkl", 'rb'))

    print("Loaded data and target classifiers!")

    ###############################################################################
    # Build the models
    ###############################################################################
    ntokens = len(corpus.dictionary.word2idx)
    args.ntokens = ntokens
    print("Vocabulary Size: {}".format(ntokens))

###############################################################################
# Example 3
###############################################################################
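# Same setup as Example 2, but with hard-coded dataset paths and vocab sizes.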
train_data = batchify(corpus.train,
                      args.batch_size,
                      args.maxlen,
                      packed_rep=args.packed_rep,
                      shuffle=True)
corpus_test = SNLIDataset(
    train=False,
    vocab_size=41578,
    reset_vocab="/home/ddua/data/arae/output/example/1504200881/vocab.json")
testloader = torch.utils.data.DataLoader(corpus_test,
                                         batch_size=10,
                                         collate_fn=collate_snli,
                                         shuffle=False)
test_data = iter(testloader)

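# Restore the two pretrained target classifiers from their checkpoints.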
classifier1 = Baseline_Embeddings(100, maxlen=10, gpu=True, vocab_size=41578)
classifier1.load_state_dict(
    torch.load("/home/ddua/data/snli/baseline/model_emb.pt"))
classifier2 = Baseline_LSTM(100, 300, maxlen=10, gpu=args.cuda)
classifier2.load_state_dict(
    torch.load("/home/ddua/data/snli/baseline/model_lstm.pt"))

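# Word-to-index vocabularies matching each classifier (41578 and 11004 words).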
# Pickle files must be opened in binary mode ('rb'), not text mode.
vocab_classifier1 = pkl.load(
    open("/home/ddua/data/snli/snli_1.0/vocab_41578.pkl", 'rb'))
vocab_classifier2 = pkl.load(
    open("/home/ddua/data/snli/snli_1.0/vocab_11004.pkl", 'rb'))

print("Loaded data!")

ntokens = len(corpus.dictionary.word2idx)
start_epoch = 1

###############################################################################
# Example 4
###############################################################################
# The opening lines of this fragment (argument parsing and dataset creation)
# are missing; the DataLoader call below is reconstructed from its surviving
# `shuffle=True)` tail and the parallel calls in the other examples.
trainloader = torch.utils.data.DataLoader(corpus_train,
                                          batch_size=args.batch_size,
                                          collate_fn=collate_snli,
                                          shuffle=True)
train_iter = iter(trainloader)
testloader = torch.utils.data.DataLoader(corpus_test,
                                         batch_size=args.batch_size,
                                         collate_fn=collate_snli,
                                         shuffle=False)

random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

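# Same model selection as Example 1, except the embedding baseline's
# vocabulary size is hard-coded to 11004 (matching vocab_11004.pkl above).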
if args.model_type == "lstm":
    baseline_model = Baseline_LSTM(100, 300, maxlen=args.maxlen, gpu=args.cuda)
elif args.model_type == "emb":
    baseline_model = Baseline_Embeddings(100,
                                         maxlen=args.maxlen,
                                         gpu=args.cuda,
                                         vocab_size=11004)

if args.cuda:
    baseline_model = baseline_model.cuda()
optimizer = optim.Adam(baseline_model.parameters(),
                       lr=args.lr,
                       betas=(args.beta1, 0.999))
criterion = nn.CrossEntropyLoss()

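# Training loop, as in Example 1.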
best_accuracy = 0
if args.train_mode:
    for epoch in range(args.epochs):
        niter = 0
        loss_total = 0
        while niter < len(trainloader):