def train_data_require(args, word2idx):
    """Build the SNLI train/test DataLoaders with a frozen vocabulary.

    Both splits are constructed from the same settings so that sentences in
    train and test are indexed by the identical *word2idx* mapping (passed
    through ``reset_vocab``).

    Args:
        args: parsed CLI namespace; reads ``vocab_size``, ``data_path``,
            ``attack_label`` and ``batch_size``.
        word2idx: word -> index mapping used to reset the dataset vocabulary.

    Returns:
        ``(train_loader, test_loader)`` — two ``torch.utils.data.DataLoader``
        instances; only the training loader shuffles.
    """
    train_split = SNLIDataset(train=True,
                              vocab_size=args.vocab_size,
                              path=args.data_path,
                              attack_label=args.attack_label,
                              reset_vocab=word2idx)
    test_split = SNLIDataset(train=False,
                             vocab_size=args.vocab_size,
                             path=args.data_path,
                             attack_label=args.attack_label,
                             reset_vocab=word2idx)
    # Shuffle training batches only; evaluation order stays deterministic.
    train_loader = torch.utils.data.DataLoader(train_split,
                                               batch_size=args.batch_size,
                                               collate_fn=collate_snli,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_split,
                                              batch_size=args.batch_size,
                                              collate_fn=collate_snli,
                                              shuffle=False)
    return train_loader, test_loader
# NOTE(review): whitespace-mangled paste — many top-level statements collapsed
# onto one physical line; reflow before running. The fragment is also
# truncated: the trailing `if args.cuda:` has no body here, so the code is
# left byte-identical rather than reformatted.
# Things to confirm once reformatted:
#   * SNLIDataset is built with vocab_size - 4 (presumably 4 reserved special
#     tokens — TODO confirm against SNLIDataset) while Baseline_Embeddings
#     receives the full args.vocab_size; verify the off-by-four is intended.
#   * '--cuda' combines action='store_true' with default=True, so the flag
#     can never be switched off from the command line.
parser.add_argument('--lr', type=float, default=1e-05, help='learning rate') parser.add_argument('--seed', type=int, default=1111, help='seed') parser.add_argument('--beta1', type=float, default=0.9, help='beta1 for adam. default=0.9') parser.add_argument('--cuda', action='store_true', default=True, help='use CUDA') parser.add_argument('--save_path', type=str, required=True, help='used for saving the models') parser.add_argument('--vocab_size', type=int, default=11004, help='vocabulary size') args = parser.parse_args() corpus_train = SNLIDataset(train=True, vocab_size=args.vocab_size-4, path=args.data_path) corpus_test = SNLIDataset(train=False, vocab_size=args.vocab_size-4, path=args.data_path) trainloader= torch.utils.data.DataLoader(corpus_train, batch_size = args.batch_size, collate_fn=collate_snli, shuffle=True) train_iter = iter(trainloader) testloader= torch.utils.data.DataLoader(corpus_test, batch_size = args.batch_size, collate_fn=collate_snli, shuffle=False) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if args.model_type=="lstm": baseline_model = Baseline_LSTM(100,300,maxlen=args.maxlen, gpu=args.cuda) elif args.model_type=="emb": baseline_model = Baseline_Embeddings(100, vocab_size=args.vocab_size) if args.cuda:
# NOTE(review): mangled and truncated fragment — it begins with a dangling
# `else:` (the matching `if` is outside this view) and ends mid-call
# (`vocab_classifier1 = pkl.load(` is unterminated), so the code is left
# byte-identical.
# Issues to address when the full file is reformatted:
#   * Hard-coded, user-specific absolute paths (/home/ddua/...) and the magic
#     vocabulary size 41578 should come from CLI arguments, as the parallel
#     fragment elsewhere in this file does (args.classifier_path, vocab_size).
#   * classifier1 is built with gpu=True unconditionally while classifier2
#     uses gpu=args.cuda — presumably both should honor args.cuda; verify.
else: corpus = Corpus(args.data_path, maxlen=args.maxlen, vocab_size=args.vocab_size, lowercase=args.lowercase) eval_batch_size = 10 if not args.convolution_enc: args.packed_rep = True train_data = batchify(corpus.train, args.batch_size, args.maxlen, packed_rep=args.packed_rep, shuffle=True) corpus_test = SNLIDataset( train=False, vocab_size=41578, reset_vocab="/home/ddua/data/arae/output/example/1504200881/vocab.json") testloader = torch.utils.data.DataLoader(corpus_test, batch_size=10, collate_fn=collate_snli, shuffle=False) test_data = iter(testloader) classifier1 = Baseline_Embeddings(100, maxlen=10, gpu=True, vocab_size=41578) classifier1.load_state_dict( torch.load("/home/ddua/data/snli/baseline/model_emb.pt")) classifier2 = Baseline_LSTM(100, 300, maxlen=10, gpu=args.cuda) classifier2.load_state_dict( torch.load("/home/ddua/data/snli/baseline/model_lstm.pt")) vocab_classifier1 = pkl.load(
# NOTE(review): truncated fragment — it opens mid-argument
# (`load_vocab=cur_dir + '/vocab.json')` belongs to a call started outside
# this view), so the code is left byte-identical.
# Things to confirm when reformatting:
#   * vocab_size + 4 is used for both SNLIDataset and Baseline_Embeddings —
#     presumably 4 reserved special tokens; TODO confirm against SNLIDataset.
#   * The same vocab.pkl is pickle-loaded twice (vocab_classifier1 and
#     vocab_classifier2); loading once and sharing would suffice, and the
#     `open(...)` handles are never closed — use `with open(...)`.
load_vocab=cur_dir + '/vocab.json') else: corpus = Corpus(args.data_path, maxlen=args.maxlen, vocab_size=args.vocab_size, lowercase=args.lowercase) if not args.convolution_enc: args.packed_rep = True train_data = batchify(corpus.train, args.batch_size, args.maxlen, packed_rep=args.packed_rep, shuffle=True) valid_data = batchify(corpus.test, args.batch_size, args.maxlen, packed_rep=args.packed_rep, shuffle=False) corpus_test = SNLIDataset(train=False, vocab_size=args.vocab_size+4, reset_vocab=corpus.dictionary.word2idx) testloader = torch.utils.data.DataLoader(corpus_test, batch_size=10, collate_fn=collate_snli, shuffle=False) test_data = iter(testloader) # different format from train_data and valid_data classifier1 = Baseline_Embeddings(100, vocab_size=args.vocab_size+4) classifier1.load_state_dict(torch.load(args.classifier_path + "/baseline/model_emb.pt")) vocab_classifier1 = pkl.load(open(args.classifier_path + "/vocab.pkl", 'rb')) classifier2 = Baseline_LSTM(100, 300, maxlen=10, gpu=args.cuda) classifier2.load_state_dict(torch.load(args.classifier_path + "/baseline/model_lstm.pt")) vocab_classifier2 = pkl.load(open(args.classifier_path + "/vocab.pkl", 'rb')) print("Loaded data and target classifiers!") ###############################################################################
# NOTE(review): truncated fragment — it begins mid `parser.add_argument` call
# (the `'--beta1',` opener is outside this view), so the code is left
# byte-identical rather than reformatted.
# Issues to fix when reformatting:
#   * BUG: '--save_path' is declared with action='store_true', default=True,
#     yet its help text says it is "used for saving the models" — it should
#     almost certainly be type=str (a path), as the sibling fragment at the
#     top of this file declares it.
#   * '--cuda' with action='store_true' and default=True can never be
#     disabled from the command line.
#   * vocab_size is hard-coded to 11004 here instead of a CLI argument.
type=float, default=0.9, help='beta1 for adam. default=0.9') parser.add_argument('--cuda', action='store_true', default=True, help='use CUDA') parser.add_argument('--save_path', action='store_true', default=True, help='used for saving the models') args = parser.parse_args() corpus_train = SNLIDataset(train=True, vocab_size=11004, lvt=False, path=args.data_path) corpus_test = SNLIDataset(train=False, vocab_size=11004, lvt=False, path=args.data_path) trainloader = torch.utils.data.DataLoader(corpus_train, batch_size=args.batch_size, collate_fn=collate_snli, shuffle=True) train_iter = iter(trainloader) testloader = torch.utils.data.DataLoader(corpus_test, batch_size=args.batch_size, collate_fn=collate_snli, shuffle=False)
# NOTE(review): truncated fragment — it begins mid-argument (the opening
# `parser.add_argument(` is outside this view), so the code is left
# byte-identical. Issues to fix when reformatting:
#   * BUG: '--dropout' is declared type=int with default=0.5; any value
#     passed on the command line is truncated to an integer — should be
#     type=float.
#   * '--vocab_path' help text reads 'vocabulary size' (copy-paste); it is a
#     path to a vocab json.
#   * json.load(open(args.vocab_path, "rb")) never closes the file handle —
#     use a `with open(...)` block.
#   * `with open(...) as pkl:` binds the file object to the name `pkl`,
#     shadowing the `pkl` alias used for pickle elsewhere in this project —
#     rename the handle to avoid confusion.
help='vocabulary size') parser.add_argument('--vocab_path', type=str, default='./output/1593075369/vocab.json', help='vocabulary size') parser.add_argument('--hidden_size', type=int, default=300, help='hidden size') parser.add_argument('--dropout', type=int, default=0.5, help='drop_out') args = parser.parse_args() word2idx = json.load(open(args.vocab_path, "rb")) # model_idx2word = {v: k for k, v in model_word2idx.items()} corpus_train = SNLIDataset(train=True, vocab_size=args.vocab_size, path=args.data_path, reset_vocab=word2idx) corpus_test = SNLIDataset(train=False, vocab_size=args.vocab_size, path=args.data_path, reset_vocab=word2idx) # embed_matrix = corpus_train.build_embedding_matrix('./data/embeddings/glove.840B.300d.txt') embeddings_file = './data/embeddings/embeddings.pkl' with open(embeddings_file, "rb") as pkl: embeddings = torch.tensor(pickle.load(pkl), dtype=torch.float) trainloader = torch.utils.data.DataLoader(corpus_train, batch_size=args.batch_size, collate_fn=collate_snli, shuffle=True)
# ---------------------------------------------------------------------------
# CLI arguments (continues an argparse parser defined above this fragment).
# ---------------------------------------------------------------------------
parser.add_argument('--vocab_size', type=int, default=11000,
                    help='vocabulary size')
parser.add_argument('--attack_label', type=int, default=0,
                    help='attack_label')
# Fixed help text: this argument is a path to a vocab json, not a size
# (the original help string was copy-pasted from '--vocab_size').
parser.add_argument('--vocab_path', type=str,
                    default='./output/1593075369/vocab.json',
                    help='path to the vocabulary json file')
args = parser.parse_args()

# Load the frozen word -> index vocabulary. A `with` block closes the file
# handle; the original `json.load(open(...))` leaked it.
with open(args.vocab_path, "rb") as vocab_file:
    word2idx = json.load(vocab_file)

# Test split only — this script evaluates/attacks, it does not train.
# reset_vocab pins the dataset indices to the loaded vocabulary.
corpus_test = SNLIDataset(train=False, vocab_size=args.vocab_size,
                          path=args.data_path,
                          attack_label=args.attack_label,
                          reset_vocab=word2idx)
print(len(corpus_test.test_data))
testloader = torch.utils.data.DataLoader(corpus_test,
                                         batch_size=args.batch_size,
                                         collate_fn=collate_snli,
                                         shuffle=False)

# Seed every RNG source so attack runs are reproducible.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

if args.model_type == "lstm":
    baseline_model = Baseline_LSTM(100, 300, maxlen=args.maxlen, gpu=args.cuda)