] filenames = copy.deepcopy(train_filenames) fp_train.close() fp_test = open(test_file, 'r') test_filenames = [os.path.join(TEST_DIR, line.strip()) for line in fp_test] fp_test.close() filenames.extend(test_filenames) corpus = DP.Corpus(DATA_DIR, filenames) nlabel = 8 # create model model = LSTMC.LSTMClassifier(embedding_dim=embedding_dim, hidden_dim=hidden_dim, vocab_size=len(corpus.dictionary), label_size=nlabel, batch_size=batch_size, use_gpu=use_gpu, attn_flag=attn_flag) if use_gpu: model = model.cuda() # data processing dtrain_set = DP.TxtDatasetProcessing(DATA_DIR, TRAIN_DIR, TRAIN_FILE, TRAIN_LABEL, sentence_len, corpus) train_loader = DataLoader(dtrain_set, batch_size=batch_size, shuffle=True) dtest_set = DP.TxtDatasetProcessing(DATA_DIR, TEST_DIR, TEST_FILE, TEST_LABEL, sentence_len, corpus) test_loader = DataLoader(dtest_set, batch_size=batch_size, shuffle=False)
char2int = {ch: ii for ii, ch in int2char.items()} ### data processing if html: dtrain_set = HTMLCharDataset(int2char, char2int, input_len, 'html_trainset.pkl') dtest_set = HTMLCharDataset(int2char, char2int, input_len, 'html_valset.pkl') else: dtrain_set = URLCharDataset(int2char, char2int, input_len, TRAIN_URLS, TRAIN_LABELS) dtest_set = URLCharDataset(int2char, char2int, input_len, TEST_URLS, TEST_LABELS) ### create model model = LSTMC.LSTMClassifier(embedding_dim=embedding_dim, hidden_dim=hidden_dim, vocab_size=dtrain_set.vocab_size, label_size=nlabel, batch_size=batch_size, gpu=gpu) if gpu >= 0: model = model.cuda(gpu) optimizer = optim.SGD(model.parameters(), lr=learning_rate) loss_function = nn.CrossEntropyLoss() train_loss_ = [] test_loss_ = [] train_acc_ = [] test_acc_ = [] ### training procedure train_loader = DataLoader(dtrain_set, batch_size=batch_size, shuffle=True,