def loadData(opt):
    """Load the dataset named by ``opt.dataset`` and return ``(train_iter, test_iter)``.

    When ``opt.from_torchtext`` is False, delegates to the project-local
    ``dataHelper`` loader.  Otherwise builds torchtext fields/iterators,
    initializes the vocabulary with GloVe 6B-300d vectors, and stores
    vocab/embedding metadata back onto ``opt`` for the model builder.

    Raises:
        ValueError: if ``opt.dataset`` is not one of "imdb", "sst", "trec".
    """
    if not opt.from_torchtext:
        import dataHelper as helper
        return helper.loadData(opt)

    # torchtext device convention: 0 = first GPU, -1 = CPU.
    device = 0 if torch.cuda.is_available() else -1

    TEXT = data.Field(lower=True, include_lengths=True,
                      batch_first=True, fix_length=opt.max_seq_len)
    LABEL = data.Field(sequential=False)

    if opt.dataset == "imdb":
        train, test = datasets.IMDB.splits(TEXT, LABEL)
    elif opt.dataset == "sst":
        train, val, test = datasets.SST.splits(
            TEXT, LABEL, fine_grained=True, train_subtrees=True,
            filter_pred=lambda ex: ex.label != 'neutral')
    elif opt.dataset == "trec":
        train, test = datasets.TREC.splits(TEXT, LABEL, fine_grained=True)
    else:
        # BUG FIX: the original printed a (misspelled) warning and fell
        # through, then crashed with NameError on the undefined `train`
        # below.  Fail fast with a clear message instead.
        raise ValueError("unsupported dataset: %r" % (opt.dataset,))

    TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=300))
    LABEL.build_vocab(train)

    # print vocab information
    print('len(TEXT.vocab)', len(TEXT.vocab))
    print('TEXT.vocab.vectors.size()', TEXT.vocab.vectors.size())

    train_iter, test_iter = data.BucketIterator.splits(
        (train, test), batch_size=opt.batch_size,
        device=device, repeat=False, shuffle=True)

    # Expose dataset statistics and pretrained vectors via `opt` so the
    # model builder can size its embedding layer.
    opt.label_size = len(LABEL.vocab)
    opt.vocab_size = len(TEXT.vocab)
    opt.embedding_dim = TEXT.vocab.vectors.size()[1]
    opt.embeddings = TEXT.vocab.vectors

    return train_iter, test_iter
# --- Evaluation driver: load a saved LSTM checkpoint and report test accuracy ---
from_torchtext = True
opt = opts.parse_opt()
#opt.proxy="http://xxxx.xxxx.com:8080"

# Pin the GPU only if the environment has not already selected one.
if "CUDA_VISIBLE_DEVICES" not in os.environ.keys():
    os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu
#opt.model ='lstm'
#opt.model ='capsule'

if from_torchtext:
    train_iter, test_iter = utils.loadData(opt)
else:
    # BUG FIX: the module is bound as `helper` here, but the original called
    # `dataHelper.loadData(...)`, which raised NameError on this path
    # (`import X as Y` binds only `Y`).
    import dataHelper as helper
    train_iter, test_iter = helper.loadData(opt)

opt.lstm_layers = 2

print('Print loading models')
model2 = models.setup(opt)
model2.load_state_dict(torch.load('saved_models/lstm_test.pt'))
# BUG FIX: guard .cuda() like the second model below — the original
# unconditional call crashed on CPU-only hosts.
if torch.cuda.is_available():
    model2.cuda()
precision = utils.evaluation(model2, test_iter, from_torchtext)
print("After iteration with model 2 Test Acc %.4f" % (precision))
# NOTE(review): debugger breakpoint left in the script — remove for
# unattended runs.
ipdb.set_trace()

model = models.setup(opt)
# model.load_state_dict(torch.load('lstm_new.pt'))
if torch.cuda.is_available():
    model.cuda()
model.train()
print("# parameters:",
      sum(param.numel() for param in model.parameters() if param.requires_grad))
# --- Training setup: load data, build the model, and configure the optimizer ---
from_torchtext = False
opt = opts.parse_opt()
#opt.proxy="http://xxxx.xxxx.com:8080"

# Pin the GPU only if the environment has not already selected one.
if "CUDA_VISIBLE_DEVICES" not in os.environ.keys():
    os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu
#opt.model ='lstm'
#opt.model ='capsule'

if from_torchtext:
    train_iter, test_iter = utils.loadData(opt)
else:
    import dataHelper as helper
    # BUG FIX: the original referenced `dataHelper` although the module was
    # imported as `helper`, so this (default, from_torchtext=False) path
    # raised NameError.
    train_iter, dev_iter, test_iter = helper.loadData(opt)

opt.lstm_layers = 2
# Fixed seed for reproducible weight initialization.
torch.manual_seed(0)

model = models.setup(opt)
if torch.cuda.is_available():
    model.cuda()
model.train()
print("# parameters:",
      sum(param.numel() for param in model.parameters() if param.requires_grad))

# Optimize only trainable parameters (frozen embeddings are excluded).
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=opt.learning_rate)
optimizer.zero_grad()
loss_fun = F.cross_entropy
# --- Classic-ML baseline: bag-of-words + tf-idf + Naive Bayes via sklearn ---
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import numpy as np

import opts
import dataHelper

# refer to "https://zhuanlan.zhihu.com/p/26729228"
opt = opts.parse_opt()
import dataHelper as helper  # NOTE(review): redundant re-import of dataHelper; kept in case `helper` is used later in the file

# embedding=False presumably returns raw (texts, labels) pairs rather than
# embedded tensors — verify against dataHelper.loadData.
train_iter, test_iter = dataHelper.loadData(opt, embedding=False)
#categories = ['good', 'bad', 'mid']
x_train, y_train = train_iter
x_test, y_test = test_iter

#opt.model ="haha"
if opt.model == "bayes":
    # Naive Bayes classifier.
    # sklearn's Pipeline makes it easy to chain
    # vectorizer -> tf-idf -> classifier into one estimator.
    bayes_clf = Pipeline([('vect', CountVectorizer()),
                          ('tfidf', TfidfTransformer()),
                          ('clf', MultinomialNB()),
                          ])
    bayes_clf.fit(x_train, y_train)
    # Predict the test dataset using Naive Bayes.
    predicted = bayes_clf.predict(x_test)