import logging

import torch.nn as nn
import torch.optim as optim

import dataset
from dataset import SifDataset
from embeddings import EmbeddingsDictionary  # assumed module path for the embeddings helper
from model import BowModel                   # assumed module path for the bag-of-words model

# Set up the root logger at INFO level so progress messages are written to the console.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
fmt = logging.Formatter('%(asctime)s: %(message)s', '%m/%d/%Y %I:%M:%S %p')
console = logging.StreamHandler()
console.setFormatter(fmt)
console.setLevel(logging.INFO)
logger.addHandler(console)

# Here we load only a small chunk of the embeddings (the 100k most common words).
# You can change this if you want.
all_words = set(line.strip() for line in open('all_sst_words.txt'))
emb_dict = EmbeddingsDictionary(word_whitelist=all_words)

data = SifDataset()
train_exs, train_labels, train_freq = dataset.preprocess_dataset(
    data.train, emb_dict.dictionary)
logging.info('Loaded train, size={}, npos={}'.format(len(train_exs), sum(train_labels).sum()))
dev_exs, dev_labels, dev_freq = dataset.preprocess_dataset(
    data.dev, emb_dict.dictionary)
logging.info('Loaded dev, size={}, npos={}'.format(len(dev_exs), sum(dev_labels).sum()))

# Bag-of-words model over the pretrained embeddings; only parameters with
# requires_grad=True (i.e. not the frozen embedding table) are optimized.
model = BowModel(emb_dict.emb, train_freq)
loss_fn = nn.NLLLoss()
optimized_params = filter(lambda p: p.requires_grad, model.parameters())
optimizer = optim.Adam(optimized_params, lr=0.003)
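# The script above stops right after building the optimizer. A minimal training
# loop for this setup might look like the sketch below; this is an assumption
# about how the pieces fit together (per-example updates, NLLLoss applied to the
# model's log-probabilities), not the author's original loop, and `n_epochs` is
# a hypothetical parameter.
from torch.autograd import Variable

n_epochs = 5
for epoch in range(n_epochs):
    epoch_loss = 0.0
    for ex, label in zip(train_exs, train_labels):
        # Unsqueeze the example into a batch of size 1, matching the eval script.
        ex, label = Variable(ex.unsqueeze(0)), Variable(label)
        optimizer.zero_grad()
        output = model(ex)             # expected to return log-probabilities
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.data[0]
    logging.info('Epoch {}: train loss {:.4f}'.format(epoch, epoch_loss / len(train_exs)))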
import torch
from torch.autograd import Variable

import dataset
from dataset import SifDataset

data = SifDataset()

# Load the trained model and the word dictionary from the saved checkpoint.
checkpoint = torch.load('model.pth')
model = checkpoint['net'].eval()
dictionary = checkpoint['dict']
# print('Loaded model {}, reported w/ accuracy {}'.format(model, checkpoint['score']))

correct = 0
exs, labels = dataset.preprocess_dataset(data.dev, dictionary)
for ex, target in zip(exs, labels):
    # volatile=True disables gradient tracking (pre-0.4 PyTorch inference idiom).
    ex, target = Variable(ex.unsqueeze(0), volatile=True), Variable(target)
    output = model(ex)
    pred = output.data.max(1)[1]  # get the index of the max log-probability
    correct += pred.eq(target.data).cpu().sum()
print("%.1f" % (100. * correct / len(exs)))
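# For reference, the checkpoint loaded above would presumably have been written by
# a call like the one below in the training script. The 'net', 'dict' and 'score'
# keys are taken from the loading code; `dev_accuracy` is a hypothetical variable
# standing in for whatever dev score was reported.
torch.save({'net': model, 'dict': emb_dict.dictionary, 'score': dev_accuracy}, 'model.pth')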