batch_size = opt.batch_size

# read dataset
if os.path.exists('dataset.pickle'):
    with open('dataset.pickle', 'rb') as f:
        train_iter, dev_iter, test_iter, vocab = pickle.load(f)
else:
    root_dir = opt.data
    segments = ['train', 'dev', 'test']
    token_files = [os.path.join(root_dir, seg, '%s.toks' % tok)
                   for tok in ['a', 'b']
                   for seg in segments]

    vocab = Vocab(filepaths=token_files, embedpath=opt.word_embed)

    train_iter, dev_iter, test_iter = [SICKDataIter(os.path.join(root_dir, segment), vocab, num_classes)
                                       for segment in segments]

    with open('dataset.pickle', 'wb') as f:
        pickle.dump([train_iter, dev_iter, test_iter, vocab], f)

logging.info('==> SICK vocabulary size : %d ' % vocab.size)
logging.info('==> Size of train data   : %d ' % len(train_iter))
logging.info('==> Size of dev data     : %d ' % len(dev_iter))
logging.info('==> Size of test data    : %d ' % len(test_iter))

# get network
net = SimilarityTreeLSTM(sim_hidden_size, rnn_hidden_size, vocab.size,
                         vocab.embed.shape[1], num_classes)

# use pearson correlation and mean-square error for evaluation
metric = mx.metric.create(['pearsonr', 'mse'])
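
# Illustrative sketch, not part of the original script: a minimal helper showing
# how the composite metric created above would typically be used for one
# evaluation pass. `labels` and `preds` are assumed to be lists of NDArrays of
# gold relatedness scores and model outputs; the helper name is hypothetical.
def _report_metric(labels, preds):
    metric.reset()
    metric.update(labels, preds)
    names, values = metric.get()  # returns the names and values of both sub-metrics
    for name, value in zip(names, values):
        logging.info('%s = %f' % (name, value))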