def gen_iterators(self):
    """Build the train/test data iterators for the padded text dataset.

    Lazily downloads/loads the raw data file if needed, pads/truncates
    every sentence to ``self.sentence_length``, and wraps the resulting
    arrays in ArrayIterators.

    Returns:
        dict: with keys
            'nclass' (int): number of target classes reported by pad_data,
            'train' (ArrayIterator): training set iterator,
            'test' (ArrayIterator): test set iterator.
    """
    # Lazily fetch the raw data file on first use.
    if self.filepath is None:
        self.load_data()

    data = pad_data(self.filepath,
                    vocab_size=self.vocab_size,
                    sentence_length=self.sentence_length)
    (X_train, y_train), (X_test, y_test), nclass = data

    self._data_dict = {'nclass': nclass}
    # Use the nclass reported by pad_data rather than a hard-coded 2 so the
    # iterators stay consistent with the 'nclass' entry recorded above
    # (previously both iterators forced nclass=2 regardless of the data).
    self._data_dict['train'] = ArrayIterator(X_train, y_train, nclass=nclass)
    self._data_dict['test'] = ArrayIterator(X_test, y_test, nclass=nclass)
    return self._data_dict
def pad_data(path, vocab_size=20000, sentence_length=100, oov=2, start=1,
             index_from=3, seed=113, test_split=0.2):
    """Deprecated shim: forwards to neon.data.text_preprocessing.pad_data.

    Arguments:
        path (str): path to the raw data file.
        vocab_size (int): vocabulary size cap.
        sentence_length (int): length sentences are padded/truncated to.
        oov (int): index used for out-of-vocabulary words.
        start (int): index used for the start-of-sentence token.
        index_from (int): offset applied to word indices.
        seed (int): RNG seed for the train/test shuffle.
        test_split (float): fraction of data held out for the test set.

    Returns:
        Whatever neon.data.text_preprocessing.pad_data returns
        ((X_train, y_train), (X_test, y_test), nclass).
    """
    logger.error('pad_data in the Text class is deprecated. This function '
                 'is now in neon.data.text_preprocessing')
    # Import the relocated implementation locally: the original body called
    # pad_data(...) directly, but this def shadows any module-level import of
    # that name, so the call recursed into this wrapper forever (stack
    # overflow). Delegating explicitly fixes the unbounded recursion.
    from neon.data.text_preprocessing import pad_data as _pad_data
    return _pad_data(path, vocab_size=vocab_size,
                     sentence_length=sentence_length, oov=oov, start=start,
                     index_from=index_from, seed=seed, test_split=test_split)
args.batch_size = 128 gradient_clip_value = 15 vocab_size = 20000 sentence_length = 128 embedding_dim = 128 hidden_size = 128 reset_cells = True # setup backend be = gen_backend(**extract_valid_args(args, gen_backend)) # make dataset path = load_imdb(path=args.data_dir) (X_train, y_train), (X_test, y_test), nclass = pad_data(path, vocab_size=vocab_size, sentence_length=sentence_length) print "Vocab size - ", vocab_size print "Sentence Length - ", sentence_length print "# of train sentences", X_train.shape[0] print "# of test sentence", X_test.shape[0] train_set = ArrayIterator(X_train, y_train, nclass=2) valid_set = ArrayIterator(X_test, y_test, nclass=2) # weight initialization uni = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim) g_uni = GlorotUniform() if args.rlayer_type == 'lstm':
# hyperparameters from the reference args.batch_size = 128 gradient_clip_value = 15 vocab_size = 20000 sentence_length = 128 embedding_dim = 128 hidden_size = 128 reset_cells = True # setup backend be = gen_backend(**extract_valid_args(args, gen_backend)) # make dataset path = load_imdb(path=args.data_dir) (X_train, y_train), (X_test, y_test), nclass = pad_data(path, vocab_size=vocab_size, sentence_length=sentence_length) neon_logger.display("Vocab size - {}".format(vocab_size)) neon_logger.display("Sentence Length - {}".format(sentence_length)) neon_logger.display("# of train sentences {}".format(X_train.shape[0])) neon_logger.display("# of test sentence {}".format(X_test.shape[0])) train_set = ArrayIterator(X_train, y_train, nclass=2) valid_set = ArrayIterator(X_test, y_test, nclass=2) # weight initialization uni = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim) g_uni = GlorotUniform() if args.rlayer_type == 'lstm':