Beispiel #1
0
    def gen_iterators(self):
        """Load the padded dataset and build the train/test iterators.

        Loads the raw data first if ``self.filepath`` has not been set yet,
        then pads/truncates every sentence to ``self.sentence_length`` with a
        vocabulary capped at ``self.vocab_size``.

        Returns:
            dict: ``{'nclass': int, 'train': ArrayIterator,
            'test': ArrayIterator}``, also cached on ``self._data_dict``.
        """
        if self.filepath is None:
            self.load_data()

        data = pad_data(self.filepath, vocab_size=self.vocab_size,
                        sentence_length=self.sentence_length)
        (X_train, y_train), (X_test, y_test), nclass = data

        self._data_dict = {'nclass': nclass}
        # Use the class count reported by pad_data rather than a hard-coded 2,
        # so datasets with more than two labels are handled correctly.  For
        # the binary IMDB data this produces exactly the original behavior.
        self._data_dict['train'] = ArrayIterator(X_train, y_train, nclass=nclass)
        self._data_dict['test'] = ArrayIterator(X_test, y_test, nclass=nclass)
        return self._data_dict
Beispiel #2
0
 def pad_data(path, vocab_size=20000, sentence_length=100, oov=2,
              start=1, index_from=3, seed=113, test_split=0.2):
     """Deprecated shim: forward to ``neon.data.text_preprocessing.pad_data``.

     All arguments are passed through unchanged; see the target function for
     their meaning.  Logs a deprecation notice on every call.

     Returns:
         Whatever ``neon.data.text_preprocessing.pad_data`` returns
         (padded train/test splits plus the class count).
     """
     # Fixed: the two adjacent literals previously joined as "functionis now"
     # because the space at the concatenation point was missing.
     logger.error('pad_data in the Text class is deprecated.  This function '
                  'is now in neon.data.text_preprocessing')
     # NOTE(review): viewed in isolation this call looks self-recursive; in
     # the original module it resolves to the module-level pad_data imported
     # from neon.data.text_preprocessing — confirm the import is present.
     return pad_data(path,
                     vocab_size=vocab_size,
                     sentence_length=sentence_length,
                     oov=oov,
                     start=start,
                     index_from=index_from,
                     seed=seed,
                     test_split=test_split)
Beispiel #3
0
    def gen_iterators(self):
        """Build, cache, and return the train/test iterators for this dataset.

        Triggers a data download/load when ``self.filepath`` is unset, pads
        every sentence to ``self.sentence_length``, and wraps the resulting
        splits in ``ArrayIterator`` objects.
        """
        if self.filepath is None:
            self.load_data()

        padded = pad_data(self.filepath,
                          vocab_size=self.vocab_size,
                          sentence_length=self.sentence_length)
        train_pair, test_pair, nclass = padded

        iterators = {'nclass': nclass}
        iterators['train'] = ArrayIterator(train_pair[0], train_pair[1], nclass=2)
        iterators['test'] = ArrayIterator(test_pair[0], test_pair[1], nclass=2)
        self._data_dict = iterators
        return self._data_dict
Beispiel #4
0
 def pad_data(path,
              vocab_size=20000,
              sentence_length=100,
              oov=2,
              start=1,
              index_from=3,
              seed=113,
              test_split=0.2):
     """Deprecated shim: forward to ``neon.data.text_preprocessing.pad_data``.

     All arguments are passed through unchanged; see the target function for
     their meaning.  Logs a deprecation notice on every call.

     Returns:
         Whatever ``neon.data.text_preprocessing.pad_data`` returns
         (padded train/test splits plus the class count).
     """
     # Fixed: the two adjacent literals previously joined as "functionis now"
     # because the space at the concatenation point was missing.
     logger.error('pad_data in the Text class is deprecated.  This function '
                  'is now in neon.data.text_preprocessing')
     # NOTE(review): viewed in isolation this call looks self-recursive; in
     # the original module it resolves to the module-level pad_data imported
     # from neon.data.text_preprocessing — confirm the import is present.
     return pad_data(path,
                     vocab_size=vocab_size,
                     sentence_length=sentence_length,
                     oov=oov,
                     start=start,
                     index_from=index_from,
                     seed=seed,
                     test_split=test_split)
Beispiel #5
0
# hyperparameters (batch size is forced here, overriding the command line)
args.batch_size = 128
gradient_clip_value = 15
vocab_size = 20000        # keep only the 20k most frequent tokens
sentence_length = 128     # pad/truncate every review to this many tokens
embedding_dim = 128
hidden_size = 128
reset_cells = True

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# make dataset: locate IMDB, then pad to fixed-length index sequences
path = load_imdb(path=args.data_dir)
(X_train, y_train), (X_test, y_test), nclass = pad_data(path,
                                                        vocab_size=vocab_size,
                                                        sentence_length=sentence_length)

# Fixed: the originals were Python 2 print statements (a syntax error under
# Python 3).  print() with comma-separated args reproduces the old output
# byte-for-byte (space-joined values).
print("Vocab size - ", vocab_size)
print("Sentence Length - ", sentence_length)
print("# of train sentences", X_train.shape[0])
print("# of test sentence", X_test.shape[0])

train_set = ArrayIterator(X_train, y_train, nclass=2)
valid_set = ArrayIterator(X_test, y_test, nclass=2)

# weight initialization: small uniform for the embedding, Glorot elsewhere
uni = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
g_uni = GlorotUniform()

if args.rlayer_type == 'lstm':
Beispiel #6
0
# hyperparameters from the reference
# NOTE(review): `args` presumably comes from the surrounding script's argument
# parser; batch_size is forced here rather than taken from the command line.
args.batch_size = 128
gradient_clip_value = 15  # consumed later when building the optimizer — confirm
vocab_size = 20000        # keep only the 20k most frequent tokens
sentence_length = 128     # pad/truncate every review to this many tokens
embedding_dim = 128
hidden_size = 128
reset_cells = True

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# make dataset: locate IMDB under args.data_dir, then pad every review to a
# fixed-length sequence of token indices
path = load_imdb(path=args.data_dir)
(X_train, y_train), (X_test, y_test), nclass = pad_data(path,
                                                        vocab_size=vocab_size,
                                                        sentence_length=sentence_length)

neon_logger.display("Vocab size - {}".format(vocab_size))
neon_logger.display("Sentence Length - {}".format(sentence_length))
neon_logger.display("# of train sentences {}".format(X_train.shape[0]))
neon_logger.display("# of test sentence {}".format(X_test.shape[0]))

# wrap the padded splits; nclass=2 matches the binary IMDB sentiment labels
train_set = ArrayIterator(X_train, y_train, nclass=2)
valid_set = ArrayIterator(X_test, y_test, nclass=2)

# weight initialization: small uniform for the embedding, Glorot elsewhere
uni = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
g_uni = GlorotUniform()

if args.rlayer_type == 'lstm':