# The snippet below follows the neon v1.x API; `batch_size` and the parsed
# command-line `args` are assumed to be defined earlier in the script
# (e.g. via neon.util.argparser.NeonArgparser).
from __future__ import print_function
from neon.backends import gen_backend
from neon.data import DataIterator, Text, load_text
from neon.initializers import Uniform, GlorotUniform
from neon.layers import LookupTable

# hyperparameters
vocab_size = 20000
sentence_length = 100
embedding_dim = 128
hidden_size = 128
reset_cells = True

# setup backend
be = gen_backend(backend=args.backend,
                 batch_size=batch_size,
                 rng_seed=args.rng_seed,
                 device_id=args.device_id,
                 default_dtype=args.datatype)

# make dataset
path = load_text('imdb', path=args.data_dir)
(X_train, y_train), (X_test, y_test), nclass = Text.pad_data(
    path, vocab_size=vocab_size, sentence_length=sentence_length)

print("Vocab size - ", vocab_size)
print("Sentence Length - ", sentence_length)
print("# of train sentences", X_train.shape[0])
print("# of test sentences", X_test.shape[0])

train_set = DataIterator(X_train, y_train, nclass=2)
valid_set = DataIterator(X_test, y_test, nclass=2)

# weight initialization
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
init_glorot = GlorotUniform()

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=init_emb),
# Variant of the same pipeline: instead of the built-in IMDB download it loads
# a pre-processed, pickled dataset from `data_root` (assumes `import os` and
# `data_root` are defined earlier in the script).
hidden_size = 128
reset_cells = True

print('batch_size: %s \nvocab_size: %s \nsentence_length: %s \n'
      'embedding_dim: %s \nhidden_size: %s'
      % (batch_size, vocab_size, sentence_length, embedding_dim, hidden_size))

# setup backend
be = gen_backend(backend=args.backend,
                 batch_size=batch_size,
                 rng_seed=args.rng_seed,
                 device_id=args.device_id,
                 default_dtype=args.datatype)

# make dataset
(X_train, y_train), (X_test, y_test), nclass = Text.pad_data(
    os.path.join(
        data_root,
        'train_valid_text_index_in_binary_label_shuffled_10000.pickle'),
    vocab_size=vocab_size, sentence_length=sentence_length)

print("Vocab size - ", vocab_size)
print("Sentence Length - ", sentence_length)
print("# of train sentences", X_train.shape[0])
print("# of test sentences", X_test.shape[0])

train_set = DataIterator(X_train, y_train, nclass=2)
valid_set = DataIterator(X_test, y_test, nclass=2)

# weight initialization
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
init_glorot = GlorotUniform()

layers = [
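    # --- Illustrative sketch from here on: the original listing breaks off at
    # --- `layers = [`. The layer stack, cost, optimizer and fit() call below
    # --- follow the usual shape of neon v1.x LSTM sentiment examples and are
    # --- assumptions, not the original author's code. The additional names
    # --- (LSTM, RecurrentSum, Dropout, Affine, Tanh, Logistic, Softmax,
    # --- GeneralizedCost, CrossEntropyMulti, Model, Adagrad, Callbacks) would
    # --- be imported from neon.layers, neon.transforms, neon.models,
    # --- neon.optimizers and neon.callbacks.callbacks at the top of the script.
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=init_emb),
    LSTM(hidden_size, init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=reset_cells),
    RecurrentSum(),        # sum hidden states over time before the classifier
    Dropout(keep=0.5),
    Affine(2, init_glorot, bias=init_glorot, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
model = Model(layers=layers)
optimizer = Adagrad(learning_rate=0.01)

# Callbacks constructor arguments differ between neon releases; adjust to your version
callbacks = Callbacks(model, train_set, eval_set=valid_set, **args.callback_args)

model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)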