    return dataset

# Split into training and test data.
dataset = get_dataset()
split = int(0.66 * len(dataset.data))
train_data, test_data = dataset.data[:split], dataset.data[split:]
train_target, test_target = dataset.target[:split], dataset.target[split:]

# Compute graph.
_, length, image_size = train_data.shape
num_classes = train_target.shape[2]
data = tf.placeholder(tf.float32, [None, length, image_size])
target = tf.placeholder(tf.float32, [None, length, num_classes])
model = BidirectionalSequenceLabellingModel(data, target, params)
batches = batched(train_data, train_target, params.batch_size)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for index, batch in enumerate(batches):
    batch_data = batch[0]
    batch_target = batch[1]
    epoch = batch[2]
    if epoch >= params.epochs:
        break
    feed = {data: batch_data, target: batch_target}
    error, _ = sess.run([model.error, model.optimize], feed)
    if index % params.epoch_size == 0:
        print('{}: {:3.2f}%'.format(index, 100 * error))

# Evaluate on the held-out data. Only model.error is run here; running
# model.optimize on the test feed would train on the test set.
test_feed = {data: test_data, target: test_target}
test_error = sess.run(model.error, test_feed)
print('Test error: {:3.2f}%'.format(100 * test_error))
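The training loop above relies on a batched helper that yields (batch_data, batch_target, epoch) triples, which is how the loop knows when params.epochs is reached. Its definition is not shown in this section; the following is a minimal sketch inferred from the call site, assuming the arrays fit in memory and that no shuffling is needed between epochs:

def batched(data, target, batch_size):
    # Hypothetical helper: cycle through the arrays indefinitely, yielding
    # (batch_data, batch_target, epoch) triples so the caller can decide
    # when to stop based on the epoch counter.
    epoch = 0
    while True:
        for offset in range(0, len(data) - batch_size + 1, batch_size):
            yield (data[offset:offset + batch_size],
                   target[offset:offset + batch_size],
                   epoch)
        epoch += 1

Note that the embedding script below calls a batched with a different signature, taking a single stream of examples rather than two aligned arrays.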
    contrastive_examples=100,
    learning_rate=0.5,
    momentum=0.5,
    batch_size=1000,
)

data = tf.placeholder(tf.int32, [None])
target = tf.placeholder(tf.int32, [None])
model = EmbeddingModel(data, target, params)

corpus = Wikipedia(
    'https://dumps.wikimedia.org/enwiki/20160501/'
    'enwiki-20160501-pages-meta-current1.xml-p000000010p000030303.bz2',
    WIKI_DOWNLOAD_DIR,
    params.vocabulary_size)
examples = skipgrams(corpus, params.max_context)
batches = batched(examples, params.batch_size)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Running average over the last 100 batch costs, to smooth the printed loss.
average = collections.deque(maxlen=100)
for index, batch in enumerate(batches):
    feed_dict = {data: batch[0], target: batch[1]}
    cost, _ = sess.run([model.cost, model.optimize], feed_dict)
    average.append(cost)
    print('{}: {:5.1f}'.format(index + 1, sum(average) / len(average)))
    if index > 100000:
        break

embeddings = sess.run(model.embeddings)
np.save(WIKI_DOWNLOAD_DIR + '/embeddings.npy', embeddings)
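Once the embedding matrix is on disk, it can be inspected without rebuilding the training graph. As a quick sanity check, nearest neighbours under cosine similarity should group related words. The sketch below only assumes the embeddings.npy file written above and reuses the WIKI_DOWNLOAD_DIR constant; the neighbours helper and its name are illustrative, not part of the code in this chapter:

import numpy as np

# Load the embeddings written by the training script above.
embeddings = np.load(WIKI_DOWNLOAD_DIR + '/embeddings.npy')
# Normalize the rows so that a plain dot product equals cosine similarity.
normed = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

def neighbours(index, count=5):
    # Indices of the most similar embeddings, excluding the query word itself.
    similarity = np.dot(normed, normed[index])
    return np.argsort(-similarity)[1:count + 1]

Mapping these indices back to words requires the vocabulary built from the corpus, which lives inside the Wikipedia class and is not exported here.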