def sample(args):
    """Restore a network from an HDF5 state file and write sampled text.

    Every generated sequence is cut after its first ``</s>`` token (when
    one is present) and written to ``args.output_file`` as one line of
    space-separated words.

    :param args: object holding the command line arguments; this function
                 reads ``random_seed``, ``debug``, ``model_path``,
                 ``num_sentences`` and ``output_file``
    """
    numpy.random.seed(args.random_seed)
    theano.config.compute_test_value = 'warn' if args.debug else 'off'

    # The state file is only read while the network is built and restored.
    with h5py.File(args.model_path, 'r') as state:
        print("Reading vocabulary from network state.")
        sys.stdout.flush()
        vocabulary = Vocabulary.from_state(state)
        print("Number of words in vocabulary:", vocabulary.num_words())
        print("Number of word classes:", vocabulary.num_classes())
        print("Building neural network.")
        sys.stdout.flush()
        architecture = Architecture.from_state(state)
        network = Network(vocabulary, architecture,
                          mode=Network.Mode(minibatch=False))
        print("Restoring neural network state.")
        network.set_state(state)

    print("Building text sampler.")
    sys.stdout.flush()
    sampler = TextSampler(network)

    # 30 is presumably the length of each generated sequence, the second
    # argument the number of sequences -- TODO confirm against TextSampler.
    sequences = sampler.generate(30, args.num_sentences)
    for sequence in sequences:
        try:
            eos_pos = sequence.index('</s>')
        except ValueError:
            # No end-of-sentence token was generated; keep the whole
            # sequence. Only this expected error is ignored, anything else
            # (including KeyboardInterrupt) now propagates.
            pass
        else:
            sequence = sequence[:eos_pos + 1]
        args.output_file.write(' '.join(sequence) + '\n')
def sample(args):
    """Restore a network from an HDF5 state file and write numbered samples.

    One word sequence is generated per requested sentence and written to
    ``args.output_file`` as ``<index>: <space-separated words>``.

    :param args: object holding the command line arguments; this function
                 reads ``random_seed``, ``debug``, ``model_path``,
                 ``num_sentences`` and ``output_file``
    """
    numpy.random.seed(args.random_seed)
    theano.config.compute_test_value = 'warn' if args.debug else 'off'

    # The state file is only read while the network is built and restored.
    with h5py.File(args.model_path, 'r') as state:
        print("Reading vocabulary from network state.")
        sys.stdout.flush()
        vocabulary = Vocabulary.from_state(state)
        print("Number of words in vocabulary:", vocabulary.num_words())
        print("Number of word classes:", vocabulary.num_classes())

        print("Building neural network.")
        sys.stdout.flush()
        architecture = Architecture.from_state(state)
        network = Network(vocabulary,
                          architecture,
                          predict_next_distribution=True)

        print("Restoring neural network state.")
        network.set_state(state)

    print("Building text sampler.")
    sys.stdout.flush()
    sampler = TextSampler(network)

    output = args.output_file
    for sentence_id in range(args.num_sentences):
        sentence = ' '.join(sampler.generate())
        output.write('{}: {}\n'.format(sentence_id, sentence))
def sample(args):
    """Restore a network from an HDF5 state file and write sampled text.

    Every generated sequence is cut after its first ``</s>`` token (when
    one is present) and written to ``args.output_file`` as one line of
    space-separated words.

    :param args: object holding the command line arguments; this function
                 reads ``random_seed``, ``debug``, ``model_path``,
                 ``num_sentences`` and ``output_file``
    """
    numpy.random.seed(args.random_seed)
    theano.config.compute_test_value = 'warn' if args.debug else 'off'

    # The state file is only read while the network is built and restored.
    with h5py.File(args.model_path, 'r') as state:
        print("Reading vocabulary from network state.")
        sys.stdout.flush()
        vocabulary = Vocabulary.from_state(state)
        print("Number of words in vocabulary:", vocabulary.num_words())
        print("Number of word classes:", vocabulary.num_classes())
        print("Building neural network.")
        sys.stdout.flush()
        architecture = Architecture.from_state(state)
        network = Network(architecture, vocabulary,
                          mode=Network.Mode(minibatch=False))
        print("Restoring neural network state.")
        network.set_state(state)

    print("Building text sampler.")
    sys.stdout.flush()
    sampler = TextSampler(network)

    # 30 is presumably the length of each generated sequence, the second
    # argument the number of sequences -- TODO confirm against TextSampler.
    sequences = sampler.generate(30, args.num_sentences)
    for sequence in sequences:
        try:
            eos_pos = sequence.index('</s>')
        except ValueError:
            # No end-of-sentence token was generated; keep the whole
            # sequence. Only this expected error is ignored, anything else
            # (including KeyboardInterrupt) now propagates.
            pass
        else:
            sequence = sequence[:eos_pos + 1]
        args.output_file.write(' '.join(sequence) + '\n')
def test_generate(self):
    # Checks that the sampler returns the requested number of sequences of
    # the requested length, and that every adjacent word pair is one of
    # the transitions listed below.
    # Network predicts <unk> probability.
    sampler = TextSampler(self.dummy_network)
    sequences = sampler.generate(50, 10)
    self.assertEqual(len(sequences), 10)

    # Words accepted directly after each regular token.
    successors = {
        '<s>': ('yksi', 'kaksi'),
        'yksi': ('kaksi', '</s>'),
        'kaksi': ('yksi', '</s>'),
    }

    for sequence in sequences:
        self.assertEqual(len(sequence), 50)
        self.assertEqual(sequence[0], '<s>')
        for left, right in zip(sequence, sequence[1:]):
            if left == '</s>':
                # A new sentence has to start right after a sentence end.
                self.assertEqual(right, '<s>')
            elif left in successors:
                self.assertTrue(right in successors[left])
def sample(args):
    """Implements the "theanolm sample" command.

    Restores a network from the HDF5 state file, generates word sequences
    with a text sampler, cuts each sequence after its first ``</s>`` token
    (when one is present) and writes it to ``args.output_file`` as one line
    of space-separated words.

    :type args: argparse.Namespace
    :param args: a collection of command line arguments; this function
                 reads ``random_seed``, ``debug``, ``model_path``,
                 ``default_device``, ``sentence_length``,
                 ``num_sentences``, ``seed_sequence`` and ``output_file``
    """
    numpy.random.seed(args.random_seed)
    theano.config.compute_test_value = 'warn' if args.debug else 'off'

    # The state file is only read while the network is built and restored.
    with h5py.File(args.model_path, 'r') as state:
        logging.info("Reading vocabulary from network state.")
        vocabulary = Vocabulary.from_state(state)
        logging.info("Number of words in vocabulary: %d",
                     vocabulary.num_words())
        logging.info("Number of words in shortlist: %d",
                     vocabulary.num_shortlist_words())
        logging.info("Number of word classes: %d",
                     vocabulary.num_classes())

        logging.info("Building neural network.")
        architecture = Architecture.from_state(state)
        device = get_default_device(args.default_device)
        network = Network(architecture,
                          vocabulary,
                          mode=Network.Mode(minibatch=False),
                          default_device=device)

        logging.info("Restoring neural network state.")
        network.set_state(state)

    logging.info("Building text sampler.")
    sampler = TextSampler(network)

    generated = sampler.generate(args.sentence_length,
                                 args.num_sentences,
                                 seed_sequence=args.seed_sequence)
    for words in generated:
        try:
            end = words.index('</s>')
        except ValueError:
            # No end-of-sentence token; write the full sequence.
            pass
        else:
            words = words[:end + 1]
        args.output_file.write(' '.join(words) + '\n')
def __init__(self, model_path):
    """Restore a network from an HDF5 state file and build a text sampler.

    Sets ``model_path``, ``state``, ``vocabulary``, ``architecture``,
    ``network`` and ``sampler`` on the instance and prints progress
    messages to standard output.

    :param model_path: path of the HDF5 file that holds the network state
    """
    self.model_path = model_path

    # Called without an argument, so no fixed seed is set here.
    numpy.random.seed()
    theano.config.compute_test_value = 'off'

    # NOTE(review): self.state keeps referring to the file object after the
    # with block has exited, i.e. presumably to a closed file -- confirm
    # nothing reads it later.
    with h5py.File(model_path, 'r') as state:
        self.state = state

        print("Reading vocabulary from network state.")
        vocabulary = Vocabulary.from_state(state)
        self.vocabulary = vocabulary
        print("Number of words in vocabulary:", vocabulary.num_words())
        print("Number of words in shortlist:",
              vocabulary.num_shortlist_words())
        print("Number of word classes:", vocabulary.num_classes())

        print("Building neural network.")
        self.architecture = Architecture.from_state(state)
        self.network = Network(self.architecture,
                               vocabulary,
                               mode=Network.Mode(minibatch=False))

        print("Restoring neural network state.")
        self.network.set_state(state)

    print("Building text sampler.")
    self.sampler = TextSampler(self.network)
class Sampler:
    """Restores a trained network and renders sampled sentences as HTML.

    The constructor loads the network from an HDF5 state file; ``sample``
    generates word sequences and wraps them in ``col-md-4`` ``<div>``
    columns.
    """

    def __init__(self, model_path):
        """Restore a network from an HDF5 state file and build a sampler.

        Sets ``model_path``, ``state``, ``vocabulary``, ``architecture``,
        ``network`` and ``sampler`` on the instance and prints progress
        messages to standard output.

        :param model_path: path of the HDF5 file holding the network state
        """
        self.model_path = model_path

        # Called without an argument, so no fixed seed is set here.
        numpy.random.seed()
        theano.config.compute_test_value = 'off'

        # NOTE(review): self.state keeps referring to the file object after
        # the with block has exited, i.e. presumably to a closed file --
        # confirm nothing reads it later.
        with h5py.File(model_path, 'r') as self.state:
            print("Reading vocabulary from network state.")
            self.vocabulary = Vocabulary.from_state(self.state)
            print("Number of words in vocabulary:",
                  self.vocabulary.num_words())
            print("Number of words in shortlist:",
                  self.vocabulary.num_shortlist_words())
            print("Number of word classes:", self.vocabulary.num_classes())
            print("Building neural network.")
            self.architecture = Architecture.from_state(self.state)
            self.network = Network(self.architecture, self.vocabulary,
                                   mode=Network.Mode(minibatch=False))
            print("Restoring neural network state.")
            self.network.set_state(self.state)

        print("Building text sampler.")
        self.sampler = TextSampler(self.network)

    def sample(self, sen_len, sen_num):
        """Generate sequences and return them as balanced HTML columns.

        Each sequence is cut after its first ``</s>`` token (when present);
        sequences with fewer than four tokens are dropped. The kept
        sequences are joined with spaces and appended to the current
        column, and a new column is started after the third, fifth,
        seventh, ... kept sequence. The markup always starts with an
        opening column ``<div>`` and ends with ``</div>``, also when no
        sequence is kept.

        :param sen_len: first argument passed to the sampler's ``generate``
        :param sen_num: second argument passed to the sampler's ``generate``
        :rtype: str
        :returns: the HTML fragment
        """
        column_open = "<div class=\"col-md-4\">\n"
        column_break = "\n</div><div class=\"col-md-4\">\n"

        # Collect the pieces and join once instead of growing a string.
        parts = [column_open]
        kept = 0
        for sequence in self.sampler.generate(sen_len, sen_num):
            try:
                eos_pos = sequence.index('</s>')
                sequence = sequence[:eos_pos + 1]
            except (ValueError, TypeError):
                # No usable end-of-sentence token; keep the sequence as is.
                pass
            if len(sequence) < 4:
                continue
            # NOTE(review): tokens such as <s> and </s> go into the markup
            # unescaped and consecutive sequences are not separated --
            # confirm the consumer of this fragment expects that.
            parts.append(' '.join(sequence))
            if kept > 0 and kept % 2 == 0:
                parts.append(column_break)
            kept += 1

        # Close the column that is still open (previously "<div>", which
        # left the markup unbalanced).
        parts.append("\n</div>\n")
        return ''.join(parts)
vocabulary = Vocabulary.from_state(state) print("Number of words in vocabulary:", vocabulary.num_words()) print("Number of words in shortlist:", vocabulary.num_shortlist_words()) print("Number of word classes:", vocabulary.num_classes()) print("Building neural network.") sys.stdout.flush() architecture = Architecture.from_state(state) network = Network(architecture, vocabulary, mode=Network.Mode(minibatch=False)) print("Restoring neural network state.") network.set_state(state) return network #model = tlm.Network.from_file('kalevalaV100.h5') model = restoreModel(modelPath) sampler = TextSampler(model) #samp = sampler.generate(15, num_sequences=30) def printSample(samp): allwords = '' for sequence in samp: pstr = '' for i,s in enumerate(sequence): if i > 0: pstr = pstr + s + ' ' #print(pstr) allwords += sequence[-1] + ' ' return allwords