Esempio n. 1
0
def sample(args):
    """Generate sentences from a trained model and write them to a file.

    Restores the vocabulary, the architecture and the network parameters from
    the HDF5 file at ``args.model_path``, builds a text sampler and writes one
    generated sentence per line to ``args.output_file``.

    :param args: object carrying the command line arguments; the attributes
                 read here are ``random_seed``, ``debug``, ``model_path``,
                 ``num_sentences`` and ``output_file``
    """
    numpy.random.seed(args.random_seed)

    if args.debug:
        theano.config.compute_test_value = 'warn'
    else:
        theano.config.compute_test_value = 'off'

    with h5py.File(args.model_path, 'r') as state:
        print("Reading vocabulary from network state.")
        sys.stdout.flush()
        vocabulary = Vocabulary.from_state(state)
        print("Number of words in vocabulary:", vocabulary.num_words())
        print("Number of word classes:", vocabulary.num_classes())
        print("Building neural network.")
        sys.stdout.flush()
        architecture = Architecture.from_state(state)
        network = Network(vocabulary,
                          architecture,
                          mode=Network.Mode(minibatch=False))
        print("Restoring neural network state.")
        network.set_state(state)

    print("Building text sampler.")
    sys.stdout.flush()
    sampler = TextSampler(network)

    # The first argument is the length of every generated sequence.
    sequences = sampler.generate(30, args.num_sentences)
    for sequence in sequences:
        try:
            eos_pos = sequence.index('</s>')
        except ValueError:
            # No end-of-sentence token was generated: keep the full sequence.
            # Only ValueError is caught so that real errors (and Ctrl-C) are
            # no longer silently swallowed.
            pass
        else:
            # Drop everything generated after the first end-of-sentence token.
            sequence = sequence[:eos_pos + 1]
        args.output_file.write(' '.join(sequence) + '\n')
Esempio n. 2
0
def sample(args):
    """Generate numbered sentences from a trained model.

    Restores the vocabulary, the architecture and the network parameters from
    the HDF5 file at ``args.model_path`` and writes ``args.num_sentences``
    lines of the form ``<index>: <words>`` to ``args.output_file``.

    :param args: object carrying the command line arguments; the attributes
                 read here are ``random_seed``, ``debug``, ``model_path``,
                 ``num_sentences`` and ``output_file``
    """
    numpy.random.seed(args.random_seed)
    theano.config.compute_test_value = 'warn' if args.debug else 'off'

    with h5py.File(args.model_path, 'r') as model_state:
        print("Reading vocabulary from network state.")
        sys.stdout.flush()
        vocab = Vocabulary.from_state(model_state)
        print("Number of words in vocabulary:", vocab.num_words())
        print("Number of word classes:", vocab.num_classes())
        print("Building neural network.")
        sys.stdout.flush()
        arch = Architecture.from_state(model_state)
        net = Network(vocab, arch, predict_next_distribution=True)
        print("Restoring neural network state.")
        net.set_state(model_state)

    print("Building text sampler.")
    sys.stdout.flush()
    text_sampler = TextSampler(net)

    # One generate() call per sentence; each line is prefixed with its index.
    for index in range(args.num_sentences):
        sentence = ' '.join(text_sampler.generate())
        args.output_file.write('{}: {}\n'.format(index, sentence))
Esempio n. 3
0
def sample(args):
    """Sample sentences from a stored network and write them out line by line.

    The vocabulary, architecture and parameters are read from the HDF5 file at
    ``args.model_path``. Each generated sequence is cut after its first
    ``</s>`` token, if it has one, and written to ``args.output_file`` as a
    space-separated line.

    :param args: object carrying the command line arguments; the attributes
                 read here are ``random_seed``, ``debug``, ``model_path``,
                 ``num_sentences`` and ``output_file``
    """
    numpy.random.seed(args.random_seed)

    if args.debug:
        theano.config.compute_test_value = 'warn'
    else:
        theano.config.compute_test_value = 'off'

    with h5py.File(args.model_path, 'r') as state:
        print("Reading vocabulary from network state.")
        sys.stdout.flush()
        vocabulary = Vocabulary.from_state(state)
        print("Number of words in vocabulary:", vocabulary.num_words())
        print("Number of word classes:", vocabulary.num_classes())
        print("Building neural network.")
        sys.stdout.flush()
        architecture = Architecture.from_state(state)
        network = Network(architecture, vocabulary,
                          mode=Network.Mode(minibatch=False))
        print("Restoring neural network state.")
        network.set_state(state)

    print("Building text sampler.")
    sys.stdout.flush()
    sampler = TextSampler(network)

    # The first argument is the length of every generated sequence.
    sequences = sampler.generate(30, args.num_sentences)
    for sequence in sequences:
        try:
            eos_pos = sequence.index('</s>')
        except ValueError:
            # index() found no end-of-sentence token; write the sequence as
            # is. Catching only ValueError keeps unrelated failures visible
            # instead of hiding them behind a bare except.
            pass
        else:
            sequence = sequence[:eos_pos + 1]
        args.output_file.write(' '.join(sequence) + '\n')
Esempio n. 4
0
 def test_generate(self):
     # Original note: network predicts <unk> probability.
     # Draws 10 sequences of 50 tokens each and checks that every pair of
     # adjacent tokens is one of the transitions the dummy network allows.
     sampler = TextSampler(self.dummy_network)
     generated = sampler.generate(50, 10)
     self.assertEqual(len(generated), 10)
     for tokens in generated:
         self.assertEqual(len(tokens), 50)
         self.assertEqual(tokens[0], '<s>')
         for position in range(len(tokens) - 1):
             previous = tokens[position]
             following = tokens[position + 1]
             if previous == '<s>':
                 self.assertTrue(following in ('yksi', 'kaksi'))
             elif previous == 'yksi':
                 self.assertTrue(following in ('kaksi', '</s>'))
             elif previous == 'kaksi':
                 self.assertTrue(following in ('yksi', '</s>'))
             elif previous == '</s>':
                 # A finished sentence must be followed by a new start token.
                 self.assertEqual(following, '<s>')
Esempio n. 5
0
 def test_generate(self):
     # Original note: network predicts <unk> probability.
     # Tokens that may follow each word; '</s>' is handled separately below
     # because it has exactly one legal successor.
     successors = {
         '<s>': ('yksi', 'kaksi'),
         'yksi': ('kaksi', '</s>'),
         'kaksi': ('yksi', '</s>'),
     }
     sampler = TextSampler(self.dummy_network)
     batch = sampler.generate(50, 10)
     self.assertEqual(len(batch), 10)
     for sequence in batch:
         self.assertEqual(len(sequence), 50)
         self.assertEqual(sequence[0], '<s>')
         for left, right in zip(sequence[:-1], sequence[1:]):
             if left == '</s>':
                 self.assertEqual(right, '<s>')
             elif left in successors:
                 self.assertTrue(right in successors[left])
Esempio n. 6
0
def sample(args):
    """Implement the "theanolm sample" command.

    Restores a network from the HDF5 file at ``args.model_path``, generates
    ``args.num_sentences`` sequences of length ``args.sentence_length`` and
    writes each one, cut after its first ``</s>`` token, to
    ``args.output_file``.

    :type args: argparse.Namespace
    :param args: a collection of command line arguments
    """

    numpy.random.seed(args.random_seed)
    theano.config.compute_test_value = 'warn' if args.debug else 'off'

    with h5py.File(args.model_path, 'r') as model_state:
        logging.info("Reading vocabulary from network state.")
        vocab = Vocabulary.from_state(model_state)
        logging.info("Number of words in vocabulary: %d", vocab.num_words())
        logging.info("Number of words in shortlist: %d",
                     vocab.num_shortlist_words())
        logging.info("Number of word classes: %d", vocab.num_classes())
        logging.info("Building neural network.")
        arch = Architecture.from_state(model_state)
        device = get_default_device(args.default_device)
        run_mode = Network.Mode(minibatch=False)
        net = Network(arch, vocab, mode=run_mode, default_device=device)
        logging.info("Restoring neural network state.")
        net.set_state(model_state)

    logging.info("Building text sampler.")
    text_sampler = TextSampler(net)

    generated = text_sampler.generate(args.sentence_length,
                                      args.num_sentences,
                                      seed_sequence=args.seed_sequence)
    for words in generated:
        try:
            end = words.index('</s>')
        except ValueError:
            # No end-of-sentence token: write the sequence unchanged.
            pass
        else:
            words = words[:end + 1]
        args.output_file.write(' '.join(words) + '\n')
Esempio n. 7
0
    def __init__(self, model_path):
        """Restore a network from a model file and build a text sampler.

        :param model_path: path of the model file, opened read-only with
                           ``h5py.File``
        """
        self.model_path = model_path
        # Called without an argument, so no fixed seed is set.
        numpy.random.seed()
        theano.config.compute_test_value = 'off'

        with h5py.File(model_path, 'r') as state_file:
            # NOTE(review): the handle stays reachable as self.state after the
            # with block has exited, at which point it is presumably closed.
            self.state = state_file
            print("Reading vocabulary from network state.")
            self.vocabulary = Vocabulary.from_state(state_file)
            print("Number of words in vocabulary:",
                  self.vocabulary.num_words())
            print("Number of words in shortlist:",
                  self.vocabulary.num_shortlist_words())
            print("Number of word classes:", self.vocabulary.num_classes())
            print("Building neural network.")
            self.architecture = Architecture.from_state(state_file)
            run_mode = Network.Mode(minibatch=False)
            self.network = Network(self.architecture, self.vocabulary,
                                   mode=run_mode)
            print("Restoring neural network state.")
            self.network.set_state(state_file)

        print("Building text sampler.")
        self.sampler = TextSampler(self.network)
Esempio n. 8
0
class Sampler:
    """Restore a trained network once and render generated text as HTML."""

    def __init__(self, model_path):
        """Restore a network from a model file and build a text sampler.

        :param model_path: path of the model file, opened read-only with
                           ``h5py.File``
        """
        self.model_path = model_path
        # Called without an argument, so no fixed seed is set.
        numpy.random.seed()
        theano.config.compute_test_value = 'off'

        # NOTE(review): self.state stays bound after the with block has
        # exited, at which point the handle is presumably closed. The
        # attribute is kept so existing users of the class still find it.
        with h5py.File(model_path, 'r') as self.state:
            print("Reading vocabulary from network state.")
            self.vocabulary = Vocabulary.from_state(self.state)
            print("Number of words in vocabulary:",
                  self.vocabulary.num_words())
            print("Number of words in shortlist:",
                  self.vocabulary.num_shortlist_words())
            print("Number of word classes:", self.vocabulary.num_classes())
            print("Building neural network.")
            self.architecture = Architecture.from_state(self.state)
            self.network = Network(self.architecture, self.vocabulary,
                                   mode=Network.Mode(minibatch=False))
            print("Restoring neural network state.")
            self.network.set_state(self.state)

        print("Building text sampler.")
        self.sampler = TextSampler(self.network)

    def sample(self, sen_len, sen_num):
        """Generate sentences and lay them out in ``col-md-4`` div columns.

        Each sequence is cut after its first ``</s>`` token. Sequences with
        fewer than four tokens after cutting are skipped. The first column
        holds up to three sentences and every later column up to two;
        sentences inside a column are concatenated without a separator.

        :param sen_len: first argument passed to the sampler's ``generate``
        :param sen_num: second argument passed to the sampler's ``generate``
        :rtype: str
        :returns: HTML with one closing tag for every opened div, or an empty
                  string when no sentence was kept
        """
        sequences = self.sampler.generate(sen_len, sen_num)
        pieces = []
        kept = 0
        for sequence in sequences:
            try:
                eos_pos = sequence.index('</s>')
                sequence = sequence[:eos_pos + 1]
            except (ValueError, TypeError):
                # No end-of-sentence token found: use the sequence as is.
                pass

            if len(sequence) < 4:
                continue

            if kept == 0:
                pieces.append("<div class=\"col-md-4\">\n")
            pieces.append(' '.join(sequence))
            if kept > 0 and kept % 2 == 0:
                # Close the current column and open the next one.
                pieces.append("\n</div><div class=\"col-md-4\">\n")
            kept += 1

        if not pieces:
            # No div was opened, so there is nothing to close.
            return ""

        # Close the last open column. This used to be an opening "<div>",
        # which left the returned markup unbalanced.
        pieces.append("\n</div>\n")
        return ''.join(pieces)
        vocabulary = Vocabulary.from_state(state)
        print("Number of words in vocabulary:", vocabulary.num_words())
        print("Number of words in shortlist:", vocabulary.num_shortlist_words())
        print("Number of word classes:", vocabulary.num_classes())
        print("Building neural network.")
        sys.stdout.flush()
        architecture = Architecture.from_state(state)
        network = Network(architecture, vocabulary, mode=Network.Mode(minibatch=False))
        print("Restoring neural network state.")
        network.set_state(state)
        return network


# Module-level setup: restore the trained network and wrap it in a sampler.
# NOTE(review): restoreModel and modelPath are not defined in the visible part
# of this file; presumably they are declared further up -- confirm.
# Alternative loading call, left disabled:
#model = tlm.Network.from_file('kalevalaV100.h5')
model = restoreModel(modelPath)
sampler = TextSampler(model)
# Example generate() call, left disabled:
#samp = sampler.generate(15, num_sequences=30)



def printSample(samp):
    """Return the last token of every sequence as one string.

    Despite its name, this function prints nothing. The previous version also
    assembled a per-sequence string that was never used; that dead work has
    been removed.

    :param samp: iterable of token sequences; each sequence must support
                 ``len()`` and negative indexing, and its last item must be
                 a string
    :rtype: str
    :returns: the last token of each non-empty sequence, each followed by a
              single space (so a non-empty result ends with a space)
    """
    # Empty sequences are skipped rather than raising IndexError on [-1].
    return ''.join(sequence[-1] + ' '
                   for sequence in samp
                   if len(sequence) > 0)