Esempio n. 1
0
            rnd = random.random()
            for i,p in enumerate(dist):
                rnd -= p
                if rnd <= 0: break
            res.append(i)
            cw = i
            if cw == stop: break
            if nchars and len(res) > nchars: break
        return res

# Script entry point: parse the corpus path, build the vocabulary, construct
# the RNN language model and its trainer, then start the training passes.
if __name__ == '__main__':
    # One required positional argument: the path to the corpus file.
    parser = argparse.ArgumentParser()
    parser.add_argument('corpus', help='Path to the corpus file.')
    args = parser.parse_args()

    # The corpus is read through util.CharsCorpusReader with "<s>" passed as
    # `begin` (presumably a start-of-sequence marker -- confirm in util).
    train = util.CharsCorpusReader(args.corpus, begin="<s>")
    vocab = util.Vocab.from_corpus(train)
    
    # The vocabulary size is handed to the model constructor below.
    VOCAB_SIZE = vocab.size()

    # NOTE(review): Model / SimpleSGDTrainer look like the DyNet parameter
    # collection and SGD trainer -- confirm against the file's imports.
    model = Model()
    trainer = SimpleSGDTrainer(model)

    # LAYERS, INPUT_DIM and HIDDEN_DIM are not defined in this block; they are
    # expected to be module-level constants defined elsewhere in the file.
    # The commented-out line is the same construction with SimpleRNNBuilder
    # instead of LSTMBuilder.
    #lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder)
    lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=LSTMBuilder)

    # Materialize the reader into a list so random.shuffle can reorder it
    # in place at the start of every pass.
    train = list(train)

    # Running totals, both starting at 0.0 (presumably characters seen and
    # accumulated loss -- their updates are not visible in this block).
    chars = loss = 0.0
    # 100 passes; the training list is reshuffled at the start of each pass.
    for ITER in range(100):
        random.shuffle(train)
Esempio n. 2
0
            r_t = bias + (R * y_t)
            ydist = softmax(r_t)
            dist = ydist.vec_value()
            rnd = random.random()
            for i, p in enumerate(dist):
                rnd -= p
                if rnd <= 0: break
            res.append(i)
            cw = i
            if cw == stop: break
            if nchars and len(res) > nchars: break
        return res


if __name__ == '__main__':
    # Script entry point: read the corpus named on the command line, build
    # the vocabulary from it, and construct the model, trainer and RNN
    # language model.
    #
    # Usage: <script> <corpus-path>
    if len(sys.argv) < 2:
        # Exit with a usage message (written to stderr, non-zero status)
        # instead of an IndexError traceback when the path is missing.
        sys.exit("usage: %s <corpus-path>" % sys.argv[0])

    # "<s>" is passed as `begin` to the reader (presumably a
    # start-of-sequence marker -- confirm in util).
    train = util.CharsCorpusReader(sys.argv[1], begin="<s>")
    vocab = util.Vocab.from_corpus(train)

    # The vocabulary size is handed to the model constructor below.
    VOCAB_SIZE = vocab.size()

    model = Model()
    sgd = SimpleSGDTrainer(model)

    # LAYERS, INPUT_DIM and HIDDEN_DIM are not defined in this block; they are
    # expected to be module-level constants defined elsewhere in the file.
    # NOTE(review): an earlier revision left a commented-out variant using
    # builder=LSTMBuilder; swap the builder here to try it (assumes
    # LSTMBuilder is imported by this file -- confirm).
    lm = RNNLanguageModel(model,
                          LAYERS,
                          INPUT_DIM,
                          HIDDEN_DIM,
                          VOCAB_SIZE,
                          builder=SimpleRNNBuilder)
Esempio n. 3
0
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('train', help='Path to the corpus file.')
    parser.add_argument('dev', help='Path to the validation corpus file.')
    parser.add_argument('test', help='Path to the test corpus file.')
    parser.add_argument(
        '--print_probs',
        action="store_true",
        help=
        'whether to print the probabilities per word over the validation set')
    parser.add_argument('--perform_train',
                        action="store_true",
                        help='whether to perform training')
    args, unknown = parser.parse_known_args()

    train = util.CharsCorpusReader(args.train, begin="<s>")
    dev = util.CharsCorpusReader(args.dev, begin="<s>")
    test = util.CharsCorpusReader(args.test, begin="<s>")

    vocab = util.Vocab.from_corpus(train)

    VOCAB_SIZE = vocab.size()

    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model, learning_rate=1.0)

    lm = RNNLanguageModel(model,
                          LAYERS,
                          INPUT_DIM,
                          HIDDEN_DIM,
                          VOCAB_SIZE,