Example #1
0
    # Vocabulary sizes, taken from the training reader's dictionaries.
    num_words = len(reader.word_dict)
    num_tags = len(reader.tag_dict)

    def n(x):
        """Return *x* converted to a NumPy int32 array."""
        return np.asarray(x, dtype=np.int32)

    # One int32 array per sentence: first the codified words, then the
    # codified tags, both in the order of reader.sentences.
    codified_sentences = [
        n([t.codified_word for t in s])
        for s in reader.sentences
    ]
    codified_tags = [
        n([t.codified_tag for t in s])
        for s in reader.sentences
    ]

    # Report the data sizes and the main hyper-parameters of this run.
    summary = '#sentences : {}, #words: {}, #tags : {}, learning rate : {}, #hidden : {}, embedding size: {} '
    print(summary.format(
        num_sentences,
        num_words,
        num_tags,
        args.learning_rate,
        args.hidden,
        args.num_features,
    ))

    # Optional validation set. The validation reader is given the training
    # reader's word and tag dictionaries before its sentences are codified
    # (presumably so both sets share the same indices -- confirm in Reader).
    # Identity comparison is the correct way to test against None.
    if args.validation_filename is not None:
        valid_md = Metadata(args, args.validation_filename)
        reader_valid = Reader(valid_md)
        reader_valid.word_dict = reader.word_dict
        reader_valid.tag_dict = reader.tag_dict
        reader_valid.codify_sentences()

        # Same per-sentence int32 encoding as used for the training data.
        codified_sentences_valid = [n([t.codified_word for t in s]) for s in reader_valid.sentences]
        codified_tags_valid = [n([t.codified_tag for t in s]) for s in reader_valid.sentences]

    # Symbolic inputs (T is presumably theano.tensor -- confirm against the
    # file's imports).
    x = T.ivector('x')
    y = T.ivector('y')
    mask = T.ivector('mask')

    # Embedding layer over x; the table is sized num_words + 1.
    emb = Embedding(x, args.num_features, num_words+1)

    # The LSTM reads the embedding output directly, or the output of a
    # dropout layer placed on top of it when args.dropout is set.
    lstm_input = emb.output
    if args.dropout:
        dropout = Dropout(lstm_input, args.num_features, args.dropout)
        lstm_input = dropout.output

    lstm = LSTM(lstm_input, args.l2, args.hidden, num_words + 1, num_tags, args.num_features)
Example #2
0
    """
    Special options
    """
    # NOTE(review): the two calls below are disabled in the original; they look
    # like an alternative path that loads reader state from `directory_model`
    # -- confirm whether they are still needed or can be removed.
    #reader.load_files(directory_model)
    #reader.codify_sentences()

    # Sizes of the training data: the number of sentences held by the reader
    # and the lengths of its word and tag dictionaries.
    num_sentences = len(reader.sentences)
    num_words = len(reader.word_dict)
    num_tags = len(reader.tag_dict)

    # Optional validation set: only built when a validation filename was given
    # (a None or empty value skips this block).
    if args.validation_filename:
        # NOTE(review): the third argument is truthy when either embedding
        # option is enabled; its exact meaning is defined by Metadata -- confirm.
        valid_md = Metadata(args, args.validation_filename, args.fixed_embeddings or args.learn_embeddings)
        valid_reader = Reader(valid_md)
        # Give the validation reader the training reader's dictionaries before
        # codifying (presumably so both sets share the same indices -- confirm).
        valid_reader.word_dict = reader.word_dict
        valid_reader.tag_dict = reader.tag_dict
        valid_reader.codify_sentences()

    if args.fixed_embeddings:
        codified_sentences = [numpy.concatenate(numpy.asarray(\
                utils.contextwin([reader.get_embedding(t.codified_word) for t in s],
                    args.window,\
                reader.get_padding_left(), reader.get_padding_right()\
                ), dtype=theano.config.floatX), axis=0)\
                for s in reader.sentences]

        if args.validation_filename:
            codified_sentences_valid = [numpy.concatenate(numpy.asarray(\
                    utils.contextwin([reader.get_embedding(t.codified_word) for t in s],
                        args.window,\
                    reader.get_padding_left(), reader.get_padding_right()\