# Codify the training (and optional validation) data, then assemble the
# Embedding -> (optional Dropout) -> LSTM network on symbolic inputs.
num_tags = len(reader.tag_dict)


def n(x):
    """Return *x* converted to a NumPy ``int32`` array."""
    return np.asarray(x, dtype=np.int32)


# One int32 array of word codes, and one of tag codes, per training sentence.
codified_sentences = [n([t.codified_word for t in s]) for s in reader.sentences]
codified_tags = [n([t.codified_tag for t in s]) for s in reader.sentences]

print('#sentences : {}, #words: {}, #tags : {}, learning rate : {}, #hidden : {}, embedding size: {} '.format(
    num_sentences, num_words, num_tags, args.learning_rate, args.hidden, args.num_features))

if args.validation_filename is not None:
    valid_md = Metadata(args, args.validation_filename)
    reader_valid = Reader(valid_md)
    # Reuse the training dictionaries so that validation words and tags are
    # codified with the same codes as the training data.
    reader_valid.word_dict = reader.word_dict
    reader_valid.tag_dict = reader.tag_dict
    reader_valid.codify_sentences()
    codified_sentences_valid = [n([t.codified_word for t in s]) for s in reader_valid.sentences]
    codified_tags_valid = [n([t.codified_tag for t in s]) for s in reader_valid.sentences]

# Symbolic inputs for the model graph.
# NOTE(review): T is presumably theano.tensor (int32 vectors) - confirm
# against the file's imports.
x = T.ivector('x')
y = T.ivector('y')
mask = T.ivector('mask')

# NOTE(review): num_words + 1 presumably reserves one extra index
# (e.g. padding or unknown word) - confirm against Embedding/LSTM.
emb = Embedding(x, args.num_features, num_words + 1)

# Dropout is inserted between the embedding and the LSTM only when requested;
# `dropout` is left undefined otherwise, exactly as before.
if args.dropout:
    dropout = Dropout(emb.output, args.num_features, args.dropout)
    lstm_input = dropout.output
else:
    lstm_input = emb.output

lstm = LSTM(lstm_input, args.l2, args.hidden, num_words + 1, num_tags, args.num_features)
Special options """ #reader.load_files(directory_model) #reader.codify_sentences() # Generate the training set num_sentences = len(reader.sentences) num_words = len(reader.word_dict) num_tags = len(reader.tag_dict) if args.validation_filename: valid_md = Metadata(args, args.validation_filename, args.fixed_embeddings or args.learn_embeddings) valid_reader = Reader(valid_md) valid_reader.word_dict = reader.word_dict valid_reader.tag_dict = reader.tag_dict valid_reader.codify_sentences() if args.fixed_embeddings: codified_sentences = [numpy.concatenate(numpy.asarray(\ utils.contextwin([reader.get_embedding(t.codified_word) for t in s], args.window,\ reader.get_padding_left(), reader.get_padding_right()\ ), dtype=theano.config.floatX), axis=0)\ for s in reader.sentences] if args.validation_filename: codified_sentences_valid = [numpy.concatenate(numpy.asarray(\ utils.contextwin([reader.get_embedding(t.codified_word) for t in s], args.window,\ reader.get_padding_left(), reader.get_padding_right()\ ), dtype=theano.config.floatX), axis=0)\