def __init__(self, data, opt):
        self.opt = opt
        self.model = dy.ParameterCollection()
        self.trainer = dy.MomentumSGDTrainer(self.model)
        self.w2i = data.w2i
        self.wdims = opt.embedding_size
        self.ldims = opt.hidden_size
        self.attsize = opt.attention_size

        self.ext_embeddings = data.ext_embeddings
        # Model Parameters
        self.wlookup = self.model.add_lookup_parameters((len(self.w2i), self.wdims))

        self.__load_external_embeddings()

        if self.opt.encoder_dir == "single":
            if self.opt.encoder_type == "lstm":
                self.sentence_rnn = [
                    dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model)
                ]
            elif self.opt.encoder_type == "gru":
                self.sentence_rnn = [
                    dy.GRUBuilder(1, self.wdims, self.ldims, self.model)
                ]
            self.attention_w = self.model.add_parameters((self.attsize, self.ldims))
            self.attention_b = self.model.add_parameters(self.attsize)
            self.att_context = self.model.add_parameters(self.attsize)
            self.mlp_w = self.model.add_parameters((1, self.ldims + 2 * self.ldims))
            self.mlp_b = self.model.add_parameters(1)
        elif self.opt.encoder_dir == "bidirectional":
            if self.opt.encoder_type == "lstm":
                self.sentence_rnn = [
                    dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model),
                    dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model),
                ]
            elif self.opt.encoder_type == "gru":
                self.sentence_rnn = [
                    dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
                    dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
                ]

            self.attention_w = self.model.add_parameters((self.attsize, 2 * self.ldims))
            self.attention_b = self.model.add_parameters(self.attsize)
            self.att_context = self.model.add_parameters(self.attsize)
            self.mlp_w = self.model.add_parameters((1, 2 * self.ldims + 4 * self.ldims))
            self.mlp_b = self.model.add_parameters(1)
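The forward pass is not shown in this snippet. Purely as a hedged illustration, here is a minimal sketch of the additive attention that the parameters above (attention_w, attention_b, att_context) would support, assuming the single-direction configuration; attend is a hypothetical helper, not part of the original code:

import dynet as dy

def attend(model, word_indices):
    """Hypothetical helper: additive attention over the single-direction encoder above."""
    dy.renew_cg()
    embs = [model.wlookup[i] for i in word_indices]
    states = model.sentence_rnn[0].initial_state().transduce(embs)   # T vectors of size ldims
    # u_t = tanh(W_att h_t + b_att); score_t = u_t . context_vector
    scores = [dy.dot_product(dy.tanh(model.attention_w * h + model.attention_b),
                             model.att_context) for h in states]
    alphas = dy.softmax(dy.concatenate(scores))        # attention weights over positions
    return dy.concatenate_cols(states) * alphas        # weighted sum of encoder states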
Code example #2
File: pos.py  Project: karlstratos/iaan
 def load_and_populate(self, model_path):
     self.m = dy.ParameterCollection()
     with open(os.path.join(model_path, "info.pickle"), "rb") as inf:  # binary mode for pickle
         (self._w2i,
          self._c2i,
          self._jamo2i,
          self.arch,
          self.loss_type,
          self.zsize,
          self.wdim,
          self.cdim,
          self.jdim,
          self.width,
          self.swap,
          self.pseudocount) = pickle.load(inf)
     self.init_parameters(wembs={})
     self.m.populate(os.path.join(model_path, "model"))
Code example #3
    def __init__(self, rnn_num_of_layers, embeddings_size, state_size,
                 vocab_size, char2int, int2char):
        self.model = dy.ParameterCollection()

        # the embedding paramaters
        self.embeddings = self.model.add_lookup_parameters(
            (vocab_size, embeddings_size))

        # the rnn
        self.RNN = RNN_BUILDER(rnn_num_of_layers, embeddings_size, state_size,
                               self.model)

        # project the rnn output to a vector of VOCAB_SIZE length
        self.output_w = self.model.add_parameters((vocab_size, state_size))
        self.output_b = self.model.add_parameters((vocab_size))
        self.int2char = int2char
        self.char2int = char2int
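The training step is not part of this snippet; as a sketch only, the per-sequence loss for such a character-level RNN LM could be computed as below. sequence_loss is a hypothetical helper and assumes the lookup table, RNN builder, and output projection defined in the constructor above:

import dynet as dy

def sequence_loss(lm, text):
    """Hypothetical helper: negative log-likelihood of one character sequence."""
    dy.renew_cg()
    ids = [lm.char2int[c] for c in text]
    state = lm.RNN.initial_state()
    losses = []
    for cur, nxt in zip(ids, ids[1:]):
        state = state.add_input(lm.embeddings[cur])
        logits = lm.output_w * state.output() + lm.output_b
        losses.append(dy.pickneglogsoftmax(logits, nxt))
    return dy.esum(losses)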
Code example #4
    def __init__(self, char_dim, feat_dim, hidden_dim, char_size, feat_sizes):
        self._char_dim = char_dim
        self._feat_dim = feat_dim

        self._pc = dy.ParameterCollection()

        if config.adam:
            self._trainer = dy.AdamTrainer(self._pc, config.learning_rate, config.beta_1, config.beta_2, config.epsilon)
        else:
            # self._trainer = dy.AdadeltaTrainer(self._pc)
            self._trainer = dy.SimpleSGDTrainer(self._pc, config.learning_rate)
            self._trainer.set_clip_threshold(config.clip_threshold)

        # self._trainer.set_clip_threshold(1.0)

        self.params = dict()

        self.lp_c = self._pc.add_lookup_parameters((char_size, char_dim))
        self.lp_feats = []
        for idx in range(len(feat_sizes)):
            self.lp_feats.append(self._pc.add_lookup_parameters((feat_sizes[idx], feat_dim), init=dy.ConstInitializer(0.)))

        # self._pdrop_embs = pdrop_embs
        # self._pdrop_lstm = pdrop_lstm
        # self._pdrop_mlp = pdrop_mlp

        self.LSTM_builders = []

        f = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)
        b = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)

        self.LSTM_builders.append((f, b))
        for i in range(config.layers - 1):
            f = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
            b = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
            self.LSTM_builders.append((f, b))

        self.dec_LSTM = dy.VanillaLSTMBuilder(1, hidden_dim, hidden_dim, self._pc)

        self.MLP = self._pc.add_parameters((char_dim + feat_dim * 6 + 6, hidden_dim))
        self.MLP_bias = self._pc.add_parameters((hidden_dim))
        self.classifier = self._pc.add_parameters((hidden_dim, char_size))
        self.classifier_bias = self._pc.add_parameters((char_size))
        self.MLP_attn = self._pc.add_parameters((char_dim + feat_dim * 6 + 6, hidden_dim))
        self.MLP_attn_bias = self._pc.add_parameters((hidden_dim))
        self.attn_weight = self._pc.add_parameters((char_dim))
Code example #5
    def __init__(self,
                 alphabet,
                 num_layers=2,
                 input_dim=3,
                 hidden_dim=5,
                 reject_threshold=0.01,
                 RNNClass=LSTMNetwork,
                 pc=None):

        self.alphabet = alphabet
        # kept as a separate name for historical reasons; identical to self.alphabet
        self.input_alphabet = list(self.alphabet)
        self.end_token = "<EOS>"
        # self.begin_token = "<BOS>"  # if a "<BOS>" token is reintroduced, it must be prepended
        # to every input sequence, and the state reached after it treated as the first state
        self.internal_alphabet = self.input_alphabet + [self.end_token]  # +[self.begin_token]
        self.int2char = list(self.internal_alphabet)
        self.char2int = {c: i for i, c in enumerate(self.int2char)}
        self.vocab_size = len(self.internal_alphabet)
        self.pc = pc if pc is not None else dy.ParameterCollection()
        self.lookup = self.pc.add_lookup_parameters(
            (self.vocab_size, input_dim))
        self.linear_transform = LinearTransform(hidden_dim, self.vocab_size,
                                                self.pc)
        self.rnn = RNNClass(num_layers=num_layers,
                            input_dim=input_dim,
                            hidden_dim=hidden_dim,
                            pc=self.pc)
        self.reject_threshold = reject_threshold
        self.store_expressions()  # builds the initial state so it can be read below
        full_hidden_vec = self.rnn.initial_state.as_vec()
        # filled with 2s, which lie outside [-1, 1]; since LSTM and GRU h values are in
        # [-1, 1], this cannot be a real state and safely marks the sink/reject state
        self.sink_reject_vec = [2 for a in full_hidden_vec]
        self.all_losses = []
        self.spec = {
            "alphabet": alphabet,
            "input_dim": input_dim,
            "num_layers": num_layers,
            "hidden_dim": hidden_dim,
            "reject_threshold": reject_threshold,
            "RNNClass": RNNClass
        }
Code example #6
def entailment(train_file, dev_file, test_file, embed_file, epochs, eps,
               reg_lambda, batch_size, per_log, LSTM_params, training_sample,
               sample_type, improvement):
    curr_time = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    print(curr_time + ": starting process")

    # read train, dev, and test data sets: each call returns a list where every
    # item is a sentence represented as a tuple, plus its word set and max length
    train, train_words, max_len_train = read_data(train_file)
    dev, dev_words, max_len_dev = read_data(dev_file)
    test, test_words, max_len_test = read_data(test_file)
    P_rows = max([max_len_train, max_len_dev, max_len_test])

    # unify all unique words to one set and delete independent sets
    all_words = train_words.union(dev_words).union(test_words)
    del train_words
    del dev_words
    del test_words

    # get embeddings
    embed_vec, vocab = get_embeddings(embed_file, all_words, LSTM_params[2])

    # define vocabulary and help structures
    word2int = {w: i for i, w in enumerate(vocab)}
    label2int = {
        l: i
        for i, l in enumerate(["entailment", "neutral", "contradiction"])
    }
    vocab_size = len(vocab)
    num_labels = 3

    # create a classifier
    m = dy.ParameterCollection()
    trainer = dy.AdadeltaTrainer(m, eps)  # define trainer
    snli_classifier = ReRead_LSTM(vocab_size, num_labels, LSTM_params,
                                  embed_vec, P_rows, m,
                                  improvement)  # create classifier
    train_model(train, dev, test, epochs, batch_size, reg_lambda, trainer,
                snli_classifier, word2int, label2int, per_log, training_sample,
                sample_type, improvement)
Code example #7
    def _xavier_initializer(shape, **kwargs):
        """Defines an initializer for the Xavier distribution.
        Specifically, the output should be sampled uniformly from [-epsilon, epsilon] where
            epsilon = sqrt(6) / <sum of the sizes of shape's dimensions>
        e.g., if shape = (2, 3), epsilon = sqrt(6 / (2 + 3))

        This function will be used as a variable initializer.

        Args:
            shape: Tuple or 1-d array that species the dimensions of the requested tensor.
        Returns:
            out: tf.Tensor of specified shape sampled from the Xavier distribution.
        """
        ### YOUR CODE HERE
        m = dy.ParameterCollection()
        out = m.add_parameters(shape).as_array()
        ### END YOUR CODE
        return out
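The code above relies on DyNet's default Glorot initializer. For comparison, a sketch of the same Xavier-uniform sampling written out explicitly with NumPy, matching the epsilon formula in the docstring:

import numpy as np

def xavier_initializer(shape):
    # epsilon = sqrt(6 / sum(shape)); sample uniformly from [-epsilon, epsilon]
    epsilon = np.sqrt(6.0 / np.sum(shape))
    return np.random.uniform(low=-epsilon, high=epsilon, size=shape)

# e.g. xavier_initializer((2, 3)) draws from [-sqrt(6 / 5), sqrt(6 / 5)]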
Code example #8
    def __init__(self, w2i, options):
        print('Similarity Experiment - init')
        self.options = options
        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model)
        self.w2i = w2i
        self.wdims = options.wembedding_dims
        self.ldims = options.lstm_dims

        self.ext_embeddings = None
        #Model Parameters
        self.wlookup = self.model.add_lookup_parameters((len(w2i), self.wdims))

        self.__load_model()

        self.phrase_rnn = [dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model)]
        self.mlp_w = self.model.add_parameters((1, self.ldims))
        self.mlp_b = self.model.add_parameters(1)
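The scoring function is not included in this snippet; purely as a sketch, the single-layer LSTM and the 1-dimensional MLP defined above could score a phrase as follows. score_phrase is a hypothetical helper and assumes every word appears in w2i:

import dynet as dy

def score_phrase(exp, words):
    """Hypothetical scorer: encode the phrase with the LSTM and squash the last state to [0, 1]."""
    dy.renew_cg()
    embs = [exp.wlookup[exp.w2i[w]] for w in words]
    last = exp.phrase_rnn[0].initial_state().transduce(embs)[-1]
    return dy.logistic(exp.mlp_w * last + exp.mlp_b)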
Code example #9
def main():
	import sys
	action = sys.argv[1]
	emb_name = sys.argv[2]
	filen = sys.argv[3]
	testname = sys.argv[4]
	
	print 'setting up'
	train_words, train_tags, tag_idx, idx_tag, test_words, test_tags = setup(filen, testname)
	num_tags = len(tag_idx.keys())
	gru_model = dy.ParameterCollection()
	word_index, embeddings_mat = import_embeddings(emb_name, 300)
	hidden_layer_len = 200
	layers = 1 
	eparams = gru_model.lookup_parameters_from_numpy(embeddings_mat.A)  # .A converts the numpy matrix to an ndarray
	gru_unit = dy.GRUBuilder(layers, 300, hidden_layer_len, gru_model)
	param_mat = gru_model.add_parameters((hidden_layer_len, num_tags))
	param_bias = gru_model.add_parameters((num_tags))
	#gmodel.save("grumodel.model")
	#mdl2 = dy.ParameterCollection()
	#ep = mdl2.lookup_parameters_from_numpy(embeddings_mat.A)
	#parmat = mdl2.add_parameters((200, num_tags))
	#parbias = mdl2.add_parameters((num_tags))
	#gmodel.populate("grumodel.model")
	
	if action == 'train':
		print 'training'
		bsize, gmodel = training(3, eparams, gru_unit, idx_tag, gru_model, train_words, train_tags, word_index, param_mat, param_bias)
	if action == 'tune':
		print 'tuning'
		for r in range(3, 6):
			print 'training'
			print 'epochs: ', r
			bsize,gmodel = training(r, eparams, gru_unit, idx_tag, gru_model, train_words, train_tags, word_index, param_mat, param_bias)
			print 'testing'
			testing(idx_tag, gru_unit, gmodel, bsize, test_words, test_tags, word_index, param_mat, param_bias, eparams)
	if action == 'test':
		print 'training'
		#use 5 epochs
		bsize, gmodel = training(5, eparams, gru_unit, idx_tag, gru_model, train_words, train_tags, word_index, param_mat, param_bias)
		print 'testing'
		testing(idx_tag, gru_unit, gmodel, bsize, test_words, test_tags, word_index, param_mat, param_bias, eparams)

	return
Code example #10
    def set_up_predictor(self, nmt_model_path):
        """Initializes the predictor with the given NMT model.
            """

        model_folder = nmt_model_path
        best_model_path = model_folder + '/bestmodel.txt'
        hypoparams_file = model_folder + '/best.dev'

        hypoparams_file_reader = codecs.open(hypoparams_file, 'r', 'utf-8')
        hyperparams_dict = dict([
            line.strip().split(' = ')
            for line in hypoparams_file_reader.readlines()
        ])
        self.hyperparams = {
            'INPUT_DIM': int(hyperparams_dict['INPUT_DIM']),
            'HIDDEN_DIM': int(hyperparams_dict['HIDDEN_DIM']),
            #'FEAT_INPUT_DIM': int(hyperparams_dict['FEAT_INPUT_DIM']),
            'LAYERS': int(hyperparams_dict['LAYERS']),
            'VOCAB_PATH': hyperparams_dict['VOCAB_PATH']
        }

        self.pc = dy.ParameterCollection()

        #        print 'Loading vocabulary from {}:'.format(self.hyperparams['VOCAB_PATH'])
        #        self.vocab = Vocab.from_file(self.hyperparams['VOCAB_PATH'])
        #        BEGIN_CHAR   = u'<s>'
        #        STOP_CHAR   = u'</s>'
        #        UNK_CHAR = u'<unk>'
        #        self.BEGIN   = self.vocab.w2i[BEGIN_CHAR]
        #        self.STOP   = self.vocab.w2i[STOP_CHAR]
        #        self.UNK       = self.vocab.w2i[UNK_CHAR]
        self.BEGIN = utils.GO_ID
        self.STOP = utils.EOS_ID
        self.UNK = utils.UNK_ID
        #        self.hyperparams['VOCAB_SIZE'] = self.vocab.size()

        print 'Model Hyperparameters:'
        for k, v in self.hyperparams.items():
            print '{:20} = {}'.format(k, v)
        print

        print 'Loading model from: {}'.format(best_model_path)
        self.fbuffRNN, self.bbuffRNN, self.VOCAB_LOOKUP, self.decoder, self.R, self.bias, self.W_c, self.W__a, self.U__a, self.v__a = dy.load(
            best_model_path, self.pc)
Code example #11
 def __init__(self,
              in_dim,
              h_dim,
              c_in_dim,
              h_layers,
              pred_layer,
              embeds_file=None,
              activation=ACTIVATION_MAP["tanh"],
              mlp=0,
              activation_mlp=ACTIVATION_MAP["rectify"],
              backprob_embeds=True,
              noise_sigma=0.1,
              tasks_ids=[],
              initializer=INITIALIZER_MAP["glorot"],
              builder=BUILDERS["lstmc"],
              max_vocab_size=None):
     self.w2i = {}  # word to index mapping
     self.c2i = {}  # char to index mapping
     self.tasks_ids = tasks_ids  # list of names for each task
     self.task2tag2idx = {}  # need one dictionary per task
     self.pred_layer = [int(layer) for layer in pred_layer]  # at which layer to predict each task
     self.model = dynet.ParameterCollection()  #init model
     self.in_dim = in_dim
     self.h_dim = h_dim
     self.c_in_dim = c_in_dim
     self.activation = activation
     self.mlp = mlp
     self.activation_mlp = activation_mlp
     self.noise_sigma = noise_sigma
     self.h_layers = h_layers
     self.predictors = {
         "inner": [],
         "output_layers_dict": {},
         "task_expected_at": {}
     }  # the inner layers and predictors
     self.wembeds = None  # lookup: embeddings for words
     self.cembeds = None  # lookup: embeddings for characters
     self.embeds_file = embeds_file
     self.backprob_embeds = backprob_embeds
     self.initializer = initializer
     self.char_rnn = None  # biRNN for character input
     self.builder = builder  # default biRNN is an LSTM
     self.max_vocab_size = max_vocab_size
Code example #12
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('input',
                        help='Input file containing saved parameters.')
    parser.add_argument(
        '--hidden-units',
        type=int,
        default=256,
        help='Number of hidden units used in the LSTM controller.')
    parser.add_argument(
        '--source-alphabet-size',
        type=int,
        default=128,
        help='Number of symbols to use in the source sequence.')
    parser.add_argument('--embedding-size',
                        type=int,
                        default=64,
                        help='Input embedding size.')
    parser.add_argument(
        '--stack-embedding-size',
        type=int,
        default=256,
        help='Size of vector values stored on the neural stack.')
    parser.add_argument(
        '--test-length-range',
        type=parse_range,
        default=(65, 128),
        help='Range of lengths of source sequences during testing.',
        metavar='MIN,MAX')
    parser.add_argument('--test-data-size',
                        type=int,
                        default=1000,
                        help='Number of samples used in the test data.')
    args = parser.parse_args()

    params = dynet.ParameterCollection()
    builder = StackLSTMBuilder(params,
                               source_alphabet_size=args.source_alphabet_size,
                               embedding_size=args.embedding_size,
                               stack_embedding_size=args.stack_embedding_size,
                               hidden_units=args.hidden_units)
    params.populate(args.input)
    test(args, builder)
Code example #13
def main():
    print('Invoked as:', ' '.join(sys.argv), file=sys.stderr)
    parser = argparse.ArgumentParser()
    parser.add_argument('train_corpus')
    parser.add_argument('dev_corpus')
    parser.add_argument('--layers', type=int, default=1)
    parser.add_argument('--hidden_dim', type=int, default=128)
    parser.add_argument('--minibatch_size', type=int, default=1)
    parser.add_argument('--autobatch', action='store_true')
    parser.add_argument('--tied', action='store_true')
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--output', type=str, default='')
    harness.add_optimizer_args(parser)
    args = parser.parse_args()

    if args.output == '':
        args.output = '/tmp/model%d' % random.randint(0, 0xFFFF)
    print('Output file:', args.output, file=sys.stderr)

    action_vocab = Vocabulary()
    terminal_vocab = Vocabulary()
    rel_vocab = Vocabulary()
    train_corpus = read_corpus(args.train_corpus, action_vocab, terminal_vocab,
                               rel_vocab)
    action_vocab.frozen = True
    terminal_vocab.frozen = True
    rel_vocab.frozen = True
    dev_corpus = read_corpus(args.dev_corpus, action_vocab, terminal_vocab,
                             rel_vocab)

    print('Vocabulary sizes:',
          len(action_vocab),
          len(terminal_vocab),
          len(rel_vocab),
          file=sys.stderr)

    pc = dy.ParameterCollection()
    optimizer = harness.make_optimizer(args, pc)
    model = BottomUpDepLM(pc, action_vocab, len(terminal_vocab),
                          len(rel_vocab), args.layers, args.hidden_dim, False,
                          args.tied)
    print('Total parameters:', pc.parameter_count(), file=sys.stderr)

    harness.train(model, train_corpus, dev_corpus, optimizer, args)
Code example #14
    def __init__(self, vocab, options):
        random.seed(1)
        self.model = dy.ParameterCollection()
        self.trainer = helpers.get_trainer(options, self.model)
        self.get_violation = helpers.update_method(options)

        word_count = vocab.word_freq
        word_vocab = vocab.wordlookup_tbl
        pos_vocab = vocab.poslookup_tbl
        rel_vocab = vocab.rellookup_tbl
        self.rels = rel_vocab
        self.__enc = helpers.get_encoder(self.model, options, word_count,
                                         word_vocab, pos_vocab)
        self.__scr = helpers.get_scorer(self.model, options, rel_vocab)
        self.__tree_enc = TreeEncoder.get_tree_encoder(self.model, options,
                                                       rel_vocab)
        self.__train_flag = True
        self.oracle = options.oracle
        self.exploration_rate = options.exploration_rate
Code example #15
File: rnn_model.py  Project: halecakir/side-projects
    def __init__(self, w2i, permissions, options):
        super().__init__(options)
        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model)
        self.w2i = w2i
        self.wdims = options.wembedding_dims
        self.ldims = options.lstm_dims
        self.all_permissions = permissions

        # Model Parameters
        self.wlookup = self.model.add_lookup_parameters((len(w2i), self.wdims))

        # RNNs
        self.sentence_rnn = [
            dy.SimpleRNNBuilder(1, self.wdims, self.ldims, self.model)
        ]

        if options.external_embedding is not None:
            self.__load_external_embeddings()
Code example #16
File: wbag_predictor.py  Project: karlstratos/iaan
    def train(self, model_path, articles_X, articles_Y, dev_articles_X,
              dev_articles_Y, lrate, epochs):
        self.m = dy.ParameterCollection()
        self._prepare_model_directory(model_path)
        wseqs = [
            wseq for wseq_list in articles_X + articles_Y for wseq in wseq_list
        ]
        self.build_dicts(wseqs, [[]])
        self.init_parameters()
        self._train_report(articles_X, articles_Y, lrate, epochs, 1)
        self.common.turn_on_training(0)  # No dropout

        best_hY_X = float("inf")
        trainer = dy.AdamTrainer(self.m, lrate)
        for epoch in xrange(epochs):
            self._log("Epoch {0:2d}  ".format(epoch + 1), False)
            epoch_start_time = time.time()
            inds = [i for i in xrange(len(articles_Y))]
            random.shuffle(inds)
            avg_loss = 0.0
            for data_num, i in enumerate(inds):
                if (data_num + 1) % 100 == 0:
                    print data_num + 1,
                    sys.stdout.flush()
                loss = self.get_loss(articles_X[i], articles_Y[i])
                avg_loss += loss.value() / len(inds)
                loss.backward()
                trainer.update()

            self._log("updates: {0}  ".format(len(inds)), False)
            self._log("avg_loss: {0:.2f}  ".format(avg_loss), False)
            self._log("({0:.1f}s)  ".format(time.time() - epoch_start_time),
                      False)

            self.common.turn_off_training()
            hY_X = self.test(dev_articles_X, dev_articles_Y)
            self.common.turn_on_training(0)  # No dropout
            self._log("dev {0:.2f}  ".format(hY_X), False)
            if hY_X < best_hY_X:
                best_hY_X = hY_X
                self._log("new best - saving  ", False)
                self.save(model_path)
            self._log("")
Code example #17
    def _create_model(self):
        self.logger.info('Creating the model...')

        model = dy.ParameterCollection()

        # context gru encoders
        c_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                                 self.model_args["gru_input_dim"],
                                 self.model_args["gru_hidden_dim"],
                                 model)
        c_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                                 self.model_args["gru_input_dim"],
                                 self.model_args["gru_hidden_dim"],
                                 model)

        # question gru encoders
        q_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                                 self.model_args["gru_input_dim"],
                                 self.model_args["gru_hidden_dim"],
                                 model)
        q_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                                 self.model_args["gru_input_dim"],
                                 self.model_args["gru_hidden_dim"],
                                 model)

        # embedding parameter
        lookup_params = model.add_lookup_parameters((self.model_args["vocab_size"],
                                                     self.model_args["gru_input_dim"]),
                                                    dy.UniformInitializer(self.model_args["lookup_init_scale"]))

        unk_lookup_params = model.add_lookup_parameters((self.model_args["number_of_unks"],
                                                         self.model_args["gru_input_dim"]),
                                                        dy.UniformInitializer(self.model_args["lookup_init_scale"]))

        self.logger.info('Done creating the model')

        model_parameters = {"c_fwdRnn": c_fwdRnn,
                            "c_bwdRnn": c_bwdRnn,
                            "q_fwdRnn": q_fwdRnn,
                            "q_bwdRnn": q_bwdRnn,
                            "lookup_params": lookup_params,
                            "unk_lookup_params": unk_lookup_params}
        return model, model_parameters
Code example #18
File: simple_rnnlm.py  Project: ekayen/parsing_as_LM
 def allocate_params(self):
     """
             Allocates memory for the model parameters.
             """
     self.model = dy.ParameterCollection()
     self.word_embeddings = self.model.add_lookup_parameters(
         (self.lexicon.size(), self.word_embedding_size))
     self.rnn = dy.LSTMBuilder(
         2, self.word_embedding_size + self.char_embedding_size,
         self.hidden_size, self.model)
     self.char_rnn = CharRNNBuilder(self.char_embedding_size,
                                    self.char_embedding_size, self.charset,
                                    self.model)
     self.word_softmax = dy.ClassFactoredSoftmaxBuilder(
         self.hidden_size,
         self.brown_file,
         self.lexicon.words2i,
         self.model,
         bias=True)
Code example #19
File: arc_hybrid.py  Project: mdelhoneux/avc_analyser
    def __init__(self, vocab, options):

        # import here so we don't load Dynet if just running parser.py --help for example
        from multilayer_perceptron import MLP
        from feature_extractor import FeatureExtractor
        import dynet as dy
        global dy

        global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
        LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0,1,2,3

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {'tanh': dy.tanh, 'sigmoid': dy.logistic, 'relu':
                            dy.rectify, 'tanh3': (lambda x:
                            dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))}
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle


        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k
        self.recursive_composition = options.use_recursive_composition
        #ugly hack

        #dimensions depending on extended features
        self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0) + (1 if self.recursive_composition else 0)
        self.feature_extractor = FeatureExtractor(self.model,options,vocab,self.nnvecs)
        self.irels = self.feature_extractor.irels

        if options.no_bilstms > 0:
            mlp_in_dims = options.lstm_output_size*2*self.nnvecs*(self.k+1)
        else:
            mlp_in_dims = options.lstm_input_size*self.nnvecs*(self.k+1)

        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims, options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, 4, self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled' ,mlp_in_dims, options.mlp_hidden_dims,
                               options.mlp_hidden2_dims,2*len(self.irels)+2,self.activation)
Code example #20
 def __init__(
         self,
         src_vocab,
         tgt_vocab,
         src_emb_dim,
         tgt_emb_dim,
         enc_nlayers,
         enc_hidden_dim,  # encoder settings
         dec_nlayers,
         dec_hidden_dim,  # decoder settings
         att_dim,
         label_smoothing):
     # Model settings
     self.label_smoothing = label_smoothing
     # Contains all of the model's parameters
     self.pc = dy.ParameterCollection()
     # Vocabulary objects
     self.src_vocab = src_vocab
     self.tgt_vocab = tgt_vocab
     # Embeddings
     self.src_embeddings = self.pc.add_lookup_parameters(
         (len(self.src_vocab), src_emb_dim), name='src-embedding')
     self.tgt_embeddings = self.pc.add_lookup_parameters(
         (len(self.tgt_vocab), tgt_emb_dim), name='tgt-embedding')
     # Init encoder/decoder
     self.encoder = BiLSTMEncoder(self.pc, enc_nlayers, src_emb_dim,
                                  enc_hidden_dim)
     self.decoder = LSTMDecoder(self.pc,
                                dec_nlayers, tgt_emb_dim, dec_hidden_dim,
                                len(tgt_vocab), enc_hidden_dim)
     # Init attention
     self.attention = MLPAttention(self.pc, att_dim, enc_hidden_dim,
                                   dec_hidden_dim)
     # For affine transform between last state of encoder and first state of decoder
     self.W_bridge = self.pc.add_parameters(
         (dec_hidden_dim, enc_hidden_dim))  # TODO: make class?
     self.b_bridge = self.pc.add_parameters(dec_hidden_dim)
     # Softmax over tgt vocab
     self.W_sm = self.pc.add_parameters((len(tgt_vocab), dec_hidden_dim))
     self.b_sm = self.pc.add_parameters(len(tgt_vocab))
     # For storing matrix of encodings produced by encoder
     self.encodings = None
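The comment above describes an affine bridge between the encoder's last state and the decoder's first state. A minimal sketch of that step, under the assumption that last_encoder_state is the final encoder output expression (bridge is a hypothetical helper):

import dynet as dy

def bridge(model, last_encoder_state):
    """Hypothetical: affine map from the encoder's final state to the decoder's initial state."""
    return dy.affine_transform([model.b_bridge, model.W_bridge, last_encoder_state])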
Code example #21
def run_test2(args):

    model = dy.ParameterCollection()
    # [parser] = dy.load(args.model_path_base, model)
    [parser] = dy.load(
        "models/chartdyRBTC-model_addr_dytree_giga_0.4_200_1_chartdyRBTC_dytree_1_houseno_0_0_dev=0.90",
        model)

    test_chunk_insts = util.read_chunks(args.test_path, args.normal)

    # ftreelog = open(args.expname + '.test.predtree.txt', 'w', encoding='utf-8')
    ftreelog = open('aaa' + '.test.predtree.txt', 'w', encoding='utf-8')
    test_predicted = []
    test_start_time = time.time()
    test_gold = []
    for inst in test_chunk_insts:
        chunks = util.inst2chunks(inst)
        test_gold.append(chunks)

    for x, chunks in test_chunk_insts:
        dy.renew_cg()
        sentence = [(parse.XX, ch) for ch in x]
        predicted, _ = parser.parse(sentence)
        pred_tree = predicted.convert()
        ftreelog.write(pred_tree.linearize() + '\n')
        test_predicted.append(pred_tree.to_chunks())

    ftreelog.close()

    # test_fscore = evaluate.eval_chunks2(args.evalb_dir, test_gold, test_predicted, output_filename=args.expname + '.test.txt')  # evalb
    test_fscore = evaluate.eval_chunks2(args.evalb_dir,
                                        test_gold,
                                        test_predicted,
                                        output_filename='aaaabbbb' +
                                        '.test.txt')  # evalb

    print("test-fscore {} "
          "test-elapsed {} ".format(
              test_fscore,
              format_elapsed(test_start_time),
          ))
Code example #22
    def train(self, allSentences, trainManager, devData, predictTrain):
        if not self.__parser.handlesNonProjectiveTrees():
            sentences = utils.filterNonProjective(allSentences)
            self.__logger.info("Filtered %i non-projective trees " %
                               (len(allSentences) - len(sentences)))
        else:
            sentences = allSentences

        # fill all dictionaries
        self.__reprBuilder.readData(sentences)
        self.__labeler.readData(sentences)

        # initialize parameters
        self.__model = dynet.ParameterCollection()
        self.__initializeParameters()

        # for logging
        trainLogger = NNParserTrainLogger(predictTrain)
        self.__trainOnSentences(sentences, devData, trainManager, trainLogger,
                                predictTrain)
Code example #23
File: main.py  Project: vidurj/parser-adaptation
def run_test(args):
    if not os.path.exists(args.experiment_directory):
        os.mkdir(args.experiment_directory)
    print("Loading test trees from {}...".format(args.input_file))

    test_treebank = trees.load_trees(args.input_file)
    test_tokenized_lines = parse_trees_to_string_lines(test_treebank)
    test_embeddings_file = compute_elmo_embeddings(test_tokenized_lines,
                                                   os.path.join(
                                                       args.experiment_directory,
                                                       'test_embeddings'))

    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path))
    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path, model)

    print("Parsing test sentences...")
    check_performance(parser, test_treebank, test_embeddings_file, args)
Code example #24
def init_model_c(vocab, tag_set, trained_model):
    model = dy.ParameterCollection()
    params = {}
    TAGSET_SIZE = len(tag_set)
    VOCAB_SIZE = len(vocab)

    params["lookup"] = model.add_lookup_parameters((VOCAB_SIZE, LSTM_INPUT_DIM), init='uniform', scale=(np.sqrt(6)/np.sqrt(LSTM_INPUT_DIM)))

    f1_lstm = dy.LSTMBuilder(LSTM_LAYERS, LSTM_INPUT_DIM, LSTM_STATE_DIM, model)
    b1_lstm = dy.LSTMBuilder(LSTM_LAYERS, LSTM_INPUT_DIM, LSTM_STATE_DIM, model)
    f2_lstm = dy.LSTMBuilder(LSTM_LAYERS, 2 * LSTM_STATE_DIM, LSTM_STATE_DIM, model)
    b2_lstm = dy.LSTMBuilder(LSTM_LAYERS, 2 * LSTM_STATE_DIM, LSTM_STATE_DIM, model)
    lstms = (f1_lstm, b1_lstm, f2_lstm, b2_lstm)

    params["w"] = model.add_parameters((TAGSET_SIZE, 2 * LSTM_STATE_DIM))
    params["bias"] = model.add_parameters((TAGSET_SIZE))

    model.populate(trained_model)
    trainer = dy.AdamTrainer(model)
    return (lstms, params, model, trainer)
Code example #25
 def load_model(path, model_version):
     full_saving_path = os.path.join(path, model_version)
     new_model_obj = pickle.load(open(full_saving_path + ".p", "rb"))
     model_to_load = dy.ParameterCollection()
     W_emb, W_cnn, b_cnn, W_mlp, b_mlp, V_mlp, a_mlp = dy.load(
         full_saving_path, model_to_load)
     new_model_obj.W_emb = W_emb
     new_model_obj.W_cnn = W_cnn
     new_model_obj.b_cnn = b_cnn
     new_model_obj.W_mlp = W_mlp
     new_model_obj.b_mlp = b_mlp
     new_model_obj.V_mlp = V_mlp
     new_model_obj.a_mlp = a_mlp
     # rebuild defaultdicts from the plain dicts that were pickled
     # (pickle cannot serialize the lambda default factories)
     new_model_obj.w2i = defaultdict(lambda: len(new_model_obj.w2i),
                                     new_model_obj.w2i)
     new_model_obj.t2i = defaultdict(lambda: len(new_model_obj.t2i),
                                     new_model_obj.t2i)
     new_model_obj.model = model_to_load
     return new_model_obj
Code example #26
    def train(self, data, dev=None):
        self.m = dy.ParameterCollection()
        self.__init_params(data)
        self.__enable_lstm_dropout()
        self.__is_training = True
        if os.path.isfile(self.model_path): os.remove(self.model_path)
        if not os.path.exists(self.model_path): os.makedirs(self.model_path)

        trainer = dy.AdamTrainer(self.m, self.learning_rate)
        perf_best = 0.
        exists = False
        for epoch in xrange(self.epochs):
            inds = [i for i in xrange(len(data.seqs))]
            random.shuffle(inds)
            for i in inds:
                loss = self.get_loss(data.seqs[i])
                loss.backward()
                trainer.update()

            if dev:
                self.__is_training = False
                self.__disable_lstm_dropout()
                perf, _ = self.get_perf(dev)
                print "Epoch {0:d} F1: {1:.2f}".format(epoch + 1, perf),
                if perf > perf_best:
                    perf_best = perf
                    print 'new best - saving model',
                    self.save()
                    exists = True
                self.__is_training = True
                self.__enable_lstm_dropout()
                print

            else:
                self.save()

        if exists:
            m = Mention2Vec()
            m.load_and_populate(self.model_path)
            perf, _ = m.get_perf(dev)
            print "Best dev F1: {0:.2f}".format(perf)
Code example #27
def test_softmax_model():
    """Train softmax model for a number of steps."""
    config = Config()

    # Generate random data to train the model on
    np.random.seed(1234)
    inputs = np.random.rand(config.n_samples, config.n_features)
    labels = np.zeros((config.n_samples, config.n_classes), dtype=np.int32)
    labels[:, 1] = 1
    #for i in xrange(config.n_samples):
    #    labels[i, i%config.n_classes] = 1

    mini_batches = [[
        inputs[k:k + config.batch_size], labels[k:k + config.batch_size]
    ] for k in xrange(0, config.n_samples, config.batch_size)]

    m = dy.ParameterCollection()
    trainer = dy.SimpleSGDTrainer(m)
    trainer.learning_rate = config.lr
    net = SoftmaxModel(config, m)
    for epoch in range(config.n_epochs):
        start_time = time.time()
        for mini_batch in mini_batches:
            dy.renew_cg()
            losses = []
            for ix in xrange(config.batch_size):
                l = net.create_network_return_loss(
                    np.array(mini_batch[0][ix]).reshape(1, config.n_features),
                    np.array(mini_batch[1][ix]).reshape(1, config.n_classes))
                losses.append(l)
            loss = dy.esum(losses) / config.batch_size
            loss.forward()
            loss.backward()
            trainer.update()
        duration = time.time() - start_time
        print 'Epoch {:}: loss = {:.2f} ({:.3f} sec)'.format(
            epoch, loss.value(), duration)

    print loss.value()
    assert loss.value() < .5
    print "Basic (non-exhaustive) classifier tests pass"
Code example #28
    def test_get_bilstm_all_update(self):
        pc = dy.ParameterCollection()
        trainer = dy.AdamTrainer(pc, 0.1)
        flstm = dy.LSTMBuilder(1, 1, 1, pc)
        blstm = dy.LSTMBuilder(1, 1, 1, pc)
        model = Model()
        common = CommonArchitecture(model)

        def make_inputs():
            return [dy.inputTensor([1.0]), dy.inputTensor([2.0]),
                    dy.inputTensor([3.0]), dy.inputTensor([4.0])]

        def test(sqnorm_original_value, assert_equal):
            dy.renew_cg()
            inputs = make_inputs()
            avg = dy.average(common.get_bilstm_all(inputs, flstm, blstm))
            sqnorm = dy.squared_norm(avg)
            if assert_equal:
                self.assertAlmostEqual(sqnorm_original_value, sqnorm.value(),
                                       places=10)
            else:
                self.assertNotAlmostEqual(sqnorm_original_value, sqnorm.value(),
                                          places=10)

        inputs = make_inputs()
        avg = dy.average(common.get_bilstm_all(inputs, flstm, blstm, False))
        sqnorm = dy.squared_norm(avg)
        sqnorm_original_value = sqnorm.value()
        sqnorm.backward()
        trainer.update()  # Shouldn't update LSTMs.

        test(sqnorm_original_value, True)

        dy.renew_cg()
        inputs = make_inputs()
        avg = dy.average(common.get_bilstm_all(inputs, flstm, blstm))
        sqnorm = dy.squared_norm(avg)
        sqnorm.backward()
        trainer.update()  # Should update LSTMs.

        test(sqnorm_original_value, False)
Code example #29
    def __init__(self, config, pretrained_embeddings, parser):
        self.config = config
        print len(pretrained_embeddings)
        self.m = dy.ParameterCollection()
        self.Initializer = dy.ConstInitializer(0.0)
        self.pW = self.m.add_parameters(
            (self.config.n_features * self.config.embed_size,
             self.config.hidden_size))
        self.pB1 = self.m.add_parameters((1, self.config.hidden_size),
                                         init=self.Initializer)
        self.pU = self.m.add_parameters(
            (self.config.hidden_size, self.config.n_classes))
        self.pB2 = self.m.add_parameters((1, self.config.n_classes),
                                         init=self.Initializer)

        self.word_lookup = self.m.lookup_parameters_from_numpy(
            pretrained_embeddings)
        self.pos_lookup = self.m.add_lookup_parameters(
            (self.config.n_pos, self.config.embed_size))

        self.trainer = dy.AdamTrainer(self.m)
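The forward pass is not shown above; given the row-vector parameter shapes (pW is (n_features * embed_size, hidden_size) and pB1 is (1, hidden_size)), a plausible sketch is below. forward_loss, word_ids, and pos_ids are hypothetical; together the two id lists are assumed to supply n_features feature embeddings:

import dynet as dy

def forward_loss(net, word_ids, pos_ids, gold_class):
    """Hypothetical forward pass matching the row-vector parameter shapes above."""
    dy.renew_cg()
    feats = [net.word_lookup[i] for i in word_ids] + [net.pos_lookup[i] for i in pos_ids]
    x = dy.transpose(dy.concatenate(feats))          # (1, n_features * embed_size)
    h = dy.rectify(x * net.pW + net.pB1)             # (1, hidden_size)
    logits = h * net.pU + net.pB2                    # (1, n_classes)
    return dy.pickneglogsoftmax(dy.reshape(logits, (net.config.n_classes,)), gold_class)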
Code example #30
    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.001):
        self._model = dy.ParameterCollection()

        self._input_dim = input_dim
        self._hidden_dim = hidden_dim
        self._output_dim = output_dim

        self._rnn = dy.SimpleRNNBuilder(self.LAYERS, self._input_dim,
                                        self._hidden_dim, self._model)
        # self._rnn.disable_dropout()
        self._W = self._model.add_parameters(
            (self._output_dim, self._hidden_dim), init=dy.NormalInitializer())

        self._learning_rate = learning_rate
        self._trainer = dy.MomentumSGDTrainer(
            self._model, learning_rate=self._learning_rate)

        self._l2_param = 0.0006
        # self._l2_param = 0.0

        self._init_layers()