Beispiel #1
0
    def estimate_joint_priors(self, qp_pairs, smooth=False):
        """Estimate a joint distribution and its two marginals from samples.

        Every element of ``qp_pairs`` is a ``(qZ_X, pZ_Y)`` pair of DyNet
        expressions.  The joint sample of a pair is the product
        ``qZ_X * transpose(pZ_Y)``; the three returned estimates are plain
        averages over the collected samples.

        Args:
            qp_pairs: iterable of ``(qZ_X, pZ_Y)`` expression pairs.
            smooth: when true, ``self.model.pseudocount`` uniform samples are
                added to each sample list before averaging.

        Returns:
            The tuple ``(joint, qZ, pZ)`` of averaged expressions.
        """
        model = self.model
        zsize = model.zsize
        pseudocount = model.pseudocount

        joint_samples = []
        qZ_samples = []
        pZ_samples = []
        for qZ_X, pZ_Y in qp_pairs:
            joint_samples.append(qZ_X * dy.transpose(pZ_Y))
            qZ_samples.append(qZ_X)
            pZ_samples.append(pZ_Y)

        if smooth:
            # Uniform vector (entries 1/zsize) and uniform matrix
            # (entries 1/zsize**2) used as pseudo-observations.
            v_unif = dy.inputTensor(np.ones((zsize)) / zsize)
            A_unif = dy.inputTensor(np.ones((zsize, zsize)) / (zsize**2))
            # List repetition replaces the Python-2-only ``xrange`` loop and
            # behaves the same on Python 2 and Python 3.
            joint_samples.extend([A_unif] * pseudocount)
            qZ_samples.extend([v_unif] * pseudocount)
            pZ_samples.extend([v_unif] * pseudocount)

        joint = dy.average(joint_samples)
        qZ = dy.average(qZ_samples)
        pZ = dy.average(pZ_samples)

        return joint, qZ, pZ
def Train(instances, itercount):
    """Build the per-instance losses for one pass over ``instances``.

    A fresh computation graph is started, then for every instance the
    lower-cased tokens of both expressions are embedded through the
    module-level ``ontoparser``, averaged, concatenated and scored by its
    two-layer network.

    Args:
        instances: iterable of ``(fexpr, sexpr, groundtruth)`` triples; the
            first two items are whitespace-separated strings.
        itercount: not used by this function; kept for existing callers.

    Returns:
        ``(loss, errors)``: the list of negative-log-softmax loss expressions
        (one per instance) and the number of wrong predictions as a float.
    """
    dy.renew_cg()
    ontoparser.initialize_graph_nodes(train=True)

    loss = []
    errors = 0.0
    for fexpr, sexpr, groundtruth in instances:
        # context insensitive embeddings or local embeddings
        subtype = [sb.lower() for sb in fexpr.split()]
        supertype = [sp.lower() for sp in sexpr.split()]
        # The status flag returned alongside the embeddings is not consulted
        # during training.
        fembs, _ = ontoparser.get_linear_embd(subtype)
        sembs, _ = ontoparser.get_linear_embd(supertype)

        fembs = fembs[0] if len(fembs) == 1 else dy.average(fembs)
        sembs = sembs[0] if len(sembs) == 1 else dy.average(sembs)

        x = dy.concatenate([fembs, sembs])

        # NOTE: an earlier revision also computed the cosine similarity of
        # the two embeddings here; the value was never used, so the extra
        # forward evaluations it forced have been dropped.
        output = ontoparser.W2 * (dy.rectify(ontoparser.W1 * x) +
                                  ontoparser.b1) + ontoparser.b2

        prediction = np.argmax(output.npvalue())
        loss.append(
            dy.pickneglogsoftmax(output, ontoparser.meta.tdmaps[groundtruth]))
        if groundtruth != ontoparser.meta.rmaps[prediction]:
            errors += 1
    return loss, errors
    def predict_hyp(self, subtype, supertype):
        """Predict the relation between two phrases.

        Both phrases are split on whitespace, stop words (``self.stop``) are
        removed and the remaining tokens are lemmatized.  The averaged
        embeddings of the two token lists are concatenated and scored by the
        network.

        Returns:
            ``(relation, confidence, similarity)`` where ``relation`` is
            ``self.meta.rmaps`` of the arg-max class, ``confidence`` is the
            largest softmax probability and ``similarity`` is one minus the
            cosine distance of the two averaged embeddings.  ``None`` is
            returned when both token lists are equal, when either has no
            embeddings, or when either embedding status flag is ``False``.
        """
        dy.renew_cg()
        self.initialize_graph_nodes()

        def content_lemmas(phrase):
            # Lemmatized tokens of ``phrase`` that are not stop words.
            return [
                self.lmtzr.lemmatize(tok) for tok in phrase.split()
                if tok not in self.stop
            ]

        sub_tokens = content_lemmas(subtype)
        super_tokens = content_lemmas(supertype)
        if sub_tokens == super_tokens:
            return

        # context insensitive embeddings or local embeddings
        sub_embs, sub_ok = self.get_linear_embd(sub_tokens)
        super_embs, super_ok = self.get_linear_embd(super_tokens)

        if len(sub_embs) < 1 or len(super_embs) < 1:
            return
        if sub_ok is False or super_ok is False:
            return

        sub_vec = sub_embs[0] if len(sub_embs) == 1 else dy.average(sub_embs)
        super_vec = (super_embs[0]
                     if len(super_embs) == 1 else dy.average(super_embs))

        features = dy.concatenate([sub_vec, super_vec])
        similarity = 1 - distance.cosine(sub_vec.npvalue(),
                                         super_vec.npvalue())

        hidden = dy.rectify(self.W1 * features) + self.b1
        output = dy.softmax(self.W2 * hidden + self.b2)
        probabilities = output.npvalue()
        best_class = np.argmax(probabilities)
        return self.meta.rmaps[best_class], np.max(probabilities), similarity
Beispiel #4
0
    def _predict(self, batch, train=True):
        """Return one probability expression per item of ``batch``.

        Each sentence is embedded word by word, the word vectors are
        averaged, and the average is passed through a tanh hidden layer and a
        logistic output unit.

        Args:
            batch: iterable of pairs; the second element of each pair is the
                sequence of word indices looked up in ``self.embed`` (the
                first element is ignored here).
            train: when truthy, dropout with rate 0.5 is applied to every
                word vector before averaging.

        Returns:
            List of DyNet expressions, one logistic output per batch item.
        """
        # load the network parameters
        W_hid = dy.parameter(self.W_hid)
        b_hid = dy.parameter(self.b_hid)
        w_clf = dy.parameter(self.w_clf)
        b_clf = dy.parameter(self.b_clf)

        probas = []
        # predict the probability of positive sentiment for each sentence
        for _, sent in batch:
            word_vecs = [dy.lookup(self.embed, w) for w in sent]
            if train:
                # Apply dropout to the word vectors as regularization
                # during training.
                word_vecs = [dy.dropout(vec, 0.5) for vec in word_vecs]
            sent_embed = dy.average(word_vecs)

            # hid = tanh(b + W * sent_embed)
            # but it's faster to use affine_transform in dynet
            hid = dy.affine_transform([b_hid, W_hid, sent_embed])
            hid = dy.tanh(hid)

            y_score = dy.affine_transform([b_clf, w_clf, hid])
            y_proba = dy.logistic(y_score)
            probas.append(y_proba)

        return probas
        def __call__(self, query, options, gold, lengths, query_no):
            if len(options) == 1:
                return None, 0

            final = []
            if args.word_vectors:
                qvecs = [dy.lookup(self.pEmbedding, w) for w in query]
                qvec_max = dy.emax(qvecs)
                qvec_mean = dy.average(qvecs)
            for otext, features in options:
                if not args.no_features:
                    inputs = dy.inputTensor(features)
                if args.word_vectors:
                    ovecs = [dy.lookup(self.pEmbedding, w) for w in otext]
                    ovec_max = dy.emax(ovecs)
                    ovec_mean = dy.average(ovecs)
                    if args.no_features:
                        inputs = dy.concatenate(
                            [qvec_max, qvec_mean, ovec_max, ovec_mean])
                    else:
                        inputs = dy.concatenate(
                            [inputs, qvec_max, qvec_mean, ovec_max, ovec_mean])
                if args.drop > 0:
                    inputs = dy.dropout(inputs, args.drop)
                h = inputs
                for pH, pB in zip(self.hidden, self.bias):
                    h = dy.affine_transform([pB, pH, h])
                    if args.nonlin == "linear":
                        pass
                    elif args.nonlin == "tanh":
                        h = dy.tanh(h)
                    elif args.nonlin == "cube":
                        h = dy.cube(h)
                    elif args.nonlin == "logistic":
                        h = dy.logistic(h)
                    elif args.nonlin == "relu":
                        h = dy.rectify(h)
                    elif args.nonlin == "elu":
                        h = dy.elu(h)
                    elif args.nonlin == "selu":
                        h = dy.selu(h)
                    elif args.nonlin == "softsign":
                        h = dy.softsign(h)
                    elif args.nonlin == "swish":
                        h = dy.cmult(h, dy.logistic(h))
                final.append(dy.sum_dim(h, [0]))

            final = dy.concatenate(final)
            nll = -dy.log_softmax(final)
            dense_gold = []
            for i in range(len(options)):
                dense_gold.append(1.0 / len(gold) if i in gold else 0.0)
            answer = dy.inputTensor(dense_gold)
            loss = dy.transpose(answer) * nll
            predicted_link = np.argmax(final.npvalue())

            return loss, predicted_link
Beispiel #6
0
    def __init__(self, nmodel, qinfo, vw, init_example=True):
        """Create a state bound to a neural model and one question.

        Args:
            nmodel: model object; its ``pH``, ``E``, ``NulW``, ``ColW``,
                ``SelColW``, ``SelColWhereW`` and ``builders`` are read here.
            qinfo: question information handed to ``SqaState.__init__``.
            vw: vocabulary wrapper; ``vw.w2i`` maps words to indices and must
                contain ``"_UNK_"``.
            init_example: when false, only the basic attributes are set and
                no embeddings or recurrent states are built.
        """
        SqaState.__init__(self, qinfo)
        self.path_score_expression = dt.scalarInput(0)
        self.score = 0
        self.nm = nmodel
        self.vw = vw
        self.H = dt.parameter(self.nm.pH)

        if not init_example:
            return

        unk_id = self.vw.w2i["_UNK_"]

        def embed(words):
            # Embedding of every word; unknown words map to "_UNK_".
            return [self.nm.E[self.vw.w2i.get(w, unk_id)] for w in words]

        # vectors of question words
        self.ques_emb = embed(self.qinfo.ques_word_sequence)

        # avg. vectors of column names
        self.headers_embs = [
            dt.average(embed(colname_words))
            for colname_words in self.qinfo.headers_word_sequences
        ]

        # avg. vectors of table entries (one list of cell vectors per row)
        self.entries_embs = [
            [dt.average(embed(cell_words)) for cell_words in row_cells]
            for row_cells in self.qinfo.entries_word_sequences
        ]

        self.NulW = dt.parameter(self.nm.NulW)
        self.ColW = dt.parameter(self.nm.ColW)
        self.SelColW = dt.parameter(self.nm.SelColW)
        self.SelColWhereW = dt.parameter(self.nm.SelColWhereW)

        # question LSTM: run the first builder over the words left to right
        # and the second over the reversed sequence, then re-align the
        # backward outputs with the word order.
        f_init, b_init = [b.initial_state() for b in self.nm.builders]
        wembs = embed(self.qinfo.ques_word_sequence)
        self.fw = [st.output() for st in f_init.add_inputs(wembs)]
        backward = [st.output() for st in b_init.add_inputs(reversed(wembs))]
        self.bw = backward[::-1]
Beispiel #7
0
    def evaluate(self, input_sentences, labels):
        """Score one document without dropout and compare with ``labels``.

        Every sentence is preprocessed, embedded and run through the word
        RNN; the averaged word outputs form the sentence vector.  The
        sentence RNN then runs over all sentence vectors, and each of its
        outputs is turned into a probability by ``self._get_probs`` and into
        a label by ``self._predict``.

        Returns:
            ``(loss, pred_labels, correct, total)``: the value of the summed
            loss (``-log(prob)`` for label 1, ``-log(1 - prob)`` otherwise),
            the predicted labels, the number of predictions equal to the
            given label and the number of predictions made.
        """
        dy.renew_cg()

        self.word_rnn.disable_dropout()
        self.sent_rnn.disable_dropout()

        sentence_vecs = []
        for raw_sentence in input_sentences:
            token_ids = self._preprocess_input(raw_sentence, self.word_to_ix)
            word_states = self._run_rnn(self.word_rnn,
                                        self._embed_sentence(token_ids))
            sentence_vecs.append(dy.average(word_states))

        rnn_outputs = self._run_rnn(self.sent_rnn, sentence_vecs)

        doc_output_w = dy.parameter(self.doc_output_w)
        doc_output_b = dy.parameter(self.doc_output_b)
        doc_output = dy.tanh(doc_output_w * dy.average(rnn_outputs) +
                             doc_output_b)

        # Running sum of the probability-weighted sentence outputs seen so
        # far; it is fed back into ``_get_probs`` for the next sentence.
        sum_output = dy.zeros(self.args.sent_hidden_dim)
        loss = dy.zeros(1)
        pred_labels = []
        correct = 0
        for position, rnn_output in enumerate(rnn_outputs):
            abspos_embed = dy.lookup(self.abspos_embeddings,
                                     self.abspos_ix[position])
            relpos_embed = dy.lookup(self.relpos_embeddings,
                                     self.relpos_ix[position])

            prob = self._get_probs(rnn_output, doc_output, sum_output,
                                   abspos_embed, relpos_embed)
            sum_output += dy.cmult(prob, rnn_output)

            pred_label = self._predict(prob)
            pred_labels.append(pred_label)
            gold_label = labels[position]
            if pred_label == gold_label:
                correct += 1

            if gold_label == 1:
                loss -= dy.log(prob)
            else:
                loss -= dy.log(dy.scalarInput(1) - prob)

        # One prediction is made per sentence, so the list length is the
        # total number of evaluated items.
        return loss.value(), pred_labels, correct, len(pred_labels)
Beispiel #8
0
    def train(self, trainning_set):
        """Run one training pass over ``trainning_set``.

        For every datapoint ``(query, hyper, target)`` the averaged word
        embeddings of ``query`` and ``hyper`` are projected through the
        ``self.k`` matrices in ``self.Phis`` and scored with a logistic unit;
        the binary log loss against ``target`` is collected.  Parameters are
        updated once more than 50 losses have accumulated, and once more at
        the end for any remainder.

        The mean summed loss per parameter update is printed.  The final
        partial chunk is included in that mean, and nothing is printed when
        the training set is empty (previously this raised
        ``ZeroDivisionError`` for fewer than 51 datapoints).
        """
        loss_all = 0
        total_all = 0
        losses = []

        def mean_embedding(words):
            # Average word embedding; out-of-vocabulary words use row 0.
            return dy.average([
                self.word_embeddings[self.w2i[w]]
                if w in self.w2i else self.word_embeddings[0] for w in words
            ])

        def flush(pending):
            # Sum the pending losses, update the parameters, start a fresh
            # graph and return the summed loss value.
            loss = dy.esum(pending)
            value = loss.scalar_value()
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
            return value

        for datapoint in trainning_set:
            eq = mean_embedding(datapoint[0])
            eh = mean_embedding(datapoint[1])
            t = dy.scalarInput(datapoint[2])

            Ps = [self.Phis[i].expr() * eq for i in range(self.k)]
            P = dy.transpose(dy.concatenate_cols(Ps))
            s = P * eh
            y = dy.reshape(dy.logistic(self.W.expr() * s + self.b.expr()),
                           (1, ))

            losses.append(dy.binary_log_loss(y, t))

            # process losses in chunks
            if len(losses) > 50:
                loss_all += flush(losses)
                total_all += 1
                losses = []

        # consider any remaining losses
        if losses:
            loss_all += flush(losses)
            total_all += 1

        if total_all:
            print(f'loss: {loss_all/total_all:.4f}')
Beispiel #9
0
 def _calc_scores_embedded_mlp(self,
                               sentences,
                               W_emb,
                               W_mlp,
                               b_mlp,
                               V_mlp,
                               a_mlp,
                               meta_data=None):
     """
     Score one instance with the embedding + MLP network.

     A new computation graph is started on every call.  The words of each
     sentence are looked up in ``W_emb`` and averaged, the sentence averages
     are averaged again, optional meta features are appended, and the result
     goes through a tanh hidden layer and a logistic output layer.

     :param sentences: list of sentences, each a list of word indices
     :param W_emb: lookup parameter holding the word embeddings
     :param W_mlp: weight matrix of the hidden layer; when ``meta_data`` is
         given it is applied to the averaged embedding concatenated with the
         meta feature vector
     :param b_mlp: bias of the hidden layer
     :param V_mlp: weight matrix of the logistic output layer
     :param a_mlp: bias of the logistic output layer
     :param meta_data: dict or None
         meta features of the instance; values are used in sorted item
         order.  If None, meta data is not used
     :return: dynet expression holding the logistic output of the network
     """
     dy.renew_cg()

     # Mean word embedding per sentence, then the mean over all sentences.
     sentence_means = [
         dy.average([dy.lookup(W_emb, w) for w in words])
         for words in sentences
     ]
     features = dy.average(sentence_means)

     if meta_data is not None:
         # Sorting the items fixes the feature order independently of the
         # dict's insertion order.
         meta_values = [value for _, value in sorted(meta_data.items())]
         features = dy.concatenate([features, dy.inputVector(meta_values)])

     hidden = dy.tanh((W_mlp * features) + b_mlp)
     return dy.logistic((V_mlp * hidden) + a_mlp)
Beispiel #10
0
 def get_hier_bilstm_avg(self,
                         input_seqs,
                         flstm1,
                         blstm1,
                         flstm2,
                         blstm2,
                         update_flag=True):
     """Encode a list of sequences with two stacked averaging BiLSTM levels.

     Each sequence in ``input_seqs`` is passed to ``self.get_bilstm_all``
     with the first pair of LSTMs and its outputs are averaged.  The
     resulting list of averages is passed to ``self.get_bilstm_all`` with
     the second pair, and the average of those outputs is returned.
     ``update_flag`` is forwarded unchanged to both levels.
     """
     lower_level = [
         dy.average(
             self.get_bilstm_all(seq, flstm1, blstm1, update_flag))
         for seq in input_seqs
     ]
     upper_level = self.get_bilstm_all(lower_level, flstm2, blstm2,
                                       update_flag)
     return dy.average(upper_level)
Beispiel #11
0
    def beam_train_max_margin_with_answer_guidence(self, init_state, gold_ans):
        """Perform one max-margin update guided by the gold answer.

        Two beam searches are run: ``beam_predict_max_violation`` yields the
        end states from which the one maximising ``score - reward`` is taken
        (the most violating prediction), and
        ``beam_find_actions_with_answer_guidence`` yields reference states.

        The loss is ``max(f(x, y') - f(x, y) + R(y) - R(y'), 0)`` where ``y'``
        is the most violating state and ``y`` a reference state.  With
        ``self.only_one_best`` only the first reference state is used;
        otherwise the loss is averaged over all reference states that reach
        the maximum reward.

        Returns:
            ``(loss_value, best_states)``.  When the guided search finds no
            reference state, ``(0, [])`` is returned and no update is made,
            whichever mode is active.
        """
        #end_state_list = self.beam_predict(init_state)
        end_state_list = self.beam_predict_max_violation(
            init_state, gold_ans)  # have to use this to make it work....
        reward_list = [x.reward(gold_ans) for x in end_state_list]
        violation_list = [
            s.path_score_expression.value() - reward
            for s, reward in zip(end_state_list, reward_list)
        ]

        # find the best scoring seq with minimal reward
        best_score_state_idx = violation_list.index(max(violation_list))
        best_score_state = end_state_list[best_score_state_idx]
        best_score_state_reward = reward_list[best_score_state_idx]

        best_states = self.beam_find_actions_with_answer_guidence(
            init_state, gold_ans)
        # Without any reference state there is nothing to learn from; this
        # guard now also covers the averaging mode, which used to fail on
        # ``max([])``.
        if not best_states:
            return 0, []

        def margin_loss(reference_state, reference_reward):
            # Hinge on score difference plus reward difference.
            return dt.rectify(best_score_state.path_score_expression -
                              reference_state.path_score_expression +
                              dt.scalarInput(reference_reward -
                                             best_score_state_reward))

        if self.only_one_best:
            best_reward_state = best_states[0]
            loss = margin_loss(best_reward_state,
                               best_reward_state.reward(gold_ans))
        else:
            best_states_rewards = [s.reward(gold_ans) for s in best_states]
            max_reward = max(best_states_rewards)
            best_states = [
                s for s, r in zip(best_states, best_states_rewards)
                if r == max_reward
            ]
            loss = dt.average(
                [margin_loss(s, max_reward) for s in best_states])

        loss_value = loss.value()
        loss.backward()

        self.neural_model.learner.update()

        return loss_value, best_states
Beispiel #12
0
    def learn(self, seq):
        """Run one training step on ``seq`` and return the loss value.

        ``self._predict`` yields, per entry, three main and three auxiliary
        softmax outputs (indexed 0, 1, 2 for UPOS, XPOS and ATTRS).  The
        negative log-probability of the gold tag is collected for each of
        them; the auxiliary terms are scaled by
        ``self.aux_softmax_weight / 3``.  The average of all terms is
        back-propagated and the trainer is updated.
        """
        dy.renew_cg()
        softmax_list, aux_softmax_list = self._predict(seq, runtime=False)

        def nll(distribution, gold_index):
            # Negative log-probability assigned to the gold index.
            return -dy.log(dy.pick(distribution, gold_index))

        losses = []
        for entry, softmax, aux_softmax in zip(seq, softmax_list,
                                               aux_softmax_list):
            gold = (
                self.encodings.upos2int[entry.upos],
                self.encodings.xpos2int[entry.xpos],
                self.encodings.attrs2int[entry.attrs],
            )
            aux_scale = self.aux_softmax_weight / 3

            # Main terms first, then the down-weighted auxiliary terms.
            losses.extend(nll(softmax[k], gold[k]) for k in range(3))
            losses.extend(
                nll(aux_softmax[k], gold[k]) * aux_scale for k in range(3))

        total = dy.average(losses)
        loss_val = total.value()
        total.backward()
        self.trainer.update()
        return loss_val
def internal_eval(batches,
                  transducer,
                  vocab,
                  previous_predicted_actions,
                  check_condition=True,
                  name='train'):
    """Evaluate ``transducer`` on ``batches`` and report accuracy and loss.

    Args:
        batches: iterable of batches, each an iterable of samples exposing
            ``pos``, ``feats``, ``lemma``, ``word``, ``act_repr`` and
            ``word_str``.
        transducer: object whose ``transduce`` returns
            ``(loss, prediction, predicted_actions)``.
        vocab: vocabulary; ``vocab.word`` and ``vocab.word.w2i`` are used to
            compare the prediction with ``sample.word``.
        previous_predicted_actions: per-sample actions of the previous
            evaluation, indexed by the running sample counter.
        check_condition: when true, samples whose actions changed since the
            previous evaluation, or that are wrong, are printed.
        name: label of the data set used in the progress message.

    Returns:
        ``(accuracy, total_loss, predictions, pred_acts)``.  ``accuracy`` is
        ``0.0`` when no sample was evaluated (this used to raise
        ``ZeroDivisionError``).
    """
    then = time.time()
    print('evaluating on {} data...'.format(name))

    number_correct = 0.
    total_loss = 0.
    predictions = []
    pred_acts = []
    i = 0  # counter of samples
    for j, batch in enumerate(batches):
        dy.renew_cg()
        batch_loss = []
        for sample in batch:
            feats = sample.pos, sample.feats
            loss, prediction, predicted_actions = transducer.transduce(
                sample.lemma, feats, external_cg=True)
            predictions.append(prediction)
            pred_acts.append(predicted_actions)
            batch_loss.extend(loss)

            # evaluation
            correct_prediction = False
            if (prediction in vocab.word
                    and vocab.word.w2i[prediction] == sample.word):
                correct_prediction = True
                number_correct += 1

            if check_condition:
                # display prediction for this sample if it differs the prediction
                # of the previous epoch or its an error
                if predicted_actions != previous_predicted_actions[
                        i] or not correct_prediction:
                    print(
                        'BEFORE:    ',
                        datasets.action2string(previous_predicted_actions[i],
                                               vocab))
                    print('THIS TIME: ',
                          datasets.action2string(predicted_actions, vocab))
                    print('TRUE:      ', sample.act_repr)
                    print('PRED:      ', prediction)
                    print('WORD:      ', sample.word_str)
                    # NOTE(review): 'X' is printed for a correct prediction
                    # and 'V' for a wrong one - confirm this is intended.
                    print('X' if correct_prediction else 'V')
            # increment counter of samples
            i += 1
        # An empty batch contributes no loss; averaging an empty list would
        # fail.
        if batch_loss:
            batch_loss = -dy.average(batch_loss)
            total_loss += batch_loss.scalar_value()
        # report progress
        if j > 0 and j % 100 == 0: print('\t\t...{} batches'.format(j))

    # Guard against an evaluation set without any sample.
    accuracy = number_correct / i if i else 0.
    print('\t...finished in {:.3f} sec'.format(time.time() - then))
    return accuracy, total_loss, predictions, pred_acts
Beispiel #14
0
    def decode(self, tokens, constraints=None, train_mode=False):
        """Order ``tokens`` by solving a TSP over biaffine scores.

        A special start vector is prepended to the token vectors, all
        pairwise scores are computed with ``self.biaffine.attend`` and turned
        into integer costs (``1000 * (max_score - score)``) for ``solve_tsp``.
        If no tour satisfies ``constraints`` the problem is solved again
        without constraints.

        Args:
            tokens: tokens exposing ``vecs[self.vec_key]`` and, in training
                mode, ``['original_id']``.
            constraints: constraints passed to ``solve_tsp``; ``None`` (the
                default) means no constraints.  A fresh list is created per
                call so no list is shared between calls.
            train_mode: when true, hinge losses along the gold tour (rows and
                columns of the score matrix) are averaged into the loss.

        Returns:
            ``{'loss': loss, 'seq': seq}``; ``loss`` is ``0`` outside training
            mode and ``seq`` is the list of tokens in tour order.
        """
        if constraints is None:
            constraints = []

        loss = 0
        errs = []

        fr_vecs = [self.special[0]] + [t.vecs[self.vec_key] for t in tokens]
        to_vecs = [self.special[1]] + [t.vecs[self.vec_key] for t in tokens]
        score_mat = self.biaffine.attend(fr_vecs, to_vecs)
        scores = score_mat.npvalue()

        if train_mode:
            # Index 0 is the special start node; the gold tour visits the
            # tokens by ascending original id and returns to the start.
            oids = [0] + [t['original_id'] for t in tokens]
            gold_path = np.argsort(oids).tolist() + [0]
            trans_mat = dy.transpose(score_mat)
            for i, j in zip(gold_path, gold_path[1:]):
                errs.append(dy.hinge(score_mat[i], j))
                errs.append(dy.hinge(trans_mat[j], i))
            if errs:
                loss = dy.average(errs)

        costs = (1000 * (scores.max() - scores)).astype(int).tolist()
        solution = solve_tsp(costs, constraints,
                             self.args.guided_local_search)  # first is best
        if not solution:
            # no solution under the constraints: retry without them
            solution = solve_tsp(costs, [], self.args.guided_local_search)

        assert solution != []
        # Drop the start node at both ends; tour indices are shifted by one
        # relative to ``tokens`` because of the prepended special node.
        seq = [tokens[i - 1] for i in solution[1:-1]]

        return {'loss': loss, 'seq': seq}
Beispiel #15
0
 def il_training_batch_update(batch, *args):
     """Update the model from one batch with imitation-learning training.

     ``args[0]`` is the epoch number.  From ``pretrain_epochs`` onwards the
     sampling probability is ``1 - decay(epoch - pretrain_epochs)``; before
     that it is ``0``.  Every sample is transduced with the oracle settings
     taken from the enclosing scope, the negated average of all returned
     losses (plus an optional L2 term) is back-propagated and the trainer is
     updated.

     Returns:
         The scalar value of the batch loss.
     """
     dy.renew_cg()
     epoch = args[0]
     e = 1 - decay(epoch-pretrain_epochs) if epoch >= pretrain_epochs else 0.
     # Function-call form prints the same text on Python 2 and Python 3.
     if verbose and e:
         print('Sampling probability = {:.3f}'.format(e))
     batch_loss = []
     for sample in batch:
         feats = sample.pos, sample.feats
         # @TODO This will fail if a target character has never been seen
         # in lemmas and parameter tying is not used!
         loss, prediction, predicted_actions = self.transducer.transduce(
             lemma=sample.lemma,
             feats=feats,
             oracle_actions={
                 'loss': loss_expression,
                 'rollout_mixin_beta': rollout_mixin_beta,
                 'global_rollout': global_rollout,
                 'target_word': sample.actions,
                 # support for legacy, buggy experiments
                 'optimal': optimal_oracle,
                 'bias_inserts': bias_inserts,
             },
             sampling=e,
             external_cg=True,
             verbose=verbose)
         batch_loss.extend(loss)
     batch_loss = -dy.average(batch_loss)
     if l2: batch_loss += l2 * self.transducer.l2_norm(with_embeddings=False)
     loss = batch_loss.scalar_value()  # forward
     batch_loss.backward()             # backward
     self.trainer.update()
     return loss
Beispiel #16
0
    def bow_snippets(self, token, snippets=None):
        """Bag of words embedding for snippets.

        A token that is not a snippet is embedded directly via ``self(token)``.
        A snippet token is replaced by the average embedding of the sub-tokens
        of the snippet in ``snippets`` whose ``name`` equals ``token``.

        Raises:
            AssertionError: if ``snippets`` is non-empty but contains no
                snippet named ``token`` with a non-empty sequence.
        """
        if not snippet_handler.is_snippet(token):
            return self(token)

        # A token can be classified as a snippet although no snippets were
        # supplied (reported by the original author on one server only).  In
        # that case fall back to the plain token embedding.  An explicit
        # check is used instead of ``assert`` inside a bare ``except`` so the
        # fallback also works when assertions are disabled.
        if not snippets:
            return self(token)

        snippet_sequence = []
        for snippet in snippets:
            if snippet.name == token:
                snippet_sequence = snippet.sequence
                break
        assert snippet_sequence

        snippet_embeddings = [self(subtoken)
                              for subtoken in snippet_sequence]

        return dy.average(snippet_embeddings)
    def _predict(self, batch, train=True):
        """Return one probability expression per item of ``batch``.

        The word vectors of each sentence are averaged and passed through a
        tanh hidden layer followed by a logistic output unit.  ``train`` is
        accepted but has no effect in this implementation.

        Args:
            batch: iterable of pairs whose second element is the sequence of
                word indices looked up in ``self.embed``.

        Returns:
            List of DyNet expressions, one logistic output per batch item.
        """
        # load the network parameters
        hidden_weights = dy.parameter(self.W_hid)
        hidden_bias = dy.parameter(self.b_hid)
        output_weights = dy.parameter(self.w_clf)
        output_bias = dy.parameter(self.b_clf)

        def score(sentence):
            # Mean word embedding -> tanh(b + W * x) -> logistic output;
            # affine_transform is the faster way to express b + W * x.
            mean_embedding = dy.average(
                [dy.lookup(self.embed, w) for w in sentence])
            hidden = dy.tanh(
                dy.affine_transform(
                    [hidden_bias, hidden_weights, mean_embedding]))
            return dy.logistic(
                dy.affine_transform([output_bias, output_weights, hidden]))

        # predict the probability of positive sentiment for each sentence
        return [score(sent) for _, sent in batch]
Beispiel #18
0
def adapt_user(s2s, trainer, train_src, train_trg, test_src, opt):
    """Adapt ``s2s`` to one user's data and translate the test sources.

    Every epoch averages the per-sample user losses over the whole training
    set, performs one update and logs loss and perplexity.  As long as the
    training perplexity improves, the test set is re-translated; the first
    epoch without improvement stops training.

    Args:
        s2s: model exposing ``calculate_user_loss``.
        trainer: DyNet trainer used for the updates.
        train_src, train_trg: parallel training sentences.
        test_src: sources handed to ``evaluate_model``.
        opt: options; ``verbose``, ``num_epochs``, ``update_mode`` and
            ``beam_size`` are read.

    Returns:
        The translations produced after the last improving epoch, or ``None``
        when no epoch improved the perplexity (including
        ``opt.num_epochs == 0``); previously this case raised
        ``UnboundLocalError``.
    """
    timer = utils.Timer()
    log = utils.Logger(opt.verbose)
    # One token per target sentence is not counted.
    n_tokens = (sum(map(len, train_trg)) - len(train_trg))
    # Defined up front so the function can always return a value.
    translations = None
    # Train for n_iter
    timer.restart()
    best_ppl = np.inf
    for epoch in range(opt.num_epochs):
        timer.tick()
        dy.renew_cg()
        # Add losses for all samples
        losses = [
            s2s.calculate_user_loss([x], [y], [0],
                                    update_mode=opt.update_mode)
            for x, y in zip(train_src, train_trg)
        ]
        loss = dy.average(losses)
        # Backward + update
        loss.backward()
        trainer.update()
        # Print loss etc...
        train_loss = loss.value() / n_tokens
        train_ppl = np.exp(train_loss)
        trainer.status()
        elapsed = timer.tick()
        log.info(" Training_loss=%f, ppl=%f, time=%f s, tok/s=%.1f" %
                 (train_loss, train_ppl, elapsed, n_tokens / elapsed))
        if train_ppl < best_ppl:
            best_ppl = train_ppl
            translations = evaluate_model(s2s, test_src, opt.beam_size)
        else:
            log.info("Early stopping after %d iterations" % (epoch + 1))
            break
    return translations
Beispiel #19
0
def sent_rep(sent):
    """Return the average of the token representations of ``sent``.

    ``sent`` is tokenized with ``w_tokenizer`` and every token is mapped to
    its representation with ``token_lookup``.
    """
    return dy.average([token_lookup(token) for token in w_tokenizer(sent)])
Beispiel #20
0
    def __call__(self, H, is_train=True):
        """Re-weight every vector in ``H`` by its interaction with the mean.

        For each ``ht`` in ``H`` the score ``tanh(ht^T * W1 * pool + bd)`` is
        computed, where ``pool`` is the average of ``H``, and ``ht`` is
        multiplied element-wise by the softmax of that score.

        :param H: list of DyNet expressions
        :param is_train: when true, dropout with ``self.dropout_rate`` is
            applied to ``W1`` (and to ``W2``)
        :return: ``(aspect_attentions, weights)`` - the list of re-weighted
            vectors and a list that is always empty
        """
        if is_train:
            # in the training phase, perform dropout
            W1 = dy.dropout(self.W1, self.dropout_rate)
            # NOTE(review): W2 is never used below; the dropout node is kept
            # so the computation graph stays as it was - confirm whether W2
            # can be removed.
            W2 = dy.dropout(self.W2, self.dropout_rate)
        else:
            W1 = self.W1
            W2 = self.W2

        pool = dy.average(H)

        aspect_attentions = []
        for ht in H:
            scores = dy.tanh(dy.transpose(ht) * W1 * pool + self.bd)
            # NOTE(review): the score value is evaluated but not returned
            # (the second return value is always empty) - confirm whether
            # the collected weights were meant to be returned.
            scores.value()
            aspect_attentions.append(dy.cmult(dy.softmax(scores), ht))

        return aspect_attentions, []
Beispiel #21
0
 def _loss(outputs, labels):
     """Return the batch-mean of the averaged negative log-softmax losses.

     ``outputs`` and ``labels`` are iterated in parallel; each pair yields a
     batched ``pickneglogsoftmax`` loss.  The losses are averaged over the
     pairs and then over the batch dimension.
     """
     per_pair = []
     for out, label in zip(outputs, labels):
         per_pair.append(dy.pickneglogsoftmax_batch(out, label))
     return dy.mean_batches(dy.average(per_pair))
Beispiel #22
0
 def compose(
         self, embeds: Union[dy.Expression,
                             List[dy.Expression]]) -> dy.Expression:
     """Reduce embeddings to a single expression by averaging.

     A list of expressions (including instances of ``list`` subclasses) is
     averaged element-wise with ``dy.average``; any other input is treated as
     one batched expression and averaged over its batch dimension with
     ``dy.mean_batches``.
     """
     if isinstance(embeds, list):
         return dy.average(embeds)
     return dy.mean_batches(embeds)
Beispiel #23
0
    def _step(self,
              loader,
              update,
              log,
              reporting_fns,
              verbose=None,
              output=None,
              txts=None):
        """Run one pass over ``loader`` in auto-batches and return metrics.

        Predictions and losses are accumulated example by example.  Every
        ``self.autobatchsz`` steps the accumulated losses are averaged,
        ``update`` and ``log`` are called, the confusion matrix is extended
        and a fresh computation graph is started.  Examples left over after
        the loop are flushed once more (without a ``log`` call).

        Args:
            loader: sized iterable of batch dicts for ``self.model.make_input``.
            update: callable taking the averaged loss expression.
            log: callable taking ``(global_step, loss_value, batch_size,
                reporting_fns)``.
            reporting_fns: forwarded to ``log``.
            verbose: forwarded to ``verbose_output``.
            output, txts: not used by this implementation.

        Returns:
            The metrics of the confusion matrix plus ``'avg_loss'``, which is
            ``0.0`` when no example was processed.
        """
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
        preds, losses, ys = [], [], []
        dy.renew_cg()
        for i, batch_dict in enumerate(pg(loader), 1):
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            pred = self.model.forward(inputs)
            preds.append(pred)
            loss = self.model.loss(pred, y)
            losses.append(loss)
            ys.append(y)
            if i % self.autobatchsz == 0:
                loss = dy.average(losses)
                preds = dy.concatenate_cols(preds)
                batchsz = len(losses)
                lossv = loss.npvalue().item() * batchsz
                epoch_loss += lossv
                epoch_div += batchsz
                _add_to_cm(cm, np.array(ys), preds.npvalue())
                update(loss)
                log(self.optimizer.global_step, lossv, batchsz, reporting_fns)
                preds, losses, ys = [], [], []
                dy.renew_cg()

        # Flush the remainder only if there is one: when the number of steps
        # is a multiple of the auto-batch size (or the loader is empty) the
        # lists are empty and averaging them would fail.
        if losses:
            loss = dy.average(losses)
            preds = dy.concatenate_cols(preds)
            batchsz = len(losses)
            epoch_loss += loss.npvalue().item() * batchsz
            epoch_div += batchsz
            _add_to_cm(cm, np.array(ys), preds.npvalue())
            update(loss)

        metrics = cm.get_all_metrics()
        # Avoid dividing by zero when nothing was processed.
        metrics['avg_loss'] = (epoch_loss / float(epoch_div)
                               if epoch_div else 0.0)
        verbose_output(verbose, cm)
        return metrics
def average_pooling(encoded_sequence):
    """Average the elements of ``encoded_sequence`` index by index.

    The number of indices is taken from the first dimension reported by
    ``encoded_sequence[0].dim()``. For every index, the entry at that index
    is picked from each element, the picks are averaged with ``dy.average``,
    and the per-index averages are joined with ``dy.concatenate``.
    """
    width = encoded_sequence[0].dim()[0][0]
    pooled = [
        dy.average([
            encoded_sequence[step][col]
            for step in range(len(encoded_sequence))
        ])
        for col in range(width)
    ]
    return dy.concatenate(pooled)
Beispiel #25
0
 def create_network_return_best(self, x):
     """Return the index of the highest-scoring class for the items in ``x``.

     Every item is mapped to an index through ``self.corpus`` (items that are
     missing map to ``len(self.corpus)``), looked up in ``self.lookup``,
     and the lookups are averaged. The average is reshaped to ``(1, self.dim)``
     and passed through a tanh layer and a softmax output layer on a fresh
     computation graph.
     """
     dy.renew_cg()
     embedded = []
     for item in x:
         index = self.corpus.get(item, len(self.corpus))
         embedded.append(self.lookup[index])
     pooled = dy.reshape(dy.average(embedded), (1, self.dim))
     hidden = dy.tanh((pooled * self._pW1) + self._pB1)
     scores = (hidden * self._kW1) + self._kB1
     probabilities = dy.softmax(dy.reshape(scores, (self.numClasses,)))
     return np.argmax(probabilities.npvalue())
Beispiel #26
0
    def score_expression(self, qwVecs, numWdPos):
        """Score the vector(s) next to position ``numWdPos`` against ``self.OpW``.

        The vector used is ``qwVecs[1]`` for position 0, ``qwVecs[0]`` for
        position 1, and otherwise the average of the slice
        ``qwVecs[numWdPos - 2:numWdPos]``. The result is the dot product of
        that vector with ``self.OpW``.
        """
        if numWdPos not in (0, 1):
            window = qwVecs[numWdPos - 2:numWdPos]
            context_vec = dt.average(window)
        elif numWdPos == 0:
            context_vec = qwVecs[numWdPos + 1]
        else:
            context_vec = qwVecs[0]

        return dt.dot_product(context_vec, self.OpW)
 def calc_errors(self, batch: List[Tuple]):
     """Return one averaged error per ``(input, output)`` pair in ``batch``.

     For each pair, the values yielded by ``self.model.forward`` are averaged
     on a fresh computation graph; the averages are concatenated and
     evaluated, and the result is returned as a NumPy array.
     """
     dy.renew_cg()
     per_pair = []
     for _in, _out in batch:
         per_pair.append(dy.average(list(self.model.forward(_in, _out))))
     values = dy.concatenate(per_pair).value()
     # With a single pair the evaluated value is wrapped in a list before
     # conversion (presumably because .value() is then a bare scalar -- TODO
     # confirm against DyNet).
     return np.array([values] if len(batch) == 1 else values)
Beispiel #28
0
    def __init__(self, nmodel, qinfo, vw, init_example=True):
        """Initialise the state and, optionally, its per-example expressions.

        Args:
            nmodel: Model object; its ``E``, ``R`` and ``NulW`` attributes
                are read here.
            qinfo: Forwarded to ``SqaState.__init__``; afterwards
                ``self.qinfo`` supplies the question, header and table-entry
                word sequences.
            vw: Vocabulary object whose ``w2i`` mapping turns words into
                indices (with an ``"_UNK_"`` entry for unknown words).
            init_example: When false, only the basic fields are set and no
                embeddings or parameter expressions are built.
        """
        SqaState.__init__(self, qinfo)
        self.path_score_expression = dt.scalarInput(0)
        self.score = 0
        self.nm = nmodel
        self.vw = vw

        if not init_example:
            return

        UNK = self.vw.w2i["_UNK_"]

        def embed(word):
            # Embedding of one word; unknown words fall back to the UNK index.
            return self.nm.E[self.vw.w2i.get(word, UNK)]

        def mean_embedding(words):
            # Average of the embeddings of a word sequence.
            return dt.average([embed(word) for word in words])

        self.ques_word_sequence = self.qinfo.ques_word_sequence()

        # Question word vectors, joined column-wise into one expression.
        # Alternatives left disabled in the original code:
        #self.ques_emb = [self.nm.E[self.vw.w2i.get(w, UNK)] for w in self.ques_word_sequence]
        #self.ques_avg_emb = dt.average(self.ques_emb)
        self.ques_emb = dt.concatenate_cols(
            [embed(word) for word in self.ques_word_sequence])

        # One averaged vector per column name.
        self.headers_embs = [
            mean_embedding(colname_words)
            for colname_words in self.qinfo.headers_word_sequences()
        ]

        # One averaged vector per table cell, grouped by row.
        self.entries_embs = [
            [mean_embedding(cell_words) for cell_words in row_cells]
            for row_cells in self.qinfo.entries_word_sequences()
        ]

        self.R = dt.parameter(self.nm.R)
        self.NulW = dt.parameter(self.nm.NulW)
Beispiel #29
0
 def test(sqnorm_original_value, assert_equal):
     """Compare the squared norm of the averaged BiLSTM outputs to a reference.

     Builds fresh inputs on a new computation graph, averages everything
     returned by ``common.get_bilstm_all`` and checks its squared norm
     against ``sqnorm_original_value`` to 10 places: equal when
     ``assert_equal`` is true, not equal otherwise.
     """
     dy.renew_cg()
     inputs = make_inputs()
     states = common.get_bilstm_all(inputs, flstm, blstm)
     sqnorm = dy.squared_norm(dy.average(states))
     check = (self.assertAlmostEqual
              if assert_equal else self.assertNotAlmostEqual)
     check(sqnorm_original_value, sqnorm.value(), places=10)
Beispiel #30
0
 def calc_loss(self, src, trg, loss_calculator):
   """Combine the named losses of all models in ``self.models`` by averaging.

   Each model's ``calc_loss(...).loss_values`` is grouped by loss name; every
   group is averaged with ``dy.average`` and added to a new ``LossBuilder``,
   which is returned.
   """
   grouped = collections.defaultdict(list)
   for member in self.models:
     member_losses = member.calc_loss(src, trg, loss_calculator).loss_values
     for loss_name, loss in member_losses.items():
       grouped[loss_name].append(loss)

   combined = LossBuilder()
   for loss_name in grouped:
     # TODO: dy.average(losslist)  _or_  dy.esum(losslist) / len(self.models) ?
     #       -- might not be the same if not all models return all losses
     combined.add_loss(loss_name, dy.average(grouped[loss_name]))
   return combined
Beispiel #31
0
def test():
    """Evaluate ``network`` on ``test_data`` and print loss and accuracy.

    For every ``(label, image)`` pair a loss expression is collected and the
    predicted class is compared with the label (both with dropout disabled).
    The averaged loss and the number of correct predictions are printed.
    """
    dy.renew_cg()
    n_correct = 0
    per_example = []
    for lbl, img in test_data:
        per_example.append(
            network.create_network_return_loss(img, lbl, dropout=False))
        predicted = network.create_network_return_best(img, dropout=False)
        if lbl == predicted:
            n_correct += 1
    mbloss = dy.average(per_example)
    template = '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
    print(template.format(
        mbloss.value(), n_correct, len(test_data),
        100. * n_correct / len(test_data)))
Beispiel #32
0
    def _step(self, loader, update, log, reporting_fns, verbose=None):
        """Run one pass over ``loader``, flushing every ``self.autobatchsz`` items.

        Each item is converted with ``self.model.make_input``; the prediction
        and loss expressions are accumulated and, once ``self.autobatchsz``
        items have been collected, averaged, evaluated, recorded in the
        confusion matrix and handed to ``update``.

        Args:
            loader: Sized iterable of batch dicts.
            update: Callable invoked with the averaged loss expression.
            log: Callable invoked after each full group with
                ``(self.optimizer.global_step, lossv, batchsz, reporting_fns)``.
            reporting_fns: Passed through to ``log``.
            verbose: Passed to ``verbose_output`` together with the
                confusion matrix.

        Returns:
            The result of ``cm.get_all_metrics()`` with an added
            ``'avg_loss'`` entry.

        Raises:
            ZeroDivisionError: If ``loader`` yields no items.
        """
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
        preds, losses, ys = [], [], []
        dy.renew_cg()
        for i, batch_dict in enumerate(pg(loader), 1):
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            pred = self.model.forward(inputs)
            preds.append(pred)
            loss = self.model.loss(pred, y)
            losses.append(loss)
            ys.append(y)
            if i % self.autobatchsz == 0:
                loss = dy.average(losses)
                preds = dy.concatenate_cols(preds)
                batchsz = len(losses)
                lossv = loss.npvalue().item() * batchsz
                epoch_loss += lossv
                epoch_div += batchsz
                _add_to_cm(cm, np.array(ys), preds.npvalue())
                update(loss)
                log(self.optimizer.global_step, lossv, batchsz, reporting_fns)
                preds, losses, ys = [], [], []
                dy.renew_cg()

        # Flush the final, partially filled group. When the number of items
        # is an exact multiple of autobatchsz the lists are empty here, and
        # averaging / concatenating nothing (and then calling update) must be
        # skipped.
        if losses:
            loss = dy.average(losses)
            preds = dy.concatenate_cols(preds)
            batchsz = len(losses)
            epoch_loss += loss.npvalue().item() * batchsz
            epoch_div += batchsz
            _add_to_cm(cm, np.array(ys), preds.npvalue())
            update(loss)

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        verbose_output(verbose, cm)
        return metrics
Beispiel #33
0
def train(epoch):
    """Train ``network`` for one epoch over the shuffled ``train_data``.

    The data is processed in slices of ``args.batch_size``; for each slice
    the per-example losses (dropout enabled) are averaged on a fresh
    computation graph, back-propagated, and applied with ``trainer.update``.
    Progress is printed every ``args.log_interval`` slices and the elapsed
    time is printed at the end.
    """
    random.shuffle(train_data)
    epoch_start = time.time()
    for start in range(0, len(train_data), args.batch_size):
        dy.renew_cg()
        minibatch = train_data[start:start + args.batch_size]
        mbloss = dy.average([
            network.create_network_return_loss(img, lbl, dropout=True)
            for lbl, img in minibatch
        ])
        if (int(start / args.batch_size)) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, start, len(train_data),
                100. * start / len(train_data), mbloss.value()))
        mbloss.backward()
        trainer.update()
    elapsed = time.time() - epoch_start
    print("{} s per epoch".format(elapsed))
Beispiel #34
0
 def _loss(outputs, labels):
     """Return the batch-mean of the averaged negative log-softmax losses.

     ``outputs`` and ``labels`` are paired position by position; each pair
     goes through ``dy.pickneglogsoftmax_batch``, the resulting losses are
     averaged and then reduced with ``dy.mean_batches``.
     """
     per_step = []
     for out, label in zip(outputs, labels):
         per_step.append(dy.pickneglogsoftmax_batch(out, label))
     averaged = dy.average(per_step)
     return dy.mean_batches(averaged)
Beispiel #35
0
    def _train(self, ts, **kwargs):
        """Train for one pass over ``ts`` and return the epoch metrics.

        Two modes are supported, selected by ``self.autobatchsz``:

        * ``None``: each item of ``ts`` is a batch; its loss is reduced with
          ``dy.mean_batches`` and an optimizer update follows every item.
        * otherwise: per-item losses are accumulated and averaged, and an
          update is performed every ``self.autobatchsz`` items and on the
          last item.

        In both modes a ``'STEP'`` report is emitted whenever
        ``self.optimizer.global_step + 1`` is a multiple of ``self.nsteps``.

        Args:
            ts: Sized iterable of batch dicts.
            **kwargs: ``reporting_fns`` (default ``[]``) is forwarded to
                ``self.report``.

        Returns:
            ``self.calc_metrics(epoch_loss, epoch_norm)`` for the whole pass.
        """
        self.model.train = True
        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_norm = 0
        auto_norm = 0
        steps = len(ts)
        last = steps
        losses = []
        pg = create_progress_bar(steps)
        dy.renew_cg()

        def report_if_due():
            # Emit a STEP report every self.nsteps optimizer steps and reset
            # the running n-step counters afterwards.
            # TODO: Abstract this somewhat, or else once we have a batched tagger have 2 trainers
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                step_metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, step_metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()

        for i, batch_dict in enumerate(pg(ts), 1):
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            pred = self.model.compute_unaries(inputs)
            bsz = self._get_batchsz(y)
            if self.autobatchsz is None:
                batch_losses = self.model.loss(pred, y)
                loss = dy.mean_batches(batch_losses)
                lossv = loss.npvalue().item()
                report_loss = lossv * bsz
                epoch_loss += report_loss
                epoch_norm += bsz
                self.nstep_agg += report_loss
                self.nstep_div += bsz
                loss.backward()
                self.optimizer.update()
                dy.renew_cg()
                report_if_due()
            else:
                loss = self.model.loss(pred, y)
                losses.append(loss)
                self.nstep_div += bsz
                epoch_norm += bsz
                auto_norm += bsz

                # Flush on every full group and on the final item so that a
                # trailing partial group is still trained on.
                if i % self.autobatchsz == 0 or i == last:
                    loss = dy.average(losses)
                    lossv = loss.npvalue().item()
                    loss.backward()
                    self.optimizer.update()
                    report_loss = lossv * auto_norm
                    epoch_loss += report_loss
                    self.nstep_agg += report_loss
                    losses = []
                    dy.renew_cg()
                    # The original passed the undefined name `reporting_fnsa`
                    # here, raising NameError whenever a report was due.
                    report_if_due()
                    auto_norm = 0

        metrics = self.calc_metrics(epoch_loss, epoch_norm)
        return metrics