    def train(self, training_set):
        for sentence, eid, entity, trigger, label, pos, chars, rule in training_set:
            features = self.encode_sentence(sentence, pos, chars)
            loss = []            

            entity_embeds = features[entity]

            attention, context = self.self_attend(features)
            ty = dy.vecInput(len(sentence))
            ty.set([0 if i!=trigger else 1 for i in range(len(sentence))])
            loss.append(dy.binary_log_loss(dy.reshape(attention,(len(sentence),)), ty))
            h_t = dy.concatenate([context, entity_embeds])
            hidden = dy.tanh(self.lb.expr() * h_t + self.lb_bias.expr())
            out_vector = dy.reshape(dy.logistic(self.lb2.expr() * hidden + self.lb2_bias.expr()), (1,))
            label = dy.scalarInput(label)
            loss.append(dy.binary_log_loss(out_vector, label))

            pres = [0]
            for pattern in rule:
                probs = self.decoder(features, pres)
                loss.append(-dy.log(dy.pick(probs, pattern)))
                pres.append(pattern)

            loss = dy.esum(loss)
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
Example #2
def __train(model, data):
    tagged_loss = 0
    untagged_loss = 0
    for index, sentence_report in enumerate(data):
        for phrase in sentence_report.all_phrases:
            loss = None
            encoded_phrase = __encode_sequence(model, phrase)

            if model.options.external_info != "no_info":
                encoded_phrase = dy.concatenate(
                    [encoded_phrase, model.doclookup[sentence_report.app_id]])

            y_pred = dy.logistic((model.mlp_w * encoded_phrase) + model.mlp_b)

            if sentence_report.mark:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
            else:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))

            if sentence_report.mark:
                tagged_loss += loss.scalar_value() / (index + 1)
            else:
                untagged_loss += loss.scalar_value() / (index + 1)
            loss.backward()
            model.trainer.update()
            dy.renew_cg()
Example #3
    def __train(self, data):
        def encode_sequence(seq):
            rnn_forward = self.phrase_rnn[0].initial_state()
            for entry in seq:
                vec = self.wlookup[int(self.w2i.get(entry, 0))]
                rnn_forward = rnn_forward.add_input(vec)
            return rnn_forward.output()
        tagged_loss = 0
        untagged_loss = 0
        for index, sentence_report in enumerate(data):
            for phrase in sentence_report.all_phrases:
                loss = None
                encoded_phrase = encode_sequence(phrase)
                y_pred = dy.logistic((self.mlp_w*encoded_phrase) + self.mlp_b)

                if sentence_report.mark:
                    loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
                else:
                    loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))
                if index % 1000 == 0:
                    print("Description : {}".format(index+1))
                    print("Marked {} Prediction Result {} : ".format(sentence_report.mark, y_pred.scalar_value()))
                    print("Tagged loss {} Untagged Loss {} Total loss {}".format(tagged_loss, untagged_loss, tagged_loss+untagged_loss))

                if sentence_report.mark:
                    tagged_loss += loss.scalar_value()/(index+1)
                else:
                    untagged_loss += loss.scalar_value()/(index+1)
                loss.backward()
                self.trainer.update()
                dy.renew_cg()
def train_item(args, model, sentence):
    loss = None
    seq = [
        model.wlookup[int(model.w2i.get(entry, 0))]
        for entry in sentence.preprocessed_sentence
    ]
    if len(seq) > 0:
        encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
        last_output = encoded_sequence[-1]
        global_max = max_pooling(encoded_sequence)
        global_min = average_pooling(encoded_sequence)
        context = dy.concatenate([last_output, global_max, global_min])
        y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)

        if sentence.permissions[args.permission_type]:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
        else:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))

        loss.backward()
        model.trainer.update()
        loss_val = loss.scalar_value()
        dy.renew_cg()
        return loss_val
    return 0
Example #5
    def learn(self, seq):
        output, proj_x3 = self._predict(seq, runtime=False)

        # arcs
        for iSrc in range(len(seq)):
            for iDst in range(len(seq)):
                if iDst > iSrc:
                    o = output[iSrc][iDst]  # the softmax portion
                    t = get_link(seq, iSrc, iDst)
                    # if t==1:
                    # self.losses.append(-dy.log(dy.pick(o, t)))
                    self.losses.append(dy.binary_log_loss(
                        o, dy.scalarInput(t)))

        # labels
        gs_chains, labels = self._get_gs_chains(seq)

        for chain, label in zip(gs_chains, labels):
            label_rnn = self.label_decoder.initial_state()
            for index in chain:
                label_rnn = label_rnn.add_input(proj_x3[index])
            label_softmax = dy.softmax(
                self.label_w.expr(update=True) * label_rnn.output() +
                self.label_b.expr(update=True))
            self.losses.append(-dy.log(
                dy.pick(label_softmax, self.encodings.label2int[label])))
Example #6
def run_model():
    data_in = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
    data_out = np.array([0, 1, 1, 0]).reshape(1, 4)

    m = dy.Model()
    sgd = dy.SimpleSGDTrainer(m, 0.5)

    W = m.add_parameters((2, 2))
    b = m.add_parameters(2)

    V = m.add_parameters((1, 2))
    a = m.add_parameters(1)

    errors = []
    for iter in range(ITERATIONS):
        dy.renew_cg()

        x = dy.inputTensor(data_in, batched=True)
        y = dy.inputTensor(data_out)

        h = dy.logistic(W * x + b)
        y_pred = dy.logistic((V * h) + a)
        y_pred = dy.reshape(y_pred, y.dim()[0])
        loss = dy.binary_log_loss(y_pred, y)

        errors.append(loss.scalar_value() / 4)
        loss.backward()
        sgd.update()
    return errors
Example #7
def loss_function(recon_x, x, mu, logvar):
    BCE = dy.binary_log_loss(recon_x, x)  # equiv to torch.nn.functional.binary_cross_entropy(?,?, size_average=False)
    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * dy.sum_elems(1 + logvar - dy.pow(mu, dy.scalarInput(2)) - dy.exp(logvar))

    return BCE + KLD
def create_xor_network(pW, pV, pb, inputs, expected_answer):
    dy.renew_cg()  # new computation graph
    W = dy.parameter(pW)  # add parameters to graph as expressions
    V = dy.parameter(pV)
    b = dy.parameter(pb)
    x = dy.vecInput(len(inputs))
    x.set(inputs)
    y = dy.scalarInput(expected_answer)
    output = dy.logistic(V * (dy.tanh((W * x) + b)))
    loss = dy.binary_log_loss(output, y)
    return loss
def train_item(args, model, document):
    loss = None
    word_lookups = []
    for preprocessed_sentence in document.preprocessed_sentences:
        seq = [
            model.wlookup[int(model.w2i.get(entry, 0))]
            for entry in preprocessed_sentence
        ]
        if len(seq) > 0:
            word_lookups.append(seq)

    sentences_lookups = []
    for seq in word_lookups:
        sentence_encode = encode_sequence(model, seq, model.sentence_rnn)
        global_max = max_pooling(sentence_encode)
        global_min = average_pooling(sentence_encode)
        if len(sentence_encode) > 0:
            last_out = sentence_encode[-1]
            context = dy.concatenate([last_out, global_max, global_min])
            sentences_lookups.append(context)

    document_encode = encode_sequence(model, sentences_lookups,
                                      model.document_rnn)
    global_max = max_pooling(document_encode)
    global_min = average_pooling(document_encode)
    if len(document_encode) > 0:
        last_out = document_encode[-1]
        context = dy.concatenate([last_out, global_max, global_min])
        y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)

        if document.permissions[args.permission_type]:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
        else:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))

        loss.backward()
        model.trainer.update()
        loss_val = loss.scalar_value()
        dy.renew_cg()
        return loss_val
    return 0
def train_item(args, model, sentence):
    loss = None
    seq = [
        model.wlookup[int(model.w2i.get(entry, 0))]
        for entry in sentence.preprocessed_sentence
    ]
    if len(seq) > 0:
        encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
        global_max = max_pooling(encoded_sequence)
        global_min = average_pooling(encoded_sequence)
        if len(encoded_sequence) > 0:
            att_mlp_outputs = []
            for e in encoded_sequence:
                mlp_out = (model.attention_w * e) + model.attention_b
                att_mlp_outputs.append(mlp_out)

            lst = []
            for o in att_mlp_outputs:
                lst.append(dy.exp(dy.sum_elems(dy.cmult(o,
                                                        model.att_context))))

            sum_all = dy.esum(lst)

            probs = [dy.cdiv(e, sum_all) for e in lst]
            att_context = dy.esum(
                [dy.cmult(p, h) for p, h in zip(probs, encoded_sequence)])
            context = dy.concatenate([att_context, global_max, global_min])
            #context = dy.concatenate([att_context])
            y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)

            if sentence.permissions[args.permission_type]:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
            else:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))

            loss.backward()
            model.trainer.update()
            loss_val = loss.scalar_value()
            dy.renew_cg()
            return loss_val
    return 0
Example #11
def loss_function(recon_x, x, mu, logvar):
    BCE = dy.binary_log_loss(
        recon_x, x
    )  # equiv to torch.nn.functional.binary_cross_entropy(?,?, size_average=False)
    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * dy.sum_elems(1 + logvar - dy.pow(mu, dy.scalarInput(2)) -
                              dy.exp(logvar))

    return BCE + KLD
Example #12
def create_xor_network(mW, mb, mV, input, expected_output):
    # For each input, create the computational graph and get the loss
    dy.renew_cg()
    W = dy.parameter(mW)
    b = dy.parameter(mb)
    V = dy.parameter(mV)
    x = dy.vecInput(len(input))
    x.set(input)
    y = dy.scalarInput(expected_output)
    graph_output = dy.logistic(V*(dy.tanh(W*x+b)))
    loss = dy.binary_log_loss(graph_output, y)
    return loss
Example #13
def create_network(pWeight, inputs, expected_answer):
    # new computation graph
    dy.renew_cg()

    # add parameters to graph as expressions
    Weight = dy.parameter(pWeight)
    input_dy = dy.vecInput(len(inputs))
    input_dy.set(inputs)
    target_output = dy.scalarInput(expected_answer)
    output = dy.logistic(dy.tanh(Weight * input_dy))
    loss = dy.binary_log_loss(output, target_output)
    return loss
Example #14
    def batch_loss(self, sents, train=True):
        probas = self._predict(sents, train)

        # we pack all predicted probas into one vector of length batch_size
        probas = dy.concatenate(probas)

        # we make a dynet vector out of the true ys
        y_true = dy.inputVector([y for y, _ in sents])

        # classification loss: we use the logistic loss
        # this function automatically sums over all entries.
        total_loss = dy.binary_log_loss(probas, y_true)

        return total_loss
Example #15
    def get_loss(self, article_X, article_Y):
        dy.renew_cg()
        input_seqs_X = [[self.common.get_wemb(w) for w in sent]
                        for sent in article_X]
        rep = self.get_article_rep(input_seqs_X)
        on_probs = dy.logistic(rep)

        answers_numpy = np.zeros(len(self._w2i))
        present = {self._w2i[w]: True for sent in article_Y for w in sent}
        for i in present:
            answers_numpy[i] = 1.0
        answers = dy.inputTensor(answers_numpy)
        loss = dy.binary_log_loss(on_probs, answers)
        return loss
Example #16
    def train(self, training_set):
        loss_chunk = 0
        loss_all = 0
        total_chunk = 0
        total_all = 0
        losses = []
        for datapoint in training_set:
            query = datapoint[0]
            eq = dy.average([
                self.word_embeddings[self.w2i[w]]
                if w in self.w2i else self.word_embeddings[0] for w in query
            ])
            hyper = datapoint[1]
            eh = dy.average([
                self.word_embeddings[self.w2i[w]]
                if w in self.w2i else self.word_embeddings[0] for w in hyper
            ])
            t = dy.scalarInput(datapoint[2])
            Ps = []
            for i in range(self.k):
                Ps.append(self.Phis[i].expr() * eq)
            P = dy.transpose(dy.concatenate_cols(Ps))
            s = P * eh
            y = dy.reshape(dy.logistic(self.W.expr() * s + self.b.expr()),
                           (1, ))

            losses.append(dy.binary_log_loss(y, t))

            # process losses in chunks
            if len(losses) > 50:
                loss = dy.esum(losses)
                l = loss.scalar_value()
                loss.backward()
                self.trainer.update()
                dy.renew_cg()
                losses = []
                loss_chunk += l
                loss_all += l
                total_chunk += 1
                total_all += 1

        # consider any remaining losses
        if len(losses) > 0:
            loss = dy.esum(losses)
            loss.scalar_value()
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
        print(f'loss: {loss_all/total_all:.4f}')
Example #17
 def train(self, tweets):
     loss_all = 0
     total_all = 0
     start_all = time.time()
     for i, tweet in enumerate(tweets):
         v = self.extract_features(tweet)
         output = self.forward(v)
         gold = tweet.emotions
         loss = dy.binary_log_loss(output, dy.inputTensor(gold))
         loss_all += loss.npvalue()[0]
         total_all += 1
         loss.scalar_value()
         loss.backward()
         self.trainer.update()
         dy.renew_cg()
     end = time.time()
     print(f'loss: {loss_all/total_all:.4f}\ttime: {end-start_all:,.2f} secs')
    def compute_loss(self, pred):
        """Adds Ops for the loss function to the computational graph.
        In this case we are using cross entropy loss.
        The loss should be averaged over all examples in the current minibatch.

        Args:
            pred: A tensor of shape (batch_size, n_classes) containing the output of the neural
                  network before the softmax layer.
        Returns:
            loss: A 0-d tensor (scalar)
        """
        ### YOUR CODE HERE
        y = dy.inputTensor(np.transpose(self.labels), batched=True)

        losses = dy.binary_log_loss(pred, y)
        loss = dy.sum_batches(losses) / self.config.batch_size
        ### END YOUR CODE
        return loss
def calc_sent_loss(sent):
  # Create a computation graph
  dy.renew_cg()

  
  # Get embeddings for the sentence
  emb = [W_w_p[x] for x in sent]

  # Step through the sentence and calculate binary prediction losses
  all_losses = [] 
  for i, my_emb in enumerate(emb):
    scores = dy.logistic(W_c * my_emb)
    pos_words = ([sent[x] if x >= 0 else S for x in range(i-N,i)] +
                 [sent[x] if x < len(sent) else S for x in range(i+1,i+N+1)])
    word_repr = [[float(y) for y in np.binary_repr(x).zfill(nbits)] for x in pos_words]
    word_repr = [dy.inputVector(x) for x in word_repr]
    all_losses.extend([dy.binary_log_loss(scores, x) for x in word_repr])
  return dy.esum(all_losses)
def calc_sent_loss(sent):
    # Create a computation graph
    dy.renew_cg()

    # Get embeddings for the sentence
    emb = [W_w_p[x] for x in sent]

    # Step through the sentence and calculate binary prediction losses
    all_losses = []
    for i, my_emb in enumerate(emb):
        scores = dy.logistic(W_c * my_emb)
        pos_words = (
            [sent[x] if x >= 0 else S for x in range(i - N, i)] +
            [sent[x] if x < len(sent) else S for x in range(i + 1, i + N + 1)])
        word_repr = [[float(y) for y in np.binary_repr(x).zfill(nbits)]
                     for x in pos_words]
        word_repr = [dy.inputVector(x) for x in word_repr]
        all_losses.extend([dy.binary_log_loss(scores, x) for x in word_repr])
    return dy.esum(all_losses)
Example #21
def main():
 
    #Read csv data into a dataframe 
    data = pd.read_csv("Reviews.csv").sample(1000)
    #data = pd.read_csv("Reviews.csv").sample(frac=1)
    split = round(data.shape[0]*0.8)
    print (split)    
   
    #Set up the training parameters
    vocab_size = 30000   #cutoff for num of most common words for text
    #max_len = 300          #cutoff for length of sequence for text

    #Convert training text to sequence
    t = Tokenizer(num_words=vocab_size)
    t.fit_on_texts(data['Text'])
    x = t.texts_to_sequences(data['Text'].astype(str))
    y = data['Score'].apply(lambda x: int(x >= 4))
    
    m = dy.Model()
    trainer = dy.AdamTrainer(m)
    embeds = m.add_lookup_parameters((vocab_size, embed_size))
    acceptor = LstmAcceptor(embed_size, hidden_size, 1, m) 

    sum_of_losses = 0.0
    for epoch in range(10):
        for sequence, label in zip(x[:split], y[:split]):
            dy.renew_cg()
            label = dy.scalarInput(label)
            vecs = [embeds[i] for i in sequence]
            preds = acceptor(vecs)
            loss = dy.binary_log_loss(preds, label)
            sum_of_losses += loss.npvalue()
            loss.backward()
            trainer.update()
        print (sum_of_losses / split)
        sum_of_losses = 0.0
    print ("\n\nPrediction time!\n")
    for sequence, label in zip(x[split:], y[split:]):
        dy.renew_cg()
        vecs = [embeds[i] for i in sequence]
        preds = acceptor(vecs).value()
        print (preds, label)
Example #22
 def calculate_loss_classification(self, input, output):
     #dy.renew_cg()
     weight_matrix_array = []
     biases_array = []
     for (W, b) in zip(self.weight_matrix_array, self.biases_array):
         weight_matrix_array.append(dy.parameter(W))
         biases_array.append(dy.parameter(b))
     acts = self.act
     w = weight_matrix_array[0]
     b = biases_array[0]
     act = acts[0]
     intermediate = act(dy.affine_transform([b, w, input]))
     activations = [intermediate]
     for (W, b, g) in zip(weight_matrix_array[1:], biases_array[1:],
                          acts[1:]):
         pred = g(dy.affine_transform([b, W, activations[-1]]))
         activations.append(pred)
     #print output.value(), pred.value()
     losses = dy.binary_log_loss(pred, output)
     return losses
Example #23
  def calc_nll(self, src, trg):
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if not batchers.is_batched(trg): trg = batchers.mark_as_batch([trg])

    if self.mode in ["avg_mlp", "final_mlp"]:
      if self.mode=="avg_mlp":
        if encodings.mask:
          encoding_fixed_size = dy.cdiv(dy.sum_dim(encodings.as_tensor(), [1]),
                                 dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
        else:
          encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) / encodings.dim()[0][1]
      elif self.mode=="final_mlp":
        encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
      scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode=="lin_sum_sig":
      enc_lin = []
      for step_i, enc_i in enumerate(encodings):
        step_linear = self.output_layer.transform(enc_i)
        if encodings.mask and np.sum(encodings.mask.np_arr[:,step_i])>0:
          step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - encodings.mask.np_arr[:,step_i], batched=True))
        enc_lin.append(step_linear)
      if encodings.mask:
        encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                      dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
      else:
        encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
      scores = dy.logistic(encoding_fixed_size)

    else: raise ValueError(f"unknown mode '{self.mode}'")

    idxs = ([], [])
    for batch_i in range(trg.batch_size()):
      for word in set(trg[batch_i]):
        if word not in {vocabs.Vocab.ES, vocabs.Vocab.SS}:
          idxs[0].append(word)
          idxs[1].append(batch_i)
    trg_scores = dy.sparse_inputTensor(idxs, values = np.ones(len(idxs[0])), shape=scores.dim()[0] + (scores.dim()[1],), batched=True, )
    loss_expr = dy.binary_log_loss(scores, trg_scores)
    return loss_expr
Example #24
    def train(self, training_set):
        for sentence, eid, entity, trigger, label, pos, chars, rule in training_set:
            features = self.encode_sentence(sentence, pos, chars)
            loss = []

            entity_embeds = dy.average([features[word] for word in entity])

            attention, context = self.attend(features)
            # loss.append(-dy.log(dy.pick(attention, trigger)))
            h_t = dy.concatenate([context, entity_embeds])
            hidden = dy.tanh(self.lb * h_t + self.lb_bias)
            out_vector = dy.reshape(
                dy.logistic(self.lb2 * hidden + self.lb2_bias), (1, ))
            # probs = dy.softmax(out_vector)
            label = dy.scalarInput(label)
            loss.append(dy.binary_log_loss(out_vector, label))

            # Get decoding losses
            last_output_embeddings = self.pattern_embeddings[0]
            s = self.decoder_lstm.initial_state().add_input(
                dy.concatenate(
                    [dy.vecInput(self.hidden_dim), last_output_embeddings]))

            rule.append(1)
            for pattern in rule:
                h_t = s.output()
                context = self.attend(features, h_t)
                out_vector = self.pt.expr() * dy.concatenate(
                    [context, h_t]) + self.pt_bias.expr()
                probs = dy.softmax(out_vector)
                loss.append(-dy.log(dy.pick(probs, pattern)))
                last_output_embeddings = self.pattern_embeddings[pattern]
                s = s.add_input(
                    dy.concatenate([context, last_output_embeddings]))
            loss = dy.esum(loss)
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
Example #25
    def _create_network(self, inputs, expected_answer):
        dy.renew_cg()  # new computation graph

        self.weights = {
            'h1': dy.parameter(self._pw1),
            'h2': dy.parameter(self._pw2),
            'out': dy.parameter(self._pw3)
        }

        self.biases = {
            'b1': dy.parameter(self._pb1),
            'b2': dy.parameter(self._pb2),
            'out': dy.parameter(self._pb3)
        }

        x = dy.vecInput(len(inputs))
        # x = dy.vecInput(len(X_train[1]))
        x.set(inputs)
        x = dy.reshape(x, (1, X_train.shape[1]))
        y = dy.inputTensor(expected_answer)
        yy = dy.reshape(y, (20, 1))
        output = self._multilayer_perceptron(x)
        loss = dy.binary_log_loss(output, yy)
        return loss
Example #26
# `autobatching` allows us to feed each datapoint in one at a time, and `dyNet` will figure out how to "optimize" the operations.  Let's iterate through our training data.
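
# A minimal sketch of how autobatching gets switched on (an assumption, since the
# flag is not shown in this notebook): either pass --dynet-autobatch 1 on the
# command line, or, if the installed dynet_config supports the option, set it
# before the first dynet import:
import dynet_config
dynet_config.set(autobatch=True)  # assumed option; equivalent to --dynet-autobatch 1
import dynet as dy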


# iterate through the dataset
for i in range(trainX.shape[0]):
    # prepare input
    x = np.expand_dims(trainX[i], axis=0)   # must make it a vector with dimensions (1 x voc_size)
    # prepare output
    y = dy.scalarInput(trainY[i])
    # make a forward pass
    pred = forward_pass(x)
    # calculate loss for each example
    loss = dy.binary_log_loss(pred, y) 
    losses.append(loss)


# Now let's accumulate the loss and backpropagate it.


# get total loss for dataset
total_loss = dy.esum(losses)
# apply the calculations of the computational graph
total_loss.forward()
# calculate loss to backpropagate
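# The listing is cut off here; a minimal sketch of the remaining steps, assuming the
# trainer created earlier in the notebook is available as `trainer` (a hypothetical
# name, not shown above):
total_loss.backward()
# update the parameters with the backpropagated error
trainer.update()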
Example #27
e = dy.concatenate_cols([e1, e2, ...])  # e1, e2, ... are column vectors; returns a matrix (similar to np.hstack([e1, e2, ...]))
e = dy.concatenate([e1, e2, ...])       # concatenate along the first dimension (similar to np.vstack([e1, e2, ...]))

e = dy.affine_transform([e0, e1, e2, ...])  # e = e0 + ((e1*e2) + (e3*e4) ...)

## Loss functions
e = dy.squared_distance(e1, e2)
e = dy.l1_distance(e1, e2)
e = dy.huber_distance(e1, e2, c=1.345)

# e1 must be a scalar that is a value between 0 and 1
# e2 (ty) must be a scalar that is a value between 0 and 1
# e = -(ty * log(e1) + (1 - ty) * log(1 - e1))
e = dy.binary_log_loss(e1, e2)
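
# A small worked check of the formula above (a sketch, assuming the usual
# `import dynet as dy` and a fresh computation graph):
p = dy.scalarInput(0.8)   # prediction in (0, 1)
t = dy.scalarInput(1)     # binary target
assert abs(dy.binary_log_loss(p, t).value() - 0.2231) < 1e-3   # -log(0.8) is about 0.223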

# e1 is row vector or scalar
# e2 is row vector or scalar
# m is number
# e = max(0, m - (e1 - e2))
e = dy.pairwise_rank_loss(e1, e2, m=1.0)

# Convolutions
# e1 \in R^{d x s} (input)
# e2 \in R^{d x m} (filter)
e = dy.conv1d_narrow(e1, e2)  # e = e1 *conv e2
e = dy.conv1d_wide(e1, e2)  # e = e1 *conv e2
e = dy.filter1d_narrow(e1, e2)  # e = e1 *filter e2

e = dy.kmax_pooling(e1, k)  #  kmax-pooling operation (Kalchbrenner et al 2014)
 def calculateLoss(self, output, y):
     #function to calculate the loss value
     loss = dy.binary_log_loss(output, y)
     return loss
Example #29
if len(sys.argv) == 2:
    m.populate_from_textfile(sys.argv[1])

W = dy.parameter(pW)
b = dy.parameter(pb)
V = dy.parameter(pV)
a = dy.parameter(pa)

x = dy.vecInput(2, "GPU:0")
y = dy.scalarInput(0, "CPU")
h = dy.tanh((W * x) + b)
h_cpu = dy.to_device(h, "CPU")
if xsent:
    y_pred = dy.logistic((V * h_cpu) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1
    F = 0
else:
    y_pred = (V * h_cpu) + a
    loss = dy.squared_distance(y_pred, y)
    T = 1
    F = -1

for iter in range(ITERATIONS):
    mloss = 0.0
    for mi in range(4):
        x1 = mi % 2
        x2 = (mi // 2) % 2
        x.set([T if x1 else F, T if x2 else F])
        y.set(T if x1 != x2 else F)
Example #30
dy.renew_cg()  # new computation graph. not strictly needed here, but good practice.

# associate the parameters with cg Expressions
# creates a computation graph and adds the parameters to it,
# transforming them into Expressions.
# The need to distinguish model parameters from “expressions” will become clearer later.
W = dy.parameter(pW)
V = dy.parameter(pV)
b = dy.parameter(pb)

x = dy.vecInput(2)  # an input vector of size 2. Also an expression.
output = dy.logistic(V * (dy.tanh((W * x) + b)))

y = dy.scalarInput(0)
loss = dy.binary_log_loss(output, y)

print(x.value())

print(b.value())

trainer = dy.SimpleSGDTrainer(m, learning_rate=.001)
for i in range(1000):
    x.set([1, 1])
    y.set(0)
    # loss.value()
    loss.backward()
    trainer.update()
print(x.value())

print(b.value())
Example #31
trainer = dy.SimpleSGDTrainer(m)

W = m.add_parameters((HIDDEN_SIZE, 2))
b = m.add_parameters(HIDDEN_SIZE)
V = m.add_parameters((1, HIDDEN_SIZE))
a = m.add_parameters(1)

if len(sys.argv) == 2:
  m.populate_from_textfile(sys.argv[1])

x = dy.vecInput(2)
y = dy.scalarInput(0)
h = dy.tanh((W*x) + b)
if xsent:
    y_pred = dy.logistic((V*h) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1
    F = 0
else:
    y_pred = (V*h) + a
    loss = dy.squared_distance(y_pred, y)
    T = 1
    F = -1


for iter in range(ITERATIONS):
    mloss = 0.0
    for mi in range(4):
        x1 = mi % 2
        x2 = (mi // 2) % 2
        x.set([T if x1 else F, T if x2 else F])
Example #32
    def _compute_binary_divergence(self, pred, target):
        return dy.binary_log_loss(pred, target)

losses = []
batch_size = 1000
dynet.renew_cg()

for j in tqdm(range(int(X.shape[0]/batch_size) + 1)):
    for k in range(batch_size):
        index = (j*batch_size) + k
        if index > X.shape[0]-1: break
        # prepare input
        little_x = X[index].reshape(1,-1)   # must make it a vector with dimensions (1 x voc_size)
        # prepare output
        little_y = dynet.inputTensor(y[index])
        # make a forward pass
        pred = m.one_pass(little_x)
        # calculate loss for each example
        loss = dynet.binary_log_loss(pred, little_y)
        if not numpy.isnan(loss.npvalue()):
            losses.append(loss)
        else:
            print(i, j, 'loss was nan!')
    total_loss = dynet.esum(losses)
    # apply the calculations of the computational graph
    total_loss.forward()
    # calculate loss to backpropagate
    total_loss.backward()
    # update parameters with backpropagated error
    m.trainer.update()
#### end of batch
if last_loss:
    cur_loss = total_loss.npvalue()[0]
    print('cur loss:', cur_loss)