Code Example #1
    def train_step(self, X_train, y_train, state):
        y_preds = []
        caches = []
        loss = 0.

        # Forward
        for x, y_true in zip(X_train, y_train):
            y, state, cache = self.forward(x, state, train=True)
            loss += loss_fun.cross_entropy(self.model, y, y_true, lam=0)

            y_preds.append(y)
            caches.append(cache)

        loss /= X_train.shape[0]

        # Backward
        dh_next = np.zeros((1, self.H))
        dc_next = np.zeros((1, self.H))
        d_next = (dh_next, dc_next)

        grads = {k: np.zeros_like(v) for k, v in self.model.items()}

        for y_pred, y_true, cache in reversed(
                list(zip(y_preds, y_train, caches))):
            grad, d_next = self.backward(y_pred, y_true, d_next, cache)

            for k in grads.keys():
                grads[k] += grad[k]

        for k, v in grads.items():
            grads[k] = np.clip(v, -5., 5.)

        return grads, loss, state
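
The loss_fun.cross_entropy helper called here (and in Example #4 below) is not shown. A minimal sketch of a compatible implementation, assuming y_pred is a softmax probability row vector of shape (1, vocab_size), y_true an integer class index, and lam an L2 regularization coefficient over the model's parameter arrays:

    import numpy as np

    def cross_entropy(model, y_pred, y_true, lam=0):
        # Negative log-likelihood of the true class; 1e-12 guards log(0).
        loss = -np.log(y_pred[0, y_true] + 1e-12)
        if lam > 0:
            # Optional L2 penalty over all parameter arrays in the model dict.
            loss += lam / 2 * sum(np.sum(W ** 2) for W in model.values())
        return loss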
Code Example #2
    def train(self, x_train, y_train):

        y_pred, cache = self.forward(x_train)
        loss = cross_entropy(y_pred, y_train)
        grad = self.backward(y_pred, y_train, cache)

        return grad, loss
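
A hypothetical driver loop for this train() step, assuming the network keeps its parameters in a dict self.model whose keys match those of grad (as in Examples #1 and #4); the name net and the learning rate are illustrative only:

    lr = 1e-3
    for epoch in range(100):
        grad, loss = net.train(x_train, y_train)
        for k in net.model:
            net.model[k] -= lr * grad[k]  # vanilla SGD update
        print('epoch %d, loss %.4f' % (epoch, loss))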
Code Example #3
File: layers.py Project: tsgts/representation-music
    def add_loss_layer(self, layer_name, prediction_layer_id,
                       ground_truth_layer_id, loss_type):
        """
        Adds a layer corresponding to the loss function
        :param layer_name: The name of the layer. Type=string
        :param prediction_layer_id: The identifier for the prediction layer
        :param ground_truth_layer_id: The identifier for the ground truth layer
        :param loss_type: The loss function to use. Available options defined by LossTypes.
        :return: None
        """
        layer_id = self._get_layer_id(layer_name)
        assert self._layer_verifier(
            layer_id), 'Invalid: This layer is already present.'
        assert not self._layer_verifier(
            prediction_layer_id), 'Invalid: Output layer id is invalid.'
        assert not self._layer_verifier(
            ground_truth_layer_id
        ), 'Invalid: Ground truth layer id is invalid.'

        output = self.layers[prediction_layer_id]
        ground_truth = self.layers[ground_truth_layer_id]
        if loss_type == LossTypes.mse:
            self.layers[layer_id] = mse(ground_truth, output)
        elif loss_type == LossTypes.cross_entropy:
            self.layers[layer_id] = cross_entropy(ground_truth, output)
        else:
            raise ValueError(
                'The type of loss can only be one of ["mse", "cross_entropy"]')
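
Hypothetical usage of add_loss_layer, assuming prediction and ground-truth layers with ids 'fc_out' and 'labels' were registered earlier; the network instance and layer names are illustrative only:

    net.add_loss_layer(layer_name='xent_loss',
                       prediction_layer_id='fc_out',
                       ground_truth_layer_id='labels',
                       loss_type=LossTypes.cross_entropy)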
Code Example #4
    def train_step(self, X_train, y_train, h):
        ys = []
        caches = []
        loss = 0.

        # Forward
        for x, y in zip(X_train, y_train):
            y_pred, h, cache = self.forward(x, h, train=True)
            loss += loss_fun.cross_entropy(self.model, y_pred, y, lam=0)
            ys.append(y_pred)
            caches.append(cache)

        loss /= X_train.shape[0]

        # Backward
        dh_next = np.zeros((1, self.H))
        grads = {k: np.zeros_like(v) for k, v in self.model.items()}

        for t in reversed(range(len(X_train))):
            grad, dh_next = self.backward(ys[t], y_train[t], dh_next,
                                          caches[t])

            for k in grads.keys():
                grads[k] += grad[k]

        for k, v in grads.items():
            grads[k] = np.clip(v, -5., 5.)

        return grads, loss, h
Code Example #5
    def train(self, training_data, training_label, batch_size, epoch,
              weights_file):
        total_acc = 0
        for e in range(epoch):
            for batch_index in range(0, training_data.shape[0], batch_size):
                # batch input
                if batch_index + batch_size < training_data.shape[0]:
                    data = training_data[batch_index:batch_index + batch_size]
                    label = training_label[batch_index:batch_index +
                                           batch_size]
                else:
                    data = training_data[batch_index:training_data.shape[0]]
                    label = training_label[batch_index:training_label.shape[0]]
                loss = 0
                acc = 0
                start_time = time.time()

                for b in range(len(data)):
                    x = data[b]
                    y = label[b]
                    # print(y)
                    # forward pass
                    for l in range(self.lay_num):
                        output = self.layers[l].forward(x)
                        x = output
                    loss += cross_entropy(output, y)
                    if np.argmax(output) == np.argmax(y):
                        acc += 1
                        total_acc += 1
                    # backward pass: the final layer's backward is fed the
                    # one-hot label directly (a combined softmax/cross-entropy
                    # layer typically computes output - y internally)
                    dy = y
                    for l in range(self.lay_num - 1, -1, -1):
                        dout = self.layers[l].backward(dy)
                        dy = dout
                # time
                end_time = time.time()
                batch_time = end_time - start_time
                remain_time = (
                    training_data.shape[0] * epoch - batch_index -
                    training_data.shape[0] * e) / batch_size * batch_time
                hrs = int(remain_time // 3600)
                mins = int((remain_time % 3600) // 60)
                secs = int(remain_time % 60)
                # result
                loss /= len(data)
                batch_acc = float(acc) / float(len(data))
                training_acc = float(total_acc) / float(
                    e * training_data.shape[0] + batch_index + len(data))
                print(
                    '=== Epoch: {0:d}/{1:d} === Iter:{2:d} === Loss: {3:.2f} === BAcc: {4:.2f} === TAcc: {5:.2f} === Remain: {6:d} Hrs {7:d} Mins {8:d} Secs ==='
                    .format(e, epoch, batch_index + batch_size, loss,
                            batch_acc, training_acc, int(hrs), int(mins),
                            int(secs)))
        # dump weights and bias
        obj = []
        for i in range(self.lay_num):
            cache = self.layers[i].extract()
            obj.append(cache)
        with open(weights_file, 'wb') as handle:
            pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)
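
A matching load step is not shown in the original; a minimal sketch, assuming the weights file is read back into the same list-of-caches structure that train() dumps (how each cache is pushed back into its layer depends on the Layer API, which is not shown):

    import pickle

    # Read back the list of per-layer caches written by train() above.
    with open(weights_file, 'rb') as handle:
        obj = pickle.load(handle)
    # Each obj[i] corresponds to self.layers[i].extract(); restoring it
    # requires whatever setter the (unshown) Layer class provides.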
Code Example #6
File: topic_lstm.py Project: tbepler/rnn
    def loss(self, X, mask=None, flank=0, Z=None):
        if Z is None:
            Z = self.transform(self.noise(X), mask=mask)
        E = self.emit(Z)
        L = cross_entropy(E, X)
        C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])
        if mask is not None:
            L *= T.shape_padright(mask)
            C *= T.shape_padright(T.shape_padright(mask))
        n = X.shape[0]
        return L[flank:n-flank], C[flank:n-flank]
Code Example #7
    def train(iteration):
        model.train()
        avg_ans_loss = 0
        avg_att_loss = 0
        avg_sem_loss = 0

        for batch_idx, (img, bbox, que, ans, op,
                        att) in enumerate(trainloader):
            img, bbox, que, ans, op, att = (Variable(img), Variable(bbox),
                                            Variable(que), Variable(ans),
                                            Variable(op), Variable(att))
            img, bbox, que, ans, op, att = (img.cuda(), bbox.cuda(),
                                            que.cuda(), ans.cuda(),
                                            op.cuda(), att.cuda())
            optimizer.zero_grad()
            # different training objectives
            output, pred_op, pred_att = model(img, bbox, que)
            ans_mask, att_mask = get_mask(op)
            ans_loss = cross_entropy(output, ans)
            att_loss = attention_loss_mask_kld(pred_att, att, att_mask)
            sem_loss = semantic_loss(pred_op, op)
            loss = ans_loss + att_loss * args.alpha * max(
                (1 + np.cos(np.pi * (iteration / 300000))),
                0) + args.beta * sem_loss  # originally 0.5, 300000
            loss.backward()

            if args.clip != 0:
                clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()
            avg_ans_loss = (avg_ans_loss * np.maximum(0, batch_idx) +
                            ans_loss.data.cpu().numpy()) / (batch_idx + 1)
            avg_att_loss = (avg_att_loss * np.maximum(0, batch_idx) +
                            att_loss.data.cpu().numpy()) / (batch_idx + 1)
            avg_sem_loss = (avg_sem_loss * np.maximum(0, batch_idx) +
                            sem_loss.data.cpu().numpy()) / (batch_idx + 1)

            if batch_idx % 25 == 0:
                with tf_summary_writer.as_default():
                    tf.summary.scalar('answer loss',
                                      avg_ans_loss,
                                      step=iteration)
                    tf.summary.scalar('step attention loss',
                                      avg_att_loss,
                                      step=iteration)
                    tf.summary.scalar('semantic loss',
                                      avg_sem_loss,
                                      step=iteration)

            iteration += 1

        return iteration
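
The attention-loss weight in the line computing `loss` above follows a cosine schedule. Isolating it as a standalone sketch (the 300000-iteration period comes from the code above): the coefficient starts at 2 * args.alpha and decays to 0 at iteration 300000. Note that 1 + cos(x) is never negative, so the max(..., 0) clamp never fires and the weight would rise again past that point.

    import numpy as np

    def att_loss_weight(iteration, alpha, period=300000):
        # 2 * alpha at iteration 0, ~0 at iteration == period.
        return alpha * max(1 + np.cos(np.pi * iteration / period), 0)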
Code Example #8
File: decoder.py Project: qq342035261/OpenLAS
    def forward(self,
                enc_outputs,
                enc_lengths,
                src_ids,
                tgt_ids,
                label_smooth=0):
        bz = enc_outputs.shape[0]
        if bz != src_ids.shape[0]:
            raise ValueError("enc_outputs does not match src_ids.")

        encout_max_length = enc_outputs.shape[1]
        dec_max_length = src_ids.shape[1]
        att_masks = (
            1 - get_seq_mask_by_shape(encout_max_length, dec_max_length,
                                      enc_lengths).transpose(1, 2)).byte()

        rnn_in = self.emb(src_ids)
        rnn_in = self.dropout(rnn_in)

        rnn = self.rnns[0]
        rnn_output, _ = rnn(rnn_in)

        for l in range(1, self.num_layers):
            att_scores, att = self.attentions[l - 1](enc_outputs,
                                                     rnn_output,
                                                     enc_outputs,
                                                     mask=att_masks)
            rnn_in = torch.cat([rnn_output, att], dim=-1)
            rnn_in = self.dropout(rnn_in)
            rnn_output, _ = self.rnns[l](rnn_in)

        rnn_output = self.dropout(rnn_output)
        logits = self.output_affine(rnn_output)

        ce = cross_entropy(logits.view(-1, logits.size(-1)), tgt_ids.view(-1))
        if label_smooth > 0:
            ls = uniform_label_smooth_regulerizer(
                logits.view(-1, logits.size(-1)), tgt_ids.view(-1))
            loss = (1 - label_smooth) * ce + label_smooth * ls
        else:
            loss = ce
        return loss
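
The uniform_label_smooth_regulerizer helper is not shown. A common formulation, sketched here as an assumption, is the cross-entropy against the uniform distribution over classes, i.e. the mean negative log-probability; tgt_ids would typically only be needed to mask padding positions, which this sketch omits:

    import torch.nn.functional as F

    def uniform_label_smooth_regulerizer(logits, tgt_ids):
        # Assumed formulation: mean negative log-probability over all
        # classes (uniform-target cross-entropy); padding mask omitted.
        log_probs = F.log_softmax(logits, dim=-1)
        return -log_probs.mean()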
Code Example #9
File: test.py Project: jiefisher/Omega
class embnet(module.Module):  # base class assumed, matching mynet below
    def __init__(self):
        self.embed = nn.Embedding(3, 4)
        self.embed.embed_w.const = em  # `em` is defined elsewhere in the original script
        self.fc3 = nn.rnn((20, 2))

    def forward(self, x):
        y1 = self.embed(x)
        y2 = reshape(y1, [-1, 4 * 5])
        y = self.fc3(y2)
        return y
x = node.Node("x")
labels=node.Node("label")

c = embnet()


eloss = loss.cross_entropy(c(x), labels)
a = [np.array([0, 2, 1, 2, 1]).reshape(1, 5),
     np.array([0, 2, 1, 2, 2]).reshape(1, 5)]
b = [np.array([1, 0]).reshape(1, 2), np.array([0, 1]).reshape(1, 2)]
optimizer = opt.SGD(eloss, c.parameters())

for epoch in range(10):
    for batch in range(2):
        optimizer.step(feed_dict={x: a[batch], labels: b[batch]})
        print(optimizer.parameters[1].const, optimizer.parameters[0].const)
exit()
class mynet(module.Module):
    def __init__(self):
        self.conv1 = nn.Conv2d(filter_shapes=(1, 6, 5, 5), padding=(2, 2), stride=(1, 1))
        self.pool1 = nn.MaxPool(ksize=(2, 2), padding=(0, 0), stride=(2, 2))
        self.conv2 = nn.Conv2d(filter_shapes=(6, 16, 5, 5), padding=(0, 0), stride=(1, 1))
        self.pool2 = nn.MaxPool(ksize=(2, 2), padding=(0, 0), stride=(2, 2))
Code Example #10
if args.tf_model_type == 'capsule-A':
    poses, activations = capsule_model_A(X_embedding, args.num_classes)
elif args.tf_model_type == 'capsule-B':
    poses, activations = capsule_model_B(X_embedding, args.num_classes)
elif args.tf_model_type == 'CNN':
    poses, activations = baseline_model_cnn(X_embedding, args.num_classes)
elif args.tf_model_type == 'KIMCNN':
    poses, activations = baseline_model_kimcnn(X_embedding, args.max_sent,
                                               args.num_classes)

if args.tf_loss_type == 'spread_loss':
    loss = spread_loss(y, activations, margin)
elif args.tf_loss_type == 'margin_loss':
    loss = margin_loss(y, activations)
elif args.tf_loss_type == 'cross_entropy':
    loss = cross_entropy(y, activations)

y_pred = tf.argmax(activations, axis=1, name="y_proba")
correct = tf.equal(tf.argmax(y, axis=1), y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
# tf.summary.scalar('accuracy', accuracy)
# merged = tf.summary.merge_all()
# writer = tf.summary.FileWriter('/tmp/writer_log')

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss, name="training_op")
gradients, variables = zip(*optimizer.compute_gradients(loss))

grad_check = [
    tf.check_numerics(g, message='Gradient NaN Found!')
    for g in gradients if g is not None
] + [tf.check_numerics(loss, message='Loss NaN Found')]
with tf.control_dependencies(grad_check):
    training_op = optimizer.apply_gradients(zip(gradients, variables),
                                            global_step=global_step)
Code Example #11
File: crf.py Project: tbepler/rnn
    def loss(self, Yh, Y):
        return cross_entropy(Yh, Y)
Code Example #12
def main(_):
    # Create the input placeholder
    x_image = tf.placeholder(
        tf.float32, [batch_size, input_res, input_res, input_channels])

    # Define loss and optimizer
    y_image_ = tf.placeholder(tf.float32,
                              [batch_size, input_res, input_res, 1])

    y_image, mode_training = model.make_unet(x_image=x_image)

    # Build the objective loss function as well as the accuracy parts of the graph
    total_loss = loss.cross_entropy(y_image, y_image_)
    tf.summary.scalar('total_loss', total_loss)

    global_step = tf.Variable(0,
                              dtype=tf.int32,
                              trainable=False,
                              name='global_step')
    learning_rate = tf.train.piecewise_constant(
        global_step, LEARNING_RATE_PARAMS["boundaries"],
        LEARNING_RATE_PARAMS["values"])

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            total_loss, global_step=global_step)

    # Summaries
    merged_summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(os.path.join(LOGS_DIR, "train"),
                                         tf.get_default_graph())
    val_writer = tf.summary.FileWriter(os.path.join(LOGS_DIR, "val"),
                                       tf.get_default_graph())

    # Dataset
    train_dataset_filename = os.path.join(TFRECORDS_DIR, "train.tfrecord")
    train_images, train_polygons, train_raster_polygons = dataset.read_and_decode(
        train_dataset_filename, input_res, output_vertex_count, batch_size,
        INPUT_DYNAMIC_RANGE)
    val_dataset_filename = os.path.join(TFRECORDS_DIR, "val.tfrecord")
    val_images, val_polygons, val_raster_polygons = dataset.read_and_decode(
        val_dataset_filename,
        input_res,
        output_vertex_count,
        batch_size,
        INPUT_DYNAMIC_RANGE,
        augment_dataset=False)

    # Savers
    saver = tf.train.Saver()

    # The op for initializing the variables.
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)

        # Restore checkpoint if one exists
        checkpoint = tf.train.get_checkpoint_state(CHECKPOINTS_DIR)
        if checkpoint and checkpoint.model_checkpoint_path:  # First check if the whole model has a checkpoint
            print("Restoring {} checkpoint {}".format(
                model_name, checkpoint.model_checkpoint_path))
            saver.restore(sess, checkpoint.model_checkpoint_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        init_plots()

        print("Model has {} trainable variables".format(
            tf_utils.count_number_trainable_params()))

        i = tf.train.global_step(sess, global_step)
        while i <= max_iter:
            train_image_batch, train_polygon_batch, train_raster_polygon_batch = sess.run(
                [train_images, train_polygons, train_raster_polygons])
            if i % train_loss_accuracy_steps == 0:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                train_summary, _, train_loss, train_y_image = sess.run(
                    [merged_summaries, train_step, total_loss, y_image],
                    feed_dict={
                        x_image: train_image_batch,
                        y_image_: train_raster_polygon_batch,
                        mode_training: True
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                train_writer.add_summary(train_summary, i)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                print('step %d, training loss = %g' % (i, train_loss))
                plot_results(1, train_image_batch, train_polygon_batch,
                             train_y_image)
            else:
                _ = sess.run(
                    [train_step],
                    feed_dict={
                        x_image: train_image_batch,
                        y_image_: train_raster_polygon_batch,
                        mode_training: True
                    })

            # Measure validation loss and accuracy
            if i % val_loss_accuracy_steps == 1:
                val_image_batch, val_polygon_batch, val_raster_polygon_batch = sess.run(
                    [val_images, val_polygons, val_raster_polygons])
                val_summary, val_loss, val_y_image = sess.run(
                    [merged_summaries, total_loss, y_image],
                    feed_dict={
                        x_image: val_image_batch,
                        y_image_: val_raster_polygon_batch,
                        mode_training: True
                    })
                val_writer.add_summary(val_summary, i)

                print('step %d, validation loss = %g' % (i, val_loss))
                plot_results(2, val_image_batch, val_polygon_batch,
                             val_y_image)

            # Save checkpoint
            if i % checkpoint_steps == (checkpoint_steps - 1):
                saver.save(sess,
                           os.path.join(CHECKPOINTS_DIR, model_name),
                           global_step=global_step)

            i = tf.train.global_step(sess, global_step)

        coord.request_stop()
        coord.join(threads)

        train_writer.close()
        val_writer.close()
Code Example #13
File: 2_inference.py Project: yhexie/polycnn
def main(_):
    # Create the input placeholder
    x_image = tf.placeholder(tf.float32, [batch_size, input_res, input_res, input_channels])

    # Define loss and optimizer
    y_image_ = tf.placeholder(tf.float32, [batch_size, input_res, input_res, 1])

    y_image, mode_training = model.make_unet(x_image=x_image)

    total_loss = loss.cross_entropy(y_image, y_image_)

    # Dataset
    test_dataset_filename = os.path.join(TFRECORDS_DIR, "test.tfrecord")
    test_images, test_polygons, test_raster_polygons = dataset.read_and_decode(test_dataset_filename, input_res,
                                                                               output_vertex_count, batch_size,
                                                                               INPUT_DYNAMIC_RANGE,
                                                                               augment_dataset=False)

    # Saver
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Restore checkpoint if one exists
        checkpoint = tf.train.get_checkpoint_state(CHECKPOINTS_DIR)
        if checkpoint and checkpoint.model_checkpoint_path:  # First check if the whole model has a checkpoint
            print("Restoring {} checkpoint {}".format(model_name, checkpoint.model_checkpoint_path))
            saver.restore(sess, checkpoint.model_checkpoint_path)
        else:
            print("No checkpoint was found, exiting...")
            exit()

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        test_image_batch, test_polygon_batch, test_raster_polygon_batch = sess.run([test_images, test_polygons, test_raster_polygons])
        test_loss, test_y_image_batch = sess.run(
            [total_loss, y_image],
            feed_dict={
                x_image: test_image_batch,
                y_image_: test_raster_polygon_batch, mode_training: True
            })

        print("Test loss= {}".format(test_loss))

        # Threshold output
        test_raster_polygon_batch = test_raster_polygon_batch > 0.5
        test_y_image_batch = test_y_image_batch > 0.5

        # Polygonize
        print("Polygonizing...")
        y_coord_batch_list = []
        for test_raster_polygon, test_y_image in zip(test_raster_polygon_batch, test_y_image_batch):
            test_raster_polygon = test_raster_polygon[:, :, 0]
            test_y_image = test_y_image[:, :, 0]

            # Select only one blob
            seed = np.logical_and(test_raster_polygon, test_y_image)
            test_y_image = skimage.morphology.reconstruction(seed, test_y_image, method='dilation', selem=None, offset=None)

            # Vectorize
            test_y_coords = polygon_utils.raster_to_polygon(test_y_image, output_vertex_count)
            y_coord_batch_list.append(test_y_coords)
        y_coord_batch = np.array(y_coord_batch_list)

        # Normalize
        y_coord_batch = y_coord_batch / input_res

        if not os.path.exists(SAVE_DIR):
            os.makedirs(SAVE_DIR)
        save_results(test_image_batch, test_polygon_batch, y_coord_batch, SAVE_DIR)

        coord.request_stop()
        coord.join(threads)
Code Example #14
File: main.py Project: qq345736500/wh
if args.model_type == 'capsule-A':
    poses, activations = capsule_model_A(X_embedding, args.num_classes)
elif args.model_type == 'capsule-B':
    poses, activations = capsule_model_B(X_embedding, args.num_classes)
elif args.model_type == 'CNN':
    poses, activations = baseline_model_cnn(X_embedding, args.num_classes)
elif args.model_type == 'KIMCNN':
    poses, activations = baseline_model_kimcnn(X_embedding, args.max_sent, args.num_classes)

if args.loss_type == 'spread_loss':
    loss = spread_loss(y, activations, margin)
elif args.loss_type == 'margin_loss':
    loss = margin_loss(y, activations)
elif args.loss_type == 'cross_entropy':
    loss = cross_entropy(y, activations)

y_pred = tf.argmax(activations, axis=1, name="y_proba")    
correct = tf.equal(tf.argmax(y, axis=1), y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)   
training_op = optimizer.minimize(loss, name="training_op")
gradients, variables = zip(*optimizer.compute_gradients(loss))

grad_check = [tf.check_numerics(g, message='Gradient NaN Found!')
              for g in gradients if g is not None] + [tf.check_numerics(loss, message='Loss NaN Found')]
with tf.control_dependencies(grad_check):
    training_op = optimizer.apply_gradients(zip(gradients, variables), global_step=global_step)      

Code Example #15
    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy(y, t)

        return loss
Code Example #16
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy(y, t)
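
The softmax and cross_entropy helpers used by the last two examples are not shown. Minimal numpy sketches, assuming y has shape (batch, classes) and t is one-hot:

    import numpy as np

    def softmax(z):
        z = z - np.max(z, axis=-1, keepdims=True)  # shift for numerical stability
        e = np.exp(z)
        return e / np.sum(e, axis=-1, keepdims=True)

    def cross_entropy(y, t):
        # Mean negative log-likelihood; 1e-7 guards against log(0).
        return -np.sum(t * np.log(y + 1e-7)) / y.shape[0]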