def prepVect(min_df=2, max_features=50000, n_captions=5, n_sbu=None,
             multilabel=False):
    print "prepping the Word Tokenizer..."
    _0, _1, trY, _3 = coco(mode='full', n_captions=n_captions)
    if n_sbu:
        _4, sbuY, _5 = sbuXYFilenames(n_sbu)
        trY.extend(sbuY)
    vect = Tokenizer(min_df=min_df, max_features=max_features)
    captions = sampleCaptions(trY, n_captions)
    vect.fit(captions)
    if multilabel:
        mlb = MultiLabelBinarizer()
        mlb.fit(vect.transform(captions))
        return vect, mlb
    # if not multilabel:
    return vect
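# A minimal, self-contained sketch of the fit/transform flow that prepVect
# wraps. sklearn's CountVectorizer stands in for the project's Tokenizer,
# whose exact API (min_df/max_features, fit, transform) is assumed to match.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MultiLabelBinarizer

toy_captions = ["a dog runs on the beach", "two dogs play with a ball"]
toy_vect = CountVectorizer(min_df=1, max_features=50000)
toy_vect.fit(toy_captions)

# multilabel=True additionally fits a binarizer over each caption's set of
# token ids, so every caption becomes a fixed-length 0/1 vocabulary vector
token_ids = [tuple(row.nonzero()[1]) for row in toy_vect.transform(toy_captions)]
mlb = MultiLabelBinarizer()
print(mlb.fit_transform(token_ids).shape)  # (2, vocabulary size)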
def traindecoder(
          sources=("image_vects", "word_vects")
        , sources_k=("image_vects_k", "word_vects_k")
        , batch_size=128
        , embedding_dim=300
        , n_captions=5
        ):
    # data should not be shuffled, as its placement carries semantics
    trX, teX, trY, teY = coco(mode="dev", batch_size=batch_size,
                              n_captions=n_captions)

    # # # # # # # # # #
    # Model Building  #
    # # # # # # # # # #
    stream = DataETL.getFinalStream(trX, trY, sources=sources,
                                    sources_k=sources_k,
                                    batch_size=batch_size)
    batch = stream.get_epoch_iterator().next()

    f_emb = ModelIO.load(
        '/home/luke/datasets/coco/predict/fullencoder_maxfeatures.50000')
    import ipdb
    ipdb.set_trace()
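# ModelIO.load above presumably unpickles a compiled Theano function that an
# earlier encoder run saved to disk. A minimal pickle-based stand-in for the
# assumed save/load behavior (model_io_save/model_io_load are hypothetical
# names, not the project's API):
import cPickle as pickle  # Python 2, matching the print statements above

def model_io_save(obj, path):
    with open(path + '.pkl', 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def model_io_load(path):
    with open(path + '.pkl', 'rb') as f:
        return pickle.load(f)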
import sys
sys.path.insert(0, '../../python')

import time

import torch
import torch.nn

import planner as pln
import hardware as hw
import dataset
import models

simd_cfg_path = '../../hwcfg/simd.json'
hw_spec = hw.HardwareSpec(simd_cfg_path)

data = dataset.coco()
yolov2 = models.yolov2()
pnn = pln.Planner()

start_time = time.time()

# cached outputs for YOLOv2's route (passthrough) connections
conv_5_5 = None
conv_6_7 = None
conv_7 = None
for name, module in yolov2.named_modules():
    if isinstance(module, torch.nn.Sequential):
        continue  # skip containers; only visit leaf layers
    if name == 'conv_7':
        data = conv_5_5
    elif name == 'conv_8':
        # assumed continuation (the source snippet ends here): route in the
        # cached conv_6_7 output, mirroring the conv_7 branch above
        data = conv_6_7
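# The dispatch above keys on module names from PyTorch's named_modules()
# traversal. A self-contained toy of the same pattern (the layer names here
# are illustrative, not the real YOLOv2 module names):
import torch.nn as nn

net = nn.Sequential()
net.add_module('conv_7', nn.Conv2d(16, 32, 3))
net.add_module('conv_8', nn.Conv2d(32, 64, 3))

for name, module in net.named_modules():
    if isinstance(module, nn.Sequential):
        continue  # skip the container itself; visit only leaf layers
    print(name, module.__class__.__name__)
# conv_7 Conv2d
# conv_8 Conv2d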
def trainend2end(
          sources=("image_vects", "word_tokens")
        , batch_size=128
        , embedding_dim=300
        , n_captions=5
        , mode='sample'
        , n_sbu=None
        , recurrent_unit='lstm'
        ):
    """Train a full end-to-end system, without the separate encoder/decoder
    models. Like the Google paper "Show and Tell: A Neural Image Caption
    Generator", and like how we did it with MNIST.
    """
    # data should not be shuffled, as its placement carries semantics
    trX, teX, trY, teY = coco(mode="full", batch_size=batch_size,
                              n_captions=n_captions)

    # add SBU
    if n_sbu:
        sbuX, sbuY, _ = sbuXYFilenames(n_sbu)
        trX.extend(sbuX)
        trY.extend(sbuY)

    # assumed: the fitted word Tokenizer comes from prepVect (defined above),
    # since vect is otherwise undefined in this snippet
    vect = prepVect(n_captions=n_captions, n_sbu=n_sbu)

    image_vects = T.matrix(sources[0])
    word_tokens = T.lmatrix(sources[1])
    image_vects.tag.test_value = np.zeros((2, 4096), dtype='float32')
    word_tokens.tag.test_value = np.zeros((2, 15), dtype='int64')

    from modelbuilding import ShowAndTell
    show_and_tell = ShowAndTell(
          image_dim=4096
        , dim=embedding_dim
        , dictionary_size=vect.n_features
        , max_sequence_length=30
        # , lookup_file='glove_lookup_53454.npy'  # gloveglove
        , recurrent_unit=recurrent_unit
        , norm=True
        , biases_init=Constant(0.)
        , weights_init=IsotropicGaussian(0.02)
        )
    show_and_tell.initialize()
    cost = show_and_tell.cost(image_vects, word_tokens)
    cost.name = "seq_log_likelihood"
    cg = ComputationGraph(cost)

    name = "sbu+coco_NIC_%s_dim.%s" % (show_and_tell.recurrent_unit,
                                       embedding_dim)
    savename = '/home/luke/datasets/coco/predict/%s' % name

    def save_f_gen(self):
        generated = show_and_tell.generate(image_vects)
        f_gen = ComputationGraph(generated).get_theano_function()
        ModelIO.save(f_gen, savename)
        print "Generation function saved while training"

    model = Model(cost)
    algorithm = GradientDescent(
          cost=cost
        , parameters=cg.parameters
        , step_rule=Adam(learning_rate=0.0002)
        )
    main_loop = MainLoop(
          model=model
        , data_stream=DataETL.getTokenizedStream(
              trX, trY, sources=sources, batch_size=batch_size)
        , algorithm=algorithm
        , extensions=[
              DataStreamMonitoring(
                  [cost]
                , DataETL.getTokenizedStream(trX, trY, sources=sources,
                                             batch_size=batch_size)
                , prefix='train')
            , DataStreamMonitoring(
                  [cost]
                , DataETL.getTokenizedStream(teX, teY, sources=sources,
                                             batch_size=batch_size)
                , prefix='test')
            , Printing()
            , UserFunc(save_f_gen, after_epoch=True)
            , FinishIfNoImprovementAfter(
                  notification_name="test_seq_log_likelihood",
                  iterations=1000)
            ]
        )
    main_loop.run()

    # Training finished; save the generator function w/ learned params
    generated = show_and_tell.generate(image_vects)

    # Beam Search
    if mode == "beam":
        samples, = VariableFilter(
            applications=[show_and_tell.generator.generate],
            name="outputs")(ComputationGraph(generated))
        # generated[1] is next_outputs
        beam_search = BeamSearch(samples)
        try:
            path = '/home/luke/datasets/coco/predict/'
            filename = 'end2end_beam_maxseqlen.30_embeddingdim.300'
            ModelIO.save(beam_search, '%s%s' % (path, filename))
            print "It saved! Thanks pickle!"
        except Exception as e:
            print "F**k pickle and move on with your life :)"
            print e
        ModelEval.beamsearch(beam_search)
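# mode='beam' wraps blocks' BeamSearch around the generator's outputs. A
# hedged, self-contained numpy sketch of what beam search does, with a toy
# next-token distribution (toy_step) standing in for the trained generator:
import numpy as np

def toy_step(prefix, vocab=4):
    # fake LM: strongly prefer token (last + 1) % vocab
    probs = np.full(vocab, 0.1)
    probs[(prefix[-1] + 1) % vocab] = 0.7
    return probs / probs.sum()

def beam_search_toy(start, steps=3, k=2):
    beams = [([start], 0.0)]  # (token prefix, cumulative log prob)
    for _ in range(steps):
        candidates = []
        for prefix, score in beams:
            for tok, p in enumerate(toy_step(prefix)):
                candidates.append((prefix + [tok], score + np.log(p)))
        # keep only the k highest-scoring prefixes at each step
        beams = sorted(candidates, key=lambda c: c[1], reverse=True)[:k]
    return beams

print(beam_search_toy(start=0))  # top-k token sequences with their scores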
def trainencoder(
          sources=("image_vects", "word_vects")
        , sources_k=("image_vects_k", "word_vects_k")
        , batch_size=128
        , embedding_dim=300
        , n_captions=5
        , n_sbu=None
        ):
    # data should not be shuffled, as its placement carries semantics
    trX, teX, trY, teY = coco(mode='full', batch_size=batch_size,
                              n_captions=n_captions)

    # add SBU
    if n_sbu:
        sbuX, sbuY, _ = sbuXYFilenames(n_sbu)
        trX.extend(sbuX)
        trY.extend(sbuY)

    # # # # # # # # # #
    # Model Building  #
    # # # # # # # # # #

    s = Encoder(
          image_feature_dim=4096
        , embedding_dim=embedding_dim
        , biases_init=Constant(0.)
        , weights_init=Uniform(width=0.08)
        )
    s.initialize()

    image_vects = T.matrix(sources[0])      # named to match the source name
    word_vects = T.tensor3(sources[1])      # named to match the source name
    image_vects_k = T.matrix(sources_k[0])  # named to match the contrastive source name
    word_vects_k = T.tensor3(sources_k[1])  # named to match the contrastive source name

    # image_vects.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects.tag.test_value = np.zeros((2, 15, 50), dtype='float32')
    # image_vects_k.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects_k.tag.test_value = np.zeros((2, 15, 50), dtype='float32')

    # learned image embedding, learned sentence embedding
    lim, ls = s.apply(image_vects, word_vects)

    # learned contrastive image embedding, learned contrastive sentence embedding
    lcim, lcs = s.apply(image_vects_k, word_vects_k)

    # l2norms
    lim = l2norm(lim)
    lcim = l2norm(lcim)
    ls = l2norm(ls)
    lcs = l2norm(lcs)

    margin = 0.2  # alpha term, should not be more than 1!

    # pairwise ranking loss
    # (https://github.com/youralien/skip-thoughts/blob/master/eval_rank.py)
    cost_im = margin - (lim * ls).sum(axis=1) + (lim * lcs).sum(axis=1)
    cost_im = cost_im * (cost_im > 0.)  # this is like max(0, pairwise-ranking-loss)
    cost_im = cost_im.sum(0)

    cost_s = margin - (ls * lim).sum(axis=1) + (ls * lcim).sum(axis=1)
    cost_s = cost_s * (cost_s > 0.)  # this is like max(0, pairwise-ranking-loss)
    cost_s = cost_s.sum(0)

    cost = cost_im + cost_s
    cost.name = "pairwise_ranking_loss"

    # function to produce embedding
    f_emb = theano.function([image_vects, word_vects], [lim, ls])

    if n_sbu:
        sbuname = "sbu.%d" % n_sbu
    else:
        sbuname = ''
    name = "%s+coco_encoder_lstm_dim.%s_adadelta" % (sbuname, embedding_dim)
    savename = '/home/luke/datasets/coco/predict/%s' % name

    def save_function(self):
        ModelIO.save(f_emb, savename)
        print "Similarity Embedding function saved while training"

    def rank_function(self):
        # Get 1000 images / captions to test rank
        stream = DataETL.getFinalStream(teX, teY, sources=sources,
                                        sources_k=sources_k,
                                        batch_size=1000, shuffle=True)
        images, captions, _0, _1 = stream.get_epoch_iterator().next()
        image_embs, caption_embs = f_emb(images, captions)
        ModelEval.ImageSentenceRanking(image_embs, caption_embs)

    cg = ComputationGraph(cost)

    # # # # # # # # # #
    # Model Training  #
    # # # # # # # # # #

    algorithm = GradientDescent(
          cost=cost
        , parameters=cg.parameters
        # , step_rule=Adam(learning_rate=0.0002)
        , step_rule=AdaDelta()
        )
    main_loop = MainLoop(
          model=Model(cost)
        , data_stream=DataETL.getFinalStream(trX, trY, sources=sources,
                                             sources_k=sources_k,
                                             batch_size=batch_size)
        , algorithm=algorithm
        , extensions=[
              DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(trX, trY, sources=sources,
                                         sources_k=sources_k,
                                         batch_size=batch_size)
                , prefix='train')
            , DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(teX, teY, sources=sources,
                                         sources_k=sources_k,
                                         batch_size=batch_size)
                , prefix='test')
            , UserFunc(save_function, after_epoch=True)
            , UserFunc(rank_function, after_epoch=True)
            , Printing()
            , FinishIfNoImprovementAfter(
                  notification_name="test_pairwise_ranking_loss",
                  iterations=500)
            ]
        )
    main_loop.run()

    # ModelIO.save(f_emb, '/home/luke/datasets/coco/predict/fullencoder_maxfeatures.50000_epochsampler')
    ModelIO.save(f_emb, savename)
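# Worked numpy version of the pairwise ranking cost above: for l2-normalized
# embeddings, (a * b).sum(axis=1) is cosine similarity, and multiplying by
# (cost > 0.) hinges each term at zero, so only margin violations contribute.
import numpy as np

def l2norm_np(x):
    return x / np.linalg.norm(x, axis=1, keepdims=True)

rng = np.random.RandomState(0)
lim_np = l2norm_np(rng.randn(8, 300))  # image embeddings
ls_np = l2norm_np(rng.randn(8, 300))   # matching sentence embeddings
lcs_np = l2norm_np(rng.randn(8, 300))  # contrastive (mismatched) sentences

margin = 0.2
cost_im_np = margin - (lim_np * ls_np).sum(axis=1) + (lim_np * lcs_np).sum(axis=1)
cost_im_np = (cost_im_np * (cost_im_np > 0.)).sum(0)  # hinge, then sum over batch
print(cost_im_np)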
def train():
  """Train SqueezeDet model"""
  os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

  with tf.Graph().as_default():

    assert FLAGS.net == 'squeezeDet' or FLAGS.net == 'squeezeDet+' \
        or FLAGS.net == 'squeezeDetSmall', \
        'Selected neural net architecture not supported: {}'.format(FLAGS.net)

    if FLAGS.dataset == 'COCO':
      mc = coco_config()
      print("COCO")
    elif FLAGS.dataset == 'KITTI':
      mc = kitti_squeezeDet_config()
      print("KITTI")
    elif FLAGS.dataset == 'BALL':
      mc = ball_config()
      print("BALL")

    if FLAGS.net == 'squeezeDet':
      mc.IS_TRAINING = True
      mc.PRETRAINED_MODEL_PATH = FLAGS.pretrained_model_path
      model = SqueezeDet(mc)
    elif FLAGS.net == 'squeezeDet+':
      mc.IS_TRAINING = True
      mc.PRETRAINED_MODEL_PATH = FLAGS.pretrained_model_path
      model = SqueezeDetPlus(mc)
    elif FLAGS.net == 'squeezeDetSmall':
      mc.IS_TRAINING = True
      mc.PRETRAINED_MODEL_PATH = FLAGS.pretrained_model_path
      model = SqueezeDetSmall(mc)

    if FLAGS.dataset == 'COCO':
      imdb = coco(FLAGS.image_set, FLAGS.data_path, mc)
    elif FLAGS.dataset == 'KITTI':
      imdb = kitti(FLAGS.image_set, FLAGS.data_path, mc)
    elif FLAGS.dataset == 'BALL':
      imdb = ball(FLAGS.image_set, FLAGS.data_path, mc)

    # save model size, flops, activations by layers
    with open(os.path.join(FLAGS.train_dir, 'model_metrics.txt'), 'w') as f:
      f.write('Number of parameter by layer:\n')
      count = 0
      for c in model.model_size_counter:
        f.write('\t{}: {}\n'.format(c[0], c[1]))
        count += c[1]
      f.write('\ttotal: {}\n'.format(count))

      count = 0
      f.write('\nActivation size by layer:\n')
      for c in model.activation_counter:
        f.write('\t{}: {}\n'.format(c[0], c[1]))
        count += c[1]
      f.write('\ttotal: {}\n'.format(count))

      count = 0
      f.write('\nNumber of flops by layer:\n')
      for c in model.flop_counter:
        f.write('\t{}: {}\n'.format(c[0], c[1]))
        count += c[1]
      f.write('\ttotal: {}\n'.format(count))
    print('Model statistics saved to {}.'.format(
        os.path.join(FLAGS.train_dir, 'model_metrics.txt')))

    def _load_data(load_to_placeholder=True):
      # read batch input
      image_per_batch, label_per_batch, box_delta_per_batch, aidx_per_batch, \
          bbox_per_batch = imdb.read_batch()

      label_indices, bbox_indices, box_delta_values, mask_indices, \
          box_values = [], [], [], [], []
      aidx_set = set()
      num_discarded_labels = 0
      num_labels = 0
      for i in range(len(label_per_batch)):  # batch_size
        for j in range(len(label_per_batch[i])):  # number of annotations
          num_labels += 1
          if (i, aidx_per_batch[i][j]) not in aidx_set:
            aidx_set.add((i, aidx_per_batch[i][j]))
            label_indices.append(
                [i, aidx_per_batch[i][j], label_per_batch[i][j]])
            mask_indices.append([i, aidx_per_batch[i][j]])
            bbox_indices.extend(
                [[i, aidx_per_batch[i][j], k] for k in range(4)])
            box_delta_values.extend(box_delta_per_batch[i][j])
            box_values.extend(bbox_per_batch[i][j])
          else:
            num_discarded_labels += 1

      if mc.DEBUG_MODE:
        print('Warning: Discarded {}/({}) labels that are assigned to the '
              'same anchor'.format(num_discarded_labels, num_labels))

      if load_to_placeholder:
        image_input = model.ph_image_input
        input_mask = model.ph_input_mask
        box_delta_input = model.ph_box_delta_input
        box_input = model.ph_box_input
        labels = model.ph_labels
      else:
        image_input = model.image_input
        input_mask = model.input_mask
        box_delta_input = model.box_delta_input
        box_input = model.box_input
        labels = model.labels

      feed_dict = {
          image_input: image_per_batch,
          input_mask: np.reshape(
              sparse_to_dense(
                  mask_indices,
                  [mc.BATCH_SIZE, mc.ANCHORS],
                  [1.0] * len(mask_indices)),
              [mc.BATCH_SIZE, mc.ANCHORS, 1]),
          box_delta_input: sparse_to_dense(
              bbox_indices, [mc.BATCH_SIZE, mc.ANCHORS, 4],
              box_delta_values),
          box_input: sparse_to_dense(
              bbox_indices, [mc.BATCH_SIZE, mc.ANCHORS, 4],
              box_values),
          labels: sparse_to_dense(
              label_indices,
              [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
              [1.0] * len(label_indices)),
      }

      return feed_dict, image_per_batch, label_per_batch, bbox_per_batch

    def _enqueue(sess, coord):
      try:
        while not coord.should_stop():
          feed_dict, _, _, _ = _load_data()
          sess.run(model.enqueue_op, feed_dict=feed_dict)
          if mc.DEBUG_MODE:
            print("added to the queue")
        if mc.DEBUG_MODE:
          print("Finished enqueue")
      except Exception as e:
        coord.request_stop(e)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and ckpt.model_checkpoint_path:
      saver.restore(sess, ckpt.model_checkpoint_path)

    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    init = tf.global_variables_initializer()
    sess.run(init)

    coord = tf.train.Coordinator()

    if mc.NUM_THREAD > 0:
      enq_threads = []
      for _ in range(mc.NUM_THREAD):
        enq_thread = threading.Thread(target=_enqueue, args=[sess, coord])
        # enq_thread.isDaemon()
        enq_thread.start()
        enq_threads.append(enq_thread)

    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    run_options = tf.RunOptions(timeout_in_ms=60000)

    # try:
    for step in xrange(FLAGS.max_steps):
      if coord.should_stop():
        sess.run(model.FIFOQueue.close(cancel_pending_enqueues=True))
        coord.request_stop()
        coord.join(threads)
        break

      start_time = time.time()

      if step % FLAGS.summary_step == 0:
        feed_dict, image_per_batch, label_per_batch, bbox_per_batch = \
            _load_data(load_to_placeholder=False)
        op_list = [
            model.train_op, model.loss, summary_op, model.det_boxes,
            model.det_probs, model.det_class, model.conf_loss,
            model.bbox_loss, model.class_loss
        ]
        _, loss_value, summary_str, det_boxes, det_probs, det_class, \
            conf_loss, bbox_loss, class_loss = sess.run(
                op_list, feed_dict=feed_dict)

        _viz_prediction_result(
            model, image_per_batch, bbox_per_batch, label_per_batch,
            det_boxes, det_class, det_probs)
        image_per_batch = bgr_to_rgb(image_per_batch)
        viz_summary = sess.run(
            model.viz_op, feed_dict={model.image_to_show: image_per_batch})

        summary_writer.add_summary(summary_str, step)
        summary_writer.add_summary(viz_summary, step)
        summary_writer.flush()

        print('conf_loss: {}, bbox_loss: {}, class_loss: {}'.format(
            conf_loss, bbox_loss, class_loss))
      else:
        if mc.NUM_THREAD > 0:
          _, loss_value, conf_loss, bbox_loss, class_loss = sess.run(
              [model.train_op, model.loss, model.conf_loss, model.bbox_loss,
               model.class_loss], options=run_options)
        else:
          feed_dict, _, _, _ = _load_data(load_to_placeholder=False)
          _, loss_value, conf_loss, bbox_loss, class_loss = sess.run(
              [model.train_op, model.loss, model.conf_loss, model.bbox_loss,
               model.class_loss], feed_dict=feed_dict)

      duration = time.time() - start_time

      assert not np.isnan(loss_value), \
          'Model diverged. Total loss: {}, conf_loss: {}, bbox_loss: {}, ' \
          'class_loss: {}'.format(loss_value, conf_loss, bbox_loss,
                                  class_loss)

      if step % 10 == 0:
        num_images_per_step = mc.BATCH_SIZE
        images_per_sec = num_images_per_step / duration
        sec_per_batch = float(duration)
        format_str = ('%s: step %d, loss = %.2f (%.1f images/sec; %.3f '
                      'sec/batch)')
        print(format_str % (datetime.now(), step, loss_value,
                            images_per_sec, sec_per_batch))
        sys.stdout.flush()

      # Save the model checkpoint periodically.
      if step % FLAGS.checkpoint_step == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
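# sparse_to_dense above comes from SqueezeDet's utils; a hedged numpy
# re-implementation of its assumed behavior (scatter `values` into a zero
# tensor of `output_shape` at the given index tuples), to make the feed_dict
# construction concrete:
import numpy as np

def sparse_to_dense_np(indices, output_shape, values):
    out = np.zeros(output_shape, dtype=np.float32)
    for idx, v in zip(indices, values):
        out[tuple(idx)] = v
    return out

# e.g. a (batch, anchors) mask marking which anchors own a ground-truth box
mask = sparse_to_dense_np([[0, 3], [1, 7]], [2, 9], [1.0, 1.0])
print(mask.sum())  # 2.0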
def trainencoder(
          sources=("image_vects", "word_vects")
        , sources_k=("image_vects_k", "word_vects_k")
        , batch_size=128
        , embedding_dim=300
        , n_captions=5
        , n_sbu=None
        , separate_emb=False
        , test_size=1000  # per dataset
        , mode='dev'
        ):
    if mode == "coco120k+flickr38k":
        XYsplit_cum = ([], [], [], [])
        # loader calls are kept as strings and eval'd below
        xyloaders = [
              "cocoXYFilenames(dataType='train2014')"
            , "cocoXYFilenames(dataType='val2014')"
            , "flickrXYFilenames(dataType='8k')"
            , "flickrXYFilenames(dataType='30k')"
            ]
        ntrains = [80000, 40000, 8000, 30000]

        for xyloader, ntrain in zip(xyloaders, ntrains):
            X, Y, _ = eval(xyloader)
            XYsplit = train_test_split(X, Y, train_size=ntrain)
            for i in range(len(XYsplit)):
                XYsplit_cum[i].extend(XYsplit[i])

        trX, teX, trY, teY = XYsplit_cum
    else:
        trX, teX, trY, teY = coco(mode=mode, n_captions=n_captions,
                                  test_size=test_size)

    if n_sbu:
        sbutrX, sbuteX, sbutrY, sbuteY = sbu(mode=mode, test_size=test_size)
        pairs = (
              (trX, sbutrX)
            , (teX, sbuteX)
            , (trY, sbutrY)
            , (teY, sbuteY)
            )
        for coco_data, sbu_data in pairs:
            if isinstance(coco_data, list):
                coco_data.extend(sbu_data)

    print("n_train: %d" % len(trX))
    print("n_test: %d" % len(teX))

    # # # # # # # # # #
    # Model Building  #
    # # # # # # # # # #

    s = Encoder(
          image_feature_dim=4096
        , embedding_dim=embedding_dim
        , biases_init=Constant(0.)
        , weights_init=Uniform(width=0.08)
        )
    s.initialize()

    image_vects = tensor.matrix(sources[0])      # named to match the source name
    word_vects = tensor.tensor3(sources[1])      # named to match the source name
    image_vects_k = tensor.matrix(sources_k[0])  # named to match the contrastive source name
    word_vects_k = tensor.tensor3(sources_k[1])  # named to match the contrastive source name

    # image_vects.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects.tag.test_value = np.zeros((2, 15, 50), dtype='float32')
    # image_vects_k.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects_k.tag.test_value = np.zeros((2, 15, 50), dtype='float32')

    # learned image embedding, learned sentence embedding
    lim, ls = s.apply(image_vects, word_vects)

    # learned contrastive image embedding, learned contrastive sentence embedding
    lcim, lcs = s.apply(image_vects_k, word_vects_k)

    # identical cost code thanks to Ryan Kiros
    # https://github.com/youralien/skip-thoughts/blob/master/eval_rank.py
    lim = l2norm(lim)
    lcim = l2norm(lcim)
    ls = l2norm(ls)
    lcs = l2norm(lcs)

    margin = 0.2  # alpha term, should not be more than 1

    cost_im = margin - (lim * ls).sum(axis=1) + (lim * lcs).sum(axis=1)
    cost_im = cost_im * (cost_im > 0.)  # this is like max(0, pairwise-ranking-loss)
    cost_im = cost_im.sum(0)

    cost_s = margin - (ls * lim).sum(axis=1) + (ls * lcim).sum(axis=1)
    cost_s = cost_s * (cost_s > 0.)  # this is like max(0, pairwise-ranking-loss)
    cost_s = cost_s.sum(0)

    cost = cost_im + cost_s
    cost.name = "pairwise_ranking_loss"

    # function(s) to produce embedding
    if separate_emb:
        img_encoder = theano.function([image_vects], lim)
        txt_encoder = theano.function([word_vects], ls)
    f_emb = theano.function([image_vects, word_vects], [lim, ls])

    if n_sbu:
        sbuname = "sbu%d+" % n_sbu
    else:
        sbuname = ''
    name = "%sproject1.%s.jointembedder" % (sbuname, mode)
    savename = MODEL_FILES_DIR + name

    def save_function(self):
        if separate_emb:
            ModelIO.save(img_encoder, savename + "_Img")
            ModelIO.save(txt_encoder, savename + "_Txt")
        ModelIO.save(f_emb, savename)
        print "Similarity Embedding function(s) saved while training"

    def rank_function(stream):
        images, captions, _0, _1 = stream.get_epoch_iterator().next()
        image_embs, caption_embs = f_emb(images, captions)
        ModelEval.ImageSentenceRanking(image_embs, caption_embs)

    def rank_coco(self=None):
        # Get 1000 images / captions to test rank
        stream = DataETL.getFinalStream(teX, teY, sources=sources,
                                        sources_k=sources_k,
                                        batch_size=test_size, shuffle=True)
        print "COCO test"
        rank_function(stream)

    def rank_sbu(self=None):
        stream = DataETL.getFinalStream(sbuteX, sbuteY, sources=sources,
                                        sources_k=sources_k,
                                        batch_size=test_size, shuffle=True)
        print "SBU test"
        rank_function(stream)

    def rank_em(self=None):
        rank_coco()
        if n_sbu:
            rank_sbu()

    cg = ComputationGraph(cost)

    # # # # # # # # # #
    # Model Training  #
    # # # # # # # # # #

    algorithm = GradientDescent(
          cost=cost
        , parameters=cg.parameters
        , step_rule=Adam(learning_rate=0.0002)
        )
    main_loop = MainLoop(
          model=Model(cost)
        , data_stream=DataETL.getFinalStream(trX, trY, sources=sources,
                                             sources_k=sources_k,
                                             batch_size=batch_size)
        , algorithm=algorithm
        , extensions=[
              DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(trX, trY, sources=sources,
                                         sources_k=sources_k,
                                         batch_size=batch_size, shuffle=True)
                , prefix='train')
            , DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(teX, teY, sources=sources,
                                         sources_k=sources_k,
                                         batch_size=batch_size, shuffle=True)
                , prefix='test')
            , UserFunc(save_function, after_epoch=True)
            , UserFunc(rank_em, after_epoch=True)
            , Printing()
            , LogToFile('logs/%s.csv' % name)
            ]
        )
    main_loop.run()
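# ModelEval.ImageSentenceRanking presumably scores image-to-caption retrieval
# (recall@K / median rank) over matched pairs. A self-contained numpy sketch
# of that evaluation, assuming row i of each matrix is a matched pair of
# l2-normalized embeddings:
import numpy as np

def recall_at_k(image_embs, caption_embs, k=5):
    sims = image_embs.dot(caption_embs.T)  # cosine similarities
    ranks = (-sims).argsort(axis=1)        # caption indices, best first
    return np.mean([i in ranks[i, :k] for i in range(len(sims))])

rng = np.random.RandomState(0)
im = rng.randn(10, 300)
cap = im + 0.1 * rng.randn(10, 300)  # noisy matches
im /= np.linalg.norm(im, axis=1, keepdims=True)
cap /= np.linalg.norm(cap, axis=1, keepdims=True)
print(recall_at_k(im, cap))  # near 1.0 for these easy matches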