def prepVect(min_df=2, max_features=50000, n_captions=5, n_sbu=None,
             multilabel=False):
    print "prepping the Word Tokenizer..."
    _0, _1, trY, _3 = coco(mode='full', n_captions=n_captions)
    if n_sbu:
        _4, sbuY, _5 = sbuXYFilenames(n_sbu)
        trY.extend(sbuY)
    vect = Tokenizer(min_df=min_df, max_features=max_features)
    captions = sampleCaptions(trY, n_captions)
    vect.fit(captions)
    if multilabel:
        mlb = MultiLabelBinarizer()
        mlb.fit(vect.transform(captions))
        return vect, mlb
    # if not multilabel:
    return vect
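# A minimal, self-contained sketch of the fit/transform flow that prepVect
# wraps. sklearn's CountVectorizer stands in for the project's Tokenizer,
# whose exact API (min_df/max_features, fit, transform) is assumed to match.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MultiLabelBinarizer

toy_captions = ["a dog runs on the beach", "two dogs play with a ball"]
toy_vect = CountVectorizer(min_df=1, max_features=50000)
toy_vect.fit(toy_captions)

# multilabel=True additionally fits a binarizer over each caption's set of
# token ids, so every caption becomes a fixed-length 0/1 vocabulary vector
token_ids = [tuple(row.nonzero()[1]) for row in toy_vect.transform(toy_captions)]
mlb = MultiLabelBinarizer()
print(mlb.fit_transform(token_ids).shape)  # (2, vocabulary size)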
def traindecoder(
          sources=("image_vects", "word_vects")
        , sources_k=("image_vects_k", "word_vects_k")
        , batch_size=128
        , embedding_dim=300
        , n_captions=5
        ):
    # data should not be shuffled, as its placement carries semantics
    trX, teX, trY, teY = coco(mode="dev", batch_size=batch_size,
                              n_captions=n_captions)

    # # # # # # # # # #
    # Model Building  #
    # # # # # # # # # #
    stream = DataETL.getFinalStream(trX, trY, sources=sources,
                                    sources_k=sources_k,
                                    batch_size=batch_size)
    batch = stream.get_epoch_iterator().next()

    f_emb = ModelIO.load(
        '/home/luke/datasets/coco/predict/fullencoder_maxfeatures.50000')
    import ipdb
    ipdb.set_trace()
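# ModelIO.load above presumably unpickles a compiled Theano function that an
# earlier encoder run saved to disk. A minimal pickle-based stand-in for the
# assumed save/load behavior (model_io_save/model_io_load are hypothetical
# names, not the project's API):
import cPickle as pickle  # Python 2, matching the print statements above

def model_io_save(obj, path):
    with open(path + '.pkl', 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def model_io_load(path):
    with open(path + '.pkl', 'rb') as f:
        return pickle.load(f)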
import sys
sys.path.insert(0, '../../python')

import time

import torch
import torch.nn

import planner as pln
import hardware as hw
import dataset
import models

simd_cfg_path = '../../hwcfg/simd.json'
hw_spec = hw.HardwareSpec(simd_cfg_path)

data = dataset.coco()
yolov2 = models.yolov2()
pnn = pln.Planner()

start_time = time.time()

# cached outputs for YOLOv2's route (passthrough) connections
conv_5_5 = None
conv_6_7 = None
conv_7 = None
for name, module in yolov2.named_modules():
    if isinstance(module, torch.nn.Sequential):
        continue  # skip containers; only visit leaf layers
    if name == 'conv_7':
        data = conv_5_5
    elif name == 'conv_8':
        # assumed continuation (the source snippet ends here): route in the
        # cached conv_6_7 output, mirroring the conv_7 branch above
        data = conv_6_7
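# The dispatch above keys on module names from PyTorch's named_modules()
# traversal. A self-contained toy of the same pattern (the layer names here
# are illustrative, not the real YOLOv2 module names):
import torch.nn as nn

net = nn.Sequential()
net.add_module('conv_7', nn.Conv2d(16, 32, 3))
net.add_module('conv_8', nn.Conv2d(32, 64, 3))

for name, module in net.named_modules():
    if isinstance(module, nn.Sequential):
        continue  # skip the container itself; visit only leaf layers
    print(name, module.__class__.__name__)
# conv_7 Conv2d
# conv_8 Conv2d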
def trainend2end(
          sources=("image_vects", "word_tokens")
        , batch_size=128
        , embedding_dim=300
        , n_captions=5
        , mode='sample'
        , n_sbu=None
        , recurrent_unit='lstm'
        ):
    """Train a full end-to-end system, without the separate encoder/decoder
    models. Like the Google paper "Show and Tell: A Neural Image Caption
    Generator", and like how we did it with MNIST.
    """
    # data should not be shuffled, as its placement carries semantics
    trX, teX, trY, teY = coco(mode="full", batch_size=batch_size,
                              n_captions=n_captions)

    # add SBU
    if n_sbu:
        sbuX, sbuY, _ = sbuXYFilenames(n_sbu)
        trX.extend(sbuX)
        trY.extend(sbuY)

    # assumed: the fitted word Tokenizer comes from prepVect (defined above),
    # since vect is otherwise undefined in this snippet
    vect = prepVect(n_captions=n_captions, n_sbu=n_sbu)

    image_vects = T.matrix(sources[0])
    word_tokens = T.lmatrix(sources[1])
    image_vects.tag.test_value = np.zeros((2, 4096), dtype='float32')
    word_tokens.tag.test_value = np.zeros((2, 15), dtype='int64')

    from modelbuilding import ShowAndTell
    show_and_tell = ShowAndTell(
          image_dim=4096
        , dim=embedding_dim
        , dictionary_size=vect.n_features
        , max_sequence_length=30
        # , lookup_file='glove_lookup_53454.npy'  # gloveglove
        , recurrent_unit=recurrent_unit
        , norm=True
        , biases_init=Constant(0.)
        , weights_init=IsotropicGaussian(0.02)
        )
    show_and_tell.initialize()
    cost = show_and_tell.cost(image_vects, word_tokens)
    cost.name = "seq_log_likelihood"
    cg = ComputationGraph(cost)

    name = "sbu+coco_NIC_%s_dim.%s" % (show_and_tell.recurrent_unit,
                                       embedding_dim)
    savename = '/home/luke/datasets/coco/predict/%s' % name

    def save_f_gen(self):
        generated = show_and_tell.generate(image_vects)
        f_gen = ComputationGraph(generated).get_theano_function()
        ModelIO.save(f_gen, savename)
        print "Generation function saved while training"

    model = Model(cost)
    algorithm = GradientDescent(
          cost=cost
        , parameters=cg.parameters
        , step_rule=Adam(learning_rate=0.0002)
        )
    main_loop = MainLoop(
          model=model
        , data_stream=DataETL.getTokenizedStream(
              trX, trY, sources=sources, batch_size=batch_size)
        , algorithm=algorithm
        , extensions=[
              DataStreamMonitoring(
                  [cost]
                , DataETL.getTokenizedStream(trX, trY, sources=sources,
                                             batch_size=batch_size)
                , prefix='train')
            , DataStreamMonitoring(
                  [cost]
                , DataETL.getTokenizedStream(teX, teY, sources=sources,
                                             batch_size=batch_size)
                , prefix='test')
            , Printing()
            , UserFunc(save_f_gen, after_epoch=True)
            , FinishIfNoImprovementAfter(
                  notification_name="test_seq_log_likelihood",
                  iterations=1000)
            ]
        )
    main_loop.run()

    # Training finished; save the generator function w/ learned params
    generated = show_and_tell.generate(image_vects)

    # Beam Search
    if mode == "beam":
        samples, = VariableFilter(
            applications=[show_and_tell.generator.generate],
            name="outputs")(ComputationGraph(generated))
        # generated[1] is next_outputs
        beam_search = BeamSearch(samples)
        try:
            path = '/home/luke/datasets/coco/predict/'
            filename = 'end2end_beam_maxseqlen.30_embeddingdim.300'
            ModelIO.save(beam_search, '%s%s' % (path, filename))
            print "It saved! Thanks pickle!"
        except Exception as e:
            print "F**k pickle and move on with your life :)"
            print e
        ModelEval.beamsearch(beam_search)
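# mode='beam' wraps blocks' BeamSearch around the generator's outputs. A
# hedged, self-contained numpy sketch of what beam search does, with a toy
# next-token distribution (toy_step) standing in for the trained generator:
import numpy as np

def toy_step(prefix, vocab=4):
    # fake LM: strongly prefer token (last + 1) % vocab
    probs = np.full(vocab, 0.1)
    probs[(prefix[-1] + 1) % vocab] = 0.7
    return probs / probs.sum()

def beam_search_toy(start, steps=3, k=2):
    beams = [([start], 0.0)]  # (token prefix, cumulative log prob)
    for _ in range(steps):
        candidates = []
        for prefix, score in beams:
            for tok, p in enumerate(toy_step(prefix)):
                candidates.append((prefix + [tok], score + np.log(p)))
        # keep only the k highest-scoring prefixes at each step
        beams = sorted(candidates, key=lambda c: c[1], reverse=True)[:k]
    return beams

print(beam_search_toy(start=0))  # top-k token sequences with their scores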
def trainencoder(
          sources=("image_vects", "word_vects")
        , sources_k=("image_vects_k", "word_vects_k")
        , batch_size=128
        , embedding_dim=300
        , n_captions=5
        , n_sbu=None
        ):
    # data should not be shuffled, as its placement carries semantics
    trX, teX, trY, teY = coco(mode='full', batch_size=batch_size,
                              n_captions=n_captions)

    # add SBU
    if n_sbu:
        sbuX, sbuY, _ = sbuXYFilenames(n_sbu)
        trX.extend(sbuX)
        trY.extend(sbuY)

    # # # # # # # # # #
    # Model Building  #
    # # # # # # # # # #

    s = Encoder(
          image_feature_dim=4096
        , embedding_dim=embedding_dim
        , biases_init=Constant(0.)
        , weights_init=Uniform(width=0.08)
        )
    s.initialize()

    image_vects = T.matrix(sources[0])      # named to match the source name
    word_vects = T.tensor3(sources[1])      # named to match the source name
    image_vects_k = T.matrix(sources_k[0])  # named to match the contrastive source name
    word_vects_k = T.tensor3(sources_k[1])  # named to match the contrastive source name

    # image_vects.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects.tag.test_value = np.zeros((2, 15, 50), dtype='float32')
    # image_vects_k.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects_k.tag.test_value = np.zeros((2, 15, 50), dtype='float32')

    # learned image embedding, learned sentence embedding
    lim, ls = s.apply(image_vects, word_vects)

    # learned contrastive image embedding, learned contrastive sentence embedding
    lcim, lcs = s.apply(image_vects_k, word_vects_k)

    # l2norms
    lim = l2norm(lim)
    lcim = l2norm(lcim)
    ls = l2norm(ls)
    lcs = l2norm(lcs)

    margin = 0.2  # alpha term, should not be more than 1!

    # pairwise ranking loss
    # (https://github.com/youralien/skip-thoughts/blob/master/eval_rank.py)
    cost_im = margin - (lim * ls).sum(axis=1) + (lim * lcs).sum(axis=1)
    cost_im = cost_im * (cost_im > 0.)  # this is like max(0, pairwise-ranking-loss)
    cost_im = cost_im.sum(0)

    cost_s = margin - (ls * lim).sum(axis=1) + (ls * lcim).sum(axis=1)
    cost_s = cost_s * (cost_s > 0.)  # this is like max(0, pairwise-ranking-loss)
    cost_s = cost_s.sum(0)

    cost = cost_im + cost_s
    cost.name = "pairwise_ranking_loss"

    # function to produce embedding
    f_emb = theano.function([image_vects, word_vects], [lim, ls])

    if n_sbu:
        sbuname = "sbu.%d" % n_sbu
    else:
        sbuname = ''
    name = "%s+coco_encoder_lstm_dim.%s_adadelta" % (sbuname, embedding_dim)
    savename = '/home/luke/datasets/coco/predict/%s' % name

    def save_function(self):
        ModelIO.save(f_emb, savename)
        print "Similarity Embedding function saved while training"

    def rank_function(self):
        # Get 1000 images / captions to test rank
        stream = DataETL.getFinalStream(teX, teY, sources=sources,
                                        sources_k=sources_k,
                                        batch_size=1000, shuffle=True)
        images, captions, _0, _1 = stream.get_epoch_iterator().next()
        image_embs, caption_embs = f_emb(images, captions)
        ModelEval.ImageSentenceRanking(image_embs, caption_embs)

    cg = ComputationGraph(cost)

    # # # # # # # # # #
    # Model Training  #
    # # # # # # # # # #

    algorithm = GradientDescent(
          cost=cost
        , parameters=cg.parameters
        # , step_rule=Adam(learning_rate=0.0002)
        , step_rule=AdaDelta()
        )
    main_loop = MainLoop(
          model=Model(cost)
        , data_stream=DataETL.getFinalStream(trX, trY, sources=sources,
                                             sources_k=sources_k,
                                             batch_size=batch_size)
        , algorithm=algorithm
        , extensions=[
              DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(trX, trY, sources=sources,
                                         sources_k=sources_k,
                                         batch_size=batch_size)
                , prefix='train')
            , DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(teX, teY, sources=sources,
                                         sources_k=sources_k,
                                         batch_size=batch_size)
                , prefix='test')
            , UserFunc(save_function, after_epoch=True)
            , UserFunc(rank_function, after_epoch=True)
            , Printing()
            , FinishIfNoImprovementAfter(
                  notification_name="test_pairwise_ranking_loss",
                  iterations=500)
            ]
        )
    main_loop.run()

    # ModelIO.save(f_emb, '/home/luke/datasets/coco/predict/fullencoder_maxfeatures.50000_epochsampler')
    ModelIO.save(f_emb, savename)
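# Worked numpy version of the pairwise ranking cost above: for l2-normalized
# embeddings, (a * b).sum(axis=1) is cosine similarity, and multiplying by
# (cost > 0.) hinges each term at zero, so only margin violations contribute.
import numpy as np

def l2norm_np(x):
    return x / np.linalg.norm(x, axis=1, keepdims=True)

rng = np.random.RandomState(0)
lim_np = l2norm_np(rng.randn(8, 300))  # image embeddings
ls_np = l2norm_np(rng.randn(8, 300))   # matching sentence embeddings
lcs_np = l2norm_np(rng.randn(8, 300))  # contrastive (mismatched) sentences

margin = 0.2
cost_im_np = margin - (lim_np * ls_np).sum(axis=1) + (lim_np * lcs_np).sum(axis=1)
cost_im_np = (cost_im_np * (cost_im_np > 0.)).sum(0)  # hinge, then sum over batch
print(cost_im_np)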
def train():
  """Train SqueezeDet model"""
  os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

  with tf.Graph().as_default():

    assert FLAGS.net == 'squeezeDet' or FLAGS.net == 'squeezeDet+' \
        or FLAGS.net == 'squeezeDetSmall', \
        'Selected neural net architecture not supported: {}'.format(FLAGS.net)

    if FLAGS.dataset == 'COCO':
      mc = coco_config()
      print("COCO")
    elif FLAGS.dataset == 'KITTI':
      mc = kitti_squeezeDet_config()
      print("KITTI")
    elif FLAGS.dataset == 'BALL':
      mc = ball_config()
      print("BALL")

    if FLAGS.net == 'squeezeDet':
      mc.IS_TRAINING = True
      mc.PRETRAINED_MODEL_PATH = FLAGS.pretrained_model_path
      model = SqueezeDet(mc)
    elif FLAGS.net == 'squeezeDet+':
      mc.IS_TRAINING = True
      mc.PRETRAINED_MODEL_PATH = FLAGS.pretrained_model_path
      model = SqueezeDetPlus(mc)
    elif FLAGS.net == 'squeezeDetSmall':
      mc.IS_TRAINING = True
      mc.PRETRAINED_MODEL_PATH = FLAGS.pretrained_model_path
      model = SqueezeDetSmall(mc)

    if FLAGS.dataset == 'COCO':
      imdb = coco(FLAGS.image_set, FLAGS.data_path, mc)
    elif FLAGS.dataset == 'KITTI':
      imdb = kitti(FLAGS.image_set, FLAGS.data_path, mc)
    elif FLAGS.dataset == 'BALL':
      imdb = ball(FLAGS.image_set, FLAGS.data_path, mc)

    # save model size, flops, activations by layers
    with open(os.path.join(FLAGS.train_dir, 'model_metrics.txt'), 'w') as f:
      f.write('Number of parameter by layer:\n')
      count = 0
      for c in model.model_size_counter:
        f.write('\t{}: {}\n'.format(c[0], c[1]))
        count += c[1]
      f.write('\ttotal: {}\n'.format(count))

      count = 0
      f.write('\nActivation size by layer:\n')
      for c in model.activation_counter:
        f.write('\t{}: {}\n'.format(c[0], c[1]))
        count += c[1]
      f.write('\ttotal: {}\n'.format(count))

      count = 0
      f.write('\nNumber of flops by layer:\n')
      for c in model.flop_counter:
        f.write('\t{}: {}\n'.format(c[0], c[1]))
        count += c[1]
      f.write('\ttotal: {}\n'.format(count))
    print('Model statistics saved to {}.'.format(
        os.path.join(FLAGS.train_dir, 'model_metrics.txt')))

    def _load_data(load_to_placeholder=True):
      # read batch input
      image_per_batch, label_per_batch, box_delta_per_batch, aidx_per_batch, \
          bbox_per_batch = imdb.read_batch()

      label_indices, bbox_indices, box_delta_values, mask_indices, \
          box_values = [], [], [], [], []
      aidx_set = set()
      num_discarded_labels = 0
      num_labels = 0
      for i in range(len(label_per_batch)):  # batch_size
        for j in range(len(label_per_batch[i])):  # number of annotations
          num_labels += 1
          if (i, aidx_per_batch[i][j]) not in aidx_set:
            aidx_set.add((i, aidx_per_batch[i][j]))
            label_indices.append(
                [i, aidx_per_batch[i][j], label_per_batch[i][j]])
            mask_indices.append([i, aidx_per_batch[i][j]])
            bbox_indices.extend(
                [[i, aidx_per_batch[i][j], k] for k in range(4)])
            box_delta_values.extend(box_delta_per_batch[i][j])
            box_values.extend(bbox_per_batch[i][j])
          else:
            num_discarded_labels += 1

      if mc.DEBUG_MODE:
        print('Warning: Discarded {}/({}) labels that are assigned to the '
              'same anchor'.format(num_discarded_labels, num_labels))

      if load_to_placeholder:
        image_input = model.ph_image_input
        input_mask = model.ph_input_mask
        box_delta_input = model.ph_box_delta_input
        box_input = model.ph_box_input
        labels = model.ph_labels
      else:
        image_input = model.image_input
        input_mask = model.input_mask
        box_delta_input = model.box_delta_input
        box_input = model.box_input
        labels = model.labels

      feed_dict = {
          image_input: image_per_batch,
          input_mask: np.reshape(
              sparse_to_dense(
                  mask_indices,
                  [mc.BATCH_SIZE, mc.ANCHORS],
                  [1.0] * len(mask_indices)),
              [mc.BATCH_SIZE, mc.ANCHORS, 1]),
          box_delta_input: sparse_to_dense(
              bbox_indices, [mc.BATCH_SIZE, mc.ANCHORS, 4],
              box_delta_values),
          box_input: sparse_to_dense(
              bbox_indices, [mc.BATCH_SIZE, mc.ANCHORS, 4],
              box_values),
          labels: sparse_to_dense(
              label_indices,
              [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
              [1.0] * len(label_indices)),
      }

      return feed_dict, image_per_batch, label_per_batch, bbox_per_batch

    def _enqueue(sess, coord):
      try:
        while not coord.should_stop():
          feed_dict, _, _, _ = _load_data()
          sess.run(model.enqueue_op, feed_dict=feed_dict)
          if mc.DEBUG_MODE:
            print("added to the queue")
        if mc.DEBUG_MODE:
          print("Finished enqueue")
      except Exception as e:
        coord.request_stop(e)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and ckpt.model_checkpoint_path:
      saver.restore(sess, ckpt.model_checkpoint_path)

    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    init = tf.global_variables_initializer()
    sess.run(init)

    coord = tf.train.Coordinator()

    if mc.NUM_THREAD > 0:
      enq_threads = []
      for _ in range(mc.NUM_THREAD):
        enq_thread = threading.Thread(target=_enqueue, args=[sess, coord])
        # enq_thread.isDaemon()
        enq_thread.start()
        enq_threads.append(enq_thread)

    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    run_options = tf.RunOptions(timeout_in_ms=60000)

    # try:
    for step in xrange(FLAGS.max_steps):
      if coord.should_stop():
        sess.run(model.FIFOQueue.close(cancel_pending_enqueues=True))
        coord.request_stop()
        coord.join(threads)
        break

      start_time = time.time()

      if step % FLAGS.summary_step == 0:
        feed_dict, image_per_batch, label_per_batch, bbox_per_batch = \
            _load_data(load_to_placeholder=False)
        op_list = [
            model.train_op, model.loss, summary_op, model.det_boxes,
            model.det_probs, model.det_class, model.conf_loss,
            model.bbox_loss, model.class_loss
        ]
        _, loss_value, summary_str, det_boxes, det_probs, det_class, \
            conf_loss, bbox_loss, class_loss = sess.run(
                op_list, feed_dict=feed_dict)

        _viz_prediction_result(
            model, image_per_batch, bbox_per_batch, label_per_batch,
            det_boxes, det_class, det_probs)
        image_per_batch = bgr_to_rgb(image_per_batch)
        viz_summary = sess.run(
            model.viz_op, feed_dict={model.image_to_show: image_per_batch})

        summary_writer.add_summary(summary_str, step)
        summary_writer.add_summary(viz_summary, step)
        summary_writer.flush()

        print('conf_loss: {}, bbox_loss: {}, class_loss: {}'.format(
            conf_loss, bbox_loss, class_loss))
      else:
        if mc.NUM_THREAD > 0:
          _, loss_value, conf_loss, bbox_loss, class_loss = sess.run(
              [model.train_op, model.loss, model.conf_loss, model.bbox_loss,
               model.class_loss], options=run_options)
        else:
          feed_dict, _, _, _ = _load_data(load_to_placeholder=False)
          _, loss_value, conf_loss, bbox_loss, class_loss = sess.run(
              [model.train_op, model.loss, model.conf_loss, model.bbox_loss,
               model.class_loss], feed_dict=feed_dict)

      duration = time.time() - start_time

      assert not np.isnan(loss_value), \
          'Model diverged. Total loss: {}, conf_loss: {}, bbox_loss: {}, ' \
          'class_loss: {}'.format(loss_value, conf_loss, bbox_loss,
                                  class_loss)

      if step % 10 == 0:
        num_images_per_step = mc.BATCH_SIZE
        images_per_sec = num_images_per_step / duration
        sec_per_batch = float(duration)
        format_str = ('%s: step %d, loss = %.2f (%.1f images/sec; %.3f '
                      'sec/batch)')
        print(format_str % (datetime.now(), step, loss_value,
                            images_per_sec, sec_per_batch))
        sys.stdout.flush()

      # Save the model checkpoint periodically.
      if step % FLAGS.checkpoint_step == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
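# sparse_to_dense above comes from SqueezeDet's utils; a hedged numpy
# re-implementation of its assumed behavior (scatter `values` into a zero
# tensor of `output_shape` at the given index tuples), to make the feed_dict
# construction concrete:
import numpy as np

def sparse_to_dense_np(indices, output_shape, values):
    out = np.zeros(output_shape, dtype=np.float32)
    for idx, v in zip(indices, values):
        out[tuple(idx)] = v
    return out

# e.g. a (batch, anchors) mask marking which anchors own a ground-truth box
mask = sparse_to_dense_np([[0, 3], [1, 7]], [2, 9], [1.0, 1.0])
print(mask.sum())  # 2.0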
def trainencoder(
          sources=("image_vects", "word_vects")
        , sources_k=("image_vects_k", "word_vects_k")
        , batch_size=128
        , embedding_dim=300
        , n_captions=5
        , n_sbu=None
        , separate_emb=False
        , test_size=1000  # per dataset
        , mode='dev'
        ):
    if mode == "coco120k+flickr38k":
        XYsplit_cum = ([], [], [], [])
        # loader calls are kept as strings and eval'd below
        xyloaders = [
              "cocoXYFilenames(dataType='train2014')"
            , "cocoXYFilenames(dataType='val2014')"
            , "flickrXYFilenames(dataType='8k')"
            , "flickrXYFilenames(dataType='30k')"
            ]
        ntrains = [80000, 40000, 8000, 30000]

        for xyloader, ntrain in zip(xyloaders, ntrains):
            X, Y, _ = eval(xyloader)
            XYsplit = train_test_split(X, Y, train_size=ntrain)
            for i in range(len(XYsplit)):
                XYsplit_cum[i].extend(XYsplit[i])

        trX, teX, trY, teY = XYsplit_cum
    else:
        trX, teX, trY, teY = coco(mode=mode, n_captions=n_captions,
                                  test_size=test_size)

    if n_sbu:
        sbutrX, sbuteX, sbutrY, sbuteY = sbu(mode=mode, test_size=test_size)
        pairs = (
              (trX, sbutrX)
            , (teX, sbuteX)
            , (trY, sbutrY)
            , (teY, sbuteY)
            )
        for coco_data, sbu_data in pairs:
            if isinstance(coco_data, list):
                coco_data.extend(sbu_data)

    print("n_train: %d" % len(trX))
    print("n_test: %d" % len(teX))

    # # # # # # # # # #
    # Model Building  #
    # # # # # # # # # #

    s = Encoder(
          image_feature_dim=4096
        , embedding_dim=embedding_dim
        , biases_init=Constant(0.)
        , weights_init=Uniform(width=0.08)
        )
    s.initialize()

    image_vects = tensor.matrix(sources[0])      # named to match the source name
    word_vects = tensor.tensor3(sources[1])      # named to match the source name
    image_vects_k = tensor.matrix(sources_k[0])  # named to match the contrastive source name
    word_vects_k = tensor.tensor3(sources_k[1])  # named to match the contrastive source name

    # image_vects.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects.tag.test_value = np.zeros((2, 15, 50), dtype='float32')
    # image_vects_k.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects_k.tag.test_value = np.zeros((2, 15, 50), dtype='float32')

    # learned image embedding, learned sentence embedding
    lim, ls = s.apply(image_vects, word_vects)

    # learned contrastive image embedding, learned contrastive sentence embedding
    lcim, lcs = s.apply(image_vects_k, word_vects_k)

    # identical cost code thanks to Ryan Kiros
    # https://github.com/youralien/skip-thoughts/blob/master/eval_rank.py
    lim = l2norm(lim)
    lcim = l2norm(lcim)
    ls = l2norm(ls)
    lcs = l2norm(lcs)

    margin = 0.2  # alpha term, should not be more than 1

    cost_im = margin - (lim * ls).sum(axis=1) + (lim * lcs).sum(axis=1)
    cost_im = cost_im * (cost_im > 0.)  # this is like max(0, pairwise-ranking-loss)
    cost_im = cost_im.sum(0)

    cost_s = margin - (ls * lim).sum(axis=1) + (ls * lcim).sum(axis=1)
    cost_s = cost_s * (cost_s > 0.)  # this is like max(0, pairwise-ranking-loss)
    cost_s = cost_s.sum(0)

    cost = cost_im + cost_s
    cost.name = "pairwise_ranking_loss"

    # function(s) to produce embedding
    if separate_emb:
        img_encoder = theano.function([image_vects], lim)
        txt_encoder = theano.function([word_vects], ls)
    f_emb = theano.function([image_vects, word_vects], [lim, ls])

    if n_sbu:
        sbuname = "sbu%d+" % n_sbu
    else:
        sbuname = ''
    name = "%sproject1.%s.jointembedder" % (sbuname, mode)
    savename = MODEL_FILES_DIR + name

    def save_function(self):
        if separate_emb:
            ModelIO.save(img_encoder, savename + "_Img")
            ModelIO.save(txt_encoder, savename + "_Txt")
        ModelIO.save(f_emb, savename)
        print "Similarity Embedding function(s) saved while training"

    def rank_function(stream):
        images, captions, _0, _1 = stream.get_epoch_iterator().next()
        image_embs, caption_embs = f_emb(images, captions)
        ModelEval.ImageSentenceRanking(image_embs, caption_embs)

    def rank_coco(self=None):
        # Get 1000 images / captions to test rank
        stream = DataETL.getFinalStream(teX, teY, sources=sources,
                                        sources_k=sources_k,
                                        batch_size=test_size, shuffle=True)
        print "COCO test"
        rank_function(stream)

    def rank_sbu(self=None):
        stream = DataETL.getFinalStream(sbuteX, sbuteY, sources=sources,
                                        sources_k=sources_k,
                                        batch_size=test_size, shuffle=True)
        print "SBU test"
        rank_function(stream)

    def rank_em(self=None):
        rank_coco()
        if n_sbu:
            rank_sbu()

    cg = ComputationGraph(cost)

    # # # # # # # # # #
    # Model Training  #
    # # # # # # # # # #

    algorithm = GradientDescent(
          cost=cost
        , parameters=cg.parameters
        , step_rule=Adam(learning_rate=0.0002)
        )
    main_loop = MainLoop(
          model=Model(cost)
        , data_stream=DataETL.getFinalStream(trX, trY, sources=sources,
                                             sources_k=sources_k,
                                             batch_size=batch_size)
        , algorithm=algorithm
        , extensions=[
              DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(trX, trY, sources=sources,
                                         sources_k=sources_k,
                                         batch_size=batch_size, shuffle=True)
                , prefix='train')
            , DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(teX, teY, sources=sources,
                                         sources_k=sources_k,
                                         batch_size=batch_size, shuffle=True)
                , prefix='test')
            , UserFunc(save_function, after_epoch=True)
            , UserFunc(rank_em, after_epoch=True)
            , Printing()
            , LogToFile('logs/%s.csv' % name)
            ]
        )
    main_loop.run()
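# ModelEval.ImageSentenceRanking presumably scores image-to-caption retrieval
# (recall@K / median rank) over matched pairs. A self-contained numpy sketch
# of that evaluation, assuming row i of each matrix is a matched pair of
# l2-normalized embeddings:
import numpy as np

def recall_at_k(image_embs, caption_embs, k=5):
    sims = image_embs.dot(caption_embs.T)  # cosine similarities
    ranks = (-sims).argsort(axis=1)        # caption indices, best first
    return np.mean([i in ranks[i, :k] for i in range(len(sims))])

rng = np.random.RandomState(0)
im = rng.randn(10, 300)
cap = im + 0.1 * rng.randn(10, 300)  # noisy matches
im /= np.linalg.norm(im, axis=1, keepdims=True)
cap /= np.linalg.norm(cap, axis=1, keepdims=True)
print(recall_at_k(im, cap))  # near 1.0 for these easy matches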