def main(batch_size, output_size, learning_rate, whichGPU, is_finetuning,
         is_overfitting, pretrained_net):
    def handler(signum, frame):
        print 'Saving checkpoint before closing'
        pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
        saver.save(sess, pretrained_net, global_step=step)
        print 'Checkpoint-', pretrained_net + '-' + str(step), ' saved!'
        sys.exit(0)

    signal.signal(signal.SIGINT, handler)

    ckpt_dir = './output/npairs/doctoring/ckpts'
    log_dir = './output/npairs/doctoring/logs'
    train_filename = './input/train_by_hotel.txt'
    mean_file = './input/meanIm.npy'

    img_size = [256, 256]
    crop_size = [224, 224]
    num_iters = 200000
    summary_iters = 25
    save_iters = 5000
    featLayer = 'resnet_v2_50/logits'
    is_training = True

    batch_size = int(batch_size)
    output_size = int(output_size)
    learning_rate = float(learning_rate)
    whichGPU = str(whichGPU)

    if batch_size % 10 != 0:
        print 'Batch size must be divisible by 10!'
        sys.exit(0)

    # Create data "batcher"
    train_data = Npairs(train_filename, mean_file, img_size, crop_size,
                        batch_size, isTraining=is_training)

    numHotels = len(train_data.hotels.keys())
    numIms = np.sum(
        [len(train_data.hotels[h]['ims']) for h in train_data.hotels.keys()])

    datestr = datetime.now().strftime("%Y_%m_%d_%H%M")
    param_str = datestr + '_lr' + str(learning_rate).replace(
        '.', 'pt') + '_outputSz' + str(output_size)
    logfile_path = os.path.join(log_dir, param_str + '_npairs_train.txt')
    train_log_file = open(logfile_path, 'a')

    print '------------'
    print ''
    print 'Going to train with the following parameters:'
    print 'Num hotels:', numHotels
    train_log_file.write('Num hotels: ' + str(numHotels) + '\n')
    print 'Num ims:', numIms
    train_log_file.write('Num ims: ' + str(numIms) + '\n')
    print 'Output size: ', output_size
    train_log_file.write('Output size: ' + str(output_size) + '\n')
    print 'Learning rate: ', learning_rate
    train_log_file.write('Learning rate: ' + str(learning_rate) + '\n')
    print 'Logging to: ', logfile_path
    train_log_file.write('Param_str: ' + param_str + '\n')
    train_log_file.write('----------------\n')
    print ''
    print '------------'

    # Queuing op loads data into input tensor
    repMeanIm = np.tile(np.expand_dims(train_data.meanImage, 0),
                        [batch_size, 1, 1, 1])
    # Slightly awkward: in the non-doctored case the mean is subtracted during
    # batch generation, but here we want to subtract it after the data
    # augmentation, so the mean is added back first and re-subtracted at the end.
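    # Note: the feed pipeline is (im - mean) -> +mean -> augment -> -mean, +noise.
    # One plausible reason for the round trip (an inference, not stated in the
    # original) is that geometric ops like rotation fill exposed borders with
    # zeros, which correspond to black pixels only before mean subtraction.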
    image_batch_mean_subtracted = tf.placeholder(
        tf.float32, shape=[batch_size, crop_size[0], crop_size[0], 3])
    image_batch = tf.add(image_batch_mean_subtracted, repMeanIm)
    label_batch = tf.placeholder(tf.int32, shape=[batch_size])
    people_mask_batch = tf.placeholder(
        tf.float32, shape=[batch_size, crop_size[0], crop_size[0], 1])

    # doctor image params: fraction of the batch each augmentation is applied to
    percent_crop = .5
    percent_people = .5
    percent_rotate = .2
    percent_filters = .4
    percent_text = .1

    # richard's argument: since the data is randomly loaded, we don't need to
    # re-draw the indices we perform operations on every batch; I'm on board
    # with this, but the random crops were already implemented with re-drawn
    # indices, so those are left as-is for now.

    # apply random rotations
    num_rotate = int(batch_size * percent_rotate)
    rotate_inds = np.random.choice(np.arange(0, batch_size),
                                   num_rotate,
                                   replace=False)
    rotate_vals = np.random.randint(-65, 65,
                                    num_rotate).astype('float32') / float(100)
    rotate_angles = np.zeros((batch_size))
    rotate_angles[rotate_inds] = rotate_vals
    rotated_batch = tf.contrib.image.rotate(image_batch,
                                            rotate_angles,
                                            interpolation='BILINEAR')

    # do random crops
    num_to_crop = int(batch_size * percent_crop)
    num_to_not_crop = batch_size - num_to_crop

    shuffled_inds = tf.random_shuffle(np.arange(0, batch_size, dtype='int32'))
    # shuffled_inds = np.arange(0,batch_size,dtype='int32')
    # np.random.shuffle(shuffled_inds)
    crop_inds = tf.slice(shuffled_inds, [0], [num_to_crop])
    uncropped_inds = tf.slice(shuffled_inds, [num_to_crop], [num_to_not_crop])
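    # tf.image.crop_and_resize (used below) expects boxes as [y1, x1, y2, x2]
    # in normalized [0, 1] image coordinates, with box_ind mapping each box to
    # a batch element; the crop boxes constructed next follow that layout.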
    # crop_ratio = float(3)/float(5)
    # crop_yx = tf.random_uniform([num_to_crop,2], 0,1-crop_ratio, dtype=tf.float32, seed=0)
    # crop_sz = tf.add(crop_yx,np.tile([crop_ratio,crop_ratio],[num_to_crop, 1]))
    # crop_boxes = tf.concat([crop_yx,crop_sz],axis=1)

    # randomly select a crop between 3/5 of the image and the entire image
    crop_ratio = tf.random_uniform([num_to_crop, 1],
                                   float(3) / float(5),
                                   1,
                                   dtype=tf.float32,
                                   seed=0)
    # randomly select a starting location between 0 and the maximum valid y,x
    # position; maxval has shape [num_to_crop, 1], so broadcasting yields a
    # [num_to_crop, 2] tensor of offsets
    crop_yx = tf.random_uniform([1, 2], 0., 1. - crop_ratio,
                                dtype=tf.float32, seed=0)
    crop_sz = tf.add(crop_yx, tf.concat([crop_ratio, crop_ratio], axis=1))
    crop_boxes = tf.concat([crop_yx, crop_sz], axis=1)

    uncropped_boxes = np.tile([0, 0, 1, 1], [num_to_not_crop, 1])

    all_inds = tf.concat([crop_inds, uncropped_inds], axis=0)
    all_boxes = tf.concat([crop_boxes, uncropped_boxes], axis=0)

    # top_k on the negated indices recovers the inverse permutation, so the
    # gather below restores the original batch order after crop_and_resize
    sorted_inds = tf.nn.top_k(-shuffled_inds, sorted=True,
                              k=batch_size).indices
    cropped_batch = tf.gather(
        tf.image.crop_and_resize(rotated_batch, all_boxes, all_inds,
                                 crop_size), sorted_inds)

    # apply different filters
    flt_image = convert_image_dtype(cropped_batch, dtypes.float32)

    num_to_filter = int(batch_size * percent_filters)
    filter_inds = np.random.choice(np.arange(0, batch_size),
                                   num_to_filter,
                                   replace=False)
    filter_mask = np.zeros(batch_size)
    filter_mask[filter_inds] = 1
    filter_mask = filter_mask.astype('float32')
    inv_filter_mask = np.ones(batch_size)
    inv_filter_mask[filter_inds] = 0
    inv_filter_mask = inv_filter_mask.astype('float32')

    hsv = gen_image_ops.rgb_to_hsv(flt_image)

    hue = array_ops.slice(hsv, [0, 0, 0, 0], [batch_size, -1, -1, 1])
    saturation = array_ops.slice(hsv, [0, 0, 0, 1], [batch_size, -1, -1, 1])
    value = array_ops.slice(hsv, [0, 0, 0, 2], [batch_size, -1, -1, 1])

    # hue: per-image deltas, zeroed out (via filter_mask) for images that are
    # not selected for filtering, then tiled to a full [H, W, 1] map
    delta_vals = random_ops.random_uniform([batch_size], -.15, .15)
    hue_deltas = tf.multiply(filter_mask, delta_vals)
    hue_deltas2 = tf.expand_dims(
        tf.transpose(
            tf.tile(tf.reshape(hue_deltas, [1, 1, batch_size]),
                    (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3)
    # hue = math_ops.mod(hue + (hue_deltas2 + 1.), 1.)
    hue_mod = tf.add(hue, hue_deltas2)
    hue = clip_ops.clip_by_value(hue_mod, 0.0, 1.0)

    # saturation
    saturation_factor = random_ops.random_uniform([batch_size], -.05, .05)
    saturation_factor2 = tf.multiply(filter_mask, saturation_factor)
    saturation_factor3 = tf.expand_dims(
        tf.transpose(
            tf.tile(tf.reshape(saturation_factor2, [1, 1, batch_size]),
                    (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3)
    saturation_mod = tf.add(saturation, saturation_factor3)
    saturation = clip_ops.clip_by_value(saturation_mod, 0.0, 1.0)

    hsv_altered = array_ops.concat([hue, saturation, value], 3)
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    # brightness
    brightness_factor = random_ops.random_uniform([batch_size], -.25, .25)
    brightness_factor2 = tf.multiply(filter_mask, brightness_factor)
    brightness_factor3 = tf.expand_dims(
        tf.transpose(
            tf.tile(tf.reshape(brightness_factor2, [1, 1, batch_size]),
                    (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3)
    adjusted = math_ops.add(rgb_altered,
                            math_ops.cast(brightness_factor3, dtypes.float32))

    filtered_batch = clip_ops.clip_by_value(adjusted, 0.0, 255.0)

    # insert people masks
    num_people_masks = int(batch_size * percent_people)
    mask_inds = np.random.choice(np.arange(0, batch_size),
                                 num_people_masks,
                                 replace=False)

    start_masks = np.zeros([batch_size, crop_size[0], crop_size[0], 1],
                           dtype='float32')
    start_masks[mask_inds, :, :, :] = 1

    inv_start_masks = np.ones([batch_size, crop_size[0], crop_size[0], 1],
                              dtype='float32')
    inv_start_masks[mask_inds, :, :, :] = 0

    masked_masks = tf.add(
        inv_start_masks,
        tf.cast(tf.multiply(people_mask_batch, start_masks), dtype=tf.float32))
    masked_masks2 = tf.cast(tf.tile(masked_masks, [1, 1, 1, 3]),
                            dtype=tf.float32)
    # masked_masks has a single channel that broadcasts across RGB below;
    # masked_masks2 (3-channel) is computed but unused
    masked_batch = tf.multiply(masked_masks, filtered_batch)

    # single-channel noise broadcasts across all three color channels
    noise = tf.random_normal(shape=[batch_size, crop_size[0], crop_size[0], 1],
                             mean=0.0,
                             stddev=0.0025,
                             dtype=tf.float32)
    final_batch = tf.add(tf.subtract(masked_batch, repMeanIm), noise)
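    # Summary of the doctoring pipeline above: rotate -> crop/resize ->
    # hue/saturation/brightness jitter -> people masking -> re-subtract mean ->
    # additive Gaussian noise.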
    print("Preparing network...")
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, layers = resnet_v2.resnet_v2_50(final_batch,
                                           num_classes=output_size,
                                           is_training=True)

    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        # always exclude optimizer momentum slots; also exclude the logits
        # layer when fine-tuning to a new output size
        if (is_finetuning.lower() == 'true' and var.op.name.startswith(
                'resnet_v2_50/logits')) or 'momentum' in var.op.name.lower():
            excluded = True
        if not excluded:
            variables_to_restore.append(var)

    # numpy stuff for figuring out which elements are from the same class and which aren't
    anchor_inds = np.arange(0, batch_size, 2)
    pos_inds = np.arange(1, batch_size, 2)

    labels = tf.gather(label_batch, anchor_inds)

    all_feats = tf.squeeze(layers[featLayer])
    anchor_feats = tf.gather(all_feats, anchor_inds)
    pos_feats = tf.gather(all_feats, pos_inds)

    loss = npairs_loss(labels, anchor_feats, pos_feats)

    # slightly counterintuitive to not define "init_op" first, but tf vars aren't known until added to graph
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = slim.learning.create_train_op(loss, optimizer)

    summary_op = tf.summary.merge_all()
    init_op = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(max_to_keep=2000)

    # tf will consume any GPU it finds on the system. The following lines restrict it to specific GPUs.
    c = tf.ConfigProto()
    c.gpu_options.visible_device_list = whichGPU

    print("Starting session...")
    sess = tf.Session(config=c)
    sess.run(init_op)

    writer = tf.summary.FileWriter(log_dir, sess.graph)

    if pretrained_net.lower() != 'none':
        restore_fn = slim.assign_from_checkpoint_fn(pretrained_net,
                                                    variables_to_restore)
        restore_fn(sess)

    print("Start training...")
    ctr = 0
    for step in range(num_iters):
        start_time = time.time()
        batch, hotels, ims = train_data.getBatch()
        people_masks = train_data.getPeopleMasks()
        batch_time = time.time() - start_time
        start_time = time.time()
        _, fb, loss_val = sess.run(
            [train_op, masked_batch, loss],
            feed_dict={
                image_batch_mean_subtracted: batch,
                label_batch: hotels,
                people_mask_batch: people_masks
            })
        end_time = time.time()
        duration = end_time - start_time
        out_str = 'Step %d: loss = %.6f (batch creation: %.3f | training: %.3f sec)' % (
            step, loss_val, batch_time, duration)
        # print(out_str)
        if step == 0:
            np.save(
                os.path.join(log_dir,
                             'checkpoint-' + param_str + '_example_batch.npy'),
                fb)
        if step % summary_iters == 0 or is_overfitting.lower() == 'true':
            print(out_str)
            train_log_file.write(out_str + '\n')
            # Update the events file.
            # summary_str = sess.run(summary_op)
            # writer.add_summary(summary_str, step)
            # writer.flush()

        # Save a checkpoint
        if (step + 1) % save_iters == 0:
            print('Saving checkpoint at iteration: %d' % (step))
            pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print 'checkpoint-', pretrained_net + '-' + str(step), ' saved!'
        if (step + 1) == num_iters:
            print('Saving final')
            pretrained_net = os.path.join(ckpt_dir, 'final-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print 'final-', pretrained_net + '-' + str(step), ' saved!'

    sess.close()
    train_log_file.close()
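
# For reference, a minimal NumPy sketch of the objective computed by
# npairs_loss (mirroring the tf.contrib.losses.metric_learning version but
# omitting its L2 embedding-regularization term). The name
# npairs_loss_reference is ours; nothing below calls it -- it is an
# illustrative sketch, not part of the training path.
def npairs_loss_reference(labels, anchor_feats, pos_feats):
    # labels: (n,) class ids; anchor_feats / pos_feats: (n, d) embeddings,
    # where row i of pos_feats is the positive paired with anchor i
    labels = np.asarray(labels)
    # similarity of every anchor to every positive in the batch
    logits = np.dot(anchor_feats, np.transpose(pos_feats))  # (n, n)
    # positives sharing the anchor's label all count as correct targets,
    # distributed uniformly across the matching columns
    targets = (labels[:, None] == labels[None, :]).astype('float32')
    targets = targets / targets.sum(axis=1, keepdims=True)
    # numerically stable log-softmax over each row
    row_max = logits.max(axis=1, keepdims=True)
    log_probs = logits - row_max - np.log(
        np.exp(logits - row_max).sum(axis=1, keepdims=True))
    return -np.mean(np.sum(targets * log_probs, axis=1))
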
def main(fraction_same_chain, batch_size, output_size, learning_rate,
         whichGPU, is_finetuning, is_overfitting, pretrained_net):
    # same-chain n-pairs variant: a fraction_same_chain portion of each batch
    # is drawn from hotels of the same chain, and no image doctoring is applied
    def handler(signum, frame):
        print 'Saving checkpoint before closing'
        pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
        saver.save(sess, pretrained_net, global_step=step)
        print 'Checkpoint-', pretrained_net + '-' + str(step), ' saved!'
        sys.exit(0)

    signal.signal(signal.SIGINT, handler)

    ckpt_dir = './output/sameChain/npairs/no_doctoring/ckpts'
    log_dir = './output/sameChain/npairs/no_doctoring/logs'
    train_filename = './input/train_by_hotel.txt'

    jsonTrainData = json.load(open('./input/train_set.json'))
    cls_to_chain = {}
    for hotel in jsonTrainData.keys():
        # -1 is unknown chain, 23 is "Prince Hotel", 25 is "*w Hotel"
        if jsonTrainData[hotel]['chainId'] not in (-1, 23, 25):
            cls_to_chain[int(hotel)] = jsonTrainData[hotel]['chainId']

    mean_file = './input/meanIm.npy'

    img_size = [256, 256]
    crop_size = [224, 224]
    num_iters = 200000
    summary_iters = 25
    save_iters = 5000
    featLayer = 'resnet_v2_50/logits'
    is_training = True

    batch_size = int(batch_size)
    output_size = int(output_size)
    learning_rate = float(learning_rate)
    whichGPU = str(whichGPU)
    fraction_same_chain = float(fraction_same_chain)

    if batch_size % 10 != 0:
        print 'Batch size must be divisible by 10!'
        sys.exit(0)

    # Create data "batcher"
    train_data = SameChainNpairs(train_filename, cls_to_chain, mean_file,
                                 img_size, crop_size, batch_size,
                                 isTraining=is_training,
                                 fractionSameChain=fraction_same_chain)

    if is_overfitting.lower() == 'true':
        # keep only a few chains with a handful of hotels each, so the network
        # can be sanity-checked by overfitting a tiny subset
        min_count = int(float(batch_size) * fraction_same_chain)
        good_chains1 = [
            c for c in train_data.chains.keys()
            if len(train_data.chains[c].keys()) > min_count
        ]
        good_chains = np.random.choice(good_chains1, 3, replace=False)
        for chain in train_data.chains.keys():
            if chain not in good_chains:
                train_data.chains.pop(chain)
            else:
                good_hotels = train_data.chains[chain].keys()[:min_count]
                for hotel in train_data.chains[chain].keys():
                    if hotel not in good_hotels:
                        train_data.chains[chain].pop(hotel)

    numChains = len(train_data.chains.keys())
    numHotels = np.sum(
        [len(train_data.chains[c].keys()) for c in train_data.chains.keys()])
    numIms = np.sum([
        len(train_data.chains[c][h]['ims'])
        for c in train_data.chains.keys() for h in train_data.chains[c].keys()
    ])

    datestr = datetime.now().strftime("%Y_%m_%d_%H%M")
    param_str = datestr + '_fracSameChain' + str(fraction_same_chain).replace(
        '.', 'pt') + '_lr' + str(learning_rate).replace(
            '.', 'pt') + '_outputSz' + str(output_size)
    logfile_path = os.path.join(log_dir, param_str + '_npairs_train.txt')
    train_log_file = open(logfile_path, 'a')

    print '------------'
    print ''
    print 'Going to train with the following parameters:'
    print 'Num chains:', numChains
    train_log_file.write('Num chains: ' + str(numChains) + '\n')
    print 'Num hotels:', numHotels
    train_log_file.write('Num hotels: ' + str(numHotels) + '\n')
    print 'Num ims:', numIms
    train_log_file.write('Num ims: ' + str(numIms) + '\n')
    print 'Output size: ', output_size
    train_log_file.write('Output size: ' + str(output_size) + '\n')
    print 'Learning rate: ', learning_rate
    train_log_file.write('Learning rate: ' + str(learning_rate) + '\n')
    print 'Logging to: ', logfile_path
    train_log_file.write('Param_str: ' + param_str + '\n')
    train_log_file.write('----------------\n')
    print ''
    print '------------'

    # Queuing op loads data into input tensor
    image_batch = tf.placeholder(
        tf.float32, shape=[batch_size, crop_size[0], crop_size[0], 3])
    label_batch = tf.placeholder(tf.int32, shape=[batch_size])
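    # Unlike the doctoring variant above, the batcher here already returns
    # mean-subtracted crops, so the placeholder feeds the network input
    # directly (plus the small Gaussian noise added below).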
    noise = tf.random_normal(shape=[batch_size, crop_size[0], crop_size[0], 1],
                             mean=0.0,
                             stddev=0.0025,
                             dtype=tf.float32)
    final_batch = tf.add(image_batch, noise)

    print("Preparing network...")
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, layers = resnet_v2.resnet_v2_50(final_batch,
                                           num_classes=output_size,
                                           is_training=True)

    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        # always exclude optimizer momentum slots; also exclude the logits
        # layer when fine-tuning to a new output size
        if (is_finetuning.lower() == 'true' and var.op.name.startswith(
                'resnet_v2_50/logits')) or 'momentum' in var.op.name.lower():
            excluded = True
        if not excluded:
            variables_to_restore.append(var)

    # numpy stuff for figuring out which elements are from the same class and which aren't
    anchor_inds = np.arange(0, batch_size, 2)
    pos_inds = np.arange(1, batch_size, 2)

    labels = tf.gather(label_batch, anchor_inds)

    all_feats = tf.squeeze(layers[featLayer])
    anchor_feats = tf.gather(all_feats, anchor_inds)
    pos_feats = tf.gather(all_feats, pos_inds)

    loss = npairs_loss(labels, anchor_feats, pos_feats)

    # slightly counterintuitive to not define "init_op" first, but tf vars aren't known until added to graph
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = slim.learning.create_train_op(loss, optimizer)

    summary_op = tf.summary.merge_all()
    init_op = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(max_to_keep=2000)

    # tf will consume any GPU it finds on the system. The following lines restrict it to specific GPUs.
    c = tf.ConfigProto()
    c.gpu_options.visible_device_list = whichGPU

    print("Starting session...")
    sess = tf.Session(config=c)
    sess.run(init_op)

    writer = tf.summary.FileWriter(log_dir, sess.graph)

    if pretrained_net.lower() != 'none':
        restore_fn = slim.assign_from_checkpoint_fn(pretrained_net,
                                                    variables_to_restore)
        restore_fn(sess)

    print("Start training...")
    ctr = 0
    for step in range(num_iters):
        start_time = time.time()
        batch, hotels, chains, ims = train_data.getBatch()
        batch_time = time.time() - start_time
        start_time = time.time()
        _, fb, loss_val = sess.run([train_op, final_batch, loss],
                                   feed_dict={
                                       image_batch: batch,
                                       label_batch: hotels
                                   })
        if step == 0:
            np.save(
                os.path.join(log_dir,
                             'checkpoint-' + param_str + '_example_batch.npy'),
                fb)
        end_time = time.time()
        duration = end_time - start_time
        out_str = 'Step %d: loss = %.6f (batch creation: %.3f | training: %.3f sec)' % (
            step, loss_val, batch_time, duration)
        # print(out_str)
        if step % summary_iters == 0 or is_overfitting.lower() == 'true':
            print(out_str)
            train_log_file.write(out_str + '\n')
            # Update the events file.
            # summary_str = sess.run(summary_op)
            # writer.add_summary(summary_str, step)
            # writer.flush()

        # Save a checkpoint
        if (step + 1) % save_iters == 0:
            print('Saving checkpoint at iteration: %d' % (step))
            pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print 'checkpoint-', pretrained_net + '-' + str(step), ' saved!'
        if (step + 1) == num_iters:
            print('Saving final')
            pretrained_net = os.path.join(ckpt_dir, 'final-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print 'final-', pretrained_net + '-' + str(step), ' saved!'

    sess.close()
    train_log_file.close()
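
# A minimal entry-point sketch (an assumption, not part of the original
# script): drive the same-chain main() above from the command line with
# positional arguments, e.g.
#   python train.py 0.5 120 256 0.0001 0 false false none
if __name__ == '__main__':
    main(*sys.argv[1:9])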