def write_simple_segmentation_model_errors(mode, *args, **kwargs):
    cvid = int(mode[-1])
    names, _, zones_all = body_zone_segmentation.get_body_zones('all')
    hmaps_all = threat_segmentation_models.get_all_multitask_cnn_predictions('all')
    idx = get_train_idx('all', cvid) if mode.startswith('train') else get_valid_idx('all', cvid)
    predict = train_simple_segmentation_model(*args, **kwargs)
    labels = get_train_labels()

    errors = []
    total_loss = 0
    for i, pred in zip(idx, predict(zones_all, hmaps_all, idx)):
        name = names[i]
        label = np.array(labels[name])
        loss = log_loss(pred, label)
        for j in range(17):
            errors.append((loss[j], '%s_Zone%s' % (name, j+1), pred[j], label[j]))
        total_loss += np.mean(loss) / len(idx)
    errors.sort(reverse=True)

    # one row per (scan, zone), worst losses first
    with open('errors.txt', 'w') as f:
        lines = ['total loss: %s' % total_loss]
        lines += ['%.3f_%s_%.3f_%.3f' % error for error in errors]
        f.write('\n'.join(lines))
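
# The per-zone `log_loss` helper used above is defined elsewhere in this repo; a
# minimal numpy sketch consistent with how it is called here (elementwise binary
# cross-entropy over the 17 zone predictions) might look like the function below.
# `_log_loss_sketch` is a hypothetical name for illustration, not the repo's helper.
def _log_loss_sketch(pred, label, eps=1e-15):
    import numpy as np  # local import so the sketch is self-contained
    pred = np.clip(np.asarray(pred, dtype=np.float64), eps, 1 - eps)
    label = np.asarray(label, dtype=np.float64)
    return -(label * np.log(pred) + (1 - label) * np.log(1 - pred))
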
def train_multitask_fcn(mode, cvid, lid, duration, learning_rate=1e-5, num_layers=5, downsize=2):
    height, width = 660//downsize, 512//downsize

    def get_hmap(input_tensor, name):
        # indices of the ResNet50 layers whose outputs are turned into heatmaps
        layer_idxs = [4, 37, 79, 141, 173]
        base_model = keras.applications.ResNet50(include_top=False, weights='imagenet',
                                                 input_tensor=input_tensor,
                                                 input_shape=(height, width, 3))
        for layer in base_model.layers:
            layer.name = '%s_%s' % (name, layer.name)

        def resize_bilinear(images):
            return tf.image.resize_bilinear(images, [height, width])

        hmaps = []
        for i in layer_idxs[-num_layers:]:
            output = base_model.layers[i].output
            hmap = keras.layers.Convolution2D(1, (1, 1))(output)
            hmap = keras.layers.Lambda(resize_bilinear)(hmap)
            hmaps.append(hmap)
        merged = keras.layers.Add()(hmaps)
        return merged

    aps_input = keras.layers.Input(shape=(height, width, 3))
    a3daps_input = keras.layers.Input(shape=(height, width, 3))
    logits = keras.layers.Add()([get_hmap(aps_input, 'aps'), get_hmap(a3daps_input, 'a3daps')])
    preds = keras.layers.Activation('sigmoid')(logits)
    model = keras.models.Model(inputs=[aps_input, a3daps_input], outputs=preds)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate), loss='binary_crossentropy')

    def random_resize(images, amount=0.25):
        _, w, h, _ = images.shape
        pw, ph = np.random.randint(1, int(w*amount/2)), np.random.randint(1, int(h*amount/2))
        images = np.stack([skimage.transform.resize(image, [w-2*pw, h-2*ph])
                           for image in images / 10], axis=0) * 10
        images = np.pad(images, [(0, 0), (pw, pw), (ph, ph), (0, 0)], 'constant')
        return images, (pw, ph)

    model_path = os.getcwd() + '/model.h5'
    dset_all, _ = passenger_clustering.get_augmented_segmentation_data(mode, 10)
    labels_all, _ = dataio.get_augmented_threat_heatmaps(mode)
    train_idx, valid_idx = get_train_idx(mode, cvid), get_valid_idx(mode, cvid)

    def data_generator(idx):
        while True:
            for i in idx:
                data = dset_all[i, :, ::downsize, ::downsize]
                label = labels_all[i, :, ::downsize, ::downsize, lid:lid+1]
                # build 3-channel pseudo-RGB images from aps and a3daps slice differences
                aps_image = np.stack([data[..., 0] - data[..., 2]] * 3, axis=-1)
                a3daps_image = np.stack([data[..., 4] - data[..., 6]] * 3, axis=-1)
                ret, _ = random_resize(np.concatenate([aps_image, a3daps_image], axis=-1))
                ret = ret*256 + 128
                aps_image, a3daps_image = ret[..., :3], ret[..., 3:]
                aps_image = keras.applications.imagenet_utils.preprocess_input(aps_image)
                a3daps_image = keras.applications.imagenet_utils.preprocess_input(a3daps_image)
                yield [aps_image, a3daps_image], label

    t0 = time.time()
    while True:
        if time.time() - t0 > duration * 3600:
            break
        hist = model.fit_generator(data_generator(train_idx), steps_per_epoch=len(train_idx),
                                   epochs=1, validation_data=data_generator(valid_idx),
                                   validation_steps=len(valid_idx))
        model.save(model_path)
        for key in hist.history:
            with open('%s.txt' % key, 'a') as f:
                f.write(str(hist.history[key][-1]) + '\n')
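
# Standalone sketch of the shrink-then-zero-pad augmentation that random_resize
# applies inside train_multitask_fcn (assumptions: a (batch, H, W, C) float array
# already scaled to roughly [0, 1], and skimage available as in the rest of this
# module). The helper name is illustrative only.
def _shrink_and_pad_sketch(images, amount=0.25):
    import numpy as np
    import skimage.transform
    _, h, w, _ = images.shape
    ph = np.random.randint(1, int(h * amount / 2))
    pw = np.random.randint(1, int(w * amount / 2))
    shrunk = np.stack([skimage.transform.resize(im, [h - 2 * ph, w - 2 * pw])
                       for im in images], axis=0)
    # zero-pad back to the original spatial size, keeping the content centered
    return np.pad(shrunk, [(0, 0), (ph, ph), (pw, pw), (0, 0)], 'constant'), (ph, pw)
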
def train_multitask_cnn(mode, cvid, duration, weights, sanity_check=False, normalize_data=True,
                        scale_data=1, num_filters=64, downsize=1):
    # note: normalize_data is accepted but not used in the body below
    angles, height, width, res, filters = 16, 660//downsize, 512//downsize, 512//downsize, 14
    tf.reset_default_graph()

    data_in = tf.placeholder(tf.float32, [angles, height, width, filters])
    means_in = tf.placeholder(tf.float32, [6])

    # random resize
    size = tf.random_uniform([2], minval=int(0.75*res), maxval=res, dtype=tf.int32)
    h_pad, w_pad = (res-size[0])//2, (res-size[1])//2
    padding = [[0, 0], [h_pad, res-size[0]-h_pad], [w_pad, res-size[1]-w_pad]]
    data = tf.image.resize_images(data_in, size)
    data = tf.stack([tf.pad(data[..., i], padding) for i in range(filters)], axis=-1)

    # random left-right flip
    flip_lr = tf.random_uniform([], maxval=2, dtype=tf.int32)
    data = tf.cond(flip_lr > 0, lambda: data[:, :, ::-1, :], lambda: data)

    # input normalization
    labels = data[..., 8:]
    if sanity_check:
        data = data[..., :4] * sanity_check
    else:
        data = data[..., :8] * scale_data

    # get logits
    _, logits = tf_models.hourglass_cnn(data, res, 4, res, num_filters, num_output=6)

    # loss on segmentations
    losses, summaries = [], []
    for i in range(6):
        cur_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=labels[..., i], logits=logits[..., i]))
        cur_summary = tf.summary.scalar('loss_%s' % i, cur_loss)
        default_loss = -(means_in[i]*tf.log(means_in[i]) + (1-means_in[i])*tf.log(1-means_in[i]))
        losses.append(cur_loss / default_loss * weights[i])
        summaries.append(cur_summary)
    loss = tf.add_n(losses)
    summaries.append(tf.summary.scalar('loss', loss))

    # actual predictions
    preds = tf.sigmoid(logits)
    preds = tf.cond(flip_lr > 0, lambda: preds[:, :, ::-1, :], lambda: preds)
    preds = preds[:, padding[1][0]:-padding[1][1]-1, padding[2][0]:-padding[2][1]-1, :]
    preds = tf.squeeze(tf.image.resize_images(preds, [height, width]))

    # optimization
    optimizer = tf.train.AdamOptimizer()
    train_step = optimizer.minimize(loss)

    saver = tf.train.Saver()
    model_path = os.getcwd() + '/model.ckpt'

    def predict(dset, n_sample=16):
        with tf.Session() as sess:
            saver.restore(sess, model_path)
            for data in tqdm.tqdm(dset):
                pred = np.zeros((angles, height, width, 6))
                data = np.concatenate([data[:, ::downsize, ::downsize],
                                       np.zeros((angles, height, width, 6))], axis=-1)
                for _ in range(n_sample):
                    pred += sess.run(preds, feed_dict={
                        data_in: data,
                    })
                yield pred / n_sample

    if os.path.exists('done'):
        return predict

    dset_all, _ = passenger_clustering.get_augmented_segmentation_data(mode, 10)
    labels_all, means_all = dataio.get_augmented_threat_heatmaps(mode)
    train_idx, valid_idx = get_train_idx(mode, cvid), get_valid_idx(mode, cvid)

    with read_log_dir():
        writer = tf.summary.FileWriter(os.getcwd())

    def data_gen(dset, labels, means, idx):
        for i in tqdm.tqdm(idx):
            data = np.concatenate([dset[i], labels[i]], axis=-1)
            yield {
                data_in: data[:, ::downsize, ::downsize],
                means_in: means
            }

    def eval_model(sess):
        losses = []
        for data in data_gen(dset_all, labels_all, means_all, valid_idx):
            cur_loss = sess.run(loss, feed_dict=data)
            losses.append(cur_loss)
        return np.mean(losses) if losses else 0

    def train_model(sess):
        it = 0
        t0 = time.time()
        best_valid_loss = None
        while time.time() - t0 < duration * 3600:
            for data in data_gen(dset_all, labels_all, means_all, train_idx):
                cur_summaries = sess.run(summaries + [train_step], feed_dict=data)
                cur_summaries.pop()
                for summary in cur_summaries:
                    writer.add_summary(summary, it)
                it += 1
            valid_loss = eval_model(sess)
            cur_valid_summary = tf.Summary()
            cur_valid_summary.value.add(tag='valid_loss', simple_value=valid_loss)
            writer.add_summary(cur_valid_summary, it)
            if best_valid_loss is None or valid_loss <= best_valid_loss:
                best_valid_loss = valid_loss
                saver.save(sess, model_path)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_model(sess)

    open('done', 'w').close()
    return predict
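
# The per-channel weighting in train_multitask_cnn divides each sigmoid
# cross-entropy term by the loss of a constant predictor that always outputs the
# channel's base rate, so channels with very different positive rates contribute
# on a comparable scale. A small numpy illustration with hypothetical numbers:
def _normalized_loss_example():
    import numpy as np
    mean = 0.01        # base rate of positives for one heatmap channel
    cur_loss = 0.03    # raw sigmoid cross-entropy of the model on that channel
    weight = 1.0       # the caller-supplied weights[i]
    default_loss = -(mean * np.log(mean) + (1 - mean) * np.log(1 - mean))
    # roughly 0.53 here: about half the loss of the trivial constant predictor
    return cur_loss / default_loss * weight
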
def train_1d_cnn(mode, cvid, duration, learning_rate=1e-3):
    tf.reset_default_graph()
    width, depth, height = 128, 128, 165

    a3d_in = tf.placeholder(tf.float32, [width, depth, height])
    labels_in = tf.placeholder(tf.float32, [height, width])

    a3d = tf.transpose(a3d_in, [2, 0, 1])[::-1]
    a3d = tf.reshape(a3d, [-1, depth, 1]) * 1000
    logits = tf_models.cnn_1d(a3d, 64, 4)
    labels = tf.reshape(labels_in, [-1])
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))
    train_summary = tf.summary.scalar('train_loss', loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_step = optimizer.minimize(loss)

    saver = tf.train.Saver()
    model_path = os.getcwd() + '/model.ckpt'

    dset_all = get_downsized_a3d_data(mode)
    labels_train = dataio.get_threat_heatmaps('train-%s' % cvid)
    labels_valid = dataio.get_threat_heatmaps('valid-%s' % cvid)
    train_idx, valid_idx = get_train_idx(mode, cvid), get_valid_idx(mode, cvid)

    with read_log_dir():
        writer = tf.summary.FileWriter(os.getcwd())

    def data_gen(dset, labels, idx):
        for i, label in zip(tqdm.tqdm(idx), labels):
            yield {
                a3d_in: dset[i],
                labels_in: np.sum(label[::4, ::4, 0], axis=-1)
            }

    def eval_model(sess):
        losses = []
        for data in data_gen(dset_all, labels_valid, valid_idx):
            cur_loss = sess.run(loss, feed_dict=data)
            losses.append(cur_loss)
        return np.mean(losses)

    def train_model(sess):
        it = 0
        t0 = time.time()
        best_valid_loss = None
        while time.time() - t0 < duration * 3600:
            for data in data_gen(dset_all, labels_train, train_idx):
                _, cur_summary = sess.run([train_step, train_summary], feed_dict=data)
                writer.add_summary(cur_summary, it)
                it += 1
            valid_loss = eval_model(sess)
            cur_valid_summary = tf.Summary()
            cur_valid_summary.value.add(tag='valid_loss', simple_value=valid_loss)
            writer.add_summary(cur_valid_summary, it)
            if best_valid_loss is None or valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                saver.save(sess, model_path)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_model(sess)

    open('done', 'w').close()
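
# Shape-only sketch of how train_1d_cnn reinterprets the a3d volume: each of the
# height*width body columns becomes an independent 1-D signal over the depth axis,
# matched against one pixel of the downsized top-down threat map. Constants mirror
# the placeholders above; the function name is illustrative only.
def _a3d_column_layout_sketch():
    import numpy as np
    width, depth, height = 128, 128, 165
    vol = np.zeros((width, depth, height), dtype=np.float32)
    cols = vol.transpose(2, 0, 1)[::-1].reshape(-1, depth, 1)
    assert cols.shape == (height * width, depth, 1)
    labels = np.zeros((height, width), dtype=np.float32).reshape(-1)
    assert labels.shape[0] == cols.shape[0]
    return cols, labels
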
def train_simple_segmentation_model(mode, cvid, duration, learning_rate=1e-3, num_filters=0,
                                    num_layers=0, blur_size=0, per_zone=None, use_hourglass=False,
                                    use_rotation=False, log_scale=False, num_conv=1,
                                    num_conv_filters=0, init_conf=1, zones_bias=False):
    tf.reset_default_graph()

    zones_in = tf.placeholder(tf.float32, [16, 330, 256, 18])
    hmaps_in = tf.placeholder(tf.float32, [16, 330, 256, 6])
    labels_in = tf.placeholder(tf.float32, [17])
    confidence = tf.get_variable('confidence', [], initializer=tf.constant_initializer(init_conf))

    if blur_size > 0:
        # learned radially symmetric blur kernel applied to the zone probability maps
        rx = tf.expand_dims(tf.pow(tf.range(blur_size, dtype=tf.float32)-(blur_size-1)/2, 2.0), -1)
        rmat = tf.tile(rx, [1, blur_size])
        rmat = rmat + tf.transpose(rmat)
        blur_amt = tf.get_variable('blur_amt', [])
        kernel = tf.exp(rmat * blur_amt)
        kernel /= tf.reduce_sum(kernel)
        kernel = tf.reshape(kernel, [blur_size, blur_size, 1, 1])
        zones = tf.concat([
            tf.nn.conv2d(zones_in[..., i:i+1], kernel, [1]*4, padding='SAME')
            for i in range(18)
        ], axis=-1)
    else:
        zones = zones_in

    # left/right symmetric zones share parameters
    sym_zone = [1, 2, 1, 2, 5, 6, 6, 8, 9, 8, 11, 11, 13, 13, 15, 15, 17]
    zones = zones / tf.reduce_sum(zones, axis=-1, keep_dims=True)
    zones = tf.log(zones + 1e-6)
    if zones_bias:
        with tf.variable_scope('zones_bias', reuse=tf.AUTO_REUSE):
            weights = tf.stack([
                tf.get_variable('zone_weights_%s' % zone, [],
                                initializer=tf.constant_initializer(1))
                for zone in [0] + sym_zone
            ], axis=0)
            bias = tf.stack([
                tf.get_variable('zones_bias_%s' % zone, [],
                                initializer=tf.constant_initializer(0))
                for zone in [0] + sym_zone
            ], axis=0)
        zones = zones*tf.square(weights) + bias
    else:
        zones *= tf.square(confidence)
    zones = tf.exp(zones)
    zones = zones / tf.reduce_sum(zones, axis=-1, keep_dims=True)

    if log_scale:
        hmaps = tf.log(hmaps_in)
    else:
        scales = np.array([2, 2000, 8, 600, 3, 2000])
        hmaps = tf.stack([hmaps_in[..., i] * scales[i] for i in range(6)], axis=-1)

    if use_hourglass:
        res = 256
        size = tf.random_uniform([2], minval=int(0.75*res), maxval=res, dtype=tf.int32)
        h_pad, w_pad = (res-size[0])//2, (res-size[1])//2
        padding = [[0, 0], [h_pad, res-size[0]-h_pad], [w_pad, res-size[1]-w_pad]]
        hmaps = tf.image.resize_images(hmaps, size)
        hmaps = tf.expand_dims(tf.pad(hmaps[..., 0], padding), axis=-1)
        if use_rotation:
            angle = tf.random_uniform([], maxval=2*math.pi)
            hmaps = tf.contrib.image.rotate(hmaps, angle)
        hmaps, _ = tf_models.hourglass_cnn(hmaps, res, 32, res, num_filters, downsample=False)
        if use_rotation:
            hmaps = tf.contrib.image.rotate(hmaps, -angle)
        hmaps = hmaps[:, padding[1][0]:-padding[1][1]-1, padding[2][0]:-padding[2][1]-1, :]
        hmaps = tf.image.resize_images(hmaps, [330, 256])
    elif num_filters > 0:
        for _ in range(num_conv):
            hmaps = tf.layers.conv2d(hmaps, num_conv_filters or num_filters, 1,
                                     activation=tf.nn.relu)

    # pool heatmap features within each body zone: (16, 18, H*W) x (16, H*W, F)
    zones = tf.reshape(tf.transpose(zones, [0, 3, 1, 2]), [16, 18, -1])
    hmaps = tf.reshape(hmaps, [16, -1, max(num_conv_filters or num_filters, 1)])
    prod = tf.transpose(tf.matmul(zones, hmaps), [1, 0, 2])[1:]
    flip_lr = tf.random_uniform([], maxval=2, dtype=tf.int32)
    prod = tf.cond(flip_lr > 0, lambda: prod[:, ::-1], lambda: prod)

    if num_filters == 0:
        prod = tf.reduce_mean(prod, axis=(1, 2))
        bias = tf.get_variable('bias', [17], initializer=tf.constant_initializer(-2.24302))
        weights = tf.get_variable('weights', [17], initializer=tf.constant_initializer(0))
        logits = prod*weights + bias
    else:
        def circular_conv(x, num_layers, num_filters, reduce_dim=True, reduce_max=True):
            # 1-D convolution over the 16 angles with wrap-around padding
            for _ in range(num_layers):
                x = tf.concat([x[:, 15:16, :], x, x[:, 0:1, :]], axis=1)
                x = tf.layers.conv1d(x, num_filters, 3, activation=tf.nn.relu)
            if reduce_dim:
                x = tf.layers.conv1d(x, 1, 1)
            if reduce_max:
                x = tf.reduce_max(x, axis=(1, 2))
            return x

        if per_zone == 'bias':
            logits = circular_conv(prod, num_layers, num_filters)
            with tf.variable_scope('zones', reuse=tf.AUTO_REUSE):
                weights = tf.stack([
                    tf.get_variable('weights_%s' % zone, [],
                                    initializer=tf.constant_initializer(1))
                    for zone in sym_zone
                ], axis=0)
                bias = tf.stack([
                    tf.get_variable('bias_%s' % zone, [],
                                    initializer=tf.constant_initializer(0))
                    for zone in sym_zone
                ], axis=0)
            logits = logits*weights + bias
        elif per_zone == 'matmul':
            logits = circular_conv(prod, num_layers, num_filters, reduce_max=False)
            logits = tf.reduce_max(logits, axis=1)
            logits = tf.matmul(tf.get_variable('zone_mat', [17, 17],
                                               initializer=tf.constant_initializer(np.eye(17))),
                               logits)
            logits += tf.get_variable('zone_bias', [17], initializer=tf.constant_initializer(0))
            logits = tf.squeeze(logits)
        elif per_zone == 'graph':
            logits = circular_conv(prod, num_layers, num_filters, reduce_max=False)

            def graph_refinement(a1, a2, num_layers, num_filters):
                x = tf.expand_dims(tf.concat([a1, a2], axis=-1), 0)
                with tf.variable_scope('graph'):
                    x = circular_conv(x, num_layers, num_filters)
                return tf.reduce_max(x)

            # adjacency between the 17 threat zones (1-indexed)
            adj = [
                [2], [1], [4], [3], [6, 7], [5, 7, 17], [5, 6, 17], [6, 9, 11], [8, 10],
                [7, 9, 12], [8, 13], [10, 14], [11, 15], [12, 16], [13], [14], [6, 7]
            ]
            logits_list = []
            with tf.variable_scope('apply_graph') as scope:
                for i in range(17):
                    cur_logits = []
                    for j in adj[i]:
                        cur_logits.append(graph_refinement(logits[i], logits[j-1], 1, 1))
                        scope.reuse_variables()
                    logits_list.append(tf.reduce_min(tf.stack(cur_logits)))
            logits = tf.stack(logits_list)
        elif per_zone == 'dense':
            logits = circular_conv(prod, num_layers, num_filters, reduce_dim=False)
            logits = tf.reduce_max(logits, axis=1)
            with tf.variable_scope('zones', reuse=tf.AUTO_REUSE):
                weights = tf.stack([
                    tf.get_variable('weights_%s' % zone, [num_filters])
                    for zone in sym_zone
                ], axis=0)
                bias = tf.stack([
                    tf.get_variable('bias_%s' % zone, [])
                    for zone in sym_zone
                ], axis=0)
            logits = tf.squeeze(tf.reduce_sum(logits*weights, axis=1) + bias)
        else:
            logits = circular_conv(prod, num_layers, num_filters)

    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_in, logits=logits))
    preds = tf.sigmoid(logits)
    train_summary = tf.summary.scalar('train_loss', loss)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_step = optimizer.minimize(loss)

    saver = tf.train.Saver()
    model_path = os.getcwd() + '/model.ckpt'

    def predict(zones_all, hmaps_all, idx=None, n_sample=1):
        if idx is None:
            idx = range(len(zones_all))
        with tf.Session() as sess:
            saver.restore(sess, model_path)
            for i in tqdm.tqdm(idx):
                ret = np.zeros(17)
                for _ in range(n_sample):
                    ret += sess.run(preds, feed_dict={
                        zones_in: zones_all[i],
                        hmaps_in: hmaps_all[i]
                    })
                yield ret / n_sample

    if os.path.exists('done'):
        return predict

    _, _, zones_all = body_zone_segmentation.get_body_zones(mode)
    hmaps_all = threat_segmentation_models.get_all_multitask_cnn_predictions(mode)
    labels_all = [y for x, y in sorted(get_train_labels().items())]
    train_idx, valid_idx = get_train_idx(mode, cvid), get_valid_idx(mode, cvid)

    with read_log_dir():
        writer = tf.summary.FileWriter(os.getcwd())

    def data_gen(zones_all, hmaps_all, labels_all, idx):
        for i in tqdm.tqdm(idx):
            yield {
                zones_in: zones_all[i],
                hmaps_in: hmaps_all[i],
                labels_in: np.array(labels_all[i])
            }

    def eval_model(sess):
        losses = []
        for data in data_gen(zones_all, hmaps_all, labels_all, valid_idx):
            cur_loss = sess.run(loss, feed_dict=data)
            losses.append(cur_loss)
        return np.mean(losses)

    def train_model(sess):
        it = 0
        t0 = time.time()
        best_valid_loss = None
        while time.time() - t0 < duration * 3600:
            for data in data_gen(zones_all, hmaps_all, labels_all, train_idx):
                _, cur_summary = sess.run([train_step, train_summary], feed_dict=data)
                writer.add_summary(cur_summary, it)
                it += 1
            valid_loss = eval_model(sess)
            cur_valid_summary = tf.Summary()
            cur_valid_summary.value.add(tag='valid_loss', simple_value=valid_loss)
            writer.add_summary(cur_valid_summary, it)
            if best_valid_loss is None or valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                saver.save(sess, model_path)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_model(sess)

    open('done', 'w').close()
    return predict
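
# Minimal numpy picture of the zone-pooling matmul at the heart of
# train_simple_segmentation_model: for every one of the 16 angles, the soft
# per-pixel body-zone assignments are matrix-multiplied against the per-pixel
# threat-heatmap features, giving one pooled feature vector per (zone, angle)
# before the circular 1-D convolution over angles. Shapes mirror the placeholders
# above; the helper name is illustrative only.
def _zone_pool_sketch(zones, hmaps):
    import numpy as np
    # zones: (16, 330, 256, 18) including the background channel
    # hmaps: (16, 330, 256, F) heatmap features
    z = zones.transpose(0, 3, 1, 2).reshape(16, 18, -1)                     # (16, 18, H*W)
    h = hmaps.reshape(16, hmaps.shape[1] * hmaps.shape[2], hmaps.shape[3])  # (16, H*W, F)
    pooled = np.matmul(z, h)                                                # (16, 18, F)
    # zones first, angles second, background channel dropped, as in the TF graph
    return pooled.transpose(1, 0, 2)[1:]                                    # (17, 16, F)
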