Example #1
def run(which_dataset):
    #Run build_image_data script
    print('Organizing files')

    #Make dirs if they do not exist
    config = GEDIconfig(which_dataset=which_dataset)
    dir_list = [
        config.train_directory, config.validation_directory,
        config.tfrecord_dir, config.train_checkpoint
    ]
    [make_dir(d) for d in dir_list]

    #Prepare lists with file pointers
    files = get_file_list(config.GEDI_path, config.label_directories,
                          config.im_ext)
    label_list = config.GEDI_path + 'list_of_' + config.which_dataset + '_labels.txt'  #to be created by prepare
    write_label_list(files, label_list)

    #Copy data into the appropriate training/testing directories
    hw = misc.imread(files[0]).shape
    new_files = split_files(files, config.train_proportion, config.tvt_flags)
    move_files(new_files['train'], config.train_directory)
    #process_image_data('train',new_files,config.tfrecord_dir,config.im_ext,config.train_shards,hw,config.normalize)
    simple_tf_records('train', new_files, config.tfrecord_dir, config.im_ext,
                      config.train_shards, hw, config.normalize)
    if 'val' in config.tvt_flags:
        move_files(new_files['val'], config.validation_directory)
        #process_image_data('val',new_files,config.tfrecord_dir,config.im_ext,config.train_shards,hw,config.normalize)
        simple_tf_records('val', new_files, config.tfrecord_dir, config.im_ext,
                          config.train_shards, hw, config.normalize)
    if 'test' in config.tvt_flags:
        move_files(new_files['test'], config.test_directory)
        #process_image_data('test',new_files,config.tfrecord_dir,config.im_ext,config.train_shards,hw,config.normalize)
        simple_tf_records('test', new_files, config.tfrecord_dir,
                          config.im_ext, config.train_shards, hw,
                          config.normalize)

    # Finally, write the labels file:
    labels_to_class_names = dict(
        zip(range(len(config.label_directories)), config.label_directories))
    write_label_file(labels_to_class_names, config.tfrecord_dir)
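A minimal sketch of how run might be invoked as a standalone script; the argparse wrapper and the default dataset name are illustrative assumptions, not part of the original module.

if __name__ == '__main__':
    import argparse

    # Hypothetical CLI wrapper around run(); the flag name and default are illustrative only.
    parser = argparse.ArgumentParser(
        description='Organize images into train/val/test splits and write tfrecords.')
    parser.add_argument('--which_dataset', type=str, default='gedi',
                        help='dataset key understood by GEDIconfig (assumed)')
    args = parser.parse_args()
    run(args.which_dataset)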
Example #2
def test_vgg16(
        which_dataset,
        validation_data=None,
        model_dir=None):  #Fine tuning defaults to wipe out the final two FCs
    config = GEDIconfig(which_dataset)
    if validation_data is None:  # Use globals
        validation_data = config.tfrecord_dir + 'val.tfrecords'

    #Make output directories if they do not exist
    out_dir = config.results + config.which_dataset + '/'
    dir_list = [config.results, out_dir]
    [make_dir(d) for d in dir_list]
    im_shape = get_image_size(config)

    #Find model checkpoints
    ckpts, ckpt_names = find_ckpts(config)

    #Prepare data on CPU
    with tf.device('/cpu:0'):
        val_images, val_labels = inputs(validation_data,
                                        config.validation_batch,
                                        im_shape,
                                        config.model_image_size[:2],
                                        num_epochs=1)

    #Prepare model on GPU
    with tf.device('/gpu:0'):
        vgg = vgg16.Vgg16(vgg16_npy_path=config.vgg16_weight_path,
                          fine_tune_layers=config.fine_tune_layers)
        validation_mode = tf.Variable(False, name='training')
        vgg.build(val_images,
                  output_shape=config.output_shape,
                  train_mode=validation_mode)

        #Setup validation op
        eval_accuracy = class_accuracy(vgg.prob,
                                       val_labels)  # validation accuracy

    #Set up saver
    saver = tf.train.Saver(tf.all_variables())

    #Loop through each checkpoint, loading the model weights, then testing the entire validation set
    ckpt_accs = []
    for idx in tqdm(range(len(ckpts))):
        accs = []
        try:
            #Initialize the graph
            sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
            sess.run(
                tf.group(tf.initialize_all_variables(),
                         tf.initialize_local_variables())
            )  #need to initialize both if supplying num_epochs to inputs

            #Set up exemplar threading
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            saver.restore(sess, ckpts[idx])
            start_time = time.time()
            while not coord.should_stop():
                accs = np.append(accs, sess.run([eval_accuracy]))

        except tf.errors.OutOfRangeError:
            ckpt_accs.append(accs)
            print('Batch %d took %.1f seconds' % (idx, time.time() - start_time))
        finally:
            coord.request_stop()
        coord.join(threads)
        sess.close()

    #Plot everything
    plot_accuracies(ckpt_accs, ckpt_names,
                    out_dir + 'validation_accuracies.png')
    np.savez(out_dir + 'validation_accuracies',
             ckpt_accs=ckpt_accs,
             ckpt_names=ckpt_names)
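The per-checkpoint accuracy lists collected above are ragged (one entry per validation batch). A short follow-up sketch, reusing np, ckpt_accs and ckpt_names from the example, that reduces them to one mean per checkpoint and reports the best one; the reduction is illustrative only.

mean_accs = [np.mean(a) if len(a) else 0.0 for a in ckpt_accs]  # one scalar per checkpoint
best_idx = int(np.argmax(mean_accs))
print('Best checkpoint: %s (mean validation accuracy %.3f)'
      % (ckpt_names[best_idx], mean_accs[best_idx]))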
Example #3
def train_vgg16(
        which_dataset,
        train_data=None,
        validation_data=None):  # Fine tuning defaults to wipe out the final two FCs
    config = GEDIconfig(which_dataset)
    if train_data is None:  # Use globals
        train_data = config.tfrecord_dir + 'train.tfrecords'
    if validation_data is None:  # Use globals
        validation_data = config.tfrecord_dir + 'val.tfrecords'

    #Make output directories if they do not exist
    dt_stamp = re.split(' ', str(datetime.now()))[0].replace('-', '_')
    config.train_checkpoint = os.path.join(
        config.train_checkpoint,
        config.which_dataset + '_' + dt_stamp)  # timestamp this run
    out_dir = config.results + config.which_dataset + '/'
    dir_list = [config.train_checkpoint, config.train_summaries,
                config.results, out_dir]
    [make_dir(d) for d in dir_list]
    im_shape = get_image_size(config)

    #Prepare data on CPU
    with tf.device('/cpu:0'):
        train_images, train_labels = inputs(train_data,
                                            config.train_batch,
                                            im_shape,
                                            config.model_image_size[:2],
                                            train=config.data_augmentations,
                                            num_epochs=config.epochs)
        tf.image_summary('train images', train_images)

    #Prepare model on GPU
    with tf.device('/gpu:0'):
        vgg = vgg16.Vgg16(vgg16_npy_path=config.vgg16_weight_path,
                          fine_tune_layers=config.fine_tune_layers)
        train_mode = tf.Variable(True, name='training')
        vgg.build(train_images,
                  output_shape=config.output_shape,
                  train_mode=train_mode)

        #Prepare the cost function
        cost = softmax_cost(vgg.fc8, train_labels)
        tf.scalar_summary("cost", cost)
        
        # Finetune the learning rates: split trainable variables into the
        # frozen-rate group and the fine-tuned group (duplicate variable names
        # here indicate a misconfigured fine_tune_layers list)
        other_opt_vars, ft_opt_vars = fine_tune_prepare_layers(
            tf.trainable_variables(), config.fine_tune_layers)
        train_op = ft_non_optimized(cost, other_opt_vars, ft_opt_vars,
                                    tf.train.AdamOptimizer,
                                    config.hold_lr, config.new_lr)  # actually is faster

        #Setup validation op
        eval_accuracy = class_accuracy(vgg.prob,train_labels) #training accuracy now...
        tf.scalar_summary("train accuracy", eval_accuracy)

    #Set up summaries and saver
    saver = tf.train.Saver(tf.all_variables(),max_to_keep=100)
    summary_op = tf.merge_all_summaries()

    #Initialize the graph
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    sess.run(tf.group(tf.initialize_all_variables(),
                      tf.initialize_local_variables()))  # need to initialize both if supplying num_epochs to inputs
    summary_writer = tf.train.SummaryWriter(
        os.path.join(config.train_summaries,
                     config.which_dataset + '_' + dt_stamp), sess.graph)

    #Set up exemplar threading
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess,coord=coord)

    #Start training loop
    try:
        step = 0
        losses = []
        accs=[]
        while not coord.should_stop():
            start_time = time.time()
            _, loss_value, acc = sess.run([train_op, cost, eval_accuracy])
            losses.append(loss_value)
            accs.append(acc)
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                if step % 500 == 0:
                    #Summaries
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)
                else:
                    #Training status
                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print (format_str % (datetime.now(), step, loss_value,
                                         config.train_batch / duration, float(duration)))

            # Save the model checkpoint periodically.
            if step % 500 == 0 and step > 0:
                saver.save(sess,
                           os.path.join(config.train_checkpoint,
                                        'model_' + str(step) + '.ckpt'),
                           global_step=step)
                if np.average(accs[-500:-1]) > 0.95:
                    coord.request_stop()
            step += 1

    except tf.errors.OutOfRangeError:
        print('Done training for %d epochs, %d steps.' % (config.epochs, step))
        saver.save(sess, os.path.join(config.train_checkpoint, 'model_' + str(step) + '.ckpt'),
                 global_step=step)
    finally:
        coord.request_stop()
    coord.join(threads)
    sess.close()
    np.save(out_dir + 'training_loss',losses)
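fine_tune_prepare_layers is not shown in this example; a minimal sketch of one way such a helper could partition tf.trainable_variables() by layer name, purely as an assumption about its behaviour rather than the actual implementation.

def fine_tune_prepare_layers_sketch(trainable_vars, fine_tune_layer_names):
    # Hypothetical split: variables whose names mention a fine-tune layer get the
    # new learning rate; everything else keeps the lower hold learning rate.
    ft_vars = [v for v in trainable_vars
               if any(name in v.name for name in fine_tune_layer_names)]
    ft_names = set(v.name for v in ft_vars)
    other_vars = [v for v in trainable_vars if v.name not in ft_names]
    return other_vars, ft_vars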
Example #4
def train_att(
        which_dataset,
        train_data=None,
        validation_data=None):  # Fine tuning defaults to wipe out the final two FCs
    config = GEDIconfig(which_dataset)
    if train_data is None:  # Use globals
        train_data = config.tfrecord_dir + 'train.tfrecords'
    if validation_data is None:  # Use globals
        validation_data = config.tfrecord_dir + 'val.tfrecords'

    #Make output directories if they do not exist
    dt_stamp = re.split(' ', str(datetime.now()))[0].replace('-', '_')
    config.train_checkpoint = os.path.join(
        config.train_checkpoint,
        config.which_dataset + '_' + dt_stamp)  # timestamp this run
    out_dir = config.results + config.which_dataset + '/'
    dir_list = [config.train_checkpoint, config.train_summaries,
                config.results, out_dir]
    [make_dir(d) for d in dir_list]
    im_shape = get_image_size(config)

    #Prepare data on CPU
    with tf.device('/cpu:0'):
        train_images, train_labels = inputs(train_data,
                                            config.train_batch,
                                            im_shape,
                                            config.model_image_size[:2],
                                            train=config.data_augmentations,
                                            num_epochs=config.epochs)
        tf.image_summary('train images', train_images)

    #Prepare model on GPU
    with tf.device('/gpu:0'):
        att_model = att.Attention()

        train_mode = tf.Variable(True, name='training')
        att_model.build(train_images,
                        enc_size=config.enc_size,
                        read_n=config.read_n,
                        T=config.T,
                        output_shape=config.output_shape,
                        train_mode=train_mode)

        #Prepare the cost function
        cost = softmax_cost(att_model.fc2, train_labels)
        
        tf.scalar_summary("cost", cost)
        #print type(tf.trainable_variables()[0])
        print [x.name for x in tf.trainable_variables()]
        #Finetune the learning rates
        optimizer = tf.train.AdamOptimizer(learning_rate=config.new_lr, beta1=0.5)
        grads = optimizer.compute_gradients(cost)
        # v_i = att_model.v_i
        for i, (g, v) in enumerate(grads):
            if g is not None:
                grads[i] = (tf.clip_by_norm(g, 5), v)  # clip each gradient's norm
        train_op = optimizer.apply_gradients(grads)
        #print a
        #Setup validation op
        eval_accuracy = class_accuracy(att_model.prob,train_labels) #training accuracy now...
        tf.scalar_summary("train accuracy", eval_accuracy)

    #Set up summaries and saver
    saver = tf.train.Saver(tf.all_variables(),max_to_keep=100)
    summary_op = tf.merge_all_summaries()

    #Initialize the graph
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    sess.run(tf.group(tf.initialize_all_variables(),
                      tf.initialize_local_variables()))  # need to initialize both if supplying num_epochs to inputs
    summary_writer = tf.train.SummaryWriter(
        os.path.join(config.train_summaries,
                     config.which_dataset + '_' + dt_stamp), sess.graph)

    #Set up exemplar threading
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess,coord=coord)

    #Start training loop
    try:
        step = 0
        losses = []
        accs=[]
        while not coord.should_stop():
            start_time = time.time()
            _, loss_value, acc = sess.run([train_op, cost, eval_accuracy])
            # print v_j
            losses.append(loss_value)
            accs.append(acc)

            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                if step % 2000 == 0:
                    #Summaries
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)
                else:
                    #Training status
                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print (format_str % (datetime.now(), step, loss_value,
                                         config.train_batch / duration, float(duration)))
               
                if np.average(accs[-100:-1]) > 0.9 or np.average(losses[-100:-1]) < 0.1:
                    saver.save(sess,
                               os.path.join(config.train_checkpoint,
                                            'model_' + str(step) + '.ckpt'),
                               global_step=step)
                    coord.request_stop()

            # Save the model checkpoint periodically.
            if step % 1000 == 0 and step > 0:
                saver.save(sess,
                           os.path.join(config.train_checkpoint,
                                        'model_' + str(step) + '.ckpt'),
                           global_step=step)
                
            step += 1

    except tf.errors.OutOfRangeError:
        print('Done training for %d epochs, %d steps.' % (config.epochs, step))
        saver.save(sess, os.path.join(config.train_checkpoint, 'model_' + str(step) + '.ckpt'),
                 global_step=step)
    finally:
        coord.request_stop()
    coord.join(threads)
    sess.close()
    np.save(out_dir + 'training_loss',losses)
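The clipping loop above caps each gradient tensor's norm at 5 independently. A common alternative (not what the original does) is joint clipping by global norm; a short sketch reusing optimizer and cost from the example above:

# Sketch only: clip all gradients jointly by their global norm instead of per tensor.
grads_and_vars = optimizer.compute_gradients(cost)
gradients, variables = zip(*grads_and_vars)
clipped, _ = tf.clip_by_global_norm(gradients, 5.0)
train_op = optimizer.apply_gradients(zip(clipped, variables))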
Example #5
    image_filenames = glob.glob(labels + '/*')
    labels = []
    for im in image_filenames:
        for k, v in in_dict.iteritems():
            label_name = re.split('\d+', re.split('/', im)[-1])[0]
            if label_name == k:
                labels += [v]
    return image_filenames, labels


if __name__ == "__main__":
    config = GEDIconfig()

    #Make directories if they haven't been made yet
    dir_list = [config.heatmap_source_images, config.heatmap_dataset_images]
    [make_dir(d) for d in dir_list]
    image_filenames, image_labels = random_sample_images(
        config.heatmap_image_labels, config.heatmap_image_dict,
        config.heatmap_dataset_images, config.im_ext,
        config.heatmap_image_amount)
    #Run bubbles
    generate_heatmaps_for_images(config,
                                 None,
                                 image_filenames,
                                 image_labels,
                                 'vgg16',
                                 'bubbles',
                                 'neg',
                                 block_size=config.block_size,
                                 block_stride=config.block_stride,
                                 generate_plots=config.generate_plots,
Example #6
def test_vgg16(validation_data, model_dir, selected_ckpts=-1):
    config = GEDIconfig()
    if validation_data is None:  # Use globals
        validation_data = config.tf_record_names['val']
        meta_data = np.load(
            os.path.join(config.tfrecord_dir, 'val_' + config.max_file))
    else:
        meta_data = np.load(
            validation_data.split('.tfrecords')[0] + '_maximum_value.npz')
    label_list = os.path.join(
        config.processed_image_patch_dir,
        'list_of_' + '_'.join(x
                              for x in config.image_prefixes) + '_labels.txt')
    with open(label_list) as f:
        file_pointers = [l.rstrip('\n') for l in f.readlines()]

    # Prepare image normalization values
    try:
        max_value = np.max(meta_data['max_array']).astype(np.float32)
    except:
        max_value = np.asarray([config.max_gedi])
    try:
        min_value = np.min(meta_data['min_array']).astype(np.float32)
    except:
        min_value = np.asarray([config.min_gedi])

    # Find model checkpoints
    ckpts, ckpt_names = find_ckpts(config, model_dir)
    ds_dt_stamp = re.split('/', ckpts[0])[-2]
    out_dir = os.path.join(config.results, ds_dt_stamp + '/')
    try:
        config = np.load(os.path.join(out_dir, 'meta_info.npy')).item()
        # Make sure this is always at 1
        config.validation_batch = 1
        print '-' * 60
        print 'Loading config meta data for:%s' % out_dir
        print '-' * 60
    except:
        print '-' * 60
        print 'Using config from gedi_config.py for model:%s' % out_dir
        print '-' * 60

    # Make output directories if they do not exist
    dir_list = [config.results, out_dir]
    [make_dir(d) for d in dir_list]
    # im_shape = get_image_size(config)
    im_shape = config.gedi_image_size

    # Prepare data on CPU
    with tf.device('/cpu:0'):
        val_images, val_labels = inputs(validation_data,
                                        1,
                                        im_shape,
                                        config.model_image_size[:2],
                                        max_value=max_value,
                                        min_value=min_value,
                                        num_epochs=1,
                                        normalize=config.normalize)

    # Prepare model on GPU
    with tf.device('/gpu:0'):
        with tf.variable_scope('cnn'):
            vgg = vgg16.Vgg16(vgg16_npy_path=config.vgg16_weight_path,
                              fine_tune_layers=config.fine_tune_layers)
            vgg.build(val_images, output_shape=config.output_shape)

        # Setup validation op
        scores = vgg.prob
        preds = tf.argmax(vgg.prob, 1)
        targets = tf.cast(val_labels, dtype=tf.int64)
        oh_targets = tf.one_hot(val_labels, config.output_shape)

        # Masked LRP op
        heatmap = lrp.lrp(vgg.fc8 * oh_targets, -123.68, 255 - 123.68)  # vgg.fc8 assumed to be the pre-softmax logits
        # heatmap = lrp.get_lrp_im(sess, F, images, y, img, lab)[0]

    # Set up saver
    saver = tf.train.Saver(tf.global_variables())

    # Get class indices for all files (use_true_label, labels and
    # image_filenames are defined elsewhere in the original script)
    if use_true_label:
        label_key = np.asarray(config.label_directories)
        class_indices = [
            np.where(config.which_dataset + '_' + fn == label_key)
            for fn in labels
        ]
    else:
        class_indices = [None] * len(image_filenames)

    # Loop through each checkpoint then test the entire validation set
    ckpt_yhat, ckpt_y, ckpt_scores = [], [], []
    print '-' * 60
    print 'Beginning visualization'
    print '-' * 60

    if selected_ckpts is not None:
        # Select a specific ckpt
        if selected_ckpts < 0:
            ckpts = ckpts[selected_ckpts:]
        else:
            ckpts = ckpts[:selected_ckpts]

    dec_scores, yhat, y, yoh, ims, hms = [], [], [], [], [], []
    for idx, c in tqdm(enumerate(ckpts), desc='Running checkpoints'):
        try:
            # Initialize the graph
            sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer()))

            # Set up exemplar threading
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            saver.restore(sess, c)
            start_time = time.time()
            while not coord.should_stop():
                sc, tyh, ty, tyoh, imb, ihm = sess.run(
                    [scores, preds, targets, oh_targets, val_images, heatmap])
                dec_scores += [sc]
                yhat += [tyh]
                y += [ty]
                yoh += [tyoh]
                ims += [imb]
                hms += [ihm]
        except tf.errors.OutOfRangeError:
            print 'Batch %d took %.1f seconds' % (idx,
                                                  time.time() - start_time)
        finally:
            coord.request_stop()
        coord.join(threads)
        sess.close()
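After the checkpoint loop, the per-example predictions and targets gathered above can be reduced to an overall accuracy; a short sketch that reuses np, yhat and y from the example and assumes each entry holds the labels for one batch.

yhat_arr = np.concatenate([np.asarray(b).ravel() for b in yhat])
y_arr = np.concatenate([np.asarray(b).ravel() for b in y])
print('Validation accuracy: %.3f' % np.mean(yhat_arr == y_arr))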
Example #7
def test_vgg16(
        which_dataset,
        validation_data=None,
        model_dir=None):  #Fine tuning defaults to wipe out the final two FCs
    config = GEDIconfig(which_dataset)
    if validation_data is None:  # Use globals
        validation_data = config.tfrecord_dir + 'val.tfrecords'

    #Make output directories if they do not exist
    out_dir = config.results + config.which_dataset + '/'
    dir_list = [config.results, out_dir]
    [make_dir(d) for d in dir_list]
    im_shape = get_image_size(config)

    #Find model checkpoints
    ckpts, ckpt_names = find_ckpts(config)
    print ckpts, ckpt_names
    #Prepare data on CPU
    with tf.device('/cpu:0'):
        val_images, val_labels = inputs(validation_data,
                                        config.validation_batch,
                                        im_shape,
                                        config.model_image_size[:2],
                                        num_epochs=1)

    #Prepare model on GPU
    with tf.device('/gpu:0'):
        att_model = att.Attention()

        validation_mode = tf.Variable(False, name='training')
        att_model.build(val_images,
                        enc_size=config.enc_size,
                        read_n=config.read_n,
                        T=config.T,
                        output_shape=config.output_shape,
                        train_mode=validation_mode)
        image_0 = val_images
        image_1 = att_model.image_show
        image_loc = att_model.location
        # print image_0.get_shape()
        # print image_1[0].get_shape()
        #Setup validation op
        eval_accuracy = class_accuracy(att_model.prob,
                                       val_labels)  # validation accuracy

    #Set up saver
    saver = tf.train.Saver(tf.all_variables())

    #Loop through each checkpoint, loading the model weights, then testing the entire validation set
    ckpt_accs = []
    max_acc = 0
    max_ind = 0
    max_show_0 = []
    max_show_1 = []
    max_loc = []
    for idx in tqdm(range(len(ckpts))):
        print ckpts[idx]
        accs = []
        show_0 = np.array([])
        show_1 = np.array([])
        show_loc = np.array([])
        try:

            #print type(show_0)
            #Initialize the graph
            sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
            sess.run(
                tf.group(tf.initialize_all_variables(),
                         tf.initialize_local_variables())
            )  #need to initialize both if supplying num_epochs to inputs

            #Set up exemplar threading
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            saver.restore(sess, ckpts[idx])
            start_time = time.time()
            while not coord.should_stop():
                #print '1'
                #print type(accs)
                acc, aa, bb, cc = sess.run(
                    [eval_accuracy, image_0, image_1, image_loc])
                accs = np.append(accs, acc)
                if accs[-1] > 0.8 and show_0.shape[-1] < 5:
                    #print show_0.shape[-1]
                    #print aa.shape, bb
                    aa = aa
                    bb = bb
                    (x1, x2, x3, x4) = aa.shape
                    (y1, y2, y3, y4) = bb.shape
                    (z1, z2, z3) = cc.shape
                    aa = np.reshape(aa, (x1, x2, x3, x4, 1))
                    bb = np.reshape(bb, (y1, y2, y3, y4, 1))
                    cc = np.reshape(cc, (z1, z2, z3, 1))
                    if show_0.shape[0] <= 2:
                        #print sess.run([image_1])

                        show_0 = aa
                        show_1 = bb
                        show_loc = cc
                    else:
                        #print sess.run([image_0])[0].shape, show_0.shape

                        show_0 = np.concatenate((show_0, aa), 4)
                        show_1 = np.concatenate((show_1, bb), 4)
                        show_loc = np.concatenate((show_loc, cc), 3)

        except tf.errors.OutOfRangeError:
            if np.mean(accs) > max_acc:
                max_acc = np.mean(accs)
                max_ind = idx
                max_show_0 = show_0
                max_show_1 = show_1
                max_loc = show_loc
            ckpt_accs.append(accs)
            print('Batch %d took %.1f seconds' % (idx, time.time() - start_time))
        finally:
            coord.request_stop()
        coord.join(threads)
        sess.close()

    print ckpt_accs, ckpt_names

    #Plot everything
    plot_accuracies(ckpt_accs, ckpt_names,
                    out_dir + 'validation_accuracies.png')
    np.savez(out_dir + 'validation_accuracies',
             ckpt_accs=ckpt_accs,
             ckpt_names=ckpt_names)
    np.savez(out_dir + 'att_verification_' + which_dataset,
             max_show_0=max_show_0,
             max_show_1=max_show_1,
             max_loc=max_loc)
    for idx in range(len(ckpts)):
        if idx != max_ind:
            os.remove(ckpts[idx] + '.data-00000-of-00001')
            os.remove(ckpts[idx] + '.meta')
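The arrays written by the final np.savez call can be reloaded later for inspection; a minimal sketch, assuming the same out_dir and which_dataset used above (np.savez appends the .npz extension automatically).

data = np.load(out_dir + 'att_verification_' + which_dataset + '.npz')
max_show_0 = data['max_show_0']  # input images kept from the best checkpoint
max_show_1 = data['max_show_1']  # attention model's rendered glimpses
max_loc = data['max_loc']        # glimpse locations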