Exemple #1
0
def train_neural_network():
    """Train the CNN, logging summaries and saving checkpoints periodically.

    Builds the train/test input tensors, the model, its loss and optimizer,
    then runs ``EPOCHS`` epochs of ``TRAINING_DATASET_SIZE // FLAGS.BATCH_SIZE``
    optimisation steps each. Every ``SUMMARY_PERIOD`` batches the merged
    summaries are written to ``summary_dir``; every ``CHECKPOINT_PERIOD``
    batches the model is run on one cached test batch and both the output
    images and the model are saved.

    Relies on module-level names not visible in this block (``FLAGS``,
    ``EPOCHS``, ``LEARNING_RATE``, ``summary_dir``, ``_save_image_batch``,
    ``_save_tf_model``, ...). Returns nothing.
    """
    sess = setup_tensorflow()

    train_feature_filenames, train_label_filenames = get_filenames()
    test_feature_filenames, test_label_filenames = get_test_filenames()

    # we can also have input summaries written out to tensorboard
    train_features, train_labels = input_pipeline.get_files(
        sess, train_feature_filenames, train_label_filenames)
    test_features, test_labels = input_pipeline.get_files(
        sess, test_feature_filenames, test_label_filenames)

    # get outputs and variable lists
    output, cnn_var_list, test_input, test_label, test_output = model.create_model(
        sess, train_features, train_labels)

    # Loss plus its TensorBoard scalar. The summary is registered exactly
    # once; registering it a second time would only add a duplicate series.
    with tf.name_scope("loss"):
        cnn_loss = model.create_cnn_loss(output, train_labels)
        tf.summary.scalar("loss", cnn_loss)

    (_global_step, learning_rate,
     cnn_minimize) = model.create_optimizer(cnn_loss, cnn_var_list)

    sess.run(tf.global_variables_initializer())

    # cache one test batch so progress is always monitored on the same data
    test_feature_batch, test_label_batch = sess.run(
        [test_features, test_labels])

    # Floor division keeps the batch count an int on Python 3 as well
    # (range() rejects floats); any trailing partial batch is not counted.
    num_batches = TRAINING_DATASET_SIZE // FLAGS.BATCH_SIZE

    # add computation graph to the summary writer
    writer = tf.summary.FileWriter(summary_dir)
    writer.add_graph(sess.graph)

    merged_summaries = tf.summary.merge_all()

    # Loop invariants: the fetched root nodes and the hyperparameter feed.
    train_ops = [cnn_minimize, cnn_loss, merged_summaries]
    train_feed = {learning_rate: LEARNING_RATE}
    test_feed = {
        test_input: test_feature_batch,
        test_label: test_label_batch
    }

    try:
        for epoch in range(1, EPOCHS + 1):
            for batch in range(1, num_batches + 1):
                _, loss, summaries = sess.run(train_ops, feed_dict=train_feed)

                print("Epoch : " + str(epoch) + "/" + str(EPOCHS) + " , Batch : " +
                      str(batch) + "/" + str(num_batches) + " completed; Loss " +
                      str(loss))

                if batch % SUMMARY_PERIOD == 0:
                    # Tag the event with a monotonically increasing step so
                    # TensorBoard can plot it against training progress.
                    step = (epoch - 1) * num_batches + batch
                    writer.add_summary(summaries, global_step=step)
                    print("Summary Written to Logdir")

                if batch % CHECKPOINT_PERIOD == 0:
                    # save model progress and output images for this batch
                    output_batch = sess.run(test_output, feed_dict=test_feed)

                    _save_image_batch(epoch, batch, output_batch)
                    _save_tf_model(sess)

                    print("Image batch and model saved!!")
    finally:
        # Flush pending events to disk even if training is interrupted.
        writer.close()
Exemple #2
0
def evaluate_model():
    """Restore a saved model and print its mean accuracy on the evaluation set.

    Rebuilds the graph (model, loss, optimizer, accuracy ops), restores the
    weights from ``./model/model100.ckpt`` and averages the accuracy over
    ``EVALUATE_DATASET_SIZE // FLAGS.BATCH_SIZE`` batches read from
    ``evaluate_dir``. The per-batch and final accuracies are printed;
    nothing is returned.
    """
    sess = setup_tensorflow()

    # SetUp Input PipeLine for queue inputs
    with tf.name_scope('train_input'):
        evaluate_features, evaluate_labels = input_pipeline.get_files(
            evaluate_dir)

    # Create Model creating graph
    output, var_list, _is_training1 = model.create_model(
        sess, evaluate_features, evaluate_labels)

    # Loss and optimizer are never run here.
    # NOTE(review): presumably they are built so the graph holds the same
    # variables as the training checkpoint being restored -- confirm.
    with tf.name_scope("loss"):
        total_loss, _softmax_loss = model.compute_loss(output, evaluate_labels)

    (_global_step, _learning_rate,
     _minimize) = model.create_optimizer(total_loss, var_list)

    # Accuracy setup
    out_eval, eval_input, eval_label, accuracy, is_training2 = model.compute_accuracy(
        sess)

    sess.run(tf.global_variables_initializer())

    # Start the queue-runner threads that feed the input pipeline.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # Floor division keeps the count an int on Python 3 (range() needs it).
        num_batches = EVALUATE_DATASET_SIZE // FLAGS.BATCH_SIZE

        # Restore the trained weights over the freshly initialised variables.
        saver = tf.train.Saver()
        saver.restore(sess, "./model/model100.ckpt")

        total_accuracy = 0
        ops = [out_eval, accuracy]

        for batch in range(1, num_batches + 1):
            # Pull the next batch from the pipeline, then feed it to the
            # accuracy sub-graph with training mode switched off.
            input_batch, label_batch = sess.run(
                [evaluate_features, evaluate_labels])

            feed_dict = {
                eval_input: input_batch,
                eval_label: label_batch,
                is_training2: False
            }

            _, acc = sess.run(ops, feed_dict=feed_dict)

            print(" batch /" + str(batch) + " /" + str(num_batches) + " acc: " +
                  str(acc))
            total_accuracy += acc
    finally:
        # Always shut the reader threads down, even if evaluation failed.
        coord.request_stop()
        coord.join(threads)

    # Exactly num_batches accuracies were summed, so that is the divisor;
    # skip the division when no full batch was available.
    if num_batches > 0:
        total_accuracy /= num_batches

    # Total Accuracy for Evaluate dataset
    print(" ACCURACY : " + str(total_accuracy))
Exemple #3
0
def run(PATH_TO_IMAGES, LR, WEIGHT_DECAY, opt):
    """
    Train torchvision model to NIH data given high level hyperparameters,
    or only evaluate a saved checkpoint when ``opt.eval_only`` is set.

    Args:
        PATH_TO_IMAGES: path to NIH images
        LR: learning rate
        WEIGHT_DECAY: weight decay parameter for SGD
        opt: options object. This function reads ``project``, ``run_name``,
            ``batch_size``, ``eval_only``, ``run_path``, ``random_crop``,
            ``input_size``, ``lr_decay_ratio`` and ``patience`` from it and
            forwards it to the project helpers it calls.

    Returns:
        preds: torchvision model predictions on test fold with ground truth for comparison
        aucs: AUCs for each train,test tuple
        ``(None, None)`` is returned instead when a training run cannot
        create its ``opt.run_path`` directory.

    Raises:
        ValueError: if no CUDA device is available.

    """

    use_gpu = torch.cuda.is_available()
    gpu_count = torch.cuda.device_count()
    print("Available GPU count:" + str(gpu_count))

    # please do not attempt to train without GPU as will take excessively long.
    # Checked before any side effect (wandb run, run_path directory) so a
    # failed start does not leave a run_path behind that blocks the retry.
    if not use_gpu:
        raise ValueError("Error, requires GPU")

    wandb.init(project=opt.project, name=opt.run_name)
    wandb.config.update(opt, allow_val_change=True)

    NUM_EPOCHS = 60
    BATCH_SIZE = opt.batch_size

    if opt.eval_only:
        # test only. it is okay to have duplicate run_path
        os.makedirs(opt.run_path, exist_ok=True)
    else:
        # train from scratch, should not have the same run_path. Otherwise it will overwrite previous runs.
        try:
            os.makedirs(opt.run_path)
        except FileExistsError:
            print("[ERROR] run_path {} exists. try to assign a unique run_path".format(opt.run_path))
            return None, None
        except Exception as e:
            print("exception while creating run_path {}".format(opt.run_path))
            print(str(e))
            return None, None

    # use imagenet mean,std for normalization
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    N_LABELS = 14  # we are predicting 14 labels

    # define torchvision transforms
    if opt.random_crop:
        data_transforms = {
            'train': transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomResizedCrop(size=opt.input_size, scale=(0.8, 1.0)),  # crop then resize
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            'val': transforms.Compose([
                # resize slightly larger, then crop the centre to input_size
                transforms.Resize(int(opt.input_size * 1.05)),
                transforms.CenterCrop(opt.input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        }
    else:
        data_transforms = {
            'train': transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.Resize(opt.input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            'val': transforms.Compose([
                transforms.Resize(opt.input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        }

    # create train/val datasets
    folds = ['train', 'val']
    transformed_datasets = {
        fold: CXR.CXRDataset(
            path_to_images=PATH_TO_IMAGES,
            fold=fold,
            transform=data_transforms[fold])
        for fold in folds
    }

    worker_init_fn = set_seed(opt)

    # create train/val dataloaders; only the training loader shuffles
    dataloaders = {
        fold: torch.utils.data.DataLoader(
            transformed_datasets[fold],
            batch_size=BATCH_SIZE,
            shuffle=(fold == 'train'),
            num_workers=30,
            drop_last=True,
            worker_init_fn=worker_init_fn
        )
        for fold in folds
    }

    # load model (built once; both branches below start from this instance)
    model = load_model(N_LABELS, opt)

    if opt.eval_only:
        print("loading best model statedict")
        # load best model weights to return
        checkpoint_best = torch.load(os.path.join(opt.run_path, 'checkpoint'))
        model.load_state_dict(checkpoint_best['state_dict'])

    else:
        # define criterion, optimizer for training; evaluation needs neither
        criterion = nn.BCELoss()

        optimizer = create_optimizer(model, LR, WEIGHT_DECAY, opt)

        # 'max' mode: the rate is reduced when the monitored value stops rising
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer,
            'max',
            factor=opt.lr_decay_ratio,
            patience=opt.patience,
            verbose=True
        )

        dataset_sizes = {x: len(transformed_datasets[x]) for x in folds}

        # train model
        model, _best_epoch = train_model(
            model,
            criterion,
            optimizer,
            LR,
            scheduler=scheduler,
            num_epochs=NUM_EPOCHS,
            dataloaders=dataloaders,
            dataset_sizes=dataset_sizes,
            PATH_TO_IMAGES=PATH_TO_IMAGES,
            data_transforms=data_transforms,
            opt=opt,
        )

    # get preds and AUCs on test fold
    preds, aucs = E.make_pred_multilabel(
        data_transforms,
        model,
        PATH_TO_IMAGES,
        fold="test",
        opt=opt,
    )

    # log the mean of the AUCs returned for the test fold
    wandb.log({
        'val_official': np.average(list(aucs.auc))
    })

    return preds, aucs
Exemple #4
0
def _average_accuracy(sess, feature_op, label_op, eval_input, eval_label,
                      is_training, out_eval, accuracy, num_batches):
    """Return the mean of ``accuracy`` over ``num_batches`` batches (0 if none)."""
    if num_batches <= 0:
        return 0

    total_accuracy = 0
    ops = [out_eval, accuracy]
    for _ in range(num_batches):
        # Pull the next batch from the pipeline, then feed it to the
        # accuracy sub-graph with training mode switched off.
        input_batch, label_batch = sess.run([feature_op, label_op])
        feed_dict = {
            eval_input: input_batch,
            eval_label: label_batch,
            is_training: False,
        }
        _, acc = sess.run(ops, feed_dict=feed_dict)
        total_accuracy += acc

    return total_accuracy / num_batches


def train_model():
    """Train the model, reporting loss, training accuracy and dev accuracy.

    For each of ``EPOCHS`` epochs this runs
    ``TRAINING_DATASET_SIZE // FLAGS.BATCH_SIZE`` optimisation steps, prints
    the mean total and softmax loss, prints the mean accuracy over the
    training and dev pipelines, writes the last batch's merged summaries to
    ``summary_dir`` and saves a checkpoint named ``model<epoch>.ckpt`` under
    ``model_dir``. Returns nothing.
    """
    sess = setup_tensorflow()

    # SetUp Input PipeLine for queue inputs
    with tf.name_scope('train_input'):
        train_features, train_labels = input_pipeline.get_files(train_dir)
    with tf.name_scope('dev_input'):
        dev_features, dev_labels = input_pipeline.get_files(dev_dir)

    # Create Model creating graph
    output, var_list, is_training1 = model.create_model(sess, train_features, train_labels)

    # Loss plus its TensorBoard scalar. The summary is registered exactly
    # once; registering it a second time would only add a duplicate series.
    with tf.name_scope("loss"):
        total_loss, softmax_loss = model.compute_loss(output, train_labels)
        tf.summary.scalar("loss", total_loss)

    (_global_step, learning_rate, minimize) = model.create_optimizer(total_loss, var_list)

    # Accuracy setup
    out_eval, eval_input, eval_label, accuracy, is_training2 = model.compute_accuracy(sess)

    sess.run(tf.global_variables_initializer())

    # Start the queue-runner threads that feed the input pipelines.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    writer = None
    try:
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()

        # Floor division keeps the counts ints on Python 3 (range() needs it).
        num_batches = TRAINING_DATASET_SIZE // FLAGS.BATCH_SIZE
        num_batches_dev = DEV_DATASET_SIZE // FLAGS.BATCH_SIZE

        # add computation graph to summary writer
        writer = tf.summary.FileWriter(summary_dir)
        writer.add_graph(sess.graph)

        merged_summaries = tf.summary.merge_all()

        # Loop invariants: fetched root nodes and the training feed.
        train_ops = [minimize, softmax_loss, total_loss, merged_summaries]
        train_feed = {learning_rate: LEARNING_RATE, is_training1: True}

        for epoch in range(1, EPOCHS + 1):

            # Train Model feeding data in batches calculating total loss
            softmax_loss_sum = 0
            total_loss_sum = 0
            summaries = None

            for _ in range(num_batches):
                _, sloss, loss, summaries = sess.run(train_ops, feed_dict=train_feed)
                softmax_loss_sum += sloss
                total_loss_sum += loss

            # Exactly num_batches losses were summed, so that is the divisor.
            if num_batches > 0:
                softmax_loss_sum /= num_batches
                total_loss_sum /= num_batches

            print("Epoch /" + str(epoch) + " /" + str(EPOCHS) + " ; Loss " + str(total_loss_sum) + " softmax Loss " + str(softmax_loss_sum))

            # Calculate training accuracy for whole training data
            train_accuracy = _average_accuracy(
                sess, train_features, train_labels, eval_input, eval_label,
                is_training2, out_eval, accuracy, num_batches)
            print(" TRAINING ACCURACY : " + str(train_accuracy))

            # Calculate dev accuracy
            dev_accuracy = _average_accuracy(
                sess, dev_features, dev_labels, eval_input, eval_label,
                is_training2, out_eval, accuracy, num_batches_dev)
            print(" DEV ACCURACY : " + str(dev_accuracy))

            # Write the last batch's summary, tagged with the epoch so
            # TensorBoard can plot it against training progress.
            if summaries is not None:
                writer.add_summary(summaries, global_step=epoch)
                print("Summary Written to Logdir")

            # Save model for each epoch
            make_dir_if_not_exists(model_dir)
            save_path = saver.save(sess, model_dir + "model" + str(epoch) + ".ckpt")
            print("Model saved in path: %s" % save_path)
    finally:
        # Flush events and shut the reader threads down even on failure.
        if writer is not None:
            writer.close()
        coord.request_stop()
        coord.join(threads)