Example #1
def train(log_dir, config):
	config.data_paths = config.data_paths

	data_dirs = [os.path.join(data_path, "data") \
			for data_path in config.data_paths]
	num_speakers = len(data_dirs)
	config.num_test = config.num_test_per_speaker * num_speakers

	if num_speakers > 1 and hparams.model_type not in ["deepvoice", "simple"]:
		raise Exception("[!] Unknown model_type for multi-speaker: {}".format(config.model_type))

	commit = get_git_commit() if config.git else 'None'
	checkpoint_path = os.path.join(log_dir, 'model.ckpt')

	log(' [*] git rev-parse HEAD:\n%s' % get_git_revision_hash())
	log('=' * 50)
	log(' [*] git diff:\n%s' % get_git_diff())
	log('=' * 50)
	log(' [*] Checkpoint path: %s' % checkpoint_path)
	log(' [*] Loading training data from: %s' % data_dirs)
	log(' [*] Using model: %s' % config.model_dir)
	log(hparams_debug_string())


	# Set Up DataFeeder
	coord = tf.train.Coordinator()
	with tf.variable_scope('datafeeder') as scope:
		train_feeder = DataFeeder(
			coord, data_dirs, hparams, config, 32,
			data_type='train', batch_size=hparams.batch_size)
		test_feeder = DataFeeder(
			coord, data_dirs, hparams, config, 8,
			data_type='test', batch_size=config.num_test)


	# Set up model:
	is_randomly_initialized = config.initialize_path is None
	global_step = tf.Variable(0, name='global_step', trainable=False)

	with tf.variable_scope('model') as scope:
		model = create_model(hparams)
		model.initialize(
			train_feeder.inputs, train_feeder.input_lengths,
			num_speakers, train_feeder.speaker_id,
			train_feeder.mel_targets, train_feeder.linear_targets,
			train_feeder.loss_coeff,
			is_randomly_initialized=is_randomly_initialized)

		model.add_loss()
		model.add_optimizer(global_step)
		train_stats = add_stats(model, scope_name='stats') # legacy

	with tf.variable_scope('model', reuse=True) as scope:
		test_model = create_model(hparams)
		test_model.initialize(
			test_feeder.inputs, test_feeder.input_lengths,
			num_speakers, test_feeder.speaker_id,
			test_feeder.mel_targets, test_feeder.linear_targets,
			test_feeder.loss_coeff, rnn_decoder_test_mode=True,
			is_randomly_initialized=is_randomly_initialized)
		test_model.add_loss()


	test_stats = add_stats(test_model, model, scope_name='test')
	test_stats = tf.summary.merge([test_stats, train_stats])

	#Bookkeeping:
	step = 0
	time_window = ValueWindow(100)
	loss_window = ValueWindow(100)
	saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

	sess_config = tf.ConfigProto(
		log_device_placement=False,
		allow_soft_placement=True)
	sess_config.gpu_options.allow_growth=True

	# Train part
	with tf.Session(config=sess_config) as sess:
		try:
			summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
			sess.run(tf.global_variables_initializer())

			if config.load_path:
				# Restore from a checkpoint if the user requested it.
				restore_path = get_most_recent_checkpoint(config.model_dir)
				saver.restore(sess, restore_path)
				log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
			elif config.initialize_path:
				restore_path = get_most_recent_checkpoint(config.initialize_path)
				saver.restore(sess, restore_path)
				log('Initialized from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)

				zero_step_assign = tf.assign(global_step, 0)
				sess.run(zero_step_assign)

				start_step = sess.run(global_step)
				log('='*50)
				log(' [*] Global step is reset to {}'. \
					format(start_step))
				log('='*50)
			else:
				log('Starting new training run at commit: %s' % commit, slack=True)

			start_step = sess.run(global_step)

			train_feeder.start_in_session(sess, start_step)
			test_feeder.start_in_session(sess, start_step)

			while not coord.should_stop():
				start_time = time.time()
				step, loss, opt = sess.run(
					[global_step, model.loss_without_coeff, model.optimize],
					feed_dict=model.get_dummy_feed_dict())

				time_window.append(time.time() - start_time)
				loss_window.append(loss)

				message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' %(
					step, time_window.average, loss, loss_window.average)
				log(message, slack=(step % config.checkpoint_interval == 0))

				if loss > 100 or math.isnan(loss):
					log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
					raise Exception('Loss Exploded')

				if step % config.summary_interval == 0:
					log('Writing summary at step: %d' % step)

					feed_dict = {
							**model.get_dummy_feed_dict(),
							**test_model.get_dummy_feed_dict()
					}
					summary_writer.add_summary(sess.run(
						test_stats, feed_dict=feed_dict), step)


				if step % config.checkpoint_interval == 0:
					log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
					saver.save(sess, checkpoint_path, global_step=step)

				if step % config.test_interval == 0:
					log('Saving audio and alignments...')
					num_test = config.num_test

					fetches = [
						model.inputs[:num_test],
						model.linear_outputs[:num_test],
						model.alignments[:num_test],
						test_model.inputs[:num_test],
						test_model.linear_outputs[:num_test],
						test_model.alignments[:num_test],
					]
					feed_dict = {
							**model.get_dummy_feed_dict(),
							**test_model.get_dummy_feed_dict()
					}

					sequences, spectrograms, alignments, \
						test_sequences, test_spectrograms, test_alignments = \
							sess.run(fetches, feed_dict=feed_dict)

					save_and_plot(sequences[:1], spectrograms[:1], alignments[:1],
						log_dir, step, loss, "train")
					save_and_plot(test_sequences, test_spectrograms, test_alignments,
						log_dir, step, loss, "test")


		except Exception as e:
			log('Exiting due to exception: %s' % e, slack=True)
			traceback.print_exc()
			coord.request_stop(e)
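
The running sec/step and loss averages above rely on a ValueWindow helper that is not shown in this example. A minimal sketch of such a helper, assuming only the append/average interface used above (the implementation details are an assumption):

class ValueWindow(object):
    """Rolling window over the most recent window_size values (a sketch,
    not the project's actual implementation)."""

    def __init__(self, window_size=100):
        self._window_size = window_size
        self._values = []

    def append(self, value):
        # Keep only the most recent window_size values.
        self._values = self._values[-(self._window_size - 1):] + [value]

    @property
    def average(self):
        return sum(self._values) / max(1, len(self._values))
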
def predict_slice_model(expid, outfile, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    _check_slicemodel(input_layers)

    # Print the architecture
    _print_architecture(top_layer)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems()
    }
    idx = T.lscalar('idx')

    givens = dict()

    for key in input_layers.keys():
        if key=="sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx], network_outputs + theano_printer.get_the_stuff_to_print(),
                                 givens=givens, on_unused_input="ignore",
                                 # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                 )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = range(1, num_chunks+1)

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys = xs_shared.keys(),
                              required_output_keys = ["patients", "slices"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time


    predictions = [{"patient": i+1,
                    "slices": {
                        slice_id: {
                            "systole": np.zeros((0,600)),
                            "diastole": np.zeros((0,600))
                        } for slice_id in data_loader.get_slice_ids_for_patient(i+1)
                    }
                   } for i in xrange(NUM_PATIENTS)]


    # Loop over data and generate predictions
    for e, test_data in izip(itertools.count(start=1), buffering.buffered_gen_threaded(create_test_gen())):
        print "  load testing data onto GPU"

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])


        patient_ids = test_data["output"]["patients"]
        slice_ids = test_data["output"]["slices"]
        print "  patients:", " ".join(map(str, patient_ids))
        print "  chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i] for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64')
            for idx, (patient_id, slice_id) in enumerate(
                    zip(patient_ids[b*config().batch_size:(b+1)*config().batch_size],
                        slice_ids[b*config().batch_size:(b+1)*config().batch_size])):
                if patient_id != 0:
                    index = patient_id-1
                    patient_data = predictions[index]
                    assert patient_id==patient_data["patient"]
                    patient_slice_data = patient_data["slices"][slice_id]
                    patient_slice_data["systole"] =  np.concatenate((patient_slice_data["systole"],  kaggle_systoles[idx:idx+1,:]),axis=0)
                    patient_slice_data["diastole"] = np.concatenate((patient_slice_data["diastole"], kaggle_diastoles[idx:idx+1,:]),axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    # Average predictions
    already_printed = False
    for prediction in predictions:
        for prediction_slice_id in prediction["slices"]:
            prediction_slice = prediction["slices"][prediction_slice_id]
            if prediction_slice["systole"].size>0 and prediction_slice["diastole"].size>0:
                average_method =  getattr(config(), 'tta_average_method', partial(np.mean, axis=0))
                prediction_slice["systole_average"] = average_method(prediction_slice["systole"])
                prediction_slice["diastole_average"] = average_method(prediction_slice["diastole"])
                try:
                    test_if_valid_distribution(prediction_slice["systole_average"])
                    test_if_valid_distribution(prediction_slice["diastole_average"])
                except:
                    if not already_printed:
                        print "WARNING: These distributions are not distributions"
                        already_printed = True
                    prediction_slice["systole_average"] = make_monotone_distribution(prediction_slice["systole_average"])
                    prediction_slice["diastole_average"] = make_monotone_distribution(prediction_slice["diastole_average"])


    print "Calculating training and validation set scores for reference"
    # Add CRPS scores to the predictions
    # Iterate over train and validation sets
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                      (train_patients_indices,  "train")]:
        # Iterate over patients in the set
        for patient in patient_ids:
            prediction = predictions[patient-1]
            # Iterate over the slices
            for slice_id in prediction["slices"]:
                prediction_slice = prediction["slices"][slice_id]
                if "systole_average" in prediction_slice:
                    assert patient == regular_labels[patient-1, 0]
                    error_sys = CRSP(prediction_slice["systole_average"], regular_labels[patient-1, 1])
                    prediction_slice["systole_CRPS"] = error_sys
                    prediction_slice["target_systole"] = regular_labels[patient-1, 1]
                    error_dia = CRSP(prediction_slice["diastole_average"], regular_labels[patient-1, 2])
                    prediction_slice["diastole_CRPS"] = error_dia
                    prediction_slice["target_diastole"] = regular_labels[patient-1, 2]
                    prediction_slice["CRPS"] = 0.5 * error_sys + 0.5 * error_dia


    print "dumping prediction file to %s" % outfile
    with open(outfile, 'w') as f:
        pickle.dump({
                        'metadata_path': metadata_path,
                        'configuration_file': config().__name__,
                        'git_revision_hash': utils.get_git_revision_hash(),
                        'experiment_id': expid,
                        'time_since_start': time_since_start,
                        'param_values': lasagne.layers.get_all_param_values(top_layer),
                        'predictions_per_slice': predictions,
                    }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"


    return
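
The averaging step above calls test_if_valid_distribution and make_monotone_distribution, which are defined elsewhere in the repository. A minimal sketch of what they might do, assuming each 600-value vector is meant to be a cumulative distribution function (both bodies are assumptions based on how they are used here):

import numpy as np

def make_monotone_distribution(distribution):
    # Clip to [0, 1] and enforce a non-decreasing CDF with a running maximum.
    return np.maximum.accumulate(np.clip(distribution, 0.0, 1.0))

def test_if_valid_distribution(distribution):
    # A valid CDF stays within [0, 1] and never decreases.
    assert np.all(distribution >= 0.0)
    assert np.all(distribution <= 1.0)
    assert np.all(np.diff(distribution) >= 0.0)
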
Example #3
def main():

    # Parameter #
    params = dict(
        high_pass_fraction=0.15,  # only frequencies higher than this fraction of the fourier domain image will pass
        low_pass_fraction=0.4  # only frequencies lower than this fraction of the fourier domain image will pass
    )
    #############

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_path",
                        default="./Input/demo",
                        help="Path to the folder containing the input images.")
    parser.add_argument(
        "--output_path",
        default="./Output/demo",
        help="Path to the folder which will contain the output.")
    parser.add_argument(
        "--param_file",
        default="",
        help="Name of a parameter file in the input folder. Will be used to override the local param dictionary.")
    args = parser.parse_args()

    # Preparation
    time_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    current_file = os.path.splitext(os.path.basename(__file__))[0]

    input_path = args.input_path
    output_path = os.path.join(args.output_path,
                               current_file + "_" + time_stamp)

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # set up logging
    logging.basicConfig(filename=os.path.join(output_path,
                                              current_file + '.log'),
                        level=logging.DEBUG,
                        format='%(asctime)s - %(levelname)s: %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')

    logging.info("Current git revision: {}".format(
        utils.get_git_revision_hash()))

    # override parameter if external ones are given
    param_path = os.path.join(input_path, args.param_file)
    if os.path.isfile(param_path):
        with open(param_path, "r") as param_file:
            params = json.load(param_file)
        logging.info("Using parameter given in {}".format(param_path))
    else:
        logging.info("Using local parameter")

    # dump used parameter
    with open(os.path.join(output_path, 'params.json'), 'w') as f:
        json.dump(
            params,
            f,
            sort_keys=True,
            indent=4,
        )
        f.write('\n')

    print("Start processing...")
    counter = 0
    # Loop through all images in input path
    for root, dirs, files in os.walk(input_path):
        for input_name in files:

            start = time.time()
            input_name_base = os.path.splitext(os.path.basename(input_name))[0]

            img_original = cv2.imread(os.path.join(input_path, input_name))
            if img_original is None:  # reading failed (e.g. file is not an image)
                continue

            img = cv2.cvtColor(img_original, cv2.COLOR_BGR2GRAY)

            low_pass, fourier_spectrum = low_pass_filter(img, **params)
            high_pass, _ = high_pass_filter(img, **params)
            middle_pass, _, middle_mask = middle_pass_filter(img, **params)

            # OTSU Thresholding
            low_pass = low_pass.astype(np.uint8)
            ret, low_pass_otsu = cv2.threshold(
                low_pass, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            middle_pass = middle_pass.astype(np.uint8)
            ret, middle_pass_otsu = cv2.threshold(
                middle_pass, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            cv2.imwrite(os.path.join(output_path, input_name), img_original)
            cv2.imwrite(
                os.path.join(output_path,
                             input_name_base + "_fourier_spectrum.tiff"),
                fourier_spectrum)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_low_pass.tiff"),
                low_pass)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_high_pass.tiff"),
                high_pass)
            cv2.imwrite(
                os.path.join(output_path,
                             input_name_base + "_middle_pass.tiff"),
                middle_pass)
            cv2.imwrite(
                os.path.join(output_path,
                             input_name_base + "_middle_mask.tiff"),
                middle_mask)
            cv2.imwrite(
                os.path.join(output_path,
                             input_name_base + "_middle_pass_otsu.tiff"),
                middle_pass_otsu)
            cv2.imwrite(
                os.path.join(output_path,
                             input_name_base + "_low_pass_otsu.tiff"),
                low_pass_otsu)

            duration = time.time() - start
            logging.info("Processed {0} (Duration: {1:.3f} s)".format(
                input_name, duration))
            counter += 1

    logging.info("Processed {} images in total".format(counter))
    print("Processing done. See log file in '{}' for more details".format(
        output_path))
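
low_pass_filter, high_pass_filter and middle_pass_filter are imported from elsewhere in the project. A minimal sketch of the low-pass variant, assuming the filters mask the centered Fourier spectrum at a radius given as a fraction of the image size and return the filtered image together with a spectrum for inspection (the body and return values are assumptions based on the calls above):

import numpy as np

def low_pass_filter(img, low_pass_fraction=0.4, **unused_params):
    # Transform to the Fourier domain and shift the zero frequency to the center.
    f = np.fft.fftshift(np.fft.fft2(img))
    rows, cols = img.shape
    cy, cx = rows // 2, cols // 2
    y, x = np.ogrid[:rows, :cols]
    radius = np.sqrt((y - cy) ** 2 + (x - cx) ** 2)
    # Keep only frequencies below the requested fraction of the half image size.
    mask = radius <= low_pass_fraction * min(cy, cx)
    spectrum = 20 * np.log(np.abs(f) + 1.0)
    filtered = np.abs(np.fft.ifft2(np.fft.ifftshift(f * mask)))
    return filtered, spectrum
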
Example #4
def main(
    args_lst,
    eid, experiment_path, out, valid_after,
    load_params, save_params,
    debug, track_log,
    n_cells, emb_size, x_include_score, no_train_emb,
    n_epochs, lr, opt_type, momentum,
    mb_size, mb_mult_data,
    oclf_n_hidden, oclf_n_layers, oclf_activation,
    rnn_n_layers,
    lstm_peepholes, lstm_bidi,
    p_drop, init_emb_from, input_n_layers, input_n_hidden,
    input_activation,
    eval_on_full_train, x_include_token_ftrs, enable_branch_exp, l1, l2,
    x_include_mlp, enable_token_supervision, model_type,
    ontology
):

    output_dir = init_env(out + os.path.basename(experiment_path))
    mon_train = TrainingStats()
    mon_valid = TrainingStats()
    mon_extreme_examples = TrainingStats()
    stats_obj = dict(
        train=mon_train.data,
        mon_extreme_examples=mon_extreme_examples.data,
        args=args_lst
    )
    logging.info('XTrack has been started.')
    logging.info('GIT rev: %s' % get_git_revision_hash())
    logging.info('Output dir: %s' % output_dir)
    logging.info('Initializing random seed to 271.')
    random.seed(271)
    logging.info('Argv: %s' % str(sys.argv))
    logging.info('Effective args:')
    for arg_name, arg_value in args_lst:
        logging.info('    %s: %s' % (arg_name, arg_value))
    logging.info('Experiment path: %s' % experiment_path)

    train_path = os.path.join(experiment_path, 'train.json')
    xtd_t = Data.load(train_path)

    valid_path = os.path.join(experiment_path, 'dev.json')
    xtd_v = Data.load(valid_path)

    slots = xtd_t.slots
    classes = xtd_t.classes
    class_groups = xtd_t.slot_groups

    t = time.time()

    logging.info('Building model: %s' % model_type)
    model = get_model(
        args_lst,
        eid, experiment_path, out, valid_after,
        load_params, save_params,
        debug, track_log,
        n_cells, emb_size, x_include_score, no_train_emb,
        n_epochs, lr, opt_type, momentum,
        mb_size, mb_mult_data,
        oclf_n_hidden, oclf_n_layers, oclf_activation,
        rnn_n_layers,
        lstm_peepholes, lstm_bidi,
        p_drop, init_emb_from, input_n_layers, input_n_hidden,
        input_activation,
        eval_on_full_train, x_include_token_ftrs, enable_branch_exp, l1, l2,
        x_include_mlp, enable_token_supervision, model_type,
        ontology, xtd_t
    )

    logging.info('Rebuilding took: %.1f' % (time.time() - t))

    if load_params:
        logging.info('Loading parameters from: %s' % load_params)
        model.load_params(load_params)

    onto = OntologyReader(ontology)
    tracker_valid = XTrack2DSTCTracker(xtd_v, [model], onto)
    tracker_train = XTrack2DSTCTracker(xtd_t, [model], onto)

    valid_data_y = model.prepare_data_train(xtd_v.sequences, slots)
    valid_data = model.prepare_data_predict(xtd_v.sequences, slots)
    if not eval_on_full_train:
        selected_train_seqs = []
        for i in range(100):
            ndx = random.randint(0, len(xtd_t.sequences) - 1)
            selected_train_seqs.append(xtd_t.sequences[ndx])
    else:
        selected_train_seqs = xtd_t.sequences

    # train_data = model.prepare_data_train(selected_train_seqs, slots)
    best_tracking_acc = 0.0
    seqs = list(xtd_t.sequences)
    seqs = seqs * mb_mult_data
    random.shuffle(seqs)
    minibatches = prepare_minibatches(seqs, mb_size, model, slots)
    minibatches = zip(itertools.count(), minibatches)
    logging.info('We have %d minibatches.' % len(minibatches))

    example_cntr = 0
    timestep_cntr = 0
    stats = TrainingStats()
    mb_histogram = defaultdict(int)
    mb_ids = range(len(minibatches))
    mb_to_go = []

    epoch = 0

    init_valid_loss = model._loss(*valid_data_y)
    logging.info('Initial valid loss: %.10f' % init_valid_loss)

    if not valid_after:
        valid_after = len(seqs)

    mb_loss = {}
    last_valid = 0
    last_inline_print = time.time()
    last_inline_print_cnt = 0
    best_track_metric = defaultdict(float)

    keep_on_training = True
    while keep_on_training:
        if len(mb_to_go) == 0:
            mb_to_go = list(mb_ids)
            epoch += 1

            if 0 < n_epochs < epoch:
                keep_on_training = False
                continue

        mb_ndx = random.choice(mb_to_go)
        mb_to_go.remove(mb_ndx)

        #mb_id, mb_data = random.choice(minibatches)
        mb_id, mb_data = minibatches[mb_ndx]
        mb_histogram[mb_ndx] += 1
        mb_done = 0
        t = time.time()
        (loss, update_ratio) = model._train(lr, *mb_data)
        mb_loss[mb_ndx] = loss
        t = time.time() - t
        stats.insert(loss=loss, update_ratio=update_ratio, time=t)

        x = mb_data[0]
        example_cntr += x.shape[1]
        timestep_cntr += x.shape[0]
        mb_done += 1

        if time.time() - last_inline_print > 1.0:
            last_inline_print = time.time()
            inline_print(
                "     %6d examples, %4d examples/s" % (
                    example_cntr,
                    example_cntr - last_inline_print_cnt
                )
            )
            last_inline_print_cnt = example_cntr

        if (example_cntr - last_valid) >= valid_after:
            inline_print("")
            last_valid = example_cntr
            params_file = os.path.join(
                output_dir, 'params.%.10d.p' % example_cntr
            )
            logging.info('Saving parameters: %s' % params_file)
            model.save_params(params_file)

            valid_loss = model._loss(*valid_data_y)
            update_ratio = stats.mean('update_ratio')

            _, track_score = tracker_valid.track(track_log)

            for metric, value in track_score.iteritems():
                logging.info('Valid %15s: %10.2f %%' % (metric, value * 100))
                best_track_metric[metric] = max(
                    value,
                    best_track_metric[metric]
                )
            for metric, value in best_track_metric.iteritems():
                logging.info('Best %15s:  %10.2f %%' % (metric, value * 100))
            logging.info('Train loss:         %10.2f' % stats.mean('loss'))
            logging.info('Mean update ratio:  %10.6f' % update_ratio)
            logging.info('Mean mb time:       %10.4f' % stats.mean('time'))
            logging.info('Epoch:              %10d (%d mb remain)' % (
                epoch,
                len(mb_to_go)
            ))
            logging.info('Example:            %10d' % example_cntr)

            mon_train.insert(
                time=time.time(),
                example=example_cntr,
                timestep_cntr=timestep_cntr,
                mb_id=mb_id,
                train_loss=stats.mean('loss'),
                valid_loss=valid_loss,
                update_ratio=stats.mean('update_ratio'),
                tracking_acc=track_score
            )

            stats_path = os.path.join(output_dir, 'stats.json')
            with open(stats_path, 'w') as f_out:
                json.dump(stats_obj, f_out)
                os.system(
                    'ln -f -s "%s" "xtrack2_vis/stats.json"' %
                    os.path.join('..', stats_path)
                )

            stats = TrainingStats()

    params_file = os.path.join(output_dir, 'params.final.p')
    logging.info('Saving final params to: %s' % params_file)
    model.save_params(params_file)

    return best_tracking_acc
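
inline_print is a small console helper used for the in-place progress line above. A sketch of what it could look like (the carriage-return behaviour is an assumption based on the usage):

import sys

def inline_print(text):
    # Rewrite the current console line so the examples/s counter updates in place.
    sys.stdout.write('\r' + text)
    sys.stdout.flush()
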
Example #5
def main(args_lst,
         eid, experiment_path, out, valid_after,
         load_params, save_params,
         debug, track_log,
         n_cells, emb_size, x_include_score, no_train_emb,
         n_epochs, lr, opt_type, momentum,
         mb_size, mb_mult_data,
         oclf_n_hidden, oclf_n_layers, oclf_activation,
         rnn_n_layers,
         lstm_peepholes, lstm_bidi,
         p_drop, init_emb_from, input_n_layers, input_n_hidden,
         input_activation,
         eval_on_full_train, x_include_token_ftrs, enable_branch_exp, l1, l2,
         x_include_mlp, enable_token_supervision, model_type):

    output_dir = init_env(out)
    mon_train = TrainingStats()
    mon_valid = TrainingStats()
    mon_extreme_examples = TrainingStats()
    stats_obj = dict(
        train=mon_train.data,
        mon_extreme_examples=mon_extreme_examples.data,
        args=args_lst
    )

    logging.info('XTrack has been started.')
    logging.info('GIT rev: %s' % get_git_revision_hash())
    logging.info('Output dir: %s' % output_dir)
    logging.info('Initializing random seed to 0.')
    random.seed(0)
    logging.info('Argv: %s' % str(sys.argv))
    logging.info('Effective args:')
    for arg_name, arg_value in args_lst:
        logging.info('    %s: %s' % (arg_name, arg_value))
    logging.info('Experiment path: %s' % experiment_path)

    train_path = os.path.join(experiment_path, 'train.json')
    xtd_t = Data.load(train_path)

    valid_path = os.path.join(experiment_path, 'dev.json')
    xtd_v = Data.load(valid_path)

    slots = xtd_t.slots
    classes = xtd_t.classes
    class_groups = xtd_t.slot_groups
    n_input_tokens = len(xtd_t.vocab)
    n_input_score_bins = len(xtd_t.score_bins)

    t = time.time()

    logging.info('Building model: %s' % model_type)
    if model_type == 'lstm':
        model = Model(slots=slots,
                      slot_classes=xtd_t.classes,
                      emb_size=emb_size,
                      no_train_emb=no_train_emb,
                      x_include_score=x_include_score,
                      x_include_token_ftrs=x_include_token_ftrs,
                      x_include_mlp=x_include_mlp,
                      n_input_score_bins=n_input_score_bins,
                      n_cells=n_cells,
                      n_input_tokens=n_input_tokens,
                      oclf_n_hidden=oclf_n_hidden,
                      oclf_n_layers=oclf_n_layers,
                      oclf_activation=oclf_activation,
                      debug=debug,
                      rnn_n_layers=rnn_n_layers,
                      lstm_peepholes=lstm_peepholes,
                      lstm_bidi=lstm_bidi,
                      opt_type=opt_type,
                      momentum=momentum,
                      p_drop=p_drop,
                      init_emb_from=init_emb_from,
                      vocab=xtd_t.vocab,
                      input_n_layers=input_n_layers,
                      input_n_hidden=input_n_hidden,
                      input_activation=input_activation,
                      token_features=None,
                      enable_branch_exp=enable_branch_exp,
                      token_supervision=enable_token_supervision,
                      l1=l1,
                      l2=l2
        )
    elif model_type == 'conv':
        model = SimpleConvModel(slots=slots,
                      slot_classes=xtd_t.classes,
                      emb_size=emb_size,
                      no_train_emb=no_train_emb,
                      x_include_score=x_include_score,
                      x_include_token_ftrs=x_include_token_ftrs,
                      x_include_mlp=x_include_mlp,
                      n_input_score_bins=n_input_score_bins,
                      n_cells=n_cells,
                      n_input_tokens=n_input_tokens,
                      oclf_n_hidden=oclf_n_hidden,
                      oclf_n_layers=oclf_n_layers,
                      oclf_activation=oclf_activation,
                      debug=debug,
                      rnn_n_layers=rnn_n_layers,
                      lstm_peepholes=lstm_peepholes,
                      lstm_bidi=lstm_bidi,
                      opt_type=opt_type,
                      momentum=momentum,
                      p_drop=p_drop,
                      init_emb_from=init_emb_from,
                      vocab=xtd_t.vocab,
                      input_n_layers=input_n_layers,
                      input_n_hidden=input_n_hidden,
                      input_activation=input_activation,
                      token_features=None,
                      enable_branch_exp=enable_branch_exp,
                      token_supervision=enable_token_supervision,
                      l1=l1,
                      l2=l2
        )
    elif model_type == 'baseline':
        model = BaselineModel(slots=slots,
                      slot_classes=xtd_t.classes,
                      oclf_n_hidden=oclf_n_hidden,
                      oclf_n_layers=oclf_n_layers,
                      oclf_activation=oclf_activation,
                      n_cells=n_cells,
                      debug=debug,
                      opt_type=opt_type,
                      momentum=momentum,
                      p_drop=p_drop,
                      vocab=xtd_t.vocab,
                      input_n_layers=input_n_layers,
                      input_n_hidden=input_n_hidden,
                      input_activation=input_activation,
                      token_features=None,
                      enable_branch_exp=enable_branch_exp,
                      token_supervision=enable_token_supervision,
                      l1=l1,
                      l2=l2
        )
    else:
        raise Exception()

    logging.info('Rebuilding took: %.1f' % (time.time() - t))

    if load_params:
        logging.info('Loading parameters from: %s' % load_params)
        model.load_params(load_params)

    tracker_valid = XTrack2DSTCTracker(xtd_v, [model])
    tracker_train = XTrack2DSTCTracker(xtd_t, [model])

    valid_data_y = model.prepare_data_train(xtd_v.sequences, slots)
    valid_data = model.prepare_data_predict(xtd_v.sequences, slots)
    if not eval_on_full_train:
        selected_train_seqs = []
        for i in range(100):
            ndx = random.randint(0, len(xtd_t.sequences) - 1)
            selected_train_seqs.append(xtd_t.sequences[ndx])
    else:
        selected_train_seqs = xtd_t.sequences

    train_data = model.prepare_data_train(selected_train_seqs, slots)
    joint_slots = ['joint_%s' % str(grp) for grp in class_groups.keys()]
    best_acc = {slot: 0 for slot in xtd_v.slots + joint_slots}
    best_acc_train = {slot: 0 for slot in xtd_v.slots + joint_slots}
    best_tracking_acc = 0.0
    n_valid_not_increased = 0
    et = None
    seqs = list(xtd_t.sequences)
    seqs = seqs * mb_mult_data
    random.shuffle(seqs)
    minibatches = prepare_minibatches(seqs, mb_size, model, slots)
    minibatches = zip(itertools.count(), minibatches)
    logging.info('We have %d minibatches.' % len(minibatches))

    example_cntr = 0
    timestep_cntr = 0
    stats = TrainingStats()
    mb_histogram = defaultdict(int)
    mb_ids = range(len(minibatches))
    mb_to_go = []
    mb_bad = []

    epoch = 0

    init_valid_loss = model._loss(*valid_data_y)
    logging.info('Initial valid loss: %.10f' % init_valid_loss)

    if not valid_after:
        valid_after = len(seqs)

    mb_loss = {}
    last_valid = 0
    last_inline_print = time.time()
    last_inline_print_cnt = 0
    best_track_acc = defaultdict(float)
    while True:
        if len(mb_to_go) == 0:
            mb_to_go = list(mb_ids)
            epoch += 1

            if n_epochs > 0 and n_epochs < epoch:
                break

        mb_ndx = random.choice(mb_to_go)
        mb_to_go.remove(mb_ndx)

        #mb_id, mb_data = random.choice(minibatches)
        mb_id, mb_data = minibatches[mb_ndx]
        mb_histogram[mb_ndx] += 1
        #if et is not None:
        #    epoch_time = time.time() - et
        #else:
        #    epoch_time = -1.0
        #logging.info('Epoch #%d (last epoch took %.1fs) (seen %d examples)' %
        #             (i, epoch_time, example_cntr ))

        #et = time.time()
        mb_done = 0
        t = time.time()
        (loss, update_ratio) = model._train(lr, *mb_data)
        mb_loss[mb_ndx] = loss
        t = time.time() - t
        stats.insert(loss=loss, update_ratio=update_ratio, time=t)

        x = mb_data[0]
        example_cntr += x.shape[1]
        timestep_cntr += x.shape[0]
        mb_done += 1

        if time.time() - last_inline_print > 1.0:
            last_inline_print = time.time()
            inline_print("     %6d examples, %4d examples/s" % (
                example_cntr,
                example_cntr - last_inline_print_cnt
            ))
            last_inline_print_cnt = example_cntr

        if (example_cntr - last_valid) >= valid_after:
            inline_print("")
            last_valid = example_cntr
            params_file = os.path.join(output_dir, 'params.%.10d.p' %
                                       example_cntr)
            logging.info('Saving parameters: %s' % params_file)
            model.save_params(params_file)

            valid_loss = model._loss(*valid_data_y)
            update_ratio = stats.mean('update_ratio')

            _, track_score = tracker_valid.track(track_log)

            for group, accuracy in sorted(track_score.iteritems(),
                                          key=lambda (g, _): g):
                logging.info('Valid acc %15s: %10.2f %%'
                             % (group, accuracy * 100))
                best_track_acc[group] = max(accuracy, best_track_acc[group])
            for group in sorted(track_score, key=lambda g: g):
                logging.info('Best acc %15s:  %10.2f %%'
                             % (group, best_track_acc[group] * 100))
            logging.info('Train loss:         %10.2f' % stats.mean('loss'))
            logging.info('Mean update ratio:  %10.6f' % update_ratio)
            logging.info('Mean mb time:       %10.4f' % stats.mean('time'))
            logging.info('Epoch:              %10d (%d mb remain)' % (epoch,
                                                                     len(mb_to_go)))
            logging.info('Example:            %10d' % example_cntr)


            mon_train.insert(
                time=time.time(),
                example=example_cntr,
                timestep_cntr=timestep_cntr,
                mb_id=mb_id,
                train_loss=stats.mean('loss'),
                valid_loss=valid_loss,
                update_ratio=stats.mean('update_ratio'),
                tracking_acc=track_score
            )

            stats_path = os.path.join(output_dir, 'stats.json')
            with open(stats_path, 'w') as f_out:
                json.dump(stats_obj, f_out)
                os.system('ln -f -s "%s" "xtrack2_vis/stats.json"' %
                          os.path.join('..', stats_path))

            stats = TrainingStats()

    params_file = os.path.join(output_dir, 'params.final.p')
    logging.info('Saving final params to: %s' % params_file)
    model.save_params(params_file)

    return best_tracking_acc
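
prepare_minibatches is defined elsewhere; based on how its result is consumed above (each entry is unpacked straight into model._train), a minimal sketch could look like this (the chunking strategy is an assumption):

def prepare_minibatches(seqs, mb_size, model, slots):
    # Split the shuffled sequences into chunks of mb_size and let the model
    # build the argument tuple expected by model._train.
    minibatches = []
    for start in range(0, len(seqs), mb_size):
        chunk = seqs[start:start + mb_size]
        minibatches.append(model.prepare_data_train(chunk, slots))
    return minibatches
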
Example #6
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print("WARNING: not running in fast mode!")

    print("Using")
    print("  %s" % metadata_path)
    print("To generate")
    print("  %s" % prediction_path)
    print("  %s" % submission_path)

    print("Build model")
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=list(output_layers.values()))
    all_layers = lasagne.layers.get_all_layers(top_layer)
    num_params = lasagne.layers.count_params(top_layer)
    print("  number of parameters: %d" % num_params)
    print(string.ljust("  layer output shapes:", 36), end=' ')
    print(string.ljust("#params:", 10), end=' ')
    print("output shape:")
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print("    %s %s %s" % (name, num_param, layer.output_shape))

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape),
                                        dtype='float32')
        for (key, l_in) in input_layers.items()
    }
    idx = T.lscalar('idx')

    givens = dict()

    for key in list(input_layers.keys()):
        if key == "sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx * config(
            ).sunny_batch_size:(idx + 1) * config().sunny_batch_size]
        else:
            givens[input_layers[key].
                   input_var] = xs_shared[key][idx *
                                               config().batch_size:(idx + 1) *
                                               config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer,
                                         deterministic=True)
        for network_output_layer in list(output_layers.values())
    ]

    iter_test = theano.function(
        [idx],
        network_outputs + theano_printer.get_the_stuff_to_print(),
        givens=givens,
        on_unused_input="ignore",
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    print("Load model parameters for resuming")
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer,
                                        resume_metadata['param_values'])
    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = list(range(1, num_chunks + 1))

    data_loader.filter_patient_folders()

    create_test_gen = partial(
        config().create_test_gen,
        required_input_keys=list(xs_shared.keys()),
        required_output_keys=[
            "patients", "classification_correction_function"
        ],
    )

    print("Generate predictions with this model")
    start_time = time.time()
    prev_time = start_time

    predictions = [{
        "patient": i + 1,
        "systole": np.zeros((0, 600)),
        "diastole": np.zeros((0, 600))
    } for i in range(NUM_PATIENTS)]

    for e, test_data in zip(itertools.count(start=1),
                            buffering.buffered_gen_threaded(
                                create_test_gen())):
        print("  load testing data onto GPU")

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])

        patient_ids = test_data["output"]["patients"]
        classification_correction = test_data["output"][
            "classification_correction_function"]
        print("  patients:", " ".join(map(str, patient_ids)))
        print("  chunk %d/%d" % (e, num_chunks))

        for b in range(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {
                list(output_layers.keys())[i]: network_outputs[i]
                for i in range(len(output_layers))
            }
            kaggle_systoles, kaggle_diastoles = config().postprocess(
                network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype(
                'float64'), kaggle_diastoles.astype('float64')
            for idx, patient_id in enumerate(
                    patient_ids[b * config().batch_size:(b + 1) *
                                config().batch_size]):
                if patient_id != 0:
                    index = patient_id - 1
                    patient_data = predictions[index]
                    assert patient_id == patient_data["patient"]

                    kaggle_systole = kaggle_systoles[idx:idx + 1, :]
                    kaggle_diastole = kaggle_diastoles[idx:idx + 1, :]
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(
                        kaggle_diastole).all()
                    kaggle_systole = classification_correction[
                        b * config().batch_size + idx](kaggle_systole)
                    kaggle_diastole = classification_correction[
                        b * config().batch_size + idx](kaggle_diastole)
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(
                        kaggle_diastole).all()
                    patient_data["systole"] = np.concatenate(
                        (patient_data["systole"], kaggle_systole), axis=0)
                    patient_data["diastole"] = np.concatenate(
                        (patient_data["diastole"], kaggle_diastole), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (
            float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print("  %s since start (%.2f s)" %
              (utils.hms(time_since_start), time_since_prev))
        print("  estimated %s to go (ETA: %s)" %
              (utils.hms(est_time_left), eta_str))
        print()

    already_printed = False
    for prediction in predictions:
        if prediction["systole"].size > 0 and prediction["diastole"].size > 0:
            average_method = getattr(config(), 'tta_average_method',
                                     partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(
                prediction["systole"])
            prediction["diastole_average"] = average_method(
                prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except:
                if not already_printed:
                    print("WARNING: These distributions are not distributions")
                    already_printed = True
                prediction["systole_average"] = make_monotone_distribution(
                    prediction["systole_average"])
                prediction["diastole_average"] = make_monotone_distribution(
                    prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])

    print("Calculating training and validation set scores for reference")

    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                  (train_patients_indices, "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient - 1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient - 1, 0]
                error = CRSP(prediction["systole_average"],
                             regular_labels[patient - 1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"],
                             regular_labels[patient - 1, 2])
                errors.append(error)
        if len(errors) > 0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print("  %s kaggle loss: %f" %
                  (string.rjust(set_name, 12), estimated_CRSP))
            validation_dict[set_name] = estimated_CRSP
        else:
            print("  %s kaggle loss: not calculated" %
                  (string.rjust(set_name, 12)))

    print("dumping prediction file to %s" % prediction_path)
    with open(prediction_path, 'wb') as f:
        pickle.dump(
            {
                'metadata_path': metadata_path,
                'prediction_path': prediction_path,
                'submission_path': submission_path,
                'configuration_file': config().__name__,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'time_since_start': time_since_start,
                'param_values': lasagne.layers.get_all_param_values(top_layer),
                'predictions': predictions,
                'validation_errors': validation_dict,
            }, f, pickle.HIGHEST_PROTOCOL)
    print("prediction file dumped")

    print("dumping submission file to %s" % submission_path)
    with open(submission_path, 'w') as csvfile:
        csvwriter = csv.writer(csvfile,
                               delimiter=',',
                               quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(['Id'] + ['P%d' % i for i in range(600)])
        for prediction in predictions:
            # the submission only has patients 501 to 700
            if prediction["patient"] in data_loader.test_patients_indices:
                if "diastole_average" not in prediction or "systole_average" not in prediction:
                    raise Exception("Not all test-set patients were predicted")
                csvwriter.writerow(["%d_Diastole" % prediction["patient"]] + [
                    "%.18f" % p
                    for p in prediction["diastole_average"].flatten()
                ])
                csvwriter.writerow(["%d_Systole" % prediction["patient"]] + [
                    "%.18f" % p
                    for p in prediction["systole_average"].flatten()
                ])
    print("submission file dumped")

    return
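
The reference scores above come from a CRSP helper. A sketch of the continuous ranked probability score as defined for this competition, assuming each prediction is a cumulative distribution over 600 volume bins (the helper's exact implementation is not shown here):

import numpy as np

def CRSP(prediction_cdf, true_volume):
    # Mean squared difference between the predicted CDF and the step function
    # of the true volume over the 600 ml thresholds.
    thresholds = np.arange(600)
    heaviside = (thresholds >= true_volume).astype('float64')
    return np.mean((np.asarray(prediction_cdf).flatten() - heaviside) ** 2)
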
Example #7
def train(log_dir, config):
    config.data_paths = config.data_paths  # data paths from the parsed command-line arguments : default='datasets/kr_example'

    data_dirs = [os.path.join(data_path, "data") \
            for data_path in config.data_paths]
    num_speakers = len(data_dirs)  # number of speakers to train on: 1 for a single-speaker model, 2 for a multi-speaker model
    config.num_test = config.num_test_per_speaker * num_speakers

    if num_speakers > 1 and hparams.model_type not in ["deepvoice", "simple"]:  # for multi-speaker training the model type must be "deepvoice" or "simple"
        raise Exception("[!] Unknown model_type for multi-speaker: {}".format(config.model_type))  # note: hparams.model_type is checked, but config.model_type is formatted (a typo in the original)

    commit = get_git_commit() if config.git else 'None'  # git-related, can be ignored
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')  # set checkpoint_path - path of the model.ckpt file

    log(' [*] git rev-parse HEAD:\n%s' % get_git_revision_hash())  # git log
    log('='*50)  # separator line
    #log(' [*] git diff:\n%s' % get_git_diff())
    log('='*50)  # separator line
    log(' [*] Checkpoint path: %s' % checkpoint_path)  # print the checkpoint path
    log(' [*] Loading training data from: %s' % data_dirs)
    log(' [*] Using model: %s' % config.model_dir)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()  # coordinator for the data-feeder threads
    with tf.variable_scope('datafeeder') as scope:
        train_feeder = DataFeeder(
                coord, data_dirs, hparams, config, 32,
                data_type='train', batch_size=hparams.batch_size)
        # def __init__(self, coordinator, data_dirs, hparams, config, batches_per_group, data_type, batch_size):
        test_feeder = DataFeeder(
                coord, data_dirs, hparams, config, 8,
                data_type='test', batch_size=config.num_test)

    # Set up model:
    is_randomly_initialized = config.initialize_path is None
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('model') as scope:
        model = create_model(hparams)  # create the Tacotron model
        model.initialize(
                train_feeder.inputs, train_feeder.input_lengths,
                num_speakers,  train_feeder.speaker_id,
                train_feeder.mel_targets, train_feeder.linear_targets,
                train_feeder.loss_coeff,
                is_randomly_initialized=is_randomly_initialized)

        model.add_loss()
        model.add_optimizer(global_step)
        train_stats = add_stats(model, scope_name='stats') # legacy

    with tf.variable_scope('model', reuse=True) as scope:
        test_model = create_model(hparams)  # create the Tacotron test model
        test_model.initialize(
                test_feeder.inputs, test_feeder.input_lengths,
                num_speakers, test_feeder.speaker_id,
                test_feeder.mel_targets, test_feeder.linear_targets,
                test_feeder.loss_coeff, rnn_decoder_test_mode=True,
                is_randomly_initialized=is_randomly_initialized)
        test_model.add_loss()

    test_stats = add_stats(test_model, model, scope_name='test')  # record values such as the model loss to TensorBoard; test_model is passed as model and model as model2
    test_stats = tf.summary.merge([test_stats, train_stats])

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)  # ValueWindow class with window_size = 100
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=None, keep_checkpoint_every_n_hours=2)  # auto-save every 2 hours; checkpoints are never deleted

    sess_config = tf.ConfigProto(
            log_device_placement=False,  # log_device_placement reports which device each op is placed on
            allow_soft_placement=True)  # with allow_soft_placement=False this errors out when no GPU is available
    sess_config.gpu_options.allow_growth=True  # let GPU memory usage grow as needed

    # Train!
    #with tf.Session(config=sess_config) as sess:
    with tf.Session() as sess:  # everything inside this with-block runs on the CPU or GPU
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)  # write evaluated summary ops and the TensorBoard graph to log_dir
            sess.run(tf.global_variables_initializer())  # once the dataset is loaded and the graph is fully defined, initialize the variables and start training

            if config.load_path:  # if a path to previously saved settings was given
                # Restore from a checkpoint if the user requested it.
                restore_path = get_most_recent_checkpoint(config.model_dir)  # path of the most recently saved checkpoint
                saver.restore(sess, restore_path)  # restore the values from restore_path
                log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)  # log via git and Slack
            elif config.initialize_path:  # if initializing from previously saved settings was requested
                restore_path = get_most_recent_checkpoint(config.initialize_path)  # most recently saved checkpoint under the given path
                saver.restore(sess, restore_path)  # restore the values from restore_path
                log('Initialized from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)  # log via git and Slack

                zero_step_assign = tf.assign(global_step, 0)  # op that resets the global_step variable to 0
                sess.run(zero_step_assign)  # run the reset op

                start_step = sess.run(global_step)  # start counting from the (now reset) global_step value
                log('='*50)
                log(' [*] Global step is reset to {}'. \
                        format(start_step))  # i.e. report that the starting step was reset to 0
                log('='*50)
            else:
                log('Starting new training run at commit: %s' % commit, slack=True)  # no previous checkpoint was requested, so log that a new training run is starting

            start_step = sess.run(global_step)  # fetch the step to start from

            train_feeder.start_in_session(sess, start_step)
            test_feeder.start_in_session(sess, start_step)

            while not coord.should_stop():  # as long as the feeder threads have not been asked to stop
                start_time = time.time()  # record the start time (seconds since the Unix epoch, UTC)
                step, loss, opt = sess.run(
                        [global_step, model.loss_without_coeff, model.optimize],
                        feed_dict=model.get_dummy_feed_dict())  # step is set from global_step; the loss value is ...

                time_window.append(time.time() - start_time)
                loss_window.append(loss)

                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                        step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % config.checkpoint_interval == 0))

                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % config.summary_interval == 0:
                    log('Writing summary at step: %d' % step)

                    feed_dict = {
                            **model.get_dummy_feed_dict(),
                            **test_model.get_dummy_feed_dict()
                    }
                    summary_writer.add_summary(sess.run(
                            test_stats, feed_dict=feed_dict), step)

                if step % config.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)

                if step % config.test_interval == 0:
                    log('Saving audio and alignment...')
                    num_test = config.num_test

                    fetches = [
                            model.inputs[:num_test],
                            model.linear_outputs[:num_test],
                            model.alignments[:num_test],
                            test_model.inputs[:num_test],
                            test_model.linear_outputs[:num_test],
                            test_model.alignments[:num_test],
                    ]
                    feed_dict = {
                            **model.get_dummy_feed_dict(),
                            **test_model.get_dummy_feed_dict()
                    }

                    sequences, spectrograms, alignments, \
                            test_sequences, test_spectrograms, test_alignments = \
                                    sess.run(fetches, feed_dict=feed_dict)

                    save_and_plot(sequences[:1], spectrograms[:1], alignments[:1],
                            log_dir, step, loss, "train")
                    save_and_plot(test_sequences, test_spectrograms, test_alignments,
                            log_dir, step, loss, "test")

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
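
The ValueWindow objects used above (time_window, loss_window) are not defined in this snippet. A minimal sketch of such a bounded running-average helper, assuming only the append/average interface seen in the code:

from collections import deque

class ValueWindow:
    """Keep the last `window_size` values and expose their running average."""
    def __init__(self, window_size=100):
        self._values = deque(maxlen=window_size)

    def append(self, x):
        self._values.append(x)

    @property
    def average(self):
        return sum(self._values) / max(len(self._values), 1)
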
Example #8
0
        prev_time = now
        est_time_left = time_since_start * (config().max_nchunks - chunk_idx +
                                            1.) / (chunk_idx + 1. -
                                                   start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print("  %s since start (%.2f s)" %
              (utils.hms(time_since_start), time_since_prev))
        print("  estimated %s to go (ETA: %s)" %
              (utils.hms(est_time_left), eta_str))
        print()

    if ((chunk_idx + 1) % config().save_every) == 0:
        print()
        print('Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks))
        print('Saving metadata, parameters')

        with open(metadata_path, 'w') as f:
            pickle.dump(
                {
                    'configuration_file': config_name,
                    'git_revision_hash': utils.get_git_revision_hash(),
                    'experiment_id': expid,
                    'chunks_since_start': chunk_idx,
                    'losses_eval_train': losses_eval_train,
                    'losses_eval_valid': losses_eval_valid,
                    'param_values': nn.layers.get_all_param_values(model.l_out)
                }, f, pickle.HIGHEST_PROTOCOL)
            print('  saved to %s' % metadata_path)
            print()
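
The ETA printout above extrapolates the remaining time from the average time per processed chunk. A self-contained sketch of that arithmetic (utils.hms and the config() accessor are left out; the function name here is illustrative):

import datetime

def estimate_eta(time_since_start, chunk_idx, start_chunk_idx, max_nchunks):
    # average time per chunk so far, extrapolated over the chunks that remain
    chunks_done = chunk_idx + 1.0 - start_chunk_idx
    chunks_left = max_nchunks - (chunk_idx + 1.0)
    est_time_left = time_since_start * chunks_left / chunks_done
    eta = datetime.datetime.now() + datetime.timedelta(seconds=est_time_left)
    return est_time_left, eta.strftime("%c")
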
Example #9
0
def train_model(expid):
    """
    This function trains the model, and will use the name expid to store and report the results
    :param expid: the name
    :return:
    """
    metadata_path = MODEL_PATH + "%s.pkl" % expid

    # Running with an optimizer other than fast_run is very slow, but might be better for debugging.
    # Make sure you don't leave it on accidentally!
    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Build model"
    # Get the input and output layers of our model
    interface_layers = config.build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]

    # merge all output layers into a fictional dummy layer which is not actually used
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())
    # get all the trainable parameters from the model
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    # do not train beyond the layers in cutoff_gradients. Remove all their parameters from the optimization process
    if "cutoff_gradients" in interface_layers:
        submodel_params = [
            param for value in interface_layers["cutoff_gradients"]
            for param in lasagne.layers.get_all_params(value)
        ]
        all_params = [p for p in all_params if p not in submodel_params]

    # some parameters might already be pretrained! Load their values from the requested configuration name.
    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers[
                "pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings=layers_dict.values())
            lasagne.layers.set_all_param_values(
                pretrained_top_layer,
                pretrained_resume_metadata['param_values'])

    # Count all the parameters we are actually optimizing, and visualize what the model looks like.

    print string.ljust("  layer output shapes:", 26),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"

    def comma_seperator(v):
        return '{:,.0f}'.format(v)

    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 22)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(comma_seperator(num_param), 10)
        num_size = string.ljust(
            comma_seperator(np.prod(layer.output_shape[1:])), 10)
        print "    %s %s %s %s" % (name, num_param, num_size,
                                   layer.output_shape)

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])
    print "  number of parameters:", comma_seperator(num_params)

    # Build all the objectives requested by the configuration
    objectives = config.build_objectives(interface_layers)

    train_losses_theano = {
        key: ob.get_loss()
        for key, ob in objectives["train"].iteritems()
    }

    validate_losses_theano = {
        key: ob.get_loss(deterministic=True)
        for key, ob in objectives["validate"].iteritems()
    }

    # Create the Theano variables necessary to interface with the models
    # the input:
    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape),
                                        dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    # the output:
    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim,
                                        dtype=target_var.dtype)
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    # Set up the learning rate schedule
    learning_rate_schedule = config.learning_rate_schedule
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    # We only work on one batch at the time on our chunk. Set up the Theano code which does this
    idx = T.lscalar('idx')  # index of the batch we are currently processing within our chunk of data

    givens = dict()
    for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                   objectives["validate"].iteritems()):
        for (key, target_var) in ob.target_vars.iteritems():
            givens[target_var] = ys_shared[key][idx *
                                                config.batch_size:(idx + 1) *
                                                config.batch_size]

    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx *
                                                config.batch_size:(idx + 1) *
                                                config.batch_size]

    # sum over the losses of the objective we optimize. We will optimize this sum (either minimize or maximize)
    # sum makes the learning rate independent of batch size!
    if hasattr(config, "dont_sum_losses") and config.dont_sum_losses:
        train_loss_theano = T.mean(train_losses_theano["objective"])
    else:
        train_loss_theano = T.sum(train_losses_theano["objective"]) * (
            -1 if objectives["train"]["objective"].optimize == MAXIMIZE else 1)

    # build the update step for Theano
    updates = config.build_updates(train_loss_theano, all_params,
                                   learning_rate)

    if hasattr(config, "print_gradnorm") and config.print_gradnorm:
        all_grads = theano.grad(train_loss_theano,
                                all_params,
                                disconnected_inputs='warn')
        grad_norm = T.sqrt(T.sum([(g**2).sum() for g in all_grads]) + 1e-9)
        grad_norm.name = "grad_norm"
        theano_printer.print_me_this("  grad norm", grad_norm)
        # train_losses_theano["grad_norm"] = grad_norm

    # Compile the Theano function of your model+objective
    print "Compiling..."
    iter_train = theano.function(
        [idx],
        train_losses_theano.values() + theano_printer.get_the_stuff_to_print(),
        givens=givens,
        on_unused_input="ignore",
        updates=updates,
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    if hasattr(config, "print_gradnorm") and config.print_gradnorm:
        del theano_printer._stuff_to_print[-1]

    # For validation, we also like to have something which returns the output of our model without the objective
    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer,
                                         deterministic=True)
        for network_output_layer in output_layers.values()
    ]
    iter_predict = theano.function([idx],
                                   network_outputs +
                                   theano_printer.get_the_stuff_to_print(),
                                   givens=givens,
                                   on_unused_input="ignore")

    # The data loader will need to know which kinds of data it actually needs to load
    # collect all the necessary tags for the model.
    required_input = {
        key: l_in.output_shape
        for (key, l_in) in input_layers.iteritems()
    }
    required_output = {
        key: None  # size is not needed
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    # The data loaders need to prepare before they should start
    # This is usually where the data is loaded from disk onto memory
    print "Preparing dataloaders"
    config.training_data.prepare()
    for validation_data in config.validation_data.values():
        validation_data.prepare()

    print "Will train for %s epochs" % config.training_data.epochs

    # If this is the second time we run this configuration, we might need to load the results of the previous
    # optimization. Check if this is the case, and load the parameters and stuff. If not, start from zero.
    if config.restart_from_save and os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer,
                                            resume_metadata['param_values'])
        start_chunk_idx = resume_metadata['chunks_since_start'] + 1

        # set lr to the correct value
        current_lr = np.float32(
            utils.current_learning_rate(learning_rate_schedule,
                                        start_chunk_idx))
        print "  setting learning rate to %.7f" % current_lr
        learning_rate.set_value(current_lr)
        losses = resume_metadata['losses']
        config.training_data.skip_first_chunks(start_chunk_idx)
    else:
        start_chunk_idx = 0
        losses = dict()
        losses[TRAINING] = dict()
        losses[VALIDATION] = dict()
        for loss_name in train_losses_theano.keys():
            losses[TRAINING][loss_name] = list()

        for dataset_name in config.validation_data.keys():
            losses[VALIDATION][dataset_name] = dict()
            for loss_name in validate_losses_theano.keys():
                losses[VALIDATION][dataset_name][loss_name] = list()

    # Make a data generator which returns preprocessed chunks of data which are fed to the model
    # Note that this is a generator object! It is a special kind of iterator.
    chunk_size = config.batches_per_chunk * config.batch_size

    # Weight normalization
    if hasattr(config, "init_weight_norm") and not config.restart_from_save:
        theano_printer._stuff_to_print = []
        from theano_utils.weight_norm import train_weight_norm
        train_weight_norm(config, output_layers, all_layers, idx, givens,
                          xs_shared, chunk_size, required_input,
                          required_output)

    training_data_generator = buffering.buffered_gen_threaded(
        config.training_data.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output=required_output,
        ))

    # Estimate the number of batches we will train for.
    chunks_train_idcs = itertools.count(start_chunk_idx)
    if config.training_data.epochs:
        num_chunks_train = int(1.0 * config.training_data.epochs *
                               config.training_data.number_of_samples /
                               (config.batch_size * config.batches_per_chunk))
    else:
        num_chunks_train = None

    # Start the timer objects
    start_time, prev_time = None, None
    print "Loading first chunks"
    data_load_time = Timer()
    gpu_time = Timer()

    #========================#
    # This is the train loop #
    #========================#
    data_load_time.start()
    for e, train_data in izip(chunks_train_idcs, training_data_generator):
        data_load_time.stop()
        if start_time is None:
            start_time = time.time()
            prev_time = start_time

        print
        if num_chunks_train:
            print "Chunk %d/%d" % (e + 1, num_chunks_train)
        else:
            print "Chunk %d" % (e + 1)
        print "=============="
        print "  %s" % config.__name__

        # Estimate the current epoch we are at
        epoch = (1.0 * config.batch_size * config.batches_per_chunk * (e + 1) /
                 config.training_data.number_of_samples)
        if epoch >= 0.1:
            print "  Epoch %.1f/%s" % (epoch, str(config.training_data.epochs))
        else:
            print "  Epoch %.0e/%s" % (epoch, str(config.training_data.epochs))

        # for debugging the data loader, it might be useful to dump everything it loaded and analyze it.
        if config.dump_network_loaded_data:
            pickle.dump(train_data,
                        open("data_loader_dump_train_%d.pkl" % e, "wb"))

        # Update the learning rate for the new epoch number
        for key, rate in learning_rate_schedule.iteritems():
            if epoch >= key:
                lr = np.float32(rate)
                learning_rate.set_value(lr)
        print "  learning rate %.0e" % lr

        # Move this data from the data loader onto the Theano variables
        for key in xs_shared:
            xs_shared[key].set_value(train_data["input"][key])

        for key in ys_shared:
            if key not in train_data["output"]:
                raise Exception(
                    "You forgot to add key %s to OUTPUT_DATA_SIZE_TYPE in your data loader"
                    % key)
            ys_shared[key].set_value(train_data["output"][key])

        # loop over all the batches in one chunk, and keep the losses
        chunk_losses = np.zeros((len(train_losses_theano), 0))
        for b in xrange(config.batches_per_chunk):
            gpu_time.start()
            th_result = iter_train(b)
            gpu_time.stop()

            resulting_losses = np.stack(th_result[:len(train_losses_theano)],
                                        axis=0)

            # these are not needed anyway, just to make Theano call the print function
            # stuff_to_print = th_result[-len(theano_printer.get_the_stuff_to_print()):]
            # print resulting_losses.shape, chunk_losses.shape
            chunk_losses = np.concatenate((chunk_losses, resulting_losses),
                                          axis=1)

        # check if we found NaN's. When there are NaN's we might as well exit.
        utils.detect_nans(chunk_losses, xs_shared, ys_shared, all_params)

        # Average our losses, and print them.
        mean_train_loss = np.mean(chunk_losses, axis=1)
        for loss_name, loss in zip(train_losses_theano.keys(),
                                   mean_train_loss):
            losses[TRAINING][loss_name].append(loss)
            print string.rjust(loss_name + ":", 15), "%.6f" % loss

        # Now, we will do validation. We do this about every config.epochs_per_validation epochs.
        # We also always validate at the end of training.
        validate_every = max(
            int((config.epochs_per_validation *
                 config.training_data.number_of_samples) /
                (config.batch_size * config.batches_per_chunk)), 1)

        if ((e + 1) % validate_every) == 0 or (num_chunks_train
                                               and e + 1 >= num_chunks_train):
            print
            print "  Validating "

            # We might test on multiple datasets, such as the Train set, Validation set, ...
            for dataset_name, dataset_generator in config.validation_data.iteritems(
            ):

                # Start loading the validation data!
                validation_chunk_generator = dataset_generator.generate_batch(
                    chunk_size=chunk_size,
                    required_input=required_input,
                    required_output=required_output,
                )

                print "  %s (%d/%d samples)" % (
                    dataset_name,
                    dataset_generator.number_of_samples_in_iterator,
                    dataset_generator.number_of_samples)
                print "  -----------------------"

                # If there are no validation samples, don't bother validating.
                if dataset_generator.number_of_samples == 0:
                    continue

                validation_predictions = None

                # Keep the labels of the validation data for later.
                output_keys_to_store = set()
                losses_to_store = dict()
                for key, ob in objectives["validate"].iteritems():
                    if ob.mean_over_samples:
                        losses_to_store[key] = None
                    else:
                        output_keys_to_store.add(ob.target_key)
                chunk_labels = {k: None for k in output_keys_to_store}
                store_network_output = (len(output_keys_to_store) > 0)

                # loop over all validation data chunks
                data_load_time.start()
                for validation_data in buffering.buffered_gen_threaded(
                        validation_chunk_generator):
                    data_load_time.stop()
                    num_batches_chunk_eval = config.batches_per_chunk

                    # set the validation data to the required Theano variables. Note, there is no
                    # use setting the output variables, as we do not have labels of the validation set!
                    for key in xs_shared:
                        xs_shared[key].set_value(validation_data["input"][key])

                    # store all the output keys required for finding the validation error
                    for key in output_keys_to_store:
                        new_data = validation_data["output"][
                            key][:validation_data["valid_samples"]]

                        if chunk_labels[key] is None:
                            chunk_labels[key] = new_data
                        else:
                            chunk_labels[key] = np.concatenate(
                                (chunk_labels[key], new_data), axis=0)

                    # loop over the batches of one chunk, and keep the predictions
                    chunk_predictions = None
                    for b in xrange(num_batches_chunk_eval):
                        gpu_time.start()
                        th_result = iter_predict(b)
                        gpu_time.stop()
                        resulting_predictions = np.stack(
                            th_result[:len(network_outputs)], axis=0)
                        assert len(
                            network_outputs
                        ) == 1, "Multiple outputs not implemented yet"
                        if chunk_predictions is None:
                            chunk_predictions = resulting_predictions
                        else:
                            chunk_predictions = np.concatenate(
                                (chunk_predictions, resulting_predictions),
                                axis=1)

                    # Check for NaN's. Panic if there are NaN's during validation.
                    utils.detect_nans(chunk_predictions, xs_shared, ys_shared,
                                      all_params)

                    # add the predictions of this chunk, to the global predictions (if needed)
                    if chunk_predictions is not None:
                        chunk_predictions = chunk_predictions[:validation_data[
                            VALID_SAMPLES]]
                        if store_network_output:
                            if validation_predictions is None:
                                validation_predictions = chunk_predictions
                            else:
                                validation_predictions = np.concatenate(
                                    (validation_predictions,
                                     chunk_predictions),
                                    axis=1)

                    # if you can calculate the losses per chunk, and take the mean afterwards, do that.
                    for key, ob in objectives["validate"].iteritems():
                        if ob.mean_over_samples:
                            new_losses = []
                            for i in xrange(validation_data[VALID_SAMPLES]):
                                loss = ob.get_loss_from_lists(
                                    chunk_predictions[0, i:i + 1],
                                    validation_data["output"][
                                        ob.target_key][i:i + 1])
                                new_losses.append(loss)

                            new_losses = np.array(new_losses)
                            if losses_to_store[key] is None:
                                losses_to_store[key] = new_losses
                            else:
                                losses_to_store[key] = np.concatenate(
                                    (losses_to_store[key], new_losses), axis=0)

                    data_load_time.start()
                data_load_time.stop()

                # Compare the predictions with the actual labels and print them.
                for key, ob in objectives["validate"].iteritems():
                    if ob.mean_over_samples:
                        loss = np.mean(losses_to_store[key])
                    else:
                        loss = ob.get_loss_from_lists(
                            validation_predictions[0, :],
                            chunk_labels[ob.target_key])
                    losses[VALIDATION][dataset_name][key].append(loss)
                    print string.rjust(key + ":", 17), "%.6f" % loss
                print

        # Good, we did one chunk. Let us check how much time this took us. Print out some stats.
        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        # This is the most useful stat of all! Keep this number low, and your total optimization time will be low too.
        print "  on average %dms per training sample" % (
            1000. * time_since_start /
            ((e + 1 - start_chunk_idx) * config.batch_size *
             config.batches_per_chunk))
        print "  %s since start (+%.2f s)" % (utils.hms(time_since_start),
                                              time_since_prev)
        print "  %s waiting on gpu vs %s waiting for data" % (gpu_time,
                                                              data_load_time)
        try:
            if num_chunks_train:  # only if we ever stop running
                est_time_left = time_since_start * (
                    float(num_chunks_train - (e + 1 - start_chunk_idx)) /
                    float(e + 1 - start_chunk_idx))
                eta = datetime.datetime.now() + datetime.timedelta(
                    seconds=est_time_left)
                eta_str = eta.strftime("%c")
                print "  estimated %s to go" % utils.hms(est_time_left)
                print "  (ETA: %s)" % eta_str
                if hasattr(config, "print_mean_chunks"):
                    avg_train = losses[TRAINING]["objective"]
                    n = min(len(avg_train), config.print_mean_chunks)
                    avg_train = avg_train[-n:]
                    print "  mean loss last %i chunks: %.3f" % (
                        n, np.mean(avg_train))
        except OverflowError:
            # Shit happens
            print "  This will take really long, like REALLY long."
        if hasattr(config, "print_score_every_chunk") and config.print_score_every_chunk\
                and len(losses[VALIDATION]["training set"]["objective"]) > 0:
            print "  train: best %.3f latest %.3f, valid: best %.3f latest %.3f " % (
                np.min(losses[VALIDATION]["training set"]["objective"]),
                losses[VALIDATION]["training set"]["objective"][-1],
                np.min(losses[VALIDATION]["validation set"]["objective"]),
                losses[VALIDATION]["validation set"]["objective"][-1])

        # Save the data every config.save_every_chunks chunks, or at the end of training.
        # We should switch this to config.save_every_epochs at some point, for consistency.
        if ((e + 1) % config.save_every_chunks) == 0 or (
                num_chunks_train and e + 1 >= num_chunks_train):
            print
            print "Saving metadata, parameters"

            with open(metadata_path, 'w') as f:
                pickle.dump(
                    {
                        'metadata_path':
                        metadata_path,
                        'configuration_file':
                        config.__name__,
                        'git_revision_hash':
                        utils.get_git_revision_hash(),
                        'experiment_id':
                        expid,
                        'chunks_since_start':
                        e,
                        'losses':
                        losses,
                        'time_since_start':
                        time_since_start,
                        'param_values':
                        lasagne.layers.get_all_param_values(top_layer)
                    }, f, pickle.HIGHEST_PROTOCOL)

            print "  saved to %s" % metadata_path
            print

        # reset the timers for the next round. This needs to happen here, because at the end of the big for loop
        # we already want to get a chunk immediately for the next iteration. The iterator is an argument of the for loop.
        gpu_time.reset()
        data_load_time.reset()
        data_load_time.start()

    return
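
The epoch loop in train_model picks the learning rate by scanning learning_rate_schedule for every threshold the current epoch has passed. The utils.current_learning_rate helper used when resuming is not shown; a minimal standalone lookup under the same assumption (a dict mapping epoch thresholds to rates) might be:

def current_learning_rate(schedule, epoch):
    # pick the rate attached to the largest threshold the epoch has reached,
    # e.g. schedule = {0: 1e-3, 10: 1e-4, 20: 1e-5}
    reached = [threshold for threshold in schedule if threshold <= epoch]
    return schedule[max(reached)] if reached else schedule[min(schedule)]
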
Example #10
0
#!/usr/bin/env python

import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             '../../uegpy')))
import ueg_sys as ue
import finite as fp
import utils as ut
import monte_carlo as mc

time = float(sys.argv[1])
beta = float(sys.argv[2])
ne = float(sys.argv[3])

cutoff = ut.kinetic_cutoff(ne, 1.0/beta)

system = ue.System(0.1, ne, cutoff, 2)
t_per_it = mc.sample_canonical_energy(system, beta/system.ef, 10)[1] / 10

time = 0.9 * time
iterations = int(min(time/t_per_it, 1e6))

(frame, time) = mc.sample_canonical_energy(system, beta/system.ef, iterations)

print ("# Running uegpy version: %s"%(ut.get_git_revision_hash()))

print ("# Time taken: %s s"%time)

print(frame.to_string(index=False))
Example #11
0
utils.save_pkl(avg_patient_predictions, test_prediction_path)
print "\npredictions saved to %s" % test_prediction_path

# utils.save_submission(avg_patient_predictions, submission_path)
# print ' submission saved to %s' % submission_path

try:
    with open(jonas_prediction_path, "w") as f:
        pickle.dump(
            {
                "metadata_path": metadata_path,
                "prediction_path": test_prediction_path,
                "submission_path": submission_path,
                "configuration_file": config().__name__,
                "git_revision_hash": utils.get_git_revision_hash(),
                "predictions": predictions,
            },
            f,
            pickle.HIGHEST_PROTOCOL,
        )
except:
    with open("ira_%s.pkl" % config().__name__, "w") as f:
        pickle.dump(
            {
                "metadata_path": metadata_path,
                "prediction_path": test_prediction_path,
                "submission_path": submission_path,
                "configuration_file": config().__name__,
                "git_revision_hash": utils.get_git_revision_hash(),
                "predictions": predictions,
Example #12
0
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print "  %s" % metadata_path
    print "To generate"
    print "  %s" % prediction_path
    print "  %s" % submission_path

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    all_layers = lasagne.layers.get_all_layers(top_layer)
    num_params = lasagne.layers.count_params(top_layer)
    print "  number of parameters: %d" % num_params
    print string.ljust("  layer output shapes:",36),
    print string.ljust("#params:",10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print "    %s %s %s" % (name,  num_param, layer.output_shape)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems()
    }
    idx = T.lscalar('idx')

    givens = dict()

    for key in input_layers.keys():
        if key=="sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx], network_outputs + theano_printer.get_the_stuff_to_print(),
                                 givens=givens, on_unused_input="ignore",
                                 # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                 )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = range(1, num_chunks+1)

    data_loader.filter_patient_folders()

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys = xs_shared.keys(),
                              required_output_keys = ["patients", "classification_correction_function"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time


    predictions = [{"patient": i+1,
                    "systole": np.zeros((0,600)),
                    "diastole": np.zeros((0,600))
                    } for i in xrange(NUM_PATIENTS)]


    for e, test_data in izip(itertools.count(start=1), buffering.buffered_gen_threaded(create_test_gen())):
        print "  load testing data onto GPU"

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])


        patient_ids = test_data["output"]["patients"]
        classification_correction = test_data["output"]["classification_correction_function"]
        print "  patients:", " ".join(map(str, patient_ids))
        print "  chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i] for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64')
            for idx, patient_id in enumerate(patient_ids[b*config().batch_size:(b+1)*config().batch_size]):
                if patient_id != 0:
                    index = patient_id-1
                    patient_data = predictions[index]
                    assert patient_id==patient_data["patient"]

                    kaggle_systole = kaggle_systoles[idx:idx+1,:]
                    kaggle_diastole = kaggle_diastoles[idx:idx+1,:]
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    kaggle_systole = classification_correction[b*config().batch_size + idx](kaggle_systole)
                    kaggle_diastole = classification_correction[b*config().batch_size + idx](kaggle_diastole)
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    patient_data["systole"] =  np.concatenate((patient_data["systole"], kaggle_systole ),axis=0)
                    patient_data["diastole"] = np.concatenate((patient_data["diastole"], kaggle_diastole ),axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    already_printed = False
    for prediction in predictions:
        if prediction["systole"].size>0 and prediction["diastole"].size>0:
            average_method =  getattr(config(), 'tta_average_method', partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(prediction["systole"])
            prediction["diastole_average"] = average_method(prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except:
                if not already_printed:
                    print "WARNING: These distributions are not distributions"
                    already_printed = True
                prediction["systole_average"] = make_monotone_distribution(prediction["systole_average"])
                prediction["diastole_average"] = make_monotone_distribution(prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])


    print "Calculating training and validation set scores for reference"

    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                      (train_patients_indices,  "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient-1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient-1, 0]
                error = CRSP(prediction["systole_average"], regular_labels[patient-1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"], regular_labels[patient-1, 2])
                errors.append(error)
        if len(errors)>0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print "  %s kaggle loss: %f" % (string.rjust(set_name, 12), estimated_CRSP)
            validation_dict[set_name] = estimated_CRSP
        else:
            print "  %s kaggle loss: not calculated" % (string.rjust(set_name, 12))


    print "dumping prediction file to %s" % prediction_path
    with open(prediction_path, 'w') as f:
        pickle.dump({
                        'metadata_path': metadata_path,
                        'prediction_path': prediction_path,
                        'submission_path': submission_path,
                        'configuration_file': config().__name__,
                        'git_revision_hash': utils.get_git_revision_hash(),
                        'experiment_id': expid,
                        'time_since_start': time_since_start,
                        'param_values': lasagne.layers.get_all_param_values(top_layer),
                        'predictions': predictions,
                        'validation_errors': validation_dict,
                    }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"

    print "dumping submission file to %s" % submission_path
    with open(submission_path, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(['Id'] + ['P%d'%i for i in xrange(600)])
        for prediction in predictions:
            # the submission only has patients 501 to 700
            if prediction["patient"] in data_loader.test_patients_indices:
                if "diastole_average" not in prediction or "systole_average" not in prediction:
                    raise Exception("Not all test-set patients were predicted")
                csvwriter.writerow(["%d_Diastole" % prediction["patient"]] + ["%.18f" % p for p in prediction["diastole_average"].flatten()])
                csvwriter.writerow(["%d_Systole" % prediction["patient"]] + ["%.18f" % p for p in prediction["systole_average"].flatten()])
    print "submission file dumped"

    return
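
make_monotone_distribution and test_if_valid_distribution are referenced above but not defined in this snippet. Assuming each prediction is a 600-bin cumulative distribution, a plausible minimal implementation is:

import numpy as np

def make_monotone_distribution(cdf):
    # clip to [0, 1] and force the CDF to be non-decreasing
    cdf = np.clip(cdf, 0.0, 1.0)
    return np.maximum.accumulate(cdf)

def test_if_valid_distribution(cdf):
    assert np.all(cdf >= 0.0) and np.all(cdf <= 1.0), "values outside [0, 1]"
    assert np.all(np.diff(cdf) >= 0.0), "CDF is not monotonically non-decreasing"
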
Example #13
0
def main(args):
    "Put all the pieces together"
    if args.dump_per_instance_results:
        args.dump = True
    if args.dump:
        args.disable_tqdm = True
        if len(args.logfile.name) == 0:
            basename_fusion = [
                str(i.with_suffix('').with_name(i.stem)) for i in args.snapshot
            ]
            args.logfile = Path('-'.join(basename_fusion) + '_corpus-eval')
        if args.logfile.exists():
            raise ValueError(
                f'{args.logfile} already exists. Please provide a logfile or '
                'backup existing results.')
    setup_logging(args)

    logging.info('Corpus Retrieval Evaluation for CAL/MCN')
    logging.info(f'Git revision hash: {get_git_revision_hash()}')
    load_hyperparameters(args)
    logging.info(args)

    engine_prm = {}
    if args.arch == 'MCN':
        args.dataset = 'UntrimmedMCN'
        args.engine = 'MomentRetrievalFromProposalsTable'
    elif args.arch == 'SMCN':
        args.dataset = 'UntrimmedSMCN'
        args.engine = 'MomentRetrievalFromClipBasedProposalsTable'
    else:
        raise ValueError('Unknown/unsupported architecture')

    logging.info('Loading dataset')
    dataset_novisual = True
    dataset_cues = {feat: None for feat in args.tags}
    if args.h5_path:
        for i, key in enumerate(args.tags):
            dataset_cues[key] = {'file': args.h5_path[i]}
        dataset_novisual = False
        clip_length = None
    else:
        clip_length = args.clip_length
    proposals_interface = proposals.__dict__[args.proposal_interface](
        args.min_length, args.scales, args.stride)
    dataset_setup = dict(json_file=args.test_list,
                         cues=dataset_cues,
                         loc=args.loc,
                         context=args.context,
                         debug=args.debug,
                         eval=True,
                         no_visual=dataset_novisual,
                         proposals_interface=proposals_interface,
                         clip_length=clip_length)
    dataset = dataset_untrimmed.__dict__[args.dataset](**dataset_setup)
    if args.arch == 'SMCN':
        logging.info('Set padding on UntrimmedSMCN dataset')
        dataset.set_padding(False)

    logging.info('Setting up models')
    models_dict = {}
    for i, key in enumerate(args.snapshot_tags):
        arch_setup = dict(
            visual_size=dataset.visual_size[key],
            lang_size=dataset.language_size,
            max_length=dataset.max_words,
            embedding_size=args.embedding_size,
            visual_hidden=args.visual_hidden,
            lang_hidden=args.lang_hidden,
            visual_layers=args.visual_layers,
        )
        models_dict[key] = model.__dict__[args.arch](**arch_setup)
        filename = args.snapshot[i].with_suffix('.pth.tar')
        snapshot_ = torch.load(filename,
                               map_location=lambda storage, loc: storage)
        models_dict[key].load_state_dict(snapshot_['state_dict'])
        models_dict[key].eval()

    logging.info('Creating database, a.k.a. indexing the corpus')
    engine = corpus.__dict__[args.engine](dataset, models_dict, **engine_prm)
    engine.indexing()

    logging.info('Launch evaluation...')
    # log-scale up to the end of the database
    if len(args.topk) == 1 and args.topk[0] == 0:
        exp = int(np.floor(np.log10(engine.num_moments)))
        args.topk = [10**i for i in range(0, exp + 1)]
        args.topk.append(engine.num_moments)
    num_instances_retrieved = []
    judge = CorpusVideoMomentRetrievalEval(topk=args.topk)
    args.n_display = max(int(args.n_display * len(dataset.metadata)), 1)
    for it, query_metadata in tqdm(enumerate(dataset.metadata),
                                   disable=args.disable_tqdm):
        result_per_query = engine.query(
            query_metadata['language_input'],
            return_indices=args.dump_per_instance_results)
        if args.dump_per_instance_results:
            vid_indices, segments, proposals_ind = result_per_query
        else:
            vid_indices, segments = result_per_query
        judge.add_single_predicted_moment_info(query_metadata,
                                               vid_indices,
                                               segments,
                                               max_rank=engine.num_moments)
        num_instances_retrieved.append(len(vid_indices))
        if args.disable_tqdm and (it + 1) % args.n_display == 0:
            logging.info(f'Processed queries [{it}/{len(dataset.metadata)}]')

        if args.dump_per_instance_results:
            # TODO: wrap-up this inside a class. We could even dump in a
            # non-blocking thread using a Queue
            if it == 0:
                filename = args.logfile.with_suffix('.h5')
                fid = h5py.File(filename, 'x')
                if args.reduced_dump:
                    fid_vi = fid.create_dataset(name='vid_indices',
                                                chunks=True,
                                                shape=(len(dataset),
                                                       dataset.num_videos),
                                                dtype='int64')
                else:
                    fid.create_dataset(name='proposals',
                                       data=engine.proposals,
                                       chunks=True)
                    fid_vi = fid.create_dataset(name='vid_indices',
                                                chunks=True,
                                                shape=(len(dataset), ) +
                                                vid_indices.shape,
                                                dtype='int64')
                    fid_pi = fid.create_dataset(name='proposals_ind',
                                                chunks=True,
                                                shape=(len(dataset), ) +
                                                proposals_ind.shape,
                                                dtype='int64')

            if args.reduced_dump:
                fid_vi[it, ...] = pd.unique(vid_indices.numpy())
            else:
                fid_vi[it, ...] = vid_indices
                fid_pi[it, ...] = proposals_ind

    if args.dump_per_instance_results:
        fid.close()

    logging.info('Summarizing results')
    num_instances_retrieved = np.array(num_instances_retrieved)
    logging.info(f'Number of queries: {len(judge.map_query)}')
    logging.info(f'Number of proposals: {engine.num_moments}')
    retrieved_proposals_median = int(np.median(num_instances_retrieved))
    retrieved_proposals_min = int(num_instances_retrieved.min())
    if (num_instances_retrieved != engine.num_moments).any():
        logging.info('Triggered approximate search')
        logging.info('Median numbers of retrieved proposals: '
                     f'{retrieved_proposals_median:d}')
        logging.info('Min numbers of retrieved proposals: '
                     f'{retrieved_proposals_min:d}')
    result = judge.evaluate()
    _ = [logging.info(f'{k}: {v}') for k, v in result.items()]
    if args.dump:
        filename = args.logfile.with_suffix('.json')
        logging.info(f'Dumping results into: {filename}')
        with open(filename, 'x') as fid:
            for key, value in result.items():
                result[key] = float(value)
            result['snapshot'] = [str(i) for i in args.snapshot]
            result['corpus'] = str(args.test_list)
            result['topk'] = args.topk
            result['iou_threshold'] = judge.iou_thresholds
            result['median_proposals_retrieved'] = retrieved_proposals_median
            result['min_proposals_retrieved'] = retrieved_proposals_min
            result['date'] = datetime.now().isoformat()
            result['git_hash'] = get_git_revision_hash()
            json.dump(result, fid, indent=1)
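
The topk handling above expands a single 0 into log-spaced cutoffs up to the corpus size. Extracted as a standalone helper for clarity (the function name is illustrative):

import numpy as np

def logscale_topk(num_moments):
    # powers of ten up to the corpus size, plus the corpus size itself,
    # e.g. logscale_topk(3500) -> [1, 10, 100, 1000, 3500]
    exp = int(np.floor(np.log10(num_moments)))
    return [10 ** i for i in range(exp + 1)] + [num_moments]
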
Example #14
0
def main(args):
    "Put all the pieces together"
    if args.dump:
        args.disable_tqdm = True
        if len(args.logfile.name) == 0:
            basename = args.snapshot[0].with_suffix('')
            args.logfile = basename.parent.joinpath(
                args.output_prefix, basename.stem + '_corpus-2nd-eval')
            if not args.logfile.parent.exists():
                args.logfile.parent.mkdir()
        if args.logfile.exists():
            raise ValueError(
                f'{args.logfile} already exists. Please provide a logfile or '
                'backup existing results.')
    setup_logging(args)

    logging.info('Corpus Retrieval Evaluation for 2nd Stage')
    load_hyperparameters(args)
    logging.info(args)

    if args.arch == 'MCN':
        args.dataset = 'UntrimmedMCN'
    elif args.arch == 'SMCN':
        args.dataset = 'UntrimmedSMCN'
    else:
        raise ValueError('Unknown/unsupported architecture')

    logging.info('Loading dataset')
    if args.h5_path.exists():
        dataset_novisual = False
        dataset_cues = {args.feat: {'file': args.h5_path}}
    else:
        raise NotImplementedError('WIP')
    proposals_interface = proposals.__dict__[args.proposal_interface](
        args.min_length, args.scales, args.stride)
    dataset_setup = dict(
        json_file=args.test_list, cues=dataset_cues, loc=args.loc,
        context=args.context, debug=args.debug, eval=True,
        no_visual=dataset_novisual,
        proposals_interface=proposals_interface
    )
    dataset = dataset_untrimmed.__dict__[args.dataset](**dataset_setup)
    logging.info('Setting up models')
    arch_setup = dict(
        visual_size=dataset.visual_size[args.feat],
        lang_size=dataset.language_size,
        max_length=dataset.max_words,
        embedding_size=args.embedding_size,
        visual_hidden=args.visual_hidden,
        lang_hidden=args.lang_hidden,
        visual_layers=args.visual_layers,
        bi_lstm=args.bi_lstm,
        lang_dropout=args.lang_dropout
    )

    net = model.__dict__[args.arch](**arch_setup)
    model_param = setup_snapshot(args.snapshot)
    net.load_state_dict(model_param['state_dict'])
    net.eval()

    logging.info('Setting up engine')
    engine = setup_engine(args, dataset, net)

    logging.info('Launch evaluation...')
    # log-scale up to the end of the database
    if len(args.topk) == 1 and args.topk[0] == 0:
        exp = int(np.floor(np.log10(engine.num_moments)))
        args.topk = [10**i for i in range(0, exp + 1)]
        args.topk.append(engine.num_moments)
    num_instances_retrieved = []
    judge = CorpusVideoMomentRetrievalEval(topk=args.topk)
    args.n_display = max(int(args.n_display * len(dataset.metadata)), 1)
    for it, query_metadata in tqdm(enumerate(dataset.metadata),
                                   disable=args.disable_tqdm):
        vid_indices, segments = engine.query(
            query_metadata['language_input'], description_ind=it)
        judge.add_single_predicted_moment_info(
            query_metadata, vid_indices, segments, max_rank=engine.num_moments)
        num_instances_retrieved.append(len(vid_indices))
        if args.disable_tqdm and (it + 1) % args.n_display == 0:
            logging.info(f'Processed queries [{it}/{len(dataset.metadata)}]')

    logging.info('Summarizing results')
    num_instances_retrieved = np.array(num_instances_retrieved)
    logging.info(f'Number of queries: {len(judge.map_query)}')
    logging.info(f'Number of proposals: {engine.num_moments}')
    retrieved_proposals_median = int(np.median(num_instances_retrieved))
    retrieved_proposals_min = int(num_instances_retrieved.min())
    if (num_instances_retrieved != engine.num_moments).any():
        logging.info('Triggered approximate search')
        logging.info('Median numbers of retrieved proposals: '
                     f'{retrieved_proposals_median:d}')
        logging.info('Min numbers of retrieved proposals: '
                     f'{retrieved_proposals_min:d}')
    result = judge.evaluate()
    _ = [logging.info(f'{k}: {v}') for k, v in result.items()]
    if args.dump:
        filename = args.logfile.with_suffix('.json')
        logging.info(f'Dumping results into: {filename}')
        with open(filename, 'x') as fid:
            for key, value in result.items():
                result[key] = float(value)
            result['snapshot'] = [str(i) for i in args.snapshot]
            result['corpus'] = str(args.test_list)
            result['h5_path'] = str(args.h5_path)
            result['h5_1ststage'] = str(args.h5_1ststage)
            result['snapshot_1ststage'] = str(args.snapshot_1ststage)
            result['topk'] = args.topk
            result['iou_threshold'] = judge.iou_thresholds
            result['k_first'] = args.k_first
            result['median_proposals_retrieved'] = retrieved_proposals_median
            result['min_proposals_retrieved'] = retrieved_proposals_min
            result['nms_threshold'] = args.nms_threshold
            result['corpus_setup'] = args.corpus_setup
            result['date'] = datetime.now().isoformat()
            result['git_hash'] = get_git_revision_hash()
            json.dump(result, fid, indent=1, sort_keys=True)
Example #15
0
                                     )
        train_data["intermediates"] = iter_train(0)
        pickle.dump(train_data, open(metadata_path + "-dump", "wb"))

    return


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    required = parser.add_argument_group('required arguments')
    required.add_argument('-c', '--config',
                          help='configuration to run',
                          required=True)
    args = parser.parse_args()
    set_configuration(args.config)

    expid = utils.generate_expid(args.config)

    log_file = LOGS_PATH + "%s.log" % expid
    with print_to_file(log_file):

        print "Running configuration:", config().__name__
        print "Current git version:", utils.get_git_revision_hash()

        train_model(expid)
        print "log saved to '%s'" % log_file
        predict_model(expid)
        print "log saved to '%s'" % log_file


Example #16
0
def main():

    # Parameter #
    params = dict(
        size_open=3,   # size of the opening structuring element (opening is erosion followed by dilation)
        size_close=2,  # size of the closing structuring element (closing is dilation followed by erosion)
    )
    #############

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_path",
                        default="./Input/demo",
                        help="Path to the folder containing the input images.")
    parser.add_argument(
        "--output_path",
        default="./Output/demo",
        help="Path to the folder which will contain the output.")
    parser.add_argument(
        "--param_file",
        default="",
        help="Name of a parameter file in the input folder. Will be used to override the local param dictionary.")
    args = parser.parse_args()

    # Preparation
    time_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    current_file = os.path.splitext(os.path.basename(__file__))[0]

    input_path = args.input_path
    output_path = os.path.join(args.output_path,
                               current_file + "_" + time_stamp)

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # set up logging
    logging.basicConfig(filename=os.path.join(output_path,
                                              current_file + '.log'),
                        level=logging.DEBUG,
                        format='%(asctime)s - %(levelname)s: %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')

    logging.info("Current git revision: {}".format(
        utils.get_git_revision_hash()))

    # override parameter if external ones are given
    param_path = os.path.join(input_path, args.param_file)
    if os.path.isfile(param_path):
        with open(param_path, "r") as param_file:
            params = json.load(param_file)
        logging.info("Using parameter given in {}".format(param_path))
    else:
        logging.info("Using local parameter")

    # dump used parameter
    with open(os.path.join(output_path, 'params.json'), 'w') as f:
        json.dump(
            params,
            f,
            sort_keys=True,
            indent=4,
        )
        f.write('\n')

    print("Start processing...")
    counter = 0
    # Loop through all images in input path
    for root, dirs, files in os.walk(input_path):
        for input_name in files:

            start = time.time()
            input_name_base = os.path.splitext(os.path.basename(input_name))[0]

            img_original = cv2.imread(os.path.join(input_path, input_name))
            if img_original is None:  # reading failed (e.g. file is not an image)
                continue

            img = cv2.cvtColor(img_original, cv2.COLOR_BGR2GRAY)
            img = utils.prepare_for_morph_filter(img)

            img = utils.morph_denoise(img, **params)

            img = utils.restore_after_morph_filter(img)

            cv2.imwrite(os.path.join(output_path, input_name), img_original)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_processed.tiff"),
                img)

            duration = time.time() - start
            logging.info("Processed {0} (Duration: {1:.3f} s)".format(
                input_name, duration))
            counter += 1

    logging.info("Processed {} images in total".format(counter))
    print("Processing done. See log file in '{}' for more details".format(
        output_path))
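
utils.morph_denoise (and its prepare/restore companions) are not included in this snippet. Given the params above, one plausible sketch of the denoising step using OpenCV morphology:

import cv2

def morph_denoise(img, size_open=3, size_close=2):
    # opening (erosion then dilation) removes small bright speckles;
    # closing (dilation then erosion) fills small dark holes
    open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size_open, size_open))
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size_close, size_close))
    img = cv2.morphologyEx(img, cv2.MORPH_OPEN, open_kernel)
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, close_kernel)
    return img
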
Example #17
0
def train_model(expid):
    metadata_path = MODEL_PATH + "%s.pkl" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    data_loader.filter_patient_folders()

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    all_layers = lasagne.layers.get_all_layers(top_layer)

    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)
    if "cutoff_gradients" in interface_layers:
        submodel_params = [param for value in interface_layers["cutoff_gradients"] for param in lasagne.layers.get_all_params(value)]
        all_params = [p for p in all_params if p not in submodel_params]

    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers["pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name.split('.')[1]
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings = layers_dict.values()
            )
            lasagne.layers.set_all_param_values(pretrained_top_layer, pretrained_resume_metadata['param_values'])

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])

    print string.ljust("  layer output shapes:",36),
    print string.ljust("#params:",10),
    print string.ljust("#data:",10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(int(num_param).__str__(), 10)
        num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10)
        print "    %s %s %s %s" % (name,  num_param, num_size, layer.output_shape)
    print "  number of parameters: %d" % num_params

    obj = config().build_objective(interface_layers)

    train_loss_theano = obj.get_loss()
    kaggle_loss_theano = obj.get_kaggle_loss()
    segmentation_loss_theano = obj.get_segmentation_loss()

    validation_other_losses = collections.OrderedDict()
    validation_train_loss = obj.get_loss(average=False, deterministic=True, validation=True, other_losses=validation_other_losses)
    validation_kaggle_loss = obj.get_kaggle_loss(average=False, deterministic=True, validation=True)
    validation_segmentation_loss = obj.get_segmentation_loss(average=False, deterministic=True, validation=True)


    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems()
    }

    # Note: ys_shared holds the target_vars of the objective, not the desired
    # values of the output layers. There can be more output layers than the
    # objective strictly requires, e.g. for debugging.

    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim, dtype='float32') for (key, target_var) in obj.target_vars.iteritems()
    }

    learning_rate_schedule = config().learning_rate_schedule

    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))
    idx = T.lscalar('idx')

    givens = dict()
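    # Each symbolic target/input variable below is mapped to the idx-th
    # mini-batch slice of its shared (typically GPU-resident) array, so the
    # compiled Theano functions only need the batch index as their argument.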
    for key in obj.target_vars.keys():
        if key=="segmentation":
            givens[obj.target_vars[key]] = ys_shared[key][idx*config().sunny_batch_size : (idx+1)*config().sunny_batch_size]
        else:
            givens[obj.target_vars[key]] = ys_shared[key][idx*config().batch_size : (idx+1)*config().batch_size]

    for key in input_layers.keys():
        if key=="sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size]

    updates = config().build_updates(train_loss_theano, all_params, learning_rate)
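    # build_updates is expected to return a Theano updates dictionary (e.g. one
    # produced by lasagne.updates.adam) computed from the training loss, the
    # trainable parameters and the shared learning rate.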

    #grad_norm = T.sqrt(T.sum([(g**2).sum() for g in theano.grad(train_loss_theano, all_params)]))
    #theano_printer.print_me_this("Grad norm", grad_norm)

    iter_train = theano.function([idx], [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print(),
                                 givens=givens, on_unused_input="ignore", updates=updates,
                                 # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                 )
    iter_validate = theano.function([idx], [validation_train_loss, validation_kaggle_loss, validation_segmentation_loss] + [v for _, v in validation_other_losses.items()] + theano_printer.get_the_stuff_to_print(),
                                    givens=givens, on_unused_input="ignore")

    num_chunks_train = int(config().num_epochs_train * NUM_TRAIN_PATIENTS / (config().batch_size * config().batches_per_chunk))
    print "Will train for %d chunks" % num_chunks_train
    if config().restart_from_save and os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
        start_chunk_idx = resume_metadata['chunks_since_start'] + 1
        chunks_train_idcs = range(start_chunk_idx, num_chunks_train)

        # set lr to the correct value
        current_lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
        print "  setting learning rate to %.7f" % current_lr
        learning_rate.set_value(current_lr)
        losses_train = resume_metadata['losses_train']
        losses_eval_valid = resume_metadata['losses_eval_valid']
        losses_eval_train = resume_metadata['losses_eval_train']
        losses_eval_valid_kaggle = [] #resume_metadata['losses_eval_valid_kaggle']
        losses_eval_train_kaggle = [] #resume_metadata['losses_eval_train_kaggle']
    else:
        chunks_train_idcs = range(num_chunks_train)
        losses_train = []
        losses_eval_valid = []
        losses_eval_train = []
        losses_eval_valid_kaggle = []
        losses_eval_train_kaggle = []


    create_train_gen = partial(config().create_train_gen,
                               required_input_keys = xs_shared.keys(),
                               required_output_keys = ys_shared.keys()# + ["patients"],
                               )


    create_eval_valid_gen = partial(config().create_eval_valid_gen,
                                   required_input_keys = xs_shared.keys(),
                                   required_output_keys = ys_shared.keys()# + ["patients"]
                                   )

    create_eval_train_gen = partial(config().create_eval_train_gen,
                                   required_input_keys = xs_shared.keys(),
                                   required_output_keys = ys_shared.keys()
                                   )

    print "Train model"
    start_time = time.time()
    prev_time = start_time

    num_batches_chunk = config().batches_per_chunk


    for e, train_data in izip(chunks_train_idcs, buffering.buffered_gen_threaded(create_train_gen())):
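        # One chunk = one generator yield: its arrays are copied into the
        # shared variables below and then consumed as batches_per_chunk
        # mini-batches selected by the batch index passed to iter_train.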
        print "Chunk %d/%d" % (e + 1, num_chunks_train)
        epoch = (1.0 * config().batch_size * config().batches_per_chunk * (e+1) / NUM_TRAIN_PATIENTS)
        print "  Epoch %.1f" % epoch

        for key, rate in learning_rate_schedule.iteritems():
            if epoch >= key:
                lr = np.float32(rate)
                learning_rate.set_value(lr)
        print "  learning rate %.7f" % lr

        if config().dump_network_loaded_data:
            pickle.dump(train_data, open("data_loader_dump_train_%d.pkl"%e, "wb"))

        for key in xs_shared:
            xs_shared[key].set_value(train_data["input"][key])

        for key in ys_shared:
            ys_shared[key].set_value(train_data["output"][key])

        #print "train:", sorted(train_data["output"]["patients"])
        losses = []
        kaggle_losses = []
        segmentation_losses = []
        for b in xrange(num_batches_chunk):
            iter_result = iter_train(b)

            loss, kaggle_loss, segmentation_loss = tuple(iter_result[:3])
            utils.detect_nans(loss, xs_shared, ys_shared, all_params)
 
            losses.append(loss)
            kaggle_losses.append(kaggle_loss)
            segmentation_losses.append(segmentation_loss)

        mean_train_loss = np.mean(losses)
        print "  mean training loss:\t\t%.6f" % mean_train_loss
        losses_train.append(mean_train_loss)

        print "  mean kaggle loss:\t\t%.6f" % np.mean(kaggle_losses)
        print "  mean segment loss:\t\t%.6f" % np.mean(segmentation_losses)

        if ((e + 1) % config().validate_every) == 0:
            print
            print "Validating"
            if config().validate_train_set:
                subsets = ["validation", "train"]
                gens = [create_eval_valid_gen, create_eval_train_gen]
                losses_eval = [losses_eval_valid, losses_eval_train]
                losses_kaggle = [losses_eval_valid_kaggle, losses_eval_train_kaggle]
            else:
                subsets = ["validation"]
                gens = [create_eval_valid_gen]
                losses_eval = [losses_eval_valid]
                losses_kaggle = [losses_eval_valid_kaggle]

            for subset, create_gen, losses_validation, losses_kgl in zip(subsets, gens, losses_eval, losses_kaggle):

                vld_losses = []
                vld_kaggle_losses = []
                vld_segmentation_losses = []
                vld_other_losses = {k:[] for k,_ in validation_other_losses.items()}
                print "  %s set (%d samples)" % (subset, get_number_of_validation_samples(set=subset))

                for validation_data in buffering.buffered_gen_threaded(create_gen()):
                    num_batches_chunk_eval = config().batches_per_chunk

                    if config().dump_network_loaded_data:
                        pickle.dump(validation_data, open("data_loader_dump_valid_%d.pkl"%e, "wb"))

                    for key in xs_shared:
                        xs_shared[key].set_value(validation_data["input"][key])

                    for key in ys_shared:
                        ys_shared[key].set_value(validation_data["output"][key])

                    #print "validate:", validation_data["output"]["patients"]

                    for b in xrange(num_batches_chunk_eval):
                        losses = tuple(iter_validate(b)[:3+len(validation_other_losses)])
                        loss, kaggle_loss, segmentation_loss = losses[:3]
                        other_losses = losses[3:]
                        vld_losses.extend(loss)
                        vld_kaggle_losses.extend(kaggle_loss)
                        vld_segmentation_losses.extend(segmentation_loss)
                        for k, other_loss in zip(validation_other_losses, other_losses):
                            vld_other_losses[k].extend(other_loss)

                vld_losses = np.array(vld_losses)
                vld_kaggle_losses = np.array(vld_kaggle_losses)
                vld_segmentation_losses = np.array(vld_segmentation_losses)
                for k in validation_other_losses:
                    vld_other_losses[k] = np.array(vld_other_losses[k])

                # now select only the relevant section to average
                sunny_len = get_lenght_of_set(name="sunny", set=subset)
                regular_len = get_lenght_of_set(name="regular", set=subset)
                num_valid_samples = get_number_of_validation_samples(set=subset)

                #print losses[:num_valid_samples]
                #print kaggle_losses[:regular_len]
                #print segmentation_losses[:sunny_len]
                loss_to_save = obj.compute_average(vld_losses[:num_valid_samples])
                print "  mean training loss:\t\t%.6f" % loss_to_save
                print "  mean kaggle loss:\t\t%.6f"   % np.mean(vld_kaggle_losses[:regular_len])
                print "  mean segment loss:\t\t%.6f"  % np.mean(vld_segmentation_losses[:sunny_len])
                # print "    acc:\t%.2f%%" % (acc * 100)
                for k, v in vld_other_losses.items():
                    print "  mean %s loss:\t\t%.6f"  % (k, obj.compute_average(v[:num_valid_samples], loss_name=k))
                print

                losses_validation.append(loss_to_save)

                kaggle_to_save = np.mean(vld_kaggle_losses[:regular_len])
                losses_kgl.append(kaggle_to_save)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

        if ((e + 1) % config().save_every) == 0:
            print
            print "Saving metadata, parameters"

            with open(metadata_path, 'wb') as f:  # binary mode for pickle
                pickle.dump({
                    'metadata_path': metadata_path,
                    'configuration_file': config().__name__,
                    'git_revision_hash': utils.get_git_revision_hash(),
                    'experiment_id': expid,
                    'chunks_since_start': e,
                    'losses_train': losses_train,
                    'losses_eval_train': losses_eval_train,
                    'losses_eval_train_kaggle': losses_eval_train_kaggle,
                    'losses_eval_valid': losses_eval_valid,
                    'losses_eval_valid_kaggle': losses_eval_valid_kaggle,
                    'time_since_start': time_since_start,
                    'param_values': lasagne.layers.get_all_param_values(top_layer)
                }, f, pickle.HIGHEST_PROTOCOL)

            print "  saved to %s" % metadata_path
            print

    # store all known outputs from last batch:
    if config().take_a_dump:
        all_theano_variables = [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print()
        for layer in all_layers[:-1]:
            all_theano_variables.append(lasagne.layers.helper.get_output(layer))

        iter_train = theano.function([idx], all_theano_variables,
                                     givens=givens, on_unused_input="ignore", updates=updates,
                                     # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                     )
        train_data["intermediates"] = iter_train(0)
        pickle.dump(train_data, open(metadata_path + "-dump", "wb"))

    return
Example #18
0
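# Fragment: per-chunk timing / ETA reporting and periodic checkpointing from a
# chunked training loop (the enclosing loop and the variable definitions are
# not part of this snippet).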

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (config().max_nchunks - chunk_idx + 1.) / (chunk_idx + 1. - start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    if ((chunk_idx + 1) % config().save_every) == 0:
        print
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks)
        print 'Saving metadata, parameters'

        with open(metadata_path, 'wb') as f:  # binary mode for pickle
            pickle.dump({
                'configuration_file': config_name,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'chunks_since_start': chunk_idx,
                'losses_eval_train': losses_eval_train,
                'losses_eval_valid': losses_eval_valid,
                'param_values': nn.layers.get_all_param_values(model.l_out)
            }, f, pickle.HIGHEST_PROTOCOL)
            print '  saved to %s' % metadata_path
            print
Example #19
0
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = MODEL_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print "  %s" % metadata_path
    print "To generate"
    print "  %s" % prediction_path

    print "Build model"
    interface_layers = config.build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])

    print string.ljust("  layer output shapes:", 34),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 30)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(int(num_param).__str__(), 10)
        num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10)
        print "    %s %s %s %s" % (name, num_param, num_size,
                                   layer.output_shape)
    print "  number of parameters: %d" % num_params

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape),
                                        dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    idx = T.lscalar('idx')

    givens = dict()

    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx *
                                                config.batch_size:(idx + 1) *
                                                config.batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer,
                                         deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    print "Compiling..."
    iter_test = theano.function(
        [idx],
        network_outputs + theano_printer.get_the_stuff_to_print(),
        givens=givens,
        on_unused_input="ignore",
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    required_input = {
        key: l_in.output_shape
        for (key, l_in) in input_layers.iteritems()
    }

    print "Preparing dataloaders"
    config.test_data.prepare()
    chunk_size = config.batches_per_chunk * config.batch_size

    test_data_generator = buffering.buffered_gen_threaded(
        config.test_data.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output={},
        ))

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer,
                                        resume_metadata['param_values'])

    chunks_test_idcs = itertools.count(0)
    num_chunks_test = math.ceil(1.0 * config.test_data.epochs *
                                config.test_data.number_of_samples /
                                (config.batch_size * config.batches_per_chunk))

    start_time, prev_time = None, None
    all_predictions = dict()

    print "Loading first chunks"
    for e, test_data in izip(chunks_test_idcs, test_data_generator):

        if start_time is None:
            start_time = time.time()
            prev_time = start_time
        print

        print "Chunk %d/%d" % (e + 1, num_chunks_test)
        print "=============="

        if config.dump_network_loaded_data:
            pickle.dump(test_data,
                        open("data_loader_dump_test_%d.pkl" % e, "wb"))

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])

        sample_ids = test_data[IDS]
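        # Collect per-sample network outputs keyed by sample id; if an id is
        # seen again in a later batch or chunk, the new prediction is
        # concatenated to the stored one along axis 0.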

        for b in xrange(config.batches_per_chunk):
            th_result = iter_test(b)

            predictions = th_result[:len(network_outputs)]

            for output_idx, key in enumerate(output_layers.keys()):
                for sample_idx in xrange(b * config.batch_size,
                                         (b + 1) * config.batch_size):
                    prediction_pos = sample_idx % config.batch_size
                    sample_id = sample_ids[sample_idx]
                    if sample_id is not None:
                        if sample_id not in all_predictions:
                            all_predictions[sample_id] = dict()
                        if key not in all_predictions[sample_id]:
                            all_predictions[sample_id][key] = predictions[
                                output_idx][prediction_pos]
                        else:
                            all_predictions[sample_id][key] = np.concatenate(
                                (all_predictions[sample_id][key],
                                 predictions[output_idx][prediction_pos]),
                                axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        print "  %s since start (+%.2f s)" % (utils.hms(time_since_start),
                                              time_since_prev)
        try:
            if num_chunks_test:
                est_time_left = time_since_start * (float(num_chunks_test -
                                                          (e + 1)) /
                                                    float(e + 1))
                eta = datetime.datetime.now() + datetime.timedelta(
                    seconds=est_time_left)
                eta_str = eta.strftime("%c")
                print "  estimated %s to go" % utils.hms(est_time_left)
                print "  (ETA: %s)" % eta_str
        except OverflowError:
            print "  This will take really long, like REALLY long."

        print "  %dms per testing sample" % (1000. * time_since_start / (
            (e + 1) * config.batch_size * config.batches_per_chunk))

    with open(prediction_path, 'wb') as f:  # binary mode for pickle
        pickle.dump(
            {
                'metadata_path': metadata_path,
                'prediction_path': prediction_path,
                'configuration_file': config.__name__,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'predictions': all_predictions,
            }, f, pickle.HIGHEST_PROTOCOL)

    print "  saved to %s" % prediction_path
    print

    return