def train(log_dir, config):
    config.data_paths = config.data_paths

    data_dirs = [os.path.join(data_path, "data")
                 for data_path in config.data_paths]
    num_speakers = len(data_dirs)
    config.num_test = config.num_test_per_speaker * num_speakers

    if num_speakers > 1 and hparams.model_type not in ["deepvoice", "simple"]:
        raise Exception("[!] Unknown model_type for multi-speaker: {}".format(hparams.model_type))

    commit = get_git_commit() if config.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')

    log(' [*] git rev-parse HEAD:\n%s' % get_git_revision_hash())
    log('=' * 50)
    log(' [*] git diff:\n%s' % get_git_diff())
    log('=' * 50)
    log(' [*] Checkpoint path: %s' % checkpoint_path)
    log(' [*] Loading training data from: %s' % data_dirs)
    log(' [*] Using model: %s' % config.model_dir)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        train_feeder = DataFeeder(
            coord, data_dirs, hparams, config, 32,
            data_type='train', batch_size=hparams.batch_size)
        test_feeder = DataFeeder(
            coord, data_dirs, hparams, config, 8,
            data_type='test', batch_size=config.num_test)

    # Set up model:
    is_randomly_initialized = config.initialize_path is None
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('model') as scope:
        model = create_model(hparams)
        model.initialize(
            train_feeder.inputs, train_feeder.input_lengths,
            num_speakers, train_feeder.speaker_id,
            train_feeder.mel_targets, train_feeder.linear_targets,
            train_feeder.loss_coeff,
            is_randomly_initialized=is_randomly_initialized)
        model.add_loss()
        model.add_optimizer(global_step)
        train_stats = add_stats(model, scope_name='stats')  # legacy

    with tf.variable_scope('model', reuse=True) as scope:
        test_model = create_model(hparams)
        test_model.initialize(
            test_feeder.inputs, test_feeder.input_lengths,
            num_speakers, test_feeder.speaker_id,
            test_feeder.mel_targets, test_feeder.linear_targets,
            test_feeder.loss_coeff,
            rnn_decoder_test_mode=True,
            is_randomly_initialized=is_randomly_initialized)
        test_model.add_loss()

    test_stats = add_stats(test_model, model, scope_name='test')
    test_stats = tf.summary.merge([test_stats, train_stats])

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    sess_config = tf.ConfigProto(
        log_device_placement=False,
        allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    # Train!
    with tf.Session(config=sess_config) as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if config.load_path:
                # Restore from a checkpoint if the user requested it.
                restore_path = get_most_recent_checkpoint(config.model_dir)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit), slack=True)
            elif config.initialize_path:
                restore_path = get_most_recent_checkpoint(config.initialize_path)
                saver.restore(sess, restore_path)
                log('Initialized from checkpoint: %s at commit: %s' %
                    (restore_path, commit), slack=True)

                zero_step_assign = tf.assign(global_step, 0)
                sess.run(zero_step_assign)
                start_step = sess.run(global_step)

                log('=' * 50)
                log(' [*] Global step is reset to {}'.format(start_step))
                log('=' * 50)
            else:
                log('Starting new training run at commit: %s' % commit, slack=True)

            start_step = sess.run(global_step)

            train_feeder.start_in_session(sess, start_step)
            test_feeder.start_in_session(sess, start_step)

            while not coord.should_stop():
                start_time = time.time()

                step, loss, opt = sess.run(
                    [global_step, model.loss_without_coeff, model.optimize],
                    feed_dict=model.get_dummy_feed_dict())

                time_window.append(time.time() - start_time)
                loss_window.append(loss)

                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % config.checkpoint_interval == 0))

                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % config.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    feed_dict = {
                        **model.get_dummy_feed_dict(),
                        **test_model.get_dummy_feed_dict()
                    }
                    summary_writer.add_summary(
                        sess.run(test_stats, feed_dict=feed_dict), step)

                if step % config.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)

                if step % config.test_interval == 0:
                    log('Saving audio and alignments...')
                    num_test = config.num_test

                    fetches = [
                        model.inputs[:num_test],
                        model.linear_outputs[:num_test],
                        model.alignments[:num_test],
                        test_model.inputs[:num_test],
                        test_model.linear_outputs[:num_test],
                        test_model.alignments[:num_test],
                    ]
                    feed_dict = {
                        **model.get_dummy_feed_dict(),
                        **test_model.get_dummy_feed_dict()
                    }

                    sequences, spectrograms, alignments, \
                        test_sequences, test_spectrograms, test_alignments = \
                            sess.run(fetches, feed_dict=feed_dict)

                    save_and_plot(sequences[:1], spectrograms[:1], alignments[:1],
                                  log_dir, step, loss, "train")
                    save_and_plot(test_sequences, test_spectrograms, test_alignments,
                                  log_dir, step, loss, "test")

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
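
# train() leans on a ValueWindow helper that is defined elsewhere in the repo.
# A minimal sketch compatible with the append()/average calls above (the
# repo's actual implementation may differ):
class ValueWindow(object):
    """Keep the last `window_size` values and expose their running average."""

    def __init__(self, window_size=100):
        self._window_size = window_size
        self._values = []

    def append(self, x):
        # drop the oldest value once the window is full
        self._values = self._values[-(self._window_size - 1):] + [x]

    @property
    def average(self):
        return sum(self._values) / max(1, len(self._values))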
def predict_slice_model(expid, outfile, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]

    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )

    _check_slicemodel(input_layers)

    # Print the architecture
    _print_architecture(top_layer)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    idx = T.lscalar('idx')

    givens = dict()
    for key in input_layers.keys():
        if key == "sunny":
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().sunny_batch_size:(idx + 1) * config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().batch_size:(idx + 1) * config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx],
                                network_outputs + theano_printer.get_the_stuff_to_print(),
                                givens=givens,
                                on_unused_input="ignore",
                                # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])

    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = range(1, num_chunks + 1)

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys=xs_shared.keys(),
                              required_output_keys=["patients", "slices"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time

    predictions = [{"patient": i + 1,
                    "slices": {
                        slice_id: {
                            "systole": np.zeros((0, 600)),
                            "diastole": np.zeros((0, 600))
                        } for slice_id in data_loader.get_slice_ids_for_patient(i + 1)
                    }
                    } for i in xrange(NUM_PATIENTS)]

    # Loop over data and generate predictions
    for e, test_data in izip(itertools.count(start=1),
                             buffering.buffered_gen_threaded(create_test_gen())):
        print " load testing data onto GPU"
        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])

        patient_ids = test_data["output"]["patients"]
        slice_ids = test_data["output"]["slices"]
        print " patients:", " ".join(map(str, patient_ids))
        print " chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i]
                                    for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = \
                kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64')

            for idx, (patient_id, slice_id) in enumerate(
                    zip(patient_ids[b * config().batch_size:(b + 1) * config().batch_size],
                        slice_ids[b * config().batch_size:(b + 1) * config().batch_size])):
                if patient_id != 0:
                    index = patient_id - 1
                    patient_data = predictions[index]
                    assert patient_id == patient_data["patient"]

                    patient_slice_data = patient_data["slices"][slice_id]
                    patient_slice_data["systole"] = np.concatenate(
                        (patient_slice_data["systole"], kaggle_systoles[idx:idx + 1, :]), axis=0)
                    patient_slice_data["diastole"] = np.concatenate(
                        (patient_slice_data["diastole"], kaggle_diastoles[idx:idx + 1, :]), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) /
                                            float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print " %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print " estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    # Average predictions
    already_printed = False
    for prediction in predictions:
        for prediction_slice_id in prediction["slices"]:
            prediction_slice = prediction["slices"][prediction_slice_id]
            if prediction_slice["systole"].size > 0 and prediction_slice["diastole"].size > 0:
                average_method = getattr(config(), 'tta_average_method',
                                         partial(np.mean, axis=0))
                prediction_slice["systole_average"] = average_method(prediction_slice["systole"])
                prediction_slice["diastole_average"] = average_method(prediction_slice["diastole"])
                try:
                    test_if_valid_distribution(prediction_slice["systole_average"])
                    test_if_valid_distribution(prediction_slice["diastole_average"])
                except:
                    if not already_printed:
                        print "WARNING: These distributions are not distributions"
                        already_printed = True
                    prediction_slice["systole_average"] = \
                        make_monotone_distribution(prediction_slice["systole_average"])
                    prediction_slice["diastole_average"] = \
                        make_monotone_distribution(prediction_slice["diastole_average"])

    print "Calculating training and validation set scores for reference"

    # Add CRPS scores to the predictions
    # Iterate over train and validation sets
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                  (train_patients_indices, "train")]:
        # Iterate over patients in the set
        for patient in patient_ids:
            prediction = predictions[patient - 1]
            # Iterate over the slices
            for slice_id in prediction["slices"]:
                prediction_slice = prediction["slices"][slice_id]
                if "systole_average" in prediction_slice:
                    assert patient == regular_labels[patient - 1, 0]
                    error_sys = CRSP(prediction_slice["systole_average"],
                                     regular_labels[patient - 1, 1])
                    prediction_slice["systole_CRPS"] = error_sys
                    prediction_slice["target_systole"] = regular_labels[patient - 1, 1]
                    error_dia = CRSP(prediction_slice["diastole_average"],
                                     regular_labels[patient - 1, 2])
                    prediction_slice["diastole_CRPS"] = error_dia
                    prediction_slice["target_diastole"] = regular_labels[patient - 1, 2]
                    prediction_slice["CRPS"] = 0.5 * error_sys + 0.5 * error_dia

    print "dumping prediction file to %s" % outfile
    with open(outfile, 'wb') as f:
        pickle.dump({
            'metadata_path': metadata_path,
            'configuration_file': config().__name__,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
            'time_since_start': time_since_start,
            'param_values': lasagne.layers.get_all_param_values(top_layer),
            'predictions_per_slice': predictions,
        }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"

    return
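
# test_if_valid_distribution and make_monotone_distribution live in the
# project's utils; the intent is that every 600-bin prediction is a valid CDF.
# A minimal sketch of the assumed behavior (not necessarily the repo's code):
import numpy as np

def test_if_valid_distribution(distribution):
    # a valid CDF lies in [0, 1] and is non-decreasing
    assert np.all(distribution >= 0.0)
    assert np.all(distribution <= 1.0)
    assert np.all(np.diff(distribution) >= 0.0)

def make_monotone_distribution(distribution):
    # repair an invalid prediction: force monotonicity, then clip into [0, 1]
    return np.clip(np.maximum.accumulate(distribution), 0.0, 1.0)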
def main():
    # Parameter #
    params = dict(
        high_pass_fraction=0.15,  # only frequencies higher than this fraction of the fourier domain image will pass
        low_pass_fraction=0.4     # only frequencies lower than this fraction of the fourier domain image will pass
    )
    #############

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_path",
                        default="./Input/demo",
                        help="Path to the folder containing the input images.")
    parser.add_argument("--output_path",
                        default="./Output/demo",
                        help="Path to the folder which will contain the output.")
    parser.add_argument("--param_file",
                        default="",
                        help="Name of a parameter file in the input folder. "
                             "Will be used to override the local param dictionary.")
    args = parser.parse_args()

    # Preparation
    time_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    current_file = os.path.splitext(os.path.basename(__file__))[0]
    input_path = args.input_path
    output_path = os.path.join(args.output_path, current_file + "_" + time_stamp)
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # set up logging
    logging.basicConfig(filename=os.path.join(output_path, current_file + '.log'),
                        level=logging.DEBUG,
                        format='%(asctime)s - %(levelname)s: %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    logging.info("Current git revision: {}".format(
        utils.get_git_revision_hash()))

    # override parameters if external ones are given
    param_path = os.path.join(input_path, args.param_file)
    if os.path.isfile(param_path):
        with open(param_path, "r") as param_file:
            params = json.load(param_file)
        logging.info("Using parameters given in {}".format(param_path))
    else:
        logging.info("Using local parameters")

    # dump used parameters
    with open(os.path.join(output_path, 'params.json'), 'w') as f:
        json.dump(params, f, sort_keys=True, indent=4)
        f.write('\n')

    print("Start processing...")
    counter = 0
    # Loop through all images in input path
    for root, dirs, files in os.walk(input_path):
        for input_name in files:
            start = time.time()
            input_name_base = os.path.splitext(os.path.basename(input_name))[0]
            img_original = cv2.imread(os.path.join(input_path, input_name))
            if img_original is None:  # reading failed (e.g. file is not an image)
                continue
            img = cv2.cvtColor(img_original, cv2.COLOR_BGR2GRAY)

            low_pass, fourier_spectrum = low_pass_filter(img, **params)
            high_pass, _ = high_pass_filter(img, **params)
            middle_pass, _, middle_mask = middle_pass_filter(img, **params)

            # OTSU Thresholding
            low_pass = low_pass.astype(np.uint8)
            ret, low_pass_otsu = cv2.threshold(
                low_pass, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            middle_pass = middle_pass.astype(np.uint8)
            ret, middle_pass_otsu = cv2.threshold(
                middle_pass, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            cv2.imwrite(os.path.join(output_path, input_name), img_original)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_fourier_spectrum.tiff"),
                fourier_spectrum)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_low_pass.tiff"),
                low_pass)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_high_pass.tiff"),
                high_pass)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_middle_pass.tiff"),
                middle_pass)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_middle_mask.tiff"),
                middle_mask)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_middle_pass_otsu.tiff"),
                middle_pass_otsu)
            cv2.imwrite(
                os.path.join(output_path, input_name_base + "_low_pass_otsu.tiff"),
                low_pass_otsu)

            duration = time.time() - start
            logging.info("Processed {0} (Duration: {1:.3f} s)".format(
                input_name, duration))
            counter += 1

    logging.info("Processed {} images in total".format(counter))
    print("Processing done. See log file in '{}' for more details".format(
        output_path))
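
# low_pass_filter / high_pass_filter / middle_pass_filter are defined elsewhere
# in this project; each returns the filtered image plus a viewable Fourier
# spectrum. As a rough illustration of that interface, a low-pass variant might
# look like the following sketch (the circular mask and log-scaled spectrum are
# assumptions, not the project's actual code):
import numpy as np

def low_pass_filter(img, low_pass_fraction=0.4, **unused_params):
    # centered 2-D spectrum of the grayscale image
    f = np.fft.fftshift(np.fft.fft2(img))
    rows, cols = img.shape
    cy, cx = rows // 2, cols // 2
    y, x = np.ogrid[:rows, :cols]
    # keep only frequencies within `low_pass_fraction` of the spectrum radius
    radius = low_pass_fraction * min(cy, cx)
    mask = (y - cy) ** 2 + (x - cx) ** 2 <= radius ** 2
    # log-scaled magnitude spectrum, e.g. for the *_fourier_spectrum.tiff output
    spectrum = 20 * np.log(np.abs(f) + 1)
    filtered = np.real(np.fft.ifft2(np.fft.ifftshift(f * mask)))
    return filtered, spectrum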
def main(args_lst, eid, experiment_path, out, valid_after, load_params,
         save_params, debug, track_log, n_cells, emb_size, x_include_score,
         no_train_emb, n_epochs, lr, opt_type, momentum, mb_size, mb_mult_data,
         oclf_n_hidden, oclf_n_layers, oclf_activation, rnn_n_layers,
         lstm_peepholes, lstm_bidi, p_drop, init_emb_from, input_n_layers,
         input_n_hidden, input_activation, eval_on_full_train,
         x_include_token_ftrs, enable_branch_exp, l1, l2, x_include_mlp,
         enable_token_supervision, model_type, ontology):
    output_dir = init_env(out + os.path.basename(experiment_path))

    mon_train = TrainingStats()
    mon_valid = TrainingStats()
    mon_extreme_examples = TrainingStats()
    stats_obj = dict(
        train=mon_train.data,
        mon_extreme_examples=mon_extreme_examples.data,
        args=args_lst
    )

    logging.info('XTrack has been started.')
    logging.info('GIT rev: %s' % get_git_revision_hash())
    logging.info('Output dir: %s' % output_dir)
    logging.info('Initializing random seed to 271.')
    random.seed(271)
    logging.info('Argv: %s' % str(sys.argv))
    logging.info('Effective args:')
    for arg_name, arg_value in args_lst:
        logging.info('  %s: %s' % (arg_name, arg_value))
    logging.info('Experiment path: %s' % experiment_path)

    train_path = os.path.join(experiment_path, 'train.json')
    xtd_t = Data.load(train_path)

    valid_path = os.path.join(experiment_path, 'dev.json')
    xtd_v = Data.load(valid_path)

    slots = xtd_t.slots
    classes = xtd_t.classes
    class_groups = xtd_t.slot_groups

    t = time.time()
    logging.info('Building model: %s' % model_type)
    model = get_model(
        args_lst, eid, experiment_path, out, valid_after, load_params,
        save_params, debug, track_log, n_cells, emb_size, x_include_score,
        no_train_emb, n_epochs, lr, opt_type, momentum, mb_size, mb_mult_data,
        oclf_n_hidden, oclf_n_layers, oclf_activation, rnn_n_layers,
        lstm_peepholes, lstm_bidi, p_drop, init_emb_from, input_n_layers,
        input_n_hidden, input_activation, eval_on_full_train,
        x_include_token_ftrs, enable_branch_exp, l1, l2, x_include_mlp,
        enable_token_supervision, model_type, ontology, xtd_t
    )
    logging.info('Rebuilding took: %.1f' % (time.time() - t))

    if load_params:
        logging.info('Loading parameters from: %s' % load_params)
        model.load_params(load_params)

    onto = OntologyReader(ontology)
    tracker_valid = XTrack2DSTCTracker(xtd_v, [model], onto)
    tracker_train = XTrack2DSTCTracker(xtd_t, [model], onto)

    valid_data_y = model.prepare_data_train(xtd_v.sequences, slots)
    valid_data = model.prepare_data_predict(xtd_v.sequences, slots)

    if not eval_on_full_train:
        selected_train_seqs = []
        for i in range(100):
            ndx = random.randint(0, len(xtd_t.sequences) - 1)
            selected_train_seqs.append(xtd_t.sequences[ndx])
    else:
        selected_train_seqs = xtd_t.sequences
    # train_data = model.prepare_data_train(selected_train_seqs, slots)

    best_tracking_acc = 0.0

    seqs = list(xtd_t.sequences)
    seqs = seqs * mb_mult_data
    random.shuffle(seqs)
    minibatches = prepare_minibatches(seqs, mb_size, model, slots)
    minibatches = zip(itertools.count(), minibatches)
    logging.info('We have %d minibatches.' % len(minibatches))

    example_cntr = 0
    timestep_cntr = 0
    stats = TrainingStats()
    mb_histogram = defaultdict(int)
    mb_ids = range(len(minibatches))
    mb_to_go = []
    epoch = 0

    init_valid_loss = model._loss(*valid_data_y)
    logging.info('Initial valid loss: %.10f' % init_valid_loss)

    if not valid_after:
        valid_after = len(seqs)

    mb_loss = {}
    last_valid = 0
    last_inline_print = time.time()
    last_inline_print_cnt = 0
    best_track_metric = defaultdict(float)
    keep_on_training = True
    while keep_on_training:
        if len(mb_to_go) == 0:
            mb_to_go = list(mb_ids)
            epoch += 1
            if 0 < n_epochs < epoch:
                keep_on_training = False
                continue

        mb_ndx = random.choice(mb_to_go)
        mb_to_go.remove(mb_ndx)

        #mb_id, mb_data = random.choice(minibatches)
        mb_id, mb_data = minibatches[mb_ndx]
        mb_histogram[mb_ndx] += 1

        mb_done = 0
        t = time.time()
        (loss, update_ratio) = model._train(lr, *mb_data)
        mb_loss[mb_ndx] = loss
        t = time.time() - t
        stats.insert(loss=loss, update_ratio=update_ratio, time=t)

        x = mb_data[0]
        example_cntr += x.shape[1]
        timestep_cntr += x.shape[0]
        mb_done += 1

        if time.time() - last_inline_print > 1.0:
            last_inline_print = time.time()
            inline_print(
                " %6d examples, %4d examples/s" % (
                    example_cntr,
                    example_cntr - last_inline_print_cnt
                )
            )
            last_inline_print_cnt = example_cntr

        if (example_cntr - last_valid) >= valid_after:
            inline_print("")
            last_valid = example_cntr
            params_file = os.path.join(
                output_dir, 'params.%.10d.p' % example_cntr
            )
            logging.info('Saving parameters: %s' % params_file)
            model.save_params(params_file)

            valid_loss = model._loss(*valid_data_y)
            update_ratio = stats.mean('update_ratio')

            _, track_score = tracker_valid.track(track_log)
            for metric, value in track_score.iteritems():
                logging.info('Valid %15s: %10.2f %%' % (metric, value * 100))
                best_track_metric[metric] = max(
                    value, best_track_metric[metric]
                )
            for metric, value in best_track_metric.iteritems():
                logging.info('Best %15s: %10.2f %%' % (metric, value * 100))

            logging.info('Train loss: %10.2f' % stats.mean('loss'))
            logging.info('Mean update ratio: %10.6f' % update_ratio)
            logging.info('Mean mb time: %10.4f' % stats.mean('time'))
            logging.info('Epoch: %10d (%d mb remain)' % (
                epoch, len(mb_to_go)
            ))
            logging.info('Example: %10d' % example_cntr)

            mon_train.insert(
                time=time.time(),
                example=example_cntr,
                timestep_cntr=timestep_cntr,
                mb_id=mb_id,
                train_loss=stats.mean('loss'),
                valid_loss=valid_loss,
                update_ratio=stats.mean('update_ratio'),
                tracking_acc=track_score
            )

            stats_path = os.path.join(output_dir, 'stats.json')
            with open(stats_path, 'w') as f_out:
                json.dump(stats_obj, f_out)
            os.system(
                'ln -f -s "%s" "xtrack2_vis/stats.json"' %
                os.path.join('..', stats_path)
            )

            stats = TrainingStats()

    params_file = os.path.join(output_dir, 'params.final.p')
    logging.info('Saving final params to: %s' % params_file)
    model.save_params(params_file)

    return best_tracking_acc
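
# inline_print is the small progress helper used in the loop above; presumably
# it overwrites the current terminal line instead of appending a new one.
# A sketch of the assumed behavior:
import sys

def inline_print(text):
    # '\r' returns the cursor to the start of the line so the next call overwrites it
    sys.stderr.write('\r' + text)
    sys.stderr.flush()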
def main(args_lst, eid, experiment_path, out, valid_after, load_params,
         save_params, debug, track_log, n_cells, emb_size, x_include_score,
         no_train_emb, n_epochs, lr, opt_type, momentum, mb_size, mb_mult_data,
         oclf_n_hidden, oclf_n_layers, oclf_activation, rnn_n_layers,
         lstm_peepholes, lstm_bidi, p_drop, init_emb_from, input_n_layers,
         input_n_hidden, input_activation, eval_on_full_train,
         x_include_token_ftrs, enable_branch_exp, l1, l2, x_include_mlp,
         enable_token_supervision, model_type):
    output_dir = init_env(out)

    mon_train = TrainingStats()
    mon_valid = TrainingStats()
    mon_extreme_examples = TrainingStats()
    stats_obj = dict(
        train=mon_train.data,
        mon_extreme_examples=mon_extreme_examples.data,
        args=args_lst
    )

    logging.info('XTrack has been started.')
    logging.info('GIT rev: %s' % get_git_revision_hash())
    logging.info('Output dir: %s' % output_dir)
    logging.info('Initializing random seed to 0.')
    random.seed(0)
    logging.info('Argv: %s' % str(sys.argv))
    logging.info('Effective args:')
    for arg_name, arg_value in args_lst:
        logging.info('  %s: %s' % (arg_name, arg_value))
    logging.info('Experiment path: %s' % experiment_path)

    train_path = os.path.join(experiment_path, 'train.json')
    xtd_t = Data.load(train_path)

    valid_path = os.path.join(experiment_path, 'dev.json')
    xtd_v = Data.load(valid_path)

    slots = xtd_t.slots
    classes = xtd_t.classes
    class_groups = xtd_t.slot_groups
    n_input_tokens = len(xtd_t.vocab)
    n_input_score_bins = len(xtd_t.score_bins)

    t = time.time()
    logging.info('Building model: %s' % model_type)
    if model_type == 'lstm':
        model = Model(slots=slots,
                      slot_classes=xtd_t.classes,
                      emb_size=emb_size,
                      no_train_emb=no_train_emb,
                      x_include_score=x_include_score,
                      x_include_token_ftrs=x_include_token_ftrs,
                      x_include_mlp=x_include_mlp,
                      n_input_score_bins=n_input_score_bins,
                      n_cells=n_cells,
                      n_input_tokens=n_input_tokens,
                      oclf_n_hidden=oclf_n_hidden,
                      oclf_n_layers=oclf_n_layers,
                      oclf_activation=oclf_activation,
                      debug=debug,
                      rnn_n_layers=rnn_n_layers,
                      lstm_peepholes=lstm_peepholes,
                      lstm_bidi=lstm_bidi,
                      opt_type=opt_type,
                      momentum=momentum,
                      p_drop=p_drop,
                      init_emb_from=init_emb_from,
                      vocab=xtd_t.vocab,
                      input_n_layers=input_n_layers,
                      input_n_hidden=input_n_hidden,
                      input_activation=input_activation,
                      token_features=None,
                      enable_branch_exp=enable_branch_exp,
                      token_supervision=enable_token_supervision,
                      l1=l1,
                      l2=l2)
    elif model_type == 'conv':
        model = SimpleConvModel(slots=slots,
                                slot_classes=xtd_t.classes,
                                emb_size=emb_size,
                                no_train_emb=no_train_emb,
                                x_include_score=x_include_score,
                                x_include_token_ftrs=x_include_token_ftrs,
                                x_include_mlp=x_include_mlp,
                                n_input_score_bins=n_input_score_bins,
                                n_cells=n_cells,
                                n_input_tokens=n_input_tokens,
                                oclf_n_hidden=oclf_n_hidden,
                                oclf_n_layers=oclf_n_layers,
                                oclf_activation=oclf_activation,
                                debug=debug,
                                rnn_n_layers=rnn_n_layers,
                                lstm_peepholes=lstm_peepholes,
                                lstm_bidi=lstm_bidi,
                                opt_type=opt_type,
                                momentum=momentum,
                                p_drop=p_drop,
                                init_emb_from=init_emb_from,
                                vocab=xtd_t.vocab,
                                input_n_layers=input_n_layers,
                                input_n_hidden=input_n_hidden,
                                input_activation=input_activation,
                                token_features=None,
                                enable_branch_exp=enable_branch_exp,
                                token_supervision=enable_token_supervision,
                                l1=l1,
                                l2=l2)
    elif model_type == 'baseline':
        model = BaselineModel(slots=slots,
                              slot_classes=xtd_t.classes,
                              oclf_n_hidden=oclf_n_hidden,
                              oclf_n_layers=oclf_n_layers,
                              oclf_activation=oclf_activation,
                              n_cells=n_cells,
                              debug=debug,
                              opt_type=opt_type,
                              momentum=momentum,
                              p_drop=p_drop,
                              vocab=xtd_t.vocab,
                              input_n_layers=input_n_layers,
                              input_n_hidden=input_n_hidden,
                              input_activation=input_activation,
                              token_features=None,
                              enable_branch_exp=enable_branch_exp,
                              token_supervision=enable_token_supervision,
                              l1=l1,
                              l2=l2)
    else:
        raise Exception('Unknown model type: %s' % model_type)
    logging.info('Rebuilding took: %.1f' % (time.time() - t))

    if load_params:
        logging.info('Loading parameters from: %s' % load_params)
        model.load_params(load_params)

    tracker_valid = XTrack2DSTCTracker(xtd_v, [model])
    tracker_train = XTrack2DSTCTracker(xtd_t, [model])

    valid_data_y = model.prepare_data_train(xtd_v.sequences, slots)
    valid_data = model.prepare_data_predict(xtd_v.sequences, slots)

    if not eval_on_full_train:
        selected_train_seqs = []
        for i in range(100):
            ndx = random.randint(0, len(xtd_t.sequences) - 1)
            selected_train_seqs.append(xtd_t.sequences[ndx])
    else:
        selected_train_seqs = xtd_t.sequences
    train_data = model.prepare_data_train(selected_train_seqs, slots)

    joint_slots = ['joint_%s' % str(grp) for grp in class_groups.keys()]
    best_acc = {slot: 0 for slot in xtd_v.slots + joint_slots}
    best_acc_train = {slot: 0 for slot in xtd_v.slots + joint_slots}
    best_tracking_acc = 0.0
    n_valid_not_increased = 0
    et = None

    seqs = list(xtd_t.sequences)
    seqs = seqs * mb_mult_data
    random.shuffle(seqs)
    minibatches = prepare_minibatches(seqs, mb_size, model, slots)
    minibatches = zip(itertools.count(), minibatches)
    logging.info('We have %d minibatches.' % len(minibatches))

    example_cntr = 0
    timestep_cntr = 0
    stats = TrainingStats()
    mb_histogram = defaultdict(int)
    mb_ids = range(len(minibatches))
    mb_to_go = []
    mb_bad = []
    epoch = 0

    init_valid_loss = model._loss(*valid_data_y)
    logging.info('Initial valid loss: %.10f' % init_valid_loss)

    if not valid_after:
        valid_after = len(seqs)

    mb_loss = {}
    last_valid = 0
    last_inline_print = time.time()
    last_inline_print_cnt = 0
    best_track_acc = defaultdict(float)
    while True:
        if len(mb_to_go) == 0:
            mb_to_go = list(mb_ids)
            epoch += 1
            if n_epochs > 0 and n_epochs < epoch:
                break

        mb_ndx = random.choice(mb_to_go)
        mb_to_go.remove(mb_ndx)

        #mb_id, mb_data = random.choice(minibatches)
        mb_id, mb_data = minibatches[mb_ndx]
        mb_histogram[mb_ndx] += 1

        #if et is not None:
        #    epoch_time = time.time() - et
        #else:
        #    epoch_time = -1.0
        #logging.info('Epoch #%d (last epoch took %.1fs) (seen %d examples)' %
        #             (i, epoch_time, example_cntr))
        #et = time.time()

        mb_done = 0
        t = time.time()
        (loss, update_ratio) = model._train(lr, *mb_data)
        mb_loss[mb_ndx] = loss
        t = time.time() - t
        stats.insert(loss=loss, update_ratio=update_ratio, time=t)

        x = mb_data[0]
        example_cntr += x.shape[1]
        timestep_cntr += x.shape[0]
        mb_done += 1

        if time.time() - last_inline_print > 1.0:
            last_inline_print = time.time()
            inline_print(" %6d examples, %4d examples/s" % (
                example_cntr,
                example_cntr - last_inline_print_cnt
            ))
            last_inline_print_cnt = example_cntr

        if (example_cntr - last_valid) >= valid_after:
            inline_print("")
            last_valid = example_cntr
            params_file = os.path.join(output_dir,
                                       'params.%.10d.p' % example_cntr)
            logging.info('Saving parameters: %s' % params_file)
            model.save_params(params_file)

            valid_loss = model._loss(*valid_data_y)
            update_ratio = stats.mean('update_ratio')

            _, track_score = tracker_valid.track(track_log)
            for group, accuracy in sorted(track_score.iteritems(),
                                          key=lambda (g, _): g):
                logging.info('Valid acc %15s: %10.2f %%' % (group, accuracy * 100))
                best_track_acc[group] = max(accuracy, best_track_acc[group])
            for group in sorted(track_score, key=lambda g: g):
                logging.info('Best acc %15s: %10.2f %%' %
                             (group, best_track_acc[group] * 100))

            logging.info('Train loss: %10.2f' % stats.mean('loss'))
            logging.info('Mean update ratio: %10.6f' % update_ratio)
            logging.info('Mean mb time: %10.4f' % stats.mean('time'))
            logging.info('Epoch: %10d (%d mb remain)' % (epoch, len(mb_to_go)))
            logging.info('Example: %10d' % example_cntr)

            mon_train.insert(
                time=time.time(),
                example=example_cntr,
                timestep_cntr=timestep_cntr,
                mb_id=mb_id,
                train_loss=stats.mean('loss'),
                valid_loss=valid_loss,
                update_ratio=stats.mean('update_ratio'),
                tracking_acc=track_score
            )

            stats_path = os.path.join(output_dir, 'stats.json')
            with open(stats_path, 'w') as f_out:
                json.dump(stats_obj, f_out)
            os.system('ln -f -s "%s" "xtrack2_vis/stats.json"' %
                      os.path.join('..', stats_path))

            stats = TrainingStats()

    params_file = os.path.join(output_dir, 'params.final.p')
    logging.info('Saving final params to: %s' % params_file)
    model.save_params(params_file)

    return best_tracking_acc
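
# prepare_minibatches is assumed to chop the (shuffled, possibly repeated)
# sequence list into model-ready batches, so model._train(lr, *mb_data) can
# consume each one directly. A plausible sketch, not the repo's actual code:
def prepare_minibatches(seqs, mb_size, model, slots):
    minibatches = []
    # group `mb_size` sequences together and let the model pack them into arrays
    for start in range(0, len(seqs), mb_size):
        mb = seqs[start:start + mb_size]
        minibatches.append(model.prepare_data_train(mb, slots))
    return minibatches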
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print("WARNING: not running in fast mode!")

    print("Using")
    print(" %s" % metadata_path)
    print("To generate")
    print(" %s" % prediction_path)
    print(" %s" % submission_path)

    print("Build model")
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]

    top_layer = lasagne.layers.MergeLayer(
        incomings=list(output_layers.values()))

    all_layers = lasagne.layers.get_all_layers(top_layer)
    num_params = lasagne.layers.count_params(top_layer)
    print(" number of parameters: %d" % num_params)
    print(string.ljust(" layer output shapes:", 36), end=' ')
    print(string.ljust("#params:", 10), end=' ')
    print("output shape:")
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print(" %s %s %s" % (name, num_param, layer.output_shape))

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape),
                                        dtype='float32')
        for (key, l_in) in input_layers.items()
    }

    idx = T.lscalar('idx')

    givens = dict()
    for key in list(input_layers.keys()):
        if key == "sunny":
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().sunny_batch_size:
                               (idx + 1) * config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = \
                xs_shared[key][idx * config().batch_size:
                               (idx + 1) * config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer,
                                         deterministic=True)
        for network_output_layer in list(output_layers.values())
    ]

    iter_test = theano.function(
        [idx],
        network_outputs + theano_printer.get_the_stuff_to_print(),
        givens=givens,
        on_unused_input="ignore",
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    print("Load model parameters for resuming")
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer,
                                        resume_metadata['param_values'])

    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = list(range(1, num_chunks + 1))

    data_loader.filter_patient_folders()

    create_test_gen = partial(
        config().create_test_gen,
        required_input_keys=list(xs_shared.keys()),
        required_output_keys=[
            "patients", "classification_correction_function"
        ],
    )

    print("Generate predictions with this model")
    start_time = time.time()
    prev_time = start_time

    predictions = [{
        "patient": i + 1,
        "systole": np.zeros((0, 600)),
        "diastole": np.zeros((0, 600))
    } for i in range(NUM_PATIENTS)]

    for e, test_data in zip(itertools.count(start=1),
                            buffering.buffered_gen_threaded(
                                create_test_gen())):
        print(" load testing data onto GPU")
        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])

        patient_ids = test_data["output"]["patients"]
        classification_correction = test_data["output"][
            "classification_correction_function"]
        print(" patients:", " ".join(map(str, patient_ids)))
        print(" chunk %d/%d" % (e, num_chunks))

        for b in range(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {
                list(output_layers.keys())[i]: network_outputs[i]
                for i in range(len(output_layers))
            }
            kaggle_systoles, kaggle_diastoles = config().postprocess(
                network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype(
                'float64'), kaggle_diastoles.astype('float64')
            for idx, patient_id in enumerate(
                    patient_ids[b * config().batch_size:
                                (b + 1) * config().batch_size]):
                if patient_id != 0:
                    index = patient_id - 1
                    patient_data = predictions[index]
                    assert patient_id == patient_data["patient"]

                    kaggle_systole = kaggle_systoles[idx:idx + 1, :]
                    kaggle_diastole = kaggle_diastoles[idx:idx + 1, :]
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(
                        kaggle_diastole).all()
                    kaggle_systole = classification_correction[
                        b * config().batch_size + idx](kaggle_systole)
                    kaggle_diastole = classification_correction[
                        b * config().batch_size + idx](kaggle_diastole)
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(
                        kaggle_diastole).all()
                    patient_data["systole"] = np.concatenate(
                        (patient_data["systole"], kaggle_systole), axis=0)
                    patient_data["diastole"] = np.concatenate(
                        (patient_data["diastole"], kaggle_diastole), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (
            float(num_chunks - (e + 1)) /
            float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print(" %s since start (%.2f s)" %
              (utils.hms(time_since_start), time_since_prev))
        print(" estimated %s to go (ETA: %s)" %
              (utils.hms(est_time_left), eta_str))
        print()

    already_printed = False
    for prediction in predictions:
        if prediction["systole"].size > 0 and prediction["diastole"].size > 0:
            average_method = getattr(config(), 'tta_average_method',
                                     partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(
                prediction["systole"])
            prediction["diastole_average"] = average_method(
                prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except:
                if not already_printed:
                    print("WARNING: These distributions are not distributions")
                    already_printed = True
                prediction["systole_average"] = make_monotone_distribution(
                    prediction["systole_average"])
                prediction["diastole_average"] = make_monotone_distribution(
                    prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])

    print("Calculating training and validation set scores for reference")

    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                  (train_patients_indices, "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient - 1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient - 1, 0]
                error = CRSP(prediction["systole_average"],
                             regular_labels[patient - 1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"],
                             regular_labels[patient - 1, 2])
                errors.append(error)
        if len(errors) > 0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print(" %s kaggle loss: %f" %
                  (string.rjust(set_name, 12), estimated_CRSP))
            validation_dict[set_name] = estimated_CRSP
        else:
            print(" %s kaggle loss: not calculated" %
                  (string.rjust(set_name, 12)))

    print("dumping prediction file to %s" % prediction_path)
    with open(prediction_path, 'wb') as f:
        pickle.dump(
            {
                'metadata_path': metadata_path,
                'prediction_path': prediction_path,
                'submission_path': submission_path,
                'configuration_file': config().__name__,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'time_since_start': time_since_start,
                'param_values': lasagne.layers.get_all_param_values(top_layer),
                'predictions': predictions,
                'validation_errors': validation_dict,
            }, f, pickle.HIGHEST_PROTOCOL)
    print("prediction file dumped")

    print("dumping submission file to %s" % submission_path)
    with open(submission_path, 'w') as csvfile:
        csvwriter = csv.writer(csvfile,
                               delimiter=',',
                               quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(['Id'] + ['P%d' % i for i in range(600)])
        for prediction in predictions:
            # the submission only has patients 501 to 700
            if prediction["patient"] in data_loader.test_patients_indices:
                if "diastole_average" not in prediction or \
                        "systole_average" not in prediction:
                    raise Exception("Not all test-set patients were predicted")
                csvwriter.writerow(
                    ["%d_Diastole" % prediction["patient"]] +
                    ["%.18f" % p for p in prediction["diastole_average"].flatten()])
                csvwriter.writerow(
                    ["%d_Systole" % prediction["patient"]] +
                    ["%.18f" % p for p in prediction["systole_average"].flatten()])
    print("submission file dumped")

    return
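
# CRSP (the repo's name for the per-case CRPS) scores a predicted 600-bin CDF
# against a Heaviside step at the true volume, following the Second Annual
# Data Science Bowl definition. Assuming that standard definition, a sketch:
import numpy as np

def CRSP(cdf, true_volume):
    # target CDF: 0 below the true volume, 1 at and above it
    heaviside = (np.arange(600) >= true_volume).astype('float64')
    return np.mean((np.asarray(cdf).flatten() - heaviside) ** 2)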
def train(log_dir, config):
    config.data_paths = config.data_paths  # data paths from the parsed command-line args: default='datasets/kr_example'

    data_dirs = [os.path.join(data_path, "data")
                 for data_path in config.data_paths]
    num_speakers = len(data_dirs)  # number of speakers to train on: 1 = single-speaker model, 2 = multi-speaker model
    config.num_test = config.num_test_per_speaker * num_speakers

    # when training a multi-speaker model, the model type must be "deepvoice" or "simple"
    # (the original mistakenly formatted config.model_type into this message instead of hparams.model_type)
    if num_speakers > 1 and hparams.model_type not in ["deepvoice", "simple"]:
        raise Exception("[!] Unknown model_type for multi-speaker: {}".format(hparams.model_type))

    commit = get_git_commit() if config.git else 'None'  # git-related; can be ignored
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')  # set checkpoint_path: path of the model.ckpt file

    log(' [*] git rev-parse HEAD:\n%s' % get_git_revision_hash())  # git log
    log('=' * 50)  # separator line =====
    #log(' [*] git diff:\n%s' % get_git_diff())
    log('=' * 50)  # separator line =====
    log(' [*] Checkpoint path: %s' % checkpoint_path)  # print the checkpoint path
    log(' [*] Loading training data from: %s' % data_dirs)
    log(' [*] Using model: %s' % config.model_dir)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()  # thread coordinator
    with tf.variable_scope('datafeeder') as scope:
        # def __init__(self, coordinator, data_dirs, hparams, config, batches_per_group, data_type, batch_size):
        train_feeder = DataFeeder(
            coord, data_dirs, hparams, config, 32,
            data_type='train', batch_size=hparams.batch_size)
        test_feeder = DataFeeder(
            coord, data_dirs, hparams, config, 8,
            data_type='test', batch_size=config.num_test)

    # Set up model:
    is_randomly_initialized = config.initialize_path is None
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('model') as scope:
        model = create_model(hparams)  # create the Tacotron model
        model.initialize(
            train_feeder.inputs, train_feeder.input_lengths,
            num_speakers, train_feeder.speaker_id,
            train_feeder.mel_targets, train_feeder.linear_targets,
            train_feeder.loss_coeff,
            is_randomly_initialized=is_randomly_initialized)
        model.add_loss()
        model.add_optimizer(global_step)
        train_stats = add_stats(model, scope_name='stats')  # legacy

    with tf.variable_scope('model', reuse=True) as scope:
        test_model = create_model(hparams)  # create the Tacotron test model
        test_model.initialize(
            test_feeder.inputs, test_feeder.input_lengths,
            num_speakers, test_feeder.speaker_id,
            test_feeder.mel_targets, test_feeder.linear_targets,
            test_feeder.loss_coeff,
            rnn_decoder_test_mode=True,
            is_randomly_initialized=is_randomly_initialized)
        test_model.add_loss()

    # record the model's losses and similar values to TensorBoard (model=test_model, model2=model)
    test_stats = add_stats(test_model, model, scope_name='test')
    test_stats = tf.summary.merge([test_stats, train_stats])

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)  # ValueWindow with window_size=100
    loss_window = ValueWindow(100)
    # auto-save every 2 hours; checkpoints are never deleted
    saver = tf.train.Saver(max_to_keep=None, keep_checkpoint_every_n_hours=2)

    sess_config = tf.ConfigProto(
        log_device_placement=False,  # log_device_placement reports which device each op is placed on
        allow_soft_placement=True)   # if allow_soft_placement is False, this errors out when no GPU is available
    sess_config.gpu_options.allow_growth = True  # grow GPU memory usage on demand

    # Train!
    #with tf.Session(config=sess_config) as sess:
    with tf.Session() as sess:  # everything inside this with-block runs on the CPU or GPU
        try:
            # write evaluated summary ops and the TensorBoard graph to log_dir
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            # once the dataset is loaded and the graph is fully defined, initialize variables and start training
            sess.run(tf.global_variables_initializer())

            if config.load_path:  # if a path to saved settings was given
                # Restore from a checkpoint if the user requested it.
                restore_path = get_most_recent_checkpoint(config.model_dir)  # most recently saved checkpoint
                saver.restore(sess, restore_path)  # restore from restore_path
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit), slack=True)  # log via git and Slack
            elif config.initialize_path:  # if initializing from the given settings was requested
                restore_path = get_most_recent_checkpoint(config.initialize_path)  # most recent checkpoint under the given path
                saver.restore(sess, restore_path)  # restore from restore_path
                log('Initialized from checkpoint: %s at commit: %s' %
                    (restore_path, commit), slack=True)  # log via git and Slack

                zero_step_assign = tf.assign(global_step, 0)  # op that resets the global_step variable to 0
                sess.run(zero_step_assign)  # run the reset op
                start_step = sess.run(global_step)  # use the global_step value as the starting point

                log('=' * 50)
                log(' [*] Global step is reset to {}'.format(start_step))  # i.e. report that the starting step was reset to 0
                log('=' * 50)
            else:
                # if no previous data is used, log that this is a fresh training run
                log('Starting new training run at commit: %s' % commit, slack=True)

            start_step = sess.run(global_step)  # fetch the starting step

            train_feeder.start_in_session(sess, start_step)
            test_feeder.start_in_session(sess, start_step)

            while not coord.should_stop():  # as long as the threads are not asked to stop
                start_time = time.time()  # start time (seconds since the Unix epoch, UTC)

                # step takes the value of global_step, loss the value of model.loss_without_coeff
                step, loss, opt = sess.run(
                    [global_step, model.loss_without_coeff, model.optimize],
                    feed_dict=model.get_dummy_feed_dict())

                time_window.append(time.time() - start_time)
                loss_window.append(loss)

                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % config.checkpoint_interval == 0))

                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % config.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    feed_dict = {
                        **model.get_dummy_feed_dict(),
                        **test_model.get_dummy_feed_dict()
                    }
                    summary_writer.add_summary(
                        sess.run(test_stats, feed_dict=feed_dict), step)

                if step % config.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)

                if step % config.test_interval == 0:
                    log('Saving audio and alignment...')
                    num_test = config.num_test

                    fetches = [
                        model.inputs[:num_test],
                        model.linear_outputs[:num_test],
                        model.alignments[:num_test],
                        test_model.inputs[:num_test],
                        test_model.linear_outputs[:num_test],
                        test_model.alignments[:num_test],
                    ]
                    feed_dict = {
                        **model.get_dummy_feed_dict(),
                        **test_model.get_dummy_feed_dict()
                    }

                    sequences, spectrograms, alignments, \
                        test_sequences, test_spectrograms, test_alignments = \
                            sess.run(fetches, feed_dict=feed_dict)

                    save_and_plot(sequences[:1], spectrograms[:1], alignments[:1],
                                  log_dir, step, loss, "train")
                    save_and_plot(test_sequences, test_spectrograms, test_alignments,
                                  log_dir, step, loss, "test")

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
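
# Both versions of train() resume via get_most_recent_checkpoint, defined
# elsewhere in the repo. A sketch of the assumed behavior, picking the
# checkpoint with the highest global step (the glob pattern assumes TF1-style
# model.ckpt-<step>.index files):
import glob
import os

def get_most_recent_checkpoint(checkpoint_dir):
    checkpoint_paths = [path.replace(".index", "") for path in
                        glob.glob("{}/*.ckpt-*.index".format(checkpoint_dir))]
    if not checkpoint_paths:
        raise Exception("No checkpoint found in {}".format(checkpoint_dir))
    # "model.ckpt-123000" -> 123000
    return max(checkpoint_paths,
               key=lambda path: int(path.rsplit("-", 1)[1]))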
        prev_time = now
        est_time_left = time_since_start * \
            (config().max_nchunks - chunk_idx + 1.) / (chunk_idx + 1. - start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print(" %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev))
        print(" estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str))
        print()

        if ((chunk_idx + 1) % config().save_every) == 0:
            print()
            print('Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks))
            print('Saving metadata, parameters')

            with open(metadata_path, 'wb') as f:
                pickle.dump(
                    {
                        'configuration_file': config_name,
                        'git_revision_hash': utils.get_git_revision_hash(),
                        'experiment_id': expid,
                        'chunks_since_start': chunk_idx,
                        'losses_eval_train': losses_eval_train,
                        'losses_eval_valid': losses_eval_valid,
                        'param_values': nn.layers.get_all_param_values(model.l_out)
                    }, f, pickle.HIGHEST_PROTOCOL)

            print(' saved to %s' % metadata_path)
            print()
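
# utils.hms, used for all the timing output above, presumably just formats a
# second count as hours:minutes:seconds. A sketch of the assumed behavior:
def hms(seconds):
    # format a duration in seconds as H:MM:SS
    seconds = int(seconds)
    return "%d:%02d:%02d" % (seconds // 3600, (seconds % 3600) // 60, seconds % 60)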
def train_model(expid):
    """
    This function trains the model, and will use the name expid to store and report the results
    :param expid: the name
    :return:
    """
    metadata_path = MODEL_PATH + "%s.pkl" % expid

    # Fast_run is very slow, but might be better for debugging.
    # Make sure you don't leave it on accidentally!
    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Build model"
    # Get the input and output layers of our model
    interface_layers = config.build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]

    # merge all output layers into a fictional dummy layer which is not actually used
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())

    # get all the trainable parameters from the model
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    # do not train beyond the layers in cutoff_gradients:
    # remove all their parameters from the optimization process
    if "cutoff_gradients" in interface_layers:
        submodel_params = [
            param for value in interface_layers["cutoff_gradients"]
            for param in lasagne.layers.get_all_params(value)
        ]
        all_params = [p for p in all_params if p not in submodel_params]

    # some parameters might already be pretrained!
    # Load their values from the requested configuration name.
    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers["pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings=layers_dict.values())
            lasagne.layers.set_all_param_values(
                pretrained_top_layer, pretrained_resume_metadata['param_values'])

    # Count all the parameters we are actually optimizing,
    # and visualize what the model looks like.
    print string.ljust(" layer output shapes:", 26),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"

    def comma_separator(v):
        return '{:,.0f}'.format(v)

    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 22)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(comma_separator(num_param), 10)
        num_size = string.ljust(
            comma_separator(np.prod(layer.output_shape[1:])), 10)
        print " %s %s %s %s" % (name, num_param, num_size, layer.output_shape)

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])
    print " number of parameters:", comma_separator(num_params)

    # Build all the objectives requested by the configuration
    objectives = config.build_objectives(interface_layers)

    train_losses_theano = {
        key: ob.get_loss()
        for key, ob in objectives["train"].iteritems()
    }

    validate_losses_theano = {
        key: ob.get_loss(deterministic=True)
        for key, ob in objectives["validate"].iteritems()
    }

    # Create the Theano variables necessary to interface with the model
    # the input:
    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape),
                                        dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    # the output:
    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim,
                                        dtype=target_var.dtype)
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    # Set up the learning rate schedule
    learning_rate_schedule = config.learning_rate_schedule
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    # We only work on one batch at a time within our chunk.
    # Set up the Theano code which does this.
    idx = T.lscalar('idx')  # the index of the batch we are currently at inside our chunk of data

    givens = dict()

    for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                   objectives["validate"].iteritems()):
        for (key, target_var) in ob.target_vars.iteritems():
            givens[target_var] = ys_shared[key][idx * config.batch_size:
                                                (idx + 1) * config.batch_size]

    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx * config.batch_size:
                                                (idx + 1) * config.batch_size]

    # Sum over the losses of the objective we optimize. We will optimize this
    # sum (either minimize or maximize). The sum makes the learning rate
    # independent of batch size!
    if hasattr(config, "dont_sum_losses") and config.dont_sum_losses:
        train_loss_theano = T.mean(train_losses_theano["objective"])
    else:
        train_loss_theano = T.sum(train_losses_theano["objective"]) * (
            -1 if objectives["train"]["objective"].optimize == MAXIMIZE else 1)

    # build the update step for Theano
    updates = config.build_updates(train_loss_theano, all_params, learning_rate)

    if hasattr(config, "print_gradnorm") and config.print_gradnorm:
        all_grads = theano.grad(train_loss_theano, all_params,
                                disconnected_inputs='warn')
        grad_norm = T.sqrt(T.sum([(g ** 2).sum() for g in all_grads]) + 1e-9)
        grad_norm.name = "grad_norm"
        theano_printer.print_me_this(" grad norm", grad_norm)
        # train_losses_theano["grad_norm"] = grad_norm

    # Compile the Theano functions of your model+objective
    print "Compiling..."
    iter_train = theano.function(
        [idx],
        train_losses_theano.values() + theano_printer.get_the_stuff_to_print(),
        givens=givens,
        on_unused_input="ignore",
        updates=updates,
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    if hasattr(config, "print_gradnorm") and config.print_gradnorm:
        del theano_printer._stuff_to_print[-1]

    # For validation, we also like to have something which returns the output
    # of our model without the objective
    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]
    iter_predict = theano.function([idx],
                                   network_outputs + theano_printer.get_the_stuff_to_print(),
                                   givens=givens,
                                   on_unused_input="ignore")

    # The data loader will need to know which kinds of data it actually needs to load:
    # collect all the necessary tags for the model.
    required_input = {
        key: l_in.output_shape
        for (key, l_in) in input_layers.iteritems()
    }
    required_output = {
        key: None  # size is not needed
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    # The data loaders need to prepare before they can start.
    # This is usually where the data is loaded from disk into memory.
    print "Preparing dataloaders"
    config.training_data.prepare()
    for validation_data in config.validation_data.values():
        validation_data.prepare()

    print "Will train for %s epochs" % config.training_data.epochs

    # If this is the second time we run this configuration, we might need to
    # load the results of the previous optimization. Check if this is the case,
    # and load the parameters and state. If not, start from zero.
    if config.restart_from_save and os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer,
                                            resume_metadata['param_values'])
        start_chunk_idx = resume_metadata['chunks_since_start'] + 1

        # set lr to the correct value
        current_lr = np.float32(
            utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
        print " setting learning rate to %.7f" % current_lr
        learning_rate.set_value(current_lr)
        losses = resume_metadata['losses']
        config.training_data.skip_first_chunks(start_chunk_idx)
    else:
        start_chunk_idx = 0
        losses = dict()
        losses[TRAINING] = dict()
        losses[VALIDATION] = dict()
        for loss_name in train_losses_theano.keys():
            losses[TRAINING][loss_name] = list()

        for dataset_name in config.validation_data.keys():
            losses[VALIDATION][dataset_name] = dict()
            for loss_name in validate_losses_theano.keys():
                losses[VALIDATION][dataset_name][loss_name] = list()

    # Make a data generator which returns preprocessed chunks of data which are
    # fed to the model. Note that this is a generator object! It is a special
    # kind of iterator.
    chunk_size = config.batches_per_chunk * config.batch_size

    # Weight normalization
    if hasattr(config, "init_weight_norm") and not config.restart_from_save:
        theano_printer._stuff_to_print = []
        from theano_utils.weight_norm import train_weight_norm
        train_weight_norm(config, output_layers, all_layers, idx, givens,
                          xs_shared, chunk_size, required_input, required_output)

    training_data_generator = buffering.buffered_gen_threaded(
        config.training_data.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output=required_output,
        ))

    # Estimate the number of batches we will train for.
    chunks_train_idcs = itertools.count(start_chunk_idx)
    if config.training_data.epochs:
        num_chunks_train = int(
            1.0 * config.training_data.epochs *
            config.training_data.number_of_samples /
            (config.batch_size * config.batches_per_chunk))
    else:
        num_chunks_train = None

    # Start the timer objects
    start_time, prev_time = None, None
    print "Loading first chunks"
    data_load_time = Timer()
    gpu_time = Timer()

    #========================#
    # This is the train loop #
    #========================#
    data_load_time.start()
    for e, train_data in izip(chunks_train_idcs, training_data_generator):
        data_load_time.stop()
        if start_time is None:
            start_time = time.time()
            prev_time = start_time

        print
        if num_chunks_train:
            print "Chunk %d/%d" % (e + 1, num_chunks_train)
        else:
            print "Chunk %d" % (e + 1)
        print "=============="
        print " %s" % config.__name__

        # Estimate the current epoch we are at
        epoch = (1.0 * config.batch_size * config.batches_per_chunk * (e + 1) /
                 config.training_data.number_of_samples)
        if epoch >= 0.1:
            print " Epoch %.1f/%s" % (epoch, str(config.training_data.epochs))
        else:
            print " Epoch %.0e/%s" % (epoch, str(config.training_data.epochs))

        # for debugging the data loader, it might be useful to dump everything
        # it loaded and analyze it.
        if config.dump_network_loaded_data:
            pickle.dump(train_data, open("data_loader_dump_train_%d.pkl" % e, "wb"))

        # Update the learning rate given the new epoch number
        for key, rate in learning_rate_schedule.iteritems():
            if epoch >= key:
                lr = np.float32(rate)
                learning_rate.set_value(lr)
        print " learning rate %.0e" % lr

        # Move this data from the data loader onto the Theano variables
        for key in xs_shared:
            xs_shared[key].set_value(train_data["input"][key])

        for key in ys_shared:
            if key not in train_data["output"]:
                raise Exception(
                    "You forgot to add key %s to OUTPUT_DATA_SIZE_TYPE in your data loader" % key)
            ys_shared[key].set_value(train_data["output"][key])

        # loop over all the batches in one chunk, and keep the losses
        chunk_losses = np.zeros((len(train_losses_theano), 0))
        for b in xrange(config.batches_per_chunk):
            gpu_time.start()
            th_result = iter_train(b)
            gpu_time.stop()

            resulting_losses = np.stack(th_result[:len(train_losses_theano)], axis=0)
            # these are not needed anyway, just to make Theano call the print function
            # stuff_to_print = th_result[-len(theano_printer.get_the_stuff_to_print()):]
            # print resulting_losses.shape, chunk_losses.shape
            chunk_losses = np.concatenate((chunk_losses, resulting_losses), axis=1)

        # check if we found NaN's. When there are NaN's we might as well exit.
        utils.detect_nans(chunk_losses, xs_shared, ys_shared, all_params)

        # Average our losses, and print them.
        mean_train_loss = np.mean(chunk_losses, axis=1)
        for loss_name, loss in zip(train_losses_theano.keys(), mean_train_loss):
            losses[TRAINING][loss_name].append(loss)
            print string.rjust(loss_name + ":", 15), "%.6f" % loss

        # Now, we will do validation. We do this about every
        # config.epochs_per_validation epochs.
        # We also always validate at the end of every training!
        validate_every = max(
            int((config.epochs_per_validation *
                 config.training_data.number_of_samples) /
                (config.batch_size * config.batches_per_chunk)), 1)

        if ((e + 1) % validate_every) == 0 or (num_chunks_train and e + 1 >= num_chunks_train):
            print
            print " Validating "

            # We might test on multiple datasets, such as the train set, validation set, ...
            for dataset_name, dataset_generator in config.validation_data.iteritems():
                # Start loading the validation data!
validation_chunk_generator = dataset_generator.generate_batch( chunk_size=chunk_size, required_input=required_input, required_output=required_output, ) print " %s (%d/%d samples)" % ( dataset_name, dataset_generator.number_of_samples_in_iterator, dataset_generator.number_of_samples) print " -----------------------" # If there are no validation samples, don't bother validating. if dataset_generator.number_of_samples == 0: continue validation_predictions = None # Keep the labels of the validation data for later. output_keys_to_store = set() losses_to_store = dict() for key, ob in objectives["validate"].iteritems(): if ob.mean_over_samples: losses_to_store[key] = None else: output_keys_to_store.add(ob.target_key) chunk_labels = {k: None for k in output_keys_to_store} store_network_output = (len(output_keys_to_store) > 0) # loop over all validation data chunks data_load_time.start() for validation_data in buffering.buffered_gen_threaded( validation_chunk_generator): data_load_time.stop() num_batches_chunk_eval = config.batches_per_chunk # set the validation data to the required Theano variables. Note, there is no # use setting the output variables, as we do not have labels of the validation set! for key in xs_shared: xs_shared[key].set_value(validation_data["input"][key]) # store all the output keys required for finding the validation error for key in output_keys_to_store: new_data = validation_data["output"][ key][:validation_data["valid_samples"]] if chunk_labels[key] is None: chunk_labels[key] = new_data else: chunk_labels[key] = np.concatenate( (chunk_labels[key], new_data), axis=0) # loop over the batches of one chunk, and keep the predictions chunk_predictions = None for b in xrange(num_batches_chunk_eval): gpu_time.start() th_result = iter_predict(b) gpu_time.stop() resulting_predictions = np.stack( th_result[:len(network_outputs)], axis=0) assert len( network_outputs ) == 1, "Multiple outputs not implemented yet" if chunk_predictions is None: chunk_predictions = resulting_predictions else: chunk_predictions = np.concatenate( (chunk_predictions, resulting_predictions), axis=1) # Check for NaN's. Panic if there are NaN's during validation. utils.detect_nans(chunk_predictions, xs_shared, ys_shared, all_params) # add the predictions of this chunk, to the global predictions (if needed) if chunk_predictions is not None: chunk_predictions = chunk_predictions[:validation_data[ VALID_SAMPLES]] if store_network_output: if validation_predictions is None: validation_predictions = chunk_predictions else: validation_predictions = np.concatenate( (validation_predictions, chunk_predictions), axis=1) # if you can calculate the losses per chunk, and take the mean afterwards, do that. for key, ob in objectives["validate"].iteritems(): if ob.mean_over_samples: new_losses = [] for i in xrange(validation_data[VALID_SAMPLES]): loss = ob.get_loss_from_lists( chunk_predictions[0, i:i + 1], validation_data["output"][ ob.target_key][i:i + 1]) new_losses.append(loss) new_losses = np.array(new_losses) if losses_to_store[key] is None: losses_to_store[key] = new_losses else: losses_to_store[key] = np.concatenate( (losses_to_store[key], new_losses), axis=0) data_load_time.start() data_load_time.stop() # Compare the predictions with the actual labels and print them. 
for key, ob in objectives["validate"].iteritems(): if ob.mean_over_samples: loss = np.mean(losses_to_store[key]) else: loss = ob.get_loss_from_lists( validation_predictions[0, :], chunk_labels[ob.target_key]) losses[VALIDATION][dataset_name][key].append(loss) print string.rjust(key + ":", 17), "%.6f" % loss print # Good, we did one chunk. Let us check how much time this took us. Print out some stats. now = time.time() time_since_start = now - start_time time_since_prev = now - prev_time prev_time = now # This is the most useful stat of all! Keep this number low, and your total optimization time will be low too. print " on average %dms per training sample" % ( 1000. * time_since_start / ((e + 1 - start_chunk_idx) * config.batch_size * config.batches_per_chunk)) print " %s since start (+%.2f s)" % (utils.hms(time_since_start), time_since_prev) print " %s waiting on gpu vs %s waiting for data" % (gpu_time, data_load_time) try: if num_chunks_train: # only if we ever stop running est_time_left = time_since_start * ( float(num_chunks_train - (e + 1 - start_chunk_idx)) / float(e + 1 - start_chunk_idx)) eta = datetime.datetime.now() + datetime.timedelta( seconds=est_time_left) eta_str = eta.strftime("%c") print " estimated %s to go" % utils.hms(est_time_left) print " (ETA: %s)" % eta_str if hasattr(config, "print_mean_chunks"): avg_train = losses[TRAINING]["objective"] n = min(len(avg_train), config.print_mean_chunks) avg_train = avg_train[-n:] print " mean loss last %i chunks: %.3f" % ( n, np.mean(avg_train)) except OverflowError: # Shit happens print " This will take really long, like REALLY long." if hasattr(config, "print_score_every_chunk") and config.print_score_every_chunk\ and len(losses[VALIDATION]["training set"]["objective"]) > 0: print " train: best %.3f latest %.3f, valid: best %.3f latest %.3f " % ( np.min(losses[VALIDATION]["training set"]["objective"]), losses[VALIDATION]["training set"]["objective"][-1], np.min(losses[VALIDATION]["validation set"]["objective"]), losses[VALIDATION]["validation set"]["objective"][-1]) # Save the data every config.save_every_chunks chunks. Or at the end of the training. # We should make it config.save_every_epochs epochs sometimes. Consistency if ((e + 1) % config.save_every_chunks) == 0 or ( num_chunks_train and e + 1 >= num_chunks_train): print print "Saving metadata, parameters" with open(metadata_path, 'w') as f: pickle.dump( { 'metadata_path': metadata_path, 'configuration_file': config.__name__, 'git_revision_hash': utils.get_git_revision_hash(), 'experiment_id': expid, 'chunks_since_start': e, 'losses': losses, 'time_since_start': time_since_start, 'param_values': lasagne.layers.get_all_param_values(top_layer) }, f, pickle.HIGHEST_PROTOCOL) print " saved to %s" % metadata_path print # reset the timers for next round. This needs to happen here, because at the end of the big for loop # we already want te get a chunk immediately for the next loop. The iterator is an argument of the for loop. gpu_time.reset() data_load_time.reset() data_load_time.start() return
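# The bookkeeping above mixes a few unit conversions; a small self-contained
# sketch (illustrative values only, not part of the original pipeline) of the
# formulas used for the epoch counter and the ETA estimate:
def chunk_to_epoch(chunk_idx, batch_size, batches_per_chunk, number_of_samples):
    # One chunk holds batch_size * batches_per_chunk samples, so the epoch
    # counter is simply the fraction of the dataset seen so far.
    return 1.0 * batch_size * batches_per_chunk * (chunk_idx + 1) / number_of_samples

def estimate_time_left(time_since_start, chunks_done, chunks_total):
    # Linear extrapolation: observed seconds per chunk times chunks remaining.
    return time_since_start * float(chunks_total - chunks_done) / float(chunks_done)

# e.g. chunk_to_epoch(99, 32, 16, 51200) == 1.0 (exactly one pass over the data),
# and estimate_time_left(600.0, 100, 400) == 1800.0 seconds.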
#!/usr/bin/env python
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             '../../uegpy')))
import ueg_sys as ue
import finite as fp
import utils as ut
import monte_carlo as mc

time = float(sys.argv[1])
beta = float(sys.argv[2])
ne = float(sys.argv[3])

cutoff = ut.kinetic_cutoff(ne, 1.0/beta)
system = ue.System(0.1, ne, cutoff, 2)

# Calibrate the cost of a single iteration, then fill 90% of the time budget.
t_per_it = mc.sample_canonical_energy(system, beta/system.ef, 10)[1] / 10
time = 0.9 * time
iterations = min(int(time/t_per_it), int(1e6))

(frame, time) = mc.sample_canonical_energy(system, beta/system.ef, iterations)

print("# Running uegpy version: %s" % (ut.get_git_revision_hash()))
print("# Time taken: %s s" % time)
print(frame.to_string(index=False))
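# Hypothetical invocation (the argument meanings are inferred from the
# sys.argv reads above; the script name and values are illustrative only):
#
#   python canonical_energy.py 3600 1.0 14
#
# i.e. a wall-time budget in seconds, the inverse temperature beta, and the
# electron number ne. The script calibrates the cost of one Monte Carlo
# iteration, keeps 10% of the budget as head room, and spends the rest
# sampling the canonical energy estimator (capped at 1e6 iterations).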
utils.save_pkl(avg_patient_predictions, test_prediction_path) print "\npredictions saved to %s" % test_prediction_path # utils.save_submission(avg_patient_predictions, submission_path) # print ' submission saved to %s' % submission_path try: with open(jonas_prediction_path, "w") as f: pickle.dump( { "metadata_path": metadata_path, "prediction_path": test_prediction_path, "submission_path": submission_path, "configuration_file": config().__name__, "git_revision_hash": utils.get_git_revision_hash(), "predictions": predictions, }, f, pickle.HIGHEST_PROTOCOL, ) except: with open("ira_%s.pkl" % config().__name__, "w") as f: pickle.dump( { "metadata_path": metadata_path, "prediction_path": test_prediction_path, "submission_path": submission_path, "configuration_file": config().__name__, "git_revision_hash": utils.get_git_revision_hash(), "predictions": predictions,
def predict_model(expid, mfile=None): metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile) prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid submission_path = SUBMISSION_PATH + "%s.csv" % expid if theano.config.optimizer != "fast_run": print "WARNING: not running in fast mode!" print "Using" print " %s" % metadata_path print "To generate" print " %s" % prediction_path print " %s" % submission_path print "Build model" interface_layers = config().build_model() output_layers = interface_layers["outputs"] input_layers = interface_layers["inputs"] top_layer = lasagne.layers.MergeLayer( incomings=output_layers.values() ) all_layers = lasagne.layers.get_all_layers(top_layer) num_params = lasagne.layers.count_params(top_layer) print " number of parameters: %d" % num_params print string.ljust(" layer output shapes:",36), print string.ljust("#params:",10), print "output shape:" for layer in all_layers[:-1]: name = string.ljust(layer.__class__.__name__, 32) num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()]) num_param = string.ljust(num_param.__str__(), 10) print " %s %s %s" % (name, num_param, layer.output_shape) xs_shared = { key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems() } idx = T.lscalar('idx') givens = dict() for key in input_layers.keys(): if key=="sunny": givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size] else: givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size] network_outputs = [ lasagne.layers.helper.get_output(network_output_layer, deterministic=True) for network_output_layer in output_layers.values() ] iter_test = theano.function([idx], network_outputs + theano_printer.get_the_stuff_to_print(), givens=givens, on_unused_input="ignore", # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) ) print "Load model parameters for resuming" resume_metadata = np.load(metadata_path) lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values']) num_batches_chunk = config().batches_per_chunk num_batches = get_number_of_test_batches() num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk))) chunks_train_idcs = range(1, num_chunks+1) data_loader.filter_patient_folders() create_test_gen = partial(config().create_test_gen, required_input_keys = xs_shared.keys(), required_output_keys = ["patients", "classification_correction_function"], ) print "Generate predictions with this model" start_time = time.time() prev_time = start_time predictions = [{"patient": i+1, "systole": np.zeros((0,600)), "diastole": np.zeros((0,600)) } for i in xrange(NUM_PATIENTS)] for e, test_data in izip(itertools.count(start=1), buffering.buffered_gen_threaded(create_test_gen())): print " load testing data onto GPU" for key in xs_shared: xs_shared[key].set_value(test_data["input"][key]) patient_ids = test_data["output"]["patients"] classification_correction = test_data["output"]["classification_correction_function"] print " patients:", " ".join(map(str, patient_ids)) print " chunk %d/%d" % (e, num_chunks) for b in xrange(num_batches_chunk): iter_result = iter_test(b) network_outputs = tuple(iter_result[:len(output_layers)]) network_outputs_dict = {output_layers.keys()[i]: network_outputs[i] for i in xrange(len(output_layers))} kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict) kaggle_systoles, 
kaggle_diastoles = kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64')

            for idx, patient_id in enumerate(patient_ids[b*config().batch_size:(b+1)*config().batch_size]):
                if patient_id != 0:
                    index = patient_id-1
                    patient_data = predictions[index]
                    assert patient_id == patient_data["patient"]
                    kaggle_systole = kaggle_systoles[idx:idx+1,:]
                    kaggle_diastole = kaggle_diastoles[idx:idx+1,:]
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    kaggle_systole = classification_correction[b*config().batch_size + idx](kaggle_systole)
                    kaggle_diastole = classification_correction[b*config().batch_size + idx](kaggle_diastole)
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    patient_data["systole"] = np.concatenate((patient_data["systole"], kaggle_systole), axis=0)
                    patient_data["diastole"] = np.concatenate((patient_data["diastole"], kaggle_diastole), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print " %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print " estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    already_printed = False
    for prediction in predictions:
        if prediction["systole"].size > 0 and prediction["diastole"].size > 0:
            average_method = getattr(config(), 'tta_average_method', partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(prediction["systole"])
            prediction["diastole_average"] = average_method(prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except:
                if not already_printed:
                    print "WARNING: These distributions are not distributions"
                    already_printed = True
                prediction["systole_average"] = make_monotone_distribution(prediction["systole_average"])
                prediction["diastole_average"] = make_monotone_distribution(prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])

    print "Calculating training and validation set scores for reference"
    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                  (train_patients_indices, "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient-1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient-1, 0]
                error = CRSP(prediction["systole_average"], regular_labels[patient-1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"], regular_labels[patient-1, 2])
                errors.append(error)
        if len(errors) > 0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print " %s kaggle loss: %f" % (string.rjust(set_name, 12), estimated_CRSP)
            validation_dict[set_name] = estimated_CRSP
        else:
            print " %s kaggle loss: not calculated" % (string.rjust(set_name, 12))

    print "dumping prediction file to %s" % prediction_path
    with open(prediction_path, 'w') as f:
        pickle.dump({
            'metadata_path': metadata_path,
            'prediction_path': prediction_path,
            'submission_path': submission_path,
            'configuration_file': config().__name__,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
            'time_since_start': time_since_start,
            'param_values': lasagne.layers.get_all_param_values(top_layer),
            'predictions': predictions,
'validation_errors': validation_dict, }, f, pickle.HIGHEST_PROTOCOL) print "prediction file dumped" print "dumping submission file to %s" % submission_path with open(submission_path, 'w') as csvfile: csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) csvwriter.writerow(['Id'] + ['P%d'%i for i in xrange(600)]) for prediction in predictions: # the submission only has patients 501 to 700 if prediction["patient"] in data_loader.test_patients_indices: if "diastole_average" not in prediction or "systole_average" not in prediction: raise Exception("Not all test-set patients were predicted") csvwriter.writerow(["%d_Diastole" % prediction["patient"]] + ["%.18f" % p for p in prediction["diastole_average"].flatten()]) csvwriter.writerow(["%d_Systole" % prediction["patient"]] + ["%.18f" % p for p in prediction["systole_average"].flatten()]) print "submission file dumped" return
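# The helpers used above (make_monotone_distribution, test_if_valid_distribution,
# CRSP) live elsewhere in the repo. A minimal numpy sketch of their presumed
# contract -- predictions are 600-bin cumulative distributions P(volume <= v)
# for v = 0..599 -- to clarify what the repair and the score do:
import numpy as np

def make_monotone_distribution_sketch(cdf):
    # Clip into [0, 1] and enforce monotonicity with a running maximum.
    return np.maximum.accumulate(np.clip(cdf, 0.0, 1.0))

def crps_sketch(cdf, true_volume):
    # Continuous Ranked Probability Score against a step-function target, as
    # defined by the competition: the mean squared gap between the predicted
    # CDF and the Heaviside function of the true volume.
    heaviside = (np.arange(600) >= true_volume).astype('float64')
    return np.mean((cdf - heaviside) ** 2)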
def main(args):
    "Put all the pieces together"
    if args.dump_per_instance_results:
        args.dump = True
    if args.dump:
        args.disable_tqdm = True

    if len(args.logfile.name) == 0:
        basename_fusion = [
            str(i.with_suffix('').with_name(i.stem)) for i in args.snapshot
        ]
        args.logfile = Path('-'.join(basename_fusion) + '_corpus-eval')
    if args.logfile.exists():
        raise ValueError(
            f'{args.logfile} already exists. Please provide a logfile or '
            'back up existing results.')

    setup_logging(args)

    logging.info('Corpus Retrieval Evaluation for CAL/MCN')
    logging.info(f'Git revision hash: {get_git_revision_hash()}')
    load_hyperparameters(args)
    logging.info(args)

    engine_prm = {}
    if args.arch == 'MCN':
        args.dataset = 'UntrimmedMCN'
        args.engine = 'MomentRetrievalFromProposalsTable'
    elif args.arch == 'SMCN':
        args.dataset = 'UntrimmedSMCN'
        args.engine = 'MomentRetrievalFromClipBasedProposalsTable'
    else:
        raise ValueError('Unknown/unsupported architecture')

    logging.info('Loading dataset')
    dataset_novisual = True
    dataset_cues = {feat: None for feat in args.tags}
    if args.h5_path:
        for i, key in enumerate(args.tags):
            dataset_cues[key] = {'file': args.h5_path[i]}
        dataset_novisual = False
        clip_length = None
    else:
        clip_length = args.clip_length
    proposals_interface = proposals.__dict__[args.proposal_interface](
        args.min_length, args.scales, args.stride)
    dataset_setup = dict(json_file=args.test_list,
                         cues=dataset_cues,
                         loc=args.loc,
                         context=args.context,
                         debug=args.debug,
                         eval=True,
                         no_visual=dataset_novisual,
                         proposals_interface=proposals_interface,
                         clip_length=clip_length)
    dataset = dataset_untrimmed.__dict__[args.dataset](**dataset_setup)
    if args.arch == 'SMCN':
        logging.info('Set padding on UntrimmedSMCN dataset')
        dataset.set_padding(False)

    logging.info('Setting up models')
    models_dict = {}
    for i, key in enumerate(args.snapshot_tags):
        arch_setup = dict(
            visual_size=dataset.visual_size[key],
            lang_size=dataset.language_size,
            max_length=dataset.max_words,
            embedding_size=args.embedding_size,
            visual_hidden=args.visual_hidden,
            lang_hidden=args.lang_hidden,
            visual_layers=args.visual_layers,
        )
        models_dict[key] = model.__dict__[args.arch](**arch_setup)
        filename = args.snapshot[i].with_suffix('.pth.tar')
        snapshot_ = torch.load(filename,
                               map_location=lambda storage, loc: storage)
        models_dict[key].load_state_dict(snapshot_['state_dict'])
        models_dict[key].eval()

    logging.info('Creating database (a.k.a. indexing the corpus)')
    engine = corpus.__dict__[args.engine](dataset, models_dict, **engine_prm)
    engine.indexing()

    logging.info('Launch evaluation...')
    # log-scale up to the end of the database
    if len(args.topk) == 1 and args.topk[0] == 0:
        exp = int(np.floor(np.log10(engine.num_moments)))
        args.topk = [10**i for i in range(0, exp + 1)]
        args.topk.append(engine.num_moments)
    num_instances_retrieved = []
    judge = CorpusVideoMomentRetrievalEval(topk=args.topk)
    args.n_display = max(int(args.n_display * len(dataset.metadata)), 1)
    for it, query_metadata in tqdm(enumerate(dataset.metadata),
                                   disable=args.disable_tqdm):
        result_per_query = engine.query(
            query_metadata['language_input'],
            return_indices=args.dump_per_instance_results)
        if args.dump_per_instance_results:
            vid_indices, segments, proposals_ind = result_per_query
        else:
            vid_indices, segments = result_per_query
        judge.add_single_predicted_moment_info(query_metadata,
                                               vid_indices,
                                               segments,
                                               max_rank=engine.num_moments)
        num_instances_retrieved.append(len(vid_indices))
        if args.disable_tqdm and (it + 1) % args.n_display == 0:
            logging.info(f'Processed queries [{it}/{len(dataset.metadata)}]')

        if args.dump_per_instance_results:
            # TODO: wrap this up inside a class. We could even dump in a
            # non-blocking thread using a Queue.
            if it == 0:
                filename = args.logfile.with_suffix('.h5')
                fid = h5py.File(filename, 'x')
                if args.reduced_dump:
                    fid_vi = fid.create_dataset(
                        name='vid_indices', chunks=True,
                        shape=(len(dataset), dataset.num_videos),
                        dtype='int64')
                else:
                    fid.create_dataset(name='proposals',
                                       data=engine.proposals, chunks=True)
                    fid_vi = fid.create_dataset(
                        name='vid_indices', chunks=True,
                        shape=(len(dataset), ) + vid_indices.shape,
                        dtype='int64')
                    fid_pi = fid.create_dataset(
                        name='proposals_ind', chunks=True,
                        shape=(len(dataset), ) + proposals_ind.shape,
                        dtype='int64')
            if args.reduced_dump:
                fid_vi[it, ...] = pd.unique(vid_indices.numpy())
            else:
                fid_vi[it, ...] = vid_indices
                fid_pi[it, ...] = proposals_ind
    if args.dump_per_instance_results:
        fid.close()

    logging.info('Summarizing results')
    num_instances_retrieved = np.array(num_instances_retrieved)
    logging.info(f'Number of queries: {len(judge.map_query)}')
    logging.info(f'Number of proposals: {engine.num_moments}')
    retrieved_proposals_median = int(np.median(num_instances_retrieved))
    retrieved_proposals_min = int(num_instances_retrieved.min())
    if (num_instances_retrieved != engine.num_moments).any():
        logging.info('Triggered approximate search')
        logging.info('Median number of retrieved proposals: '
                     f'{retrieved_proposals_median:d}')
        logging.info('Min number of retrieved proposals: '
                     f'{retrieved_proposals_min:d}')
    result = judge.evaluate()
    _ = [logging.info(f'{k}: {v}') for k, v in result.items()]
    if args.dump:
        filename = args.logfile.with_suffix('.json')
        logging.info(f'Dumping results into: {filename}')
        with open(filename, 'x') as fid:
            for key, value in result.items():
                result[key] = float(value)
            result['snapshot'] = [str(i) for i in args.snapshot]
            result['corpus'] = str(args.test_list)
            result['topk'] = args.topk
            result['iou_threshold'] = judge.iou_thresholds
            result['median_proposals_retrieved'] = retrieved_proposals_median
            result['min_proposals_retrieved'] = retrieved_proposals_min
            result['date'] = datetime.now().isoformat()
            result['git_hash'] = get_git_revision_hash()
            json.dump(result, fid, indent=1)
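# Worked example of the log-scale top-k expansion above (numbers illustrative):
# with engine.num_moments == 42000, exp = floor(log10(42000)) = 4, so args.topk
# becomes [1, 10, 100, 1000, 10000, 42000] -- one recall@k point per decade,
# plus one at the full corpus size.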
def main(args):
    "Put all the pieces together"
    if args.dump:
        args.disable_tqdm = True

    if len(args.logfile.name) == 0:
        basename = args.snapshot[0].with_suffix('')
        args.logfile = basename.parent.joinpath(
            args.output_prefix, basename.stem + '_corpus-2nd-eval')
        if not args.logfile.parent.exists():
            args.logfile.parent.mkdir()
    if args.logfile.exists():
        raise ValueError(
            f'{args.logfile} already exists. Please provide a logfile or '
            'back up existing results.')

    setup_logging(args)

    logging.info('Corpus Retrieval Evaluation for 2nd Stage')
    load_hyperparameters(args)
    logging.info(args)

    if args.arch == 'MCN':
        args.dataset = 'UntrimmedMCN'
    elif args.arch == 'SMCN':
        args.dataset = 'UntrimmedSMCN'
    else:
        raise ValueError('Unknown/unsupported architecture')

    logging.info('Loading dataset')
    if args.h5_path.exists():
        dataset_novisual = False
        dataset_cues = {args.feat: {'file': args.h5_path}}
    else:
        raise NotImplementedError('WIP')
    proposals_interface = proposals.__dict__[args.proposal_interface](
        args.min_length, args.scales, args.stride)
    dataset_setup = dict(
        json_file=args.test_list,
        cues=dataset_cues,
        loc=args.loc,
        context=args.context,
        debug=args.debug,
        eval=True,
        no_visual=dataset_novisual,
        proposals_interface=proposals_interface
    )
    dataset = dataset_untrimmed.__dict__[args.dataset](**dataset_setup)

    logging.info('Setting up models')
    arch_setup = dict(
        visual_size=dataset.visual_size[args.feat],
        lang_size=dataset.language_size,
        max_length=dataset.max_words,
        embedding_size=args.embedding_size,
        visual_hidden=args.visual_hidden,
        lang_hidden=args.lang_hidden,
        visual_layers=args.visual_layers,
        bi_lstm=args.bi_lstm,
        lang_dropout=args.lang_dropout
    )
    net = model.__dict__[args.arch](**arch_setup)
    model_param = setup_snapshot(args.snapshot)
    net.load_state_dict(model_param['state_dict'])
    net.eval()

    logging.info('Setting up engine')
    engine = setup_engine(args, dataset, net)

    logging.info('Launch evaluation...')
    # log-scale up to the end of the database
    if len(args.topk) == 1 and args.topk[0] == 0:
        exp = int(np.floor(np.log10(engine.num_moments)))
        args.topk = [10**i for i in range(0, exp + 1)]
        args.topk.append(engine.num_moments)
    num_instances_retrieved = []
    judge = CorpusVideoMomentRetrievalEval(topk=args.topk)
    args.n_display = max(int(args.n_display * len(dataset.metadata)), 1)
    for it, query_metadata in tqdm(enumerate(dataset.metadata),
                                   disable=args.disable_tqdm):
        vid_indices, segments = engine.query(
            query_metadata['language_input'], description_ind=it)
        judge.add_single_predicted_moment_info(
            query_metadata, vid_indices, segments,
            max_rank=engine.num_moments)
        num_instances_retrieved.append(len(vid_indices))
        if args.disable_tqdm and (it + 1) % args.n_display == 0:
            logging.info(f'Processed queries [{it}/{len(dataset.metadata)}]')

    logging.info('Summarizing results')
    num_instances_retrieved = np.array(num_instances_retrieved)
    logging.info(f'Number of queries: {len(judge.map_query)}')
    logging.info(f'Number of proposals: {engine.num_moments}')
    retrieved_proposals_median = int(np.median(num_instances_retrieved))
    retrieved_proposals_min = int(num_instances_retrieved.min())
    if (num_instances_retrieved != engine.num_moments).any():
        logging.info('Triggered approximate search')
        logging.info('Median number of retrieved proposals: '
                     f'{retrieved_proposals_median:d}')
        logging.info('Min number of retrieved proposals: '
                     f'{retrieved_proposals_min:d}')
    result = judge.evaluate()
    _ = [logging.info(f'{k}: {v}') for k, v in result.items()]

    if args.dump:
        filename = args.logfile.with_suffix('.json')
        logging.info(f'Dumping results into: {filename}')
        with open(filename, 'x') as fid:
            for key, value in result.items():
                result[key] = float(value)
            result['snapshot'] = [str(i) for i in args.snapshot]
            result['corpus'] = str(args.test_list)
            result['h5_path'] = str(args.h5_path)
            result['h5_1ststage'] = str(args.h5_1ststage)
            result['snapshot_1ststage'] = str(args.snapshot_1ststage)
            result['topk'] = args.topk
            result['iou_threshold'] = judge.iou_thresholds
            result['k_first'] = args.k_first
            result['median_proposals_retrieved'] = retrieved_proposals_median
            result['min_proposals_retrieved'] = retrieved_proposals_min
            result['nms_threshold'] = args.nms_threshold
            result['corpus_setup'] = args.corpus_setup
            result['date'] = datetime.now().isoformat()
            result['git_hash'] = get_git_revision_hash()
            json.dump(result, fid, indent=1, sort_keys=True)
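# setup_logging is shared by both evaluation entry points; a minimal sketch of
# such a helper (an assumption, not the repo's implementation) that mirrors the
# observable behaviour -- logging to the console and, when a logfile was given,
# to a file next to args.logfile:
import logging

def setup_logging_sketch(args):
    handlers = [logging.StreamHandler()]
    if len(args.logfile.name) > 0:
        handlers.append(logging.FileHandler(args.logfile.with_suffix('.log'), mode='w'))
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s',
                        handlers=handlers)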
        )
        train_data["intermediates"] = iter_train(0)
        pickle.dump(train_data, open(metadata_path + "-dump", "wb"))

    return


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    required = parser.add_argument_group('required arguments')
    required.add_argument('-c', '--config',
                          help='configuration to run',
                          required=True)
    args = parser.parse_args()

    set_configuration(args.config)
    expid = utils.generate_expid(args.config)
    log_file = LOGS_PATH + "%s.log" % expid

    with print_to_file(log_file):
        print "Running configuration:", config().__name__
        print "Current git version:", utils.get_git_revision_hash()
        train_model(expid)
        print "log saved to '%s'" % log_file
        predict_model(expid)
        print "log saved to '%s'" % log_file
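# print_to_file is imported from the repo's utilities; a minimal sketch of such
# a context manager (an assumption, not the repo's implementation) would tee
# stdout into the log file while the block runs:
import contextlib
import sys

@contextlib.contextmanager
def print_to_file_sketch(path):
    class _Tee(object):
        def __init__(self, *streams):
            self.streams = streams
        def write(self, data):
            for stream in self.streams:
                stream.write(data)
        def flush(self):
            for stream in self.streams:
                stream.flush()

    with open(path, "w") as f:
        old_stdout = sys.stdout
        sys.stdout = _Tee(old_stdout, f)
        try:
            yield
        finally:
            sys.stdout = old_stdout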
def main(): # Parameter # params = dict( size_open= 3, # size of the opening structure element (opening is erosion followed by dilation) size_close= 2 # size of the closing structure element (closing is dilation followed by erosion) ) ############# parser = argparse.ArgumentParser() parser.add_argument("--input_path", default="./Input/demo", help="Path to the folder containing the input images.") parser.add_argument( "--output_path", default="./Output/demo", help="Path to the folder which will contain the output.") parser.add_argument( "--param_file", default="", help= "Name of a parameter file in the input folder. Will be used to override the local param dictionary." ) args = parser.parse_args() # Preparation time_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') current_file = os.path.splitext(os.path.basename(__file__))[0] input_path = args.input_path output_path = os.path.join(args.output_path, current_file + "_" + time_stamp) if not os.path.exists(output_path): os.makedirs(output_path) # set up logging logging.basicConfig(filename=os.path.join(output_path, current_file + '.log'), level=logging.DEBUG, format='%(asctime)s - %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') logging.info("Current git revision: {}".format( utils.get_git_revision_hash())) # override parameter if external ones are given param_path = os.path.join(input_path, args.param_file) if os.path.isfile(param_path): with open(param_path, "r") as param_file: params = json.load(param_file) logging.info("Using parameter given in {}".format(param_path)) else: logging.info("Using local parameter") # dump used parameter with open(os.path.join(output_path, 'params.json'), 'w') as f: json.dump( params, f, sort_keys=True, indent=4, ) f.write('\n') print("Start processing...") counter = 0 # Loop through all images in input path for root, dirs, files in os.walk(input_path): for input_name in files: start = time.time() input_name_base = os.path.splitext(os.path.basename(input_name))[0] img_original = cv2.imread(os.path.join(input_path, input_name)) if img_original is None: # reading failed (e.g. file is not an image) continue img = cv2.cvtColor(img_original, cv2.COLOR_BGR2GRAY) img = utils.prepare_for_morph_filter(img) img = utils.morph_denoise(img, **params) img = utils.restore_after_morph_filter(img) cv2.imwrite(os.path.join(output_path, input_name), img_original) cv2.imwrite( os.path.join(output_path, input_name_base + "_processed.tiff"), img) duration = time.time() - start logging.info("Processed {0} (Duration: {1:.3f} s)".format( input_name, duration)) counter += 1 logging.info("Processed {} images in total".format(counter)) print("Processing done. See log file in '{}' for more details".format( output_path))
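# utils.morph_denoise is defined elsewhere in the repo; given the size_open /
# size_close parameters documented above (opening = erosion then dilation,
# closing = dilation then erosion), a plausible minimal sketch -- an
# assumption, not the actual implementation -- looks like this:
import cv2

def morph_denoise_sketch(img, size_open=3, size_close=2):
    kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size_open, size_open))
    kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size_close, size_close))
    img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel_open)    # removes small bright specks
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel_close)  # fills small dark holes
    return img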
def train_model(expid): metadata_path = MODEL_PATH + "%s.pkl" % expid if theano.config.optimizer != "fast_run": print "WARNING: not running in fast mode!" data_loader.filter_patient_folders() print "Build model" interface_layers = config().build_model() output_layers = interface_layers["outputs"] input_layers = interface_layers["inputs"] top_layer = lasagne.layers.MergeLayer( incomings=output_layers.values() ) all_layers = lasagne.layers.get_all_layers(top_layer) all_params = lasagne.layers.get_all_params(top_layer, trainable=True) if "cutoff_gradients" in interface_layers: submodel_params = [param for value in interface_layers["cutoff_gradients"] for param in lasagne.layers.get_all_params(value)] all_params = [p for p in all_params if p not in submodel_params] if "pretrained" in interface_layers: for config_name, layers_dict in interface_layers["pretrained"].iteritems(): pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name.split('.')[1] pretrained_resume_metadata = np.load(pretrained_metadata_path) pretrained_top_layer = lasagne.layers.MergeLayer( incomings = layers_dict.values() ) lasagne.layers.set_all_param_values(pretrained_top_layer, pretrained_resume_metadata['param_values']) num_params = sum([np.prod(p.get_value().shape) for p in all_params]) print string.ljust(" layer output shapes:",36), print string.ljust("#params:",10), print string.ljust("#data:",10), print "output shape:" for layer in all_layers[:-1]: name = string.ljust(layer.__class__.__name__, 32) num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()]) num_param = string.ljust(int(num_param).__str__(), 10) num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10) print " %s %s %s %s" % (name, num_param, num_size, layer.output_shape) print " number of parameters: %d" % num_params obj = config().build_objective(interface_layers) train_loss_theano = obj.get_loss() kaggle_loss_theano = obj.get_kaggle_loss() segmentation_loss_theano = obj.get_segmentation_loss() validation_other_losses = collections.OrderedDict() validation_train_loss = obj.get_loss(average=False, deterministic=True, validation=True, other_losses=validation_other_losses) validation_kaggle_loss = obj.get_kaggle_loss(average=False, deterministic=True, validation=True) validation_segmentation_loss = obj.get_segmentation_loss(average=False, deterministic=True, validation=True) xs_shared = { key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems() } # contains target_vars of the objective! Not the output layers desired values! # There can be more output layers than are strictly required for the objective # e.g. 
for debugging ys_shared = { key: lasagne.utils.shared_empty(dim=target_var.ndim, dtype='float32') for (key, target_var) in obj.target_vars.iteritems() } learning_rate_schedule = config().learning_rate_schedule learning_rate = theano.shared(np.float32(learning_rate_schedule[0])) idx = T.lscalar('idx') givens = dict() for key in obj.target_vars.keys(): if key=="segmentation": givens[obj.target_vars[key]] = ys_shared[key][idx*config().sunny_batch_size : (idx+1)*config().sunny_batch_size] else: givens[obj.target_vars[key]] = ys_shared[key][idx*config().batch_size : (idx+1)*config().batch_size] for key in input_layers.keys(): if key=="sunny": givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size] else: givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size] updates = config().build_updates(train_loss_theano, all_params, learning_rate) #grad_norm = T.sqrt(T.sum([(g**2).sum() for g in theano.grad(train_loss_theano, all_params)])) #theano_printer.print_me_this("Grad norm", grad_norm) iter_train = theano.function([idx], [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print(), givens=givens, on_unused_input="ignore", updates=updates, # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) ) iter_validate = theano.function([idx], [validation_train_loss, validation_kaggle_loss, validation_segmentation_loss] + [v for _, v in validation_other_losses.items()] + theano_printer.get_the_stuff_to_print(), givens=givens, on_unused_input="ignore") num_chunks_train = int(config().num_epochs_train * NUM_TRAIN_PATIENTS / (config().batch_size * config().batches_per_chunk)) print "Will train for %d chunks" % num_chunks_train if config().restart_from_save and os.path.isfile(metadata_path): print "Load model parameters for resuming" resume_metadata = np.load(metadata_path) lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values']) start_chunk_idx = resume_metadata['chunks_since_start'] + 1 chunks_train_idcs = range(start_chunk_idx, num_chunks_train) # set lr to the correct value current_lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx)) print " setting learning rate to %.7f" % current_lr learning_rate.set_value(current_lr) losses_train = resume_metadata['losses_train'] losses_eval_valid = resume_metadata['losses_eval_valid'] losses_eval_train = resume_metadata['losses_eval_train'] losses_eval_valid_kaggle = [] #resume_metadata['losses_eval_valid_kaggle'] losses_eval_train_kaggle = [] #resume_metadata['losses_eval_train_kaggle'] else: chunks_train_idcs = range(num_chunks_train) losses_train = [] losses_eval_valid = [] losses_eval_train = [] losses_eval_valid_kaggle = [] losses_eval_train_kaggle = [] create_train_gen = partial(config().create_train_gen, required_input_keys = xs_shared.keys(), required_output_keys = ys_shared.keys()# + ["patients"], ) create_eval_valid_gen = partial(config().create_eval_valid_gen, required_input_keys = xs_shared.keys(), required_output_keys = ys_shared.keys()# + ["patients"] ) create_eval_train_gen = partial(config().create_eval_train_gen, required_input_keys = xs_shared.keys(), required_output_keys = ys_shared.keys() ) print "Train model" start_time = time.time() prev_time = start_time num_batches_chunk = config().batches_per_chunk for e, train_data in izip(chunks_train_idcs, buffering.buffered_gen_threaded(create_train_gen())): print "Chunk %d/%d" 
% (e + 1, num_chunks_train) epoch = (1.0 * config().batch_size * config().batches_per_chunk * (e+1) / NUM_TRAIN_PATIENTS) print " Epoch %.1f" % epoch for key, rate in learning_rate_schedule.iteritems(): if epoch >= key: lr = np.float32(rate) learning_rate.set_value(lr) print " learning rate %.7f" % lr if config().dump_network_loaded_data: pickle.dump(train_data, open("data_loader_dump_train_%d.pkl"%e, "wb")) for key in xs_shared: xs_shared[key].set_value(train_data["input"][key]) for key in ys_shared: ys_shared[key].set_value(train_data["output"][key]) #print "train:", sorted(train_data["output"]["patients"]) losses = [] kaggle_losses = [] segmentation_losses = [] for b in xrange(num_batches_chunk): iter_result = iter_train(b) loss, kaggle_loss, segmentation_loss = tuple(iter_result[:3]) utils.detect_nans(loss, xs_shared, ys_shared, all_params) losses.append(loss) kaggle_losses.append(kaggle_loss) segmentation_losses.append(segmentation_loss) mean_train_loss = np.mean(losses) print " mean training loss:\t\t%.6f" % mean_train_loss losses_train.append(mean_train_loss) print " mean kaggle loss:\t\t%.6f" % np.mean(kaggle_losses) print " mean segment loss:\t\t%.6f" % np.mean(segmentation_losses) if ((e + 1) % config().validate_every) == 0: print print "Validating" if config().validate_train_set: subsets = ["validation", "train"] gens = [create_eval_valid_gen, create_eval_train_gen] losses_eval = [losses_eval_valid, losses_eval_train] losses_kaggle = [losses_eval_valid_kaggle, losses_eval_train_kaggle] else: subsets = ["validation"] gens = [create_eval_valid_gen] losses_eval = [losses_eval_valid] losses_kaggle = [losses_eval_valid_kaggle] for subset, create_gen, losses_validation, losses_kgl in zip(subsets, gens, losses_eval, losses_kaggle): vld_losses = [] vld_kaggle_losses = [] vld_segmentation_losses = [] vld_other_losses = {k:[] for k,_ in validation_other_losses.items()} print " %s set (%d samples)" % (subset, get_number_of_validation_samples(set=subset)) for validation_data in buffering.buffered_gen_threaded(create_gen()): num_batches_chunk_eval = config().batches_per_chunk if config().dump_network_loaded_data: pickle.dump(validation_data, open("data_loader_dump_valid_%d.pkl"%e, "wb")) for key in xs_shared: xs_shared[key].set_value(validation_data["input"][key]) for key in ys_shared: ys_shared[key].set_value(validation_data["output"][key]) #print "validate:", validation_data["output"]["patients"] for b in xrange(num_batches_chunk_eval): losses = tuple(iter_validate(b)[:3+len(validation_other_losses)]) loss, kaggle_loss, segmentation_loss = losses[:3] other_losses = losses[3:] vld_losses.extend(loss) vld_kaggle_losses.extend(kaggle_loss) vld_segmentation_losses.extend(segmentation_loss) for k, other_loss in zip(validation_other_losses, other_losses): vld_other_losses[k].extend(other_loss) vld_losses = np.array(vld_losses) vld_kaggle_losses = np.array(vld_kaggle_losses) vld_segmentation_losses = np.array(vld_segmentation_losses) for k in validation_other_losses: vld_other_losses[k] = np.array(vld_other_losses[k]) # now select only the relevant section to average sunny_len = get_lenght_of_set(name="sunny", set=subset) regular_len = get_lenght_of_set(name="regular", set=subset) num_valid_samples = get_number_of_validation_samples(set=subset) #print losses[:num_valid_samples] #print kaggle_losses[:regular_len] #print segmentation_losses[:sunny_len] loss_to_save = obj.compute_average(vld_losses[:num_valid_samples]) print " mean training loss:\t\t%.6f" % loss_to_save print " mean kaggle 
loss:\t\t%.6f" % np.mean(vld_kaggle_losses[:regular_len]) print " mean segment loss:\t\t%.6f" % np.mean(vld_segmentation_losses[:sunny_len]) # print " acc:\t%.2f%%" % (acc * 100) for k, v in vld_other_losses.items(): print " mean %s loss:\t\t%.6f" % (k, obj.compute_average(v[:num_valid_samples], loss_name=k)) print losses_validation.append(loss_to_save) kaggle_to_save = np.mean(vld_kaggle_losses[:regular_len]) losses_kgl.append(kaggle_to_save) now = time.time() time_since_start = now - start_time time_since_prev = now - prev_time prev_time = now est_time_left = time_since_start * (float(num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_idcs[0])) eta = datetime.now() + timedelta(seconds=est_time_left) eta_str = eta.strftime("%c") print " %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev) print " estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str) print if ((e + 1) % config().save_every) == 0: print print "Saving metadata, parameters" with open(metadata_path, 'w') as f: pickle.dump({ 'metadata_path': metadata_path, 'configuration_file': config().__name__, 'git_revision_hash': utils.get_git_revision_hash(), 'experiment_id': expid, 'chunks_since_start': e, 'losses_train': losses_train, 'losses_eval_train': losses_eval_train, 'losses_eval_train_kaggle': losses_eval_train_kaggle, 'losses_eval_valid': losses_eval_valid, 'losses_eval_valid_kaggle': losses_eval_valid_kaggle, 'time_since_start': time_since_start, 'param_values': lasagne.layers.get_all_param_values(top_layer) }, f, pickle.HIGHEST_PROTOCOL) print " saved to %s" % metadata_path print # store all known outputs from last batch: if config().take_a_dump: all_theano_variables = [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print() for layer in all_layers[:-1]: all_theano_variables.append(lasagne.layers.helper.get_output(layer)) iter_train = theano.function([idx], all_theano_variables, givens=givens, on_unused_input="ignore", updates=updates, # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) ) train_data["intermediates"] = iter_train(0) pickle.dump(train_data, open(metadata_path + "-dump", "wb")) return
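# The learning_rate_schedule consumed above is a dict mapping an epoch
# threshold to a rate; the training loop applies every rate whose threshold
# the current epoch has passed, so the last one visited wins. A hypothetical
# schedule (values made up):
#
#   learning_rate_schedule = {
#       0: 1.0e-3,    # start
#       100: 1.0e-4,  # first decay
#       200: 1.0e-5,  # final polish
#   }
#
# Note that the loop relies on visiting the keys in increasing order; with a
# plain dict and iteritems() that is an implementation detail, so configs
# should arguably use an OrderedDict here.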
now = time.time() time_since_start = now - start_time time_since_prev = now - prev_time prev_time = now est_time_left = time_since_start * (config().max_nchunks - chunk_idx + 1.) / (chunk_idx + 1. - start_chunk_idx) eta = datetime.now() + timedelta(seconds=est_time_left) eta_str = eta.strftime("%c") print " %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev) print " estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str) print if ((chunk_idx + 1) % config().save_every) == 0: print print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks) print 'Saving metadata, parameters' with open(metadata_path, 'w') as f: pickle.dump({ 'configuration_file': config_name, 'git_revision_hash': utils.get_git_revision_hash(), 'experiment_id': expid, 'chunks_since_start': chunk_idx, 'losses_eval_train': losses_eval_train, 'losses_eval_valid': losses_eval_valid, 'param_values': nn.layers.get_all_param_values(model.l_out) }, f, pickle.HIGHEST_PROTOCOL) print ' saved to %s' % metadata_path print
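# buffering.buffered_gen_threaded is used throughout these scripts to overlap
# data loading with GPU work. A minimal sketch of the idea -- an assumption,
# not the repo's exact implementation -- using a bounded queue and a producer
# thread:
import threading
import Queue  # 'queue' on Python 3

def buffered_gen_threaded_sketch(source_gen, buffer_size=2):
    buf = Queue.Queue(maxsize=buffer_size)
    sentinel = object()

    def _producer():
        for item in source_gen:
            buf.put(item)  # blocks while the buffer is full
        buf.put(sentinel)

    thread = threading.Thread(target=_producer)
    thread.daemon = True
    thread.start()

    while True:
        item = buf.get()
        if item is sentinel:
            return
        yield item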
def predict_model(expid, mfile=None): metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile) prediction_path = MODEL_PREDICTIONS_PATH + "%s.pkl" % expid submission_path = SUBMISSION_PATH + "%s.csv" % expid if theano.config.optimizer != "fast_run": print "WARNING: not running in fast mode!" print "Using" print " %s" % metadata_path print "To generate" print " %s" % prediction_path print "Build model" interface_layers = config.build_model() output_layers = interface_layers["outputs"] input_layers = interface_layers["inputs"] top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values()) all_layers = lasagne.layers.get_all_layers(top_layer) all_params = lasagne.layers.get_all_params(top_layer, trainable=True) num_params = sum([np.prod(p.get_value().shape) for p in all_params]) print string.ljust(" layer output shapes:", 34), print string.ljust("#params:", 10), print string.ljust("#data:", 10), print "output shape:" for layer in all_layers[:-1]: name = string.ljust(layer.__class__.__name__, 30) num_param = sum( [np.prod(p.get_value().shape) for p in layer.get_params()]) num_param = string.ljust(int(num_param).__str__(), 10) num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10) print " %s %s %s %s" % (name, num_param, num_size, layer.output_shape) print " number of parameters: %d" % num_params xs_shared = { key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems() } idx = T.lscalar('idx') givens = dict() for (key, l_in) in input_layers.iteritems(): givens[l_in.input_var] = xs_shared[key][idx * config.batch_size:(idx + 1) * config.batch_size] network_outputs = [ lasagne.layers.helper.get_output(network_output_layer, deterministic=True) for network_output_layer in output_layers.values() ] print "Compiling..." 
iter_test = theano.function( [idx], network_outputs + theano_printer.get_the_stuff_to_print(), givens=givens, on_unused_input="ignore", # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) ) required_input = { key: l_in.output_shape for (key, l_in) in input_layers.iteritems() } print "Preparing dataloaders" config.test_data.prepare() chunk_size = config.batches_per_chunk * config.batch_size test_data_generator = buffering.buffered_gen_threaded( config.test_data.generate_batch( chunk_size=chunk_size, required_input=required_input, required_output={}, )) print "Load model parameters for resuming" resume_metadata = np.load(metadata_path) lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values']) chunks_test_idcs = itertools.count(0) num_chunks_test = math.ceil(1.0 * config.test_data.epochs * config.test_data.number_of_samples / (config.batch_size * config.batches_per_chunk)) start_time, prev_time = None, None all_predictions = dict() print "Loading first chunks" for e, test_data in izip(chunks_test_idcs, test_data_generator): if start_time is None: start_time = time.time() prev_time = start_time print print "Chunk %d/%d" % (e + 1, num_chunks_test) print "==============" if config.dump_network_loaded_data: pickle.dump(test_data, open("data_loader_dump_test_%d.pkl" % e, "wb")) for key in xs_shared: xs_shared[key].set_value(test_data["input"][key]) sample_ids = test_data[IDS] for b in xrange(config.batches_per_chunk): th_result = iter_test(b) predictions = th_result[:len(network_outputs)] for output_idx, key in enumerate(output_layers.keys()): for sample_idx in xrange(b * config.batch_size, (b + 1) * config.batch_size): prediction_pos = sample_idx % config.batch_size sample_id = sample_ids[sample_idx] if sample_id is not None: if sample_id not in all_predictions: all_predictions[sample_id] = dict() if key not in all_predictions[sample_id]: all_predictions[sample_id][key] = predictions[ output_idx][prediction_pos] else: all_predictions[sample_id][key] = np.concatenate( (all_predictions[sample_id][key], predictions[output_idx][prediction_pos]), axis=0) now = time.time() time_since_start = now - start_time time_since_prev = now - prev_time prev_time = now print " %s since start (+%.2f s)" % (utils.hms(time_since_start), time_since_prev) try: if num_chunks_test: est_time_left = time_since_start * (float(num_chunks_test - (e + 1)) / float(e + 1)) eta = datetime.datetime.now() + datetime.timedelta( seconds=est_time_left) eta_str = eta.strftime("%c") print " estimated %s to go" % utils.hms(est_time_left) print " (ETA: %s)" % eta_str except OverflowError: print " This will take really long, like REALLY long." print " %dms per testing sample" % (1000. * time_since_start / ( (e + 1) * config.batch_size * config.batches_per_chunk)) with open(prediction_path, 'w') as f: pickle.dump( { 'metadata_path': metadata_path, 'prediction_path': prediction_path, 'configuration_file': config.__name__, 'git_revision_hash': utils.get_git_revision_hash(), 'experiment_id': expid, 'predictions': all_predictions, }, f, pickle.HIGHEST_PROTOCOL) print " saved to %s" % prediction_path print return
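# utils.get_git_revision_hash is called from every entry point above to stamp
# saved metadata and predictions with the code version. A minimal sketch of
# such a helper (assuming the scripts run from inside a git checkout):
import subprocess

def get_git_revision_hash_sketch():
    # rev-parse prints the full SHA of HEAD; strip the trailing newline.
    return subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()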