Esempio n. 1
0
	print ' --- num_layers: ', TR_CONST["num_layers"]
	print ' --- task: %s' % args.task
	print ' --- usage ratio: %f' % usage_ratio

	# label matrix
	dim_latent_feature = TR_CONST["dim_labels"]
	# label_matrix_filename = (FILE_DICT["mood_latent_matrix"] % dim_latent_feature)
	label_matrix_filename = (FILE_DICT["mood_latent_tfidf_matrix"] % dim_latent_feature) # tfidf is better!
	
	if os.path.exists(PATH_DATA + label_matrix_filename):
		label_matrix = np.load(PATH_DATA + label_matrix_filename) #np matrix, 9320-by-100
	else:
		"print let's cook the mood-latent feature matrix"
		import main_prepare	
		mood_tags_matrix = np.load(PATH_DATA + label_matrix_filename) #np matrix, 9320-by-100
		label_matrix = main_prepare.get_LDA(X=mood_tags_matrix, num_components=k, show_topics=False)
		np.save(PATH_DATA + label_matrix_filename, W)
	print 'size of mood tag matrix:'
	print label_matrix.shape

	# load dataset
	for tf_type in tf_types:
		TR_CONST["tf_type"] = tf_type
		print '='*60
		print 'tf type: %s' % tf_type
		print '='*60
		print "I'll take %d clips for each song." % TR_CONST["clips_per_song"]
		# train_x, train_y, valid_x, valid_y, test_x, test_y = my_utils.load_all_sets(label_matrix=label_matrix, 
		# 																	clips_per_song=TR_CONST["clips_per_song"], 
		# 																	num_train_songs=TR_CONST["num_songs"], 
		# 																	tf_type=TR_CONST["tf_type"])
def _load_or_build_label_matrix(dim_latent_feature):
	"""Return the mood-latent (tf-idf) label matrix for the given latent dimension.

	The matrix is read from PATH_DATA if the file named by
	FILE_DICT["mood_latent_tfidf_matrix"] % dim_latent_feature exists; otherwise
	it is computed with main_prepare.get_LDA and saved to that path.
	"""
	label_matrix_path = PATH_DATA + (FILE_DICT["mood_latent_tfidf_matrix"] % dim_latent_feature)  # tfidf is better!
	if os.path.exists(label_matrix_path):
		return np.load(label_matrix_path)

	print("let's create a new mood-latent feature matrix")
	import main_prepare
	# NOTE(review): this reads the raw mood-tag matrix from the very path that was
	# just found missing, so this branch cannot succeed as written. The file name
	# of the raw tag matrix is not visible here -- TODO point this at the right file.
	mood_tags_matrix = np.load(label_matrix_path)
	# The original passed undefined names (`k`, `W`) here; the number of components
	# is presumably the latent dimension that also parameterises the file name.
	label_matrix = main_prepare.get_LDA(
		X=mood_tags_matrix,
		num_components=dim_latent_feature,
		show_topics=False)
	np.save(label_matrix_path, label_matrix)
	return label_matrix


def _make_dir_if_missing(path):
	"""Create directory `path` (including missing parents) unless it already exists."""
	if not os.path.isdir(path):
		os.makedirs(path)


def _create_empty_file(path):
	"""Create `path` as an empty file, truncating it if it already exists."""
	with open(path, 'w'):
		pass


def run_with_setting(hyperparams, argv):
	"""Build a convnet from `hyperparams`, train it epoch by epoch and save results.

	Training is controlled by two marker files in the working directory:
	'stop_asap.keunwoo' stops before the next epoch starts, and
	'will_stop.keunwoo' (created here before training) stops training once at
	least hyperparams["num_epoch"] epochs have run. Remove 'will_stop.keunwoo'
	to keep training one epoch at a time.

	Args:
		hyperparams: dict of settings. Keys read here: "is_test", "dim_labels",
			"tf_type", "isClass", "isRegre", "debug", "!memo", "num_epoch",
			"loss_function". The keys "height_image" and "width_image" are
			written into it from the shape of the training data.
		argv: sequence of command-line strings; only joined with spaces for logging.

	Returns:
		The minimum of the recorded history of the monitored value
		('val_loss' if hyperparams["isRegre"] else 'val_acc').
	"""
	print('#'*60)
	# Clear a stop request that may be left over from an earlier run.
	if os.path.exists('stop_asap.keunwoo'):
		os.remove('stop_asap.keunwoo')

	if hyperparams["is_test"]:
		print('==== This is a test, to quickly check the code. ====')
		print('excuted by $ ' + ' '.join(argv))

	# label matrix
	dim_latent_feature = hyperparams["dim_labels"]
	label_matrix = _load_or_build_label_matrix(dim_latent_feature)
	print(label_matrix.shape)

	# load dataset
	train_x, valid_x, test_x, = my_utils.load_all_sets_from_hdf(
		tf_type=hyperparams["tf_type"],
		n_dim=dim_latent_feature,
		task_cla=hyperparams['isClass'])
	# *_y is not correct - 01 Jan 2016. Use numpy files directly.
	train_y, valid_y, test_y = my_utils.load_all_labels(
		n_dim=dim_latent_feature,
		num_fold=10,
		clips_per_song=3)
	if hyperparams["is_test"]:
		# Keep only the first 64 items of every set so a test run finishes quickly.
		train_x, valid_x, test_x, train_y, valid_y, test_y = [
			ele[:64] for ele in [train_x, valid_x, test_x, train_y, valid_y, test_y]]

	threshold_label = 1.0
	if hyperparams['isClass']:
		# Binarise the targets for classification.
		train_y = (train_y >= threshold_label).astype(int)
		valid_y = (valid_y >= threshold_label).astype(int)
		test_y = (test_y >= threshold_label).astype(int)

	# train_x : (num_samples, num_channel, height, width)
	hyperparams["height_image"] = train_x.shape[2]
	hyperparams["width_image"] = train_x.shape[3]
	if hyperparams["debug"]:
		pdb.set_trace()

	# Loaded as in the original, although the list is not used further below.
	with open(PATH_DATA + FILE_DICT["moodnames"], 'r') as f_moodnames:
		moodnames = cP.load(f_moodnames)  # list, 100

	hp_manager = hyperparams_manager.Hyperparams_Manager()
	nickname = hp_manager.get_name(hyperparams)
	timename = time.strftime('%m-%d-%Hh%M')
	if hyperparams["is_test"]:
		model_name = 'test_' + nickname
	else:
		model_name = timename + '_' + nickname
	hp_manager.save_new_setting(hyperparams)
	print('-'*60)
	print('model name: %s' % model_name)
	model_name_dir = model_name + '/'
	model_weight_name_dir = 'w_' + model_name + '/'
	fileout = model_name + '_results'
	best_weights_path = PATH_RESULTS_W + model_weight_name_dir + "weights_best.hdf5"

	model = my_keras_models.build_convnet_model(setting_dict=hyperparams)
	model.summary()
	# Create every output directory independently: test runs reuse their model
	# name, so the results directory may exist while another one is missing.
	_make_dir_if_missing(PATH_RESULTS + model_name_dir)
	_make_dir_if_missing(PATH_RESULTS + model_name_dir + 'images/')
	_make_dir_if_missing(PATH_RESULTS + model_name_dir + 'plots/')
	_make_dir_if_missing(PATH_RESULTS_W + model_weight_name_dir)

	hp_manager.write_setting_as_texts(PATH_RESULTS + model_name_dir, hyperparams)
	hp_manager.print_setting(hyperparams)

	keras_plot(model, to_file=PATH_RESULTS + model_name_dir + 'images/'+'graph_of_model_'+hyperparams["!memo"]+'.png')
	# prepare callbacks
	weight_image_monitor = my_keras_utils.Weight_Image_Saver(PATH_RESULTS + model_name_dir + 'images/')
	callbacks = [weight_image_monitor]
	value_to_monitor = 'val_loss' if hyperparams["isRegre"] else 'val_acc'

	# other constants
	batch_size = 16

	# Predictions of the untrained model, stored as a baseline.
	predicted = model.predict(test_x, batch_size=batch_size)
	if hyperparams['debug']:
		pdb.set_trace()
	print('mean of target value:')
	if hyperparams['isRegre']:
		print(np.mean(test_y, axis=0))
	else:
		print(np.sum(test_y, axis=0))
	print('mean of predicted value:')
	if hyperparams['isRegre']:
		print(np.mean(predicted, axis=0))
	else:
		print(np.sum(predicted, axis=0))
	print('mse with just predicting average is %f' % np.mean((test_y - np.mean(test_y, axis=0))**2))
	np.save(PATH_RESULTS + model_name_dir + 'predicted_and_truths_init.npy', [predicted[:len(test_y)], test_y[:len(test_y)]])

	# train!
	print('--- train starts. Remove will_stop.keunwoo to continue learning after %d epochs ---' % hyperparams["num_epoch"])
	_create_empty_file('will_stop.keunwoo')
	total_history = {}
	num_epoch = hyperparams["num_epoch"]
	total_epoch = 0
	# Start from infinity so the first finished epoch always writes the best-weights
	# file; a fixed threshold could leave it missing (or stale from an earlier run).
	best_mse = float('inf')

	while True:
		# [run]
		if os.path.exists('stop_asap.keunwoo'):
			print(' stop by stop_asap.keunwoo file')
			break
		history = model.fit(
			train_x, train_y, validation_data=(valid_x, valid_y),
			batch_size=batch_size,
			nb_epoch=1,
			show_accuracy=hyperparams['isClass'],
			verbose=1,
			callbacks=callbacks,
			shuffle='batch')
		my_utils.append_history(total_history, history.history)
		# [validation]
		# Score the current weights on the validation set; the original compared
		# valid_y with the stale pre-training test-set predictions.
		valid_predicted = model.predict(valid_x, batch_size=batch_size)
		val_result = evaluate_result(valid_y, valid_predicted)  # mse
		if val_result['mse'] < best_mse:
			model.save_weights(best_weights_path, overwrite=True)
			best_mse = val_result['mse']

		print('%d-th of %d epoch is complete' % (total_epoch, num_epoch))
		total_epoch += 1

		if total_epoch < num_epoch:
			continue
		if os.path.exists('will_stop.keunwoo'):
			# The requested number of epochs is done and nobody asked for more.
			break
		print(' *** will go for another one epoch. ')
		print(' *** $ touch will_stop.keunwoo to stop at the end of this, otherwise it will be endless.')

	# Evaluate with the last-epoch weights, whichever way the loop ended, so that
	# loss_testset is always defined when it is saved below.
	loss_testset = model.evaluate(test_x, test_y, show_accuracy=False, batch_size=batch_size)

	if os.path.exists(best_weights_path):
		model.load_weights(best_weights_path)
	else:
		print('no best weights were saved; keeping the current weights')

	predicted = model.predict(test_x, batch_size=batch_size)
	print('predicted example using best model')
	print(predicted[:10])
	print('and truths')
	print(test_y[:10])
	# save results
	np.save(PATH_RESULTS + model_name_dir + fileout + '_history.npy', [total_history['loss'], total_history['val_loss']])
	np.save(PATH_RESULTS + model_name_dir + fileout + '_loss_testset.npy', loss_testset)
	np.save(PATH_RESULTS + model_name_dir + 'predicted_and_truths_result.npy', [predicted, test_y])

	# ADD weight change saving code
	my_plots.export_history(
		total_history['loss'], total_history['val_loss'],
		acc=None,
		val_acc=None,
		out_filename=PATH_RESULTS + model_name_dir + 'plots/' + 'plots.png')

	# NOTE(review): min/argmin pick the best epoch for 'val_loss' but the worst one
	# when 'val_acc' is monitored; left unchanged because callers receive min_loss.
	min_loss = np.min(total_history[value_to_monitor])
	best_batch = np.argmin(total_history[value_to_monitor])+1
	num_run_epoch = len(total_history[value_to_monitor])
	oneline_result = '%s, %6.4f, %d_of_%d, %s' % (value_to_monitor, min_loss, best_batch, num_run_epoch, model_name)
	# Empty marker files whose names carry the summary of this run.
	_create_empty_file(PATH_RESULTS + model_name_dir + oneline_result)
	_create_empty_file(PATH_RESULTS + '%s_%s_%s_%06.4f_at_(%d_of_%d)_mse_%06.4f_%s' % (
		timename, hyperparams["loss_function"], value_to_monitor, min_loss, best_batch, num_run_epoch, best_mse, nickname))
	with open('one_line_log.txt', 'a') as f_log:
		f_log.write(oneline_result)
		f_log.write(' ' + ' '.join(argv) + '\n')
	print('========== DONE: %s ==========' % model_name)
	return min_loss