def info_collect(self, hps, grads, stable_loss_predict, stable_loss_label, print_log=True):
    # Trace every hyper (de-normalized) and its gradient to per-index files,
    # plus the predicted and labeled stable losses.
    for index, hp_list in enumerate(self.hp_collect):
        hp_list.append(hps[index])
        file_helper.write('hp2trend_hps%d.txt' % index,
                          str(hps[index][0] * self.hp_norms[index]))
    for index, grad_list in enumerate(self.gradient_collect):
        grad_list.append(grads[index])
        file_helper.write('hp2trend_grads%d.txt' % index, str(grads[index]))
    self.stable_loss_predict_collect.append(stable_loss_predict)
    file_helper.write('hp2trend_stable_loss_predict.txt', str(stable_loss_predict))
    self.stable_loss_label_collect.append(stable_loss_label)
    file_helper.write('hp2trend_stable_loss_label.txt', str(stable_loss_label))
    if print_log:
        print('hps')
        print(hps)
        print('grads')
        print(grads)
        print('stable_loss_predict')
        print(stable_loss_predict)
        print('stable_loss_label')
        print(stable_loss_label)
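# NOTE: `file_helper` is not defined in this section. Judging by the call sites above,
# it only needs a `write(file_path, content)` that appends one record per call. A minimal
# sketch of such a module (an assumption, not this project's actual implementation):
#
#     # file_helper.py
#     def write(file_path, content):
#         # Append one record per line so each trace file is a plottable series.
#         with open(file_path, 'a') as f:
#             f.write(content + '\n')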
def fit(self, input_data, trend):
    if not self.has_init:
        self.norm(input_data)
    norm_hps = [hp / self.hp_norms[i] for i, hp in enumerate(input_data)]
    fit_dict = dict()
    fit_dict[self.is_fit] = True
    fit_dict[self.ph_hypers] = norm_hps
    fit_dict[self.train_label] = trend
    with tf.Session(graph=self.graph) as session:
        self.init_vars(norm_hps, session, not self.has_init)
        _, hps, loss, predict = session.run(
            [self.optimizer, self.tf_hypers, self.loss, self.predict],
            feed_dict=fit_dict)
        self.saver.save(session, self.save_path)
        # collect_counter is wrapped into [0, 5) below, so `% 20 == 0` only fires
        # when the counter is 0, i.e. the loss is logged on every fifth call.
        if self.collect_counter % 20 == 0:
            self.fit_loss_collect.append(loss)
            file_helper.write('hp2trend_fit_loss.txt', str(loss))
        self.collect_counter += 1
        self.collect_counter %= 5
    if not self.has_init:
        self.has_init = True
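# NOTE: `norm`/`norm_list` (and the class-level `self.norm`/`self.hp_norms`) are not
# defined in this section. Every call site divides each raw hyper by its norm on the
# way in and multiplies it back when logging, so the norms just rescale the hypers to
# a comparable magnitude. One plausible implementation (an assumption, not this
# project's code):
#
#     norm_list = []
#
#     def norm(hyper_s):
#         # Use the smallest power of ten >= |hp| so each normalized hyper lies in (0.1, 1].
#         del norm_list[:]
#         for hp in hyper_s:
#             norm_list.append(10.0 ** math.ceil(math.log10(max(abs(hp), 1e-12))))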
def predict_loss(input_s, label_s, init_hypers, graph, saver, is_fit, train_inputs, train_labels,
                 ph_hypers, optimizer, loss, train_prediction, learning_rate, reset=False):
    init_hypers = [hyper / norm_list[i] for i, hyper in enumerate(init_hypers)]
    with tf.Session(graph=graph) as fit_cnn_ses:
        if os.path.exists(save_path):
            # Restore variables from disk.
            saver.restore(fit_cnn_ses, save_path)
            if reset:
                print('reset, new hypers:')
                print(init_hypers)
                init_feed = dict()
                init_feed[ph_hypers] = init_hypers
                tf.initialize_all_variables().run(feed_dict=init_feed)
        else:
            init_feed = dict()
            init_feed[ph_hypers] = init_hypers
            tf.initialize_all_variables().run(feed_dict=init_feed)
            print('Initialized')
        cur_idx = 0
        f_labels = list()
        f_features = list()
        log_labels = list()
        end_train = False
        x_s = np.array([float(i) for i in range(batch_size - hyper_cnt)])
        while not end_train:
            if cur_idx == 0:
                hp_input_s = input_s
                hp_label_s = label_s
            else:
                hp_input_s = f_features.pop()
                hp_label_s = f_labels.pop()
            f_features.append(hp_label_s[:, :20, :])
            feed_dict = dict()
            for i in range(batch_cnt_per_step):
                feed_dict[train_inputs[i]] = hp_input_s[i]
            for i in range(batch_cnt_per_step):
                feed_dict[train_labels[i]] = hp_label_s[i]
            # This dict is discarded by the re-assignment below, so the recursive
            # features in hp_input_s/hp_label_s are never actually fed.
            feed_dict = dict()
            feed_dict[is_fit] = False
            for i in range(batch_cnt_per_step):
                feed_dict[train_inputs[i]] = input_s[i]
            for i in range(batch_cnt_per_step):
                feed_dict[train_labels[i]] = label_s[i]
            feed_dict[ph_hypers] = init_hypers
            _, l, predictions, lr = fit_cnn_ses.run(
                [optimizer, loss, train_prediction, learning_rate], feed_dict=feed_dict)
            f_labels.append(predictions.reshape(
                (batch_cnt_per_step, batch_size - hyper_cnt, EMBEDDING_SIZE)))

            def residuals(p, x, y):
                return p[0] * x + p[1] - y

            # Fit a straight line to the latest predicted loss curve; a near-zero
            # slope k means the predicted loss has flattened out.
            p0 = [-1.0, 1.0]
            predict_losses = predictions.reshape([batch_cnt_per_step, batch_size - hyper_cnt])
            plsq = leastsq(residuals, p0, args=(x_s, predict_losses[-1]))
            k = math.fabs(plsq[0][0])
            print(k)
            cur_idx += 1
            if k < 0.1 and k < np.mean(predict_losses[-1]):
                end_train = True
                for predict in predictions.reshape(
                        (batch_cnt_per_step * (batch_size - hyper_cnt))).tolist():
                    log_labels.append(predict)
            else:
                log_labels.append(predict_losses[0][0])
        for predict in log_labels:
            file_helper.write(PREDICT_FILE_PATH, str(predict))
        file_helper.write(PREDICT_FILE_PATH, '===')
    # Return whether training can stop, and how many prediction steps were run.
    return end_train, cur_idx
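# The stopping test above fits a straight line to the most recent predicted loss curve
# and stops once the absolute slope k is small (flat curve) and below the curve's mean.
# A self-contained demo of that test on a synthetic, flattening loss curve:
#
#     import numpy as np
#     from scipy.optimize import leastsq
#
#     def residuals(p, x, y):
#         return p[0] * x + p[1] - y
#
#     x = np.arange(20, dtype=float)
#     y = 1.0 / (x + 1.0) + 0.5                    # decaying loss that levels off near 0.5
#     (k, b), _ = leastsq(residuals, [-1.0, 1.0], args=(x, y))
#     print(abs(k) < 0.1 and abs(k) < y.mean())    # True: the curve counts as converged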
def fit_cnn_loss(input_s, label_s, hyper_s, graph, saver, is_fit, train_inputs, train_labels,
                 ph_hypers, optimizer, loss, train_prediction, learning_rate,
                 reset=False, train_hyper=False):
    global hyper_cnt, batch_size, step, save_path, labels, predicts, mean_loss_vary_cnt, mean_loss
    norm(hyper_s)
    hyper_s = [hyper / norm_list[i] for i, hyper in enumerate(hyper_s)]
    hyper_cnt = len(hyper_s)
    sum_freq = 3
    if reset:
        step = 0
    fit_ret = False
    with tf.Session(graph=graph) as fit_cnn_ses:
        if os.path.exists(save_path):
            # Restore variables from disk.
            saver.restore(fit_cnn_ses, save_path)
            if reset:
                print('reset, new hypers:')
                print(hyper_s)
                init_feed = dict()
                init_feed[ph_hypers] = hyper_s
                tf.initialize_all_variables().run(feed_dict=init_feed)
        else:
            init_feed = dict()
            init_feed[ph_hypers] = hyper_s
            tf.initialize_all_variables().run(feed_dict=init_feed)
            print('Initialized')
        mean_loss = 0
        # Prepare and feed the training data.
        feed_dict = dict()
        feed_dict[is_fit] = train_hyper
        for i in range(batch_cnt_per_step):
            feed_dict[train_inputs[i]] = input_s[i]
        for i in range(batch_cnt_per_step):
            feed_dict[train_labels[i]] = label_s[i]
        feed_dict[ph_hypers] = hyper_s
        # Train one step.
        _, l, predictions, lr = fit_cnn_ses.run(
            [optimizer, loss, train_prediction, learning_rate], feed_dict=feed_dict)
        mean_loss += l
        # Only the first loss of each batch is meaningful.
        labels.append(label_s.reshape(batch_cnt_per_step * (batch_size - hyper_cnt)).tolist()[0])
        predicts.append(predictions.reshape(batch_cnt_per_step * (batch_size - hyper_cnt)).tolist()[0])
        if step % sum_freq == 0:
            # Gradients flip sign between odd and even steps, so verification must
            # not always land on an even step; randomize the verification interval.
            fit_verify = random.randint(9, 10)
            if step > 0 and step % (sum_freq * fit_verify) == 0:
                mean_loss /= sum_freq
                # Only treat the predictor as fitted after the mean loss stays below 15%
                # of both the label mean and the prediction mean for 5 consecutive checks.
                if mean_loss < np.mean(label_s) * 0.15 and mean_loss < np.mean(predictions) * 0.15:
                    mean_loss_vary_cnt += 1
                else:
                    mean_loss_vary_cnt = 0
                if mean_loss_vary_cnt >= 5:
                    fit_ret = True
                    print('mean loss < 15% of label mean')
                    print(mean_loss)
                    print(np.mean(label_s))
                print('Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
            mean_loss = 0
        # Bookkeeping before returning.
        step += 1
        saver.save(fit_cnn_ses, save_path)
    if fit_ret:
        for label in labels:
            file_helper.write(LINE_FILE_PATH, str(label))
            print(label)
        print('=' * 80)
        for predict in predicts:
            file_helper.write(PREDICT_FILE_PATH, str(predict))
            print(predict)
        del labels[:]
        del predicts[:]
    return fit_ret
def train_cnn_hyper(input_s, label_s, init_hypers, graph, saver, is_fit, train_inputs, train_labels,
                    ph_hypers, var_reset_hypers, pack_var_hypers, gradients_hp, optimizer, loss,
                    train_prediction, learning_rate, reset=False):
    sum_freq = 3
    init_hypers = [hyper / norm_list[i] for i, hyper in enumerate(init_hypers)]
    with tf.Session(graph=graph) as fit_cnn_ses:
        if os.path.exists(save_path):
            # Restore variables from disk; on reset, re-initialize only the hyper variables.
            saver.restore(fit_cnn_ses, save_path)
            if reset:
                print('reset, new hypers:')
                print(init_hypers)
                init_feed = dict()
                init_feed[ph_hypers] = init_hypers
                tf.initialize_variables(var_list=var_reset_hypers).run(feed_dict=init_feed)
        else:
            init_feed = dict()
            init_feed[ph_hypers] = init_hypers
            tf.initialize_all_variables().run(feed_dict=init_feed)
            print('Initialized')
        num_step_cnt = 1000
        f_labels = list()
        f_features = list()
        hp_mean_loss = 0
        train_ret = False
        hyper_f = init_hypers
        grads = None
        for step in range(num_step_cnt):
            if train_ret:
                break
            if step == 0:
                hp_input_s = input_s
                hp_label_s = label_s
            else:
                hp_input_s = f_features.pop()
                hp_label_s = f_labels.pop()
            f_features.append(hp_label_s[:, :20, :])
            feed_dict = dict()
            for i in range(batch_cnt_per_step):
                feed_dict[train_inputs[i]] = hp_input_s[i]
            for i in range(batch_cnt_per_step):
                feed_dict[train_labels[i]] = hp_label_s[i]
            # This dict is discarded by the re-assignment below, so the recursive
            # features are never actually fed.
            feed_dict = dict()
            feed_dict[is_fit] = False
            for i in range(batch_cnt_per_step):
                feed_dict[train_inputs[i]] = input_s[i]
            for i in range(batch_cnt_per_step):
                feed_dict[train_labels[i]] = label_s[i]
            feed_dict[ph_hypers] = init_hypers
            # One optimization step on the hyper variables.
            grads, _, l, predictions, lr, hyper_f = fit_cnn_ses.run(
                [gradients_hp, optimizer, loss, train_prediction, learning_rate, pack_var_hypers],
                feed_dict=feed_dict)
            f_labels.append(predictions.reshape(
                (batch_cnt_per_step, batch_size - hyper_cnt, EMBEDDING_SIZE)))
            print('fetch_hp:')
            print(hyper_f)
            print('gradients:')
            print(grads)
            hp_mean_loss += l
            if step % sum_freq == 0:
                if step > 0:
                    hp_mean_loss /= sum_freq
                    print('Average loss at step %d: %f learning rate: %f' % (step, hp_mean_loss, lr))
                    hp_diffs = list()
                    for i in range(hyper_cnt):
                        hp_diffs.append(math.fabs(
                            int(hyper_f[i] * norm_list[i]) - int(init_hypers[i] * norm_list[i])))
                    # We stop as soon as a single hyper changes, so the same one could be
                    # checked every time; pick the index to check at random instead.
                    ran_index = random.randint(0, hyper_cnt - 1)
                    if step <= num_step_cnt / 2 and hp_diffs[ran_index] > 1:
                        if hp_diffs[ran_index] > init_hypers[ran_index] * norm_list[ran_index] * 0.05:
                            train_ret = True
                            print('=' * 30 + 'hyper in step %d' % step + '=' * 30)
                            print('batch_size, depth, num_hidden, layer_sum, patch_size')
                            print(hyper_f)
                            print('random_index = {ran_index}, hp_diff[random index] = {hp_dif_ridx}'.format(
                                ran_index=ran_index, hp_dif_ridx=hp_diffs[ran_index]))
                    # Relax the condition in later steps: any change larger than 1 is enough.
                    elif step > num_step_cnt / 2 and hp_diffs[ran_index] > 1:
                        train_ret = True
                        print('=' * 30 + 'hyper in step %d' % step + '=' * 30)
                        print('batch_size, depth, num_hidden, layer_sum, patch_size')
                        print(hyper_f)
                        print('random_index = {ran_index}, hp_diff[random index] = {hp_dif_ridx}'.format(
                            ran_index=ran_index, hp_dif_ridx=hp_diffs[ran_index]))
                    # Count NaN gradients with a list comprehension (works on Python 2 and 3).
                    elif len([g for g in grads if math.isnan(g)]) >= hyper_cnt:
                        print('all hyper gradients are nan')
                        print([math.isnan(grad) for grad in grads])
                        train_ret = True
                    hp_mean_loss = 0
    final_hps = hyper_f.reshape([hyper_cnt]).tolist()
    final_hps = [final_hp * norm_list[i] for i, final_hp in enumerate(final_hps)]
    file_helper.write(HP_FILE_PATH, str(final_hps))
    file_helper.write(GRAD_FILE_PATH, str(grads))
    return train_ret, final_hps
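# The three routines above suggest an outer search loop: fit the loss predictor on
# observed losses, let the hyper gradients move the (normalized) hypers until one of
# them changes, then use predict_loss to check whether the predicted loss curve for
# the new setting flattens out. A sketch of such a driver under those assumptions
# (not code from this project; all tensor names come from the signatures above):
#
#     hypers = [64, 16, 64, 3, 5]   # e.g. batch_size, depth, num_hidden, layer_sum, patch_size
#     while True:
#         # Inner loop: train the predictor until its mean loss stays under the 15%
#         # threshold for 5 consecutive checks.
#         while not fit_cnn_loss(input_s, label_s, hypers, graph, saver, is_fit,
#                                train_inputs, train_labels, ph_hypers, optimizer,
#                                loss, train_prediction, learning_rate):
#             pass
#         # Move the hypers along their gradients; returns de-normalized values.
#         moved, hypers = train_cnn_hyper(input_s, label_s, hypers, graph, saver, is_fit,
#                                         train_inputs, train_labels, ph_hypers,
#                                         var_reset_hypers, pack_var_hypers, gradients_hp,
#                                         optimizer, loss, train_prediction, learning_rate,
#                                         reset=True)
#         # Stop once the predicted loss curve for the new setting has flattened.
#         done, steps = predict_loss(input_s, label_s, hypers, graph, saver, is_fit,
#                                    train_inputs, train_labels, ph_hypers, optimizer,
#                                    loss, train_prediction, learning_rate)
#         if done:
#             break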