Example #1
 def info_collect(self,
                  hps,
                  grads,
                  stable_loss_predict,
                  stable_loss_label,
                  print_log=True):
     """Append the current hyperparameters, gradients, and stable losses
     to their in-memory collections and per-value trace files."""
     for index, hp_list in enumerate(self.hp_collect):
         hp_list.append(hps[index])
         # de-normalize before logging so the file records the raw value
         file_helper.write('hp2trend_hps%d.txt' % index,
                           str(hps[index][0] * self.hp_norms[index]))
     for index, grad_list in enumerate(self.gradient_collect):
         grad_list.append(grads[index])
         file_helper.write('hp2trend_grads%d.txt' % index,
                           str(grads[index]))
     self.stable_loss_predict_collect.append(stable_loss_predict)
     file_helper.write('hp2trend_stable_loss_predict.txt',
                       str(stable_loss_predict))
     self.stable_loss_label_collect.append(stable_loss_label)
     file_helper.write('hp2trend_stable_loss_label.txt',
                       str(stable_loss_label))
     if print_log:
         print('hps')
         print(hps)
         print('grads')
         print(grads)
         print('stable_loss_predict')
         print(stable_loss_predict)
         print('stable_loss_label')
         print(stable_loss_label)
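
file_helper is a project-local module that the methods above log through. A minimal stand-in consistent with how write() is called here, appending one value per line, might look like the following; this is an assumption about its behavior, not the project's actual implementation:

def write(file_name, line):
    # hypothetical stand-in for file_helper.write(path, value):
    # append one value per line to a plain-text trace file
    with open(file_name, 'a') as trace_file:
        trace_file.write(line + '\n')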
Example #2
 def fit(self, input_data, trend):
     """Run one optimization step of the hyperparameter-to-trend model,
     normalizing the inputs on the first call."""
     if not self.has_init:
         self.norm(input_data)
     norm_hps = [hp / self.hp_norms[i] for i, hp in enumerate(input_data)]
     fit_dict = dict()
     fit_dict[self.is_fit] = True
     fit_dict[self.ph_hypers] = norm_hps
     fit_dict[self.train_label] = trend
     with tf.Session(graph=self.graph) as session:
         self.init_vars(norm_hps, session, not self.has_init)
         _, hps, loss, predict = session.run([self.optimizer, self.tf_hypers, self.loss, self.predict], feed_dict=fit_dict)
         self.saver.save(session, self.save_path)
         # collect_counter wraps at 5 below, so the loss is logged once
         # every fifth call
         if self.collect_counter == 0:
             self.fit_loss_collect.append(loss)
             file_helper.write('hp2trend_fit_loss.txt', str(loss))
         self.collect_counter += 1
         self.collect_counter %= 5
     if not self.has_init:
         self.has_init = True
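
fit() divides each raw hyperparameter by a per-dimension norm so that values of very different magnitudes land on a comparable scale before being fed to the graph. A small numeric illustration of that step (the norms and inputs here are made up):

# made-up hyperparameters and per-dimension norms
input_data = [128, 0.5, 3]       # e.g. batch size, dropout rate, layer count
hp_norms = [256.0, 1.0, 10.0]
norm_hps = [hp / hp_norms[i] for i, hp in enumerate(input_data)]
print(norm_hps)                  # [0.5, 0.5, 0.3] -- all on a similar scale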
Example #3
def predict_loss(input_s,
                 label_s,
                 init_hypers,
                 graph,
                 saver,
                 is_fit,
                 train_inputs,
                 train_labels,
                 ph_hypers,
                 optimizer,
                 loss,
                 train_prediction,
                 learning_rate,
                 reset=False):
    """Roll the model forward on its own predictions until the predicted
    loss curve flattens out, logging every prediction."""
    init_hypers = [hyper / norm_list[i] for i, hyper in enumerate(init_hypers)]
    with tf.Session(graph=graph) as fit_cnn_ses:
        if os.path.exists(save_path):
            # Restore variables from disk.
            saver.restore(fit_cnn_ses, save_path)
            if reset:
                print('reset, new hypers:')
                print(init_hypers)
                init_feed = dict()
                init_feed[ph_hypers] = init_hypers
                tf.initialize_all_variables().run(feed_dict=init_feed)
        else:
            init_feed = dict()
            init_feed[ph_hypers] = init_hypers
            tf.initialize_all_variables().run(feed_dict=init_feed)
            print('Initialized')

        cur_idx = 0
        f_labels = list()
        f_features = list()
        log_labels = list()
        end_train = False
        x_s = np.array([float(i) for i in range(batch_size - hyper_cnt)])
        while True:
            if end_train:
                break
            if cur_idx == 0:
                hp_input_s = input_s
                hp_label_s = label_s
            else:
                hp_input_s = f_features.pop()
                hp_label_s = f_labels.pop()
            f_features.append(hp_label_s[:, :20, :])
            # feed one dict per step; after the first pass the inputs are the
            # previous step's predictions (hp_input_s / hp_label_s)
            feed_dict = dict()
            feed_dict[is_fit] = False
            for i in range(batch_cnt_per_step):
                feed_dict[train_inputs[i]] = hp_input_s[i]
            for i in range(batch_cnt_per_step):
                feed_dict[train_labels[i]] = hp_label_s[i]
            feed_dict[ph_hypers] = init_hypers
            _, l, predictions, lr = fit_cnn_ses.run(
                [optimizer, loss, train_prediction, learning_rate],
                feed_dict=feed_dict)
            f_labels.append(
                predictions.reshape((batch_cnt_per_step,
                                     batch_size - hyper_cnt, EMBEDDING_SIZE)))

            def residuals(p, x, y):
                return p[0] * x + p[1] - y

            p0 = [-1.0, 1.0]
            predict_losses = predictions.reshape(
                [batch_cnt_per_step, batch_size - hyper_cnt])
            plsq = leastsq(residuals, p0, args=(x_s, predict_losses[-1]))
            k = math.fabs(plsq[0][0])
            print(k)

            cur_idx += 1
            if k < 0.1 and k < np.mean(predict_losses[-1]):
                end_train = True
                for predict in predictions.reshape(
                    (batch_cnt_per_step * (batch_size - hyper_cnt))).tolist():
                    log_labels.append(predict)
            else:
                log_labels.append(predict_losses[0][0])
        for predict in log_labels:
            file_helper.write(PREDICT_FILE_PATH, str(predict))
        file_helper.write(PREDICT_FILE_PATH, '===')
    # return whether training converged and the number of prediction steps
    return end_train, cur_idx
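
The stopping rule inside the loop fits a straight line to the latest predicted loss curve with scipy's leastsq and treats a near-zero slope as convergence. A self-contained sketch of that test, on made-up data:

import numpy as np
from scipy.optimize import leastsq

def residuals(p, x, y):
    # residuals of the line y = p[0] * x + p[1], as in predict_loss
    return p[0] * x + p[1] - y

x_s = np.arange(10, dtype=float)
losses = 0.5 + 0.01 * np.random.randn(10)   # a nearly flat loss curve
plsq = leastsq(residuals, [-1.0, 1.0], args=(x_s, losses))
k = abs(plsq[0][0])                          # fitted slope magnitude
print(k, k < 0.1 and k < np.mean(losses))    # small slope => stop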
Example #4
def fit_cnn_loss(input_s,
                 label_s,
                 hyper_s,
                 graph,
                 saver,
                 is_fit,
                 train_inputs,
                 train_labels,
                 ph_hypers,
                 optimizer,
                 loss,
                 train_prediction,
                 learning_rate,
                 reset=False,
                 train_hyper=False):
    """Run one training step of the loss-prediction CNN and return True
    once the fit is judged good enough to stop."""
    global hyper_cnt
    global batch_size
    norm(hyper_s)
    hyper_s = [hyper / norm_list[i] for i, hyper in enumerate(hyper_s)]
    hyper_cnt = len(hyper_s)
    global step
    global save_path
    sum_freq = 3
    global labels
    global predicts
    global mean_loss_vary_cnt
    if reset:
        step = 0
    fit_ret = False
    with tf.Session(graph=graph) as fit_cnn_ses:
        if os.path.exists(save_path):
            # Restore variables from disk.
            saver.restore(fit_cnn_ses, save_path)
            if reset:
                print('reset, new hypers:')
                print(hyper_s)
                init_feed = dict()
                init_feed[ph_hypers] = hyper_s
                tf.initialize_all_variables().run(feed_dict=init_feed)
                # print("Model restored.")
        else:
            init_feed = dict()
            init_feed[ph_hypers] = hyper_s
            tf.initialize_all_variables().run(feed_dict=init_feed)
            print('Initialized')

        global mean_loss
        # reset on every call: by the averaging step below, mean_loss holds a
        # single batch loss, which the division by sum_freq then scales down
        mean_loss = 0
        # prepare and feed train data
        feed_dict = dict()
        feed_dict[is_fit] = train_hyper
        for i in range(batch_cnt_per_step):
            feed_dict[train_inputs[i]] = input_s[i]
        for i in range(batch_cnt_per_step):
            feed_dict[train_labels[i]] = label_s[i]
        feed_dict[ph_hypers] = hyper_s
        # print(feed_dict)
        # train
        _, l, predictions, lr = fit_cnn_ses.run(
            [optimizer, loss, train_prediction, learning_rate],
            feed_dict=feed_dict)
        mean_loss += l
        # only the first loss of each step is meaningful, so log just that one
        # print('label_s')
        # print(label_s)
        labels.append(
            label_s.reshape(batch_cnt_per_step *
                            (batch_size - hyper_cnt)).tolist()[0])
        predicts.append(
            predictions.reshape(batch_cnt_per_step *
                                (batch_size - hyper_cnt)).tolist()[0])
        if step % sum_freq == 0:
            # gradients flip sign between odd and even steps, so the
            # verification step must not always land on an even step
            fit_verify = random.randint(9, 10)
            if step > 0 and step % (sum_freq * fit_verify) == 0:
                mean_loss /= sum_freq
                # only consider stopping once the mean loss stays below 15%
                # of both the labels and the predictions for 5 straight checks
                if mean_loss < np.mean(label_s) * 0.15 and mean_loss < np.mean(
                        predictions) * 0.15:
                    mean_loss_vary_cnt += 1
                else:
                    mean_loss_vary_cnt = 0
                if mean_loss_vary_cnt >= 5:
                    fit_ret = True
                    print('mean loss stayed below 15% of the labels')
                print(mean_loss)
                print(np.mean(label_s))
                print('Average loss at step %d: %f learning rate: %f' %
                      (step, mean_loss, lr))
                mean_loss = 0

        # some work for conclusion
        step += 1
        saver.save(fit_cnn_ses, save_path)
        # print("Model saved in file: %s" % save_path)
        if fit_ret:
            for label in labels:
                file_helper.write(LINE_FILE_PATH, str(label))
                print(label)
            print('=' * 80)
            for predict in predicts:
                file_helper.write(PREDICT_FILE_PATH, str(predict))
                print(predict)
            del labels[:]
            del predicts[:]
        return fit_ret
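
The stop decision above only fires after the averaged loss stays below 15% of both the label mean and the prediction mean for 5 consecutive checks. A standalone sketch of that streak logic, with the threshold and streak length taken from the code and a made-up loss stream:

def should_stop(mean_losses, label_mean, predict_mean,
                ratio=0.15, streak_needed=5):
    # count consecutive checks where the loss is below both thresholds
    streak = 0
    for mean_loss in mean_losses:
        if mean_loss < label_mean * ratio and mean_loss < predict_mean * ratio:
            streak += 1
        else:
            streak = 0
        if streak >= streak_needed:
            return True
    return False

print(should_stop([0.2, 0.05, 0.04, 0.04, 0.03, 0.03], 1.0, 1.0))  # True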
Example #5
def train_cnn_hyper(input_s,
                    label_s,
                    init_hypers,
                    graph,
                    saver,
                    is_fit,
                    train_inputs,
                    train_labels,
                    ph_hypers,
                    var_reset_hypers,
                    pack_var_hypers,
                    gradients_hp,
                    optimizer,
                    loss,
                    train_prediction,
                    learning_rate,
                    reset=False):
    """Nudge the hyperparameter variables against the trained loss model and
    stop once any randomly checked hyperparameter has moved appreciably."""
    sum_freq = 3
    init_hypers = [hyper / norm_list[i] for i, hyper in enumerate(init_hypers)]
    with tf.Session(graph=graph) as fit_cnn_ses:
        if os.path.exists(save_path):
            # Restore variables from disk.
            saver.restore(fit_cnn_ses, save_path)
            if reset:
                print('reset, new hypers:')
                print(init_hypers)
                init_feed = dict()
                init_feed[ph_hypers] = init_hypers
                tf.initialize_variables(var_list=var_reset_hypers).run(
                    feed_dict=init_feed)
                # print("Model restored.")
        else:
            init_feed = dict()
            init_feed[ph_hypers] = init_hypers
            tf.initialize_all_variables().run(feed_dict=init_feed)
            print('Initialized')

        num_step_cnt = 1000
        f_labels = list()
        f_features = list()
        hp_mean_loss = 0
        train_ret = False
        hyper_f = init_hypers
        grads = None
        for step in range(num_step_cnt):
            if train_ret:
                break
            if step == 0:
                hp_input_s = input_s
                hp_label_s = label_s
            else:
                hp_input_s = f_features.pop()
                hp_label_s = f_labels.pop()
            f_features.append(hp_label_s[:, :20, :])
            # print("*" * 80)
            # print(hp_input_s)
            # print(hp_label_s)
            # print("*" * 80)
            # feed one dict per step; after the first pass the inputs are the
            # previous step's predictions (hp_input_s / hp_label_s)
            feed_dict = dict()
            feed_dict[is_fit] = False
            for i in range(batch_cnt_per_step):
                feed_dict[train_inputs[i]] = hp_input_s[i]
            for i in range(batch_cnt_per_step):
                feed_dict[train_labels[i]] = hp_label_s[i]
            feed_dict[ph_hypers] = init_hypers
            # print(feed_dict)
            # train
            grads, _, l, predictions, lr, hyper_f = fit_cnn_ses.run(
                [
                    gradients_hp, optimizer, loss, train_prediction,
                    learning_rate, pack_var_hypers
                ],
                feed_dict=feed_dict)
            f_labels.append(
                predictions.reshape((batch_cnt_per_step,
                                     batch_size - hyper_cnt, EMBEDDING_SIZE)))
            print('fetch_hp:')
            print(hyper_f)
            print('gradients:')
            print(grads)
            hp_mean_loss += l
            if step % sum_freq == 0:
                # print('=' * 35 + 'gradients' + '=' * 35)
                if step > 0:
                    hp_mean_loss /= sum_freq
                print('Average loss at step %d: %f learning rate: %f' %
                      (step, hp_mean_loss, lr))
                # print(hp_s)
                hp_diffs = list()
                for i in range(hyper_cnt):
                    hp_diffs.append(
                        math.fabs(
                            int(hyper_f[i] * norm_list[i]) -
                            int(init_hypers[i] * norm_list[i])))
                # a single changed hyperparameter is enough to stop, but the
                # same one could keep moving, so sample the index at random
                ran_index = random.randint(0, hyper_cnt - 1)
                if step <= num_step_cnt / 2 and hp_diffs[ran_index] > 1:
                    if hp_diffs[ran_index] > init_hypers[
                            ran_index] * norm_list[ran_index] * 0.05:
                        train_ret = True
                        print('=' * 30 + 'hyper in step %d' % step + '=' * 30)
                        print(
                            'batch_size, depth, num_hidden, layer_sum, patch_size'
                        )
                        print(hyper_f)
                        print(
                            'random_index = {ran_index}, hp_diff[random index] = {hp_dif_ridx}'
                            .format(ran_index=ran_index,
                                    hp_dif_ridx=hp_diffs[ran_index]))
                # relax the stopping condition in the later steps
                elif step > num_step_cnt / 2 and hp_diffs[ran_index] > 1:
                    train_ret = True
                    print('=' * 30 + 'hyper in step %d' % step + '=' * 30)
                    print(
                        'batch_size, depth, num_hidden, layer_sum, patch_size')
                    print(hyper_f)
                    print(
                        'random_index = {ran_index}, hp_diff[random index] = {hp_dif_ridx}'
                        .format(ran_index=ran_index,
                                hp_dif_ridx=hp_diffs[ran_index]))
                # count NaN gradients explicitly (len(filter(...)) is Py2-only)
                elif sum(1 for grad in grads if math.isnan(grad)) >= hyper_cnt:
                    print('all hyper gradients are nan')
                    print([math.isnan(grad) for grad in grads])
                    train_ret = True
                # reset the running loss after each summary so the average
                # over sum_freq steps stays meaningful
                hp_mean_loss = 0

        final_hps = hyper_f.reshape([hyper_cnt]).tolist()
        final_hps = [
            final_hp * norm_list[i] for i, final_hp in enumerate(final_hps)
        ]
        file_helper.write(HP_FILE_PATH, str(final_hps))
        file_helper.write(GRAD_FILE_PATH, str(grads))
    return train_ret, final_hps
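
train_cnn_hyper stops once a randomly sampled hyperparameter, after de-normalizing and rounding to an integer, has drifted by more than 5% of its initial value. A standalone numeric sketch of that test (all values made up; the code samples ran_index with random.randint):

import math

norm_list = [256.0, 64.0, 128.0]
init_hypers = [0.5, 0.25, 0.5]   # normalized initial hyperparameters
hyper_f = [0.55, 0.25, 0.5]      # normalized values fetched from the graph
hp_diffs = [math.fabs(int(hyper_f[i] * norm_list[i]) -
                      int(init_hypers[i] * norm_list[i]))
            for i in range(len(norm_list))]
ran_index = 0
moved = (hp_diffs[ran_index] > 1 and
         hp_diffs[ran_index] > init_hypers[ran_index] * norm_list[ran_index] * 0.05)
print(hp_diffs, moved)           # [12.0, 0.0, 0.0] True  (12 > 256 * 0.5 * 0.05)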