def read_data(round):
    # ------------------------------ data name --------------------------------------
    train_sample_filepaths, train_labels_filepaths, \
    eval_sample_filepaths, eval_labels_filepaths, \
    test_sample_filepaths, test_labels_filepaths, prior_dic = read_train_eval_test_filename(round * 0.1,
                                                                                            round * 0.1 + 0.1)

    train_batchs_perE = math.ceil(
        np.shape(train_sample_filepaths)[0] / D.batch_size)
    eval_batchs_perE = math.ceil(
        np.shape(eval_sample_filepaths)[0] / D.batch_size)
    test_batchs_perE = math.ceil(
        np.shape(test_sample_filepaths)[0] / D.batch_size)

    logger.info(
        "batch_size: %d\n "
        "train sample: %d, %d batchs every train epoch\n "
        "valid sample: %d, %d batchs every valid epoch\n "
        "test sample: %d, %d batchs every test epoch\n", D.batch_size,
        np.shape(train_sample_filepaths)[0], train_batchs_perE,
        np.shape(eval_sample_filepaths)[0], eval_batchs_perE,
        np.shape(test_sample_filepaths)[0], test_batchs_perE)

    # ---------------------------- read epoch data ----------------------------------
    logger.info("read all train data begin")
    load_data_begin = time.time()
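    # The -5 argument appears to be the fill value for missing data; label
    # entries equal to -5 are masked out later via (labels != -5).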

    features, labels = read_files(train_sample_filepaths,
                                  train_labels_filepaths, prior_dic, -5)
    # (?, h, w, sC)  (?, h, w, outC)
    eval_features, eval_labels = read_files(eval_sample_filepaths,
                                            eval_labels_filepaths, prior_dic,
                                            -5)
    test_features, test_labels = read_files(test_sample_filepaths,
                                            test_labels_filepaths, prior_dic,
                                            -5)

    np.save(
        os.path.join(D.result_dir,
                     'test_features_' + D.input_dataset + '-' + str(round)),
        test_features)
    np.save(
        os.path.join(D.result_dir,
                     'test_labels_' + D.input_dataset + '-' + str(round)),
        test_labels)

    read_elapse = time.time() - load_data_begin
    logger.info("read all train data end, read_elapse=%.4fs", read_elapse)

    filepaths = np.array(np.char.split(test_sample_filepaths, '.').tolist())
    filenames = [os.path.split(filepath)[-1] for filepath in filepaths[:, -2]]

    logger.debug("test_features: %s, test_labels: %s",
                 str(np.shape(test_features)), str(np.shape(test_labels)))

    return test_features, test_labels, test_batchs_perE, filenames

# --------------------------------- Example #2 ---------------------------------

def train_gpu(h, w, model_dir, round):
    # ------------------------------ graph --------------------------------------
    with tf.Graph().as_default() as g, tf.device('/cpu:0'):
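        # locals() is used as a plain dict so that per-GPU placeholders and
        # tensors can be stored and fetched under generated keys such as
        # 'xs0', 'y0', 'mask0'.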
        names = locals()
        model = MultiStageModel(dict(D))
        epo = tf.placeholder(tf.float32, shape=[], name="epoch")
        is_training = tf.placeholder(tf.bool, shape=[], name="is_training")

        # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
        decayed_lr = tf.train.exponential_decay(D.learning_rate,
                                                epo,
                                                D.decay_epochs,
                                                D.decay_rate,
                                                staircase=True)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_optimizer = tf.train.AdamOptimizer(learning_rate=decayed_lr)
        grad_list = []

        xlist, ylist, y_list, losslist, filename_list = [], [], [], [], []
        layers_sorted_and_feature, layerRes, layerout = [], [], []

        with tf.variable_scope(tf.get_variable_scope()):
            reuse = False
            for i in range(D.num_gpus):
                if i > 0: reuse = True
                with tf.device("/gpu:%d" % i), tf.name_scope('GPU_%d' %
                                                             i) as scope:
                    names['xs%d' % i] = tf.placeholder(
                        tf.float32,
                        shape=[None, h, w, D.splited_channel],
                        name="x")
                    names['y%d' % i] = tf.placeholder(
                        tf.float32,
                        shape=[None, h, w, D.out_channel],
                        name="y")
                    names['mask%d' % i] = tf.placeholder(
                        tf.float32,
                        shape=[None, h, w, D.out_channel],
                        name="mask")

                    names['y_%d' % i], names['layers_sorted_and_feature%d' % i], \
                    names['layerRes%d' % i], names['layerout%d' % i] = model.multilayer_redense(names['xs%d' % i],
                                                                                                training=is_training,
                                                                                                reuse=reuse)
                    loss_dic = model.compute_loss(names['y%d' % i],
                                                  names['y_%d' % i],
                                                  names['mask%d' % i],
                                                  names['layerout%d' % i])

                    logger.debug("gpu %d, y_ shape: %s", i,
                                 names['y_%d' % i].get_shape().as_list())
                    logger.debug("gpu %d, variables_list len: %s" %
                                 (i, str(len(model.train_variables))))

                    names['filename%d' % i] = tf.placeholder(tf.string,
                                                             shape=[
                                                                 None,
                                                             ],
                                                             name="filename")

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                  scope)

                    grad = train_optimizer.compute_gradients(loss_dic["loss"])
                    grad_list.append(grad)

                    xlist.append(names['xs%d' % i])
                    ylist.append(names['y%d' % i])
                    filename_list.append(names['filename%d' % i])

                    y_list.append(names['y_%d' % i])
                    losslist.append(loss_dic)

                    layers_sorted_and_feature.append(
                        names['layers_sorted_and_feature%d' % i])
                    layerRes.append(names['layerRes%d' % i])
                    layerout.append(names['layerout%d' % i])

        # Average the per-tower gradients and apply a single synchronized update.
        ave_grad = _average_gradients(grad_list)
        train_op = train_optimizer.apply_gradients(ave_grad,
                                                   global_step=global_step)

        # # Add histograms for gradients.
        # for grad, var in ave_grad:
        #     if grad is not None:
        #         summaries.append(tf.summary.histogram('gradients/' + var.op.name, grad))
        # kernel_store = {}
        # # Add histograms for trainable variables.
        # for var in tf.trainable_variables():
        #     summaries.append(tf.summary.histogram('variables/' + var.op.name, var))
        #     logger.info("Var %s\nvar op %s", str(var), str(var.op).replace('\n', ''))
        #     if 'conv2d' in var.op.name and 'kernel' in var.op.name:  # (k_s, k_s, inc, num_features)
        #         kernel_store[var.op.name] = var

        summary_op = tf.summary.merge(summaries)
        saver = tf.train.Saver(tf.global_variables())
        init_op = tf.global_variables_initializer()

    # ----------- count parameters -------------------
    logger.debug(
        str(xlist) + '\n' + str(ylist) + '\n' + str(y_list) + '\n' +
        str(losslist))
    logger.debug(
        str([names['mask0'], names['mask1'], names['mask2'], names['mask3']]) +
        '\n' + str(filename_list))
    logger.debug(
        str(epo) + '\n' + str(is_training) + '\n' + str(global_step) + '\n' +
        str(train_op).replace('\n', ''))

    # ------------------------------ data name --------------------------------------
    train_sample_filepaths, train_labels_filepaths, \
    eval_sample_filepaths, eval_labels_filepaths, \
    test_sample_filepaths, test_labels_filepaths, prior_dic = read_train_eval_test_filename(round * 0.1,
                                                                                            round * 0.1 + 0.1)

    train_batchs_perE = math.ceil(
        np.shape(train_sample_filepaths)[0] / D.batch_size)
    eval_batchs_perE = math.ceil(
        np.shape(eval_sample_filepaths)[0] / D.batch_size)
    test_batchs_perE = math.ceil(
        np.shape(test_sample_filepaths)[0] / D.batch_size)

    logger.info(
        "batch_size: %d\n "
        "train sample: %d, %d batchs every train epoch\n "
        "valid sample: %d, %d batchs every valid epoch\n "
        "test sample: %d, %d batchs every test epoch\n", D.batch_size,
        np.shape(train_sample_filepaths)[0], train_batchs_perE,
        np.shape(eval_sample_filepaths)[0], eval_batchs_perE,
        np.shape(test_sample_filepaths)[0], test_batchs_perE)

    # ---------------------------- read epoch data ----------------------------------
    logger.info("read all train data begin")
    load_data_begin = time.time()

    if os.path.exists(
            os.path.join(
                D.data_dir,
                'features_' + D.input_dataset + '-' + str(round) + '.npy')):
        features = np.load(
            os.path.join(
                D.data_dir,
                'features_' + D.input_dataset + '-' + str(round) + '.npy'))
        labels = np.load(
            os.path.join(
                D.data_dir,
                'labels_' + D.input_dataset + '-' + str(round) + '.npy'))
        eval_features = np.load(
            os.path.join(
                D.data_dir, 'eval_features_' + D.input_dataset + '-' +
                str(round) + '.npy'))
        eval_labels = np.load(
            os.path.join(
                D.data_dir,
                'eval_labels_' + D.input_dataset + '-' + str(round) + '.npy'))
        test_features = np.load(
            os.path.join(
                D.data_dir, 'test_features_' + D.input_dataset + '-' +
                str(round) + '.npy'))
        test_labels = np.load(
            os.path.join(
                D.data_dir,
                'test_labels_' + D.input_dataset + '-' + str(round) + '.npy'))
    else:
        features, labels = read_files(train_sample_filepaths,
                                      train_labels_filepaths, prior_dic, -5)
        # (?, h, w, sC)  (?, h, w, outC)
        eval_features, eval_labels = read_files(eval_sample_filepaths,
                                                eval_labels_filepaths,
                                                prior_dic, -5)
        test_features, test_labels = read_files(test_sample_filepaths,
                                                test_labels_filepaths,
                                                prior_dic, -5)

        np.save(
            os.path.join(D.data_dir,
                         'features_' + D.input_dataset + '-' + str(round)),
            features)
        np.save(
            os.path.join(D.data_dir,
                         'labels_' + D.input_dataset + '-' + str(round)),
            labels)
        np.save(
            os.path.join(D.data_dir, 'eval_features_' + D.input_dataset + '-' +
                         str(round)), eval_features)
        np.save(
            os.path.join(D.data_dir,
                         'eval_labels_' + D.input_dataset + '-' + str(round)),
            eval_labels)
        np.save(
            os.path.join(D.data_dir, 'test_features_' + D.input_dataset + '-' +
                         str(round)), test_features)
        np.save(
            os.path.join(D.data_dir,
                         'test_labels_' + D.input_dataset + '-' + str(round)),
            test_labels)

    read_elapse = time.time() - load_data_begin
    logger.info("read all train data end, read_elapse=%.4fs", read_elapse)

    # ------------------------------train session--------------------------------------
    config = tf.ConfigProto(
        allow_soft_placement=True)  # , log_device_placement=True
    config.gpu_options.allow_growth = True
    with tf.Session(graph=g, config=config) as sess:
        sess.run(init_op)

        graph_def = sess.graph.as_graph_def(add_shapes=True)
        train_summary_writer = tf.summary.FileWriter(
            model_dir + "train")  # , graph_def=graph_def
        valid_summary_writer = tf.summary.FileWriter(model_dir + "valid")
        test_summary_writer = tf.summary.FileWriter(model_dir + "test")

        # run op
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        logger.info("training begin")
        total_step = 0
        indices = np.arange(0, np.shape(train_sample_filepaths)[0])
        epoch_result = open(os.path.join(model_dir, 'valid_log.txt'), 'a')
        total_step_loss = open(os.path.join(model_dir, 'step_loss.txt'), 'a')

        for epoch in range(D.epochs):
            np.random.shuffle(indices)
            features = np.take(features, indices, axis=0)
            labels = np.take(labels, indices, axis=0)

            filepaths = np.array(
                np.char.split(np.take(train_sample_filepaths, indices, axis=0),
                              '.').tolist())
            filenames = [
                os.path.split(filepath)[-1] for filepath in filepaths[:, -2]
            ]

            epoch_xx, epoch_yy, epoch_yy_ = [], [], []
            epoch_l = {}

            epoch_begin = time.time()
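            # tbp tracks the number of batches actually run this epoch; it is
            # decremented when the final short batch is skipped below.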
            tbp = copy.copy(train_batchs_perE)
            for b in range(train_batchs_perE):  # train_batchs_perE
                batch_begin = b * D.batch_size

                # avoid empty batch
                tower_size = math.floor(D.batch_size / D.num_gpus)
                if (b == train_batchs_perE - 1):
                    final_b_sample = np.shape(features[b * D.batch_size:])[0]
                    if final_b_sample < D.num_gpus:
                        tbp -= 1
                        break
                    else:
                        tower_size = math.floor(final_b_sample / D.num_gpus)

                step_begin = time.time()
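                # NOTE: the feed dict below hard-codes four towers (xs0..xs3),
                # so it assumes D.num_gpus == 4; on the final batch the
                # open-ended slice for tower 3 absorbs any remaining samples.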
                _, ldic, xx, yy, yy_, batch_filenames, train_summary_str, \
                layer_sd_and_f, lmhsRes, out_lmhs = sess.run(
                    [train_op, losslist, xlist, ylist, y_list, filename_list, summary_op,
                     layers_sorted_and_feature, layerRes, layerout],
                    feed_dict={
                        names['xs0']: features[batch_begin:(batch_begin + tower_size)],
                        names['y0']: labels[batch_begin:(batch_begin + tower_size)],
                        names['mask0']: (labels[batch_begin:(batch_begin + tower_size)] != -5),
                        names['filename0']: filenames[batch_begin:(batch_begin + tower_size)],

                        names['xs1']: features[(batch_begin + tower_size):(batch_begin + 2 * tower_size)],
                        names['y1']: labels[(batch_begin + tower_size):(batch_begin + 2 * tower_size)],
                        names['mask1']: (labels[(batch_begin + tower_size):(batch_begin + 2 * tower_size)] != -5),
                        names['filename1']: filenames[(batch_begin + tower_size):(batch_begin + 2 * tower_size)],

                        names['xs2']: features[(batch_begin + 2 * tower_size):(batch_begin + 3 * tower_size)],
                        names['y2']: labels[(batch_begin + 2 * tower_size):(batch_begin + 3 * tower_size)],
                        names['mask2']: (labels[(batch_begin + 2 * tower_size):(batch_begin + 3 * tower_size)] != -5),
                        names['filename2']: filenames[(batch_begin + 2 * tower_size):(batch_begin + 3 * tower_size)],

                        names['xs3']: features[(batch_begin + 3 * tower_size):(batch_begin + D.batch_size)],
                        names['y3']: labels[(batch_begin + 3 * tower_size):(batch_begin + D.batch_size)],
                        names['mask3']: (labels[(batch_begin + 3 * tower_size):(batch_begin + D.batch_size)] != -5),
                        names['filename3']: filenames[(batch_begin + 3 * tower_size):(batch_begin + D.batch_size)],

                        epo: epoch, is_training: True},
                    options=run_options, run_metadata=run_metadata)
                step_end = time.time()

                ####### B=bs/gpunum
                ## xx           list(len=gpunum) element:numpy (B, h, w, sC)
                ## yy, yy_      list(len=gpunum) element:numpy (B, h, w, outC)
                ## layer_sd_and_f
                ##              list(len=gpunum) element:list(len=2(sd&f)) element:numpy(B, h, w, sC*4)
                ##                                                                 numpy(B, h, w, c*4)
                ## lmhsRes      list(len=gpunum) element:list(len=4(l&m&h&st)) element:numpy(B, h, w, outC)
                ## out_lmhs     list(len=gpunum) element:list(len=4(l&m&h&st)) element:numpy(B, h, w, outC)
                #######

                xx, yy, yy_ = np.concatenate(xx, axis=0), np.concatenate(
                    yy, axis=0), np.concatenate(yy_, axis=0)

                l_dic = {}
                for ld in ldic:
                    l_dic = dic_add(l_dic, ld)
                l_dic = dic_div_constant(l_dic, D.num_gpus)

                if (b == 0 or b == 40) and (np.nansum(
                        layer_sd_and_f[0][0][:, :, :, D.splited_channel * 3:])
                                            > 100):
                    acc_dic = compute_verification(yy_.copy(), yy.copy())
                    logger.debug(
                        "train_epoch=%d, mini_batch=%d, total_step=%d, train_time=%.4fs\n"
                        "loss:\n  %s\n acc:\n  %s", epoch, b, total_step,
                        step_end - step_begin, str(l_dic), str(acc_dic))

                    visualize_inputs(
                        xx,
                        yy,
                        outpath=os.path.join(model_dir, 'trainresult'),
                        filenames=np.char.add(
                            np.char.add("epoch%d-step%d-" % (epoch, b),
                                        list(batch_filenames)), "-inputs_"))

                    batch_filenames = np.concatenate(batch_filenames, axis=0)
                    tower_size = math.ceil(D.batch_size / D.num_gpus)
                    visualize_ensembles(
                        layer_sd_and_f[0][0],
                        outpath=os.path.join(model_dir, 'trainresult'),
                        dpi=300,
                        filenames=np.char.add(
                            np.char.add("epoch%d-step%d-" % (epoch, b),
                                        list(batch_filenames[:tower_size])),
                            "-layerinput"))

                assert not np.isnan(
                    l_dic["loss"]), 'Model diverged with loss = NaN'

                total_step_loss.write("%.10f\n" % l_dic['loss'])

                epoch_l = dic_add(epoch_l, l_dic)
                epoch_xx.append(xx)
                epoch_yy.append(yy)
                epoch_yy_.append(yy_)
                total_step += 1

            train_summary_writer.add_summary(train_summary_str, epoch)
            epoch_xx, epoch_yy, epoch_yy_ = np.concatenate(epoch_xx, axis=0), np.concatenate(epoch_yy, axis=0), \
                                            np.concatenate(epoch_yy_, axis=0)
            epoch_a = compute_verification(epoch_yy_.copy(), epoch_yy.copy())
            epoch_l = dic_div_constant(epoch_l, tbp)
            logger.info(
                "train_epoch=%d, epoch_train_time=%.4fs\n epoch_loss:\n  %s\n epoch_acc:\n  %s",
                epoch,
                time.time() - epoch_begin, str(epoch_l), str(epoch_a))

            train_summary = tf.Summary()
            for key, value in epoch_l.items():
                train_summary.value.add(tag='mean_loss/' + key,
                                        simple_value=epoch_l[key])
            for key, value in epoch_a.items():
                train_summary.value.add(tag='mean_acc/' + key,
                                        simple_value=epoch_a[key])
            train_summary_writer.add_summary(train_summary, epoch)

            if epoch % 1 == 0 or epoch == D.epochs - 1:
                train_summary_writer.add_run_metadata(run_metadata,
                                                      'epoch%05d' % epoch)

            if epoch % (D.decay_epochs - 1) == 0 or epoch == D.epochs - 1:
                checkpoint_path = os.path.join(model_dir, D.model_name_reg)
                saver.save(sess, checkpoint_path, global_step=epoch)
                logger.info("saved to %s\n total_epoch=%d", checkpoint_path,
                            epoch)

            # ---------------------------------- valid -----------------------------------
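            # 'epoch % 1 == 0' is always true, so validation runs every epoch;
            # raise the modulus to validate less often.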
            if (epoch % 1 == 0 or epoch == D.epochs - 1):
                valid_begin = time.time()
                eval_indeces = np.arange(0, np.shape(eval_sample_filepaths)[0])
                np.random.shuffle(eval_indeces)
                eval_features = np.take(eval_features, eval_indeces, axis=0)
                eval_labels = np.take(eval_labels, eval_indeces, axis=0)
                filepaths = np.array(
                    np.char.split(
                        np.take(eval_sample_filepaths, eval_indeces, axis=0),
                        '.').tolist())
                filenames = [
                    os.path.split(filepath)[-1]
                    for filepath in filepaths[:, -2]
                ]

                epoch_xx, epoch_yy, epoch_yy_ = [], [], []
                epoch_filename = []
                epoch_l = {}

                ebp = eval_batchs_perE
                for b in range(eval_batchs_perE):
                    batch_begin = b * D.batch_size

                    # avoid empty batch
                    tower_size = math.floor(D.batch_size / D.num_gpus)
                    if (b == eval_batchs_perE - 1):
                        final_b_samples = np.shape(
                            eval_features[b * D.batch_size:])[0]
                        if final_b_samples < D.num_gpus:
                            ebp -= 1
                            break
                        else:
                            tower_size = math.floor(final_b_samples /
                                                    D.num_gpus)

                    v_xx, v_yy, v_yy_, v_batch_filenames, eva_loss_d, valid_summary_str = sess.run(
                        [
                            xlist, ylist, y_list, filename_list, losslist,
                            summary_op
                        ],
                        feed_dict={
                            names['xs0']:
                            eval_features[batch_begin:(batch_begin +
                                                       tower_size)],
                            names['y0']:
                            eval_labels[batch_begin:(batch_begin +
                                                     tower_size)],
                            names['mask0']:
                            (eval_labels[batch_begin:(batch_begin +
                                                      tower_size)] != -5),
                            names['filename0']:
                            filenames[batch_begin:(batch_begin + tower_size)],
                            names['xs1']:
                            eval_features[(batch_begin +
                                           tower_size):(batch_begin +
                                                        2 * tower_size)],
                            names['y1']:
                            eval_labels[(batch_begin +
                                         tower_size):(batch_begin +
                                                      2 * tower_size)],
                            names['mask1']:
                            (eval_labels[(batch_begin + tower_size):(
                                batch_begin + 2 * tower_size)] != -5),
                            names['filename1']:
                            filenames[(batch_begin +
                                       tower_size):(batch_begin +
                                                    2 * tower_size)],
                            names['xs2']:
                            eval_features[(batch_begin +
                                           2 * tower_size):(batch_begin +
                                                            3 * tower_size)],
                            names['y2']:
                            eval_labels[(batch_begin +
                                         2 * tower_size):(batch_begin +
                                                          3 * tower_size)],
                            names['mask2']:
                            (eval_labels[(batch_begin + 2 * tower_size):(
                                batch_begin + 3 * tower_size)] != -5),
                            names['filename2']:
                            filenames[(batch_begin +
                                       2 * tower_size):(batch_begin +
                                                        3 * tower_size)],
                            names['xs3']:
                            eval_features[(batch_begin +
                                           3 * tower_size):(batch_begin +
                                                            D.batch_size)],
                            names['y3']:
                            eval_labels[(batch_begin +
                                         3 * tower_size):(batch_begin +
                                                          D.batch_size)],
                            names['mask3']:
                            (eval_labels[(batch_begin + 3 * tower_size):(
                                batch_begin + D.batch_size)] != -5),
                            names['filename3']:
                            filenames[(batch_begin +
                                       3 * tower_size):(batch_begin +
                                                        D.batch_size)],
                            epo:
                            epoch,
                            is_training:
                            False
                        })

                    vl_dic = {}
                    for ld in eva_loss_d:
                        vl_dic = dic_add(vl_dic, ld)
                    vl_dic = dic_div_constant(vl_dic, D.num_gpus)
                    v_xx, v_yy, v_yy_ = np.concatenate(v_xx, axis=0), np.concatenate(v_yy, axis=0), \
                                        np.concatenate(v_yy_, axis=0)
                    v_batch_filenames = np.concatenate(v_batch_filenames,
                                                       axis=0)
                    epoch_filename.append(v_batch_filenames)

                    epoch_l = dic_add(epoch_l, vl_dic)
                    epoch_xx.append(v_xx)
                    epoch_yy.append(v_yy)
                    epoch_yy_.append(v_yy_)

                valid_summary_writer.add_summary(valid_summary_str, epoch)
                epoch_xx, epoch_yy, epoch_yy_ = np.concatenate(epoch_xx, axis=0), np.concatenate(epoch_yy, axis=0), \
                                                np.concatenate(epoch_yy_, axis=0)
                epoch_l = dic_div_constant(epoch_l, ebp)
                epoch_a = compute_verification(epoch_yy_.copy(),
                                               epoch_yy.copy())
                logger.info(
                    "valid_epoch=%d, epoch_valid_time=%.4fs, mean_loss:\n  %s\nmean_acc:\n  %s",
                    epoch,
                    time.time() - valid_begin, str(epoch_l), str(epoch_a))

                if epoch % (D.decay_epochs - 1) == 0 or epoch == D.epochs - 1:
                    np.save(os.path.join(model_dir, 'valid_name%d' % epoch),
                            np.concatenate(epoch_filename, axis=0))
                    np.save(os.path.join(model_dir, 'valid_label%d' % epoch),
                            epoch_yy)
                    np.save(os.path.join(model_dir, 'valid_predict%d' % epoch),
                            epoch_yy_)

                for key in epoch_a.keys():
                    epoch_result.write('%.10f ' % epoch_a[key])
                epoch_result.write('\n')

                valid_summary = tf.Summary()
                for key, value in epoch_l.items():
                    valid_summary.value.add(tag='mean_loss/' + key,
                                            simple_value=epoch_l[key])
                for key, value in epoch_a.items():
                    valid_summary.value.add(tag='mean_acc/' + key,
                                            simple_value=epoch_a[key])
                valid_summary_writer.add_summary(valid_summary, epoch)

        logger.info("training finished")

        if D.is_test:
            logger.info("testing begin")
            test_begin = time.time()

            filepaths = np.array(
                np.char.split(test_sample_filepaths, '.').tolist())
            filenames = [
                os.path.split(filepath)[-1] for filepath in filepaths[:, -2]
            ]

            epoch_xx, epoch_yy, epoch_yy_ = [], [], []
            epoch_filename = []
            epoch_l = {}

            for b in range(test_batchs_perE):  # test_batchs_perE
                batch_begin = b * D.batch_size

                # avoid empty batch
                tower_size = math.floor(D.batch_size / D.num_gpus)
                if (b == test_batchs_perE - 1):
                    final_b_sample = np.shape(test_features[b *
                                                            D.batch_size:])[0]
                    if final_b_sample < D.num_gpus:
                        test_batchs_perE -= 1
                        break
                    else:
                        tower_size = math.floor(final_b_sample / D.num_gpus)

                t_xx, t_yy, t_yy_, t_batch_filenames, tb_l = sess.run(
                    [xlist, ylist, y_list, filename_list, losslist],
                    feed_dict={
                        names['xs0']:
                        test_features[batch_begin:(batch_begin + tower_size)],
                        names['y0']:
                        test_labels[batch_begin:(batch_begin + tower_size)],
                        names['mask0']:
                        (test_labels[batch_begin:(batch_begin + tower_size)] !=
                         -5),
                        names['filename0']:
                        filenames[batch_begin:(batch_begin + tower_size)],
                        names['xs1']:
                        test_features[(batch_begin +
                                       tower_size):(batch_begin +
                                                    2 * tower_size)],
                        names['y1']:
                        test_labels[(batch_begin +
                                     tower_size):(batch_begin +
                                                  2 * tower_size)],
                        names['mask1']:
                        (test_labels[(batch_begin + tower_size):(
                            batch_begin + 2 * tower_size)] != -5),
                        names['filename1']:
                        filenames[(batch_begin + tower_size):(batch_begin +
                                                              2 * tower_size)],
                        names['xs2']:
                        test_features[(batch_begin +
                                       2 * tower_size):(batch_begin +
                                                        3 * tower_size)],
                        names['y2']:
                        test_labels[(batch_begin +
                                     2 * tower_size):(batch_begin +
                                                      3 * tower_size)],
                        names['mask2']:
                        (test_labels[(batch_begin + 2 * tower_size):(
                            batch_begin + 3 * tower_size)] != -5),
                        names['filename2']:
                        filenames[(batch_begin +
                                   2 * tower_size):(batch_begin +
                                                    3 * tower_size)],
                        names['xs3']:
                        test_features[(batch_begin +
                                       3 * tower_size):(batch_begin +
                                                        D.batch_size)],
                        names['y3']:
                        test_labels[(batch_begin +
                                     3 * tower_size):(batch_begin +
                                                      D.batch_size)],
                        names['mask3']:
                        (test_labels[(batch_begin + 3 * tower_size):(
                            batch_begin + D.batch_size)] != -5),
                        names['filename3']:
                        filenames[(batch_begin +
                                   3 * tower_size):(batch_begin +
                                                    D.batch_size)],
                        epo:
                        epoch,
                        is_training:
                        False
                    })

                tl_dic = {}
                for ld in tb_l:
                    tl_dic = dic_add(tl_dic, ld)
                tl_dic = dic_div_constant(tl_dic, D.num_gpus)

                t_xx, t_yy, t_yy_ = np.concatenate(
                    t_xx,
                    axis=0), np.concatenate(t_yy,
                                            axis=0), np.concatenate(t_yy_,
                                                                    axis=0)
                t_batch_filenames = np.concatenate(t_batch_filenames, axis=0)
                epoch_filename.append(t_batch_filenames)

                epoch_l = dic_add(epoch_l, tl_dic)
                epoch_xx.append(t_xx)
                epoch_yy.append(t_yy)
                epoch_yy_.append(t_yy_)

            test_end = time.time()
            epoch_xx, epoch_yy, epoch_yy_ = np.concatenate(epoch_xx, axis=0), np.concatenate(epoch_yy, axis=0), \
                                            np.concatenate(epoch_yy_, axis=0)
            epoch_l = dic_div_constant(epoch_l, test_batchs_perE)
            epoch_a = compute_verification(epoch_yy_.copy(), epoch_yy.copy())

            logger.info(
                "test: testtime = %.4fsec/sam\n mean_loss=\n  %s\nmean_acc=\n  %s",
                (test_end - test_begin) / test_batchs_perE / D.batch_size,
                str(epoch_l), str(epoch_a))

            with open(os.path.join('test_result.txt'), 'a') as f:
                f.write("round %d\n" % round)
                for key in epoch_a.keys():
                    f.write("%s:%s " % (key, str(epoch_a[key])))
                f.write("\n")

            logger.info("testing finished")
            return epoch_a

# --------------------------------- Example #3 ---------------------------------

# -*- coding: utf-8 -*-
import time
import copy
from model.utils import *
from config import D
from model.model_multistage import MultiStageModel
from verification import compute_verification
from model.log_configure import logger

tf.logging.set_verbosity(tf.logging.ERROR)
# --------------------------------- print hyperparameters --------------------------------------
logger.info(str(D))


def _average_gradients(grads_list):
    """Calculate the average gradient for each shared variable across all towers.
      Note that this function provides a synchronization point across all towers.
      Args:
        grads_list: List of lists of (gradient, variable) tuples. The outer list
          is over individual gradients. The inner list is over the gradient
          calculation for each tower.
      Returns:
         List of pairs of (gradient, variable) where the gradient has been averaged
         across all towers.
      """
    average_grads = []
    for grad_and_vars in zip(*grads_list):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add a leading 'tower' dimension so the per-GPU gradients can be
            # stacked and averaged (standard multi-tower averaging).
            expanded_g = tf.expand_dims(g, 0)
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # Variables are shared across towers, so the first tower's variable
        # reference is sufficient.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
    return average_grads


# --------------------------------- Example #4 ---------------------------------

def read_train_eval_test_filename(testB_percent=0.0, testE_percent=0.85):
    sample_filepaths = np.array(
        glob.glob(os.path.join('../input_forecast', D.input_dataset, '*')))
    sample_filepaths = np.sort(sample_filepaths)
    # VT-YYYYMMDDHH_IT-YYYYMMDDHH_FH-FF.npy
    labels_filepaths = []
    prior_dic = {}  # 'YYYYMMDDHH': [50]
    p = 0
    while p < np.shape(sample_filepaths)[0]:
        _, filename = os.path.split(sample_filepaths[p])
        vt = filename.split('_')[0].split('-')[1]  # forecast time
        if filename.split('.')[0].split('_')[-1].split('-')[1] == '24':
            prior_it = filename.split('_')[1].split('-')[1]  # basetime
            prior_arr = np.load(
                sample_filepaths[p])  # (50, 33, 33) contain nan
            compare_arr = np.loadtxt(
                os.path.join('../input_real',
                             '190927_r24_usable_0.25_33x33_locMean',
                             vt + '.txt'))  # (33, 33) contain nan
            element_number = np.shape(prior_arr)[0]
            inverse_rmse = np.zeros(
                element_number)  # [50] 1/rmse  larger better
            for i in range(element_number):
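                # note: despite the name, this is 1 / mean squared error
                # (no square root is taken).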
                inverse_rmse[i] = 1 / (
                    np.nanmean(np.square(prior_arr[i] - compare_arr)) + 1e-6)
            prior_dic[prior_it] = np.exp(inverse_rmse) / np.nansum(
                np.exp(inverse_rmse))  # [50] larger better
            sample_filepaths = np.delete(sample_filepaths, p, axis=0)
        else:
            labels_filepaths.append(
                os.path.join('../input_real',
                             '190927_r24_usable_0.25_33x33_locMean',
                             vt + '.txt'))  # YYYYMMDDHH.txt
            p += 1
    labels_filepaths = np.array(labels_filepaths)

    p = 0
    while p < np.shape(sample_filepaths)[0]:
        _, filename = os.path.split(sample_filepaths[p])
        it = filename.split('_')[1].split('-')[1]  # basetime
        if it not in prior_dic.keys():
            sample_filepaths = np.delete(sample_filepaths, p, axis=0)
            labels_filepaths = np.delete(labels_filepaths, p, axis=0)
        else:
            p += 1

    if np.shape(sample_filepaths)[0] != np.shape(labels_filepaths)[0]:
        raise Exception(
            'def read_train_eval_test_filename() sample num != label num')
    else:
        n = np.shape(sample_filepaths)[0]
        test_indeces = np.arange(int(n * testB_percent),
                                 int(n * testE_percent))
        train_indeces = np.array(
            [i for i in np.arange(n) if i not in test_indeces])
        eval_indeces = test_indeces.copy()
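        # NOTE: the validation indices are identical to the test indices in
        # this split scheme.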

    logger.info("all sample: %d", n)

    train_sample_filepaths, train_labels_filepaths = np.take(sample_filepaths, train_indeces, axis=0), \
                                                     np.take(labels_filepaths, train_indeces, axis=0)
    eval_sample_filepaths, eval_labels_filepaths = np.take(sample_filepaths, eval_indeces, axis=0), \
                                                   np.take(labels_filepaths, eval_indeces, axis=0)
    test_sample_filepaths, test_labels_filepaths = np.take(sample_filepaths, test_indeces, axis=0), \
                                                   np.take(labels_filepaths, test_indeces, axis=0)

    return train_sample_filepaths, train_labels_filepaths, \
           eval_sample_filepaths, eval_labels_filepaths, \
           test_sample_filepaths, test_labels_filepaths, prior_dic
def print_result_graph_and_para(test_features, test_labels, test_batchs_perE,
                                filenames, inner_dir):
    ckpt = tf.train.get_checkpoint_state(os.path.join(D.model_dir, inner_dir))
    saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
    graph = tf.get_default_graph()
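    # Tensors and placeholders are looked up by name from the restored meta
    # graph; the names must match those created in train_gpu
    # (e.g. 'GPU_0/x:0', 'epoch:0', 'is_training:0').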

    session_config = tf.ConfigProto(log_device_placement=False,
                                    allow_soft_placement=True)

    with tf.Session(config=session_config) as sess:
        test_begin = time.time()
        saver.restore(sess, ckpt.model_checkpoint_path)

        epoch_xx, epoch_yy, epoch_yy_ = [], [], []
        epoch_filename = []
        epoch_l = 0

        for b in range(test_batchs_perE):  # test_batchs_perE
            batch_begin = b * D.batch_size

            # avoid empty batch
            tower_size = math.floor(D.batch_size / D.num_gpus)
            if (b == test_batchs_perE - 1):
                final_b_sample = np.shape(test_features[b * D.batch_size:])[0]
                if final_b_sample < D.num_gpus:
                    test_batchs_perE -= 1
                    break
                else:
                    tower_size = math.floor(final_b_sample / D.num_gpus)

            t_xx, t_yy, t_yy_, t_batch_filenames, tb_l = sess.run(
                [[
                    graph.get_tensor_by_name("GPU_0/x:0"),
                    graph.get_tensor_by_name("GPU_1/x:0"),
                    graph.get_tensor_by_name("GPU_2/x:0"),
                    graph.get_tensor_by_name("GPU_3/x:0")
                ],
                 [
                     graph.get_tensor_by_name("GPU_0/y:0"),
                     graph.get_tensor_by_name("GPU_1/y:0"),
                     graph.get_tensor_by_name("GPU_2/y:0"),
                     graph.get_tensor_by_name("GPU_3/y:0")
                 ],
                 [
                     graph.get_tensor_by_name('GPU_0/multilayer/add_6:0'),
                     graph.get_tensor_by_name('GPU_1/multilayer/add_6:0'),
                     graph.get_tensor_by_name('GPU_2/multilayer/add_6:0'),
                     graph.get_tensor_by_name('GPU_3/multilayer/add_6:0')
                 ],
                 [
                     graph.get_tensor_by_name("GPU_0/filename:0"),
                     graph.get_tensor_by_name("GPU_1/filename:0"),
                     graph.get_tensor_by_name("GPU_2/filename:0"),
                     graph.get_tensor_by_name("GPU_3/filename:0")
                 ],
                 [
                     graph.get_tensor_by_name('GPU_0/compute_loss/add_7:0'),
                     graph.get_tensor_by_name('GPU_1/compute_loss/add_7:0'),
                     graph.get_tensor_by_name('GPU_2/compute_loss/add_7:0'),
                     graph.get_tensor_by_name('GPU_3/compute_loss/add_7:0')
                 ]],
                feed_dict={
                    'GPU_0/x:0':
                    test_features[batch_begin:(batch_begin + tower_size)],
                    'GPU_0/y:0':
                    test_labels[batch_begin:(batch_begin + tower_size)],
                    "GPU_0/mask:0":
                    (test_labels[batch_begin:(batch_begin + tower_size)] !=
                     -5),
                    'GPU_0/filename:0':
                    filenames[batch_begin:(batch_begin + tower_size)],
                    'GPU_1/x:0':
                    test_features[(batch_begin + tower_size):(batch_begin +
                                                              2 * tower_size)],
                    'GPU_1/y:0':
                    test_labels[(batch_begin + tower_size):(batch_begin +
                                                            2 * tower_size)],
                    "GPU_1/mask:0": (test_labels[(batch_begin + tower_size):(
                        batch_begin + 2 * tower_size)] != -5),
                    'GPU_1/filename:0':
                    filenames[(batch_begin + tower_size):(batch_begin +
                                                          2 * tower_size)],
                    'GPU_2/x:0':
                    test_features[(batch_begin +
                                   2 * tower_size):(batch_begin +
                                                    3 * tower_size)],
                    'GPU_2/y:0':
                    test_labels[(batch_begin +
                                 2 * tower_size):(batch_begin +
                                                  3 * tower_size)],
                    "GPU_2/mask:0":
                    (test_labels[(batch_begin + 2 * tower_size):(
                        batch_begin + 3 * tower_size)] != -5),
                    'GPU_2/filename:0':
                    filenames[(batch_begin + 2 * tower_size):(batch_begin +
                                                              3 * tower_size)],
                    'GPU_3/x:0':
                    test_features[(batch_begin +
                                   3 * tower_size):(batch_begin +
                                                    D.batch_size)],
                    'GPU_3/y:0':
                    test_labels[(batch_begin + 3 * tower_size):(batch_begin +
                                                                D.batch_size)],
                    "GPU_3/mask:0":
                    (test_labels[(batch_begin + 3 * tower_size):(
                        batch_begin + D.batch_size)] != -5),
                    'GPU_3/filename:0':
                    filenames[(batch_begin + 3 * tower_size):(batch_begin +
                                                              D.batch_size)],
                    'epoch:0':
                    0,
                    'is_training:0':
                    False
                })

            tl = 0
            for ld in tb_l:
                tl += ld
            tl = tl / D.num_gpus

            t_xx, t_yy, t_yy_ = np.concatenate(t_xx, axis=0), np.concatenate(
                t_yy, axis=0), np.concatenate(t_yy_, axis=0)
            t_batch_filenames = np.concatenate(t_batch_filenames, axis=0)
            epoch_filename.append(t_batch_filenames)

            epoch_l += tl
            epoch_xx.append(t_xx)
            epoch_yy.append(t_yy)
            epoch_yy_.append(t_yy_)

        test_end = time.time()
        epoch_xx, epoch_yy, epoch_yy_ = np.concatenate(epoch_xx, axis=0), np.concatenate(epoch_yy, axis=0), \
                                        np.concatenate(epoch_yy_, axis=0)
        epoch_l = epoch_l / test_batchs_perE
        epoch_a = compute_verification(epoch_yy_.copy(), epoch_yy.copy())

        logger.info(
            "test: testtime = %.4fsec/sam\n mean_loss=\n  %s\nmean_acc=\n  %s",
            (test_end - test_begin) / test_batchs_perE / D.batch_size,
            str(epoch_l), str(epoch_a))


if __name__ == '__main__':
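    # 'round' selects the held-out 10% fold: samples in positions
    # [round*0.1*n, (round*0.1 + 0.1)*n) of the sorted file list form the
    # eval/test split (see read_train_eval_test_filename).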
    round = 7
    logger.info(
        "################################# round %d begin ###########################",
        round)

    test_features, test_labels, test_batchs_perE, filenames = read_data(round)
    inner_dir = '128261 ml_mu_nolocal2d_add_wotDS7'
    print_result_graph_and_para(test_features, test_labels, test_batchs_perE,
                                filenames, inner_dir)

    logger.info(
        "################################# round %d end ###########################",
        round)