Beispiel #1
0
    def validation_measure(data_index, which_measure):
        """Score the model on one validation split with the requested criterion.

        Args:
            data_index: Position of the data set in ``validation`` / ``data_set_name``.
            which_measure: One of ``'Pred(25)'``, ``'MdAE'``, ``'SA'`` or ``'RE*'``.

        Returns:
            Whatever the matching ``criteria`` function returns for
            ``(targets, predictions)``, or ``None`` for an unrecognised measure name.
        """
        features, targets = convert.concat_examples(validation[data_index])
        predictions = model.forward_2(chainer.Variable(features)).data

        # For these data sets both predictions and targets are raised as powers of e
        # before scoring (presumably they are stored log-transformed -- TODO confirm).
        if data_set_name[data_index] in ['cocnas', 'maxwell', 'opens']:
            predictions = np.power(math.e, predictions)
            targets = np.power(math.e, targets)

        # Ordered (measure label, criteria attribute) pairs; the attribute is looked up
        # only for the label that matches.
        label_to_criterion = (
            ('Pred(25)', 'pred25'),
            ('MdAE', 'mae'),
            ('SA', 'sa'),
            ('RE*', 're'),
        )
        for label, criterion_name in label_to_criterion:
            if which_measure == label:
                return getattr(criteria, criterion_name)(targets, predictions)
        return None
Beispiel #2
0
def main(encoder_n_units=32, common_size=16, regression_n_units=32, discriminator_n_units=32, batch_size_def=None,
         epoch=3000, data_set_name=None, validation_patience_original=1000, train_size=0.7,
         save_code=False):
    """Adversarially train an encoder/regression model on two data sets and score it.

    Two data sets are loaded through ``read_data_validation.get_train_and_test``.
    An ``EncoderRegressionModel`` and a ``Discriminator`` are then trained in
    alternation: the discriminator learns to tell the two encoder outputs apart
    (encoder 1 labelled 0, encoder 2 labelled 1), and the model is updated with the
    labels flipped plus a reconstruction loss for data set 0 and a regression loss
    (weighted by 2) for data set 1. After every step the validation Pred(25) of
    data set 1 is computed; whenever it is at least as good as the best seen so
    far the patience counter is reset, and the model is written to
    ``../models/multi_<name>.model`` if its test Pred(25) is a new best.

    Args:
        encoder_n_units: Forwarded to ``EncoderRegressionModel``.
        common_size: Size of the shared code; forwarded to both networks.
        regression_n_units: Forwarded to ``EncoderRegressionModel``.
        discriminator_n_units: Forwarded to ``Discriminator``.
        batch_size_def: Batch size per data set, indexed like ``data_set_name``.
            Defaults to ``[6, 20]``.
        epoch: Training stops once every training iterator has reached this epoch.
        data_set_name: Names of the data sets. Index 0 and index 1 are the ones
            trained on; index 1 is the one evaluated. Defaults to
            ``['china', 'kitchenham']``.
        validation_patience_original: Number of training steps without a
            validation improvement after which training stops.
        train_size: Forwarded to ``get_train_and_test``.
        save_code: When truthy, the encoder outputs of all splits of both data
            sets are stacked and written to ``./data/prevModel.csv``.

    Returns:
        list: ``[pred25, mae, sa, re]`` as returned by the ``criteria`` functions
        for the test split of data set 1, using the model state at the end of
        training (not the checkpoint saved to disk).
    """
    if batch_size_def is None:
        batch_size_def = [6, 20]
    if data_set_name is None:
        data_set_name = ['china', 'kitchenham']
    print("---------------------------------------------Reading data...-----------------------------------------------")
    train = []
    validation = []
    test = []
    in_size = []

    for a_name in data_set_name:
        # get_train_and_test returns six further values (the x/y arrays of each split
        # in the original unpacking); nothing below reads them, so they are discarded.
        a_train, a_validation, a_test, a_in_size, *_ = read_data_validation.get_train_and_test(
            dataset=a_name, train_size=train_size, validation_size=0.5)

        train.append(a_train)
        validation.append(a_validation)
        test.append(a_test)
        in_size.append(a_in_size)

    # Prepare the train iter.
    train_iter = [chainer.iterators.SerialIterator(a_train, batch_size_def[i])
                  for i, a_train in enumerate(train)]

    # Build model
    print("---------------------------------------Building model...---------------------------------------------------")
    model = EncoderRegressionModel(in_size=in_size, encoder_n_units=encoder_n_units,
                                   regression_n_units=regression_n_units,
                                   common_out_size=common_size)
    model_optimizer = chainer.optimizers.Adam()
    model_optimizer.setup(model)
    # Build Discriminator
    discriminator = Discriminator(common_size, discriminator_n_units)
    discriminator_optimizer = chainer.optimizers.SGD(lr=0.001)
    discriminator_optimizer.setup(discriminator)

    # --------------------------------Measures: pred, MdAE, SA, RE*-----------------------------------------------------
    # Data sets whose predictions and targets are raised as powers of e before scoring
    # (presumably because they are stored log-transformed -- TODO confirm against
    # read_data_validation).
    exponentiated_data_sets = ('cocnas', 'maxwell', 'opens')
    criterion_by_measure = {
        'Pred(25)': criteria.pred25,
        'MdAE': criteria.mae,  # NOTE(review): "MdAE" is mapped to criteria.mae -- confirm this is intended.
        'SA': criteria.sa,
        'RE*': criteria.re,
    }

    def _targets_and_predictions(splits, data_index):
        """Return ``(targets, predictions)`` of ``model.forward_2`` for one split, in evaluation mode."""
        x_array, t_array = convert.concat_examples(splits[data_index])
        with chainer.using_config('train', False):
            y_predict = model.forward_2(chainer.Variable(x_array)).data
        if data_set_name[data_index] in exponentiated_data_sets:
            y_predict = np.power(math.e, y_predict)
            t_array = np.power(math.e, t_array)
        return t_array, y_predict

    def _measure(splits, data_index, which_measure):
        """Score one split with the named criterion; ``None`` for an unknown name."""
        criterion = criterion_by_measure.get(which_measure)
        if criterion is None:
            return None
        t_array, y_predict = _targets_and_predictions(splits, data_index)
        return criterion(t_array, y_predict)

    def test_measure(data_index, which_measure):
        """Score the test split of data set ``data_index``."""
        return _measure(test, data_index, which_measure)

    def validation_measure(data_index, which_measure):
        """Score the validation split of data set ``data_index``."""
        return _measure(validation, data_index, which_measure)

    # Storage the loss
    loss_all = []
    dis_loss = []

    # train
    def discriminator_loss_fun(x1, x2, y1, y2):
        """Sum of the sigmoid cross-entropies of the discriminator on both code batches."""
        y1_hat = discriminator(x1)
        y1_hat = y1_hat.reshape(len(y1_hat))
        loss1 = f.sigmoid_cross_entropy(y1_hat, y1)
        y2_hat = discriminator(x2)
        y2_hat = y2_hat.reshape(len(y2_hat))
        loss2 = f.sigmoid_cross_entropy(y2_hat, y2)
        loss = loss1 + loss2
        dis_loss.append(loss.data)
        return loss

    def loss_fun(x1, x2, y2, label1, label2):
        """Model loss: reconstruction of x1, doubled regression error on y2 and both adversarial terms."""
        encoder_1_output_def, encoder_2_output_def, decoder_1_output, regression_2_output_def = model(x1, x2)
        regression_2_output_def = regression_2_output_def.reshape((len(regression_2_output_def), 1))
        decoder_1_loss = f.mean_absolute_error(decoder_1_output, x1)
        regression_2_loss = f.mean_absolute_error(regression_2_output_def, y2)

        # Generator loss
        y1_hat = discriminator(encoder_1_output_def)
        y1_hat = y1_hat.reshape(len(y1_hat))
        encoder_1_loss = f.sigmoid_cross_entropy(y1_hat, label1)
        y2_hat = discriminator(encoder_2_output_def)
        y2_hat = y2_hat.reshape(len(y2_hat))
        encoder_2_loss = f.sigmoid_cross_entropy(y2_hat, label2)

        loss = decoder_1_loss + regression_2_loss * 2 + encoder_1_loss + encoder_2_loss
        loss_all.append(loss.data)
        return loss

    print("----------------------------------------------------Training...--------------------------------------------")
    validation_frequency = 1
    validation_patience = validation_patience_original
    # Measures: [pred, MdAE, SA, RE*]
    best_validation = [0, 0, 0, 0]
    best_test = [0, 0, 0, 0]

    # using_config is a context manager: it only takes effect inside a ``with`` block.
    with chainer.using_config('train', True):
        # Keep going while at least one iterator has not yet reached the epoch limit.
        while any(an_iter.epoch < epoch for an_iter in train_iter):
            # get batch (the targets of data set 0 are not used by any loss)
            batch1 = train_iter[0].next()
            x_array, _ = convert.concat_examples(batch1)
            input_x1 = chainer.Variable(x_array)

            batch2 = train_iter[1].next()
            x_array, t_array = convert.concat_examples(batch2)
            input_x2 = chainer.Variable(x_array)
            input_y2 = chainer.Variable(t_array)

            # Train Discriminator on the real data: encoder 1 -> 0, encoder 2 -> 1
            encoder_1_output, encoder_2_output, _, _ = model(input_x1, input_x2)
            zeros = np.zeros(len(encoder_1_output), dtype=np.int32)
            ones = np.ones(len(encoder_2_output), dtype=np.int32)
            discriminator_optimizer.update(discriminator_loss_fun, encoder_1_output, encoder_2_output, zeros, ones)

            # Train Generator with the labels flipped: encoder 1 -> 1, encoder 2 -> 0
            zeros = np.zeros(len(encoder_2_output), dtype=np.int32)
            ones = np.ones(len(encoder_1_output), dtype=np.int32)
            model_optimizer.update(loss_fun, input_x1, input_x2, input_y2, ones, zeros)

            # validation
            validation_patience -= 1
            if train_iter[1].epoch % validation_frequency == 0:
                # compute pred25
                validation_pred25 = validation_measure(1, "Pred(25)")
                if validation_pred25 >= best_validation[0]:
                    best_validation[0] = validation_pred25
                    # test on the test dataset
                    test_pred25 = test_measure(1, "Pred(25)")
                    if test_pred25 > best_test[0]:
                        best_test[0] = test_pred25
                        # save model
                        serializers.save_npz('../models/multi_' + data_set_name[1] + '.model', model)
                    validation_patience = validation_patience_original
            # ``<=`` so that a zero or negative patience still terminates the loop.
            if validation_patience <= 0:
                break

    print("-------------------------------------- Train finished -----------------------------------------------------")

    # Save Code, including the train, validation and test.
    if save_code:
        codes = []
        with chainer.using_config('train', False):
            # Row order of the stacked output: data set 0 (train, validation, test),
            # then data set 1 (train, validation, test).
            for data_index, encoder_forward in ((0, model.encoder1_forward), (1, model.encoder2_forward)):
                for splits in (train, validation, test):
                    x_array, _ = convert.concat_examples(splits[data_index])
                    codes.append(encoder_forward(chainer.Variable(x_array)).data)

        code = np.vstack(codes)
        print("Code.shape is ", code.shape)
        gen_data = pandas.DataFrame(code)
        gen_data.to_csv('./data/prevModel.csv')

    print('---------------------------------------------Criteria Test------------------------------------------------')
    # Use the same evaluation path (evaluation mode, same back-transform) as test_measure
    # so the reported numbers are comparable with the ones used during model selection.
    t_array, y_predict_data = _targets_and_predictions(test, 1)

    best_test[0] = criteria.pred25(t_array, y_predict_data)
    best_test[1] = criteria.mae(t_array, y_predict_data)
    best_test[2] = criteria.sa(t_array, y_predict_data)
    best_test[3] = criteria.re(t_array, y_predict_data)
    return best_test