Exemple #1
0
def main():
    """Train one SVR per forecast month on standardized 12-month sales
    windows, report holdout RMSE, and write the 4-month forecast to disk.

    NOTE(review): relies on project helpers defined elsewhere in this file
    (preprocess_train_data, scale_fit, scale_to, scale_back, my_metric,
    write_results) and on `svm` (presumably sklearn.svm) imported at the
    top of the file — confirm availability before running in isolation.
    """
    x = preprocess_train_data()
    # One row per series: 24 monthly values; first 12 are model inputs,
    # last 12 are targets (presumably 1320 series — TODO confirm).
    x = np.reshape(x, (-1, 24))
    mu, sigma = scale_fit(x[:, :12])
    xs_train = scale_to(x[:, :12], mu, sigma, range(0, 12))
    ys_train = scale_to(x[:, 12:], mu, sigma, range(0, 12))
    # Rows from index 1000 onward serve as the holdout split.
    xs_test = scale_to(x[1000:, :12], mu, sigma, range(0, 12))
    y_true = x[1000:, 12:]

    print('The shape of input data is ', xs_train.shape, ys_train.shape)

    # One SVR per forecast step: model i predicts scaled month 13+i.
    models = []
    for i in range(4):
        model = svm.SVR(gamma='scale', epsilon=0, C=0.4)
        model.fit(xs_train, ys_train[:, i])
        models.append(model)

    # FIX: `np.float` was removed in NumPy 1.24 — use np.float64 instead.
    # Also size the buffer from the data rather than hard-coding 320 rows.
    ys_pred = np.zeros(shape=(xs_test.shape[0], 4), dtype=np.float64)
    for i in range(4):
        ys_pred[:, i] = models[i].predict(xs_test)
    y_pred = scale_back(ys_pred, mu, sigma, range(0, 4))
    rmse = my_metric(y_true[:, :4], y_pred)
    print('rmse: %.3f'%rmse)

    # Forecast months 25-28 using months 13-24 as inputs for every series.
    xs_eval = scale_to(x[:, 12:], mu, sigma, range(0, 12))
    ys_eval = np.zeros(shape=(xs_eval.shape[0], 4), dtype=np.float64)
    for i in range(4):
        ys_eval[:, i] = models[i].predict(xs_eval)
    y_eval = scale_back(ys_eval, mu, sigma, range(0, 4))
    # Column-major flatten: every series' month 1, then month 2, ...
    y_result = np.reshape(y_eval[:, :4], (y_eval.shape[0] * 4), order='F')
    write_results('Results/rmse-%d-svm'%rmse, y_result)
Exemple #2
0
def main():
    """Fit a 1-D CNN on standardized 12-month sales windows, report the
    holdout RMSE, and write the flattened 4-month forecast to disk.

    Depends on project helpers (preprocess_train_data, scale_fit,
    scale_to, scale_back, build_cnn, my_metric, write_results) defined
    elsewhere in this file.
    """
    data = preprocess_train_data()
    data = np.reshape(data, (-1, 24))
    mu, sigma = scale_fit(data[:, :12])

    cols = range(0, 12)
    xs_train = scale_to(data[:, :12], mu, sigma, cols)
    ys_train = scale_to(data[:, 12:], mu, sigma, cols)
    xs_test = scale_to(data[1000:, :12], mu, sigma, cols)
    y_true = data[1000:, 12:]

    print('The shape of input data is ', data.shape)

    net = build_cnn(input_dim=12)
    net.summary()
    # The CNN expects a trailing channel axis: (batch, 12, 1).
    net.fit(np.reshape(xs_train, (-1, 12, 1)),
            ys_train,
            batch_size=32,
            epochs=300,
            validation_split=0.1,
            verbose=2)

    # Score on the holdout rows (index 1000 onward).
    ys_pred = net.predict(np.reshape(xs_test, (-1, 12, 1)))
    y_pred = scale_back(ys_pred, mu, sigma, cols)
    rmse = my_metric(y_true, y_pred)
    print('rmse: %.3f' % rmse)

    # Forecast from the last 12 observed months and persist the result.
    xs_eval = scale_to(data[:, 12:], mu, sigma, cols)
    y_eval = scale_back(net.predict(np.reshape(xs_eval, (-1, 12, 1))),
                        mu, sigma, cols)
    y_result = np.reshape(y_eval[:, :4], (1320 * 4), order='F')
    write_results('Results/rmse-%d-cnn1d' % rmse, y_result)
Exemple #3
0
def main():
    """Train 10 independent MLPs on smoothed, standardized sales series,
    rescale each model's months-25-28 forecast, clamp long-tail outliers
    back inside per-series envelopes, and write one result file per model.

    NOTE(review): depends on project helpers defined elsewhere in this
    file (preprocess_train_data, smooth, scale_fit, scale_to, scale_back,
    build_mlp, my_metric, write_results).
    """
    x = preprocess_train_data()
    # One row per series: 24 monthly values (presumably 1320 series, since
    # the clamp loop below iterates range(1320) — TODO confirm).
    x = np.reshape(x, (-1, 24))
    # smooth() also returns per-series baseline/envelope curves used later
    # to undo the detrending and to bound the forecasts.
    x, base, base_lower, base_center, base_upper = smooth(x)

    mu, sigma = scale_fit(x)
    xs_train = scale_to(x[:, :12], mu, sigma, range(0, 12))
    ys_train = scale_to(x[:, 12:], mu, sigma, range(0, 12))
    # NOTE(review): xs_test equals xs_train (no holdout), so the rmse
    # printed below is an in-sample score.
    xs_test = scale_to(x[:, :12], mu, sigma, range(0, 12))
    y_true = x[:, 12:]

    print('The shape of input data is ', x.shape)

    # Train 10 models; each writes its own results file, indexed by i.
    for i in range(10):
        model = build_mlp(input_dim=12)
        model.summary()

        model.fit(xs_train,
                  ys_train,
                  batch_size=32,
                  epochs=300,
                  validation_split=0,
                  verbose=2)
        ys_pred = model.predict(xs_test)
        y_pred = scale_back(ys_pred, mu, sigma, range(0, 12))
        rmse = my_metric(y_true, y_pred)
        print('rmse: %.3f' % rmse)

        # for i in range(0,1320,100):
        #     plt.plot(y_true[i], label="true", color='green')
        #     plt.plot(y_pred[i], label='predicted', color='red')
        #     # plt.plot(x[0][:12], label='origin', color='blue')
        #     plt.legend(loc='upper left')
        #     plt.show()

        # Forecast months 25-28 from months 13-24, then re-add the
        # smoothing baseline for the forecast horizon (columns 24:).
        xs_eval = scale_to(x[:, 12:], mu, sigma, range(0, 12))
        ys_eval = model.predict(xs_eval)
        y_eval = scale_back(ys_eval, mu, sigma, range(0, 12))
        # x = x + base[:, :24]
        y_eval = y_eval + base[:, 24:]

        # deal with long tail
        for j in range(1320):
            print('zoom:', j)
            # print(y_eval[j][:4])
            upper = np.max(y_eval[j][:4])
            lower = np.min(y_eval[j][:4])
            # Baseline envelope for the 4 forecast months of series j.
            center = base_center[j][24:28]
            upper_revise = base_upper[j][24:28]
            lower_revise = base_lower[j][24:28]
            # Default shrink factor 1 (no change) per forecast month.
            zoom_upper = np.array([1, 1, 1, 1])
            zoom_lower = np.array([1, 1, 1, 1])
            # If any forecast breaches the envelope, compute per-month
            # factors that pull it back toward the center line.
            if (lower < lower_revise).any():
                zoom_lower = (center - lower_revise) / (center - y_eval[j][:4])
            if (upper > upper_revise).any():
                zoom_upper = (center - upper_revise) / (center - y_eval[j][:4])
            zoom = np.vstack([zoom_lower, zoom_upper])
            # Use the smallest positive factor so every month stays in-bounds.
            # NOTE(review): raises if no factor is positive — assumed not to
            # occur in practice; confirm with the data.
            zoom = zoom[zoom > 0].min()
            # zoom = np.min([zoom_lower, zoom_upper], axis=0)
            # if base_center[j][-1] > base_center[j][0]: # k > 0
            #     zoom = zoom_upper
            # else: # k < 0
            #     zoom = zoom_lower
            # plt.plot(range(28), np.hstack([x[j] + base[j][:24], y_eval[j][:4]]))
            # Contract the forecast toward the center by the chosen factor.
            y_eval[j][:4] = center - (center - y_eval[j][:4]) * zoom
            # bottom = np.min(y_eval[j][:4])
            # if bottom < 0:
            #     y_eval[j][:4] = center - (center - y_eval[j][:4]) * (center - 1) / (center - bottom)

            # plt.plot(range(28), np.hstack([x[j] + base[j][:24], y_eval[j][:4]]))
            # plt.plot(range(28), base_center[j][:28])
            # plt.plot(range(28), base_lower[j][:28])
            # plt.plot(range(28), base_upper[j][:28])
            # plt.show()

            # if y_eval[j][:4].min() < 0:
            #     print(top)
            #     print(y_eval[j][:4])
            #     print(base_revise[j][24:28])
            #     print(base[j][24:28])
            #     input()

        # Sanity check: warn if clamping still left clearly negative sales.
        if y_eval[:, :4].min() < -0.1:
            print('Have negative number!')

        # Column-major flatten: every series' month 1, then month 2, ...
        y_result = np.reshape(y_eval[:, :4], (1320 * 4), order='F')
        write_results('Results/mlp6-No.{}'.format(i), y_result)
Exemple #4
0
def main():
    """Train 10 two-input MLPs (x split into two single-column inputs) on
    smoothed sales series, clamp each forecast into its per-series
    envelope, and write one result file per model.

    NOTE(review): depends on project helpers defined elsewhere in this
    file (preprocess_train_data, smooth, scale_fit, scale_to, scale_back,
    build_mlp, write_results).
    """
    x, y = preprocess_train_data()
    # x: 2 feature columns per series; y: 24 monthly values per series
    # (presumably 1320 series, given the range(1320) loop — TODO confirm).
    x = np.reshape(x, (-1, 2))
    y = np.reshape(y, (-1, 24))
    # smooth() also returns baseline/envelope curves used to undo the
    # detrending and bound the forecasts.
    y, base, base_lower, base_center, base_upper = smooth(y)

    # Duplicate the features so each series contributes two training rows:
    # one targeting months 1-12, one targeting months 13-24 (see below).
    xs_train = np.vstack([x, x])

    mu, sigma = scale_fit(y)
    ys_train = scale_to(y[:, :12], mu, sigma, range(0, 12))
    ys_train = np.vstack(
        [ys_train, scale_to(y[:, 12:], mu, sigma, range(0, 12))])

    print('The shape of input data is ', xs_train.shape, ys_train.shape)

    # Train 10 models; each writes its own results file, indexed by i.
    for i in range(10):
        model = build_mlp(input_dim=2)
        model.summary()

        # The model takes the two feature columns as separate inputs.
        model.fit([xs_train[:, :1], xs_train[:, 1:]],
                  ys_train,
                  batch_size=32,
                  epochs=300,
                  validation_split=0,
                  verbose=2)

        xs_eval = x
        ys_eval = model.predict([xs_eval[:, :1], xs_eval[:, 1:]])
        y_eval = scale_back(ys_eval, mu, sigma, range(0, 12))
        # x = x + base[:, :24]
        # Re-add the smoothing baseline for the forecast horizon (cols 24:).
        y_eval = y_eval + base[:, 24:]

        # deal with long tail
        for j in range(1320):
            print('zoom:', j)
            upper = np.max(y_eval[j][:4])
            lower = np.min(y_eval[j][:4])
            # Baseline envelope for the 4 forecast months of series j.
            center = base_center[j][24:28]
            upper_revise = base_upper[j][24:28]
            lower_revise = base_lower[j][24:28]
            # Default shrink factor 1 (no change) per forecast month.
            zoom_upper = np.array([1, 1, 1, 1])
            zoom_lower = np.array([1, 1, 1, 1])
            # If any forecast breaches the envelope, compute per-month
            # factors that pull it back toward the center line.
            if (lower < lower_revise).any():
                zoom_lower = (center - lower_revise) / (center - y_eval[j][:4])
            if (upper > upper_revise).any():
                zoom_upper = (center - upper_revise) / (center - y_eval[j][:4])
            zoom = np.vstack([zoom_lower, zoom_upper])
            # Smallest positive factor keeps every month inside the envelope.
            # NOTE(review): raises if no factor is positive — confirm data.
            zoom = zoom[zoom > 0].min()
            # Contract the forecast toward the center by the chosen factor.
            y_eval[j][:4] = center - (center - y_eval[j][:4]) * zoom

            # plt.plot(range(28), np.hstack([y[j] + base[j][:24], y_eval[j][:4]]))
            # plt.plot(range(28), base_center[j][:28])
            # plt.plot(range(28), base_lower[j][:28])
            # plt.plot(range(28), base_upper[j][:28])
            # plt.show()

        # Sanity check: warn if clamping still left clearly negative sales.
        if y_eval[:, :4].min() < -0.1:
            print('Have negative number!')

        # Column-major flatten: every series' month 1, then month 2, ...
        y_result = np.reshape(y_eval[:, :4], (1320 * 4), order='F')
        write_results('Results/mlp7-No.{}'.format(i), y_result)
Exemple #5
0
def main():
    """Train 10 multi-input MLPs on sales, popularity, comment and reply
    series plus one-hot car/adcode features, then forecast months 25-28
    and write one result file per model.

    NOTE(review): depends on project helpers defined elsewhere in this
    file (preprocess_train_data, smooth, scale_fit, scale_to, scale_back,
    build_mlp, my_metric, write_results) and on `plt` (presumably
    matplotlib.pyplot) imported at the top of the file.

    BUG(review): `base` is referenced below (in `y_eval = y_eval + base...`
    and the long-tail loop) but is never defined here — the `smooth` call
    that would bind it is commented out. As written this raises NameError
    at runtime. The commented call unpacks 2 values while `smooth`
    elsewhere in this file returns 5, so the intended fix is ambiguous —
    resolve with the author before enabling.
    """
    salesVolume_list, popularity_list, comment_list, reply_list, \
        car_feature_list, adcode_feature_list = preprocess_train_data()

    # salesVolume_list, base = smooth(salesVolume_list)
    # Standardize each feature family with its own fit statistics.
    s_mu, s_sigma = scale_fit(salesVolume_list)
    s_train = scale_to(salesVolume_list[:, :12], s_mu, s_sigma, range(12))
    y_train = scale_to(salesVolume_list[:, 12:], s_mu, s_sigma, range(12))

    p_mu, p_sigma = scale_fit(popularity_list)
    p_train = scale_to(popularity_list[:, :12], p_mu, p_sigma, range(12))

    c_mu, c_sigma = scale_fit(comment_list)
    c_train = scale_to(comment_list[:, :12], c_mu, c_sigma, range(12))

    r_mu, r_sigma = scale_fit(reply_list)
    r_train = scale_to(reply_list[:, :12], r_mu, r_sigma, range(12))

    car_onehot = car_feature_list[:, :]
    adcode_onehot = adcode_feature_list[:, :]

    # Diagnostic plots: sales vs. reply for every 100th series.
    # NOTE(review): reply_list is scaled with the *popularity* statistics
    # (p_mu, p_sigma) here — looks like a copy-paste slip; confirm intent.
    for i in range(0, 1320, 100):
        plt.plot(range(1, 25),
                 scale_to(salesVolume_list, s_mu, s_sigma, range(12))[i])
        plt.plot(range(1, 25),
                 scale_to(reply_list, p_mu, p_sigma, range(12))[i])
        plt.show()

    # for i in range(1, 6):
    #     s_train = np.vstack((s_train, scale_to(salesVolume_list[:, i:12+i], s_mu, s_sigma, range(i,12+i))))
    #     y_train = np.vstack((y_train, scale_to(salesVolume_list[:, 12+i:18+i], s_mu, s_sigma, range(i,6+i))))
    #     p_train = np.vstack((p_train, scale_to(popularity_list[:, i:12+i], p_mu, p_sigma, range(i,12+i))))
    #     c_train = np.vstack((c_train, scale_to(comment_list[:, i:12+i], c_mu, c_sigma, range(i,12+i))))
    #     r_train = np.vstack((r_train, scale_to(reply_list[:, i:12+i], r_mu, r_sigma, range(i,12+i))))
    #     car_onehot = np.vstack((car_onehot, car_feature_list[:, :]))
    #     adcode_onehot = np.vstack((adcode_onehot, adcode_feature_list[:, :]))

    # NOTE(review): the "test" split equals the training inputs (no
    # holdout), so the rmse printed below is an in-sample score.
    s_test = scale_to(salesVolume_list[:, :12], s_mu, s_sigma, range(12))
    p_test = scale_to(popularity_list[:, :12], p_mu, p_sigma, range(12))
    c_test = scale_to(comment_list[:, :12], c_mu, c_sigma, range(12))
    r_test = scale_to(reply_list[:, :12], r_mu, r_sigma, range(12))
    y_true = salesVolume_list[:, 12:]

    # Train 10 models; each writes its own results file, indexed by i.
    for i in range(10):
        model = build_mlp()
        model.summary()

        model.fit(
            [s_train, p_train, c_train, r_train, car_onehot, adcode_onehot],
            y_train,
            batch_size=32,
            epochs=300,
            validation_split=0,
            verbose=2)
        ys_pred = model.predict(
            [s_test, p_test, c_test, r_test, car_onehot, adcode_onehot])
        y_pred = scale_back(ys_pred, s_mu, s_sigma, range(12))
        rmse = my_metric(y_true, y_pred)
        print('rmse: %.3f' % rmse)

        # for i in range(10):
        #     plt.plot(y_true[i], label="true", color='green')
        #     plt.plot(y_pred[i], label='predicted', color='red')
        #     # plt.plot(x[0][:12], label='origin', color='blue')
        #     plt.legend(loc='upper left')
        #     plt.show()

        # Forecast months 25-28 from months 13-24 of each feature family.
        s_eval = scale_to(salesVolume_list[:, 12:], s_mu, s_sigma, range(12))
        p_eval = scale_to(popularity_list[:, 12:], p_mu, p_sigma, range(12))
        c_eval = scale_to(comment_list[:, 12:], c_mu, c_sigma, range(12))
        r_eval = scale_to(reply_list[:, 12:], r_mu, r_sigma, range(12))
        ys_eval = model.predict(
            [s_eval, p_eval, c_eval, r_eval, car_onehot, adcode_onehot])
        y_eval = scale_back(ys_eval, s_mu, s_sigma, range(12))
        # BUG(review): `base` is undefined here — see docstring.
        y_eval = y_eval + base[:, 24:]

        # deal with long tail
        for j in range(1320):
            print('zoom:', j)
            # top = base[j][12] # > bottom bottom_revise
            # BUG(review): `base` is also undefined here; additionally,
            # `top` is built from popularity_list — confirm whether
            # salesVolume_list was intended.
            top = popularity_list[j][23] + base[j][23]
            bottom = np.min(y_eval[j][:4])
            # bottom_revise = base_revise[j][24:28] # >= 0
            print(y_eval[j][:4])
            # Rescale negative forecasts toward `top` so the minimum
            # lands at 1 instead of below zero.
            if bottom < 0:
                # y_eval[j][:4] = top - (top - y_eval[j][:4]) * top / (top - bottom)
                # if (base_revise[j][24:28] == base[j][24:28]).all(): # k > 0
                y_eval[j][:4] = top - (top - y_eval[j][:4]) * (top - 1) / (
                    top - bottom)

        # Column-major flatten: every series' month 1, then month 2, ...
        y_result = np.reshape(y_eval[:, :4], (1320 * 4), order='F')
        write_results('Results/mlp5-No.{}'.format(i), y_result)