def predictAllShop_LC_HPS(all_data,
                          trainAsTest=False,
                          saveFilePath=None,
                          featurePath=None,
                          cate_level=0,
                          cate_name=None,
                          featureSavePath=None,
                          needSaveFeature=False,
                          ignore_shopids=[],
                          needCV=False,
                          model_path=None,
                          Augmented=False,
                          ignore_get_train=True,
                          ignore_predict=True,
                          addNoiseInResult=False,
                          time=1):
    """
    通过gridsearch找超参数
    :param trainAsTest: 是否使用训练集后14天作为测试集
    :param saveFilePath
    :param featurePath:
    :param cate_level:
    :param cate_name:
    :param featureSavePath:
    :param needSaveFeature:
    :param ignore_shopids:
    :param create_model_function:
    :param needCV
    :param Augmented:是否增广样本
    :param  ignore_get_train:是否忽略获取样本
    :param ignore_predict:是否忽略预测
    :return:
    """

    augument_time = 1
    verbose = 2
    last_N_days = 60
    # count of shops skipped so far (unused)
    # ignores = 0
    shop_need_to_predict = 2000
    if cate_level == 0:
        shopids = np.arange(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)
    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])

    weather = False
    weekOrWeekend = False
    day_back_num = 21
    sameday_backNum = 0
    week_backnum = 3
    other_features = [statistic_functon_mean, statistic_functon_median]
    other_features = []  # overrides the line above: weekly statistics disabled
    shop_features = ["perpay", "comment", "score", "level"]
    shop_features = []  # overrides the line above: shop-info features disabled
    # one-hot encoder for the weekend flag
    hot_encoder = onehot([[1], [0]])
    # one-hot encoder for category level 1 (cate1)
    cate1_list = np.unique(shop_info['cate1'])
    cate1_label_encoder = labelEncoder(cate1_list)
    cate1_list2 = cate1_label_encoder.transform(cate1_list).reshape((-1, 1))
    cate1_hot_encoder = onehot(cate1_list2)
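    # two-step encoding: cate1_label_encoder maps each cate1 string to an
    # integer, and cate1_hot_encoder turns that integer into a one-hot row,
    # so a single category goes string -> int -> one-hot vector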

    if featurePath is None:
        all_x = None
        all_y = None
        for shopid in shopids:
            if ignore_get_train:
                if shopid in ignore_shopids:
                    print "ignore get train", shopid
                    continue
            print "get ", shopid, " train"
            part_data = all_data[all_data.shopid == shopid]
            last_14_real_y = None
            # hold out the last 14 days as the test set
            if trainAsTest:  # when testing on the last 14 days, train on the earlier part
                last_14_real_y = part_data[len(part_data) -
                                           14:]["count"].values
                part_data = part_data[0:len(part_data) - 14]
            # print last_14_real_y
            '''determine how many leading days to skip (keep only the last N days)'''
            skipNum = part_data.shape[0] - last_N_days
            if skipNum < 0:
                skipNum = 0
            train_x = None
            '''extract features'''
            if sameday_backNum != 0:  # same-weekday history
                sameday = extractBackSameday(part_data, sameday_backNum,
                                             skipNum, nan_method_sameday_mean)
                train_x = getOneWeekdayFomExtractedData(sameday)
            if day_back_num != 0:  # daily history
                if train_x is not None:
                    train_x = np.concatenate(
                        (train_x,
                         getOneWeekdayFomExtractedData(
                             extractBackDay(part_data, day_back_num, skipNum,
                                            nan_method_sameday_mean))),
                        axis=1)
                else:
                    train_x = getOneWeekdayFomExtractedData(
                        extractBackDay(part_data, day_back_num, skipNum,
                                       nan_method_sameday_mean))
            if weekOrWeekend:  # weekday/weekend flag
                ws = getOneWeekdayFomExtractedData(
                    extractWorkOrWeekend(part_data, skipNum))
                train_x = np.concatenate((train_x, hot_encoder.transform(ws)),
                                         axis=1)

            count = extractCount(part_data, skipNum)
            train_y = getOneWeekdayFomExtractedData(count)
            for feature in other_features:
                value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum, skipNum,
                                         nan_method_sameday_mean, feature))
                train_x = np.append(train_x, value, axis=1)
            '''add shop-info features'''
            # print train_x,train_x.shape
            index = shopid - 1
            oneshopinfo = shop_info.iloc[index]
            shop_city = oneshopinfo['cityname']
            shop_perpay = oneshopinfo['perpay'] if not pd.isnull(
                oneshopinfo['perpay']) else 0
            shop_score = oneshopinfo['score'] if not pd.isnull(
                oneshopinfo['score']) else 0
            shop_comment = oneshopinfo['comment'] if not pd.isnull(
                oneshopinfo['comment']) else 0
            shop_level = oneshopinfo['level'] if not pd.isnull(
                oneshopinfo['level']) else 0
            shop_cate1 = oneshopinfo['cate1']
            import warnings
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=DeprecationWarning)
                shop_cate1_encoder = cate1_hot_encoder.transform(
                    cate1_label_encoder.transform([shop_cate1]))
            if "perpay" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_perpay,
                                    axis=1)
            if "score" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_score,
                                    axis=1)
            if "comment" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_comment,
                                    axis=1)
            if "level" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_level,
                                    axis=1)
            if "cate1" in shop_features:
                for i in range(shop_cate1_encoder.shape[1]):
                    train_x = np.insert(train_x,
                                        train_x.shape[1],
                                        shop_cate1_encoder[0][i],
                                        axis=1)
            '''shop-info features done'''
            '''weather features'''
            if weather:
                weathers = getOneWeekdayFomExtractedData(
                    extractWeatherInfo(part_data, skipNum, shop_city))
                train_x = np.append(train_x, weathers, axis=1)
            '''weather features done'''

            if all_x is None:
                all_x = train_x
                all_y = train_y
            else:
                all_x = np.insert(all_x, all_x.shape[0], train_x, axis=0)
                all_y = np.insert(all_y, all_y.shape[0], train_y, axis=0)

                # '''add weekday feature'''
                # extract_weekday = getOneWeekdayFomExtractedData(extractWeekday(part_data, skipNum))
                # train_x = np.append(train_x, extract_weekday, axis=1)
                # ''''''

                # train_x = train_x.reshape((train_x.shape[0],
                #                            train_x.shape[1], 1))
                # print model.get_weights()
                # part_counts = []
                # for i in range(7):
                #     weekday = i + 1
                #     part_count = getOneWeekdayFomExtractedData(count, weekday)
                #     part_counts.append(part_count)

        train_x = all_x
        train_y = all_y
        """增广训练集"""
        if Augmented:
            print "augment data"
            new_train_x = np.ndarray(
                (train_x.shape[0] * (augument_time + 1), train_x.shape[1]))
            new_train_y = np.ndarray(
                (train_y.shape[0] * (augument_time + 1), train_y.shape[1]))

            def augument_relu(v):  # Gaussian augmentation; seems to work poorly and can distort samples badly
                return v * (1 + 0.01 * np.random.normal())

            def augument_relu2(v):  # fixed 5% up-scaling, used below
                return v * 1.05

            end = train_x.shape[0]
            for index in range(end):
                new_train_x[index] = train_x[index]
                new_train_y[index] = train_y[index]
            sert_index = end  # first free slot after the copied originals
            for index in range(end):
                print "%d / %d" % (index, end)
                for t in range(augument_time):
                    new_train_x[sert_index] = train_x[index]
                    # train_x = np.concatenate((train_x, [train_x[index]]), axis=0)
                    # print train_x
                    ov = train_y[index][0]
                    # train_y = np.concatenate((train_y, [[augument_relu(ov)]]), axis=0)
                    new_train_y[sert_index] = [augument_relu2(ov)]
                    sert_index += 1
                    # print train_y
            print "augment finish"
            train_x = new_train_x
            train_y = new_train_y
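            # a vectorized sketch of the same augmentation (assuming the fixed
            # 1.05 factor of augument_relu2; row order differs for
            # augument_time > 1, which does not matter for training):
            #   train_x = np.vstack([train_x] * (augument_time + 1))
            #   train_y = np.vstack([train_y] + [train_y * 1.05] * augument_time)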

        if needSaveFeature:
            featureAndLabel = np.concatenate((train_x, train_y), axis=1)
            flDF = pd.DataFrame(featureAndLabel)
            if featureSavePath is None:
                if trainAsTest:
                    featureSavePath = Parameter.projectPath + "lzj/train_feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
                else:
                    featureSavePath = Parameter.projectPath + "lzj/feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
            if Augmented:
                featureSavePath += ("_Augment%d" % augument_time)

            featureSavePath += ".csv"
            print "save feature in :", featureSavePath
            flDF.to_csv(featureSavePath)
    else:  # a featurePath file was provided
        if trainAsTest:
            path = Parameter.projectPath + "lzj/train_feature/" + featurePath
        else:
            path = Parameter.projectPath + "lzj/feature/" + featurePath
        flDF = pd.read_csv(path, index_col=0)
        train_x = flDF.values[:, :-1]
        train_y = flDF.values[:, -1:]
        # print train_x
        # print train_y
    '''scale features and labels to [0, 1]'''
    x_scaler = MinMaxScaler().fit(train_x)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_y = y_scaler.transform(train_y)
    '''scaling done'''
    """CNN"""
    train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
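    # reshape to (samples, n_features, 1): each feature vector is fed to the
    # 1D locally connected layer as a length-F sequence with a single channel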

    if model_path is None:
        if needCV:
            '''gridsearchCV'''
            # nb_epoch=rnn_epoch, batch_size=batch_size, verbose=verbose
            # input_dim, h1_unit = 16, optimizer = "adagrad", init = "normal"):
            input_dim = [(train_x.shape[1], train_x.shape[2])]
            h1_acqtivation = ["relu"]
            h1_unit = [8, 12, 16, 20]
            model = KerasRegressor(build_fn=create_model_LocallyConnected,
                                   verbose=verbose)
            batch_size = [3, 5, 7, 10]
            epochs = [10, 15, 20, 25, 30]
            param_grid = dict(batch_size=batch_size,
                              nb_epoch=epochs,
                              h1_unit=h1_unit,
                              input_shape=input_dim)
            grid = GridSearchCV(estimator=model,
                                param_grid=param_grid,
                                n_jobs=-1,
                                scoring="neg_mean_squared_error")
            grid.refit = False
            grid_result = grid.fit(train_x, train_y)

            print("Best: %f using %s" %
                  (grid_result.best_score_, grid_result.best_params_))
            for params, mean_score, scores in grid_result.grid_scores_:
                print("%f (%f) with: %r" %
                      (scores.mean(), scores.std(), params))
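            # grid_scores_ is the pre-0.18 scikit-learn API; newer versions
            # expose the same information through cv_results_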

        if not needCV:
            input_dim = (train_x.shape[1], train_x.shape[2])
            h1_unit = 16 + time * 4
            h1_activation = "sigmoid"
            batch_size = 3
            epochs = 40

        else:
            input_dim = (train_x.shape[1], train_x.shape[2])
            epochs = grid_result.best_params_['nb_epoch']
            batch_size = grid_result.best_params_['batch_size']
            h1_unit = grid_result.best_params_["h1_unit"]
            h1_activation = "sigmoid"

        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        best_model = create_model_LocallyConnected(input_shape=input_dim,
                                                   h1_unit=h1_unit,
                                                   h1_activation=h1_activation)
        hist = best_model.fit(train_x,
                              train_y,
                              verbose=verbose,
                              batch_size=batch_size,
                              nb_epoch=epochs,
                              validation_split=0.1,
                              callbacks=[early_stopping])
        print hist.history

        # save the model
        if trainAsTest:
            model_dir = Parameter.projectPath + "lzj/train_model/"
        else:
            model_dir = Parameter.projectPath + "lzj/model/"
        model_save_path = model_dir + \
                          "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s.json" \
                          % (last_N_days, sameday_backNum, day_back_num,
                             train_x.shape[1], cate_level, cate_name,
                             epochs, batch_size, h1_unit, h1_activation)
        saveModel(model_save_path, best_model)
    else:  # model_path is not None
        print "get model from " + model_path
        best_model = getModel(model_path)

    format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", format)
    timedelta = datetime.timedelta(1)
    '''predict each shop'''
    model = best_model
    preficts_all = None
    real_all = None

    for j in shopids:
        if ignore_predict:
            if j in ignore_shopids:
                print "ignore predict", j
                # ignores += 1
                continue
        print "predict:", j
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None

        if trainAsTest:  # when testing on the last 14 days, train on the earlier part
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]
        '''predict the next 14 days one day at a time'''
        for i in range(14):
            currentTime = startTime + timedelta * i
            strftime = currentTime.strftime(format)
            # index = getWeekday(strftime) - 1
            # part_count = part_counts[index]
            # append a placeholder row for the day being predicted; its
            # features come from the preceding days and weeks
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": strftime,
                    "weekday": getWeekday(strftime)
                },
                ignore_index=True)
            x = None
            if sameday_backNum != 0:
                x = getOneWeekdayFomExtractedData(
                    extractBackSameday(part_data, sameday_backNum,
                                       part_data.shape[0] - 1,
                                       nan_method_sameday_mean))
            if day_back_num != 0:
                if x is None:
                    x = getOneWeekdayFomExtractedData(
                        extractBackDay(part_data, day_back_num,
                                       part_data.shape[0] - 1,
                                       nan_method_sameday_mean))
                else:
                    x = np.concatenate(
                        (x,
                         getOneWeekdayFomExtractedData(
                             extractBackDay(part_data, day_back_num,
                                            part_data.shape[0] - 1,
                                            nan_method_sameday_mean))),
                        axis=1)
            if weekOrWeekend:
                x = np.concatenate(
                    (x,
                     hot_encoder.transform(
                         getOneWeekdayFomExtractedData(
                             extractWorkOrWeekend(part_data,
                                                  part_data.shape[0] - 1)))),
                    axis=1)

            for feature in other_features:
                x_value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum,
                                         part_data.shape[0] - 1,
                                         nan_method_sameday_mean, feature))
                x = np.append(x, x_value, axis=1)
            # '''add weekday feature'''
            # x = np.append(x, getOneWeekdayFomExtractedData(extractWeekday(part_data, part_data.shape[0]-1)), axis=1)
            # ''''''
            '''add shop-info features'''
            index = j - 1
            oneshopinfo = shop_info.iloc[index]
            shop_city = oneshopinfo["cityname"]
            shop_perpay = oneshopinfo['perpay'] if not pd.isnull(
                oneshopinfo['perpay']) else 0
            shop_score = oneshopinfo['score'] if not pd.isnull(
                oneshopinfo['score']) else 0
            shop_comment = oneshopinfo['comment'] if not pd.isnull(
                oneshopinfo['comment']) else 0
            shop_level = oneshopinfo['level'] if not pd.isnull(
                oneshopinfo['level']) else 0
            if "perpay" in shop_features:
                x = np.insert(x, x.shape[1], shop_perpay, axis=1)
            if "score" in shop_features:
                x = np.insert(x, x.shape[1], shop_score, axis=1)
            if "comment" in shop_features:
                x = np.insert(x, x.shape[1], shop_comment, axis=1)
            if "level" in shop_features:
                x = np.insert(x, x.shape[1], shop_level, axis=1)
            shop_cate1 = oneshopinfo['cate1']
            if "cate1" in shop_features:
                shop_cate1_encoder = cate1_hot_encoder.transform(
                    cate1_label_encoder.transform([shop_cate1]).reshape(
                        (-1, 1)))
                for i in range(shop_cate1_encoder.shape[1]):
                    x = np.insert(x,
                                  x.shape[1],
                                  shop_cate1_encoder[0][i],
                                  axis=1)
            '''shop-info features done'''
            '''weather features'''
            if weather:
                weathers = getOneWeekdayFomExtractedData(
                    extractWeatherInfo(part_data, part_data.shape[0] - 1,
                                       shop_city))
                x = np.append(x, weathers, axis=1)
            '''weather features done'''
            # for j in range(sameday_backNum):
            #     x.append(train_y[len(train_y) - (j+1)*7][0])
            # x = np.array(x).reshape((1, sameday_backNum))
            x = x_scaler.transform(x)
            """CNN"""
            x = np.reshape(x, (x.shape[0], x.shape[1], 1))
            predict = model.predict(x)
            '''inverse-transform the prediction back to the original scale'''
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]
            '''inverse transform done'''
            # print predict
            if predict <= 0:
                predict = 0
            if addNoiseInResult:
                # positive noise whose scale grows with the forecast horizon
                predict = predict * (
                    1 + 0.05 * abs(np.random.normal(scale=(i + 1) * 0.05)))
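            # recursive multi-step forecasting: the prediction is appended and
            # then written back into the placeholder row, so later days in the
            # 14-day horizon use it as history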
            preficts.append(predict)
            part_data.set_value(part_data.shape[0] - 1, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all,
                                     preficts_all.shape[0],
                                     preficts,
                                     axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(toInt(
                np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all,
                                     real_all.shape[0],
                                     last_14_real_y,
                                     axis=0)
                # print preficts,last_14_real_y
            print str(j) + ',score:', scoreoneshop(preficts, last_14_real_y)

    # preficts = np.array(preficts)
    shopids = shopids.tolist()
    if ignore_predict:
        for remove_id in ignore_shopids:
            try:
                shopids.remove(remove_id)
            except ValueError:
                pass  # remove_id was not among the predicted shops

    preficts_all = preficts_all.reshape((len(shopids), 14))
    if trainAsTest:
        real_all = real_all.reshape((len(shopids), 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)

    preficts_all = np.insert(preficts_all, 0, shopids, axis=1)
    if saveFilePath is not None:
        if model_path is None:
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                                  % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                     ,  epochs, batch_size, h1_unit, h1_activation,len(shopids))
        else:
            import re
            r = re.compile(
                r"""/(\d+)last_(\d+)s_(\d+)d_(\d+)f_(\d+)_(\S+)_(\d+)_(\d+)_(\d+)_(\w+)\.json"""
            )
            m = r.search(model_path)
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                                  % (int(m.group(1)),int(m.group(2)), int(m.group(3)), int(m.group(4)), int(m.group(5)), m.group(6)
                                     ,  int(m.group(7)), int(m.group(8)), int(m.group(9)), m.group(10),len(shopids))
        if Augmented:
            path += "_augmented"
        if addNoiseInResult:
            path += "_addNoiseInResult"
        if trainAsTest:
            path = path + "_train"
        path = path + "_%dtime.csv" % time

        print "save in :", path
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
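
# A minimal usage sketch (hypothetical call site; assumes all_data is this
# project's per-shop daily "count" DataFrame with shopid/time/weekday/count
# columns, loaded elsewhere):
#
#   result = predictAllShop_LC_HPS(all_data,
#                                  trainAsTest=True,
#                                  saveFilePath=Parameter.projectPath + "lzj/result/lc_",
#                                  needCV=False)
#
# result has one row per shop: [shopid, 14 predictions(, 14 actuals when
# trainAsTest=True)].
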
def predictAllShop_MultiCNN_HPS(all_data,
                                trainAsTest=False,
                                saveFilePath=None,
                                featurePath=None,
                                cate_level=0,
                                cate_name=None,
                                featureSavePath=None,
                                needSaveFeature=False,
                                ignore_shopids=[],
                                needCV=False,
                                model_path=None,
                                Augmented=False,
                                ignore_get_train=True,
                                ignore_predict=True,
                                addNoiseInResult=False,
                                time=1):
    """
    通过gridsearch找超参数
    :param trainAsTest: 是否使用训练集后14天作为测试集
    :param saveFilePath
    :param featurePath:
    :param cate_level:
    :param cate_name:
    :param featureSavePath:
    :param needSaveFeature:
    :param ignore_shopids:
    :param create_model_function:
    :param needCV
    :param Augmented:是否增广样本
    :param  ignore_get_train:是否忽略获取样本
    :param ignore_predict:是否忽略预测
    :return:
    """

    augument_time = 1
    verbose = 2
    last_N_days = 70
    # count of shops skipped so far (unused)
    # ignores = 0
    shop_need_to_predict = 2000
    if cate_level == 0:
        shopids = np.arange(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)
    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])

    weather = False
    weekOrWeekend = False
    day_back_num = 21
    sameday_backNum = 8
    week_backnum = 3
    other_features = [statistic_functon_mean, statistic_functon_median]
    other_features = []  # overrides the line above: weekly statistics disabled
    shop_features = ["perpay", "comment", "score", "level"]
    shop_features = []  # overrides the line above: shop-info features disabled
    # one-hot encoder for the weekend flag
    hot_encoder = onehot([[1], [0]])
    # one-hot encoder for category level 1 (cate1)
    cate1_list = np.unique(shop_info['cate1'])
    cate1_label_encoder = labelEncoder(cate1_list)
    cate1_list2 = cate1_label_encoder.transform(cate1_list).reshape((-1, 1))
    cate1_hot_encoder = onehot(cate1_list2)

    if featurePath is None:
        train_x, train_y = getTrainXY(
            all_data, cate1_hot_encoder, cate1_label_encoder, day_back_num,
            hot_encoder, ignore_get_train, ignore_shopids, last_N_days,
            other_features, 0, shop_features, shop_info, shopids, trainAsTest,
            weather, weekOrWeekend, week_backnum)
        train_x2 = getTrainXY(all_data, cate1_hot_encoder, cate1_label_encoder,
                              0, hot_encoder, ignore_get_train, ignore_shopids,
                              last_N_days, other_features, sameday_backNum,
                              shop_features, shop_info, shopids, trainAsTest,
                              weather, weekOrWeekend, week_backnum)[0]
        """增广训练集"""
        if Augmented:
            train_xs, train_y = augmentTrainX(augument_time,
                                              [train_x, train_x2], train_y)

        train_x = train_xs[0]
        train_x2 = train_xs[1]

        if needSaveFeature:
            featureAndLabel = np.concatenate((train_x, train_y), axis=1)
            flDF = pd.DataFrame(featureAndLabel)
            if featureSavePath is None:
                if trainAsTest:
                    featureSavePath = Parameter.projectPath + "lzj/train_feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
                else:
                    featureSavePath = Parameter.projectPath + "lzj/feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
            if Augmented:
                featureSavePath += ("_Augment%d" % augument_time)

            featureSavePath += ".csv"
            print "save feature in :", featureSavePath
            flDF.to_csv(featureSavePath)
    else:  # a featurePath file was provided
        if trainAsTest:
            path = Parameter.projectPath + "lzj/train_feature/" + featurePath
        else:
            path = Parameter.projectPath + "lzj/feature/" + featurePath
        flDF = pd.read_csv(path, index_col=0)
        # note: the saved feature file holds only train_x and train_y;
        # train_x2 is not rebuilt in this branch, so the scaling below will fail
        train_x = flDF.values[:, :-1]
        train_y = flDF.values[:, -1:]
        # print train_x
        # print train_y
    '''scale features and labels to [0, 1]'''
    x_scaler = MinMaxScaler().fit(train_x)
    x2_scaler = MinMaxScaler().fit(train_x2)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_x2 = x2_scaler.transform(train_x2)
    train_y = y_scaler.transform(train_y)
    '''scaling done'''
    """CNN"""
    train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
    train_x2 = np.reshape(train_x2, (train_x2.shape[0], train_x2.shape[1], 1))
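    # two parallel inputs for the multi-input CNN: train_x carries the
    # day_back_num daily-history features, train_x2 the sameday_backNum
    # same-weekday features; each becomes a (samples, n_features, 1) tensor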

    if model_path is None:
        if needCV:
            '''gridsearchCV'''
            # nb_epoch=rnn_epoch, batch_size=batch_size, verbose=verbose
            # input_dim, h1_unit = 16, optimizer = "adagrad", init = "normal"):
            input_dim = [(train_x.shape[1], train_x.shape[2])]
            input_dim2 = [(train_x2.shape[1], train_x2.shape[2])]
            h1_acqtivation = ["relu"]
            h1_unit = [8, 12, 16, 20]
            model = KerasRegressor(build_fn=create_model_MultiCNN,
                                   verbose=verbose)
            batch_size = [3, 5, 7, 10]
            epochs = [10, 15, 20, 25, 30]
            param_grid = dict(batch_size=batch_size,
                              nb_epoch=epochs,
                              h1_unit=h1_unit,
                              input_shape1=input_dim,
                              input_shape2=input_dim2)
            grid = GridSearchCV(estimator=model,
                                param_grid=param_grid,
                                n_jobs=-1,
                                scoring="neg_mean_squared_error")
            grid.refit = False
            # note: fit receives only train_x; a model with two inputs would
            # need both feature branches here
            grid_result = grid.fit(train_x, train_y)

            print("Best: %f using %s" %
                  (grid_result.best_score_, grid_result.best_params_))
            for params, mean_score, scores in grid_result.grid_scores_:
                print("%f (%f) with: %r" %
                      (scores.mean(), scores.std(), params))

        if not needCV:
            input_dim = (train_x.shape[1], train_x.shape[2])
            input_dim2 = (train_x2.shape[1], train_x2.shape[2])
            # h1_unit = 16 + (time) * 4
            h1_unit = 24
            h1_activation = "relu"
            batch_size = 3
            epochs = 40

        else:
            input_dim = (train_x.shape[1], train_x.shape[2])
            input_dim2 = (train_x2.shape[1], train_x2.shape[2])
            epochs = grid_result.best_params_['nb_epoch']
            batch_size = grid_result.best_params_['batch_size']
            h1_unit = grid_result.best_params_["h1_unit"]
            h1_activation = "sigmoid"

        print train_x.shape
        print train_x2.shape
        print train_y.shape

        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        best_model = create_model_MultiCNN(input_shape1=input_dim,
                                           input_shape2=input_dim2,
                                           h1_unit=h1_unit,
                                           h1_activation=h1_activation)
        hist = best_model.fit([train_x, train_x2],
                              train_y,
                              verbose=verbose,
                              batch_size=batch_size,
                              nb_epoch=epochs,
                              validation_split=0.1,
                              callbacks=[early_stopping])
        print hist.history

        # save the model
        if trainAsTest:
            model_dir = Parameter.projectPath + "lzj/train_model/"
        else:
            model_dir = Parameter.projectPath + "lzj/model/"
        model_save_path = model_dir + \
                          "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s.json" \
                          % (last_N_days, sameday_backNum, day_back_num,
                             train_x.shape[1], cate_level, cate_name,
                             epochs, batch_size, h1_unit, h1_activation)
        saveModel(model_save_path, best_model)
    else:  # model_path is not None
        print "get model from " + model_path
        best_model = getModel(model_path)

    format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", format)
    timedelta = datetime.timedelta(1)
    '''predict each shop'''
    model = best_model
    preficts_all = None
    real_all = None

    for j in shopids:
        if ignore_predict:
            if j in ignore_shopids:
                print "ignore predict", j
                # ignores += 1
                continue
        print "predict:", j
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None

        if trainAsTest:  # when testing on the last 14 days, train on the earlier part
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]
        '''predict the next 14 days one day at a time'''
        for i in range(14):
            currentTime = startTime + timedelta * i
            strftime = currentTime.strftime(format)
            # index = getWeekday(strftime) - 1
            # part_count = part_counts[index]
            # append a placeholder row for the day being predicted; its
            # features come from the preceding days and weeks
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": strftime,
                    "weekday": getWeekday(strftime)
                },
                ignore_index=True)
            x = None
            x2 = None
            x = getOneShopTrainXY(cate1_hot_encoder, cate1_label_encoder,
                                  day_back_num, hot_encoder, other_features,
                                  part_data, 0, shop_features, shop_info, j,
                                  part_data.shape[0] - 1, x, weather,
                                  weekOrWeekend, week_backnum)[0]
            x2 = getOneShopTrainXY(cate1_hot_encoder, cate1_label_encoder, 0,
                                   hot_encoder, other_features, part_data,
                                   sameday_backNum, shop_features, shop_info,
                                   j, part_data.shape[0] - 1, x2, weather,
                                   weekOrWeekend, week_backnum)[0]
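            # x mirrors the daily-history branch and x2 the same-weekday
            # branch used at training time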

            x = x_scaler.transform(x)
            x2 = x2_scaler.transform(x2)
            """CNN"""
            x = np.reshape(x, (x.shape[0], x.shape[1], 1))
            x2 = np.reshape(x2, (x2.shape[0], x2.shape[1], 1))
            predict = model.predict([x, x2])
            '''inverse-transform the prediction back to the original scale'''
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]
            '''inverse transform done'''
            # print predict
            if predict <= 0:
                predict = 0
            if addNoiseInResult:
                # positive noise whose scale grows with the forecast horizon
                predict = predict * (
                    1 + 0.05 * abs(np.random.normal(scale=(i + 1) * 0.05)))
            preficts.append(predict)
            part_data.set_value(part_data.shape[0] - 1, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all,
                                     preficts_all.shape[0],
                                     preficts,
                                     axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(toInt(
                np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all,
                                     real_all.shape[0],
                                     last_14_real_y,
                                     axis=0)
                # print preficts,last_14_real_y
            print str(j) + ',score:', scoreoneshop(preficts, last_14_real_y)

    # preficts = np.array(preficts)
    shopids = shopids.tolist()
    if ignore_predict:
        for remove_id in ignore_shopids:
            try:
                shopids.remove(remove_id)
            except ValueError:
                pass  # remove_id was not among the predicted shops

    preficts_all = preficts_all.reshape((len(shopids), 14))
    if trainAsTest:
        real_all = real_all.reshape((len(shopids), 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)

    preficts_all = np.insert(preficts_all, 0, shopids, axis=1)
    if saveFilePath is not None:
        if model_path is None:
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                                  % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                     ,  epochs, batch_size, h1_unit, h1_activation,len(shopids))
        else:
            import re
            r = re.compile(
                r"""/(\d+)last_(\d+)s_(\d+)d_(\d+)f_(\d+)_(\S+)_(\d+)_(\d+)_(\d+)_(\w+)\.json"""
            )
            m = r.search(model_path)
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                                  % (int(m.group(1)),int(m.group(2)), int(m.group(3)), int(m.group(4)), int(m.group(5)), m.group(6)
                                     ,  int(m.group(7)), int(m.group(8)), int(m.group(9)), m.group(10),len(shopids))
        if Augmented:
            path += "_augmented"
        if addNoiseInResult:
            path += "_addNoiseInResult"
        path = path + "_%dtime" % time
        if trainAsTest:
            path = path + "_train"
        path += ".csv"

        print "save in :", path
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
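
# A minimal usage sketch for the multi-input variant (hypothetical call site,
# same assumptions about all_data as above):
#
#   result = predictAllShop_MultiCNN_HPS(all_data,
#                                        trainAsTest=True,
#                                        saveFilePath=Parameter.projectPath + "lzj/result/multicnn_",
#                                        needCV=False)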