def build(self, task):
        """Instantiate ``self.model`` and compile it for the given task.

        Parameters:
            task: either 'regression' or 'binary'; selects the loss and
                metric used when compiling the model.

        Raises:
            ValueError: if ``task`` is not a supported task name.
            NotImplementedError: if ``self.model_architecture`` is anything
                other than "DeepFM".
        """
        # Explicit exception rather than ``assert`` so the check survives
        # running Python with -O.
        if task not in ('regression', 'binary'):
            raise ValueError(
                "task must be 'regression' or 'binary', got %r" % (task,))

        if self.model_architecture == "DeepFM":
            self.model = DeepFM(
                self.data.linear_feature_columns,
                self.data.dnn_feature_columns,
                task=task,
            )
        else:
            raise NotImplementedError(
                'At the current stage of the development, only a DeepFM is supported'
            )

        # Loss/metric pair per task.  (A dead duplicate if/elif that
        # recomputed these into unused locals has been removed.)
        task_attr = {
            'regression': {
                'loss': 'mse',
                'metrics': 'mse'
            },
            'binary': {
                'loss': 'binary_crossentropy',
                'metrics': 'accuracy'
            }
        }

        self.model.compile(optimizer="adam",
                           loss=task_attr[task]['loss'],
                           metrics=task_attr[task]['metrics'])
Exemple #2
0
def test_DeepFM(use_fm, hidden_size, sparse_feature_num):
    """Smoke-test DeepFM: fit, then save/load weights and the full model."""
    model_name = "DeepFM"
    sample_size = 64

    # The dense feature count deliberately mirrors the sparse count.
    feature_dim_dict = {"sparse": {}, 'dense': []}
    for idx in range(sparse_feature_num):
        feature_dim_dict["sparse"]["sparse_" + str(idx)] = np.random.randint(1, 10)
    for idx in range(sparse_feature_num):
        feature_dim_dict['dense'].append("dense_" + str(idx))

    sparse_input = [np.random.randint(0, dim, sample_size)
                    for dim in feature_dim_dict['sparse'].values()]
    dense_input = [np.random.random(sample_size)
                   for _ in feature_dim_dict['dense']]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = DeepFM(feature_dim_dict, use_fm=use_fm,
                   hidden_size=hidden_size, keep_prob=0.5)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    weights_path = model_name + '_weights.h5'
    model.save_weights(weights_path)
    model.load_weights(weights_path)
    print(model_name + " test save load weight pass!")

    save_model(model, model_name + '.h5')
    model = load_model(model_name + '.h5', custom_objects)
    print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
Exemple #3
0
def model_gridsearch(lfc, dfc, grid):
    """Return (models, model_names, xlabel) for a one-parameter grid search.

    ``grid`` names the hyper-parameter being swept; an unrecognised value
    falls back to a single default DeepFM with an empty xlabel.
    """
    # Candidate values for each supported hyper-parameter sweep.
    search_space = {
        'embedding_size': [25, 30, 40, 50],
        'dnn_hidden_units': [7, 8, 9, 10],
        'dnn_hidden_units_len': [2, 3, 4, 5],
        'dnn_dropout': [0, 0.1, 0.2, 0.3],
    }
    if grid not in search_space:
        # Default: a single untuned binary DeepFM.
        return ([DeepFM(lfc, dfc, task='binary')], ['DeepFM'], [])

    # How each grid value is turned into model constructor arguments.
    builders = {
        'embedding_size':
            lambda a: DeepFM(lfc, dfc, embedding_size=a, task='binary', seed=1024),
        'dnn_hidden_units':
            lambda a: DeepFM(lfc, dfc, dnn_hidden_units=(2 ** a, 2 ** a), task='binary', seed=1024),
        'dnn_hidden_units_len':
            lambda a: DeepFM(lfc, dfc, dnn_hidden_units=(128,) * a, task='binary', seed=1024),
        'dnn_dropout':
            lambda a: DeepFM(lfc, dfc, dnn_dropout=a, task='binary', seed=1024),
    }

    xlabel = search_space[grid]
    build = builders[grid]
    models = []
    model_names = []
    for a in xlabel:
        models.append(build(a))
        model_names.append('{} {}'.format(a, grid))

    return (models, model_names, xlabel)
    '''
    
    models = [#xDeepFM(linear_feature_columns, dnn_feature_columns, task='binary'),
              DeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8,task='binary'),
        #            linear_feature_columns, dnn_feature_columns, embedding_size=8, use_fm=True, dnn_hidden_units=(128, 128),
        #            l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, init_std=0.0001, seed=1024, dnn_dropout=0,
        #            dnn_activation='relu', dnn_use_bn=False, task='binary'

              #CCPM(linear_feature_columns, dnn_feature_columns, task='binary'),
              #FNN(linear_feature_columns, dnn_feature_columns, task='binary'),
              #PNN(dnn_feature_columns, task='binary'),
              #WDL(linear_feature_columns, dnn_feature_columns, task='binary'),
              #NFM(linear_feature_columns, dnn_feature_columns, task='binary'),
              #AFM(linear_feature_columns, dnn_feature_columns, task='binary'),
              #DCN(dnn_feature_columns, task='binary'),
              #AutoInt(dnn_feature_columns, task='binary'),
              #NFFM(linear_feature_columns, dnn_feature_columns, task='binary'),
              #FGCNN(dnn_feature_columns, task='binary'),
              #FiBiNET(linear_feature_columns, dnn_feature_columns, task='binary')
              ]
              '''
    '''
def main(dataPath, dataPath_val, batch_size):
    """Train a binary DeepFM CTR model from CSV shards via data generators.

    Parameters:
        dataPath: directory containing the training ``*.csv`` shards.
        dataPath_val: directory containing the validation ``*.csv`` shards.
        batch_size: mini-batch size used by both generators.

    Returns:
        The Keras ``History`` object produced by training.
    """
    import math

    # Every 5th file only — presumably a deliberate subsample to keep the
    # run cheap; TODO confirm before relying on full-data results.
    files = glob.glob(dataPath + "/*.csv")[::5]
    files_val = glob.glob(dataPath_val + "/*.csv")[::5]

    nexs = get_total_examples(files)
    print("Number of training examples: ", nexs)

    nexs_val = get_total_examples(files_val)
    print("Number of validation examples: ", nexs_val)

    # Create data generators for streaming batches from disk.
    train_gen = DataGenerator(files, nexs, batch_size=batch_size)
    val_gen = DataGenerator(files_val, nexs_val, batch_size=batch_size)

    linear_feature_columns = train_gen.linear_feature_columns
    dnn_feature_columns = train_gen.dnn_feature_columns

    # Define model, train, predict and evaluate.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    optimizer = keras.optimizers.Adam(lr=0.001,
                                      beta_1=0.9,
                                      beta_2=0.999,
                                      decay=0.0)
    model.compile(
        optimizer,
        "binary_crossentropy",
        metrics=['binary_crossentropy', auroc],
    )

    weights_file = "model-5-lr0p001.h5"
    model_checkpoint = ModelCheckpoint(weights_file,
                                       monitor="val_binary_crossentropy",
                                       save_best_only=True,
                                       save_weights_only=True,
                                       verbose=1)

    # Steps must be integers; the original passed the float nexs/batch_size.
    # Also: validation_steps previously reused the TRAINING example count
    # (nexs) — use the validation count instead.
    steps = math.ceil(nexs / batch_size)
    val_steps = math.ceil(nexs_val / batch_size)

    history = model.fit_generator(train_gen,
                                  epochs=10,
                                  verbose=1,
                                  steps_per_epoch=steps,
                                  validation_data=val_gen,
                                  validation_steps=val_steps,
                                  callbacks=[model_checkpoint])
    return history
Exemple #5
0
    def fit(self, X, y):
        """Scale, encode and fit a binary DeepFM model on ``X``/``y``.

        Parameters:
            X: pandas DataFrame of features; every column not listed in
                ``self.cat_features`` is treated as a dense feature.
            y: binary target aligned with the rows of ``X``.
        """
        X_ = X.copy()
        # Everything that is not categorical is treated as dense.
        self.dense_features = list(X_.columns.difference(self.cat_features))

        logger.debug("MinMaxScaler")
        self.min_max_scaler.fit(X_[self.dense_features])
        X_[self.dense_features] = self.min_max_scaler.transform(
            X_[self.dense_features])

        # Rename columns through the recommender's internal mapping.
        self._column_mapping(X_)
        X_.columns = [self.columns_mapping[col] for col in X_.columns]

        # NOTE(review): vocabulary_size = max + 1 assumes the categorical
        # codes are contiguous non-negative integers — confirm upstream
        # encoding.  (Removed an unused ``enumerate`` index here.)
        self.fixlen_feature_columns = [
            SparseFeat(
                self.columns_mapping[feat],
                vocabulary_size=X_[self.columns_mapping[feat]].max() + 1,
                embedding_dim=4,
            ) for feat in self.cat_features
        ] + [
            DenseFeat(
                self.columns_mapping[feat],
                1,
            ) for feat in self.dense_features
        ]
        self.feature_names = get_feature_names(self.fixlen_feature_columns)

        logger.debug("Compile DeepFM model")
        # Same columns feed both the linear and DNN parts.
        self.model = DeepFM(self.fixlen_feature_columns,
                            self.fixlen_feature_columns,
                            task="binary")
        self.model.compile(
            "adam",
            "binary_crossentropy",
            metrics=["binary_crossentropy"],
        )

        logger.debug("Fit DeepFM")
        train_model_input = {
            name: X_[name].values
            for name in self.feature_names
        }
        self.model.fit(
            train_model_input,
            y,
            batch_size=256,
            epochs=3,
            verbose=2,
            validation_split=0.2,
        )
Exemple #6
0
def model_generate(train_X, train_y, val_X, val_y, linear_feature_columns,
                   dnn_feature_columns):
    """Build, compile and fit a DeepFM; return the ``(model, history)`` pair.

    Training stops early via ``EarlyStopping`` based on validation data.
    """
    deepfm = DeepFM(linear_feature_columns, dnn_feature_columns,
                    embedding_size=32)
    deepfm.compile("adam",
                   "binary_crossentropy",
                   metrics=[roc_auc_score_pyfunc, log_loss_pyfunc])
    fit_history = deepfm.fit(train_X,
                             train_y,
                             validation_data=(val_X, val_y),
                             batch_size=4096,
                             epochs=5,
                             callbacks=[EarlyStopping()])
    return deepfm, fit_history
Exemple #7
0
def test_DeepFM(use_fm, hidden_size, sparse_feature_num):
    """Run check_model on a DeepFM fed with random sparse/dense features."""
    model_name = "DeepFM"
    sample_size = 64

    # Dense feature count mirrors the sparse count.
    feature_dim_dict = {"sparse": {}, 'dense': []}
    for idx in range(sparse_feature_num):
        feature_dim_dict["sparse"]["sparse_" + str(idx)] = np.random.randint(1, 10)
    for idx in range(sparse_feature_num):
        feature_dim_dict['dense'].append("dense_" + str(idx))

    sparse_input = [np.random.randint(0, dim, sample_size)
                    for dim in feature_dim_dict['sparse'].values()]
    dense_input = [np.random.random(sample_size)
                   for _ in feature_dim_dict['dense']]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = DeepFM(feature_dim_dict, use_fm=use_fm,
                   hidden_size=hidden_size, keep_prob=0.5)
    check_model(model, model_name, x, y)
Exemple #8
0
    def update_params(self, recompile=True, **kwargs):
        '''
        Update parameters for the recommender and re-compile the DeepFM model unless recompile is set to False.

        Parameters
        ----------
        recompile : bool
            When True and a model already exists, rebuild the DeepFM with
            the refreshed parameters.
        **kwargs
            Parameter overrides; every key must already exist in
            ``self.params``.

        Raises
        ------
        ValueError
            If an unknown parameter name is passed.

        Example
        -------
        deepnn.update_params(epochs=20, deepfm__l2_reg_linear=2e-4)
        '''
        for k, v in kwargs.items():
            # Reject unknown keys instead of silently growing the dict.
            if k not in self.params:
                raise ValueError(
                    '{0} is not a valid parameter for RecommenderDeepNN.'.format(k))
            self.params[k] = v
        self._set_params_deepfm()
        # Only rebuild when requested and a model has been built before.
        if recompile and self.model is not None:
            self.model = DeepFM(self.features_linear, self.features_dnn,
                                task='regression', **self.params_deepfm)
Exemple #9
0
def run_deepfm_model():
    """Train a binary DeepFM and report test LogLoss/AUC.

    Returns (predictions, true labels, rounded AUC, 'deepfm').
    """
    (train, test, train_model_input, test_model_input, dnn_feature_columns,
     linear_feature_columns, feature_names, target) = read_data_as_model()

    # Define model, train, predict and evaluate.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam",
                  "binary_crossentropy",
                  metrics=['binary_crossentropy'])

    model.fit(train_model_input,
              train[target].values,
              batch_size=256,
              epochs=10,
              verbose=2,
              validation_split=0.2)

    pred_ans = model.predict(test_model_input, batch_size=256)
    y_true = test[target].values
    auc = round(roc_auc_score(y_true, pred_ans), 4)
    print("test LogLoss", round(log_loss(y_true, pred_ans), 4))
    print("test AUC", auc)
    return pred_ans, y_true, auc, 'deepfm'
Exemple #10
0
def test_DeepFM(hidden_size, sparse_feature_num):
    """Exercise DeepFM with dnn dropout on generated test data."""
    model_name = "DeepFM"
    # Use the same count for sparse and dense features.
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    net = DeepFM(feature_columns, feature_columns,
                 dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(net, model_name, x, y)
Exemple #11
0
 def load_model(self):
     """Restore preprocessing state from pickle and model weights from disk."""
     with open("data/DeepFM_data.pkl", "rb") as f_in:
         state = pickle.load(f_in)
     (self.columns_mapping,
      self.min_max_scaler,
      self.dense_features,
      self.fixlen_feature_columns,
      self.feature_names) = state
     # Rebuild the network (same columns on linear and DNN sides), then
     # load the previously saved weights.
     self.model = DeepFM(self.fixlen_feature_columns,
                         self.fixlen_feature_columns,
                         task="binary")
     self.model.compile("adam",
                        "binary_crossentropy",
                        metrics=["binary_crossentropy"])
     self.model.load_weights("data/DeepFM_w.h5")
Exemple #12
0
    def _build_model(self):
        """Assemble feature columns and instantiate the DeepFM regressor.

        Returns the (category matrix, max category length) pair produced
        while building the variable-length 'categories' feature.
        """
        to_drop = config.Keywords_Categories[self.params['category']]
        self._build_category_dict(drop_categories=to_drop)
        attrs_matrix, attrs_max_len = self._get_category_matrix(self.data)

        # Fixed-length columns: one SparseFeat per sparse variable plus
        # one DenseFeat per dense variable.
        fixlen = [SparseFeat(var, self.data[var].nunique(), embedding_dim=4)
                  for var in self.features_sparse]
        fixlen += [DenseFeat(var, 1,) for var in self.features_dense]

        # Variable-length 'categories' feature, optionally weighted.
        weight = 'attrs_weight' if self.params['weight'] else None
        varlen = [VarLenSparseFeat(
            SparseFeat('categories',
                       vocabulary_size=len(self.attr2index) + 1,
                       embedding_dim=4),
            maxlen=attrs_max_len, combiner='mean', weight_name=weight)]

        self.features_linear = fixlen + varlen
        self.features_dnn = fixlen + varlen

        self.model = DeepFM(self.features_linear, self.features_dnn,
                            task='regression', **self.params_deepfm)
        return attrs_matrix, attrs_max_len
Exemple #13
0
def test_long_dense_vector():
    """DeepFM should accept a multi-dimensional DenseFeat ('pic_vec', dim 5)."""
    # Build the feature columns.
    feature_columns = [
        SparseFeat('user_id', 4,),
        SparseFeat('item_id', 5,),
        DenseFeat("pic_vec", 5),
    ]
    fixlen_feature_names = get_feature_names(feature_columns)

    # Build a tiny three-row sample.
    user_id = np.array([[1], [0], [1]])
    item_id = np.array([[3], [2], [1]])
    pic_vec = np.array([[0.1, 0.5, 0.4, 0.3, 0.2]] * 3)
    label = np.array([1, 0, 1])
    input_dict = {'user_id': user_id, 'item_id': item_id, 'pic_vec': pic_vec}
    model_input = [input_dict[name] for name in fixlen_feature_names]

    # Build the model; the dense vector is omitted from the second
    # (DNN-side) column list.
    model = DeepFM(feature_columns, feature_columns[:-1])

    # Train for one default run.
    model.compile('adagrad', 'binary_crossentropy')
    model.fit(model_input, label)
Exemple #14
0
def test_DeepFM(use_fm, hidden_size):
    """End-to-end DeepFM check on a fixed three-sparse/three-dense layout."""
    name = "DeepFM"
    sample_size = 64
    feature_dim_dict = {
        'sparse': {'sparse_1': 2, 'sparse_2': 5, 'sparse_3': 10},
        'dense': ['dense_1', 'dense_2', 'dense_3'],
    }
    sparse_input = [np.random.randint(0, dim, sample_size)
                    for dim in feature_dim_dict['sparse'].values()]
    dense_input = [np.random.random(sample_size)
                   for _ in feature_dim_dict['dense']]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = DeepFM(feature_dim_dict, use_fm=use_fm,
                   hidden_size=hidden_size, keep_prob=0.5)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(name + " test train valid pass!")

    weights_file = name + '_weights.h5'
    model.save_weights(weights_file)
    model.load_weights(weights_file)
    print(name + " test save load weight pass!")

    save_model(model, name + '.h5')
    model = load_model(name + '.h5', custom_objects)
    print(name + " test save load model pass!")

    print(name + " test pass!")
Exemple #15
0
def test_long_dense_vector():
    """Fit DeepFM with a 5-dim DenseFeat kept out of the DNN column list."""
    feature_columns = [
        SparseFeat('user_id', 4,),
        SparseFeat('item_id', 5,),
        DenseFeat("pic_vec", 5),
    ]
    fixlen_feature_names = get_feature_names(feature_columns)

    # Three identical dense rows keep the fixture trivial.
    user_id = np.array([[1], [0], [1]])
    item_id = np.array([[3], [2], [1]])
    pic_vec = np.array([[0.1, 0.5, 0.4, 0.3, 0.2]] * 3)
    label = np.array([1, 0, 1])

    input_dict = {'user_id': user_id, 'item_id': item_id, 'pic_vec': pic_vec}
    model_input = [input_dict[name] for name in fixlen_feature_names]

    model = DeepFM(feature_columns, feature_columns[:-1])
    model.compile('adagrad', 'binary_crossentropy')
    model.fit(model_input, label)
def train_deepFM():
    """Train a DeepFM CTR model on the shared ``trainmodel`` dataset.

    Fills missing values, label-encodes sparse features, min-max scales
    dense features, assembles fixed-/variable-length feature columns,
    fits the model, and prints the held-out AUC.
    """
    k = featureengineer.k
    # Fill missing values and encode features.
    data, appsnum, tags_nums = trainmodel.data, trainmodel.appsnum, trainmodel.tags_nums
    data[trainmodel.sparse_features] = data[trainmodel.sparse_features].fillna('-1', )
    for feat in trainmodel.dense_features:
        data[feat].fillna(data[feat].dropna().mean(), inplace=True)

    for feat in trainmodel.sparse_features:
        # ``apply(str)`` replaces the redundant ``lambda x: str(x)``.
        data[feat] = data[feat].apply(str)
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    mms = MinMaxScaler(feature_range=(0, 1))
    data[trainmodel.dense_features] = mms.fit_transform(data[trainmodel.dense_features])

    # Convert to the DeepCTR feature-column format.  (Unused enumerate
    # indices removed.)
    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=8)
                              for feat in trainmodel.sparse_features] + \
                             [DenseFeat(feat, 1, ) for feat in trainmodel.dense_features]

    lgbOut_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=1)
                              for feat in trainmodel.lgbOut_Features]

    key2index_len = {'applist': appsnum + 1, 'new_tag': tags_nums}
    varlen_features = [VarLenSparseFeat('%s' % i, vocabulary_size=key2index_len[i], maxlen=k,
                                        embedding_dim=8, combiner='mean', weight_name=None)
                       for i in trainmodel.var_features]

    dnn_feature_columns = fixlen_feature_columns + varlen_features
    linear_feature_columns = fixlen_feature_columns + varlen_features + lgbOut_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    sparse_dense_features = trainmodel.sparse_features + trainmodel.dense_features + trainmodel.lgbOut_Features

    train, test = train_test_split(data, test_size=0.2)

    train_model_input = {name: train[name] for name in sparse_dense_features}
    test_model_input = {name: test[name] for name in sparse_dense_features}
    for x in trainmodel.var_features:
        if x == 'applist':
            train_model_input[x] = np.array(train[x].tolist())
            test_model_input[x] = np.array(test[x].tolist())
        if x == 'new_tag':
            # Tag ids appear to be offset by the app-list vocabulary size
            # upstream; shift back to a zero-based range — TODO confirm.
            train_model_input[x] = np.array(train[x].tolist()) - appsnum
            test_model_input[x] = np.array(test[x].tolist()) - appsnum

    # Build and train the model.
    model = DeepFM(linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns,
                   dnn_hidden_units=(50, 30, 30), l2_reg_linear=0.001, l2_reg_embedding=0.001,
                   l2_reg_dnn=0, init_std=0.0001, seed=1024, dnn_dropout=0.1, dnn_activation='relu',
                   dnn_use_bn=True, task='binary')
    model.compile("adam", "binary_crossentropy", metrics=['AUC'], )

    history = model.fit(train_model_input, train['target'].values,
                        batch_size=256, epochs=1, verbose=2, validation_split=0.2, )

    pred_ans = model.predict(test_model_input, batch_size=256)
    print("test AUC", round(roc_auc_score(test['target'].values, pred_ans), 4))
Exemple #17
0
def test_DeepFM(use_fm, hidden_size, sparse_feature_num):
    """Run check_model on a DeepFM built from generated test data."""
    model_name = "DeepFM"
    # Same count for sparse and dense features.
    x, y, feature_dim_dict = get_test_data(SAMPLE_SIZE, sparse_feature_num,
                                           sparse_feature_num)
    net = DeepFM(feature_dim_dict, use_fm=use_fm,
                 hidden_size=hidden_size, keep_prob=0.5)
    check_model(net, model_name, x, y)
Exemple #18
0
def train_model(train, test, linear_feature, dnn_feature):
    """Fit a binary DeepFM on ``train`` and print LogLoss/AUC on ``test``.

    ``train`` unpacks into fit() positional args; ``test`` is a
    (inputs, labels) pair.
    """
    model = DeepFM(linear_feature, dnn_feature, task='binary')
    model.compile("adam",
                  "binary_crossentropy",
                  metrics=['AUC'])
    model.fit(*train,
              batch_size=512,
              epochs=5,
              verbose=2,
              validation_split=0.1)
    predictions = model.predict(test[0], batch_size=512)
    print("test LogLoss", round(log_loss(test[1], predictions), 4))
    print("test AUC", round(roc_auc_score(test[1], predictions), 4))
Exemple #19
0
def deepfm_model(linear_feature_columns, dnn_feature_columns,
                 train_model_input, train, test_model_input, test):
    """Train a DeepFM regressor and return a one-row metrics DataFrame.

    Relies on module-level ``config`` for hyper-parameters and a
    module-level ``target`` column name (assumed defined elsewhere in
    this file — TODO confirm).

    Returns:
        DataFrame with columns model/RMSE/MAE/MSE/AUC/score ('score' is
        left unset, matching the commented-out formula below).
    """
    cols = ['model', 'RMSE', 'MAE', 'MSE', 'AUC', 'score']
    df_result = pd.DataFrame(columns=cols, index=range(1))
    model = DeepFM(linear_feature_columns,
                   dnn_feature_columns,
                   dnn_hidden_units=config.deepfm_att["dnn_hidden_units"],
                   init_std=config.deepfm_att["init_std"],
                   seed=config.deepfm_att["seed"],
                   dnn_dropout=config.deepfm_att["dnn_dropout"],
                   dnn_activation=config.deepfm_att["dnn_activation"],
                   task=config.deepfm_att["task"],
                   fm_group=config.deepfm_att["fm_group"],
                   dnn_use_bn=config.deepfm_att["dnn_use_bn"])

    model.compile("adam", "mse", metrics=['mse'])

    history = model.fit(train_model_input,
                        train[target].values,
                        batch_size=256,
                        epochs=config.model_epoch['epoch'],
                        verbose=2,
                        validation_split=0.2)

    pred_ans = model.predict(test_model_input, batch_size=256)
    save_model(model, 'saved_deepfm.h5')  # save_model
    y_true = test[target].values
    auc = roc_auc_score(y_true, pred_ans)

    # Use .loc[row, col]: the original ``df.loc[0].attr = value`` form
    # assigns to a temporary row copy (pandas chained assignment) and the
    # value may never reach df_result.
    df_result.loc[0, 'model'] = "DeepFM"
    df_result.loc[0, 'RMSE'] = np.round(
        math.sqrt(mean_squared_error(y_true, pred_ans)), 3)
    df_result.loc[0, 'MAE'] = np.round(
        mean_absolute_error(y_true, pred_ans), 3)
    df_result.loc[0, 'MSE'] = np.round(
        mean_squared_error(y_true, pred_ans), 3)
    df_result.loc[0, 'AUC'] = np.round(auc, 3)
    #df_result.loc[0].score=(1/df_result.iloc[0]['RMSE'])*(1/df_result.iloc[0]['MAE'])*(2*df_result.iloc[0]['AUC'])
    return df_result
            del varlen_list['%s_%s' % (i, j)]

# 4.Define Model, train, predict and evaluate
checkpoint_path = path_model + "cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Persist weights only (not the full model), announcing each write.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
# Translated from the original Chinese note: compilation failed with
# embedding_size=8 / use_fm=True, so those arguments were temporarily
# removed.
model = DeepFM(linear_feature_columns,
               dnn_feature_columns,
               fm_group=fixlen_feature_columns,
               dnn_hidden_units=(256, 256, 256),
               l2_reg_linear=0.001,
               l2_reg_embedding=0.001,
               l2_reg_dnn=0,
               init_std=0.0001,
               seed=1024,
               dnn_dropout=0.5,
               dnn_activation='relu',
               dnn_use_bn=True,
               task='binary')
# Best-effort warm start: reuse previous weights when a checkpoint exists.
# NOTE(review): the bare ``except: pass`` also hides real failures (e.g.
# a corrupt checkpoint) — consider catching a narrower exception.
try:
    model.load_weights(checkpoint_path)
    print('load weights')
except:
    pass
model.compile(optimizer="adam",
              loss="binary_crossentropy",
              metrics=['accuracy', 'AUC'])
history = model.fit(train_model_input,
# Count distinct values per sparse field (vocabulary sizes).
sparse_feature_dim = {feat: data[feat].nunique() for feat in sparse_features}
sequence_feature = [VarLenFeature('genres', len(key2index), max_len, 'mean')]

# 3.generate input data for model
sparse_input = [data[feat].values for feat in sparse_feature_dim]
dense_input = []
sequence_input = [genres_list]
sequence_length_input = [genres_length]
model_input = sparse_input + dense_input + sequence_input + \
    sequence_length_input  # make sure the order is right

# 4.Define Model,compile and train
# Linear final activation — a regression-style output head.
model = DeepFM(
    {
        "sparse": sparse_feature_dim,
        "dense": [],
        "sequence": sequence_feature
    },
    final_activation='linear')

model.compile(
    "adam",
    "mse",
    metrics=['mse'],
)
history = model.fit(
    model_input,
    data[target].values,
    batch_size=256,
    epochs=10,
    verbose=2,
    # 1.Label Encoding for sparse features,and do simple Transformation for dense features
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    # 2.count #unique features for each sparse field
    sparse_feat_list = [
        SingleFeat(feat, data[feat].nunique()) for feat in sparse_features
    ]

    # 3.generate input data for model
    train, test = train_test_split(data, test_size=0.2)
    train_model_input = [train[feat.name].values for feat in sparse_feat_list]
    test_model_input = [test[feat.name].values for feat in sparse_feat_list]
    # 4.Define Model,train,predict and evaluate
    model = DeepFM({"sparse": sparse_feat_list}, task='regression')
    model.compile(
        "adam",
        "mse",
        metrics=['mse'],
    )

    history = model.fit(
        train_model_input,
        train[target].values,
        batch_size=256,
        epochs=10,
        verbose=2,
        validation_split=0.2,
    )
    pred_ans = model.predict(test_model_input, batch_size=256)
Exemple #23
0
                       'TitleID', 'DescriptionID', 'Gender', 'Age']
    target = ['CTR']

    # 1.Label Encoding for sparse features,and do simple Transformation for dense features
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    # 2.count #unique features for each sparse field
    sparse_feature_dim = {feat: data[feat].nunique()
                          for feat in sparse_features}
    # 3.generate input data for model
    model_input = [data[feat].values for feat in sparse_feature_dim]

    if mode == 'train':
        # 4.Define Model,compile and train
        model = DeepFM({"sparse": sparse_feature_dim, "dense": []},
                       final_activation='sigmoid')

        model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])

        filepath = 'model_save/deep_fm_sample-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
        checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

        history = model.fit(model_input, data[target].values, callbacks=[checkpoint],
                            batch_size=batch_size, epochs=50, verbose=1, validation_split=0.2,)

    elif mode == 'test':
        model = DeepFM({"sparse": sparse_feature_dim, "dense": []},
                       final_activation='sigmoid')
        model.load_weights('model_save/deep_fm_sample-ep001-loss0.184-val_loss0.172.h5')

        # model = load_model('model_save/deep_fm_sample-ep001-loss0.192-val_loss0.176.h5')
Exemple #24
0
    # 1.Use hashing encoding on the fly for sparse features,and process sequence features

    genres_list = list(map(lambda x: x.split('|'), data['genres'].values))
    genres_length = np.array(list(map(len, genres_list)))
    max_len = max(genres_length)

    # Notice : padding=`post`
    genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str)
    # 2.set hashing space for each sparse field and generate feature config for sequence feature

    fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True, dtype='string')
                              for feat in sparse_features]
    varlen_feature_columns = [
        VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4, use_hash=True, dtype="string"),
                         maxlen=max_len, combiner='mean',
                         )]  # Notice : value 0 is for padding for sequence input feature
    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3.generate input data for model
    model_input = {name: data[name] for name in feature_names}
    model_input['genres'] = genres_list

    # 4.Define Model,compile and train
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')

    model.compile("adam", "mse", metrics=['mse'], )
    history = model.fit(model_input, data[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    # 2.count #unique features for each sparse field
    sparse_feature_dim = {}
    with open('./data/features_infos_combined.txt') as fr:
        # with open('./data/sample/features_infos.txt') as fr:
        for line in fr:
            records = line.strip().split(':')
            if records[0] in exclude:
                continue
            sparse_feature_dim[records[0]] = int(records[1])
        fr.close()

    # 4.Define Model,compile and train
    model = DeepFM({
        "sparse": sparse_feature_dim,
        "dense": []
    },
                   embedding_size=embedding_size,
                   hidden_size=hidden_size,
                   final_activation='sigmoid')

    if mode == 'train':

        model.compile("adam",
                      "binary_crossentropy",
                      metrics=['binary_crossentropy'])

        filepath = 'model_save/deep_fm_fn-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}-bs' + str(batch_size)\
                   + '-ee' + str(embedding_size) + '-hz' + str(hidden_size) + '.h5'
        checkpoint = ModelCheckpoint(filepath,
                                     monitor='val_loss',
                                     verbose=1,
Exemple #26
0
        SingleFeat(feat, data[feat].nunique()) for feat in sparse_features
    ]
    # Dense features are registered with dimension 0 in the legacy SingleFeat API.
    dense_feature_list = [SingleFeat(feat, 0) for feat in dense_features]

    # 3. Build one input array per feature, sparse fields first, then dense.
    train, test = train_test_split(data, test_size=0.2)
    all_feats = sparse_feature_list + dense_feature_list
    train_model_input = [train[f.name].values for f in all_feats]
    test_model_input = [test[f.name].values for f in all_feats]

    # 4. Define and compile a binary-classification DeepFM.
    feature_dims = {
        "sparse": sparse_feature_list,
        "dense": dense_feature_list,
    }
    model = DeepFM(feature_dims, final_activation='sigmoid')
    model.compile("adam",
                  "binary_crossentropy",
                  metrics=['binary_crossentropy'])

    history = model.fit(
        train_model_input,
        train[target].values,
        batch_size=256,
        epochs=10,
        verbose=2,
        validation_split=0.2,
# Example 27
N_FOLDS = 5
CV_SEED = 0

# In[ ]:

# Build the template model on CPU so multi_gpu_model can replicate it
# across NUM_WORKERS GPU workers.
with tf.device("/cpu:0"):
    model = DeepFM(
        linear_feature_columns,
        dnn_feature_columns,
        task='binary',
        dnn_dropout=0.1,
        dnn_hidden_units=(512, 128),
    )
    model = multi_gpu_model(model, NUM_WORKERS)

# Focal loss (alpha=0.1) is used instead of plain binary cross-entropy;
# AUROC is tracked as the evaluation metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(3e-4),
    loss=multi_category_focal_loss2(alpha=0.1),
    metrics=[auroc],
)

# Start from an empty checkpoint directory on every run.
# Reuse `dirpath` for creation instead of re-hardcoding the name in os.mkdir.
dirpath = Path('checkpoint')
if dirpath.exists() and dirpath.is_dir():
    shutil.rmtree(dirpath)
dirpath.mkdir()
# Example 28
    data[feature] = lbe.fit_transform(data[feature])
# Count the number of distinct values in each feature
fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique())
                          for feat in sparse_features]
# The linear part and the DNN part consume the same feature columns.
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# Split the dataset into a training set and a test set.
train, test = train_test_split(data, test_size=0.2)
train_model_input = {name: train[name].values for name in feature_names}
test_model_input = {name: test[name].values for name in feature_names}

# Train a DeepFM regressor.
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
model.compile("adam", "mse", metrics=['mse'])
history = model.fit(train_model_input,
                    train[target].values,
                    batch_size=256,
                    epochs=1,
                    verbose=True,
                    validation_split=0.2)
# Predict on the held-out test set with DeepFM.
pred_ans = model.predict(test_model_input, batch_size=256)
                                dtype=tf.int64, embedding_dim = embedding_size) for feat in sparse_f]
# Variable-length sparse features: vocabulary gets +1, presumably to reserve
# a padding index — TODO confirm against the preprocessing step.
varlen_feature_columns = [
    VarLenSparseFeat(
        SparseFeat(name,
                   vocabulary_size=varlen_vcab_dic[name] + 1,
                   dtype=tf.int64,
                   embedding_dim=embedding_size),
        maxlen=varlen_maxlen_f[name])
    for name in varlen_f
]


# %%
# Linear and DNN branches each get their own copy of the full column list.
linear_feature_columns = sparse_feature_columns + varlen_feature_columns
dnn_feature_columns = sparse_feature_columns + varlen_feature_columns


# %%
# All hyper-parameters are driven by the NNconfig_dic configuration dict.
model = DeepFM(linear_feature_columns, dnn_feature_columns,
               dnn_hidden_units=NNconfig_dic["dnn_hidden_units"],
               l2_reg_dnn=NNconfig_dic["l2_reg_dnn"],
               l2_reg_embedding=NNconfig_dic["l2_reg_embedding"],
               l2_reg_linear=NNconfig_dic["l2_reg_linear"],
               dnn_dropout=NNconfig_dic["dnn_dropout"],
               dnn_use_bn=NNconfig_dic["dnn_use_bn"],
               dnn_activation=NNconfig_dic["dnn_activation"])
NNconfig_dic["model_name"] = "DeepFM"


# %%
opt = tf.keras.optimizers.Adam(learning_rate=NNconfig_dic["lr"])
NNconfig_dic["optimizer"] = "Adam"


# %%
model.compile(optimizer=opt,
              loss=tf.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.AUC()])
# Example 30
    # The linear part and the DNN part share the same fixed-length columns.
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns +
                                      dnn_feature_columns)

    # 3.generate input data for model

    train, test = train_test_split(data, test_size=0.2)

    # NOTE(review): inputs here are pandas Series (not `.values` arrays as in
    # the other examples in this file) — confirm this is intended.
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    # 4.Define Model,train,predict and evaluate
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    # Replicate the model across 2 GPUs for data-parallel training.
    model = multi_gpu_model(model, gpus=2)

    model.compile(
        "adam",
        "binary_crossentropy",
        metrics=['binary_crossentropy'],
    )

    history = model.fit(
        train_model_input,
        train[target].values,
        batch_size=256,
        epochs=10,
        verbose=2,
        validation_split=0.2,
from deepctr.models import DeepFM

if __name__ == "__main__":
    # MovieLens rating regression with a sparse-only DeepFM (legacy dict API).
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id", "gender",
                       "age", "occupation", "zip"]
    target = ['rating']

    # 1. Label-encode every sparse feature in place.
    for feat in sparse_features:
        data[feat] = LabelEncoder().fit_transform(data[feat])

    # 2. Record the number of unique values of each sparse field.
    sparse_feature_dim = {feat: data[feat].nunique()
                          for feat in sparse_features}

    # 3. Split the data and build one input array per sparse field.
    train, test = train_test_split(data, test_size=0.2)
    train_model_input = [train[feat].values for feat in sparse_feature_dim]
    test_model_input = [test[feat].values for feat in sparse_feature_dim]

    # 4. Train, predict and evaluate a linear-output DeepFM regressor.
    model = DeepFM({"sparse": sparse_feature_dim, "dense": []},
                   final_activation='linear')
    model.compile("adam", "mse", metrics=['mse'])
    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=1, verbose=2,
                        validation_split=0.2)
    pred_ans = model.predict(test_model_input, batch_size=256)
    print("test MSE",
          round(mean_squared_error(test[target].values, pred_ans), 4))