def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params,
                       model_path='D:/code/tensorflow_practice/recommendation/Basic-DCN-Demo/model/model'):
    """Fit one DCN model per fold and checkpoint it after each fit.

    Args:
        dfTrain: Training data; passed to ``FeatureDictionary`` and parsed
            with ``has_label=True``.
        dfTest: Test data; passed to ``FeatureDictionary`` and parsed
            without labels.
        folds: Iterable of ``(train_idx, valid_idx)`` pairs. Each element of
            an index collection is used to subscript the parsed training
            sequences.
        dcn_params: Keyword arguments for ``DCN``. Updated IN PLACE with
            ``cate_feature_size``, ``field_size`` and
            ``numeric_feature_size`` before any model is built, so the
            caller's dict is modified.
        model_path: Checkpoint path prefix handed to ``dcn.saver.save``.
            Defaults to the location that used to be hard-coded, so existing
            callers behave as before; pass another prefix to run elsewhere.

    Returns:
        None. Results are only visible through the printed output and the
        checkpoints written by the saver.
    """
    fd = FeatureDictionary(dfTrain, dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS,
                           cate_cols=config.CATEGORICAL_COLS)
    print(fd.feat_dim)
    print(fd.feat_dict)

    data_parser = DataParser(feat_dict=fd)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = data_parser.parse(
        df=dfTrain, has_label=True)
    # The parsed test set is not used below; the call and the 4-way unpack
    # are kept so any failure while parsing dfTest still surfaces here.
    _cate_Xi_test, _cate_Xv_test, _numeric_Xv_test, _ids_test = data_parser.parse(
        df=dfTest)

    dcn_params["cate_feature_size"] = fd.feat_dim
    dcn_params["field_size"] = len(cate_Xi_train[0])
    dcn_params['numeric_feature_size'] = len(config.NUMERIC_COLS)

    def _take(seq, indices):
        """Return the elements of ``seq`` at ``indices`` as a new list."""
        return [seq[k] for k in indices]

    train_sources = (cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train)

    for i, (train_idx, valid_idx) in enumerate(folds):
        print("i", i)
        train_part = [_take(src, train_idx) for src in train_sources]
        valid_part = [_take(src, valid_idx) for src in train_sources]

        dcn = DCN(**dcn_params)
        # fit() receives the fold number as its last argument; its return
        # value is what the saver is given as first argument
        # (presumably the TensorFlow session -- confirm against DCN.fit).
        s = dcn.fit(*train_part, *valid_part, i)
        # One checkpoint per fold, numbered from 1.
        dcn.saver.save(s, model_path, global_step=i + 1)
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):
    """Fit a fresh DCN model on every (train, validation) fold.

    Args:
        dfTrain: Training data; passed to ``FeatureDictionary`` and parsed
            with ``has_label=True``.
        dfTest: Test data; passed to ``FeatureDictionary`` and parsed
            without labels.
        folds: Iterable of ``(train_idx, valid_idx)`` pairs whose elements
            subscript the parsed training sequences.
        dcn_params: Keyword arguments for ``DCN``. Updated in place with
            ``cate_feature_size``, ``field_size`` and
            ``numeric_feature_size``.

    Returns:
        None.
    """
    fd = FeatureDictionary(
        dfTrain,
        dfTest,
        numeric_cols=config.NUMERIC_COLS,
        ignore_cols=config.IGNORE_COLS,
        cate_cols=config.CATEGORICAL_COLS,
    )
    print(fd.feat_dim)
    print(fd.feat_dict)

    data_parser = DataParser(feat_dict=fd)
    parsed_train = data_parser.parse(df=dfTrain, has_label=True)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = parsed_train
    # Test-set parse results are not used afterwards; the unpack is kept so
    # the number of returned values is still checked.
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, ids_test = data_parser.parse(df=dfTest)

    dcn_params["cate_feature_size"] = fd.feat_dim
    dcn_params["field_size"] = len(cate_Xi_train[0])
    dcn_params['numeric_feature_size'] = len(config.NUMERIC_COLS)

    def pick(seq, indices):
        """Return the elements of ``seq`` at ``indices`` as a new list."""
        return [seq[k] for k in indices]

    # Same order as the positional arguments expected by fit().
    columns = (cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train)

    for fold_no, (train_idx, valid_idx) in enumerate(folds):
        fit_args = [pick(col, train_idx) for col in columns]
        fit_args += [pick(col, valid_idx) for col in columns]

        dcn = DCN(**dcn_params)
        dcn.fit(*fit_args)
def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Fit a fresh ``my_AFM`` model on every (train, validation) fold.

    Args:
        dfTrain: Training data; passed to ``FeatureDictionary`` and parsed
            with ``has_label=True``.
        dfTest: Test data; passed to ``FeatureDictionary`` and parsed
            without labels.
        folds: Iterable of ``(train_idx, valid_idx)`` pairs whose elements
            subscript the parsed training sequences.
        pnn_params: Keyword arguments for ``my_AFM``. Updated in place with
            ``feature_size`` and ``field_size``.

    Returns:
        None.
    """
    fd = FeatureDictionary(
        dfTrain=dfTrain,
        dfTest=dfTest,
        numeric_cols=config.NUMERIC_COLS,
        ignore_cols=config.IGNORE_COLS,
    )
    data_parser = DataParser(feat_dict=fd)

    # Xi_*: column indices
    # Xv_*: the values belonging to those columns
    parsed_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_train, Xv_train, y_train = parsed_train
    # Test-set parse results are not used afterwards; the unpack is kept so
    # the number of returned values is still checked.
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    print(dfTrain.dtypes)

    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    def pick(seq, indices):
        """Return the elements of ``seq`` at ``indices`` as a new list."""
        return [seq[k] for k in indices]

    # Same order as the positional arguments expected by fit().
    columns = (Xi_train, Xv_train, y_train)

    for fold_no, (train_idx, valid_idx) in enumerate(folds):
        train_part = [pick(col, train_idx) for col in columns]
        valid_part = [pick(col, valid_idx) for col in columns]

        afm = my_AFM(**pnn_params)
        afm.fit(*train_part, *valid_part)
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):
    """Fit a fresh DCN model on every (train, validation) fold.

    Args:
        dfTrain: Training data; passed to ``FeatureDictionary`` and parsed
            with ``has_label=True``.
        dfTest: Test data; passed to ``FeatureDictionary`` and parsed
            without labels.
        folds: Iterable of ``(train_idx, valid_idx)`` pairs whose elements
            subscript the parsed training sequences.
        dcn_params: Keyword arguments for ``DCN``. Updated in place with
            ``n_cate_feature`` and ``n_field``.

    Returns:
        None.
    """
    # Mapping between categorical features and their indices.
    fd = FeatureDictionary(
        dfTrain,
        dfTest,
        numeric_cols=config.NUMERIC_COLS,
        ignore_cols=config.IGNORE_COLS,
        cate_cols=config.CATEGORICAL_COLS,
    )
    print(fd.feat_dim)
    print(fd.feat_dict)

    # parse() yields: categorical feature indices, categorical feature
    # values, numeric feature values, and labels.
    data_parser = DataParser(feat_dict=fd)
    parsed_train = data_parser.parse(df=dfTrain, has_label=True)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = parsed_train
    # Test-set parse results are not used afterwards; the unpack is kept so
    # the number of returned values is still checked.
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, _ = data_parser.parse(df=dfTest)

    # Number of categorical features after one-hot encoding the discrete
    # features.
    dcn_params["n_cate_feature"] = fd.feat_dim
    # Number of discrete feature fields.
    dcn_params["n_field"] = len(cate_Xi_train[0])
    print('values', str(fd.feat_dim), 'values', str(len(cate_Xi_train[0])))

    def pick(seq, indices):
        """Return the elements of ``seq`` at ``indices`` as a new list."""
        return [seq[k] for k in indices]

    # Same order as the positional arguments expected by fit().
    columns = (cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train)

    for fold_no, (train_idx, valid_idx) in enumerate(folds):
        # Training split
        train_part = [pick(col, train_idx) for col in columns]
        # Validation split
        valid_part = [pick(col, valid_idx) for col in columns]

        dcn = DCN(**dcn_params)
        dcn.fit(*train_part, *valid_part)
cols = [c for c in cols if (not c in config.IGNORE_COLS)] X_train = dfTrain[cols].values y_train = dfTrain["target"].values X_test = dfTest[cols].values ids_test = dfTest["id"].values return dfTrain, dfTest, X_train, y_train, X_test, ids_test, dfTrain, dfTest, X_train, y_train, X_test, ids_test = load_data() print('load_data_over') fd = FeatureDictionary(dfTrain, dfTest, numeric_cols=config.NUMERIC_COLS, ignore_cols=config.IGNORE_COLS, cate_cols=config.CATEGORICAL_COLS) print(fd.feat_dim) print(fd.feat_dict) data_parser = DataParser(feat_dict=fd) cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = data_parser.parse( df=dfTrain, has_label=True) # cate_Xi_test, cate_Xv_test, numeric_Xv_test, y_test, ids_test = data_parser.parse(df=dfTest) def process(cate_ids, cate_vals, y_label): # feat_ins # print('----',len(cate_ids))
for i, (train_idx, valid_idx) in enumerate(folds): Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx) Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx) afm = my_AFM(**pnn_params) afm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_) # load data dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices = load_data() fd = FeatureDictionary(dfTrain=dfTrain, dfTest=dfTest, numeric_cols=config.NUMERIC_COLS, ignore_cols = config.IGNORE_COLS) data_parser = DataParser(feat_dict= fd) # Xi_train :列的序号 # Xv_train :列的对应的值 Xi_train,Xv_train,y_train = data_parser.parse(df=dfTrain,has_label=True) Xi_test,Xv_test,ids_test = data_parser.parse(df=dfTest) print(dfTrain.dtypes) _get = lambda x,l:[x[i] for i in l] # ############随机打乱划分训练集和验证集 np.random.seed(2018)