def __init__(self):
    """Prepare the multiprocess inference dataset: read configuration,
    size the worker pools, and load the raw user/product data."""
    # 'spawn' keeps torch state safe across child processes
    mp.set_start_method('spawn')
    self._top_N = cf().path["inference"]["top_N"]
    self._using_gpu = cf().path["inference"]["using_gpu"]
    self._device = torch.device(cf().path["system"]["device"])
    # filled in by load_user_raw_data / load_product_raw_data below
    self._client_len = 0
    self._sku = 0
    self._user_data = OrderedDict()
    self._product_data = OrderedDict()
    self.user_col_name = OrderedDict()
    self.whole_user_col_name = OrderedDict()
    self.product_col_name = OrderedDict()
    self.whole_product_col_name = OrderedDict()
    # set process count: ~60% of cores for inference, ~20% for samplers
    self._num_processes = max(1, int(mp.cpu_count() * 0.6))
    self._num_sampler_processes = max(1, int(mp.cpu_count() * 0.2))
    # NOTE(review): this overwrites the config flag read above and forces
    # CPU inference — confirm whether the config value should win instead.
    self._using_gpu = False
    # manager-backed list shared by sampler processes to signal completion
    self._sampler_flag = mp.Manager().list()
    self.load_user_raw_data()
    self.load_product_raw_data()
def get_dataloader(self):
    """Build and return the training DataLoader over this dataset.

    Batch size, shuffling and worker count come from the config file;
    drop_last=True so every batch has the full configured size.

    :return: torch.utils.data.DataLoader over self
    """
    # BUG FIX: the original also built a second batch_size=1 loader,
    # iterated it printing labels, and called exit() — leftover debug
    # code that killed the process before the function could return.
    dataloader = data.DataLoader(
        self,
        batch_size=cf().path["data"]["batch_size"],
        shuffle=cf().path["data"]["shuffle"],
        num_workers=cf().path["data"]["num_workers"],
        drop_last=True)
    return dataloader
def sampler(self, index, num_process, raw_data_queue):
    """Feed per-client inference inputs into raw_data_queue.

    Each sampler process owns a contiguous slice of clients
    (client_len / num_sampler_processes). With multiple samplers, every
    non-last sampler feeds exactly its slice and appends to
    self._sampler_flag when done; the last sampler feeds the remainder,
    busy-waits until all other samplers have flagged completion, then
    enqueues one "feeding end" sentinel per inference consumer so each
    consumer can terminate. A single sampler does all of that itself.

    :param index: this sampler's 0-based index
    :param num_process: number of inference consumer processes
    :param raw_data_queue: shared multiprocessing queue of inference inputs
    :return: None (the process exits with status 1 on error)
    """
    offset = int(self._client_len / self._num_sampler_processes)
    start = index * offset
    if self._num_sampler_processes != 1:
        try:
            # not the last sampler: feed exactly `offset` clients
            if index < self._num_sampler_processes - 1:
                for i in range(start, start + offset):
                    raw_data_queue.put(self.load_inf_data(i), block=True)
                # job end — signal completion to the last sampler
                self._sampler_flag.append([1])
            # the last sampler takes the remainder (handles uneven division)
            else:
                for i in range(start, self._client_len):
                    raw_data_queue.put(self.load_inf_data(i), block=True)
                # wait for every other sampler to finish, then push one
                # end marker per consumer and leave
                while True:
                    if len(self._sampler_flag) == (
                            self._num_sampler_processes - 1):
                        for idx in range(num_process):
                            raw_data_queue.put(
                                cf().path["inference"]["feeding_end"],
                                block=True)
                        break
        except Exception as e:
            print("sampler", e)
            sys.exit(1)
    else:
        try:
            # single-sampler path: feed everything, then the sentinels
            for i in range(self._client_len):
                data = self.load_inf_data(i)
                raw_data_queue.put(data, block=True)
            for end in range(num_process):
                raw_data_queue.put(cf().path["inference"]["feeding_end"],
                                   block=True)
        except Exception as e:
            print("p_sampler", e)
            sys.exit(1)
def load_model(self):
    """Restore saved weights into self.model, if a checkpoint exists,
    and switch the model to eval mode. Errors are printed, not raised."""
    try:
        save_path = cf().path["system"]["model_save_path"]
        if not os.path.exists(save_path):
            return
        # strict=False: load only the keys that match the current model
        state = torch.load(save_path)
        self.model.load_state_dict(state, strict=False)
        self.model.eval()
    except Exception as e:
        print("load_model", e)
def save_model(self):
    """Persist self.model's state_dict to the configured path.

    Exits the process if saving fails.
    """
    try:
        target = cf().path["system"]["model_save_path"]
        # pickle protocol 4 handles large (>4 GB) objects
        torch.save(self.model.state_dict(), target, pickle_protocol=4)
    except Exception as e:
        print("save_model", e)
        sys.exit()
def ln_top_generator(self):
    """Derive the top-MLP layer sizes automatically.

    The first top layer must absorb the bottom-MLP output (j) plus the
    pairwise feature-interaction terms, of which there are n*(n-1)/2 for
    n interacting vectors; subsequent layers halve, then quarter, down to
    a single output unit.

    :return: np.ndarray of layer widths, e.g. [y, y//2, y//4, 1]
    """
    j = cf().path["model_parameter"]["ln_bot_output_layer"]
    s = self.dataset.sparse_col_len
    m = cf().path["model_parameter"]["m_spa"]
    n = int((s * m + j) / j)
    # FIX: the original computed sum(n for n in range(1, n)), which both
    # shadowed n and looped to produce the closed form n*(n-1)/2.
    y = j + n * (n - 1) // 2
    return np.array([y, y // 2, y // 4, 1])
def load_my_state_dict(self):
    """Partial weight restore (mimics transfer learning): copy only the
    saved parameters whose name and shape match the current model, then
    switch to eval mode. Errors are printed, not raised."""
    try:
        model_path = cf().path["system"]["model_save_path"]
        if os.path.exists(model_path):
            checkpoint = torch.load(model_path)
            print("loading saved model states...")
            current = self.model.state_dict()
            for key, tensor in checkpoint.items():
                # skip params unknown to the current model or mismatched in shape
                if key in current and current[key].shape == tensor.shape:
                    current[key].copy_(tensor)
            self.model.load_state_dict(current, strict=False)
            self.model.eval()
    except Exception as e:
        print("load_my_state_dict", e)
def ln_bot_generator(self):
    """Build the bottom-MLP layer sizes:
    [total dense feature width, configured output width]."""
    dense_width = 0
    for group in self.dataset.col_name.values():
        for cols in group["dense"].values():
            # skip empty column arrays
            if len(cols) >= 1:
                dense_width += cols.shape[0]
    out_width = cf().path["model_parameter"]["ln_bot_output_layer"]
    return np.array([dense_width, out_width])
def load_inf_data(self, client_index):
    """Build one client's inference batch: the client's user features are
    tiled across every SKU and paired with the per-product features, so
    the model can score this client against all products in one pass.
    Dense and sparse features are stored separately.

    :param client_index: row index of the client in the loaded user data
    :return: dict with keys 'dense', 'sparse' (feature name -> array /
             LongTensor) and 'client_index'; None on error (printed)
    """
    try:
        dense = dict()
        sparse = dict()
        result = dict()
        # user data: replicate this client's values once per SKU
        for f_l in self.whole_user_col_name.keys():
            for s_l in self.whole_user_col_name[f_l]:
                for col_order, column_name in enumerate(
                        self.whole_user_col_name[f_l][s_l]):
                    if s_l == "seq":
                        # sequence columns become (sku, SEQ_LEN) blocks
                        if f_l == "dense":
                            dense[column_name] = np.full(
                                (self._sku, cf().path["data"]["SEQ_LEN"]),
                                np.array(self._user_data[f_l][s_l]
                                         [client_index][col_order]))
                        else:
                            # TODO: decide how 'offset' should be managed later
                            if column_name == 'offset':
                                tmp = np.full(
                                    (self._sku),
                                    np.array(self._user_data[f_l][s_l]
                                             [client_index][col_order]))
                                # EmbeddingBag requires LongTensor indices
                                tmp = torch.LongTensor(tmp)
                                sparse[column_name] = tmp
                            else:
                                tmp = np.full(
                                    (self._sku,
                                     cf().path["data"]["SEQ_LEN"]),
                                    np.array(self._user_data[f_l][s_l]
                                             [client_index][col_order]))
                                tmp = torch.LongTensor(tmp)
                                sparse[column_name] = tmp
                    else:
                        # single-valued columns become (sku,) vectors
                        if f_l == "dense":
                            dense[column_name] = np.full(
                                (self._sku),
                                np.array(self._user_data[f_l][s_l]
                                         [client_index][col_order]))
                        else:
                            tmp = np.full(
                                (self._sku),
                                np.array(self._user_data[f_l][s_l]
                                         [client_index][col_order]))
                            tmp = torch.LongTensor(tmp)
                            sparse[column_name] = tmp
        # product features: one column slice per feature, shared by all clients
        for f_l in self.whole_product_col_name.keys():
            for s_l in self.whole_product_col_name[f_l]:
                for col_order, column_name in enumerate(
                        self.whole_product_col_name[f_l][s_l]):
                    # transpose so that tmp[col_order] is one column's values
                    tmp = np.transpose(self._product_data[f_l][s_l])
                    if f_l == "dense":
                        dense[column_name] = tmp[col_order]
                    else:
                        tmp = tmp[col_order]
                        tmp = torch.LongTensor(tmp)
                        sparse[column_name] = tmp
        result['dense'] = dense
        result['sparse'] = sparse
        result['client_index'] = client_index
        return result
    except Exception as e:
        print("load rawdata", e)
def inference(self, model, raw_data_queue, result_queue, top_N):
    """Inference worker loop: score queue items and emit top-N product names.

    Items on raw_data_queue are dicts from load_inf_data, except for the
    "feeding end" sentinel which terminates the loop (the worker then puts
    its pid on result_queue so the parent can account for finished workers).

    :param model: model exposing dlrm_wrap(dense, lS_o, lS_i, use_gpu, device)
    :param raw_data_queue: queue of per-client inference inputs
    :param result_queue: queue of result rows for CSV writing
    :param top_N: number of products to return per client
    :return: None; each result row is
             [incs_no, product name #1, ..., product name #top_N]
    """
    proc = os.getpid()
    start_vect = time.time()
    # PERF FIX: load the static lookup tables once per worker. The
    # original re-read client_info.json for every queue item and
    # item_info.json for every recommended product, i.e.
    # O(clients * top_N) file opens + JSON parses.
    with open('config/client_info.json') as json_file:
        client_info = json.load(json_file)
    with open('config/item_info.json') as json_file:
        item_info = json.load(json_file)
    while True:
        try:
            data = raw_data_queue.get(block=True)
            if data == cf().path["inference"]["feeding_end"]:
                # report this worker as done, then stop consuming
                result_queue.put(proc, block=True)
                break
            # resolve the client id (incs_no) from the client index
            client_index = data['client_index']
            incs_no = client_info[str(client_index)]['incs_no']
            dense_data = data['dense']
            sparse_data = data['sparse']
            dense_x = self.gen_inference_dense_factor(dense_data)
            lS_i, lS_o = self.gen_inference_sparse_factor(sparse_data)
            unsorted_score = model.dlrm_wrap(
                dense_x, lS_o, lS_i, self._using_gpu,
                cf().path["system"]["device"]).detach().cpu().numpy()
            # rank item indices by score, best first
            scored = {idx: score for idx, score in enumerate(unsorted_score)}
            ranked = sorted(scored.items(),
                            key=operator.itemgetter(1),
                            reverse=True)
            # [incs_no, top-1 product name, ..., top-N product name]
            items = [incs_no]
            for item_index, _ in ranked[:top_N]:
                items.append(item_info[str(item_index)]['prd_nm'])
            result_queue.put(items, block=True)
        except Exception as e:
            print("inference error ", e)
    print(
        f"{proc}'s serving Runtime: {(time.time() - start_vect) / 60} Minutes"
    )
def __init__(self, device): self.learning_rate = cf().path["model_parameter"]["learning_rate"] # tensorboard self.emb_l_colName = list() self.dataset = TrainData() self.dataloader = self.dataset.get_dataloader() print(colored("generating DLRM frame", "yellow")) self.ln_emb = self.ln_emb_generator() self.ln_bot = self.ln_bot_generator() self.ln_top = self.ln_top_generator() self.model = DLRM_Net( cf().path["model_parameter"]["m_spa"], self.ln_emb, self.ln_bot, self.ln_top, arch_interaction_op=cf().path["model_parameter"] ["arch_interaction_op"], arch_interaction_itself=cf().path["model_parameter"] ["arch_interaction_itself"], sigmoid_bot=cf().path["model_parameter"]["sigmoid_bot"], sigmoid_top=self.ln_top.size - 2, sync_dense_params=cf().path["model_parameter"] ["sync_dense_params"], loss_threshold=cf().path["model_parameter"]["loss_threshold"], ndevices=cf().path["model_parameter"]["ndevices"], qr_flag=cf().path["model_parameter"]["qr_flag"], qr_operation=cf().path["model_parameter"]["qr_operation"], qr_collisions=cf().path["model_parameter"]["qr_collisions"], qr_threshold=cf().path["model_parameter"]["qr_threshold"], md_flag=cf().path["model_parameter"]["md_flag"], md_threshold=cf().path["model_parameter"]["md_threshold"])
def gen_sparse_factor(data):
    """Convert one batch of sparse features into the (lS_i, lS_o)
    index/offset lists that DLRM's EmbeddingBag layers expect.

    Column order is: user singles, user sequences, product singles,
    product sequences.

    :param data: dict of column name -> batched tensor (must include "offset")
    :return: (lS_i, lS_o) on success; None when the product stage fails
             (the error path only prints diagnostics)
    """
    lS_i = list()
    user_lS_o = list()
    prod_lS_o = list()
    seq_cnt = 0
    single_cnt = 0
    # NOTE(review): relies on the module-level `recsys` instance
    user_cols = recsys.dataset.col_name['user']['sparse']
    try:
        import itertools as it
        batch_size = cf().path["data"]["batch_size"]
        # flatten the per-row sequence lengths into a 1-D vector
        seq_offset = np.array(data["offset"].view(-1))
        # user
        user_single_data = list()
        user_seq_data = list()
        for key in user_cols.keys():
            if key == "single":
                for column_name in user_cols[key]:
                    single_cnt += 1
                    user_single_data.append(data[column_name])
            elif key == "seq":
                # per sequence column: concatenate batch_size rows of
                # indices, stripping the zero padding from each row
                for column_name in user_cols[key]:
                    seq_cnt += 1
                    seq_items = list()
                    for i in range(batch_size):
                        temp = data[column_name][i]
                        temp = temp[temp.nonzero().squeeze().detach()]
                        temp = temp.view(-1)
                        seq_items.append(temp)
                    seq_items = torch.cat(seq_items)
                    user_seq_data.append(seq_items)
        lS_i = user_single_data + user_seq_data
        # cumulative offsets; the last sequence length is not needed
        seq_offset = list(it.accumulate(seq_offset[:-1]))
        # offset list starts at zero
        seq_offset.insert(0, 0)
        for i in range(single_cnt):
            # single-valued columns: one bag per batch row -> offsets 0..B-1
            tmp = [i for i in range(batch_size)]
            user_lS_o.append(tmp)
        for i in range(seq_cnt):
            user_lS_o.append(seq_offset)
    except Exception as e:
        print("user gen_sparse_factor", e)
    # product
    seq_cnt = 0
    single_cnt = 0
    prod_cols = recsys.dataset.col_name['product']['sparse']
    try:
        import itertools as it
        batch_size = cf().path["data"]["batch_size"]
        # NOTE(review): reuses the user "offset" column for the
        # head-padded product sequences — confirm both share lengths
        seq_offset = np.array(data["offset"].view(-1))
        prod_single_data = list()
        prod_seq_data = list()
        for key in prod_cols.keys():
            if key == "single":
                for column_name in prod_cols[key]:
                    single_cnt += 1
                    prod_single_data.append(data[column_name])
            elif key == "seq":
                for column_name in prod_cols[key]:
                    seq_cnt += 1
                    seq_items = list()
                    for i in range(batch_size):
                        # strip zero padding before concatenation
                        temp = data[column_name][i]
                        temp = temp[temp.nonzero().squeeze().detach()]
                        temp = temp.view(-1)
                        seq_items.append(temp)
                    seq_items = torch.cat(seq_items)
                    prod_seq_data.append(seq_items)
        # product indices follow the user indices in lS_i
        prd_ls_i = prod_single_data + prod_seq_data
        lS_i += prd_ls_i
        # cumulative offsets; the last sequence length is not needed
        seq_offset = list(it.accumulate(seq_offset[:-1]))
        # offset list starts at zero
        seq_offset.insert(0, 0)
        for i in range(single_cnt):
            tmp = [i for i in range(batch_size)]
            prod_lS_o.append(tmp)
        for i in range(seq_cnt):
            prod_lS_o.append(seq_offset)
        lS_o = user_lS_o + prod_lS_o
        lS_o = torch.LongTensor(lS_o)
        return lS_i, lS_o
    except Exception as e:
        print("")
        print("prod gen_sparse_factor", e)
        print("data : ", data)
        print("prod_single_data : ", prod_single_data)
        print("prod_seq_data : ", prod_seq_data)
def train():
    """Train the DLRM recommender on CPU: build the model, iterate the
    dataloader for the configured number of epochs with Adam +
    BCEWithLogitsLoss, track the best metric, then exit the process."""

    def gen_dense_factor(data):
        """Stack dense features into a (batch, n_features) float tensor.

        Sequence-valued dense columns are averaged over their non-zero
        entries (values are shifted by +1 first so zeros mark padding).

        :param data: dict of column name -> batched values
        :return: torch.Tensor, or None on error (printed)
        """
        try:
            items = list()
            for key in data.keys():
                tmp = np.array(data[key])
                # 2-D column => sequence: average over non-padding entries
                if len(tmp.shape) > 1:
                    tmp += 1
                    seq_avg = np.true_divide(tmp.sum(1), (tmp != 0).sum(1))
                    items.append(seq_avg)
                else:
                    items.append(tmp)
            items = np.array(items)
            # (n_features, batch) -> (batch, n_features)
            items = items.transpose()
            result = torch.Tensor(items)
            return result
        except Exception as e:
            print("gen_dense_factor", e)

    def gen_sparse_factor(data):
        """Convert one batch of sparse features into the (lS_i, lS_o)
        index/offset lists for DLRM's EmbeddingBag layers. Column order:
        user singles, user sequences, product singles, product sequences.

        :param data: dict of column name -> batched tensor (incl. "offset")
        :return: (lS_i, lS_o) on success; None if the product stage fails
        """
        lS_i = list()
        user_lS_o = list()
        prod_lS_o = list()
        seq_cnt = 0
        single_cnt = 0
        user_cols = recsys.dataset.col_name['user']['sparse']
        try:
            import itertools as it
            batch_size = cf().path["data"]["batch_size"]
            # flatten the per-row sequence lengths into a 1-D vector
            seq_offset = np.array(data["offset"].view(-1))
            # user
            user_single_data = list()
            user_seq_data = list()
            for key in user_cols.keys():
                if key == "single":
                    for column_name in user_cols[key]:
                        single_cnt += 1
                        user_single_data.append(data[column_name])
                elif key == "seq":
                    # per sequence column: concatenate batch_size rows of
                    # indices, stripping the zero padding from each row
                    for column_name in user_cols[key]:
                        seq_cnt += 1
                        seq_items = list()
                        for i in range(batch_size):
                            temp = data[column_name][i]
                            temp = temp[temp.nonzero().squeeze().detach()]
                            temp = temp.view(-1)
                            seq_items.append(temp)
                        seq_items = torch.cat(seq_items)
                        user_seq_data.append(seq_items)
            lS_i = user_single_data + user_seq_data
            # cumulative offsets; the last sequence length is not needed
            seq_offset = list(it.accumulate(seq_offset[:-1]))
            # offset list starts at zero
            seq_offset.insert(0, 0)
            for i in range(single_cnt):
                # single-valued columns: one bag per row -> offsets 0..B-1
                tmp = [i for i in range(batch_size)]
                user_lS_o.append(tmp)
            for i in range(seq_cnt):
                user_lS_o.append(seq_offset)
        except Exception as e:
            print("user gen_sparse_factor", e)
        # product
        seq_cnt = 0
        single_cnt = 0
        prod_cols = recsys.dataset.col_name['product']['sparse']
        try:
            import itertools as it
            batch_size = cf().path["data"]["batch_size"]
            # NOTE(review): reuses the user "offset" column for the
            # head-padded product sequences — confirm both share lengths
            seq_offset = np.array(data["offset"].view(-1))
            prod_single_data = list()
            prod_seq_data = list()
            for key in prod_cols.keys():
                if key == "single":
                    for column_name in prod_cols[key]:
                        single_cnt += 1
                        prod_single_data.append(data[column_name])
                elif key == "seq":
                    for column_name in prod_cols[key]:
                        seq_cnt += 1
                        seq_items = list()
                        for i in range(batch_size):
                            # strip zero padding before concatenation
                            temp = data[column_name][i]
                            temp = temp[temp.nonzero().squeeze().detach()]
                            temp = temp.view(-1)
                            seq_items.append(temp)
                        seq_items = torch.cat(seq_items)
                        prod_seq_data.append(seq_items)
            # product indices follow the user indices in lS_i
            prd_ls_i = prod_single_data + prod_seq_data
            lS_i += prd_ls_i
            # cumulative offsets; the last sequence length is not needed
            seq_offset = list(it.accumulate(seq_offset[:-1]))
            # offset list starts at zero
            seq_offset.insert(0, 0)
            for i in range(single_cnt):
                tmp = [i for i in range(batch_size)]
                prod_lS_o.append(tmp)
            for i in range(seq_cnt):
                prod_lS_o.append(seq_offset)
            lS_o = user_lS_o + prod_lS_o
            lS_o = torch.LongTensor(lS_o)
            return lS_i, lS_o
        except Exception as e:
            print("")
            print("prod gen_sparse_factor", e)
            print("data : ", data)
            print("prod_single_data : ", prod_single_data)
            print("prod_seq_data : ", prod_seq_data)

    def loss_fn_wrap(Z, T, use_gpu, device):
        # move the target to the training device only when using the GPU
        if use_gpu:
            return loss_fn(Z, T.to(device))
        else:
            return loss_fn(Z, T)

    # CPU-only training; the CUDA branch was disabled deliberately
    using_gpu = False
    device = torch.device("cpu")
    recsys = Recsys(device)
    writer = SummaryWriter()
    learning_rate = cf().path["model_parameter"]["learning_rate"]
    loss_fn = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(recsys.model.parameters(), lr=learning_rate)
    print(colored(f"DLRM frame generate done", 'yellow'), "\n")
    total_iter = 0
    epochs = cf().path["data"]["epoch"]
    # epoch counter
    k = 0
    start_vect = time.time()
    best_model_wts = copy.deepcopy(recsys.model.state_dict())
    best_acc = 0.0
    print(colored("MODEL TRAINING START", "yellow", attrs=["underline"]), "\n")
    START_TIME = time.time()
    # NOTE(review): `eval` here is a project class that shadows the
    # builtin — presumably a metrics helper; confirm its origin.
    M = eval()
    M.add_emb(recsys, writer)
    recsys.model.train()  # switch the model to training mode
    with torch.autograd.profiler.profile(False, False) as prof:
        try:
            while k < epochs:
                total_iter = 0
                k += 1
                for it, (dense_data, sparse_data,
                         label) in enumerate(recsys.dataloader):
                    dense_x = gen_dense_factor(dense_data)
                    lS_i, lS_o = gen_sparse_factor(sparse_data)
                    Yhat = recsys.dlrm_wrap(dense_x, lS_o, lS_i, using_gpu,
                                            device)
                    Y = label.type(torch.FloatTensor)
                    E = loss_fn_wrap(Yhat, Y, using_gpu, device)
                    try:
                        optimizer.zero_grad()
                        # backward pass
                        E.backward()
                        # optimizer step
                        optimizer.step()
                    except Exception as e:
                        print("weight update error", e)
                        sys.exit(1)
                    if (it % 50 == 0):
                        print(f"{k} epoch , iteration : {it}")
                # per-epoch summary of the last batch's outputs
                print(colored(f"Epoch : {k}", "blue"))
                print("Yhat : ", Yhat)
                print("Label : ", label)
                print("Loss : ", E)
                # keep a copy of the best-scoring weights
                current_error = M.metrics(total_iter, E, Yhat, label, writer)
                if best_acc < current_error:
                    best_acc = current_error
                    best_model_wts = copy.deepcopy(
                        recsys.model.state_dict())
            print(
                colored(
                    f"TRAIN RUNTIME: {(time.time() - start_vect) / 60} Min",
                    "yellow",
                    attrs=["underline"]), "\n")
            M.close(writer)
            sys.exit()
        except Exception as e:
            print("train 도중", e)
import os
import time
import sklearn.metrics
import sys
from recsys.Recsys import Recsys
from config.config import config as cf
from distrib_inf_lv import distributed_inference
from sklearn.metrics import auc
from termcolor import colored
import copy
import pandas as pd

# Smoke-test script: build the recommender, restore matching weights,
# set up an (unused here) MSE/SGD training pair, and print one batch.
# NOTE(review): `torch` is used below but not imported in this chunk —
# confirm it is imported elsewhere in the file.
device = torch.device("cpu")
recsys = Recsys(device)
recsys.load_my_state_dict()
learning_rate = cf().path["model_parameter"]["learning_rate"]
loss_fn = torch.nn.MSELoss(reduction="mean")
optimizer = torch.optim.SGD(recsys.model.parameters(), lr=learning_rate)
total_iter = 0
k = 0
start_vect = time.time()
best_model_wts = copy.deepcopy(recsys.model.state_dict())
best_acc = 0.0
# inspect a single batch, then stop
for data, label in recsys.dataloader:
    print(data, label)
    break
def __init__(self):
    """Load the raw parquet datasets, attach positive/negative labels,
    and build the feature-layout dictionary
    (user/product x dense/sparse x single/seq) used to slice columns."""
    super().__init__()
    self.train_data = None
    self.len = 0
    self._data = OrderedDict()
    self._label = dict()
    # all columns, including bookkeeping ones such as 'offset'
    self.whole_col_name = OrderedDict()
    # categorical columns that were label-encoded during preprocessing
    self.encoded_list = [
        "cust_grd_nm", "dvce_tp_cd", "emp_yn", "prd_brnd_nm", "prd_cd",
        "prd_tp_cat_vl", "sex_cd"
    ]
    self.encoding_dict = self.read_encode_dict(
        prefix="/Users/amore/ap-recsys-model/tb_recommend_raw",
        cols=self.encoded_list)
    self.items_dataset = self.read_parquets(
        "/Users/amore/ap-recsys-model/tb_recommend_raw/item_meta"
    ).set_index('prd_cd')
    # keep prd_cd available as a regular column too
    self.items_dataset['prd_cd'] = self.items_dataset.index
    self.users_dataset = self.read_parquets(
        "/Users/amore/ap-recsys-model/tb_recommend_raw/user_meta")
    # rows from the interaction log are positives; negatives appended below
    self.users_dataset['label'] = 1
    self.negative_labels(
        path='/Users/amore/ap-recsys-model/tb_recommend_raw/neg_sample')
    self.total_dataset_length = len(self.users_dataset)
    self.dataset = self.users_dataset[[
        'age', 'dvce_tp_cd', 'sex_cd', 'emp_yn', 'cust_grd_nm', 'seq_cnt',
        'prd_cd', 'prd_brnd_nm', 'prd_norm_prc', 'prd_tp_cat_vl',
        'tg_prd_cd', 'tg_prd_brnd_nm', 'tg_prd_norm_prc',
        'tg_prd_tp_cat_vl', 'label'
    ]]
    self.first_layer = ['user', 'product']
    self.second_layer = ['dense', 'sparse']
    self.third_layer = ['single', 'seq']
    # feature layout: owner -> density -> arity -> column-name array
    # (FIX: the original assigned each leaf OrderedDict twice; the
    # duplicate assignments were redundant and have been removed)
    self.feature_dict = OrderedDict()
    self.feature_dict['user'] = OrderedDict()
    self.feature_dict['product'] = OrderedDict()
    self.feature_dict['user']['dense'] = OrderedDict()
    self.feature_dict['user']['sparse'] = OrderedDict()
    self.feature_dict['product']['dense'] = OrderedDict()
    self.feature_dict['product']['sparse'] = OrderedDict()
    self.feature_dict['user']['dense']['single'] = np.array(['age'])
    self.feature_dict['user']['dense']['seq'] = np.array([])
    self.feature_dict['user']['sparse']['single'] = np.array(
        ['dvce_tp_cd', 'sex_cd', 'emp_yn', 'cust_grd_nm'])
    self.feature_dict['user']['sparse']['seq'] = np.array([])
    self.feature_dict['product']['dense']['single'] = np.array(
        ['tg_prd_norm_prc'])
    self.feature_dict['product']['dense']['seq'] = np.array(
        ['prd_norm_prc'])
    self.feature_dict['product']['sparse']['single'] = np.array(
        ['tg_prd_cd', 'tg_prd_brnd_nm', 'tg_prd_tp_cat_vl'])
    self.feature_dict['product']['sparse']['seq'] = np.array(
        ['prd_cd', 'prd_brnd_nm', 'prd_tp_cat_vl'])
    self.write_unique_file()
    # total number of sparse feature columns (drives top-MLP sizing)
    self.sparse_col_len = (
        len(self.feature_dict['user']['sparse']['single']) +
        len(self.feature_dict['user']['sparse']['seq']) +
        len(self.feature_dict['product']['sparse']['single']) +
        len(self.feature_dict['product']['sparse']['seq']))
    self.col_name = self.feature_dict
    self.batch_size = cf().path["data"]["batch_size"]
    self.shuffle = cf().path["data"]["shuffle"]
    self.num_workers = cf().path["data"]["num_workers"]
    self.drop_last = True