def __init__(self, FLAGS, Embedding):
    self.FLAGS = FLAGS
    self.version = self.FLAGS.version
    self.learning_rate = tf.placeholder(tf.float64, [], name="learning_rate")
    # self.embedding = Embedding
    # Set the default checkpoint path
    if self.FLAGS.checkpoint_path_dir is not None:
        self.checkpoint_path_dir = self.FLAGS.checkpoint_path_dir
    else:
        self.checkpoint_path_dir = "data/check_point/" + self.FLAGS.type + "_" + \
            self.FLAGS.experiment_type + "_" + self.version
    if not os.path.exists(self.checkpoint_path_dir):
        os.makedirs(self.checkpoint_path_dir)

    self.init_optimizer()
    self.embedding = Embedding

    log_ins = create_log()
    self.logger = log_ins.logger
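# A minimal, self-contained sketch (hypothetical helper name, illustrative
# argument values) of the default checkpoint-directory convention used above
# when no checkpoint_path_dir flag is given:
# "data/check_point/<type>_<experiment_type>_<version>", created on demand.
import os

def default_checkpoint_dir(data_type, experiment_type, version,
                           root="data/check_point"):
    """Build (and create, if missing) the default checkpoint directory."""
    path = os.path.join(root, "%s_%s_%s" % (data_type, experiment_type, version))
    if not os.path.exists(path):
        os.makedirs(path)
    return path

# Example: default_checkpoint_dir("taobao", "sasrec", "v1")
# -> "data/check_point/taobao_sasrec_v1"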
def __init__(self):
    start_time = time.time()
    model_parameter_ins = model_parameter()
    data_name = model_parameter_ins.flags.FLAGS.data_name
    self.FLAGS = model_parameter_ins.get_parameter(data_name).FLAGS

    log_ins = create_log(data_name=data_name, model_name=self.FLAGS.model_name,
                         lr=self.FLAGS.learning_rate)
    self.logger = log_ins.logger
    self.logger.info("hello world, the experiment begins")
    self.logger.info("the model parameters are: " + str(self.FLAGS.flag_values_dict()))

    prepare_data_ins = DataLoader(self.FLAGS)
    self.logger.info("start loading dataset!")
    self.train_set, self.test_set = prepare_data_ins.load_train_test()
    print('test event len: %d' % (len(self.test_set)))
    self.logger.info("dataset loaded!")
    self.logger.info("DataHandle Process cost time: %.2fs" % (time.time() - start_time))

    start_time = time.time()
    self.emb = history_embedding(is_training=self.FLAGS.is_training,
                                 type_num=self.FLAGS.type_num,
                                 max_seq_len=self.FLAGS.max_seq_len,
                                 sims_len=self.FLAGS.sims_len,
                                 FLAGS=self.FLAGS)
    self.logger.info('get train test data process cost: %.2fs' % (time.time() - start_time))
def __init__(self, is_training=True, config_file=None):
    self.embedding_file_path = config_file
    # self.embedding_dic = embedding_csv_dic(self.embedding_file_path)
    self.is_training = is_training
    # log_ins = create_log(type=self.FLAGS.type, experiment_type=self.FLAGS.experiment_type, version=self.FLAGS.version)
    log_ins = create_log()
    self.logger = log_ins.logger
    self.init_placeholders()
def __init__(self, FLAGS):
    """
    Choose the dataset and get the processed origin data file.
    If the processed origin data does not exist, run get_type_data() and
    compute statistics from the raw data.

    :param type: "Tmall" or "Amazon", the dataset type.
    origin_data_path : a string
    raw_data_path : a string
    raw_data_path_meta : a string, used for "Amazon"
    """
    log_ins = create_log()
    self.logger = log_ins.logger
    self.type = FLAGS.type
def __init__(self, FLAGS):
    self.FLAGS = FLAGS
    log_ins = create_log()
    self.logger = log_ins.logger

    if self.FLAGS.type == 'taobao':
        self.get_origin_data_ins = Get_taobao_data(self.FLAGS)
    elif self.FLAGS.type == 'tmall':
        self.get_origin_data_ins = Get_tmall_data(self.FLAGS)
    elif self.FLAGS.type in ('amazon', 'beauty', 'kindle'):
        self.get_origin_data_ins = Get_amzon_data(self.FLAGS)
    elif self.FLAGS.type == 'movie':
        self.get_origin_data_ins = Get_movie_data(self.FLAGS)

    self.origin_data = self.get_origin_data_ins.origin_data
def __init__(self, FLAGS, Embedding):
    self.FLAGS = FLAGS
    # The learning rate is fed in as a placeholder instead of being fixed in the graph.
    self.learning_rate = tf.placeholder(tf.float64, [], name='learning_rate')
    self.llh_decay_rate = tf.placeholder(tf.float32, [], name='llh_decay_rate')

    if self.FLAGS.checkpoint_path_dir is not None:
        self.checkpoint_path_dir = self.FLAGS.checkpoint_path_dir
    else:
        self.checkpoint_path_dir = 'data/check_point/' + self.FLAGS.model_name
    if not os.path.exists(self.checkpoint_path_dir):
        os.makedirs(self.checkpoint_path_dir)

    self.init_optimizer()
    self.embedding = Embedding

    log_ins = create_log()
    self.logger = log_ins.logger
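# A minimal, self-contained sketch (TensorFlow 1.x style, toy variable and loss)
# of why learning_rate is declared as a placeholder above: the Python training
# loop can decay the rate and feed a new value on every step.
import tensorflow as tf

learning_rate = tf.placeholder(tf.float64, [], name="learning_rate")
w = tf.Variable(5.0, dtype=tf.float64)
loss = tf.square(w)
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    lr = 0.1
    for step in range(10):
        _, loss_val = sess.run([train_op, loss], feed_dict={learning_rate: lr})
        lr *= 0.9  # decay the learning rate outside the graph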
          str(ndcg_value_sum / length))
    print('error:' + str(error))
    return


if __name__ == "__main__":
    start_time = time.time()
    model_parameter_ins = model_parameter()
    experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
    FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS
    FLAGS.type = sys.argv[1]

    log_ins = create_log(type=FLAGS.type,
                         experiment_type=FLAGS.experiment_type,
                         version=FLAGS.version)
    logger = log_ins.logger
    logger.info("hello world, the experiment begins")
    # logger.info("The model parameter is: " + str(FLAGS._parse_flags()))

    if FLAGS.type == "yoochoose":
        get_origin_data_ins = Get_yoochoose_data(FLAGS=FLAGS)
        get_origin_data_ins.getDataStatistics()
    elif FLAGS.type == "movielen":
        get_origin_data_ins = Get_movie_data(FLAGS=FLAGS)
    elif FLAGS.type == "tmall":
def __init__(self):
    start_time = time.time()
    model_parameter_ins = model_parameter()
    experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
    self.FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS

    log_ins = create_log(type=self.FLAGS.type,
                         experiment_type=self.FLAGS.experiment_type,
                         version=self.FLAGS.version)
    self.logger = log_ins.logger
    self.logger.info("hello world, the experiment begins")
    # logger.info("The model parameter is: " + str(self.FLAGS._parse_flags()))

    # Init data and embedding
    get_origin_data_ins = Get_origin_data(FLAGS=self.FLAGS)
    if self.FLAGS.experiment_type in ("dib", "no_emb", "slirec", "lstur",
                                      "sasrec", "grurec", "bert", "dmpn",
                                      "atrank", "dmpn2", "dmpn3", "dmpn4",
                                      "dfm"):
        prepare_data_behavior_ins = prepare_data_behavior(
            self.FLAGS, get_origin_data_ins.origin_data)
    elif self.FLAGS.experiment_type == "bpr":
        prepare_data_behavior_ins = prepare_data_bpr(
            self.FLAGS, get_origin_data_ins.origin_data)
    self.logger.info('DataHandle Process.\tCost time: %.2fs' % (time.time() - start_time))

    start_time = time.time()
    # Embedding
    if self.FLAGS.experiment_type == "no_emb":
        config_file = "config/no_embedding__dic.csv"
        self.emb = No_embedding(self.FLAGS.is_training, config_file)
    elif self.FLAGS.experiment_type == "bpr":
        self.emb = Bprmf_embedding(self.FLAGS.is_training,
                                   self.FLAGS.embedding_config_file,
                                   prepare_data_behavior_ins.user_count,
                                   prepare_data_behavior_ins.item_count)
    else:
        self.emb = Lstur_embedding(self.FLAGS.is_training,
                                   self.FLAGS.embedding_config_file,
                                   prepare_data_behavior_ins.user_count,
                                   prepare_data_behavior_ins.item_count,
                                   prepare_data_behavior_ins.category_count,
                                   self.FLAGS.max_len)

    self.train_set, self.test_set = prepare_data_behavior_ins.get_train_test()
    self.logger.info('Get Train Test Data Process.\tCost time: %.2fs' % (time.time() - start_time))
    # self.item_category_dic = prepare_data_behavior_ins.item_category_dic

    self.global_step = 0
    self.one_epoch_step = 0
    self.now_epoch = 0
def __init__(self):
    start_time = time.time()
    model_parameter_ins = model_parameter()
    experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
    self.FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS

    log_ins = create_log(type=self.FLAGS.type,
                         experiment_type=self.FLAGS.experiment_type,
                         version=self.FLAGS.version)
    self.logger = log_ins.logger
    self.logger.info("hello world, the experiment begins")
    # logger.info("The model parameter is: " + str(self.FLAGS._parse_flags()))

    # Pick the origin-data loader for the configured dataset type.
    if self.FLAGS.type == "yoochoose":
        get_origin_data_ins = Get_yoochoose_data(FLAGS=self.FLAGS)
        get_origin_data_ins.getDataStatistics()
    elif self.FLAGS.type == "movielen":
        get_origin_data_ins = Get_movie_data(FLAGS=self.FLAGS)
        get_origin_data_ins.getDataStatistics()
    elif self.FLAGS.type == "tmall":
        get_origin_data_ins = Get_tmall_data(FLAGS=self.FLAGS)
    elif self.FLAGS.type == "movie_tv":
        get_origin_data_ins = Get_amazon_data_movie_tv(FLAGS=self.FLAGS)
        get_origin_data_ins.getDataStatistics()
    elif self.FLAGS.type == "elec":
        get_origin_data_ins = Get_amazon_data_elec(FLAGS=self.FLAGS)
        get_origin_data_ins.getDataStatistics()
    elif self.FLAGS.type == "music":
        get_origin_data_ins = Get_amazon_data_music(FLAGS=self.FLAGS)
        get_origin_data_ins.getDataStatistics()
    elif self.FLAGS.type == 'taobaoapp':
        get_origin_data_ins = Get_taobaoapp_data(FLAGS=self.FLAGS)
        get_origin_data_ins.getDataStatistics()
    elif self.FLAGS.type == "beauty":
        get_origin_data_ins = Get_amazon_data_beauty(FLAGS=self.FLAGS)
        get_origin_data_ins.getDataStatistics()
    elif self.FLAGS.type == "brightkite":
        get_origin_data_ins = Get_BrightKite_data(FLAGS=self.FLAGS)
        get_origin_data_ins.getDataStatistics()
    elif self.FLAGS.type == "order":
        get_origin_data_ins = Get_Order_data(FLAGS=self.FLAGS)
        get_origin_data_ins.getDataStatistics()

    # get_train_test_ins = Get_train_test(FLAGS=self.FLAGS, origin_data=get_origin_data_ins.origin_data)
    prepare_data_behavior_ins = prepare_data_base(
        self.FLAGS, get_origin_data_ins.origin_data)
    self.train_set, self.test_set = prepare_data_behavior_ins.get_train_test()
    # Fetch part of test_data
    # if len(self.train_set) > 2000000:
    #     self.test_set = random.sample(self.train_set, 2000000)
    # self.test_set = self.test_set.sample(3500)
    self.logger.info('DataHandle Process.\tCost time: %.2fs' % (time.time() - start_time))

    start_time = time.time()
    self.emb = Behavior_embedding_time_aware_attention(
        is_training=self.FLAGS.is_training,
        user_count=prepare_data_behavior_ins.user_count,
        item_count=prepare_data_behavior_ins.item_count,
        category_count=prepare_data_behavior_ins.category_count,
        max_length_seq=self.FLAGS.length_of_user_history)
    self.logger.info('Get Train Test Data Process.\tCost time: %.2fs' % (time.time() - start_time))

    self.item_category_dic = prepare_data_behavior_ins.item_category_dic
    self.global_step = 0
    self.one_epoch_step = 0
    self.now_epoch = 0
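# A minimal, self-contained sketch (stub loader class, hypothetical) of how the
# long FLAGS.type if/elif chain above could be expressed as a dictionary
# dispatch. The real loaders (Get_tmall_data, Get_amazon_data_elec, ...) take
# FLAGS and expose .origin_data and getDataStatistics(); the stub below only
# illustrates the lookup pattern.
class StubOriginData:
    def __init__(self, FLAGS=None):
        self.origin_data = []      # placeholder for the loaded records

    def getDataStatistics(self):
        pass                       # placeholder for the statistics step

ORIGIN_DATA_LOADERS = {
    "tmall": StubOriginData,
    "movie_tv": StubOriginData,
    "elec": StubOriginData,
    "music": StubOriginData,
    "taobaoapp": StubOriginData,
    "beauty": StubOriginData,
    "brightkite": StubOriginData,
    "order": StubOriginData,
}

def build_origin_data(data_type, FLAGS=None):
    """Instantiate the loader registered for data_type and run its statistics."""
    loader = ORIGIN_DATA_LOADERS[data_type](FLAGS)   # KeyError for unsupported types
    loader.getDataStatistics()
    return loader

# Example: build_origin_data("elec").origin_data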
def __init__(self, FLAGS, origin_data):
    self.FLAGS = FLAGS
    self.length = []
    self.type = FLAGS.type
    self.user_count_limit = FLAGS.user_count_limit
    self.test_frac = FLAGS.test_frac
    self.experiment_type = FLAGS.experiment_type
    self.neg_sample_ratio = FLAGS.neg_sample_ratio
    self.origin_data = origin_data
    self.data_type_error = 0
    self.data_too_short = 0

    # Give the random target value
    # self.target_random_value

    # Make the origin data dir
    self.dataset_path = 'data/training_testing_data/' + self.type + "_" + \
        self.FLAGS.pos_embedding + "_" + \
        self.FLAGS.experiment_data_type + '_' + \
        self.FLAGS.causality
    if not os.path.exists(self.dataset_path):
        os.mkdir(self.dataset_path)
    self.dataset_class_pkl = os.path.join(self.dataset_path, 'parameters.pkl')
    self.dataset_class_train = os.path.join(self.dataset_path, 'train_data.txt')
    self.dataset_class_test = os.path.join(self.dataset_path, 'test_data.txt')

    self.mask_rate = self.FLAGS.mask_rate
    log_ins = create_log()
    self.logger = log_ins.logger

    # Init or load
    if FLAGS.init_train_data == True:
        self.origin_data = origin_data
        # Init index for items, users and categories
        self.map_process()
    # Load previously generated data
    else:
        # Load train data; each line is a Python literal parsed with eval()
        with open(self.dataset_class_train, 'r') as f:
            self.train_set = []
            for line in f.readlines():
                self.train_set.append(eval(line))
                if len(self.train_set) > 50000:
                    break
            # if len(self.train_set) > 10000:
            #     self.train_set = random.sample(self.train_set, 10000)

        # Load test data
        with open(self.dataset_class_test, 'r') as f:
            self.test_set = []
            for line in f.readlines():
                self.test_set.append(eval(line))
                if len(self.test_set) > 5000:
                    break
            # Don't need too large a data set
            # if len(self.test_set) > 10000:
            #     self.test_set = random.sample(self.test_set, 10000)

        with open(self.dataset_class_pkl, 'rb') as f:
            data_dic = pickle.load(f)
        self.item_count = data_dic["item_count"]
        self.user_count = data_dic["user_count"]
        self.category_count = data_dic["category_count"]
        # self.gap = data_dic["gap"]
        self.item_category_dic = data_dic["item_category"]
        self.logger.info("load data finished")
        self.logger.info('Size of training set is ' + str(len(self.train_set)))
        self.logger.info('Size of testing set is ' + str(len(self.test_set)))
        del data_dic

    self.init_train_data = FLAGS.init_train_data
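# A minimal, self-contained sketch (hypothetical file name and record layout) of
# the train_data.txt / test_data.txt format the loader above expects: one Python
# literal per line, read back with eval(). ast.literal_eval is used here as the
# safer equivalent for files that are not fully trusted.
import ast

records = [
    [1, [10, 11, 12], [3, 3, 4], 42],   # hypothetical: user, item seq, category seq, target
    [2, [20, 21], [5, 5], 7],
]

with open("train_data_example.txt", "w") as f:
    for rec in records:
        f.write(str(rec) + "\n")

train_set = []
with open("train_data_example.txt", "r") as f:
    for line in f:
        train_set.append(ast.literal_eval(line))   # same result as eval(line) here
        if len(train_set) > 50000:                 # cap mirrors the loader above
            break

print(len(train_set), train_set[0])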
def __init__(self, FLAGS, origin_data):
    self.FLAGS = FLAGS
    self.length = []
    self.type = FLAGS.type
    self.user_count_limit = FLAGS.user_count_limit
    self.test_frac = FLAGS.test_frac
    self.experiment_type = FLAGS.experiment_type
    self.neg_sample_ratio = FLAGS.neg_sample_ratio
    self.max_len = FLAGS.max_len
    self.model = FLAGS.experiment_type

    # Decide whether the dataset carries action information
    if self.type == "tmall" or self.type == "taobao":
        self.use_action = True
    else:
        self.use_action = False

    self.data_type_error = 0
    self.data_too_short = 0
    # Reserved field
    self.offset = 3
    # Give the random target value
    self.target_random_value = self.offset - 2

    # Make the origin data dir
    self.dataset_path = 'data/training_testing_data/'
    if not os.path.exists(self.dataset_path):
        os.mkdir(self.dataset_path)
    if self.model == "bpr":
        self.dataset_class_path = self.dataset_path + self.type + "_" + self.model + "_" + \
            self.FLAGS.pos_embedding + "_" + self.FLAGS.causality + '_train_test_class.pkl'
    else:
        self.dataset_class_path = self.dataset_path + self.type + "_" + \
            self.FLAGS.pos_embedding + "_" + self.FLAGS.causality + '_train_test_class.pkl'

    self.mask_rate = self.FLAGS.mask_rate
    log_ins = create_log()
    self.logger = log_ins.logger

    # Init or load
    if FLAGS.init_origin_data == True:
        self.origin_data = origin_data
        self.get_gap_list(FLAGS.gap_num)
        self.map_process()
        self.filter_repetition()
    # Load previously generated data
    else:
        with open(self.dataset_class_path, 'rb') as f:
            data_dic = pickle.load(f)
        self.train_set = data_dic["train_set"]
        self.test_set = data_dic["test_set"]
        # Don't need too large a test set
        if len(self.test_set) > 3500:
            self.test_set = random.sample(self.test_set, 3500)
        self.item_count = data_dic["item_count"]
        self.user_count = data_dic["user_count"]
        self.category_count = data_dic["category_count"]
        # self.gap = data_dic["gap"]
        # self.item_category_dic = data_dic["item_category"]
        self.logger.info("load data finished")
        self.logger.info('Size of training set is ' + str(len(self.train_set)))
        self.logger.info('Size of testing set is ' + str(len(self.test_set)))
        del data_dic

    self.init_origin_data = FLAGS.init_origin_data
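# A minimal, self-contained sketch (hypothetical file name, toy values) of the
# "*_train_test_class.pkl" layout the loading branch above expects: a single
# dict keyed by train_set, test_set, item_count, user_count and category_count.
import pickle

data_dic = {
    "train_set": [[1, [10, 11], [3, 3], 42]],   # hypothetical record layout
    "test_set": [[2, [20, 21], [5, 5], 7]],
    "item_count": 100,
    "user_count": 10,
    "category_count": 6,
}

with open("example_train_test_class.pkl", "wb") as f:
    pickle.dump(data_dic, f, pickle.HIGHEST_PROTOCOL)

with open("example_train_test_class.pkl", "rb") as f:
    loaded = pickle.load(f)
print(loaded["user_count"], len(loaded["train_set"]))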
def __init__(self, init=False, user_h=None, short_term_intent=None,
             attention_result=None, item_table=None, item_category_dic=None):
    if init == True:
        model_parameter_ins = model_parameter()
        experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
        self.FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS

        log_ins = create_log(type=self.FLAGS.type,
                             experiment_type=self.FLAGS.experiment_type,
                             version=self.FLAGS.version)
        self.logger = log_ins.logger
        # self.model.user_h
        # self.model.short_term_intent
        # self.model.attention_result

        get_origin_data_ins = Get_origin_data(
            type=self.FLAGS.type,
            raw_data_path=self.FLAGS.raw_data_path,
            raw_data_path_meta=self.FLAGS.raw_data_path_meta,
            logger=self.logger)
        origin_data = get_origin_data_ins.origin_data
        get_train_test_ins = Get_train_test(FLAGS=self.FLAGS,
                                            origin_data=origin_data)
        self.item_category_dic = get_train_test_ins.item_category_dic
        self.train_set, self.test_set = get_train_test_ins.get_train_test(
            mask_rate=self.FLAGS.mask_rate)

        self.sess = tf.Session()
        self.emb = Behavior_embedding_nodec(
            self.FLAGS.is_training, self.FLAGS.embedding_config_file)
        self.model = ISTSBP_model(self.FLAGS, self.emb, self.sess)

        # Initiate TF session
        # with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        input_dic = self.emb.make_feed_dic(batch_data=self.test_set)
        input_dic[self.model.now_bacth_data_size] = len(self.test_set)
        self.model.restore(
            self.sess, path="data/check_point/Amazon_istsbp_vanilla_lr_0.001")
        # Reset the reserved field to zero
        self.model.init_reserved_field(self.sess)

        # Get the data used for the plots
        self.user_h, self.short_term_intent, self.attention_result, self.item_table = \
            self.sess.run([self.model.user_h,
                           self.model.short_term_intent,
                           self.model.attention_result,
                           self.emb.item_emb_lookup_table], input_dic)

        with open("data/gen_pic/user.h", 'wb') as f:
            pickle.dump(self.user_h, f, pickle.HIGHEST_PROTOCOL)
        with open("data/gen_pic/item_table.h", 'wb') as f:
            pickle.dump(self.item_table, f, pickle.HIGHEST_PROTOCOL)
        with open("data/gen_pic/item_category_dic", 'wb') as f:
            pickle.dump(item_category_dic, f, pickle.HIGHEST_PROTOCOL)
    else:
        self.user_h = user_h
        self.short_term_intent = short_term_intent
        self.attention_result = attention_result
        self.item_table = item_table
        self.item_category_dic = item_category_dic