Code example #1
0
    def __init__(self, FLAGS, Embedding):
        """Store configuration, create the learning-rate placeholder and
        resolve the checkpoint directory, then finish model setup.

        :param FLAGS: parsed command-line flags (reads .version, .type,
            .experiment_type, .checkpoint_path_dir).
        :param Embedding: embedding component kept on the instance.
        """
        self.FLAGS = FLAGS
        self.version = self.FLAGS.version
        # Learning rate is fed at run time so it can be decayed externally.
        self.learning_rate = tf.placeholder(tf.float64, [], name="learning_rate")

        # Set the default checkpoint path when none was supplied.
        if self.FLAGS.checkpoint_path_dir is not None:
            self.checkpoint_path_dir = self.FLAGS.checkpoint_path_dir
        else:
            self.checkpoint_path_dir = ("data/check_point/" + self.FLAGS.type +
                                        "_" + self.FLAGS.experiment_type +
                                        "_" + self.version)
            # exist_ok avoids the race between the existence check and
            # the directory creation of the original code.
            os.makedirs(self.checkpoint_path_dir, exist_ok=True)

        self.init_optimizer()
        self.embedding = Embedding
        log_ins = create_log()
        self.logger = log_ins.logger
Code example #2
0
File: train_process.py  Project: Jane11111/TPP_V2
    def __init__(self):
        """Bootstrap the training process: parse flags, set up logging,
        load the train/test split and build the history embedding."""
        t0 = time.time()

        params = model_parameter()
        data_name = params.flags.FLAGS.data_name
        self.FLAGS = params.get_parameter(data_name).FLAGS

        log_ins = create_log(data_name=data_name,
                             model_name=self.FLAGS.model_name,
                             lr=self.FLAGS.learning_rate)
        self.logger = log_ins.logger
        self.logger.info("hello world the experiment begin")
        self.logger.info("the model parameter is : " +
                         str(self.FLAGS.flag_values_dict()))

        loader = DataLoader(self.FLAGS)

        self.logger.info("start loading dataset!")
        self.train_set, self.test_set = loader.load_train_test()
        print('test event len: %d' % (len(self.test_set)))
        self.logger.info("dataset loaded!")

        self.logger.info("DataHandle Process cost time: %.2fs" %
                         (time.time() - t0))
        t0 = time.time()

        self.emb = history_embedding(is_training=self.FLAGS.is_training,
                                     type_num=self.FLAGS.type_num,
                                     max_seq_len=self.FLAGS.max_seq_len,
                                     sims_len=self.FLAGS.sims_len,
                                     FLAGS=self.FLAGS)
        self.logger.info('get train test data process cost: %.2fs' %
                         (time.time() - t0))
Code example #3
0
    def __init__(self, is_training=True, config_file=None):
        """Record the embedding config path and training mode, create the
        shared logger and build the input placeholders.

        :param is_training: whether the model is built for training.
        :param config_file: path to the embedding configuration file.
        """
        self.embedding_file_path = config_file
        self.is_training = is_training
        self.logger = create_log().logger
        self.init_placeholders()
Code example #4
0
    def __init__(self, FLAGS):
        """Select the dataset and locate the processed origin-data file.

        If the processed origin data does not exist, get_type_data() is run
        and statistics are computed from the raw data.

        :param FLAGS: flags object; FLAGS.type names the dataset
            ("Tmall", "Amazon", ...) and the origin/raw data paths
            (origin_data_path, raw_data_path, raw_data_path_meta — the
            latter for "Amazon") come from the matching FLAGS fields.
        """
        self.logger = create_log().logger
        self.type = FLAGS.type
Code example #5
0
File: get_origin_data.py  Project: MRZhaowenxuan/DMPN
    def __init__(self, FLAGS):
        """Dispatch to the dataset-specific origin-data loader selected by
        FLAGS.type and expose its origin_data.

        :param FLAGS: flags object; FLAGS.type selects the dataset
            ('taobao', 'tmall', 'amazon', 'beauty', 'kindle', 'movie').
        :raises ValueError: if FLAGS.type names no known dataset.
        """
        self.FLAGS = FLAGS
        log_ins = create_log()
        self.logger = log_ins.logger

        data_type = self.FLAGS.type
        if data_type == 'taobao':
            self.get_origin_data_ins = Get_taobao_data(self.FLAGS)
        elif data_type == 'tmall':
            self.get_origin_data_ins = Get_tmall_data(self.FLAGS)
        elif data_type in ('amazon', 'beauty', 'kindle'):
            # The three Amazon-family datasets share one loader.
            self.get_origin_data_ins = Get_amzon_data(self.FLAGS)
        elif data_type == 'movie':
            self.get_origin_data_ins = Get_movie_data(self.FLAGS)
        else:
            # Previously an unknown type crashed below with an unbound
            # name; fail fast with a clear message instead.
            raise ValueError("unknown dataset type: %r" % data_type)

        self.origin_data = self.get_origin_data_ins.origin_data
Code example #6
0
    def __init__(self, FLAGS, Embedding):
        """Store flags, create the run-time hyper-parameter placeholders,
        resolve the checkpoint directory and finish model setup.

        :param FLAGS: parsed command-line flags (reads .checkpoint_path_dir
            and .model_name).
        :param Embedding: embedding component kept on the instance.
        """
        self.FLAGS = FLAGS
        # Both rates are fed at run time so they can be scheduled externally.
        self.learning_rate = tf.placeholder(
            tf.float64, [], name='learning_rate')
        self.llh_decay_rate = tf.placeholder(tf.float32, [],
                                             name='llh_decay_rate')
        if self.FLAGS.checkpoint_path_dir is not None:
            self.checkpoint_path_dir = self.FLAGS.checkpoint_path_dir
        else:
            self.checkpoint_path_dir = 'data/check_point/' + self.FLAGS.model_name
        # exist_ok avoids the check-then-create race of the original code.
        os.makedirs(self.checkpoint_path_dir, exist_ok=True)

        self.init_optimizer()
        self.embedding = Embedding
        log_ins = create_log()
        self.logger = log_ins.logger
Code example #7
0
              str(ndcg_value_sum / length))
        print('error:' + str(error))

        return


if __name__ == "__main__":

    start_time = time.time()
    model_parameter_ins = model_parameter()
    experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
    FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS
    FLAGS.type = sys.argv[1]

    log_ins = create_log(type=FLAGS.type,
                         experiment_type=FLAGS.experiment_type,
                         version=FLAGS.version)

    logger = log_ins.logger
    logger.info("hello world the experiment begin")

    # logger.info("The model parameter is :" + str(self.FLAGS._parse_flags()))

    if FLAGS.type == "yoochoose":
        get_origin_data_ins = Get_yoochoose_data(FLAGS=FLAGS)
        get_origin_data_ins.getDataStatistics()

    elif FLAGS.type == "movielen":
        get_origin_data_ins = Get_movie_data(FLAGS=FLAGS)

    elif FLAGS.type == "tmall":
Code example #8
0
    def __init__(self):
        """Bootstrap the training process: parse flags, configure logging,
        prepare the origin data, build the embedding layer and load the
        train/test split.

        :raises ValueError: if FLAGS.experiment_type is not a known model.
        """
        start_time = time.time()
        model_parameter_ins = model_parameter()
        experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
        self.FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS

        log_ins = create_log(type=self.FLAGS.type,
                             experiment_type=self.FLAGS.experiment_type,
                             version=self.FLAGS.version)
        self.logger = log_ins.logger
        self.logger.info("hello world the experiment begin")

        # Init data and embedding.
        get_origin_data_ins = Get_origin_data(FLAGS=self.FLAGS)

        # All behavior-sequence models share one data-preparation path;
        # a set membership test replaces the original 13-way `or` chain.
        behavior_models = {"dib", "no_emb", "slirec", "lstur", "sasrec",
                           "grurec", "bert", "dmpn", "atrank", "dmpn2",
                           "dmpn3", "dmpn4", "dfm"}

        if self.FLAGS.experiment_type in behavior_models:
            prepare_data_behavior_ins = prepare_data_behavior(
                self.FLAGS, get_origin_data_ins.origin_data)
        elif self.FLAGS.experiment_type == "bpr":
            prepare_data_behavior_ins = prepare_data_bpr(
                self.FLAGS, get_origin_data_ins.origin_data)
        else:
            # Previously an unknown type left prepare_data_behavior_ins
            # unbound and crashed further down; fail fast instead.
            raise ValueError("unknown experiment_type: %r"
                             % self.FLAGS.experiment_type)

        self.logger.info('DataHandle Process.\tCost time: %.2fs' % (time.time() - start_time))
        start_time = time.time()

        # Embedding selection.
        if self.FLAGS.experiment_type == "no_emb":
            config_file = "config/no_embedding__dic.csv"
            self.emb = No_embedding(self.FLAGS.is_training, config_file)
        elif self.FLAGS.experiment_type == "bpr":
            self.emb = Bprmf_embedding(self.FLAGS.is_training, self.FLAGS.embedding_config_file,
                                       prepare_data_behavior_ins.user_count,
                                       prepare_data_behavior_ins.item_count)
        else:
            self.emb = Lstur_embedding(self.FLAGS.is_training, self.FLAGS.embedding_config_file,
                                       prepare_data_behavior_ins.user_count,
                                       prepare_data_behavior_ins.item_count,
                                       prepare_data_behavior_ins.category_count,
                                       self.FLAGS.max_len)

        self.train_set, self.test_set = prepare_data_behavior_ins.get_train_test()
        self.logger.info('Get Train Test Data Process.\tCost time: %.2fs' % (time.time() - start_time))

        # Step/epoch counters used by the training loop.
        self.global_step = 0
        self.one_epoch_step = 0
        self.now_epoch = 0
Code example #9
0
    def __init__(self):
        """Bootstrap the experiment: parse flags, configure logging, load
        the dataset selected by FLAGS.type, build the train/test split and
        the time-aware attention embedding.

        :raises ValueError: if FLAGS.type is not a known dataset.
        """
        start_time = time.time()
        model_parameter_ins = model_parameter()
        experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
        self.FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS

        log_ins = create_log(type=self.FLAGS.type,
                             experiment_type=self.FLAGS.experiment_type,
                             version=self.FLAGS.version)
        self.logger = log_ins.logger
        self.logger.info("hello world the experiment begin")

        # One loader per dataset; "tmall" is the only one that skips the
        # statistics pass. The original code split this into two chains
        # ('if' restarting at "tmall"), which left get_origin_data_ins
        # unbound for unknown types — unified into a single elif chain.
        data_type = self.FLAGS.type
        if data_type == "yoochoose":
            get_origin_data_ins = Get_yoochoose_data(FLAGS=self.FLAGS)
            get_origin_data_ins.getDataStatistics()
        elif data_type == "movielen":
            get_origin_data_ins = Get_movie_data(FLAGS=self.FLAGS)
            get_origin_data_ins.getDataStatistics()
        elif data_type == "tmall":
            get_origin_data_ins = Get_tmall_data(FLAGS=self.FLAGS)
        elif data_type == "movie_tv":
            get_origin_data_ins = Get_amazon_data_movie_tv(FLAGS=self.FLAGS)
            get_origin_data_ins.getDataStatistics()
        elif data_type == "elec":
            get_origin_data_ins = Get_amazon_data_elec(FLAGS=self.FLAGS)
            get_origin_data_ins.getDataStatistics()
        elif data_type == "music":
            get_origin_data_ins = Get_amazon_data_music(FLAGS=self.FLAGS)
            get_origin_data_ins.getDataStatistics()
        elif data_type == 'taobaoapp':
            get_origin_data_ins = Get_taobaoapp_data(FLAGS=self.FLAGS)
            get_origin_data_ins.getDataStatistics()
        elif data_type == "beauty":
            get_origin_data_ins = Get_amazon_data_beauty(FLAGS=self.FLAGS)
            get_origin_data_ins.getDataStatistics()
        elif data_type == "brightkite":
            get_origin_data_ins = Get_BrightKite_data(FLAGS=self.FLAGS)
            get_origin_data_ins.getDataStatistics()
        elif data_type == "order":
            get_origin_data_ins = Get_Order_data(FLAGS=self.FLAGS)
            get_origin_data_ins.getDataStatistics()
        else:
            raise ValueError("unknown dataset type: %r" % data_type)

        prepare_data_behavior_ins = prepare_data_base(
            self.FLAGS, get_origin_data_ins.origin_data)
        self.train_set, self.test_set = prepare_data_behavior_ins.get_train_test()

        self.logger.info('DataHandle Process.\tCost time: %.2fs' %
                         (time.time() - start_time))
        start_time = time.time()

        self.emb = Behavior_embedding_time_aware_attention(
            is_training=self.FLAGS.is_training,
            user_count=prepare_data_behavior_ins.user_count,
            item_count=prepare_data_behavior_ins.item_count,
            category_count=prepare_data_behavior_ins.category_count,
            max_length_seq=self.FLAGS.length_of_user_history)

        self.logger.info('Get Train Test Data Process.\tCost time: %.2fs' %
                         (time.time() - start_time))

        self.item_category_dic = prepare_data_behavior_ins.item_category_dic
        # Step/epoch counters used by the training loop.
        self.global_step = 0
        self.one_epoch_step = 0
        self.now_epoch = 0
Code example #10
0
File: prepare_data_base.py  Project: WendyLNU/MTAMTPP
    def __init__(self, FLAGS, origin_data):
        """Prepare train/test data: either rebuild the index mappings from
        origin_data or load a previously written train/test split from disk.

        :param FLAGS: parsed command-line flags.
        :param origin_data: raw interaction records, processed when
            FLAGS.init_train_data is set.
        """
        self.FLAGS = FLAGS
        self.length = []
        self.type = FLAGS.type
        self.user_count_limit = FLAGS.user_count_limit
        self.test_frac = FLAGS.test_frac
        self.experiment_type = FLAGS.experiment_type
        self.neg_sample_ratio = FLAGS.neg_sample_ratio
        self.origin_data = origin_data

        # Counters for records dropped during preprocessing.
        self.data_type_error = 0
        self.data_too_short = 0

        # Directory holding the processed train/test files for this config.
        self.dataset_path = ('data/training_testing_data/' + self.type + "_" +
                             self.FLAGS.pos_embedding + "_" +
                             self.FLAGS.experiment_data_type + '_' +
                             self.FLAGS.causality)
        # makedirs (vs the original os.mkdir) also creates missing parent
        # directories, and exist_ok removes the check-then-create race.
        os.makedirs(self.dataset_path, exist_ok=True)

        self.dataset_class_pkl = os.path.join(self.dataset_path,
                                              'parameters.pkl')
        self.dataset_class_train = os.path.join(self.dataset_path,
                                                'train_data.txt')
        self.dataset_class_test = os.path.join(self.dataset_path,
                                               'test_data.txt')
        self.mask_rate = self.FLAGS.mask_rate

        log_ins = create_log()
        self.logger = log_ins.logger

        if FLAGS.init_train_data:
            # Rebuild index mappings for items, users and categories.
            self.map_process()
        else:
            def _load_records(path, limit):
                # Read at most limit+1 records, one Python literal per line
                # (matches the original break-after-append behavior).
                # SECURITY: eval() executes arbitrary code from the file;
                # acceptable for project-generated files, but use
                # ast.literal_eval for any untrusted source.
                records = []
                with open(path, 'r') as f:
                    for line in f:
                        records.append(eval(line))
                        if len(records) > limit:
                            break
                return records

            self.train_set = _load_records(self.dataset_class_train, 50000)
            self.test_set = _load_records(self.dataset_class_test, 5000)

            with open(self.dataset_class_pkl, 'rb') as f:
                data_dic = pickle.load(f)
            self.item_count = data_dic["item_count"]
            self.user_count = data_dic["user_count"]
            self.category_count = data_dic["category_count"]
            self.item_category_dic = data_dic["item_category"]
            self.logger.info("load data finish")
            self.logger.info('Size of training set is ' +
                             str(len(self.train_set)))
            self.logger.info('Size of testing set is ' +
                             str(len(self.test_set)))
            del data_dic

        self.init_train_data = FLAGS.init_train_data
Code example #11
0
    def __init__(self, FLAGS, origin_data):
        """Prepare train/test data: either rebuild everything from
        origin_data or load a previously pickled train/test split.

        :param FLAGS: parsed command-line flags.
        :param origin_data: raw interaction records, used when
            FLAGS.init_origin_data is set.
        """
        self.FLAGS = FLAGS
        self.length = []
        self.type = FLAGS.type
        self.user_count_limit = FLAGS.user_count_limit
        self.test_frac = FLAGS.test_frac
        self.experiment_type = FLAGS.experiment_type
        self.neg_sample_ratio = FLAGS.neg_sample_ratio
        self.max_len = FLAGS.max_len
        self.model = FLAGS.experiment_type

        # Only these datasets carry an action field.
        self.use_action = self.type in ("tmall", "taobao")

        # Counters for records dropped during preprocessing.
        self.data_type_error = 0
        self.data_too_short = 0

        # Number of reserved ids at the start of each vocabulary.
        self.offset = 3

        # Id used as the random target placeholder.
        self.target_random_value = self.offset - 2

        # Make the origin-data directory (exist_ok removes the
        # check-then-create race of the original code).
        self.dataset_path = 'data/training_testing_data/'
        os.makedirs(self.dataset_path, exist_ok=True)

        # Only the bpr variant embeds the model name in the file name; the
        # two original branches differed solely in that tag.
        model_tag = self.model + "_" if self.model == "bpr" else ""
        self.dataset_class_path = (self.dataset_path + self.type + "_" +
                                   model_tag + self.FLAGS.pos_embedding +
                                   "_" + self.FLAGS.causality +
                                   '_train_test_class.pkl')

        self.mask_rate = self.FLAGS.mask_rate
        log_ins = create_log()
        self.logger = log_ins.logger

        # Init from raw data or load the cached split.
        if FLAGS.init_origin_data:
            self.origin_data = origin_data
            self.get_gap_list(FLAGS.gap_num)
            self.map_process()
            self.filter_repetition()
        else:
            with open(self.dataset_class_path, 'rb') as f:
                data_dic = pickle.load(f)
            self.train_set = data_dic["train_set"]
            self.test_set = data_dic["test_set"]
            # Don't need too large a test set.
            if len(self.test_set) > 3500:
                self.test_set = random.sample(self.test_set, 3500)

            self.item_count = data_dic["item_count"]
            self.user_count = data_dic["user_count"]
            self.category_count = data_dic["category_count"]
            self.logger.info("load data finish")
            self.logger.info('Size of training set is ' + str(len(self.train_set)))
            self.logger.info('Size of testing set is ' + str(len(self.test_set)))
            del data_dic

        self.init_origin_data = FLAGS.init_origin_data
Code example #12
0
    def __init__(self,
                 init=False,
                 user_h=None,
                 short_term_intent=None,
                 attention_result=None,
                 item_table=None,
                 item_category_dic=None):
        """Collect the tensors needed for picture generation.

        When init is True, rebuild everything: parse flags, load the data,
        restore the trained model from its checkpoint and run it on the
        test set to obtain the tensors, which are also pickled under
        data/gen_pic/. When init is False, simply store the tensors the
        caller passes in.
        """
        if init:
            model_parameter_ins = model_parameter()
            experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
            self.FLAGS = model_parameter_ins.get_parameter(
                experiment_name).FLAGS
            log_ins = create_log(type=self.FLAGS.type,
                                 experiment_type=self.FLAGS.experiment_type,
                                 version=self.FLAGS.version)
            self.logger = log_ins.logger

            get_origin_data_ins = Get_origin_data(
                type=self.FLAGS.type,
                raw_data_path=self.FLAGS.raw_data_path,
                raw_data_path_meta=self.FLAGS.raw_data_path_meta,
                logger=self.logger)

            origin_data = get_origin_data_ins.origin_data
            get_train_test_ins = Get_train_test(FLAGS=self.FLAGS,
                                                origin_data=origin_data)
            self.item_category_dic = get_train_test_ins.item_category_dic
            self.train_set, self.test_set = get_train_test_ins.get_train_test(
                mask_rate=self.FLAGS.mask_rate)

            self.sess = tf.Session()
            self.emb = Behavior_embedding_nodec(
                self.FLAGS.is_training, self.FLAGS.embedding_config_file)
            self.model = ISTSBP_model(self.FLAGS, self.emb, self.sess)

            # Feed the whole test set as one batch.
            input_dic = self.emb.make_feed_dic(batch_data=self.test_set)
            input_dic[self.model.now_bacth_data_size] = len(self.test_set)

            self.model.restore(
                self.sess,
                path="data/check_point/Amazon_istsbp_vanilla_lr_0.001")
            # Reset the reserved embedding rows to zero.
            self.model.init_reserved_field(self.sess)

            # Fetch the tensors used for the plots.
            self.user_h, self.short_term_intent, self.attention_result, self.item_table \
                = self.sess.run([self.model.user_h, self.model.short_term_intent,
                                 self.model.attention_result, self.emb.item_emb_lookup_table], input_dic)

            with open("data/gen_pic/user.h", 'wb') as f:
                pickle.dump(self.user_h, f, pickle.HIGHEST_PROTOCOL)
            with open("data/gen_pic/item_table.h", 'wb') as f:
                pickle.dump(self.item_table, f, pickle.HIGHEST_PROTOCOL)
            with open("data/gen_pic/item_category_dic", 'wb') as f:
                # BUG FIX: the original dumped the parameter
                # item_category_dic (normally None in this branch) instead
                # of the mapping computed from get_train_test_ins above.
                pickle.dump(self.item_category_dic, f, pickle.HIGHEST_PROTOCOL)
        else:
            # Tensors were computed elsewhere; just store them.
            self.user_h = user_h
            self.short_term_intent = short_term_intent
            self.attention_result = attention_result
            self.item_table = item_table
            self.item_category_dic = item_category_dic