Ejemplo n.º 1
0
    def __init__(self):
        """Prepare flags, logger, dataset split and the history embedding.

        The flag set is looked up under the configured ``data_name``; the
        train/test split comes from ``DataLoader.load_train_test`` and the
        embedding from ``history_embedding``. The elapsed time of the data
        phase and of the embedding phase is written to the logger.
        """
        start_time = time.time()

        # Resolve the flag set registered under the configured data name.
        model_parameter_ins = model_parameter()
        data_name = model_parameter_ins.flags.FLAGS.data_name
        self.FLAGS = model_parameter_ins.get_parameter(data_name).FLAGS
        flags = self.FLAGS

        # The logger is created from the data name, model name and
        # learning rate of this run.
        log_ins = create_log(
            data_name=data_name,
            model_name=flags.model_name,
            lr=flags.learning_rate,
        )
        self.logger = log_ins.logger
        self.logger.info("hello world the experiment begin")
        parameter_dump = str(flags.flag_values_dict())
        self.logger.info("the model parameter is : " + parameter_dump)

        prepare_data_ins = DataLoader(flags)

        self.logger.info("start loading dataset!")
        loaded_split = prepare_data_ins.load_train_test()
        self.train_set, self.test_set = loaded_split
        test_event_count = len(self.test_set)
        print('test event len: %d' % test_event_count)
        self.logger.info("dataset loaded!")

        data_elapsed = time.time() - start_time
        self.logger.info("DataHandle Process cost time: %.2fs" % data_elapsed)

        # Restart the clock so the next log line covers only the embedding.
        start_time = time.time()
        embedding_kwargs = dict(
            is_training=flags.is_training,
            type_num=flags.type_num,
            max_seq_len=flags.max_seq_len,
            sims_len=flags.sims_len,
            FLAGS=flags,
        )
        self.emb = history_embedding(**embedding_kwargs)
        embedding_elapsed = time.time() - start_time
        self.logger.info('get train test data process cost: %.2fs' %
                         embedding_elapsed)
Ejemplo n.º 2
0
                    ndcg_value_sum = ndcg_value_sum + ndcg_value
                    break

        print("P Pop the recall rate is: " + str(topk) + " " +
              str(recall_count / length))
        print("P Pop the ndcg value is: " + str(topk) + " " +
              str(ndcg_value_sum / length))
        print('error:' + str(error))

        return


# Script entry point: resolve the experiment flags, create the logger and
# pick the raw-data loader for the requested dataset type.
if __name__ == "__main__":

    start_time = time.time()
    # Look up the flag set registered under the configured experiment name.
    model_parameter_ins = model_parameter()
    experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
    FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS
    # The dataset type comes from the first command-line argument and
    # overwrites whatever the flag set held.
    # NOTE(review): raises IndexError when the script is run without arguments.
    FLAGS.type = sys.argv[1]

    log_ins = create_log(type=FLAGS.type,
                         experiment_type=FLAGS.experiment_type,
                         version=FLAGS.version)

    logger = log_ins.logger
    logger.info("hello world the experiment begin")

    # Disabled parameter dump (refers to self.FLAGS, which does not exist at
    # script level):
    # logger.info("The model parameter is :" + str(self.FLAGS._parse_flags()))

    # Select the raw-data loader that matches the requested dataset type.
    if FLAGS.type == "yoochoose":
        get_origin_data_ins = Get_yoochoose_data(FLAGS=FLAGS)
Ejemplo n.º 3
0
    def __init__(self):
        """Set up flags, logging, data preparation and the embedding.

        The flag set is looked up under the configured experiment name.
        ``FLAGS.experiment_type`` then decides which data-preparation class
        and which embedding class are instantiated, after which the
        train/test split is fetched and the step/epoch counters are reset.

        Raises:
            ValueError: if ``FLAGS.experiment_type`` is not a supported type.
        """
        start_time = time.time()
        model_parameter_ins = model_parameter()
        experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
        self.FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS

        log_ins = create_log(type=self.FLAGS.type,
                             experiment_type=self.FLAGS.experiment_type,
                             version=self.FLAGS.version)
        self.logger = log_ins.logger
        self.logger.info("hello world the experiment begin")

        experiment_type = self.FLAGS.experiment_type
        # Experiment types that are fed by the generic behavior pipeline.
        behavior_experiment_types = (
            "dib", "no_emb", "slirec", "lstur", "sasrec", "grurec", "bert",
            "dmpn", "atrank", "dmpn2", "dmpn3", "dmpn4", "dfm",
        )

        # Fail fast on an unknown type: previously no data-preparation object
        # was created for it and the embedding step crashed on an unbound
        # local variable instead of reporting the actual problem.
        if (experiment_type != "bpr"
                and experiment_type not in behavior_experiment_types):
            raise ValueError(
                "unsupported experiment_type: %r" % (experiment_type,))

        # init data and embedding
        get_origin_data_ins = Get_origin_data(FLAGS=self.FLAGS)
        if experiment_type == "bpr":
            prepare_data_behavior_ins = prepare_data_bpr(
                self.FLAGS, get_origin_data_ins.origin_data)
        else:
            prepare_data_behavior_ins = prepare_data_behavior(
                self.FLAGS, get_origin_data_ins.origin_data)

        self.logger.info('DataHandle Process.\tCost time: %.2fs' %
                         (time.time() - start_time))
        # Restart the clock for the embedding / train-test phase.
        start_time = time.time()

        # embedding
        if experiment_type == "no_emb":
            config_file = "config/no_embedding__dic.csv"
            self.emb = No_embedding(self.FLAGS.is_training, config_file)
        elif experiment_type == "bpr":
            self.emb = Bprmf_embedding(self.FLAGS.is_training,
                                       self.FLAGS.embedding_config_file,
                                       prepare_data_behavior_ins.user_count,
                                       prepare_data_behavior_ins.item_count)
        else:
            self.emb = Lstur_embedding(self.FLAGS.is_training,
                                       self.FLAGS.embedding_config_file,
                                       prepare_data_behavior_ins.user_count,
                                       prepare_data_behavior_ins.item_count,
                                       prepare_data_behavior_ins.category_count,
                                       self.FLAGS.max_len)

        self.train_set, self.test_set = prepare_data_behavior_ins.get_train_test()
        self.logger.info('Get Train Test Data Process.\tCost time: %.2fs' %
                         (time.time() - start_time))

        # Training progress counters.
        self.global_step = 0
        self.one_epoch_step = 0
        self.now_epoch = 0
Ejemplo n.º 4
0
    def __init__(self):
        """Set up flags, logging, the dataset and the behavior embedding.

        The flag set is looked up under the configured experiment name.
        ``FLAGS.type`` selects the raw-data loader; the loaded data is then
        turned into a train/test split and the time-aware attention
        embedding is built from the resulting user/item/category counts.

        Raises:
            ValueError: if ``FLAGS.type`` names no supported dataset.
        """
        start_time = time.time()
        model_parameter_ins = model_parameter()
        experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
        self.FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS

        log_ins = create_log(type=self.FLAGS.type,
                             experiment_type=self.FLAGS.experiment_type,
                             version=self.FLAGS.version)

        self.logger = log_ins.logger
        self.logger.info("hello world the experiment begin")

        # One chain for every dataset type, so an unknown type is reported
        # here instead of surfacing later as an unbound local variable.
        dataset_type = self.FLAGS.type
        if dataset_type == "yoochoose":
            get_origin_data_ins = Get_yoochoose_data(FLAGS=self.FLAGS)
        elif dataset_type == "movielen":
            get_origin_data_ins = Get_movie_data(FLAGS=self.FLAGS)
        elif dataset_type == "tmall":
            get_origin_data_ins = Get_tmall_data(FLAGS=self.FLAGS)
        elif dataset_type == "movie_tv":
            get_origin_data_ins = Get_amazon_data_movie_tv(FLAGS=self.FLAGS)
        elif dataset_type == "elec":
            get_origin_data_ins = Get_amazon_data_elec(FLAGS=self.FLAGS)
        elif dataset_type == "music":
            get_origin_data_ins = Get_amazon_data_music(FLAGS=self.FLAGS)
        elif dataset_type == "taobaoapp":
            get_origin_data_ins = Get_taobaoapp_data(FLAGS=self.FLAGS)
        elif dataset_type == "beauty":
            get_origin_data_ins = Get_amazon_data_beauty(FLAGS=self.FLAGS)
        elif dataset_type == "brightkite":
            get_origin_data_ins = Get_BrightKite_data(FLAGS=self.FLAGS)
        elif dataset_type == "order":
            get_origin_data_ins = Get_Order_data(FLAGS=self.FLAGS)
        else:
            raise ValueError("unsupported dataset type: %r" % (dataset_type,))

        # Statistics are gathered for every dataset except tmall, which the
        # original flow also skipped.
        # NOTE(review): confirm the tmall exception is intentional.
        if dataset_type != "tmall":
            get_origin_data_ins.getDataStatistics()

        prepare_data_behavior_ins = prepare_data_base(
            self.FLAGS, get_origin_data_ins.origin_data)
        self.train_set, self.test_set = (
            prepare_data_behavior_ins.get_train_test())

        self.logger.info('DataHandle Process.\tCost time: %.2fs' %
                         (time.time() - start_time))
        # Restart the clock for the embedding phase.
        start_time = time.time()

        self.emb = Behavior_embedding_time_aware_attention(
            is_training=self.FLAGS.is_training,
            user_count=prepare_data_behavior_ins.user_count,
            item_count=prepare_data_behavior_ins.item_count,
            category_count=prepare_data_behavior_ins.category_count,
            max_length_seq=self.FLAGS.length_of_user_history)

        self.logger.info('Get Train Test Data Process.\tCost time: %.2fs' %
                         (time.time() - start_time))

        self.item_category_dic = prepare_data_behavior_ins.item_category_dic
        # Training progress counters.
        self.global_step = 0
        self.one_epoch_step = 0
        self.now_epoch = 0
Ejemplo n.º 5
0
    def __init__(self):
        """Resolve the experiment's flag set and store it on ``self.FLAGS``."""

        # Timestamp taken at the start of construction; it is not read within
        # the lines visible here.
        start_time = time.time()
        model_parameter_ins = model_parameter()
        # The experiment name selects which flag set get_parameter() returns.
        experiment_name = model_parameter_ins.flags.FLAGS.experiment_name
        self.FLAGS = model_parameter_ins.get_parameter(experiment_name).FLAGS
Ejemplo n.º 6
0
    def __init__(self):
        """Resolve the flag set for the configured data name into ``self.FLAGS``."""

        # Timestamp taken at the start of construction; it is not read within
        # the lines visible here.
        start_time = time.time()
        model_parameter_ins = model_parameter()
        # The data name selects which flag set get_parameter() returns.
        data_name = model_parameter_ins.flags.FLAGS.data_name
        self.FLAGS = model_parameter_ins.get_parameter(data_name).FLAGS