def test_sampler():
    """Check that ``WarpSampler`` produces batches of the requested size.

    Downloads and preprocesses the ``reviews_Electronics_5`` dataset into a
    tab-separated file, loads and splits it with ``SASRecDataSet``, then draws
    a single batch and verifies that each of the four returned components
    contains ``batch_size`` entries.
    """
    batch_size = 8
    maxlen = 50
    data_dir = os.path.join("tests", "resources", "deeprec", "sasrec")
    dataset = "reviews_Electronics_5"
    reviews_name = dataset + ".json"
    outfile = os.path.join(data_dir, dataset + ".txt")

    reviews_file = os.path.join(data_dir, reviews_name)
    download_and_extract(reviews_name, reviews_file)
    reviews_output = _reviews_preprocessing(reviews_file)
    # The two returned values are not needed here; unpacking still checks arity.
    _, _ = data_process_with_time(reviews_output, outfile, K=10, sep="\t")

    # Load the processed file and create the train/validation/test splits.
    data = SASRecDataSet(filename=outfile, col_sep="\t")
    data.split()

    sampler = WarpSampler(
        data.user_train,
        data.usernum,
        data.itemnum,
        batch_size=batch_size,
        maxlen=maxlen,
        n_workers=3,
    )
    try:
        u, seq, pos, neg = sampler.next_batch()
    finally:
        # The sampler was started with n_workers=3; close() (per the library's
        # WarpSampler API) stops those workers so they do not outlive the
        # test, even when fetching the batch fails.
        sampler.close()

    assert len(u) == batch_size
    assert len(seq) == batch_size
    assert len(pos) == batch_size
    assert len(neg) == batch_size
def test_model_sum(deeprec_resource_path, deeprec_config_path):
    """Smoke-test ``SUMModel``: build it from ``sum.yaml``, then eval, fit, predict.

    When the training file is missing under the ``slirec`` resource directory,
    the raw review/meta files are downloaded and preprocessed first.
    """
    # Number of negative instances paired with each positive instance.
    train_num_ngs = 4  # training
    valid_num_ngs = 4  # validation
    test_num_ngs = 9  # testing

    data_path = os.path.join(deeprec_resource_path, "slirec")
    yaml_file = os.path.join(deeprec_config_path, "sum.yaml")
    train_file = os.path.join(data_path, "train_data")
    valid_file = os.path.join(data_path, "valid_data")
    test_file = os.path.join(data_path, "test_data")
    output_file = os.path.join(data_path, "output.txt")

    if not os.path.exists(train_file):
        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = os.path.join(data_path, reviews_name)
        meta_file = os.path.join(data_path, meta_name)

        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)

        # Positional order: raw inputs, the three split files, the three vocabs.
        data_preprocessing(
            reviews_file,
            meta_file,
            train_file,
            valid_file,
            test_file,
            os.path.join(data_path, "user_vocab.pkl"),
            os.path.join(data_path, "item_vocab.pkl"),
            os.path.join(data_path, "category_vocab.pkl"),
            sample_rate=0.005,  # keep only a small item sample for this test
            valid_num_ngs=valid_num_ngs,
            test_num_ngs=test_num_ngs,
        )

    hparams = prepare_hparams(
        yaml_file,
        learning_rate=0.01,
        epochs=1,
        train_num_ngs=train_num_ngs,
    )
    assert hparams is not None

    model = SUMModel(hparams, SequentialIterator)

    eval_result = model.run_eval(valid_file, num_ngs=valid_num_ngs)
    assert eval_result is not None

    fitted = model.fit(train_file, valid_file, valid_num_ngs=valid_num_ngs)
    assert isinstance(fitted, BaseModel)

    prediction = model.predict(valid_file, output_file)
    assert prediction is not None
def test_Sequential_Iterator(deeprec_resource_path, deeprec_config_path):
    """Check that ``SequentialIterator`` yields dict batches from the train file.

    The ``slirec`` data files are downloaded and preprocessed first when the
    training file does not exist yet.
    """
    data_path = os.path.join(deeprec_resource_path, "slirec")
    yaml_file = os.path.join(deeprec_config_path, "sli_rec.yaml")
    train_file = os.path.join(data_path, "train_data")

    if not os.path.exists(train_file):
        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = os.path.join(data_path, reviews_name)
        meta_file = os.path.join(data_path, meta_name)

        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)

        # Positional order: raw inputs, the three split files, the three vocabs.
        data_preprocessing(
            reviews_file,
            meta_file,
            train_file,
            os.path.join(data_path, "valid_data"),
            os.path.join(data_path, "test_data"),
            os.path.join(data_path, "user_vocab.pkl"),
            os.path.join(data_path, "item_vocab.pkl"),
            os.path.join(data_path, "category_vocab.pkl"),
            sample_rate=0.01,  # keep only a small item sample for this test
            valid_num_ngs=4,  # negatives per positive, validation
            test_num_ngs=9,  # negatives per positive, testing
        )

    hparams = prepare_hparams(yaml_file)
    iterator = SequentialIterator(hparams, tf.Graph())
    assert iterator is not None

    for batch in iterator.load_data_from_file(train_file):
        assert isinstance(batch, dict)
def sequential_files(deeprec_resource_path):
    """Download and preprocess the ``slirec`` sequential data files.

    Args:
        deeprec_resource_path: Base resource directory; files are placed in
            its ``slirec`` subdirectory.

    Returns:
        tuple: ``(data_path, user_vocab, item_vocab, cate_vocab)`` — the data
        directory followed by the paths of the three vocabulary pickle files.
    """
    data_path = os.path.join(deeprec_resource_path, "slirec")

    def in_data_dir(filename):
        # Resolve a file name inside the slirec data directory.
        return os.path.join(data_path, filename)

    reviews_name = "reviews_Movies_and_TV_5.json"
    meta_name = "meta_Movies_and_TV.json"
    reviews_file = in_data_dir(reviews_name)
    meta_file = in_data_dir(meta_name)

    user_vocab = in_data_dir("user_vocab.pkl")
    item_vocab = in_data_dir("item_vocab.pkl")
    cate_vocab = in_data_dir("category_vocab.pkl")

    download_and_extract(reviews_name, reviews_file)
    download_and_extract(meta_name, meta_file)

    # Positional order: raw inputs, the three split files, the three vocabs.
    data_preprocessing(
        reviews_file,
        meta_file,
        in_data_dir("train_data"),
        in_data_dir("valid_data"),
        in_data_dir("test_data"),
        user_vocab,
        item_vocab,
        cate_vocab,
        sample_rate=0.01,  # keep only a small item sample
        valid_num_ngs=4,  # negatives per positive, validation
        test_num_ngs=9,  # negatives per positive, testing
    )

    return data_path, user_vocab, item_vocab, cate_vocab
def test_prepare_data():
    """Check that splitting the processed SASRec data gives non-empty splits.

    Downloads and preprocesses ``reviews_Electronics_5``, writes the
    tab-separated output file, loads it with ``SASRecDataSet`` and asserts
    that the train, validation and test user collections are all non-empty.
    """
    dataset = "reviews_Electronics_5"
    data_dir = os.path.join("tests", "resources", "deeprec", "sasrec")
    reviews_name = dataset + ".json"
    reviews_file = os.path.join(data_dir, reviews_name)
    outfile = os.path.join(data_dir, dataset + ".txt")

    download_and_extract(reviews_name, reviews_file)
    processed_reviews = _reviews_preprocessing(reviews_file)
    # Both returned values are unused; the unpacking is kept as in the
    # original so a change in return arity would still surface here.
    _first, _second = data_process_with_time(
        processed_reviews, outfile, K=10, sep="\t"
    )

    # Build the dataset object and produce its train/validation/test splits.
    sasrec_data = SASRecDataSet(filename=outfile, col_sep="\t")
    sasrec_data.split()

    assert len(sasrec_data.user_train) > 0
    assert len(sasrec_data.user_valid) > 0
    assert len(sasrec_data.user_test) > 0