コード例 #1
0
def load_data(data_dir, file='beer_50000.json', cache_file='cache.pkl'):
    """Load beer reviews, reusing a pickle cache in ``data_dir`` when present.

    If ``data_dir / cache_file`` exists, its unpickled contents are returned
    unchanged. Otherwise every record produced by
    ``load_json(data_dir, file)`` is parsed and the result is pickled to
    ``data_dir / cache_file`` for subsequent calls.

    Args:
        data_dir: Directory containing the cache file (``str`` or ``Path``);
            also passed to ``load_json`` together with ``file``.
        file: Name of the data file handed to ``load_json``.
        cache_file: Name of the pickle cache inside ``data_dir``.

    Returns:
        A ``(reviews, beers)`` tuple. When parsed from file, ``reviews`` is a
        list of ``Review.from_json`` results and ``beers`` maps the integer
        ``beer/beerId`` to the ``Beer.from_json`` result built from the first
        review seen for that id.
    """
    data_dir = Path(data_dir)
    cache_path = data_dir / cache_file

    # NOTE: the cache is keyed only by ``cache_file``; calling with a
    # different ``file`` but the same ``cache_file`` returns the cached data.
    if cache_path.exists():
        logging.info("Loading beer data from cache...")
        # SECURITY: unpickling executes arbitrary code; only point this at a
        # cache file that was produced by this function / a trusted source.
        with open(cache_path, 'rb') as fp:
            reviews, beers = pickle.load(fp)
        return reviews, beers

    logging.info("Loading beer data from file...")

    beers = {}
    reviews = []
    skipped = 0
    for review in tqdm(load_json(data_dir, file)):
        beer_id = int(review['beer/beerId'])
        if beer_id not in beers:
            try:
                beers[beer_id] = Beer.from_json(review)
            except Exception:
                # Best-effort: drop reviews whose beer record cannot be
                # built. ``Exception`` (not a bare ``except``) so that
                # KeyboardInterrupt / SystemExit still abort the load.
                skipped += 1
                continue

        reviews.append(Review.from_json(review, beers[beer_id]))

    if skipped:
        logging.warning(
            "Skipped %d review(s) whose beer could not be parsed", skipped
        )

    with open(cache_path, 'wb') as fp:
        pickle.dump((reviews, beers), fp)
    return reviews, beers