def test_formatting_amazonBook_title_gives_correct_productId():
    """Formatting an Amazon book review sets product_id from the title's mapped id."""
    book_title = "Crime and Punishment"
    expected_product_id = 42069

    def get_id(map, key):
        # `map` is unused; only the title under test resolves to an id,
        # every other key yields None.
        return expected_product_id if key == book_title else None

    productid_service = IDMapService(get_id)
    formatted_review = review_pb2.Review()
    amazon_review = _get_amazon_reviewobject(title=book_title)

    _format_amazonBook_review(
        formatted_review,
        reviewObject=amazon_review,
        productid_map_service=productid_service)

    assert formatted_review.product_id == expected_product_id
def test_formatting_amazonBook_reviewAuthor_gives_correct_userId():
    """Formatting an Amazon book review sets user_id from the author's mapped id."""
    author_name = "NiallWalsh"
    expected_user_id = 1221

    def get_id(map, key):
        # `map` is unused; only the author under test resolves to an id,
        # every other key yields None.
        return expected_user_id if key == author_name else None

    userid_service = IDMapService(get_id)
    formatted_review = review_pb2.Review()
    amazon_review = _get_amazon_reviewobject(author=author_name)

    _format_amazonBook_review(
        formatted_review,
        reviewObject=amazon_review,
        userid_map_service=userid_service)

    assert formatted_review.user_id == expected_user_id
def test_formatting_yelp_chi_gives_correct_productid():
    """Formatting a Yelp Chicago review sets product_id from the mapped raw product id."""
    raw_product_id = "rpP9iZsT3NC-Z4pUtQGoiA"
    expected_product_id = 321321

    def get_id(map, key):
        # `map` is unused; only the product id under test resolves,
        # every other key yields None.
        return expected_product_id if key == raw_product_id else None

    productid_service = IDMapService(get_id)
    formatted_review = review_pb2.Review()
    chi_metadata = _get_chi_metadata(productid=raw_product_id)

    _format_yelp_chi_review(
        formatted_review,
        metadata=chi_metadata,
        productid_map_service=productid_service)

    assert formatted_review.product_id == expected_product_id
def test_formatting_yelp_chi_gives_correct_userid():
    """Formatting a Yelp Chicago review sets user_id from the mapped raw user id."""
    raw_user_id = "IErE0ydkkLfAoePgqrVdUQ"
    expected_user_id = 123123

    def get_id(map, key):
        # `map` is unused; only the user id under test resolves,
        # every other key yields None.
        return expected_user_id if key == raw_user_id else None

    userid_service = IDMapService(get_id)
    formatted_review = review_pb2.Review()
    chi_metadata = _get_chi_metadata(userid=raw_user_id)

    _format_yelp_chi_review(
        formatted_review,
        metadata=chi_metadata,
        userid_map_service=userid_service)

    assert formatted_review.user_id == expected_user_id
def _dataset_path(data_directory, relative_path):
    """Return the normalized path of ``relative_path`` below ``data_directory``."""
    # Plain concatenation followed by normpath: redundant separators (for
    # example from a trailing '/' on data_directory) are collapsed.
    return os.path.normpath(data_directory + '/' + relative_path)


def _add_paired_reviews(review_set, content_path, metadata_path,
                        format_review, *format_args):
    """Add one review to ``review_set`` for every line of ``content_path``.

    Each content line is passed to ``format_review`` together with the next
    line read from ``metadata_path`` and any extra ``format_args``.
    """
    with open(content_path, 'r') as content_file:
        with open(metadata_path, 'r') as metadata_file:
            for content_line in content_file:
                # readline() yields '' once the metadata file is exhausted,
                # so a shorter metadata file does not end the loop early.
                format_review(review_set.reviews.add(), content_line,
                              metadata_file.readline(), *format_args)


def _write_serialized(review_set, output_path):
    """Write ``review_set.SerializeToString()`` to ``output_path`` as bytes."""
    with open(output_path, 'wb') as output_file:
        output_file.write(review_set.SerializeToString())


def _write_as_text(review_set, output_path):
    """Write ``str(review_set)`` to ``output_path`` in text mode."""
    with open(output_path, 'w') as output_file:
        output_file.write(str(review_set))


def _parse_amazon_review_line(line):
    """Decode one line of the Amazon review file into a Python object.

    Only a single comma at the very end of the line is dropped before
    decoding.  Replacing every '},' in the line would also rewrite review
    text that happens to contain that sequence and would break nested
    objects.  (Presumably the trailing comma exists because each line is an
    element of a JSON array -- confirm against the data.)

    Raises whatever ``json.loads`` raises for a malformed line.
    """
    text = line.strip()
    if text.endswith(','):
        text = text[:-1]
    return json.loads(text)


def protify_data(data_directory):
    """Convert the raw review datasets under ``data_directory`` to ReviewSets.

    Reads the Yelp NYC, Yelp Zip, Yelp Chicago (hotels and restaurants) and
    Amazon books inputs and writes one output file per dataset into
    ``<data_directory>/normalizedData``.

    Args:
        data_directory: Directory containing the ``YelpData`` and
            ``amazonBooks`` inputs and the ``normalizedData`` output folder.

    Note:
        The NYC and Zip outputs are written as serialized bytes, while the
        Chicago and Amazon outputs are written as ``str(review_set)``.
        NOTE(review): confirm the mixed output formats are intentional.
    """
    # NYC and Zip: both are run through the NYC formatter.
    for source_folder, output_name in (('YelpNYC', 'yelpNYC'),
                                       ('YelpZip', 'yelpZip')):
        review_set = review_set_pb2.ReviewSet()
        _add_paired_reviews(
            review_set,
            _dataset_path(data_directory,
                          'YelpData/' + source_folder + '/reviewContent'),
            _dataset_path(data_directory,
                          'YelpData/' + source_folder + '/metadata'),
            format_yelp_nyc_review)
        _write_serialized(
            review_set,
            _dataset_path(data_directory, 'normalizedData/' + output_name))

    # Chicago: hotels and restaurants each get their own fresh id maps.
    chicago_datasets = (
        ('YelpData/YelpCHI/output_review_yelpHotelData_NRYRcleaned.txt',
         'YelpData/YelpCHI/output_meta_yelpHotelData_NRYRcleaned.txt',
         'normalizedData/yelpCHI-hotels'),
        ('YelpData/YelpCHI/output_review_yelpResData_NRYRcleaned.txt',
         'YelpData/YelpCHI/output_meta_yelpResData_NRYRcleaned.txt',
         'normalizedData/yelpCHI-restaurants'),
    )
    for content_name, metadata_name, output_name in chicago_datasets:
        userid_map_service = IDMapService(id_func)
        productid_map_service = IDMapService(id_func)
        review_set = review_set_pb2.ReviewSet()
        _add_paired_reviews(
            review_set,
            _dataset_path(data_directory, content_name),
            _dataset_path(data_directory, metadata_name),
            format_yelp_chi_review,
            userid_map_service,
            productid_map_service)
        _write_as_text(review_set,
                       _dataset_path(data_directory, output_name))

    # Amazon: a single file with one JSON review per line.
    review_set = review_set_pb2.ReviewSet()
    userid_map_service = IDMapService(id_func)
    productid_map_service = IDMapService(id_func)
    amazon_path = _dataset_path(data_directory, 'amazonBooks/reviewContent')
    with open(amazon_path, 'r') as amazon_file:
        for line in amazon_file:
            review_object = _parse_amazon_review_line(line)
            format_amazonBooks_review(review_set.reviews.add(), review_object,
                                      userid_map_service,
                                      productid_map_service)
    _write_as_text(
        review_set,
        _dataset_path(data_directory, 'normalizedData/amazonBooks'))
def _get_id_map_service():
    """Build an IDMapService whose id function returns 1 for every key."""

    def get_id(map, key):
        # Both arguments are ignored; the id is always 1.
        return 1

    constant_id_service = IDMapService(get_id)
    return constant_id_service