예제 #1
0
def test_formatting_amazonBook_title_gives_correct_productId():
    """Formatting an Amazon book review sets product_id from the title lookup.

    The id function handed to the map service answers only for the title
    under test, so the assertion passes only if that title was looked up.
    """
    book_title = "Crime and Punishment"
    expected_product_id = 42069

    def lookup_product_id(map, key):
        # Parameter names are left as-is in case the service passes them by
        # keyword. Any key other than the title under test yields None.
        return expected_product_id if key == book_title else None

    product_ids = IDMapService(lookup_product_id)
    formatted = review_pb2.Review()
    amazon_review = _get_amazon_reviewobject(title=book_title)

    _format_amazonBook_review(
        formatted,
        reviewObject=amazon_review,
        productid_map_service=product_ids,
    )

    assert formatted.product_id == expected_product_id
예제 #2
0
def test_formatting_amazonBook_reviewAuthor_gives_correct_userId():
    """Formatting an Amazon book review sets user_id from the author lookup.

    The id function handed to the map service answers only for the author
    under test, so the assertion passes only if that author was looked up.
    """
    author_name = "NiallWalsh"
    expected_user_id = 1221

    def lookup_user_id(map, key):
        # Parameter names are left as-is in case the service passes them by
        # keyword. Any key other than the author under test yields None.
        return expected_user_id if key == author_name else None

    user_ids = IDMapService(lookup_user_id)
    formatted = review_pb2.Review()
    amazon_review = _get_amazon_reviewobject(author=author_name)

    _format_amazonBook_review(
        formatted,
        reviewObject=amazon_review,
        userid_map_service=user_ids,
    )

    assert formatted.user_id == expected_user_id
예제 #3
0
def test_formatting_yelp_chi_gives_correct_productid():
    """Formatting a Yelp CHI review sets product_id from the product-id lookup.

    The id function handed to the map service answers only for the raw
    product id under test, so the assertion passes only if it was looked up.
    """
    raw_product_id = "rpP9iZsT3NC-Z4pUtQGoiA"
    expected_product_id = 321321

    def lookup_product_id(map, key):
        # Parameter names are left as-is in case the service passes them by
        # keyword. Any key other than the id under test yields None.
        return expected_product_id if key == raw_product_id else None

    product_ids = IDMapService(lookup_product_id)
    formatted = review_pb2.Review()
    chi_metadata = _get_chi_metadata(productid=raw_product_id)

    _format_yelp_chi_review(
        formatted,
        metadata=chi_metadata,
        productid_map_service=product_ids,
    )

    assert formatted.product_id == expected_product_id
예제 #4
0
def test_formatting_yelp_chi_gives_correct_userid():
    """Formatting a Yelp CHI review sets user_id from the user-id lookup.

    The id function handed to the map service answers only for the raw user
    id under test, so the assertion passes only if it was looked up.
    """
    raw_user_id = "IErE0ydkkLfAoePgqrVdUQ"
    expected_user_id = 123123

    def lookup_user_id(map, key):
        # Parameter names are left as-is in case the service passes them by
        # keyword. Any key other than the id under test yields None.
        return expected_user_id if key == raw_user_id else None

    user_ids = IDMapService(lookup_user_id)
    formatted = review_pb2.Review()
    chi_metadata = _get_chi_metadata(userid=raw_user_id)

    _format_yelp_chi_review(
        formatted,
        metadata=chi_metadata,
        userid_map_service=user_ids,
    )

    assert formatted.user_id == expected_user_id
예제 #5
0
def _protify_parse_json_line(line):
    """Parse one line of a JSON-records file, tolerating a trailing comma.

    Only the comma that terminates the line is dropped. Occurrences of
    ``},`` inside the record (nested objects, string values) are left
    untouched, so such records are neither corrupted nor rejected.
    """
    return json.loads(line.strip().rstrip(','))


def _protify_collect_paired(content_path, metadata_path, format_review,
                            *map_services):
    """Build a ReviewSet from a review file and its parallel metadata file.

    Each line of ``content_path`` is paired with the next line read from
    ``metadata_path`` and handed to ``format_review`` together with a freshly
    added review message and any extra ``map_services``.
    """
    review_set = review_set_pb2.ReviewSet()
    with open(os.path.normpath(content_path), 'r') as content_file, \
            open(os.path.normpath(metadata_path), 'r') as metadata_file:
        for line in content_file:
            # The metadata file is advanced one line per review line; if it
            # runs out first, readline() supplies '' for the rest.
            format_review(review_set.reviews.add(), line,
                          metadata_file.readline(), *map_services)
    return review_set


def _protify_write_serialized(review_set, path):
    """Write ``review_set`` to ``path`` as serialized bytes."""
    with open(os.path.normpath(path), 'wb') as out:
        out.write(review_set.SerializeToString())


def _protify_write_text(review_set, path):
    """Write ``str(review_set)`` to ``path`` in text mode."""
    with open(os.path.normpath(path), 'w') as out:
        out.write(str(review_set))


def protify_data(data_directory):
    """Convert the raw review datasets under ``data_directory`` to ReviewSets.

    Reads the Yelp NYC, Yelp Zip, Yelp Chicago (hotels and restaurants) and
    Amazon books inputs and writes one output file per dataset beneath
    ``data_directory + '/normalizedData'``. Datasets are processed one after
    another, so outputs written before a failure remain on disk.

    Args:
        data_directory: Root directory as a string; the sub-paths are
            appended with ``+`` and then normalized.

    Raises:
        OSError: If an input file cannot be opened or an output file cannot
            be written.
        json.JSONDecodeError: If a line of the Amazon input is not a JSON
            record (after dropping a trailing comma).
    """
    # NYC
    review_set = _protify_collect_paired(
        data_directory + '/YelpData/YelpNYC/reviewContent',
        data_directory + '/YelpData/YelpNYC/metadata',
        format_yelp_nyc_review)
    _protify_write_serialized(review_set,
                              data_directory + '/normalizedData/yelpNYC')

    # Zip
    # NOTE(review): the Zip dataset is run through the NYC formatter --
    # presumably both share a layout; confirm.
    review_set = _protify_collect_paired(
        data_directory + '/YelpData/YelpZip/reviewContent',
        data_directory + '/YelpData/YelpZip/metadata',
        format_yelp_nyc_review)
    _protify_write_serialized(review_set,
                              data_directory + '/normalizedData/yelpZip')

    # Chicago
    # NOTE(review): Chicago and Amazon outputs are written as str(review_set)
    # in text mode, unlike the serialized NYC/Zip outputs -- confirm that the
    # difference is intentional.
    chicago_datasets = (
        ('/YelpData/YelpCHI/output_review_yelpHotelData_NRYRcleaned.txt',
         '/YelpData/YelpCHI/output_meta_yelpHotelData_NRYRcleaned.txt',
         '/normalizedData/yelpCHI-hotels'),
        ('/YelpData/YelpCHI/output_review_yelpResData_NRYRcleaned.txt',
         '/YelpData/YelpCHI/output_meta_yelpResData_NRYRcleaned.txt',
         '/normalizedData/yelpCHI-restaurants'),
    )
    for content_suffix, metadata_suffix, output_suffix in chicago_datasets:
        # Each Chicago dataset gets its own pair of id map services.
        userid_map_service = IDMapService(id_func)
        productid_map_service = IDMapService(id_func)
        review_set = _protify_collect_paired(
            data_directory + content_suffix,
            data_directory + metadata_suffix,
            format_yelp_chi_review,
            userid_map_service,
            productid_map_service)
        _protify_write_text(review_set, data_directory + output_suffix)

    # Amazon
    review_set = review_set_pb2.ReviewSet()

    userid_map_service = IDMapService(id_func)
    productid_map_service = IDMapService(id_func)

    with open(os.path.normpath(data_directory + '/amazonBooks/reviewContent'),
              'r') as f:
        for line in f:
            reviewObj = _protify_parse_json_line(line)
            format_amazonBooks_review(review_set.reviews.add(), reviewObj,
                                      userid_map_service,
                                      productid_map_service)

    _protify_write_text(review_set,
                        data_directory + '/normalizedData/amazonBooks')
예제 #6
0
def _get_id_map_service():
    """Return an IDMapService whose id function answers 1 for every call."""
    # Both arguments are ignored; the parameter names are kept unchanged in
    # case the service passes them by keyword.
    return IDMapService(lambda map, key: 1)