/
gpscrap.py
42 lines (30 loc) · 1.23 KB
/
gpscrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# -*- coding: utf-8 -*-
"""
Google Play Review Scrapper
Usage:
gpscrap.py --appid=<google-play-application-id> --pagenum=<page_num> [--format=<json_or_html>]
Options:
--appid=<app_id> The ID of the app where the reviews will be scrapped.
--pagenum=<page_num> The review page number.
--format=<json_or_html> Optional parameter. Specifies the output format, either json or html. Outputs json by default.
"""
from docopt import docopt
from gpengine.scrapengine import ScrapEngine
from gputils.gpparser import GooglePlayResponseParser
from gputils.gptransformer import GooglePlayParsedResponseTransformer
#to generate the html output
from json2html import *
def main():
    """Scrape one page of Google Play reviews and print the result.

    Command-line options are parsed by docopt from the module docstring:
    ``--appid`` and ``--pagenum`` select what to fetch, and ``--format``
    chooses the output representation.  The response is fetched, parsed and
    transformed; when ``--format`` is ``html`` the transformed result is
    additionally converted with json2html.  The final result is printed to
    standard output.
    """
    arguments = docopt(__doc__, version='Google Play Review Scrapper V1.0')
    application_id = arguments["--appid"]
    page_number = arguments["--pagenum"]
    output_format = arguments["--format"]

    # Fetch the raw response for the requested application and page.
    response = ScrapEngine(application_id, page_number).go()

    # Parse the raw response, then transform it into the printable result
    # (JSON according to the usage text).
    parsed_response = GooglePlayResponseParser(response).parseResponse()
    output = GooglePlayParsedResponseTransformer(parsed_response).transform()

    # Only the exact value "html" triggers conversion; any other value,
    # including an omitted --format, prints the transformer output as-is.
    if output_format == "html":
        output = json2html.convert(json=output)

    print(output)


if __name__ == '__main__':
    main()